#!/usr/bin/env python3
"""
Exporte les derniers articles WordPress via l’API REST.

Lit les identifiants dans .secrets/.env (non versionné) :
  - format recommandé (une variable par ligne) :
      WP_BASE_URL=https://www.arnaud-merigeau.fr
      WP_USER=...
      WP_APPLICATION_PASSWORD=...   # mot de passe d’application WordPress
  - ou format actuel simplifié :
      id: utilisateur
      appllication password: xxxxx   # typo tolérée

Sans auth valide : export public (pas de meta Yoast « focus » complète).

Usage :
  python3 scripts/wp_fetch_posts.py
  python3 scripts/wp_fetch_posts.py --output exports/posts.csv --limit 20
"""

from __future__ import annotations

import argparse
import base64
import csv
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from html import unescape
from pathlib import Path


def strip_html(html: str, max_len: int = 500) -> str:
    """Return *html* as plain text, truncated to at most *max_len* characters.

    Tags are replaced by a space, HTML entities are decoded, and every run of
    whitespace is collapsed to a single space. A falsy input (``""`` or
    ``None``) yields ``""``.
    """
    if not html:
        return ""
    # Remove tags first so that escaped markup such as "&lt;b&gt;" survives as
    # literal text instead of being stripped as if it were a real tag.
    text = re.sub(r"<[^>]+>", " ", html)
    # Decode entities BEFORE collapsing whitespace: "&nbsp;" decodes to U+00A0,
    # which "\s" matches, so it is normalized like any other space.
    text = unescape(text)
    text = re.sub(r"\s+", " ", text).strip()
    # Truncation may cut right after a space; do not leave it dangling.
    return text[:max_len].rstrip()


def _strip_inline_comment(value: str) -> str:
    """Drop a trailing ``# comment`` that starts the value or follows whitespace."""
    # A "#" glued to the preceding text (e.g. a URL fragment) is kept.
    return re.sub(r"(?:^|\s+)#.*$", "", value.strip())


def _unquote_env_value(raw: str) -> str:
    """Return the value of a ``KEY=value`` line without quotes or inline comment."""
    value = raw.strip()
    if value[:1] in ('"', "'"):
        closing = value.find(value[0], 1)
        if closing != -1:
            # Quoted value: keep the content verbatim (a "#" inside the quotes
            # is data) and ignore whatever follows the closing quote.
            return value[1:closing]
    return _strip_inline_comment(value).strip('"').strip("'")


def load_secrets(path: Path) -> dict[str, str]:
    """Parse the credentials file at *path* into a configuration mapping.

    Two line formats are accepted:

    * ``KEY=value`` -- stored under ``KEY`` as written; surrounding quotes and
      a trailing ``# comment`` are removed.
    * ``key: value`` -- recognized keys are mapped to ``WP_USER``,
      ``WP_APPLICATION_PASSWORD``, ``WP_BASE_URL`` or ``_WP_PASSWORD_LEGACY``;
      a trailing ``# comment`` is removed. Values already set are not
      overwritten (``setdefault``).

    Blank lines, lines starting with ``#`` and a bare ``wordpress:`` header are
    skipped. ``WP_BASE_URL`` falls back to the default site when missing or
    empty and is returned without a trailing slash. Spaces are removed from
    ``WP_APPLICATION_PASSWORD``.

    Raises:
        OSError: if the file cannot be read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # "utf-8-sig" also reads plain UTF-8 and drops a leading BOM, which would
    # otherwise become part of the first key.
    raw = path.read_text(encoding="utf-8-sig")
    cfg: dict[str, str] = {}

    for raw_line in raw.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or line == "wordpress:":
            continue

        # KEY=value (recommended). A ":" before the "=" means the "=" belongs
        # to the value of a "key: value" line (e.g. a URL with a query string).
        key_part, eq, rest = line.partition("=")
        if eq and ":" not in key_part:
            cfg[key_part.strip()] = _unquote_env_value(rest)
            continue

        # key: value (simplified format)
        if ":" in line:
            k, _, v = line.partition(":")
            key = k.strip().lower()
            val = _strip_inline_comment(v)
            if key in ("id", "user", "login"):
                cfg.setdefault("WP_USER", val)
            elif "application" in key.replace(" ", "") or "appllication" in key:
                # The misspelling "appllication" is tolerated on purpose.
                cfg.setdefault("WP_APPLICATION_PASSWORD", val)
            elif key == "password" and "WP_APPLICATION_PASSWORD" not in cfg:
                cfg["_WP_PASSWORD_LEGACY"] = val
            elif key in ("wp_base_url", "base_url", "url"):
                cfg.setdefault("WP_BASE_URL", val.rstrip("/"))

    # An empty "WP_BASE_URL=" must not shadow the default site.
    cfg["WP_BASE_URL"] = (cfg.get("WP_BASE_URL") or "https://www.arnaud-merigeau.fr").rstrip("/")
    # Application password: the spaces WordPress displays are not needed for Basic Auth.
    if "WP_APPLICATION_PASSWORD" in cfg:
        cfg["WP_APPLICATION_PASSWORD"] = cfg["WP_APPLICATION_PASSWORD"].replace(" ", "")
    return cfg


def api_request(url: str, user: str | None, password: str | None) -> tuple[int, bytes]:
    """Perform a GET on *url* and return ``(status_code, body_bytes)``.

    HTTP Basic credentials are attached only when both *user* and *password*
    are truthy. An HTTP error response is not raised: its status code and body
    are returned like a successful one.
    """
    request = urllib.request.Request(url)
    request.add_header("Accept", "application/json")
    request.add_header("User-Agent", "arnaudmerigeau-wp-fetch/1.0")

    if user and password:
        credentials = f"{user}:{password}".encode()
        request.add_header("Authorization", f"Basic {base64.b64encode(credentials).decode()}")

    try:
        response = urllib.request.urlopen(request, timeout=60)
    except urllib.error.HTTPError as http_error:
        # Hand the error status and payload back to the caller.
        return http_error.code, http_error.read()

    with response:
        return response.getcode(), response.read()


def article_keywords(yoast: dict) -> str:
    """Return the keywords of the first ``Article`` node in the Yoast schema graph.

    *yoast* is the ``yoast_head_json`` mapping of a post. A list of keywords is
    joined with ``", "``; any other truthy value is converted with ``str``.
    Returns ``""`` when the schema, the graph or the keywords are missing.
    """
    schema = yoast.get("schema") or {}
    if not isinstance(schema, dict):
        return ""
    # "@graph" may be present but null, so ``.get(key, [])`` is not enough.
    for node in schema.get("@graph") or []:
        if not isinstance(node, dict):
            continue
        # JSON-LD allows "@type" to be a single string or a list of types,
        # e.g. ["Article", "BlogPosting"].
        types = node.get("@type")
        if isinstance(types, str):
            types = (types,)
        if not isinstance(types, (list, tuple)) or "Article" not in types:
            continue
        kw = node.get("keywords")
        if isinstance(kw, list):
            return ", ".join(str(x) for x in kw)
        if kw:
            return str(kw)
    return ""


def fetch_posts(
    base: str,
    user: str | None,
    password: str | None,
    limit: int,
    context: str,
) -> list[dict]:
    """Fetch up to *limit* most recent posts from the WordPress REST API.

    Posts are requested newest first with embedded resources (``_embed``).
    The API caps ``per_page`` at 100, so larger limits are fetched page by
    page.

    Args:
        base: Site root URL; a trailing slash is ignored.
        user: Login for HTTP Basic auth, or ``None``.
        password: Application password for HTTP Basic auth, or ``None``.
        limit: Maximum number of posts to return.
        context: Value of the REST ``context`` query parameter.

    Returns:
        The decoded JSON post objects, at most *limit* of them.

    Raises:
        RuntimeError: on a non-200 response (the message starts with
            ``HTTP <code>``) or when the body is not a JSON array.
    """
    max_per_page = 100  # cap enforced by the WordPress REST API
    # The page size must stay constant across requests, otherwise the page
    # offsets would not line up.
    per_page = min(limit, max_per_page)
    endpoint = f"{base.rstrip('/')}/wp-json/wp/v2/posts"

    posts: list[dict] = []
    page = 1
    while True:
        q = urllib.parse.urlencode(
            {
                "per_page": per_page,
                "page": page,
                "orderby": "date",
                "order": "desc",
                "context": context,
                "_embed": "1",
            }
        )
        code, body = api_request(f"{endpoint}?{q}", user, password)
        if code == 400 and page > 1:
            # Asking for a page past the last one is answered with 400; this
            # happens when the total is an exact multiple of the page size.
            break
        if code != 200:
            raise RuntimeError(f"HTTP {code}: {body[:500].decode(errors='replace')}")
        try:
            batch = json.loads(body.decode())
        except ValueError as exc:  # JSONDecodeError and UnicodeDecodeError
            raise RuntimeError(f"Réponse JSON invalide de l'API WordPress : {exc}") from exc
        if not isinstance(batch, list):
            raise RuntimeError("Réponse inattendue de l'API WordPress : une liste d'articles était attendue.")

        posts.extend(batch)
        # A short page means there is nothing left to fetch.
        if len(batch) < per_page or len(posts) >= limit:
            break
        page += 1

    return posts[:limit]


def row_from_post(p: dict) -> dict[str, str]:
    """Flatten one REST post object *p* into a CSV row of strings.

    Missing or ``null`` fields produce empty strings, so an empty dict yields
    a row with every column present and blank. Category and tag names are
    read from the embedded ``wp:term`` groups.
    """
    # ``or {}`` also covers keys that are present but null.
    title = strip_html((p.get("title") or {}).get("rendered", ""), 500)
    y = p.get("yoast_head_json") or {}
    meta = p.get("meta") or {}
    if not isinstance(meta, dict):
        # Guard against a non-mapping "meta" (for example a JSON array).
        meta = {}

    names_by_taxonomy: dict[str, list[str]] = {"category": [], "post_tag": []}
    for group in (p.get("_embedded") or {}).get("wp:term") or []:
        for t in group:
            if not isinstance(t, dict):
                continue
            bucket = names_by_taxonomy.get(t.get("taxonomy"))
            if bucket is not None:
                # Term names can carry HTML entities (e.g. "&amp;"); decode
                # them so the CSV holds the readable name.
                bucket.append(unescape(str(t.get("name") or "")))

    focus = meta.get("_yoast_wpseo_focuskw") or meta.get("yoast_wpseo_focuskw") or ""

    return {
        "post_id": str(p.get("id", "")),
        "date": str(p.get("date", "")),
        "status": str(p.get("status", "")),
        "title": title,
        "slug": str(p.get("slug", "")),
        "link": str(p.get("link", "")),
        "categories": ", ".join(names_by_taxonomy["category"]),
        "tags": ", ".join(names_by_taxonomy["post_tag"]),
        "yoast_seo_title": str(y.get("title") or y.get("og_title") or ""),
        "yoast_meta_description": str(y.get("description") or y.get("og_description") or ""),
        "yoast_focus_keyword": str(focus),
        "yoast_schema_keywords": article_keywords(y),
        "excerpt": strip_html((p.get("excerpt") or {}).get("rendered", ""), 800),
    }


def _write_csv(stream, fieldnames: list[str], rows: list[dict[str, str]]) -> None:
    """Write the header line and *rows* as CSV to the open text *stream*."""
    writer = csv.DictWriter(stream, fieldnames=fieldnames, extrasaction="ignore")
    writer.writeheader()
    writer.writerows(rows)


def main() -> int:
    """Command-line entry point: export the latest posts as CSV.

    Reads the credentials file, fetches the posts (authenticated with
    ``context=edit`` when a user and password are configured, otherwise
    public with ``context=view``) and writes the CSV to ``--output`` or to
    stdout. After an HTTP 401 the export is retried once without credentials.

    Returns:
        ``0`` on success, ``1`` when the credentials file is missing or
        unreadable or when fetching fails.
    """
    ap = argparse.ArgumentParser(description="Export CSV des articles WordPress (REST).")
    ap.add_argument(
        "--env",
        type=Path,
        default=Path(__file__).resolve().parents[1] / ".secrets" / ".env",
        help="Chemin vers .env",
    )
    ap.add_argument("--limit", type=int, default=20, help="Nombre d’articles")
    ap.add_argument("--output", "-o", type=Path, help="Fichier CSV (défaut : stdout)")
    args = ap.parse_args()

    if args.limit < 1:
        # Reject the value up front instead of letting the API answer 400.
        ap.error("--limit doit être un entier supérieur ou égal à 1")

    if not args.env.is_file():
        print(f"Fichier introuvable : {args.env}", file=sys.stderr)
        return 1

    try:
        cfg = load_secrets(args.env)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Lecture impossible : {args.env} ({e})", file=sys.stderr)
        return 1

    base = (cfg.get("WP_BASE_URL") or "https://www.arnaud-merigeau.fr").rstrip("/")
    user = cfg.get("WP_USER") or cfg.get("WP_USERNAME")
    password = cfg.get("WP_APPLICATION_PASSWORD")

    context = "edit" if user and password else "view"
    if password and len(password) < 24:
        print(
            "Attention : un mot de passe d’application WordPress fait 24 caractères "
            "(souvent affiché avec des espaces). Le tien semble tronqué — l’API renverra 401.",
            file=sys.stderr,
        )

    # RuntimeError: non-200 answer reported by fetch_posts.
    # OSError: network failures (urllib's URLError and timeouts derive from it).
    # ValueError: malformed URL or undecodable JSON.
    fetch_errors = (RuntimeError, OSError, ValueError)
    used_public_fallback = False
    try:
        posts = fetch_posts(base, user, password, args.limit, context=context)
    except fetch_errors as e:
        msg = str(e)
        # Match the status prefix only: a bare substring test would also fire
        # on "401" appearing somewhere in an unrelated error body.
        auth_refused = isinstance(e, RuntimeError) and msg.startswith("HTTP 401")
        if not (auth_refused and user and password):
            print(msg, file=sys.stderr)
            return 1
        print(
            "Auth refusée (401) : identifiants refusés par WordPress — export en mode public (context=view).",
            file=sys.stderr,
        )
        print(
            "Causes fréquentes : mot de passe d’application incomplet ou expiré ; "
            "mauvais login ; mots de passe d’application désactivés sur le site.",
            file=sys.stderr,
        )
        used_public_fallback = True
        try:
            posts = fetch_posts(base, None, None, args.limit, context="view")
        except fetch_errors as retry_error:
            # The public retry can fail too; report it instead of crashing.
            print(str(retry_error), file=sys.stderr)
            return 1

    if used_public_fallback:
        print(
            "Corrige WP_APPLICATION_PASSWORD dans .secrets/.env (copier-coller les 24 caractères "
            "depuis Utilisateurs → Profil → Mots de passe d’application).",
            file=sys.stderr,
        )

    # Derive the columns from an empty post so the header is written even
    # when the site returns no posts.
    fieldnames = list(row_from_post({}))
    rows = [row_from_post(p) for p in posts]

    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        with args.output.open("w", newline="", encoding="utf-8") as out:
            _write_csv(out, fieldnames, rows)
        # Reported only once the file has been written and closed.
        print(f"Écrit : {args.output.resolve()}", file=sys.stderr)
    else:
        _write_csv(sys.stdout, fieldnames, rows)

    return 0


# Run the exporter only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
