#!/usr/bin/env python3.12
import argparse
import datetime
import fcntl
import json
import pathlib
import shutil
import subprocess
import sys
from typing import List, Tuple


def main() -> int:
    parser = argparse.ArgumentParser(description="Implement workflow runner")
    parser.add_argument("--batch", default="auto", help="Batch id or 'auto'")
    parser.add_argument("--concurrency", type=int, default=4, help="Worker count")
    parser.add_argument("--dry-run", action="store_true", help="Simulate without writing assets")
    parser.add_argument("--root", default=None, help="Override repository root (defaults to CWD)")
    parser.add_argument("--max-retries", type=int, default=1, help="Retries per site on failure")
    args = parser.parse_args()

    root = pathlib.Path(args.root).resolve() if args.root else pathlib.Path.cwd().resolve()
    registry = root / ".smbatcher" / "REGISTRY.md"
    lock_path = root / ".smbatcher" / "REGISTRY.lock"
    runs_dir = root / ".smbatcher" / "runs"
    runs_dir.mkdir(parents=True, exist_ok=True)
    log_path = runs_dir / f"implement-{datetime.datetime.now().strftime('%Y%m%dT%H%M%S')}.log"

    start_time = datetime.datetime.now()

    if args.dry_run:
        print("OK:dry_run:skipping writes")
        return 0

    batch_id = args.batch
    if batch_id == "auto":
        # Auto-detect AND claim next batch using find-next with locking
        # Exit codes: 0=found, 1=no work, 2=error
        import json as _json
        try:
            find_result = subprocess.run(
                [
                    str(root / "tools/shared/find-next.sh"),
                    "--registry", str(registry),
                    "--mode", "batch",
                    "--phase", "implement",
                    "--format", "json",
                    "--claim",  # Atomic find + claim with fcntl lock
                ],
                capture_output=True,
                text=True,
                check=False,
            )
            if find_result.returncode == 2:
                # Actual error - show output and exit
                print("[implement] Error during auto-detection:")
                print(find_result.stdout)
                if find_result.stderr:
                    print(find_result.stderr)
                return 2
            elif find_result.returncode == 0:
                result_data = _json.loads(find_result.stdout)
                batch_id = result_data.get("batch", "")
            else:
                # Exit 1 = no work available
                result_data = _json.loads(find_result.stdout) if find_result.stdout.strip() else {}
                batch_id = ""
        except Exception as e:
            print(f"[implement] Exception during auto-detection: {e}")
            batch_id = ""

        if not batch_id:
            print("FAIL:no_batch_ready:run with --batch <id>")
            return 1
        print(f"[implement] Auto-detected and claimed batch: {batch_id}")
    lock_result = subprocess.run(
        [str(root / "tools/implement/lock.sh"), batch_id],
        capture_output=True,
        text=True,
        check=False,
    )
    if lock_result.stdout:
        print(lock_result.stdout.strip())
    if lock_result.stderr:
        print(lock_result.stderr.strip(), file=sys.stderr)
    if lock_result.returncode != 0:
        return lock_result.returncode

    # Collect sites from registry (locked)
    domains: List[str] = []
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    with lock_path.open("w") as lf:
        fcntl.flock(lf, fcntl.LOCK_EX)
        lines = registry.read_text().splitlines()
        for line in lines:
            if line.startswith("|") and "Domain" not in line and "--------" not in line:
                cols = [c.strip() for c in line.strip("|").split("|")]
                if len(cols) >= 6 and cols[4] == batch_id:
                    domains.append(cols[0])
        fcntl.flock(lf, fcntl.LOCK_UN)

    # Transition O→i→I→Q and generate outputs
    def update_status(status: str, subset: List[str]) -> None:
        lock_path.parent.mkdir(parents=True, exist_ok=True)
        with lock_path.open("w") as lf:
            fcntl.flock(lf, fcntl.LOCK_EX)
            current = registry.read_text().splitlines()
            new_lines = []
            now = datetime.datetime.now().isoformat(timespec="seconds")
            for ln in current:
                if ln.startswith("|") and "Domain" not in ln and "--------" not in ln:
                    cols = [c.strip() for c in ln.strip("|").split("|")]
                    if len(cols) >= 6 and cols[0] in subset:
                        cols[3] = status
                        cols[5] = now
                        ln = f"| {cols[0]} | {cols[1]} | {cols[2]} | {cols[3]} | {cols[4]} | {cols[5]} |"
                new_lines.append(ln)
            registry.write_text("\n".join(new_lines) + "\n")
            fcntl.flock(lf, fcntl.LOCK_UN)

    def cleanup_site(site_dir: pathlib.Path) -> None:
        for target in [
            site_dir / "index.html",
            site_dir / "styles.css",
            site_dir / "script.js",
            site_dir / "manifest.json",
        ]:
            if target.exists():
                target.unlink()
        assets_dir = site_dir / "assets"
        if assets_dir.exists():
            shutil.rmtree(assets_dir, ignore_errors=True)

    successes: List[str] = []
    failures: List[Tuple[str, str]] = []

    for domain in domains:
        candidates = list(root.glob(f"sites/{domain}-v1/DESIGN.md"))
        if not candidates:
            print(f"[implement] missing Design.md for {domain}; skipping and reverting to O")
            update_status("O", [domain])
            failures.append((domain, "missing-design"))
            continue
        design_path = candidates[0]
        site_dir = design_path.parent

        for attempt in range(args.max_retries + 1):
            try:
                update_status("i", [domain])
                subprocess.run([str(root / "tools/implement/generate.py"), str(design_path)], check=True)
                subprocess.run([str(root / "tools/shared/validate.py"), str(site_dir / "index.html")], check=True)
                subprocess.run([str(root / "tools/implement/check-outputs.py"), str(site_dir)], check=True)
                update_status("I", [domain])
                update_status("Q", [domain])
                successes.append(domain)
                break
            except subprocess.CalledProcessError as exc:
                cleanup_site(site_dir)
                if attempt >= args.max_retries:
                    print(f"[implement] failed for {domain} after retries; resetting to O")
                    update_status("O", [domain])
                    failures.append((domain, f"error-{exc.returncode}"))
                else:
                    next_attempt = attempt + 1
                    print(f"[implement] retrying {domain} (attempt {next_attempt}/{args.max_retries + 1}) after error {exc.returncode}")

    with open(log_path, "a", encoding="utf-8") as fh:
        fh.write(f"batch_id={batch_id}\n")
        fh.write(f"domains={','.join(domains)}\n")
        fh.write(f"successes={','.join(successes)}\n")
        fh.write(f"failures={';'.join([f'{d}:{r}' for d, r in failures])}\n")
        fh.write(f"duration_seconds={(datetime.datetime.now()-start_time).total_seconds():.2f}\n")

    if failures:
        print(f"WARN:partial_completion:{len(successes)} succeeded, {len(failures)} failed")
    print(f"OK:batch_complete:{batch_id} ({len(successes)} sites)")
    return 0


if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code as the status.
    sys.exit(main())