#!/usr/bin/env python3
import fcntl
import sys
import pathlib
import datetime
import re
from typing import List, Dict

# Command-line contract (all positional):
#   1  root          project root; also put on sys.path for project imports
#   2  registry      path of the Markdown registry file
#   3  batch_dir     directory holding the batch tracker files
#   4  sites_dir     directory under which per-site directories are created
#   5  sites         comma-separated "domain:title[:description]" specs
#   6  batch_size    maximum number of sites taken into this batch
#   7  batch_id      optional; empty or omitted means "auto-assign the next id"
if len(sys.argv) < 7:
    # A usage message instead of a bare IndexError traceback; sys.exit with a
    # string writes it to stderr and exits with status 1.
    sys.exit(
        f"usage: {sys.argv[0]} ROOT REGISTRY BATCH_DIR SITES_DIR SITES "
        "BATCH_SIZE [BATCH_ID]"
    )

root = pathlib.Path(sys.argv[1])
# Kept as an alias of ``root`` so the original module-level name still exists.
_current_root = root

# Add root directory to path for imports
if str(root) not in sys.path:
    sys.path.insert(0, str(root))

registry_path = pathlib.Path(sys.argv[2])
batch_dir = pathlib.Path(sys.argv[3])
sites_dir = pathlib.Path(sys.argv[4])
sites_raw = sys.argv[5]
batch_size = int(sys.argv[6])
# An empty string is treated as "no manual id" by next_batch_id(), so the
# argument may also be left out entirely.
manual_batch_id = sys.argv[7] if len(sys.argv) > 7 else ""

# Single timestamp (local time, second precision) shared by every record
# written during this run.
now = datetime.datetime.now().isoformat(timespec="seconds")
# Lock file living next to the registry; used to serialize registry updates.
lock_path = registry_path.parent / "REGISTRY.lock"


def next_batch_id() -> str:
    """Return the id to use for the batch being created.

    A non-empty ``manual_batch_id`` is returned unchanged. Otherwise the
    tracker files in ``batch_dir`` named ``batch-<digits>-design.md`` are
    inspected and the highest number plus one is returned, zero-padded to
    three digits ("001" when no tracker exists yet).
    """
    if manual_batch_id:
        return manual_batch_id

    tracker_name = re.compile(r"batch-(\d+)-design\.md")
    matches = (
        tracker_name.search(candidate.name)
        for candidate in batch_dir.glob("batch-*-design.md")
    )
    # default=0 makes the first batch come out as 1 when nothing matched.
    highest = max((int(found.group(1)) for found in matches if found), default=0)
    return f"{highest + 1:03d}"


def parse_registry() -> Dict[str, List[str]]:
    """Read the registry table from ``registry_path``.

    Returns:
        A mapping ``domain -> [domain, title, desc, status, batch, updated]``
        built from the table rows that have at least six cells. Empty status
        and batch cells are normalized to "-". A missing registry file yields
        an empty mapping.

    The header and separator rows are recognized by their cell contents, not
    by searching the whole line, so a site whose title or description happens
    to contain "Domain" or a run of dashes is still kept.
    """
    rows: Dict[str, List[str]] = {}
    if not registry_path.exists():
        return rows

    for line in registry_path.read_text().splitlines():
        if not line.startswith("|"):
            continue

        # Remove exactly one delimiter pipe per side; str.strip("|") would eat
        # several and shift the columns when an edge cell is empty.
        inner = line.rstrip()[1:]
        if inner.endswith("|"):
            inner = inner[:-1]
        cols = [cell.strip() for cell in inner.split("|")]
        if len(cols) < 6:
            continue

        if cols[0] == "Domain":
            continue  # table header row
        if all(cell and set(cell) <= {"-", ":"} for cell in cols):
            continue  # Markdown separator row such as |-----|:---:|

        domain, title, desc, status, batch, updated = cols[:6]
        rows[domain] = [domain, title, desc, status or "-", batch or "-", updated]
    return rows


def write_registry(rows: Dict[str, List[str]]):
    """Rewrite the registry file at ``registry_path`` from ``rows``.

    ``rows`` maps a domain to its six-field record
    ``[domain, title, desc, status, batch, updated]``. The file is replaced
    wholesale: a fixed title and table header, one table row per record
    ordered by domain, then a fixed legend describing the status codes.
    """
    records = list(rows.values())
    records.sort(key=lambda rec: rec[0])

    chunks = [
        "# SMaking Sites Registry (template)\n",
        "\n",
        "| Domain | Title | Description | Status | Batch | Updated |\n",
        "|--------|-------|-------------|--------|-------|---------|\n",
    ]

    row_template = "| {} | {} | {} | {} | {} | {} |\n"
    for rec in records:
        chunks.append(row_template.format(*rec[:6]))

    chunks.extend(
        (
            "\n",
            "Status codes: none/- → B → d → D → O → i → I → Q\n",
            "\n",
            "Notes:\n",
            "- Status \"-\" means registered but not in design batch.\n",
            "- Design phases use B/d/D; implement phases use O/i/I/Q.\n",
            "- Update timestamps on each transition.\n",
        )
    )
    registry_path.write_text("".join(chunks))


# Turn the comma-separated "domain:title[:description]" specs into
# (domain, title, desc) tuples.
entries = []
seen_domains = set()
for item in sites_raw.split(","):
    # Strip each field so "a.com:A, b.com:B" does not yield the domain " b.com",
    # which would become a different registry key and directory name.
    parts = [part.strip() for part in item.split(":")]
    if len(parts) < 2:
        continue
    domain = parts[0]
    title = parts[1]
    # An empty domain cannot name a site directory; a repeated domain would
    # only produce a duplicate tracker row for the same directory.
    if not domain or domain in seen_domains:
        continue
    seen_domains.add(domain)
    desc = parts[2] if len(parts) > 2 else ""
    entries.append((domain, title, desc))

# Clamp so a negative size cannot slice from the end of the list.
entries = entries[:max(batch_size, 0)]

# Fail before any side effect: with nothing to batch there is no reason to
# allocate a batch id, rewrite the registry or create an empty tracker file.
if not entries:
    print("ERROR:no_entries:no valid sites to batch")
    sys.exit(2)

# NOTE(review): the id is chosen before the registry lock is taken, so two
# concurrent runs could pick the same number — confirm whether that can occur.
batch_id = next_batch_id()

# Register every entry in the registry while holding an exclusive advisory
# lock on the lock file, so the read-modify-write cycle is not interleaved
# with another process using the same lock.
lock_path.parent.mkdir(parents=True, exist_ok=True)
with lock_path.open("w") as lf:
    fcntl.flock(lf, fcntl.LOCK_EX)
    try:
        rows = parse_registry()
        for domain, title, desc in entries:
            # Unknown domains get a fresh record; known ones keep their
            # stored title and description.
            record = rows.setdefault(domain, [domain, title, desc, "-", "-", now])
            # Status, batch and timestamp are (re)assigned in every case.
            record[3:6] = ["B", batch_id, now]
        write_registry(rows)
    finally:
        fcntl.flock(lf, fcntl.LOCK_UN)

# Create the per-batch tracker file and one directory per site.
# The batch directory may not exist yet on the very first run; create it like
# the other directories this script writes into, instead of failing here after
# the registry has already been updated.
batch_dir.mkdir(parents=True, exist_ok=True)
tracker_path = batch_dir / f"batch-{batch_id}-design.md"
tracker_header = [
    f"# Design Batch {batch_id}\n",
    f"**Created**: {now}\n",
    "**Phase**: design\n",
    "\n",
    "| Directory | Domain | Status | Worker | Start | End | Notes |\n",
    "|-----------|--------|--------|--------|-------|-----|-------|\n",
]

tracker_rows = []
for domain, title, desc in entries:
    site_dir = sites_dir / f"{domain}-v1"
    site_dir.mkdir(parents=True, exist_ok=True)
    try:
        shown_dir = site_dir.relative_to(root)
    except ValueError:
        # sites_dir is not located under root: list the path as given rather
        # than aborting halfway through the batch.
        shown_dir = site_dir
    tracker_rows.append(f"| {shown_dir} | {domain} | B | - | - | - | Registered |\n")

tracker_path.write_text("".join(tracker_header + tracker_rows))

# Similarity check (best-effort): report the maximum similarity among the
# DESIGN.md files found under <root>/sites. Any failure here is reported on
# stderr and must not abort the run, because the batch already exists.
design_files = list(root.glob("sites/**/DESIGN.md"))
if design_files:
    texts = []
    for fp in design_files:
        try:
            texts.append(fp.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-UTF-8 file: leave it out of the comparison.
            continue
    try:
        try:
            from tools.shared import similarity  # type: ignore
        except Exception:
            # Package-style import failed; fall back to importing the module
            # directly from <root>/tools/shared.
            sys.path.append(str(root / "tools/shared"))
            import similarity  # type: ignore
        ms = similarity.max_similarity(texts) if texts else 0.0
        print(f"max_similarity_existing={ms:.4f}")
    except Exception as exc:
        # Top-level boundary of an optional step: surface the problem without
        # turning it into a failed batch creation.
        print(f"WARNING:similarity_check_failed:{exc}", file=sys.stderr)

# Final status line on stdout: success with the batch id and site count, or
# an error line followed by exit status 2 when no site entry was usable.
if entries:
    print(f"OK:batch_created:{batch_id} ({len(entries)} sites)")
else:
    print("ERROR:no_entries:no valid sites to batch")
    sys.exit(2)
