import os
import re

# Root directory that is scanned for CMass* collections.
base_path = "/Volumes/Scratch/Sites"

# Names of the entries directly under base_path whose name starts with "CMass".
cmass_dirs = [
    entry
    for entry in os.listdir(base_path)
    if entry.startswith("CMass")
]

# Matches a complete <script>...</script> or <style>...</style> element.
# The page text is lower-cased before matching, so no IGNORECASE flag is needed.
_SCRIPT_STYLE_RE = re.compile(r"<(script|style)\b.*?</\1\s*>", re.DOTALL)


def _body_markup(content):
    """Return the markup inside <body>, or None when a body tag is missing.

    ``content`` must already be lower-cased. The opening ``<body ...>`` tag,
    any script/style elements and surrounding whitespace are excluded so that
    only markup that can actually render counts towards the body length.
    """
    body_start = content.find("<body")
    body_end = content.find("</body>")
    if body_start == -1 or body_end == -1:
        return None

    # Skip past the end of the opening tag so its attributes are not counted.
    tag_close = content.find(">", body_start)
    inner_start = tag_close + 1 if tag_close != -1 else body_start
    # If </body> precedes <body the slice is empty and the body reads as empty.
    inner = content[inner_start:body_end]
    return _SCRIPT_STYLE_RE.sub("", inner).strip()


def _content_issues(content):
    """Return the issue strings derived from the lower-cased page text."""
    found = []
    if "todo" in content:
        found.append("Contains TODO")
    if "lorem ipsum" in content:
        found.append("Contains Lorem Ipsum")

    body = _body_markup(content)
    if body is None:
        found.append("Missing body tags")
    elif len(body) < 100:
        found.append("Body seems nearly empty")
    return found


def check_site(site_path):
    """Inspect one site directory and return a list of human-readable issues.

    Args:
        site_path: Path of the site directory to inspect.

    Returns:
        A list of issue strings; an empty list means no issue was found.
        When ``index.html`` does not exist the list holds a single entry:
        "Design only (missing index.html)" if ``DESIGN.md`` exists, otherwise
        "Completely empty/missing index.html", and no further checks run.

    Checks performed when ``index.html`` exists:
        * file size below 500 bytes;
        * the text "todo" or "lorem ipsum" (case-insensitive) anywhere;
        * missing ``<body`` / ``</body>`` tags, or fewer than 100 characters
          of body markup once the opening tag, script/style elements and
          surrounding whitespace are removed;
        * ``styles.css`` missing or smaller than 100 bytes.
    """
    index_path = os.path.join(site_path, "index.html")
    design_path = os.path.join(site_path, "DESIGN.md")

    if not os.path.exists(index_path):
        if os.path.exists(design_path):
            return ["Design only (missing index.html)"]
        return ["Completely empty/missing index.html"]

    issues = []

    size = os.path.getsize(index_path)
    if size < 500:
        issues.append(f"index.html very small ({size} bytes)")

    # Only the read itself is guarded: an unreadable or non-UTF-8 file is
    # reported as an issue rather than aborting the whole scan.
    try:
        with open(index_path, 'r', encoding='utf-8') as f:
            content = f.read().lower()
    except (OSError, UnicodeError) as e:
        issues.append(f"Error reading index.html: {str(e)}")
    else:
        issues.extend(_content_issues(content))

    # Check styles.css
    css_path = os.path.join(site_path, "styles.css")
    if not os.path.exists(css_path):
        issues.append("Missing styles.css")
    else:
        size = os.path.getsize(css_path)
        if size < 100:
            issues.append(f"styles.css very small ({size} bytes)")

    return issues

# Scan every CMass collection and record the sites that have at least one issue.
# Maps collection name -> list of (site_name, issues) tuples.
all_results = {}

for cmass in cmass_dirs:
    cmass_path = os.path.join(base_path, cmass)
    # CMass0 keeps its sites under tools/sites; every other collection uses sites/.
    if cmass == "CMass0":
        sites_dir = os.path.join(cmass_path, "tools", "sites")
    else:
        sites_dir = os.path.join(cmass_path, "sites")

    # isdir rather than exists: a stray file named "sites" is skipped instead
    # of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(sites_dir):
        continue

    cmass_results = []
    # os.listdir returns names in arbitrary order; sorting keeps the report,
    # and in particular its truncated lists, reproducible between runs.
    for site_name in sorted(os.listdir(sites_dir)):
        site_path = os.path.join(sites_dir, site_name)
        if not os.path.isdir(site_path):
            continue
        issues = check_site(site_path)
        if issues:
            cmass_results.append((site_name, issues))

    if cmass_results:
        all_results[cmass] = cmass_results

# Summary report
print("HEALTH CHECK SUMMARY")
print("====================")

# Exact issue list check_site returns for a site that has DESIGN.md but no index.html.
design_only_issues = ["Design only (missing index.html)"]

total_broken = 0
for cmass in sorted(all_results):
    sites = all_results[cmass]
    total_broken += len(sites)
    print(f"\n{cmass}: {len(sites)} sites with issues")

    # Split the design-only sites from those with any other issue list.
    design_only = [s for s, i in sites if i == design_only_issues]
    others = [(s, i) for s, i in sites if i != design_only_issues]

    if design_only:
        print(f"  - Design only: {len(design_only)} sites")
        # Fewer than 10 names are listed in full; otherwise only the first 5.
        if len(design_only) < 10:
            print(f"    ({', '.join(design_only)})")
        else:
            print(f"    (e.g., {', '.join(design_only[:5])}...)")

    if others:
        print(f"  - Other issues: {len(others)} sites")
        for s, i in others[:20]:  # Show up to 20
            print(f"    * {s}: {', '.join(i)}")
        if len(others) > 20:
            print(f"    ... and {len(others)-20} more")

print(f"\nTOTAL SITES WITH ISSUES: {total_broken}")
