#!/usr/bin/env python3
"""
Documentation coverage checker for xrpld.

Parses coverxygen LCOV output, compares against per-module thresholds
defined in .github/doc-coverage-thresholds.json, and generates a
markdown report suitable for posting as a PR comment.

Usage:
    python3 doc-coverage-check.py \
        --lcov-file doc-coverage.info \
        --threshold-file .github/doc-coverage-thresholds.json \
        --output doc-coverage-report.md \
        [--base-lcov-file base-doc-coverage.info] \
        [--new-files src/Foo.h,src/Foo.cpp]
"""

import argparse
import json
import re
import sys
from collections import defaultdict
from pathlib import Path


def parse_lcov(lcov_path: str) -> dict[str, dict[str, int]]:
    """Parse an LCOV-format file into per-file coverage data.

    Only three record types are interpreted; every other line is ignored:

    * ``SF:<path>`` starts a record and resets the counters.
    * ``DA:<line>,<count>[,...]`` adds one entity to the record; it is
      counted as documented when ``<count>`` is greater than zero.
    * ``end_of_record`` stores the counters under the current path.

    Args:
        lcov_path: Path of the LCOV file to read.

    Returns:
        A dict mapping each ``SF:`` path to
        ``{"documented": N, "total": N}``. When the same path appears in
        more than one record, the last record wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a ``DA:`` count is not an integer.
    """
    coverage: dict[str, dict[str, int]] = {}
    current_file = None
    documented = 0
    total = 0

    # Read as UTF-8 explicitly: the default encoding of open() depends on the
    # locale of the machine running the check, which would make the decoding
    # of non-ASCII source paths environment-dependent.
    with open(lcov_path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith("SF:"):
                current_file = line[3:]
                documented = 0
                total = 0
            elif line.startswith("DA:"):
                fields = line[3:].split(",")
                # A DA entry without a count field is skipped entirely.
                if len(fields) >= 2:
                    total += 1
                    if int(fields[1]) > 0:
                        documented += 1
            elif line == "end_of_record":
                # Records with an empty or missing SF path are discarded.
                if current_file:
                    coverage[current_file] = {
                        "documented": documented,
                        "total": total,
                    }
                current_file = None

    return coverage


def compute_module_coverage(
    coverage: dict[str, dict[str, int]],
    module_prefixes: list[str],
) -> dict[str, dict[str, int | float]]:
    """Aggregate file-level coverage into module-level stats."""
    modules = {}
    for prefix in module_prefixes:
        doc = 0
        tot = 0
        for filepath, stats in coverage.items():
            if filepath.startswith(prefix) or f"/{prefix}" in filepath:
                doc += stats["documented"]
                tot += stats["total"]
        pct = (doc / tot * 100) if tot > 0 else 0.0
        modules[prefix] = {"documented": doc, "total": tot, "percent": round(pct, 1)}
    return modules


def compute_global_coverage(
    coverage: dict[str, dict[str, int]],
) -> dict[str, int | float]:
    """Compute overall coverage across all files."""
    doc = sum(s["documented"] for s in coverage.values())
    tot = sum(s["total"] for s in coverage.values())
    pct = (doc / tot * 100) if tot > 0 else 0.0
    return {"documented": doc, "total": tot, "percent": round(pct, 1)}


def check_ratchet(
    current: dict[str, dict[str, int | float]],
    base: dict[str, dict[str, int | float]] | None,
    current_global: dict[str, int | float],
    base_global: dict[str, int | float] | None,
) -> list[str]:
    """Check that no module or global coverage decreased vs base branch."""
    violations = []

    if base_global and current_global["percent"] < base_global["percent"]:
        violations.append(
            f"Global coverage decreased: {base_global['percent']}% -> "
            f"{current_global['percent']}%"
        )

    if base:
        for module, stats in current.items():
            if module in base and stats["percent"] < base[module]["percent"]:
                violations.append(
                    f"`{module}` coverage decreased: "
                    f"{base[module]['percent']}% -> {stats['percent']}%"
                )

    return violations


def check_new_files(
    coverage: dict[str, dict[str, int]],
    new_files: list[str],
    min_coverage: int,
) -> list[str]:
    """Check that new files meet the minimum documentation coverage.

    Each new file is paired with the first coverage entry whose path is
    either identical to it or ends with it at a path-component boundary
    (``/<filepath>``). Matching on whole components prevents a new file
    such as ``src/Foo.h`` from being judged on the stats of ``src/Foo.hpp``
    or ``src/MyFoo.h``, which a plain substring test would allow.

    Args:
        coverage: Per-file stats, path -> {"documented": N, "total": N}.
        new_files: Paths of the files added by the change.
        min_coverage: Required percentage of documented entities.

    Returns:
        One message per new file whose coverage is below ``min_coverage``.
        Files with no matching coverage entry, or whose entry has a total
        of zero, produce no message.
    """
    violations = []
    for filepath in new_files:
        boundary_suffix = "/" + filepath
        for covered_path, stats in coverage.items():
            if covered_path != filepath and not covered_path.endswith(
                boundary_suffix
            ):
                continue
            if stats["total"] > 0:
                pct = stats["documented"] / stats["total"] * 100
                if pct < min_coverage:
                    violations.append(
                        f"`{filepath}` has {pct:.0f}% doc coverage "
                        f"(minimum {min_coverage}%)"
                    )
            # Only the first matching entry is evaluated for each new file.
            break
    return violations


def coverage_emoji(pct: float) -> str:
    """Map a coverage percentage to a one-character indicator.

    Returns "+" for 80 and above, "~" for 50 up to (but excluding) 80,
    and "-" for anything lower.
    """
    # Ordered from the highest floor down; the first floor reached wins.
    for floor, marker in ((80, "+"), (50, "~")):
        if pct >= floor:
            return marker
    return "-"


def generate_report(
    global_stats: dict[str, int | float],
    module_stats: dict[str, dict[str, int | float]],
    thresholds: dict,
    violations: list[str],
    new_file_violations: list[str],
) -> str:
    """Generate a markdown report for the PR comment."""
    lines = []
    lines.append("## Documentation Coverage Report")
    lines.append("")

    passed = not violations and not new_file_violations
    status = "PASSED" if passed else "FAILED"
    lines.append(f"**Status:** {status}")
    lines.append(
        f"**Global Coverage:** {global_stats['percent']}% "
        f"({global_stats['documented']}/{global_stats['total']} entities documented)"
    )
    lines.append(
        f"**Minimum Threshold:** {thresholds.get('global_minimum', 0)}%"
    )
    lines.append("")

    if violations or new_file_violations:
        lines.append("### Violations")
        lines.append("")
        for v in violations + new_file_violations:
            lines.append(f"- {v}")
        lines.append("")

    lines.append("### Module Coverage")
    lines.append("")
    lines.append("| Module | Coverage | Documented | Total | Threshold |")
    lines.append("|--------|----------|------------|-------|-----------|")

    module_thresholds = thresholds.get("module_thresholds", {})
    for module in sorted(module_stats.keys()):
        stats = module_stats[module]
        threshold = module_thresholds.get(module, 0)
        emoji = coverage_emoji(stats["percent"])
        lines.append(
            f"| `{module}` | {stats['percent']}% | "
            f"{stats['documented']} | {stats['total']} | {threshold}% |"
        )

    lines.append("")
    lines.append(
        "*Coverage measured by [coverxygen](https://github.com/psycofdj/coverxygen). "
        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
        "for documentation guidelines.*"
    )

    return "\n".join(lines)


def main():
    """Command-line entry point: evaluate doc coverage and emit the report.

    Loads the thresholds JSON and the LCOV file, computes global and
    per-module coverage, then applies the global minimum, the per-module
    thresholds, the ratchet check (only when ``ratchet_mode`` is
    ``"no_decrease"``) and the new-file check (only when ``--new-files`` is
    given). The markdown report is written to ``--output`` and printed to
    stdout. Exits with status 1 when any violation was found, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="Check documentation coverage")
    parser.add_argument("--lcov-file", required=True, help="Path to LCOV coverage file")
    parser.add_argument(
        "--threshold-file", required=True, help="Path to thresholds JSON"
    )
    parser.add_argument("--output", required=True, help="Path to write markdown report")
    parser.add_argument(
        "--base-lcov-file", default=None, help="Path to base branch LCOV file"
    )
    parser.add_argument(
        "--new-files",
        default="",
        help="Comma-separated list of new C++ files in this PR",
    )
    args = parser.parse_args()

    # Read as UTF-8 explicitly rather than relying on the runner's locale.
    with open(args.threshold_file, encoding="utf-8") as f:
        thresholds = json.load(f)

    module_thresholds = thresholds.get("module_thresholds", {})
    global_minimum = thresholds.get("global_minimum", 0)

    coverage = parse_lcov(args.lcov_file)
    module_prefixes = list(module_thresholds)
    module_stats = compute_module_coverage(coverage, module_prefixes)
    global_stats = compute_global_coverage(coverage)

    base_module_stats = None
    base_global_stats = None
    if args.base_lcov_file:
        if Path(args.base_lcov_file).exists():
            base_coverage = parse_lcov(args.base_lcov_file)
            base_module_stats = compute_module_coverage(base_coverage, module_prefixes)
            base_global_stats = compute_global_coverage(base_coverage)
        else:
            # Still best-effort, but make it visible that the ratchet
            # comparison has nothing to compare against.
            print(
                f"warning: base LCOV file {args.base_lcov_file!r} not found; "
                "skipping comparison against the base branch",
                file=sys.stderr,
            )

    violations = []

    if global_stats["percent"] < global_minimum:
        violations.append(
            f"Global coverage {global_stats['percent']}% is below minimum "
            f"{global_minimum}%"
        )

    for module, threshold in module_thresholds.items():
        if module in module_stats and module_stats[module]["percent"] < threshold:
            violations.append(
                f"`{module}` coverage {module_stats[module]['percent']}% is below "
                f"threshold {threshold}%"
            )

    if thresholds.get("ratchet_mode") == "no_decrease":
        violations.extend(
            check_ratchet(
                module_stats, base_module_stats, global_stats, base_global_stats
            )
        )

    new_file_violations = []
    if args.new_files:
        new_files = [f.strip() for f in args.new_files.split(",") if f.strip()]
        new_file_min = thresholds.get("new_file_minimum", 80)
        new_file_violations = check_new_files(coverage, new_files, new_file_min)

    report = generate_report(
        global_stats, module_stats, thresholds, violations, new_file_violations
    )

    # Module names come from the thresholds file and may be non-ASCII, so the
    # report is written as UTF-8 regardless of locale.
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(report)

    print(report)

    if violations or new_file_violations:
        print(f"\nFAILED: {len(violations) + len(new_file_violations)} violation(s)")
        sys.exit(1)
    else:
        print("\nPASSED: All coverage thresholds met")
        sys.exit(0)


# Run the checker only when this file is executed as a script, so importing
# the module does not parse arguments or exit the process.
if __name__ == "__main__":
    main()