"""
engine/template_inspector.py
----------------------------
Validates an uploaded PPTX against the 7-slide template contract before
generation runs.

The contract is role-based, NOT layout-uniform — each template keeps its
individual identity. We check that each of the 7 expected slide ROLES is
present and structured enough for the pipeline to populate.

Expected slide order (positional):
    1 Cover           - course intro page (wide title)
    2 About Us        - paragraph copied as-is
    3 Course Syllabus - title + N module slots
    4 Module Intro    - module title + N topic slots
    5 Content Layout  - wide title + wide body + chrome (gets replicated)
    6 Quiz            - question block (1 MCQ per slide, cloned 1-2 per module)
    7 Ending          - any closing copy (Congratulations / Contact Us / etc.)

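Result envelope:
    {
        "status":   "pass" | "warn" | "fail",
        "summary":  "...",
        "meta":     { slide_count, aspect_ratio, is_16_9, masters, brand_color, file_size_kb },
        "slides":   [ { num, role, status, reasons:[...], detected:{...} }, ... ],
    }

Deck-level problems (wrong slide count, wrong aspect ratio) are reported in
"slides" as entries with num 0 and role "meta". When the file cannot be found
or opened, "meta" is {} and "slides" is [].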
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

# Divisor that converts the EMU lengths reported for slides and shapes into
# inches (914,400 EMU per inch); every geometric threshold in this module is
# therefore expressed in inches.
_EMU = 914_400
# Brand wordmark spellings. Matching is done on lower-cased text with spaces
# removed (see _is_logo_text), so the spaced and unspaced variants are equivalent.
_LOGO_TOKENS = {"theknowledgeacademy", "the knowledge academy", "theknowledge"}

# Role spec — positional index → expected role + title keywords.
# "title_kw" entries are matched as lower-case substrings of the detected title;
# an empty list means the role has no title-keyword requirement.
# NOTE(review): the module_intro keywords are not consulted by _inspect_slide's
# module_intro branch — confirm whether that is intentional.
_ROLE_SPEC = [
    {"role": "cover",        "title_kw": []},
    {"role": "about_us",     "title_kw": ["about us", "about"]},
    {"role": "syllabus",     "title_kw": ["syllabus", "course outline", "course contents", "contents"]},
    {"role": "module_intro", "title_kw": ["module", "chapter", "section"]},
    {"role": "content",      "title_kw": []},
    {"role": "quiz",         "title_kw": ["quiz", "question", "assessment", "knowledge check"]},
    {"role": "ending",       "title_kw": ["congratulations", "thank you", "contact us", "end", "summary"]},
]


# ── Public entry point ─────────────────────────────────────────────────────────

def inspect_template(tmpl_path: str | Path) -> dict[str, Any]:
    """
    Inspect a PPTX template and return a structured report.

    Args:
        tmpl_path: Path of the ``.pptx`` file to validate.

    Returns:
        A result envelope with ``status`` ("pass" | "warn" | "fail"),
        ``summary``, ``meta`` (deck-level facts) and ``slides``. ``slides``
        holds one report per expected role, preceded by ``num`` 0 / role
        "meta" entries when the deck fails the slide-count or 16:9 check.

    Never raises — a missing file, an unreadable package, or any error hit
    while inspecting the parsed deck is returned as a fail envelope.
    """
    path = Path(tmpl_path)
    if not path.exists():
        return _fail_envelope(f"Template file not found: {path}")

    try:
        from pptx import Presentation
        prs = Presentation(str(path))
    except Exception as exc:
        return _fail_envelope(f"Could not open PPTX: {exc!r}")

    # Opening succeeded, but a damaged deck can still fail while its parts are
    # read; this guard is what makes the "never raises" promise hold.
    try:
        # The slide size may be undeclared (None); a zero height would also
        # break the aspect-ratio division below.
        if not prs.slide_width or not prs.slide_height:
            return _fail_envelope("Template does not declare a usable slide size")

        sw = prs.slide_width / _EMU
        sh = prs.slide_height / _EMU
        aspect = f"{sw:.2f}x{sh:.2f}"
        is_169 = abs((sw / sh) - (16 / 9)) < 0.05
        slide_count = len(prs.slides)

        meta = {
            "slide_count":  slide_count,
            "aspect_ratio": aspect,
            "is_16_9":      is_169,
            "masters":      len(prs.slide_masters),
            "brand_color":  _detect_brand_color(prs),
            "file_size_kb": round(path.stat().st_size / 1024, 1),
        }

        slide_reports: list[dict] = []

        # Meta-level hard fails. They carry an empty "detected" so every entry
        # in "slides" exposes the same keys.
        if slide_count != 7:
            slide_reports.append({
                "num":     0,
                "role":    "meta",
                "status":  "fail",
                "reasons": [f"Template must have exactly 7 slides (found {slide_count})"],
                "detected": {},
            })
        if not is_169:
            slide_reports.append({
                "num":     0,
                "role":    "meta",
                "status":  "fail",
                "reasons": [f"Template must be 16:9 (found {aspect})"],
                "detected": {},
            })

        # Per-slide role validation: roles are positional, so a short deck
        # yields explicit "missing" reports for the trailing roles.
        for idx, spec in enumerate(_ROLE_SPEC):
            if idx >= slide_count:
                slide_reports.append({
                    "num":     idx + 1,
                    "role":    spec["role"],
                    "status":  "fail",
                    "reasons": [f"Slide {idx + 1} ({spec['role']}) is missing"],
                    "detected": {},
                })
                continue

            slide_reports.append(_inspect_slide(prs.slides[idx], idx, spec, sw))

        overall = _aggregate_status(slide_reports)
        return {
            "status":  overall,
            "summary": _summarize(overall, slide_reports),
            "meta":    meta,
            "slides":  slide_reports,
        }
    except Exception as exc:
        return _fail_envelope(f"Could not inspect PPTX: {exc!r}")


# ── Per-slide inspection ──────────────────────────────────────────────────────

def _inspect_slide(slide, idx: int, spec: dict, slide_w: float) -> dict:
    """
    Validate one slide against the role expected at its position.

    Args:
        slide:   Slide object whose shapes are walked by the collector helpers.
        idx:     Zero-based slide position; reported as ``num = idx + 1``.
        spec:    Entry of ``_ROLE_SPEC`` — supplies ``role`` and ``title_kw``.
        slide_w: Slide width in inches; width thresholds are fractions of it.

    Returns:
        A slide report dict: ``num``, ``role``, ``status`` ("pass" / "warn" /
        "fail"), ``reasons`` (human-readable findings) and ``detected`` (the
        measurements the verdict was based on).
    """
    role = spec["role"]
    title_kw = spec["title_kw"]

    # All geometry below is in inches (see _shape_rect).
    text_boxes = _collect_text_boxes(slide)
    auto_shapes = _collect_auto_shapes(slide)
    groups = _collect_groups(slide)

    # Title = topmost non-logo text; keywords match as lower-case substrings.
    # Roles without keywords are treated as always matching.
    title_text = _first_title_text(text_boxes)
    has_kw = any(kw in title_text.lower() for kw in title_kw) if title_kw else True

    reasons: list[str] = []
    detected = {
        "title_text":      title_text[:80],
        "text_box_count":  len(text_boxes),
        "auto_shape_count": len(auto_shapes),
        "group_count":     len(groups),
    }

    if role == "cover":
        # A cover only needs one text shape wider than 30% of the slide.
        wide_titles = [tb for tb in text_boxes if tb["cx"] > slide_w * 0.30]
        detected["wide_title_count"] = len(wide_titles)
        if not wide_titles:
            reasons.append("No wide title textbox found (cover needs a course title)")
        status = "fail" if not wide_titles else "pass"

    elif role == "about_us":
        # body_blocks / long_paragraphs are reported in `detected`; only
        # long_paragraphs and the presence of any non-logo text drive the status.
        body_blocks = [tb for tb in text_boxes if tb["cy"] > 0.5 and tb["cx"] > slide_w * 0.30]
        long_paragraphs = [tb for tb in text_boxes if len(tb["txt"]) > 40]
        any_text = any(tb["txt"] for tb in text_boxes if not _is_logo_text(tb["txt"]))
        detected["body_block_count"] = len(body_blocks)
        detected["paragraph_count"] = len(long_paragraphs)
        if not any_text:
            reasons.append("About-us slide appears empty — needs at least some copy")
            status = "fail"
        elif not has_kw and not long_paragraphs:
            reasons.append("No 'About Us' title and no long paragraph — verify this is the about-us page")
            status = "warn"
        elif not has_kw:
            reasons.append("Title doesn't say 'About Us' but content is present — accepting on body")
            status = "warn"
        else:
            status = "pass"

    elif role == "syllabus":
        if not has_kw:
            reasons.append("Title doesn't contain 'syllabus' / 'outline' / 'contents'")
        # Slots are counted over text shapes plus top-level groups that start
        # more than 2.0in from the top of the slide.
        slots = _count_stacked_slots(text_boxes + groups, title_y_max=2.0)
        detected["module_slots"] = slots
        if slots < 3:
            reasons.append(f"Only {slots} module slots found (need at least 3 — recommend 4)")
        # Both problems together fail; either one alone only warns.
        if not has_kw and slots < 3:
            status = "fail"
        elif slots < 3 or not has_kw:
            status = "warn"
        else:
            status = "pass"

    elif role == "module_intro":
        # Title keywords are not checked for this role; only topic slots and
        # the presence of any non-logo text matter.
        topic_slots = _count_stacked_slots(text_boxes, title_y_max=1.5)
        any_text = any(tb["txt"] for tb in text_boxes if not _is_logo_text(tb["txt"]))
        detected["topic_slots"] = topic_slots
        if not any_text:
            reasons.append("Module-intro slide appears empty — needs module title placeholder")
            status = "fail"
        elif topic_slots < 1:
            reasons.append("No discrete topic slots — single text block will receive all topics packed together")
            status = "warn"
        elif topic_slots < 3:
            reasons.append(f"Only {topic_slots} topic slot(s) — pipeline will fill what's available")
            status = "warn"
        else:
            status = "pass"

    elif role == "content":
        # Title: wider than 45% of the slide and starting in the top 1.5in.
        # Body: equally wide, starting below 1.2in and taller than 1.0in.
        wide_title = next((tb for tb in text_boxes if tb["cx"] > slide_w * 0.45 and tb["y"] < 1.5), None)
        wide_body = next((tb for tb in text_boxes if tb["cx"] > slide_w * 0.45 and tb["y"] > 1.2 and tb["cy"] > 1.0), None)
        # Walk slide + layout + master for chrome — bands often live on the master
        chrome = _collect_chrome_shapes(slide)
        chrome_autos = chrome["autos"]
        chrome_wordmarks = chrome["wordmarks"]
        # Banners span more than 85% of the width; the 5.5in / 5.0in cut-offs
        # below are absolute and do not scale with the slide height.
        has_top_banner = any(s for s in chrome_autos if s["cx"] > slide_w * 0.85 and s["y"] < 1.5 and s["cy"] > 0.2)
        has_bottom_banner = any(s for s in chrome_autos if s["cx"] > slide_w * 0.85 and s["y"] > 5.5 and s["cy"] > 0.2)
        # Corner accent: a sizeable shape touching (within 0.3in) the left or right edge.
        has_corner_accent = any(
            s for s in chrome_autos
            if (s["cx"] > slide_w * 0.25 and s["cy"] > 0.8)
            and (s["x"] < 0.3 or (s["x"] + s["cx"]) > slide_w - 0.3)
        )
        # Side stripe: tall (over 5.0in) and narrower than a quarter of the slide.
        has_side_stripe = any(s for s in chrome_autos if s["cy"] > 5.0 and s["cx"] < slide_w * 0.25)
        has_wordmark = bool(chrome_wordmarks)
        has_any_chrome = has_top_banner or has_bottom_banner or has_corner_accent or has_side_stripe or has_wordmark
        detected["has_wide_title"] = bool(wide_title)
        detected["has_wide_body"] = bool(wide_body)
        detected["has_top_banner"] = has_top_banner
        detected["has_bottom_banner"] = has_bottom_banner
        detected["has_corner_accent"] = has_corner_accent
        detected["has_side_stripe"] = has_side_stripe
        detected["has_wordmark"] = has_wordmark

        if not wide_title:
            reasons.append("No wide title textbox (content slide needs a title placeholder)")
        if not wide_body:
            reasons.append("No wide body textbox (content slide needs a body placeholder)")
        if not has_any_chrome:
            reasons.append("No chrome (banner, corner accent, side stripe, or wordmark) — content slides will have no template framing")

        # Missing title or body is fatal; missing chrome alone is only a warning.
        if not wide_title or not wide_body:
            status = "fail"
        elif not has_any_chrome:
            status = "warn"
        else:
            status = "pass"

    elif role == "quiz":
        if not has_kw:
            reasons.append("Title doesn't contain 'quiz' / 'question' / 'assessment'")
        # Question container: an auto shape/freeform wider than 40% of the
        # slide and taller than 2.0in.
        prominent_block = next(
            (s for s in auto_shapes if s["cx"] > slide_w * 0.4 and s["cy"] > 2.0),
            None,
        )
        detected["has_question_block"] = bool(prominent_block)
        if not prominent_block:
            reasons.append("No prominent question container shape — quiz needs a block to hold the MCQ text")

        # Both problems together fail; either one alone only warns.
        if not has_kw and not prominent_block:
            status = "fail"
        elif not has_kw or not prominent_block:
            status = "warn"
        else:
            status = "pass"

    elif role == "ending":
        # Any title is acceptable; an unrecognised phrase only warns.
        if not title_text:
            reasons.append("No title text on the ending slide")
            status = "fail"
        elif not has_kw:
            reasons.append(f"Ending title is '{title_text[:40]}' — not a recognised ending phrase (will still work)")
            status = "warn"
        else:
            status = "pass"

    else:
        # A role with no dedicated branch above.
        status = "warn"
        reasons.append(f"Unknown role: {role}")

    return {
        "num":     idx + 1,
        "role":    role,
        "status":  status,
        "reasons": reasons,
        "detected": detected,
    }


# ── Shape collectors ──────────────────────────────────────────────────────────

def _collect_text_boxes(slide) -> list[dict]:
    """
    Gather a record for every text-bearing shape on the slide.

    Covers plain textboxes, auto-shapes that hold text, and shapes nested in
    groups — design-heavy templates frequently place titles and body copy
    inside rectangles rather than textboxes. Each record is the shape's
    rectangle plus its stripped text under "txt".
    """
    found: list[dict] = []
    _walk_text_shapes(slide.shapes, found)
    return found


def _walk_text_shapes(shape_iter, out: list[dict]) -> None:
    """
    Append a {x, y, cx, cy, txt} record to *out* for each text-frame shape.

    Groups are descended into recursively and never recorded themselves.
    Shapes without a text frame or without readable geometry are skipped;
    text that cannot be read is recorded as "".
    """
    for node in shape_iter:
        if _shape_type_str(node) == "group":
            # Best-effort descent: a group whose children cannot be listed
            # is simply ignored.
            try:
                _walk_text_shapes(node.shapes, out)
            except Exception:
                pass
            continue

        try:
            carries_text = node.has_text_frame
        except Exception:
            carries_text = False
        if not carries_text:
            continue

        geometry = _shape_rect(node)
        if geometry is None:
            continue

        try:
            content = node.text_frame.text.strip()
        except Exception:
            content = ""

        record = dict(geometry)
        record["txt"] = content
        out.append(record)


def _collect_auto_shapes(slide) -> list[dict]:
    """Return rectangles of all auto shapes and freeforms, including those nested in groups."""
    rects: list[dict] = []
    wanted_kinds = {"auto_shape", "freeform"}
    _walk_shapes(slide.shapes, rects, want=wanted_kinds)
    return rects


def _collect_chrome_shapes(slide) -> dict[str, list[dict]]:
    """
    Gather chrome candidates from the slide, its layout, and its master.

    Brand bands, stripes and wordmarks are often defined on the master so
    they show on every slide; looking at the slide alone would report them
    as missing.

    Returns a dict with "autos" (auto-shape / freeform rectangles) and
    "wordmarks" (text shapes whose text matches a logo token).
    """
    layers = [slide.shapes]
    try:
        layout = slide.slide_layout
        layers.append(layout.shapes)
        layers.append(layout.slide_master.shapes)
    except Exception:
        # Layout/master unavailable — fall back to whatever was reachable.
        pass

    chrome: dict[str, list[dict]] = {"autos": [], "wordmarks": []}
    for layer in layers:
        _walk_shapes(layer, chrome["autos"], want={"auto_shape", "freeform"})
        _walk_wordmarks(layer, chrome["wordmarks"])
    return chrome


def _walk_wordmarks(shape_iter, out: list[dict]) -> None:
    """
    Append a {x, y, cx, cy, txt} record to *out* for each logo-text shape.

    Groups are descended into recursively. A shape is recorded only when it
    has a text frame, its stripped text matches a logo token, and its
    geometry can be read.
    """
    for node in shape_iter:
        if _shape_type_str(node) == "group":
            # Best-effort descent into the group's children.
            try:
                _walk_wordmarks(node.shapes, out)
            except Exception:
                pass
            continue

        try:
            carries_text = node.has_text_frame
        except Exception:
            carries_text = False
        if not carries_text:
            continue

        try:
            content = node.text_frame.text.strip()
        except Exception:
            continue

        if _is_logo_text(content):
            geometry = _shape_rect(node)
            if geometry is not None:
                record = dict(geometry)
                record["txt"] = content
                out.append(record)


def _collect_groups(slide) -> list[dict]:
    """
    Return one record per top-level group shape on the slide.

    Each record is the group's rectangle with an empty "txt", so it can be
    mixed with text-box records. Nested groups are not visited.
    """
    boxes = []
    top_level_groups = (s for s in slide.shapes if _shape_type_str(s) == "group")
    for grp in top_level_groups:
        geometry = _shape_rect(grp)
        if geometry is not None:
            boxes.append({**geometry, "txt": ""})
    return boxes


def _walk_shapes(shape_iter, out: list[dict], want: set[str]) -> None:
    """
    Append the rectangle of every shape whose kind is in *want* to *out*.

    Groups are always descended into and never recorded themselves; shapes
    whose geometry cannot be read are skipped.
    """
    for node in shape_iter:
        kind = _shape_type_str(node)
        if kind == "group":
            # Best-effort descent into the group's children.
            try:
                _walk_shapes(node.shapes, out, want)
            except Exception:
                pass
        elif kind in want:
            geometry = _shape_rect(node)
            if geometry is not None:
                out.append(geometry)


def _shape_rect(shape) -> dict | None:
    try:
        return {
            "x":  shape.left / _EMU,
            "y":  shape.top / _EMU,
            "cx": shape.width / _EMU,
            "cy": shape.height / _EMU,
        }
    except Exception:
        return None


def _shape_type_str(shape) -> str:
    try:
        st = int(shape.shape_type)
    except Exception:
        return "unknown"
    return {1: "auto_shape", 5: "freeform", 6: "group", 13: "picture", 17: "text_box"}.get(st, "unknown")


# ── Heuristics ────────────────────────────────────────────────────────────────

def _first_title_text(text_boxes: list[dict]) -> str:
    """Return text of the topmost non-logo textbox (likely the slide title)."""
    candidates = [tb for tb in text_boxes if tb["txt"] and not _is_logo_text(tb["txt"])]
    if not candidates:
        return ""
    candidates.sort(key=lambda tb: tb["y"])
    return candidates[0]["txt"]


def _is_logo_text(txt: str) -> bool:
    """Return True if *txt* contains a logo token, ignoring case and spaces."""
    squashed = txt.lower().replace(" ", "")
    for token in _LOGO_TOKENS:
        if token.replace(" ", "") in squashed:
            return True
    return False


def _count_stacked_slots(shapes: list[dict], title_y_max: float, min_y: float | None = None) -> int:
    """
    Count repeating slot rows stacked vertically below a title.
    Looks for shapes whose Y differs by > 0.4in to dedupe overlapping decorations.
    """
    candidates = [s for s in shapes if s["y"] > title_y_max]
    if min_y is not None:
        candidates = [s for s in candidates if s["y"] >= min_y or s["y"] > title_y_max]
    candidates.sort(key=lambda s: s["y"])
    rows: list[float] = []
    for s in candidates:
        if not rows or abs(s["y"] - rows[-1]) > 0.4:
            rows.append(s["y"])
    return len(rows)


def _detect_brand_color(prs) -> str:
    """Best-effort: pull the first non-grey solid fill we encounter."""
    for slide in prs.slides:
        for shape in slide.shapes:
            try:
                fill = shape.fill
                if fill.type is None:
                    continue
                rgb = fill.fore_color.rgb
                if rgb is None:
                    continue
                hexcol = str(rgb).upper()
                if hexcol in {"FFFFFF", "000000"} or hexcol.startswith(("EE", "F0", "F5")):
                    continue
                return hexcol
            except Exception:
                continue
    return "44318D"


# ── Aggregation / envelopes ───────────────────────────────────────────────────

def _aggregate_status(reports: list[dict]) -> str:
    if any(r["status"] == "fail" for r in reports):
        return "fail"
    if any(r["status"] == "warn" for r in reports):
        return "warn"
    return "pass"


def _summarize(overall: str, reports: list[dict]) -> str:
    fails = [r for r in reports if r["status"] == "fail"]
    warns = [r for r in reports if r["status"] == "warn"]
    if overall == "pass":
        return "Template passes all 7-slide structure checks."
    if overall == "warn":
        roles = ", ".join(r["role"] for r in warns)
        return f"Template usable with caveats on: {roles}"
    roles = ", ".join(r["role"] for r in fails)
    return f"Template needs fixes on: {roles}"


def _fail_envelope(msg: str) -> dict:
    return {
        "status":  "fail",
        "summary": msg,
        "meta":    {},
        "slides":  [],
    }


# ── CLI entry ─────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    import json
    import sys

    # Expect exactly one positional argument: the template path.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python -m ppt_generator.engine.template_inspector <path.pptx>")
        sys.exit(1)

    # Print the full report envelope as indented JSON.
    print(json.dumps(inspect_template(cli_args[0]), indent=2))