"""
ppt_generator/pipeline.py
--------------------------
Lean courseware generation pipeline — no LangGraph.

Flow:
    1. plan_course   — single Sonnet call
    2. pad           — one targeted Sonnet call only if still short (<92% of target)
    3. qa_agent      — deterministic checks + one Sonnet call for missing topics; score < 80 calls fix_plan
    4. fix_plan      — Sonnet returns targeted patch ops (rename/add/remove) applied to the plan
    5. build_pptx    — deterministic PPTX builder (no Claude)
    6. qa_check      — fast post-build deterministic gate; pads + rebuilds if still short

Usage:
    from pipeline import run_pipeline
    result = run_pipeline(
        api_key       = "sk-...",
        outline       = outline_text,
        course_title  = "Public Speaking Course",
        duration_days = 1,
        output_path   = "output/generated.pptx",
    )
    # result keys: slide_plan, output_path, errors
    #              (+ qa_verdict, qa_issues once QA has run; qa_score on a completed build)
"""

from __future__ import annotations

import json
import re
import sys
from collections import Counter
from pathlib import Path

import anthropic

sys.path.insert(0, str(Path(__file__).parent))
from engine.content_planner import plan_course, _DURATION_TARGETS
from engine.slide_builder   import build_presentation


# ═══════════════════════════════════════════════════════════════════════════════
# Pad (cheap top-up — only sends missing count, not full plan)
# ═══════════════════════════════════════════════════════════════════════════════

def _pad_to_target(
    slide_plan: list[dict],
    target:     int,
    outline:    str,
    api_key:    str,
) -> list[dict]:
    """
    One targeted Sonnet call that generates only the missing slides.
    Much cheaper than regenerating the full plan (old fix_slides approach).

    Args:
        slide_plan: current plan; the list and its slides are not mutated.
        target:     desired total slide count.
        outline:    course outline text (only the first 3000 chars are sent).
        api_key:    Anthropic API key.

    Returns:
        A new list with at most ``target - len(slide_plan)`` extra slides
        placed ahead of the ``ending`` slide(s). ``slide_plan`` itself is
        returned when nothing is needed, or when the model call or the
        response parsing fails — padding is best-effort, so failures are
        printed and never raised.
    """
    needed = target - len(slide_plan)
    if needed <= 0:
        return slide_plan

    print(f"  [pad] {len(slide_plan)} slides, need {needed} more to reach {target}")

    existing_titles = [
        s.get("title") or s.get("module_title", "")
        for s in slide_plan
        if s.get("type") not in ("cover", "about_us", "syllabus", "ending")
    ]

    prompt = (
        f"A training course slide plan has {len(slide_plan)} slides but needs {target}.\n"
        f"Generate EXACTLY {needed} additional text_only slides to bridge the gap.\n\n"
        f"COURSE OUTLINE:\n{outline[:3000]}\n\n"
        f"EXISTING SLIDE TITLES (do NOT duplicate):\n"
        f"{json.dumps(existing_titles[:60], indent=2)}\n\n"
        f"RULES:\n"
        f"- EXACTLY {needed} slides — no more, no less.\n"
        f'- type: "text_only" with "bullets" (4-6 full sentences, 15-25 words each).\n'
        f"- Expand topics from the outline that deserve more depth.\n"
        f"- Do NOT duplicate any existing title.\n"
        f'- EVERY slide object MUST include a `notes` field — 80-150 words of\n'
        f"  trainer speaker-notes (second-person, conversational; expand the WHY\n"
        f"  behind the bullets with one example or analogy, one question to ask\n"
        f"  the room, and a pacing cue). NO EXCEPTIONS — a slide without notes\n"
        f"  is invalid and the response will be rejected.\n"
        f"- Return ONLY a JSON array, no markdown.\n\n"
        f"EXAMPLE SHAPE (notes is mandatory):\n"
        f'  {{"type":"text_only","title":"…","bullets":["…"],"notes":"…80-150 words…"}}'
    )

    # The model call is guarded as well as the parsing: this function is the
    # fallback of the fix agent and runs inside the build loop, so a transport
    # or SDK error here must degrade to "plan unchanged", not abort the run.
    try:
        client = anthropic.Anthropic(api_key=api_key)
        resp   = client.messages.create(
            model      = "claude-sonnet-4-6",
            max_tokens = 8192,
            messages   = [{"role": "user", "content": prompt}],
        )
        raw = resp.content[0].text.strip()
    except Exception as e:
        print(f"  [pad] ERROR calling model: {e}")
        return slide_plan

    # Strip an optional markdown code fence around the JSON.
    raw = re.sub(r"^```(?:json)?\s*", "", raw)
    raw = re.sub(r"\s*```$",          "", raw)

    # Truncation repair: if a JSON *array* was cut off mid-object, keep
    # everything up to the last complete object and close the array. Only
    # applied to arrays — an object-shaped reply ({"slides": [...]}) would be
    # corrupted by this and is handled after parsing instead.
    if raw.startswith("[") and not raw.rstrip().endswith("]"):
        last_obj = raw.rfind("},")
        if last_obj > 0:
            raw = raw[:last_obj + 1] + "\n]"

    try:
        new_slides = json.loads(raw)
        if isinstance(new_slides, dict):
            new_slides = new_slides.get("slides", [])
        if not isinstance(new_slides, list):
            new_slides = []
        new_slides = [s for s in new_slides if isinstance(s, dict)]

        # The prompt asks for EXACTLY `needed` slides; enforce the upper bound
        # so the plan never overshoots the target.
        if len(new_slides) > needed:
            print(f"  [pad] model returned {len(new_slides)} slides, keeping first {needed}")
            new_slides = new_slides[:needed]
        print(f"  [pad] added {len(new_slides)} slides")

        # Safety net: the LLM occasionally drops the `notes` field on pad slides
        # even when the prompt mandates it. Synthesise a basic note from the
        # title + bullets so no slide reaches the builder blank.
        missing = 0
        for s in new_slides:
            if not (s.get("notes") or "").strip():
                s["notes"] = _synthesise_slide_notes(s)
                missing += 1
        if missing:
            print(f"  [pad] synthesised notes for {missing} slide(s) the LLM left blank")

        # Keep the ending slide(s) last: new content goes in front of them.
        ending = [s for s in slide_plan if s.get("type") == "ending"]
        rest   = [s for s in slide_plan if s.get("type") != "ending"]
        return rest + new_slides + ending
    except Exception as e:
        print(f"  [pad] ERROR parsing response: {e}")
        return slide_plan


def _ensure_notes(slide_plan: list[dict]) -> tuple[list[dict], int]:
    """
    Guarantee that every slide — and every dict entry in a slide's ``items``
    list — carries non-blank ``notes``.

    Blank or missing notes are replaced in place with synthesised text;
    entries that already have notes are left alone, so repeated calls are
    harmless. Returns the same plan object together with the number of
    notes fields that were filled.
    """
    def _lacks_notes(entry: dict) -> bool:
        return not (entry.get("notes") or "").strip()

    synthesised = 0
    for entry in slide_plan:
        if _lacks_notes(entry):
            entry["notes"] = _synthesise_slide_notes(entry)
            synthesised += 1

        # Child cards of multi-slide builders read their own notes field.
        for child in entry.get("items") or []:
            if not isinstance(child, dict):
                continue
            if _lacks_notes(child):
                child["notes"] = _synthesise_item_notes_lite(entry, child)
                synthesised += 1

    return slide_plan, synthesised


def _synthesise_item_notes_lite(parent: dict, item: dict) -> str:
    """
    Build fallback trainer notes for a single card (``item``) of a
    multi-card slide (``parent``).

    The text is assembled from the item's ``label``, the parent's ``title``
    and up to four of the item's ``bullets`` (plain strings or dicts with a
    ``text`` key), followed by a fixed facilitation prompt. Lines are joined
    with newlines.
    """
    card_label = (item.get("label") or "").strip()
    card_bullets = item.get("bullets") or []
    section = (parent.get("title") or "").strip()

    # Opening sentence depends on how much context is available.
    if card_label and section:
        opener = (
            f"This card covers '{card_label}' as part of {section}. "
            f"Walk delegates through the key points one at a time."
        )
    elif card_label:
        opener = (
            f"This card covers '{card_label}'. "
            f"Walk delegates through the key points one at a time."
        )
    else:
        opener = "Walk delegates through this card one bullet at a time."
    out = [opener]

    if card_bullets:
        out.append("Talking points:")
        for entry in card_bullets[:4]:
            if isinstance(entry, str):
                cleaned = entry.strip()
            else:
                cleaned = (entry.get("text") or "").strip()
            if cleaned:
                out.append(f"  - {cleaned}")

    subject = card_label.lower() if card_label else "this"
    out.append(
        f"Pause after the points and ask the room when they've seen "
        f"{subject} in practice — use one answer to anchor "
        f"the next slide. About 90 seconds on this card."
    )
    return "\n".join(out)


def _synthesise_slide_notes(slide_dict: dict) -> str:
    """
    Deterministic fallback when an LLM-planned slide arrives without `notes`.

    Produces a short trainer brief from the slide's title plus its first few
    content entries so the trainer never sees a blank notes pane. Content is
    taken from the first non-empty field among ``bullets`` (first 3),
    ``points`` (first 3, rendered as "label: text" when a label exists) and
    ``paragraphs`` (first 2). This is a backstop — the LLM is supposed to
    write proper notes; this only fires when it forgets.

    Because this is the last line of defence it never raises on oddly shaped
    content: entries may be plain strings or dicts with a ``text`` key, and
    anything else (``None``, numbers, blank strings) is skipped.

    Returns:
        The notes text, lines joined with newlines.
    """
    def _as_entries(value) -> list:
        # A lone string is treated as a single entry rather than iterated
        # character by character; non-sequence junk is ignored.
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return list(value)
        return []

    def _entry_text(entry, with_label: bool = False) -> str:
        # Flatten one bullet / point / paragraph to display text ('' = skip).
        if isinstance(entry, str):
            return entry.strip()
        if not isinstance(entry, dict):
            return ""
        text = entry.get("text")
        text = text.strip() if isinstance(text, str) else ""
        if with_label and text:
            label = entry.get("label")
            label = label.strip() if isinstance(label, str) else ""
            if label:
                return f"{label}: {text}"
        return text

    raw_title = slide_dict.get("title") or slide_dict.get("module_title") or ""
    title = raw_title.strip() if isinstance(raw_title, str) else str(raw_title).strip()

    bullets = _as_entries(slide_dict.get("bullets"))
    points  = _as_entries(slide_dict.get("points"))
    paras   = _as_entries(slide_dict.get("paragraphs"))

    # First populated content field wins; the others are not mixed in.
    if bullets:
        candidates = [_entry_text(b) for b in bullets[:3]]
    elif points:
        candidates = [_entry_text(p, with_label=True) for p in points[:3]]
    elif paras:
        candidates = [p.strip() for p in paras[:2] if isinstance(p, str)]
    else:
        candidates = []
    talking_points = [c for c in candidates if c]

    lines: list[str] = []
    if title:
        lines.append(
            f"This slide covers '{title}'. Open with the headline idea in your "
            f"own words, then take delegates through each point one at a time."
        )
    else:
        lines.append(
            "Open with the headline idea in your own words, then take delegates "
            "through each point one at a time."
        )
    if talking_points:
        lines.append("Talking points to cover:")
        lines.extend(f"  - {tp}" for tp in talking_points)
    lines.append(
        "Pause after the points and ask the room how this connects to their own "
        "work — one volunteer example is enough to anchor the learning before "
        "moving on. Spend about 2-3 minutes on this slide."
    )
    return "\n".join(lines)


# ═══════════════════════════════════════════════════════════════════════════════
# Deterministic QA — fast free gate (no LLM; run on the plan after each build)
# ═══════════════════════════════════════════════════════════════════════════════

# Slide types that frame the deck rather than carry teaching content; they are
# excluded from the content total used by the type-mix check.
_STRUCTURAL_TYPES = {"cover", "about_us", "syllabus", "ending", "module_intro"}

def _qa_check(slide_plan: list[dict], target: int) -> list[str]:
    """
    Cheap, purely local sanity gate over a slide plan.

    Two rules are evaluated and each violation yields one message:
      * slide_count — the plan holds fewer than 90% of ``target`` slides
        (skipped when ``target`` is not positive);
      * type_mix    — text-rich slides (``text_only`` + ``drill_down``) make
        up less than half of the non-structural slides.

    Returns the list of messages; an empty list means the plan passes.
    """
    problems: list[str] = []
    total = len(slide_plan)

    too_short = target > 0 and total < target * 0.90
    if too_short:
        percent = int(100 * total / target)
        problems.append(f"slide_count: {total}/{target} ({percent}%)")

    by_type = Counter(s.get("type") for s in slide_plan)
    structural_total = sum(by_type.get(kind, 0) for kind in _STRUCTURAL_TYPES)
    content_total = total - structural_total
    if content_total <= 0:
        # Nothing but framing slides: the mix rule has nothing to measure.
        return problems

    text_rich = by_type.get("text_only", 0) + by_type.get("drill_down", 0)
    share = text_rich / content_total
    if share < 0.50:
        problems.append(
            f"type_mix: text_rich={text_rich}/{content_total} ({share:.0%})"
        )
    return problems


# ═══════════════════════════════════════════════════════════════════════════════
# Deterministic drill_down title normaliser (no LLM cost)
# ═══════════════════════════════════════════════════════════════════════════════

def _normalize_drill_down_titles(slide_plan: list[dict]) -> list[dict]:
    """
    Rewrite drill_down titles to the combined form 'Parent: StepLabel'.

    Only drill_down slides with a non-empty ``step_label`` are considered,
    and only when the parent title (``title``, else ``module_title``) is
    non-empty and does not already contain ': ' — which also makes a second
    pass a no-op. A changed slide is returned as a shallow copy; every other
    slide is passed through as the same object. The input list is not
    modified.
    """
    def _retitled(entry: dict) -> dict:
        if entry.get("type") != "drill_down" or not entry.get("step_label"):
            return entry
        base = (entry.get("title") or entry.get("module_title") or "").strip()
        step_name = entry["step_label"].strip()
        if not base or ": " in base:
            return entry
        combined = dict(entry)
        combined["title"] = f"{base}: {step_name}"
        return combined

    return [_retitled(entry) for entry in slide_plan]


# ═══════════════════════════════════════════════════════════════════════════════
# QA — hybrid deterministic + one targeted LLM call (missing topics only)
# ═══════════════════════════════════════════════════════════════════════════════

# Minimum score (out of 100) a plan needs to be considered passing.
_QA_PASS_THRESHOLD = 80

# Points subtracted per issue, keyed by issue code.
_QA_DEDUCTIONS = {
    "missing_topic":  20,
    "poor_structure": 10,
    "thin_content":   10,
}

# Matches a title that consists solely of a generic heading (whole-string,
# case-insensitive) and therefore names no actual subject matter.
_GENERIC_TITLE_RE = re.compile(
    r"^(introduction|overview|summary|conclusion|module\s*\d*|welcome|recap|review|"
    r"getting\s*started|course\s*intro(duction)?|day\s*\d+)$",
    re.IGNORECASE,
)


def _qa_score(issues: list[dict]) -> int:
    """
    Turn a list of QA issues into a 0-100 score.

    Starts from 100 and subtracts the deduction registered for each issue's
    ``code``; issues with an unknown or missing code cost 10 points. The
    result never goes below zero.
    """
    penalty = 0
    for issue in issues:
        penalty += _QA_DEDUCTIONS.get(issue.get("code", ""), 10)
    remaining = 100 - penalty
    return remaining if remaining > 0 else 0


def _det_thin_content(slide_plan: list[dict]) -> dict | None:
    """Deterministic: flag if >2 slides have purely generic titles."""
    generic = []
    for s in slide_plan:
        if s.get("type") not in ("module_intro", "text_only"):
            continue
        title = (s.get("title") or s.get("module_title") or "").strip()
        if _GENERIC_TITLE_RE.match(title):
            generic.append(title)
    if len(generic) > 2:
        examples = ", ".join(f'"{t}"' for t in generic[:3])
        return {"code": "thin_content", "detail": f"{len(generic)} slides have generic titles with no subject: {examples}"}
    return None


def _det_poor_structure(slide_plan: list[dict]) -> dict | None:
    """Deterministic: flag modules that have content slides but no module_intro."""
    intro_nums = {s.get("module_num") for s in slide_plan
                  if s.get("type") == "module_intro" and s.get("module_num")}
    content_nums = {s.get("module_num") for s in slide_plan
                    if s.get("type") not in ("cover", "about_us", "syllabus", "ending", "module_intro")
                    and s.get("module_num")}
    missing = sorted(content_nums - intro_nums)
    if missing:
        return {"code": "poor_structure", "detail": f"Module(s) {missing} have content slides but no module_intro"}
    return None


def _llm_missing_topics(slide_plan: list[dict], outline: str, api_key: str) -> list[dict]:
    """
    Ask the model (one Sonnet call) which outline topics have no slide.

    Sends the first 2500 characters of the outline and up to 3000 characters
    of the plan's titles, and expects a JSON array of
    ``{"code": "missing_topic", "detail": ...}`` objects back. Entries of any
    other shape are discarded. Any failure (API, parsing) is printed and
    reported as "nothing missing" by returning an empty list.
    """
    title_lines = []
    for s in slide_plan:
        heading = s.get("title") or s.get("module_title") or ""
        if heading:
            title_lines.append(f"- {heading}")
    titles_text = "\n".join(title_lines)

    prompt = (
        f"Course outline (first 2500 chars):\n{outline[:2500]}\n\n"
        f"All slide titles in the plan:\n{titles_text[:3000]}\n\n"
        "List ONLY topics explicitly named in the outline that have NO corresponding slide.\n"
        "Be strict — only flag topics that are truly absent, not just covered briefly.\n"
        'Return a JSON array: [{"code":"missing_topic","detail":"<topic> — <why missing>"}]\n'
        "Return [] if everything is covered."
    )

    try:
        reply = anthropic.Anthropic(api_key=api_key).messages.create(
            model="claude-sonnet-4-6",
            max_tokens=512,
            messages=[{"role": "user", "content": prompt}],
        )
        payload = reply.content[0].text.strip()
        # Drop an optional markdown fence before parsing.
        payload = re.sub(r"^```(?:json)?\s*", "", payload)
        payload = re.sub(r"\s*```$",          "", payload)

        found: list[dict] = []
        for entry in json.loads(payload):
            if isinstance(entry, dict) and entry.get("code") == "missing_topic":
                found.append(entry)
        return found
    except Exception as e:
        print(f"  [qa-missing] ERROR: {e}")
        return []


def _qa_agent(
    slide_plan: list[dict],
    outline:    str,
    target:     int,
    api_key:    str,
) -> dict:
    """
    Hybrid plan review: two deterministic checks (thin_content,
    poor_structure) followed by one LLM call that looks for missing topics.
    Duplicate titles and type imbalance are deliberately not checked here.

    ``target`` is accepted for signature compatibility and is not used.

    Returns a dict with ``verdict`` ("PASS" at or above the pass threshold,
    "WARN" from 60 up to it, "FAIL" below 60), the collected ``issues`` and
    the numeric ``score``.
    """
    deterministic = (
        _det_thin_content(slide_plan),
        _det_poor_structure(slide_plan),
    )
    issues: list[dict] = [found for found in deterministic if found]
    issues.extend(_llm_missing_topics(slide_plan, outline, api_key))

    score = _qa_score(issues)
    if score < 60:
        verdict = "FAIL"
    elif score < _QA_PASS_THRESHOLD:
        verdict = "WARN"
    else:
        verdict = "PASS"

    codes = [i['code'] for i in issues]
    print(f"  [qa] score={score}/100  issues={codes}")
    return {"verdict": verdict, "issues": issues, "score": score}


# ═══════════════════════════════════════════════════════════════════════════════
# Fix agent — targeted patch ops to repair QA-flagged issues
# ═══════════════════════════════════════════════════════════════════════════════

# System prompt for the fix agent (passed as `system=` by _fix_plan). It defines
# the three patch operations that _apply_patches understands (rename / add /
# remove) and how each QA issue code should be repaired. This text is sent to
# the model verbatim — edit with care, since the operation shapes described
# here must stay in step with what _apply_patches accepts.
_FIX_SYSTEM = """\
You are a slide plan repair agent for AI-generated training course slides.
You receive a numbered slide plan (index, type, title), a list of QA issues, and the outline.
Return a JSON array of patch operations that fix EVERY listed issue.

Operation types:
  {"op":"rename",  "index":N, "title":"new specific title"}
  {"op":"add",     "after_index":N, "slide":{full slide dict}}
  {"op":"remove",  "index":N}

Fix rules by issue code:
- thin_content:  Rename generic module_intro or text_only titles to be specific and descriptive
  (e.g. "Introduction" → "Introduction to UK Employment Law", "Module 1" → "Module 1: Equality Act Overview").
  Keep the module number prefix if present.
- missing_topic:  Add 1-2 text_only slides with 4-5 detailed bullet sentences covering the
  missing topic. Insert at the most logical module position.
  Slide dict: {"type":"text_only","title":"...","bullets":["full sentence…","full sentence…"]}
- poor_structure:  Add any missing module_intro slides at the start of the relevant module.
  Slide dict: {"type":"module_intro","module_num":N,"module_title":"...","topics":["t1","t2","t3"]}

Return ONLY the JSON array — no markdown fences, no explanation.
"""


def _apply_patches(slide_plan: list[dict], patches: list[dict]) -> list[dict]:
    """
    Apply rename / add / remove patch operations returned by the fix agent.

    Every index in a patch refers to a position in the plan as the agent saw
    it, i.e. in ``slide_plan`` before any operation is applied. All three
    operation kinds are therefore resolved against those original positions
    and the result is assembled in one pass, so a ``remove`` can never shift
    the insertion point of an ``add``.

    Operations:
        rename: {"op": "rename", "index": N, "title": str} — sets ``title``
                (or ``module_title`` when the slide has that key and no
                ``title``) on a copy of slide N.
        remove: {"op": "remove", "index": N} — drops slide N.
        add:    {"op": "add", "after_index": N, "slide": {...}} — inserts the
                slide right after original position N. ``-1`` (or lower)
                means "at the front"; a missing, too-large or non-integer
                ``after_index`` means "at the end". Several adds with the
                same anchor keep the order in which they were listed.

    Malformed operations (non-dict entries, non-integer or out-of-range
    indices, renames without a usable title, adds without a slide dict) are
    skipped so one bad entry does not discard the whole patch set.

    Returns:
        A new list; ``slide_plan`` and its slide dicts are not mutated.
    """
    plan = list(slide_plan)
    size = len(plan)
    ops = [p for p in (patches or []) if isinstance(p, dict)]

    def _as_index(value):
        # JSON integers only; bool is an int subclass and is rejected too.
        if isinstance(value, bool) or not isinstance(value, int):
            return None
        return value

    # Renames: replace the slide with an edited copy at its original position.
    for p in ops:
        if p.get("op") != "rename":
            continue
        idx = _as_index(p.get("index"))
        new_title = p.get("title")
        if idx is None or not (0 <= idx < size):
            continue
        if not isinstance(new_title, str) or not new_title.strip():
            continue
        s = dict(plan[idx])
        if "title" not in s and "module_title" in s:
            s["module_title"] = new_title
        else:
            s["title"] = new_title
        plan[idx] = s

    # Removes: just remember which original positions to drop.
    removed = set()
    for p in ops:
        if p.get("op") != "remove":
            continue
        idx = _as_index(p.get("index"))
        if idx is not None and 0 <= idx < size:
            removed.add(idx)

    # Adds: bucket new slides by the original position they follow.
    inserts: dict[int, list[dict]] = {}
    for p in ops:
        if p.get("op") != "add":
            continue
        slide = p.get("slide")
        if not isinstance(slide, dict) or not slide:
            continue
        after = _as_index(p.get("after_index", size - 1))
        if after is None:
            after = size - 1
        anchor = max(-1, min(after, size - 1))
        inserts.setdefault(anchor, []).append(slide)

    # Single assembly pass over the original positions.
    result: list[dict] = list(inserts.get(-1, []))
    for i, slide in enumerate(plan):
        if i not in removed:
            result.append(slide)
        result.extend(inserts.get(i, []))
    return result


def _fix_plan(
    slide_plan: list[dict],
    qa_issues:  list[dict],
    outline:    str,
    target:     int,
    api_key:    str,
) -> list[dict]:
    """
    Repair QA-flagged issues with one model call (Sonnet) that returns patch
    operations, then apply those operations to the plan.

    With no issues the plan is returned untouched. The model only sees a
    compressed view of the plan (index, type, title per slide), the issue
    list and the first 4000 characters of the outline. If the call, the JSON
    parsing or the patch application raises, the error is printed and the
    plan is topped up via _pad_to_target instead.
    """
    if not qa_issues:
        return slide_plan

    # Index + type + title per slide is enough for the agent to address
    # slides by position while keeping the prompt small.
    compressed = [
        {
            "index": pos,
            "type":  s.get("type", ""),
            "title": (
                s.get("title")
                or s.get("module_title")
                or s.get("step_label")
                or s.get("type", "")
            ),
        }
        for pos, s in enumerate(slide_plan)
    ]

    issue_lines = [
        f"[{iss.get('code','?')}] {iss.get('detail','')}" for iss in qa_issues
    ]
    issue_text = "\n".join(issue_lines)

    prompt = (
        f"QA ISSUES TO FIX:\n{issue_text}\n\n"
        f"SLIDE PLAN ({len(slide_plan)} slides):\n"
        f"{json.dumps(compressed, indent=2)}\n\n"
        f"COURSE OUTLINE (first 4000 chars):\n{outline[:4000]}\n\n"
        f"Return patch operations as a JSON array fixing ALL issues above."
    )

    try:
        reply = anthropic.Anthropic(api_key=api_key).messages.create(
            model      = "claude-sonnet-4-6",
            max_tokens = 4096,
            system     = _FIX_SYSTEM,
            messages   = [{"role": "user", "content": prompt}],
        )
        payload = reply.content[0].text.strip()
        # Drop an optional markdown fence before parsing.
        payload = re.sub(r"^```(?:json)?\s*", "", payload)
        payload = re.sub(r"\s*```$",          "", payload)

        patches = json.loads(payload)
        if isinstance(patches, dict):
            # Tolerate an object wrapper around the operation list.
            if "patches" in patches:
                patches = patches["patches"]
            else:
                patches = patches.get("operations", [])
        print(f"  [fix] {len(patches)} patch operations from QA fix agent")

        repaired = _apply_patches(slide_plan, patches)
        print(f"  [fix] plan: {len(slide_plan)} → {len(repaired)} slides after patches")
        return repaired
    except Exception as e:
        print(f"  [fix] ERROR: {e} — falling back to pad")
        return _pad_to_target(slide_plan, target, outline, api_key)


# ═══════════════════════════════════════════════════════════════════════════════
# Main runner
# ═══════════════════════════════════════════════════════════════════════════════

def run_pipeline(
    api_key:           str,
    outline:           str,
    course_title:      str = "",
    target_slides:     int = 0,
    duration_days:     int = 0,
    output_path:       str = "",
    template_path:     str = "",
    bank_dir:          str = "",
    catalogue_path:    str = "",
    gold_path:         str = "",
    images_dir:        str = "",
    language:          str = "uk",
    progress_callback = None,   # callable(stage: int, pct: int, msg: str) | None
    plan_callback     = None,   # callable(slide_plan: list[dict]) | None — fires once when planning completes
) -> dict:
    """
    Build a complete training course PPTX from a plain-text outline.

    Stages: plan (plan_course, then a top-up if the plan is under 92% of the
    target) → QA review with up to QA_MAX_ITERATIONS fix rounds → build, with
    a deterministic post-build check that may pad the plan and rebuild.

    Defaults: when ``target_slides`` is not given but ``duration_days`` is,
    the target is looked up in _DURATION_TARGETS (85 if the duration is not
    listed). Empty ``output_path`` / ``template_path`` / ``bank_dir`` /
    ``catalogue_path`` fall back to locations under the repository root
    (the parent of this file's directory).

    Returns a dict whose keys depend on how far the run got:
        planning failed:  slide_plan (empty), output_path, errors
        QA blocked build: slide_plan, output_path (""), errors,
                          qa_verdict, qa_issues
        build attempted:  slide_plan, output_path, errors,
                          qa_verdict, qa_issues, qa_score
    A build exception does not raise; it is recorded in ``errors``.

    progress_callback(stage, pct, msg):
        stage  1 = Planning, 2 = QA Review, 3 = Building, 4 = Done
        pct    0-100 overall progress
        msg    human-readable status string

    plan_callback(slide_plan) is invoked once, after planning/padding and
    before the QA stage. Exceptions raised by either callback are contained
    and never abort the run.
    """
    def _cb(stage: int, pct: int, msg: str):
        # Console encoding (Windows cp1252) can raise UnicodeEncodeError on
        # characters like ≥/≤/—.  Never let logging crash the build.
        try:
            print(msg)
        except UnicodeEncodeError:
            try:
                print(msg.encode("ascii", "replace").decode("ascii"))
            except Exception:
                pass
        # Progress reporting is best-effort: a faulty UI callback is ignored.
        if progress_callback:
            try:
                progress_callback(stage, pct, msg)
            except Exception:
                pass

    root = Path(__file__).parent.parent
    # Tagged-infographic bank: each shape has an "Item N" tag → deterministic
    # slot fill.  Catalogue lives in tagged_bank/.
    bank          = root / "tagged infographics" / "new labeled grouped infographics"
    catalogue_def = root / "tagged_bank" / "tagged_catalogue.json"

    # Derive the slide target from the course duration when not given explicitly.
    if not target_slides and duration_days:
        target_slides = _DURATION_TARGETS.get(duration_days, 85)

    # Fill in default locations for anything the caller left empty.
    output_path    = output_path    or str(root / "ppt_generator" / "output" / "generated.pptx")
    template_path  = template_path  or str(root / "Template new.pptx")
    bank_dir       = bank_dir       or str(bank)
    catalogue_path = catalogue_path or str(catalogue_def)

    errors: list[str] = []

    # ── Stage 1: Plan (1 Sonnet call) ────────────────────────────────────────
    _cb(1, 5, "Planning slides with Claude…")
    try:
        slide_plan = plan_course(
            api_key       = api_key,
            outline       = outline,
            course_title  = course_title,
            target_slides = target_slides,
            duration_days = duration_days,
            language      = language,
        )
        _cb(1, 30, f"Slide plan ready — {len(slide_plan)} slides")
    except Exception as e:
        # Without a plan nothing else can run: report and stop here.
        msg = f"plan_course failed: {e}"
        print(f"      ERROR: {msg}")
        return {"slide_plan": [], "output_path": output_path, "errors": [msg]}

    # Normalize drill_down titles deterministically so QA never sees duplicates
    slide_plan = _normalize_drill_down_titles(slide_plan)

    # Pad if obviously short (under 92% of the target)
    if target_slides > 0 and len(slide_plan) < target_slides * 0.92:
        _cb(1, 32, f"Plan short ({len(slide_plan)}/{target_slides}) — topping up…")
        slide_plan = _pad_to_target(slide_plan, target_slides, outline, api_key)
        _cb(1, 38, f"Plan padded to {len(slide_plan)} slides")

    # Fire plan_callback so the UI can show the slide outline mid-build
    # (this is the plan before any QA fixes are applied).
    if plan_callback:
        try:
            plan_callback(slide_plan)
        except Exception as exc:
            print(f"  [warn] plan_callback raised: {exc!r}")

    # ── Stage 2: QA fix loop ──────────────────────────────────────────────────
    # Rule: score ≥ _QA_PASS_THRESHOLD → build.  Below it → fix and re-review.
    # After QA_MAX_ITERATIONS fix rounds, if the score is still below the
    # threshold the build is blocked entirely.
    QA_MAX_ITERATIONS = 4
    _cb(2, 40, "Reviewing slide plan with QA agent…")

    qa_result = _qa_agent(slide_plan, outline, target_slides, api_key)
    qa_issues = qa_result.get("issues", [])
    score     = qa_result.get("score", _qa_score(qa_issues))
    print(f"  [qa] initial: score={score}, {len(qa_issues)} issue(s)")
    for iss in qa_issues:
        print(f"      [{iss.get('code','?')}] {iss.get('detail','')}")

    for qa_iter in range(QA_MAX_ITERATIONS):
        if score >= _QA_PASS_THRESHOLD:
            _cb(2, 50 + qa_iter, f"QA score {score}/100 — passed (≥{_QA_PASS_THRESHOLD}), building…")
            break

        _cb(2, 42 + qa_iter * 3,
            f"QA score {score}/100 (iter {qa_iter+1}/{QA_MAX_ITERATIONS}) — fixing {len(qa_issues)} issue(s)…")
        slide_plan = _fix_plan(slide_plan, qa_issues, outline, target_slides, api_key)
        # Fixes may add drill_down slides, so titles are normalized again.
        slide_plan = _normalize_drill_down_titles(slide_plan)

        # Re-review the patched plan; the new score decides the next round.
        qa_result = _qa_agent(slide_plan, outline, target_slides, api_key)
        qa_issues = qa_result.get("issues", [])
        score     = qa_result.get("score", _qa_score(qa_issues))
        print(f"  [qa-loop] iter {qa_iter+1}: score={score}/100, {len(qa_issues)} issue(s)")
        for iss in qa_issues:
            print(f"      [{iss.get('code','?')}] {iss.get('detail','')}")

    # Block the build if score still below passing threshold
    if score < _QA_PASS_THRESHOLD:
        issue_summary = "; ".join(i.get("detail", i.get("code", "?")) for i in qa_issues)
        _cb(2, 56, f"QA score {score}/100 — below {_QA_PASS_THRESHOLD} after {QA_MAX_ITERATIONS} attempts, build blocked")
        return {
            "slide_plan":  slide_plan,
            "output_path": "",
            "errors":      [f"QA score {score}/100 after {QA_MAX_ITERATIONS} fix attempts. {issue_summary}"],
            "qa_verdict":  qa_result.get("verdict", "FAIL"),
            "qa_issues":   qa_issues,
        }

    _cb(2, 55, f"QA score {score}/100 — building PPTX…")

    # Notes guarantor: walk the final plan and synthesise notes for any entry
    # the LLM left blank (pad / QA fix paths can drop the field). Runs once
    # here so every slide that reaches the builder has trainer notes.
    slide_plan, _filled = _ensure_notes(slide_plan)
    if _filled:
        print(f"  [notes] synthesised notes for {_filled} slide(s) with empty notes field")

    # ── Stage 3: Build PPTX ───────────────────────────────────────────────────
    # Up to MAX_RETRIES + 1 builds: each build is followed by the deterministic
    # _qa_check; if it reports gaps and attempts remain, the plan is padded and
    # built again.
    MAX_RETRIES = 2
    for attempt in range(MAX_RETRIES + 1):
        build_pct = 55 + attempt * 10
        _cb(3, build_pct, f"Building PPTX ({len(slide_plan)} slides)…")
        try:
            build_presentation(
                slide_plan         = slide_plan,
                output_path        = output_path,
                template_path      = template_path,
                bank_dir           = bank_dir,
                catalogue_path     = catalogue_path,
                gold_standard_path = gold_path,
                images_dir         = images_dir,
            )
        except Exception as e:
            # A builder failure is recorded and ends the build stage; it is
            # reported through the returned `errors` rather than raised.
            msg = f"build_pptx failed: {e}"
            print(f"      ERROR: {msg}")
            errors.append(msg)
            break

        issues = _qa_check(slide_plan, target_slides)
        if not issues:
            _cb(3, 90, f"Build complete — {len(slide_plan)} slides assembled")
            break

        print(f"      post-build issues: {issues}")
        if attempt < MAX_RETRIES:
            _cb(3, build_pct + 5, "Post-build check found gaps — topping up…")
            slide_plan = _pad_to_target(slide_plan, target_slides, outline, api_key)
            # Newly padded slides must also carry notes before the rebuild.
            slide_plan, _ = _ensure_notes(slide_plan)

    # ── Stage 4: Done ─────────────────────────────────────────────────────────
    # Print a per-type slide count summary for the console log.
    counts = Counter(s.get("type") for s in slide_plan)
    _cb(4, 100, f"Done — {len(slide_plan)} slides generated")
    for t, n in sorted(counts.items()):
        print(f"  {t:24s}: {n}")
    print(f"Output: {output_path}")

    return {
        "slide_plan":  slide_plan,
        "output_path": output_path,
        "errors":      errors,
        "qa_verdict":  qa_result.get("verdict", "PASS"),
        "qa_issues":   qa_result.get("issues", []),
        "qa_score":    qa_result.get("score", 100),
    }