"""
engine/slide_builder.py
------------------------
Assembles a full PPTX from a typed slide plan (output of content_planner.py).

ARCHITECTURE (based on gold standard human course analysis):
─────────────────────────────────────────────────────────────────────────────
The template content slide is ALWAYS the base for every content slide.
Bank infographic slides are NOT cloned wholesale — their GROUP SHAPES are
extracted and placed in the lower zone of the template content slide.
This ensures the template's chrome (watermarks, corner accents, accent bands)
is present on every slide, making the deck look cohesive.

Per-type build strategy:
  cover, about_us, syllabus, module_intro, ending
      → clone from template, replace text content

  text_only (paragraphs)   → template content slide + full-height prose body
  text_only (points)       → template content slide + inline bold:regular paragraphs
  text_only (bullets)      → template content slide + checkmark bullet body

  groups                   → template content slide + short intro body
                             + bank GROUP SHAPES extracted into lower zone
                             + color-harmonised to template brand color

  overview_groups          → same as groups (labels only, no descriptions)
  drill_down               → template content slide + navy step bar + bullet body

Template slide index mapping (0-based):
    0 = Cover
    1 = About Us
    2 = Syllabus
    3 = Module Intro
    4 = Content (the base for all content slides)
    5 = Ending
"""

from __future__ import annotations

import copy
import json
import math
import re
from pathlib import Path

from pptx.oxml.ns import qn

from engine.slide_cloner    import SlideCloner
from engine.text_replacer   import _collect_slots, _set_text
from engine.slide_selector  import select_slide
from engine.template_analyzer import (
    analyze_template, apply_chrome, body_cx_for_text_only, title_cx_for_layout,
)

# English Metric Units per inch (914,400 EMU = 1 in), the length unit used by
# OOXML geometry. NOTE(review): no use of this constant is visible in this part
# of the file — confirm it is still referenced further down.
_EMU = 914_400

# XML namespace URIs for PresentationML (p:) and DrawingML (a:). They are
# interpolated into Clark-notation tags such as f"{{{_NS_A}}}r" when walking
# slide XML directly.
_NS_P = "http://schemas.openxmlformats.org/presentationml/2006/main"
_NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"

# Lower-case substrings identifying the brand wordmark. A text slot whose
# lower-cased text contains any of them is excluded from content replacement
# (see the slot filter in _build_module_intro).
_LOGO_TOKENS = {"theknowledgeacademy", "the knowledge academy"}
# Lower-case, dot-prefixed image extensions — presumably the file types
# accepted when scanning the illustration directory; usage is outside this view.
_IMG_EXTS    = {".gif", ".png", ".jpg", ".jpeg"}

# Default template slide indices (overridden per-build by dna["slide_indices"])
# These mirror the 0-based mapping listed in the module docstring.
_TMPL_COVER        = 0
_TMPL_ABOUT_US     = 1
_TMPL_SYLLABUS     = 2
_TMPL_MODULE_INTRO = 3
_TMPL_CONTENT      = 4
_TMPL_ENDING       = 5

# Image noise words stripped when extracting keywords from image filenames
# build_presentation() also uses this set to drop the same words from the
# course-title word set (course_words).
_IMG_NOISE = {
    "pana", "bro", "amico", "cuate", "rafiki", "removebg", "preview",
    "dazzle", "dizzy", "juicy", "techny", "sammy", "flame", "beam",
    "pixeltrue", "with", "from", "your", "that", "this", "have",
    "hand", "woman", "girl", "person", "people", "man", "guy",
}
# Keywords that presumably mark an illustration as technology-themed.
# NOTE(review): the consumer is outside this view — confirm whether matching is
# per-word or substring. "techny" also appears in _IMG_NOISE, so it may be
# stripped before it can ever match here.
_TECH_IMAGE_SIGNALS = {
    "java", "javascript", "python", "css", "html", "code", "coding",
    "programming", "backend", "frontend", "vscode", "github", "gitlab",
    "docker", "kubernetes", "devops", "software", "developer", "development",
    "api", "modeling", "database", "cloud", "debugging", "icons8",
    "techny", "control", "panel", "operating", "system", "prototyping",
    "scrum", "segmentation", "interaction", "testing",
}
# Keywords/phrases that presumably mark an illustration as social-media or
# marketing themed. Several entries are multi-word, which suggests substring
# matching against a normalised filename — TODO confirm against the consumer.
_SOCIAL_MARKETING_IMAGE_SIGNALS = {
    "social", "marketing", "ecommerce", "seo", "influencer", "targeting",
    "advertising", "online ads", "online shop", "online store", "qr code",
    "qr", "megaphone", "dashboard", "site stats", "website", "web designer",
    "landing page", "mobile marketing", "content marketing", "digital marketing",
    "clothing store", "blogging", "subscriber", "online clothing",
}
# Course-title signals for technology courses (consumed by _is_tech_course,
# defined outside this view).
# NOTE(review): "it ", " it ", "ai " and " ai" carry deliberate padding spaces,
# which implies substring matching on the title. A title that merely ends in
# "IT" would only match if the matcher pads the title with spaces — verify.
_TECH_COURSE_SIGNALS = {
    "software", "programming", "coding", "development", "web", "python",
    "java", "javascript", "html", "css", "data", "cloud", "devops",
    "technology", "it ", " it ", "cyber", "database", "network",
    "machine learning", "artificial intelligence", "ai ", " ai",
}
# Course-title signals for marketing courses (consumed by _is_marketing_course,
# defined outside this view).
_MARKETING_COURSE_SIGNALS = {
    "marketing", "advertising", "digital marketing", "social media",
    "seo", "content marketing", "ecommerce", "e-commerce", "brand",
    "campaign", "promotion", "sales funnel", "copywriting",
}

# Soft-skill / communication / leadership courses don't share vocabulary with
# our image-bank filenames (which lean toward "presentation", "lesson",
# "meeting", "business"). Without this, the picker has no good fallback and
# falls through to alphabetical-unused — which puts sci-fi/robot images on a
# public-speaking course. When the course title hits one of these signals,
# we INJECT the matching image keywords into course_words so the existing
# fb_score logic surfaces sensible visuals.
_SOFT_SKILL_COURSE_SIGNALS = {
    "public speaking", "presentation", "communication", "communicating",
    "leadership", "negotiation", "negotiating", "conflict",
    "feedback", "coaching", "mentoring", "facilitation", "facilitating",
    "interview", "interviewing", "delegation", "delegating",
    "stakeholder", "influence", "influencing", "persuasion",
    "team", "teamwork", "collaboration", "collaborating",
    "emotional intelligence", "assertiveness", "active listening",
    "storytelling", "speaking", "speech", "presenting",
    "trainer", "training", "workshop", "facilitator",
}

# Image keywords to prefer when the course is detected as soft-skill.
# These match real filenames in images/: Presentation*, Lesson*, Business*,
# Documents*, Animation*, Business support*, Business mission*, Business Plan*
# build_presentation() unions this set into course_words when
# _is_soft_skill_course() reports a match.
_SOFT_SKILL_PREFERRED_IMG_KEYWORDS = {
    "presentation", "lesson", "business", "support", "mission",
    "plan", "animation", "documents", "instant", "information",
    "blogging",
}


# ═══════════════════════════════════════════════════════════════════════════════
# Public entry point
# ═══════════════════════════════════════════════════════════════════════════════

def build_presentation(
    slide_plan: list[dict],
    output_path: str,
    template_path: str,
    bank_dir: str,
    catalogue_path: str,
    gold_standard_path: str = "",
    images_dir: str = "",
    api_key: str = "",
) -> str:
    """
    Build a PPTX from *slide_plan* and save to *output_path*.
    api_key is passed to analyze_template for one-shot LLM zone validation.

    Each plan entry is dispatched on its "type" key to a dedicated builder.
    After every entry, trainer notes are stamped onto whatever slides that
    entry added to the deck. A final sanitising pass runs before saving.

    Args:
        slide_plan: Ordered slide dicts. The first entry of type "cover"
            supplies the course title used for course-type detection.
        output_path: Destination handed to SlideCloner; also the return value.
        template_path: Template PPTX that every builder clones from.
        bank_dir: Infographic bank directory, forwarded to the groups builders.
        catalogue_path: UTF-8 JSON catalogue file, loaded up front.
        gold_standard_path: Accepted for interface compatibility; it is not
            referenced anywhere in this function body.
        images_dir: Illustration directory. When empty, defaults to the
            "images" folder three ``.parent`` hops above this file.
        api_key: Forwarded to analyze_template.

    Returns:
        ``str(output_path)``.

    Error handling:
        A builder exception is caught per slide: a warning is printed and a
        text_only fallback slide is attempted; if that also fails the entry is
        skipped. Failures while loading the drill-down catalogue or setting
        notes are downgraded to warnings. Errors from loading *catalogue_path*,
        analyze_template, SlideCloner construction, the sanitising pass or
        save() are NOT caught here and propagate to the caller.
    """
    bank_dir  = Path(bank_dir)
    tmpl_path = str(template_path)

    if not images_dir:
        images_dir = str(Path(__file__).parent.parent.parent / "images")

    with open(catalogue_path, encoding="utf-8") as f:
        catalogue = json.load(f)

    # Drill-down card catalog (gold-standard card shapes). Optional — falls back
    # to the procedural icon-banner builder when missing.
    drill_dir = Path(__file__).parent.parent.parent / "drill_down_bank"
    drill_cat_path = drill_dir / "drill_down_catalogue.json"
    drill_catalogue: list[dict] = []
    if drill_cat_path.exists():
        try:
            with open(drill_cat_path, encoding="utf-8") as f:
                drill_catalogue = json.load(f)
        except Exception as exc:
            # Best-effort: an unreadable catalogue leaves drill_catalogue empty.
            print(f"  [warn] drill catalog load failed: {exc!r}")

    # Template DNA — parsed once, used everywhere.
    # api_key enables one-shot LLM validation (reads/writes manifest file).
    dna = analyze_template(tmpl_path, api_key=api_key)

    cloner       = SlideCloner(str(output_path), template_path=tmpl_path)
    # Rotation/bookkeeping state shared across builders. These containers are
    # passed by reference, so the builders are presumably the ones mutating
    # them — only used_card_families is appended to in this function.
    used_slides: dict[tuple, int] = {}
    used_families: list[str]      = []
    used_files:    list[str]      = []
    used_card_families: list[str] = []   # drill_down card family rotation

    # Detect course type from cover title
    course_title = ""
    for s in slide_plan:
        if s.get("type") == "cover":
            course_title = s.get("title", "")
            break
    is_tech      = _is_tech_course(course_title)
    is_marketing = _is_marketing_course(course_title)
    is_soft_skill = _is_soft_skill_course(course_title)
    # Title words: non-letters become spaces, then keep lower-cased words longer
    # than 3 characters that are not image-noise words.
    course_words: set[str] = {
        w.lower() for w in re.sub(r"[^a-zA-Z ]", " ", course_title).split()
        if len(w) > 3 and w.lower() not in _IMG_NOISE
    }
    # Soft-skill courses share no vocabulary with image-bank filenames — inject
    # preferred image keywords so the picker has sensible fallback options
    # instead of grabbing the next alphabetical-unused (robot/sci-fi) image.
    if is_soft_skill:
        course_words = course_words | _SOFT_SKILL_PREFERRED_IMG_KEYWORDS
    print(f"  Course: {course_title!r}  tech={is_tech}  marketing={is_marketing}  soft_skill={is_soft_skill}")

    # Illustration pool
    illus_images = _load_illustration_images(
        images_dir,
        tech_course=is_tech,
        marketing_course=is_marketing,
        course_title=course_title,
    )
    # Shared with the builders, which presumably record the images they place.
    illus_used:  set[str] = set()
    print(f"  Illustration images: {len(illus_images)}")

    # Topic-level family lock for standalone drill_down runs.
    # When consecutive drill_downs share the same parent topic, they all use
    # ONE card style. The planner sometimes appends ": <step_label>" to the
    # title, so we normalise the key by stripping that suffix before compare.
    _drill_lock_title  = None
    _drill_lock_entry  = None

    def _topic_key(d: dict) -> str:
        # Return the stripped title, minus a trailing ": <step_label>" suffix
        # (matched case-insensitively) when one is present.
        title = (d.get("title") or "").strip()
        label = (d.get("step_label") or "").strip()
        if label and title.lower().endswith(f": {label}".lower()):
            # len(label) + 2 accounts for the ": " separator.
            title = title[: -(len(label) + 2)].rstrip()
        return title

    # Per-topic uniform font for drill_down card labels — so every card in
    # one parent topic renders at the same size, instead of "Informative" at
    # 12pt next to "Demonstrative" at 10pt (visually jarring within one set).
    from collections import defaultdict as _defaultdict
    _drill_groups: dict[str, list[str]] = _defaultdict(list)
    for _d in slide_plan:
        if _d.get("type") == "drill_down":
            _drill_groups[_topic_key(_d)].append(
                _d.get("step_label") or _d.get("title") or ""
            )
    _drill_label_fonts: dict[str, float] = {
        k: _drill_label_pt_for_topic(labels) for k, labels in _drill_groups.items()
    }

    for slide_dict in slide_plan:
        stype = slide_dict.get("type", "")
        # Reset the topic lock whenever the run of drill_downs is broken
        # (groups slides have their own internal lock so they're exempt).
        if stype != "drill_down":
            _drill_lock_title = None
            _drill_lock_entry = None
        # Record slide count BEFORE dispatch so we can identify the new slide(s)
        # this iteration produces — needed to stamp trainer notes onto the
        # primary (first new) slide regardless of which builder ran.
        _slides_before = cloner.slide_count()
        try:
            if stype == "cover":
                _build_cover(cloner, tmpl_path, slide_dict, dna)

            elif stype == "about_us":
                # Cloned verbatim: no text replacement for the About Us slide.
                cloner.clone_slide(tmpl_path, dna["slide_indices"]["about_us"])

            elif stype == "syllabus":
                _build_syllabus(cloner, tmpl_path, slide_dict, dna)

            elif stype == "module_intro":
                _build_module_intro(cloner, tmpl_path, slide_dict, dna, illus_images, illus_used, course_words)

            elif stype == "text_only":
                _build_text_only(cloner, tmpl_path, slide_dict, dna, illus_images, illus_used, course_words)

            elif stype in ("groups", "infographic", "text_infographic"):
                _build_groups_slide(cloner, tmpl_path, bank_dir, catalogue,
                                    slide_dict, used_slides, used_families,
                                    is_tech, used_files, dna,
                                    drill_dir, drill_catalogue, used_card_families,
                                    illus_images=illus_images,
                                    illus_used=illus_used,
                                    course_words=course_words)

            elif stype in ("overview_groups", "infographic_overview"):
                _build_overview_groups(cloner, tmpl_path, bank_dir, catalogue,
                                       slide_dict, used_slides, used_families,
                                       is_tech, used_files, dna)

            elif stype == "drill_down":
                # Lock the card family for consecutive drill_downs that share
                # the same parent topic.  Picks a fresh family when the topic
                # changes so each topic has its own visual treatment.
                _this_key = _topic_key(slide_dict)
                if _this_key != _drill_lock_title:
                    _drill_lock_title = _this_key
                    _drill_lock_entry = None
                    if drill_catalogue:
                        _drill_lock_entry = _pick_card_family(drill_catalogue, used_card_families)
                        if _drill_lock_entry is not None:
                            used_card_families.append(_drill_lock_entry.get("name", ""))
                # card_entry stays None when no catalogue was loaded or no
                # family could be picked; the builder receives None in that case.
                _build_drill_down_card(
                    cloner, tmpl_path, drill_dir, drill_catalogue,
                    slide_dict, dna, used_card_families,
                    card_entry=_drill_lock_entry,
                    illus_images=illus_images,
                    illus_used=illus_used,
                    course_words=course_words,
                    label_font_pt=_drill_label_fonts.get(_topic_key(slide_dict)),
                )

            elif stype == "quiz":
                # 7-slide contract: real MCQ slide. Falls back to the
                # generic marker if the template has no dedicated quiz slot.
                if "quiz" in dna.get("slide_indices", {}):
                    _build_quiz(cloner, tmpl_path, slide_dict, dna)
                else:
                    _build_assessment_marker(cloner, tmpl_path, slide_dict, dna)

            elif stype == "qa":
                _build_assessment_marker(cloner, tmpl_path, slide_dict, dna)

            elif stype == "scenario_qa":
                _build_scenario_qa(cloner, tmpl_path, slide_dict, dna)

            elif stype == "scenario":
                _build_scenario(cloner, tmpl_path, slide_dict, dna)

            elif stype in ("case_study", "activity"):
                _build_prose_slide(cloner, tmpl_path, slide_dict, dna)

            elif stype == "ending":
                _build_ending(cloner, tmpl_path, slide_dict, dna)

            else:
                # Unknown types add no slide, so the notes step below is a no-op.
                print(f"  [skip] unknown slide type: {stype!r}")

        except Exception as exc:
            # Deliberately broad: one broken slide must not abort the whole deck.
            # NOTE(review): if a builder raised AFTER cloning its slide, that
            # partially built slide stays in the deck and the fallback appends a
            # second one. The notes loop below would then treat the fallback as
            # a child slide (offset 1). Confirm whether builders can fail
            # post-clone and whether the partial slide should be removed.
            title = slide_dict.get("title") or slide_dict.get("module_title", stype)
            print(f"  [warn] {stype} failed ({exc!r}), falling back: {title!r}")
            try:
                fallback = _make_text_fallback(slide_dict)
                _build_text_only(cloner, tmpl_path, fallback, dna, illus_images, illus_used, course_words)
            except Exception as exc2:
                print(f"  [error] fallback also failed ({exc2!r}), skipping")

        # Stamp trainer notes onto every slide produced this iteration.
        #
        # Single-slide builders (text_only, drill_down, quiz, etc): one note
        # on one slide — the slide_dict's `notes` field.
        #
        # Multi-slide builders (overview_groups expands into 1 overview +
        # N child cards): each child preferentially uses its OWN item-level
        # `notes` field (LLM-written, course-specific) — falling back to the
        # deterministic synthesiser only if the LLM omitted it. Parent gets
        # the top-level slide_dict["notes"].
        new_count = cloner.slide_count() - _slides_before
        if new_count > 0:
            parent_notes = (slide_dict.get("notes") or "").strip()
            # Defense in depth: if the plan reached the builder without notes
            # (e.g. the pad call's safety net didn't run, or a future code path
            # bypasses it), synthesise notes from the slide content so the
            # trainer never opens the deck to find blank speaker notes.
            if not parent_notes:
                parent_notes = _synthesise_slide_notes(slide_dict)
            try:
                # Slide 0 of the new range: parent / primary slide
                if parent_notes:
                    _set_notes(cloner._prs.slides[_slides_before], parent_notes)

                # Slides 1..N-1: child cards from items[]. Prefer LLM-written
                # item.notes; fall back to synthesised template if missing.
                # Child slide at offset k is paired with items[k - 1], i.e. this
                # assumes children were emitted in items[] order — TODO confirm
                # against the multi-slide builders.
                items = slide_dict.get("items") or []
                for offset in range(1, new_count):
                    item_idx = offset - 1
                    child_notes = ""
                    if item_idx < len(items):
                        item = items[item_idx]
                        item_notes = (item.get("notes") or "").strip()
                        if item_notes:
                            child_notes = item_notes
                        else:
                            child_notes = _synthesise_item_notes(slide_dict, item)
                    else:
                        # No item to anchor against — replicate parent notes
                        # rather than leave the slide blank.
                        child_notes = parent_notes
                    if child_notes:
                        _set_notes(cloner._prs.slides[_slides_before + offset], child_notes)
            except Exception as exc:
                # Notes are non-essential: warn and continue with the next slide.
                print(f"  [warn] could not set notes on {stype}: {exc!r}")

    # Strip malformed empty runs from every slide before writing the file.
    _sanitize_slides(cloner)
    cloner.save()
    return str(output_path)


def _sanitize_slides(cloner) -> None:
    """
    Final pass to remove malformed XML that triggers PowerPoint's repair prompt.

    Every ``<a:r>`` element under each slide's shape tree is inspected. A run
    is deleted when BOTH of these hold:
      * it has no direct ``<a:rPr>`` child, and
      * its ``<a:t>`` is missing, empty, or whitespace-only.

    Typical offenders:
    - <a:r><a:t></a:t></a:r>  — empty run with no rPr
    - <a:r><a:t/></a:r>       — self-closing empty t with no rPr

    Runs that carry an rPr are always kept, even when their text is empty.

    Args:
        cloner: Object exposing the presentation as ``cloner._prs``; its
            slides are modified in place.
    """
    r_tag   = f"{{{_NS_A}}}r"
    rPr_tag = f"{{{_NS_A}}}rPr"
    t_tag   = f"{{{_NS_A}}}t"

    for slide in cloner._prs.slides:
        # Materialise the matches first so that removing nodes cannot disturb
        # the tree traversal.
        for run in list(slide.shapes._spTree.iter(r_tag)):
            if run.find(rPr_tag) is not None:
                continue  # formatted run — leave it alone

            t = run.find(t_tag)
            text = (t.text or "") if t is not None else ""
            # NOTE: strip() means a run holding only spaces is treated as empty
            # too, not just the literally empty cases listed in the docstring.
            if text.strip():
                continue

            parent = run.getparent()
            if parent is not None:
                parent.remove(run)


# ═══════════════════════════════════════════════════════════════════════════════
# Structural slide builders
# ═══════════════════════════════════════════════════════════════════════════════

def _build_cover(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Clone the template's cover slide and write the course title into it.

    The title slot is whichever slot _find_title_body_slots identifies as the
    title; if it finds none, the first content slot is used instead. When the
    cover has no usable slot the clone is left untouched.

    Args:
        cloner: Target deck the cover is cloned into.
        tmpl_path: Path of the template PPTX.
        d: Slide plan entry; only ``d["title"]`` is read (default "").
        dna: Template analysis; ``dna["slide_indices"]["cover"]`` selects the
            template slide to clone.
    """
    cover_index = dna["slide_indices"]["cover"]
    candidates = _content_slots(cloner.clone_slide(tmpl_path, cover_index))

    chosen, _body, _others = _find_title_body_slots(candidates)
    if chosen is None and candidates:
        chosen = candidates[0]
    if not chosen:
        return

    # Cap the height used for text fitting at 1.5 in, even for taller slots.
    capped_cy = min(chosen.get("cy_in", 1.5), 1.5)
    _set_text(
        chosen["element"],
        d.get("title", ""),
        cx_in=chosen.get("cx_in", 0),
        cy_in=capped_cy,
    )


def _build_syllabus(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Clone the template's syllabus slide and fill it with module names.

    Slots that carried template text are the candidates. When there is more
    than one, the first is skipped (it is not overwritten) and the rest are
    treated as module slots; a lone slot is used as the module slot itself.
    Module i goes into module slot i, and surplus slots are cleared.
    Afterwards every filled slot's first run is set to the smallest font size
    found among them, so the list renders uniformly.

    If the plan lists more modules than the template has slots, the surplus
    modules cannot be placed; a warning is printed so the loss is visible.

    Args:
        cloner: Target deck the slide is cloned into.
        tmpl_path: Path of the template PPTX.
        d: Slide plan entry; ``d["modules"]`` is the ordered list of names.
        dna: Template analysis; ``dna["slide_indices"]["syllabus"]`` selects
            the template slide to clone.
    """
    slide   = cloner.clone_slide(tmpl_path, dna["slide_indices"]["syllabus"])
    slots   = _content_slots(slide)
    modules = d.get("modules", [])
    non_empty    = [s for s in slots if s.get("text", "").strip()]
    module_slots = non_empty[1:] if len(non_empty) > 1 else non_empty

    if len(modules) > len(module_slots):
        print(
            f"  [warn] syllabus: template has {len(module_slots)} module slot(s); "
            f"{len(modules) - len(module_slots)} of {len(modules)} modules not shown"
        )

    # Fit every module against the tightest slot so they all size the same way.
    tight_cx = min((s.get("cx_in", 10) for s in module_slots), default=10)
    tight_cy = min((s.get("cy_in", 10) for s in module_slots), default=10)

    for i, s in enumerate(module_slots):
        if i < len(modules):
            _set_text(s["element"], modules[i], cx_in=tight_cx, cy_in=tight_cy)
        else:
            _clear_txbody(s["element"])

    def _first_run_rpr(slot):
        """Return the rPr of the first run in the slot's first paragraph, or None."""
        para = slot["element"].find(qn("a:p"))
        run = para.find(qn("a:r")) if para is not None else None
        return run.find(qn("a:rPr")) if run is not None else None

    # Normalise font size across all module slots
    run_props = [
        rPr for rPr in map(_first_run_rpr, module_slots[:len(modules)])
        if rPr is not None
    ]
    min_sz = min((int(rPr.get("sz")) for rPr in run_props if rPr.get("sz")), default=None)
    if min_sz:
        for rPr in run_props:
            rPr.set("sz", str(min_sz))


def _fill_combined_module_box(txBody_el, module_num, module_title: str, topics: list) -> None:
    """
    Populate a single text body that carries both the module heading and its
    topic list (the 2023-template layout).

    Expected paragraph layout inside *txBody_el*:
        paragraph 0   — heading, rewritten as "Module N: Title"
        paragraph 1   — blank spacer, never touched
        paragraph 2.. — bullets, replaced by one paragraph per topic

    Formatting is carried over by deep-copying the paragraph properties (pPr)
    and the first run's properties (rPr): the heading keeps its own, and every
    topic reuses those of the original paragraph 2, with the font size forced
    to 16pt. When the body has fewer than three paragraphs there is nothing to
    copy, so topics are appended without pPr/rPr. A body with no paragraphs at
    all is left unchanged.

    Args:
        txBody_el: lxml element of the text body to rewrite in place.
        module_num: Module number shown in the heading.
        module_title: Module title shown in the heading.
        topics: Topic strings, one bullet paragraph each.
    """
    import copy
    from lxml import etree
    _NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
    tag_p, tag_pPr, tag_r, tag_rPr, tag_t = (
        f"{{{_NS_A}}}{local}" for local in ("p", "pPr", "r", "rPr", "t")
    )

    existing = txBody_el.findall(tag_p)
    if not existing:
        return

    def _snapshot(para):
        """Deep-copy (pPr, first-run rPr) of *para*; either may be None."""
        first_run = para.find(tag_r)
        run_props = first_run.find(tag_rPr) if first_run is not None else None
        return copy.deepcopy(para.find(tag_pPr)), copy.deepcopy(run_props)

    def _rewrite(para, text, ppr_tpl, rpr_tpl):
        """Empty *para*, then rebuild it as pPr + a single run holding *text*."""
        for node in tuple(para):
            para.remove(node)
        if ppr_tpl is not None:
            para.append(copy.deepcopy(ppr_tpl))
        if text:
            run = etree.SubElement(para, tag_r)
            if rpr_tpl is not None:
                run.append(copy.deepcopy(rpr_tpl))
            etree.SubElement(run, tag_t).text = text

    # Heading keeps its own paragraph and run formatting.
    heading = existing[0]
    heading_pPr, heading_rPr = _snapshot(heading)
    _rewrite(heading, f"Module {module_num}: {module_title}", heading_pPr, heading_rPr)

    # Paragraph 1 (spacer) is intentionally skipped.

    # The third paragraph is the formatting template for every topic bullet.
    if len(existing) > 2:
        bullet_pPr, bullet_rPr = _snapshot(existing[2])
    else:
        bullet_pPr = bullet_rPr = None

    # Topics always render at 16pt, whatever size the template used.
    if bullet_rPr is not None:
        bullet_rPr.set("sz", "1600")

    # Drop the template's placeholder bullets, then append the real topics.
    for stale in existing[2:]:
        txBody_el.remove(stale)
    for topic in topics:
        _rewrite(etree.SubElement(txBody_el, tag_p), topic, bullet_pPr, bullet_rPr)


def _build_module_intro(
    cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict,
    illus_images: list, illus_used: set, course_words: set,
):
    """
    Build a module-intro slide by cloning the template's module-intro slide
    and overwriting its placeholder text in place.

    Reference template (matches the gold standard):
        • Picture 1        — full-bleed dark background photo (must stay)
        • Group 17         — TextBox 19 ("Module N:") + TextBox 18 (title),
                             white Poppins SemiBold on the LEFT half
        • TextBox 22/23/24 — topic bullets on the RIGHT half, white Poppins
                             Light with Wingdings checkmark bullets
        • TextBox 27       — "theknowledgeacademy" wordmark (must stay)

    Nothing is removed from the clone: _remove_decorative_groups would delete
    Group 17 (the module title) and Picture 1 is the background. Only text is
    replaced, so the white-text formatting survives.

    Layouts handled, in this order:
        1. Combined box — the topmost left slot is taller than 2.5 in and
           holds heading + topics; delegated to _fill_combined_module_box.
        2. Separate boxes — left slots take the heading, right slots the
           topics. With too few right slots, all topics become one bulleted
           list in the first slot.

    Args:
        cloner: Target deck the slide is cloned into.
        tmpl_path: Path of the template PPTX.
        d: Plan entry; reads "module_num", "module_title" and "topics".
        dna: Template analysis; reads ``slide_indices["module_intro"]`` and
            ``slide_w_in``.
        illus_images, illus_used, course_words: Accepted for a uniform builder
            signature; not used by this function.
    """
    from engine.text_replacer import _collect_slots as _raw_slots

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["module_intro"])

    # Gather every slot (including those nested in Group 17), keeping only the
    # ones that had template text and are not the brand wordmark.
    slots = []
    for cand in _raw_slots(slide):
        if not cand.get("text", "").strip():
            continue
        lowered = cand["text"].lower()
        if any(tok in lowered for tok in _LOGO_TOKENS):
            continue
        slots.append(cand)

    if not slots:
        return

    # Partition into left (module number + title) and right (topic bullets)
    # columns. The boundary scales with slide width so it suits templates whose
    # bullet column starts at x ~ 6 in (Template new) or x ~ 7 in (2023 template).
    boundary_x = dna["slide_w_in"] * 0.42
    left, right = [], []
    for slot in slots:
        column = left if slot["abs_x"] < boundary_x else right
        column.append(slot)
    left.sort(key=lambda s: s["abs_y"])
    right.sort(key=lambda s: s["abs_y"])

    module_num   = d.get("module_num", "")
    module_title = d.get("module_title", "")
    topics       = d.get("topics", [])

    # ── Layout 1: combined box (e.g. 2023 template) ───────────────────────────
    # A tall primary left slot (cy_in > 2.5in) holds ALL content: title on
    # para[0] and the topic list from para[2] on. The right half then holds
    # illustration shapes rather than text slots.
    if left and left[0].get("cy_in", 0) > 2.5:
        _fill_combined_module_box(left[0]["element"], module_num, module_title, topics)
        return

    def _write(slot, text, **extra):
        """Set *text* on *slot*, fitting against the slot's own width/height."""
        _set_text(
            slot["element"], text,
            cx_in=slot.get("cx_in", 0), cy_in=slot.get("cy_in", 0),
            **extra,
        )

    # ── Layout 2: separate boxes ──────────────────────────────────────────────
    # Two stacked left slots normally mean "Module N:" label over the title.
    # If the second one sits more than 0.5in below the first, it is really a
    # topic slot: the first slot then takes the full heading and the remaining
    # left slots join the topic pool.
    if len(left) >= 2:
        head, second = left[0], left[1]
        gap = second.get("abs_y", 0) - (head.get("abs_y", 0) + head.get("cy_in", 0))
        if gap < 0.5:
            _write(head, f"Module {module_num}:")
            _write(second, module_title)
        else:
            _write(head, f"Module {module_num}: {module_title}")
            right = list(left[1:]) + list(right)
    elif left:
        _write(left[0], f"Module {module_num}: {module_title}")

    # Topic placement.
    # Fewer slots than topics (e.g. Template 2 offers a single topic slot):
    # put ALL topics into the first slot as a bulleted list so none are lost.
    # The font is pinned at 16pt because auto-fit shrank the text too far.
    if right and len(right) < len(topics):
        target, *spare = right
        _set_paragraphs(
            target["element"],
            list(topics),
            cx_in=target.get("cx_in", 6.0),
            cy_in=target.get("cy_in", 4.0),
            bullet=True,
            font_pt=16.0,
        )
        # Defensive: blank any further slots.
        for leftover in spare:
            _clear_txbody(leftover["element"])
        return

    # Enough slots: one topic each at a fixed 16pt so every bullet matches
    # regardless of its length; slots beyond the last topic are blanked.
    for idx, slot in enumerate(right):
        if idx >= len(topics):
            _clear_txbody(slot["element"])
            continue
        _write(slot, topics[idx], font_pt=16.0)


# ═══════════════════════════════════════════════════════════════════════════════
# Content slide builders
# ═══════════════════════════════════════════════════════════════════════════════

def _build_text_only(
    cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict,
    illus_images: list = None, illus_used: set = None, course_words: set = None,
):
    """
    Build a text_only slide on a clone of the template content slide.

    The body is written in exactly one format, chosen in this order:
        points     — inline bold label + regular explanation paragraph pairs
        paragraphs — flowing prose
        bullets    — checkmark full-sentence bullets
    If the plan carries none of them, the body placeholder is cleared.

    Args:
        cloner:       SlideCloner used to clone the template content slide.
        tmpl_path:    Path of the template PPTX.
        d:            Slide plan dict; reads title, paragraphs, points, bullets,
                      lead_in and section_heading.  A missing or None title is
                      treated as an empty string.
        dna:          Template analysis dict; reads slide_indices, body, illus
                      and, when present, title, content_zone and brand_font.
        illus_images: Candidate illustration paths.  When non-empty and the
                      slide is not a points slide, title/body widths are
                      computed for a layout with right-side content and an
                      illustration is placed using dna["illus"].
        illus_used:   Set of already-used illustration paths; the chosen path
                      is added to it when the set is provided.
        course_words: Forwarded to the illustration picker.
    """
    from pptx.util import Inches

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    # `or ""` keeps a plan with "title": None from crashing the wrap estimate
    # below (len(title)); the other fields are already guarded where used.
    title   = d.get("title") or ""
    paras   = d.get("paragraphs", [])
    points  = d.get("points", [])
    bullets = d.get("bullets", [])
    lead_in = d.get("lead_in", "")
    section_heading = d.get("section_heading", "")

    title_slot, body_slot, other_slots = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for s in other_slots:
        _clear_txbody(s["element"])

    # Decide whether to show illustration (affects title AND body width)
    has_image  = bool(illus_images) and not points  # points use full width
    body_cx    = body_cx_for_text_only(dna, has_image=has_image)

    # Title — widen the slot when there's no right-side image so the title can
    # render at 32-36pt single-line instead of shrinking into the narrow default.
    title_cx_target = title_cx_for_layout(dna, has_right_content=has_image)
    if title_slot:
        title_cy_in = title_slot.get("cy_in", dna.get("title", {}).get("cy", 0.71))
        if title_cx_target > title_slot.get("cx_in", 0) + 0.05:
            _resize_shape(
                title_slot["element"], title_cx_target, title_cy_in,
                new_x_in=title_slot.get("abs_x", dna.get("title", {}).get("x", 0.66)),
                new_y_in=title_slot.get("abs_y", dna.get("title", {}).get("y", 0.58)),
            )
        _set_text(title_slot["element"], title,
                  cx_in=title_cx_target, cy_in=title_cy_in,
                  min_font_pt=32,
                  font_family=dna.get("brand_font") or None)
    body_x     = dna["body"]["x"]
    body_y     = dna["body"]["y"]
    # Use the full content_zone height so body has room to render at a
    # comfortable font size.  Some templates have a tiny body placeholder cy
    # (e.g. Template 2's body is only 0.4in tall) which would force normAutofit
    # to shrink text to unreadable sizes.  content_zone.cy spans from below
    # the title down to the chrome floor — plenty of room.
    cz = dna.get("content_zone")
    if cz and cz.get("cy", 0) > 0:
        body_cy = float(cz["cy"]) - max(0.0, body_y - float(cz.get("y", body_y)))
        body_cy = max(body_cy, 3.0)   # never less than 3in
    else:
        body_cy = max(dna["body"]["cy"], 3.0)

    # Push body down to clear the title, accounting for text wrapping.
    # cy_in is the XML box height, NOT the rendered height when text wraps —
    # so we estimate rendered height from title length and box width instead.
    if title_slot:
        CHAR_WIDTH_IN  = 0.23        # avg char width at 36pt bold (calibrated)
        LINE_HEIGHT_IN = (36.0 / 72.0) * 1.4   # ~0.7in per line incl. spacing
        title_cx_in    = title_cx_target       # use the widened width if applied
        chars_per_line = max(15, int(title_cx_in / CHAR_WIDTH_IN))
        n_lines        = max(1, math.ceil(len(title) / chars_per_line))
        effective_h    = n_lines * LINE_HEIGHT_IN
        title_y        = title_slot.get("abs_y", 0.58)
        min_body_y     = title_y + effective_h + 0.15
        if body_y < min_body_y:
            adjustment = min_body_y - body_y
            body_y  = min_body_y
            body_cy = max(body_cy - adjustment, 2.5)

    if body_slot:
        _resize_shape(body_slot["element"], body_cx, body_cy,
                      new_x_in=body_x, new_y_in=body_y)

        if points:
            # POINTS format: inline bold label + regular explanation
            _write_points_body(body_slot["element"], points,
                               cx_in=body_cx, cy_in=body_cy,
                               heading=section_heading, lead_in=lead_in)
        elif paras:
            _set_paragraphs(body_slot["element"], paras,
                            cx_in=body_cx, cy_in=body_cy,
                            bullet=False, heading=section_heading, lead_in=lead_in)
        elif bullets:
            _set_paragraphs(body_slot["element"], bullets,
                            cx_in=body_cx, cy_in=body_cy,
                            bullet=True, heading=section_heading, lead_in=lead_in)
        else:
            # Nothing to write — clear any template placeholder text so the box
            # doesn't appear as a large empty styled rectangle on the slide.
            _clear_txbody(body_slot["element"])

    # Right-side illustration (not for points slides — they need full width).
    # Same condition that sized the title/body above, so reuse it.
    if has_image:
        illus = dna["illus"]
        # Pass slide body content as additional context for keyword matching —
        # so the picker can match against the actual concepts on the slide,
        # not just the short title.
        body_context = " ".join(
            (paras or []) +
            (bullets or []) +
            [lead_in or ""]
        )
        img_path = _pick_illustration_image(
            illus_images, title, illus_used or set(),
            course_words=course_words,
            context_text=body_context,
        )
        if img_path:
            if illus_used is not None:
                illus_used.add(img_path)
            try:
                # Insert at the zone width first; if the aspect-derived height
                # overshoots the zone, re-insert constrained by height instead.
                max_h = Inches(illus["cy"])
                pic   = slide.shapes.add_picture(
                    img_path, Inches(illus["x"]), Inches(illus["y"]),
                    width=Inches(illus["cx"])
                )
                if pic.height > max_h:
                    pic._element.getparent().remove(pic._element)
                    slide.shapes.add_picture(
                        img_path, Inches(illus["x"]), Inches(illus["y"]), height=max_h
                    )
            except Exception as exc:
                # Best-effort: a broken image must not abort the whole slide.
                print(f"  [warn] illustration failed: {exc!r}")


def _build_groups_slide(
    cloner: SlideCloner, tmpl_path: str, bank_dir: Path, catalogue: list,
    d: dict, used_slides: dict, used_families: list, is_tech: bool,
    used_files: list, dna: dict,
    drill_dir: Path = None, drill_catalogue: list = None,
    used_card_families: list = None,
    illus_images: list = None, illus_used: set = None, course_words: set = None,
):
    """
    Build a groups (infographic) slide and then one drill_down slide per item.

    V3: Clone bank slide as full base, overlay template chrome, fill text in-place.

    This avoids the fragility of extracting/scaling group shapes into a lower zone.
    The bank slide keeps its designed layout; we only replace its title zone and
    add back the template chrome (watermark, corner accent, left accent strip).

    Flow:
      1. Drop items whose label equals the slide title (case-insensitive).
      2. Ask select_slide for a bank slide (at most 15 attempts), excluding
         every candidate that check_layout rejects.
      3. Clone the bank slide, strip its header, fill item labels, overlay the
         template chrome/title, and add the intro bullets in a new text box.
      4. Record the chosen slide in used_slides / used_families / used_files.
      5. For every item that has `bullets` or a non-empty `description`, emit
         a drill_down card via _build_drill_down_card.

    Falls back to _build_text_only (and returns) when no items remain, when no
    bank slide passes the layout check, or when the clone has no visible body
    content.

    Args:
        cloner:             SlideCloner used for every clone_slide call.
        tmpl_path:          Template PPTX path (chrome source and fallback base).
        bank_dir:           Directory joined with entry["file"] to locate bank decks.
        catalogue:          Bank slide catalogue handed to select_slide.
        d:                  Slide plan dict; reads title, items/steps,
                            visual_type and intro_points.
        used_slides:        Dict keyed by (file, slide_index); incremented here.
        used_families:      List; the chosen entry's visual_family is appended.
        is_tech:            Forwarded to select_slide as is_tech_course.
        used_files:         List; the chosen entry's file is appended.
        dna:                Template analysis dict.
        drill_dir:          Directory of the drill_down card bank (optional).
        drill_catalogue:    Drill_down card catalogue (optional).
        used_card_families: List; the picked card family name is appended
                            when both the list and a pick exist.
        illus_images, illus_used, course_words:
                            Forwarded to _build_drill_down_card.
    """
    items = d.get("items") or d.get("steps") or []
    title_lower = d.get("title", "").strip().lower()
    # An item that merely repeats the slide title carries no information.
    items = [it for it in items if it.get("label", "").strip().lower() != title_lower]

    if not items:
        # NOTE(review): the fallbacks in this function do not forward
        # illus_images / illus_used / course_words — confirm that is intended.
        fallback = _make_text_fallback(d)
        _build_text_only(cloner, tmpl_path, fallback, dna)
        return

    # Select bank slide with spatial layout check only
    # (Capacity check removed — it was too strict and rejected valid slides)
    from engine.slide_layout_checker import check_layout

    # Requirements summary handed to the selector.
    reqs = {
        "item_count":    len(items),
        "max_label_len": max((len(it.get("label", "")) for it in items), default=0),
        "max_desc_len":  max((len(it.get("description", "")) for it in items), default=0),
        "visual_type":   d.get("visual_type", ""),
    }
    # Title words longer than three characters serve as topic keywords.
    title_words = [w.lower() for w in d.get("title", "").split() if len(w) > 3]
    excluded_keys: set = set()
    entry = None
    # Bounded retry: each rejected candidate is added to excluded_keys and the
    # selector is asked again, up to 15 times.
    for _ in range(15):
        tmp = select_slide(catalogue, reqs, used_counts=used_slides,
                           topic_keywords=title_words, is_tech_course=is_tech,
                           used_families=used_families, used_files=used_files,
                           excluded_keys=excluded_keys)
        if tmp is None:
            break
        key = (tmp.get("file"), tmp.get("slide_index"))
        bank_path_tmp = str(bank_dir / tmp["file"])

        # Spatial layout check — reject slides where text boxes are
        # disconnected from their visual elements
        usable, reason = check_layout(bank_path_tmp, tmp["slide_index"], len(items))
        if not usable:
            print(f"  [layout] rejected {tmp['file']}:{tmp['slide_index']} — {reason}")
            excluded_keys.add(key)
            continue

        entry = tmp
        break

    if entry is None:
        print(f"  [warn] No bank slide for groups: {d.get('title')!r} — falling back")
        fallback = _make_text_fallback(d)
        _build_text_only(cloner, tmpl_path, fallback, dna)
        return

    bank_path = str(bank_dir / entry["file"])

    # V3 core: clone bank slide as the full base slide
    slide = cloner.clone_slide(bank_path, entry["slide_index"])

    # Robustness check: if the clone produced an empty slide (e.g. the source
    # bank slide was SmartArt-based and every shape got skipped due to
    # unresolved diagram rIds), abandon the clone and fall back to text_only.
    if not _has_visible_body_content(slide):
        print(f"  [warn] bank slide {entry['file']}:{entry['slide_index']} cloned empty (SmartArt?) — falling back")
        # Remove the empty cloned slide so the fallback doesn't add a second
        _remove_last_slide(cloner)
        fallback = _make_text_fallback(d)
        _build_text_only(cloner, tmpl_path, fallback, dna)
        return

    # Strip bank's own title zone and watermark — we replace with template chrome
    _strip_bank_header(slide, dna)

    # Fill text BEFORE overlaying chrome so template text boxes aren't mistaken
    # for content slots by _collect_bank_text_boxes (Pattern B detection).
    # Always labels-only — bank boxes are not sized for description text.
    # Writing descriptions into them causes normAutofit to shrink to unreadable
    # sizes or the text overlaps icons (bank boxes are designed for 3-8 word labels).
    # Try tag-based fill first (new tagged-infographics bank with "Item N" tags).
    # Fall back to spatial fill for old bank slides without tags.
    if not _fill_bank_slide_by_tag(slide, items):
        _fill_bank_slide_inplace(slide, items, False)

    # Pre-compute intro bullets — the planner emits intro_points (list of 2
    # short bullets). Legacy plans with a string `intro` are normalised in
    # content_planner._migrate_slide so this read always sees a list.
    _intro_sentences: list[str] = [
        p.strip() for p in (d.get("intro_points") or []) if p and p.strip()
    ]
    _intro_box_cy = 0.0
    if _intro_sentences:
        # A long single bullet still needs 2-line height for wrap.
        n_lines = len(_intro_sentences)
        if n_lines == 1 and len(_intro_sentences[0]) > 100:
            n_lines = 2
        # 0.55in per line, with a 0.6in minimum box height.
        _intro_box_cy = max(0.55 * n_lines, 0.6)

    # Overlay template chrome (watermark, corner accents) and title.
    # Pass intro_height so the repositioning clears the intro text area.
    _apply_template_chrome(cloner, slide, tmpl_path, d.get("title", ""), dna,
                           intro_height_in=_intro_box_cy)

    # ── Intro text above the infographic ─────────────────────────────────────
    if _intro_sentences:
        from pptx.util import Inches as _In
        # Anchor the intro box at the content zone origin; templates without a
        # content_zone entry fall back to the body placeholder geometry.
        _cz = dna.get("content_zone", dna["body"])
        bx  = _cz["x"]
        by  = _cz["y"]
        bcx = body_cx_for_text_only(dna, has_image=False)
        txbox = slide.shapes.add_textbox(_In(bx), _In(by), _In(bcx), _In(_intro_box_cy))
        _set_paragraphs(txbox.text_frame._txBody, _intro_sentences,
                        cx_in=bcx, cy_in=_intro_box_cy, bullet=True)

    # Track usage
    key = (entry["file"], entry["slide_index"])
    used_slides[key] = used_slides.get(key, 0) + 1
    used_families.append(entry.get("visual_family", ""))
    used_files.append(entry.get("file", ""))

    # ── Per-item drill_down slides (gold standard pattern) ────────────────────
    # For groups slides that carry item descriptions, emit one drill_down slide
    # per item immediately after the infographic overview.
    # (overview_groups passes items without descriptions so this block is skipped.)
    #
    # IMPORTANT: pick ONE card family for the whole topic so every drill_down
    # in this group uses the SAME shape style. The next groups topic gets a
    # different family. This mirrors the gold-standard convention where related
    # points share one visual treatment.
    title = d.get("title", "")
    _card_entry = None
    if drill_catalogue and drill_dir is not None:
        _ucf = used_card_families if used_card_families is not None else []
        _card_entry = _pick_card_family(drill_catalogue, _ucf)
        # NOTE(review): the family is recorded as used even if no item below
        # ends up producing a drill_down slide — confirm that is acceptable.
        if _card_entry is not None and used_card_families is not None:
            used_card_families.append(_card_entry.get("name", ""))

    # Uniform label font across this set of cards so they look consistent.
    _set_label_pt = _drill_label_pt_for_topic(
        [item.get("label", "") for item in items]
    )

    for i, item in enumerate(items):
        # New schema: each item has explicit `bullets` (3 short sentences).
        # Backwards-compat: if `bullets` missing, derive from `description`.
        bullets = item.get("bullets") or []
        if not bullets:
            desc = (item.get("description") or "").strip()
            if not desc:
                # Neither bullets nor description: no drill_down for this item.
                continue
            raw = desc.rstrip(".")
            # Split on "; " and ". " boundaries by rewriting both to ".|" and
            # splitting on "|" (a literal "|" in the text also splits).
            bullets = [s.strip() for s in raw.replace("; ", ".|").replace(". ", ".|").split("|") if s.strip()]
            if not bullets:
                bullets = [desc]
        # Cards carry at most three bullets.
        bullets = bullets[:3]

        item_d = {
            "title":      title,
            "step_num":   i + 1,
            "step_label": item.get("label", ""),
            "bullets":    bullets,
        }
        _build_drill_down_card(
            cloner, tmpl_path, drill_dir, drill_catalogue or [],
            item_d, dna,
            used_card_families if used_card_families is not None else [],
            card_entry=_card_entry,
            illus_images=illus_images,
            illus_used=illus_used,
            course_words=course_words,
            label_font_pt=_set_label_pt,
        )


def _build_overview_groups(
    cloner: SlideCloner, tmpl_path: str, bank_dir: Path, catalogue: list,
    d: dict, used_slides: dict, used_families: list, is_tech: bool,
    used_files: list, dna: dict,
):
    """
    Build an overview groups slide — item labels only, no descriptions.

    Delegates to _build_groups_slide with a copy of the plan whose items have
    been reduced to their labels.  Every other plan key (including
    intro_points, so the overview keeps its introduction bullets above the
    infographic) is passed through unchanged.
    """
    slide_title = d.get("title", "").strip().lower()

    labels_only = []
    for entry in d.get("items") or d.get("steps") or []:
        # Items that just repeat the slide title are dropped.
        if entry.get("label", "").strip().lower() == slide_title:
            continue
        labels_only.append({"label": entry.get("label", "")})

    # Shallow copy of the plan with `items` swapped for the stripped list.
    overview_d = {**d, "items": labels_only}
    _build_groups_slide(
        cloner, tmpl_path, bank_dir, catalogue,
        overview_d, used_slides, used_families,
        is_tech, used_files, dna,
    )


def _pick_card_family(drill_catalogue: list, used_card_families: list) -> dict | None:
    """
    Choose one card entry from the catalog, penalising recently-used families.
    Caller is responsible for appending the chosen name to used_card_families
    so the rotation tracks topic-level usage (not per-drill_down usage).
    """
    if not drill_catalogue:
        return None
    import random as _random

    def _score(entry):
        name = entry.get("name", "")
        s = 0.0
        recent = used_card_families[-3:]
        for i, used in enumerate(reversed(recent)):
            if used == name:
                s -= (3.0 - i)
        return s + _random.uniform(0, 0.3)

    return max(drill_catalogue, key=_score)


def _build_drill_down_card(
    cloner: SlideCloner, tmpl_path: str, drill_dir: Path,
    drill_catalogue: list, d: dict, dna: dict,
    used_card_families: list,
    card_entry: dict = None,
    illus_images: list = None,
    illus_used: set = None,
    course_words: set = None,
    label_font_pt: float | None = None,
):
    """
    Render one drill_down as a card cloned from the drill_down bank.

    Card family:
        card_entry given → that family is used as-is (a groups slide passes
                           the same entry for all of its drill_downs).
        card_entry None  → a family is picked here and its name is appended
                           to used_card_families.

    Illustration: when the family entry has has_image_zone set and illus_images
    is non-empty, an image is picked (keyed on the step label) and swapped in
    for the picture that came with the cloned card.

    Fallback: the procedural _build_drill_down is used, and its result
    returned, when there is no catalogue / drill_dir, no family could be
    picked, or cloning the card raises.
    """
    def _procedural():
        return _build_drill_down(cloner, tmpl_path, d, dna)

    if not drill_catalogue or drill_dir is None:
        return _procedural()

    entry = card_entry
    if entry is None:
        entry = _pick_card_family(drill_catalogue, used_card_families)
        if entry is None:
            return _procedural()
        used_card_families.append(entry.get("name", ""))

    src_path = str(drill_dir / entry["file"])
    try:
        slide = cloner.clone_slide(src_path, entry["slide_index"])
    except Exception as exc:
        print(f"  [warn] drill card clone failed ({exc!r}) — using procedural fallback")
        return _procedural()

    # Drop the source card's own title / watermark / corner pictures first.
    _strip_gold_card_chrome(slide)

    number_text = str(d.get("step_num", 1))
    label_text  = d.get("step_label", "") or d.get("title", "")
    bullet_list = d.get("bullets", []) or []
    if bullet_list:
        body = "\n".join(bullet_list)
    else:
        body = d.get("body", "")

    _fill_card_slots(
        slide, number_text, label_text, body,
        bullets_supported=entry.get("bullets_supported", True),
        label_font_pt=label_font_pt,
    )

    # Give each drill_down its own picture instead of the one baked into the
    # source card, using the item label as the matching hint.
    if illus_images and entry.get("has_image_zone"):
        hint = label_text or d.get("title", "")
        chosen = _pick_illustration_image(
            illus_images, hint, illus_used or set(), course_words=course_words
        )
        if chosen:
            if illus_used is not None:
                illus_used.add(chosen)
            _swap_card_illustration(slide, chosen)

    # Template chrome goes on last; the card group keeps its authored position.
    _apply_template_chrome(
        cloner, slide, tmpl_path, d.get("title", ""), dna,
        preserve_group_position=True,
    )


def _swap_card_illustration(slide, new_img_path: str) -> None:
    """
    Replace the body-area illustration on a cloned card slide with new_img_path.

    Target selection: the largest top-level <p:pic> that is at least
    2.5in × 2.5in and whose top edge is at least 1.0in below the slide top.
    Smaller or higher pictures (corner chrome) are ignored.  If no picture
    qualifies the slide is left untouched.

    Placement: the new image keeps its own aspect ratio.  It is scaled to fit
    inside the old picture's box and centred within it.

    Failure handling: the old picture is removed only after the replacement
    has been inserted and positioned.  If inserting fails (missing or
    unreadable file, unsupported format) a warning is printed, any
    half-inserted picture is rolled back, and the original illustration stays
    in place — the card is never left with an empty image zone.
    """
    from pptx.util import Emu

    pic_tag  = f"{{{_NS_P}}}pic"
    spTree   = slide.shapes._spTree
    min_side = int(2.5 * _EMU)
    min_top  = int(1.0 * _EMU)

    target = None
    target_geom = None
    target_area = 0
    for el in spTree:
        if el.tag != pic_tag:
            continue
        x, y, cx, cy = _get_grp_xfrm(el)
        # Only consider substantial body-area pictures (>= 2.5×2.5 in)
        if cx < min_side or cy < min_side:
            continue
        if y < min_top:
            continue
        area = cx * cy
        if area > target_area:
            target = el
            target_geom = (x, y, cx, cy)
            target_area = area

    if target is None or target_geom is None:
        return

    x, y, cx, cy = target_geom
    new_pic = None
    try:
        # First insert at width=cx so pptx computes height from aspect; if that
        # height exceeds cy, re-insert with height=cy and computed width.
        new_pic = slide.shapes.add_picture(new_img_path, Emu(x), Emu(y), width=Emu(cx))
        if new_pic.height > cy:
            new_pic._element.getparent().remove(new_pic._element)
            new_pic = None
            new_pic = slide.shapes.add_picture(new_img_path, Emu(x), Emu(y), height=Emu(cy))
        # Centre within the target box
        new_pic.left = Emu(x + (cx - new_pic.width) // 2)
        new_pic.top  = Emu(y + (cy - new_pic.height) // 2)
    except Exception as exc:
        print(f"  [warn] illustration swap failed ({exc!r})")
        # Roll back a picture that was inserted but not finished.
        if new_pic is not None:
            stray_parent = new_pic._element.getparent()
            if stray_parent is not None:
                stray_parent.remove(new_pic._element)
        return

    # Replacement is in place — now it is safe to drop the old picture.
    parent = target.getparent()
    if parent is not None:
        parent.remove(target)


def _strip_gold_card_chrome(slide) -> None:
    """
    Delete the source card's own chrome from a cloned card slide.

    Only direct children of the shape tree are examined.  Removed:
      • <p:sp> shapes that contain a placeholder (<p:ph>)
      • <p:sp> text shapes in the title zone (top < 1.4in, wider than 4in,
        non-empty text)
      • <p:sp> text shapes whose text contains a Knowledge Academy watermark
      • <p:pic> pictures smaller than 2.2in × 2.2in that sit in a slide corner
    Groups and larger body-area pictures are left alone.
    """
    sp_tag  = f"{{{_NS_P}}}sp"
    pic_tag = f"{{{_NS_P}}}pic"

    def _is_chrome_shape(shape, top, width) -> bool:
        if shape.find(f".//{{{_NS_P}}}ph") is not None:
            return True
        body = shape.find(f"{{{_NS_P}}}txBody")
        if body is None:
            return False
        text = "".join(
            node.text or "" for node in body.iter(f"{{{_NS_A}}}t")
        ).strip().lower()
        # Wide text box in the title zone
        if text and top < int(1.4 * _EMU) and width > int(4 * _EMU):
            return True
        # Watermark
        return "knowledgeacademy" in text or "the knowledge academy" in text

    def _is_corner_picture(left, top, width, height) -> bool:
        if not (width < int(2.2 * _EMU) and height < int(2.2 * _EMU)):
            return False
        right  = left + width
        bottom = top + height
        near_left = left < int(2.5 * _EMU)
        in_top_corner    = top < int(1.5 * _EMU) and (near_left or right > int(11 * _EMU))
        in_bottom_corner = bottom > int(5.5 * _EMU) and (near_left or right > int(8 * _EMU))
        return in_top_corner or in_bottom_corner

    doomed = []
    for el in list(slide.shapes._spTree):
        x, y, cx, cy = _get_grp_xfrm(el)
        if el.tag == sp_tag:
            if _is_chrome_shape(el, y, cx):
                doomed.append(el)
        elif el.tag == pic_tag:
            if _is_corner_picture(x, y, cx, cy):
                doomed.append(el)

    for el in doomed:
        owner = el.getparent()
        if owner is not None:
            owner.remove(el)


def _fill_card_slots(
    slide, step_num: str, step_label: str, body_text: str,
    bullets_supported: bool = True,
    label_font_pt: float | None = None,
) -> None:
    """
    Detect role slots on a cloned card slide by text-pattern and replace each:
      • first shape whose text is 1-2 digits → number slot (filled with step_num)
      • shortest non-numeric, non-empty      → label slot (filled with step_label)
      • longest non-numeric text shape       → body slot (filled with body_text)
        (only when at least two non-numeric shapes exist)

    Searches both top-level sp elements and sps nested inside grpSp containers
    so it works for cards whose number lives inside a decorative group.
    Placeholder shapes, empty shapes and watermark text are never candidates.

    Args:
        slide:             Cloned card slide to fill in place.
        step_num:          Text for the number slot.
        step_label:        Text for the label slot.
        body_text:         Body text; "\\n"-separated lines become bullets when
                           bullets_supported is true.
        bullets_supported: When false, body lines are joined with spaces into
                           a single paragraph.
        label_font_pt:     Uniform label size shared by a set of cards; None
                           lets the label be fitted on its own.
    """
    sp_tag = f"{{{_NS_P}}}sp"
    t_tag  = f"{{{_NS_A}}}t"
    txBody_tag = f"{{{_NS_P}}}txBody"
    # Same two spellings _strip_gold_card_chrome treats as watermark text; that
    # pass only sees top-level shapes, so nested watermarks must be skipped here.
    watermark_markers = ("knowledgeacademy", "the knowledge academy")

    def _is_number(text: str) -> bool:
        # Number slots hold short digit strings such as "1" or "12".
        return text.isdigit() and len(text) <= 2

    candidates: list[tuple] = []  # (sp_el, text)
    for el in slide.shapes._spTree.iter(sp_tag):
        if el.find(f".//{{{_NS_P}}}ph") is not None:
            continue
        txb = el.find(txBody_tag)
        if txb is None:
            continue
        text = "".join(t.text or "" for t in txb.iter(t_tag)).strip()
        if not text:
            continue
        # Skip remaining chrome
        lowered = text.lower()
        if any(marker in lowered for marker in watermark_markers):
            continue
        candidates.append((el, text))

    if not candidates:
        return

    # Number slot: first numeric candidate in document order.
    number_el = next((el for el, text in candidates if _is_number(text)), None)
    if number_el is not None:
        _replace_text_keeping_run_style(number_el, step_num)

    # Label/body are chosen among NON-numeric shapes only, so a second number
    # shape on the card can never be mistaken for the (shortest) label slot.
    others = [(el, text) for el, text in candidates if not _is_number(text)]
    if not others:
        return
    others.sort(key=lambda pair: len(pair[1]))   # stable: ties keep doc order
    label_el = others[0][0]
    body_el  = others[-1][0] if len(others) > 1 else None

    # Pre-shrink the label's font (and vertical-centre it). If the caller
    # passed a uniform size (so all cards in a set match), use it directly;
    # otherwise auto-fit per label.
    _shrink_label_font_to_fit(label_el, step_label, forced_pt=label_font_pt)
    _replace_text_keeping_run_style(label_el, step_label)

    if body_el is not None:
        # Constrain body box so it doesn't extend into the template's corner-
        # chrome zone (bottom-left decoration image typically starts at y≈5.58).
        # The drill_down_bank cards were authored with bodies sized for a
        # different template; without this, long bullet text overflows the box
        # and visually overlaps the corner picture.
        _constrain_card_body_box(body_el, chrome_top_y_in=5.4)
        if bullets_supported and "\n" in body_text:
            _replace_text_with_paragraphs(body_el, body_text.split("\n"), bullet=True)
        else:
            _replace_text_keeping_run_style(body_el, body_text.replace("\n", " "))
        # Layer normAutofit on top so PowerPoint shrinks the font further at
        # runtime if the LLM produced extra-dense bullets that still overflow.
        txb = body_el.find(f"{{{_NS_P}}}txBody")
        if txb is not None:
            _enable_normAutofit(txb)


def _constrain_card_body_box(sp_el, chrome_top_y_in: float = 5.4) -> None:
    """
    Shorten a drill_down card's body box so its bottom edge ends at or above
    chrome_top_y_in (inches from the slide top).

    The height is read from spPr/xfrm/ext@cy and only ever reduced — never
    below 1.5in, and only when the reduction exceeds 0.05in.  Shapes without
    an xfrm, with unparsable values, or with a non-positive y or height are
    left untouched.
    """
    ns_a = f"{{{_NS_A}}}"

    # Walk sp → spPr → xfrm, bailing out at the first missing level.
    xfrm = sp_el
    for tag in (f"{{{_NS_P}}}spPr", f"{ns_a}xfrm"):
        xfrm = xfrm.find(tag)
        if xfrm is None:
            return

    offset = xfrm.find(f"{ns_a}off")
    extent = xfrm.find(f"{ns_a}ext")
    if offset is None or extent is None:
        return

    try:
        top_in    = int(offset.get("y", 0)) / _EMU
        height_in = int(extent.get("cy", 0)) / _EMU
    except Exception:
        return
    if top_in <= 0 or height_in <= 0:
        return

    # Room between the box top and the chrome line, floored at 1.5in.
    clipped_in = max(min(height_in, chrome_top_y_in - top_in), 1.5)
    if clipped_in < height_in - 0.05:
        extent.set("cy", str(int(clipped_in * _EMU)))


def _shrink_label_font_to_fit(sp_el, new_text: str, forced_pt: float | None = None) -> None:
    """
    Size the pill label's font for new_text and vertically centre the text.

    The size is written to the `sz` attribute of the first <a:rPr> found in
    the shape's txBody, and bodyPr gets anchor="ctr".

    With a positive forced_pt that exact size is used (a caller-computed
    uniform size for a whole set of cards).  Otherwise the first size in
    14, 13, 12, 11, 10 pt is chosen for which the longest single word fits on
    one line and the whole text fits on two; 10pt is used when none fits.

    Does nothing when new_text is empty, the shape has no readable positive
    width, or the txBody has no <a:rPr>.
    """
    if not new_text:
        return

    # Box width from spPr/xfrm/ext@cx.
    ext = None
    shape_props = sp_el.find(f"{{{_NS_P}}}spPr")
    if shape_props is not None:
        xfrm = shape_props.find(f"{{{_NS_A}}}xfrm")
        if xfrm is not None:
            ext = xfrm.find(f"{{{_NS_A}}}ext")
    if ext is None:
        return
    try:
        box_w_in = int(ext.get("cx", 0)) / _EMU
    except Exception:
        return
    if box_w_in <= 0:
        return

    text_body = sp_el.find(f"{{{_NS_P}}}txBody")
    if text_body is None:
        return
    run_props = text_body.find(f".//{{{_NS_A}}}rPr")
    if run_props is None:
        return

    # Without anchor="ctr" the text sits at the top of the capsule, which
    # looks mis-aligned whenever the box is taller than one line of text.
    body_props = text_body.find(f"{{{_NS_A}}}bodyPr")
    if body_props is not None:
        body_props.set("anchor", "ctr")

    if forced_pt is not None and forced_pt > 0:
        run_props.set("sz", str(int(forced_pt * 100)))
        return

    # Usable width excludes the default left/right insets (~0.1in each).
    usable_w_in = max(box_w_in - 0.2, 0.3)
    longest_word = max((len(word) for word in new_text.split()), default=len(new_text))
    total_chars = len(new_text)

    chosen_pt = 10.0
    for size in (14.0, 13.0, 12.0, 11.0, 10.0):
        # Average glyph width for a bold Calibri-like face: ~0.0095in per pt.
        glyph_w = size * 0.0095
        if glyph_w <= 0:
            continue
        chars_per_line = usable_w_in / glyph_w
        if longest_word <= chars_per_line and total_chars <= chars_per_line * 2:
            chosen_pt = size
            break
    run_props.set("sz", str(int(chosen_pt * 100)))


def _drill_label_pt_for_topic(step_labels: list[str], box_cx_in: float = 1.47) -> float:
    """
    Pick a single uniform font size for a set of drill_down cards sharing one
    parent topic, so all cards in the set render at the same size.

    Picks the largest pt (capped at 14, floor 10) where the longest single
    word across `step_labels` fits one line in a box of `box_cx_in` inches.
    """
    if not step_labels:
        return 14.0
    effective_cx = max(box_cx_in - 0.2, 0.3)
    longest_word_len = max(
        (len(w) for label in step_labels for w in label.split()),
        default=0,
    )
    if longest_word_len == 0:
        return 14.0
    for pt_try in (14.0, 13.0, 12.0, 11.0, 10.0):
        if longest_word_len * pt_try * 0.0095 <= effective_cx:
            return pt_try
    return 10.0


def _replace_text_keeping_run_style(sp_el, new_text: str) -> None:
    """
    Replace all text inside the first <a:p> of an sp's txBody, preserving the
    first <a:r>'s <a:rPr> (font, size, color).  Removes any extra paragraphs
    and forces centre alignment on the remaining one.

    Every text-bearing child of the paragraph is removed — runs (<a:r>), line
    breaks (<a:br>) and fields (<a:fld>) — so no stale break or field text from
    the source card is left next to the new run.  <a:pPr> and <a:endParaRPr>
    are kept.

    IMPORTANT: per the OOXML spec, <a:r> elements must appear BEFORE any
    trailing <a:endParaRPr>.  When PowerPoint sees a run after endParaRPr it
    silently fails to render the run (white-text-on-white symptom).  We build
    the new run, then re-insert at the correct position relative to endParaRPr.

    Does nothing when the shape has no txBody or the txBody has no paragraph.
    """
    from lxml import etree
    txb = sp_el.find(f"{{{_NS_P}}}txBody")
    if txb is None:
        return
    paras = txb.findall(f"{{{_NS_A}}}p")
    if not paras:
        return
    first_p = paras[0]
    # Capture first run's rPr to reuse
    first_r = first_p.find(f"{{{_NS_A}}}r")
    rPr_src = first_r.find(f"{{{_NS_A}}}rPr") if first_r is not None else None
    rPr_copy = etree.fromstring(etree.tostring(rPr_src)) if rPr_src is not None else None
    # Strip all text-bearing children from the first paragraph (keep pPr and
    # endParaRPr intact).  A leftover <a:br> would otherwise render as a blank
    # line before the new text, and a leftover <a:fld> would keep its old text.
    for local_name in ("r", "br", "fld"):
        for node in first_p.findall(f"{{{_NS_A}}}{local_name}"):
            first_p.remove(node)
    # Strip extra paragraphs
    for extra_p in paras[1:]:
        txb.remove(extra_p)
    # Force center alignment on the paragraph (user preference for card text).
    pPr = first_p.find(f"{{{_NS_A}}}pPr")
    if pPr is None:
        pPr = etree.Element(f"{{{_NS_A}}}pPr")
        first_p.insert(0, pPr)   # pPr must be the paragraph's first child
    pPr.set("algn", "ctr")
    # Build new run
    new_r = etree.Element(f"{{{_NS_A}}}r")
    if rPr_copy is not None:
        new_r.append(rPr_copy)
    else:
        rPr_new = etree.SubElement(new_r, f"{{{_NS_A}}}rPr")
        rPr_new.set("lang", "en-US")
    t_el = etree.SubElement(new_r, f"{{{_NS_A}}}t")
    t_el.text = new_text
    # Insert BEFORE endParaRPr if present, else append.
    endParaRPr = first_p.find(f"{{{_NS_A}}}endParaRPr")
    if endParaRPr is not None:
        endParaRPr.addprevious(new_r)
    else:
        first_p.append(new_r)


def _replace_text_with_paragraphs(sp_el, lines: list[str], bullet: bool = False) -> None:
    """
    Replace sp_el's txBody contents with one paragraph per non-blank line,
    preserving the first run's <a:rPr> for font styling.  When bullet=True,
    paragraphs get a hanging Wingdings bullet (char 0xFC).

    Lines are stripped; blank lines are dropped.  Each paragraph is
    left-aligned with 115% line spacing, and every paragraph except the last
    *written* one gets 9pt space-after.  If no visible line remains, a single
    empty paragraph is written so the txBody still contains an <a:p>.

    Font size: a fitted size is requested from _fit_card_body_pt (max 16pt,
    min 10pt) and stored on the copied rPr, after which the size is clamped
    to at least 14pt.
    NOTE(review): the 14pt clamp overrides any fitted size below 14pt, so the
    10pt floor passed to _fit_card_body_pt is never visible in the output —
    confirm which minimum is intended.

    Does nothing when the shape has no txBody.
    """
    from lxml import etree

    def _a(name: str) -> str:
        # Clark-notation tag in the DrawingML namespace.
        return f"{{{_NS_A}}}{name}"

    txb = sp_el.find(f"{{{_NS_P}}}txBody")
    if txb is None:
        return
    paras = txb.findall(_a("p"))

    # Keep a detached copy of the first run's rPr to style the new runs.
    rPr_copy = None
    if paras:
        first_r = paras[0].find(_a("r"))
        rPr_src = first_r.find(_a("rPr")) if first_r is not None else None
        if rPr_src is not None:
            rPr_copy = etree.fromstring(etree.tostring(rPr_src))

    # Remove all existing paragraphs.
    for p in paras:
        txb.remove(p)

    # Centralised body layout — top-anchored, top/side padding, noAutofit.
    # Card-specific: extra top-pad (0.3in) clears the banner/header that sits
    # just above the body in many card layouts.
    _apply_body_layout(txb, top_pad_in=0.3, side_pad_in=0.25, bottom_pad_in=0.1)

    # Compute the font size that lets all paragraphs fit in the box height.
    # Uses the same insets that were just applied to the body.
    fit_pt = _fit_card_body_pt(sp_el, lines,
                               top_pad_in=0.3, side_pad_in=0.25, bottom_pad_in=0.1,
                               gap_pt=9.0, max_pt=16.0, min_pt=10.0)
    if rPr_copy is not None:
        if fit_pt is not None:
            rPr_copy.set("sz", str(int(fit_pt * 100)))
        # Enforce minimum readable font size (14pt).  Done once on the
        # template rPr instead of on every per-run copy.
        try:
            if int(rPr_copy.get("sz", "1600")) < 1400:
                rPr_copy.set("sz", "1400")
        except ValueError:
            pass  # malformed sz attribute — leave the authored value untouched

    # Filter first so "is this the last paragraph?" is decided against the
    # paragraphs actually written.  Comparing the raw enumerate index with
    # the non-blank count mis-flags earlier bullets whenever a blank line is
    # skipped, dropping their space-after.
    visible = [stripped for stripped in (ln.strip() for ln in lines) if stripped]
    if not visible:
        # A txBody without any <a:p> is not valid DrawingML.
        etree.SubElement(txb, _a("p"))
        return
    last_idx = len(visible) - 1

    for idx, line in enumerate(visible):
        p_el = etree.SubElement(txb, _a("p"))
        pPr = etree.SubElement(p_el, _a("pPr"))
        # Bullet body stays LEFT-aligned (the standard hanging-bullet pattern).
        # Card labels/numbers are centered separately in _replace_text_keeping_run_style.
        pPr.set("algn", "l")
        if bullet:
            # Hanging indent: text at marL, bullet glyph pulled back by indent.
            pPr.set("marL", "285750")
            pPr.set("indent", "-285750")

        # Child order follows the pPr schema: lnSpc, spcAft, buFont, buChar.
        lnSpc = etree.SubElement(pPr, _a("lnSpc"))
        etree.SubElement(lnSpc, _a("spcPct")).set("val", "115000")  # 115%
        if idx != last_idx:
            spcAft = etree.SubElement(pPr, _a("spcAft"))
            etree.SubElement(spcAft, _a("spcPts")).set("val", "900")  # 9pt
        if bullet:
            buFont = etree.SubElement(pPr, _a("buFont"))
            buFont.set("typeface", "Wingdings")
            buFont.set("panose", "05000000000000000000")
            buFont.set("pitchFamily", "2")
            buFont.set("charset", "2")
            etree.SubElement(pPr, _a("buChar")).set("char", "\xfc")

        r_el = etree.SubElement(p_el, _a("r"))
        if rPr_copy is not None:
            # Each run needs its own rPr element; an lxml node can only live
            # in one place in the tree.
            r_el.append(etree.fromstring(etree.tostring(rPr_copy)))
        else:
            rPr_new = etree.SubElement(r_el, _a("rPr"))
            rPr_new.set("lang", "en-US")
            rPr_new.set("sz", "1400")
        etree.SubElement(r_el, _a("t")).text = line


def _build_drill_down(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Drill-down content slide: large icon circle (left) + brand banner + bullet body (right).
    Layout mirrors the gold standard: circle with step number on left, banner header
    + checkmark bullets to the right of the circle.

    Reads from *d*: "title", "step_label", "step_num" (default 1) and
    "bullets" (default []).  Reads from *dna*: slide_indices.content,
    body.x / body.y, optional "brand_font" and "brand_color" (6-digit hex,
    navy 1F3564 when missing or unparsable).

    The slide is cloned from the template content slide; the title slot
    receives the title, all non-title/non-body slots are cleared, and the
    body slot is resized and moved under the banner before the bullets are
    written.
    """
    from pptx.util import Inches, Pt
    from pptx.dml.color import RGBColor
    from pptx.enum.shapes import MSO_SHAPE
    from pptx.enum.text import PP_ALIGN

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    title      = d.get("title", "")
    step_label = d.get("step_label", "")
    step_num   = d.get("step_num", 1)
    bullets    = d.get("bullets", [])

    # Fall back to positional slots when title/body could not be identified.
    title_slot, body_slot, other_slots = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for s in other_slots:
        _clear_txbody(s["element"])

    if title_slot:
        _set_text(title_slot["element"], title,
                  cx_in=title_slot.get("cx_in", 0), cy_in=title_slot.get("cy_in", 0),
                  min_font_pt=24,
                  font_family=dna.get("brand_font") or None)

    # Brand colour from DNA (navy fallback when missing or not 6 hex digits).
    brand = dna.get("brand_color", "1F3564")
    try:
        BRAND = RGBColor(int(brand[0:2], 16), int(brand[2:4], 16), int(brand[4:6], 16))
    except (TypeError, ValueError):
        BRAND = RGBColor(0x1F, 0x35, 0x64)
    WHITE = RGBColor(0xFF, 0xFF, 0xFF)

    body_x  = dna["body"]["x"]
    body_y  = dna["body"]["y"]
    body_cx = body_cx_for_text_only(dna, has_image=False)

    # ── Icon circle (left) ────────────────────────────────────────────────────
    ICON_D   = 1.25   # diameter in inches — compact, matches gold standard
    ICON_GAP = 0.25   # gap between circle right edge and content left
    content_x  = body_x + ICON_D + ICON_GAP
    content_cx = body_cx - ICON_D - ICON_GAP

    icon = slide.shapes.add_shape(
        MSO_SHAPE.OVAL,
        Inches(body_x), Inches(body_y),
        Inches(ICON_D), Inches(ICON_D),
    )
    icon.fill.solid()
    icon.fill.fore_color.rgb = BRAND
    icon.line.fill.background()
    tf_i = icon.text_frame
    tf_i.word_wrap = False
    # Top margin pushes the step number towards the circle's vertical middle.
    tf_i.margin_top = Inches(ICON_D / 2 - 0.22)
    p_i = tf_i.paragraphs[0]
    p_i.text           = str(step_num)
    p_i.font.bold      = True
    p_i.font.size      = Pt(36)
    p_i.font.color.rgb = WHITE
    p_i.alignment      = PP_ALIGN.CENTER

    # ── Brand banner (right of icon) ──────────────────────────────────────────
    BANNER_H = 0.45   # banner height in inches
    bar = slide.shapes.add_shape(
        MSO_SHAPE.RECTANGLE,
        Inches(content_x), Inches(body_y), Inches(content_cx), Inches(BANNER_H),
    )
    bar.fill.solid()
    bar.fill.fore_color.rgb = BRAND
    bar.line.fill.background()
    tf = bar.text_frame
    tf.margin_left = Inches(0.12)
    tf.margin_top  = Inches(0.04)
    p = tf.paragraphs[0]
    p.text           = step_label
    p.font.bold      = True
    p.font.size      = Pt(16)
    p.font.color.rgb = WHITE

    # ── Bullet body below the banner ──────────────────────────────────────────
    if body_slot:
        BODY_H   = 3.2               # fixed body height in inches
        bullet_y = body_y + 0.55     # banner height plus a 0.1in gap
        # Position and size in one call, the same way the sibling builders
        # do, instead of editing the <a:off> element by hand afterwards.
        _resize_shape(body_slot["element"], content_cx, BODY_H,
                      new_x_in=content_x, new_y_in=bullet_y)
        _set_paragraphs(body_slot["element"], bullets,
                        cx_in=content_cx, cy_in=BODY_H, bullet=True)


# ═══════════════════════════════════════════════════════════════════════════════
# Assessment and practical slide builders
# ═══════════════════════════════════════════════════════════════════════════════

def _build_quiz(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Clone the template's quiz slide (slide 6 in the 7-slide contract) and
    fill it with one MCQ — question + up to 4 options labelled A/B/C/D.

    The quiz layout across all current templates has the same shape pattern:
      • A wide title textbox carrying the literal word "Quiz"
      • A prominent question container (Rectangle: Diagonal Corners Rounded)
        nested inside a group, holding the question text + answer placeholders
      • Two small decorative 3-oval groups in opposing corners

    We replace:
      • title    → "Quiz — Question N"
      • question container body → question text, a blank line, then one
        option per line

    Reads from *d*: "question_num" (falls back to 1 when missing or not a
    number), "question", "options".  Options are padded/truncated to four
    positions; blank or whitespace-only options are not rendered, but the
    remaining ones keep the letter of their position.  No dangling blank
    lines are emitted when the question or all options are empty.

    The answer is *not* marked visually — trainers reveal the answer verbally.
    """
    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["quiz"])

    try:
        question_num = int(d.get("question_num", 1))
    except (TypeError, ValueError):
        # Best-effort: a missing/garbled number should not abort the deck.
        question_num = 1
    question_text = (d.get("question") or "").strip()

    # Pad / truncate to exactly 4 positions so letters stay tied to position.
    options = (list(d.get("options") or []) + ["", "", "", ""])[:4]
    cleaned = ["" if opt is None else str(opt).strip() for opt in options]
    option_lines = [
        f"{label}.  {opt}" for label, opt in zip("ABCD", cleaned) if opt
    ]

    # Join only the parts that exist, so an option-less question does not end
    # in "\n\n" and a question-less slide does not start with it.
    body_text = "\n\n".join(
        part for part in (question_text, "\n".join(option_lines)) if part
    )

    title_text = f"Quiz — Question {question_num}"

    # Find slots — walk into groups so we catch the question rectangle
    # which is nested inside the decorative quiz group.
    slots = _collect_slots(slide)

    title_slot = None
    question_slot = None
    for s in slots:
        original = (s.get("text", "") or "").strip().lower()
        cx_in = s.get("cx_in", 0)
        cy_in = s.get("cy_in", 0)
        y_in = s.get("abs_y", 0)
        # Title: first wide shape near the top with "quiz" in its original text.
        if title_slot is None and "quiz" in original and y_in < 2.0 and cx_in > 6.0:
            title_slot = s
            continue
        # Question container: the tallest wide shape (> 6in wide, > 2in tall).
        if cy_in > 2.0 and cx_in > 6.0:
            if question_slot is None or cy_in > question_slot.get("cy_in", 0):
                question_slot = s

    if title_slot is not None:
        _set_text(title_slot["element"], title_text,
                  cx_in=title_slot.get("cx_in", 0),
                  cy_in=title_slot.get("cy_in", 0))

    if question_slot is not None:
        _set_text(question_slot["element"], body_text,
                  cx_in=question_slot.get("cx_in", 0),
                  cy_in=question_slot.get("cy_in", 0))


def _build_assessment_marker(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Build a trainer-delivery marker slide for a "quiz" or "qa" entry.

    The slide carries only a title and a single fixed instruction sentence;
    which sentence and which default title are used depends on d["type"]
    ("qa" → Q&A wording, anything else → quiz wording).  d["title"], when
    present, overrides the default title.

    The slide is cloned from the template content slide.  Slots that are
    neither title nor body are emptied, and the body slot is moved/resized
    to the DNA body rectangle (text-only width) before the instruction is
    written as one non-bulleted paragraph.
    """
    is_qa = d.get("type", "quiz") == "qa"
    if is_qa:
        heading = d.get("title", "Questions and Answers")
        instruction = "Open Q&A session — invite learners to ask questions and discuss the topics covered so far."
    else:
        heading = d.get("title", "Knowledge Check")
        instruction = "Trainer-led quiz — the trainer will pose questions verbally for the group to discuss and answer."

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    # Identify title/body; fall back to the first two slots by position.
    title_slot, body_slot, leftovers = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for spare in leftovers:
        _clear_txbody(spare["element"])

    if title_slot:
        _set_text(
            title_slot["element"],
            heading,
            cx_in=title_slot.get("cx_in", 0),
            cy_in=title_slot.get("cy_in", 0),
        )

    width_in = body_cx_for_text_only(dna, has_image=False)
    if not body_slot:
        return

    body_box = dna["body"]
    _resize_shape(
        body_slot["element"], width_in, body_box["cy"],
        new_x_in=body_box["x"], new_y_in=body_box["y"],
    )
    _set_paragraphs(
        body_slot["element"], [instruction],
        cx_in=width_in, cy_in=body_box["cy"], bullet=False,
    )


def _build_scenario_qa(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Build a scenario_qa slide — a workplace scenario for trainer-led discussion.

    Two input shapes are accepted:

    Structured (preferred): any of d["background"], d["task"],
        d["challenge"], d["objective"] is truthy.  Each non-blank section
        becomes a labelled point (label = the key, title-cased) rendered via
        _write_points_body.  If every section is blank after stripping, the
        body is cleared instead.

    Legacy: d["scenarios"] is a list of dicts with a "text" entry.  The
        non-blank texts are rendered as plain, non-bulleted paragraphs; with
        no usable text the body is cleared.

    The title defaults to "Scenario Discussion".  The body is placed at the
    DNA body origin with text-only width; its height is derived from
    dna["content_zone"] when that has a positive cy, otherwise from
    dna["body"]["cy"], and is never less than 3.0in.
    """
    heading = d.get("title", "Scenario Discussion")

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    # Identify title/body; fall back to the first two slots by position.
    title_slot, body_slot, leftovers = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for spare in leftovers:
        _clear_txbody(spare["element"])

    if title_slot:
        _set_text(
            title_slot["element"],
            heading,
            cx_in=title_slot.get("cx_in", 0),
            cy_in=title_slot.get("cy_in", 0),
        )

    width_in = body_cx_for_text_only(dna, has_image=False)
    top_in = dna["body"]["y"]

    # Height: use the content zone, minus however far the body starts below
    # the zone's top edge; floor at 3.0in either way.
    zone = dna.get("content_zone")
    if zone and zone.get("cy", 0) > 0:
        offset_into_zone = max(0.0, top_in - float(zone.get("y", top_in)))
        height_in = max(float(zone["cy"]) - offset_into_zone, 3.0)
    else:
        height_in = max(dna["body"]["cy"], 3.0)

    # ── Structured 4-section format ──────────────────────────────────────────
    section_keys = ("background", "task", "challenge", "objective")
    if any(d.get(key) for key in section_keys):
        stripped = ((key, (d.get(key) or "").strip()) for key in section_keys)
        points = [
            {"label": key.title(), "text": text}
            for key, text in stripped
            if text
        ]
        if body_slot:
            if points:
                _resize_shape(body_slot["element"], width_in, height_in,
                              new_x_in=dna["body"]["x"], new_y_in=top_in)
                _write_points_body(body_slot["element"], points,
                                   cx_in=width_in, cy_in=height_in)
            else:
                _clear_txbody(body_slot["element"])
            return
        # No body slot: fall through to the legacy branch, as before.

    # ── Legacy format: list of {"num", "text"} dicts ─────────────────────────
    paras = []
    for item in d.get("scenarios", []):
        text = item.get("text", "").strip()
        if text:
            paras.append(text)

    if not body_slot:
        return
    if paras:
        _resize_shape(body_slot["element"], width_in, height_in,
                      new_x_in=dna["body"]["x"], new_y_in=top_in)
        _set_paragraphs(body_slot["element"], paras,
                        cx_in=width_in, cy_in=height_in, bullet=False)
    else:
        _clear_txbody(body_slot["element"])


def _build_scenario(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Build a practice-scenario slide (skills courses) from d["title"] and
    d["background"].

    The background string is split on blank lines ("\\n\\n") into prose
    paragraphs; if nothing remains, a single empty paragraph is written.

    LAYOUT DECISIONS (deliberate):
      • The title slot is RESIZED to 1.3in tall so that a two-line title
        ("Scenario N: Long Descriptive Title") has room, and its font size
        is pinned to the smaller of 28pt and what _calc_title_pt suggests.
      • The body slot starts 0.2in under the title box and is EXTENDED down
        to 0.8in above the slide bottom (never shorter than 3.0in).
      • normAutofit is enabled on the body after the paragraphs are written.

    Geometry comes from dna["title"], dna["body"] and dna["slide_h_in"],
    with built-in defaults when a value is absent.
    """
    TITLE_H_IN = 1.3        # taller than the default title box, hosts 2 lines
    TITLE_GAP_IN = 0.2      # gap between title box and body
    BOTTOM_MARGIN_IN = 0.8  # space kept free under the body
    MIN_BODY_H_IN = 3.0
    MAX_TITLE_PT = 28.0

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    # Identify title/body; fall back to the first two slots by position.
    title_slot, body_slot, leftovers = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for spare in leftovers:
        _clear_txbody(spare["element"])

    # Layout rectangle for the title.
    page_h = dna.get("slide_h_in", 7.5)
    title_geom = dna.get("title", {})
    t_x = title_geom.get("x", 0.66)
    t_y = title_geom.get("y", 0.58)
    t_w = title_geom.get("cx", 11.4)

    # Layout rectangle for the body: everything below the title.
    b_w = body_cx_for_text_only(dna, has_image=False)
    b_x = dna.get("body", {}).get("x", 0.66)
    b_y = t_y + TITLE_H_IN + TITLE_GAP_IN
    b_h = max(page_h - b_y - BOTTOM_MARGIN_IN, MIN_BODY_H_IN)

    if title_slot:
        heading = d.get("title", "")
        _resize_shape(title_slot["element"], t_w, TITLE_H_IN,
                      new_x_in=t_x, new_y_in=t_y)
        # Pin the size explicitly so the title never exceeds the 28pt cap.
        pinned_pt = min(MAX_TITLE_PT, _calc_title_pt(heading, t_w, TITLE_H_IN))
        _set_text(title_slot["element"], heading,
                  cx_in=t_w, cy_in=TITLE_H_IN, font_pt=pinned_pt)

    # Split the background into paragraphs on blank lines.
    background = d.get("background", "")
    paras = []
    if background:
        for chunk in background.split("\n\n"):
            chunk = chunk.strip()
            if chunk:
                paras.append(chunk)

    if body_slot:
        _resize_shape(body_slot["element"], b_w, b_h,
                      new_x_in=b_x, new_y_in=b_y)
        _set_paragraphs(body_slot["element"], paras or [""],
                        cx_in=b_w, cy_in=b_h, bullet=False)
        # Autofit goes on last, layered over the size chosen above.
        _enable_normAutofit(body_slot["element"])


def _build_prose_slide(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict):
    """
    Build a case_study / activity slide: a title plus flowing prose taken
    from d["paragraphs"] (any section heading is expected to be embedded in
    the paragraph text itself, e.g. "Background:\\n\\n...").

    LAYOUT DECISIONS (deliberate):
      • The title box is resized to 1.0in tall and its font is pinned at
        24pt — these titles are short and look oversized at the template's
        native title size.
      • The body slot starts 0.2in under the title box and is EXTENDED down
        to 0.8in above the slide bottom (never shorter than 3.5in).
      • normAutofit is enabled on the body after the paragraphs are written.

    When d carries no paragraphs, the body slot is cleared instead.
    Geometry comes from dna["title"], dna["body"] and dna["slide_h_in"],
    with built-in defaults when a value is absent.
    """
    TITLE_H_IN = 1.0        # comfortable for a short single-line title
    TITLE_GAP_IN = 0.2      # gap between title box and body
    BOTTOM_MARGIN_IN = 0.8  # space kept free under the body
    MIN_BODY_H_IN = 3.5
    TITLE_PT = 24.0

    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["content"])
    _remove_decorative_groups(slide)
    slots = _content_slots(slide)

    # Identify title/body; fall back to the first two slots by position.
    title_slot, body_slot, leftovers = _find_title_body_slots(slots)
    if title_slot is None and slots:
        title_slot = slots[0]
    if body_slot is None and len(slots) > 1:
        body_slot = slots[1]

    for spare in leftovers:
        _clear_txbody(spare["element"])

    # Layout rectangle for the title.
    page_h = dna.get("slide_h_in", 7.5)
    title_geom = dna.get("title", {})
    t_x = title_geom.get("x", 0.66)
    t_y = title_geom.get("y", 0.58)
    t_w = title_geom.get("cx", 11.4)

    # Layout rectangle for the body: everything below the title.
    b_w = body_cx_for_text_only(dna, has_image=False)
    b_x = dna.get("body", {}).get("x", 0.66)
    b_y = t_y + TITLE_H_IN + TITLE_GAP_IN
    b_h = max(page_h - b_y - BOTTOM_MARGIN_IN, MIN_BODY_H_IN)

    if title_slot:
        _resize_shape(title_slot["element"], t_w, TITLE_H_IN,
                      new_x_in=t_x, new_y_in=t_y)
        _set_text(title_slot["element"], d.get("title", ""),
                  cx_in=t_w, cy_in=TITLE_H_IN, font_pt=TITLE_PT)

    paras = d.get("paragraphs", [])

    if not body_slot:
        return
    if not paras:
        # Nothing to show — blank the body rather than leave template text.
        _clear_txbody(body_slot["element"])
        return

    _resize_shape(body_slot["element"], b_w, b_h,
                  new_x_in=b_x, new_y_in=b_y)
    _set_paragraphs(body_slot["element"], paras,
                    cx_in=b_w, cy_in=b_h, bullet=False)
    # Autofit goes on last so PowerPoint can shrink long content further.
    _enable_normAutofit(body_slot["element"])


def _build_ending(cloner: SlideCloner, tmpl_path: str, d: dict, dna: dict) -> None:
    """
    Clone the template's ending slide and fix the two long-text overflows the
    template ships with: the 48pt "Congratulations" heading box is too narrow
    for the word at that size, and the social-handle boxes (e.g.
    "/The.Knowledge.Academy.Ltd") are sized for shorter handles than the
    template now carries.

    Both fixes are one-directional: the heading box is only ever WIDENED (a
    box already at least 7.0in wide is left alone) and handle runs are only
    ever SHRUNK (a run already at 13pt or smaller keeps its size).

    *d* is accepted for signature parity with the other builders and is not
    read.  Every <p:sp> under the shape tree is visited, including shapes
    nested inside groups.
    """
    slide = cloner.clone_slide(tmpl_path, dna["slide_indices"]["ending"])

    sp_tag = f"{{{_NS_P}}}sp"
    t_tag  = f"{{{_NS_A}}}t"

    heading_min_cx = int(7.0 * _EMU)   # minimum heading width, EMU
    handle_max_sz  = 1300              # 13pt, in hundredths of a point

    for sp in slide.shapes._spTree.iter(sp_tag):
        text = "".join(t.text or "" for t in sp.iter(t_tag)).strip()
        if not text:
            continue

        if text.lower() == "congratulations":
            # Width needed at 48pt bold: ~15ch × 48 × 0.0095 ≈ 6.84in.
            # Widen to 7.0in (well within 13.33in slide); keep original y/x.
            if sp.find(f"{{{_NS_P}}}txBody") is None:
                continue
            spPr = sp.find(f"{{{_NS_P}}}spPr")
            xfrm = spPr.find(f"{{{_NS_A}}}xfrm") if spPr is not None else None
            ext = xfrm.find(f"{{{_NS_A}}}ext") if xfrm is not None else None
            if ext is None:
                continue
            try:
                current_cx = int(ext.get("cx", 0))
            except ValueError:
                current_cx = 0   # unreadable width — treat as too narrow
            # Never shrink a box that a template already made wide enough.
            if current_cx < heading_min_cx:
                ext.set("cx", str(heading_min_cx))

        elif text.startswith("/") and len(text) > 14:
            # Social-handle text "/The.Knowledge.Academy.Ltd",
            # "/the-knowledge-academy". The boxes are 3.5in at 16pt; at 26ch
            # this needs ~4.0in. Shrinking to 13pt fits comfortably (~3.2in)
            # without touching the icon+text group layout.
            for rPr in sp.iter(f"{{{_NS_A}}}rPr"):
                sz = rPr.get("sz")
                try:
                    needs_shrink = sz is None or int(sz) > handle_max_sz
                except ValueError:
                    needs_shrink = True   # malformed size — normalise it
                # Never enlarge a run that is already 13pt or smaller.
                if needs_shrink:
                    rPr.set("sz", str(handle_max_sz))


# ═══════════════════════════════════════════════════════════════════════════════
# V3 helpers: bank-as-base slide building
# ═══════════════════════════════════════════════════════════════════════════════

def _strip_bank_header(slide, dna: dict) -> None:
    """
    Remove the bank slide's title zone text shapes, placeholder shapes, and
    any existing watermark so we can replace them with template chrome.

    Only top-level <p:sp> elements are considered; groups and pictures are
    never touched, so decorative visual groups in the title zone survive.

    A shape is removed when any of these holds:
      • it contains a placeholder (<p:ph>);
      • it has a txBody with text and sits above the title-zone boundary;
      • it has a txBody, sits below 6.0in, and its text either contains
        "knowledge" or is shorter than 5 characters (including empty).

    The title-zone boundary is dna["content_zone"]["y"] when available,
    otherwise dna["body"]["y"].  A missing, None or y-less content_zone falls
    back to the body instead of raising.
    """
    sp_tag = f"{{{_NS_P}}}sp"
    t_tag  = f"{{{_NS_A}}}t"

    # content_zone may be absent or None in the DNA (other builders guard
    # for that), so resolve it defensively and only touch dna["body"] when
    # the fallback is actually needed.
    zone_y = (dna.get("content_zone") or {}).get("y")
    if zone_y is None:
        zone_y = dna["body"]["y"]
    title_zone_y = int(zone_y * _EMU)
    watermark_y  = int(6.0 * _EMU)
    spTree = slide.shapes._spTree

    to_remove = []
    for el in list(spTree):
        if el.tag != sp_tag:
            continue
        # Always strip placeholder shapes
        if el.find(f".//{{{_NS_P}}}ph") is not None:
            to_remove.append(el)
            continue
        txb = el.find(f"{{{_NS_P}}}txBody")
        if txb is None:
            continue
        text = "".join(t.text or "" for t in txb.iter(t_tag)).lower().strip()
        _, y, _, _ = _get_grp_xfrm(el)
        # Remove text shapes in the title zone
        if y < title_zone_y and text:
            to_remove.append(el)
            continue
        # Remove bank watermark / knowledge academy branding
        if y > watermark_y and ("knowledge" in text or len(text) < 5):
            to_remove.append(el)

    # Detach after the scan so the tree is not mutated while it is inspected.
    for el in to_remove:
        parent = el.getparent()
        if parent is not None:
            parent.remove(el)


def _grp_actual_content_top(grp_el) -> int:
    """
    Return the slide-space y (EMU) of the topmost direct child of a group.

    Each child's offset is mapped from the group's child coordinate system
    into slide space using the group's off / chOff and the vertical scale
    ext.cy / chExt.cy (scale 1.0 when chExt.cy is zero).  Children without
    an xfrm/off, and zero-height stubs (ext missing or cy == 0), are ignored.
    Nested groups contribute their own outer offset; they are not descended
    into.

    Fallbacks:
      • group has no grpSpPr or no xfrm → y reported by _get_grp_xfrm;
      • no usable child → the group's own offset y.
    """
    grp_tag = f"{{{_NS_P}}}grpSp"
    grp_pr_tag = f"{{{_NS_P}}}grpSpPr"
    sp_pr_tag = f"{{{_NS_P}}}spPr"
    xfrm_tag = f"{{{_NS_A}}}xfrm"
    off_tag = f"{{{_NS_A}}}off"
    ext_tag = f"{{{_NS_A}}}ext"

    grp_pr = grp_el.find(grp_pr_tag)
    grp_xfrm = grp_pr.find(xfrm_tag) if grp_pr is not None else None
    if grp_xfrm is None:
        return _get_grp_xfrm(grp_el)[1]

    def _read(local_name: str, attr: str, missing: int) -> int:
        # Integer attribute of a direct xfrm child, or *missing* if the
        # child element itself is absent.
        node = grp_xfrm.find(f"{{{_NS_A}}}{local_name}")
        return int(node.get(attr, 0)) if node is not None else missing

    origin_y = _read("off", "y", 0)
    outer_h = _read("ext", "cy", 0)
    child_origin_y = _read("chOff", "y", 0)
    child_h = _read("chExt", "cy", outer_h)
    y_scale = (outer_h / child_h) if child_h else 1.0

    tops = []
    for child in grp_el:
        # Nested groups keep their transform under grpSpPr, shapes under spPr.
        props = child.find(grp_pr_tag if child.tag == grp_tag else sp_pr_tag)
        child_xfrm = props.find(xfrm_tag) if props is not None else None
        if child_xfrm is None:
            continue
        child_off = child_xfrm.find(off_tag)
        if child_off is None:
            continue
        child_ext = child_xfrm.find(ext_tag)
        if child_ext is None or int(child_ext.get("cy", 0)) == 0:
            continue   # zero-height stub — not visible content
        tops.append(origin_y + (int(child_off.get("y", 0)) - child_origin_y) * y_scale)

    return int(min(tops)) if tops else origin_y


def _bottom_align_bank_shapes(slide, body_y_in: float, intro_height_in: float = 0.0) -> None:
    """
    Shift the top-level bank shapes downward by one common amount so that
    visible content clears the intro text area.

    The clearance line is body_y_in + intro_height_in plus a gap (0.15in
    when there is intro text, 0.05in otherwise).  For every candidate shape
    the required shift is the distance from its content top to that line,
    limited so the shape's own bottom stays above 7.0in.  For groups the
    content top comes from _grp_actual_content_top (child coordinates), not
    from the outer bounding box.  The largest such value is then applied to
    ALL candidates, which keeps their relative layout intact.

    Candidates are top-level sp / grpSp / pic elements that contain no
    placeholder and whose height is at least 0.3in and below 6.5in
    (full-slide-height background containers and thin slivers are skipped).

    NOTE(review): the 7.0in limit is applied per shape before the maximum is
    taken, so a shape with little headroom can still be pushed past 7.0in by
    another shape's larger requirement — confirm this is intended.

    Call BEFORE _apply_template_chrome so template chrome shapes are not shifted.
    """
    grp_tag = f"{{{_NS_P}}}grpSp"
    shiftable_tags = (grp_tag, f"{{{_NS_P}}}sp", f"{{{_NS_P}}}pic")
    ph_path = f".//{{{_NS_P}}}ph"

    full_slide_cy = int(6.5 * _EMU)
    min_cy = int(0.3 * _EMU)
    slide_max_y = int(7.0 * _EMU)   # don't push content below here

    gap_in = 0.15 if intro_height_in > 0 else 0.05
    floor_emu = int((body_y_in + intro_height_in + gap_in) * _EMU)

    # Pass 1: collect the movable shapes and each one's capped requirement.
    movable = []
    requirements = []
    for el in slide.shapes._spTree:
        if el.tag not in shiftable_tags:
            continue
        if el.find(ph_path) is not None:
            continue
        _, top, _, height = _get_grp_xfrm(el)
        if not (min_cy <= height < full_slide_cy):
            continue

        content_top = _grp_actual_content_top(el) if el.tag == grp_tag else top
        wanted = max(0, floor_emu - content_top)
        headroom = max(0, slide_max_y - (top + height))
        requirements.append(min(wanted, headroom))
        movable.append(el)

    shift = max(requirements, default=0)
    if shift == 0:
        return   # nothing to move (also covers "no candidates")

    # Pass 2: apply the common shift to every candidate's <a:off y=...>.
    for el in movable:
        props_tag = f"{{{_NS_P}}}grpSpPr" if el.tag == grp_tag else f"{{{_NS_P}}}spPr"
        props = el.find(props_tag)
        xfrm = props.find(f"{{{_NS_A}}}xfrm") if props is not None else None
        off = xfrm.find(f"{{{_NS_A}}}off") if xfrm is not None else None
        if off is None:
            continue
        off.set("y", str(int(off.get("y", 0)) + shift))


def _apply_template_chrome(
    cloner: SlideCloner, slide, tmpl_path: str, title: str, dna: dict,
    intro_height_in: float = 0.0,
    preserve_group_position: bool = False,
) -> None:
    """
    Copy chrome shapes (watermark, corner accents, left accent) from the template
    content slide into a bank-based slide, then add the template's title text box
    filled with *title*.

    The function runs in four phases:
      1. Copy every template element accepted by ``_is_chrome_or_title`` into
         *slide* via ``cloner.copy_elements_from``.
      2. Among the newly copied elements, pick the widest text-bearing ``sp``
         inside the title zone and write *title* into it.
      3. Unless *preserve_group_position* is set, reposition (and if necessary
         scale down) the largest pre-existing ``grpSp`` so its first visible
         child sits below the title / intro text and it is centred horizontally.
      4. Move pure-decoration chrome (pictures, logo text, empty shapes) to the
         front of the shape tree so it renders behind the slide's own content.

    Parameters
    ----------
    cloner : used only for ``copy_elements_from``.
    slide : destination slide; its ``shapes._spTree`` is mutated in place.
    tmpl_path : template file path, forwarded to ``copy_elements_from``.
    title : text written into the copied title box.
    dna : template analysis dict.  Keys read here: ``title`` (``x``, ``y``,
        ``cy`` in inches), ``slide_indices["content"]``, optional
        ``brand_font``, and ``content_zone`` (falling back to ``body``) for
        its ``y`` when *intro_height_in* > 0.
    intro_height_in : height (inches) of intro text; when > 0 the main group is
        placed below ``content_zone.y + intro_height_in + 0.15 in`` instead of
        0.5 in below the title box.
    preserve_group_position : when True the main group is left untouched and
        the title is not widened.
    """
    from lxml import etree
    sp_tag  = f"{{{_NS_P}}}sp"
    grp_tag = f"{{{_NS_P}}}grpSp"
    pic_tag = f"{{{_NS_P}}}pic"

    # Chrome zones in the template content slide:
    #   • Bottom watermark area  (y > 6.3 in)
    #   • Top-right corner accent (x > 8.0 in, y < 2.0 in)
    #   • Bottom-right corner accent (x > 8.0 in, bottom edge > 5.5 in)
    #   • Left accent strip / corner image (x < 0.6 in)
    _CHROME_Y_BOTTOM  = int(6.3  * _EMU)
    _CHROME_X_RIGHT   = int(8.0  * _EMU)
    _CHROME_Y_TOP_MAX = int(2.0  * _EMU)
    _CHROME_Y_BOT_MIN = int(5.5  * _EMU)   # bottom-right accent: shape bottom > 5.5 in
    _CHROME_X_LEFT    = int(0.6  * _EMU)

    def _is_chrome(el):
        """True when *el* is a non-placeholder shape located in a chrome zone."""
        # Include pic elements (corner/border images) as well as sp/grpSp
        if el.tag not in (sp_tag, grp_tag, pic_tag):
            return False
        # Placeholders (anything containing a <p:ph>) are never chrome.
        if el.find(f".//{{{_NS_P}}}ph") is not None:
            return False
        # Any non-logo text means this is a content slot (body placeholder,
        # marker word like "Text", body freeform, etc.) — NOT chrome.
        # Genuine chrome (borders, watermarks, corner accents) is either
        # empty or contains the brand logo text only.
        _t_tag = f"{{{_NS_A}}}t"
        text   = "".join(t.text or "" for t in el.iter(_t_tag)).strip()
        if text and not any(tok in text.lower() for tok in _LOGO_TOKENS):
            return False
        # Position test: the element's top-left corner (and, for the
        # bottom-right accent, its bottom edge) must fall in one of the zones.
        x, y, cx, cy = _get_grp_xfrm(el)
        return (
            y > _CHROME_Y_BOTTOM
            or (x > _CHROME_X_RIGHT and y < _CHROME_Y_TOP_MAX)
            or (x > _CHROME_X_RIGHT and (y + cy) > _CHROME_Y_BOT_MIN)
            or x < _CHROME_X_LEFT
        )

    # Title zone = the actual title placeholder position from the template DNA,
    # with a small tolerance.  Using a hardcoded zone fails for templates whose
    # body placeholder sits right at the lower edge (e.g. body at y=1.397 in).
    _t_y  = int(dna["title"]["y"]  * _EMU)
    _t_cy = int(dna["title"]["cy"] * _EMU)
    _TOL  = int(0.05 * _EMU)
    _TITLE_Y1 = max(0, _t_y - _TOL)
    _TITLE_Y2 = _t_y + _t_cy + _TOL

    def _is_chrome_or_title(el):
        """Copy filter: chrome shapes plus non-placeholder text shapes in the title zone."""
        if _is_chrome(el):
            return True
        if el.tag != sp_tag:
            return False
        if el.find(f".//{{{_NS_P}}}ph") is not None:
            return False
        x, y, cx, cy = _get_grp_xfrm(el)
        txb = el.find(f"{{{_NS_P}}}txBody")
        # Strict inequalities: the shape's top edge must lie inside the zone.
        return txb is not None and _TITLE_Y1 < y < _TITLE_Y2

    spTree = slide.shapes._spTree
    # Remember how many children exist now so everything at or after this index
    # can later be identified as "newly copied from the template".
    count_before = len(list(spTree))

    cloner.copy_elements_from(
        tmpl_path, dna["slide_indices"]["content"], slide, filter_fn=_is_chrome_or_title
    )

    # Find the newly added title text box (largest cx in title zone)
    title_el = None
    best_cx  = 0
    for el in list(spTree)[count_before:]:
        if el.tag != sp_tag:
            continue
        txb = el.find(f"{{{_NS_P}}}txBody")
        if txb is None:
            continue
        x, y, cx, cy = _get_grp_xfrm(el)
        if _TITLE_Y1 < y < _TITLE_Y2 and cx > best_cx:
            best_cx  = cx
            title_el = el

    # Everything below (title fill AND group repositioning) is skipped when no
    # title box was copied.
    if title_el is not None:
        txb = title_el.find(f"{{{_NS_P}}}txBody")
        if txb is not None:
            # Bank slides (overview_groups, groups) put their visual content in
            # the body area (y > 1.5); the title row is empty so the title can
            # use the wide layout. Card slides pass preserve_group_position=True
            # because their cards may extend into the title row.
            wide_title = not preserve_group_position
            title_cx_target = title_cx_for_layout(dna, has_right_content=not wide_title)
            # Title height in inches, including the tolerance band on both sides.
            title_cy_in = (_TITLE_Y2 - _TITLE_Y1) / _EMU
            # Only widen when the target is meaningfully (> 0.05 in) wider than
            # the copied box.
            # NOTE(review): _resize_shape receives the txBody element, not the
            # enclosing <p:sp> — confirm that is what the helper expects.
            if wide_title and title_cx_target > best_cx / _EMU + 0.05:
                _resize_shape(
                    txb, title_cx_target, title_cy_in,
                    new_x_in=dna["title"]["x"],
                    new_y_in=dna["title"]["y"],
                )
            _set_text(txb, title,
                      cx_in=title_cx_target,
                      cy_in=title_cy_in,
                      min_font_pt=32,
                      font_family=dna.get("brand_font") or None)

        # Elements appended by the copy above; excluded from the main-group search.
        chrome_shapes  = list(spTree)[count_before:]
        _SLIDE_WIDTH   = int(13.33 * _EMU)
        _GAP_BELOW_TTL = int(0.50 * _EMU)   # desired gap between title bottom and first visual content

        # Find the main infographic group (largest non-chrome grpSp)
        main_group = None
        max_area   = 0
        for el in spTree:
            if el in chrome_shapes:
                continue
            if el.tag != grp_tag:
                continue
            x, y, cx, cy = _get_grp_xfrm(el)
            # Ignore groups that are 0.5 in or smaller in either dimension.
            if cx > int(0.5 * _EMU) and cy > int(0.5 * _EMU):
                if cx * cy > max_area:
                    max_area   = cx * cy
                    main_group = el

        if main_group is not None and not preserve_group_position:
            grp_x, grp_y, grp_cx, grp_cy = _get_grp_xfrm(main_group)
            grpSpPr = main_group.find(f"{{{_NS_P}}}grpSpPr")

            # Read child coordinate system to compute group→slide mapping
            # Defaults assume an identity mapping (child space == group extent).
            chOff_y  = 0
            chExt_cy = grp_cy
            if grpSpPr is not None:
                xf = grpSpPr.find(f"{{{_NS_A}}}xfrm")
                if xf is not None:
                    chO = xf.find(f"{{{_NS_A}}}chOff")
                    chE = xf.find(f"{{{_NS_A}}}chExt")
                    if chO is not None:
                        chOff_y  = int(chO.get("y", 0))
                    if chE is not None:
                        # A zero/missing chExt.cy falls back to the group height.
                        chExt_cy = int(chE.get("cy", 0)) or grp_cy

            # Find topmost VISIBLE child (local/child coords).
            # Rules:
            #   - Skip full-width header txBoxes (blanked by _strip_bank_header).
            #   - Skip blank txBoxes (background/placeholder shapes with no text) —
            #     these would drag the infographic down, hiding the real visual content.
            #   - grpSp visual shapes and filled label txBoxes are always included.
            _HEADER_CX = int(0.70 * _SLIDE_WIDTH)
            _HEADER_Y  = int(1.00 * _EMU)
            _t_tag     = f"{{{_NS_A}}}t"
            top_content_y = float("inf")
            for child in main_group:
                # Only direct sp/grpSp children are considered (pictures are ignored).
                if child.tag not in (grp_tag, sp_tag):
                    continue
                cr = _get_grp_xfrm(child)
                # cr is (x, y, cx, cy); skip children with no usable height.
                if cr is None or cr[3] <= 0:
                    continue
                cNv   = child.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
                is_tx = cNv is not None and cNv.get("txBox") == "1"
                # Skip full-width header
                if is_tx and cr[2] >= _HEADER_CX and cr[1] < _HEADER_Y:
                    continue
                # Skip blank txBoxes (no visible text content)
                if is_tx:
                    txb  = child.find(f"{{{_NS_P}}}txBody")
                    text = "".join(t.text or "" for t in txb.iter(_t_tag)).strip() if txb is not None else ""
                    if not text:
                        continue
                top_content_y = min(top_content_y, cr[1])

            # No qualifying child: treat the child-space origin as the content top.
            if top_content_y == float("inf"):
                top_content_y = chOff_y

            # Slide-space y we want the first content to appear at.
            # Intro text is placed at dna["body"]["y"] (just below the title placeholder),
            # so when intro exists the infographic must clear body_y + intro_height.
            title_r        = _get_grp_xfrm(title_el)
            title_bottom   = title_r[1] + title_r[3]
            if intro_height_in > 0:
                _cz_y = dna.get("content_zone", dna["body"])["y"]
                target_slide_y = int(_cz_y * _EMU) + int(intro_height_in * _EMU) + int(0.15 * _EMU)
            else:
                target_slide_y = title_bottom + _GAP_BELOW_TTL

            # Inverse of: slide_y = off_y + (child_y - chOff_y) * scale_y
            scale_y   = (grp_cy / chExt_cy) if chExt_cy > 0 else 1.0
            new_off_y = int(target_slide_y - (top_content_y - chOff_y) * scale_y)
            new_off_y = max(0, new_off_y)   # never above slide top

            # Scale group down if it would overflow the slide bottom.
            # Change ext.cy/cx (keeps chOff/chExt fixed so PPTX scales all children).
            # NOTE(review): new_off_y was derived from the pre-scaling scale_y, so
            # after shrinking the first content may land above target_slide_y —
            # confirm this is acceptable.
            _SLIDE_HEIGHT = int(7.50 * _EMU)
            available_h   = _SLIDE_HEIGHT - new_off_y
            new_cx, new_cy = grp_cx, grp_cy
            if grp_cy > available_h > 0:
                new_cy    = available_h
                # Width shrinks by the same factor to preserve the aspect ratio.
                new_cx    = int(grp_cx * new_cy / grp_cy) if grp_cy > 0 else grp_cx

            # Center horizontally with (possibly scaled) width
            center_x = max(0, (_SLIDE_WIDTH - new_cx) // 2)

            # Apply position and (if needed) scaled size
            if grpSpPr is not None:
                xf = grpSpPr.find(f"{{{_NS_A}}}xfrm")
                if xf is not None:
                    off_el = xf.find(f"{{{_NS_A}}}off")
                    ext_el = xf.find(f"{{{_NS_A}}}ext")
                    if off_el is not None:
                        # Leave x alone when the group is already within 0.05 in
                        # of centred; y is always rewritten.
                        if abs(center_x - grp_x) > int(0.05 * _EMU):
                            off_el.set("x", str(center_x))
                        off_el.set("y", str(new_off_y))
                    if ext_el is not None and (new_cx != grp_cx or new_cy != grp_cy):
                        ext_el.set("cx", str(new_cx))
                        ext_el.set("cy", str(new_cy))

    # ── Z-order: push chrome decoration to the BACK ──────────────────────────
    # copy_elements_from() appends to spTree, which puts chrome on TOP of the
    # bank's body content. That means the corner-accent pictures render over
    # any overflowing bullet text. Move pure-decoration chrome (pictures and
    # logo watermarks) to the front of spTree so they sit behind the body in
    # z-order. The title text element is left at its original (top) position
    # so it stays visible above the body.
    _logo_tags = _LOGO_TOKENS
    def _is_pure_decoration(el):
        """True for copied pictures, logo-text shapes and text-less shapes (never the title)."""
        if el is title_el:
            return False
        if el.tag == pic_tag:
            return True
        if el.tag == sp_tag:
            # Logo watermark text is also pure decoration
            _t_tag = f"{{{_NS_A}}}t"
            text = "".join(t.text or "" for t in el.iter(_t_tag)).strip().lower()
            if text and any(tok in text for tok in _logo_tags):
                return True
            # Empty auto-shape used as a border accent
            if not text:
                return True
        # grpSp chrome and shapes with other text keep their appended position.
        return False

    # spTree starts with mandatory <p:nvGrpSpPr> + <p:grpSpPr> headers; insert
    # after those (typically index 2). Locate the first non-header position.
    header_offset = 0
    for child in spTree:
        local = etree.QName(child).localname
        if local in ("nvGrpSpPr", "grpSpPr"):
            header_offset += 1
        else:
            break

    # Re-insert decorations one after another so their relative order is kept.
    insert_at = header_offset
    for el in list(spTree)[count_before:]:
        if _is_pure_decoration(el):
            spTree.remove(el)
            spTree.insert(insert_at, el)
            insert_at += 1


def _harmonise_slide_colors(slide, brand_color: str) -> None:
    """
    Recolour a bank-based slide so its main accent matches *brand_color*.

    Two passes over the slide's shape tree:

    1. Explicit RGB colours: every ``srgbClr`` whose value is a 6-character
       hex string classified as an accent (average channel value strictly
       between 20 and 215, channel spread above 45) is tallied.  Only the
       single most frequent accent value is rewritten to the brand colour;
       all other colours (neutrals and minority accents) are left untouched.
    2. Theme colours: every ``schemeClr`` referencing ``accent1``..``accent6``
       is replaced, at the same child position, by a fresh ``srgbClr``
       carrying the brand colour.

    *brand_color* is upper-cased before use.  The slide is mutated in place.
    """
    from collections import Counter
    from lxml import etree

    brand_hex  = brand_color.upper()
    tree       = slide.shapes._spTree
    srgb_tag   = f"{{{_NS_A}}}srgbClr"
    scheme_tag = f"{{{_NS_A}}}schemeClr"

    def _is_accent(hex6: str) -> bool:
        # Saturated, mid-brightness colours only; unparsable values are not accents.
        try:
            r, g, b = (int(hex6[pos:pos + 2], 16) for pos in (0, 2, 4))
            brightness = (r + g + b) / 3
            saturation = max(r, g, b) - min(r, g, b)
            return 20 < brightness < 215 and saturation > 45
        except Exception:
            return False

    # ── Pass 1: dominant explicit accent ─────────────────────────────────────
    tally: list[str] = []
    for node in tree.iter(srgb_tag):
        value = node.get("val", "").upper()
        if len(value) == 6 and _is_accent(value):
            tally.append(value)

    if tally:
        dominant = Counter(tally).most_common(1)[0][0]
        if dominant != brand_hex:
            for node in tree.iter(srgb_tag):
                if node.get("val", "").upper() == dominant:
                    node.set("val", brand_hex)

    # ── Pass 2: theme accent references → explicit brand colour ──────────────
    accent_names = {f"accent{n}" for n in range(1, 7)}
    # Snapshot the matches first: the tree is modified while we walk them.
    for scheme_node in list(tree.iter(scheme_tag)):
        if scheme_node.get("val", "") not in accent_names:
            continue
        holder = scheme_node.getparent()
        if holder is None:
            continue
        position = list(holder).index(scheme_node)
        holder.remove(scheme_node)
        replacement = etree.Element(srgb_tag)
        replacement.set("val", brand_hex)
        holder.insert(position, replacement)


def _cluster_txboxes(text_els: list) -> list[list]:
    """
    Partition text-box tuples into clusters of visually adjacent boxes.

    Each input tuple must carry its y coordinate at index 0 and its x
    coordinate at index 1 (both in EMU); any further fields are carried
    through untouched.  The tuples are ordered by (x, y) and each one is
    compared with the tuple immediately before it in that order: it joins
    the current cluster when it is less than 0.5 in away horizontally AND
    less than 1.0 in away vertically, otherwise it opens a new cluster.

    Returns the clusters in sorted order; an empty input yields ``[]``.
    Callers treat the first tuple of each cluster as the primary label slot.
    """
    max_dx = int(0.50 * _EMU)
    max_dy = int(1.00 * _EMU)

    if not text_els:
        return []

    ordered = sorted(text_els, key=lambda box: (box[1], box[0]))   # x first, then y
    result: list[list] = [[ordered[0]]]

    # The tail of the last cluster is always the previous tuple in sorted
    # order, so a pairwise walk is sufficient.
    for before, current in zip(ordered, ordered[1:]):
        close_x = abs(current[1] - before[1]) < max_dx
        close_y = abs(current[0] - before[0]) < max_dy
        if close_x and close_y:
            result[-1].append(current)
        else:
            result.append([current])

    return result


def _collect_visual_shapes(container) -> list:
    """
    List the content-sized visual shapes that are direct children of
    *container* (an spTree or grpSp element).

    A child qualifies when it is either
      • a ``grpSp``, or
      • an ``sp`` that contains no placeholder, is not flagged ``txBox="1"``,
        has an ``spPr`` without a direct ``noFill`` child, and has a solid,
        gradient or pattern fill somewhere under ``spPr``;
    and its width lies in [0.3 in, 8.5 in] and its height in [0.3 in, 5.0 in].

    Returns ``(x, y, cx, cy, center_x, center_y)`` tuples in document order.
    """
    grp_tag = f"{{{_NS_P}}}grpSp"
    sp_tag  = f"{{{_NS_P}}}sp"
    min_side   = int(0.3 * _EMU)   # anything smaller is not a content shape
    max_width  = int(8.5 * _EMU)   # wider shapes are full-width banners
    max_height = int(5.0 * _EMU)   # taller shapes are full-height backgrounds

    def _is_filled_autoshape(sp_el) -> bool:
        # Placeholders and text boxes are text, not visuals.
        if sp_el.find(f".//{{{_NS_P}}}ph") is not None:
            return False
        cnv = sp_el.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
        if cnv is not None and cnv.get("txBox") == "1":
            return False
        props = sp_el.find(f"{{{_NS_P}}}spPr")
        if props is None or props.find(f"{{{_NS_A}}}noFill") is not None:
            return False
        return any(
            props.find(f".//{{{_NS_A}}}{fill_kind}") is not None
            for fill_kind in ("solidFill", "gradFill", "pattFill")
        )

    found = []
    for child in container:
        if child.tag == sp_tag:
            if not _is_filled_autoshape(child):
                continue
        elif child.tag != grp_tag:
            continue
        x, y, cx, cy = _get_grp_xfrm(child)
        if min_side <= cx <= max_width and min_side <= cy <= max_height:
            found.append((x, y, cx, cy, x + cx // 2, y + cy // 2))
    return found


def _label_font_pt(cx_emu: int, cy_emu: int, label: str) -> float:
    """
    Choose the largest font size between 14pt and 7pt at which *label* fits a
    box of *cx_emu* × *cy_emu* without any word being split across lines.

    A size is accepted when, using an estimated bold character width of
    0.0095 in per point and a line height of 1.35 × the point size:
      • the longest whitespace-separated word fits on one line of the usable
        width (box width minus 0.2 in of insets, never below 0.3 in), and
      • the whole label needs at most two lines, and
      • those lines occupy no more than 92 % of the box height.

    Returns 11.0 when either dimension is non-positive or *label* is empty,
    and 7.0 when no candidate size is accepted.
    """
    if not label or cx_emu <= 0 or cy_emu <= 0:
        return 11.0

    box_w_in  = cx_emu / _EMU
    box_h_in  = cy_emu / _EMU
    usable_w  = max(box_w_in - 0.2, 0.3)          # default lIns/rIns allowance
    widest_word = max(map(len, label.split()), default=len(label))
    total_chars = len(label)

    for size in range(14, 6, -1):                 # 14, 13, …, 7
        glyph_w_in     = size * 0.0095            # bold avg char width per pt
        chars_per_line = usable_w / glyph_w_in
        if chars_per_line <= 0 or widest_word > chars_per_line:
            continue                              # a word would be broken
        row_h_in = size * 1.35 / 72.0
        rows     = math.ceil(total_chars / chars_per_line)
        if rows <= 2 and rows * row_h_in <= box_h_in * 0.92:
            return float(size)
    return 7.0


def _visual_center_fill(txt_els: list, vis_els: list, items: list[dict]) -> None:
    """
    Write item labels into text boxes, pairing boxes with visual shapes by
    centre-to-centre distance.

    Steps:
      1. Every text box is attached to the visual shape whose centre is
         nearest to the box's own centre (squared Euclidean distance).
      2. Visual shapes are ranked in reading order: by y, then by x.
      3. The shape at rank *k* receives ``items[k]["label"]``.  Of the boxes
         attached to it, the one with the largest area gets the label (bold,
         centred, sized by ``_label_font_pt``); the others are set to "".
      4. Shapes ranked beyond ``len(items)`` have all their boxes set to "".

    txt_els : (y, x, cx, cy, el) tuples
    vis_els : (x, y, cx, cy, center_x, center_y) tuples; must be non-empty
              whenever *txt_els* is non-empty (the nearest-shape search has
              nothing to choose from otherwise and raises ValueError)
    items   : ordered content items; only the "label" key is read
    """
    shape_count = len(vis_els)
    attached: list[list] = [[] for _ in range(shape_count)]

    for box in txt_els:
        top, left, width, height, _sp = box
        mid_x = left + width // 2
        mid_y = top + height // 2
        closest = min(
            range(shape_count),
            key=lambda k: (mid_x - vis_els[k][4]) ** 2 + (mid_y - vis_els[k][5]) ** 2,
        )
        attached[closest].append(box)

    # Reading order: top-to-bottom, then left-to-right.
    reading_order = sorted(
        range(shape_count), key=lambda k: (vis_els[k][1], vis_els[k][0])
    )

    for rank, shape_idx in enumerate(reading_order):
        boxes = attached[shape_idx]
        if rank >= len(items) or not boxes:
            # No item for this shape: blank whatever is attached to it.
            for box in boxes:
                _replace_sp_text(box[4], "")
            continue

        boxes.sort(key=lambda b: b[2] * b[3], reverse=True)   # biggest area first
        _, _, width, height, primary = boxes[0]
        label = items[rank].get("label", "")
        size  = _label_font_pt(width, height, label)
        _replace_sp_text(primary, label, font_pt=size, bold=True, center_align=True)
        for box in boxes[1:]:
            _replace_sp_text(box[4], "")


def _fill_bank_slide_by_tag(slide, items: list[dict]) -> bool:
    """
    Tag-based fill for the new tagged-infographics bank.

    Each shape that needs content has a placeholder text tag identifying its
    role and position:
        "Item 1", "Item 2", ... "Item N"  → label slots, ordered by tag number
        "1.", "2.", ...  OR  "1", "2", ...  → number badge slots
        "Text here"                       → generic placeholder (always cleared)

    For each item in *items*, the shape whose text matches "Item {i+1}"
    (case-insensitive, surrounding whitespace ignored) has its text replaced
    with the item's label.  "Item N" slots whose number falls outside
    1..len(items) — including a stray "Item 0" — are cleared.  Number badges
    are left as authored when their index is at most len(items), otherwise
    cleared.  When several shapes carry the same tag number, only the last one
    encountered in document order is tracked.

    When a uniform size is available from ``_compute_uniform_label_pt``, it is
    written onto the first run of every filled slot before the text is
    replaced, so all labels render at the same point size.

    Returns True when at least one "Item N" tag exists on the slide (even if
    every such slot ended up cleared); False when none was found, so the
    caller can fall back to the spatial filler.
    """
    sp_tag = f"{{{_NS_P}}}sp"
    t_tag  = f"{{{_NS_A}}}t"
    item_re = re.compile(r"^\s*Item\s+(\d+)\s*$", re.IGNORECASE)
    num_re  = re.compile(r"^\s*(\d+)\.?\s*$")

    # Collect every tagged sp on the slide (at any nesting depth).
    item_slots: dict[int, object] = {}    # idx (1-based) → sp element
    number_slots: dict[int, object] = {}  # idx (1-based) → sp element
    text_here_slots: list[object] = []

    for sp in slide.shapes._spTree.iter(sp_tag):
        text = "".join(t.text or "" for t in sp.iter(t_tag)).strip()
        if not text:
            continue
        m = item_re.match(text)
        if m:
            item_slots[int(m.group(1))] = sp
            continue
        m = num_re.match(text)
        # Length cap keeps long numeric strings from being mistaken for badges.
        if m and len(text) <= 4:
            number_slots[int(m.group(1))] = sp
            continue
        if text.lower() == "text here":
            text_here_slots.append(sp)

    if not item_slots:
        return False

    n_items = len(items)

    # Compute a single font size that makes EVERY label on this slide fit
    # cleanly (no mid-word breaks) given the narrowest item box on the slide.
    # Applies uniformly so all labels render at the same point size — matching
    # the source designer's intent for slide-wide consistency.
    uniform_pt = _compute_uniform_label_pt(item_slots, items)

    # Fill / clear "Item N" slots
    for idx, sp in item_slots.items():
        # Lower bound matters: idx == 0 would otherwise index items[-1] and
        # silently duplicate the LAST item's label into an "Item 0" slot.
        if 1 <= idx <= n_items:
            label = items[idx - 1].get("label", "")
            if uniform_pt is not None:
                _force_first_run_font_size(sp, uniform_pt)
            _replace_text_keeping_run_style(sp, label)
        else:
            _clear_sp_text(sp)

    # Number badges: keep tags up to n_items as the source authored them
    # (e.g. "1."), clear the rest.
    for idx, sp in number_slots.items():
        if idx > n_items:
            _clear_sp_text(sp)

    # Clear "Text here" generic placeholders so they don't render
    for sp in text_here_slots:
        _clear_sp_text(sp)

    return True


def _compute_uniform_label_pt(item_slots: dict, items: list[dict]) -> float | None:
    """
    Pick one font size (pt) to apply to every filled "Item N" slot so all
    labels render at the same size.

    *item_slots* maps a 1-based tag number to its ``<p:sp>`` element; slot
    ``idx`` is paired with ``items[idx - 1]``.  Slots whose number is outside
    1..len(items), whose label is empty, or whose ``spPr/xfrm/ext`` is missing,
    unparsable or non-positive are ignored.

    For each remaining (slot, label) pair, using the estimate that one
    character is about 1/144 in wide per point and a 10 % safety margin:
      • pt_full = size at which the whole label fits on one line
      • pt_word = size at which the longest single word fits on one line
      • the candidate is pt_full when that is at least 10pt, else pt_word
        (wrapping at word boundaries is then accepted)
      • if the box is too short to hold two lines at the candidate size
        (line height = size × 1.15, 95 % of box height usable), the candidate
        is capped at pt_full and stepped down in 0.5pt decrements, not below
        10pt, until the label fits on one line.

    The source size is the ``sz`` of the first ``<a:rPr>`` found in any slot
    (16pt when none parses).  The result is the smallest candidate, capped at
    the source size and then raised to at least 10pt.

    Returns None when *item_slots* or *items* is empty.
    """
    if not item_slots or not items:
        return None

    # Source default pt — read from any slot's first run rPr
    source_pt = 16.0
    for sp in item_slots.values():
        rPr = sp.find(f".//{{{_NS_A}}}rPr")
        if rPr is not None and rPr.get("sz"):
            try:
                source_pt = int(rPr.get("sz")) / 100.0
            except (TypeError, ValueError):
                continue        # malformed sz — try the next slot
            break

    SAFETY = 0.90
    LINE_SPACING = 1.15
    n_items = len(items)
    min_pt = source_pt
    for idx, sp in item_slots.items():
        # Lower bound matters: idx == 0 would index items[-1] and let the LAST
        # item's label influence sizing for a slot that is never filled.
        if not 1 <= idx <= n_items:
            continue
        label = (items[idx - 1].get("label") or "").strip()
        if not label:
            continue

        # Box geometry
        spPr = sp.find(f"{{{_NS_P}}}spPr")
        xf = spPr.find(f"{{{_NS_A}}}xfrm") if spPr is not None else None
        ext = xf.find(f"{{{_NS_A}}}ext") if xf is not None else None
        if ext is None:
            continue
        try:
            cx_in = int(ext.get("cx", 0)) / _EMU
            cy_in = int(ext.get("cy", 0)) / _EMU
        except (TypeError, ValueError):
            continue
        if cx_in <= 0 or cy_in <= 0:
            continue

        n_chars = len(label)
        longest_word = max(label.split(), key=len, default=label)
        n_longword = max(len(longest_word), 1)

        # Width constraint: try whole-label-one-line first
        pt_full = (cx_in * 144 * SAFETY) / n_chars            # whole label on 1 line
        pt_word = (cx_in * 144 * SAFETY) / n_longword         # longest word on 1 line
        width_pt = pt_full if pt_full >= 10.0 else pt_word

        # Height constraint: how many lines of width_pt text fit in 95 % of
        # the box height?
        max_lines = max(int(cy_in * 0.95 * 72 / (width_pt * LINE_SPACING)), 1)
        if max_lines < 2:
            # Box too short for 2 lines at width_pt — force 1-line fit
            width_pt = min(width_pt, pt_full)
            # Then iteratively reduce until 1 line fits
            while width_pt > 10.0:
                chars_per_line = cx_in * 144 * SAFETY / width_pt
                if n_chars <= chars_per_line:
                    break
                width_pt -= 0.5
        # Otherwise two lines fit and width_pt already prevents mid-word breaks.

        if width_pt < min_pt:
            min_pt = width_pt

    return max(10.0, min(min_pt, source_pt))


def _force_first_run_font_size(sp_el, pt: float) -> None:
    """
    Write *pt* (points) as the ``sz`` attribute of the first ``<a:rPr>`` found
    anywhere under the shape's txBody, in hundredths of a point (truncated to
    an integer).  Does nothing when the shape has no txBody or no ``<a:rPr>``.
    """
    body = sp_el.find(f"{{{_NS_P}}}txBody")
    first_rpr = body.find(f".//{{{_NS_A}}}rPr") if body is not None else None
    if first_rpr is not None:
        first_rpr.set("sz", str(int(pt * 100)))


def _clear_sp_text(sp_el) -> None:
    """
    Blank a shape's text: drop every direct ``<a:p>`` child of its txBody and
    append a single empty ``<a:p>`` in their place, so the txBody itself is
    kept.  Shapes without a txBody are left alone.
    """
    from lxml import etree

    body = sp_el.find(f"{{{_NS_P}}}txBody")
    if body is None:
        return

    para_tag = f"{{{_NS_A}}}p"
    for paragraph in body.findall(para_tag):
        body.remove(paragraph)
    # One empty paragraph keeps the box present but blank.
    etree.SubElement(body, para_tag)


def _fill_bank_slide_inplace(
    slide, items: list[dict], has_descriptions: bool
) -> None:
    """
    Write item labels into the text slots a bank slide already has.

    Strategy, in order:
      • Direct text boxes present, and at least as many visual shapes as
        items: delegate to ``_visual_center_fill`` (shape-proximity matching).
      • Direct text boxes present otherwise: cluster them with
        ``_cluster_txboxes``; cluster *i* takes ``items[i]["label"]`` in its
        first box (bold, centred, auto-sized) and every other box of that
        cluster is removed from the tree.  Clusters beyond the item count are
        removed entirely.
      • No direct text boxes: hand the top-level group shapes and a
        labels-only copy of *items* to ``_fill_group_text``.

    Only labels are ever written; *has_descriptions* is accepted but not read.
    """
    tree  = slide.shapes._spTree
    boxes = _collect_bank_text_boxes(tree)  # (y, x, cx, cy, el) direct txBox

    if not boxes:
        # Pattern A: text lives inside the group shapes.
        top_groups = [el for el in tree if el.tag == f"{{{_NS_P}}}grpSp"]
        _fill_group_text(
            top_groups, [{"label": it.get("label", "")} for it in items]
        )
        return

    # Pattern B: direct text boxes.
    visuals = _collect_visual_shapes(tree)
    if visuals and len(visuals) >= len(items):
        _visual_center_fill(boxes, visuals, items)
        return

    def _detach(el) -> None:
        holder = el.getparent()
        if holder is not None:
            holder.remove(el)

    for pos, cluster in enumerate(_cluster_txboxes(boxes)):
        if pos < len(items):
            _, _, width, height, primary = cluster[0]
            label = items[pos].get("label", "")
            size  = _label_font_pt(width, height, label)
            _replace_sp_text(primary, label, font_pt=size, bold=True, center_align=True)
            surplus = cluster[1:]
        else:
            surplus = cluster
        for entry in surplus:
            _detach(entry[4])


# ═══════════════════════════════════════════════════════════════════════════════
# Bank group extraction and embedding
# ═══════════════════════════════════════════════════════════════════════════════

def _count_usable_group_slots(bank_path: str, bank_slide_idx: int) -> int:
    """
    Estimate how many item slots a bank slide offers, so a selector can check
    the slide before committing to it.

    Returns 0 when the presentation or slide cannot be opened.

    Pattern B (checked first): if the slide has direct, non-placeholder text
    boxes (``txBox="1"``) that are between 45 720 EMU and 3.5 in wide and
    start at y >= 0.7 in, the result is the number of position clusters among
    them — boxes sorted by (x, y), with a new cluster starting whenever
    consecutive boxes are at least 0.5 in apart in x or 1.0 in apart in y.

    Pattern A (otherwise): top-level groups are filtered in two stages —
    first by minimum size (0.7 in × 0.5 in), then by dropping "large purely
    visual" groups (at least 2 in on both sides, with no nested group and no
    text-box child).  Each stage falls back to a wider candidate set when it
    would leave nothing.  If exactly one group survives, its nested groups
    are counted instead (descending one further level when there is exactly
    one nested group that itself contains groups).
    """
    from pptx import Presentation

    grp_tag  = f"{{{_NS_P}}}grpSp"
    sp_tag   = f"{{{_NS_P}}}sp"
    ph_path  = f".//{{{_NS_P}}}ph"
    cnv_path = f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr"

    min_grp_w  = int(0.7 * _EMU)
    min_grp_h  = int(0.5 * _EMU)
    large_side = int(2.0 * _EMU)

    def _is_txbox(sp_el) -> bool:
        cnv = sp_el.find(cnv_path)
        return cnv is not None and cnv.get("txBox") == "1"

    def _is_large_pure_visual(grp_el) -> bool:
        _, _, w, h = _get_grp_xfrm(grp_el)
        if w < large_side or h < large_side:
            return False
        # "Purely visual" = no nested group and no text-box child.
        for child in grp_el:
            if child.tag == grp_tag:
                return False
            if child.tag == sp_tag and _is_txbox(child):
                return False
        return True

    try:
        prs  = Presentation(bank_path)
        tree = prs.slides[bank_slide_idx].shapes._spTree
    except Exception:
        return 0

    # ── Pattern B: direct txBox sp elements in spTree ────────────────────────
    title_band = int(0.7 * _EMU)
    max_lbl_w  = int(3.5 * _EMU)   # watermarks/title bars are wider than this

    def _is_label_box(el) -> bool:
        if el.tag != sp_tag or el.find(ph_path) is not None:
            return False
        if not _is_txbox(el):
            return False
        _, y, w, _ = _get_grp_xfrm(el)
        return 45_720 <= w <= max_lbl_w and y >= title_band

    label_boxes = [el for el in tree if _is_label_box(el)]
    if label_boxes:
        gap_x = int(0.50 * _EMU)
        gap_y = int(1.00 * _EMU)
        spots = sorted(_get_grp_xfrm(el)[:2] for el in label_boxes)   # (x, y)
        # One cluster to start with, plus one per gap between neighbours.
        breaks = sum(
            1
            for (px, py), (qx, qy) in zip(spots, spots[1:])
            if abs(qx - px) >= gap_x or abs(qy - py) >= gap_y
        )
        return 1 + breaks

    # ── Pattern A: usable top-level group slots ──────────────────────────────
    all_groups = [el for el in tree if el.tag == grp_tag]

    def _big_enough(grp_el) -> bool:
        _, _, w, h = _get_grp_xfrm(grp_el)
        return w >= min_grp_w and h >= min_grp_h

    sized = [g for g in all_groups if _big_enough(g)] or all_groups

    candidates = (
        [g for g in sized if not _is_large_pure_visual(g)]
        or [g for g in all_groups if not _is_large_pure_visual(g)]
        or all_groups
    )

    # A lone wrapper group hides the real per-item cards one or two levels
    # down; count those instead of reporting a single slot.
    if len(candidates) == 1:
        nested = [c for c in candidates[0] if c.tag == grp_tag]
        if len(nested) == 1:
            deeper = [c for c in nested[0] if c.tag == grp_tag]
            if deeper:
                return len(deeper)
        if nested:
            return len(nested)

    return len(candidates)


def _embed_bank_groups(
    dest_slide, bank_path: str, bank_slide_idx: int,
    items: list[dict], lower_zone: dict, brand_color: str,
    has_descriptions: bool = False,
) -> bool:
    """
    Extract infographic shapes from a bank slide and place them in the lower
    zone of *dest_slide*, colour-harmonised to *brand_color*.

    Bank slides use two patterns:
    A) Text-in-groups: the grpSp cards contain txBody sp children — legacy pattern.
    B) Separate text boxes: grpSp = visual decoration (icons); text lives in
       direct txBox sp children of the spTree at specific x/y positions.

    This function handles both by copying BOTH grpSp and the direct txBox sp
    elements, then filling text into whichever set has the content slots.

    Args:
        dest_slide:       slide object whose ``shapes._spTree`` receives the copies.
        bank_path:        path of the bank .pptx file.
        bank_slide_idx:   0-based index of the slide inside the bank file.
        items:            content items; ``label`` / ``description`` keys are
                          consumed by the fill helpers.
        lower_zone:       dict with ``x``, ``y``, ``cx``, ``cy``; each value is
                          multiplied by ``_EMU`` to obtain the target rectangle.
        brand_color:      hex colour handed to ``_harmonise_group_colors``.
        has_descriptions: when True, direct text boxes are filled in
                          (label, description) pairs instead of labels only.

    Returns:
        True when shapes were embedded; False when the bank slide cannot be
        opened, has no usable shapes, or no bounding box can be computed.
    """
    from pptx import Presentation

    try:
        prs        = Presentation(bank_path)
        bank_slide = prs.slides[bank_slide_idx]
    except Exception as exc:
        # Best-effort: a broken bank file must not abort the whole deck build.
        print(f"  [warn] Cannot open bank slide {bank_path}:{bank_slide_idx}: {exc!r}")
        return False

    dest_spTree = dest_slide.shapes._spTree
    bank_spTree = bank_slide.shapes._spTree

    grp_tag = f"{{{_NS_P}}}grpSp"

    # ── Collect shapes from bank slide ────────────────────────────────────────
    visual_els = [el for el in bank_spTree if el.tag == grp_tag]

    # _collect_bank_text_boxes yields records that start with (y, x), end with
    # the element, and carry the height immediately before the element.  The
    # records were previously unpacked as 4-tuples although the collector
    # emits 5-tuples (y, x, cx, cy, el), which raised ValueError for every
    # bank slide with direct text boxes.  Indexing from both ends normalises
    # either shape to (y, x, cy, el).
    text_els = [
        (rec[0], rec[1], rec[-2], rec[-1])
        for rec in _collect_bank_text_boxes(bank_spTree)
    ]

    all_source_els = visual_els + [el for _, _, _, el in text_els]
    if not all_source_els:
        print(f"  [warn] No infographic shapes in {bank_path} slide {bank_slide_idx}")
        return False

    # ── Bounding box of ALL shapes (visual + text) ────────────────────────────
    orig_bounds = _shapes_bounding_box(all_source_els)
    if orig_bounds is None:
        return False
    orig_x, orig_y, orig_cx, orig_cy = orig_bounds

    # ── Scale to fit lower_zone ───────────────────────────────────────────────
    lz_x  = lower_zone["x"]  * _EMU
    lz_y  = lower_zone["y"]  * _EMU
    lz_cx = lower_zone["cx"] * _EMU
    lz_cy = lower_zone["cy"] * _EMU

    scale_x = lz_cx / orig_cx if orig_cx > 0 else 1.0
    scale_y = lz_cy / orig_cy if orig_cy > 0 else 1.0
    # Uniform scale preserves the aspect ratio; enlargement is capped at 1.2x.
    scale   = min(scale_x, scale_y, 1.2)

    # Translation that maps the scaled bounding-box origin onto the zone origin.
    off_x = int(lz_x - orig_x * scale)
    off_y = int(lz_y - orig_y * scale)

    # ── Deep-copy, reposition, colour-harmonise ───────────────────────────────
    new_visual_els = []
    for el in visual_els:
        new_el = copy.deepcopy(el)
        _rescale_and_reposition_group(new_el, scale, off_x, off_y)
        _harmonise_group_colors(new_el, brand_color)
        new_visual_els.append(new_el)

    # new_text_tuples entries: (orig_y, orig_x, orig_cy, copied_el)
    new_text_tuples = []
    for (oy, ox, ocy, el) in text_els:
        new_el = copy.deepcopy(el)
        _rescale_and_reposition_sp(new_el, scale, off_x, off_y)
        _harmonise_group_colors(new_el, brand_color)
        new_text_tuples.append((oy, ox, ocy, new_el))

    # Sort text boxes by (x, y, cy) — column-first ordering so each column's
    # label and description stay together as a pair.
    # ((y, x) would cluster all labels across columns before all descriptions,
    # scrambling the label+desc pairing for stride=2 assignment.)
    new_text_tuples.sort(key=lambda t: (t[1], t[0], t[2]))
    new_text_els = [t[3] for t in new_text_tuples]

    # ── Fix off-screen: shift everything down if any shape bleeds above lz_y ─
    all_new_els = new_visual_els + new_text_els
    top_ys = [_get_shape_top_y(el) for el in all_new_els]
    min_top = min(top_ys) if top_ys else lz_y
    if min_top < lz_y:
        extra = int(lz_y - min_top)
        for el in all_new_els:
            _shift_shape_y(el, extra)

    # ── Append to dest spTree ─────────────────────────────────────────────────
    for el in new_visual_els:
        dest_spTree.append(el)
    for el in new_text_els:
        dest_spTree.append(el)

    # ── Fill text ─────────────────────────────────────────────────────────────
    if new_text_els:
        # Pattern B: fill direct text boxes (sorted by position)
        _fill_direct_text_boxes(new_text_els, items, has_descriptions)
    else:
        # Pattern A: fill text inside group shapes (legacy bank slides).
        # new_visual_els are exactly the groups appended above, in order.
        _fill_group_text(new_visual_els, items)

    return True


def _shapes_bounding_box(els: list) -> tuple | None:
    """Compute the union rectangle of a mixed list of grpSp/sp elements.

    Elements whose extent is 0 x 0 (which is also what ``_get_grp_xfrm``
    reports when it finds no transform) do not contribute.

    Returns ``(min_x, min_y, total_cx, total_cy)`` in EMU, or ``None`` when
    no element contributes a rectangle.
    """
    rects = [
        rect for rect in map(_get_grp_xfrm, els)
        if rect[2] != 0 or rect[3] != 0
    ]
    if not rects:
        return None

    left   = min(x for x, _, _, _ in rects)
    top    = min(y for _, y, _, _ in rects)
    right  = max(x + cx for x, _, cx, _ in rects)
    bottom = max(y + cy for _, y, _, cy in rects)
    return left, top, right - left, bottom - top


def _get_grp_xfrm(el) -> tuple[int, int, int, int]:
    """Read ``(x, y, cx, cy)`` in EMU from a grpSp or sp element.

    A group keeps its transform under ``grpSpPr``; a plain shape under
    ``spPr``.  Both containers are tried in that order, and the first one
    holding an ``xfrm`` with both ``off`` and ``ext`` wins.  When neither
    yields a complete transform the result is ``(0, 0, 0, 0)``.
    """
    xfrm_tag = f"{{{_NS_A}}}xfrm"
    off_tag  = f"{{{_NS_A}}}off"
    ext_tag  = f"{{{_NS_A}}}ext"

    for container in ("grpSpPr", "spPr"):
        props = el.find(f"{{{_NS_P}}}{container}")
        xfrm = props.find(xfrm_tag) if props is not None else None
        if xfrm is None:
            continue
        offset = xfrm.find(off_tag)
        extent = xfrm.find(ext_tag)
        if offset is None or extent is None:
            continue
        return (
            int(offset.get("x", 0)),
            int(offset.get("y", 0)),
            int(extent.get("cx", 0)),
            int(extent.get("cy", 0)),
        )
    return 0, 0, 0, 0


def _rescale_and_reposition_group(el, scale: float, off_x: int, off_y: int) -> None:
    """
    Scale and move a group shape by rewriting only its own off/ext values.

    Group transform layout in PPTX:
        grpSpPr/xfrm/off   — where the group sits in its parent (slide) space
        grpSpPr/xfrm/ext   — how large the group is in parent space
        grpSpPr/xfrm/chOff — origin of the children's coordinate system
        grpSpPr/xfrm/chExt — size of the children's coordinate system

    Children are expressed in the child coordinate system and mapped by the
    renderer as:
        parent_x = off.x + (child_x - chOff.x) * (ext.cx / chExt.cx)

    Because chOff/chExt are left untouched here, changing off/ext alone
    rescales and relocates every child without editing the children.

    New values are ``int(old * scale + offset)`` for the position and
    ``int(old * scale)`` for the size.  Nothing happens when the element has
    no ``grpSpPr`` or no ``xfrm``.
    """
    props = el.find(f"{{{_NS_P}}}grpSpPr")
    xfrm = None if props is None else props.find(f"{{{_NS_A}}}xfrm")
    if xfrm is None:
        return

    position = xfrm.find(f"{{{_NS_A}}}off")
    size     = xfrm.find(f"{{{_NS_A}}}ext")

    if position is not None:
        moved_x = int(int(position.get("x", 0)) * scale + off_x)
        moved_y = int(int(position.get("y", 0)) * scale + off_y)
        position.set("x", str(moved_x))
        position.set("y", str(moved_y))

    if size is not None:
        scaled_cx = int(int(size.get("cx", 0)) * scale)
        scaled_cy = int(int(size.get("cy", 0)) * scale)
        size.set("cx", str(scaled_cx))
        size.set("cy", str(scaled_cy))


def _min_child_y_in_parent_space(grp_el) -> float:
    """
    Find the smallest parent-space y reached by *grp_el* or any direct child.

    A child whose child-space y lies below ``chOff.y`` is rendered above the
    group's own ``off.y``; this reports that worst-case upward reach so a
    caller can push the group down.

    Returns ``inf`` when the group has no ``grpSpPr``, ``xfrm`` or ``off``.
    Returns the group's own y when the child extent height is zero (no
    child mapping is possible).  Otherwise returns the minimum of the group's
    y and every mapped direct-child y.
    """
    unknown = float("inf")

    props = grp_el.find(f"{{{_NS_P}}}grpSpPr")
    xfrm = props.find(f"{{{_NS_A}}}xfrm") if props is not None else None
    if xfrm is None:
        return unknown

    def _read(tag: str, attr: str, missing: int) -> int:
        # Integer attribute of an xfrm child, or *missing* if the child is absent.
        node = xfrm.find(f"{{{_NS_A}}}{tag}")
        return missing if node is None else int(node.get(attr, 0))

    off_node = xfrm.find(f"{{{_NS_A}}}off")
    if off_node is None:
        return unknown

    group_top     = int(off_node.get("y", 0))
    group_height  = _read("ext", "cy", 0)
    child_origin  = _read("chOff", "y", 0)
    # Without chExt the child space is taken to be the same height as the group.
    child_height  = _read("chExt", "cy", group_height)

    if child_height == 0:
        return float(group_top)

    ratio  = group_height / child_height
    lowest = float(group_top)
    nested_group_tag = f"{{{_NS_P}}}grpSp"

    for member in grp_el:
        # Nested groups carry their transform in grpSpPr, everything else in spPr.
        props_name = "grpSpPr" if member.tag == nested_group_tag else "spPr"
        member_props = member.find(f"{{{_NS_P}}}{props_name}")
        member_xfrm = (
            member_props.find(f"{{{_NS_A}}}xfrm") if member_props is not None else None
        )
        member_off = (
            member_xfrm.find(f"{{{_NS_A}}}off") if member_xfrm is not None else None
        )
        if member_off is None:
            continue
        local_y = int(member_off.get("y", 0))
        mapped  = group_top + (local_y - child_origin) * ratio
        if mapped < lowest:
            lowest = mapped

    return lowest


def _shift_group_y(grp_el, delta_y: int) -> None:
    """Add *delta_y* EMU to the ``off.y`` of a grpSp; other values are untouched.

    Does nothing when ``grpSpPr``, ``xfrm`` or ``off`` is missing.
    """
    node = grp_el
    # Walk grpSpPr → xfrm → off, giving up as soon as a level is absent.
    for step in (f"{{{_NS_P}}}grpSpPr", f"{{{_NS_A}}}xfrm", f"{{{_NS_A}}}off"):
        node = node.find(step)
        if node is None:
            return
    node.set("y", str(int(node.get("y", 0)) + delta_y))


def _get_shape_top_y(el) -> float:
    """Top edge (``off.y`` in EMU, as a float) of a grpSp or sp element."""
    return float(_get_grp_xfrm(el)[1])


def _shift_shape_y(el, delta_y: int) -> None:
    """Add *delta_y* EMU to the ``off.y`` of a grpSp or sp element.

    Groups keep their transform in ``grpSpPr``; every other element is
    looked up through ``spPr``.  Does nothing when the properties element,
    its ``xfrm`` or its ``off`` is missing.
    """
    is_group = el.tag == f"{{{_NS_P}}}grpSp"
    props_tag = f"{{{_NS_P}}}grpSpPr" if is_group else f"{{{_NS_P}}}spPr"

    node = el
    for step in (props_tag, f"{{{_NS_A}}}xfrm", f"{{{_NS_A}}}off"):
        node = node.find(step)
        if node is None:
            return
    node.set("y", str(int(node.get("y", 0)) + delta_y))


def _rescale_and_reposition_sp(el, scale: float, off_x: int, off_y: int) -> None:
    """Scale and move a plain (non-group) sp element via its ``spPr/xfrm``.

    Position becomes ``int(old * scale + offset)``; width becomes
    ``int(old * scale)``.  Height is scaled the same way only when it is
    positive.  Does nothing when ``spPr`` or ``xfrm`` is missing.
    """
    props = el.find(f"{{{_NS_P}}}spPr")
    xfrm = None if props is None else props.find(f"{{{_NS_A}}}xfrm")
    if xfrm is None:
        return

    position = xfrm.find(f"{{{_NS_A}}}off")
    size     = xfrm.find(f"{{{_NS_A}}}ext")

    if position is not None:
        moved_x = int(int(position.get("x", 0)) * scale + off_x)
        moved_y = int(int(position.get("y", 0)) * scale + off_y)
        position.set("x", str(moved_x))
        position.set("y", str(moved_y))

    if size is not None:
        width  = int(size.get("cx", 0))
        height = int(size.get("cy", 0))
        size.set("cx", str(int(width * scale)))
        # A height of 0 is treated as auto-height (spAutoFit) and left alone.
        if height > 0:
            size.set("cy", str(int(height * scale)))


def _collect_bank_text_boxes(bank_spTree) -> list:
    """
    Gather the free-standing text boxes that sit directly under a bank
    slide's spTree.

    An element qualifies when it:
      - is an ``sp`` with no ``ph`` (placeholder) descendant,
      - is flagged ``txBox="1"`` on its ``cNvSpPr``,
      - has a ``txBody``,
      - starts at or below the title zone (y >= 0.7in = 640,080 EMU),
      - is at least 0.05in (45,720 EMU) wide.

    Returns a list of ``(y, x, cx, cy, el)`` tuples in document order, with
    all coordinates in EMU as reported by ``_get_grp_xfrm``.
    """
    title_zone_bottom = int(0.7 * _EMU)
    # Some bank slides store near-zero widths (1 EMU) for connector shapes.
    min_width = 45_720

    shape_tag  = f"{{{_NS_P}}}sp"
    ph_path    = f".//{{{_NS_P}}}ph"
    nv_tag     = f"{{{_NS_P}}}nvSpPr"
    cnv_tag    = f"{{{_NS_P}}}cNvSpPr"
    body_tag   = f"{{{_NS_P}}}txBody"

    def _is_free_text_box(el) -> bool:
        # Structural checks only; position/size are checked by the caller loop.
        if el.tag != shape_tag:
            return False
        if el.find(ph_path) is not None:
            return False  # title/body placeholder from the bank template
        non_visual = el.find(nv_tag)
        if non_visual is None:
            return False
        flags = non_visual.find(cnv_tag)
        if flags is None or flags.get("txBox") != "1":
            return False
        return el.find(body_tag) is not None

    collected = []
    for el in filter(_is_free_text_box, bank_spTree):
        x, y, cx, cy = _get_grp_xfrm(el)
        if y >= title_zone_bottom and cx >= min_width:
            collected.append((y, x, cx, cy, el))
    return collected


def _fill_direct_text_boxes(
    sorted_text_els: list, items: list[dict], has_descriptions: bool
) -> None:
    """
    Write content items into direct text box sp elements, consuming the
    boxes in the order the caller supplies them.

    With has_descriptions=True each item takes two consecutive boxes
    (label, then description); otherwise each item takes one box (label).
    Labels are written bold and centre-aligned.  Boxes left over after the
    last item are detached from their parent element.  Nothing happens when
    either argument is empty.
    """
    if not sorted_text_els or not items:
        return

    per_item = 2 if has_descriptions else 1
    box_count = len(sorted_text_els)

    # Label slots sit at 0, per_item, 2*per_item, …; zip stops at whichever of
    # boxes or items runs out first.
    for label_pos, entry in zip(range(0, box_count, per_item), items):
        label_text = entry.get("label", "")
        desc_text  = entry.get("description", "")
        _replace_sp_text(sorted_text_els[label_pos], label_text, bold=True, center_align=True)
        desc_pos = label_pos + 1
        if has_descriptions and desc_pos < box_count:
            _replace_sp_text(sorted_text_els[desc_pos], desc_text)

    # Detach every box that no item claimed.
    first_spare = min(len(items) * per_item, box_count)
    for spare in sorted_text_els[first_spare:]:
        owner = spare.getparent()
        if owner is not None:
            owner.remove(spare)


def _harmonise_group_colors(el, brand_color: str) -> None:
    """
    Recolour a copied bank shape so its accent matches the template brand
    colour.  Both passes always run, because srgbClr and schemeClr fills can
    coexist in one shape.

    Pass A (srgbClr): among explicit 6-digit hex colours that are neither
    near-white, near-black nor near-grey, the most frequent one is treated
    as the bank accent and every occurrence is rewritten to *brand_color*.

    Pass B (schemeClr): every ``accent1``–``accent6`` scheme colour is
    replaced, at the same position in its parent, by a flat srgbClr set to
    *brand_color*, so the fill no longer depends on the destination theme.
    """
    from lxml import etree
    from collections import Counter

    srgb_tag   = f"{{{_NS_A}}}srgbClr"
    scheme_tag = f"{{{_NS_A}}}schemeClr"
    brand_hex  = brand_color.upper()

    def _is_accent_candidate(hex_val: str) -> bool:
        # Only 6-digit values are considered; neutrals are rejected.
        if len(hex_val) != 6:
            return False
        red, green, blue = (int(hex_val[i:i + 2], 16) for i in (0, 2, 4))
        brightness = (red + green + blue) / 3
        spread = max(red, green, blue) - min(red, green, blue)
        return 25 <= brightness <= 230 and spread >= 30

    # ── Pass A: explicit hex fills ───────────────────────────────────────────
    usage = Counter(
        hex_val
        for hex_val in (node.get("val", "").upper() for node in el.iter(srgb_tag))
        if _is_accent_candidate(hex_val)
    )
    if usage:
        dominant = usage.most_common(1)[0][0]
        if dominant != brand_hex:
            for node in el.iter(srgb_tag):
                if node.get("val", "").upper() == dominant:
                    node.set("val", brand_hex)

    # ── Pass B: theme-relative accent fills ──────────────────────────────────
    accent_slots = {f"accent{n}" for n in range(1, 7)}
    # Snapshot the matches first: the tree is edited while we walk the list.
    for scheme_node in list(el.iter(scheme_tag)):
        if scheme_node.get("val", "") not in accent_slots:
            continue
        owner = scheme_node.getparent()
        if owner is None:
            continue
        position = list(owner).index(scheme_node)
        owner.remove(scheme_node)
        flat = etree.Element(srgb_tag)
        flat.set("val", brand_hex)
        owner.insert(position, flat)


def _fill_group_text(group_els: list, items: list[dict]) -> None:
    """
    Write item labels and descriptions into text boxes in bank group shapes.

    The elements are modified in place; nothing is returned.  The function
    is a no-op when either *group_els* or *items* is empty.

    Args:
        group_els: grpSp elements (already appended to the destination slide).
        items:     content items; ``label`` and ``description`` keys are read.

    Layouts handled when exactly ONE top-level group is passed:
    A1) "AI-bank": the group has a single sub-grpSp whose direct txBox
        children (>= len(items) of them) are the label slots, while the outer
        group's direct txBoxes are description slots.  Both sets are paired
        with items in ascending local-x order.
    A2) The group has >= len(items) direct txBox sp children (pointer/multi
        type).  After dropping thin bars and a far-right outlier, boxes are
        clustered by 2D proximity and one item is assigned per cluster in
        (average y, average x) order.
    B)  The group has >= 2 sub-grpSp children (or one wrapper sub-grpSp that
        itself holds >= 2 grpSp): those become the per-item cards and are
        processed like layout C.

    Layout handled otherwise:
    C)  N groups as cards (flat card layout) → one item per group that has
        text slots, in (y, x) order.

    Leftover slots: in A1/A2 they are passed an empty string through
    ``_replace_sp_text``; in B/C the leftover text shapes are removed from
    their parent.
    Portrait-oriented text boxes (cx < 0.75 * cy) get their label truncated
    to its first three words to prevent vertical character-by-character
    wrapping.
    """
    if not group_els or not items:
        return

    grp_tag = f"{{{_NS_P}}}grpSp"
    sp_tag  = f"{{{_NS_P}}}sp"

    def _portrait_safe_label(sp_el, label: str) -> str:
        """Return *label* cut to its first three words when the text box is a
        narrow column (cx < 0.75 * cy, both positive); otherwise unchanged."""
        _, _, cx, cy = _get_grp_xfrm(sp_el)
        if cx > 0 and cy > 0 and cx < cy * 0.75:
            words = label.split()
            return " ".join(words[:3])
        return label

    def _find_icon_shape(text_sps: list):
        """
        Return the best candidate auto-shape to display a label inside a circle.

        Looks for non-txBox shapes among text_sps that:
          - Have an spPr containing a solidFill (coloured icon circle, not
            transparent bg)
          - Are at least 100,000 EMU in both dimensions
          - Have an aspect ratio 0.4 <= cx/cy <= 2.5 in child-space
        Returns the sp element with the largest area (first one wins on ties),
        or None when no shape passes.
        """
        _A_SOLID = f"{{{_NS_A}}}solidFill"
        candidates = []
        for sp in text_sps:
            cNvSpPr = sp.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
            if cNvSpPr is not None and cNvSpPr.get("txBox") == "1":
                continue   # skip txBox, only want auto-shapes
            spPr = sp.find(f"{{{_NS_P}}}spPr")
            if spPr is None:
                continue
            # Must have a solid colour fill so text is visible against background
            if spPr.find(f".//{_A_SOLID}") is None:
                continue
            _, _, cx, cy = _get_grp_xfrm(sp)
            if cx < 100_000 or cy < 100_000:
                continue  # tiny icon glyph, skip
            aspect = cx / cy if cy > 0 else 0
            if not (0.4 <= aspect <= 2.5):
                continue  # wide background rectangles are excluded
            # (area, discovery index, element); only the area is used for sorting.
            candidates.append((cx * cy, len(candidates), sp))
        if not candidates:
            return None
        # Stable sort: among equal areas the earliest-discovered shape stays first.
        candidates.sort(key=lambda t: t[0], reverse=True)
        return candidates[0][2]

    def _write_label_into_icon(sp_el, label: str) -> None:
        """Replace the shape's paragraphs with one centred, bold, white 12 pt
        run containing *label*, and switch its bodyPr to centre-anchored,
        wrapping text with normAutofit.  No-op when the shape has no txBody."""
        from lxml import etree
        txb = sp_el.find(f"{{{_NS_P}}}txBody")
        if txb is None:
            return
        bodyPr = txb.find(f"{{{_NS_A}}}bodyPr")
        if bodyPr is not None:
            # Drop any vertical-text setting so the label reads horizontally.
            bodyPr.attrib.pop("vert", None)
            bodyPr.set("anchor", "ctr")
            bodyPr.set("anchorCtr", "0")
            bodyPr.set("wrap", "square")
            # Remove whichever autofit mode is present, then force normAutofit.
            for tag in ("noAutofit", "spAutoFit", "normAutofit"):
                el = bodyPr.find(f"{{{_NS_A}}}{tag}")
                if el is not None:
                    bodyPr.remove(el)
            bodyPr.append(etree.Element(f"{{{_NS_A}}}normAutofit"))
        # Discard every existing paragraph before adding the single new one.
        for p_el in list(txb.findall(f"{{{_NS_A}}}p")):
            txb.remove(p_el)
        new_p = etree.SubElement(txb, f"{{{_NS_A}}}p")
        pPr   = etree.SubElement(new_p, f"{{{_NS_A}}}pPr")
        pPr.set("algn", "ctr")
        new_r = etree.SubElement(new_p, f"{{{_NS_A}}}r")
        rPr   = etree.SubElement(new_r, f"{{{_NS_A}}}rPr")
        rPr.set("lang", "en-US")
        rPr.set("sz", "1200")   # 12 pt start; normAutofit shrinks if needed
        rPr.set("b", "1")
        rPr.set("dirty", "0")
        sf  = etree.SubElement(rPr, f"{{{_NS_A}}}solidFill")
        clr = etree.SubElement(sf,  f"{{{_NS_A}}}srgbClr")
        clr.set("val", "FFFFFF")
        new_t      = etree.SubElement(new_r, f"{{{_NS_A}}}t")
        new_t.text = label

    if len(group_els) == 1:
        top = group_els[0]

        def _is_txbox_child(child):
            """True when *child* carries txBox="1" on its nvSpPr/cNvSpPr."""
            nvSpPr = child.find(f"{{{_NS_P}}}nvSpPr")
            if nvSpPr is None:
                return False
            cNv = nvSpPr.find(f"{{{_NS_P}}}cNvSpPr")
            return cNv is not None and cNv.get("txBox") == "1"

        # Collect direct txBox sp children as (y, x, cx, cy, child).
        direct_txbox = []
        for child in top:
            if child.tag != sp_tag:
                continue
            if child.find(f"{{{_NS_P}}}txBody") is None:
                continue
            if _is_txbox_child(child):
                x, y, cx, cy = _get_grp_xfrm(child)
                direct_txbox.append((y, x, cx, cy, child))

        # Strip infographic header: the topmost direct txBox that is ALONE at its
        # y-level (no other txBox within 0.3in) AND separated from the next batch
        # of txBoxes by ≥ 0.5in.  Such a box is a visual title/header of the
        # infographic design, not a content slot.  Only attempted when there are
        # more boxes than items, so a needed slot is never sacrificed.
        if direct_txbox and len(direct_txbox) > len(items):
            min_y = min(t[0] for t in direct_txbox)
            _Y_TOL = int(0.3 * _EMU)
            at_min = [t for t in direct_txbox if abs(t[0] - min_y) < _Y_TOL]
            if len(at_min) == 1:
                rest_ys = [t[0] for t in direct_txbox if abs(t[0] - min_y) >= _Y_TOL]
                if rest_ys and (min(rest_ys) - min_y) >= int(0.5 * _EMU):
                    _replace_sp_text(at_min[0][4], "")
                    hdr_id = id(at_min[0][4])
                    direct_txbox = [t for t in direct_txbox if id(t[4]) != hdr_id]

        # Sub-grpSp children of the outer group.
        sub_groups = [child for child in top if child.tag == grp_tag]

        # AI-bank pattern: one visual sub-grpSp whose direct children include the
        # label txBoxes, while the outer grp's direct txBoxes are description slots.
        # Detect this when exactly 1 sub-grpSp contains ≥ n_items txBox children.
        if len(sub_groups) == 1:
            inner_labels = []
            for child in sub_groups[0]:
                if child.tag == sp_tag and _is_txbox_child(child):
                    x, y, cx, cy = _get_grp_xfrm(child)
                    inner_labels.append((y, x, cx, cy, child))

            if len(inner_labels) >= len(items):
                # Labels live inside the sub-grpSp; direct txboxes are desc slots.
                inner_sorted  = sorted(inner_labels,  key=lambda t: t[1])  # sort by local x
                direct_sorted = sorted(direct_txbox,  key=lambda t: t[1])
                # Uniform font size across all inner label slots: the smallest
                # size _label_font_pt proposes for any used slot.
                _inner_u_pt: float | None = None
                for _i2, (_y2, _x2, _cx2, _cy2, _el2) in enumerate(inner_sorted[:len(items)]):
                    _pt2 = _label_font_pt(_cx2, _cy2, items[_i2].get("label", ""))
                    _inner_u_pt = _pt2 if _inner_u_pt is None else min(_inner_u_pt, _pt2)
                for i, item in enumerate(items):
                    label = _portrait_safe_label(
                        inner_sorted[i][4] if i < len(inner_sorted) else inner_sorted[0][4],
                        item.get("label", ""),
                    )
                    desc = item.get("description", "")
                    if i < len(inner_sorted):
                        _, _, cx, cy, el = inner_sorted[i]
                        _replace_sp_text(el, label, font_pt=_inner_u_pt or _label_font_pt(cx, cy, label), bold=True, center_align=True)
                    if i < len(direct_sorted):
                        _replace_sp_text(direct_sorted[i][4], desc)
                # Pass an empty string to every label/description slot no item claimed.
                for j in range(len(items), len(inner_sorted)):
                    _replace_sp_text(inner_sorted[j][4], "")
                for j in range(len(items), len(direct_sorted)):
                    _replace_sp_text(direct_sorted[j][4], "")
                return

            # Double-wrap pattern: outer → 1 wrapper → N card sub-grpSp.
            # The single inner group did not supply enough label txBoxes but
            # contains the actual per-item card containers as its own sub-grpSp.
            # Unwrap one level so Pattern C can handle those N cards.
            deep_groups = [c for c in sub_groups[0] if c.tag == grp_tag]
            if len(deep_groups) >= 2:
                group_els = deep_groups

        if len(direct_txbox) >= len(items):
            # Pre-filter 1: drop very thin connector-label bars when taller shapes
            # already supply enough slots.  Thin bars (cy < 0.45 in) sit between
            # card rows in 2-row layouts and are not content slots.
            _THIN_CY = int(0.45 * _EMU)
            _thin = [t for t in direct_txbox if t[3] < _THIN_CY]
            _large = [t for t in direct_txbox if t[3] >= _THIN_CY]
            if _thin and len(_large) >= len(items):
                for t in _thin:
                    _replace_sp_text(t[4], "")
                direct_txbox = _large

            # Pre-filter 2: drop a right-side outlier shape whose x-gap from its
            # nearest left neighbour exceeds 1.5 in, when removing it still leaves
            # enough shapes.  This removes sidebar/description panels that have been
            # mixed into the label pool.
            _X_GAP_OUTLIER = int(1.5 * _EMU)
            _srt_x = sorted(direct_txbox, key=lambda t: t[1])
            if len(_srt_x) >= 2:
                _rx_gap = _srt_x[-1][1] - _srt_x[-2][1]
                if _rx_gap > _X_GAP_OUTLIER and len(_srt_x) - 1 >= len(items):
                    _replace_sp_text(_srt_x[-1][4], "")
                    direct_txbox = _srt_x[:-1]

            # Cluster text boxes by 2D proximity: boxes within _CL_DX/DY of each
            # other represent the same visual slot (e.g. label + secondary line for
            # one circle/arrow).  Boxes are walked in (x, y) order and each is
            # compared with the last box of the current cluster.  Clusters are then
            # sorted by (avg_y, avg_x) reading order and assigned one item each.
            _CL_DX = int(0.6 * _EMU)
            _CL_DY = int(0.8 * _EMU)
            sorted_t = sorted(direct_txbox, key=lambda t: (t[1], t[0]))
            clusters: list[list] = [[sorted_t[0]]]
            for tup in sorted_t[1:]:
                prev = clusters[-1][-1]
                if abs(tup[1] - prev[1]) < _CL_DX and abs(tup[0] - prev[0]) < _CL_DY:
                    clusters[-1].append(tup)
                else:
                    clusters.append([tup])
            clusters.sort(key=lambda cl: (
                sum(t[0] for t in cl) / len(cl),
                sum(t[1] for t in cl) / len(cl),
            ))
            for i, cluster in enumerate(clusters):
                if i < len(items):
                    # First box of the cluster takes the label; the rest get "".
                    y, x, cx, cy, el = cluster[0]
                    label = _portrait_safe_label(el, items[i].get("label", ""))
                    pt    = _label_font_pt(cx, cy, label)
                    _replace_sp_text(el, label, font_pt=pt, bold=True, center_align=True)
                    for _, _, _, _, sp_el in cluster[1:]:
                        _replace_sp_text(sp_el, "")
                else:
                    # More clusters than items: pass "" to the whole cluster.
                    for _, _, _, _, sp_el in cluster:
                        _replace_sp_text(sp_el, "")
            return

        # Pattern B: multiple sub-grpSp children are the card containers
        if len(sub_groups) >= 2:
            group_els = sub_groups

    # Pattern C (and B after reassignment): each group = one item
    # Three-stage filtering:
    # Stage 1 (size): dot/connector groups smaller than 0.7 × 0.5 in are
    #   dropped, unless that would leave no groups at all.
    # Stage 2 (visual-only): large groups (≥ 2 × 2 in) that contain no txBox
    #   children and no sub-grpSp are purely visual icon circles — skip them.
    # Stage 3 (content): among remaining groups, prefer ones where at least
    #   one text box contains > 2 chars (real content, not icon glyphs).
    #   If all size-passing groups still appear empty (bank template has blank
    #   placeholder slots), use all size-passing groups as-is.
    _MIN_GRP_CX = int(0.7 * _EMU)   # 0.7 in
    _MIN_GRP_CY = int(0.5 * _EMU)   # 0.5 in
    _LARGE_VISUAL_THRESHOLD = int(2.0 * _EMU)  # 2.0 in

    def _has_real_text(grp_el) -> bool:
        """True if any text shape in the group holds more than 2 stripped chars."""
        sps = _collect_text_sps_in_group(grp_el)
        for sp in sps:
            txb = sp.find(f"{{{_NS_P}}}txBody")
            if txb is None:
                continue
            t = "".join(el.text or "" for el in txb.iter(f"{{{_NS_A}}}t")).strip()
            if len(t) > 2:
                return True
        return False

    def _is_purely_visual(grp_el) -> bool:
        """True if group has no txBox children and no sub-grpSp — purely decorative."""
        _sp_tag  = f"{{{_NS_P}}}sp"
        _grp_tag = f"{{{_NS_P}}}grpSp"
        for child in grp_el:
            if child.tag == _grp_tag:
                return False  # has sub-groups → likely content layout
            if child.tag == _sp_tag:
                cNvSpPr = child.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
                if cNvSpPr is not None and cNvSpPr.get("txBox") == "1":
                    return False  # has txBox child → not purely visual
        return True

    def _grp_pos(el):
        """Sort key: (y, x) of the group, i.e. top-to-bottom then left-to-right."""
        x, y, _, _ = _get_grp_xfrm(el)
        return (y, x)

    # Stage 1: discard shapes too small to hold content
    sized_groups = [
        g for g in group_els
        if _get_grp_xfrm(g)[2] >= _MIN_GRP_CX and _get_grp_xfrm(g)[3] >= _MIN_GRP_CY
    ]
    if not sized_groups:
        sized_groups = group_els   # safety fallback (shouldn't happen)

    # Stage 2: remove large purely-visual icon groups (no txBox, no sub-grpSp)
    non_visual_groups = [
        g for g in sized_groups
        if not (
            _get_grp_xfrm(g)[2] >= _LARGE_VISUAL_THRESHOLD
            and _get_grp_xfrm(g)[3] >= _LARGE_VISUAL_THRESHOLD
            and _is_purely_visual(g)
        )
    ]
    if not non_visual_groups:
        # All Stage-1-passing groups were purely visual decorations.
        # The actual content slots are thin rows that failed Stage 1 —
        # fall back to ALL groups minus large purely-visual ones.
        non_visual_groups = [
            g for g in group_els
            if not (
                _get_grp_xfrm(g)[2] >= _LARGE_VISUAL_THRESHOLD
                and _get_grp_xfrm(g)[3] >= _LARGE_VISUAL_THRESHOLD
                and _is_purely_visual(g)
            )
        ]
        if not non_visual_groups:
            non_visual_groups = group_els  # absolute last resort

    # Stage 3: if at least one non-visual group already holds real text, only
    # the groups with real text are used as slots.  If none does (the bank
    # template ships blank placeholder slots), every non-visual group is a
    # valid slot that needs to be filled.
    content_groups = [g for g in non_visual_groups if _has_real_text(g)]
    if not content_groups:
        # Empty groups are VALID content slots in this case.
        content_groups = non_visual_groups

    sorted_groups = sorted(content_groups, key=_grp_pos)

    # Pre-compute uniform font size: scan every writable slot, take the minimum
    # so all labels on the same infographic render at the same point size.
    def _is_txbox_sp(sp_el):
        """True when *sp_el* carries txBox="1" on its nvSpPr/cNvSpPr."""
        c = sp_el.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
        return c is not None and c.get("txBox") == "1"

    uniform_pt: float | None = None
    _tmp_idx = 0
    for _g in sorted_groups:
        _sps = _collect_text_sps_in_group(_g)
        if not _sps:
            continue
        if _tmp_idx >= len(items):
            break
        _lbl = items[_tmp_idx].get("label", "")
        _txb = [s for s in _sps if _is_txbox_sp(s)]
        # Size against the slot that will receive the label: first txBox if
        # any, otherwise the first text shape.
        _ref = _txb[0] if _txb else _sps[0]
        _, _, _cx, _cy = _get_grp_xfrm(_ref)
        _pt = _label_font_pt(_cx, _cy, _lbl)
        uniform_pt = _pt if uniform_pt is None else min(uniform_pt, _pt)
        _tmp_idx += 1

    # Use a separate item counter so visual-only groups (no text slots) are
    # skipped without consuming an item index.  Previously, using enumerate()
    # meant that if N visual groups appear before the label groups in sort order,
    # those label groups got indices >= N and their text boxes were removed as
    # "unused" even when N == len(items).
    item_idx = 0
    for grp_el in sorted_groups:
        text_sps = _collect_text_sps_in_group(grp_el)
        if not text_sps:
            continue  # visual-only group — skip without consuming an item slot

        if item_idx >= len(items):
            # Remove unused text slots — empty boxes with fills render as lines.
            for sp in text_sps:
                parent = sp.getparent()
                if parent is not None:
                    parent.remove(sp)
            continue

        item  = items[item_idx]
        item_idx += 1
        desc  = item.get("description", "")

        # Separate txBox slots (label / desc) from auto-shape icon candidates
        txbox_sps = [
            sp for sp in text_sps
            if (lambda c: c is not None and c.get("txBox") == "1")(
                sp.find(f"{{{_NS_P}}}nvSpPr/{{{_NS_P}}}cNvSpPr")
            )
        ]

        if txbox_sps:
            # Always write the label into the txBox (the arrow/label slot).
            # Avoid writing into icon auto-shapes — they have images on top that
            # would cover the text (e.g. hexagon+arrow layouts).
            _, _, cx, cy = _get_grp_xfrm(txbox_sps[0])
            label = _portrait_safe_label(txbox_sps[0], item.get("label", ""))
            pt    = uniform_pt if uniform_pt else _label_font_pt(cx, cy, label)
            _replace_sp_text(txbox_sps[0], label, font_pt=pt, bold=True, center_align=True)
            if len(txbox_sps) >= 2:
                # Second txBox is the description slot; drop it when the item
                # has no description.
                if desc:
                    _replace_sp_text(txbox_sps[1], desc)
                else:
                    p = txbox_sps[1].getparent()
                    if p is not None:
                        p.remove(txbox_sps[1])
        else:
            # No txBox found — try writing the label inside a coloured icon/circle
            # auto-shape (e.g. rounded badge, circle).  Only correct when no txBox
            # exists because auto-shapes may carry icon images on top.
            icon_sp = _find_icon_shape(text_sps)
            if icon_sp is not None:
                _write_label_into_icon(icon_sp, item.get("label", ""))
            else:
                # Last resort: first text shape takes the label, second (if any)
                # takes the description or is removed when there is none.
                label = _portrait_safe_label(text_sps[0], item.get("label", ""))
                _, _, cx, cy = _get_grp_xfrm(text_sps[0])
                pt = uniform_pt if uniform_pt else _label_font_pt(cx, cy, label)
                _replace_sp_text(text_sps[0], label, font_pt=pt, bold=True, center_align=True)
                if desc and len(text_sps) >= 2:
                    _replace_sp_text(text_sps[1], desc)
                elif len(text_sps) >= 2:
                    p = text_sps[1].getparent()
                    if p is not None:
                        p.remove(text_sps[1])


def _collect_text_sps_in_group(grp_el) -> list:
    """Gather every text-bearing ``p:sp`` under *grp_el*, nested groups included.

    Selection:
    - Only shapes that own a ``p:txBody`` are considered.
    - A shape is dropped when BOTH of its extents are positive and BOTH are
      below 50 000 EMU.  Such icon-sized shapes overflow when text is
      written into them.  A shape with only one small extent is kept.

    Ordering:
    - Shapes flagged ``txBox="1"`` come first, all other shapes after them;
      within each class shapes are ordered by the y value reported by
      ``_get_grp_xfrm``.  Equal keys keep document order.
    """
    min_text_dim = 50_000   # EMU; below this in both directions = icon-sized

    tag_sp      = f"{{{_NS_P}}}sp"
    tag_grp     = f"{{{_NS_P}}}grpSp"
    tag_txbody  = f"{{{_NS_P}}}txBody"
    tag_nvsppr  = f"{{{_NS_P}}}nvSpPr"
    tag_cnvsppr = f"{{{_NS_P}}}cNvSpPr"

    keyed = []

    def _walk(node):
        for kid in node:
            if kid.tag == tag_grp:
                # Nested group: descend, its shapes are candidates too.
                _walk(kid)
                continue
            if kid.tag != tag_sp or kid.find(tag_txbody) is None:
                continue
            _, top, width, height = _get_grp_xfrm(kid)
            if 0 < width < min_text_dim and 0 < height < min_text_dim:
                continue
            nv_props = kid.find(tag_nvsppr)
            c_nv_props = nv_props.find(tag_cnvsppr) if nv_props is not None else None
            # False sorts before True, so real text boxes lead the result.
            is_auto_shape = c_nv_props is None or c_nv_props.get("txBox") != "1"
            keyed.append((is_auto_shape, top, kid))

    _walk(grp_el)
    keyed.sort(key=lambda entry: (entry[0], entry[1]))
    return [shape for _, _, shape in keyed]


def _replace_sp_text(
    sp_el, new_text: str, autofit: bool = True, font_pt: float | None = None,
    bold: bool | None = None, center_align: bool = False,
) -> None:
    """
    Replace ALL text in a bank shape's txBody with *new_text*.

    The body is rebuilt as one paragraph holding one run.  The first
    paragraph's pPr and the first run rPr found in the original body are
    deep-copied onto the new paragraph/run so existing formatting carries
    over.  Does nothing when *sp_el* has no txBody.

    autofit:      if True (and a bodyPr exists), any existing autofit child
                  of bodyPr is replaced by normAutofit.
    font_pt:      if given, overrides the run's sz with this point size.
    bold:         if True/False, forces bold on/off; None preserves original.
    center_align: if True, sets algn="ctr" on the paragraph.
    """
    from lxml import etree

    txb = sp_el.find(f"{{{_NS_P}}}txBody")
    if txb is None:
        return

    # Strip vertical-text orientation — some bank shapes carry vert="vert" or
    # vert="vert270" on bodyPr which makes every character render top-to-bottom.
    bodyPr = txb.find(f"{{{_NS_A}}}bodyPr")
    if bodyPr is not None:
        bodyPr.attrib.pop("vert", None)
        if autofit:
            # Remove any existing autofit child, then add normAutofit so
            # PowerPoint shrinks the font to fit the fixed-size text box.
            for tag in ("noAutofit", "spAutoFit", "normAutofit"):
                el = bodyPr.find(f"{{{_NS_A}}}{tag}")
                if el is not None:
                    bodyPr.remove(el)
            # The autofit element must follow prstTxWarp (if any) and precede
            # scene3d / sp3d / flatTx / extLst, so insert it in that slot
            # rather than appending it at the end of bodyPr.
            fit_el = etree.Element(f"{{{_NS_A}}}normAutofit")
            warp = bodyPr.find(f"{{{_NS_A}}}prstTxWarp")
            if warp is not None:
                bodyPr.insert(bodyPr.index(warp) + 1, fit_el)
            else:
                bodyPr.insert(0, fit_el)

    paras = txb.findall(f"{{{_NS_A}}}p")

    # Extract pPr from the first paragraph (alignment, spacing, indent, etc.)
    first_pPr = paras[0].find(f"{{{_NS_A}}}pPr") if paras else None

    # Take rPr from the first run of the earliest paragraph whose first run
    # actually carries one.
    first_rPr = None
    for p_el in paras:
        first_run = p_el.find(f"{{{_NS_A}}}r")
        if first_run is not None:
            first_rPr = first_run.find(f"{{{_NS_A}}}rPr")
        if first_rPr is not None:
            break

    # Remove ALL paragraphs — we rebuild from scratch.
    for p_el in paras:
        txb.remove(p_el)

    # Re-create one clean paragraph with new text.
    new_p = etree.SubElement(txb, f"{{{_NS_A}}}p")

    # Restore paragraph properties (alignment, indent, spacing).
    pPr_copy = copy.deepcopy(first_pPr) if first_pPr is not None else None
    if center_align:
        if pPr_copy is None:
            pPr_copy = etree.Element(f"{{{_NS_A}}}pPr")
        pPr_copy.set("algn", "ctr")
    if pPr_copy is not None:
        new_p.append(pPr_copy)

    new_r = etree.SubElement(new_p, f"{{{_NS_A}}}r")
    if first_rPr is not None:
        rPr_copy = copy.deepcopy(first_rPr)
        if font_pt is not None:
            rPr_copy.set("sz", str(int(font_pt * 100)))
            rPr_copy.attrib.pop("dirty", None)
        if bold is not None:
            rPr_copy.set("b", "1" if bold else "0")
        new_r.append(rPr_copy)
    elif font_pt is not None or bold is not None:
        # No source formatting to copy: build a minimal rPr only when the
        # caller asked for an explicit size or weight.
        rPr = etree.SubElement(new_r, f"{{{_NS_A}}}rPr")
        rPr.set("lang", "en-GB")
        if font_pt is not None:
            rPr.set("sz", str(int(font_pt * 100)))
        if bold is not None:
            rPr.set("b", "1" if bold else "0")
        rPr.set("dirty", "0")
    new_t = etree.SubElement(new_r, f"{{{_NS_A}}}t")
    new_t.text = new_text


# ═══════════════════════════════════════════════════════════════════════════════
# Body text writers
# ═══════════════════════════════════════════════════════════════════════════════
#
# CENTRALISED BODY-TEXT LAYOUT
# ----------------------------
# Every body-text renderer (text_only paragraphs, points, bullets;
# drill-down card bodies; scenario_qa structured; case_study; scenario)
# routes its bodyPr + paragraph spacing through these helpers so the
# visual result is consistent and predictable.
#
# Guarantees enforced by `_apply_body_layout`:
#   • anchor="t"             — content starts at the top of the box, not centred
#                              (prevents the "first bullet merges into header"
#                              symptom we kept patching)
#   • tIns=top_pad_in        — clearance from anything sitting just above the
#                              body box (banner headers, pill labels, etc.)
#   • lIns/rIns=side_pad_in  — text doesn't hug card borders
#   • bIns=bottom_pad_in     — gap before chrome/watermark
#   • noAutofit              — PowerPoint cannot collapse spcAft or shrink font
#                              behind our backs.  Content must fit at the
#                              chosen font; if it doesn't, the planner needs
#                              to split the slide.
#
# Guarantees enforced by `_stamp_para_spacing`:
#   • 115% line spacing within a paragraph
#   • Configurable paragraph gap (default 36pt — > line height at 18pt
#     so the gap is visibly larger than a line, reads as a true break)
#   • No spcAft on the last paragraph (no trailing white space)

def _apply_body_layout(txBody_el, *,
                       top_pad_in: float = 0.2,
                       side_pad_in: float = 0.2,
                       bottom_pad_in: float = 0.1) -> None:
    """Apply the consistent bodyPr settings to *txBody_el*.

    Creates bodyPr as the first child of *txBody_el* when it is missing,
    replaces any autofit child with noAutofit, and sets anchor="t",
    anchorCtr="0", wrap="square" plus the four insets.

    top_pad_in / side_pad_in / bottom_pad_in are given in inches and are
    written as EMU to tIns, lIns + rIns, and bIns respectively.
    """
    from lxml import etree
    bodyPr = txBody_el.find(f"{{{_NS_A}}}bodyPr")
    if bodyPr is None:
        bodyPr = etree.Element(f"{{{_NS_A}}}bodyPr")
        txBody_el.insert(0, bodyPr)
    # Remove any existing autoFit children — we always use noAutofit
    for tag in ("noAutofit", "spAutoFit", "normAutofit"):
        existing = bodyPr.find(f"{{{_NS_A}}}{tag}")
        if existing is not None:
            bodyPr.remove(existing)
    # The autofit element must sit after prstTxWarp (if any) and before
    # scene3d / sp3d / flatTx / extLst; appending it at the end would put it
    # out of schema order whenever one of those trailing children exists.
    no_fit = etree.Element(f"{{{_NS_A}}}noAutofit")
    warp = bodyPr.find(f"{{{_NS_A}}}prstTxWarp")
    if warp is not None:
        bodyPr.insert(bodyPr.index(warp) + 1, no_fit)
    else:
        bodyPr.insert(0, no_fit)
    bodyPr.set("anchor", "t")
    bodyPr.set("anchorCtr", "0")
    bodyPr.set("wrap", "square")
    bodyPr.set("lIns", str(int(side_pad_in * _EMU)))
    bodyPr.set("rIns", str(int(side_pad_in * _EMU)))
    bodyPr.set("tIns", str(int(top_pad_in * _EMU)))
    bodyPr.set("bIns", str(int(bottom_pad_in * _EMU)))


def _stamp_para_spacing(pPr_el, *, is_last: bool,
                        gap_pt: float = 36.0,
                        line_spacing_pct: int = 115) -> None:
    """Set lnSpc (and, unless *is_last*, spcAft) on a pPr element.

    pPr_el:           the ``a:pPr`` element to modify in place.
    is_last:          when True no spcAft is written, so the final
                      paragraph leaves no trailing gap.
    gap_pt:           space-after in points (stored in 1/100 pt).
    line_spacing_pct: line spacing in percent (stored in 1/1000 percent).

    Any lnSpc / spcAft already on *pPr_el* is replaced rather than
    duplicated.  The new elements are inserted at the front of pPr
    (lnSpc first, spcAft after spcBef when one exists) because the schema
    requires spacing children to precede bullet and tab children; callers
    may therefore add bullets before or after calling this helper.
    """
    from lxml import etree

    for tag in ("lnSpc", "spcAft"):
        for stale in pPr_el.findall(f"{{{_NS_A}}}{tag}"):
            pPr_el.remove(stale)

    lnSpc = etree.Element(f"{{{_NS_A}}}lnSpc")
    spcPct = etree.SubElement(lnSpc, f"{{{_NS_A}}}spcPct")
    spcPct.set("val", str(int(line_spacing_pct * 1000)))
    pPr_el.insert(0, lnSpc)

    if is_last:
        return

    spcAft = etree.Element(f"{{{_NS_A}}}spcAft")
    spcPts = etree.SubElement(spcAft, f"{{{_NS_A}}}spcPts")
    # round() avoids float truncation (e.g. 4.35 * 100 -> 434.999...).
    spcPts.set("val", str(int(round(gap_pt * 100))))
    spcBef = pPr_el.find(f"{{{_NS_A}}}spcBef")
    anchor = spcBef if spcBef is not None else lnSpc
    pPr_el.insert(pPr_el.index(anchor) + 1, spcAft)


def _fit_card_body_pt(sp_el, lines: list[str], *,
                      top_pad_in: float = 0.3,
                      side_pad_in: float = 0.25,
                      bottom_pad_in: float = 0.1,
                      gap_pt: float = 9.0,
                      line_spacing: float = 1.15,
                      max_pt: float = 16.0,
                      min_pt: float = 10.0) -> float:
    """
    Pick the largest font size at which the paragraphs in *lines* fit the
    height of sp_el's box once insets and inter-paragraph gaps are deducted.

    Candidate sizes run from max_pt downward in 0.5pt steps; the first one
    whose estimated height fits is returned, and min_pt is returned when none
    does.  max_pt is returned immediately when the box extent cannot be read,
    is not positive, or *lines* contains no non-blank entry.

    The estimate is a character-count heuristic (about 144/pt characters per
    inch of width, scaled by a 0.92 safety factor), not a real text measure.
    """
    ext = None
    spPr = sp_el.find(f"{{{_NS_P}}}spPr")
    if spPr is not None:
        xfrm = spPr.find(f"{{{_NS_A}}}xfrm")
        if xfrm is not None:
            ext = xfrm.find(f"{{{_NS_A}}}ext")
    if ext is None:
        return max_pt

    try:
        width_in = int(ext.get("cx", 0)) / _EMU
        height_in = int(ext.get("cy", 0)) / _EMU
    except Exception:
        return max_pt
    if width_in <= 0 or height_in <= 0:
        return max_pt

    usable_w = max(width_in - 2 * side_pad_in, 1.0)
    usable_h = max(height_in - top_pad_in - bottom_pad_in, 0.5)

    paragraphs = []
    for raw in lines:
        trimmed = raw.strip()
        if trimmed:
            paragraphs.append(trimmed)
    if not paragraphs:
        return max_pt

    safety = 0.92
    # One gap between each pair of consecutive paragraphs.
    gaps_h = max(0, len(paragraphs) - 1) * (gap_pt / 72.0)

    candidate = max_pt
    while candidate >= min_pt:
        row_h = candidate * line_spacing / 72.0          # inches per rendered line
        per_row = max(1, int(usable_w * 144 / candidate * safety))
        needed = 0.0
        for paragraph in paragraphs:
            rows = max(1, -(-len(paragraph) // per_row))  # ceiling division
            needed += rows * row_h
        needed += gaps_h
        if needed <= usable_h:
            return candidate
        candidate -= 0.5
    return min_pt


def _write_points_body(txBody_el, points: list[dict],
                       cx_in: float, cy_in: float, heading: str = "",
                       lead_in: str = "") -> None:
    """
    Write inline bold label + regular explanation paragraphs.

    Each point renders as:
        [Bold "Label: "][Regular "explanation text..."]
    in a single paragraph carrying a Wingdings bullet.

    txBody_el: the txBody whose paragraphs are replaced.
    points:    dicts read for "label" and "text"; a missing or None value
               is treated as empty.  Nothing is written when the list is
               empty.
    cx_in, cy_in: accepted for signature compatibility; not used, because
               the body font is fixed at 16pt.
    heading:   optional bold 22pt paragraph placed first (no extra spacing).
    lead_in:   optional un-bulleted prose paragraph placed before the points.

    Lead-in and point paragraphs get 115% line spacing and a 36pt
    space-after; the last point gets no space-after.
    """
    from lxml import etree

    if not points:
        return

    # Centralised body layout — top anchor, clear top-pad, noAutofit, insets.
    _apply_body_layout(txBody_el)

    # Lock body font at 16pt for all points content.
    fit_pt = 16.0
    body_sz = str(int(fit_pt * 100))
    # Paragraph gap in points; written as spcPts val="3600".
    para_gap_pt = 36.0

    # Wingdings checkmark bullet for the labelled points
    BULLET_CHAR_PT = "\xfc"

    def _add_run(p_el, text: str, *, bold: bool, sz: str) -> None:
        """Append one en-US run with explicit weight and size to *p_el*."""
        r_el = etree.SubElement(p_el, f"{{{_NS_A}}}r")
        rPr = etree.SubElement(r_el, f"{{{_NS_A}}}rPr")
        rPr.set("lang", "en-US")
        rPr.set("b", "1" if bold else "0")
        rPr.set("sz", sz)
        t_el = etree.SubElement(r_el, f"{{{_NS_A}}}t")
        t_el.text = text

    # Remove existing paragraphs
    for p in list(txBody_el.findall(qn("a:p"))):
        txBody_el.remove(p)

    # Optional section heading
    if heading:
        h_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        hPr = etree.SubElement(h_el, f"{{{_NS_A}}}pPr")
        etree.SubElement(hPr, f"{{{_NS_A}}}buNone")
        _add_run(h_el, heading, bold=True, sz="2200")

    _total = (1 if lead_in else 0) + len(points)

    # Optional lead-in plain prose sentence before the points.
    # Lead-in is NOT bulleted — it's a prose intro.
    if lead_in:
        li_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        liPr = etree.SubElement(li_el, f"{{{_NS_A}}}pPr")
        # Spacing children must precede bullet children inside pPr, so the
        # spacing is stamped before buNone is added.
        _stamp_para_spacing(liPr, is_last=(_total == 1),
                            gap_pt=para_gap_pt, line_spacing_pct=115)
        etree.SubElement(liPr, f"{{{_NS_A}}}buNone")
        _add_run(li_el, lead_in, bold=False, sz=body_sz)

    last_idx = len(points) - 1
    for idx, point in enumerate(points):
        label = (point.get("label") or "").strip()
        text = (point.get("text") or "").strip()

        p_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        pPr = etree.SubElement(p_el, f"{{{_NS_A}}}pPr")
        # Hanging indent for the bullet glyph.
        pPr.set("marL", "285750")
        pPr.set("indent", "-285750")
        # Spacing first (schema order), then the bullet definition.
        _stamp_para_spacing(pPr, is_last=(idx == last_idx),
                            gap_pt=para_gap_pt, line_spacing_pct=115)
        buFont = etree.SubElement(pPr, f"{{{_NS_A}}}buFont")
        buFont.set("typeface", "Wingdings")
        buFont.set("panose", "05000000000000000000")
        buFont.set("pitchFamily", "2")
        buFont.set("charset", "2")
        buChar = etree.SubElement(pPr, f"{{{_NS_A}}}buChar")
        buChar.set("char", BULLET_CHAR_PT)

        # Bold label run: "Label: "
        if label:
            _add_run(p_el, f"{label}: ", bold=True, sz=body_sz)

        # Regular text run
        if text:
            _add_run(p_el, text, bold=False, sz=body_sz)


# ═══════════════════════════════════════════════════════════════════════════════
# Multi-paragraph body builder (prose / bullets)
# ═══════════════════════════════════════════════════════════════════════════════

def _set_paragraphs(
    txBody_el, paragraphs: list[str],
    cx_in: float = 0, cy_in: float = 0,
    bullet: bool = False, heading: str = "",
    lead_in: str = "",
    font_pt: float | None = None,
) -> None:
    """
    Replace text box body with multiple paragraphs.

    bullet=True  → Wingdings checkmark (✓) bullets
    bullet=False → plain prose paragraphs

    Layout: bodyPr is normalised through ``_apply_body_layout``.  The
    lead-in and every body paragraph get 115% line spacing and a 36pt
    space-after; the last body paragraph gets no space-after.  The optional
    *heading* is written first as a bold 22pt paragraph with no extra
    spacing.

    Font size: *font_pt* wins when given.  Otherwise, when the effective
    width and height are both positive, ``_fit_font_pt`` chooses the size.
    Otherwise the first run's original size is kept when it is at least
    11pt, and 18pt is used when it is not.

    Run formatting is cloned from the first run of the first existing
    paragraph when that run has an rPr; otherwise a minimal en-US rPr is
    created.  Nothing is written when *paragraphs* is empty.
    """
    from lxml import etree
    from engine.text_replacer import _fit_font_pt

    BULLET_CHAR   = "\xfc"
    BULLET_FONT   = "Wingdings"
    BULLET_PANOSE = "05000000000000000000"
    BULLET_MARL   = 285750
    BULLET_INDENT = -285750
    PARA_GAP_PT   = 36.0    # written as spcPts val="3600"

    if not paragraphs:
        return

    # Centralised body layout — top anchor, top-pad clearance, insets, noAutofit.
    _apply_body_layout(txBody_el)

    # Read original run properties to preserve template font colour/face.
    existing_paras = txBody_el.findall(qn("a:p"))
    rPr_template = None
    orig_pt = 0.0
    if existing_paras:
        first_run = existing_paras[0].find(qn("a:r"))
        if first_run is not None:
            rPr_template = first_run.find(qn("a:rPr"))
            if rPr_template is not None:
                sz = rPr_template.get("sz")
                if sz:
                    orig_pt = int(sz) / 100.0

    # Work out the body font size.  Bullets lose the hanging-indent width;
    # heading and lead-in each reserve one line, but never more than 30% of
    # the box height in total.
    eff_cx = cx_in - (BULLET_MARL / _EMU) if bullet else cx_in
    heading_reserved = (22 / 72 * 1.15) if heading else 0.0
    lead_in_reserved = (18 / 72 * 1.15) if lead_in else 0.0
    eff_cy = max(cy_in - heading_reserved - lead_in_reserved, cy_in * 0.7) if cy_in > 0 else 0
    all_text = (lead_in + " " if lead_in else "") + " ".join(paragraphs)
    if font_pt is not None:
        # Caller pinned an exact size — skip auto-fit entirely.
        fit_pt = float(font_pt)
    elif eff_cx > 0 and eff_cy > 0:
        # NOTE(review): spc_aft_pt=0 is passed although paragraphs are
        # written with a 36pt space-after below — if _fit_font_pt uses this
        # value to budget paragraph gaps, the fit is optimistic.  Confirm
        # against _fit_font_pt before changing.
        fit_pt = _fit_font_pt(
            eff_cx, eff_cy, all_text, orig_pt,
            default_pt=18.0, line_spacing=1.15,
            n_items=len(paragraphs) + (1 if lead_in else 0),
            spc_aft_pt=0, min_pt=11,
        )
    else:
        fit_pt = max(orig_pt if orig_pt >= 11 else 18.0, 11.0)
    body_sz = str(int(fit_pt * 100))

    def _new_rPr():
        """Return a fresh rPr: a clone of the template's, or a minimal default."""
        if rPr_template is not None:
            clone = copy.deepcopy(rPr_template)
            clone.tail = None
            return clone
        fresh = etree.Element(f"{{{_NS_A}}}rPr")
        fresh.set("lang", "en-US")
        fresh.set("dirty", "0")
        return fresh

    def _add_run(p_el, text: str, rPr) -> None:
        """Append a run holding *rPr* followed by the text element."""
        r_el = etree.SubElement(p_el, f"{{{_NS_A}}}r")
        r_el.insert(0, rPr)
        t_el = etree.SubElement(r_el, f"{{{_NS_A}}}t")
        t_el.text = text

    for p in list(txBody_el.findall(qn("a:p"))):
        txBody_el.remove(p)

    if heading:
        h_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        hPr = etree.SubElement(h_el, f"{{{_NS_A}}}pPr")
        etree.SubElement(hPr, f"{{{_NS_A}}}buNone")
        hrPr = _new_rPr()
        hrPr.set("sz", "2200")
        hrPr.set("b", "1")
        _add_run(h_el, heading, hrPr)

    # Total paragraphs that will be rendered (for "is_last" detection)
    _total = (1 if lead_in else 0) + len(paragraphs)

    # Optional plain prose sentence before bullets/points
    if lead_in:
        li_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        liPr = etree.SubElement(li_el, f"{{{_NS_A}}}pPr")
        # Spacing children must precede bullet children inside pPr, so the
        # spacing is stamped before buNone is added.
        _stamp_para_spacing(liPr, is_last=(_total == 1),
                            gap_pt=PARA_GAP_PT, line_spacing_pct=115)
        etree.SubElement(liPr, f"{{{_NS_A}}}buNone")
        rPr_li = _new_rPr()
        rPr_li.set("sz", body_sz)
        rPr_li.set("b", "0")
        _add_run(li_el, lead_in, rPr_li)

    last_idx = len(paragraphs) - 1
    for idx, para_text in enumerate(paragraphs):
        p_el = etree.SubElement(txBody_el, f"{{{_NS_A}}}p")
        pPr = etree.SubElement(p_el, f"{{{_NS_A}}}pPr")
        if bullet:
            pPr.set("marL", str(BULLET_MARL))
            pPr.set("indent", str(BULLET_INDENT))
        # Spacing first (schema order), then the bullet definition.
        _stamp_para_spacing(pPr, is_last=(idx == last_idx),
                            gap_pt=PARA_GAP_PT, line_spacing_pct=115)
        if bullet:
            buFont = etree.SubElement(pPr, f"{{{_NS_A}}}buFont")
            buFont.set("typeface", BULLET_FONT)
            buFont.set("panose", BULLET_PANOSE)
            buFont.set("pitchFamily", "2")
            buFont.set("charset", "2")
            buChar = etree.SubElement(pPr, f"{{{_NS_A}}}buChar")
            buChar.set("char", BULLET_CHAR)
        else:
            etree.SubElement(pPr, f"{{{_NS_A}}}buNone")

        # Body runs keep the template's weight; only the size is overridden.
        new_rPr = _new_rPr()
        new_rPr.set("sz", body_sz)
        _add_run(p_el, para_text, new_rPr)


# ═══════════════════════════════════════════════════════════════════════════════
# Shared helpers
# ═══════════════════════════════════════════════════════════════════════════════

def _calc_title_pt(text: str, cx_in: float, cy_in: float, max_pt: float = 28.0) -> float:
    """
    Return a sensible title font size: the largest pt at which `text` fits
    inside (cx_in x cy_in), capped at max_pt. Mirrors _fit_font_pt's math
    but works without reading orig_pt from XML — useful when we want to
    override what the template's placeholder would naturally render.
    """
    import math
    text = (text or "").strip()
    if not text or cx_in <= 0 or cy_in <= 0:
        return max_pt
    for pt in range(int(max_pt), 11, -1):
        chars_per_line = cx_in * 144 / pt
        lines_needed = max(math.ceil(len(text) / max(chars_per_line, 1)), 1)
        text_height = lines_needed * (pt / 72) * 1.3
        if text_height <= cy_in:
            return float(pt)
    return 12.0


def _enable_normAutofit(txBody_el) -> None:
    """
    Switch the body's autofit mode to normAutofit so PowerPoint may shrink
    the font at render time if the content overflows the box.  Intended for
    prose-heavy slide types where a pre-computed font size may still be too
    large.

    Creates bodyPr as the first child of *txBody_el* when it is missing and
    replaces any existing autofit child.  The new normAutofit carries no
    fontScale / lnSpcReduction attributes.
    """
    from lxml import etree
    bodyPr = txBody_el.find(f"{{{_NS_A}}}bodyPr")
    if bodyPr is None:
        bodyPr = etree.Element(f"{{{_NS_A}}}bodyPr")
        txBody_el.insert(0, bodyPr)
    # Strip any existing autofit children
    for tag in ("noAutofit", "spAutoFit", "normAutofit"):
        existing = bodyPr.find(f"{{{_NS_A}}}{tag}")
        if existing is not None:
            bodyPr.remove(existing)
    # The autofit element must sit after prstTxWarp (if any) and before
    # scene3d / sp3d / flatTx / extLst; appending it at the end would put it
    # out of schema order whenever one of those trailing children exists.
    norm_fit = etree.Element(f"{{{_NS_A}}}normAutofit")
    warp = bodyPr.find(f"{{{_NS_A}}}prstTxWarp")
    if warp is not None:
        bodyPr.insert(bodyPr.index(warp) + 1, norm_fit)
    else:
        bodyPr.insert(0, norm_fit)


def _synthesise_slide_notes(slide_dict: dict) -> str:
    """
    Deterministic fallback when a plan entry arrives without `notes`. Builds
    a 80-120 word trainer brief from the slide's title + first 3 bullets so
    no slide ever ships with a blank speaker-notes pane.

    Mirrors the post-pad synthesiser in pipeline.py so the builder is
    self-sufficient — it can be invoked directly (e.g. from run_full_course
    with a hand-edited plan) and still produce non-blank notes.
    """
    title   = (slide_dict.get("title") or slide_dict.get("module_title") or "").strip()
    bullets = slide_dict.get("bullets") or []
    points  = slide_dict.get("points")  or []
    paras   = slide_dict.get("paragraphs") or []

    talking_points: list[str] = []
    if bullets:
        for b in bullets[:3]:
            text = b if isinstance(b, str) else (b.get("text") or "")
            if text:
                talking_points.append(text.strip())
    elif points:
        for p in points[:3]:
            label = (p.get("label") or "").strip()
            text  = (p.get("text")  or "").strip()
            if label and text:
                talking_points.append(f"{label}: {text}")
            elif text:
                talking_points.append(text)
    elif paras:
        for para in paras[:2]:
            if isinstance(para, str) and para.strip():
                talking_points.append(para.strip())

    lines: list[str] = []
    if title:
        lines.append(
            f"This slide covers '{title}'. Open with the headline idea in your "
            f"own words, then take delegates through each point one at a time."
        )
    else:
        lines.append(
            "Open with the headline idea in your own words, then take delegates "
            "through each point one at a time."
        )
    if talking_points:
        lines.append("Talking points to cover:")
        for tp in talking_points:
            lines.append(f"  - {tp}")
    lines.append(
        "Pause after the points and ask the room how this connects to their own "
        "work — one volunteer example is enough to anchor the learning before "
        "moving on. Spend about 2-3 minutes on this slide."
    )
    return "\n".join(lines)


def _synthesise_item_notes(parent: dict, item: dict) -> str:
    """
    Build trainer notes for an expanded child card (overview_groups item)
    deterministically from the item's own content. Used when a single
    overview_groups plan entry expands into 1 parent + N child slides — we
    want each child to have a usable, item-specific note rather than an
    empty notes pane or a duplicated parent note.

    Output is ~80-120 words. Trainer can always rewrite, but never sees blank.
    """
    label = (item.get("label") or "").strip()
    bullets = item.get("bullets") or []
    desc = (item.get("description") or item.get("text") or "").strip()
    parent_title = (parent.get("title") or "").strip()

    if not label and not bullets and not desc:
        return ""

    lines: list[str] = []

    # Opening framing: tie back to the parent topic + name the item
    if label and parent_title:
        lines.append(
            f"This card covers '{label}' as part of {parent_title}. "
            f"Walk delegates through the key points one at a time."
        )
    elif label:
        lines.append(
            f"This card covers '{label}'. "
            f"Walk delegates through the key points one at a time."
        )
    elif desc:
        lines.append(desc)
    else:
        lines.append("Walk delegates through this card one bullet at a time.")

    # Mid-section: expand on each bullet
    if bullets:
        lines.append("Talking points:")
        for b in bullets:
            b_text = (b if isinstance(b, str) else (b.get("text") or "")).strip()
            if b_text:
                lines.append(f"  - {b_text}")
    elif desc:
        lines.append(desc)

    # Closing prompt: engagement cue
    if label:
        lines.append(
            f"Pause after the points and ask the room: 'When have you seen "
            f"{label.lower()} done well — or done badly? What stood out?' "
            f"Use the answers to anchor the next slide."
        )
    else:
        lines.append(
            "Pause for one question from the room before moving on, "
            "to check understanding."
        )

    return "\n".join(lines)


def _set_notes(slide, notes_text: str) -> None:
    """
    Write trainer notes into the slide's speaker-notes pane (visible to the
    presenter, hidden from the audience). Idempotent — overwrites any
    existing notes content.

    Splits on newlines so multi-paragraph notes render as separate paragraphs
    in the PowerPoint notes pane.
    """
    if not notes_text or not notes_text.strip():
        return
    # Accessing notes_slide auto-creates the notes XML part if missing
    tf = slide.notes_slide.notes_text_frame
    paragraphs = [p.strip() for p in notes_text.split("\n") if p.strip()]
    if not paragraphs:
        return
    tf.text = paragraphs[0]
    for extra in paragraphs[1:]:
        p = tf.add_paragraph()
        p.text = extra


def _content_slots(slide) -> list[dict]:
    """Return the slots found by ``_collect_slots`` on *slide*, leaving out
    every slot whose lower-cased text contains one of ``_LOGO_TOKENS``."""
    kept = []
    for slot in _collect_slots(slide):
        if any(tok in slot["text"].lower() for tok in _LOGO_TOKENS):
            continue
        kept.append(slot)
    return kept


def _find_title_body_slots(slots: list[dict]) -> tuple:
    non_empty = [s for s in slots if s.get("text", "").strip()]
    if not non_empty:
        return None, None, list(slots)
    title_slot = min(non_empty, key=lambda s: s["abs_y"])
    below = [s for s in slots if s["abs_y"] > title_slot["abs_y"] + 0.1]
    body_slot = max(below, key=lambda s: s.get("cx_in", 0) * s.get("cy_in", 0)) if below else None
    other = [s for s in slots if s is not title_slot and s is not body_slot]
    return title_slot, body_slot, other


def _clear_txbody(txBody_el) -> None:
    """Remove every ``a:p`` child of *txBody_el*, then append one empty ``a:p``."""
    from lxml import etree
    para_tag = qn("a:p")
    stale_paras = txBody_el.findall(para_tag)
    for para in stale_paras:
        txBody_el.remove(para)
    # Leave a single empty paragraph behind in place of the removed ones.
    etree.SubElement(txBody_el, f"{{{_NS_A}}}p")


def _remove_decorative_groups(slide) -> None:
    """Strip top-level group shapes and non-corner pictures from *slide*.

    Every ``p:grpSp`` that is a direct child of the shape tree is removed.
    A picture is kept only when it is narrower than 2.5in AND sits in a
    corner — near the top or bottom edge and near the left or right edge —
    of an assumed 13.33in x 7.5in slide.  All other pictures are removed, as
    is any picture whose size or position cannot be read.
    """
    picture_type = 13                    # MSO_SHAPE_TYPE.PICTURE
    slide_w_in, slide_h_in = 13.33, 7.5  # assumed slide size, inches

    tree = slide.shapes._spTree
    group_tag = f"{{{_NS_P}}}grpSp"
    for group_el in [node for node in tree if node.tag == group_tag]:
        tree.remove(group_el)

    doomed = []
    for shape in slide.shapes:
        if shape.shape_type != picture_type:
            continue
        try:
            width_in  = shape.width  / _EMU
            height_in = shape.height / _EMU
            left_in   = shape.left   / _EMU
            top_in    = shape.top    / _EMU
        except Exception:
            # Geometry unreadable: treat the picture as removable.
            doomed.append(shape._element)
            continue
        hugs_top_or_bottom = top_in < 1.5 or (top_in + height_in) > slide_h_in - 2.0
        hugs_left_or_right = left_in < 1.5 or (left_in + width_in) > slide_w_in - 2.0
        if width_in < 2.5 and hugs_top_or_bottom and hugs_left_or_right:
            continue   # small corner accent: keep it
        doomed.append(shape._element)

    for node in doomed:
        holder = node.getparent()
        if holder is not None:
            holder.remove(node)


def _remove_all_pictures(slide) -> None:
    """Delete every picture shape (shape_type 13) from *slide*.

    Used for the module intro slide, whose full-slide background images
    would otherwise cover the content text boxes.
    """
    picture_type = 13   # MSO_SHAPE_TYPE.PICTURE
    # Collect first, remove afterwards, so the shape collection is not
    # modified while it is being iterated.
    picture_els = []
    for shape in slide.shapes:
        if shape.shape_type == picture_type:
            picture_els.append(shape._element)
    for pic_el in picture_els:
        holder = pic_el.getparent()
        if holder is not None:
            holder.remove(pic_el)


def _resize_shape(txBody_el, new_cx_in: float, new_cy_in: float,
                  new_x_in: float | None = None,
                  new_y_in: float | None = None) -> None:
    """Resize (and optionally move) the shape that owns *txBody_el*.

    new_cx_in / new_cy_in: new width and height in inches.
    new_x_in / new_y_in:   new left/top in inches; None leaves that
                           coordinate unchanged.

    Does nothing when *txBody_el* is detached or its parent has no
    ``p:spPr``.  A missing ``a:xfrm`` is created as the first child of
    spPr, with ``a:off`` (initialised to 0,0) before ``a:ext``, which is
    the order the schema requires.
    """
    from lxml import etree
    sp_el = txBody_el.getparent()
    if sp_el is None:
        return
    spPr = sp_el.find(qn("p:spPr"))
    if spPr is None:
        return

    xfrm = spPr.find(qn("a:xfrm"))
    if xfrm is None:
        # xfrm must precede geometry/fill/line children of spPr.
        xfrm = etree.Element(f"{{{_NS_A}}}xfrm")
        spPr.insert(0, xfrm)

    off = xfrm.find(qn("a:off"))
    if off is None:
        # off must come before ext inside xfrm.
        off = etree.Element(f"{{{_NS_A}}}off")
        off.set("x", "0")
        off.set("y", "0")
        xfrm.insert(0, off)

    ext = xfrm.find(qn("a:ext"))
    if ext is None:
        ext = etree.Element(f"{{{_NS_A}}}ext")
        xfrm.insert(xfrm.index(off) + 1, ext)

    ext.set("cx", str(int(new_cx_in * _EMU)))
    ext.set("cy", str(int(new_cy_in * _EMU)))
    if new_x_in is not None:
        off.set("x", str(int(new_x_in * _EMU)))
    if new_y_in is not None:
        off.set("y", str(int(new_y_in * _EMU)))


def _has_visible_body_content(slide) -> bool:
    """
    Tell whether *slide* carries anything visible in its body area.

    A direct child of the shape tree counts when its bottom edge
    (``y + cy``) lies below the 1.0-inch mark and it is either a group
    shape, a picture, or a plain shape whose text body contains non-blank
    text.  Callers use this to spot bank clones that came out effectively
    empty because every source shape was skipped (e.g. SmartArt with
    unresolved rIds).
    """
    p_prefix = f"{{{_NS_P}}}"
    text_shape_tag = p_prefix + "sp"
    # Groups and pictures are visible regardless of any text content.
    always_visible_tags = {p_prefix + "grpSp", p_prefix + "pic"}
    text_body_tag = p_prefix + "txBody"
    text_run_tag = f"{{{_NS_A}}}t"
    threshold = int(1.0 * _EMU)

    for node in slide.shapes._spTree:
        tag = node.tag
        if tag != text_shape_tag and tag not in always_visible_tags:
            continue   # not a shape kind we consider

        # _get_grp_xfrm is unpacked as a 4-tuple; only the 2nd and 4th
        # values (used here as top and height) matter.
        _, top, _, height = _get_grp_xfrm(node)
        if top + height <= threshold:
            continue   # sits entirely in the title strip

        if tag != text_shape_tag:
            return True

        body = node.find(text_body_tag)
        if body is None:
            continue
        joined = "".join(run.text or "" for run in body.iter(text_run_tag))
        if joined.strip():
            return True

    return False


def _remove_last_slide(cloner) -> None:
    """Remove the most-recently-added slide from the output presentation."""
    try:
        prs = cloner._prs
        sldIdLst = prs.slides._sldIdLst
        last_sldId = list(sldIdLst)[-1]
        rId = last_sldId.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id")
        sldIdLst.remove(last_sldId)
        if rId:
            prs.part.drop_rel(rId)
    except Exception as exc:
        print(f"  [warn] could not remove last slide: {exc!r}")


def _make_text_fallback(d: dict) -> dict:
    """Convert any slide dict into a text_only fallback."""
    fb = dict(d)
    fb["type"] = "text_only"
    if not fb.get("bullets") and not fb.get("paragraphs") and not fb.get("points"):
        items = fb.get("items") or fb.get("steps") or []
        if items:
            fb["bullets"] = [
                f"{it['label']}: {it['description']}" if it.get("description")
                else it.get("label", "")
                for it in items if it.get("label")
            ]
        if not fb.get("bullets"):
            fb["bullets"] = [f"Content: {fb.get('title', '')}"]
    return fb


# ── Course type detection ─────────────────────────────────────────────────────

def _is_tech_course(t: str) -> bool:
    """Return True when the lowercased title contains any entry of
    ``_TECH_COURSE_SIGNALS``."""
    lowered = t.lower()
    for signal in _TECH_COURSE_SIGNALS:
        if signal in lowered:
            return True
    return False


def _is_marketing_course(t: str) -> bool:
    """Return True when the lowercased title contains any entry of
    ``_MARKETING_COURSE_SIGNALS``."""
    lowered = t.lower()
    for signal in _MARKETING_COURSE_SIGNALS:
        if signal in lowered:
            return True
    return False


def _is_soft_skill_course(t: str) -> bool:
    """Return True when the lowercased course title contains any entry of
    ``_SOFT_SKILL_COURSE_SIGNALS`` (soft-skill / communication signals)."""
    lowered = t.lower()
    for signal in _SOFT_SKILL_COURSE_SIGNALS:
        if signal in lowered:
            return True
    return False


# ── Illustration pool ─────────────────────────────────────────────────────────

# Stopwords ignored when matching a course title against topic folder names.
_TOPIC_STOPWORDS = {
    "and", "of", "the", "for", "in", "on", "at", "to", "with",
    "course", "skills", "training", "workshop", "fundamentals",
}

# Explicit aliases per topic folder. Word-boundary matched against the
# lowercased course title. Covers common team shorthand (HR, AI, L&D, PM,
# H&S, D&I, …) that the slug/token matcher can't see (too short or no
# vocabulary overlap with the folder name).
_TOPIC_ALIASES: dict[str, list[str]] = {
    "human_resources":
        ["hr", "human resources", "people management"],
    "ai_in_business":
        ["ai in business", "ai for business", "ai for managers"],
    "data_analytics_and_ai":
        ["data analytics", "data science", "analytics"],
    "learning_and_development":
        ["l&d", "l and d", "ld", "learning development",
         "learning and development"],
    "project_management_fundamentals":
        ["project management", "pm", "pmp", "prince2"],
    "health_and_safety":
        ["health safety", "h&s", "ehs", "iosh"],
    "workplace_diversity_and_inclusion":
        ["diversity", "d&i", "dei", "inclusion", "diversity and inclusion"],
    "customer_service_excellence":
        ["customer service", "csat", "cx", "customer experience"],
    "public_speaking_and_presentation_skills":
        ["public speaking", "presentation skills", "presenting"],
    "sales_and_persuasion_techniques":
        ["sales", "selling", "persuasion"],
    "stress_management_and_workplace_wellbeing":
        ["stress management", "wellbeing", "wellness", "mental health"],
    "time_management_and_productivity":
        ["time management", "productivity"],
    "leadership_and_management_skills":
        ["leadership", "management skills", "people leadership"],
    "workplace_communication_skills":
        ["workplace communication", "business communication",
         "communication skills"],
    "teamwork_and_collaboration":
        ["teamwork", "collaboration", "team building"],
    "emotional_intelligence_in_the_workplace":
        ["emotional intelligence", "eq", "self awareness"],
    "conflict_resolution_and_negotiation":
        ["conflict resolution", "negotiation", "conflict management"],
    "change_management":
        ["change management", "managing change"],
    "business_analysis":
        ["business analysis", "ba ", "requirements analysis"],
    "accounting_and_finance":
        ["accounting", "finance for", "financial", "bookkeeping"],
    "digital_marketing":
        ["digital marketing", "online marketing", "social media marketing"],
}


def _course_topic_slug(course_title: str, images_root: Path) -> str | None:
    """Resolve a course title to an existing topic subfolder under
    *images_root*. Returns the folder name (slug) or None.

    Match strategy (first hit wins):
      1. Exact slug match (course title slugified = folder name).
      2. Alias match — explicit team shorthand (HR, AI, L&D, PM, …).
         Aliases ranked by length descending so 'digital marketing' beats
         'marketing'.
      3. Token-overlap match: pick folder with the most overlapping
         meaningful tokens (stopwords removed, len > 2).
    """
    if not course_title or not images_root.exists():
        return None

    def _slugify(s: str) -> str:
        s = s.lower()
        s = re.sub(r"&", "and", s)
        s = re.sub(r"[^a-z0-9]+", "_", s).strip("_")
        return s

    folders = [p.name for p in images_root.iterdir() if p.is_dir()]
    if not folders:
        return None

    folder_set = set(folders)
    title_lower = course_title.lower()
    title_slug  = _slugify(course_title)

    # 1) Exact slug match
    if title_slug in folder_set:
        return title_slug

    # 2) Alias match — longest alias first
    alias_pairs: list[tuple[str, str]] = [
        (a, folder) for folder, aliases in _TOPIC_ALIASES.items()
        for a in aliases
        if folder in folder_set
    ]
    alias_pairs.sort(key=lambda pair: -len(pair[0]))
    for alias, folder in alias_pairs:
        if re.search(rf"\b{re.escape(alias)}\b", title_lower):
            return folder

    # 3) Token-overlap fallback
    def _tokens(slug: str) -> set[str]:
        return {t for t in slug.split("_")
                if t and t not in _TOPIC_STOPWORDS and len(t) > 2}

    title_tokens = _tokens(title_slug)
    if not title_tokens:
        return None

    best_folder, best_score = None, 0
    for f in folders:
        overlap = len(title_tokens & _tokens(f))
        if overlap > best_score:
            best_score, best_folder = overlap, f
    return best_folder if best_score >= 1 else None


def _load_illustration_images(images_dir: str, tech_course=False,
                              marketing_course=False, course_title: str = ""):
    """Load illustrations as a list of (path, keywords, is_topic) tuples.

    Topic images come from images/<topic_slug>/ when the course title
    resolves to a known topic folder. They have empty keywords and
    ``is_topic=True`` — the picker prioritises them.

    Root images (existing flat pool) come after, with extracted keywords
    and ``is_topic=False``.
    """
    result: list[tuple[str, list[str], bool]] = []
    p = Path(images_dir)
    if not p.exists():
        return result

    # Topic pool — shuffled so picks aren't always 01, 02, 03...
    topic_slug = _course_topic_slug(course_title, p)
    if topic_slug:
        topic_dir = p / topic_slug
        topic_files = [
            f for f in topic_dir.iterdir()
            if f.is_file() and f.suffix.lower() in _IMG_EXTS
        ]
        import random as _random
        _random.shuffle(topic_files)
        for fpath in topic_files:
            result.append((str(fpath), [], True))
        print(f"  Topic pool: {topic_slug!r} ({len(topic_files)} images)")
    else:
        print(f"  Topic pool: none (no folder match for course title)")

    # Root pool — existing behaviour. Skip subdirectories (topic folders).
    for fpath in sorted(p.iterdir()):
        if not fpath.is_file():
            continue
        if fpath.suffix.lower() not in _IMG_EXTS:
            continue
        stem = fpath.stem.lower()
        stem_n = re.sub(r"[^a-z ]", " ", stem)
        stem_w  = stem_n.split()
        if not tech_course and any(s in stem_w or s in stem_n for s in _TECH_IMAGE_SIGNALS):
            continue
        if not marketing_course and any(s in stem_n for s in _SOCIAL_MARKETING_IMAGE_SIGNALS):
            continue
        words = [w for w in stem_w if len(w) > 3 and w not in _IMG_NOISE]
        result.append((str(fpath), words, False))
    return result


def _pick_illustration_image(images, title: str, used: set,
                             course_words: set = None,
                             context_text: str = "") -> str | None:
    """
    Pick an illustration whose keywords best match the slide title (primary)
    and the slide's broader content (secondary), with the course theme as a
    safety-net.

    *context_text* lets the caller feed in the slide's body content (lead_in,
    point texts, bullets) so the match isn't limited to the short title.
    For "Building a Proactive HR Mindset", body words like "manager",
    "consistency", "policy" will steer matching toward an HR-themed image
    instead of falling back to random unused (which gave us "ONLINE TEST"
    on what should have been a manager-coaching slide).
    """
    if not images:
        return None

    # Topic pool priority — if the course resolved to a topic folder, those
    # entries (is_topic=True) are always preferred over the generic root pool.
    # Within the topic pool any unused entry is equally valid (the team
    # curated each PPT as a topic set; no keyword scoring needed).
    for path, kws, is_topic in images:
        if is_topic and path not in used:
            return path

    def _words(text):
        return {w.lower() for w in re.sub(r"[^a-zA-Z ]", " ", text).split()
                if len(w) > 3 and w.lower() not in _IMG_NOISE}

    title_words   = _words(title)
    context_words = _words(context_text) if context_text else set()
    course_words  = course_words or set()
    best_score, best_idx = -1, -1
    fb_score, fb_idx     = -1, -1
    ctx_score, ctx_idx   = -1, -1
    first_unused         = -1

    for i, (path, kws, is_topic) in enumerate(images):
        if is_topic:
            continue  # topic pool already exhausted above
        if path in used:
            continue
        if first_unused == -1:
            first_unused = i
        kw_set = set(kws)
        # Title match — strongest signal
        score  = len(title_words & kw_set)
        if score > best_score:
            best_score, best_idx = score, i
        # Body-content match — secondary signal (weight 2× per match
        # but only counts when title has no match)
        ctx = len(context_words & kw_set)
        if ctx > ctx_score:
            ctx_score, ctx_idx = ctx, i
        # Course theme — safety net
        fb = len(course_words & kw_set)
        if fb > fb_score:
            fb_score, fb_idx = fb, i

    if best_idx == -1:
        # Every non-topic image is already used.  Instead of resetting the
        # used set (which makes the first image get picked again and again
        # — the "stuck on 6 different slides" symptom we fixed earlier),
        # spread the repeats EVENLY across the pool by picking randomly.
        import random as _random
        candidates = [i for i, (_, kws, is_t) in enumerate(images)
                      if kws and not is_t]
        best_idx = _random.choice(candidates) if candidates else 0
    elif best_score == 0:
        # No title match — try body-content match first, then course theme.
        if ctx_score > 0 and ctx_idx != -1:
            best_idx = ctx_idx
        elif fb_score > 0 and fb_idx != -1:
            best_idx = fb_idx
        else:
            best_idx = next(
                (i for i, (p, kws, is_t) in enumerate(images)
                 if p not in used and kws and not is_t),
                first_unused if first_unused != -1 else 0,
            )
    return images[best_idx][0]