""" engine/template_analyzer.py ---------------------------- Reads an uploaded template PPTX and extracts its design DNA. Template contract (variable number of slides): The CONTENT slide is auto-detected (not assumed to be index 4). It is the slide with the widest title + widest body placeholder both spanning > 70 % of the slide width — i.e. a standard top-title layout. Returns a TemplateDNA dict: { "brand_color": "44318D", "slide_w_in": 13.33, "slide_h_in": 7.5, "layout_type": "top_title", # top_title | left_panel | split_right # Raw placeholder positions (extracted from template) "title": { "x","y","cx","cy", "font_pt","bold","color" }, "body": { "x","y","cx","cy" }, # Usable content rectangle (derived — where our content goes) "content_zone": { "x","y","cx","cy" }, # Illustration zone (right portion when image is shown) "illus": { "x","y","cx","cy" }, # Chrome shape XML elements to copy to every bank-based slide "chrome": [ {"tag","xml","role"}, ... ], } The manifest file (_manifest.json) caches the LLM-validated zones so the API is only called once per new template. """ from __future__ import annotations import copy import json from pathlib import Path _EMU = 914_400 _NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main" _NS_P = "http://schemas.openxmlformats.org/presentationml/2006/main" _LOGO_TOKENS = {"theknowledgeacademy", "the knowledge academy", "theknowledge"} # Cache so we don't re-parse the same file in one run _DNA_CACHE: dict[str, dict] = {} # ── Public entry point ───────────────────────────────────────────────────────── def analyze_template(tmpl_path: str, api_key: str = "") -> dict: """ Parse the template PPTX and return a TemplateDNA dict. If api_key is provided and no manifest exists, runs one-shot LLM validation. Result is cached by resolved path for the lifetime of the process. """ key = str(Path(tmpl_path).resolve()) if key in _DNA_CACHE: return _DNA_CACHE[key] from pptx import Presentation dna = _default_dna() try: prs = Presentation(tmpl_path) dna["slide_w_in"] = round(prs.slide_width / _EMU, 3) dna["slide_h_in"] = round(prs.slide_height / _EMU, 3) dna["brand_color"] = _detect_brand_color(prs) # 7-slide contract: cover, about_us, syllabus, module_intro, # content, quiz, ending — fixed positional mapping. Auto-detection # was misclassifying the Quiz slide as content because of its wide # title textbox, breaking module_intro and content slide builds. if len(prs.slides) == 7: content_idx = 4 dna["slide_indices"] = { "cover": 0, "about_us": 1, "syllabus": 2, "module_intro": 3, "content": 4, "quiz": 5, "ending": 6, } else: content_idx = _find_content_slide_index(prs) dna["slide_indices"] = _detect_slide_indices(prs, content_idx) if content_idx < len(prs.slides): _analyse_content_slide(prs.slides[content_idx], dna) si = dna["slide_indices"] quiz_str = f" quiz={si['quiz']}" if "quiz" in si else "" print(f" [template DNA] brand={dna['brand_color']} layout={dna['layout_type']} " f"chrome={len(dna['chrome'])} shapes") print(f" [slide roles ] cover={si['cover']} about_us={si['about_us']} " f"syllabus={si['syllabus']} module_intro={si['module_intro']} " f"content={si['content']}{quiz_str} ending={si['ending']}") except Exception as exc: print(f" [warn] template_analyzer failed ({exc!r}), using defaults") # One-shot LLM validation (reads/writes manifest file) if api_key: _load_or_validate_manifest(tmpl_path, dna, api_key) _DNA_CACHE[key] = dna return dna # ── Defaults ────────────────────────────────────────────────────────────────── def _default_dna() -> dict: return { "brand_color": "44318D", "slide_w_in": 13.33, "slide_h_in": 7.5, "layout_type": "top_title", "title": { "x": 0.66, "y": 0.58, "cx": 11.4, "cy": 0.71, "font_pt": 36, "bold": True, "color": "000000", }, "body": { "x": 0.66, "y": 1.58, "cx": 12.09, "cy": 4.34, }, "content_zone": { "x": 0.66, "y": 1.58, "cx": 12.01, "cy": 5.0, }, "lower_zone": { "x": 0.5, "y": 3.1, "cx": 12.3, "cy": 3.3, }, "illus": { "x": 8.3, "y": 1.4, "cx": 4.7, "cy": 4.8, }, "chrome": [], "slide_indices": { "cover": 0, "about_us": 1, "syllabus": 2, "module_intro": 3, "content": 4, "ending": 5, }, } # ── Slide-role detection (cover / about_us / syllabus / module_intro / content / ending) ── def _detect_slide_indices(prs, content_idx: int) -> dict: """ Identify the slide index for each role in the template. Works for both 6-slide and 8-slide templates (and any variant). Heuristics: cover → slide 0 (always) ending → last slide (always) content → auto-detected (passed in) module_intro → the slide immediately before content (matches all known templates) about_us → slide 1 (always — the second slide) syllabus → first slide between about_us and module_intro that has "syllabus" in its title text, else slide 2 (default) """ n = len(prs.slides) last_idx = n - 1 cover_idx = 0 about_us_idx = min(1, last_idx) module_intro_idx = max(content_idx - 1, 0) ending_idx = last_idx # Find syllabus by title text syllabus_idx = 2 for i in range(about_us_idx + 1, module_intro_idx): slide = prs.slides[i] for shape in slide.shapes: try: txt = shape.text_frame.text.strip().lower() except Exception: continue if "syllabus" in txt or "course outline" in txt: syllabus_idx = i break else: continue break return { "cover": cover_idx, "about_us": about_us_idx, "syllabus": syllabus_idx, "module_intro": module_intro_idx, "content": content_idx, "ending": ending_idx, } # ── Content slide auto-detection ────────────────────────────────────────────── def _find_content_slide_index(prs) -> int: """ Score each slide by how well it matches a standard content slide: • wide title + wide body just below it • a chrome top banner (full-width auto-shape near y=0) is the single strongest signal that this is the LAYOUT slide vs a populated example slide • avoid slides whose body looks like real course content (long sentences, paragraph-style) — those are example pages the designer left in, not the template layout Returns the best-matching index (fallback: min(4, last_idx)). """ slide_w = prs.slide_width / _EMU best_score = -1 best_idx = min(4, len(prs.slides) - 1) for i, slide in enumerate(prs.slides): wide_boxes = [] has_top_banner = False has_bottom_wordmark = False for shape in slide.shapes: try: x = shape.left / _EMU y = shape.top / _EMU cx = shape.width / _EMU cy = shape.height / _EMU except Exception: continue try: txt = shape.text_frame.text.strip() except Exception: txt = "" stype = _shape_type_str(shape) # Top banner: a full-width auto-shape near the top of the slide if stype == "auto_shape" and cx > slide_w * 0.9 and y < 0.3 and cy > 0.4: has_top_banner = True # Bottom wordmark: a small text near bottom carrying the brand if stype == "text_box" and y > 6.3 and _is_logo_text(txt): has_bottom_wordmark = True if stype == "text_box" and cx > slide_w * 0.45 and not _is_logo_text(txt): wide_boxes.append({"x": x, "y": y, "cx": cx, "cy": cy, "txt": txt}) if len(wide_boxes) < 2: continue wide_boxes.sort(key=lambda r: r["y"]) title = wide_boxes[0] below = [r for r in wide_boxes if r["y"] > title["y"] + 0.1] if not below: continue body = max(below, key=lambda r: r["cx"] * r["cy"]) score = 0 if title["cx"] > slide_w * 0.70: score += 2 if body["cx"] > slide_w * 0.70: score += 3 if body["y"] - (title["y"] + title["cy"]) < 0.8: score += 1 if body["cy"] > 1.0: score += 1 # NEW signals — chrome decorations strongly imply this is the # template's content LAYOUT slide rather than a populated example. if has_top_banner: score += 5 if has_bottom_wordmark: score += 2 # Penalty: if the body text looks like real course content # (long paragraph-style sentences), this is likely a populated # example slide left over from the trainer's previous course. body_txt = body.get("txt", "") if len(body_txt) > 60 and body_txt.count(" ") > 8: score -= 3 if score > best_score: best_score = score best_idx = i return best_idx # ── Layout type detection ────────────────────────────────────────────────────── def _detect_layout_type(title_info: dict | None, body_info: dict | None, slide_w: float) -> str: """ Classify the template content slide layout. Returns: "top_title" | "left_panel" | "split_right" """ if not title_info or not body_info: return "top_title" t_cx = title_info["cx"] b_x = body_info["x"] # Wide title + body starting from left margin → standard top-title layout if t_cx > slide_w * 0.70 and b_x < slide_w * 0.15: return "top_title" # Body is on the right half of the slide if b_x > slide_w * 0.40: return "split_right" # Title is a narrow left-panel element if t_cx < slide_w * 0.50 and title_info["x"] < slide_w * 0.10: return "left_panel" return "top_title" # ── Content zone computation ─────────────────────────────────────────────────── def _compute_content_zone( title_info: dict | None, body_info: dict | None, layout_type: str, slide_w: float, slide_h: float, ) -> dict: """ Compute the usable rectangle where generated content (text, infographics, intro text) should be placed. For top_title: starts just below the title, spans full usable width. For split_right/left_panel: span full width starting below title — the body placeholder is on the right in these templates, but our generated content should still use the full slide width. """ chrome_floor = slide_h - 0.9 # leave ~0.9 in at bottom for watermark/chrome title_bot = (title_info["y"] + title_info["cy"]) if title_info else 1.5 if layout_type == "top_title": margin = body_info["x"] if body_info else 0.66 return { "x": round(margin, 3), "y": round(title_bot, 3), "cx": round(slide_w - margin * 2, 3), "cy": round(max(chrome_floor - title_bot, 2.0), 3), } # split_right or left_panel: body placeholder is off-centre, # but generated content should span the slide from a sensible left margin. margin = 0.66 return { "x": round(margin, 3), "y": round(title_bot, 3), "cx": round(slide_w - margin * 2, 3), "cy": round(max(chrome_floor - title_bot, 2.0), 3), } # ── Brand colour detection ──────────────────────────────────────────────────── def _detect_brand_color(prs) -> str: from collections import Counter counts: Counter = Counter() for slide in prs.slides: for shape in slide.shapes: try: fill = shape.fill if fill.type is None: continue rgb = str(fill.fore_color.rgb) r, g, b = int(rgb[0:2], 16), int(rgb[2:4], 16), int(rgb[4:6], 16) brightness = (r + g + b) / 3 saturation = max(r, g, b) - min(r, g, b) if brightness > 230 or brightness < 25: continue if saturation < 30: continue counts[rgb.upper()] += 1 except Exception: continue if counts: return counts.most_common(1)[0][0] return "44318D" # ── Content slide analysis ──────────────────────────────────────────────────── def _analyse_content_slide(slide, dna: dict) -> None: shapes = list(slide.shapes) slide_w = dna["slide_w_in"] slide_h = dna["slide_h_in"] raw: list[dict] = [] for shape in shapes: try: x = shape.left / _EMU y = shape.top / _EMU cx = shape.width / _EMU cy = shape.height / _EMU except Exception: continue stype = _shape_type_str(shape) txt = _shape_text(shape) raw.append({"shape": shape, "stype": stype, "x": x, "y": y, "cx": cx, "cy": cy, "area": cx * cy, "text": txt}) if not raw: return # ── Title ────────────────────────────────────────────────────────────────── text_boxes = [ r for r in raw if r["stype"] == "text_box" and r["cx"] > slide_w * 0.45 and not _is_logo_text(r["text"]) and r["text"].strip() ] title_info = None if text_boxes: title_r = min(text_boxes, key=lambda r: r["y"]) title_info = title_r font_pt, bold, color, font_family = _extract_font_info(title_r["shape"]) dna["title"] = { "x": round(title_r["x"], 3), "y": round(title_r["y"], 3), "cx": round(title_r["cx"], 3), "cy": round(title_r["cy"], 3), "font_pt": font_pt, "bold": bold, "color": color, "font_family": font_family, } # Also surface the brand font at DNA root so any builder (including # ones that don't operate on the title slot directly — e.g. _build_quiz # which clones the QUIZ slide's title placeholder, not the content # slide's) can force-apply it to keep the deck visually uniform. if font_family: dna["brand_font"] = font_family # ── Body ─────────────────────────────────────────────────────────────────── title_y = title_info["y"] if title_info else 0.0 body_candidates = [ r for r in raw if r["stype"] == "text_box" and r["y"] > title_y + 0.1 and r["cx"] > slide_w * 0.40 and not _is_logo_text(r["text"]) ] body_info = None if body_candidates: body_r = max(body_candidates, key=lambda r: r["area"]) body_info = body_r dna["body"] = { "x": round(body_r["x"], 3), "y": round(body_r["y"], 3), "cx": round(body_r["cx"], 3), "cy": round(body_r["cy"], 3), } # ── Layout type + content zone ───────────────────────────────────────────── layout_type = _detect_layout_type(title_info, body_info, slide_w) dna["layout_type"] = layout_type dna["content_zone"] = _compute_content_zone( title_info, body_info, layout_type, slide_w, slide_h ) # ── Derived zones (lower_zone, illus) ────────────────────────────────────── cz = dna["content_zone"] cz_x, cz_y, cz_cx, cz_cy = cz["x"], cz["y"], cz["cx"], cz["cy"] lz_y = round(cz_y + cz_cy * 0.40, 2) lz_bot = round(slide_h - 0.9, 2) dna["lower_zone"] = { "x": round(cz_x, 3), "y": lz_y, "cx": cz_cx, "cy": round(max(lz_bot - lz_y, 2.0), 2), } max_text_cx = round(slide_w * 0.55, 2) illus_gap = 0.3 illus_x = round(cz_x + min(cz_cx, max_text_cx) + illus_gap, 2) illus_cx = round(slide_w - illus_x - 0.1, 2) illus_cy = round(min(cz_cy, slide_w * 0.37), 2) dna["illus"] = { "x": illus_x, "y": round(max(cz_y, 1.2), 2), "cx": max(illus_cx, 3.5), "cy": illus_cy, } # Narrow body cx so it doesn't overlap the illustration zone dna["body"]["cx"] = round(min(dna["body"]["cx"], max_text_cx), 3) # ── Chrome shapes ────────────────────────────────────────────────────────── content_ids = set() if title_info: content_ids.add(id(title_info["shape"])) if body_info: content_ids.add(id(body_info["shape"])) chrome_entries = [] for r in raw: if id(r["shape"]) in content_ids: continue role = _classify_chrome_role(r, slide_w, slide_h) if role is None: continue try: from lxml import etree xml_bytes = etree.tostring(r["shape"]._element) chrome_entries.append({ "role": role, "xml": xml_bytes, "tag": r["shape"]._element.tag.split("}")[-1] if "}" in r["shape"]._element.tag else r["shape"]._element.tag, }) except Exception: continue dna["chrome"] = chrome_entries # ── One-shot LLM manifest ───────────────────────────────────────────────────── def _manifest_path(tmpl_path: str) -> Path: p = Path(tmpl_path) return p.parent / (p.stem + "_manifest.json") def _load_or_validate_manifest(tmpl_path: str, dna: dict, api_key: str) -> None: """ Load the cached manifest if it exists, otherwise call the LLM to validate and correct the geometric analysis, then save the manifest. Mutates dna in place. """ mpath = _manifest_path(tmpl_path) if mpath.exists(): try: with open(mpath, encoding="utf-8") as f: saved = json.load(f) _apply_manifest(dna, saved) print(f" [manifest] loaded {mpath.name}") return except Exception as exc: print(f" [warn] manifest load failed ({exc!r}), re-validating") # Build shape description for LLM desc = _describe_template_shapes(tmpl_path, dna) if not desc: return try: import anthropic, re client = anthropic.Anthropic(api_key=api_key) prompt = _build_validation_prompt(desc, dna) resp = client.messages.create( model="claude-haiku-4-5-20251001", max_tokens=1024, messages=[{"role": "user", "content": prompt}], ) raw = resp.content[0].text.strip() m = re.search(r'\{.*\}', raw, re.DOTALL) if m: result = json.loads(m.group(0)) _apply_manifest(dna, result) # Persist mpath.write_text(json.dumps(result, indent=2), encoding="utf-8") print(f" [manifest] created {mpath.name} (LLM validated)") except Exception as exc: print(f" [warn] LLM manifest validation failed ({exc!r}) — geometric analysis used") def _describe_template_shapes(tmpl_path: str, dna: dict) -> str: """Build a plain-text description of all slides + the detected content slide.""" try: from pptx import Presentation prs = Presentation(tmpl_path) slide_w = dna["slide_w_in"] slide_h = dna["slide_h_in"] n_slides = len(prs.slides) lines = [f"Template has {n_slides} slides ({slide_w:.2f}×{slide_h:.2f} in)."] lines.append("Summary of each slide (1st text + shape counts):") for i, slide in enumerate(prs.slides): first_text = "" sp_count = pic_count = grp_count = txt_count = 0 for shape in slide.shapes: stype = _shape_type_str(shape) if stype == "text_box": txt_count += 1 if not first_text: try: t = shape.text_frame.text.strip() if t and not _is_logo_text(t): first_text = t[:50] except Exception: pass elif stype == "picture": pic_count += 1 elif stype == "group": grp_count += 1 else: sp_count += 1 lines.append(f" slide {i}: txt={txt_count} pic={pic_count} grp={grp_count} sh={sp_count} " f"first_text={repr(first_text)}") # Detail on the detected content slide content_idx = dna["slide_indices"]["content"] lines.append(f"\nDETECTED CONTENT SLIDE (index {content_idx}) shapes:") for shape in prs.slides[content_idx].shapes: try: x = shape.left / _EMU; y = shape.top / _EMU cx = shape.width / _EMU; cy = shape.height / _EMU except Exception: continue stype = _shape_type_str(shape) txt = _shape_text(shape) lines.append(f" {stype:10s} x={x:.2f} y={y:.2f} cx={cx:.2f} cy={cy:.2f} {repr(txt[:40])}") lines.append(f"\nGeometric analysis:") lines.append(f" layout_type: {dna['layout_type']}") cz = dna['content_zone'] lines.append(f" content_zone: x={cz['x']} y={cz['y']} cx={cz['cx']} cy={cz['cy']}") iz = dna['illus'] lines.append(f" image_zone: x={iz['x']} y={iz['y']} cx={iz['cx']} cy={iz['cy']}") si = dna['slide_indices'] lines.append(f" slide_roles: cover={si['cover']} about_us={si['about_us']} " f"syllabus={si['syllabus']} module_intro={si['module_intro']} " f"content={si['content']} ending={si['ending']}") return "\n".join(lines) except Exception: return "" def _build_validation_prompt(desc: str, dna: dict) -> str: return f"""You are validating the layout zones of a PowerPoint template slide for automated content generation. {desc} TASK: Return the correct layout zones AND slide-role mapping. ZONES: - layout_type: "top_title" if title is wide (>70% slide width) at the top; "left_panel" if title is a narrow left column; "split_right" if body is on the right half. - content_zone: the FULL usable rectangle from LEFT margin to RIGHT margin, just below the title. cx should be (slide_width - left_margin*2), typically 11–12.5 in. This represents the FULL width regardless of whether an image is shown — narrowing for images is computed downstream. - image_zone: right-side area for an illustration image. Typically x starts at 55–65% of slide width. SLIDE ROLES: identify which slide index in the template is used for each role. The system clones these slides verbatim, so picking the wrong index leaks template text. - cover: title-only intro slide (usually slide 0) - about_us: marketing/about page with logo and tagline - syllabus: a slide listing the module/chapter outline - module_intro: introduces a module — image + bullets, usually image left, narrow bullets right - content: the standard content slide — WIDE title + WIDE body text (>70% width both) - ending: contact/thank-you page (usually the last slide) Note: some templates have multiple syllabus pages or extra content slides. Pick the FIRST instance. Return ONLY valid JSON (no markdown, no explanation): {{ "layout_type": "{dna['layout_type']}", "content_zone": {{"x": {dna['content_zone']['x']}, "y": {dna['content_zone']['y']}, "cx": {dna['content_zone']['cx']}, "cy": {dna['content_zone']['cy']}}}, "image_zone": {{"x": {dna['illus']['x']}, "y": {dna['illus']['y']}, "cx": {dna['illus']['cx']}, "cy": {dna['illus']['cy']}}}, "slide_indices": {{"cover": {dna['slide_indices']['cover']}, "about_us": {dna['slide_indices']['about_us']}, "syllabus": {dna['slide_indices']['syllabus']}, "module_intro": {dna['slide_indices']['module_intro']}, "content": {dna['slide_indices']['content']}, "ending": {dna['slide_indices']['ending']}}} }}""" def _apply_manifest(dna: dict, manifest: dict) -> None: """Merge validated manifest data into dna.""" if "layout_type" in manifest: dna["layout_type"] = manifest["layout_type"] if "content_zone" in manifest: dna["content_zone"] = manifest["content_zone"] if "image_zone" in manifest and manifest["image_zone"]: dna["illus"] = manifest["image_zone"] if "slide_indices" in manifest: dna["slide_indices"].update(manifest["slide_indices"]) # ── Shape classification helpers ────────────────────────────────────────────── def _classify_chrome_role(r: dict, slide_w: float, slide_h: float) -> str | None: stype = r["stype"] x, y, cx, cy = r["x"], r["y"], r["cx"], r["cy"] if stype == "text_box" and _is_logo_text(r["text"]): return "watermark" if stype == "picture": if cx < 2.5: in_top = y < 1.5 in_bottom = (y + cy) > slide_h - 2.0 in_left = x < 1.5 in_right = (x + cx) > slide_w - 2.0 if (in_top or in_bottom) and (in_left or in_right): return "corner_img" return None if stype in ("auto_shape", "freeform"): area = cx * cy if area > slide_w * slide_h * 0.5: return None if x <= 0.1 and y <= 0.1 and cx < slide_w * 0.6: return "accent" if (x + cx) > slide_w - 2.5 and (y + cy) > slide_h - 2.0: return "accent" if area < 5.0: return "accent" return None def _shape_type_str(shape) -> str: try: t = int(shape.shape_type) except Exception: return "other" return {1: "auto_shape", 5: "freeform", 6: "group", 13: "picture", 14: "text_box", 17: "text_box", 19: "table"}.get(t, "other") def _shape_text(shape) -> str: try: return shape.text_frame.text.strip() except Exception: return "" def _is_logo_text(text: str) -> bool: t = text.lower() return any(tok in t for tok in _LOGO_TOKENS) def _extract_font_info(shape) -> tuple[int, bool, str, str]: """ Return (font_pt, bold, hex_color, font_family) for the first run found in shape's text. font_family falls back to "" if the run inherits the typeface from the layout/master. """ try: tf = shape.text_frame for para in tf.paragraphs: for run in para.runs: sz = run.font.size bold = run.font.bold or False color = "000000" try: color = str(run.font.color.rgb) except Exception: pass font_family = "" try: if run.font.name: font_family = run.font.name except Exception: pass pt = int(sz / 12700) if sz else 36 return pt, bold, color, font_family except Exception: pass return 36, True, "000000", "" # ── Public helpers ──────────────────────────────────────────────────────────── def apply_chrome(dna: dict, dest_slide) -> None: from lxml import etree spTree = dest_slide.shapes._spTree for entry in dna.get("chrome", []): try: el = etree.fromstring(entry["xml"]) spTree.append(el) except Exception as exc: print(f" [warn] apply_chrome: could not copy {entry.get('role')} shape: {exc!r}") def body_cx_for_text_only(dna: dict, has_image: bool = True) -> float: """ Return the body box width for a text slide. has_image=True → narrowed to leave room for the illustration on the right. has_image=False → full content_zone width (left margin to right margin). """ cz = dna.get("content_zone") if has_image: illus = dna.get("illus") if illus and cz: # Text occupies content_zone.x to (image_x - gap) text_right = illus["x"] - 0.25 return round(max(text_right - cz["x"], 5.0), 3) return dna["body"]["cx"] # fallback: raw placeholder width # Full-width: use the content_zone width if cz: return cz["cx"] return round(dna["slide_w_in"] - dna["body"]["x"] * 2, 3) def title_cx_for_layout(dna: dict, has_right_content: bool = True) -> float: """ Return the title slot width. has_right_content=True → keep template-default title width (~7.25in so the title doesn't overlap a right-side image/infographic). has_right_content=False → extend the title across most of the slide so it can render at 32-36pt single-line instead of shrinking into the narrow template default. """ default_cx = float(dna.get("title", {}).get("cx", 7.25)) if has_right_content: return default_cx slide_w = float(dna.get("slide_w_in", 13.33)) title_x = float(dna.get("title", {}).get("x", 0.66)) # Reserve ~1.1in on the right for the standard corner decoration. wide = slide_w - title_x - 1.1 return round(max(default_cx, wide), 3)