{"path": "scripts/check-briefs.py", "filename": "check-briefs.py", "size_bytes": 128491, "ext": ".py", "content": "#!/usr/bin/env python3\n\"\"\"\nShadow Dynamics — unified QUALITY runner.\n\nSingle source of truth for all editorial/structural checks against a brief\nHTML. Usable as:\n\n  CLI:    python3 scripts/check-briefs.py <file-or-glob>\n  Lib:    from check_briefs import run_checks\n  HTTP:   POST /quality-check {\"htmlContent\": \"...\"}  (brief-saver)\n\nNOTE — module caching: brief-saver.py imports this file once and caches the\nmodule in sys.modules['sd_check_briefs'] for the lifetime of the process.\nEditing this file does NOT affect the HTTP endpoint until the service is\nrestarted: `sudo systemctl restart brief-saver`. The CLI re-imports per\ninvocation, so `python3 scripts/check-briefs.py` always reflects the\non-disk version. Test battery (`scripts/test-system.py`) also runs CLI-side,\nso 72/72 passing does not prove the live API picked up the changes.\n\nEach check returns either None (passed) or a string (failure reason).\nChecks are categorised:\n  - BLOCKERS: hard fails — would refuse publication\n  - WARNINGS: soft fails — logged for review\n\nVeracity (numerical-claim correctness) is OUT OF SCOPE for this runner;\nthat requires an LLM-judge or live web verification. The runner only\ncatches structural/editorial regressions and known patterns observed in\nprior failure modes.\n\nWhen a check fails in production (called from n8n), the runner appends to\n/var/log/sd-quality-failures.log so failure modes can be reviewed and\nthe prompt template iterated. 
That log is the input side of the feedback\nloop.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport argparse\nimport glob\nimport json\nimport os\nimport re\nimport sys\nfrom dataclasses import dataclass, asdict\nfrom datetime import date, datetime, timezone\nfrom typing import Callable\n\n# ── BLOCKERS ─────────────────────────────────────────────────────────────────\n\ndef check_scaffold_pseudo_citations(html: str) -> str | None:\n    \"\"\"True scaffold leaks: bare markers without content, or pseudo-pipe table breaks.\n\n    Content-bearing markers like [WEB: FT] or [INFERENCE: based on X+Y] are\n    intentional citations per EDITORIAL-CITE-DETAIL-01 and must remain in\n    output. This check only blocks markers that are clearly unfilled templates\n    or markers broken across HTML cells.\n    \"\"\"\n    patterns = [\n        # Bare markers — model emitted the template label but no content.\n        (r'\\[WEB\\s*\\]|\\[WEB:\\s*\\]', 'bare_web_marker'),\n        (r'\\[INFERENCE\\s*\\]|\\[INFERENCE:\\s*\\]', 'bare_inference_marker'),\n        (r'\\[POLL\\s*\\]|\\[POLL:\\s*\\]', 'bare_poll_marker'),\n        # Pseudo-pipe form that breaks tables when rendered:\n        (r'\\[\\w+\\s*\\\\?</td><td>', 'table_broken_citation'),\n    ]\n    hits = [name for rx, name in patterns if re.search(rx, html)]\n    return f'scaffold pseudo-citations: {\", \".join(hits)}' if hits else None\n\n\ndef check_result_label_duplicate(html: str) -> str | None:\n    \"\"\"RESULT/RESULTADO duplicate prefix from prompt template.\"\"\"\n    if re.search(r'RESULT:\\s*RESULT(?:ADO)?', html):\n        return 'duplicated RESULT/RESULTADO label'\n    return None\n\n\ndef check_preflight_section_leak(html: str) -> str | None:\n    \"\"\"Pre-flight section §0 should never reach published HTML.\"\"\"\n    if re.search(r'PRE-FLIGHT VERIFICATION|VERIFICACI[ÓO]N PREVIA', html):\n        return 'pre-flight verification section leaked'\n    return None\n\n\ndef check_event_date_leak(html: str) -> 
str | None:\n    \"\"\"`EVENT DATE:` and `FORMAT DECISION` markers from prompt scaffolding.\"\"\"\n    hits = []\n    if re.search(r'EVENT DATE:\\s', html):\n        hits.append('EVENT_DATE')\n    if re.search(r'FORMAT DECISION', html):\n        hits.append('FORMAT_DECISION')\n    return f'prompt scaffolding leaked: {\", \".join(hits)}' if hits else None\n\n\ndef check_inline_paywall(html: str) -> str | None:\n    \"\"\"Mid-content paywall divs — distinct from the legitimate footer CTA.\"\"\"\n    if re.search(\n        r'<div class=\"cta-box\"[^>]*style=\"margin:32px 0 0\"',\n        html,\n    ):\n        return 'inline paywall div mid-content'\n    if 'class=\"pw-full\"' in html or 'class=\"pw-btn\"' in html:\n        return 'pw-full/pw-btn paywall block present'\n    if re.search(r'filter:\\s*blur\\(\\s*5px', html):\n        return 'blur(5px) paywall overlay present'\n    if re.search(r'\\$19/(month|mes)\\b', html) and \\\n       'cta-box' in html.split('$19/', 1)[0][-200:]:\n        return 'inline price tag mid-content'\n    return None\n\n\ndef check_old_branding(html: str) -> str | None:\n    \"\"\"Brand renamed Forecaster → Intelligence on 2026-04-26.\"\"\"\n    if re.search(r'Shadow Dynamics Forecaster|SHADOW DYNAMICS FORECASTER',\n                 html):\n        return 'old branding \"Forecaster\" present'\n    return None\n\n\ndef check_about_scaffold_label_list(html: str) -> str | None:\n    \"\"\"About-section sentence enumerating scaffold tags as a feature.\"\"\"\n    if re.search(\n        r'\\[WEF\\],\\s*\\[EURASIA\\],\\s*\\[POLL\\],\\s*\\[WEB\\],\\s*\\[INFERENCE\\]',\n        html,\n    ):\n        return 'about-section lists scaffold-source tags as feature'\n    return None\n\n\ndef check_hero_scaffold_pill(html: str) -> str | None:\n    \"\"\"Hero pill that surfaced scaffold markers as a UI element.\"\"\"\n    if re.search(r'\\[WEF\\]\\s*\\[EURASIA\\]\\s*\\[POLL\\]\\s*\\[INFERENCE\\]', html):\n        return 'hero pill displays scaffold 
markers'\n    return None\n\n\ndef check_section_count_parity(html: str) -> str | None:\n    \"\"\"EN and ES h2 sections should have the same count (±1 tolerance).\"\"\"\n    en = len(re.findall(\n        r'<h2[^>]*class=\"[^\"]*\\blang-en\\b[^\"]*\"', html\n    ))\n    es = len(re.findall(\n        r'<h2[^>]*class=\"[^\"]*\\blang-es\\b[^\"]*\"', html\n    ))\n    if en == 0 and es == 0:\n        # Maybe wrapped sections, not lang-class h2 — skip\n        return None\n    if abs(en - es) > 1:\n        return f'EN/ES section count mismatch: EN={en}, ES={es}'\n    return None\n\n\ndef check_unbalanced_tables(html: str) -> str | None:\n    \"\"\"Catch broken tables (e.g. from the pseudo-pipe artefact).\"\"\"\n    opens = html.count('<table')\n    closes = html.count('</table>')\n    tr_o = html.count('<tr')\n    tr_c = html.count('</tr>')\n    td_o = html.count('<td')\n    td_c = html.count('</td>')\n    issues = []\n    if opens != closes:\n        issues.append(f'table {opens}/{closes}')\n    if tr_o != tr_c:\n        issues.append(f'tr {tr_o}/{tr_c}')\n    if td_o != td_c:\n        issues.append(f'td {td_o}/{td_c}')\n    return f'unbalanced HTML: {\"; \".join(issues)}' if issues else None\n\n\n# ── Structural-integrity helpers (Brief 7 prep priorities item 4) ──\n# Origin: BACKLOG §ESTA SEMANA #8 item 4 (pre-generation hardening); was\n# \"design-only; not yet ticketed.\" Activated 2026-05-12 PM via\n# BRIER-DISCIPLINE-SET-APPLICATION-01 autonomous task path.\n\n_H3_INSIDE_P_RE = re.compile(\n    r'<p\\b[^>]*>(?:(?!</p>)[\\s\\S]){0,2000}?<h3\\b',\n    re.IGNORECASE,\n)\n\n_DARK_BG_BARE_STRONG_RE = re.compile(\n    # Match a container with dark background, then within ~3000 chars (without\n    # closing the container) find a <strong> that does NOT have inline color\n    # style override. 
Anchors: hex bg starting #0/#1 (slate/navy range),\n    # CSS var --slate, or class cover/site-footer/dark-bg.\n    r'(?:background\\s*:\\s*(?:#[01][0-9a-fA-F]{5}\\b|var\\(\\s*--slate\\s*\\))|'\n    r'<(?:div|section|footer)[^>]*class=\"[^\"]*\\b(?:cover|site-footer|dark-bg)\\b[^\"]*\")'\n    r'[^>]*>(?:(?!</(?:div|section|footer)>)[\\s\\S]){0,3000}?'\n    r'<strong(?![^>]*style=\"[^\"]*color)',\n    re.IGNORECASE,\n)\n\n\ndef _check_toc_sync_issues(html: str) -> list[str]:\n    \"\"\"Return list of TOC anchor href targets that don't exist as id= in the document.\n\n    Scoped to anchors that look TOC-shaped (path is fragment-only, target is\n    section/§-style identifier). Filters out external links + self-canonical.\n    \"\"\"\n    # Collect all id= values in the document\n    ids = set(re.findall(r'\\bid=\"([^\"]+)\"', html, re.IGNORECASE))\n    # Collect href=\"#xxx\" fragments\n    anchors = re.findall(r'<a\\s+[^>]*href=\"#([^\"]+)\"', html, re.IGNORECASE)\n    # Filter to TOC-shape (leading letter, then word chars / dash / §). Skips\n    # fn*- and cite*-prefixed footnote anchors like #fn1 that may be defined\n    # elsewhere via different schemes; fragments that start with a digit are\n    # already rejected by the leading-letter requirement.\n    toc_shaped = [a for a in anchors\n                  if re.fullmatch(r'[a-zA-Z][\\w\\-§]{1,80}', a)\n                  and not a.startswith('fn')\n                  and not a.startswith('cite')]\n    missing = [a for a in toc_shaped if a not in ids]\n    # Dedupe preserving order\n    seen: set[str] = set()\n    out: list[str] = []\n    for a in missing:\n        if a not in seen:\n            seen.add(a)\n            out.append(a)\n    return out\n\n\ndef _check_sections_inside_tab_full(html: str) -> list[str]:\n    \"\"\"Detect <section class=\"sources|disclaimer\"> placed inside <div id=\"tab-full\">.\n\n    Mirror of test_sources_disclaimer_outside_tab_full in scripts/test-system.py\n    but scoped to single-brief input. 
Surfaces during quality_check (before\n    promote), not only via test battery.\n    \"\"\"\n    m = re.search(r'<div id=\"tab-full\"[^>]*>', html)\n    if not m:\n        return []\n    pos = m.end()\n    depth = 1\n    end_of_tab_full = None\n    for tag in re.finditer(r'<(/?)div\\b', html[pos:]):\n        if tag.group(1) == '/':\n            depth -= 1\n            if depth == 0:\n                end_of_tab_full = pos + tag.end()\n                break\n        else:\n            depth += 1\n    if end_of_tab_full is None:\n        return []\n    issues: list[str] = []\n    for sec_class in ('sources', 'disclaimer'):\n        sec_pos = html.find(f'<section class=\"{sec_class}\"')\n        if sec_pos != -1 and sec_pos < end_of_tab_full:\n            issues.append(sec_class)\n    return issues\n\n\ndef check_html_structural_integrity(html: str) -> str | None:\n    \"\"\"Detect 4 classes of structural malformation that render unpredictably or hide content.\n\n    Origin: BACKLOG §ESTA SEMANA #8 (Brief 7 prep priorities) item 4 — Brief 6\n    surfaced sources-inside-tab-full + dark-bg-strong invisibility patterns;\n    Brief 5 surfaced h3-inside-p; TOC drift hit Brief 2 / 4 historically. This\n    check consolidates the family as a single brief-level WARN.\n\n    Failure modes:\n\n      1. h3-inside-p — block-level <h3> nested inside an open <p>; HTML invalid\n         per the content-model rules; renders unpredictably across browsers and\n         RSS readers.\n\n      2. dark-bg-bare-strong — <strong> inside a dark-background container\n         (style background:#0X/#1X, var(--slate), or class cover/site-footer/\n         dark-bg) without an inline color override. The brief CSS template's\n         `strong{color:var(--slate)}` rule (#1A1F2E) renders bold labels\n         invisible against dark navy/slate backgrounds. First hit cluster:\n         a424cea (Brief 5 revisions) + feeff61 (Brief 6 executive alert).\n\n      3. 
toc-sync-broken — TOC anchor href targets missing as id= in document.\n         Drift cause: section retitled but TOC entry not updated, or\n         vice-versa. Symptom: in-page navigation breaks silently.\n\n      4. sections-inside-tab-full — <section class=\"sources|disclaimer\"> placed\n         inside <div id=\"tab-full\">. CSS rule `.tab-content{display:none}` on\n         inactive tab hides everything inside; readers on the default Brief\n         tab never see Sources or the legal disclaimer. Twice-bitten on Spain\n         Blackout 2026-04-27. Test-battery analog:\n         test_sources_disclaimer_outside_tab_full.\n\n    WARNING level — calibrate against Brief 7+8 emissions before promoting to\n    BLOCKER per feedback_runner_calibrate_then_ratchet. Reports each issue\n    distinctly so per-mode false-positive rates calibrate independently.\n    \"\"\"\n    issues: list[str] = []\n\n    if _H3_INSIDE_P_RE.search(html):\n        issues.append('h3-inside-p')\n\n    if _DARK_BG_BARE_STRONG_RE.search(html):\n        issues.append('dark-bg-bare-strong')\n\n    toc_missing = _check_toc_sync_issues(html)\n    if toc_missing:\n        sample = toc_missing[:3]\n        more = f' (+{len(toc_missing) - 3} more)' if len(toc_missing) > 3 else ''\n        issues.append(f'toc-sync-broken: missing id= for {sample}{more}')\n\n    tab_issues = _check_sections_inside_tab_full(html)\n    if tab_issues:\n        issues.append(f'sections-inside-tab-full: {\", \".join(tab_issues)}')\n\n    if issues:\n        return f'{len(issues)} structural integrity issue(s): {\" || \".join(issues)}'\n    return None\n\n\n# ── Posterior-predictive runner checks (FORECASTING-DISCIPLINE-LESSONS-2026-05-12-01\n#    sub-item 3): runner predicts brief properties that SHOULD appear under Tier A\n#    rules, then measures actual brief output. Catches drift between prompt-encoded\n#    discipline and emitted briefs. Per Gelman posterior-predictive-check pattern. 
──\n\n# Tolerant of SD header style drift across briefs (audit 2026-05-13):\n#   - Bare \"FORMAL PREDICTIONS\" (Briefs 1-2)\n#   - \"10. FORMAL PREDICTIONS\" / \"10. PREDICCIONES FORMALES\" (Brief 3 numeric prefix)\n#   - \"SECTION VII: FORMAL PREDICTIONS\" / \"SECCIÓN VII: PREDICCIONES FORMALES\" (Brief 5)\n#   - \"SECTION VIII: FORMAL PREDICTION\" singular + \"SECCIÓN VIII: PREDICCIÓN FORMAL\" (Brief 6)\n# Excludes \"HISTORIAL DE PREDICCIONES\" (track record, semantically distinct).\n# Bug history: prior regex required plural English-only with optional \"§\" prefix\n# only; silently bypassed Brief 5 (SECTION VII prefix) + Brief 6 (singular bilingual)\n# + Brief 3 (numeric prefix). E3/E5/E9 posterior-predictive checks did not fire on\n# those briefs at promote-time. Filed as RUNNER-FORMAL-PREDICTIONS-REGEX-BILINGUAL-01.\n_FORMAL_PREDICTIONS_SECTION_RE = re.compile(\n    r'<h2[^>]*>\\s*'\n    r'(?:§\\s*|'\n    r'SECTION\\s+[IVX]+\\s*:?\\s*|'\n    r'SECCI[ÓO]N\\s+[IVX]+\\s*:?\\s*|'\n    r'\\d+\\.\\s*'\n    r')?'\n    r'(?:FORMAL\\s+PREDICTION(?:S)?|PREDICCI[ÓO]N(?:ES)?\\s+FORMAL(?:ES)?)'\n    r'[^<]*</h2>(.*?)(?=<h2|<footer\\b|$)',\n    re.IGNORECASE | re.DOTALL,\n)\n\n\ndef check_prediction_e3_cluster_id_reference(html: str) -> str | None:\n    \"\"\"E3-cluster-ref (posterior-predictive): when a prediction includes an\n    \"Independence:\" sentence, that sentence should reference the cluster taxonomy\n    OR positively assert structural independence — not just be a generic boilerplate.\n\n    Per Tier A E3 rule + data/prediction_clusters.yaml 5 clusters identified in\n    audit §7. 
Catches \"Independence: this prediction is independent.\" class of\n    empty assertion that doesn't engage the cluster framework.\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    ind_re = re.compile(\n        r'Independenc(?:e|ia)\\s*:\\s*([^.<]{5,400}\\.)',\n        re.IGNORECASE,\n    )\n    inds = ind_re.findall(section)\n    if not inds:\n        return None\n\n    issues = []\n    for i, ind_text in enumerate(inds, 1):\n        has_cluster_ref = bool(re.search(\n            r'\\bcluster[-_]?\\d|cluster[-_]?id|cluster\\s+\\w+|cluster-\\d-[a-z-]+',\n            ind_text,\n            re.IGNORECASE,\n        ))\n        has_structural_ind = bool(re.search(\n            r'structural(ly)?\\s+independ|independ\\w*\\s+from|no\\s+correlation|'\n            r'sin\\s+correlación|independencia\\s+estructural|distinct\\s+driver|'\n            r'unrelated\\s+upstream',\n            ind_text,\n            re.IGNORECASE,\n        ))\n        if not (has_cluster_ref or has_structural_ind):\n            issues.append(i)\n\n    if issues:\n        return (\n            f'E3 independence-cluster-ref: Independence sentence(s) at position(s) '\n            f'{issues} (1-indexed within Formal Predictions section) lack explicit '\n            f'cluster_id reference OR structural-independence assertion. Per Tier A '\n            f'E3 + data/prediction_clusters.yaml taxonomy, Independence sentences '\n            f'should engage the cluster framework, not state generic independence.'\n        )\n    return None\n\n\ndef check_prediction_e5_ternary_sum_100(html: str) -> str | None:\n    \"\"\"E5-ternary-sum (posterior-predictive): TERNARY-SCENARIO label triggers\n    expectation that 3 scenarios with explicit probabilities summing to 100% (±2%\n    tolerance for rounding) appear in the prediction block.\n\n    Per Tier A E5 rule. 
Catches \"TERNARY-SCENARIO: 40% / 35% / 20%\" (sums to 95%)\n    or \"TERNARY-SCENARIO: 50% / 30%\" (only 2 probabilities present) emission errors.\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    ternary_re = re.compile(r'TERNARY[-\\s]SCENARIO', re.IGNORECASE)\n    ternary_matches = list(ternary_re.finditer(section))\n    if not ternary_matches:\n        return None\n\n    issues = []\n    for i, t_match in enumerate(ternary_matches, 1):\n        # Window: 2000 chars after the label (covers most prediction block sizes)\n        window = section[t_match.end():t_match.end() + 2000]\n        # Find probabilities; constrain to 1-99 to avoid matching footnote numbers\n        probs = re.findall(r'\\b(\\d{1,2})\\s*%', window)\n        if len(probs) < 3:\n            issues.append(f'ternary #{i}: only {len(probs)} probability tokens found within 2000-char window')\n        else:\n            three = [int(p) for p in probs[:3]]\n            s = sum(three)\n            if not (98 <= s <= 102):\n                issues.append(f'ternary #{i}: probabilities {three}% sum to {s}% (expected 100%±2)')\n\n    if issues:\n        return (\n            f'E5 ternary-sum-100: TERNARY-SCENARIO emission(s) violate sum-to-100 '\n            f'expectation: {\"; \".join(issues)}. Per Tier A E5 rule, scenario-decomposed '\n            f'predictions must have ≥2 mutually-exclusive scenarios with probabilities '\n            f'summing to 100% (±2% rounding tolerance).'\n        )\n    return None\n\n\ndef check_prediction_e9_pos_threshold_numeric(html: str) -> str | None:\n    \"\"\"E9-POS-numeric (posterior-predictive): POS-THRESHOLD / POS-EVENT-DATE labels\n    trigger expectation that a numeric threshold (X% / $X / N units / over Y) appears\n    in proximity to the label, not narrative magnitude (\"substantial\" / \"large\").\n\n    Per Tier A E9 rule. 
Catches \"POS-THRESHOLD: substantial defence spending increase\"\n    (no number) vs the expected \"POS-THRESHOLD: ≥$400B annual defence procurement\".\n    \"\"\"\n    m = _FORMAL_PREDICTIONS_SECTION_RE.search(html)\n    if not m:\n        return None\n    section = m.group(1)\n\n    pos_re = re.compile(\n        r'POS[-\\s](?:THRESHOLD|EVENT[-\\s]DATE|EVENT)',\n        re.IGNORECASE,\n    )\n    pos_matches = list(pos_re.finditer(section))\n    if not pos_matches:\n        return None\n\n    numeric_threshold_re = re.compile(\n        r'(?:≥|>=|>|at\\s+least|exceeds?|over|greater\\s+than|más\\s+de|al\\s+menos)\\s*'\n        r'[\\$€£]?\\s*[\\d,]+(?:\\.\\d+)?\\s*(?:[%]|billion|million|trillion|bn|mn|tn|'\n        r'\\b[a-zA-Z]{1,30}\\b)?',\n        re.IGNORECASE,\n    )\n    issues = []\n    for i, p_match in enumerate(pos_matches, 1):\n        # Window: 1000 chars after label\n        window = section[p_match.end():p_match.end() + 1000]\n        if not numeric_threshold_re.search(window):\n            label = section[p_match.start():p_match.end()].strip()\n            issues.append(f'POS label #{i} (\"{label}\")')\n\n    if issues:\n        return (\n            f'E9 POS-numeric: POS-event prediction(s) lack explicit numeric threshold '\n            f'within 1000-char proximity to class label: {\"; \".join(issues)}. 
Per Tier A '\n            f'E9 rule, POS-event predictions must specify quantitative threshold (≥ N / '\n            f'over $X / exceeds N% / at least Y units), not narrative magnitude.'\n        )\n    return None\n\n\n# ── WARNINGS ─────────────────────────────────────────────────────────────────\n\ndef check_translation_body_parity(html: str) -> str | None:\n    \"\"\"ES word count should be within reasonable ratio of EN (typically 1.0-1.5).\"\"\"\n    en_blocks = re.findall(\n        r'<div[^>]*class=\"[^\"]*\\blang-en\\b[^\"]*\"[^>]*>(.*?)</div>',\n        html, re.DOTALL,\n    )\n    es_blocks = re.findall(\n        r'<div[^>]*class=\"[^\"]*\\blang-es\\b[^\"]*\"[^>]*>(.*?)</div>',\n        html, re.DOTALL,\n    )\n    en_text = ' '.join(re.sub(r'<[^>]+>', ' ', b) for b in en_blocks)\n    es_text = ' '.join(re.sub(r'<[^>]+>', ' ', b) for b in es_blocks)\n    en_w = len(en_text.split())\n    es_w = len(es_text.split())\n    if en_w == 0:\n        return None\n    ratio = es_w / en_w\n    if ratio < 0.85:\n        return (f'ES body shorter than EN: EN={en_w}w ES={es_w}w '\n                f'(ratio={ratio:.2f}, expected 0.95-1.5)')\n    if ratio > 1.7:\n        return (f'ES body unexpectedly long: EN={en_w}w ES={es_w}w '\n                f'(ratio={ratio:.2f}, expected 0.95-1.5)')\n    return None\n\n\ndef check_meta_tags_present(html: str) -> str | None:\n    \"\"\"OG / canonical / description must be in <head>.\"\"\"\n    missing = []\n    if 'property=\"og:title\"' not in html:\n        missing.append('og:title')\n    if 'property=\"og:description\"' not in html:\n        missing.append('og:description')\n    if 'rel=\"canonical\"' not in html:\n        missing.append('canonical')\n    if not re.search(r'<meta\\s+name=\"description\"', html):\n        missing.append('description')\n    return f'missing meta tags: {\", \".join(missing)}' if missing else None\n\n\ndef check_local_link_integrity(html: str, briefs_dir: str) -> str | None:\n    \"\"\"Local 
/briefs/SD_*.html links must point to existing files.\"\"\"\n    missing = []\n    seen = set()\n    for m in re.finditer(r'href=[\"\\'](/briefs/SD_\\d+_\\d+_[\\w]+\\.html)[\"\\']',\n                         html):\n        target = m.group(1)\n        if target in seen:\n            continue\n        seen.add(target)\n        local = os.path.join(briefs_dir, os.path.basename(target))\n        if not os.path.exists(local):\n            missing.append(target)\n    return f'broken local links: {\", \".join(missing[:3])}' if missing else None\n\n\ndef check_minimum_word_count(html: str) -> str | None:\n    \"\"\"Brief should be substantive — flag suspiciously short content.\"\"\"\n    visible = re.sub(r'<style.*?</style>|<script.*?</script>', '',\n                     html, flags=re.DOTALL)\n    text = re.sub(r'<[^>]+>', ' ', visible)\n    words = len(text.split())\n    if words < 1500:\n        return f'low word count: {words}w (typical brief 1500+)'\n    return None\n\n\n# Heuristic: significant numerical claims that should carry a citation.\n# The pattern catches: percentages at or above the noise floor (≥5%),\n# currency amounts in millions/billions/trillions, and year ranges.\n# A \"citation marker\" is anything within 250 chars that looks like a\n# source attribution: bracketed source label, a URL or hyperlink, a\n# parenthetical author/year, a \"Source:\" prefix, or a known\n# source-organisation name.\n\nNUMERICAL_CLAIM_RE = re.compile(\n    r'(?:'\n    # Percentages ≥5% (matches the prompt rule threshold; excludes 1-4%\n    # which the editorial standard does not require to be sourced).\n    # 5-9% or 10-99% or 100-999%.\n    r'\\b(?:[5-9]|\\d{2,3})(?:[.,]\\d+)?\\s*%(?!s)'\n    # Currency amounts ≥1 million (millions/billions/trillions in any form).\n    r'|[€$£]\\s*\\d+(?:[.,]\\d+)?\\s*(?:billion|million|trillion|bn|mn|tn|millones|miles de millones)\\b'\n    r'|\\b\\d+(?:[.,]\\d+)?\\s*(?:billion|million|trillion)\\s+(?:dollars|euros|USD|EUR)\\b'\n    # Year ranges (2014-2025 etc.) 
— useful for evaluating event windows.\n    r'|\\b(?:1[89]\\d{2}|20[0-4]\\d)\\b(?=\\s*[-–—]\\s*(?:1[89]\\d{2}|20[0-4]\\d))'\n    r')',\n    re.IGNORECASE,\n)\n\n# Source-organization names that count as \"cited in prose\" when they\n# appear within the proximity window. Editorial style favours\n# conversational citation (\"the IMF's April 2026 WEO identifies...\")\n# over bracket-formal (\"[IMF, WEO 2026]\"). The runner must accept both.\nSOURCE_ORG_NAMES = (\n    r'IMF|FMI|NATO|OTAN|OECD|OCDE|WEF|World\\s+Economic\\s+Forum|'\n    r'IEA|EIA|BIS|ECB|BCE|Federal\\s+Reserve|Fed|World\\s+Bank|Banco\\s+Mundial|'\n    r'Eurostat|UN|United\\s+Nations|ONU|EU|European\\s+(?:Commission|Council|Parliament|Union)|'\n    r'CE|Comisión\\s+Europea|Reuters|Bloomberg|Financial\\s+Times|FT|'\n    r'New\\s+York\\s+Times|NYT|Washington\\s+Post|Wall\\s+Street\\s+Journal|WSJ|'\n    r'Economist|Foreign\\s+Affairs|Foreign\\s+Policy|Politico|Le\\s+Monde|'\n    r'El\\s+País|S&P|Moody\\'?s|Fitch|REE|CNMC|ENTSO-E|TSMC|ASML|'\n    r'Tesla|Microsoft|Google|Apple|Meta|OpenAI|Anthropic|xAI|COSCO|'\n    r'Pew|Eurasia\\s+Group|RAND|Brookings|Atlantic\\s+Council|CSIS|Chatham\\s+House|'\n    r'CWA|Munich\\s+Security\\s+Conference|'\n    # Defense + critical-minerals research institutions (added 2026-05-02 retrofit Brief 5)\n    r'SIPRI|EDA|Agencia\\s+Europea\\s+de\\s+Defensa|IISS|MERICS|EIB|BEI|USGS|Lynas|EUR-Lex'\n)\n\nCITATION_NEAR_RE = re.compile(\n    r'(?:'\n    r'\\[[A-Z]{2,}[^\\]]{0,80}\\]'                # [WEF Global Risks Report 2026]\n    r'|\\([A-Z][a-zA-Z]+\\s+(?:et\\s+al\\.?,?\\s+)?\\d{4}\\)'  # (Smith et al. 
2024)\n    r'|\\bhttps?://[^\\s<>\"]+'                   # bare URL\n    r'|<a\\s+[^>]*href=[\"\\']https?://'          # hyperlink\n    r'|\\b(?:source|fuente|según|per|via)\\s*:\\s*[A-Z]'  # \"Source: NATO...\"\n    r'|\\b(?:' + SOURCE_ORG_NAMES + r')\\b'      # prose mention of known source org\n    r')',\n    re.IGNORECASE,\n)\n\n\ndef check_numerical_citation_contract(html: str) -> str | None:\n    \"\"\"Heuristic: numerical claims should sit near a citation marker.\n\n    Removes <style>, <script>, and elements with KPI-like classes\n    (kpi-val, score, badge) that show summary numbers already cited\n    elsewhere in the body. Then for each remaining numerical claim,\n    checks ±250 chars of surrounding text for any citation-shaped token.\n\n    NB: Heuristic with known limits — false positives in dense tables\n    where one citation covers many figures. Reported as a WARNING for\n    human review, not a blocker. A high count (≥8) is the signal worth\n    investigating.\n    \"\"\"\n    # Strip non-prose containers\n    cleaned = re.sub(r'<style[^>]*>.*?</style>', ' ', html, flags=re.DOTALL)\n    cleaned = re.sub(r'<script[^>]*>.*?</script>', ' ', cleaned, flags=re.DOTALL)\n    # Remove elements that exist for UI summary, not editorial claim:\n    # KPI cards (.kpi, .kpi-val, .kpi-label), score chips (.score),\n    # and badges (.badge, .pill).\n    cleaned = re.sub(\n        r'<(?:span|div|td)[^>]*class=\"[^\"]*\\b(?:kpi|kpi-val|kpi-label|score|badge|pill|tag|sh|chart-title)\\b[^\"]*\"[^>]*>.*?</(?:span|div|td)>',\n        ' ', cleaned, flags=re.DOTALL,\n    )\n    text = re.sub(r'<[^>]+>', ' ', cleaned)\n    text = re.sub(r'&[a-z]+;', ' ', text)\n    text = re.sub(r'\\s+', ' ', text)\n\n    uncited: list[str] = []\n    for m in NUMERICAL_CLAIM_RE.finditer(text):\n        if len(uncited) >= 12:\n            break\n        window_start = max(0, m.start() - 250)\n        window_end = min(len(text), m.end() + 250)\n        window = 
text[window_start:window_end]\n        if not CITATION_NEAR_RE.search(window):\n            ctx = text[max(0, m.start() - 50):min(len(text), m.end() + 50)]\n            uncited.append(f'\"…{ctx.strip()}…\"')\n\n    # Threshold: 8 strikes ≈ enough to suspect systemic uncited claims\n    if len(uncited) >= 8:\n        return (f'{len(uncited)}+ numerical claims without nearby citation; '\n                f'sample: {\" | \".join(uncited[:3])}')\n    return None\n\n\nURL_RE = re.compile(r'href=[\"\\'](https?://[^\"\\'<>]+)[\"\\']')\n\n\n# ── L5: critical-fact registry ───────────────────────────────────────────────\n# Loaded lazily on first call. Set FACTS_REGISTRY_PATH env var to override.\n\n_FACTS_CACHE: dict | None = None\n\n\ndef _load_facts_registry() -> dict:\n    \"\"\"Load data/facts.yaml once. Returns {} if missing or yaml not installed.\"\"\"\n    global _FACTS_CACHE\n    if _FACTS_CACHE is not None:\n        return _FACTS_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _FACTS_CACHE = {}\n        return _FACTS_CACHE\n    path = os.environ.get(\n        'FACTS_REGISTRY_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'facts.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or {}\n    except OSError:\n        data = {}\n    _FACTS_CACHE = data if isinstance(data, dict) else {}\n    return _FACTS_CACHE\n\n\ndef _filter_registry_by_brand(registry: dict, brand: str | None) -> dict:\n    \"\"\"Filter facts.yaml-style registry by brand field. Default behavior:\n\n    - If brand is None: return all entries (back-compat — pre-FORK-PREP-GAP-1\n      callers without brand context get full registry).\n    - If brand is a specific value (e.g. 'shadow-dynamics' or 'clave'):\n      return entries whose `brand` field matches OR equals 'both'. 
Entries\n      with no `brand` field default to 'shadow-dynamics' (pre-2026-05-09\n      schema implicit assumption preserved).\n\n    FORK-PREP-GAP-1-DATA-BRAND-AWARE-01 (BACKLOG 2026-05-09): brand-aware\n    schema unblocks first Clave brief by ensuring SD-specific facts\n    (NATO 5%, IMF 60%, COSCO Hamburg 24.9%, etc.) don't fire on Clave\n    Hispanic-context briefs and vice versa.\n    \"\"\"\n    if brand is None:\n        return registry\n    filtered = {}\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        entry_brand = entry.get('brand', 'shadow-dynamics')\n        if entry_brand == brand or entry_brand == 'both':\n            filtered[fact_id] = entry\n    return filtered\n\n\ndef check_critical_facts(html: str, brand: str | None = None) -> str | None:\n    \"\"\"Match HTML against the curated registry of recurring fact errors.\n\n    Each registry entry has `forbidden: [{pattern, reason}, ...]`. A pattern\n    match (case-insensitive) is a warning — not a blocker, since false\n    positives are possible for narrow contexts. Use `[fact_id] reason …` in\n    the detail so the failure log clusters cleanly for the retrospective.\n\n    `brand` (optional) filters registry to entries matching the brief's brand\n    (or `both`). When None, all entries apply (back-compat; pre-FORK-PREP-GAP-1\n    callers preserved). 
Derived from brief filename by `_derive_brand_from_filename`.\n    \"\"\"\n    registry = _filter_registry_by_brand(_load_facts_registry(), brand)\n    if not registry:\n        return None\n    hits: list[str] = []\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        for rule in entry.get('forbidden') or []:\n            pat = rule.get('pattern')\n            reason = rule.get('reason', '')\n            context_negate = rule.get('context_negate')\n            if not pat:\n                continue\n            try:\n                m = re.search(pat, html, flags=re.IGNORECASE)\n                if not m:\n                    continue\n                if context_negate:\n                    win = 400\n                    s = max(0, m.start() - win)\n                    e = min(len(html), m.end() + win)\n                    if re.search(context_negate, html[s:e], flags=re.IGNORECASE):\n                        continue\n                hits.append(f'[{fact_id}] {reason}')\n                break  # one hit per fact is enough\n            except re.error:\n                continue\n    if hits:\n        return f'{len(hits)} fact(s) flagged: {\" || \".join(hits[:3])}'\n    return None\n\n\ndef check_magnitude_framing_consistency(html: str) -> str | None:\n    \"\"\"Detect anti-canonical magnitude framings against facts.yaml::magnitude_flips.\n\n    Catches errors like 'capacity runs 40% short of wartime' when canonical is\n    'capacity at 40% of wartime' — same number, opposite semantic. 
Per-claim\n    Layer 4 (Tavily) verification cannot catch this because both framings verify\n    against the same source numerically; the bug is cross-section coherence\n    against a canonical magnitude framing recorded in data/facts.yaml.\n\n    Schema: facts.yaml entries optionally carry a `magnitude_flips` list of\n    `{pattern, reason}` regex rules — same structure as `forbidden` but\n    semantically scoped to magnitude-flip errors (vs direct contradictions).\n\n    Origin: Brief 6 Critical Minerals pre-promote audit 2026-05-08 caught the\n    same fact (`ammunition_capacity_wartime`) framed canonically in 2 places\n    and anti-canonically in 2 places within the same document; runner missed\n    because the existing `forbidden` patterns target the contradiction\n    direction ('fully adequate'), not the magnitude flip ('40% short').\n    \"\"\"\n    registry = _load_facts_registry()\n    if not registry:\n        return None\n    hits: list[str] = []\n    for fact_id, entry in registry.items():\n        if not isinstance(entry, dict):\n            continue\n        for rule in entry.get('magnitude_flips') or []:\n            pat = rule.get('pattern')\n            reason = rule.get('reason', '')\n            if not pat:\n                continue\n            try:\n                if re.search(pat, html, flags=re.IGNORECASE):\n                    hits.append(f'[{fact_id}] {reason}')\n                    break  # one hit per fact is enough\n            except re.error:\n                continue\n    if hits:\n        return f'{len(hits)} magnitude flip(s): {\" || \".join(hits[:3])}'\n    return None\n\n\n# ── PATTERN RECOGNITION CITATIONS ────────────────────────────────────────────\n# Tracker: PATTERN-RECOGNITION-SYSTEM-01\n# Spec:    EDITORIAL-PATTERN-XREF-01 (Forecaster prompt) + data/patterns.yaml\n# Loaded lazily; respects PATTERNS_REGISTRY_PATH env override.\n\n_PATTERNS_CACHE: set[str] | None = None\n\n\ndef _load_pattern_ids() -> set[str]:\n    
\"\"\"Return set of registered pattern_ids from data/patterns.yaml.\n    Empty set if missing or yaml unavailable (gracefully no-ops the check).\"\"\"\n    global _PATTERNS_CACHE\n    if _PATTERNS_CACHE is not None:\n        return _PATTERNS_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _PATTERNS_CACHE = set()\n        return _PATTERNS_CACHE\n    path = os.environ.get(\n        'PATTERNS_REGISTRY_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'patterns.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or {}\n    except OSError:\n        data = {}\n    ids: set[str] = set()\n    for entry in (data.get('patterns') or []):\n        pid = entry.get('pattern_id') if isinstance(entry, dict) else None\n        if pid:\n            ids.add(pid)\n    _PATTERNS_CACHE = ids\n    return _PATTERNS_CACHE\n\n\n_PATTERN_ID_RE = re.compile(r'\\b(PATTERN-[A-Z]+(?:-[A-Z]+)+)\\b')\n_BRIEF_FILE_RE = re.compile(r'\\b((?:SD|CL)_\\d{8}_\\d{4}_[A-Za-z_]+\\.html)\\b')\n\n\ndef check_pattern_citations(html: str, briefs_dir: str | None = None) -> str | None:\n    \"\"\"Validate §12 PATTERN RECOGNITION integrity (post EDITORIAL-PATTERN-XREF-01).\n\n    - Pattern_ids cited (PATTERN-NOUN-MECHANISM uppercase) must exist in\n      data/patterns.yaml.\n    - Brief filenames cited (SD_*.html / CL_*.html) must exist on disk.\n    - Briefs that use prose-only references (no formal markers) pass trivially.\n\n    WARNING level — false positives possible if a §12 reformulates a pattern\n    without naming a pattern_id, or references a brief by title rather than\n    filename. 
Editorial review remains the source of truth.\n    \"\"\"\n    m = re.search(r'(?is)PATTERN\\s+RECOGNITION.*?(?=<h[12]|</section|</body)', html)\n    if not m:\n        return None\n    section = m.group(0)\n\n    issues: list[str] = []\n\n    pattern_ids = set(_PATTERN_ID_RE.findall(section))\n    if pattern_ids:\n        registered = _load_pattern_ids()\n        if registered:\n            unknown = pattern_ids - registered\n            if unknown:\n                issues.append(f'unregistered pattern_ids: {sorted(unknown)}')\n\n    cited_files = set(_BRIEF_FILE_RE.findall(section))\n    if cited_files:\n        search_dirs = []\n        if briefs_dir:\n            search_dirs.append(briefs_dir)\n        search_dirs.extend([\n            '/root/n8n/local-files/briefs',\n            '/root/n8n/local-files/briefs-clave',\n        ])\n        missing = [\n            f for f in cited_files\n            if not any(os.path.exists(os.path.join(d, f)) for d in search_dirs)\n        ]\n        if missing:\n            issues.append(f'missing brief filenames: {sorted(missing)}')\n\n    if issues:\n        return '§12 ' + ' || '.join(issues)\n    return None\n\n\ndef check_url_integrity(html: str, *, network: bool = False,\n                        timeout: float = 4.0) -> str | None:\n    \"\"\"Optionally HEAD each cited URL to detect 4xx/5xx and DNS failures.\n\n    Skipped by default (network=False) so the runner stays offline-clean\n    in CI. 
Enable per-call by passing `--check-urls` to the CLI or\n    setting `network=True` in library calls.\n    \"\"\"\n    if not network:\n        return None\n    import urllib.request\n    import urllib.error\n    import socket\n\n    urls = list(set(URL_RE.findall(html)))\n    failures: list[str] = []\n    for url in urls:\n        try:\n            req = urllib.request.Request(url, method='HEAD',\n                                         headers={'User-Agent': 'sd-quality-check/1.0'})\n            with urllib.request.urlopen(req, timeout=timeout) as resp:\n                if resp.status >= 400:\n                    failures.append(f'{resp.status} {url}')\n        except urllib.error.HTTPError as e:\n            if e.code >= 400:\n                failures.append(f'{e.code} {url}')\n        except (urllib.error.URLError, socket.timeout, OSError) as e:\n            failures.append(f'unreachable {url} ({type(e).__name__})')\n\n    if failures:\n        return f'{len(failures)} broken citation URL(s): {\"; \".join(failures[:3])}'\n    return None\n\n\n# Footnote anchors introduced by EDITORIAL-FOOTNOTE-01.\n# Permissive on attribute order/class/whitespace.\nSUP_FN_RE = re.compile(\n    r'<sup\\b[^>]*>\\s*<a\\b[^>]*\\bhref=[\"\\']#fn-(\\d+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\nLI_FN_ID_RE = re.compile(\n    r'<li\\b[^>]*\\bid=[\"\\']fn-(\\d+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\n\n\ndef check_footnote_integrity(html: str) -> str | None:\n    \"\"\"Footnote anchor integrity (EDITORIAL-FOOTNOTE-01 F2).\n\n    When a brief uses footnotes, every <sup><a href=\"#fn-N\"> must resolve\n    to a <li id=\"fn-N\">, every <li id=\"fn-N\"> must be referenced ≥1 time,\n    numbering must be contiguous 1..max, and ids must be unique.\n\n    Brief with zero footnote markup → returns None (current corpus state).\n    \"\"\"\n    sup_refs = [int(m.group(1)) for m in SUP_FN_RE.finditer(html)]\n    li_ids = [int(m.group(1)) for m in LI_FN_ID_RE.finditer(html)]\n\n    if not sup_refs 
and not li_ids:\n        return None\n\n    issues: list[str] = []\n    sup_set = set(sup_refs)\n    li_set = set(li_ids)\n\n    orphan_refs = sorted(sup_set - li_set)\n    if orphan_refs:\n        issues.append(f'<sup> ref(s) without <li>: {orphan_refs[:5]}')\n\n    unreferenced = sorted(li_set - sup_set)\n    if unreferenced:\n        issues.append(f'<li> never referenced: {unreferenced[:5]}')\n\n    if li_ids:\n        max_n = max(li_ids)\n        gaps = sorted(set(range(1, max_n + 1)) - li_set)\n        if gaps:\n            issues.append(f'numbering not contiguous, missing: {gaps[:5]}')\n\n    if len(li_ids) != len(li_set):\n        from collections import Counter\n        dupes = sorted(n for n, c in Counter(li_ids).items() if c > 1)\n        issues.append(f'duplicate <li id=\"fn-N\">: {dupes[:5]}')\n\n    return '; '.join(issues) if issues else None\n\n\n_DATE_NUM_WORDS = {\n    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,\n    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,\n    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,\n    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,\n    'nineteen': 19, 'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50,\n    'uno': 1, 'una': 1, 'dos': 2, 'tres': 3, 'cuatro': 4, 'cinco': 5,\n    'seis': 6, 'siete': 7, 'ocho': 8, 'nueve': 9, 'diez': 10,\n    'once': 11, 'doce': 12, 'trece': 13, 'catorce': 14, 'quince': 15,\n    'dieciséis': 16, 'diecisiete': 17, 'dieciocho': 18,\n    'diecinueve': 19, 'veinte': 20, 'treinta': 30, 'cuarenta': 40,\n    'cincuenta': 50,\n}\n_DATE_UNIT_DAYS = {\n    'day': 1, 'days': 1, 'día': 1, 'días': 1,\n    'week': 7, 'weeks': 7, 'semana': 7, 'semanas': 7,\n    'month': 30, 'months': 30, 'mes': 30, 'meses': 30,\n    'year': 365, 'years': 365, 'año': 365, 'años': 365,\n}\n_DATE_EN_MONTHS = ['january', 'february', 'march', 'april', 'may', 'june',\n                   'july', 'august', 'september', 'october', 'november', 'december']\n_DATE_ES_MONTHS = 
['enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio',\n                   'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre']\n_DATE_MONTH_INDEX = {\n    **{m: i + 1 for i, m in enumerate(_DATE_EN_MONTHS)},\n    **{m: i + 1 for i, m in enumerate(_DATE_ES_MONTHS)},\n}\n_DATE_CLAIM_RE = re.compile(\n    r'\\b(\\d{1,3}|' + '|'.join(sorted(_DATE_NUM_WORDS, key=len, reverse=True)) + r')\\s+'\n    r'(' + '|'.join(sorted(_DATE_UNIT_DAYS, key=len, reverse=True)) + r')\\s+'\n    r'(?:after|tras|después\\s+de)\\b',\n    re.IGNORECASE,\n)\n# DATE-ARITH Phase 2 — duration-to-end-date pattern (Brief 6 origin 2026-05-08).\n# Catches \"N-unit [noun] running to / expires / expira / hasta DATE\" — explicit\n# duration tied to an explicit end date. The Phase 1 _DATE_CLAIM_RE catches\n# \"N units after [event]\" but skips when window dates span >1.2× claim\n# (treated as unrelated dates). For the running-to construction the dates\n# ARE the claim's anchors; both directions of mismatch matter.\n# Brief 6 case: \"6-month suspension running to 2026-11-10\" + \"October 2025\n# truce\" in window. claimed=180d, actual≈400d, ratio≈2.2.\n_DATE_RUNNING_TO_RE = re.compile(\n    r'\\b(\\d{1,3})[\\s–—-]+'  # N + (space|hyphen|en-dash|em-dash)\n    r'(' + '|'.join(sorted(_DATE_UNIT_DAYS, key=len, reverse=True)) + r')\\b'\n    r'(?:\\s+[\\wÀ-ſ-]+){0,4}?\\s+'  # 0-4 filler words (suspension, pause, etc.)\n    r'(?:running\\s+to|expires?(?:\\s+on)?|que\\s+expira(?:r[áa])?\\s+(?:el\\s+)?|'\n    r'expira(?:r[áa])?\\s+(?:el\\s+)?|hasta\\s+(?:el\\s+)?|through\\s+|until\\s+(?:the\\s+)?|'\n    r'ending\\s+(?:on\\s+)?|ends?\\s+(?:on\\s+)?|finaliz(?:a|ar[áa])\\s+(?:el\\s+)?)'\n    r'\\s*(?:<[^>]+>\\s*)?(?:el\\s+)?'  # optional inline HTML or article\n    r'(\\d{4}-\\d{2}-\\d{2}|\\d{1,2}-\\d{1,2}-\\d{4})',  # ISO or DD-MM-YYYY\n    re.IGNORECASE,\n)\n# Month-year reference, e.g. 
\"October 2025\" / \"octubre de 2025\" / \"octubre 2025\"\n_DATE_MONTH_YEAR_RE = re.compile(\n    r'\\b(' + '|'.join(_DATE_MONTH_INDEX.keys()) + r')\\s+(?:de\\s+)?(\\d{4})\\b',\n    re.IGNORECASE,\n)\n_DATE_MD_Y_RE = re.compile(\n    r'\\b(' + '|'.join(_DATE_EN_MONTHS + _DATE_ES_MONTHS) + r')\\s+(\\d{1,2}),?\\s+(\\d{4})\\b',\n    re.IGNORECASE,\n)\n_DATE_DM_Y_RE = re.compile(\n    r'\\b(\\d{1,2})\\s+(?:de\\s+)?(' + '|'.join(_DATE_EN_MONTHS + _DATE_ES_MONTHS) + r')\\s+(?:de\\s+)?(\\d{4})\\b',\n    re.IGNORECASE,\n)\n\n\n_MDTOHTML_BLOCK_WRAP_RE = re.compile(\n    r'<p>\\s*</?(?:section|ol|ul|div|table|h[1-6])\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_mdtohtml_paragraph_wrap_block_tag(html: str) -> str | None:\n    \"\"\"Detect block-level HTML tags wrapped in <p> by mdToHTML.\n\n    The Forecaster's mdToHTML JS function in Format Output treats\n    blank-line-separated tokens as paragraphs. When the model emits raw\n    block-level HTML (<section>, <ol>, <ul>, <div>, <table>, <h1-6>) inside\n    a markdown section per EDITORIAL-FOOTNOTE-01 / sources instructions,\n    mdToHTML wraps those tags in <p> producing malformed markup like\n    `<p><section class=\"sources\"></p>` and `<p></ol></p>`.\n\n    Browser parsers tolerate this (auto-close the <p> when a block tag\n    opens), but the structure is wrong: accessibility tooling chokes,\n    parent-child relationships are broken, and CSS selectors that depend\n    on direct ancestry fail. Brief 5 European Defense shipped with this\n    pattern in its sources block (commit fe8c9e1 retroactive fix).\n\n    Inline tags (<strong>, <em>, <a>, <span>) are valid inside <p> and\n    NOT flagged. 
Only block-level tags trigger this check.\n\n    WARNING — every hit is a real markup bug, but only 1+ is needed for\n    a reader-visible problem (sources block, footnotes ol, etc.).\n\n    Cross-ref memory: feedback_mdtohtml_block_tag_paragraph_wrap.md\n    (3 fix paths; this check implements the cheapest — post-generation\n    detection. Patching mdToHTML or restructuring EDITORIAL_PROMPT are\n    the upstream fixes that would prevent the bug from being emitted).\n    \"\"\"\n    hits = list(_MDTOHTML_BLOCK_WRAP_RE.finditer(html))\n    if not hits:\n        return None\n    # Sample the first two contexts for the failure log\n    samples = []\n    for m in hits[:2]:\n        s = max(0, m.start() - 25)\n        e = min(len(html), m.end() + 50)\n        samples.append(html[s:e].replace('\\n', ' '))\n    return (f'{len(hits)} block-level HTML tag(s) wrapped in <p> by mdToHTML — '\n            f'malformed markup; sample: {\" | \".join(samples)}')\n\n\n# ── L01: investment-recommendation patterns (regulatory hygiene) ──────────────\n# Cross-ref: data/facts.yaml + EDITORIAL-LEGAL-01/02 + D5-LICENSING.\n# Started as WARNING; promote to BLOCKER once 1 week of clean runs across\n# briefs 5+ confirms the pattern set is calibrated.\n\n# Stock tickers with major exchange suffixes (Frankfurt, Milan, London, Paris,\n# Brussels, Madrid, Amsterdam, Frankfurt-Xetra, Toronto, Hong Kong, Helsinki,\n# Stockholm, Mexico, Copenhagen, Madrid, Vienna, US, NYSE, NASDAQ).\n_L01_TICKER_RE = re.compile(\n    r'\\b[A-Z]{1,5}\\.(?:DE|MI|L|PA|BR|MC|AS|FR|TO|HK|HE|ST|MX|CO|MA|VI|US|N|OQ)\\b'\n)\n\n# Equity-research vocabulary that signals an instrument-level recommendation.\n# Deliberately conservative — generic \"Verdict:\" and bare \"Buy\"/\"Sell\" excluded\n# because briefs use \"Verdict\" as editorial-conclusion structure, not equity rating.\n_L01_VERDICT_PATTERNS = (\n    r'\\bStrong fundamental case\\b',\n    r'\\b(?:asymmetric|structural)\\s+upside\\b',\n    
r'\\bhigh[\\-\\s]conviction\\s+long\\b',\n    r'\\b(?:Outperform|Underperform|Overweight|Underweight)\\s+rating\\b',\n    r'\\b(?:Buy|Sell)\\s+rating\\b',\n    r'\\bHold\\s+with\\s+asymmetric\\b',\n)\n_L01_VERDICT_RE = re.compile('|'.join(_L01_VERDICT_PATTERNS), re.IGNORECASE)\n\n# Implicit instrument-level recommendations.\n_L01_IMPLICIT_PATTERNS = (\n    r'\\b(?:currently|presently)\\s+(?:underpriced|overpriced|underweight|overweight)\\b',\n    r'\\bbenefits\\s+from\\s+safe[\\-\\s]haven\\s+demand\\b',\n    r'\\btailwind\\s+for\\s+[A-Z][a-zA-Z]+\\b',  # \"tailwind for Rheinmetall\"\n    r'\\bis\\s+the\\s+\\w+\\s+play\\b',  # \"is the rearmament play\"\n)\n_L01_IMPLICIT_RE = re.compile('|'.join(_L01_IMPLICIT_PATTERNS), re.IGNORECASE)\n\n\ndef check_investment_recommendation_patterns(html: str) -> str | None:\n    \"\"\"Detect investment-recommendation patterns that require regulatory licensing.\n\n    Three categories:\n      1. Stock tickers with exchange suffix (e.g. RHM.DE, LDO.MI, BA.L)\n      2. Equity-research verdict vocabulary (Strong fundamental case, Outperform,\n         high-conviction long, asymmetric upside trigger, etc.)\n      3. Implicit instrument-level recommendations (currently underpriced,\n         tailwind for [Company], is the X play)\n\n    Per D5-LICENSING (operator decision pending): SD has no CNMV/EAF license.\n    Mitigation while editorial-only: keep analysis above the instrument level.\n    Sectors yes, mechanisms yes, predictions yes; tickers no, equity verdicts no.\n\n    WARNING level for the first week post-deployment. 
Promote to BLOCKER once\n    false-positive rate is calibrated across the 5+ existing briefs.\n    \"\"\"\n    hits = []\n    for m in _L01_TICKER_RE.finditer(html):\n        hits.append(f'ticker:{m.group(0)}')\n    for m in _L01_VERDICT_RE.finditer(html):\n        hits.append(f'verdict:{m.group(0)[:40]}')\n    for m in _L01_IMPLICIT_RE.finditer(html):\n        hits.append(f'implicit:{m.group(0)[:40]}')\n    if not hits:\n        return None\n    # Deduplicate while preserving order\n    seen = set()\n    uniq = [h for h in hits if not (h in seen or seen.add(h))]\n    return (f'{len(hits)} investment-recommendation pattern hit(s) — '\n            f'L01 regulatory-hygiene flag (D5-LICENSING pending); '\n            f'samples: {\", \".join(uniq[:6])}')\n\n\n# ── L03: listed-entity density (sector vs instrument boundary) ───────────────\n# Cross-ref EDITORIAL-LEGAL-02 L03 + D5-LICENSING. Where L01 catches explicit\n# tickers and verdict vocabulary, L03 catches the softer pattern: a listed\n# company named ≥3× and surrounded by financial-metric tokens (revenue,\n# EBITDA, target price, P/E, etc.). Even without a verdict, that combination\n# reads as instrument-level analysis. Mitigation while editorial-only:\n# rephrase at the sector/mechanism level. Catalogue lives at\n# `data/listed-entities.yaml` (chat-Claude proposes additions per role 2 in\n# EDITORIAL-LEGAL-06).\n\n_LISTED_ENTITIES_CACHE: list | None = None\n\n\ndef _load_listed_entities() -> list:\n    \"\"\"Load data/listed-entities.yaml once. 
Returns [] if missing or yaml not installed.\"\"\"\n    global _LISTED_ENTITIES_CACHE\n    if _LISTED_ENTITIES_CACHE is not None:\n        return _LISTED_ENTITIES_CACHE\n    try:\n        import yaml\n    except ImportError:\n        _LISTED_ENTITIES_CACHE = []\n        return _LISTED_ENTITIES_CACHE\n    path = os.environ.get(\n        'LISTED_ENTITIES_PATH',\n        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'listed-entities.yaml'),\n    )\n    try:\n        with open(path, encoding='utf-8') as f:\n            data = yaml.safe_load(f) or []\n    except OSError:\n        data = []\n    if not isinstance(data, list):\n        data = []\n    _LISTED_ENTITIES_CACHE = data\n    return _LISTED_ENTITIES_CACHE\n\n\n# Instrument-level metric tokens (EN+ES). Distinct from L01 verdict\n# vocabulary AND from generic financial vocabulary used in sectoral analysis\n# (\"revenue\", \"valuation\", \"EBITDA\" alone are too broad — a brief about a\n# strategic-chokepoint company will mention revenue without recommending it).\n# These tokens require trailing context signalling specific-instrument\n# analysis: target price, share price, per-share metrics with a number,\n# trading-at-discount framings, etc.\n_L03_METRIC_RE = re.compile(\n    # Target/price-target with number context (EN+ES)\n    r'\\btarget\\s+price\\s+of\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\bprecio\\s+objetivo\\s+(?:de\\s+)?[\\€\\$£]\\s*\\d+|'\n    r'\\b(?:price|precio)\\s+target\\s+of\\b|'\n    # Per-share metrics with number\n    r'\\bP/E\\s+(?:ratio\\s+)?(?:of|de|near|around|circa)\\s+\\d+|'\n    r'\\bPER\\s+(?:ratio\\s+)?(?:de|aproximado|cercano)\\s+\\d+|'\n    r'\\bEPS\\s+(?:of|de)\\s+[\\€\\$£]?\\s*\\d+|'\n    r'\\bBPA\\s+(?:de|aproximado)\\s+[\\€\\$£]?\\s*\\d+|'\n    # Trading-at-X framings\n    r'\\b(?:trading|cotizando?|cotiza)\\s+(?:at|en|a)\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\b(?:trading|cotizando?|cotiza)\\s+(?:at|a)\\s+\\d+(?:\\.\\d+)?\\s*(?:×|x|times)\\s+earnings|'\n    # 
Dividend/market-cap with explicit value\n    r'\\bdividend\\s+yield\\s+(?:of|near|approximately)\\s+\\d+(?:\\.\\d+)?\\s*%|'\n    r'\\brentabilidad\\s+por\\s+dividendo\\s+(?:de|del)\\s+\\d+(?:\\.\\d+)?\\s*%|'\n    r'\\bmarket\\s+cap(?:italization)?\\s+of\\s+[\\€\\$£]\\s*\\d+|'\n    r'\\bcapitalizaci[óo]n\\s+burs[áa]til\\s+de\\s+[\\€\\$£]\\s*\\d+|'\n    # Quarterly/FY guidance + value\n    r'\\bQ[1-4]\\s+(?:revenue|earnings|results)\\s+(?:of|de|beat|missed|exceeded)|'\n    r'\\bFY\\s*20\\d{2}\\s+(?:revenue|earnings|guidance)\\s+(?:of|de)\\s+[\\€\\$£]\\s*\\d+|'\n    # Discount/premium-to-X framings (instrument-level)\n    r'\\btrading\\s+at\\s+\\d+(?:\\.\\d+)?\\s*%\\s+(?:discount|premium)|'\n    r'\\bcotiza\\s+con\\s+(?:un\\s+)?(?:descuento|prima)\\s+del?\\s+\\d+',\n    re.IGNORECASE,\n)\n\n\n# ── A5 / D5.5: sources density floor ─────────────────────────────────────────\n# Origin: Iberian Blackout audit (EL-04) found brief lists 5 sources where\n# baseline editorial density should be 12-13. Threshold ≥10 conservative\n# floor with margin. Counts distinct URLs across footnote bodies + a\n# dedicated sources block. 
Skips infrastructure URLs (CC license, brief\n# canonical, briefs index, substack, social) so floor reflects editorial\n# sourcing, not template plumbing.\n\n_A5_INFRA_URL_RE = re.compile(\n    r'(?:'\n    r'creativecommons\\.org'\n    r'|substack\\.com'\n    r'|shadowdynamics\\.ai/(?:about|methodology|terms|disclaimer|privacy|briefs/?$|index\\.html)?'\n    r'|clave\\.press/(?:about|methodology|terms|disclaimer|privacy)?'\n    r'|twitter\\.com/.*shadowdynamics'\n    r'|x\\.com/.*shadowdynamics'\n    r'|linkedin\\.com/(?:company/)?shadow-dynamics'\n    r')',\n    re.IGNORECASE,\n)\n\n\ndef check_sources_density_minimum(html: str) -> str | None:\n    \"\"\"Brief sources block + footnote bodies should carry ≥10 distinct URLs.\n\n    Sources counted:\n      - URLs inside <li id=\"fn-N\">…</li> footnote bodies\n      - URLs inside an explicit sources block (h2/h3 with text matching\n        \"Sources\" / \"Fuentes\" / \"Sources & methodology\") until the next h2/h3.\n\n    Excluded:\n      - Infrastructure URLs (CC license, brief canonical, briefs index,\n        substack, social) — see _A5_INFRA_URL_RE.\n      - URLs anywhere outside footnotes/sources block (e.g. inline\n        Pattern Recognition links don't count toward the editorial floor).\n\n    Conditional: brief with 0 footnotes AND no sources block → no-op\n    (legacy briefs without footnote markup; pre-F3 corpus). 
Cross-ref\n    EDITORIAL-LEGAL-06 A5 + EL-04 D5.5.\n\n    Threshold ≥10 (D5.5 target was 12-13; floor with margin).\n    \"\"\"\n    urls: set[str] = set()\n\n    # Collect URLs from footnote bodies.\n    for m in re.finditer(r'<li\\s+id=[\"\\']fn-\\d+[\"\\'][^>]*>(.*?)</li>',\n                          html, re.IGNORECASE | re.DOTALL):\n        for um in re.finditer(r'https?://[^\\s<>\"\\']+', m.group(1)):\n            url = um.group(0).rstrip('.,;:)')\n            if not _A5_INFRA_URL_RE.search(url):\n                urls.add(url)\n\n    # Collect URLs from an explicit Sources/Fuentes block.\n    src_header = re.search(\n        r'<(h[23])[^>]*>[^<]{0,40}(?:Sources?|Fuentes?|Bibliography|Bibliografía)\\b[^<]{0,40}</\\1>',\n        html, re.IGNORECASE,\n    )\n    if src_header:\n        # Block extends until the next h2/h3 or end of body.\n        start = src_header.end()\n        m_next = re.search(r'<h[23]\\b', html[start:], re.IGNORECASE)\n        end = start + m_next.start() if m_next else len(html)\n        block = html[start:end]\n        for um in re.finditer(r'https?://[^\\s<>\"\\']+', block):\n            url = um.group(0).rstrip('.,;:)')\n            if not _A5_INFRA_URL_RE.search(url):\n                urls.add(url)\n\n    # Conditional skip: legacy brief without footnote markup AND no sources block.\n    has_footnotes = bool(re.search(r'<li\\s+id=[\"\\']fn-\\d+[\"\\']', html, re.I))\n    if not has_footnotes and not src_header:\n        return None\n\n    if len(urls) >= 10:\n        return None\n    return (\n        f'editorial source density below floor: {len(urls)} distinct URL(s) '\n        f'in footnotes+sources block (target ≥10, baseline 12-13 per EL-04 '\n        f'D5.5) — strengthen sourcing in next revision'\n    )\n\n\n# ── A4 / B5-PATTERN: cross-lingual topic self-reference ──────────────────────\n# Origin: Spain Blackout brief 2026-04-27 had EN block saying \"structurally\n# identical to the one identified in Shadow Dynamics' Iberian 
Blackout\n# analysis\" — but the brief IS the Iberian Blackout brief. The ES half\n# correctly referenced \"el análisis de Shadow Dynamics sobre los puertos\n# europeos de COSCO\" (a different prior brief). Bug class: model treats the\n# current brief as a previous external reference. Fix in commit 9cec1a0.\n# Detection: extract h1's topic kernel; if \"Shadow Dynamics(['s])? [topic]\n# (analysis|brief|report)\" appears in body, that's likely self-reference.\n\n_A4_TOPIC_STOPWORDS = {\n    'the', 'and', 'for', 'with', 'from', 'into', 'about',\n    'analysis', 'brief', 'report', 'shadow', 'dynamics',\n    'una', 'del', 'los', 'las', 'sobre', 'desde', 'hasta',\n    'análisis', 'informe', 'los', 'estudio',\n}\n\n\ndef check_cross_lingual_topic_consistency(html: str) -> str | None:\n    \"\"\"Detect self-reference to current brief topic as if it were external.\n\n    Algorithm:\n      1. Extract <h1> inner text; strip tags/entities.\n      2. Take the segment before the first ':', '—', '–' or '-' (topic portion).\n      3. Pull significant words (≥4 chars, not stopwords).\n      4. If topic kernel has ≥2 words, search body for\n         `Shadow Dynamics(?:'s|')? <topic-words> (analysis|brief|report)` —\n         if found, flag as likely self-reference.\n\n    WARNING — may have false positives where a brief legitimately\n    references its own headline elsewhere in the body. Pre-publish reviewer\n    confirms or dismisses. 
Cross-ref EDITORIAL-LEGAL-06 A4 + bug class\n    B5-PATTERN-EN (commit 9cec1a0).\n    \"\"\"\n    m_h1 = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.IGNORECASE | re.DOTALL)\n    if not m_h1:\n        return None\n    h1_text = re.sub(r'<[^>]+>', ' ', m_h1.group(1))\n    h1_text = re.sub(r'&[a-z]+;', ' ', h1_text, flags=re.IGNORECASE)\n    h1_text = re.sub(r'\\s+', ' ', h1_text).strip()\n    if not h1_text:\n        return None\n    # Topic portion = everything before the first ':', '—', '–' or '-'.\n    topic_part = re.split(r'[:—\\-–]', h1_text, maxsplit=1)[0].strip()\n    topic_words = [\n        w for w in re.split(r'\\W+', topic_part)\n        if len(w) >= 4 and w.lower() not in _A4_TOPIC_STOPWORDS\n    ]\n    if len(topic_words) < 2:\n        return None\n    # Build a phrase pattern requiring the first (up to three) topic words to\n    # appear consecutively, separated only by whitespace. The 40-char slack is\n    # applied around the phrase by the two regexes below.\n    topic_phrase = r'\\b' + r'\\s+'.join(re.escape(w) for w in topic_words[:3]) + r'\\b'\n    self_ref_re = re.compile(\n        r'\\bShadow\\s+Dynamics(?:\\'s|\\')?\\s+[^<]{0,40}'\n        + topic_phrase\n        + r'[^<]{0,40}\\b(?:analysis|brief|report|análisis|informe|estudio)\\b',\n        re.IGNORECASE,\n    )\n    # Also reverse form: \"in Shadow Dynamics' analysis (of|sobre) [topic]\"\n    self_ref_re_rev = re.compile(\n        r'\\bShadow\\s+Dynamics(?:\\'s|\\')?\\s+(?:analysis|brief|report|análisis|informe|estudio)\\s+(?:of|on|sobre)\\s+'\n        + r'[^<]{0,40}'\n        + topic_phrase,\n        re.IGNORECASE,\n    )\n    matches = list(self_ref_re.finditer(html)) + list(self_ref_re_rev.finditer(html))\n    if not matches:\n        return None\n    samples = [re.sub(r'\\s+', ' ', m.group(0))[:120] for m in matches[:2]]\n    return (f'{len(matches)} self-reference(s) to current brief topic '\n            f'\"{topic_part}\" framed as external Shadow Dynamics work — '\n            f'B5-PATTERN-EN class: {\" | \".join(samples)}')\n\n\n# ── D01: anchor-number → footnote-URL gate ───────────────────────────────────\n# Cross-ref 
EDITORIAL-LEGAL-02 D01. Every magnitude wrapped in <strong>/<b>\n# (i.e. an \"anchor number\" the brief asks the reader to remember) must point\n# to a footnote whose body carries either a hyperlink (http(s)) or a known\n# source-org name. Anchor numbers attached to fn-N entries that are nothing\n# but \"SD estimate\" / \"internal inference\" don't satisfy the contract — that\n# was the Brief 5 pattern (S01 caught the ratio; D01 catches the per-anchor\n# integrity). WARNING for one baseline week, then promote to BLOCKER.\n# Conditional: brief with 0 <li id=\"fn-N\"> entries → no-op (legacy/pre-F3).\n\n_D01_MAGNITUDE_RE = re.compile(\n    r'(?:'\n    r'\\b\\d+(?:[.,]\\d+)?\\s*%'                                  # 5%, 24.9%\n    r'|[€$£]\\s*\\d+(?:[.,]\\d+)?\\s*(?:[BMK]|bn|mn|tn|billion|million|trillion|millones|miles\\s+de\\s+millones)?\\b'\n    r'|\\b\\d+(?:[.,]\\d+)?\\s*(?:GW|MW|TWh|GWh|km|tons?|toneladas|barrels|barriles)\\b'\n    r'|\\b\\d{1,3}(?:[,.]\\d{3})+\\b'                            # 1,000,000 / 1.000.000\n    r')',\n    re.IGNORECASE,\n)\n\n_D01_FN_REF_RE = re.compile(\n    r'<sup[^>]*>\\s*<a\\s+href=[\"\\']#fn-(\\d+)[\"\\'][^>]*>',\n    re.IGNORECASE,\n)\n\n_D01_FN_BODY_RE = re.compile(\n    r'<li\\s+id=[\"\\']fn-(\\d+)[\"\\'][^>]*>(.*?)</li>',\n    re.IGNORECASE | re.DOTALL,\n)\n\n_D01_URL_OR_SOURCE_RE = re.compile(\n    r'https?://[^\\s<>\"\\']+'\n    r'|\\b(?:' + SOURCE_ORG_NAMES + r')\\b'\n    r'|\\bBOE-A-\\d{4}-\\d+\\b'                                  # Spanish Official Bulletin refs\n    r'|\\b(?:SIPRI|MERICS|Bruegel|EPRS|Carnegie|Politico|Statista|EDA|EIB|CNAT|Inelfe)\\b',\n    re.IGNORECASE,\n)\n\n\ndef check_anchor_number_footnote_url(html: str) -> str | None:\n    \"\"\"D01 — anchor-numbers (magnitude in <strong>/<b>) must point to a\n    footnote whose body has a URL or a known source-org token.\n\n    Algorithm:\n      1. If brief has 0 <li id=\"fn-N\"> entries → no-op (legacy briefs).\n      2. 
Build map fn_id → (URL_or_source_token_present: bool).\n      3. For each <strong>/<b> containing a magnitude, look in the next\n         100 chars for <sup><a href=\"#fn-N\">. If absent OR fn-N body has\n         no URL and no source-org token → strike.\n      4. Fire WARNING if ≥3 strikes (calibration window — too few suggests\n         editorial choice; ≥3 suggests systemic).\n\n    Started as WARNING. Promote to BLOCKER after 1 week clean. Cross-ref\n    EDITORIAL-LEGAL-02 D01.\n    \"\"\"\n    fn_bodies: dict[str, str] = {\n        m.group(1): m.group(2)\n        for m in _D01_FN_BODY_RE.finditer(html)\n    }\n    if not fn_bodies:\n        return None  # legacy brief without footnote scaffolding\n\n    # Map fn_id → boolean \"footnote body cites URL or source org\"\n    fn_has_source: dict[str, bool] = {}\n    for fn_id, body in fn_bodies.items():\n        fn_has_source[fn_id] = bool(_D01_URL_OR_SOURCE_RE.search(body))\n\n    # Find magnitude-bearing <strong>/<b> tags.\n    strong_re = re.compile(\n        r'<(strong|b)\\b[^>]*>(.*?)</\\1>',\n        re.IGNORECASE | re.DOTALL,\n    )\n\n    strikes: list[str] = []\n    for sm in strong_re.finditer(html):\n        body = sm.group(2)\n        # Must contain a magnitude pattern.\n        if not _D01_MAGNITUDE_RE.search(body):\n            continue\n        # Look for <sup><a href=\"#fn-N\"> in next ~100 chars after </strong>/</b>.\n        end = sm.end()\n        window = html[end:end + 100]\n        ref = _D01_FN_REF_RE.search(window)\n        magnitude_snip = re.sub(r'\\s+', ' ', body)[:50]\n        if not ref:\n            strikes.append(f'no fn-ref: <strong>{magnitude_snip}</strong>')\n            continue\n        fn_id = ref.group(1)\n        if not fn_has_source.get(fn_id, False):\n            strikes.append(\n                f'fn-{fn_id} lacks URL/source-org token: '\n                f'<strong>{magnitude_snip}</strong>'\n            )\n\n    # Conservative threshold during baseline week.\n    if 
def check_bilingual_sources_present(html: str) -> str | None:
    """Warn when a brief has English footnotes but no Spanish counterpart.

    Three things are counted in the raw HTML (case-insensitively, with either
    quote style around the attribute value):

      * English footnote entries  — ``<li id="fn-N">``
      * body references           — ``href="#fn-N"``
      * Spanish footnote entries  — ``<li id="fn-N-es">``

    The check stays silent when there are no English footnote entries (legacy
    brief without footnote markup), when nothing in the body links to a
    footnote, or when at least one ``fn-N-es`` entry exists. Only the
    remaining case — English footnotes that are referenced, with zero
    Spanish-suffix entries — produces a failure message.

    Returns:
        ``None`` when the brief passes, otherwise a human-readable reason.
    """
    flags = re.IGNORECASE

    english_count = sum(
        1 for _ in re.finditer(r'<li\s+id=["\']fn-\d+["\']', html, flags)
    )
    reference_count = sum(
        1 for _ in re.finditer(r'href=["\']#fn-\d+["\']', html, flags)
    )

    # Nothing to toggle between: no footnote list, or a list nobody links to.
    if english_count == 0 or reference_count == 0:
        return None

    # A single Spanish-suffix entry is enough to count as bilingual coverage.
    if re.search(r'<li\s+id=["\']fn-\d+-es["\']', html, flags) is not None:
        return None

    return (
        f'bilingual sources missing: {english_count} English footnote(s) and '
        f'{reference_count} body reference(s) but 0 Spanish-suffix footnote(s) '
        '(`id="fn-N-es"`); Spanish-toggle readers cannot access citation '
        'infrastructure. Add a translated sources block — Pattern A '
        '(`<ol class="lang-en">` + `<ol class="lang-es">` inside one '
        '`<section class="sources">`) or Pattern B (one `<section>` inside '
        'each `<div class="lang-XX">`); see project_sd_briefs_sources_block_patterns.'
    )
str | None:\n    \"\"\"Listed-entity density × financial-metric proximity (sector/instrument boundary).\n\n    Algorithm:\n      1. Strip tags to plain text.\n      2. For each entity in `data/listed-entities.yaml`, count word-boundary\n         occurrences of `name` + every alias (case-insensitive).\n      3. If total ≥ 3, scan ±150 chars around any single mention for a\n         financial-metric token (revenue, EBITDA, target price, P/E, EPS, etc.).\n      4. Fire one warning per entity that meets both conditions.\n\n    WARNING (not BLOCKER) — legitimate sectoral discussion can name companies.\n    The warning surfaces the boundary case for editorial review.\n\n    Skip entities with empty `aliases` and empty `name` (defensive).\n    Skip entries whose `name` has < 4 chars to avoid acronym false-positives\n    (e.g. \"EDP\" matching \"EDP\" inside other contexts) unless the entry\n    explicitly opts into short-token matching via a future flag (not today).\n    \"\"\"\n    catalogue = _filter_entities_by_brand(_load_listed_entities(), brand)\n    if not catalogue:\n        return None\n\n    # Plain text — strip tags so we don't count hits inside <head>/<style> etc.\n    text = re.sub(r'<(?:script|style)[^>]*>.*?</(?:script|style)>', ' ',\n                  html, flags=re.IGNORECASE | re.DOTALL)\n    text = re.sub(r'<[^>]+>', ' ', text)\n\n    flagged: list[str] = []\n    for entry in catalogue:\n        if not isinstance(entry, dict):\n            continue\n        name = (entry.get('name') or '').strip()\n        if not name:\n            continue\n        aliases = entry.get('aliases') or []\n        forms = [name] + [a for a in aliases if a]\n        # Build a single regex matching any form. 
Sort longest-first so\n        # \"BAE Systems\" wins before bare \"BAE\".\n        forms_sorted = sorted(set(forms), key=len, reverse=True)\n        # Skip extremely short forms (<4 chars) unless they're the only form\n        # — too prone to false matches on acronym collisions.\n        forms_sorted = [\n            re.escape(f) for f in forms_sorted\n            if len(f) >= 4 or len(forms_sorted) == 1\n        ]\n        if not forms_sorted:\n            continue\n        try:\n            entity_re = re.compile(r'\\b(?:' + '|'.join(forms_sorted) + r')\\b',\n                                    re.IGNORECASE)\n        except re.error:\n            continue\n\n        matches = list(entity_re.finditer(text))\n        if len(matches) < 3:\n            continue\n\n        # Check for instrument-metric proximity: require ≥2 distinct hits to\n        # avoid one-coincidence noise on briefs that legitimately discuss a\n        # company's strategic role.\n        metric_hits: list[str] = []\n        seen_positions: set[int] = set()\n        for m in matches:\n            s = max(0, m.start() - 150)\n            e = min(len(text), m.end() + 150)\n            for mm in _L03_METRIC_RE.finditer(text[s:e]):\n                pos = s + mm.start()\n                if pos in seen_positions:\n                    continue\n                seen_positions.add(pos)\n                metric_hits.append(mm.group(0)[:50])\n                if len(metric_hits) >= 3:\n                    break\n            if len(metric_hits) >= 3:\n                break\n        if len(metric_hits) < 2:\n            continue\n\n        flagged.append(\n            f'{name} ({len(matches)}×, instrument metrics: '\n            + ', '.join(f'\"{h}\"' for h in metric_hits[:2]) + ')'\n        )\n\n    if not flagged:\n        return None\n    return (f'{len(flagged)} listed entit{\"y\" if len(flagged)==1 else \"ies\"} '\n            f'with financial-metric proximity (L03 sector-vs-instrument): '\n            
+ '; '.join(flagged[:5]) + ' — consider sectoral rephrase')\n\n\n# ── P01: placeholder/template-failure detection ───────────────────────────────\n# Specific patterns observed in Brief 5 + likely model-template misfires.\n# Generic \"double space anywhere\" too noisy; we target known shapes.\n\n_P01_PATTERNS = (\n    # English patterns\n    r'\\bDrawing\\s+on\\s*,',\n    r'\\bcarry\\s+where\\b',\n    r'\\bcarry\\s\\s+\\w+',\n    r'\\braising\\s\\s+\\w+',\n    r'\\bAll\\s+carry\\s+where\\b',\n    # Spanish patterns\n    r'\\bA\\s+partir\\s+de\\s*,',\n    r'\\bLas\\s+fuentes\\s\\s+\\w+',\n    r'\\bTodas\\s+llevan\\s\\s+\\w+',\n    r'\\bvisible\\s+en\\s+las\\s+fuentes\\s+—\\s+\\b',\n    r'\\bbasada\\s+en\\s*,',\n    r'\\bsegún\\s*,',\n    # Generic \"verb + comma + nothing meaningful before next noun\"\n    r'\\b(?:on|in|de|en)\\s*,\\s*(?:corpus|sources|el|la|los|las)\\b',\n)\n_P01_RE = re.compile('|'.join(_P01_PATTERNS), re.IGNORECASE)\n\n\ndef check_placeholder_template_failure(html: str) -> str | None:\n    \"\"\"Detect placeholder gaps from model-template misfires.\n\n    Origin: Brief 5 European Defence shipped 7 placeholders (commit 1818bcb\n    retroactive A3 fix): \"raising  questions\", \"Drawing on , corpus,\",\n    \"All carry  where not directly sourced\", \"Las fuentes  documentan\",\n    \"A partir de , el corpus\", \"visible en las fuentes —\", \"Todas llevan\n    donde no cuentan con fuente directa\". Pattern class: model emitted\n    template tokens (verb + comma + missing-noun-gap + connective) that\n    the editorial pass should have filled.\n\n    Started as WARNING. Promote to BLOCKER once 1 week of clean runs\n    confirms the pattern set is calibrated. 
# ── P03 (partial): cross-lingual false-friend "billón/trillón" ───────────────
# Spanish "billón" = 10^12 (a trillion in US sense); English "billion" = 10^9.
# When a translator carries "billion" → "billón" the figure is 1000× off.
# Current corpus convention keeps shorthand `$X B` in both languages, so any
# occurrence of `billón` / `trillón` in the lang-es scope is a translator
# slip until proven otherwise. WARNING (not BLOCKER) because legitimate
# Spanish-academic use of billón=10^12 exists; the warning surfaces the
# token for human review. Cross-ref EDITORIAL-LEGAL-02 P03 (this is the
# false-friend half — the full numerical-equivalence sweep is deferred).

_P03_FALSE_FRIEND_RE = re.compile(r'\b(?:bill[óo]n(?:es)?|trill[óo]n(?:es)?)\b', re.IGNORECASE)


def check_cross_lingual_false_friend(html: str) -> str | None:
    """Flag billón/trillón tokens that appear inside the Spanish scope.

    A token is attributed to a language by the nearest preceding
    ``class="lang-en"`` / ``class="lang-es"`` marker in the raw HTML. Tokens
    with no preceding marker, or whose nearest marker is the English one, are
    ignored.

    Returns:
        ``None`` when no Spanish-scope token is found; otherwise a message
        with the hit count and up to three samples, each showing the token
        and about 30 characters of whitespace-normalised context either side.
    """
    hits = []
    for m in _P03_FALSE_FRIEND_RE.finditer(html):
        pos = m.start()
        # Bounded rfind searches html[0:pos] in place; slicing the prefix
        # first would copy the whole document head for every single hit.
        en_idx = html.rfind('class="lang-en"', 0, pos)
        es_idx = html.rfind('class="lang-es"', 0, pos)
        # es_idx > en_idx already implies an ES marker was found (rfind
        # returns -1 on a miss), so no separate "no marker at all" test.
        if es_idx <= en_idx:
            continue
        ctx_snip = html[max(0, pos - 30):min(len(html), m.end() + 30)]
        ctx_snip = re.sub(r'\s+', ' ', ctx_snip).strip()
        hits.append(f'{m.group(0)!r} ({ctx_snip!r})')
    if not hits:
        return None
    return (f'{len(hits)} false-friend hit(s) in ES scope — '
            f'EN "billion"=10^9 vs ES "billón"=10^12 (use "mil millones" '
            f'for 10^9): ' + '; '.join(hits[:3]))
to be statistically meaningful\n    if not inference_ids:\n        return None\n    ratio = inf_refs / total_refs\n    if ratio <= 0.25:\n        return None\n    return (f'inference-citation ratio {inf_refs}/{total_refs} = {ratio:.0%} '\n            f'exceeds 25% (S01 threshold); inference fn IDs: '\n            f'{sorted(inference_ids, key=int)} — strengthen with corpus or '\n            f'external citation, or label specific numbers as \"SD estimate\"')\n\n\n# ── P02: bilingual structural parity (h3 + table + p) ────────────────────────\n# Extends `section_count_parity` (which covers h2 only) to detect ES\n# truncation that survives at the section-header level but loses subsections\n# or content paragraphs. Origin: EDITORIAL-LEGAL-02 P02 — Brief 5 European\n# Defense ES half had Sección VIII truncated (h2 OK, h3 + p missing).\n# Tolerances calibrated against the 5-brief corpus on 2026-05-02:\n#   h3:    Δ ≤ 2  (corpus max Δ=1, allow some drift for headings)\n#   table: Δ ≤ 1  (corpus max Δ=0)\n#   p:     Δ ≤ max(8, 20% of larger)  (corpus max Δ=4 on 63 p tags)\n\ndef _count_in_lang_scope(html: str, tag: str) -> tuple[int, int]:\n    \"\"\"Count <tag> occurrences attributed to lang-en vs lang-es by nearest\n    preceding `class=\"lang-XX\"` opener. Tags outside any lang scope are\n    ignored. Used by P02; keep consistent with the heuristic in\n    EDITORIAL-LEGAL-05 audit.\"\"\"\n    en = es = 0\n    for m in re.finditer(rf'<{tag}[\\s>]', html):\n        ctx = html[:m.start()]\n        en_idx = ctx.rfind('class=\"lang-en\"')\n        es_idx = ctx.rfind('class=\"lang-es\"')\n        if max(en_idx, es_idx) < 0:\n            continue\n        if en_idx > es_idx:\n            en += 1\n        else:\n            es += 1\n    return en, es\n\n\ndef check_bilingual_extended_parity(html: str) -> str | None:\n    \"\"\"Bilingual structural parity beyond h2 — guards ES truncation that\n    section_count_parity (h2-only) misses.\n\n    Cross-ref EDITORIAL-LEGAL-02 P02. 
# ── L02: regulatory disclaimer presence (EN + ES) ────────────────────────────
# Mandates that every published brief carries the bilingual regulatory
# disclaimer in its footer block. Origin: EDITORIAL-LEGAL-02 L02 — added
# 2026-05-02 after disclaimer was wired into the global footer template
# (commit 687d8f8). Acts as a regression guard against accidental template
# rollback. Two anchors required (one per language) to avoid false-pass on
# briefs that include only one half of the disclaimer.

_L02_EN_RE = re.compile(
    r'\b(?:not\s+investment\s+advice|editorial\s+analysis(?:\s+and\s+opinion)?|'
    r'does\s+not\s+constitute\s+investment\s+advice)\b',
    re.IGNORECASE,
)
_L02_ES_RE = re.compile(
    r'\b(?:no\s+constituye\s+asesoramiento|an[áa]lisis\s+editorial(?:\s+y\s+opini[óo]n)?|'
    r'no\s+constituye\s+recomendaci[óo]n)\b',
    re.IGNORECASE,
)


def check_regulatory_disclaimer_present(html: str) -> str | None:
    """Require both the English and the Spanish disclaimer anchor.

    The whole document is searched once per language with ``_L02_EN_RE`` and
    ``_L02_ES_RE``; the search is not restricted to the footer element.

    Returns:
        ``None`` when both anchors are found, otherwise a message naming
        every language whose anchor is absent (EN listed before ES).
    """
    anchors = (
        (_L02_EN_RE, 'EN ("not investment advice" / "editorial analysis")'),
        (_L02_ES_RE, 'ES ("no constituye asesoramiento" / "análisis editorial")'),
    )
    absent = [label for pattern, label in anchors if pattern.search(html) is None]
    if absent:
        return ('regulatory disclaimer missing in: ' + ', '.join(absent) + ' — '
                'L02 footer template regression (cross-ref EDITORIAL-LEGAL-02)')
    return None
def check_investor_verdict_contradiction(html: str) -> str | None:
    """Detect a Verdict line that contradicts the brief's own upside scenario.

    Fires only when BOTH of these appear somewhere in the HTML
    (case-insensitive, each match confined to a run without ``<``):

      * "fracture" followed within 150 characters by "upside", "benefit"
        or "procurement space";
      * "Verdict:" followed within 200 characters by
        "downside from NATO fracture".

    Origin: Brief 5 (European Defense, 2026-05-01), whose body described
    NATO fracture as procurement-space upside while the Verdict line called
    it a downside.

    Returns:
        ``None`` when the two statements do not co-occur, else the failure
        reason.
    """
    required_patterns = (
        # The body describes fracture as an opportunity …
        r'fracture[^<]{0,150}(?:upside|benefit|procurement\s+space)',
        # … while the verdict frames the same event as a loss.
        r'Verdict:[^<]{0,200}downside\s+from\s+NATO\s+fracture',
    )
    if not all(re.search(rx, html, re.I) for rx in required_patterns):
        return None
    return ('Investor verdict contradicts scenario analysis '
            '(fracture upside described but verdict says downside)')
def check_date_arithmetic(html: str) -> str | None:
    """DATE-ARITHMETIC-CHECK-01 (Phase 1 + Phase 2).

    Works on the tag-stripped text of the brief and reports duration claims
    that explicit dates nearby cannot support.

    Origin: Spain Blackout 2026-04-27 shipped "OP 7.4 approved June 12,
    2025 — fourteen months after the cascade [April 28, 2025]". Real
    gap is ~45 days, not 14 months.

    Phase 1 — "N units after" claims (as matched by ``_DATE_CLAIM_RE``):
      * Skip the claim when the 40 characters before it end in a qualifier
        ("less than", "fewer than", "almost", "nearly", "menos de", "casi",
        "approximately"/"approx.", "aproximadamente"/"aprox.").
      * Collect explicit dates within 300 characters either side; at least
        two are required.
      * Compare the span between the earliest and latest of those dates with
        the claimed duration and fire only when the span is under 80% of
        the claim — i.e. no pair of window dates could support it. The
        opposite direction is not reported because wider window dates are
        typically unrelated to the claim.

    Phase 2 — "N-unit … running to DATE" claims (``_DATE_RUNNING_TO_RE``):
      * Parse the end date (ISO ``YYYY-MM-DD`` or ``DD-MM-YYYY``).
      * Look in the 300 characters BEFORE the claim for a start anchor:
        explicit dates, plus month-year mentions approximated to day 15.
      * Use the latest anchor that precedes the end date and fire when the
        resulting span is off by roughly 50% in either direction.

    Claims whose computed duration is zero or negative (e.g. a literal
    "0 days") are skipped in both phases: they carry no checkable magnitude
    and would otherwise divide by zero.

    Returns:
        ``None`` when nothing is flagged; otherwise a message containing the
        first two findings and a count of any further ones.
    """
    UPPER_BOUND_RE = re.compile(
        r'(less\s+than|fewer\s+than|almost|nearly|menos\s+de|casi|'
        r'aproximadamente|aprox\.?|approximately|approx\.?)\s*$',
        re.IGNORECASE,
    )
    text = re.sub(r'<[^>]+>', ' ', html)

    def _extract_dates(window: str) -> list:
        """Return every valid month-day-year / day-month-year date in *window*."""
        out = []

        def _add(mname: str, day: int, year: int) -> None:
            mi = _DATE_MONTH_INDEX.get(mname.lower())
            if mi and 1 <= day <= 31 and 1900 <= year <= 2100:
                try:
                    out.append(date(year, mi, day))
                except ValueError:
                    pass  # day does not exist in that month (e.g. 31 June)

        for m in _DATE_MD_Y_RE.finditer(window):
            _add(m.group(1), int(m.group(2)), int(m.group(3)))
        for m in _DATE_DM_Y_RE.finditer(window):
            _add(m.group(2), int(m.group(1)), int(m.group(3)))
        return out

    findings: list[str] = []
    for cm in _DATE_CLAIM_RE.finditer(text):
        # Skip approximations where smaller actual is correct.
        prefix = text[max(0, cm.start() - 40):cm.start()]
        if UPPER_BOUND_RE.search(prefix):
            continue

        n_str, unit = cm.group(1), cm.group(2)
        if n_str.isdigit():
            n = int(n_str)
        else:
            n = _DATE_NUM_WORDS.get(n_str.lower())
            if n is None:
                continue
        unit_days = _DATE_UNIT_DAYS.get(unit.lower())
        if not unit_days:
            continue
        claimed_days = n * unit_days
        if claimed_days <= 0:
            # "0 days after …": nothing to compare, and the ratio below
            # would raise ZeroDivisionError.
            continue

        start = max(0, cm.start() - 300)
        end = min(len(text), cm.end() + 300)
        dates = _extract_dates(text[start:end])
        if len(dates) < 2:
            continue
        actual_gap = (max(dates) - min(dates)).days
        if actual_gap == 0:
            continue
        ratio = actual_gap / claimed_days
        # Fire only when claim is larger than what window dates can support.
        # ratio > 1.2 is skipped — typically unrelated dates in the window,
        # not a real contradiction (the claim's anchor may be a named event).
        if ratio < 0.8:
            findings.append(
                f'"{cm.group(0)}" claims ~{claimed_days}d but window dates span only '
                f'{actual_gap}d ({min(dates).isoformat()}→{max(dates).isoformat()}, '
                f'ratio={ratio:.2f})'
            )

    # Phase 2 — duration-to-end-date pattern: "N-unit ... running to DATE".
    # For each match, look in the 300 chars BEFORE it for a start anchor
    # (explicit date or month-year reference). Compute span vs claimed; fire
    # on mismatch in EITHER direction (unlike Phase 1, where wider window
    # dates are assumed to be unrelated to the claim's anchor).
    def _parse_iso_or_dmy(s: str) -> "date | None":
        """Parse ``YYYY-MM-DD`` or ``DD-MM-YYYY``; return None if neither fits."""
        try:
            if re.match(r'^\d{4}-\d{2}-\d{2}$', s):
                y, mo, d = s.split('-')
                return date(int(y), int(mo), int(d))
            m = re.match(r'^(\d{1,2})-(\d{1,2})-(\d{4})$', s)
            if m:
                d, mo, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
                # ES convention DD-MM-YYYY (the brief 6 ES side uses 10-11-2026
                # for Nov 10 2026). If MM > 12, swap.
                if mo > 12 and d <= 12:
                    d, mo = mo, d
                return date(y, mo, d)
        except (ValueError, IndexError):
            return None
        return None

    for cm in _DATE_RUNNING_TO_RE.finditer(text):
        n_str, unit, end_str = cm.group(1), cm.group(2), cm.group(3)
        if not n_str.isdigit():
            continue
        n = int(n_str)
        unit_days = _DATE_UNIT_DAYS.get(unit.lower())
        if not unit_days:
            continue
        claimed_days = n * unit_days
        if claimed_days <= 0:
            # Zero-length duration: skip instead of dividing by zero below.
            continue
        end_date = _parse_iso_or_dmy(end_str)
        if end_date is None:
            continue
        # Search the 300 chars BEFORE the claim for a plausible start anchor.
        win_start = max(0, cm.start() - 300)
        win = text[win_start:cm.start()]
        # Try explicit dates first (most precise).
        anchors = _extract_dates(win)
        # Then add month-year references (less precise — use day 15).
        for mym in _DATE_MONTH_YEAR_RE.finditer(win):
            mname, year = mym.group(1).lower(), int(mym.group(2))
            mi = _DATE_MONTH_INDEX.get(mname)
            if mi:
                try:
                    anchors.append(date(year, mi, 15))
                except ValueError:
                    pass  # year outside what datetime.date accepts
        # Filter to anchors strictly before the end date.
        anchors = [a for a in anchors if a < end_date]
        if not anchors:
            continue
        # Most plausible start = latest anchor before end (closest to claim).
        candidate_start = max(anchors)
        actual_days = (end_date - candidate_start).days
        if actual_days <= 0:
            continue
        ratio = actual_days / claimed_days
        # Fire when claim is meaningfully off in EITHER direction (≥50% drift).
        # Wider tolerance than Phase 1 because month-year fallback uses day 15
        # approximation (±15 days slack baked in).
        if ratio > 1.5 or ratio < 0.67:
            findings.append(
                f'"{cm.group(0)[:80]}" claims ~{n} {unit} ({claimed_days}d) '
                f'but {candidate_start.isoformat()}→{end_date.isoformat()} '
                f'spans {actual_days}d (ratio={ratio:.2f})'
            )

    if findings:
        head = '; '.join(findings[:2])
        more = f' (+{len(findings) - 2} more)' if len(findings) > 2 else ''
        return f'date-arithmetic vs window-dates mismatch: {head}{more}'
    return None
Hits to these \"displace LLM time\" because\n# they require precise citation to a primary document, not summarization.\n# Aggregator domains (Wikipedia, generic news) and self-references do NOT count.\n_DEF03_CANONICAL_DOMAINS: frozenset[str] = frozenset({\n    # Spain / EU primary\n    'boe.es', 'cnmc.es', 'ree.es', 'csn.es', 'ec.europa.eu', 'europa.eu',\n    'eib.org', 'ecb.europa.eu', 'consilium.europa.eu', 'europarl.europa.eu',\n    'eda.europa.eu', 'frontex.europa.eu',\n    # Energy / electricity\n    'entsoe.eu', 'iea.org', 'irena.org',\n    # Defence / nuclear\n    'nato.int', 'sipri.org', 'iiss.org', 'world-nuclear-news.org',\n    # Trade / multilateral\n    'imf.org', 'worldbank.org', 'oecd.org', 'wto.org', 'bis.org',\n    'unctad.org', 'un.org',\n    # US official\n    'sec.gov', 'treasury.gov', 'federalreserve.gov', 'bls.gov', 'bea.gov',\n    'energy.gov', 'state.gov', 'defense.gov',\n    # Sectoral / industry primary\n    'unef.es', 'tsmc.com', 'asml.com', 'smic.com',\n    # Research institutions (curated)\n    'rand.org', 'brookings.edu', 'atlanticcouncil.org', 'csis.org',\n    'cfr.org', 'carnegieendowment.org', 'merics.org', 'bruegel.org',\n    'europeanpolicycentre.eu',\n    # Risk-research\n    'weforum.org', 'eurasiagroup.net',\n})\n\n_DEF03_AGGREGATOR_DOMAINS: frozenset[str] = frozenset({\n    'wikipedia.org', 'wikimedia.org', 'creativecommons.org',\n    'shadowdynamics.ai', 'substack.com',\n})\n\n\n# QA-INCONTESTABLE-03 — temporal_window_consistency check.\n# Catches: time-range labels in §VERDICT or §FORMAL PREDICTIONS that\n# don't match prediction-deadline arithmetic ±10%, when no hedge word.\n# Brief 6 issue #2 case: \"6-month window\" claim with explicit\n# (2026-05-08 → 2026-10-31) actual span = 5.78 months. 
Without hedge\n# word, ratio 0.96 → flag.\n\n_TWC_WINDOW_RE = re.compile(\n    r'(\\d+(?:[\\.,]\\d+)?)\\s*(?:[–-]\\s*(\\d+(?:[\\.,]\\d+)?)\\s*)?'\n    r'[-\\s]?(month|day|week|year|mes|mese[s]?|d[ií]a|semana|año)s?\\s*'\n    r'(?:prediction\\s+)?(?:window|period|ventana|periodo|per[ií]odo)',\n    re.IGNORECASE,\n)\n_TWC_ISO_DATE_RE = re.compile(r'\\b(20\\d{2})-(\\d{2})-(\\d{2})\\b')\n_TWC_HEDGE_RE = re.compile(\n    r'(approximately|roughly|aproximadamente|aprox\\.?|approx\\.?|~|circa|cerca\\s+de|'\n    r'about|around|alrededor|en\\s+torno)',\n    re.IGNORECASE,\n)\n_TWC_UNIT_DAYS = {\n    'day': 1, 'd[ií]a': 1, 'dia': 1, 'día': 1,\n    'week': 7, 'semana': 7,\n    'month': 30.4375, 'mes': 30.4375, 'mese': 30.4375, 'meses': 30.4375,\n    'year': 365.25, 'año': 365.25,\n}\n\n\ndef check_temporal_window_consistency(html: str) -> str | None:\n    \"\"\"Catch time-range labels (e.g. '6-month window') that don't match\n    the actual span between two adjacent ISO dates ±10%, when no hedge\n    word is present.\n\n    Origin: Brief 6 issue #2 ('6-month prediction window' label vs\n    actual 5.78-month span). The existing date_arithmetic check catches\n    duration-to-end-date mismatches (e.g. '6-month suspension running\n    to 2026-11-10' from a 2025-10-15 source). This is the\n    prediction-window specialisation: looks for `(N|N-M) (unit) window`\n    + 2 ISO dates in proximity, computes ratio.\n\n    Algorithm:\n      1. For each match of `(N) (unit) window` in the prose:\n      2. Find ≤2 ISO dates within ±200 chars (the implied window endpoints).\n      3. Compute claim_days = N * unit_days. If range form (N-M), use mean.\n      4. Compute actual_days = |date_max - date_min|.\n      5. If both present and ratio outside [0.9, 1.1]:\n         - Skip if hedge word ('approximately', 'roughly', '~') in\n           ±50 chars before claim.\n         - Otherwise flag.\n\n    WARNING-only. 
Cost: 0 runtime.\n    \"\"\"\n    text = re.sub(r'<[^>]+>', ' ', html)\n    findings = []\n    for m in _TWC_WINDOW_RE.finditer(text):\n        n_low = m.group(1).replace(',', '.')\n        n_high = m.group(2).replace(',', '.') if m.group(2) else None\n        unit = m.group(3).lower()\n\n        # Hedge in ±50 chars before claim → skip\n        hedge_window = text[max(0, m.start() - 50):m.start()]\n        if _TWC_HEDGE_RE.search(hedge_window):\n            continue\n\n        try:\n            n = (float(n_low) + float(n_high)) / 2 if n_high else float(n_low)\n        except ValueError:\n            continue\n        unit_days = None\n        for k, v in _TWC_UNIT_DAYS.items():\n            if re.match(k, unit):\n                unit_days = v\n                break\n        if unit_days is None:\n            continue\n        claim_days = n * unit_days\n\n        # Find ISO dates in ±200 chars\n        proximity = text[max(0, m.start() - 200):min(len(text), m.end() + 200)]\n        iso_dates = []\n        for d_match in _TWC_ISO_DATE_RE.finditer(proximity):\n            try:\n                y, mo, d = int(d_match.group(1)), int(d_match.group(2)), int(d_match.group(3))\n                iso_dates.append(date(y, mo, d))\n            except ValueError:\n                pass\n        if len(iso_dates) < 2:\n            continue\n        actual_days = (max(iso_dates) - min(iso_dates)).days\n        if actual_days <= 0:\n            continue\n        ratio = claim_days / actual_days\n        if 0.9 <= ratio <= 1.1:\n            continue\n        # Direction: claim larger or smaller than reality\n        direction = 'overstates' if ratio > 1.1 else 'understates'\n        findings.append(\n            f'\"{m.group(0).strip()}\" {direction} window: claim ~{claim_days:.0f}d '\n            f'vs actual {actual_days}d between {min(iso_dates)} → {max(iso_dates)} '\n            f'(ratio={ratio:.2f}; threshold ±10%)'\n        )\n\n    if findings:\n        return '; 
'.join(findings[:3])\n    return None\n\n\ndef _domain_of(url: str) -> str:\n    m = re.match(r'https?://([^/]+)/?', url)\n    if not m:\n        return ''\n    host = m.group(1).lower()\n    if host.startswith('www.'):\n        host = host[4:]\n    parts = host.split('.')\n    if len(parts) >= 3 and parts[-2] in {'gov', 'co', 'com', 'org', 'ac'}:\n        return '.'.join(parts[-3:])\n    return '.'.join(parts[-2:]) if len(parts) >= 2 else host\n\n\n_PREDICTIONS_CACHE: dict[str, list] | None = None\n\n\ndef _load_predictions_yaml() -> dict[str, list]:\n    \"\"\"Returns {brief_filename: [{'id': ..., 'falsifiable_by_date': ...}, ...]}.\n\n    Light parser — extracts brief_source -> id+date dicts without PyYAML\n    dependency. Cache persists across calls within the same process.\n    DEF01 only measures len(); D-QA-22 floor#2 width check uses the date.\n    \"\"\"\n    global _PREDICTIONS_CACHE\n    if _PREDICTIONS_CACHE is not None:\n        return _PREDICTIONS_CACHE\n\n    yaml_path = os.path.join(\n        os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'predictions.yaml',\n    )\n    if not os.path.exists(yaml_path):\n        _PREDICTIONS_CACHE = {}\n        return _PREDICTIONS_CACHE\n\n    with open(yaml_path, encoding='utf-8') as f:\n        text = f.read()\n\n    # Strip comment lines so schema-doc examples (`#   - id: PRED-...`) don't\n    # get picked up as phantom entries by the regex below. 
Real entries live\n    # at column 0 with `  - id:` (indented under `predictions:`); comment\n    # examples in the header start with `#` and trip the regex otherwise.\n    text = '\\n'.join(\n        line for line in text.splitlines()\n        if not line.lstrip().startswith('#')\n    )\n\n    by_brief: dict[str, list] = {}\n    for entry in re.finditer(\n        r'-\\s+id:\\s*(\\S+).*?(?=\\n\\s*-\\s+id:|\\Z)',\n        text, re.DOTALL,\n    ):\n        block = entry.group(0)\n        pid = entry.group(1)\n        bs = re.search(r'brief_source:\\s*(\\S+)', block)\n        if not bs:\n            continue\n        bs_val = bs.group(1).strip().strip('\"').strip(\"'\")\n        fbd = re.search(r'falsifiable_by_date:\\s*(\\S+)', block)\n        fbd_val = fbd.group(1).strip().strip('\"').strip(\"'\") if fbd else ''\n        by_brief.setdefault(bs_val, []).append({\n            'id': pid,\n            'falsifiable_by_date': fbd_val,\n        })\n\n    _PREDICTIONS_CACHE = by_brief\n    return by_brief\n\n\ndef _extract_brief_filename(html: str) -> str | None:\n    \"\"\"Extract SD_*.html filename from og:url or canonical link, if present.\"\"\"\n    for pat in (\n        r'<meta\\s+property=\"og:url\"\\s+content=\"https?://[^\"]+/([^\"/]+\\.html)\"',\n        r'<link\\s+rel=\"canonical\"\\s+href=\"https?://[^\"]+/([^\"/]+\\.html)\"',\n    ):\n        m = re.search(pat, html)\n        if m:\n            name = m.group(1)\n            if name.startswith('SD_'):\n                return name\n    return None\n\n\n_DEF01_PROB_RE = re.compile(\n    r'(?:we\\s+assess|assess(?:ment)?|estimate|probabilit[yáí]a?|probability|'\n    r'probabilidad)[^.<>]{0,40}?(\\d{2,3})(?:\\s*[-–]\\s*(\\d{2,3}))?\\s*%',\n    re.IGNORECASE,\n)\n_DEF01_FALSIFY_RE = re.compile(\n    r'(?:falsifi(?:able|cation|ed)|verify[:\\s]|verificar[:\\s]|'\n    r'resolution[:\\s]|observable[\\s_]condition|resolution\\s+source)',\n    re.IGNORECASE,\n)\n_DEF01_DATE_RE = re.compile(\n    
r'(?:by|before|antes\\s+de|hasta)\\s+(?:\\d{4}-\\d{2}-\\d{2}|'\n    r'(?:Q[1-4]|H[12]|end\\s+of|finales?\\s+de)\\s*20\\d{2}|'\n    r'\\d{1,2}\\s+(?:[A-Z][a-zé]+|[a-zé]+\\s+(?:de|of)?)\\s*20\\d{2})',\n    re.IGNORECASE,\n)\n\n\ndef check_def01_formal_predictions_present(html: str) -> str | None:\n    \"\"\"DEF01 — brief must declare ≥1 formal prediction AND ledger them.\n\n    Three cases:\n      (1) Ledger has ≥1 entry for brief_source=filename → PASS\n      (2) Ledger empty AND body has prob+falsify+date triad → WARN\n          \"ledger gap\" (new 2026-05-10): predictions made but extraction\n          step (`extract-prediction-stubs.py`) was skipped before promote.\n          Brief 6 Critical Minerals 2026-05-10 was the canonical instance.\n      (3) Ledger empty AND no triad → WARN \"defensibility gap\": brief is\n          matchable by a generic LLM with web search; the track-record\n          moat requires verifiable forecasts.\n\n    WARNING level — false positives expected on early briefs where the\n    predictions ledger lacks a backfill entry. Promote to BLOCKER (in\n    runner) and to brief-saver promote-time gate after corpus stabilisation\n    + Brief 7 calibration confirms 0 false-positive rate.\n\n    Strategic origin: a brief without verifiable forecasts is matchable by\n    a generic LLM with web search. 
The track-record moat is built by the\n    predictions ledger (see docs/content-planning/PREDICTION_DESIGN.md).\n    \"\"\"\n    fname = _extract_brief_filename(html)\n    ledger_count = 0\n    if fname:\n        ledger = _load_predictions_yaml()\n        ledger_count = len(ledger.get(fname, []))\n\n    has_prob = bool(_DEF01_PROB_RE.search(html))\n    has_falsify = bool(_DEF01_FALSIFY_RE.search(html))\n    has_date = bool(_DEF01_DATE_RE.search(html))\n    has_triad = has_prob and has_falsify and has_date\n\n    # Case 1: ledger has entries — pass.\n    if ledger_count >= 1:\n        return None\n\n    # Case 2: HTML has triad but ledger is empty — extraction was skipped.\n    # This is the Brief 6 2026-05-10 defect class. Pre-2026-05-10 the OR\n    # fallback returned None here, allowing the brief to promote with an\n    # empty ledger (audit-trail-as-moat momentarily false).\n    if has_triad:\n        return (\n            f'DEF01 ledger gap: HTML contains formal-prediction triad '\n            f'(probability+verification+falsifiable date) but predictions.yaml '\n            f'has 0 entries with brief_source={fname or \"(unknown)\"}. '\n            f'Run: python3 scripts/extract-prediction-stubs.py '\n            f'briefs/{fname or \"<filename>\"} → review notes → commit. '\n            f'cf. feedback_pre_promote_coherence_audit.md'\n        )\n\n    # Case 3: ledger empty AND no triad — defensibility gap.\n    missing: list[str] = []\n    if not has_prob:\n        missing.append('probability number')\n    if not has_falsify:\n        missing.append('verification/falsification recipe')\n    if not has_date:\n        missing.append('falsifiable date/window')\n\n    return (\n        f'DEF01 defensibility gap: no entries in data/predictions.yaml for '\n        f'this brief AND body lacks formal-prediction triad '\n        f'(missing: {\", \".join(missing) if missing else \"all three\"}); '\n        f'cf. 
docs/content-planning/PREDICTION_DESIGN.md'\n    )\n\n\n_DEF02_MONITOR_RE = re.compile(\n    r'Shadow\\s+Dynamics\\s+Intelligence\\s+Monitor', re.IGNORECASE,\n)\n_DEF02_PATTERN_RECOGNITION_RE = re.compile(\n    r'<h2[^>]*>[^<]*?'\n    r'(?:PATTERN\\s+RECOGNITION|RECONOCIMIENTO\\s+DE\\s+PATR[OÓ]N)'\n    r'[^<]*?</h2>',\n    re.IGNORECASE,\n)\n\n\ndef check_def02_primary_contribution_present(html: str) -> str | None:\n    \"\"\"DEF02 — brief must declare its primary contribution.\n\n    Pass if EITHER:\n      (a) Body references \"Shadow Dynamics Intelligence Monitor\" (internal\n          signal corpus), OR\n      (b) Brief contains a \"PATTERN RECOGNITION\" / \"RECONOCIMIENTO DE PATRÓN\"\n          h2 section (cross-brief synthesis).\n\n    Without one of these markers, the brief reads as repackaged public\n    sources — defensibility-equivalent to LLM-with-web-search output.\n    \"\"\"\n    if _DEF02_MONITOR_RE.search(html):\n        return None\n    if _DEF02_PATTERN_RECOGNITION_RE.search(html):\n        return None\n    return (\n        'DEF02 defensibility gap: no primary-contribution marker — neither '\n        'Shadow Dynamics Intelligence Monitor reference nor Pattern '\n        'Recognition / Reconocimiento de Patrón section detected. Add '\n        'internal-corpus reference or cross-brief synthesis section.'\n    )\n\n\ndef check_def03_canonical_url_floor(html: str) -> str | None:\n    \"\"\"DEF03 — sources block must have ≥5 distinct canonical primary-source URLs.\n\n    Canonical = primary-source domain allowlist (BOE, ENTSO-E, EIB, SEC, IMF,\n    OECD, ECB, NATO, etc.). 
Aggregators (Wikipedia) and self-references\n    (shadowdynamics.ai, substack.com) do not count.\n\n    No-op when no <section class=\"sources\"> exists (legacy briefs).\n\n    WARNING (not BLOCKER) — false positives expected on briefs whose primary\n    sources lack public canonical URLs (REE prospective studies, CSN licences).\n    Threshold ≥5 calibrated against published corpus 2026-05-02.\n    \"\"\"\n    sources_match = re.search(\n        r'<section class=\"sources\"[^>]*>.*?</section>',\n        html, re.DOTALL,\n    )\n    if not sources_match:\n        return None  # no sources block — other checks handle this\n\n    sources_block = sources_match.group(0)\n    urls = set(re.findall(r'href=\"(https?://[^\"]+)\"', sources_block))\n\n    canonical = 0\n    for u in urls:\n        d = _domain_of(u)\n        if d in _DEF03_AGGREGATOR_DOMAINS:\n            continue\n        if d in _DEF03_CANONICAL_DOMAINS:\n            canonical += 1\n        elif any(d.endswith('.' + dom) for dom in _DEF03_CANONICAL_DOMAINS):\n            canonical += 1\n\n    if canonical >= 5:\n        return None\n    return (\n        f'DEF03 defensibility gap: only {canonical} canonical primary-source '\n        f'URL(s) in <section class=\"sources\"> (target ≥5; aggregator/self URLs '\n        f'excluded). Strengthen with primary-source links '\n        f'(BOE, ENTSO-E, SEC, IMF, ECB, official institutional).'\n    )\n\n\n# D-QA-22 floor #2 width — operator-adopted 2026-05-04 (Y).\n# Tied to STRATEGY.md v0.6 §Q2 hybrid kill criterion deadline 2026-10-31.\n# When kill criterion reformulates, update this constant + the message.\n_FLOOR2_DEADLINE = '2026-10-31'\n\n\n# D-QA-22 adoption date: from 2026-05-04 onward, Briefs 6/7/8+ must carry\n# ≥2 sub-deadline predictions. 
Briefs published before this date are exempt\n# (lenient regime — fire only on ≥3 total + 0 sub-deadline as audit trail).\n_DQA22_ADOPTION_DATE = '2026-05-04'\n\n\ndef check_predictions_floor2_width(html: str) -> str | None:\n    \"\"\"D-QA-22 — flag briefs missing required sub-deadline prediction count.\n\n    Per operator decision 2026-05-04 (Y), briefs published on/after that\n    adoption date must include ≥2 predictions with falsifiable_by_date ≤\n    2026-10-31 to widen kill-criterion floor #2 base from N=3 to N=9.\n\n    Two extraction paths (ledger preferred when populated):\n\n    1. `predictions.yaml` — populated post-promote, authoritative.\n    2. HTML regex `Falsifiable by YYYY-MM-DD` — used pre-promote when\n       ledger has no entries for this brief yet. Origin: Brief 6 Critical\n       Minerals pre-promote audit 2026-05-08 caught 0 sub-deadline\n       contribution; original ledger-only path no-op'd in staging because\n       predictions extract to YAML at promote-time, not at save-time.\n\n    Two scoping regimes by publication date (filename-derived):\n\n    - Pre-2026-05-04 (legacy): lenient — fire only when ≥3 total entries\n      AND 0 sub-deadline. Preserves audit-trail for legacy briefs that\n      flagged the gap motivating D-QA-22.\n    - 2026-05-04+ (post-adoption): strict — fire whenever <2 sub-deadline\n      and brief has any formal predictions. 
Catches the contractual-default\n      class invisible to the legacy threshold.\n\n    No-op when:\n    - Brief has no formal predictions (no `Falsifiable by` markers AND no\n      ledger entries) — early/legacy brief without prediction section.\n    - Brief filename not detectable (preview HTML, etc.).\n    \"\"\"\n    fname = _extract_brief_filename(html)\n    if not fname:\n        return None\n\n    # Regime by publication date\n    m_date = re.search(r'SD_(\\d{4})(\\d{2})(\\d{2})_', fname)\n    if m_date:\n        file_date = f'{m_date.group(1)}-{m_date.group(2)}-{m_date.group(3)}'\n        post_adoption = file_date >= _DQA22_ADOPTION_DATE\n    else:\n        post_adoption = False  # unparseable filename → default lenient\n\n    # Source of truth: ledger first, fall back to HTML pre-promote\n    ledger = _load_predictions_yaml()\n    entries = ledger.get(fname, [])\n\n    if entries:\n        total = len(entries)\n        sub_deadline_count = sum(\n            1 for e in entries\n            if e.get('falsifiable_by_date')\n            and str(e['falsifiable_by_date']) <= _FLOOR2_DEADLINE\n        )\n        source = 'ledger'\n    else:\n        # HTML extraction: count distinct `Falsifiable by YYYY-MM-DD` markers.\n        # EN-side only (ES translation duplicates same predictions).\n        en_dates = re.findall(\n            r'Falsifiable\\s+by\\s+(\\d{4}-\\d{2}-\\d{2})', html, re.IGNORECASE\n        )\n        if not en_dates:\n            return None  # no formal predictions section\n        total = len(en_dates)\n        sub_deadline_count = sum(1 for d in en_dates if d <= _FLOOR2_DEADLINE)\n        source = 'HTML pre-promote'\n\n    if post_adoption:\n        # Strict: ≥2 sub-deadline required per D-QA-22 Brief 6/7/8 contract\n        if sub_deadline_count >= 2:\n            return None\n        return (\n            f'D-QA-22 floor#2 width ({source}): brief has {total} formal '\n            f'prediction(s) but only {sub_deadline_count} sub-deadline '\n 
           f'(falsifiable_by_date ≤ {_FLOOR2_DEADLINE}). Post-{_DQA22_ADOPTION_DATE} '\n            f'briefs require ≥2 sub-deadline per D-QA-22; add sub-deadline '\n            f'prediction(s) before promote.'\n        )\n    # Lenient legacy: ≥3 total + 0 sub-deadline → fire (audit-trail honesty)\n    if total < 3 or sub_deadline_count >= 1:\n        return None\n    return (\n        f'D-QA-22 floor#2 width ({source}): legacy brief has {total} '\n        f'predictions but 0 with falsifiable_by_date ≤ {_FLOOR2_DEADLINE}. '\n        f'Pre-D-QA-22 brief; flagged honestly per the gap that motivated '\n        f'the rule.'\n    )\n\n\ndef check_toc_anchor_integrity(html: str) -> str | None:\n    \"\"\"TOC ↔ body section-anchor integrity check.\n\n    Brief 6 (commit cb3ac78) shipped with the ES TOC missing the ACRÓNIMOS\n    section link that existed in the EN TOC, and the en-sec-9 (Section IX\n    Sector Exposure) entry was missing MOFCOM. Each was caught only by\n    post-promote human inspection. Pre-promote runner check prevents\n    recurrence by enforcing that every TOC `<a href=\"#sec-id\">` resolves\n    to a body `id=\"sec-id\"`, and (optionally, audit-only) every body\n    `<section>`/`<h2>` with a sec-id is referenced in the TOC.\n\n    Two failure classes:\n      (a) DANGLING TOC LINK — `<a href=\"#X\">` in TOC but no `id=\"X\"` in body.\n          Result: clicking the TOC entry goes nowhere. Hard structural bug.\n      (b) ORPHAN SECTION — body has `id=\"X\"` matching the section-id pattern\n          but no TOC entry. Section exists but isn't discoverable from TOC.\n\n    Scope: only `#en-sec-N` / `#es-sec-N` / `#en-sec-acronyms` / `#es-sec-acronyms`\n    patterns. Excludes footnote anchors (`#fn-N`) and arbitrary in-body anchors\n    which have their own integrity (footnote_reference_integrity).\n\n    WARNING-class. 
Promote to BLOCKER after one clean Brief 7+ window per\n    feedback_runner_calibrate_then_ratchet.\n    \"\"\"\n    sec_anchor_re = re.compile(\n        r'<a\\s+href=\"#((?:en-sec|es-sec)-[a-z0-9_-]+)\"',\n        re.IGNORECASE,\n    )\n    sec_id_re = re.compile(\n        r'\\bid=\"((?:en-sec|es-sec)-[a-z0-9_-]+)\"',\n        re.IGNORECASE,\n    )\n\n    toc_targets = {m.group(1).lower() for m in sec_anchor_re.finditer(html)}\n    body_ids = {m.group(1).lower() for m in sec_id_re.finditer(html)}\n\n    if not toc_targets and not body_ids:\n        # legacy brief without sec-id pattern — skip\n        return None\n\n    dangling = sorted(toc_targets - body_ids)\n    orphan = sorted(body_ids - toc_targets)\n\n    msgs = []\n    if dangling:\n        msgs.append(\n            f\"{len(dangling)} dangling TOC link(s) — `<a href=\\\"#X\\\">` \"\n            f\"with no matching body id=\\\"X\\\": {dangling[:5]}\"\n            + (f' (+{len(dangling)-5} more)' if len(dangling) > 5 else '')\n        )\n    if orphan:\n        msgs.append(\n            f\"{len(orphan)} orphan section(s) — body id=\\\"X\\\" with no TOC \"\n            f\"entry: {orphan[:5]}\"\n            + (f' (+{len(orphan)-5} more)' if len(orphan) > 5 else '')\n        )\n\n    if not msgs:\n        return None\n\n    return (\n        \"TOC ↔ body section-anchor mismatch: \" + \"; \".join(msgs) +\n        \" — every TOC link must resolve to a body section, and every body \"\n        \"section with a sec-id should appear in the TOC.\"\n    )\n\n\n# ── runner ───────────────────────────────────────────────────────────────────\n\n@dataclass\nclass CheckResult:\n    name: str\n    severity: str       # \"blocker\" | \"warning\"\n    passed: bool\n    detail: str | None  # failure reason, or None on pass\n\n\nBLOCKERS: list[tuple[str, Callable[[str], str | None]]] = [\n    ('scaffold_pseudo_citations',  check_scaffold_pseudo_citations),\n    ('result_label_duplicate',     check_result_label_duplicate),\n    
('preflight_section_leak',     check_preflight_section_leak),\n    ('event_date_leak',            check_event_date_leak),\n    ('inline_paywall',             check_inline_paywall),\n    ('old_branding',               check_old_branding),\n    ('about_scaffold_label_list',  check_about_scaffold_label_list),\n    ('hero_scaffold_pill',         check_hero_scaffold_pill),\n    ('section_count_parity',       check_section_count_parity),\n    ('unbalanced_tables',          check_unbalanced_tables),\n    # EDITORIAL-FOOTNOTE-01 F5 — promoted from WARNING to BLOCKER 2026-04-28 noche.\n    # The check is conditional: if a brief has zero <sup> and zero <li id=\"fn-N\">\n    # it returns None (no-op). Legacy briefs without footnote markup pass\n    # trivially; new briefs (post-F3 commit 8235112) emit the pattern and any\n    # malformed footnote graph (orphan refs, gaps, duplicates) blocks publish.\n    ('footnote_integrity',         check_footnote_integrity),\n    # EDITORIAL-LEGAL-02 L02 — bilingual regulatory disclaimer regression guard.\n    # Wired as BLOCKER from inception: the disclaimer is rendered by the\n    # global footer template (commit 687d8f8) so all 5 published briefs pass\n    # by construction. Failure = template rollback, not editorial drift.\n    ('regulatory_disclaimer_present', check_regulatory_disclaimer_present),\n    # EDITORIAL-LEGAL-02 P02 — bilingual structural parity beyond h2.\n    # Extends section_count_parity to h3/table/p. Tolerances calibrated\n    # against the 5-brief corpus on 2026-05-02; Brief 5 (Δ4 on 63 p tags)\n    # passes with margin under p tolerance max(8, 20%×larger).\n    ('bilingual_extended_parity',  check_bilingual_extended_parity),\n    # EDITORIAL-LEGAL-02 S01 — inference-citation ratio guard. Pairs with the\n    # EDITORIAL-PROMPT INFERENCE SOURCE TRANSPARENCY rule. 
Threshold ≥4 sup\n    # refs + ratio > 25% to avoid noise on light-footnote briefs.\n    ('inference_citation_ratio',   check_inference_citation_ratio),\n    # EDITORIAL-FOOTER-DISCIPLINE-01 — defensive regression guard scoped to\n    # <footer.site-footer> + <div.cover-meta>. Closes the gap left by the\n    # 2026-04-30 legal patch (commit 2dddef7): the name-drop was deleted\n    # but nothing prevents a future Forecaster Format Output edit from\n    # silently reintroducing it. Body-level citations of WEF/Eurasia\n    # remain legitimate nominative use (different scope).\n    ('footer_namedrop_discipline', check_footer_namedrop_discipline),\n]\n\nWARNINGS: list[tuple[str, Callable[[str], str | None]]] = [\n    ('translation_body_parity',         check_translation_body_parity),\n    ('meta_tags_present',               check_meta_tags_present),\n    ('minimum_word_count',              check_minimum_word_count),\n    ('numerical_citation_contract',     check_numerical_citation_contract),\n    ('critical_facts',                  check_critical_facts),\n    # MAGNITUDE-FRAMING-CONSISTENCY 2026-05-08 — catches same-number\n    # opposite-semantic flips against facts.yaml::magnitude_flips. Origin:\n    # Brief 6 Critical Minerals pre-promote audit ('40% short' vs 'at 40%').\n    # WARNING (calibrate-then-ratchet); promote to BLOCKER after corpus\n    # baseline clean across 3+ briefs.\n    ('magnitude_framing_consistency',   check_magnitude_framing_consistency),\n    ('date_arithmetic',                 check_date_arithmetic),\n    # QA-INCONTESTABLE-03 — temporal_window_consistency\n    # Catches time-range labels in §VERDICT or §FORMAL PREDICTIONS that\n    # don't match adjacent ISO date-span arithmetic ±10%, with hedge-word\n    # exemption. 
Complements date_arithmetic: this targets explicit\n    # \"(N|N-M) (unit) window\" patterns; date_arithmetic targets\n    # \"(N) (unit) running to/expires/expira/hasta DATE\" duration claims.\n    ('temporal_window_consistency',     check_temporal_window_consistency),\n    ('investor_verdict_contradiction',  check_investor_verdict_contradiction),\n    ('bare_inference_number',           check_bare_inference_number),\n    ('internal_monitor_clarity',        check_internal_monitor_clarity),\n    ('mdtohtml_paragraph_wrap_block_tag', check_mdtohtml_paragraph_wrap_block_tag),\n    # §ESTA SEMANA item 5 priority #4 (2026-05-11) — structural-malformation\n    # HTML check: TOC ↔ body section-anchor integrity. Origin: Brief 6\n    # (commit cb3ac78) ES TOC missing ACRÓNIMOS + en-sec-9 missing MOFCOM;\n    # caught only by post-promote human inspection. WARNING-class; promote\n    # to BLOCKER after one clean Brief 7+ window per calibrate-then-ratchet.\n    ('toc_anchor_integrity',            check_toc_anchor_integrity),\n    ('investment_recommendation_patterns', check_investment_recommendation_patterns),\n    # EDITORIAL-LEGAL-06 A4 — cross-lingual topic self-reference detector\n    # (B5-PATTERN-EN bug class, commit 9cec1a0). Heuristic; may have false\n    # positives — pre-publish reviewer confirms.\n    ('cross_lingual_topic_consistency',    check_cross_lingual_topic_consistency),\n    # EDITORIAL-LEGAL-06 A5 / EL-04 D5.5 — sources density floor (≥10\n    # distinct URLs in footnotes + sources block; target 12-13). Conditional\n    # no-op on legacy briefs without footnote markup or sources block.\n    ('sources_density_minimum',            check_sources_density_minimum),\n    # EDITORIAL-LEGAL-02 D01 — anchor-number → footnote-URL gate. WARNING for\n    # baseline week (Brief 5 baseline; promote to BLOCKER after 1 week clean,\n    # following the L01/footnote_integrity precedent). 
Conditional: brief\n    # with 0 <li id=\"fn-N\"> entries → no-op.\n    ('anchor_number_footnote_url',         check_anchor_number_footnote_url),\n    # Bilingual sources presence — catches Pattern C (EN-only sources block on\n    # a brief that has body fn-N refs). Origin: Briefs 3-5 erratum 2026-05-08\n    # (commit 7d87415). WARNING; promote to BLOCKER after a stable corpus window.\n    # Cross-ref project_sd_briefs_sources_block_patterns.md.\n    ('bilingual_sources_present',          check_bilingual_sources_present),\n    # EDITORIAL-LEGAL-02 L03 — listed-entity density × financial-metric proximity.\n    # WARNING (not BLOCKER) because legitimate sectoral discussion may name\n    # companies. Catalogue at data/listed-entities.yaml; chat-Claude proposes\n    # additions per EDITORIAL-LEGAL-06 role 2.\n    ('listed_entity_density',              check_listed_entity_density),\n    ('placeholder_template_failure',       check_placeholder_template_failure),\n    # EDITORIAL-LEGAL-02 P03 partial — false-friend \"billón/trillón\" detector\n    # in ES scope. WARNING (not BLOCKER) because legitimate Spanish use of\n    # billón=10^12 exists. Full P03 numerical-equivalence sweep deferred.\n    ('cross_lingual_false_friend',  check_cross_lingual_false_friend),\n    # RUNNER-DEFENSIBILITY-01 DEF01-DEF03 — defensibility heuristics\n    # 2026-05-02. Calibrated against 5-brief published corpus\n    # (reports/defensibility-audit_2026-05-02.md). All WARNING in first\n    # round; promote to BLOCKER after stable false-positive baseline.\n    ('def01_formal_predictions_present',  check_def01_formal_predictions_present),\n    ('def02_primary_contribution_present', check_def02_primary_contribution_present),\n    ('def03_canonical_url_floor',          check_def03_canonical_url_floor),\n    # D-QA-22 — kill-criterion floor #2 width. Operator-adopted 2026-05-04 (Y):\n    # ≥2 predictions with falsifiable_by_date ≤ 2026-10-31 per Brief 6/7/8\n    # to widen base from 3 → 9. 
WARNING (heuristic guidance, not enforcing);\n    # legacy briefs honestly flag the gap that motivated the rule.\n    ('predictions_floor2_width',           check_predictions_floor2_width),\n    # Brief 7 prep priorities #8 item 4 — structural-malformation WARN\n    # consolidates h3-inside-p + dark-bg-bare-strong + toc-sync-broken +\n    # sections-inside-tab-full. Calibrate against Brief 7+8; ratchet to\n    # BLOCKER Brief 9+ per feedback_runner_calibrate_then_ratchet.\n    ('html_structural_integrity',          check_html_structural_integrity),\n    # FORECASTING-DISCIPLINE-LESSONS-2026-05-12-01 sub-item 3: posterior-predictive\n    # checks per Gelman pattern. Validate Tier A E3/E5/E9 emission discipline by\n    # encoding what brief output SHOULD look like and measuring vs prediction.\n    # WARN-only; calibrate against Brief 7+8 emission under Tier A; ratchet to\n    # BLOCKER Brief 9+ per feedback_runner_calibrate_then_ratchet.\n    ('prediction_e3_cluster_id_reference', check_prediction_e3_cluster_id_reference),\n    ('prediction_e5_ternary_sum_100',      check_prediction_e5_ternary_sum_100),\n    ('prediction_e9_pos_threshold_numeric',check_prediction_e9_pos_threshold_numeric),\n]\n\n\ndef _derive_brand_from_filename(filename: str | None) -> str | None:\n    \"\"\"Derive brand from brief filename prefix:\n    - SD_*       → 'shadow-dynamics'\n    - CL_* / CLAVE_* → 'clave'\n    - other / None → None (no brand filter; full registry applies)\n\n    FORK-PREP-GAP-1-DATA-BRAND-AWARE-01: brand-aware facts/listed-entities\n    filtering keyed off brief filename. 
SD_*.html prefix is canonical per\n    existing convention; CL_*/CLAVE_* reserved for first Clave brief naming.\n    \"\"\"\n    if not filename:\n        return None\n    base = os.path.basename(filename).upper()\n    if base.startswith('SD_'):\n        return 'shadow-dynamics'\n    if base.startswith('CL_') or base.startswith('CLAVE_'):\n        return 'clave'\n    return None\n\n\n# Checks that accept an optional brand context. Run via dispatch in run_checks\n# rather than the BLOCKERS/WARNINGS Callable[[str], ...] iteration.\n_BRAND_AWARE_CHECKS = frozenset({'critical_facts', 'listed_entity_density'})\n\n\ndef run_checks(html: str, briefs_dir: str | None = None,\n               check_urls: bool = False,\n               filename: str | None = None) -> list[CheckResult]:\n    \"\"\"Run all checks against html, return list of CheckResult.\n\n    `check_urls=True` enables network-dependent URL HEAD probes (off by\n    default — CI shouldn't depend on outbound network).\n\n    `filename` (optional) feeds brand-derivation for brand-aware checks\n    (`critical_facts` + `listed_entity_density`). 
When None, those checks\n    run with brand=None (back-compat: full registry, no filter).\n    \"\"\"\n    results: list[CheckResult] = []\n    brand = _derive_brand_from_filename(filename)\n\n    for name, fn in BLOCKERS:\n        if name in _BRAND_AWARE_CHECKS:\n            detail = fn(html, brand=brand)\n        else:\n            detail = fn(html)\n        results.append(CheckResult(\n            name=name, severity='blocker',\n            passed=detail is None, detail=detail,\n        ))\n\n    for name, fn in WARNINGS:\n        if name in _BRAND_AWARE_CHECKS:\n            detail = fn(html, brand=brand)\n        else:\n            detail = fn(html)\n        results.append(CheckResult(\n            name=name, severity='warning',\n            passed=detail is None, detail=detail,\n        ))\n\n    # Special-case: link integrity needs filesystem context\n    if briefs_dir:\n        detail = check_local_link_integrity(html, briefs_dir)\n        results.append(CheckResult(\n            name='local_link_integrity', severity='warning',\n            passed=detail is None, detail=detail,\n        ))\n\n    # Special-case: pattern citations check optionally consults briefs_dir\n    detail = check_pattern_citations(html, briefs_dir=briefs_dir)\n    results.append(CheckResult(\n        name='pattern_citations', severity='warning',\n        passed=detail is None, detail=detail,\n    ))\n\n    # Network-dependent: cited URL integrity\n    if check_urls:\n        detail = check_url_integrity(html, network=True)\n        results.append(CheckResult(\n            name='url_integrity', severity='warning',\n            passed=detail is None, detail=detail,\n        ))\n\n    return results\n\n\ndef summarise(results: list[CheckResult]) -> dict:\n    blockers = [r for r in results if r.severity == 'blocker' and not r.passed]\n    warnings = [r for r in results if r.severity == 'warning' and not r.passed]\n    return {\n        'pass': len(blockers) == 0,\n        'blockers': 
[asdict(r) for r in blockers],\n        'warnings': [asdict(r) for r in warnings],\n        'total_checks': len(results),\n    }\n\n\ndef log_failure(filename: str, summary: dict, log_path: str = '/var/log/sd-quality-failures.log') -> None:\n    \"\"\"Append failed checks to a log for retrospective analysis.\"\"\"\n    if summary['pass'] and not summary['warnings']:\n        return  # nothing worth logging\n    try:\n        with open(log_path, 'a') as f:\n            entry = {\n                'ts': datetime.now(timezone.utc).isoformat(),\n                'file': filename,\n                **summary,\n            }\n            f.write(json.dumps(entry, ensure_ascii=False) + '\\n')\n    except OSError:\n        # Failure to log is not failure of the check; just stderr it\n        sys.stderr.write(f'[check-briefs] could not write {log_path}\\n')\n\n\n# ── CLI ──────────────────────────────────────────────────────────────────────\n\n_BRIEFS_DIR_BY_BRAND = {\n    'shadowdynamics': '/root/n8n/local-files/briefs',\n    'clave':          '/root/n8n/local-files/briefs-clave',\n}\n\n\ndef main(argv: list[str] | None = None) -> int:\n    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])\n    parser.add_argument('paths', nargs='*',\n                        help='HTML files or globs (default: scan brand briefs dir)')\n    parser.add_argument('--brand', choices=['shadowdynamics', 'clave'],\n                        default='shadowdynamics',\n                        help='Brand context — affects default --briefs-dir and default scan path')\n    parser.add_argument('--briefs-dir', default=None,\n                        help='Briefs directory for link-integrity check (default: brand-specific)')\n    parser.add_argument('--json', action='store_true', help='Emit JSON output')\n    parser.add_argument('--no-log', action='store_true',\n                        help='Do not append failures to log file')\n    parser.add_argument('--blockers-only', 
action='store_true',\n                        help='Exit non-zero only on blocker failures')\n    parser.add_argument('--check-urls', action='store_true',\n                        help='Enable network-dependent URL HEAD probes')\n    args = parser.parse_args(argv)\n\n    if args.briefs_dir is None:\n        args.briefs_dir = _BRIEFS_DIR_BY_BRAND[args.brand]\n\n    files: list[str] = []\n    if not args.paths:\n        files = sorted(glob.glob(os.path.join(args.briefs_dir, '*.html')))\n    else:\n        for p in args.paths:\n            if os.path.isfile(p):\n                files.append(p)\n            else:\n                files.extend(glob.glob(p))\n        files = sorted(set(files))\n\n    if not files:\n        print('no files matched', file=sys.stderr)\n        return 2\n\n    overall_blockers = 0\n    overall_warnings = 0\n    summaries: dict[str, dict] = {}\n\n    for fp in files:\n        html = open(fp, encoding='utf-8').read()\n        results = run_checks(html, briefs_dir=args.briefs_dir,\n                             check_urls=args.check_urls,\n                             filename=fp)\n        summary = summarise(results)\n        summaries[fp] = summary\n\n        if not args.no_log and (summary['blockers'] or summary['warnings']):\n            log_failure(fp, summary)\n\n        overall_blockers += len(summary['blockers'])\n        overall_warnings += len(summary['warnings'])\n\n    if args.json:\n        print(json.dumps(summaries, indent=2, ensure_ascii=False))\n    else:\n        for fp, s in summaries.items():\n            verdict = 'PASS' if s['pass'] and not s['warnings'] else \\\n                      'WARN' if s['pass'] else 'FAIL'\n            print(f'{verdict}  {os.path.basename(fp)}')\n            for b in s['blockers']:\n                print(f'   BLOCK  {b[\"name\"]}: {b[\"detail\"]}')\n            for w in s['warnings']:\n                print(f'   warn   {w[\"name\"]}: {w[\"detail\"]}')\n        print()\n        print(f'Files: 
{len(files)}  Blockers: {overall_blockers}  '\n              f'Warnings: {overall_warnings}')\n\n    if args.blockers_only:\n        return 1 if overall_blockers > 0 else 0\n    return 1 if (overall_blockers + overall_warnings) > 0 else 0\n\n\nif __name__ == '__main__':\n    sys.exit(main())\n"}