"""Tool: get_county_scorecard(fips) → CDI scorecard + pre-baked citations.""" from __future__ import annotations import logging from mcp.server.fastmcp.exceptions import ToolError from scripts.machine_layer.data_loaders import ( load_cdi_dataset_metadata, load_scorecard_blob, normalize_fips, ) from scripts.machine_layer.freshness import freshness_for_dataset from scripts.machine_layer.schemas import ( Citation, ScorecardDomain, ScorecardResponse, ) from scripts.machine_layer.validators import validate_tool_input, validate_tool_output logger = logging.getLogger(__name__) TOOL_NAME = "get_county_scorecard" # CDI is recomputed annually (gated on Census ACS + Urban Institute Debt in # America releases). Use the dataset cadence rather than per-county dates. CDI_CADENCE = "annual" def run(fips: str) -> dict: """Fetch a county scorecard + domain breakdown + pre-baked citations. The blob's `press_kit.citation` is pre-baked by C2 — we pass the strings through without regeneration. Raises ToolError("fips_not_found" | "invalid_input" | "output_schema_violation"). """ # Two-stage validation: the schema enforces the regex shape, then # normalize_fips zero-pads the 4-digit form. Schema rejects # anything that's not 4 or 5 digits — normalize handles the # zero-pad. Garbage input (alpha, punctuation) fails the schema # with `invalid_input`. validate_tool_input(TOOL_NAME, {"fips": fips}) normalized = normalize_fips(fips) if normalized is None: # Unreachable in practice — schema regex ^[0-9]{4,5}$ already # rejects non-numeric input. Defensive guard for direct callers # that bypass the validator (none in our codebase, but third- # party tooling might). raise ToolError( f"invalid_input: fips must be a 4- or 5-digit numeric string, got {fips!r}" ) blob = load_scorecard_blob(normalized) if blob is None: raise ToolError(f"fips_not_found: no scorecard for FIPS '{normalized}'") # Pre-baked citations (C2) cit = (blob.get("press_kit") or {}).get("citation") or {} citation = Citation( newscopy=cit.get("newscopy", ""), apa=cit.get("apa", ""), mla=cit.get("mla", ""), chicago=cit.get("chicago", ""), source_attribution="American Default Research (County Distress Index)", ) # Domain breakdown — shape is `domain_breakdown.domains[]` domain_data = (blob.get("domain_breakdown") or {}).get("domains") or [] domains = [ ScorecardDomain( name=d.get("name", ""), score=float(d.get("score", 0)), weight_pct=float(d.get("weight_pct", 0)), rank=d.get("rank"), percentile=d.get("percentile"), primary_driver=bool(d.get("primary_driver", False)), ) for d in domain_data ] key_findings_raw = blob.get("key_findings") or [] key_findings = [ kf if isinstance(kf, str) else (kf.get("text", "") if isinstance(kf, dict) else "") for kf in key_findings_raw ] state_slug = blob.get("state_slug", "") county_slug = blob.get("county_slug", "") url = ( f"https://americandefault.org/counties/{state_slug}/{county_slug}/" if state_slug and county_slug else "" ) cdi_meta = load_cdi_dataset_metadata() as_of_date, freshness_warning = freshness_for_dataset( cdi_meta.get("last_updated"), CDI_CADENCE, ) distress_fifth = blob.get("distress_fifth") or blob.get("zone", "") distress_fifth_color = blob.get("distress_fifth_color") or blob.get("zone_color", "") response = ScorecardResponse( fips=blob.get("fips", normalized), county_name=blob.get("county_name", ""), state_name=blob.get("state_name", ""), state_abbr=blob.get("state_abbr", ""), composite_score=float(blob.get("composite_score", 0)), distress_fifth=distress_fifth, distress_fifth_color=distress_fifth_color, zone=distress_fifth, zone_color=distress_fifth_color, national_rank=int(blob.get("national_rank", 0)), national_rank_ordinal=blob.get("national_rank_ordinal", ""), state_rank=int(blob.get("state_rank", 0)), population=blob.get("population"), domain_breakdown=domains, key_findings=key_findings, url=url, citation=citation, as_of_date=as_of_date, freshness_warning=freshness_warning, ) payload = response.model_dump(mode="json") validate_tool_output(TOOL_NAME, payload) return payload