{
  "$schema": "https://www.slavin.ai/data/ai-failure-modes-catalog-schema.json",
  "dataset": {
    "name": "AI-Generated Code Failure Modes Catalog",
    "version": "2026-06",
    "publisher": "Slavin AI (SLAtech LTD)",
    "publisherUrl": "https://www.slavin.ai/",
    "license": "CC-BY-4.0",
    "lastUpdated": "2026-06-14",
    "description": "Catalog of 22 failure modes that AI-generated code exhibits in production despite passing demo, prototype, and happy-path testing. Each entry: failure category, what the AI generates that looks correct, the concrete production-time failure, the detection method a senior architect uses, and the prevention pattern.",
    "audience": ["AI Architect", "Senior Engineer", "Code Reviewer", "CTO", "Engineering Lead"],
    "methodology": "Compiled from 150+ Slavin/SLAtech engagements 2022-2026 where AI-assisted code passed initial review and then failed under production conditions. Each failure mode has been observed at least three times across independent client engagements before inclusion. Entries are documented as patterns, not as specific client incidents."
  },
  "categories": [
    { "id": "CAT-CONCUR", "name": "Concurrency", "description": "Failures that emerge under simultaneous operations the AI did not model." },
    { "id": "CAT-DATA", "name": "Data Integrity", "description": "Failures that corrupt or lose data under conditions outside happy-path." },
    { "id": "CAT-LOAD", "name": "Behavior Under Load", "description": "Failures that manifest only at scale the AI did not test against." },
    { "id": "CAT-SECURE", "name": "Security", "description": "Vulnerabilities the AI introduced because it does not threat-model your specific surface." },
    { "id": "CAT-COST", "name": "Cost at Scale", "description": "Patterns that are cheap at prototype scale and become unaffordable in production." },
    { "id": "CAT-RECOVER", "name": "Recovery and Failure Modes", "description": "Code that has no plan for partial failure, retry, or rollback." },
    { "id": "CAT-EVOLVE", "name": "Long-Term Evolution", "description": "Code that is correct now and will be a refactoring blocker in 18 months." }
  ],
  "failure_modes": [
    {
      "id": "FM-01",
      "name": "Optimistic locking absent",
      "category": "CAT-CONCUR",
      "ai_generates": "Read-modify-write code without version check or row-level lock. Passes single-threaded test.",
      "production_failure": "Two concurrent writes overwrite each other silently. Lost update problem at moderate concurrency.",
      "detection": "Code review for any read-modify-write on shared records. Load test with 10+ concurrent users on the same entity.",
      "prevention": "Optimistic concurrency token (rowversion / ETag) or explicit pessimistic lock with clear timeout."
    },
    {
      "id": "FM-02",
      "name": "N+1 query pattern",
      "category": "CAT-LOAD",
      "ai_generates": "ORM access in a loop that issues a separate query per item. Looks idiomatic.",
      "production_failure": "Page load goes from 50ms to 5+ seconds as the collection grows. Database CPU spikes.",
      "detection": "Profile real queries on a representative dataset. Watch for query count proportional to result size.",
      "prevention": "Eager loading / join / batched query. Set a query-count budget per endpoint and assert in tests."
    },
    {
      "id": "FM-03",
      "name": "Missing transaction boundaries",
      "category": "CAT-DATA",
      "ai_generates": "Multi-step write sequence (order, payment, inventory) without explicit transaction.",
      "production_failure": "Partial commit on crash mid-sequence leaves inconsistent state. Orders without inventory decrement.",
      "detection": "Trace every multi-write operation. If failure between steps leaves bad state, transaction is missing.",
      "prevention": "Explicit transaction scope around logical units. Outbox pattern for cross-service writes."
    },
    {
      "id": "FM-04",
      "name": "Idempotency missing on retry path",
      "category": "CAT-RECOVER",
      "ai_generates": "Webhook handler or job runner that processes message once, no dedupe.",
      "production_failure": "Network retry sends duplicate event. Customer is charged twice, email sent twice.",
      "detection": "Ask: 'if this runs twice with the same input, what happens?' Verify dedupe key exists.",
      "prevention": "Idempotency key on every external-side-effect operation. Persist seen-keys for retention window."
    },
    {
      "id": "FM-05",
      "name": "Unbounded resource allocation",
      "category": "CAT-LOAD",
      "ai_generates": "List = readAll(); foreach item ... . No pagination, no cap.",
      "production_failure": "Memory exhausted when dataset grows. OutOfMemoryException at 50K rows.",
      "detection": "Identify every readAll-style call. Confirm result size is bounded by request or by paging.",
      "prevention": "Streaming or pagination by default. Reject readAll on unbounded sources at code review."
    },
    {
      "id": "FM-06",
      "name": "Timeout-less external call",
      "category": "CAT-RECOVER",
      "ai_generates": "HttpClient.GetAsync(url) with no timeout. Looks clean.",
      "production_failure": "Vendor outage hangs every dependent request. Thread pool exhausted; whole service down.",
      "detection": "Grep for HTTP clients, message queues, DB calls without explicit timeouts.",
      "prevention": "Explicit timeout on every IO call. Circuit breaker for repeated failures. Bulkhead pool isolation."
    },
    {
      "id": "FM-07",
      "name": "SQL injection via string concatenation",
      "category": "CAT-SECURE",
      "ai_generates": "Dynamic SQL with concatenated user input when parameterized query was awkward.",
      "production_failure": "Trivial SQL injection. Data exfiltration or destruction by a malicious or fuzzed input.",
      "detection": "Static analysis flag on string + sql. Code review for every dynamic query.",
      "prevention": "Parameterized queries by default. Lint rule that flags string concatenation in query construction."
    },
    {
      "id": "FM-08",
      "name": "Authorization missing inside data access",
      "category": "CAT-SECURE",
      "ai_generates": "Endpoint authenticates the user but the data query does not filter by ownership.",
      "production_failure": "Authenticated user retrieves another tenant's records by guessing IDs. Cross-tenant data leak.",
      "detection": "Every multi-tenant read must filter by tenant key in the query. Test with two users + IDOR probe.",
      "prevention": "Row-level security in the database OR a tenant filter helper that wraps every query."
    },
    {
      "id": "FM-09",
      "name": "Token / API key in code",
      "category": "CAT-SECURE",
      "ai_generates": "Hardcoded secret committed to repo while wiring an integration.",
      "production_failure": "Public repo leaks key. Credential rotation required. Sometimes followed by bill shock.",
      "detection": "Pre-commit hook scanning for high-entropy strings; periodic secret scan over history.",
      "prevention": "Secret manager. Never accept a string literal that looks like a key in code review."
    },
    {
      "id": "FM-10",
      "name": "Unbounded LLM context cost",
      "category": "CAT-COST",
      "ai_generates": "RAG retrieval that always sends top-50 chunks to the LLM regardless of relevance.",
      "production_failure": "Bill is 10-50x what was planned because most tokens are noise. Latency degrades too.",
      "detection": "Measure tokens-per-query vs answered-with-citations rate. Anomalies in either are signal.",
      "prevention": "Reranker before LLM, confidence threshold, top-k tuned per use case. Budget alarm on token spend."
    },
    {
      "id": "FM-11",
      "name": "Missing dead-letter handling",
      "category": "CAT-RECOVER",
      "ai_generates": "Message handler that retries on failure forever.",
      "production_failure": "Poison message blocks the queue. Backlog grows; processing freezes.",
      "detection": "Every retry policy needs a give-up condition and a destination for the give-up.",
      "prevention": "Bounded retries, dead-letter queue, alerting on dead-letter count. Manual review path."
    },
    {
      "id": "FM-12",
      "name": "Decimal precision loss in money math",
      "category": "CAT-DATA",
      "ai_generates": "Float / double for monetary amounts. Looks like a number type.",
      "production_failure": "Cents disappear or appear over time. Reconciliation drift. Audit fail.",
      "detection": "Any money field that is not Decimal / fixed-point is wrong. Code review rule.",
      "prevention": "Decimal type everywhere for money. Database column with explicit precision. Unit-tested edge cases."
    },
    {
      "id": "FM-13",
      "name": "Timezone-unaware date handling",
      "category": "CAT-DATA",
      "ai_generates": "DateTime stored without timezone; client converts in JS arbitrarily.",
      "production_failure": "Reports off by hours. Daily-rollup tasks miss data near midnight. Audit trail wrong.",
      "detection": "Every datetime field — confirm UTC storage; every display — confirm explicit timezone conversion.",
      "prevention": "Store UTC. Display in user-local at the edge. Never compare naive datetimes."
    },
    {
      "id": "FM-14",
      "name": "Cache without invalidation",
      "category": "CAT-DATA",
      "ai_generates": "Lookup-cache around a slow query. No invalidation logic on the writer side.",
      "production_failure": "Stale data served to users hours after the change. 'Why does the dashboard not update?'",
      "detection": "Every cache must have a documented invalidation trigger. If not — flag.",
      "prevention": "Bounded TTL + explicit invalidation on write. Stale-while-revalidate where freshness is loose."
    },
    {
      "id": "FM-15",
      "name": "Cross-cutting logging coupled to business code",
      "category": "CAT-EVOLVE",
      "ai_generates": "Log lines threaded through business methods, mixed with returns.",
      "production_failure": "Refactoring drops critical observability silently. Incident response degrades.",
      "detection": "Audit logging to confirm it is a cross-cutting concern, not inline copy-paste.",
      "prevention": "Structured logging via middleware / aspect. Logging contract per layer, enforced in review."
    },
    {
      "id": "FM-16",
      "name": "Tight coupling to LLM vendor",
      "category": "CAT-EVOLVE",
      "ai_generates": "Direct vendor SDK calls scattered through business code.",
      "production_failure": "Vendor price hike or deprecation forces touching every call site. Migration is a quarter.",
      "detection": "Grep for vendor SDK names. If they appear in business code, abstraction is missing.",
      "prevention": "LLM gateway with versioned prompts and a stable internal interface. Vendor swap = one config change."
    },
    {
      "id": "FM-17",
      "name": "Schema migration without backfill",
      "category": "CAT-DATA",
      "ai_generates": "ALTER TABLE adding a non-null column with a default but no backfill plan for existing rows.",
      "production_failure": "Long-running migration locks production table for hours. Or worse: silent constraint break.",
      "detection": "Every schema migration on a non-trivial table needs a backfill plan reviewed in advance.",
      "prevention": "Expand-contract pattern. Nullable column first, backfill, then enforce non-null."
    },
    {
      "id": "FM-18",
      "name": "Sync work inside HTTP handler",
      "category": "CAT-LOAD",
      "ai_generates": "Endpoint that does an external API call inline before responding.",
      "production_failure": "p99 latency tracks the slowest vendor. Cascading failure when vendor slows.",
      "detection": "Anything in a sync handler taking >100ms is a candidate for async / queue.",
      "prevention": "Background job + status endpoint for slow work. Async pipeline for non-critical-path work."
    },
    {
      "id": "FM-19",
      "name": "Missing rate limit on AI endpoint",
      "category": "CAT-COST",
      "ai_generates": "Public AI endpoint with no per-user / per-IP throttle.",
      "production_failure": "Abuse runs up the LLM bill. Single bad actor can exceed a month of budget in an hour.",
      "detection": "Every LLM-backed endpoint must have a per-key rate limit. Alarm above threshold.",
      "prevention": "Rate limiter with budget alerting. Tiered quotas. Authenticated-only AI endpoints by default."
    },
    {
      "id": "FM-20",
      "name": "Prompt injection through retrieved content",
      "category": "CAT-SECURE",
      "ai_generates": "RAG handler treats retrieved chunks as trusted instructions to the LLM.",
      "production_failure": "Malicious document poisons the response. AI assistant exfiltrates context or executes a tool unsafely.",
      "detection": "Threat-model the corpus. If any document can be authored by untrusted parties, retrieval is an injection vector.",
      "prevention": "Retrieved content as data, never as instruction. System prompt that forbids following retrieved instructions. Output validation."
    },
    {
      "id": "FM-21",
      "name": "No rollback path for AI feature",
      "category": "CAT-RECOVER",
      "ai_generates": "Replaces a deterministic computation with an LLM call. No fallback.",
      "production_failure": "LLM vendor outage takes the feature down. There is no degraded mode.",
      "detection": "For every AI feature: what does the system do when the AI is unavailable? If 'nothing' — fix.",
      "prevention": "Feature flag + deterministic fallback. Graceful degradation. Health probes on the AI dependency."
    },
    {
      "id": "FM-22",
      "name": "Time-window vulnerability in promotional code",
      "category": "CAT-DATA",
      "ai_generates": "Coupon redemption that checks remaining count then decrements in a separate step.",
      "production_failure": "Two concurrent redemptions both see remaining=1. Both succeed. Inventory underflows.",
      "detection": "Atomic check-and-decrement; otherwise race exists.",
      "prevention": "Atomic database operation (UPDATE ... WHERE remaining > 0). Or distributed lock with timeout."
    }
  ],
  "summary": {
    "total": 22,
    "by_category": {
      "Concurrency": 1,
      "Data Integrity": 6,
      "Behavior Under Load": 3,
      "Security": 4,
      "Cost at Scale": 2,
      "Recovery and Failure Modes": 4,
      "Long-Term Evolution": 2
    }
  },
  "see_also": {
    "position_page": "https://www.slavin.ai/Architect-vs-AI.aspx",
    "governance_baseline": "https://www.slavin.ai/data/ai-governance-baseline.json",
    "vendor_pricing": "https://www.slavin.ai/data/llm-vendor-pricing.json",
    "use_case_catalog": "https://www.slavin.ai/data/ai-use-case-catalog.json"
  }
}
