{
  "version": "2026-05-22-programmatic-coverage",
  "summary": "Composite IQ and EQ scores are derived from public benchmark results, conservative imputation rules, and chart display policies documented on the methodology page.",
  "derivedRankings": [
    {
      "id": "composite-iq",
      "rankingName": "Composite IQ",
      "direction": "higher_is_better",
      "scoreField": "iq",
      "dimensions": [
        {
          "id": "D1",
          "name": "Fluid Abstraction",
          "minBenchmarks": 1,
          "benchmarks": [
            {
              "field": "arcAgi2",
              "name": "ARC-AGI-2"
            },
            {
              "field": "arcAgi1",
              "name": "ARC-AGI-1"
            }
          ]
        },
        {
          "id": "D2",
          "name": "Mathematical Reasoning",
          "minBenchmarks": 1,
          "benchmarks": [
            {
              "field": "fmT4Acc",
              "name": "FrontierMath Tier 4"
            },
            {
              "field": "fmT13Acc",
              "name": "FrontierMath Tier 1-3"
            },
            {
              "field": "aime",
              "name": "AIME"
            },
            {
              "field": "proofbench",
              "name": "ProofBench"
            }
          ]
        },
        {
          "id": "D3",
          "name": "Programmatic Reasoning",
          "minBenchmarks": 2,
          "benchmarks": [
            {
              "field": "swebench",
              "name": "SWE-Bench Verified"
            },
            {
              "field": "swebenchPro",
              "name": "SWE-Bench Pro"
            },
            {
              "field": "livecodebench",
              "name": "LiveCodeBench"
            }
          ]
        },
        {
          "id": "D4",
          "name": "Critical Reasoning",
          "minBenchmarks": 1,
          "benchmarks": [
            {
              "field": "hle",
              "name": "Humanity's Last Exam"
            },
            {
              "field": "critPt",
              "name": "CritPt"
            },
            {
              "field": "sciCode",
              "name": "SciCode"
            },
            {
              "field": "gpqa",
              "name": "GPQA Diamond"
            }
          ]
        },
        {
          "id": "D5",
          "name": "Agentic Reasoning",
          "minBenchmarks": 1,
          "benchmarks": [
            {
              "field": "terminalbench",
              "name": "Terminal-Bench 2.0"
            },
            {
              "field": "terminalbenchHard",
              "name": "Terminal-Bench Hard"
            },
            {
              "field": "browseComp",
              "name": "BrowseComp"
            },
            {
              "field": "osworldVerified",
              "name": "OSWorld-Verified"
            },
            {
              "field": "toolathlon",
              "name": "Toolathlon"
            },
            {
              "field": "ifBench",
              "name": "IFBench"
            }
          ]
        }
      ]
    },
    {
      "id": "composite-eq",
      "rankingName": "Composite EQ",
      "direction": "higher_is_better",
      "scoreField": "eq",
      "dimensions": [
        {
          "id": "eq-bench",
          "name": "Emotional and Social Reasoning",
          "benchmarks": [
            {
              "field": "eqBenchElo",
              "name": "EQ-Bench 3 Elo"
            }
          ]
        },
        {
          "id": "arena",
          "name": "Human Preference and Interaction Quality",
          "benchmarks": [
            {
              "field": "arenaScore",
              "name": "Chatbot Arena"
            }
          ]
        }
      ]
    },
    {
      "id": "effective-cost",
      "rankingName": "Effective Cost",
      "direction": "lower_is_better",
      "scoreField": "effectiveCost",
      "unit": "USD per 1M I/O Tokens",
      "breakdown": [
        {
          "id": "published-pricing",
          "name": "Published Token Pricing",
          "inputs": [
            {
              "field": "inP",
              "name": "Input token price"
            },
            {
              "field": "outP",
              "name": "Output token price"
            }
          ],
          "summary": "Base cost is input price plus output price for 1M input tokens and 1M output tokens."
        },
        {
          "id": "observed-token-usage",
          "name": "Observed Token Usage",
          "inputs": [
            {
              "field": "aaTokensM",
              "name": "Artificial Analysis token usage"
            },
            {
              "field": "aaInputTokensM",
              "name": "Artificial Analysis input token usage"
            },
            {
              "field": "aaOutputTokensM",
              "name": "Artificial Analysis output token usage"
            }
          ],
          "summary": "Validated token usage calibrates the median workload multiplier when available."
        },
        {
          "id": "task-cost-residuals",
          "name": "Task Cost Residuals",
          "benchmarks": [
            {
              "field": "arcCostPerTask",
              "name": "ARC-AGI cost per task"
            },
            {
              "field": "valsCostPerTest",
              "name": "VALS cost per test"
            },
            {
              "field": "swebenchCostPerTask",
              "name": "SWE-Bench cost per task"
            },
            {
              "field": "hleCostPerQuestion",
              "name": "Humanity's Last Exam cost per question"
            }
          ],
          "summary": "Task-level cost benchmarks adjust for observed cost differences not explained by published token prices alone."
        }
      ]
    }
  ],
  "updatedAt": "2026-05-22T23:46:48.443Z",
  "url": "https://aiiq.org/methodology/"
}
