{
  "_comment": "MOCK fixture — see examples/MOCK-ai.md.",
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "ml": "http://mlcommons.org/croissant/"
  },
  "@type": "ml:Dataset",
  "name": "frontier-llm-v4-training-corpus",
  "version": "4.0.0",
  "description": "Training corpus for frontier-llm-v4. Article 53(1)(d) summary attached at summary.md.",
  "license": "internal-use-only",
  "creator": { "@type": "Organization", "name": "FrontierLabs Inc." },
  "datePublished": "2026-04-30",
  "ml:art53Summary": {
    "_comment": "veric-extension. Anchor for the Article 53(1)(d) summary; carries the declared list of publishers and descriptions.",
    "summaryUri": "summary.md",
    "summaryHash": "sha256:c2b3...e7a1",
    "publishedAt": "2026-04-08T00:00:00Z",
    "declared_publishers": [
      "commoncrawl.org",
      "wikipedia.org",
      "github.com",
      "arxiv.org",
      "stackexchange.com",
      "reddit.com (r/* subset, see summary §3.4)"
    ]
  },
  "ml:distribution": [
    {
      "@type": "ml:FileObject",
      "@id": "common-crawl-2024",
      "name": "common-crawl-2024.parquet",
      "ml:source": { "publisher": "commoncrawl.org", "license": "CC0-1.0" }
    },
    {
      "@type": "ml:FileObject",
      "@id": "partner-z-news-archive-2024",
      "name": "partner-z-news-archive-2024.tar",
      "description": "News archive shard licensed via partner Z. Per-article publisher captured in metadata.",
      "ml:source": {
        "publisher": "nytimes.com",
        "license": "partner-Z-license-v3 (commercial use undefined for AI training)",
        "addedAt": "2026-04-29T12:00:00Z"
      }
    }
  ],
  "ml:recordSet": [
    {
      "@type": "ml:RecordSet",
      "@id": "training-corpus",
      "name": "training_corpus_v4",
      "ml:field": [
        { "name": "doc_id", "ml:dataType": "ml:Integer" },
        { "name": "text", "ml:dataType": "ml:Text", "ml:tags": ["training-input"] },
        { "name": "publisher", "ml:dataType": "ml:Text", "ml:tags": ["source-attribution"] }
      ]
    }
  ]
}
