{
    "@context": [
        "https://w3id.org/ro/crate/1.1/context",
        {
            "croissant": "http://mlcommons.org/croissant/",
            "rai": "http://mlcommons.org/croissant/RAI/",
            "dct": "http://purl.org/dc/terms/"
        }
    ],
    "@graph": [
        {
            "@id": "ro-crate-metadata.json",
            "@type": "CreativeWork",
            "conformsTo": {
                "@id": "https://w3id.org/ro/crate/1.1"
            },
            "about": {
                "@id": "./"
            }
        },
        {
            "@id": "./",
            "@type": "Dataset",
            "name": "chili100k_strat: Dataset to train or fine-tune CrystaLLM-pi for the targeted generation of experimental materials conditioned on XRD profiles",
            "description": "The dataset contains experimentally determined crystal structures sourced from the [Chemically-Informed Large-scale Inorganic Nanomaterials Dataset for Advancing Graph Machine Learning](https://github.com/UlrikFriisJensen/CHILI) (CHILI), a curated and filtered subset of the [Crystallography Open Database (COD)](https://www.crystallography.net/cod/), as described in [CHILI: Chemically-Informed Large-scale Inorganic Nanomaterials Dataset for Advancing Graph Machine Learning](https://dx.doi.org/10.1145/3637528.3671538). The structural data underwent text augmentation following the pre-processing pipeline of [CrystaLLM-pi](https://github.com/C-Bone-UCL/CrystaLLM-pi). Each structure was labelled with its theoretical X-ray diffraction (XRD) pattern. The complete dataset, published on Hugging Face (https://huggingface.co/datasets/c-bone/chili100k_strat), can be used to train or fine-tune CrystaLLM-pi for the targeted generation of materials conditioned on XRD profiles.",
            "url": "https://huggingface.co/datasets/c-bone/chili100k_strat",
            "version": "1.0.0",
            "keywords": [
                "10K - 100K",
                "parquet",
                "Text",
                "Datasets",
                "pandas",
                "Croissant",
                "Polars",
                "Region: US",
                "CrystaLLM-pi",
                "Crystallography Open Database (COD)",
                "CIF",
                "condition vector",
                "crystal"
            ],
            "datePublished": "2026-04-24T00:00:00Z",
            "creator": {
                "@id": "https://huggingface.co/c-bone"
            },
            "mainEntity": {
                "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true"
            },
            "hasPart": [
                {
                    "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true"
                },
                {
                    "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/test-00000-of-00001.parquet?download=true"
                },
                {
                    "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/validation-00000-of-00001.parquet?download=true"
                }
            ]
        },
        {
            "@id": "https://huggingface.co/c-bone",
            "@type": "Person",
            "name": "Cyprien Bone"
        },
        {
            "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true",
            "@type": "File",
            "name": "train-00000-of-00001.parquet",
            "description": "Training subset of the dataset (11091 records)",
            "encodingFormat": "application/x-parquet"
        },
        {
            "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/test-00000-of-00001.parquet?download=true",
            "@type": "File",
            "name": "test-00000-of-00001.parquet",
            "description": "Test subset of the dataset (1500 records)",
            "encodingFormat": "application/x-parquet"
        },
        {
            "@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/validation-00000-of-00001.parquet?download=true",
            "@type": "File",
            "name": "validation-00000-of-00001.parquet",
            "description": "Validation subset of the dataset (1500 records)",
            "encodingFormat": "application/x-parquet"
        }
    ]
}