{
"@context": [
"https://w3id.org/ro/crate/1.1/context",
{
"croissant": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dct": "http://purl.org/dc/terms/"
}
],
"@graph": [
{
"@id": "ro-crate-metadata.json",
"@type": "CreativeWork",
"conformsTo": {
"@id": "https://w3id.org/ro/crate/1.1"
},
"about": {
"@id": "./"
}
},
{
"@id": "./",
"@type": "Dataset",
"name": "chili100k_strat: Dataset to train or fine-tune CrystaLLM-pi for the targeted generation of experimental materials conditioned on XRD profiles",
"description": "The dataset contains experimentally determined crystal structures sourced from [Chemically-Informed Large-scale Inorganic Nanomaterials Dataset for Advancing Graph Machine Learning](https://github.com/UlrikFriisJensen/CHILI) as described in [CHILI: Chemically-Informed Large-scale Inorganic Nanomaterials Dataset for Advancing Graph Machine Learning](https://dx.doi.org/10.1145/3637528.3671538), a curated and filtered subset of the Crystallography Open Database [COD](https://www.crystallography.net/cod/). The structural data underwent text augmentation as per the pre-processing pipeline in [CrystaLLM-pi](https://github.com/C-Bone-UCL/CrystaLLM-pi). Each structure was labelled with its theoretical X-ray diffraction (XRD) pattern. The complete dataset, published on Hugging Face (https://huggingface.co/datasets/c-bone/chili100k_strat), can be used to train or fine-tune CrystaLLM-pi for the targeted generation of materials conditioned on XRD profiles.",
"url": "https://huggingface.co/datasets/c-bone/chili100k_strat",
"version": "1.0.0",
"keywords": [
"1K - 10K",
"parquet",
"Text",
"Datasets",
"pandas",
"Croissant",
"Polars",
"US Region: US",
"CrystaLLM-pi",
"Crystallography Open Database (COD)",
"CIF",
"condition vector",
"crystal"
],
"datePublished": "2026-04-24T00:00:00Z",
"creator": {
"@id": "https://huggingface.co/c-bone"
},
"mainEntity": {
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true"
},
"hasPart": [
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true"
},
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/test-00000-of-00001.parquet?download=true"
},
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/validation-00000-of-00001.parquet?download=true"
}
]
},
{
"@id": "https://huggingface.co/c-bone",
"@type": "Person",
"name": "Cyprien Bone"
},
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/train-00000-of-00001.parquet?download=true",
"@type": "File",
"name": "train-00000-of-00001.parquet",
"description": "Training subset of dataset (11091 records)",
"encodingFormat": "application/x-parquet"
},
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/test-00000-of-00001.parquet?download=true",
"@type": "File",
"name": "test-00000-of-00001.parquet",
"description": "Test subset of dataset (1500 records)",
"encodingFormat": "application/x-parquet"
},
{
"@id": "https://huggingface.co/datasets/c-bone/chili100k_strat/resolve/main/data/validation-00000-of-00001.parquet?download=true",
"@type": "File",
"name": "validation-00000-of-00001.parquet",
"description": "Validation subset of dataset (1500 records)",
"encodingFormat": "application/x-parquet"
}
]
}