{
"@context": [
"https://w3id.org/ro/crate/1.1/context",
{
"croissant": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dct": "http://purl.org/dc/terms/"
}
],
"@graph": [
{
"@id": "ro-crate-metadata.json",
"@type": "CreativeWork",
"conformsTo": {
"@id": "https://w3id.org/ro/crate/1.1"
},
"about": {
"@id": "./"
}
},
{
"@id": "./",
"@type": "Dataset",
"name": "BenchmarkSet1500 aggregated AI Ready Dataset",
"description": "High-Accuracy Excited-State Reference Benchmark Dataset for Organic Semiconductors. The BenchmarkSet1500 resource theme provides a dataset of multireference excited states for 1500 small organic semiconductors, alongside a Python-based workflow used to generate the associated high-level excited-state calculations. It is designed for researchers in organic electronics and data-driven chemistry who require reliable and reproducible excited-state data, as well as those developing machine learning models or screening pipelines. By combining standardised computational workflows with multi-level electronic structure methods (TD-DFT, CASSCF, NEVPT2), the resource enables reproducible data generation and delivers an AI-ready dataset suitable for structure-property analysis, direct quantum chemistry method comparison, and molecular design.",
"license": "CC-BY-4.0 (https://spdx.org/licenses/CC-BY-4.0.html)",
"url": "https://data-collections.psdi.ac.uk/records/mktrj-smy12/latest",
"version": "1.0.0",
"datePublished": "2026-04-17T00:00:00Z",
"creator": [
{
"@id": "https://orcid.org/0000-0002-0371-4047"
},
{
"@id": "https://orcid.org/0009-0000-9662-0869"
}
],
"mainEntity": {
"@id": "./BenchmarkSet1500.csv"
},
"hasPart": [
{
"@id": "./BenchmarkSet1500.csv"
}
]
},
{
"@id": "https://orcid.org/0000-0002-0371-4047",
"@type": "Person",
"name": "Tahereh Nematiaram"
},
{
"@id": "https://orcid.org/0009-0000-9662-0869",
"@type": "Person",
"name": "Malin Zollner"
},
{
"@id": "./BenchmarkSet1500.csv",
"@type": "File",
"name": "BenchmarkSet1500.csv",
"description": "Multireference excited-state data file containing aggregated data for organic semiconductors. Each row corresponds to a single molecule. Columns include molecular identifiers (CCDC ID, SMILES, InChI, formula, number of atoms, CCDC URL, DOI) as well as excited-state energies (S1, S2, T1, T2) and oscillator strengths (f1, f2) computed with SA-CASSCF and NEVPT2.",
"encodingFormat": "text/csv"
}
]
}