{
"@context": {
"@language": "en",
"@vocab": "https://schema.org/",
"sc": "https://schema.org/",
"cr": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dct": "http://purl.org/dc/terms/",
"citeAs": "cr:citeAs",
"column": "cr:column",
"conformsTo": "dct:conformsTo",
"data": {
"@id": "cr:data",
"@type": "@json"
},
"dataType": {
"@id": "cr:dataType",
"@type": "@vocab"
},
"examples": {
"@id": "cr:examples",
"@type": "@json"
},
"extract": "cr:extract",
"field": "cr:field",
"fileProperty": "cr:fileProperty",
"fileObject": "cr:fileObject",
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
"md5": "cr:md5",
"parentField": "cr:parentField",
"path": "cr:path",
"recordSet": "cr:recordSet",
"references": "cr:references",
"regex": "cr:regex",
"repeated": "cr:repeated",
"replace": "cr:replace",
"samplingRate": "cr:samplingRate",
"separator": "cr:separator",
"source": "cr:source",
"subField": "cr:subField",
"transform": "cr:transform"
},
"@type": "sc:Dataset",
"conformsTo": "http://mlcommons.org/croissant/1.0",
"name": "MP_BP_records",
"description": "These two files contain the exported melting point and boiling point records from PChProp version 1 provided by the Physical Science Data Infrastructure project. The records file contains the data and molecular IDs; the compounds file contains further molecular information.",
"license": "https://spdx.org/licenses/CC-BY-4.0.html",
"isLiveDataset": false,
"distribution": [
{
"@type": "cr:FileObject",
"@id": "MP_BP_records.csv",
"name": "MP_BP_records.csv",
"contentUrl": "MP_BP_records.csv",
"encodingFormat": "text/csv",
"sha256": "91bcd3f16910fc31b0995fee8f41fb0b45be4f440ecfa3588764e10d4ec0af16"
},
{
"@type": "cr:FileObject",
"@id": "MP_BP_compounds.csv",
"name": "MP_BP_compounds.csv",
"contentUrl": "MP_BP_compounds.csv",
"encodingFormat": "text/csv",
"sha256": "7cf6b8182ad3a49dc0d6eaffd67093a755eb3975d8297de8d056b37fb64e6f42"
}
],
"recordSet": [
{
"@type": "cr:RecordSet",
"@id": "MP_BP_records",
"name": "MP_BP_records",
"description": "This file contains the melting point and boiling point information for the compounds within PChProp version 1 that have both boiling point and melting point records",
"field": [
{
"@type": "cr:Field",
"@id": "MP_BP_records/PSDI",
"name": "PSDI",
"description": "Stable identifier for the entry in PChProp. Use as the primary key for joining, deduplication, and dataset indexing",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "PSDI"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/InChi",
"name": "InChi",
"description": "IUPAC International Chemical Identifier for the structure. Recommended canonical identity string for deduplication and joining across datasets. Use InChIKey for indexing",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "InChi"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/Melting_Point_C",
"name": "Melting Point (C)",
"description": "Temperature at which a pure substance transitions between solid and liquid at the specified pressure and (if relevant) composition. Values are condition-dependent; do not treat multiple values as duplicates without checking conditions and measurement basis",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "Melting Point (C)"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/Boiling_Point_C",
"name": "Boiling Point (C)",
"description": "Temperature at which a liquid\u2019s vapour pressure equals the specified external pressure. Strongly pressure-dependent; pressure should be captured for comparability",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "Boiling Point (C)"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/Source",
"name": "Source",
"description": "The originating data file for this data",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "Source"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/Citation",
"name": "Citation",
"description": "The citation text provided by the originating paper or location for that data point",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "Citation"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_records/Master_Version",
"name": "Master Version",
"description": "Version number of the originating data collection. This is for internal use to understand the upload history of the data into the data collection",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_records.csv"
},
"extract": {
"column": "Master Version"
}
}
}
]
},
{
"@type": "cr:RecordSet",
"@id": "MP_BP_compounds",
"name": "MP_BP_compounds",
"description": "Further molecular information for the compounds in the records file, extracted from the CSV file, with their schema.",
"field": [
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/PSDI_ID",
"name": "PSDI_ID",
"description": "Stable identifier for the entry in PChProp. Use as the primary key for joining, deduplication, and dataset indexing.",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "PSDI_ID"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Canonical_name",
"name": "Canonical name",
"description": "The canonical name of the compound. This column is empty as this data was not exported from PChProp",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Canonical name"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/InChi",
"name": "InChi",
"description": "IUPAC International Chemical Identifier for the structure. Recommended canonical identity string for deduplication and joining across datasets. Use InChIKey for indexing",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "InChi"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/InChiKey",
"name": "InChiKey",
"description": "Hashed InChI identifier. Useful for indexing/joining. Not reversible; keep InChI if you need structure reconstruction",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "InChiKey"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/SMILES",
"name": "SMILES",
"description": "SMILES string representing the structure. May not be canonical unless your pipeline enforces canonicalisation; treat as a representation, not guaranteed unique identity",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "SMILES"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Tautomers",
"name": "Tautomers",
"description": "Number of enumerated tautomers under the calculation method used. Counts depend on enumeration rules; calculated by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Tautomers"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Isomers",
"name": "Isomers",
"description": "Number of enumerated isomers under the calculation method used. Interpretation depends on whether stereochemistry/tautomerism is included; calculated by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Isomers"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/n_MeltingPoint",
"name": "n_MeltingPoint",
"description": "Number of melting point entries for that compound within PChProp",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "n_MeltingPoint"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/n_BoilingPoint",
"name": "n_BoilingPoint",
"description": "Number of boiling point entries for that compound within PChProp",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "n_BoilingPoint"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/n_HLC",
"name": "n_HLC",
"description": "Number of Henry's law constant entries for that compound within PChProp",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "n_HLC"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/n_LogS",
"name": "n_LogS",
"description": "Number of solubility entries for that compound within PChProp",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "n_LogS"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/n_Miscibility",
"name": "n_Miscibility",
"description": "Number of miscibility entries for that compound within PChProp",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "n_Miscibility"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Molecular_Weight",
"name": "Molecular Weight",
"description": "Molecular weight of the compound",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Molecular Weight"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/CLogP",
"name": "CLogP",
"description": "Calculated octanol/water partition coefficient on a log10 scale. Calculated by RDKit",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "CLogP"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Heavy_Atom_Count",
"name": "Heavy Atom Count",
"description": "Count of non-hydrogen atoms in the structure. Deterministic given a defined structure representation",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Heavy Atom Count"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Hydrogen_Bond_Acceptors",
"name": "Hydrogen Bond Acceptors",
"description": "Number of hydrogen bond acceptor sites as defined by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Hydrogen Bond Acceptors"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Hydrogen_Bond_Donors",
"name": "Hydrogen Bond Donors",
"description": "Number of hydrogen bond donor sites as defined by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Hydrogen Bond Donors"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Rotatable_Bonds",
"name": "Rotatable Bonds",
"description": "Number of rotatable bonds as calculated by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Rotatable Bonds"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Rings",
"name": "Rings",
"description": "Total ring count under the ring perception model used, as calculated by RDKit",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Rings"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Hetero_Aromatic_Rings",
"name": "Hetero Aromatic Rings",
"description": "Count of aromatic rings containing heteroatoms under the method\u2019s aromaticity model",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Hetero Aromatic Rings"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Aromatic_Rings",
"name": "Aromatic Rings",
"description": "Count of aromatic rings under the method\u2019s aromaticity model",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Aromatic Rings"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Topological_Polar_Surface_Area",
"name": "Topological Polar Surface Area",
"description": "Topological polar surface area in square \u00e5ngstr\u00f6ms",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Topological Polar Surface Area"
}
}
},
{
"@type": "cr:Field",
"@id": "MP_BP_compounds/Quantitative_Estimation_of_Drug-likeness",
"name": "Quantitative Estimation of Drug-likeness",
"description": "Quantitative estimation of drug-likeness score",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "MP_BP_compounds.csv"
},
"extract": {
"column": "Quantitative Estimation of Drug-likeness"
}
}
}
]
}
],
"url": "https://resources.psdi.ac.uk/data/31dcd324-1574-4eeb-ba17-e8c2e9b4e0f2",
"version": "1.0",
"citeAs": "Jeremy G. Frey, Samantha Pearman-Kanza, Joshua Cheung, Joanna Grundy and Matthew Partridge. Physical Chemistry Properties Data Collection. Online. 20 October 2024",
"inLanguage": "en-GB",
"dateCreated": "2025-10-20",
"dateModified": "2025-10-20",
"datePublished": "2025-04-23",
"publisher": [
{
"@type": "sc:Organization",
"name": "Physical Science Data Infrastructure"
}
],
"creator": [
{
"@type": "sc:Person",
"name": "Matthew Partridge"
}
],
"contactPoint": [
{
"@type": "sc:ContactPoint",
"email": "m.c.partridge@soton.ac.uk"
}
]
}