{
    "@context": {
        "@language": "en",
        "@vocab": "https://schema.org/",
        "citeAs": "cr:citeAs",
        "column": "cr:column",
        "conformsTo": "dct:conformsTo",
        "cr": "http://mlcommons.org/croissant/",
        "rai": "http://mlcommons.org/croissant/RAI/",
        "data": {
            "@id": "cr:data",
            "@type": "@json"
        },
        "datePublished": "sc:datePublished",
        "dataType": {
            "@id": "cr:dataType",
            "@type": "@vocab"
        },
        "dct": "http://purl.org/dc/terms/",
        "examples": {
            "@id": "cr:examples",
            "@type": "@json"
        },
        "extract": "cr:extract",
        "field": "cr:field",
        "fileProperty": "cr:fileProperty",
        "fileObject": "cr:fileObject",
        "fileSet": "cr:fileSet",
        "format": "cr:format",
        "includes": "cr:includes",
        "isLiveDataset": "cr:isLiveDataset",
        "jsonPath": "cr:jsonPath",
        "key": "cr:key",
        "md5": "cr:md5",
        "parentField": "cr:parentField",
        "path": "cr:path",
        "recordSet": "cr:recordSet",
        "references": "cr:references",
        "regex": "cr:regex",
        "repeated": "cr:repeated",
        "replace": "cr:replace",
        "sc": "https://schema.org/",
        "separator": "cr:separator",
        "source": "cr:source",
        "subField": "cr:subField",
        "transform": "cr:transform",
        "wd": "https://www.wikidata.org/wiki/"
    },
    "@type": "sc:Dataset",
    "name": "Testing, Training and Validation Synthetic Dataset of Transmission Electron Microscopy (TEM) Images of Gold Nano-particles for Segmentation",
    "description": "This is a dataset of Transmission Electron Microscopy (TEM) images of gold nano-particles and comprises synthetic data (generated using TEMPOS) mimicking experimental TEM images (from Professor Lee Cronin, University of Glasgow, https://doi.org/10.1038/s41467-020-16501-4) for segmentation. This data has been split into test and training sets. A validation set of experimental images with manual annotations has also been included. This croissant file was heavily inspired by https://github.com/mlcommons/croissant/tree/828034a45d5c536789c7f6311d4c4a68f7804129/datasets/1.0/coco2014",
    "conformsTo": "http://mlcommons.org/croissant/1.0",
    "citeAs": "Andrew Stewart and Natalia Da Silva De Sa. Dataset. 2026. Testing, Training and Validation Synthetic Dataset of Transmission Electron Microscopy (TEM) Images of Gold Nano-particles for Segmentation. [accessed YYYY-MM-DD].",
    "creator": [
        {
            "@type": "sc:Person",
            "name": "Andrew Stewart",
            "url": "https://orcid.org/0000-0002-3081-5644"
        },
        {
            "@type": "sc:Person",
            "name": "Natalia Da Silva De Sa",
            "url": "https://orcid.org/0009-0005-2972-9975"
        }
    ],
    "datePublished": "2026-04-01T00:00:00Z",
    "license": "CC-BY-4.0 (https://spdx.org/licenses/CC-BY-4.0.html)",
    "url": "https://data-collections.psdi.ac.uk/communities/ai-ready-datasets/RECORDFORTHISDATASET",
    "version": "1.0.0",
    "rai:dataCollection": "Experimental TEM images of gold nanoparticles from Professor Lee Cronin, University of Glasgow (https://doi.org/10.1038/s41467-020-16501-4) are included in the dataset for validation. A synthetic dataset has been generated using the TEMPOS software to look visually similar to the corresponding experimental images and to be used for training and testing. TEMPOS generates synthetic TEM images by procedurally creating randomized nanoparticle scenes and applying image transformations to mimic imaging effects, while simultaneously producing pixel-perfect annotations from the known ground-truth geometry.",
    "rai:dataCollectionType": [
        "Synthetic",
        "Experimental"
    ],
    "rai:dataCollectionMissingData": "Not applicable",
    "rai:dataCollectionRawData": "Transmission Electron Microscopy (TEM) image files (.png format)",
    "rai:dataAnnotationProtocol": "Annotations are segmentation results. For the experimental validation dataset, manual annotations have been performed.",
    "rai:dataAnnotationPlatform": "See machineAnnotationTools for details",
    "rai:dataAnnotationAnalysis": "Not applicable",
    "rai:annotatorDemographics": "Single annotator - demographics not relevant",
    "rai:machineAnnotationTools": "For the experimental validation dataset, the VIA annotator was used to make the segmentation annotations (https://www.robots.ox.ac.uk/~vgg/software/via/). Synthetic datasets have been annotated with MASKRCNN.",
    "rai:annotationsPerItem": "Multiple segmentation annotations per TEM image (depending on number of particles)",
    "rai:dataPreprocessingProtocol": [
        "Not applicable"
    ],
    "rai:dataManipulationProtocol": "Split of TEM images into training and test sets was random (with overall percentage split set to 80:20).",
    "rai:dataImputationProtocol": "Not applicable",
    "rai:dataUseCases": [
        "Training, test and validation datasets for segmentation using machine learning models. Initial segmentation has been done using MASKRCNN but other machine learning models can be compared to this."
    ],
    "rai:dataBiases": [
        "Not applicable"
    ],
    "rai:personalSensitiveInformation": [
        "No personal or sensitive information is included in the data."
    ],
    "rai:dataSocialImpact": "Not applicable",
    "rai:dataLimitations": [
        "Not applicable"
    ],
    "rai:dataReleaseMaintenancePlan": "The data are being released as a one-off with no immediate plans for revisions.",
    "distribution": [
        {
            "@type": "cr:FileObject",
            "@id": "images.zip",
            "name": "images.zip",
            "description": "zip file containing TEM png image files, grouped into test/train/val folders.",
            "contentSize": "925602797 B",
            "contentUrl": "./images.zip",
            "encodingFormat": "application/zip",
            "sha256": "114b8a1da4ce185acfc080c43d9c47674fd90d7d4da25748f128694977accf19"
        },
        {
            "@type": "cr:FileSet",
            "@id": "image-files",
            "name": "image-files",
            "description": "Croissant FileSet including all TEM png image files within the images.zip file.",
            "containedIn": [
                {
                    "@id": "images.zip"
                }
            ],
            "encodingFormat": "image/png",
            "includes": "*.png"
        },
        {
            "@type": "cr:FileObject",
            "@id": "instances_annotations_train_fileobject",
            "name": "instances_annotations_train.json",
            "description": "Metadata for training dataset and each image file along with segmentation annotations for each image file in COCO format (https://cocodataset.org/#home, very common for instance segmentation).",
            "contentSize": "295688609 B",
            "contentUrl": "./instances_annotations_train.json",
            "encodingFormat": "application/json",
            "sha256": "40bfbaa00fde164b8b6ff511d3a4349a9ea09225bd3bf222ee8d815cac7c12af"
        },
        {
            "@type": "cr:FileObject",
            "@id": "instances_annotations_test_fileobject",
            "name": "instances_annotations_test.json",
            "description": "Metadata for test dataset and each image file along with segmentation annotations for each image file in COCO format (https://cocodataset.org/#home, very common for instance segmentation).",
            "contentSize": "29298269 B",
            "contentUrl": "./instances_annotations_test.json",
            "encodingFormat": "application/json",
            "sha256": "4be81e0276e0f4603b3cab5f51c0b2e52a6b1e9ea746daf334f9e3efbf4e959e"
        },
        {
            "@type": "cr:FileObject",
            "@id": "instances_annotations_val_fileobject",
            "name": "instances_annotations_val.json",
            "description": "Metadata for experimental validation dataset and each image file along with segmentation annotations for each image file in COCO format (https://cocodataset.org/#home, very common for instance segmentation).",
            "contentSize": "1058576 B",
            "contentUrl": "./instances_annotations_val.json",
            "encodingFormat": "application/json",
            "sha256": "8afa90ddf63195b1ef6d6c74f8da1ecfe716b6af70bde1c855e1cc45c2fc0f94"
        },
        {
            "@type": "cr:FileSet",
            "@id": "instances_annotations_annotations_fileset",
            "name": "instances_annotations_annotations_fileset",
            "description": "Croissant FileSet including all instances_annotations.json metadata files for training, test and validation datasets. Three separate filesets are required - a separate one for each of the recordsets that are read from them since these have different lengths to each other.",
            "cr:FileObject": [
                {
                    "@id": "instances_annotations_val_fileobject"
                }
            ],
            "encodingFormat": "application/json",
            "includes": "*.json"
        },
        {
            "@type": "cr:FileSet",
            "@id": "instances_annotations_categories_fileset",
            "name": "instances_annotations_categories_fileset",
            "description": "Croissant FileSet including all instances_annotations.json metadata files for training, test and validation datasets. Three separate filesets are required - a separate one for each of the recordsets that are read from them since these have different lengths to each other.",
            "cr:FileObject": [
                {
                    "@id": "instances_annotations_val_fileobject"
                }
            ],
            "encodingFormat": "application/json",
            "includes": "*.json"
        },
        {
            "@type": "cr:FileSet",
            "@id": "instances_annotations_images_fileset",
            "name": "instances_annotations_images_fileset",
            "description": "Croissant FileSet including all instances_annotations.json metadata files for training, test and validation datasets. Three separate filesets are required - a separate one for each of the recordsets that are read from them since these have different lengths to each other.",
            "cr:FileObject": [
                {
                    "@id": "instances_annotations_val_fileobject"
                }
            ],
            "encodingFormat": "application/json",
            "includes": "*.json"
        },
        {
            "@type": "cr:FileObject",
            "@id": "val_binary_masks.zip",
            "name": "val_binary_masks.zip",
            "description": "Zip file containing binary mask image (.png) files for the experimental validation dataset. These are widely used for U-Net models (popular models in the microscopy field).",
            "contentSize": "961494 B",
            "contentUrl": "./val_binary_masks.zip",
            "encodingFormat": "application/zip",
            "sha256": "4fa3a6afbed358fbdbb61068c95a38aace802cb354147a46e047ef6efc7b23a5"
        }
    ],
    "recordSet": [
        {
            "@type": "cr:RecordSet",
            "@id": "split_enums",
            "name": "split_enums",
            "description": "Maps split names to semantic values.",
            "key": {
                "@id": "name"
            },
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "split_enums/name",
                    "name": "split_enums/name",
                    "description": "One of: train, val, test.",
                    "dataType": "sc:Text"
                },
                {
                    "@type": "cr:Field",
                    "@id": "split_enums/url",
                    "name": "split_enums/url",
                    "description": "Corresponding mlcommons.org definition URL",
                    "dataType": [
                        "sc:URL",
                        "wd:Q3985153"
                    ]
                }
            ],
            "data": [
                {
                    "split_enums/name": "train",
                    "split_enums/url": "https://mlcommons.org/definitions/training_split"
                },
                {
                    "split_enums/name": "val",
                    "split_enums/url": "https://mlcommons.org/definitions/validation_split"
                },
                {
                    "split_enums/name": "test",
                    "split_enums/url": "https://mlcommons.org/definitions/test_split"
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "images",
            "name": "images",
            "key": {
                "@id": "images/image_filename"
            },
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "images/image_filename",
                    "name": "images/image_filename",
                    "description": "The filename of the image, e.g. 1.png",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "image-files"
                        },
                        "extract": {
                            "fileProperty": "filename"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "images/image_content",
                    "name": "images/image_content",
                    "description": "The content of the image.",
                    "dataType": "sc:ImageObject",
                    "source": {
                        "fileSet": {
                            "@id": "image-files"
                        },
                        "extract": {
                            "fileProperty": "content"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "images/split",
                    "name": "images/split",
                    "description": "Split of the dataset into enumerated values test/train/val",
                    "dataType": [
                        "sc:Text",
                        "wd:Q3985153"
                    ],
                    "references": {
                        "field": {
                            "@id": "split_enums/name"
                        }
                    },
                    "source": {
                        "fileSet": {
                            "@id": "image-files"
                        },
                        "extract": {
                            "fileProperty": "fullpath"
                        },
                        "transform": {
                            "regex": "^\\\\(test|train|val)\\\\.+.png$"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "instances_annotations_images_recordset",
            "name": "instances_annotations_images_recordset",
            "key": [
                {
                    "@id": "instances_annotations_images_recordset/id"
                }
            ],
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_images_recordset/split",
                    "name": "instances_annotations_images_recordset/split",
                    "description": "Split of the dataset into enumerated values test/train/val",
                    "dataType": [
                        "sc:Text",
                        "wd:Q3985153"
                    ],
                    "references": {
                        "field": {
                            "@id": "split_enums/name"
                        }
                    },
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_images_fileset"
                        },
                        "extract": {
                            "fileProperty": "filename"
                        },
                        "transform": {
                            "regex": "^instances_annotations_(test|train|val).json$"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_images_recordset/id",
                    "name": "Image ID",
                    "description": "The ID of the image.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_images_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.images[*].id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_images_recordset/file_name",
                    "name": "Image filename",
                    "description": "Filename of TEM image file.",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_images_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.images[*].file_name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_images_recordset/images/width",
                    "name": "Image width",
                    "description": "Width of TEM image file in pixels",
                    "dataType": [
                        "sc:Integer"
                    ],
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_images_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.images[*].width"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_images_recordset/images/height",
                    "name": "Image height",
                    "description": "Height of TEM image file in pixels",
                    "dataType": [
                        "sc:Integer"
                    ],
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_images_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.images[*].height"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "instances_annotations_annotations_recordset",
            "name": "instances_annotations_annotations_recordset",
            "key": [
                {
                    "@id": "instances_annotations_annotations_recordset/image_id"
                },
                {
                    "@id": "instances_annotations_annotations_recordset/category_id"
                }
            ],
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/split",
                    "name": "instances_annotations_annotations_recordset/split",
                    "description": "Split of the dataset into enumerated values test/train/val",
                    "dataType": [
                        "sc:Text",
                        "wd:Q3985153"
                    ],
                    "references": {
                        "field": {
                            "@id": "split_enums/name"
                        }
                    },
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "fileProperty": "filename"
                        },
                        "transform": {
                            "regex": "^instances_annotations_(test|train|val).json$"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/id",
                    "name": "Annotation ID",
                    "description": "The ID of the annotation.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/image_id",
                    "name": "Image ID",
                    "description": "The ID of the image.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].image_id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/category_id",
                    "name": "Category ID",
                    "description": "The ID of the annotation category.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].category_id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/iscrowd",
                    "name": "Is Crowd",
                    "description": "A binary parameter used to indicate whether the segmentation annotation is an individual object instance (0) or a group or cluster of objects (1).",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].iscrowd"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/segmentation",
                    "name": "Segmentation",
                    "description": "Segmentation annotations of each TEM image file",
                    "dataType": [
                        "sc:Integer",
                        "cr:SegmentationMask",
                        "sc:GeoShape"
                    ],
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].segmentation"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/bbox",
                    "name": "Bounding box",
                    "description": "The bounding box around annotated object[s].",
                    "dataType": "cr:BoundingBox",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].bbox"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/area",
                    "name": "Segmented Area",
                    "description": "The area of the segmented region.",
                    "dataType": "sc:Float",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].area"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_annotations_recordset/bbox_mode",
                    "name": "Bounding box mode",
                    "description": "The ID of the bounding box mode.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_annotations_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.annotations[*].bbox_mode"
                        }
                    }
                }
            ]
        },
        {
            "@type": "cr:RecordSet",
            "@id": "instances_annotations_categories_recordset",
            "name": "instances_annotations_categories_recordset",
            "key": {
                "@id": "instances_annotations_categories_recordset/id"
            },
            "field": [
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_categories_recordset/split",
                    "name": "instances_annotations_categories_recordset/split",
                    "description": "Split of the dataset into enumerated values test/train/val",
                    "dataType": [
                        "sc:Text",
                        "wd:Q3985153"
                    ],
                    "references": {
                        "field": {
                            "@id": "split_enums/name"
                        }
                    },
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_categories_fileset"
                        },
                        "extract": {
                            "fileProperty": "filename"
                        },
                        "transform": {
                            "regex": "^instances_annotations_(test|train|val).json$"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_categories_recordset/id",
                    "name": "Category ID",
                    "description": "The ID of the annotation category.",
                    "dataType": "sc:Integer",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_categories_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.categories[*].id"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_categories_recordset/name",
                    "name": "Category name",
                    "description": "The name of the annotation category.",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_categories_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.categories[*].name"
                        }
                    }
                },
                {
                    "@type": "cr:Field",
                    "@id": "instances_annotations_categories_recordset/supercategory",
                    "name": "Supercategory name",
                    "description": "The name of the supercategory of the segmentation annotation category.",
                    "dataType": "sc:Text",
                    "source": {
                        "fileSet": {
                            "@id": "instances_annotations_categories_fileset"
                        },
                        "extract": {
                            "jsonPath": "$.categories[*].supercategory"
                        }
                    }
                }
            ]
        }
    ]
}