Skip to content

API Reference

Datastore

oqd_dataschema.datastore

Datastore

Bases: BaseModel

A collection of named groups that can be saved to an HDF5 file with model_dump_hdf5 and loaded back with model_validate_hdf5.

Attributes:

Name Type Description
groups dict[str, GroupSubtypes]

The groups held by the datastore, keyed by the name under which each group is stored in the HDF5 file.

Source code in oqd-dataschema/src/oqd_dataschema/datastore.py
class Datastore(BaseModel):
    """
    Collection of named groups that can be saved to and loaded from an HDF5 file.

    Attributes:
        groups (dict[str, GroupSubtypes]): The groups held by the datastore, keyed by
            the name under which each group is stored in the HDF5 file.
    """

    groups: dict[str, GroupSubtypes]

    def model_dump_hdf5(self, filepath: pathlib.Path):
        """
        Saves the model and its associated data to an HDF5 file.
        This method serializes the model's data and attributes into an HDF5 file
        at the specified filepath.

        The file is opened in append mode: the JSON dump of the model is stored in the
        file-level ``model`` attribute, and every group in ``self.groups`` replaces any
        existing HDF5 group of the same name.

        Args:
            filepath (pathlib.Path): The path to the HDF5 file where the model data will be saved.
                A plain string path is accepted as well.
        """
        # Accept `str` paths too; `Path(Path(...))` is a no-op for existing callers.
        filepath = pathlib.Path(filepath)
        filepath.parent.mkdir(exist_ok=True, parents=True)

        with h5py.File(filepath, "a") as f:
            f.attrs["model"] = self.model_dump_json()
            for gkey, group in self.groups.items():
                # Drop a stale group of the same name so it is rewritten from scratch.
                if gkey in f:
                    del f[gkey]
                h5_group = f.create_group(gkey)
                # `attrs` is declared Optional, so tolerate an explicit `None`.
                for akey, attr in (group.attrs or {}).items():
                    h5_group.attrs[akey] = attr

                for dkey, dataset in group.__dict__.items():
                    # Only `Dataset` fields are written as HDF5 datasets.
                    if not isinstance(dataset, Dataset):
                        continue
                    h5_dataset = h5_group.create_dataset(dkey, data=dataset.data)
                    for akey, attr in (dataset.attrs or {}).items():
                        h5_dataset.attrs[akey] = attr

    @classmethod
    def model_validate_hdf5(cls, filepath: pathlib.Path):
        """
        Loads the model from an HDF5 file at the specified filepath.

        The model is first rebuilt from the JSON stored in the file-level ``model``
        attribute; the ``data`` of every `Dataset` field is then read from the
        matching HDF5 dataset.

        Args:
            filepath (pathlib.Path): The path to the HDF5 file where the model data will be read and validated from.

        Returns:
            The validated model, with the `data` of each `Dataset` populated from the file.
        """
        with h5py.File(filepath, "r") as f:
            datastore = cls.model_validate_json(f.attrs["model"])
            for gkey, group in datastore.groups.items():
                for dkey, dataset in group.__dict__.items():
                    # Mirror `model_dump_hdf5`: only `Dataset` fields exist in the file,
                    # so any other field (e.g. `attrs`) is skipped.
                    if not isinstance(dataset, Dataset):
                        continue
                    dataset.data = np.array(f[gkey][dkey][()])
            return datastore
model_dump_hdf5(filepath: pathlib.Path)

Saves the model and its associated data to an HDF5 file. This method serializes the model's data and attributes into an HDF5 file at the specified filepath.

Parameters:

Name Type Description Default
filepath Path

The path to the HDF5 file where the model data will be saved.

required
Source code in oqd-dataschema/src/oqd_dataschema/datastore.py
def model_dump_hdf5(self, filepath: pathlib.Path):
    """
    Saves the model and its associated data to an HDF5 file.
    This method serializes the model's data and attributes into an HDF5 file
    at the specified filepath.

    The file is opened in append mode: the JSON dump of the model is stored in the
    file-level ``model`` attribute, and every group in ``self.groups`` replaces any
    existing HDF5 group of the same name.

    Args:
        filepath (pathlib.Path): The path to the HDF5 file where the model data will be saved.
            A plain string path is accepted as well.
    """
    # Accept `str` paths too; `Path(Path(...))` is a no-op for existing callers.
    filepath = pathlib.Path(filepath)
    filepath.parent.mkdir(exist_ok=True, parents=True)

    with h5py.File(filepath, "a") as f:
        f.attrs["model"] = self.model_dump_json()
        for gkey, group in self.groups.items():
            # Drop a stale group of the same name so it is rewritten from scratch.
            if gkey in f:
                del f[gkey]
            h5_group = f.create_group(gkey)
            # `attrs` is declared Optional, so tolerate an explicit `None`.
            for akey, attr in (group.attrs or {}).items():
                h5_group.attrs[akey] = attr

            for dkey, dataset in group.__dict__.items():
                # Only `Dataset` fields are written as HDF5 datasets.
                if not isinstance(dataset, Dataset):
                    continue
                h5_dataset = h5_group.create_dataset(dkey, data=dataset.data)
                for akey, attr in (dataset.attrs or {}).items():
                    h5_dataset.attrs[akey] = attr
model_validate_hdf5(filepath: pathlib.Path) classmethod

Loads the model from an HDF5 file at the specified filepath.

Parameters:

Name Type Description Default
filepath Path

The path to the HDF5 file where the model data will be read and validated from.

required
Source code in oqd-dataschema/src/oqd_dataschema/datastore.py
@classmethod
def model_validate_hdf5(cls, filepath: pathlib.Path):
    """
    Loads the model from an HDF5 file at the specified filepath.

    The model is first rebuilt from the JSON stored in the file-level ``model``
    attribute; the ``data`` of every `Dataset` field is then read from the
    matching HDF5 dataset.

    Args:
        filepath (pathlib.Path): The path to the HDF5 file where the model data will be read and validated from.

    Returns:
        The validated model, with the `data` of each `Dataset` populated from the file.
    """
    with h5py.File(filepath, "r") as f:
        datastore = cls.model_validate_json(f.attrs["model"])
        for gkey, group in datastore.groups.items():
            for dkey, dataset in group.__dict__.items():
                # Mirror `model_dump_hdf5`: only `Dataset` fields exist in the file,
                # so any other field (e.g. `attrs`) is skipped.
                if not isinstance(dataset, Dataset):
                    continue
                dataset.data = np.array(f[gkey][dkey][()])
        return datastore

Base HDF5 Objects

oqd_dataschema.base

Group

Bases: BaseModel

Schema representation for a group object within an HDF5 file.

Each grouping of data should be defined as a subclass of Group, and specify the datasets that it will contain. This base object only has attributes, attrs, which are associated with the HDF5 group.

Attributes:

Name Type Description
attrs Optional[dict[str, Union[int, float, str, complex]]]

A dictionary of attributes to attach to the group.

Example
group = Group(attrs={'version': 2, 'date': '2025-01-01'})
Source code in oqd-dataschema/src/oqd_dataschema/base.py
class Group(BaseModel):
    """
    Schema for a single group inside an HDF5 file.

    Concrete groupings of data are expected to subclass `Group` and declare the
    datasets they hold as fields. The base class itself only carries `attrs`,
    the attributes attached to the HDF5 group.

    Attributes:
        attrs: A dictionary of attributes to attach to the group.

    Example:
        ```
        group = Group(attrs={'version': 2, 'date': '2025-01-01'})
        ```
    """

    attrs: Optional[dict[str, Union[int, float, str, complex]]] = {}

Dataset

Bases: BaseModel

Schema representation for a dataset object to be saved within an HDF5 file.

Attributes:

Name Type Description
dtype Optional[Literal[tuple(mapping.keys())]]

The datatype of the dataset, such as int32, float32, int64, float64, etc. Types are inferred from the data attribute if provided.

shape Optional[tuple[int, ...]]

The shape of the dataset.

data Optional[Any]

The numpy ndarray of the data, from which dtype and shape are inferred.

attrs Optional[dict[str, Union[int, float, str, complex]]]

A dictionary of attributes to append to the dataset.

Example
dataset = Dataset(data=np.array([1, 2, 3, 4]))

dataset = Dataset(dtype='int64', shape=[4,])
dataset.data = np.array([1, 2, 3, 4])
Source code in oqd-dataschema/src/oqd_dataschema/base.py
class Dataset(BaseModel):
    """
    Schema representation for a dataset object to be saved within an HDF5 file.

    Attributes:
        dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc.
            Types are inferred from the `data` attribute if provided.
        shape: The shape of the dataset.
        data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred.
            Excluded from the serialized model.

        attrs: A dictionary of attributes to append to the dataset.

    Example:
        ```
        dataset = Dataset(data=np.array([1, 2, 3, 4]))

        dataset = Dataset(dtype='int64', shape=[4,])
        dataset.data = np.array([1, 2, 3, 4])
        ```
    """

    dtype: Optional[Literal[tuple(mapping.keys())]] = None
    shape: Optional[tuple[int, ...]] = None
    data: Optional[Any] = Field(default=None, exclude=True)

    attrs: Optional[dict[str, Union[int, float, str, complex]]] = {}

    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

    @model_validator(mode="before")
    @classmethod
    def validate_and_update(cls, values: dict):
        """
        Fill in whichever of `dtype` and `shape` is missing from `data`.

        Args:
            values: The raw input to the model, before field validation.

        Returns:
            The input, with `dtype` and/or `shape` filled in from `data` when `data`
            is given and they are not.

        Raises:
            TypeError: If `data` is not a numpy array, or if `dtype` has to be
                inferred and the array's dtype is not one of the supported types.
        """
        # A "before" validator may receive non-dict input (e.g. a model instance);
        # leave that for the regular validation to handle.
        if not isinstance(values, dict):
            return values

        data = values.get("data")
        if data is None:
            return values

        if not isinstance(data, np.ndarray):
            raise TypeError("`data` must be a numpy.ndarray.")

        # Work on a copy so the caller's mapping is never modified.
        values = dict(values)

        if values.get("dtype") is None:
            if data.dtype not in mapping.values():
                raise TypeError(
                    f"`data` must be a numpy array of dtype in {tuple(mapping.keys())}."
                )
            values["dtype"] = mapping.inverse[data.dtype]

        if values.get("shape") is None:
            values["shape"] = data.shape

        return values

    @model_validator(mode="after")
    def validate_data_matches_shape_dtype(self):
        """Ensure that `data` matches `dtype` and `shape`."""
        if self.data is not None:
            expected_dtype = mapping[self.dtype]
            if self.data.dtype != expected_dtype:
                raise ValueError(
                    f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
                )
            if self.data.shape != self.shape:
                raise ValueError(
                    f"Expected shape {self.shape}, but got {self.data.shape}."
                )
        return self
validate_data_matches_shape_dtype()

Ensure that data matches dtype and shape.

Source code in oqd-dataschema/src/oqd_dataschema/base.py
@model_validator(mode="after")
def validate_data_matches_shape_dtype(self):
    """
    Check that `data`, when present, agrees with the declared `dtype` and `shape`.

    Returns:
        The model itself, unchanged.

    Raises:
        ValueError: If the dtype or the shape of `data` differs from the declared one.
    """
    # Nothing to check when no data is attached.
    if self.data is None:
        return self

    expected_dtype = mapping[self.dtype]
    if self.data.dtype != expected_dtype:
        raise ValueError(
            f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
        )

    if self.data.shape != self.shape:
        raise ValueError(
            f"Expected shape {self.shape}, but got {self.data.shape}."
        )

    return self

Specified Groups

oqd_dataschema.groups

SinaraRawDataGroup

Bases: Group

Example Group for raw data from the Sinara real-time control system. This is a placeholder for demonstration and development.

Source code in oqd-dataschema/src/oqd_dataschema/groups.py
class SinaraRawDataGroup(Group):
    """
    Example `Group` holding raw data from the Sinara real-time control system.

    Placeholder intended for demonstration and development.

    Attributes:
        camera_images: The `Dataset` stored in this group.
    """

    camera_images: Dataset

MeasurementOutcomesDataGroup

Bases: Group

Example Group for processed data classifying the readout of the state. This is a placeholder for demonstration and development.

Source code in oqd-dataschema/src/oqd_dataschema/groups.py
class MeasurementOutcomesDataGroup(Group):
    """
    Example `Group` holding processed data that classifies the readout of the state.

    Placeholder intended for demonstration and development.

    Attributes:
        outcomes: The `Dataset` stored in this group.
    """

    outcomes: Dataset

ExpectationValueDataGroup

Bases: Group

Example Group for processed data calculating the expectation values. This is a placeholder for demonstration and development.

Source code in oqd-dataschema/src/oqd_dataschema/groups.py
class ExpectationValueDataGroup(Group):
    """
    Example `Group` holding processed data for calculated expectation values.

    Placeholder intended for demonstration and development.

    Attributes:
        expectation_value: The `Dataset` stored in this group.
    """

    expectation_value: Dataset