Skip to content

Group Field

oqd_dataschema.base

GroupField

Bases: BaseModel, ABC

Abstract class for a valid data field of Group.

Attributes:

Name Type Description
attrs Attrs

A dictionary of attributes to append to the object.

Source code in oqd-dataschema/src/oqd_dataschema/base.py
class GroupField(BaseModel, ABC):
    """
    Abstract base class for every data field that can be stored in a Group.

    Subclasses have to implement the two HDF5 compatibility hooks,
    `_handle_data_dump` and `_handle_data_load`.

    Attributes:
        attrs: A dictionary of attributes to append to the object.
    """

    attrs: Attrs = Field(default_factory=dict)

    @classmethod
    def _is_supported_type(cls, type_):
        """Tell whether `type_` is this class itself or an `Annotated` alias whose origin is this class."""
        if type_ == cls:
            return True
        return typing.get_origin(type_) is Annotated and type_.__origin__ is cls

    @abstractmethod
    def _handle_data_dump(self, data: np.ndarray) -> np.ndarray:
        """Hook used by [Datastore.model_dump_hdf5][oqd_dataschema.datastore.Datastore.model_dump_hdf5] to map `data` to an HDF5-compatible representation."""

    @abstractmethod
    def _handle_data_load(self, data: np.ndarray) -> np.ndarray:
        """Hook used by [Datastore.model_validate_hdf5][oqd_dataschema.datastore.Datastore.model_validate_hdf5] to undo the compatibility mapping, i.e. map `data` back to its original type."""
_handle_data_dump(data: np.ndarray) -> np.ndarray abstractmethod

Hook into Datastore.model_dump_hdf5 for compatibility mapping to HDF5.

Source code in oqd-dataschema/src/oqd_dataschema/base.py
@abstractmethod
def _handle_data_dump(self, data: np.ndarray) -> np.ndarray:
    """Hook into [Datastore.model_dump_hdf5][oqd_dataschema.datastore.Datastore.model_dump_hdf5] for compatibility mapping to HDF5."""
    pass
_handle_data_load(data: np.ndarray) -> np.ndarray abstractmethod

Hook into Datastore.model_validate_hdf5 for reversing compatibility mapping, i.e. mapping data back to original type.

Source code in oqd-dataschema/src/oqd_dataschema/base.py
@abstractmethod
def _handle_data_load(self, data: np.ndarray) -> np.ndarray:
    """Hook into [Datastore.model_validate_hdf5][oqd_dataschema.datastore.Datastore.model_validate_hdf5] for reversing compatibility mapping, i.e. mapping data back to original type."""
    pass

Dataset

oqd_dataschema.dataset

CastDataset = Annotated[Dataset, BeforeValidator(Dataset.cast)] module-attribute

Annotated type that automatically executes Dataset.cast

Dataset

Bases: GroupField

Schema representation for a dataset object to be saved within an HDF5 file.

Attributes:

Name Type Description
dtype Optional[DTypeNames]

The datatype of the dataset, such as int32, float32, int64, float64, etc. Types are inferred from the data attribute if provided.

shape Optional[Tuple[Union[int, None], ...]]

The shape of the dataset.

data Optional[Any]

The numpy ndarray of the data, from which dtype and shape are inferred.

attrs Attrs

A dictionary of attributes to append to the dataset.

Example
dataset = Dataset(data=np.array([1, 2, 3, 4]))

dataset = Dataset(dtype='int64', shape=[4,])
dataset.data = np.array([1, 2, 3, 4])
Source code in oqd-dataschema/src/oqd_dataschema/dataset.py
class Dataset(GroupField, extra="forbid"):
    """
    Schema representation for a dataset object to be saved within an HDF5 file.

    Attributes:
        dtype: The datatype of the dataset, such as `int32`, `float32`, `int64`, `float64`, etc.
            Types are inferred from the `data` attribute if provided.
        shape: The shape of the dataset.
        data: The numpy ndarray of the data, from which `dtype` and `shape` are inferred.

        attrs: A dictionary of attributes to append to the dataset.

    Example:
        ```
        dataset = Dataset(data=np.array([1, 2, 3, 4]))

        dataset = Dataset(dtype='int64', shape=[4,])
        dataset.data = np.array([1, 2, 3, 4])
        ```
    """

    dtype: Optional[DTypeNames] = None  # type: ignore
    shape: Optional[Tuple[Union[int, None], ...]] = None
    data: Optional[Any] = Field(default=None, exclude=True)

    attrs: Attrs = Field(default_factory=dict)

    model_config = ConfigDict(
        use_enum_values=False, arbitrary_types_allowed=True, validate_assignment=True
    )

    @field_validator("data", mode="before")
    @classmethod
    def _validate_and_update(cls, value):
        """Let `None` and numpy arrays through unchanged; reject every other value."""
        if value is not None and not isinstance(value, np.ndarray):
            raise TypeError("`data` must be a numpy.ndarray.")
        return value

    @model_validator(mode="after")
    def _validate_data_matches_shape_dtype(self):
        """Check `data` against the declared `dtype` and `shape`, then sync both to the data."""

        # without data there is nothing to compare against
        if self.data is None:
            return self

        # a declared dtype has to agree with the dtype class of the array
        if self.dtype is not None:
            expected_dtype_cls = DTypes.get(self.dtype).value
            if type(self.data.dtype) is not expected_dtype_cls:
                raise ValueError(
                    f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
                )

        # a declared shape has to be compatible with the array shape
        shape_ok = self.shape is None or _flex_shape_equal(self.data.shape, self.shape)
        if not shape_ok:
            raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

        # store the dtype name derived from the data (only assign when it differs)
        inferred_dtype = DTypes(type(self.data.dtype)).name.lower()
        if self.dtype != inferred_dtype:
            self.dtype = inferred_dtype

        # replace a missing or flexible shape with the concrete shape of the data
        if self.shape != self.data.shape:
            self.shape = self.data.shape

        return self

    @classmethod
    def cast(cls, data: np.ndarray) -> Dataset:
        """Wrap a numpy array in a Dataset; any other value is returned untouched."""
        return cls(data=data) if isinstance(data, np.ndarray) else data

    def __getitem__(self, idx):
        """Index directly into the underlying `data` array."""
        return self.data[idx]

    def _handle_data_dump(self, data):
        """Convert string arrays to a bytes dtype; other arrays are returned as they are."""
        if type(data.dtype) is np.dtypes.StrDType:
            return data.astype(np.dtypes.BytesDType)
        return data

    def _handle_data_load(self, data):
        """Cast loaded data to the numpy dtype class registered for `self.dtype`."""
        return data.astype(DTypes.get(self.dtype).value)
_validate_data_matches_shape_dtype()

Ensure that data matches dtype and shape.

Source code in oqd-dataschema/src/oqd_dataschema/dataset.py
@model_validator(mode="after")
def _validate_data_matches_shape_dtype(self):
    """Check `data` against the declared `dtype` and `shape`, then sync both to the data."""

    # without data there is nothing to compare against
    if self.data is None:
        return self

    # a declared dtype has to agree with the dtype class of the array
    if self.dtype is not None:
        expected_dtype_cls = DTypes.get(self.dtype).value
        if type(self.data.dtype) is not expected_dtype_cls:
            raise ValueError(
                f"Expected data dtype `{self.dtype}`, but got `{self.data.dtype.name}`."
            )

    # a declared shape has to be compatible with the array shape
    shape_ok = self.shape is None or _flex_shape_equal(self.data.shape, self.shape)
    if not shape_ok:
        raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

    # store the dtype name derived from the data (only assign when it differs)
    inferred_dtype = DTypes(type(self.data.dtype)).name.lower()
    if self.dtype != inferred_dtype:
        self.dtype = inferred_dtype

    # replace a missing or flexible shape with the concrete shape of the data
    if self.shape != self.data.shape:
        self.shape = self.data.shape

    return self
cast(data: np.ndarray) -> Dataset classmethod

Casts data from numpy array to Dataset.

Source code in oqd-dataschema/src/oqd_dataschema/dataset.py
@classmethod
def cast(cls, data: np.ndarray) -> Dataset:
    """Wrap a numpy array in a Dataset; any other value is returned untouched."""
    return cls(data=data) if isinstance(data, np.ndarray) else data

Table

oqd_dataschema.table

CastTable = Annotated[Table, BeforeValidator(Table.cast)] module-attribute

Annotated type that automatically executes Table.cast

Table

Bases: GroupField

Schema representation for a table object to be saved within an HDF5 file.

Attributes:

Name Type Description
columns List[Column]

The columns in the table accompanied by their datatype. Types are inferred from the data attribute if not provided.

shape Optional[Tuple[Union[int, None], ...]]

The shape of the table (excludes the column index).

data Optional[Any]

The numpy ndarray or recarray (of structured dtype) of the data, from which dtype and shape can be inferred.

attrs Attrs

A dictionary of attributes to append to the table.

Example
dt = np.dtype(
    [
        ("index", np.int32),
        ("t", np.float64),
        ("z", np.complex128),
        ("label", np.dtype("<U10")),
    ]
)
table = Table(
    columns=[("index", "int32"), ("t", "float64"), ("z", "complex128"), ("label", "str")],
    data=np.array([(1, 0.1, 1 + 1j, "first"), (2, 0.2, 2 + 2j, "second")], dtype=dt),
)
Source code in oqd-dataschema/src/oqd_dataschema/table.py
class Table(GroupField, extra="forbid"):
    """
    Schema representation for a table object to be saved within an HDF5 file.

    Attributes:
        columns: The columns in the table accompanied by their datatype. Types are inferred from the `data` attribute if not provided.
        shape: The shape of the table (excludes the column index).
        data: The numpy ndarray or recarray (of structured dtype) of the data, from which `dtype` and `shape` can be inferred.
            A pandas DataFrame is also accepted and converted to a record array.

        attrs: A dictionary of attributes to append to the table.

    Example:
        ```python
        dt = np.dtype(
            [
                ("index", np.int32),
                ("t", np.float64),
                ("z", np.complex128),
                ("label", np.dtype("<U10")),
            ]
        )
        table = Table(
            columns=[("index", "int32"), ("t", "float64"), ("z", "complex128"), ("label", "str")],
            data=np.array([(1, 0.1, 1 + 1j, "first"), (2, 0.2, 2 + 2j, "second")], dtype=dt),
        )
        ```
    """

    columns: List[Column]
    shape: Optional[Tuple[Union[int, None], ...]] = None
    data: Optional[Any] = Field(default=None, exclude=True)

    attrs: Attrs = Field(default_factory=dict)

    model_config = ConfigDict(
        use_enum_values=False, arbitrary_types_allowed=True, validate_assignment=True
    )

    @field_validator("columns", mode="before")
    @classmethod
    def validate_unique(cls, value):
        """Reject a column specification in which a column name occurs more than once.

        Raises:
            ValueError: If `_is_list_unique` reports duplicated names.
        """
        column_names = [c[0] for c in value]

        is_unique, duplicates = _is_list_unique(column_names)
        if not is_unique:
            raise ValueError(f"More than one column with the same name ({duplicates}).")

        return value

    @property
    def dataframe(self) -> pd.DataFrame:
        """Converts flat table to pandas DataFrame.

        Raises:
            ValueError: If the table has more than one dimension.
        """
        # `shape` stays None until data is attached; guard so that case does not
        # fail with an opaque TypeError from `len(None)`.
        if self.shape is not None and len(self.shape) > 1:
            raise ValueError(
                "Conversion to pandas DataFrame only supported on 1D Table."
            )
        return pd.DataFrame(
            data=self.data, columns=[c[0] for c in self.columns]
        ).astype(dict(self.columns))

    @staticmethod
    def _pd_to_np(df):
        """Convert a pandas DataFrame into a numpy record array with one field per column.

        Columns of object dtype are accepted only when every value is a `str`;
        they are stored with the string dtype numpy derives for the column.

        Raises:
            ValueError: If an object column contains non-string values.
        """
        np_dtype = []
        for k, v in df.dtypes.items():
            if type(v) is not np.dtypes.ObjectDType:
                np_dtype.append((k, v))
                continue

            # Check if column of object dtype is actually str dtype.
            # A plain `all(...)` is used instead of `np.vectorize`, which raises
            # on size-0 input when `otypes` is not set.
            values = df[k].to_numpy()
            if all(isinstance(x, str) for x in values):
                np_dtype.append((k, values.astype(np.dtypes.StrDType).dtype))
                continue

            raise ValueError(f"Unsupported datatype for column {k}")

        return np.rec.fromarrays(
            df.to_numpy().transpose(),
            names=[dt[0] for dt in np_dtype],
            formats=[dt[1] for dt in np_dtype],
        ).astype(np.dtype(np_dtype))

    @field_validator("data", mode="before")
    @classmethod
    def _validate_and_update(cls, value):
        """Normalise `data` to a `np.recarray` view of a structured array.

        Raises:
            TypeError: If `value` is neither a numpy array nor a DataFrame, or
                if its dtype is not structured.
        """
        # check if data exist
        if value is None:
            return value

        # check if data is a numpy array or a pandas DataFrame
        if not isinstance(value, (np.ndarray, pd.DataFrame)):
            raise TypeError("`data` must be a numpy.ndarray or pandas.DataFrame.")

        if isinstance(value, pd.DataFrame):
            value = cls._pd_to_np(value)

        if not isinstance(value.dtype.fields, MappingProxyType):
            raise TypeError("dtype of data must be a structured dtype.")

        # value is always a numpy array at this point
        return value.view(np.recarray)

    @model_validator(mode="after")
    def _validate_data_matches_shape_dtype(self):
        """Ensure that `data` matches `columns` and `shape`, then sync both to the data."""

        # check if data exist
        if self.data is None:
            return self

        data_fields = self.data.dtype.fields

        if set(data_fields.keys()) != {c[0] for c in self.columns}:
            raise ValueError("Fields of data do not match expected field for Table.")

        # check if dtype matches data (lookup built once instead of per field)
        declared = dict(self.columns)
        for k, v in data_fields.items():
            if (
                declared[k] is not None
                and type(v[0]) is not DTypes.get(declared[k]).value
            ):
                raise ValueError(
                    f"Expected data dtype `{declared[k]}`, but got `{v[0].name}`."
                )

        # check if shape matches data
        if self.shape is not None and not _flex_shape_equal(
            self.data.shape, self.shape
        ):
            raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

        # reassign column dtype if it is None (or otherwise differs from the data)
        for n, (k, v) in enumerate(self.columns):
            inferred = DTypes(type(data_fields[k][0])).name.lower()
            if v != inferred:
                self.columns[n] = (k, inferred)

        # reassign shape to concrete value if it is None or a flexible shape
        if self.shape != self.data.shape:
            self.shape = self.data.shape

        return self

    def numpy_dtype(self, *, str_size=64, bytes_size=64):
        """Build the numpy structured dtype described by `columns`.

        Args:
            str_size: Size passed to `np.dtypes.StrDType` for `"str"` columns.
            bytes_size: Size passed to `np.dtypes.BytesDType` for `"bytes"` columns.

        Raises:
            ValueError: If any column has no concrete dtype (`None`).
        """
        np_dtype = []

        for k, v in self.columns:
            if v is None:
                raise ValueError(
                    "Method numpy_dtype can only be called on concrete types."
                )
            if v == "str":
                dt = np.dtypes.StrDType(str_size)
            elif v == "bytes":
                dt = np.dtypes.BytesDType(bytes_size)
            else:
                dt = DTypes.get(v).value()

            np_dtype.append((k, dt))

        return np.dtype(np_dtype)

    @classmethod
    def cast(cls, data: np.ndarray | pd.DataFrame) -> Table:
        """Casts data from pandas DataFrame or numpy structured array to Table.

        Values of any other type are returned unchanged.

        Raises:
            TypeError: If a numpy array without a structured dtype is given.
        """
        if isinstance(data, pd.DataFrame):
            data = cls._pd_to_np(data)

        if isinstance(data, np.ndarray):
            if not isinstance(data.dtype.fields, MappingProxyType):
                raise TypeError("dtype of data must be a structured dtype.")

            columns = [
                (k, DTypes(type(v)).name.lower())
                for k, (v, _) in data.dtype.fields.items()
            ]

            return cls(columns=columns, data=data)
        return data

    def _handle_data_dump(self, data):
        """Cast `data` so that every string field uses the matching bytes dtype.

        The bytes dtype for a field is obtained by casting an empty array of the
        field's string dtype to `np.dtypes.BytesDType`.
        """
        np_dtype = np.dtype(
            [
                (k, np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype)
                if type(v) is np.dtypes.StrDType
                else (k, v)
                for k, (v, _) in data.dtype.fields.items()
            ]
        )

        return data.astype(np_dtype)

    def _handle_data_load(self, data):
        """Cast loaded `data` so that every column declared as `"str"` uses a string dtype again."""
        declared = dict(self.columns)
        np_dtype = np.dtype(
            [
                (
                    k,
                    np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype,
                )
                if declared[k] == "str"
                else (k, v)
                for k, (v, _) in np.array(data).dtype.fields.items()
            ]
        )
        return data.astype(np_dtype)
dataframe: pd.DataFrame property

Converts flat table to pandas DataFrame.

_validate_data_matches_shape_dtype()

Ensure that data matches dtype and shape.

Source code in oqd-dataschema/src/oqd_dataschema/table.py
@model_validator(mode="after")
def _validate_data_matches_shape_dtype(self):
    """Check `data` against the declared `columns` and `shape`, then sync both to the data."""

    # without data there is nothing to compare against
    if self.data is None:
        return self

    # the field names of the data must be exactly the declared column names
    declared_names = {name for name, *_ in self.columns}
    if set(self.data.dtype.fields.keys()) != declared_names:
        raise ValueError("Fields of data do not match expected field for Table.")

    # every column with a declared dtype has to agree with the dtype class of its field
    for k, v in self.data.dtype.fields.items():
        if dict(self.columns)[k] is None:
            continue
        if type(v[0]) is not DTypes.get(dict(self.columns)[k]).value:
            raise ValueError(
                f"Expected data dtype `{dict(self.columns)[k]}`, but got `{v[0].name}`."
            )

    # a declared shape has to be compatible with the array shape
    shape_ok = self.shape is None or _flex_shape_equal(self.data.shape, self.shape)
    if not shape_ok:
        raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

    # store the dtype name derived from the data for every column that differs
    for position, (k, declared_dtype) in enumerate(self.columns):
        inferred_dtype = DTypes(type(self.data.dtype.fields[k][0])).name.lower()
        if declared_dtype != inferred_dtype:
            self.columns[position] = (k, inferred_dtype)

    # replace a missing or flexible shape with the concrete shape of the data
    if self.shape != self.data.shape:
        self.shape = self.data.shape

    return self
cast(data: np.ndarray | pd.DataFrame) -> Table classmethod

Casts data from pandas DataFrame or numpy structured array to Table.

Source code in oqd-dataschema/src/oqd_dataschema/table.py
@classmethod
def cast(cls, data: np.ndarray | pd.DataFrame) -> Table:
    """Build a Table from a pandas DataFrame or a numpy structured array.

    Values of any other type are handed back unchanged. A numpy array whose
    dtype is not structured raises `TypeError`.
    """
    # DataFrames are first converted to a numpy record array
    if isinstance(data, pd.DataFrame):
        data = cls._pd_to_np(data)

    if not isinstance(data, np.ndarray):
        return data

    fields = data.dtype.fields
    if not isinstance(fields, MappingProxyType):
        raise TypeError("dtype of data must be a structured dtype.")

    # derive the (name, dtype-name) column list from the structured dtype
    columns = []
    for field_name, (field_dtype, _) in fields.items():
        columns.append((field_name, DTypes(type(field_dtype)).name.lower()))

    return cls(columns=columns, data=data)

Folder

oqd_dataschema.folder

CastFolder = Annotated[Folder, BeforeValidator(Folder.cast)] module-attribute

Annotated type that automatically executes Folder.cast

Folder

Bases: GroupField

Schema representation for a folder object to be saved within an HDF5 file.

Attributes:

Name Type Description
document_schema DocumentSchema

The schema for a document (structured type with keys and their datatype). Types are inferred from the data attribute if not provided.

shape Optional[Tuple[Union[int, None], ...]]

The shape of the folder.

data Optional[Any]

The numpy ndarray or recarray (of structured dtype) of the data, from which dtype and shape can be inferred.

attrs Attrs

A dictionary of attributes to append to the folder.

Example
schema = dict(
    index="int32",
    t="float64",
    channels=dict(ch1="complex128", ch2="complex128"),
    label="str",
)
dt = np.dtype(
    [
        ("index", np.int32),
        ("t", np.float64),
        ("channels", np.dtype([("ch1", np.complex128), ("ch2", np.complex128)])),
        ("label", np.dtype("<U10")),
    ]
)
folder = Folder(
    document_schema=schema,
    data=np.array(
        [(1, 0.1, (1 + 1j, 1 - 1j), "first"), (2, 0.2, (2 + 2j, 2 - 2j), "second")],
        dtype=dt,
    ),
)
Source code in oqd-dataschema/src/oqd_dataschema/folder.py
class Folder(GroupField, extra="forbid"):
    """
    Schema representation for a folder object to be saved within an HDF5 file.

    Attributes:
        document_schema: The schema for a document (structured type with keys and their datatype). Types are inferred from the `data` attribute if not provided.
        shape: The shape of the folder.
        data: The numpy ndarray or recarray (of structured dtype) of the data, from which `dtype` and `shape` can be inferred.

        attrs: A dictionary of attributes to append to the folder.

    Example:
        ```python
        schema = dict(
            index="int32",
            t="float64",
            channels=dict(ch1="complex128", ch2="complex128"),
            label="str",
        )
        dt = np.dtype(
            [
                ("index", np.int32),
                ("t", np.float64),
                ("channels", np.dtype([("ch1", np.complex128), ("ch2", np.complex128)])),
                ("label", np.dtype("<U10")),
            ]
        )
        folder = Folder(
            document_schema=schema,
            data=np.array(
                [(1, 0.1, (1 + 1j, 1 - 1j), "first"), (2, 0.2, (2 + 2j, 2 - 2j), "second")],
                dtype=dt,
            ),
        )
        ```
    """

    document_schema: DocumentSchema
    shape: Optional[Tuple[Union[int, None], ...]] = None
    data: Optional[Any] = Field(default=None, exclude=True)

    attrs: Attrs = Field(default_factory=dict)

    model_config = ConfigDict(
        use_enum_values=False, arbitrary_types_allowed=True, validate_assignment=True
    )

    @field_validator("data", mode="before")
    @classmethod
    def _validate_and_update(cls, value):
        """Normalise `data` to a `np.recarray` view of a structured array.

        Raises:
            TypeError: If `value` is not a numpy array or its dtype is not structured.
        """
        # check if data exist
        if value is None:
            return value

        # check if data is a numpy array
        if not isinstance(value, np.ndarray):
            raise TypeError("`data` must be a numpy.ndarray.")

        if not isinstance(value.dtype.fields, MappingProxyType):
            raise TypeError("dtype of data must be a structured dtype.")

        return value.view(np.recarray)

    @staticmethod
    def _is_valid_array(document_schema, data_dtype, position=""):
        """Recursively check that `data_dtype` matches `document_schema`.

        Args:
            document_schema: Mapping of key to dtype name, `None`, or a nested mapping.
            data_dtype: The numpy dtype to check against the schema.
            position: Dotted path of the nested key being checked ("" at the root);
                only used in error messages.

        Raises:
            TypeError: If `data_dtype` is not a structured dtype.
            ValueError: If the keys differ or a field has an unexpected dtype.
        """
        # check if data_dtype is a structured dtype
        if not isinstance(data_dtype.fields, MappingProxyType):
            raise TypeError(
                f"Error {f'in key `{position}`' if position else 'at root'}, expected structured dtype matching {document_schema = } but got unstructured dtype {data_dtype = }."
            )

        # check if fields all match
        schema_keys = set(document_schema.keys())
        dtype_keys = set(data_dtype.fields.keys())
        if schema_keys != dtype_keys:
            diff = schema_keys.difference(dtype_keys)
            rv_diff = dtype_keys.difference(schema_keys)
            raise ValueError(
                f"Error {f'in key `{position}`' if position else 'at root'}, mismatched {'subkeys' if position else 'keys'} between `document_schema` (unmatched = {diff}) and numpy data structured dtype (unmatched = {rv_diff})."
            )

        # recursively check document_schema matches structured dtype data_dtype
        for k, v in document_schema.items():
            field_dtype = data_dtype.fields[k][0]

            if isinstance(v, dict):
                Folder._is_valid_array(
                    v, field_dtype, position + "." + k if position else k
                )
                continue

            # check if dtypes match (a schema value of None accepts any dtype)
            if v is not None and type(field_dtype) is not DTypes.get(v).value:
                raise ValueError(
                    f"Error {f'in key `{position}`' if position else 'at root'}, expected {'subkey' if position else 'key'} `{k}` to be of dtype compatible with {v} but got dtype {field_dtype}."
                )

    @model_validator(mode="after")
    def _validate_data_matches_shape_dtype(self):
        """Ensure that `data` matches `document_schema` and `shape`, then sync both to the data."""

        # check if data exist
        if self.data is None:
            return self

        # check if document_schema matches the data's structured dtype
        self._is_valid_array(self.document_schema, self.data.dtype)

        # check if shape matches data
        if self.shape is not None and not _flex_shape_equal(
            self.data.shape, self.shape
        ):
            raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

        # reassign document_schema if it differs from the one derived from the data
        document_schema_from_dtype = self._get_document_schema_from_dtype(
            self.data.dtype
        )
        if self.document_schema != document_schema_from_dtype:
            self.document_schema = document_schema_from_dtype

        # reassign shape to concrete value if it is None or a flexible shape
        if self.shape != self.data.shape:
            self.shape = self.data.shape

        return self

    @staticmethod
    def _get_document_schema_from_dtype(dtype):
        """Derive a (possibly nested) document schema dict from a structured numpy dtype."""
        document_schema = {}

        for k, (v, _) in dtype.fields.items():
            if isinstance(v.fields, MappingProxyType):
                # nested structured dtype -> nested schema
                dt = Folder._get_document_schema_from_dtype(v)
            else:
                dt = DTypes(type(v)).name.lower()

            document_schema[k] = dt

        return document_schema

    @staticmethod
    def _numpy_dtype(document_schema, *, str_size=64, bytes_size=64):
        """Recursively build the numpy structured dtype described by `document_schema`.

        Raises:
            ValueError: If any key has no concrete dtype (`None`).
        """
        np_dtype = []

        for k, v in document_schema.items():
            if v is None:
                raise ValueError(
                    "Method numpy_dtype can only be called on concrete types."
                )

            if isinstance(v, dict):
                dt = Folder._numpy_dtype(v, str_size=str_size, bytes_size=bytes_size)
            elif v == "str":
                dt = np.dtypes.StrDType(str_size)
            elif v == "bytes":
                dt = np.dtypes.BytesDType(bytes_size)
            else:
                dt = DTypes.get(v).value()

            np_dtype.append((k, dt))

        return np.dtype(np_dtype)

    def numpy_dtype(self, *, str_size=64, bytes_size=64) -> np.dtype:
        """Return the numpy structured dtype described by `document_schema`.

        Args:
            str_size: Size passed to `np.dtypes.StrDType` for `"str"` keys.
            bytes_size: Size passed to `np.dtypes.BytesDType` for `"bytes"` keys.
        """
        return self._numpy_dtype(
            self.document_schema, str_size=str_size, bytes_size=bytes_size
        )

    @staticmethod
    def _dump_dtype_str_to_bytes(dtype):
        """Return a copy of structured `dtype` in which every string field uses the matching bytes dtype."""
        np_dtype = []

        for k, (v, _) in dtype.fields.items():
            if isinstance(v.fields, MappingProxyType):
                dt = Folder._dump_dtype_str_to_bytes(v)
            elif type(v) is np.dtypes.StrDType:
                # let numpy derive the bytes dtype by casting an empty array
                dt = np.empty(0, dtype=v).astype(np.dtypes.BytesDType).dtype
            else:
                dt = v

            np_dtype.append((k, dt))

        return np.dtype(np_dtype)

    def _handle_data_dump(self, data):
        """Cast `data` so that every (nested) string field is stored as bytes."""
        np_dtype = self._dump_dtype_str_to_bytes(data.dtype)

        return data.astype(np_dtype)

    @staticmethod
    def _load_dtype_bytes_to_str(document_schema, dtype):
        """Return a copy of structured `dtype` in which every key declared as `"str"` uses a string dtype."""
        np_dtype = []

        for k, (v, _) in dtype.fields.items():
            if isinstance(v.fields, MappingProxyType):
                dt = Folder._load_dtype_bytes_to_str(document_schema[k], v)
            elif document_schema[k] == "str":
                # let numpy derive the string dtype by casting an empty array
                dt = np.empty(0, dtype=v).astype(np.dtypes.StrDType).dtype
            else:
                dt = v

            np_dtype.append((k, dt))

        return np.dtype(np_dtype)

    def _handle_data_load(self, data):
        """Cast loaded `data` so that every key declared as `"str"` uses a string dtype again."""
        np_dtype = self._load_dtype_bytes_to_str(self.document_schema, data.dtype)

        return data.astype(np_dtype)

    @classmethod
    def cast(cls, data: np.ndarray) -> Folder:
        """Casts data from numpy structured array to Folder.

        Values that are not numpy arrays are returned unchanged.

        Raises:
            TypeError: If a numpy array without a structured dtype is given.
        """
        if isinstance(data, np.ndarray):
            if not isinstance(data.dtype.fields, MappingProxyType):
                raise TypeError("dtype of data must be a structured dtype.")

            document_schema = cls._get_document_schema_from_dtype(data.dtype)

            return cls(document_schema=document_schema, data=data)
        return data
_validate_data_matches_shape_dtype()

Ensure that data matches dtype and shape.

Source code in oqd-dataschema/src/oqd_dataschema/folder.py
@model_validator(mode="after")
def _validate_data_matches_shape_dtype(self):
    """Check `data` against `document_schema` and `shape`, then sync both to the data."""

    # without data there is nothing to compare against
    if self.data is None:
        return self

    # raises when the structured dtype of the data does not fit the schema
    self._is_valid_array(self.document_schema, self.data.dtype)

    # a declared shape has to be compatible with the array shape
    shape_ok = self.shape is None or _flex_shape_equal(self.data.shape, self.shape)
    if not shape_ok:
        raise ValueError(f"Expected shape {self.shape}, but got {self.data.shape}.")

    # store the schema derived from the data (only assign when it differs)
    inferred_schema = self._get_document_schema_from_dtype(self.data.dtype)
    if self.document_schema != inferred_schema:
        self.document_schema = inferred_schema

    # replace a missing or flexible shape with the concrete shape of the data
    if self.shape != self.data.shape:
        self.shape = self.data.shape

    return self
cast(data: np.ndarray) -> Folder classmethod

Casts data from numpy structured array to Folder.

Source code in oqd-dataschema/src/oqd_dataschema/folder.py
@classmethod
def cast(cls, data: np.ndarray) -> Folder:
    """Build a Folder from a numpy structured array.

    Values that are not numpy arrays are handed back unchanged. A numpy array
    whose dtype is not structured raises `TypeError`.
    """
    if not isinstance(data, np.ndarray):
        return data

    if not isinstance(data.dtype.fields, MappingProxyType):
        raise TypeError("dtype of data must be a structured dtype.")

    # the schema is derived from the structured dtype of the array
    inferred_schema = cls._get_document_schema_from_dtype(data.dtype)
    return cls(document_schema=inferred_schema, data=data)

Constrained Group Fields

oqd_dataschema.constrained

condataset(*, shape_constraint=None, dtype_constraint=None, min_dim=None, max_dim=None) -> TypeAlias

Implements dtype, dimension and shape constraints on the Dataset.

Parameters:

Name Type Description Default
shape_constraint Tuple[Union[None, int], ...]
None
dtype_constraint Tuple[DTypeNames, ...]
None
min_dim int
None
max_dim int
None
Example
class CustomGroup:
    x: condataset(dtype_constraint=("int16","int32","int64"))
    y: condataset(shape_constraint=(100,))
    z: condataset(min_dim=1, max_dim=1)

group = CustomGroup(x=,y=,z=) # succeeds as it obeys the constraints

group = CustomGroup(x=,y=,z=) # fails as it violates the constraints
Source code in oqd-dataschema/src/oqd_dataschema/constrained.py
def condataset(
    *,
    shape_constraint=None,
    dtype_constraint=None,
    min_dim=None,
    max_dim=None,
) -> TypeAlias:
    """Implements dtype, dimension and shape constrains on the Dataset.

    Arguments:
        shape_constraint (Tuple[Union[None, int],...]):
        dtype_constraint (Tuple[DTypeNames,...]):
        min_dim (int):
        max_dim (int):

    Example:
        ```
        class CustomGroup:
            x: condataset(dtype_contraint=("int16","int32","int64))
            y: condataset(shape_constraint=(100,))
            z: condataset(min_dim=1, max_dim=1)

        group = CustomGroup(x=,y=,z=) # succeeds as it obeys the constraints

        group = CustomGroup(x=,y=,z=) # fails as it violates the constraints
        ```

    """
    return Annotated[
        CastDataset,
        AfterValidator(_constrain_dtype_dataset(dtype_constraint=dtype_constraint)),
        AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
        AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
    ]

contable(*, required_fields=None, strict_fields=False, dtype_constraint={}, shape_constraint=None, min_dim=None, max_dim=None) -> TypeAlias

Implements field, dtype, dimension and shape constraints on the Table.

Example
class CustomGroup:
    x: contable(dtype_constraint=("int16","int32","int64"))
    y: contable(shape_constraint=(100,))
    z: contable(min_dim=1, max_dim=1)
    u: contable(required_fields=("c1","c2"))
    v: contable(required_fields=("c1", "c2"), strict_fields=True)


group = CustomGroup(x=,y=,z=,u=,v=) # succeeds as it obeys the constraints

group = CustomGroup(x=,y=,z=,u=,v=) # fails as it violates the constraints
Source code in oqd-dataschema/src/oqd_dataschema/constrained.py
def contable(
    *,
    required_fields=None,
    strict_fields=False,
    dtype_constraint={},
    shape_constraint=None,
    min_dim=None,
    max_dim=None,
) -> TypeAlias:
    """Implements field, dtype, dimension and shape constrains on the Table.

    Example:
        ```
        class CustomGroup:
            x: contable(dtype_contraint=("int16","int32","int64))
            y: contable(shape_constraint=(100,))
            z: contable(min_dim=1, max_dim=1)
            u: contable(required_field=("c1","c2"))
            v: contable(required_field=("c1", "c2"), strict_fields=True)


        group = CustomGroup(x=,y=,z=,u=,v=) # succeeds as it obeys the constraints

        group = CustomGroup(x=,y=,z=,u=,v=) # fails as it violates the constraints
        ```

    """
    return Annotated[
        CastTable,
        AfterValidator(
            _constrain_required_field(
                required_fields=required_fields, strict_fields=strict_fields
            )
        ),
        AfterValidator(_constrain_dtype_table(dtype_constraint=dtype_constraint)),
        AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
        AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
    ]

confolder(*, shape_constraint=None, min_dim=None, max_dim=None) -> TypeAlias

Implements dimension and shape constraints on the Folder.

Example
class CustomGroup:
    x: confolder(shape_constraint=(100,))
    y: confolder(min_dim=1, max_dim=1)


group = CustomGroup(x=,y=) # succeeds as it obeys the constraints

group = CustomGroup(x=,y=) # fails as it violates the constraints
Source code in oqd-dataschema/src/oqd_dataschema/constrained.py
def confolder(
    *,
    shape_constraint=None,
    min_dim=None,
    max_dim=None,
) -> TypeAlias:
    """Implements dimension and shape constrains on the Folder.

    Example:
        ```
        class CustomGroup:
            x: confolder(shape_constraint=(100,))
            y: confolder(min_dim=1, max_dim=1)


        group = CustomGroup(x=,y=) # succeeds as it obeys the constraints

        group = CustomGroup(x=,y=) # fails as it violates the constraints
        ```

    """
    return Annotated[
        Folder,
        AfterValidator(_constrain_dim(min_dim=min_dim, max_dim=max_dim)),
        AfterValidator(_constrain_shape(shape_constraint=shape_constraint)),
    ]