Skip to content

Entity and methods

Dataitem

Bases: Entity

A class representing a dataitem.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
class Dataitem(Entity):
    """
    A class representing a dataitem.
    """

    ENTITY_TYPE = EntityTypes.DATAITEMS.value

    def __init__(
        self,
        project: str,
        name: str,
        uuid: str,
        kind: str,
        metadata: Metadata,
        spec: DataitemSpec,
        status: DataitemStatus,
        user: str | None = None,
    ) -> None:
        """
        Constructor.

        Parameters
        ----------
        project : str
            Project name.
        name : str
            Name of the object.
        uuid : str
            Version of the object.
        kind : str
            Kind of the object.
        metadata : Metadata
            Metadata of the object.
        spec : DataitemSpec
            Specification of the object.
        status : DataitemStatus
            Status of the object.
        user : str
            Owner of the object.
        """
        super().__init__()
        self.project = project
        self.name = name
        self.id = uuid
        self.kind = kind
        self.key = f"store://{project}/{self.ENTITY_TYPE}/{kind}/{name}:{uuid}"
        self.metadata = metadata
        self.spec = spec
        self.status = status
        self.user = user

        # Add attributes to be used in the to_dict method
        self._obj_attr.extend(["project", "name", "id", "key"])

    #############################
    #  Save / Refresh / Export
    #############################

    def save(self, update: bool = False) -> Dataitem:
        """
        Save entity into backend.

        Parameters
        ----------
        update : bool
            Flag to indicate update.

        Returns
        -------
        Dataitem
            Entity saved.
        """
        obj = self.to_dict()

        if not update:
            new_obj = create_entity_api_ctx(self.project, self.ENTITY_TYPE, obj)
            self._update_attributes(new_obj)
            return self

        # Keep the in-memory timestamp aligned with the payload sent to the backend.
        self.metadata.updated = obj["metadata"]["updated"] = get_timestamp()
        new_obj = update_entity_api_ctx(self.project, self.ENTITY_TYPE, self.id, obj)
        self._update_attributes(new_obj)
        return self

    def refresh(self) -> Dataitem:
        """
        Refresh object from backend.

        Returns
        -------
        Dataitem
            Entity refreshed.
        """
        new_obj = read_entity_api_ctx(self.key)
        self._update_attributes(new_obj)
        return self

    def export(self, filename: str | None = None) -> None:
        """
        Export object as a YAML file.

        Parameters
        ----------
        filename : str
            Name of the export YAML file. If not specified, the default value is used.

        Returns
        -------
        None
        """
        obj = self.to_dict()
        if filename is None:
            filename = f"{self.kind}_{self.name}_{self.id}.yml"
        pth = self._context().root / filename
        pth.parent.mkdir(parents=True, exist_ok=True)
        write_yaml(pth, obj)

    #############################
    #  Context
    #############################

    def _context(self) -> Context:
        """
        Get context.

        Returns
        -------
        Context
            Context.
        """
        return get_context(self.project)

    #############################
    #  Dataitem methods
    #############################

    def as_file(self) -> str:
        """
        Get dataitem as file. In the case of a local store, the store returns the current
        path of the dataitem. In the case of a remote store, the dataitem is downloaded in
        a temporary directory.

        Returns
        -------
        str
            Path of the dataitem.
        """
        path = self.spec.path
        store = get_store(path)
        if store.is_local():
            return path
        return store.download(path)

    def download(
        self,
        destination: str | None = None,
        overwrite: bool = False,
    ) -> str:
        """
        Download dataitem from remote storage.

        Parameters
        ----------
        destination : str
            Destination path as filename.
        overwrite : bool
            Specify if overwrite an existing file. Default value is False.

        Returns
        -------
        str
            Path of the downloaded dataitem.

        Raises
        ------
        RuntimeError
            If the dataitem path is local.
        """

        # Check if target path is remote
        path = self.spec.path
        store = get_store(path)
        if store.is_local():
            raise RuntimeError("Local files cannot be downloaded. Use as_file().")

        # Default destination: derive the filename from the remote path and
        # place it under the entity's project/type/name/id directory.
        # (Bug fix: the computed filename was previously dropped.)
        if destination is None:
            filename = Path(urlparse(path).path).name
            destination = f"{self.project}/{self.ENTITY_TYPE}/{self.name}/{self.id}/{filename}"

        # Check if destination path is local
        # NOTE(review): the boolean result is ignored here, as in the original —
        # presumably this was meant to validate the destination; confirm intent.
        self._check_local(destination)

        # Check if destination path exists for overwrite
        check_overwrite(destination, overwrite)

        # Download dataitem and return path
        return store.download(path, destination)

    def write_file(self, src: str) -> str:
        """
        Write file into dataitem path.

        Parameters
        ----------
        src : str
            Local path of the file to write.

        Returns
        -------
        str
            Path to the written file.

        Raises
        ------
        RuntimeError
            If the dataitem path is local.
        """
        path = self.spec.path
        store = get_store(path)
        if store.is_local():
            raise RuntimeError("Only remote paths are supported for upload.")

        # Get store and upload file and return remote path
        target = store.upload(src, path)
        file_info = store.get_file_info(target, src)

        # Record the uploaded file on the entity status and persist it.
        if file_info is not None:
            self.refresh()
            self.status.add_file(file_info)
            self.save(update=True)

        return target

    #############################
    #  Helper Methods
    #############################

    @staticmethod
    def _check_local(path: str) -> bool:
        """
        Check if path is local.

        Parameters
        ----------
        path : str
            Path to check.

        Returns
        -------
        bool
            True if local, False otherwise.
        """
        return check_local_path(path)

    @staticmethod
    def _get_extension(path: str, file_format: str | None = None) -> str:
        """
        Get extension of path.

        Parameters
        ----------
        path : str
            Path to get extension from.
        file_format : str
            File format. If provided, it overrides extension inference.

        Returns
        -------
        str
            File extension.

        Raises
        ------
        EntityError
            If file format is not supported.
        """
        if file_format is not None:
            return file_format

        # SQL-backed dataitems are always materialized as parquet.
        scheme = map_uri_scheme(path)
        if scheme == "sql":
            return "parquet"

        # Path.suffix is "" (not None) when the path has no extension,
        # so test truthiness; previously the error branch was unreachable.
        ext = Path(path).suffix[1:]
        if ext:
            return ext
        raise EntityError("Unknown file format. Only csv and parquet are supported.")

    #############################
    #  Static interface methods
    #############################

    @staticmethod
    def _parse_dict(obj: dict, validate: bool = True) -> dict:
        """
        Get dictionary and parse it to a valid entity dictionary.

        Parameters
        ----------
        obj : dict
            Dictionary to parse.
        validate : bool
            Flag to validate the spec while building it.

        Returns
        -------
        dict
            A dictionary containing the attributes of the entity instance.
        """
        project = obj.get("project")
        name = build_name(obj.get("name"))
        kind = obj.get("kind")
        uuid = build_uuid(obj.get("id"))
        metadata = build_metadata(kind, **obj.get("metadata", {}))
        spec = build_spec(kind, validate=validate, **obj.get("spec", {}))
        status = build_status(kind, **obj.get("status", {}))
        user = obj.get("user")
        return {
            "project": project,
            "name": name,
            "uuid": uuid,
            "kind": kind,
            "metadata": metadata,
            "spec": spec,
            "status": status,
            "user": user,
        }

__init__(project, name, uuid, kind, metadata, spec, status, user=None)

Constructor.

Parameters:

Name Type Description Default
project str

Project name.

required
name str

Name of the object.

required
uuid str

Version of the object.

required
kind str

Kind of the object.

required
metadata Metadata

Metadata of the object.

required
spec DataitemSpec

Specification of the object.

required
status DataitemStatus

Status of the object.

required
user str

Owner of the object.

None
Source code in digitalhub_data/entities/dataitems/entity/_base.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def __init__(
    self,
    project: str,
    name: str,
    uuid: str,
    kind: str,
    metadata: Metadata,
    spec: DataitemSpec,
    status: DataitemStatus,
    user: str | None = None,
) -> None:
    """
    Initialize a dataitem entity.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Name of the object.
    uuid : str
        Version of the object.
    kind : str
        Kind the object.
    metadata : Metadata
        Metadata of the object.
    spec : DataitemSpec
        Specification of the object.
    status : DataitemStatus
        Status of the object.
    user : str
        Owner of the object.
    """
    super().__init__()
    self.project = project
    self.name = name
    self.id = uuid
    self.kind = kind
    self.metadata = metadata
    self.spec = spec
    self.status = status
    self.user = user
    # Store key encodes the full entity coordinates.
    self.key = f"store://{project}/{self.ENTITY_TYPE}/{kind}/{name}:{uuid}"

    # Expose these extra attributes through to_dict serialization.
    self._obj_attr.extend(["project", "name", "id", "key"])

as_file()

Get dataitem as file. In the case of a local store, the store returns the current path of the dataitem. In the case of a remote store, the dataitem is downloaded in a temporary directory.

Returns:

Type Description
str

Path of the dataitem.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def as_file(self) -> str:
    """
    Get dataitem as file. In the case of a local store, the store returns the current
    path of the dataitem. In the case of a remote store, the dataitem is downloaded in
    a temporary directory.

    Returns
    -------
    str
        Path of the dataitem.
    """
    source = self.spec.path
    backend = get_store(source)
    # Remote data must be fetched first; local data is already a path.
    if not backend.is_local():
        return backend.download(source)
    return source

download(destination=None, overwrite=False)

Download dataitem from remote storage.

Parameters:

Name Type Description Default
destination str

Destination path as filename.

None
overwrite bool

Specify if overwrite an existing file. Default value is False.

False

Returns:

Type Description
str

Path of the downloaded dataitem.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def download(
    self,
    destination: str | None = None,
    overwrite: bool = False,
) -> str:
    """
    Download dataitem from remote storage.

    Parameters
    ----------
    destination : str
        Destination path as filename.
    overwrite : bool
        Specify if overwrite an existing file. Default value is False.

    Returns
    -------
    str
        Path of the downloaded dataitem.

    Raises
    ------
    RuntimeError
        If the dataitem path is local.
    """

    # Check if target path is remote
    path = self.spec.path
    store = get_store(path)
    if store.is_local():
        raise RuntimeError("Local files cannot be downloaded. Use as_file().")

    # Default destination: derive the filename from the remote path and
    # place it under the entity's project/type/name/id directory.
    # (Bug fix: the computed filename was previously dropped.)
    if destination is None:
        filename = Path(urlparse(path).path).name
        destination = f"{self.project}/{self.ENTITY_TYPE}/{self.name}/{self.id}/{filename}"

    # Check if destination path is local
    self._check_local(destination)

    # Check if destination path exists for overwrite
    check_overwrite(destination, overwrite)

    # Download dataitem and return path
    return store.download(path, destination)

export(filename=None)

Export object as a YAML file.

Parameters:

Name Type Description Default
filename str

Name of the export YAML file. If not specified, the default value is used.

None

Returns:

Type Description
None
Source code in digitalhub_data/entities/dataitems/entity/_base.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def export(self, filename: str | None = None) -> None:
    """
    Export object as a YAML file.

    Parameters
    ----------
    filename : str
        Name of the export YAML file. If not specified, the default value is used.

    Returns
    -------
    None
    """
    data = self.to_dict()
    # Default filename encodes kind, name and version.
    name = filename if filename is not None else f"{self.kind}_{self.name}_{self.id}.yml"
    target = self._context().root / name
    target.parent.mkdir(parents=True, exist_ok=True)
    write_yaml(target, data)

refresh()

Refresh object from backend.

Returns:

Type Description
Dataitem

Entity refreshed.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
113
114
115
116
117
118
119
120
121
122
123
124
def refresh(self) -> Dataitem:
    """
    Refresh object from backend.

    Returns
    -------
    Dataitem
        Entity refreshed.
    """
    backend_state = read_entity_api_ctx(self.key)
    self._update_attributes(backend_state)
    return self

save(update=False)

Save entity into backend.

Parameters:

Name Type Description Default
update bool

Flag to indicate update.

False

Returns:

Type Description
Dataitem

Entity saved.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def save(self, update: bool = False) -> Dataitem:
    """
    Save entity into backend.

    Parameters
    ----------
    update : bool
        Flag to indicate update.

    Returns
    -------
    Dataitem
        Entity saved.
    """
    payload = self.to_dict()

    if update:
        # Sync the timestamp on both the entity and the outgoing payload.
        stamp = get_timestamp()
        self.metadata.updated = stamp
        payload["metadata"]["updated"] = stamp
        saved = update_entity_api_ctx(self.project, self.ENTITY_TYPE, self.id, payload)
    else:
        saved = create_entity_api_ctx(self.project, self.ENTITY_TYPE, payload)

    self._update_attributes(saved)
    return self

write_file(src)

Write file into dataitem path.

Parameters:

Name Type Description Default
src str

Local path of the file to write.

required

Returns:

Type Description
str

Path to the written file.

Source code in digitalhub_data/entities/dataitems/entity/_base.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def write_file(self, src: str) -> str:
    """
    Write file into dataitem path.

    Parameters
    ----------
    src : str
        Local path of the file to write.

    Returns
    -------
    str
        Path to the written file.

    Raises
    ------
    RuntimeError
        If the dataitem path is local.
    """
    remote = self.spec.path
    backend = get_store(remote)
    if backend.is_local():
        raise RuntimeError("Only remote paths are supported for upload.")

    # Upload the file, then record its info on the entity status.
    uploaded = backend.upload(src, remote)
    info = backend.get_file_info(uploaded, src)
    if info is None:
        return uploaded

    self.refresh()
    self.status.add_file(info)
    self.save(update=True)
    return uploaded

DataitemDataitem

Bases: Dataitem

Dataitem dataitem.

Source code in digitalhub_data/entities/dataitems/entity/dataitem.py
6
7
8
9
class DataitemDataitem(Dataitem):
    """
    Dataitem of kind 'dataitem' (generic dataitem).
    """

DataitemIceberg

Bases: Dataitem

Iceberg dataitem.

Source code in digitalhub_data/entities/dataitems/entity/iceberg.py
4
5
6
7
class DataitemIceberg(Dataitem):
    """
    Dataitem backed by an Iceberg table.
    """

DataitemTable

Bases: Dataitem

Table dataitem.

Source code in digitalhub_data/entities/dataitems/entity/table.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
class DataitemTable(Dataitem):
    """
    Table dataitem.
    """

    def as_df(self, file_format: str | None = None, **kwargs) -> Any:
        """
        Read dataitem as a DataFrame. If the dataitem is not local, it will be downloaded
        to a temporary folder and deleted after the method is executed. If no file_format is passed,
        the function will try to infer it from the dataitem.spec.path attribute.
        The path of the dataitem is specified in the spec attribute, and must be a store aware path.
        If the dataitem is stored on s3 bucket, the path must be s3://<bucket>/<path_to_dataitem>.
        If the dataitem is stored on database (Postgres is the only one supported), the path must
        be sql://postgres/<database>/<schema>/<table/view>.

        Parameters
        ----------
        file_format : str
            Format of the file. (Supported csv and parquet).
        **kwargs : dict
            Keyword arguments passed to the read_df function.

        Returns
        -------
        Any
            DataFrame.
        """
        source = self.spec.path
        datastore = get_datastore(source)

        # Work on a local copy: remote data is fetched to a temporary location.
        is_tmp = not self._check_local(source)
        local_path = datastore.download(source) if is_tmp else source

        # Record file info on the entity when the store can provide it.
        info = get_store(local_path).get_file_info(source, local_path)
        if info is not None:
            self.refresh()
            self.status.add_file(info)
            self.save(update=True)

        # Infer the format (unless given) and load the DataFrame.
        fmt = self._get_extension(source, file_format)
        frame = datastore.read_df(local_path, fmt, **kwargs)

        # Clean up the temporary download directory.
        if is_tmp:
            cleanup = Path(local_path)
            if cleanup.is_file():
                cleanup = cleanup.parent
            shutil.rmtree(cleanup)

        return frame

    def write_df(
        self,
        df: Any,
        extension: str | None = None,
        **kwargs,
    ) -> str:
        """
        Write DataFrame as parquet/csv/table into dataitem path.

        Parameters
        ----------
        df : Any
            DataFrame to write.
        extension : str
            Extension of the file.
        **kwargs : dict
            Keyword arguments passed to the write_df function.

        Returns
        -------
        str
            Path to the written dataframe.
        """
        source = self.spec.path
        written = get_datastore(source).write_df(df, source, extension=extension, **kwargs)

        # Attach file info for the written artifact, when available.
        info = get_store(written).get_file_info(written, source)
        if info is not None:
            self.status.add_file(info)
        return written

as_df(file_format=None, **kwargs)

Read dataitem as a DataFrame. If the dataitem is not local, it will be downloaded to a temporary folder and deleted after the method is executed. If no file_format is passed, the function will try to infer it from the dataitem.spec.path attribute. The path of the dataitem is specified in the spec attribute, and must be a store aware path. If the dataitem is stored on s3 bucket, the path must be s3://&lt;bucket&gt;/&lt;path_to_dataitem&gt;. If the dataitem is stored on database (Postgres is the only one supported), the path must be sql://postgres/&lt;database&gt;/&lt;schema&gt;/&lt;table/view&gt;.

Parameters:

Name Type Description Default
file_format str

Format of the file. (Supported csv and parquet).

None
**kwargs dict

Keyword arguments passed to the read_df function.

{}

Returns:

Type Description
Any

DataFrame.

Source code in digitalhub_data/entities/dataitems/entity/table.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def as_df(self, file_format: str | None = None, **kwargs) -> Any:
    """
    Read dataitem as a DataFrame. If the dataitem is not local, it will be downloaded
    to a temporary folder and deleted after the method is executed. If no file_format is passed,
    the function will try to infer it from the dataitem.spec.path attribute.
    The path of the dataitem is specified in the spec attribute, and must be a store aware path.
    If the dataitem is stored on s3 bucket, the path must be s3://<bucket>/<path_to_dataitem>.
    If the dataitem is stored on database (Postgres is the only one supported), the path must
    be sql://postgres/<database>/<schema>/<table/view>.

    Parameters
    ----------
    file_format : str
        Format of the file. (Supported csv and parquet).
    **kwargs : dict
        Keyword arguments passed to the read_df function.

    Returns
    -------
    Any
        DataFrame.
    """
    source = self.spec.path
    datastore = get_datastore(source)

    # Remote data is fetched to a temporary location first.
    is_tmp = not self._check_local(source)
    local_path = datastore.download(source) if is_tmp else source

    # Record file info on the entity when the store can provide it.
    info = get_store(local_path).get_file_info(source, local_path)
    if info is not None:
        self.refresh()
        self.status.add_file(info)
        self.save(update=True)

    # Infer the format (unless given) and load the DataFrame.
    fmt = self._get_extension(source, file_format)
    frame = datastore.read_df(local_path, fmt, **kwargs)

    # Clean up the temporary download directory.
    if is_tmp:
        cleanup = Path(local_path)
        if cleanup.is_file():
            cleanup = cleanup.parent
        shutil.rmtree(cleanup)

    return frame

write_df(df, extension=None, **kwargs)

Write DataFrame as parquet/csv/table into dataitem path.

Parameters:

Name Type Description Default
df Any

DataFrame to write.

required
extension str

Extension of the file.

None
**kwargs dict

Keyword arguments passed to the write_df function.

{}

Returns:

Type Description
str

Path to the written dataframe.

Source code in digitalhub_data/entities/dataitems/entity/table.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def write_df(
    self,
    df: Any,
    extension: str | None = None,
    **kwargs,
) -> str:
    """
    Write DataFrame as parquet/csv/table into dataitem path.

    Parameters
    ----------
    df : Any
        DataFrame to write.
    extension : str
        Extension of the file.
    **kwargs : dict
        Keyword arguments passed to the write_df function.

    Returns
    -------
    str
        Path to the written dataframe.
    """
    source = self.spec.path
    written = get_datastore(source).write_df(df, source, extension=extension, **kwargs)

    # Attach file info for the written artifact, when available.
    info = get_store(written).get_file_info(written, source)
    if info is not None:
        self.status.add_file(info)
    return written

dataitem_from_dict(obj)

Create dataitem from dictionary.

Parameters:

Name Type Description Default
obj dict

Dictionary to create object from.

required

Returns:

Type Description
Dataitem

Dataitem object.

Source code in digitalhub_data/entities/dataitems/builder.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def dataitem_from_dict(obj: dict) -> Dataitem:
    """
    Create dataitem from dictionary.

    Parameters
    ----------
    obj : dict
        Dictionary to create object from.

    Returns
    -------
    Dataitem
        Dataitem object.
    """
    # Pick the concrete class from the declared kind, then deserialize.
    dataitem_cls = _choose_dataitem_type(obj.get("kind"))
    return dataitem_cls.from_dict(obj)

dataitem_from_parameters(project, name, kind, uuid=None, description=None, git_source=None, labels=None, embedded=True, path=None, **kwargs)

Create a new object instance.

Parameters:

Name Type Description Default
project str

Project name.

required
name str

Object name.

required
kind str

Kind of the object.

required
uuid str

ID of the object (UUID4).

None
description str

Description of the object (human readable).

None
git_source str

Remote git source for object.

None
labels list[str]

List of labels.

None
embedded bool

Flag to determine if object must be embedded in project.

True
path str

Object path on local file system or remote storage. If not provided, it's generated.

None
**kwargs dict

Spec keyword arguments.

{}

Returns:

Type Description
Dataitem

Object instance.

Source code in digitalhub_data/entities/dataitems/builder.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def dataitem_from_parameters(
    project: str,
    name: str,
    kind: str,
    uuid: str | None = None,
    description: str | None = None,
    git_source: str | None = None,
    labels: list[str] | None = None,
    embedded: bool = True,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Create a new object instance.

    Parameters
    ----------
    project : str
        Project name.
    name : str
        Object name.
    kind : str
        Kind of the object.
    uuid : str
        ID of the object (UUID4).
    description : str
        Description of the object (human readable).
    git_source : str
        Remote git source for object.
    labels : list[str]
        List of labels.
    embedded : bool
        Flag to determine if object must be embedded in project.
    path : str
        Object path on local file system or remote storage.
        If not provided, it's generated.
    **kwargs : dict
        Spec keyword arguments.

    Returns
    -------
    Dataitem
        Object instance.
    """
    name = build_name(name)
    uuid = build_uuid(uuid)

    # Default to a generated S3 location when no path is given.
    if path is None:
        path = (
            f"s3://{get_s3_bucket()}/{project}/"
            f"{EntityTypes.DATAITEMS.value}/{name}/{uuid}/data.parquet"
        )

    metadata = build_metadata(
        kind,
        project=project,
        name=name,
        version=uuid,
        description=description,
        source=git_source,
        labels=labels,
        embedded=embedded,
    )
    spec = build_spec(kind, path=path, **kwargs)
    status = build_status(kind)

    dataitem_cls = _choose_dataitem_type(kind)
    return dataitem_cls(
        project=project,
        name=name,
        uuid=uuid,
        kind=kind,
        metadata=metadata,
        spec=spec,
        status=status,
    )