Skip to content

CRUD

delete_dataitem(identifier, project=None, entity_id=None, delete_all_versions=False, **kwargs)

Delete object from backend.

Parameters:

Name Type Description Default
identifier str

Entity key (store://...) or entity name.

required
project str

Project name.

None
entity_id str

Entity ID.

None
delete_all_versions bool

Delete all versions of the named entity. If True, use entity name instead of entity key as identifier.

False
**kwargs dict

Parameters to pass to the API call.

{}

Returns:

Type Description
dict

Response from backend.

Examples:

If delete_all_versions is False:

>>> obj = delete_dataitem("store://my-dataitem-key")

Otherwise:

>>> obj = delete_dataitem("my-dataitem-name",
>>>                       project="my-project",
>>>                       delete_all_versions=True)
Source code in digitalhub_data/entities/dataitem/crud.py
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
def delete_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    delete_all_versions: bool = False,
    **kwargs,
) -> dict:
    """
    Remove a dataitem from the backend.

    Parameters
    ----------
    identifier : str
        Either the entity key (store://...) or the entity name.
    project : str
        Name of the project.
    entity_id : str
        ID of the entity.
    delete_all_versions : bool
        When True, every version of the named entity is deleted. In that
        case pass the entity name (not the entity key) as identifier.
    **kwargs : dict
        Extra parameters forwarded to the API call.

    Returns
    -------
    dict
        The backend response.

    Examples
    --------
    Deleting a single version by key (delete_all_versions is False):
    >>> obj = delete_dataitem("store://my-dataitem-key")

    Deleting every version by name:
    >>> obj = delete_dataitem("my-dataitem-name",
    >>>                       project="my-project",
    >>>                       delete_all_versions=True)
    """
    # All the work is delegated to the context-aware delete helper; this
    # wrapper only pins the entity type to dataitems.
    response = delete_entity_api_ctx(
        identifier=identifier,
        entity_type=ENTITY_TYPE,
        project=project,
        entity_id=entity_id,
        delete_all_versions=delete_all_versions,
        **kwargs,
    )
    return response

get_dataitem(identifier, project=None, entity_id=None, **kwargs)

Get object from backend.

Parameters:

Name Type Description Default
identifier str

Entity key (store://...) or entity name.

required
project str

Project name.

None
entity_id str

Entity ID.

None
**kwargs dict

Parameters to pass to the API call.

{}

Returns:

Type Description
Dataitem

Object instance.

Examples:

Using entity key:

>>> obj = get_dataitem("store://my-dataitem-key")

Using entity name:

>>> obj = get_dataitem("my-dataitem-name",
>>>                    project="my-project",
>>>                    entity_id="my-dataitem-id")
Source code in digitalhub_data/entities/dataitem/crud.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def get_dataitem(
    identifier: str,
    project: str | None = None,
    entity_id: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Get object from backend.

    Parameters
    ----------
    identifier : str
        Entity key (store://...) or entity name.
    project : str
        Project name.
    entity_id : str
        Entity ID.
    **kwargs : dict
        Parameters to pass to the API call.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    Using entity key:
    >>> obj = get_dataitem("store://my-dataitem-key")

    Using entity name:
    >>> obj = get_dataitem("my-dataitem-name",
    >>>                    project="my-project",
    >>>                    entity_id="my-dataitem-id")
    """
    # Fetch the raw representation of the entity from the backend.
    obj = read_entity_api_ctx(
        identifier,
        ENTITY_TYPE,
        project=project,
        entity_id=entity_id,
        **kwargs,
    )
    # Build the Dataitem instance, then load its files info before
    # handing it back to the caller.
    entity = dataitem_from_dict(obj)
    entity._get_files_info()
    return entity

get_dataitem_versions(identifier, project=None, **kwargs)

Get object versions from backend.

Parameters:

Name Type Description Default
identifier str

Entity key (store://...) or entity name.

required
project str

Project name.

None
**kwargs dict

Parameters to pass to the API call.

{}

Returns:

Type Description
list[Dataitem]

List of object instances.

Examples:

Using entity key:

>>> objs = get_dataitem_versions("store://my-dataitem-key")

Using entity name:

>>> objs = get_dataitem_versions("my-dataitem-name",
>>>                              project="my-project")
Source code in digitalhub_data/entities/dataitem/crud.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
def get_dataitem_versions(
    identifier: str,
    project: str | None = None,
    **kwargs,
) -> list[Dataitem]:
    """
    Retrieve every stored version of a dataitem from the backend.

    Parameters
    ----------
    identifier : str
        Either the entity key (store://...) or the entity name.
    project : str
        Name of the project.
    **kwargs : dict
        Extra parameters forwarded to the API call.

    Returns
    -------
    list[Dataitem]
        One object instance per version returned by the backend.

    Examples
    --------
    By entity key:
    >>> objs = get_dataitem_versions("store://my-dataitem-key")

    By entity name:
    >>> objs = get_dataitem_versions("my-dataitem-name",
    >>>                              project="my-project")
    """
    raw_versions = read_entity_api_ctx_versions(
        identifier,
        entity_type=ENTITY_TYPE,
        project=project,
        **kwargs,
    )

    versions = []
    for raw in raw_versions:
        # Each version is built and has its files info loaded before the
        # next one is processed.
        dataitem = dataitem_from_dict(raw)
        dataitem._get_files_info()
        versions.append(dataitem)
    return versions

import_dataitem(file)

Import object from a YAML file.

Parameters:

Name Type Description Default
file str

Path to YAML file.

required

Returns:

Type Description
Dataitem

Object instance.

Examples:

>>> obj = import_dataitem("my-dataitem.yaml")
Source code in digitalhub_data/entities/dataitem/crud.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def import_dataitem(file: str) -> Dataitem:
    """
    Build a dataitem from its YAML file representation.

    Parameters
    ----------
    file : str
        Path to the YAML file.

    Returns
    -------
    Dataitem
        Object instance built from the file content.

    Examples
    --------
    >>> obj = import_dataitem("my-dataitem.yaml")
    """
    # The parsed YAML content is handed directly to the dict-based builder.
    return dataitem_from_dict(read_yaml(file))

list_dataitems(project, **kwargs)

List all latest version objects from backend.

Parameters:

Name Type Description Default
project str

Project name.

required
**kwargs dict

Parameters to pass to the API call.

{}

Returns:

Type Description
list[Dataitem]

List of object instances.

Examples:

>>> objs = list_dataitems(project="my-project")
Source code in digitalhub_data/entities/dataitem/crud.py
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
def list_dataitems(project: str, **kwargs) -> list[Dataitem]:
    """
    List the latest version of every dataitem in a project.

    Parameters
    ----------
    project : str
        Name of the project.
    **kwargs : dict
        Extra parameters forwarded to the API call.

    Returns
    -------
    list[Dataitem]
        One object instance per entry returned by the backend.

    Examples
    --------
    >>> objs = list_dataitems(project="my-project")
    """
    raw_items = list_entity_api_ctx(
        project=project,
        entity_type=ENTITY_TYPE,
        **kwargs,
    )

    dataitems = []
    for raw in raw_items:
        # Build the instance and load its files info before moving on.
        dataitem = dataitem_from_dict(raw)
        dataitem._get_files_info()
        dataitems.append(dataitem)
    return dataitems

log_dataitem(project, name, kind, source=None, data=None, extension=None, path=None, **kwargs)

Log a dataitem to the project.

Parameters:

Name Type Description Default
project str

Project name.

required
name str

Object name.

required
kind str

Kind of the object.

required
source str

Dataitem location on local path.

None
data Any

Dataframe to log. Alternative to source.

None
extension str

Extension of the output dataframe.

None
path str

Destination path of the dataitem. If not provided, it's generated.

None
**kwargs dict

New dataitem spec parameters.

{}

Returns:

Type Description
Dataitem

Object instance.

Examples:

>>> obj = log_dataitem(project="my-project",
>>>                    name="my-dataitem",
>>>                    kind="table",
>>>                    data=df)
Source code in digitalhub_data/entities/dataitem/crud.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def log_dataitem(
    project: str,
    name: str,
    kind: str,
    source: list[str] | str | None = None,
    data: Any | None = None,
    extension: str | None = None,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Log a dataitem to the project, either from local files or from data.

    Exactly one of ``source`` and ``data`` has to be supplied.

    Parameters
    ----------
    project : str
        Name of the project.
    name : str
        Name of the object.
    kind : str
        Kind of the object.
    source : list[str] | str
        Dataitem location(s) on the local path.
    data : Any
        Dataframe to log. Alternative to source.
    extension : str
        Extension of the output dataframe.
    path : str
        Destination path of the dataitem. When omitted, a path is
        generated together with a new uuid.
    **kwargs : dict
        Spec parameters for the new dataitem.

    Returns
    -------
    Dataitem
        Object instance.

    Raises
    ------
    ValueError
        If source and data are both given or both missing.

    Examples
    --------
    >>> obj = log_dataitem(project="my-project",
    >>>                    name="my-dataitem",
    >>>                    kind="table",
    >>>                    data=df)
    """
    has_source = source is not None
    has_data = data is not None
    if has_source == has_data:
        raise ValueError("You must provide source or data.")

    # --- Logging from local source --------------------------------------
    if has_source:
        eval_local_source(source)

        if path is None:
            # No destination given: create an id and derive the path from it.
            new_id = build_uuid()
            kwargs["uuid"] = new_id
            path = build_log_path_from_source(project, ENTITY_TYPE, name, new_id, source)

        dataitem = new_dataitem(project=project, name=name, kind=kind, path=path, **kwargs)
        dataitem.upload(source)
        return dataitem

    # --- Logging from in-memory data ------------------------------------
    if path is None:
        # The generated path always uses the "data.parquet" filename.
        new_id = build_uuid()
        kwargs["uuid"] = new_id
        path = build_log_path_from_filename(project, ENTITY_TYPE, name, new_id, "data.parquet")

    dataitem = dataitem_from_parameters(project=project, name=name, kind=kind, path=path, **kwargs)

    # Only "table" dataitems have their data written here; any other kind
    # is saved without writing data.
    if kind == "table":
        written = dataitem.write_df(df=data, extension=extension)

        # Schema and preview come from the reader matching the data object.
        reader = get_reader_by_object(data)
        dataitem.spec.schema = reader.get_schema(data)
        dataitem.status.preview = reader.get_preview(data)

        # Record files info, pairing the written destination with the
        # final component of the spec path.
        store = get_store(dataitem.spec.path)
        target_name = Path(urlparse(dataitem.spec.path).path).name
        files_info = store.get_file_info([(written, target_name)])
        dataitem.status.add_files_info(files_info)

    dataitem.save()
    return dataitem

new_dataitem(project, name, kind, uuid=None, description=None, labels=None, embedded=True, path=None, **kwargs)

Create a new object.

Parameters:

Name Type Description Default
project str

Project name.

required
name str

Object name.

required
kind str

Kind of the object.

required
uuid str

ID of the object (UUID4, e.g. 40f25c4b-d26b-4221-b048-9527aff291e2).

None
description str

Description of the object (human readable).

None
labels list[str]

List of labels.

None
embedded bool

Flag to determine if object spec must be embedded in project spec.

True
path str

Object path on local file system or remote storage. It is also the destination path of upload() method.

None
**kwargs dict

Spec keyword arguments.

{}

Returns:

Type Description
Dataitem

Object instance.

Examples:

>>> obj = new_dataitem(project="my-project",
>>>                    name="my-dataitem",
>>>                    kind="dataitem",
>>>                    path="s3://my-bucket/my-key")
Source code in digitalhub_data/entities/dataitem/crud.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def new_dataitem(
    project: str,
    name: str,
    kind: str,
    uuid: str | None = None,
    description: str | None = None,
    labels: list[str] | None = None,
    embedded: bool = True,
    path: str | None = None,
    **kwargs,
) -> Dataitem:
    """
    Build a new dataitem and save it.

    Parameters
    ----------
    project : str
        Name of the project.
    name : str
        Name of the object.
    kind : str
        Kind of the object.
    uuid : str
        ID of the object (UUID4, e.g. 40f25c4b-d26b-4221-b048-9527aff291e2).
    description : str
        Human readable description of the object.
    labels : list[str]
        List of labels.
    embedded : bool
        Flag to determine if object spec must be embedded in project spec.
    path : str
        Object path on local file system or remote storage. It is also the
        destination path of upload() method.
    **kwargs : dict
        Spec keyword arguments.

    Returns
    -------
    Dataitem
        Object instance.

    Examples
    --------
    >>> obj = new_dataitem(project="my-project",
    >>>                    name="my-dataitem",
    >>>                    kind="dataitem",
    >>>                    path="s3://my-bucket/my-key")
    """
    # The project context is checked before anything is built.
    check_context(project)

    # Collect the named arguments once; the extra spec arguments are
    # forwarded alongside them.
    params = {
        "project": project,
        "name": name,
        "kind": kind,
        "uuid": uuid,
        "description": description,
        "labels": labels,
        "embedded": embedded,
        "path": path,
    }
    dataitem = dataitem_from_parameters(**params, **kwargs)
    dataitem.save()
    return dataitem

update_dataitem(entity)

Update object. Note that object specs are immutable.

Parameters:

Name Type Description Default
entity Dataitem

Object to update.

required

Returns:

Type Description
Dataitem

Entity updated.

Examples:

>>> obj = update_dataitem(obj)
Source code in digitalhub_data/entities/dataitem/crud.py
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
def update_dataitem(entity: Dataitem) -> Dataitem:
    """
    Save changes made to an existing dataitem. Note that object specs are immutable.

    Parameters
    ----------
    entity : Dataitem
        The object to update.

    Returns
    -------
    Dataitem
        The updated entity, as returned by its save method.

    Examples
    --------
    >>> obj = update_dataitem(obj)
    """
    # Saving with update=True is what distinguishes this from a first save.
    updated = entity.save(update=True)
    return updated