Skip to content

API Reference

Home / icechunk-python / reference

IcechunkStore

Bases: Store, SyncMixin

Source code in icechunk/__init__.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
class IcechunkStore(Store, SyncMixin):
    _store: PyIcechunkStore

    @classmethod
    async def open(cls, *args: Any, **kwargs: Any) -> Self:
        """This method is called by zarr-python, it's not intended for users.

        Use one of `IcechunkStore.open_existing`, `IcechunkStore.create` or `IcechunkStore.open_or_create` instead.
        """
        return cls.open_or_create(*args, **kwargs)

    @classmethod
    def open_or_create(cls, *args: Any, **kwargs: Any) -> Self:
        if "mode" in kwargs:
            mode = kwargs.pop("mode")
        else:
            mode = "r"

        if "storage" in kwargs:
            storage = kwargs.pop("storage")
        else:
            raise ValueError(
                "Storage configuration is required. Pass a Storage object to construct an IcechunkStore"
            )

        store = None
        match mode:
            case "r" | "r+":
                store = cls.open_existing(storage, mode, *args, **kwargs)
            case "a":
                if pyicechunk_store_exists(storage):
                    store = cls.open_existing(storage, mode, *args, **kwargs)
                else:
                    store = cls.create(storage, mode, *args, **kwargs)
            case "w":
                if pyicechunk_store_exists(storage):
                    store = cls.open_existing(storage, mode, *args, **kwargs)
                    store.sync_clear()
                else:
                    store = cls.create(storage, mode, *args, **kwargs)
            case "w-":
                if pyicechunk_store_exists(storage):
                    raise ValueError("""Zarr store already exists, open using mode "w" or "r+""""")
                else:
                    store = cls.create(storage, mode, *args, **kwargs)

        assert(store)
        # We dont want to call _open() because icechunk handles the opening, etc.
        # if we have gotten this far we can mark it as open
        store._is_open = True

        return store


    def __init__(
        self,
        store: PyIcechunkStore,
        mode: AccessModeLiteral = "r",
        *args: Any,
        **kwargs: Any,
    ):
        """Create a new IcechunkStore.

        This should not be called directly, instead use the `create`, `open_existing` or `open_or_create` class methods.
        """
        super().__init__(*args, mode=mode, **kwargs)
        if store is None:
            raise ValueError(
                "An IcechunkStore should not be created with the default constructor, instead use either the create or open_existing class methods."
            )
        self._store = store

    @classmethod
    def open_existing(
        cls,
        storage: StorageConfig,
        mode: AccessModeLiteral = "r",
        config: StoreConfig | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> Self:
        """Open an existing IcechunkStore from the given storage.

        If there is not store at the given location, an error will be raised.

        It is recommended to use the cached storage option for better performance. If cached=True,
        this will be configured automatically with the provided storage_config as the underlying
        storage backend.

        If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable.
        """
        config = config or StoreConfig()
        read_only = mode == "r"
        # We have delayed checking if the repository exists, to avoid the delay in the happy case
        # So we need to check now if open fails, to provide a nice error message
        try:
            store = pyicechunk_store_open_existing(
                storage, read_only=read_only, config=config
            )
        # TODO: we should have an exception type to catch here, for the case of non-existing repo
        except Exception as e:
            if pyicechunk_store_exists(storage):
                # if the repo exists, this is an actual error we need to raise
                raise e
            else:
                # if the repo doesn't exists, we want to point users to that issue instead
                raise ValueError("No Icechunk repository at the provided location, try opening in create mode or changing the location") from None
        return cls(store=store, mode=mode, args=args, kwargs=kwargs)

    @classmethod
    def create(
        cls,
        storage: StorageConfig,
        mode: AccessModeLiteral = "w",
        config: StoreConfig | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> Self:
        """Create a new IcechunkStore with the given storage configuration.

        If a store already exists at the given location, an error will be raised.
        """
        config = config or StoreConfig()
        store = pyicechunk_store_create(storage, config=config)
        return cls(store=store, mode=mode, args=args, kwargs=kwargs)

    def with_mode(self, mode: AccessModeLiteral) -> Self:
        """
        Return a new store of the same type pointing to the same location with a new mode.

        The returned Store is not automatically opened. Call :meth:`Store.open` before
        using.

        Parameters
        ----------
        mode: AccessModeLiteral
            The new mode to use.

        Returns
        -------
        store:
            A new store of the same type with the new mode.

        """
        read_only = mode == "r"
        new_store = self._store.with_mode(read_only)
        return self.__class__(new_store, mode=mode)

    def __eq__(self, value: object) -> bool:
        if not isinstance(value, self.__class__):
            return False
        return self._store == value._store

    def __getstate__(self) -> object:
        # we serialize the Rust store as bytes
        d = self.__dict__.copy()
        d["_store"] = self._store.as_bytes()
        return d

    def __setstate__(self, state: Any) -> None:
        # we have to deserialize the bytes of the Rust store
        mode = state["_mode"]
        is_read_only = mode.readonly
        store_repr = state["_store"]
        state["_store"] = pyicechunk_store_from_bytes(store_repr, is_read_only)
        self.__dict__ = state

    @property
    def snapshot_id(self) -> str:
        """Return the current snapshot id."""
        return self._store.snapshot_id

    def change_set_bytes(self) -> bytes:
        """Get the complete list of changes applied in this session, serialized to bytes.

        This method is useful in combination with `IcechunkStore.distributed_commit`. When a
        write session is too large to execute in a single machine, it could be useful to
        distribute it across multiple workers. Each worker can write their changes independently
        (map) and then a single commit is executed by a coordinator (reduce).

        This methods provides a way to send back to gather a "description" of the
        changes applied by a worker. Resulting bytes, together with the `change_set_bytes` of
        other workers, can be fed to `distributed_commit`.

        This API is subject to change, it will be replaced by a merge operation at the Store level.
        """
        return self._store.change_set_bytes()

    @property
    def branch(self) -> str | None:
        """Return the current branch name."""
        return self._store.branch

    def checkout(
        self,
        snapshot_id: str | None = None,
        branch: str | None = None,
        tag: str | None = None,
    ) -> None:
        """Checkout a branch, tag, or specific snapshot.

        If a branch is checked out, any following `commit` attempts will update that branch
        reference if successful. If a tag or snapshot_id are checked out, the repository
        won't allow commits.
        """
        if snapshot_id is not None:
            if branch is not None or tag is not None:
                raise ValueError(
                    "only one of snapshot_id, branch, or tag may be specified"
                )
            return self._store.checkout_snapshot(snapshot_id)
        if branch is not None:
            if tag is not None:
                raise ValueError(
                    "only one of snapshot_id, branch, or tag may be specified"
                )
            return self._store.checkout_branch(branch)
        if tag is not None:
            return self._store.checkout_tag(tag)

        raise ValueError("a snapshot_id, branch, or tag must be specified")

    async def async_checkout(
        self,
        snapshot_id: str | None = None,
        branch: str | None = None,
        tag: str | None = None,
    ) -> None:
        """Checkout a branch, tag, or specific snapshot.

        If a branch is checked out, any following `commit` attempts will update that branch
        reference if successful. If a tag or snapshot_id are checked out, the repository
        won't allow commits.
        """
        if snapshot_id is not None:
            if branch is not None or tag is not None:
                raise ValueError(
                    "only one of snapshot_id, branch, or tag may be specified"
                )
            return await self._store.async_checkout_snapshot(snapshot_id)
        if branch is not None:
            if tag is not None:
                raise ValueError(
                    "only one of snapshot_id, branch, or tag may be specified"
                )
            return await self._store.async_checkout_branch(branch)
        if tag is not None:
            return await self._store.async_checkout_tag(tag)

        raise ValueError("a snapshot_id, branch, or tag must be specified")

    def commit(self, message: str) -> str:
        """Commit any uncommitted changes to the store.

        This will create a new snapshot on the current branch and return
        the new snapshot id.

        This method will fail if:

        * there is no currently checked out branch
        * some other writer updated the current branch since the repository was checked out
        """
        return self._store.commit(message)

    async def async_commit(self, message: str) -> str:
        """Commit any uncommitted changes to the store.

        This will create a new snapshot on the current branch and return
        the new snapshot id.

        This method will fail if:

        * there is no currently checked out branch
        * some other writer updated the current branch since the repository was checked out
        """
        return await self._store.async_commit(message)

    def distributed_commit(
        self, message: str, other_change_set_bytes: list[bytes]
    ) -> str:
        """Commit any uncommitted changes to the store with a set of distributed changes.

        This will create a new snapshot on the current branch and return
        the new snapshot id.

        This method will fail if:

        * there is no currently checked out branch
        * some other writer updated the current branch since the repository was checked out

        other_change_set_bytes must be generated as the output of calling `change_set_bytes`
        on other stores. The resulting commit will include changes from all stores.

        The behavior is undefined if the stores applied conflicting changes.
        """
        return self._store.distributed_commit(message, other_change_set_bytes)

    async def async_distributed_commit(
        self, message: str, other_change_set_bytes: list[bytes]
    ) -> str:
        """Commit any uncommitted changes to the store with a set of distributed changes.

        This will create a new snapshot on the current branch and return
        the new snapshot id.

        This method will fail if:

        * there is no currently checked out branch
        * some other writer updated the current branch since the repository was checked out

        other_change_set_bytes must be generated as the output of calling `change_set_bytes`
        on other stores. The resulting commit will include changes from all stores.

        The behavior is undefined if the stores applied conflicting changes.
        """
        return await self._store.async_distributed_commit(message, other_change_set_bytes)

    @property
    def has_uncommitted_changes(self) -> bool:
        """Return True if there are uncommitted changes to the store"""
        return self._store.has_uncommitted_changes

    async def async_reset(self) -> None:
        """Discard any uncommitted changes and reset to the previous snapshot state."""
        return await self._store.async_reset()

    def reset(self) -> None:
        """Discard any uncommitted changes and reset to the previous snapshot state."""
        return self._store.reset()

    async def async_new_branch(self, branch_name: str) -> str:
        """Create a new branch pointing to the current checked out snapshot.

        This requires having no uncommitted changes.
        """
        return await self._store.async_new_branch(branch_name)

    def new_branch(self, branch_name: str) -> str:
        """Create a new branch pointing to the current checked out snapshot.

        This requires having no uncommitted changes.
        """
        return self._store.new_branch(branch_name)

    def tag(self, tag_name: str, snapshot_id: str) -> None:
        """Create a tag pointing to the current checked out snapshot."""
        return self._store.tag(tag_name, snapshot_id=snapshot_id)

    async def async_tag(self, tag_name: str, snapshot_id: str) -> None:
        """Create a tag pointing to the current checked out snapshot."""
        return await self._store.async_tag(tag_name, snapshot_id=snapshot_id)

    def ancestry(self) -> list[SnapshotMetadata]:
        """Get the list of parents of the current version.
        """
        return self._store.ancestry()

    def async_ancestry(self) -> AsyncGenerator[SnapshotMetadata, None]:
        """Get the list of parents of the current version.

        Returns
        -------
        AsyncGenerator[SnapshotMetadata, None]
        """
        return self._store.async_ancestry()

    async def empty(self) -> bool:
        """Check if the store is empty."""
        return await self._store.empty()

    async def clear(self) -> None:
        """Clear the store.

        This will remove all contents from the current session,
        including all groups and all arrays. But it will not modify the repository history.
        """
        return await self._store.clear()

    def sync_clear(self) -> None:
        """Clear the store.

        This will remove all contents from the current session,
        including all groups and all arrays. But it will not modify the repository history.
        """
        return self._store.sync_clear()

    async def get(
        self,
        key: str,
        prototype: BufferPrototype,
        byte_range: tuple[int | None, int | None] | None = None,
    ) -> Buffer | None:
        """Retrieve the value associated with a given key.

        Parameters
        ----------
        key : str
        byte_range : tuple[int, Optional[int]], optional

        Returns
        -------
        Buffer
        """

        try:
            result = await self._store.get(key, byte_range)
        except KeyNotFound as _e:
            # Zarr python expects None to be returned if the key does not exist
            # but an IcechunkStore returns an error if the key does not exist
            return None

        return prototype.buffer.from_bytes(result)

    async def get_partial_values(
        self,
        prototype: BufferPrototype,
        key_ranges: Iterable[tuple[str, ByteRangeRequest]],
    ) -> list[Buffer | None]:
        """Retrieve possibly partial values from given key_ranges.

        Parameters
        ----------
        key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]]
            Ordered set of key, range pairs, a key may occur multiple times with different ranges

        Returns
        -------
        list of values, in the order of the key_ranges, may contain null/none for missing keys
        """
        # NOTE: pyo3 has not implicit conversion from an Iterable to a rust iterable. So we convert it
        # to a list here first. Possible opportunity for optimization.
        result = await self._store.get_partial_values(list(key_ranges))
        return [prototype.buffer.from_bytes(r) for r in result]

    async def exists(self, key: str) -> bool:
        """Check if a key exists in the store.

        Parameters
        ----------
        key : str

        Returns
        -------
        bool
        """
        return await self._store.exists(key)

    @property
    def supports_writes(self) -> bool:
        """Does the store support writes?"""
        return self._store.supports_writes

    async def set(self, key: str, value: Buffer) -> None:
        """Store a (key, value) pair.

        Parameters
        ----------
        key : str
        value : Buffer
        """
        return await self._store.set(key, value.to_bytes())

    async def set_if_not_exists(self, key: str, value: Buffer) -> None:
        """
        Store a key to ``value`` if the key is not already present.

        Parameters
        -----------
        key : str
        value : Buffer
        """
        return await self._store.set_if_not_exists(key, value.to_bytes())

    async def async_set_virtual_ref(
        self, key: str, location: str, *, offset: int, length: int
    ) -> None:
        """Store a virtual reference to a chunk.

        Parameters
        ----------
        key : str
            The chunk to store the reference under. This is the fully qualified zarr key eg: 'array/c/0/0/0'
        location : str
            The location of the chunk in storage. This is absolute path to the chunk in storage eg: 's3://bucket/path/to/file.nc'
        offset : int
            The offset in bytes from the start of the file location in storage the chunk starts at
        length : int
            The length of the chunk in bytes, measured from the given offset
        """
        return await self._store.async_set_virtual_ref(key, location, offset, length)

    def set_virtual_ref(
        self, key: str, location: str, *, offset: int, length: int
    ) -> None:
        """Store a virtual reference to a chunk.

        Parameters
        ----------
        key : str
            The chunk to store the reference under. This is the fully qualified zarr key eg: 'array/c/0/0/0'
        location : str
            The location of the chunk in storage. This is absolute path to the chunk in storage eg: 's3://bucket/path/to/file.nc'
        offset : int
            The offset in bytes from the start of the file location in storage the chunk starts at
        length : int
            The length of the chunk in bytes, measured from the given offset
        """
        return self._store.set_virtual_ref(key, location, offset, length)

    async def delete(self, key: str) -> None:
        """Remove a key from the store

        Parameters
        ----------
        key : strz
        """
        return await self._store.delete(key)

    @property
    def supports_partial_writes(self) -> bool:
        """Does the store support partial writes?"""
        return self._store.supports_partial_writes

    async def set_partial_values(
        self, key_start_values: Iterable[tuple[str, int, BytesLike]]
    ) -> None:
        """Store values at a given key, starting at byte range_start.

        Parameters
        ----------
        key_start_values : list[tuple[str, int, BytesLike]]
            set of key, range_start, values triples, a key may occur multiple times with different
            range_starts, range_starts (considering the length of the respective values) must not
            specify overlapping ranges for the same key
        """
        # NOTE: pyo3 does not implicit conversion from an Iterable to a rust iterable. So we convert it
        # to a list here first. Possible opportunity for optimization.
        return await self._store.set_partial_values(list(key_start_values))

    @property
    def supports_listing(self) -> bool:
        """Does the store support listing?"""
        return self._store.supports_listing

    @property
    def supports_deletes(self) -> bool:
        return self._store.supports_deletes

    def list(self) -> AsyncGenerator[str, None]:
        """Retrieve all keys in the store.

        Returns
        -------
        AsyncGenerator[str, None]
        """
        # The zarr spec specefies that that this and other
        # listing methods should not be async, so we need to
        # wrap the async method in a sync method.
        return self._store.list()

    def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
        """Retrieve all keys in the store with a given prefix.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        AsyncGenerator[str, None]
        """
        # The zarr spec specefies that that this and other
        # listing methods should not be async, so we need to
        # wrap the async method in a sync method.
        return self._store.list_prefix(prefix)

    def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
        """
        Retrieve all keys and prefixes with a given prefix and which do not contain the character
        “/” after the given prefix.

        Parameters
        ----------
        prefix : str

        Returns
        -------
        AsyncGenerator[str, None]
        """
        # The zarr spec specefies that that this and other
        # listing methods should not be async, so we need to
        # wrap the async method in a sync method.
        return self._store.list_dir(prefix)

branch: str | None property

Return the current branch name.

has_uncommitted_changes: bool property

Return True if there are uncommitted changes to the store

snapshot_id: str property

Return the current snapshot id.

supports_listing: bool property

Does the store support listing?

supports_partial_writes: bool property

Does the store support partial writes?

supports_writes: bool property

Does the store support writes?

__init__(store, mode='r', *args, **kwargs)

Create a new IcechunkStore.

This should not be called directly, instead use the create, open_existing or open_or_create class methods.

Source code in icechunk/__init__.py
def __init__(
    self,
    store: PyIcechunkStore,
    mode: AccessModeLiteral = "r",
    *args: Any,
    **kwargs: Any,
):
    """Create a new IcechunkStore.

    This should not be called directly, instead use the `create`, `open_existing` or `open_or_create` class methods.
    """
    super().__init__(*args, mode=mode, **kwargs)
    if store is None:
        raise ValueError(
            "An IcechunkStore should not be created with the default constructor, instead use either the create or open_existing class methods."
        )
    self._store = store

ancestry()

Get the list of parents of the current version.

Source code in icechunk/__init__.py
def ancestry(self) -> list[SnapshotMetadata]:
    """Get the list of parents of the current version.
    """
    return self._store.ancestry()

async_ancestry()

Get the list of parents of the current version.

Returns

AsyncGenerator[SnapshotMetadata, None]

Source code in icechunk/__init__.py
def async_ancestry(self) -> AsyncGenerator[SnapshotMetadata, None]:
    """Get the list of parents of the current version.

    Returns
    -------
    AsyncGenerator[SnapshotMetadata, None]
    """
    return self._store.async_ancestry()

async_checkout(snapshot_id=None, branch=None, tag=None) async

Checkout a branch, tag, or specific snapshot.

If a branch is checked out, any following commit attempts will update that branch reference if successful. If a tag or snapshot_id are checked out, the repository won't allow commits.

Source code in icechunk/__init__.py
async def async_checkout(
    self,
    snapshot_id: str | None = None,
    branch: str | None = None,
    tag: str | None = None,
) -> None:
    """Checkout a branch, tag, or specific snapshot.

    If a branch is checked out, any following `commit` attempts will update that branch
    reference if successful. If a tag or snapshot_id are checked out, the repository
    won't allow commits.
    """
    if snapshot_id is not None:
        if branch is not None or tag is not None:
            raise ValueError(
                "only one of snapshot_id, branch, or tag may be specified"
            )
        return await self._store.async_checkout_snapshot(snapshot_id)
    if branch is not None:
        if tag is not None:
            raise ValueError(
                "only one of snapshot_id, branch, or tag may be specified"
            )
        return await self._store.async_checkout_branch(branch)
    if tag is not None:
        return await self._store.async_checkout_tag(tag)

    raise ValueError("a snapshot_id, branch, or tag must be specified")

async_commit(message) async

Commit any uncommitted changes to the store.

This will create a new snapshot on the current branch and return the new snapshot id.

This method will fail if:

  • there is no currently checked out branch
  • some other writer updated the current branch since the repository was checked out
Source code in icechunk/__init__.py
async def async_commit(self, message: str) -> str:
    """Commit any uncommitted changes to the store.

    This will create a new snapshot on the current branch and return
    the new snapshot id.

    This method will fail if:

    * there is no currently checked out branch
    * some other writer updated the current branch since the repository was checked out
    """
    return await self._store.async_commit(message)

async_distributed_commit(message, other_change_set_bytes) async

Commit any uncommitted changes to the store with a set of distributed changes.

This will create a new snapshot on the current branch and return the new snapshot id.

This method will fail if:

  • there is no currently checked out branch
  • some other writer updated the current branch since the repository was checked out

other_change_set_bytes must be generated as the output of calling change_set_bytes on other stores. The resulting commit will include changes from all stores.

The behavior is undefined if the stores applied conflicting changes.

Source code in icechunk/__init__.py
async def async_distributed_commit(
    self, message: str, other_change_set_bytes: list[bytes]
) -> str:
    """Commit any uncommitted changes to the store with a set of distributed changes.

    This will create a new snapshot on the current branch and return
    the new snapshot id.

    This method will fail if:

    * there is no currently checked out branch
    * some other writer updated the current branch since the repository was checked out

    other_change_set_bytes must be generated as the output of calling `change_set_bytes`
    on other stores. The resulting commit will include changes from all stores.

    The behavior is undefined if the stores applied conflicting changes.
    """
    return await self._store.async_distributed_commit(message, other_change_set_bytes)

async_new_branch(branch_name) async

Create a new branch pointing to the current checked out snapshot.

This requires having no uncommitted changes.

Source code in icechunk/__init__.py
async def async_new_branch(self, branch_name: str) -> str:
    """Create a new branch pointing to the current checked out snapshot.

    This requires having no uncommitted changes.
    """
    return await self._store.async_new_branch(branch_name)

async_reset() async

Discard any uncommitted changes and reset to the previous snapshot state.

Source code in icechunk/__init__.py
async def async_reset(self) -> None:
    """Discard any uncommitted changes and reset to the previous snapshot state."""
    return await self._store.async_reset()

async_set_virtual_ref(key, location, *, offset, length) async

Store a virtual reference to a chunk.

Parameters

key : str The chunk to store the reference under. This is the fully qualified zarr key eg: 'array/c/0/0/0' location : str The location of the chunk in storage. This is absolute path to the chunk in storage eg: 's3://bucket/path/to/file.nc' offset : int The offset in bytes from the start of the file location in storage the chunk starts at length : int The length of the chunk in bytes, measured from the given offset

Source code in icechunk/__init__.py
async def async_set_virtual_ref(
    self, key: str, location: str, *, offset: int, length: int
) -> None:
    """Store a virtual reference to a chunk.

    Parameters
    ----------
    key : str
        The chunk to store the reference under. This is the fully qualified zarr key eg: 'array/c/0/0/0'
    location : str
        The location of the chunk in storage. This is absolute path to the chunk in storage eg: 's3://bucket/path/to/file.nc'
    offset : int
        The offset in bytes from the start of the file location in storage the chunk starts at
    length : int
        The length of the chunk in bytes, measured from the given offset
    """
    return await self._store.async_set_virtual_ref(key, location, offset, length)

async_tag(tag_name, snapshot_id) async

Create a tag pointing to the current checked out snapshot.

Source code in icechunk/__init__.py
async def async_tag(self, tag_name: str, snapshot_id: str) -> None:
    """Create a tag pointing at the given snapshot id."""
    return await self._store.async_tag(tag_name, snapshot_id=snapshot_id)

change_set_bytes()

Get the complete list of changes applied in this session, serialized to bytes.

This method is useful in combination with IcechunkStore.distributed_commit. When a write session is too large to execute in a single machine, it could be useful to distribute it across multiple workers. Each worker can write their changes independently (map) and then a single commit is executed by a coordinator (reduce).

This method provides a way to gather a "description" of the changes applied by a worker. The resulting bytes, together with the change_set_bytes of other workers, can be fed to distributed_commit.

This API is subject to change, it will be replaced by a merge operation at the Store level.

Source code in icechunk/__init__.py
def change_set_bytes(self) -> bytes:
    """Serialize every change applied in this session into bytes.

    Useful together with `IcechunkStore.distributed_commit`: when a write
    session is too large for a single machine, workers can write their
    changes independently (map) while a single coordinator performs the
    commit (reduce).

    The returned bytes describe the changes applied by this worker; combined
    with the `change_set_bytes` of the other workers, they can be fed to
    `distributed_commit`.

    This API is subject to change, it will be replaced by a merge operation at the Store level.
    """
    return self._store.change_set_bytes()

checkout(snapshot_id=None, branch=None, tag=None)

Checkout a branch, tag, or specific snapshot.

If a branch is checked out, any following commit attempts will update that branch reference if successful. If a tag or snapshot_id are checked out, the repository won't allow commits.

Source code in icechunk/__init__.py
def checkout(
    self,
    snapshot_id: str | None = None,
    branch: str | None = None,
    tag: str | None = None,
) -> None:
    """Checkout a branch, tag, or specific snapshot.

    If a branch is checked out, any following `commit` attempts will update that branch
    reference if successful. If a tag or snapshot_id are checked out, the repository
    won't allow commits.
    """
    if snapshot_id is not None:
        if branch is not None or tag is not None:
            raise ValueError(
                "only one of snapshot_id, branch, or tag may be specified"
            )
        return self._store.checkout_snapshot(snapshot_id)
    if branch is not None:
        if tag is not None:
            raise ValueError(
                "only one of snapshot_id, branch, or tag may be specified"
            )
        return self._store.checkout_branch(branch)
    if tag is not None:
        return self._store.checkout_tag(tag)

    raise ValueError("a snapshot_id, branch, or tag must be specified")

clear() async

Clear the store.

This will remove all contents from the current session, including all groups and all arrays. But it will not modify the repository history.

Source code in icechunk/__init__.py
async def clear(self) -> None:
    """Empty out the current session.

    Every group and every array is removed from the session, while the
    repository history stays untouched.
    """
    return await self._store.clear()

commit(message)

Commit any uncommitted changes to the store.

This will create a new snapshot on the current branch and return the new snapshot id.

This method will fail if:

  • there is no currently checked out branch
  • some other writer updated the current branch since the repository was checked out
Source code in icechunk/__init__.py
def commit(self, message: str) -> str:
    """Persist all uncommitted changes as a new snapshot.

    A new snapshot is written on the current branch and its id is returned.

    Fails when:

    * no branch is currently checked out
    * another writer advanced the branch after this repository was checked out
    """
    snapshot_id = self._store.commit(message)
    return snapshot_id

create(storage, mode='w', config=None, *args, **kwargs) classmethod

Create a new IcechunkStore with the given storage configuration.

If a store already exists at the given location, an error will be raised.

Source code in icechunk/__init__.py
@classmethod
def create(
    cls,
    storage: StorageConfig,
    mode: AccessModeLiteral = "w",
    config: StoreConfig | None = None,
    *args: Any,
    **kwargs: Any,
) -> Self:
    """Initialize a brand-new IcechunkStore on the given storage.

    Raises an error when a store already exists at that location.
    """
    if not config:
        config = StoreConfig()
    new_store = pyicechunk_store_create(storage, config=config)
    return cls(store=new_store, mode=mode, args=args, kwargs=kwargs)

delete(key) async

Remove a key from the store

Parameters

key : str

Source code in icechunk/__init__.py
async def delete(self, key: str) -> None:
    """Remove a key from the store.

    Parameters
    ----------
    key : str
        The key to delete.
    """
    return await self._store.delete(key)

distributed_commit(message, other_change_set_bytes)

Commit any uncommitted changes to the store with a set of distributed changes.

This will create a new snapshot on the current branch and return the new snapshot id.

This method will fail if:

  • there is no currently checked out branch
  • some other writer updated the current branch since the repository was checked out

other_change_set_bytes must be generated as the output of calling change_set_bytes on other stores. The resulting commit will include changes from all stores.

The behavior is undefined if the stores applied conflicting changes.

Source code in icechunk/__init__.py
def distributed_commit(
    self, message: str, other_change_set_bytes: list[bytes]
) -> str:
    """Commit this session's changes merged with change sets from other stores.

    A new snapshot is created on the current branch and its id is returned.

    Fails when:

    * no branch is currently checked out
    * another writer advanced the branch after this repository was checked out

    Every element of `other_change_set_bytes` must be the output of calling
    `change_set_bytes` on another store; the resulting commit includes the
    changes from all stores.

    The behavior is undefined if the stores applied conflicting changes.
    """
    return self._store.distributed_commit(message, other_change_set_bytes)

empty() async

Check if the store is empty.

Source code in icechunk/__init__.py
async def empty(self) -> bool:
    """Return True when the store holds no keys at all."""
    is_empty = await self._store.empty()
    return is_empty

exists(key) async

Check if a key exists in the store.

Parameters

key : str

Returns

bool

Source code in icechunk/__init__.py
async def exists(self, key: str) -> bool:
    """Return whether `key` is present in the store.

    Parameters
    ----------
    key : str

    Returns
    -------
    bool
    """
    found = await self._store.exists(key)
    return found

get(key, prototype, byte_range=None) async

Retrieve the value associated with a given key.

Parameters

key : str byte_range : tuple[int, Optional[int]], optional

Returns

Buffer

Source code in icechunk/__init__.py
async def get(
    self,
    key: str,
    prototype: BufferPrototype,
    byte_range: tuple[int | None, int | None] | None = None,
) -> Buffer | None:
    """Fetch the value stored under `key`, or None when it is absent.

    Parameters
    ----------
    key : str
    byte_range : tuple[int, Optional[int]], optional

    Returns
    -------
    Buffer
    """
    try:
        raw = await self._store.get(key, byte_range)
    except KeyNotFound as _e:
        # The underlying IcechunkStore raises on a missing key, but
        # zarr-python expects a plain None in that case.
        return None
    return prototype.buffer.from_bytes(raw)

get_partial_values(prototype, key_ranges) async

Retrieve possibly partial values from given key_ranges.

Parameters

key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]] Ordered set of key, range pairs, a key may occur multiple times with different ranges

Returns

list of values, in the order of the key_ranges, may contain null/none for missing keys

Source code in icechunk/__init__.py
async def get_partial_values(
    self,
    prototype: BufferPrototype,
    key_ranges: Iterable[tuple[str, ByteRangeRequest]],
) -> list[Buffer | None]:
    """Fetch possibly partial values for each requested key range.

    Parameters
    ----------
    key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]]
        Ordered (key, range) pairs; the same key may appear several times
        with different ranges.

    Returns
    -------
    list of values, in the order of the key_ranges, may contain null/none for missing keys
    """
    # pyo3 has no implicit conversion from a Python iterable to a Rust one,
    # so materialize the argument as a list first. Possible optimization.
    raw_results = await self._store.get_partial_values(list(key_ranges))
    return [prototype.buffer.from_bytes(raw) for raw in raw_results]

list()

Retrieve all keys in the store.

Returns

AsyncGenerator[str, None]

Source code in icechunk/__init__.py
def list(self) -> AsyncGenerator[str, None]:
    """Iterate over every key in the store.

    Returns
    -------
    AsyncGenerator[str, None]
    """
    # The zarr spec specifies that this and the other listing methods are
    # synchronous functions returning an async generator, so nothing is
    # awaited here.
    return self._store.list()

list_dir(prefix)

Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix.

Parameters

prefix : str

Returns

AsyncGenerator[str, None]

Source code in icechunk/__init__.py
def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
    """
    Iterate over the keys and prefixes directly under `prefix`, i.e. those
    that contain no further "/" past the given prefix.

    Parameters
    ----------
    prefix : str

    Returns
    -------
    AsyncGenerator[str, None]
    """
    # The zarr spec specifies that this and the other listing methods are
    # synchronous functions returning an async generator, so nothing is
    # awaited here.
    return self._store.list_dir(prefix)

list_prefix(prefix)

Retrieve all keys in the store with a given prefix.

Parameters

prefix : str

Returns

AsyncGenerator[str, None]

Source code in icechunk/__init__.py
def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
    """Iterate over every key in the store that starts with `prefix`.

    Parameters
    ----------
    prefix : str

    Returns
    -------
    AsyncGenerator[str, None]
    """
    # The zarr spec specifies that this and the other listing methods are
    # synchronous functions returning an async generator, so nothing is
    # awaited here.
    return self._store.list_prefix(prefix)

new_branch(branch_name)

Create a new branch pointing to the current checked out snapshot.

This requires having no uncommitted changes.

Source code in icechunk/__init__.py
def new_branch(self, branch_name: str) -> str:
    """Create a branch at the currently checked out snapshot.

    Fails if the session has uncommitted changes.
    """
    new_ref = self._store.new_branch(branch_name)
    return new_ref

open(*args, **kwargs) async classmethod

This method is called by zarr-python, it's not intended for users.

Use one of IcechunkStore.open_existing, IcechunkStore.create or IcechunkStore.open_or_create instead.

Source code in icechunk/__init__.py
@classmethod
async def open(cls, *args: Any, **kwargs: Any) -> Self:
    """Entry point invoked by zarr-python; not intended for direct use.

    Prefer `IcechunkStore.open_existing`, `IcechunkStore.create` or
    `IcechunkStore.open_or_create`.
    """
    return cls.open_or_create(*args, **kwargs)

open_existing(storage, mode='r', config=None, *args, **kwargs) classmethod

Open an existing IcechunkStore from the given storage.

If there is no store at the given location, an error will be raised.

It is recommended to use the cached storage option for better performance. If cached=True, this will be configured automatically with the provided storage_config as the underlying storage backend.

If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable.

Source code in icechunk/__init__.py
@classmethod
def open_existing(
    cls,
    storage: StorageConfig,
    mode: AccessModeLiteral = "r",
    config: StoreConfig | None = None,
    *args: Any,
    **kwargs: Any,
) -> Self:
    """Open an existing IcechunkStore from the given storage.

    If there is no store at the given location, an error will be raised.

    It is recommended to use the cached storage option for better performance. If cached=True,
    this will be configured automatically with the provided storage_config as the underlying
    storage backend.

    If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable.
    """
    config = config or StoreConfig()
    read_only = mode == "r"
    # We have delayed checking if the repository exists, to avoid the delay in the happy case
    # So we need to check now if open fails, to provide a nice error message
    try:
        store = pyicechunk_store_open_existing(
            storage, read_only=read_only, config=config
        )
    # TODO: we should have an exception type to catch here, for the case of non-existing repo
    except Exception as e:
        if pyicechunk_store_exists(storage):
            # if the repo exists, this is an actual error we need to raise
            raise e
        else:
            # if the repo doesn't exist, we want to point users to that issue instead
            raise ValueError(
                "No Icechunk repository at the provided location, "
                "try opening in create mode or changing the location"
            ) from None
    return cls(store=store, mode=mode, args=args, kwargs=kwargs)

reset()

Discard any uncommitted changes and reset to the previous snapshot state.

Source code in icechunk/__init__.py
def reset(self) -> None:
    """Drop all uncommitted changes, returning to the previous snapshot state."""
    return self._store.reset()

set(key, value) async

Store a (key, value) pair.

Parameters

key : str value : Buffer

Source code in icechunk/__init__.py
async def set(self, key: str, value: Buffer) -> None:
    """Write `value` under `key`.

    Parameters
    ----------
    key : str
    value : Buffer
    """
    payload = value.to_bytes()
    return await self._store.set(key, payload)

set_if_not_exists(key, value) async

Store a key to value if the key is not already present.

Parameters

key : str value : Buffer

Source code in icechunk/__init__.py
async def set_if_not_exists(self, key: str, value: Buffer) -> None:
    """
    Write ``value`` under `key` only when the key is not already present.

    Parameters
    ----------
    key : str
    value : Buffer
    """
    payload = value.to_bytes()
    return await self._store.set_if_not_exists(key, payload)

set_partial_values(key_start_values) async

Store values at a given key, starting at byte range_start.

Parameters

key_start_values : list[tuple[str, int, BytesLike]] set of key, range_start, values triples, a key may occur multiple times with different range_starts, range_starts (considering the length of the respective values) must not specify overlapping ranges for the same key

Source code in icechunk/__init__.py
async def set_partial_values(
    self, key_start_values: Iterable[tuple[str, int, BytesLike]]
) -> None:
    """Write values into keys starting at the given byte offsets.

    Parameters
    ----------
    key_start_values : list[tuple[str, int, BytesLike]]
        (key, range_start, value) triples; a key may appear several times
        with different range_starts, but the written ranges for one key
        (considering the length of each value) must not overlap.
    """
    # pyo3 has no implicit conversion from a Python iterable to a Rust one,
    # so materialize the argument as a list first. Possible optimization.
    return await self._store.set_partial_values(list(key_start_values))

set_virtual_ref(key, location, *, offset, length)

Store a virtual reference to a chunk.

Parameters

key : str The chunk to store the reference under. This is the fully qualified zarr key eg: 'array/c/0/0/0' location : str The location of the chunk in storage. This is absolute path to the chunk in storage eg: 's3://bucket/path/to/file.nc' offset : int The offset in bytes from the start of the file location in storage the chunk starts at length : int The length of the chunk in bytes, measured from the given offset

Source code in icechunk/__init__.py
def set_virtual_ref(
    self, key: str, location: str, *, offset: int, length: int
) -> None:
    """Record a virtual reference for a chunk instead of storing its data.

    Parameters
    ----------
    key : str
        Fully qualified zarr chunk key, e.g. 'array/c/0/0/0'.
    location : str
        Absolute path of the file that holds the chunk, e.g. 's3://bucket/path/to/file.nc'.
    offset : int
        Byte offset within that file at which the chunk begins.
    length : int
        Size of the chunk in bytes, counted from `offset`.
    """
    return self._store.set_virtual_ref(key, location, offset, length)

sync_clear()

Clear the store.

This will remove all contents from the current session, including all groups and all arrays. But it will not modify the repository history.

Source code in icechunk/__init__.py
def sync_clear(self) -> None:
    """Synchronously empty out the current session.

    Every group and every array is removed from the session, while the
    repository history stays untouched.
    """
    return self._store.sync_clear()

tag(tag_name, snapshot_id)

Create a tag pointing to the current checked out snapshot.

Source code in icechunk/__init__.py
def tag(self, tag_name: str, snapshot_id: str) -> None:
    """Create a tag pointing at the given snapshot id."""
    return self._store.tag(tag_name, snapshot_id=snapshot_id)

with_mode(mode)

Return a new store of the same type pointing to the same location with a new mode.

The returned Store is not automatically opened. Call :meth:Store.open before using.

Parameters

mode: AccessModeLiteral The new mode to use.

Returns

store: A new store of the same type with the new mode.

Source code in icechunk/__init__.py
def with_mode(self, mode: AccessModeLiteral) -> Self:
    """
    Build a new store of the same type at the same location, using `mode`.

    The returned Store is not automatically opened. Call :meth:`Store.open`
    before using it.

    Parameters
    ----------
    mode: AccessModeLiteral
        The new mode to use.

    Returns
    -------
    store:
        A new store of the same type with the new mode.

    """
    wrapped = self._store.with_mode(mode == "r")
    return type(self)(wrapped, mode=mode)

StorageConfig

Storage configuration for an IcechunkStore

Currently supports memory, filesystem, and S3 storage backends. Use the class methods to create a StorageConfig object with the desired backend.

Ex:

storage_config = StorageConfig.memory("prefix")
storage_config = StorageConfig.filesystem("/path/to/root")
storage_config = StorageConfig.s3_from_env("bucket", "prefix")
storage_config = StorageConfig.s3_from_config("bucket", "prefix", ...)

Source code in icechunk/_icechunk_python.pyi
class StorageConfig:
    """Storage configuration for an IcechunkStore

    Currently supports memory, filesystem, and S3 storage backends.
    Use the class methods to create a StorageConfig object with the desired backend.

    Ex:
    ```
    storage_config = StorageConfig.memory("prefix")
    storage_config = StorageConfig.filesystem("/path/to/root")
    storage_config = StorageConfig.s3_from_env("bucket", "prefix")
    storage_config = StorageConfig.s3_from_config("bucket", "prefix", ...)
    ```
    """
    class Memory:
        """Config for an in-memory storage backend"""

        # Key prefix under which the store's data lives.
        prefix: str

    class Filesystem:
        """Config for a local filesystem storage backend"""

        # Root directory of the repository on the local filesystem.
        root: str

    class S3:
        """Config for an S3 Object Storage compatible storage backend"""

        # Target bucket name.
        bucket: str
        # Key prefix inside the bucket.
        prefix: str
        # Optional explicit credentials (see s3_from_env/s3_from_config/s3_anonymous).
        credentials: S3Credentials | None
        # Optional custom endpoint for S3-compatible services.
        endpoint_url: str | None
        # Whether plain-HTTP endpoints are permitted.
        allow_http: bool | None
        # Optional region name.
        region: str | None

    def __init__(self, storage: Memory | Filesystem | S3): ...
    @classmethod
    def memory(cls, prefix: str) -> StorageConfig:
        """Create a StorageConfig object for an in-memory storage backend with the given prefix"""
        ...

    @classmethod
    def filesystem(cls, root: str) -> StorageConfig:
        """Create a StorageConfig object for a local filesystem storage backend with the given root directory"""
        ...

    @classmethod
    def s3_from_env(cls, bucket: str, prefix: str) -> StorageConfig:
        """Create a StorageConfig object for an S3 Object Storage compatible storage backend
        with the given bucket and prefix

        This assumes that the necessary credentials are available in the environment:
            AWS_REGION
            AWS_ACCESS_KEY_ID,
            AWS_SECRET_ACCESS_KEY,
            AWS_SESSION_TOKEN (optional)
            AWS_ENDPOINT_URL (optional)
            AWS_ALLOW_HTTP (optional)
        """
        ...

    @classmethod
    def s3_from_config(
        cls,
        bucket: str,
        prefix: str,
        credentials: S3Credentials,
        endpoint_url: str | None,
        allow_http: bool | None = None,
        region: str | None = None,
    ) -> StorageConfig:
        """Create a StorageConfig object for an S3 Object Storage compatible storage
        backend with the given bucket, prefix, and configuration

        This method will directly use the provided credentials to authenticate with the S3 service,
        ignoring any environment variables.
        """
        ...

    @classmethod
    def s3_anonymous(
        cls,
        bucket: str,
        prefix: str,
        endpoint_url: str | None,
        allow_http: bool | None = None,
        region: str | None = None,
    ) -> StorageConfig:
        """Create a StorageConfig object for an S3 Object Storage compatible storage
        using anonymous access
        """
        ...

Filesystem

Config for a local filesystem storage backend

Source code in icechunk/_icechunk_python.pyi
class Filesystem:
    """Config for a local filesystem storage backend"""

    # Root directory of the repository on the local filesystem.
    root: str

Memory

Config for an in-memory storage backend

Source code in icechunk/_icechunk_python.pyi
class Memory:
    """Config for an in-memory storage backend"""

    # Key prefix under which the store's data lives.
    prefix: str

S3

Config for an S3 Object Storage compatible storage backend

Source code in icechunk/_icechunk_python.pyi
class S3:
    """Config for an S3 Object Storage compatible storage backend"""

    # Target bucket name.
    bucket: str
    # Key prefix inside the bucket.
    prefix: str
    # Optional explicit credentials.
    credentials: S3Credentials | None
    # Optional custom endpoint for S3-compatible services.
    endpoint_url: str | None
    # Whether plain-HTTP endpoints are permitted.
    allow_http: bool | None
    # Optional region name.
    region: str | None

filesystem(root) classmethod

Create a StorageConfig object for a local filesystem storage backend with the given root directory

Source code in icechunk/_icechunk_python.pyi
@classmethod
def filesystem(cls, root: str) -> StorageConfig:
    """Create a StorageConfig object for a local filesystem storage backend.

    Parameters
    ----------
    root : str
        Root directory of the repository on the local filesystem.
    """
    ...

memory(prefix) classmethod

Create a StorageConfig object for an in-memory storage backend with the given prefix

Source code in icechunk/_icechunk_python.pyi
@classmethod
def memory(cls, prefix: str) -> StorageConfig:
    """Create a StorageConfig object for an in-memory storage backend.

    Parameters
    ----------
    prefix : str
        Key prefix under which the store's data lives.
    """
    ...

s3_anonymous(bucket, prefix, endpoint_url, allow_http=None, region=None) classmethod

Create a StorageConfig object for an S3 Object Storage compatible storage using anonymous access

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_anonymous(
    cls,
    bucket: str,
    prefix: str,
    endpoint_url: str | None,
    allow_http: bool | None = None,
    region: str | None = None,
) -> StorageConfig:
    """Create a StorageConfig object for an S3 Object Storage compatible storage
    using anonymous access

    Parameters
    ----------
    bucket : str
        Target bucket name.
    prefix : str
        Key prefix inside the bucket.
    endpoint_url : str | None
        Custom endpoint for S3-compatible services, if any.
    allow_http : bool | None
        Whether plain-HTTP endpoints are permitted.
    region : str | None
        Region name, if it must be set explicitly.
    """
    ...

s3_from_config(bucket, prefix, credentials, endpoint_url, allow_http=None, region=None) classmethod

Create a StorageConfig object for an S3 Object Storage compatible storage backend with the given bucket, prefix, and configuration

This method will directly use the provided credentials to authenticate with the S3 service, ignoring any environment variables.

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_from_config(
    cls,
    bucket: str,
    prefix: str,
    credentials: S3Credentials,
    endpoint_url: str | None,
    allow_http: bool | None = None,
    region: str | None = None,
) -> StorageConfig:
    """Create a StorageConfig object for an S3 Object Storage compatible storage
    backend with the given bucket, prefix, and configuration

    This method will directly use the provided credentials to authenticate with the S3 service,
    ignoring any environment variables.

    Parameters
    ----------
    bucket : str
        Target bucket name.
    prefix : str
        Key prefix inside the bucket.
    credentials : S3Credentials
        Explicit credentials to use; environment variables are ignored.
    endpoint_url : str | None
        Custom endpoint for S3-compatible services, if any.
    allow_http : bool | None
        Whether plain-HTTP endpoints are permitted.
    region : str | None
        Region name, if it must be set explicitly.
    """
    ...

s3_from_env(bucket, prefix) classmethod

Create a StorageConfig object for an S3 Object Storage compatible storage backend with the given bucket and prefix

This assumes that the necessary credentials are available in the environment

AWS_REGION AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN (optional) AWS_ENDPOINT_URL (optional) AWS_ALLOW_HTTP (optional)

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_from_env(cls, bucket: str, prefix: str) -> StorageConfig:
    """Create a StorageConfig object for an S3 Object Storage compatible storage backend
    with the given bucket and prefix

    This assumes that the necessary credentials are available in the environment:
        AWS_REGION
        AWS_ACCESS_KEY_ID,
        AWS_SECRET_ACCESS_KEY,
        AWS_SESSION_TOKEN (optional)
        AWS_ENDPOINT_URL (optional)
        AWS_ALLOW_HTTP (optional)

    Parameters
    ----------
    bucket : str
        Target bucket name.
    prefix : str
        Key prefix inside the bucket.
    """
    ...

StoreConfig

Configuration for an IcechunkStore

Source code in icechunk/_icechunk_python.pyi
class StoreConfig:
    """Configuration for an IcechunkStore"""

    # The number of concurrent requests to make when fetching partial values.
    get_partial_values_concurrency: int | None
    # The threshold at which to inline chunks in the store, in bytes. When set,
    # chunks smaller than this threshold will be inlined in the store. Default is
    # 512 bytes.
    inline_chunk_threshold_bytes: int | None
    # Whether to allow overwriting refs in the store. Default is False. Experimental.
    unsafe_overwrite_refs: bool | None
    # Configuration for virtual references such as credentials and endpoints.
    virtual_ref_config: VirtualRefConfig | None

    def __init__(
        self,
        get_partial_values_concurrency: int | None = None,
        inline_chunk_threshold_bytes: int | None = None,
        unsafe_overwrite_refs: bool | None = None,
        virtual_ref_config: VirtualRefConfig | None = None,
    ):
        """Create a StoreConfig object with the given configuration options

        Parameters
        ----------
        get_partial_values_concurrency: int | None
            The number of concurrent requests to make when fetching partial values
        inline_chunk_threshold_bytes: int | None
            The threshold at which to inline chunks in the store in bytes. When set,
            chunks smaller than this threshold will be inlined in the store. Default is
            512 bytes when not specified.
        unsafe_overwrite_refs: bool | None
            Whether to allow overwriting refs in the store. Default is False. Experimental.
        virtual_ref_config: VirtualRefConfig | None
            Configurations for virtual references such as credentials and endpoints

        Returns
        -------
        StoreConfig
            A StoreConfig object with the given configuration options
        """
        ...

__init__(get_partial_values_concurrency=None, inline_chunk_threshold_bytes=None, unsafe_overwrite_refs=None, virtual_ref_config=None)

Create a StoreConfig object with the given configuration options

Parameters

get_partial_values_concurrency: int | None The number of concurrent requests to make when fetching partial values inline_chunk_threshold_bytes: int | None The threshold at which to inline chunks in the store in bytes. When set, chunks smaller than this threshold will be inlined in the store. Default is 512 bytes when not specified. unsafe_overwrite_refs: bool | None Whether to allow overwriting refs in the store. Default is False. Experimental. virtual_ref_config: VirtualRefConfig | None Configurations for virtual references such as credentials and endpoints

Returns

StoreConfig A StoreConfig object with the given configuration options

Source code in icechunk/_icechunk_python.pyi
def __init__(
    self,
    get_partial_values_concurrency: int | None = None,
    inline_chunk_threshold_bytes: int | None = None,
    unsafe_overwrite_refs: bool | None = None,
    virtual_ref_config: VirtualRefConfig | None = None,
):
    """Create a StoreConfig object with the given configuration options

    Parameters
    ----------
    get_partial_values_concurrency: int | None
        The number of concurrent requests to make when fetching partial values
    inline_chunk_threshold_bytes: int | None
        The threshold at which to inline chunks in the store in bytes. When set,
        chunks smaller than this threshold will be inlined in the store. Default is
        512 bytes when not specified.
    unsafe_overwrite_refs: bool | None
        Whether to allow overwriting refs in the store. Default is False. Experimental.
    virtual_ref_config: VirtualRefConfig | None
        Configurations for virtual references such as credentials and endpoints
    """
    ...

VirtualRefConfig

Source code in icechunk/_icechunk_python.pyi
class VirtualRefConfig:
    """Configuration for virtual references, such as credentials and endpoints."""

    class S3:
        """Config for an S3 Object Storage compatible storage backend"""

        # Optional explicit credentials for the referenced objects.
        credentials: S3Credentials | None
        # Optional custom endpoint for S3-compatible services.
        endpoint_url: str | None
        # Whether plain-HTTP endpoints are permitted.
        allow_http: bool | None
        # Optional region name.
        region: str | None

    @classmethod
    def s3_from_env(cls) -> VirtualRefConfig:
        """Create a VirtualReferenceConfig object for an S3 Object Storage compatible
        storage backend

        This assumes that the necessary credentials are available in the environment:
            AWS_REGION or AWS_DEFAULT_REGION
            AWS_ACCESS_KEY_ID,
            AWS_SECRET_ACCESS_KEY,
            AWS_SESSION_TOKEN (optional)
            AWS_ENDPOINT_URL (optional)
            AWS_ALLOW_HTTP (optional)
        """
        ...

    @classmethod
    def s3_from_config(
        cls,
        credentials: S3Credentials,
        *,
        endpoint_url: str | None = None,
        allow_http: bool | None = None,
        region: str | None = None,
    ) -> VirtualRefConfig:
        """Create a VirtualReferenceConfig object for an S3 Object Storage compatible
        storage backend with the given credentials and configuration

        This method will directly use the provided credentials to authenticate with the S3 service,
        ignoring any environment variables.
        """
        ...

    @classmethod
    def s3_anonymous(
        cls,
        *,
        endpoint_url: str | None = None,
        allow_http: bool | None = None,
        region: str | None = None,
    ) -> VirtualRefConfig:
        """Create a VirtualReferenceConfig object for an S3 Object Storage compatible storage
        using anonymous access
        """
        ...

S3

Config for an S3 Object Storage compatible storage backend

Source code in icechunk/_icechunk_python.pyi
class S3:
    """Config for an S3 Object Storage compatible storage backend"""

    # Explicit credentials; None means credentials are resolved elsewhere
    # (e.g. from the environment).
    credentials: S3Credentials | None
    # Custom service endpoint (e.g. MinIO); None uses the AWS default.
    endpoint_url: str | None
    # Whether plain-HTTP (non-TLS) endpoints are permitted.
    allow_http: bool | None
    # AWS region of the target bucket.
    region: str | None

s3_anonymous(*, endpoint_url=None, allow_http=None, region=None) classmethod

Create a VirtualRefConfig object for an S3 Object Storage compatible storage backend using anonymous access

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_anonymous(
    cls,
    *,
    endpoint_url: str | None = None,
    allow_http: bool | None = None,
    region: str | None = None,
) -> VirtualRefConfig:
    """Create a VirtualRefConfig object for an S3 Object Storage compatible
    storage backend using anonymous (unauthenticated) access.

    Parameters
    ----------
    endpoint_url : str, optional
        Custom service endpoint; defaults to the AWS endpoint.
    allow_http : bool, optional
        Whether plain-HTTP (non-TLS) endpoints are permitted.
    region : str, optional
        AWS region of the target bucket.
    """
    ...

s3_from_config(credentials, *, endpoint_url=None, allow_http=None, region=None) classmethod

Create a VirtualRefConfig object for an S3 Object Storage compatible storage backend with the given credentials and configuration

This method will directly use the provided credentials to authenticate with the S3 service, ignoring any environment variables.

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_from_config(
    cls,
    credentials: S3Credentials,
    *,
    endpoint_url: str | None = None,
    allow_http: bool | None = None,
    region: str | None = None,
) -> VirtualRefConfig:
    """Create a VirtualRefConfig object for an S3 Object Storage compatible
    storage backend with the given credentials and configuration.

    This method will directly use the provided credentials to authenticate with the S3 service,
    ignoring any environment variables.

    Parameters
    ----------
    credentials : S3Credentials
        Credentials used to authenticate with the S3 service.
    endpoint_url : str, optional
        Custom service endpoint; defaults to the AWS endpoint.
    allow_http : bool, optional
        Whether plain-HTTP (non-TLS) endpoints are permitted.
    region : str, optional
        AWS region of the target bucket.
    """
    ...

s3_from_env() classmethod

Create a VirtualRefConfig object for an S3 Object Storage compatible storage backend, configured entirely from environment variables

This assumes that the necessary credentials are available in the environment:

`AWS_REGION` or `AWS_DEFAULT_REGION`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN` (optional), `AWS_ENDPOINT_URL` (optional), `AWS_ALLOW_HTTP` (optional)

Source code in icechunk/_icechunk_python.pyi
@classmethod
def s3_from_env(cls) -> VirtualRefConfig:
    """Create a VirtualRefConfig object for an S3 Object Storage compatible
    storage backend, taking all settings from the environment.

    This assumes that the necessary credentials are available in the environment:
        AWS_REGION or AWS_DEFAULT_REGION
        AWS_ACCESS_KEY_ID,
        AWS_SECRET_ACCESS_KEY,
        AWS_SESSION_TOKEN (optional)
        AWS_ENDPOINT_URL (optional)
        AWS_ALLOW_HTTP (optional)
    """
    ...