zarr-developers · brokkoli71 · Feb 15, 2025 · Feb 15, 2025 · Feb 15, 2025 · Feb 15, 2025
diff --git a/changes/2839.feature.rst b/changes/2839.feature.rst
@@ -0,0 +1 @@
+Array creation now accepts string representations of codecs (e.g. ``"zstd"``) for the ``filters``, ``serializer``, and ``compressors`` arguments.
diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py
@@ -792,7 +792,7 @@ def create_array(
         chunk to bytes.
 
         For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
-        and these values must be instances of ``ArrayArrayCodec``, or dict representations
+        and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
         of ``ArrayArrayCodec``.
         If no ``filters`` are provided, a default set of filters will be used.
         These defaults can be changed by modifying the value of ``array.v3_default_filters``

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -3769,23 +3769,23 @@ def _get_default_codecs(
 
 
 FiltersLike: TypeAlias = (
-    Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec]
+    Iterable[dict[str, JSON] | str | ArrayArrayCodec | numcodecs.abc.Codec]
     | ArrayArrayCodec
     | Iterable[numcodecs.abc.Codec]
     | numcodecs.abc.Codec
-    | Literal["auto"]
+    | str
     | None
 )
 CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
 CompressorsLike: TypeAlias = (
-    Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
+    Iterable[dict[str, JSON] | str | BytesBytesCodec | numcodecs.abc.Codec]
     | dict[str, JSON]
     | BytesBytesCodec
     | numcodecs.abc.Codec
-    | Literal["auto"]
+    | str
     | None
 )
-SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | Literal["auto"]
+SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | str
 
 
 class ShardsConfigParam(TypedDict):
@@ -4053,7 +4053,7 @@ async def create_array(
         chunk to bytes.
 
         For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
-        and these values must be instances of ``ArrayArrayCodec``, or dict representations
+        and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
         of ``ArrayArrayCodec``.
         If no ``filters`` are provided, a default set of filters will be used.
         These defaults can be changed by modifying the value of ``array.v3_default_filters``
@@ -4264,24 +4264,13 @@ def _parse_chunk_encoding_v2(
     elif isinstance(compressor, tuple | list) and len(compressor) == 1:
         _compressor = parse_compressor(compressor[0])
     else:
-        if isinstance(compressor, Iterable) and not isinstance(compressor, dict):
-            msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Got an iterable with type {type(compressor)} instead."
-            raise TypeError(msg)
         _compressor = parse_compressor(compressor)
 
     if filters is None:
         _filters = None
     elif filters == "auto":
         _filters = default_filters
     else:
-        if isinstance(filters, Iterable):
-            for idx, f in enumerate(filters):
-                if not isinstance(f, numcodecs.abc.Codec):
-                    msg = (
-                        "For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. "
-                        f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec."
-                    )
-                    raise TypeError(msg)
         _filters = parse_filters(filters)
 
     return _filters, _compressor
@@ -4305,6 +4294,8 @@ def _parse_chunk_encoding_v3(
         out_array_array: tuple[ArrayArrayCodec, ...] = ()
     elif filters == "auto":
         out_array_array = default_array_array
+    elif isinstance(filters, str):
+        out_array_array = (_parse_array_array_codec(filters),)
     else:
         maybe_array_array: Iterable[Codec | dict[str, JSON]]
         if isinstance(filters, dict | Codec):
@@ -4322,6 +4313,8 @@ def _parse_chunk_encoding_v3(
         out_bytes_bytes: tuple[BytesBytesCodec, ...] = ()
     elif compressors == "auto":
         out_bytes_bytes = default_bytes_bytes
+    elif isinstance(compressors, str):
+        out_bytes_bytes = (_parse_bytes_bytes_codec(compressors),)
     else:
         maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
         if isinstance(compressors, dict | Codec):

diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
@@ -1045,7 +1045,7 @@ async def create_array(
             chunk to bytes.
 
             For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
-            and these values must be instances of ``ArrayArrayCodec``, or dict representations
+            and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
             of ``ArrayArrayCodec``.
             If no ``filters`` are provided, a default set of filters will be used.
             These defaults can be changed by modifying the value of ``array.v3_default_filters``
@@ -2280,7 +2280,7 @@ def create_array(
             chunk to bytes.
 
             For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
-            and these values must be instances of ``ArrayArrayCodec``, or dict representations
+            and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
             of ``ArrayArrayCodec``.
             If no ``filters`` are provided, a default set of filters will be used.
             These defaults can be changed by modifying the value of ``array.v3_default_filters``
@@ -2678,7 +2678,7 @@ def array(
             chunk to bytes.
 
             For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
-            and these values must be instances of ``ArrayArrayCodec``, or dict representations
+            and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
             of ``ArrayArrayCodec``.
             If no ``filters`` are provided, a default set of filters will be used.
             These defaults can be changed by modifying the value of ``array.v3_default_filters``

diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
@@ -246,20 +246,24 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
 
     if data is None:
         return data
+    if isinstance(data, str):
+        return (numcodecs.get_codec({"id": data}),)
     if isinstance(data, Iterable):
         for idx, val in enumerate(data):
             if isinstance(val, numcodecs.abc.Codec):
                 out.append(val)
             elif isinstance(val, dict):
                 out.append(numcodecs.get_codec(val))
+            elif isinstance(val, str):
+                out.append(numcodecs.get_codec({"id": val}))
             else:
-                msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead."
+                msg = f"For Zarr format 2 arrays, all elements of `filters` must be a numcodecs.abc.Codec or a dict or str representation of numcodecs.abc.Codec. Got {type(val)} at index {idx} instead."
                 raise TypeError(msg)
         return tuple(out)
     # take a single codec instance and wrap it in a tuple
     if isinstance(data, numcodecs.abc.Codec):
         return (data,)
-    msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
+    msg = f"For Zarr format 2 arrays, all elements of `filters` must be None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
     raise TypeError(msg)
 
 
@@ -271,7 +275,9 @@ def parse_compressor(data: object) -> numcodecs.abc.Codec | None:
         return data
     if isinstance(data, dict):
         return numcodecs.get_codec(data)
-    msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
+    if isinstance(data, str):
+        return numcodecs.get_codec({"id": data})
+    msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Expected None, a numcodecs.abc.Codec, or a dict or str representation of a numcodecs.abc.Codec. Got {type(data)} instead."
     raise ValueError(msg)
 
 

diff --git a/src/zarr/registry.py b/src/zarr/registry.py
@@ -166,14 +166,16 @@ def _resolve_codec(data: dict[str, JSON]) -> Codec:
     return get_codec_class(data["name"]).from_dict(data)  # type: ignore[arg-type]
 
 
-def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
+def _parse_bytes_bytes_codec(data: dict[str, JSON] | str | Codec) -> BytesBytesCodec:
     """
     Normalize the input to a ``BytesBytesCodec`` instance.
     If the input is already a ``BytesBytesCodec``, it is returned as is. If the input is a dict, it
     is converted to a ``BytesBytesCodec`` instance via the ``_resolve_codec`` function.
     """
     from zarr.abc.codec import BytesBytesCodec
 
+    if isinstance(data, str):
+        data = {"name": data, "configuration": {}}
     if isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, BytesBytesCodec):
@@ -186,14 +188,16 @@ def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
     return result
 
 
-def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec:
+def _parse_array_bytes_codec(data: dict[str, JSON] | str | Codec) -> ArrayBytesCodec:
     """
     Normalize the input to a ``ArrayBytesCodec`` instance.
     If the input is already a ``ArrayBytesCodec``, it is returned as is. If the input is a dict, it
     is converted to a ``ArrayBytesCodec`` instance via the ``_resolve_codec`` function.
     """
     from zarr.abc.codec import ArrayBytesCodec
 
+    if isinstance(data, str):
+        data = {"name": data, "configuration": {}}
     if isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, ArrayBytesCodec):
@@ -206,14 +210,16 @@ def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec:
     return result
 
 
-def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec:
+def _parse_array_array_codec(data: dict[str, JSON] | str | Codec) -> ArrayArrayCodec:
     """
     Normalize the input to a ``ArrayArrayCodec`` instance.
     If the input is already a ``ArrayArrayCodec``, it is returned as is. If the input is a dict, it
     is converted to a ``ArrayArrayCodec`` instance via the ``_resolve_codec`` function.
     """
     from zarr.abc.codec import ArrayArrayCodec
 
+    if isinstance(data, str):
+        data = {"name": data, "configuration": {}}
     if isinstance(data, dict):
         result = _resolve_codec(data)
         if not isinstance(result, ArrayArrayCodec):

diff --git a/tests/test_array.py b/tests/test_array.py
@@ -30,6 +30,7 @@
 from zarr.core.array import (
     CompressorsLike,
     FiltersLike,
+    SerializerLike,
     _get_default_chunk_encoding_v2,
     _get_default_chunk_encoding_v3,
     _parse_chunk_encoding_v2,
@@ -1025,6 +1026,15 @@ async def test_no_filters_compressors(store: MemoryStore, dtype: str, empty_valu
             ZstdCodec(level=3),
             {"name": "zstd", "configuration": {"level": 3}},
             ({"name": "zstd", "configuration": {"level": 3}},),
+            "zstd",
+            ("crc32c", "zstd"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "serializer",
+        [
+            "auto",
+            "bytes",
         ],
     )
     @pytest.mark.parametrize(
@@ -1065,6 +1075,7 @@ async def test_no_filters_compressors(store: MemoryStore, dtype: str, empty_valu
     async def test_v3_chunk_encoding(
         store: MemoryStore,
         compressors: CompressorsLike,
+        serializer: SerializerLike,
         filters: FiltersLike,
         dtype: str,
         chunks: tuple[int, ...],
@@ -1073,6 +1084,9 @@ async def test_v3_chunk_encoding(
         """
         Test various possibilities for the compressors and filters parameter to create_array
         """
+        if serializer == "bytes" and dtype == "str":
+            serializer = "vlen-utf8"
+
         arr = await create_array(
             store=store,
             dtype=dtype,
@@ -1081,10 +1095,11 @@ async def test_v3_chunk_encoding(
             shards=shards,
             zarr_format=3,
             filters=filters,
+            serializer=serializer,
             compressors=compressors,
         )
-        filters_expected, _, compressors_expected = _parse_chunk_encoding_v3(
-            filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype)
+        filters_expected, serializer_expected, compressors_expected = _parse_chunk_encoding_v3(
+            filters=filters, compressors=compressors, serializer=serializer, dtype=np.dtype(dtype)
         )
         assert arr.filters == filters_expected
         assert arr.compressors == compressors_expected
@@ -1098,11 +1113,20 @@ async def test_v3_chunk_encoding(
             None,
             numcodecs.Zstd(level=3),
             (),
-            (numcodecs.Zstd(level=3),),
+            (numcodecs.Zstd(level=2),),
+            "zstd",
         ],
     )
     @pytest.mark.parametrize(
-        "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)]
+        "filters",
+        [
+            "auto",
+            None,
+            numcodecs.GZip(level=1),
+            (numcodecs.GZip(level=2)),
+            "gzip",
+            ("gzip", "zstd"),
+        ],
     )
     async def test_v2_chunk_encoding(
         store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str
@@ -1129,6 +1153,30 @@ async def test_v2_chunk_encoding(
         assert arr.compressors == compressor_expected
         assert arr.filters == filters_expected
 
+    @staticmethod
+    async def test_bad_chunk_encoding(store: MemoryStore) -> None:
+        """
+        Test that create_array raises ValueError for a non-codec v2 compressor and KeyError for an unknown v3 filter name.
+        """
+        bad_compressor = 2  # an int: not None, not a codec instance, not a dict or str codec spec
+        msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Expected None, a numcodecs.abc.Codec, or a dict or str representation of a numcodecs.abc.Codec. Got {type(bad_compressor)} instead."
+        with pytest.raises(ValueError, match=msg):  # NOTE(review): match= is a regex search and msg is passed unescaped
+            await create_array(
+                store=store,
+                dtype="uint8",
+                shape=(10,),
+                zarr_format=2,
+                compressors=bad_compressor,
+            )
+        with pytest.raises(KeyError):  # a filter name string that is expected not to resolve to any codec
+            await create_array(
+                store=store,
+                dtype="uint8",
+                shape=(10,),
+                zarr_format=3,
+                filters="bad_filter",
+            )
+
     @staticmethod
     @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
     async def test_default_filters_compressors(
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Array creation now accepts string representations of codecs (e.g. ``"zstd"``) for the ``filters``, ``serializer``, and ``compressors`` arguments.