@@ -191,6 +191,7 @@ def __init__(
191
191
writer_version : str = "1.0" ,
192
192
skip_arrow_metadata : bool = False ,
193
193
compression : Optional [str ] = "zstd(3)" ,
194
+ compression_level : Optional [int ] = None ,
194
195
dictionary_enabled : Optional [bool ] = True ,
195
196
dictionary_page_size_limit : int = 1024 * 1024 ,
196
197
statistics_enabled : Optional [str ] = "page" ,
@@ -213,7 +214,10 @@ def __init__(
213
214
self .write_batch_size = write_batch_size
214
215
self .writer_version = writer_version
215
216
self .skip_arrow_metadata = skip_arrow_metadata
216
- self .compression = compression
217
+ if compression_level is not None :
218
+ self .compression = f"{ compression } ({ compression_level } )"
219
+ else :
220
+ self .compression = compression
217
221
self .dictionary_enabled = dictionary_enabled
218
222
self .dictionary_page_size_limit = dictionary_page_size_limit
219
223
self .statistics_enabled = statistics_enabled
@@ -870,10 +874,34 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None
870
874
"""
871
875
self .df .write_csv (str (path ), with_header )
872
876
877
+ @overload
878
+ def write_parquet (
879
+ self ,
880
+ path : str | pathlib .Path ,
881
+ compression : str ,
882
+ compression_level : int | None = None ,
883
+ ) -> None : ...
884
+
885
+ @overload
886
+ def write_parquet (
887
+ self ,
888
+ path : str | pathlib .Path ,
889
+ compression : Compression = Compression .ZSTD ,
890
+ compression_level : int | None = None ,
891
+ ) -> None : ...
892
+
893
+ @overload
894
+ def write_parquet (
895
+ self ,
896
+ path : str | pathlib .Path ,
897
+ compression : ParquetWriterOptions ,
898
+ compression_level : None = None ,
899
+ ) -> None : ...
900
+
873
901
def write_parquet (
874
902
self ,
875
903
path : str | pathlib .Path ,
876
- compression : Union [str , Compression ] = Compression .ZSTD ,
904
+ compression : Union [str , Compression , ParquetWriterOptions ] = Compression .ZSTD ,
877
905
compression_level : int | None = None ,
878
906
) -> None :
879
907
"""Execute the :py:class:`DataFrame` and write the results to a Parquet file.
@@ -894,7 +922,13 @@ def write_parquet(
894
922
recommended range is 1 to 22, with the default being 4. Higher levels
895
923
provide better compression but slower speed.
896
924
"""
897
- # Convert string to Compression enum if necessary
925
+ if isinstance (compression , ParquetWriterOptions ):
926
+ if compression_level is not None :
927
+ msg = "compression_level should be None when using ParquetWriterOptions"
928
+ raise ValueError (msg )
929
+ self .write_parquet_with_options (path , compression )
930
+ return
931
+
898
932
if isinstance (compression , str ):
899
933
compression = Compression .from_str (compression )
900
934
0 commit comments