Commit f53bdbe: chore: ruff improvements (#2571)
1 parent: b0bf30e

21 files changed: +82, -75 lines
.github/workflows/static-checking.yml
Lines changed: 3 additions & 5 deletions

@@ -32,12 +32,10 @@ jobs:
           python -m pip install poetry
           poetry config virtualenvs.create false --local
           poetry install --all-extras -vvv
-      - name: Run ruff format
+      - name: ruff format check
         run: ruff format --check .
-      - name: Run ruff check on source code
-        run: ruff awswrangler --output-format=github
-      - name: Run ruff check on tests and tutorials
-        run: ruff . --ignore "PL" --ignore "D" --output-format=github
+      - name: ruff check
+        run: ruff check --output-format=github .
       - name: mypy check
         run: mypy --install-types --non-interactive awswrangler
       - name: Pylint Lint

awswrangler/__init__.py
Lines changed: 4 additions & 4 deletions

@@ -7,7 +7,7 @@
 
 import logging as _logging
 
-from awswrangler import (  # noqa
+from awswrangler import (
     athena,
     catalog,
     chime,
@@ -34,9 +34,9 @@
     timestream,
     typing,
 )
-from awswrangler.__metadata__ import __description__, __license__, __title__, __version__  # noqa
-from awswrangler._config import config  # noqa
-from awswrangler._distributed import EngineEnum, MemoryFormatEnum, engine, memory_format  # noqa
+from awswrangler.__metadata__ import __description__, __license__, __title__, __version__
+from awswrangler._config import config
+from awswrangler._distributed import EngineEnum, MemoryFormatEnum, engine, memory_format
 
 engine.register()
 
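Most of the edits in this commit are mechanical deletions of `# noqa` comments like the ones above. The enabler is the newly selected RUF100 rule (see the pyproject.toml hunk further down): it reports suppression comments whose target rule would not fire anyway, and `ruff check --fix` deletes them. A minimal sketch with illustrative code:

```python
"""Illustrative module, not part of awswrangler."""
import json  # noqa: F401

# `json` is actually used below, so F401 can never fire on the import and the
# `# noqa: F401` above is a dead suppression: RUF100 reports it, and
# `ruff check --fix` removes it. That is the bulk of this diff.
print(json.dumps({"ok": True}))
```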
awswrangler/athena/__init__.py
Lines changed: 2 additions & 2 deletions

@@ -12,13 +12,13 @@
     delete_prepared_statement,
     list_prepared_statements,
 )
-from awswrangler.athena._read import (  # noqa
+from awswrangler.athena._read import (
     get_query_results,
     read_sql_query,
     read_sql_table,
     unload,
 )
-from awswrangler.athena._utils import (  # noqa
+from awswrangler.athena._utils import (
     create_athena_bucket,
     create_ctas_table,
     describe_table,

awswrangler/catalog/__init__.py
Lines changed: 4 additions & 6 deletions

@@ -7,9 +7,7 @@
     add_orc_partitions,
     add_parquet_partitions,
 )
-
-# noqa
-from awswrangler.catalog._create import (  # noqa
+from awswrangler.catalog._create import (
     _create_csv_table,
     _create_json_table,
     _create_parquet_table,
@@ -21,14 +19,14 @@
     overwrite_table_parameters,
     upsert_table_parameters,
 )
-from awswrangler.catalog._delete import (  # noqa
+from awswrangler.catalog._delete import (
     delete_all_partitions,
     delete_column,
     delete_database,
     delete_partitions,
     delete_table_if_exists,
 )
-from awswrangler.catalog._get import (  # noqa
+from awswrangler.catalog._get import (
     _get_table_input,
     databases,
     get_columns_comments,
@@ -48,7 +46,7 @@
     table,
     tables,
 )
-from awswrangler.catalog._utils import (  # noqa
+from awswrangler.catalog._utils import (
     does_table_exist,
     drop_duplicated_columns,
     extract_athena_types,

awswrangler/distributed/ray/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 """Ray Module."""
 
-from awswrangler.distributed.ray._core import RayLogger, initialize_ray, ray_get, ray_logger, ray_remote  # noqa
+from awswrangler.distributed.ray._core import RayLogger, initialize_ray, ray_get, ray_logger, ray_remote
 
 __all__ = [
     "RayLogger",

awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py
Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@
 import numpy as np
 
 # fs required to implicitly trigger S3 subsystem initialization
-import pyarrow.fs  # noqa: F401 pylint: disable=unused-import
+import pyarrow.fs  # pylint: disable=unused-import
 from pyarrow.dataset import ParquetFileFragment
 from pyarrow.lib import Schema
 from ray import cloudpickle
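The `# noqa: F401` half could be dropped, presumably because pyflakes-style unused-import analysis treats `import pkg.sub` as binding the top-level name `pkg`: the import then counts as used wherever `pkg` is referenced later in the module. The pylint suppression stays because pylint judges the submodule import separately. A hedged sketch with illustrative code, not the datasource's real contents:

```python
# `import pyarrow.fs` is kept purely for its side effect (it initializes
# pyarrow's S3 filesystem subsystem), but the statement binds the name
# `pyarrow`, so the reference below keeps F401 quiet without any noqa.
import pyarrow.fs

schema = pyarrow.schema([("id", pyarrow.int64())])  # uses the bound name
print(schema)
```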

awswrangler/emr.py
Lines changed: 1 addition & 1 deletion

@@ -245,7 +245,7 @@ def _build_cluster_args(**pars: Any) -> Dict[str, Any]:  # pylint: disable=too-m
         {
             "Classification": "spark-hive-site",
             "Properties": {
-                "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"  # noqa
+                "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
             },
             "Configurations": [],
         }

awswrangler/lakeformation/__init__.py
Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 """Amazon Lake Formation Module."""
 
-from awswrangler.lakeformation._read import read_sql_query, read_sql_table  # noqa
-from awswrangler.lakeformation._utils import (  # noqa
+from awswrangler.lakeformation._read import read_sql_query, read_sql_table
+from awswrangler.lakeformation._utils import (
     _build_table_objects,
     _get_table_objects,
     _update_table_objects,

awswrangler/pandas/__init__.py
Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 from pandas import *  # noqa: F403
 
 # Explicit import because mypy doesn't support forward references to a star import
-from pandas import (  # noqa: F401
+from pandas import (
     DataFrame,
     Series,
     concat,
@@ -24,7 +24,7 @@
 from modin.pandas import *  # noqa: F403
 
 # Explicit import because mypy doesn't support forward references to a star import
-from modin.pandas import (  # noqa: F401
+from modin.pandas import (
     DataFrame,
     Series,
     concat,
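The comment kept in the file states the reason for the double import: the star import provides every name at runtime, but mypy cannot resolve annotations through `import *`, so the names used in signatures are imported explicitly as well. A minimal sketch of the same idiom:

```python
from pandas import *  # noqa: F403

# mypy cannot resolve forward references through the star import above, so the
# name used in the annotations below is imported explicitly a second time.
from pandas import DataFrame


def first_rows(df: DataFrame, n: int = 5) -> DataFrame:
    """Return the first n rows (toy example)."""
    return df.head(n)


print(first_rows(DataFrame({"a": [1, 2, 3]})))
```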

awswrangler/quicksight/__init__.py
Lines changed: 5 additions & 5 deletions

@@ -1,8 +1,8 @@
 """Amazon QuickSight Module."""
 
-from awswrangler.quicksight._cancel import cancel_ingestion  # noqa
-from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion  # noqa
-from awswrangler.quicksight._delete import (  # noqa
+from awswrangler.quicksight._cancel import cancel_ingestion
+from awswrangler.quicksight._create import create_athena_data_source, create_athena_dataset, create_ingestion
+from awswrangler.quicksight._delete import (
     delete_all_dashboards,
     delete_all_data_sources,
     delete_all_datasets,
@@ -12,14 +12,14 @@
     delete_dataset,
     delete_template,
 )
-from awswrangler.quicksight._describe import (  # noqa
+from awswrangler.quicksight._describe import (
     describe_dashboard,
     describe_data_source,
     describe_data_source_permissions,
     describe_dataset,
     describe_ingestion,
 )
-from awswrangler.quicksight._get_list import (  # noqa
+from awswrangler.quicksight._get_list import (
     get_dashboard_id,
     get_dashboard_ids,
     get_data_source_arn,

awswrangler/s3/__init__.py
Lines changed: 17 additions & 17 deletions

@@ -1,23 +1,23 @@
 """Amazon S3 Read Module."""
 
-from awswrangler.s3._copy import copy_objects, merge_datasets  # noqa
-from awswrangler.s3._delete import delete_objects  # noqa
-from awswrangler.s3._describe import describe_objects, get_bucket_region, size_objects  # noqa
-from awswrangler.s3._download import download  # noqa
-from awswrangler.s3._list import does_object_exist, list_buckets, list_directories, list_objects  # noqa
-from awswrangler.s3._read_deltalake import read_deltalake  # noqa
-from awswrangler.s3._read_excel import read_excel  # noqa
-from awswrangler.s3._read_orc import read_orc, read_orc_metadata, read_orc_table  # noqa
-from awswrangler.s3._read_parquet import read_parquet, read_parquet_metadata, read_parquet_table  # noqa
-from awswrangler.s3._read_text import read_csv, read_fwf, read_json  # noqa
+from awswrangler.s3._copy import copy_objects, merge_datasets
+from awswrangler.s3._delete import delete_objects
+from awswrangler.s3._describe import describe_objects, get_bucket_region, size_objects
+from awswrangler.s3._download import download
+from awswrangler.s3._list import does_object_exist, list_buckets, list_directories, list_objects
+from awswrangler.s3._read_deltalake import read_deltalake
+from awswrangler.s3._read_excel import read_excel
+from awswrangler.s3._read_orc import read_orc, read_orc_metadata, read_orc_table
+from awswrangler.s3._read_parquet import read_parquet, read_parquet_metadata, read_parquet_table
+from awswrangler.s3._read_text import read_csv, read_fwf, read_json
 from awswrangler.s3._select import select_query
-from awswrangler.s3._upload import upload  # noqa
-from awswrangler.s3._wait import wait_objects_exist, wait_objects_not_exist  # noqa
-from awswrangler.s3._write_deltalake import to_deltalake  # noqa
-from awswrangler.s3._write_excel import to_excel  # noqa
-from awswrangler.s3._write_orc import to_orc  # noqa
-from awswrangler.s3._write_parquet import store_parquet_metadata, to_parquet  # noqa
-from awswrangler.s3._write_text import to_csv, to_json  # noqa
+from awswrangler.s3._upload import upload
+from awswrangler.s3._wait import wait_objects_exist, wait_objects_not_exist
+from awswrangler.s3._write_deltalake import to_deltalake
+from awswrangler.s3._write_excel import to_excel
+from awswrangler.s3._write_orc import to_orc
+from awswrangler.s3._write_parquet import store_parquet_metadata, to_parquet
+from awswrangler.s3._write_text import to_csv, to_json
 
 __all__ = [
     "copy_objects",
awswrangler/s3/_write_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def _to_partitions(
155155
subgroup.drop(
156156
columns=[col for col in partition_cols if col in subgroup.columns],
157157
inplace=True,
158-
) # noqa: PLW2901
158+
)
159159
# Drop index levels if partitioning by index columns
160160
subgroup = subgroup.droplevel( # noqa: PLW2901
161161
level=[col for col in partition_cols if col in subgroup.index.names]
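The asymmetry in this hunk is exactly what PLW2901 (loop variable overwritten) is about: `drop(..., inplace=True)` mutates the frame without rebinding `subgroup`, so its suppression was dead, while `droplevel` returns a new frame that is assigned back to the loop variable, so that suppression is still live. A self-contained sketch:

```python
import pandas as pd

frames = [pd.DataFrame({"a": [1, 2], "tmp": [0, 0]})]
for df in frames:
    # In-place mutation does not rebind the loop variable, so PLW2901 stays
    # silent here and a `# noqa: PLW2901` would itself be flagged by RUF100.
    df.drop(columns=["tmp"], inplace=True)
    # Assignment rebinds the loop variable: PLW2901 fires without the noqa.
    df = df.rename(columns={"a": "b"})  # noqa: PLW2901
    print(df)
```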

fix.sh
Lines changed: 1 addition & 1 deletion

@@ -2,4 +2,4 @@
 set -ex
 
 ruff format .
-ruff --fix awswrangler
+ruff check --fix .

pyproject.toml
Lines changed: 8 additions & 5 deletions

@@ -132,9 +132,9 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.ruff]
-select = ["D", "E", "F", "I001", "I002", "PL", "W"]
+select = ["D", "E", "F", "I", "PL", "RUF100", "W"]
 ignore = ["E501", "PLR2004", "PLR0911", "PLR0912", "PLR0913", "PLR0915"]
-fixable = ["I001", "I002", "W291"]
+fixable = ["ALL"]
 extend-include = ["*.ipynb"]
 exclude = [
     ".eggs",
@@ -152,14 +152,17 @@ exclude = [
 line-length = 120
 target-version = "py38"
 
-[tool.ruff.lint]
-exclude = ["*.ipynb"]
+[tool.ruff.lint.per-file-ignores]
+"docs/*" = ["D"]
+"test_infra/*" = ["D"]
+"tests/*" = ["PL", "D"]
+"tutorials/*" = ["D", "E402", "F401", "F811", "F821"]
 
 [tool.ruff.pydocstyle]
 convention = "numpy"
 
 [tool.mypy]
-python_version = 3.8
+python_version = "3.8"
 strict = true
 ignore_missing_imports = true
 warn_unused_ignores = true
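The per-file-ignores table replaces the old CI arrangement of running ruff twice with different `--ignore` flags: path-specific exemptions now live in configuration, which is what lets the workflow above collapse to a single `ruff check --output-format=github .`. For example, under `"tests/*" = ["PL", "D"]`, a test like this hypothetical one needs neither docstrings nor a magic-value suppression:

```python
# Hypothetical tests/unit/test_example.py: the missing docstrings (D rules)
# and the magic-value comparison (PLR2004, one of the ignored PL rules) are
# not reported under tests/*, while they still would be inside awswrangler/.
def test_answer():
    result = 6 * 7
    assert result == 42
```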

test_infra/stacks/databases_stack.py
Lines changed: 5 additions & 5 deletions

@@ -139,11 +139,11 @@ def _set_catalog_encryption(self) -> None:
             self,
             "aws-sdk-pandas-catalog-encryption",
             catalog_id=f"{Aws.ACCOUNT_ID}",
-            data_catalog_encryption_settings=CfnDataCatalogEncryptionSettings.DataCatalogEncryptionSettingsProperty(  # noqa: E501
+            data_catalog_encryption_settings=CfnDataCatalogEncryptionSettings.DataCatalogEncryptionSettingsProperty(
                 encryption_at_rest=CfnDataCatalogEncryptionSettings.EncryptionAtRestProperty(
                     catalog_encryption_mode="DISABLED",
                 ),
-                connection_password_encryption=CfnDataCatalogEncryptionSettings.ConnectionPasswordEncryptionProperty(  # noqa: E501
+                connection_password_encryption=CfnDataCatalogEncryptionSettings.ConnectionPasswordEncryptionProperty(
                     kms_key_id=self.key.key_id,
                     return_connection_password_encrypted=True,
                 ),
@@ -270,7 +270,7 @@ def _setup_redshift(self) -> None:
             type=glue.ConnectionType.JDBC,
             connection_name="aws-sdk-pandas-redshift",
             properties={
-                "JDBC_CONNECTION_URL": f"jdbc:redshift://{redshift_cluster.cluster_endpoint.hostname}:{port}/{database}",  # noqa: E501
+                "JDBC_CONNECTION_URL": f"jdbc:redshift://{redshift_cluster.cluster_endpoint.hostname}:{port}/{database}",
                 "USERNAME": self.db_username,
                 "PASSWORD": self.db_password,
             },
@@ -663,7 +663,7 @@ def _setup_sqlserver(self) -> None:
             type=glue.ConnectionType.JDBC,
             connection_name="aws-sdk-pandas-sqlserver",
             properties={
-                "JDBC_CONNECTION_URL": f"jdbc:sqlserver://{sqlserver.instance_endpoint.hostname}:{port};databaseName={database}",  # noqa: E501
+                "JDBC_CONNECTION_URL": f"jdbc:sqlserver://{sqlserver.instance_endpoint.hostname}:{port};databaseName={database}",
                 "USERNAME": self.db_username,
                 "PASSWORD": self.db_password,
             },
@@ -725,7 +725,7 @@ def _setup_oracle(self) -> None:
             type=glue.ConnectionType.JDBC,
             connection_name="aws-sdk-pandas-oracle",
             properties={
-                "JDBC_CONNECTION_URL": f"jdbc:oracle:thin://@{oracle.instance_endpoint.hostname}:{port}/{database}",  # noqa: E501
+                "JDBC_CONNECTION_URL": f"jdbc:oracle:thin://@{oracle.instance_endpoint.hostname}:{port}/{database}",
                 "USERNAME": self.db_username,
                 "PASSWORD": self.db_password,
             },
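All five removals in this file are the same fix: `E501` sits in ruff's global `ignore` list in pyproject.toml, so a trailing `# noqa: E501` never suppressed anything, and RUF100 now flags it. The long test strings in the next file are the identical case. A small sketch with made-up values:

```python
hostname, port, database = "example.cluster.local", 5439, "test"

# With E501 globally ignored, a long line like this is legal at any length, so
# a trailing `# noqa: E501` would be a dead suppression that RUF100 reports.
url = f"jdbc:redshift://{hostname}:{port}/{database}"
print(url)
```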

tests/unit/test_athena_parquet.py
Lines changed: 2 additions & 2 deletions

@@ -779,12 +779,12 @@ def test_cast_decimal(path, glue_table, glue_database):
 def test_splits():
     s = "a:struct<id:string,name:string>,b:struct<id:string,name:string>"
     assert list(_split_fields(s)) == ["a:struct<id:string,name:string>", "b:struct<id:string,name:string>"]
-    s = "a:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>,b:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>"  # noqa
+    s = "a:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>,b:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>"
     assert list(_split_fields(s)) == [
         "a:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>",
         "b:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>",
     ]
-    s = "a:struct<id:string,name:string>,b:struct<id:string,name:string>,c:struct<id:string,name:string>,d:struct<id:string,name:string>"  # noqa
+    s = "a:struct<id:string,name:string>,b:struct<id:string,name:string>,c:struct<id:string,name:string>,d:struct<id:string,name:string>"
     assert list(_split_fields(s)) == [
         "a:struct<id:string,name:string>",
         "b:struct<id:string,name:string>",

tutorials/017 - Partition Projection.ipynb
Lines changed: 13 additions & 8 deletions

@@ -54,7 +54,7 @@
    },
    "outputs": [
     {
-     "name": "stdin",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       " ···········································\n"
@@ -255,7 +255,7 @@
     }
    ],
    "source": [
-    "wr.athena.read_sql_query(f\"SELECT * FROM table_integer\", database=\"default\")"
+    "wr.athena.read_sql_query(\"SELECT * FROM table_integer\", database=\"default\")"
    ]
   },
   {
@@ -439,7 +439,7 @@
     }
    ],
    "source": [
-    "wr.athena.read_sql_query(f\"SELECT * FROM table_enum\", database=\"default\")"
+    "wr.athena.read_sql_query(\"SELECT * FROM table_enum\", database=\"default\")"
    ]
   },
   {
@@ -524,8 +524,13 @@
     }
    ],
    "source": [
-    "ts = lambda x: datetime.strptime(x, \"%Y-%m-%d %H:%M:%S\")\n",
-    "dt = lambda x: datetime.strptime(x, \"%Y-%m-%d\").date()\n",
+    "def ts(x):\n",
+    "    return datetime.strptime(x, \"%Y-%m-%d %H:%M:%S\")\n",
+    "\n",
+    "\n",
+    "def dt(x):\n",
+    "    return datetime.strptime(x, \"%Y-%m-%d\").date()\n",
+    "\n",
     "\n",
     "df = pd.DataFrame(\n",
     "    {\n",
@@ -636,7 +641,7 @@
     }
    ],
    "source": [
-    "wr.athena.read_sql_query(f\"SELECT * FROM table_date\", database=\"default\")"
+    "wr.athena.read_sql_query(\"SELECT * FROM table_date\", database=\"default\")"
    ]
   },
   {
@@ -812,7 +817,7 @@
    ],
    "source": [
     "wr.athena.read_sql_query(\n",
-    "    sql=f\"SELECT * FROM table_injected WHERE uuid='b89ed095-8179-4635-9537-88592c0f6bc3'\", database=\"default\"\n",
+    "    sql=\"SELECT * FROM table_injected WHERE uuid='b89ed095-8179-4635-9537-88592c0f6bc3'\", database=\"default\"\n",
     ")"
    ]
   },
@@ -892,4 +897,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
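Because `extend-include = ["*.ipynb"]` pulls notebooks into linting, these cell edits read as fixes for standard rules: the `f"SELECT ..."` strings had no placeholders (F541), so the `f` prefix goes, and the lambda assignments become `def`s per E731. A sketch of the rewritten helper cell:

```python
from datetime import datetime


# E731 prefers a def over a name bound to a lambda: the function gets a real
# __name__ and clearer tracebacks.
def ts(x):
    return datetime.strptime(x, "%Y-%m-%d %H:%M:%S")


def dt(x):
    return datetime.strptime(x, "%Y-%m-%d").date()


print(ts("2023-01-01 00:00:00"), dt("2023-01-01"))
```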

tutorials/022 - Writing Partitions Concurrently.ipynb
Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdin",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       " ············\n"
