
Commit 3507fda

feat: Python 3.12 (#2559)
Signed-off-by: Anton Kukushkin <[email protected]>
Co-authored-by: Leon Luttenberger <[email protected]>
1 parent 8e2a793 commit 3507fda

35 files changed (+2755 / -2639 lines)

.github/workflows/minimal-tests.yml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.8]
+        python-version: ["3.8", "3.11", "3.12"]
         platform: [ubuntu-latest, macos-latest, windows-latest]

     env:

README.md

Lines changed: 2 additions & 0 deletions
@@ -101,6 +101,8 @@ AWS SDK for pandas can also run your workflows at scale by leveraging [Modin](ht

 The quickest way to get started is to use AWS Glue with Ray. Read our [docs](https://aws-sdk-pandas.readthedocs.io/en/3.5.0/scale.html), our blogs ([1](https://aws.amazon.com/blogs/big-data/scale-aws-sdk-for-pandas-workloads-with-aws-glue-for-ray/)/[2](https://aws.amazon.com/blogs/big-data/advanced-patterns-with-aws-sdk-for-pandas-on-aws-glue-for-ray/)), or head to our latest [tutorials](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) to discover even more features.

+> ⚠️ **Ray is currently not available for Python 3.12. While AWS SDK for pandas supports Python 3.12, it cannot be used at scale.**
+
 ## [Read The Docs](https://aws-sdk-pandas.readthedocs.io/)

 - [**What is AWS SDK for pandas?**](https://aws-sdk-pandas.readthedocs.io/en/3.5.0/about.html)

awswrangler/_config.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 _logger: logging.Logger = logging.getLogger(__name__)


-_ConfigValueType = Union[str, bool, int, float, botocore.config.Config, dict]
+_ConfigValueType = Union[str, bool, int, float, botocore.config.Config, Dict[Any, Any]]


 class _ConfigArg(NamedTuple):

awswrangler/_databases.py

Lines changed: 2 additions & 2 deletions
@@ -160,7 +160,7 @@ def _records2df(
     for col_values, col_name in zip(tuple(zip(*records)), cols_names):  # Transposing
         if (dtype is None) or (col_name not in dtype):
             if _oracledb_found:
-                col_values = oracle.handle_oracle_objects(col_values, col_name)  # ruff: noqa: PLW2901
+                col_values = oracle.handle_oracle_objects(col_values, col_name)  # type: ignore[arg-type,assignment] # noqa: PLW2901
             try:
                 array: pa.Array = pa.array(obj=col_values, safe=safe)  # Creating Arrow array
             except pa.ArrowInvalid as ex:
@@ -169,7 +169,7 @@ def _records2df(
             try:
                 if _oracledb_found:
                     if _should_handle_oracle_objects(dtype[col_name]):
-                        col_values = oracle.handle_oracle_objects(col_values, col_name, dtype)
+                        col_values = oracle.handle_oracle_objects(col_values, col_name, dtype)  # type: ignore[arg-type,assignment] # noqa: PLW2901
                 array = pa.array(obj=col_values, type=dtype[col_name], safe=safe)  # Creating Arrow array with dtype
             except (pa.ArrowInvalid, pa.ArrowTypeError):
                 array = pa.array(obj=col_values, safe=safe)  # Creating Arrow array
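A recurring substitution in this commit (here and in several of the files below) replaces trailing `# ruff: noqa: PLW2901` comments with plain `# noqa: PLW2901`. The reasoning is not spelled out in the commit message, but the usual motivation is that `# ruff: noqa` is a file-level exemption directive that Ruff only honours at the top of a file, whereas a trailing `# noqa: <rule>` suppresses the rule on that single line. A minimal, hypothetical sketch of the pattern being suppressed (not code from this repository):

# PLW2901 flags a loop variable being overwritten inside the loop body;
# the trailing noqa keeps the intentional reassignment without disabling
# the rule for the whole file.
for raw_name in ["  id  ", "  name  "]:
    raw_name = raw_name.strip()  # noqa: PLW2901
    print(raw_name)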

awswrangler/_utils.py

Lines changed: 3 additions & 3 deletions
@@ -165,7 +165,7 @@ def decorator(func: FunctionType) -> FunctionType:

         @wraps(func)
         def inner(*args: Any, **kwargs: Any) -> Any:
-            passed_unsupported_kwargs = set(unsupported_kwargs).intersection(  # type: ignore
+            passed_unsupported_kwargs = set(unsupported_kwargs).intersection(
                 set([key for key, value in kwargs.items() if value is not None])
             )

@@ -620,7 +620,7 @@ def ensure_cpu_count(use_threads: Union[bool, int] = True) -> int:
     1

     """
-    if type(use_threads) == int:  # pylint: disable=unidiomatic-typecheck
+    if type(use_threads) == int:  # pylint: disable=unidiomatic-typecheck  # noqa: E721
         if use_threads < 1:
             return 1
         return use_threads
@@ -736,7 +736,7 @@ def get_credentials_from_session(
 ) -> botocore.credentials.ReadOnlyCredentials:
     """Get AWS credentials from boto3 session."""
     session: boto3.Session = ensure_session(session=boto3_session)
-    credentials: botocore.credentials.Credentials = session.get_credentials()
+    credentials: botocore.credentials.Credentials = session.get_credentials()  # type: ignore[assignment]
     frozen_credentials: botocore.credentials.ReadOnlyCredentials = credentials.get_frozen_credentials()
     return frozen_credentials
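The `# noqa: E721` added to `ensure_cpu_count` above (and to `ensure_worker_count` further down) preserves the deliberate exact-type comparison. A short illustrative sketch, assuming the usual intent of this idiom: `isinstance` would also accept `bool` values because `bool` is a subclass of `int`, while `type(...) == int` lets `use_threads=True` fall through to the CPU-count path instead of being treated as a thread count.

# Illustrative only: why the exact type check is kept instead of isinstance().
use_threads = True
print(isinstance(use_threads, int))  # True - bool is a subclass of int
print(type(use_threads) == int)      # False - True is not mistaken for a thread count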

awswrangler/athena/_cache.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ def update_cache(self, items: List[Dict[str, Any]]) -> None:
         if oldest_item:
             items = list(
                 filter(
-                    lambda x: x["Status"]["SubmissionDateTime"] > oldest_item["Status"]["SubmissionDateTime"],  # type: ignore[arg-type]
+                    lambda x: x["Status"]["SubmissionDateTime"] > oldest_item["Status"]["SubmissionDateTime"],
                     items,
                 )
             )

awswrangler/athena/_read.py

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ def _add_query_metadata_generator(
 ) -> Iterator[pd.DataFrame]:
     """Add Query Execution metadata to every DF in iterator."""
     for df in dfs:
-        df = _apply_query_metadata(df=df, query_metadata=query_metadata)  # ruff: noqa: PLW2901
+        df = _apply_query_metadata(df=df, query_metadata=query_metadata)  # noqa: PLW2901
         yield df

awswrangler/athena/_utils.py

Lines changed: 1 addition & 1 deletion
@@ -197,7 +197,7 @@ def _parse_describe_table(df: pd.DataFrame) -> pd.DataFrame:
     origin_df_dict = df.to_dict()
     target_df_dict: Dict[str, List[Union[str, bool]]] = {"Column Name": [], "Type": [], "Partition": [], "Comment": []}
     for index, col_name in origin_df_dict["col_name"].items():
-        col_name = col_name.strip()  # ruff: noqa: PLW2901
+        col_name = col_name.strip()  # noqa: PLW2901
         if col_name.startswith("#") or not col_name:
             pass
         elif col_name in target_df_dict["Column Name"]:

awswrangler/cleanrooms/_utils.py

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ def wait_query(
         Protected query execution ID
     boto3_session : boto3.Session, optional
         Boto3 Session. If None, the default boto3 session is used
+
     Returns
     -------
     Dict[str, Any]

awswrangler/data_api/rds.py

Lines changed: 3 additions & 3 deletions
@@ -165,7 +165,7 @@ def _execute_statement(
         def function(sql: str) -> "ExecuteStatementResponseTypeDef":
             return self.client.execute_statement(
                 resourceArn=self.resource_arn,
-                database=database,  # type: ignore[arg-type]
+                database=database,
                 sql=sql,
                 secretArn=self.secret_arn,
                 includeResultMetadata=True,
@@ -196,7 +196,7 @@ def _batch_execute_statement(
         def function(sql: str) -> "BatchExecuteStatementResponseTypeDef":
             return self.client.batch_execute_statement(
                 resourceArn=self.resource_arn,
-                database=database,  # type: ignore[arg-type]
+                database=database,
                 sql=sql,
                 secretArn=self.secret_arn,
                 **additional_kwargs,
@@ -363,7 +363,7 @@ def _generate_parameters(columns: List[str], values: List[Any]) -> List[Dict[str
     parameter_list = []

     for col, value in zip(columns, values):
-        value, type_hint = _create_value_dict(value)  # ruff: noqa: PLW2901
+        value, type_hint = _create_value_dict(value)  # noqa: PLW2901

         parameter = {
             "name": col,

awswrangler/distributed/ray/_utils.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ def _estimate_available_parallelism() -> int:


 def ensure_worker_count(use_threads: Union[bool, int] = True) -> int:
-    if type(use_threads) == int:  # pylint: disable=unidiomatic-typecheck
+    if type(use_threads) == int:  # pylint: disable=unidiomatic-typecheck  # noqa: E721
         if use_threads < 1:
             return 1
         return use_threads

awswrangler/distributed/ray/datasources/arrow_parquet_datasource.py

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ def _get_file_suffix(self, file_format: str, compression: Optional[str]) -> str:
 # raw pyarrow file fragment causes S3 network calls.
 class _SerializedPiece:
     def __init__(self, frag: ParquetFileFragment):
-        self._data = cloudpickle.dumps(  # type: ignore[attr-defined]
+        self._data = cloudpickle.dumps(  # type: ignore[attr-defined,no-untyped-call]
             (frag.format, frag.path, frag.filesystem, frag.partition_expression)
         )

awswrangler/distributed/ray/modin/_core.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ def _validate_partition_shape(df: pd.DataFrame) -> bool:
     """
     # Unwrap partitions as they are currently stored (axis=None)
     partitions_shape = np.array(unwrap_partitions(df)).shape
-    return partitions_shape[1] == 1
+    return partitions_shape[1] == 1  # type: ignore[no-any-return,unused-ignore]


 FunctionType = TypeVar("FunctionType", bound=Callable[..., Any])

awswrangler/dynamodb/_utils.py

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@
     from mypy_boto3_dynamodb.type_defs import (
         AttributeValueTypeDef,
         ExecuteStatementOutputTypeDef,
-        KeySchemaElementTableTypeDef,
+        KeySchemaElementTypeDef,
         WriteRequestTypeDef,
     )

@@ -180,7 +180,7 @@ def execute_statement(


 def _validate_items(
-    items: Union[List[Dict[str, Any]], List[Mapping[str, Any]]], key_schema: List["KeySchemaElementTableTypeDef"]
+    items: Union[List[Dict[str, Any]], List[Mapping[str, Any]]], key_schema: List["KeySchemaElementTypeDef"]
 ) -> None:
     """
     Validate if all items have the required keys for the Amazon DynamoDB table.

awswrangler/dynamodb/_write.py

Lines changed: 3 additions & 3 deletions
@@ -20,7 +20,7 @@


 if TYPE_CHECKING:
     from mypy_boto3_dynamodb.client import DynamoDBClient
-    from mypy_boto3_dynamodb.type_defs import KeySchemaElementTableTypeDef
+    from mypy_boto3_dynamodb.type_defs import KeySchemaElementTypeDef


 _logger: logging.Logger = logging.getLogger(__name__)
@@ -139,7 +139,7 @@ def _put_df(
     dynamodb_client: Optional["DynamoDBClient"],
     df: pd.DataFrame,
     table_name: str,
-    key_schema: List["KeySchemaElementTableTypeDef"],
+    key_schema: List["KeySchemaElementTypeDef"],
 ) -> None:
     items: List[Mapping[str, Any]] = [v.dropna().to_dict() for _, v in df.iterrows()]

@@ -214,7 +214,7 @@ def _put_items(
     dynamodb_client: Optional["DynamoDBClient"],
     items: Union[List[Dict[str, Any]], List[Mapping[str, Any]]],
     table_name: str,
-    key_schema: List["KeySchemaElementTableTypeDef"],
+    key_schema: List["KeySchemaElementTypeDef"],
 ) -> None:
     _logger.debug("Inserting %d items", len(items))
     _validate_items(items=items, key_schema=key_schema)

awswrangler/emr.py

Lines changed: 1 addition & 0 deletions
@@ -663,6 +663,7 @@ def create_cluster( # pylint: disable=too-many-arguments,too-many-locals,unused

     By default, adds log4j config as follows:
     `{"Classification": "spark-log4j", "Properties": {"log4j.rootCategory": f"{pars['spark_log_level']}, console"}}`
+
     Returns
     -------
     str

awswrangler/neptune/_client.py

Lines changed: 2 additions & 1 deletion
@@ -8,6 +8,7 @@
 import boto3
 from botocore.auth import SigV4Auth
 from botocore.awsrequest import AWSPreparedRequest, AWSRequest
+from botocore.credentials import Credentials
 from typing_extensions import Literal, NotRequired

 import awswrangler.neptune._gremlin_init as gremlin
@@ -126,7 +127,7 @@ def _get_aws_request(
     ) -> Union[AWSRequest, AWSPreparedRequest]:
         req = AWSRequest(method=method, url=url, data=data, params=params, headers=headers)
         if self.iam_enabled:
-            credentials = self.boto3_session.get_credentials()
+            credentials: Credentials = self.boto3_session.get_credentials()  # type: ignore[assignment]
             try:
                 frozen_creds = credentials.get_frozen_credentials()
             except AttributeError:
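The explicit `Credentials` annotation plus `# type: ignore[assignment]` here, like the matching change in `awswrangler/_utils.py` above, presumably works around the boto3 type stubs declaring `get_credentials()` as returning an optional value. A hedged sketch of the equivalent explicit check, shown for illustration only (not the library's code):

import boto3
from botocore.credentials import ReadOnlyCredentials

session = boto3.Session()
credentials = session.get_credentials()  # may be None if no credentials are configured
if credentials is not None:
    frozen: ReadOnlyCredentials = credentials.get_frozen_credentials()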

awswrangler/neptune/_gremlin_parser.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def _parse_dict(data: Any) -> Any:
     for k, v in data.items():
         # If the key is a Vertex or an Edge do special processing
         if isinstance(k, (gremlin.Vertex, gremlin.Edge)):
-            k = k.id  # ruff: noqa: PLW2901
+            k = k.id  # noqa: PLW2901

         # If the value is a list do special processing to make it a scalar if the list is of length 1
         if isinstance(v, list) and len(v) == 1:

awswrangler/oracle.py

Lines changed: 1 addition & 1 deletion
@@ -603,7 +603,7 @@ def to_sql(
                 df=df, column_placeholders=column_placeholders, chunksize=chunksize
             )
             for _, parameters in placeholder_parameter_pair_generator:
-                parameters = list(zip(*[iter(parameters)] * len(df.columns)))  # ruff: noqa: PLW2901
+                parameters = list(zip(*[iter(parameters)] * len(df.columns)))  # noqa: PLW2901
                 _logger.debug("sql: %s", sql)
                 cursor.executemany(sql, parameters)


awswrangler/s3/_copy.py

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ def _copy_objects(
             CopySource=copy_source,
             Bucket=target_bucket,
             Key=target_key,
-            ExtraArgs=s3_additional_kwargs,  # type: ignore[arg-type]
+            ExtraArgs=s3_additional_kwargs,
             Config=TransferConfig(num_download_attempts=10, use_threads=use_threads),  # type: ignore[arg-type]
         )

awswrangler/s3/_read.py

Lines changed: 1 addition & 1 deletion
@@ -325,7 +325,7 @@ def _ensure_locations_are_valid(paths: Iterable[str]) -> Iterator[str]:
         # If the suffix looks like a partition,
         if suffix and (suffix.count("=") == 1):
             # the path should end in a '/' character.
-            path = f"{path}/"  # ruff: noqa: PLW2901
+            path = f"{path}/"  # noqa: PLW2901
         yield path

awswrangler/s3/_select.py

Lines changed: 2 additions & 2 deletions
@@ -56,8 +56,8 @@ def _select_object_content(
     for event in response["Payload"]:
         if "Records" in event:
             records = (
-                event["Records"]["Payload"]  # type: ignore[index]
-                .decode(  # type: ignore[attr-defined]
+                event["Records"]["Payload"]
+                .decode(
                     encoding="utf-8",
                     errors="ignore",
                 )

awswrangler/s3/_write_dataset.py

Lines changed: 5 additions & 3 deletions
@@ -149,14 +149,16 @@ def _to_partitions(
     s3_client = client(service_name="s3", session=boto3_session)
     for keys, subgroup in df.groupby(by=partition_cols, observed=True):
         # Keys are either a primitive type or a tuple if partitioning by multiple cols
-        keys = (keys,) if not isinstance(keys, tuple) else keys  # ruff: noqa: PLW2901
+        keys = (keys,) if not isinstance(keys, tuple) else keys  # noqa: PLW2901
         # Drop partition columns from df
         subgroup.drop(
             columns=[col for col in partition_cols if col in subgroup.columns],
             inplace=True,
-        )  # ruff: noqa: PLW2901
+        )  # noqa: PLW2901
         # Drop index levels if partitioning by index columns
-        subgroup = subgroup.droplevel(level=[col for col in partition_cols if col in subgroup.index.names])
+        subgroup = subgroup.droplevel(  # noqa: PLW2901
+            level=[col for col in partition_cols if col in subgroup.index.names]
+        )
         prefix = _delete_objects(
             keys=keys,
             path_root=path_root,
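The `droplevel` call, now split across lines so the `# noqa: PLW2901` sits on the statement itself, removes index levels that are also partition columns. A minimal sketch of that behaviour; the sample data and names below are made up for illustration and are not from the repository:

import pandas as pd

df = pd.DataFrame(
    {"value": [1, 2, 3]},
    index=pd.MultiIndex.from_tuples(
        [(2023, "a"), (2023, "b"), (2024, "a")], names=["year", "id"]
    ),
)
partition_cols = ["year"]

for keys, subgroup in df.groupby(by=partition_cols, observed=True):
    keys = (keys,) if not isinstance(keys, tuple) else keys  # noqa: PLW2901
    # Drop the partitioned index level; the remaining "id" level is kept.
    subgroup = subgroup.droplevel(  # noqa: PLW2901
        level=[col for col in partition_cols if col in subgroup.index.names]
    )
    print(keys, list(subgroup.index.names))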

building/build-lambda-layers.sh

Lines changed: 56 additions & 26 deletions
@@ -4,44 +4,74 @@ set -ex
 VERSION=$(poetry version --short)
 DIR_NAME=$(dirname "$PWD")

+PYTHON_VERSION=${1:-ALL}
+
 ARCH=$(arch)
 [ "${ARCH}" = "aarch64" ] && ARCH_SUFFIX="-arm64" # AWS Lambda, the name arm64 is used instead of aarch64

-echo "Building Lambda Layers for AWS SDK for pandas ${VERSION}"
+if [[ $PYTHON_VERSION == "ALL" ]]
+then
+  echo "Building Lambda Layers for AWS SDK for pandas ${VERSION} (ALL supported Python versions)"
+else
+  echo "Building Lambda Layers for AWS SDK for pandas ${VERSION} (ONLY Python $PYTHON_VERSION)"
+fi

 pushd lambda

 # Building all related docker images
-./build-docker-images.sh
+./build-docker-images.sh $PYTHON_VERSION

 # Python 3.8
-docker run \
-  --volume "$DIR_NAME":/aws-sdk-pandas/ \
-  --workdir /aws-sdk-pandas/building/lambda \
-  --rm \
-  awswrangler-build-py38 \
-  build-lambda-layer.sh "${VERSION}-py3.8${ARCH_SUFFIX}" "ninja-build"
+if [[ $PYTHON_VERSION == "ALL" || $PYTHON_VERSION == "3.8" ]]
+then
+  docker run \
+    --volume "$DIR_NAME":/aws-sdk-pandas/ \
+    --workdir /aws-sdk-pandas/building/lambda \
+    --rm \
+    awswrangler-build-py38 \
+    build-lambda-layer.sh "${VERSION}-py3.8${ARCH_SUFFIX}" "ninja-build"
+fi

 # Python 3.9
-docker run \
-  --volume "$DIR_NAME":/aws-sdk-pandas/ \
-  --workdir /aws-sdk-pandas/building/lambda \
-  --rm \
-  awswrangler-build-py39 \
-  build-lambda-layer.sh "${VERSION}-py3.9${ARCH_SUFFIX}" "ninja-build"
+if [[ $PYTHON_VERSION == "ALL" || $PYTHON_VERSION == "3.9" ]]
+then
+  docker run \
+    --volume "$DIR_NAME":/aws-sdk-pandas/ \
+    --workdir /aws-sdk-pandas/building/lambda \
+    --rm \
+    awswrangler-build-py39 \
+    build-lambda-layer.sh "${VERSION}-py3.9${ARCH_SUFFIX}" "ninja-build"
+fi

 # Python 3.10
-docker run \
-  --volume "$DIR_NAME":/aws-sdk-pandas/ \
-  --workdir /aws-sdk-pandas/building/lambda \
-  --rm \
-  awswrangler-build-py310 \
-  build-lambda-layer.sh "${VERSION}-py3.10${ARCH_SUFFIX}" "ninja-build"
+if [[ $PYTHON_VERSION == "ALL" || $PYTHON_VERSION == "3.10" ]]
+then
+  docker run \
+    --volume "$DIR_NAME":/aws-sdk-pandas/ \
+    --workdir /aws-sdk-pandas/building/lambda \
+    --rm \
+    awswrangler-build-py310 \
+    build-lambda-layer.sh "${VERSION}-py3.10${ARCH_SUFFIX}" "ninja-build"
+fi

 # Python 3.11
-docker run \
-  --volume "$DIR_NAME":/aws-sdk-pandas/ \
-  --workdir /aws-sdk-pandas/building/lambda \
-  --rm \
-  awswrangler-build-py311 \
-  build-lambda-layer.sh "${VERSION}-py3.11${ARCH_SUFFIX}" "ninja-build"
+if [[ $PYTHON_VERSION == "ALL" || $PYTHON_VERSION == "3.11" ]]
+then
+  docker run \
+    --volume "$DIR_NAME":/aws-sdk-pandas/ \
+    --workdir /aws-sdk-pandas/building/lambda \
+    --rm \
+    awswrangler-build-py311 \
+    build-lambda-layer.sh "${VERSION}-py3.11${ARCH_SUFFIX}" "ninja-build"
+fi
+
+# Python 3.12
+if [[ $PYTHON_VERSION == "ALL" || $PYTHON_VERSION == "3.12" ]]
+then
+  docker run \
+    --volume "$DIR_NAME":/aws-sdk-pandas/ \
+    --workdir /aws-sdk-pandas/building/lambda \
+    --rm \
+    awswrangler-build-py312 \
+    build-lambda-layer.sh "${VERSION}-py3.12${ARCH_SUFFIX}" "ninja-build"
+fi
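With the new `PYTHON_VERSION=${1:-ALL}` argument, the layer build can be scoped to a single interpreter, e.g. `./build-lambda-layers.sh 3.12`, and falls back to building every supported version when no argument is given (invocation path assumed from the script's `pushd lambda`, i.e. run from the `building` directory).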
