Skip to content

Commit 19aaef4

Browse files
author
awstools
committed
feat(client-glue): AWS Glue now supports the sort and z-order strategies, in addition to binpack, for managed automated compaction of Iceberg tables.
1 parent abce1d6 commit 19aaef4

15 files changed

+496
-299
lines changed

clients/client-glue/src/commands/BatchGetTableOptimizerCommand.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ export interface BatchGetTableOptimizerCommandOutput extends BatchGetTableOptimi
6161
* // vpcConfiguration: { // TableOptimizerVpcConfiguration Union: only one key present
6262
* // glueConnectionName: "STRING_VALUE",
6363
* // },
64+
* // compactionConfiguration: { // CompactionConfiguration
65+
* // icebergConfiguration: { // IcebergCompactionConfiguration
66+
* // strategy: "binpack" || "sort" || "z-order",
67+
* // },
68+
* // },
6469
* // retentionConfiguration: { // RetentionConfiguration
6570
* // icebergConfiguration: { // IcebergRetentionConfiguration
6671
* // snapshotRetentionPeriodInDays: Number("int"),
@@ -95,6 +100,7 @@ export interface BatchGetTableOptimizerCommandOutput extends BatchGetTableOptimi
95100
* // JobDurationInHour: Number("double"),
96101
* // },
97102
* // },
103+
* // compactionStrategy: "binpack" || "sort" || "z-order",
98104
* // retentionMetrics: { // RetentionMetrics
99105
* // IcebergMetrics: { // IcebergRetentionMetrics
100106
* // NumberOfDataFilesDeleted: Number("long"),

clients/client-glue/src/commands/CreateTableOptimizerCommand.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ export interface CreateTableOptimizerCommandOutput extends CreateTableOptimizerR
4646
* vpcConfiguration: { // TableOptimizerVpcConfiguration Union: only one key present
4747
* glueConnectionName: "STRING_VALUE",
4848
* },
49+
* compactionConfiguration: { // CompactionConfiguration
50+
* icebergConfiguration: { // IcebergCompactionConfiguration
51+
* strategy: "binpack" || "sort" || "z-order",
52+
* },
53+
* },
4954
* retentionConfiguration: { // RetentionConfiguration
5055
* icebergConfiguration: { // IcebergRetentionConfiguration
5156
* snapshotRetentionPeriodInDays: Number("int"),

clients/client-glue/src/commands/GetColumnStatisticsForTableCommand.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
88
import { GlueClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../GlueClient";
9-
import { GetColumnStatisticsForTableRequest, GetColumnStatisticsForTableResponse } from "../models/models_1";
9+
import { GetColumnStatisticsForTableRequest } from "../models/models_1";
10+
import { GetColumnStatisticsForTableResponse } from "../models/models_2";
1011
import { de_GetColumnStatisticsForTableCommand, se_GetColumnStatisticsForTableCommand } from "../protocols/Aws_json1_1";
1112

1213
/**

clients/client-glue/src/commands/GetColumnStatisticsTaskRunCommand.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
88
import { GlueClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../GlueClient";
9-
import { GetColumnStatisticsTaskRunRequest } from "../models/models_1";
10-
import { GetColumnStatisticsTaskRunResponse } from "../models/models_2";
9+
import { GetColumnStatisticsTaskRunRequest, GetColumnStatisticsTaskRunResponse } from "../models/models_2";
1110
import { de_GetColumnStatisticsTaskRunCommand, se_GetColumnStatisticsTaskRunCommand } from "../protocols/Aws_json1_1";
1211

1312
/**

clients/client-glue/src/commands/GetTableOptimizerCommand.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ export interface GetTableOptimizerCommandOutput extends GetTableOptimizerRespons
5555
* // vpcConfiguration: { // TableOptimizerVpcConfiguration Union: only one key present
5656
* // glueConnectionName: "STRING_VALUE",
5757
* // },
58+
* // compactionConfiguration: { // CompactionConfiguration
59+
* // icebergConfiguration: { // IcebergCompactionConfiguration
60+
* // strategy: "binpack" || "sort" || "z-order",
61+
* // },
62+
* // },
5863
* // retentionConfiguration: { // RetentionConfiguration
5964
* // icebergConfiguration: { // IcebergRetentionConfiguration
6065
* // snapshotRetentionPeriodInDays: Number("int"),
@@ -89,6 +94,7 @@ export interface GetTableOptimizerCommandOutput extends GetTableOptimizerRespons
8994
* // JobDurationInHour: Number("double"),
9095
* // },
9196
* // },
97+
* // compactionStrategy: "binpack" || "sort" || "z-order",
9298
* // retentionMetrics: { // RetentionMetrics
9399
* // IcebergMetrics: { // IcebergRetentionMetrics
94100
* // NumberOfDataFilesDeleted: Number("long"),

clients/client-glue/src/commands/ListTableOptimizerRunsCommand.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ export interface ListTableOptimizerRunsCommandOutput extends ListTableOptimizerR
7171
* // JobDurationInHour: Number("double"),
7272
* // },
7373
* // },
74+
* // compactionStrategy: "binpack" || "sort" || "z-order",
7475
* // retentionMetrics: { // RetentionMetrics
7576
* // IcebergMetrics: { // IcebergRetentionMetrics
7677
* // NumberOfDataFilesDeleted: Number("long"),

clients/client-glue/src/commands/RemoveSchemaVersionMetadataCommand.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
88
import { GlueClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../GlueClient";
9-
import { RemoveSchemaVersionMetadataInput, RemoveSchemaVersionMetadataResponse } from "../models/models_2";
9+
import { RemoveSchemaVersionMetadataInput, RemoveSchemaVersionMetadataResponse } from "../models/models_3";
1010
import { de_RemoveSchemaVersionMetadataCommand, se_RemoveSchemaVersionMetadataCommand } from "../protocols/Aws_json1_1";
1111

1212
/**

clients/client-glue/src/commands/ResetJobBookmarkCommand.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
88
import { GlueClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../GlueClient";
9-
import { ResetJobBookmarkRequest } from "../models/models_2";
10-
import { ResetJobBookmarkResponse } from "../models/models_3";
9+
import { ResetJobBookmarkRequest, ResetJobBookmarkResponse } from "../models/models_3";
1110
import { de_ResetJobBookmarkCommand, se_ResetJobBookmarkCommand } from "../protocols/Aws_json1_1";
1211

1312
/**

clients/client-glue/src/commands/UpdateTableOptimizerCommand.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ export interface UpdateTableOptimizerCommandOutput extends UpdateTableOptimizerR
4646
* vpcConfiguration: { // TableOptimizerVpcConfiguration Union: only one key present
4747
* glueConnectionName: "STRING_VALUE",
4848
* },
49+
* compactionConfiguration: { // CompactionConfiguration
50+
* icebergConfiguration: { // IcebergCompactionConfiguration
51+
* strategy: "binpack" || "sort" || "z-order",
52+
* },
53+
* },
4954
* retentionConfiguration: { // RetentionConfiguration
5055
* icebergConfiguration: { // IcebergRetentionConfiguration
5156
* snapshotRetentionPeriodInDays: Number("int"),

clients/client-glue/src/models/models_0.ts

Lines changed: 99 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -8075,6 +8075,69 @@ export interface BatchGetTableOptimizerError {
80758075
type?: TableOptimizerType | undefined;
80768076
}
80778077

8078+
/**
8079+
* @public
8080+
* @enum
8081+
*/
8082+
export const CompactionStrategy = {
8083+
BINPACK: "binpack",
8084+
SORT: "sort",
8085+
ZORDER: "z-order",
8086+
} as const;
8087+
8088+
/**
8089+
* @public
8090+
*/
8091+
export type CompactionStrategy = (typeof CompactionStrategy)[keyof typeof CompactionStrategy];
8092+
8093+
/**
8094+
* <p>The configuration for an Iceberg compaction optimizer. This configuration defines parameters for optimizing the layout of data files in Iceberg tables.</p>
8095+
* @public
8096+
*/
8097+
export interface IcebergCompactionConfiguration {
8098+
/**
8099+
* <p>The strategy to use for compaction. Valid values are:</p>
8100+
* <ul>
8101+
* <li>
8102+
* <p>
8103+
* <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes.
8104+
* This is the recommended compaction strategy for most use cases.
8105+
* </p>
8106+
* </li>
8107+
* <li>
8108+
* <p>
8109+
* <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query
8110+
* performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy,
8111+
* you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p>
8112+
* </li>
8113+
* <li>
8114+
* <p>
8115+
* <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting,
8116+
* allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions
8117+
* simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the
8118+
* <code>sort_order</code> table property.
8119+
* </p>
8120+
* </li>
8121+
* </ul>
8122+
* <p>If an input is not provided, the default value 'binpack' will be used.</p>
8123+
* @public
8124+
*/
8125+
strategy?: CompactionStrategy | undefined;
8126+
}
8127+
8128+
/**
8129+
* <p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to improve
8130+
* query performance and reduce storage costs.</p>
8131+
* @public
8132+
*/
8133+
export interface CompactionConfiguration {
8134+
/**
8135+
* <p>The configuration for an Iceberg compaction optimizer.</p>
8136+
* @public
8137+
*/
8138+
icebergConfiguration?: IcebergCompactionConfiguration | undefined;
8139+
}
8140+
80788141
/**
80798142
* <p>The configuration for an Iceberg orphan file deletion optimizer.</p>
80808143
* @public
@@ -8206,6 +8269,13 @@ export interface TableOptimizerConfiguration {
82068269
*/
82078270
vpcConfiguration?: TableOptimizerVpcConfiguration | undefined;
82088271

8272+
/**
8273+
* <p>The configuration for a compaction optimizer. This configuration defines how data files in your table will be compacted to
8274+
* improve query performance and reduce storage costs.</p>
8275+
* @public
8276+
*/
8277+
compactionConfiguration?: CompactionConfiguration | undefined;
8278+
82098279
/**
82108280
* <p>The configuration for a snapshot retention optimizer.</p>
82118281
* @public
@@ -8454,6 +8524,35 @@ export interface TableOptimizerRun {
84548524
*/
84558525
compactionMetrics?: CompactionMetrics | undefined;
84568526

8527+
/**
8528+
* <p>The strategy used for the compaction run. Indicates which algorithm was applied to determine how files were selected and combined during the
8529+
* compaction process. Valid values are:</p>
8530+
* <ul>
8531+
* <li>
8532+
* <p>
8533+
* <code>binpack</code>: Combines small files into larger files, typically targeting sizes over 100MB, while applying any pending deletes.
8534+
* This is the recommended compaction strategy for most use cases.
8535+
* </p>
8536+
* </li>
8537+
* <li>
8538+
* <p>
8539+
* <code>sort</code>: Organizes data based on specified columns which are sorted hierarchically during compaction, improving query
8540+
* performance for filtered operations. This strategy is recommended when your queries frequently filter on specific columns. To use this strategy,
8541+
* you must first define a sort order in your Iceberg table properties using the <code>sort_order</code> table property.</p>
8542+
* </li>
8543+
* <li>
8544+
* <p>
8545+
* <code>z-order</code>: Optimizes data organization by blending multiple attributes into a single scalar value that can be used for sorting,
8546+
* allowing efficient querying across multiple dimensions. This strategy is recommended when you need to query data across multiple dimensions
8547+
* simultaneously. To use this strategy, you must first define a sort order in your Iceberg table properties using the
8548+
* <code>sort_order</code> table property.
8549+
* </p>
8550+
* </li>
8551+
* </ul>
8552+
* @public
8553+
*/
8554+
compactionStrategy?: CompactionStrategy | undefined;
8555+
84578556
/**
84588557
* <p>A <code>RetentionMetrics</code> object containing metrics for the optimizer run.</p>
84598558
* @public
@@ -9383,150 +9482,6 @@ export interface StartingEventBatchCondition {
93839482
BatchWindow?: number | undefined;
93849483
}
93859484

9386-
/**
9387-
* <p>Workflow run statistics provides statistics about the workflow run.</p>
9388-
* @public
9389-
*/
9390-
export interface WorkflowRunStatistics {
9391-
/**
9392-
* <p>Total number of Actions in the workflow run.</p>
9393-
* @public
9394-
*/
9395-
TotalActions?: number | undefined;
9396-
9397-
/**
9398-
* <p>Total number of Actions that timed out.</p>
9399-
* @public
9400-
*/
9401-
TimeoutActions?: number | undefined;
9402-
9403-
/**
9404-
* <p>Total number of Actions that have failed.</p>
9405-
* @public
9406-
*/
9407-
FailedActions?: number | undefined;
9408-
9409-
/**
9410-
* <p>Total number of Actions that have stopped.</p>
9411-
* @public
9412-
*/
9413-
StoppedActions?: number | undefined;
9414-
9415-
/**
9416-
* <p>Total number of Actions that have succeeded.</p>
9417-
* @public
9418-
*/
9419-
SucceededActions?: number | undefined;
9420-
9421-
/**
9422-
* <p>Total number Actions in running state.</p>
9423-
* @public
9424-
*/
9425-
RunningActions?: number | undefined;
9426-
9427-
/**
9428-
* <p>Indicates the count of job runs in the ERROR state in the workflow run.</p>
9429-
* @public
9430-
*/
9431-
ErroredActions?: number | undefined;
9432-
9433-
/**
9434-
* <p>Indicates the count of job runs in WAITING state in the workflow run.</p>
9435-
* @public
9436-
*/
9437-
WaitingActions?: number | undefined;
9438-
}
9439-
9440-
/**
9441-
* @public
9442-
* @enum
9443-
*/
9444-
export const WorkflowRunStatus = {
9445-
COMPLETED: "COMPLETED",
9446-
ERROR: "ERROR",
9447-
RUNNING: "RUNNING",
9448-
STOPPED: "STOPPED",
9449-
STOPPING: "STOPPING",
9450-
} as const;
9451-
9452-
/**
9453-
* @public
9454-
*/
9455-
export type WorkflowRunStatus = (typeof WorkflowRunStatus)[keyof typeof WorkflowRunStatus];
9456-
9457-
/**
9458-
* <p>A workflow run is an execution of a workflow providing all the runtime information.</p>
9459-
* @public
9460-
*/
9461-
export interface WorkflowRun {
9462-
/**
9463-
* <p>Name of the workflow that was run.</p>
9464-
* @public
9465-
*/
9466-
Name?: string | undefined;
9467-
9468-
/**
9469-
* <p>The ID of this workflow run.</p>
9470-
* @public
9471-
*/
9472-
WorkflowRunId?: string | undefined;
9473-
9474-
/**
9475-
* <p>The ID of the previous workflow run.</p>
9476-
* @public
9477-
*/
9478-
PreviousRunId?: string | undefined;
9479-
9480-
/**
9481-
* <p>The workflow run properties which were set during the run.</p>
9482-
* @public
9483-
*/
9484-
WorkflowRunProperties?: Record<string, string> | undefined;
9485-
9486-
/**
9487-
* <p>The date and time when the workflow run was started.</p>
9488-
* @public
9489-
*/
9490-
StartedOn?: Date | undefined;
9491-
9492-
/**
9493-
* <p>The date and time when the workflow run completed.</p>
9494-
* @public
9495-
*/
9496-
CompletedOn?: Date | undefined;
9497-
9498-
/**
9499-
* <p>The status of the workflow run.</p>
9500-
* @public
9501-
*/
9502-
Status?: WorkflowRunStatus | undefined;
9503-
9504-
/**
9505-
* <p>This error message describes any error that may have occurred in starting the workflow run. Currently the only error message is "Concurrent runs exceeded for workflow: <code>foo</code>."</p>
9506-
* @public
9507-
*/
9508-
ErrorMessage?: string | undefined;
9509-
9510-
/**
9511-
* <p>The statistics of the run.</p>
9512-
* @public
9513-
*/
9514-
Statistics?: WorkflowRunStatistics | undefined;
9515-
9516-
/**
9517-
* <p>The graph representing all the Glue components that belong to the workflow as nodes and directed
9518-
* connections between them as edges.</p>
9519-
* @public
9520-
*/
9521-
Graph?: WorkflowGraph | undefined;
9522-
9523-
/**
9524-
* <p>The batch condition that started the workflow run.</p>
9525-
* @public
9526-
*/
9527-
StartingEventBatchCondition?: StartingEventBatchCondition | undefined;
9528-
}
9529-
95309485
/**
95319486
* @internal
95329487
*/

0 commit comments

Comments
 (0)