From d09157d1b8f53c5044c68efad79be60e7ef23713 Mon Sep 17 00:00:00 2001 From: wulin Date: Mon, 17 Mar 2025 09:52:07 +0800 Subject: [PATCH 1/5] [FLINK-35459] Use Incremental Source Framework in Flink CDC TiKV Source Connector # Conflicts: # flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml # flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java # flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java # flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java --- .../flink-connector-tidb-cdc/pom.xml | 40 +- .../Listeners/TiDBAntlrDdlParserListener.java | 115 +++ .../connector/tidb/TiDBAntlrDdlParser.java | 313 ++++++++ .../tidb/TiDBEventMetadataProvider.java | 50 ++ .../connector/tidb/TiDBPartition.java | 43 ++ .../connector/tidb/TiDBTaskContext.java | 27 + .../connector/tidb/TidbTopicSelector.java | 36 + .../cdc/connectors/tidb/TDBSourceOptions.java | 109 --- .../flink/cdc/connectors/tidb/TiDBSource.java | 91 --- .../TiKVChangeEventDeserializationSchema.java | 40 - .../tidb/TiKVRichParallelSourceFunction.java | 420 ---------- ...iKVSnapshotEventDeserializationSchema.java | 40 - .../tidb/metrics/TiDBSourceMetrics.java | 9 +- .../connectors/tidb/source/TiDBDialect.java | 214 ++++++ .../tidb/source/TiDBSourceBuilder.java | 203 +++++ .../source/config/TiDBConnectorConfig.java | 422 ++++++++++ .../tidb/source/config/TiDBSourceConfig.java | 140 ++++ .../config/TiDBSourceConfigFactory.java | 154 ++++ .../tidb/source/config/TiDBSourceOptions.java | 82 ++ .../source/connection/TiDBConnection.java | 526 +++++++++++++ .../TiDBConnectionPoolFactory.java} | 31 +- .../converter/TiDBDefaultValueConverter.java | 505 ++++++++++++ 
.../source/converter/TiDBValueConverters.java | 724 ++++++++++++++++++ .../tidb/source/fetch/EventEmitter.java | 60 ++ .../tidb/source/fetch/EventSourceReader.java | 69 ++ .../StoppableChangeEventSourceContext.java} | 25 +- .../tidb/source/fetch/TiDBScanFetchTask.java | 311 ++++++++ .../fetch/TiDBSourceFetchTaskContext.java | 238 ++++++ .../source/fetch/TiDBStreamFetchTask.java | 91 +++ .../tidb/source/handler/TiDBErrorHandler.java | 57 ++ .../TiDBSchemaChangeEventHandler.java} | 23 +- .../tidb/source/offset/EventOffset.java | 121 +++ .../source/offset/EventOffsetContext.java | 210 +++++ .../source/offset/EventOffsetFactory.java | 58 ++ .../tidb/source/offset/EventOffsetUtils.java | 44 ++ .../tidb/source/offset/TiDBSourceInfo.java | 93 +++ .../offset/TiDBSourceInfoStructMaker.java | 60 ++ .../source/schema/TiDBDatabaseSchema.java | 310 ++++++++ .../source/schema/TiDBFieldDefinition.java | 96 +++ .../tidb/source/schema/TiDBSchema.java | 207 +++++ .../source/schema/TiDBTableDefinition.java | 65 ++ .../source/splitter/TiDBChunkSplitter.java | 64 ++ ...aTiKVChangeEventDeserializationSchema.java | 111 --- ...ataTiKVEventDeserializationSchemaBase.java | 578 -------------- ...iKVSnapshotEventDeserializationSchema.java | 78 -- .../connectors/tidb/table/StartupOptions.java | 73 -- .../TiDBDeserializationConverterFactory.java | 171 +++++ .../tidb/table/TiDBReadableMetadata.java | 132 ++++ .../tidb/table/TiDBTableFactory.java | 253 ++++++ .../tidb/table/TiDBTableSource.java | 274 ++++--- .../tidb/table/TiDBTableSourceFactory.java | 153 ---- .../table/TiKVAppendMetadataCollector.java | 56 -- .../tidb/table/TiKVMetadataConverter.java | 50 -- .../tidb/table/TiKVReadableMetadata.java | 116 --- .../tidb/table/utils/TableKeyRangeUtils.java | 75 -- .../tidb/utils/TableDiscoveryUtils.java | 52 ++ .../tidb/utils/TiDBConnectionUtils.java | 91 +++ .../cdc/connectors/tidb/utils/TiDBUtils.java | 469 ++++++++++++ .../{table => }/utils/UriHostMapping.java | 2 +- 
.../org.apache.flink.table.factories.Factory | 2 +- .../table/TiDBTableSourceFactoryTest.java | 128 +++- .../tidb/table/utils/UriHostMappingTest.java | 42 +- 62 files changed, 7234 insertions(+), 2208 deletions(-) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java delete mode 100644 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java rename flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/{table/utils/OptionUtils.java => source/connection/TiDBConnectionPoolFactory.java} (50%) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java create 
mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java rename flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/{table/StartupMode.java => source/fetch/StoppableChangeEventSourceContext.java} (65%) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java rename flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/{table/TiKVDeserializationRuntimeConverter.java => source/handler/TiDBSchemaChangeEventHandler.java} (63%) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java create mode 100644 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java delete mode 100644 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java create mode 100644 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java rename flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/{table => }/utils/UriHostMapping.java (98%) diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml index 6d87970920e..957ed190e49 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml @@ -75,12 +75,6 @@ limitations under the License. flink-test-utils ${flink.version} test - - - org.testcontainers - testcontainers - - @@ -88,12 +82,6 @@ limitations under the License. flink-connector-test-utils ${flink.version} test - - - org.testcontainers - testcontainers - - @@ -163,18 +151,32 @@ limitations under the License. 
test + - org.testcontainers - junit-jupiter - ${testcontainers.version} - test + com.esri.geometry + esri-geometry-api + ${geometry.version} + + + com.fasterxml.jackson.core + jackson-core + + + org.apache.flink - flink-connector-test-util - ${project.version} - test + flink-cdc-base + ${project.version} + compile + + + io.debezium + debezium-connector-mysql + 1.9.8.Final + compile + +
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java
new file mode 100644
index 00000000000..7fca9cd3f13
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java
@@ -0,0 +1,115 @@
+package io.debezium.connector.tidb.Listeners;
+
+import io.debezium.antlr.AntlrDdlParserListener;
+import io.debezium.antlr.ProxyParseTreeListenerUtil;
+import io.debezium.connector.mysql.antlr.listener.AlterTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.AlterViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateAndAlterDatabaseParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateUniqueIndexParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropDatabaseParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.RenameTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.SetStatementParserListener;
+import io.debezium.connector.mysql.antlr.listener.TruncateTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.UseStatementParserListener;
+import io.debezium.connector.tidb.TiDBAntlrDdlParser;
+import io.debezium.ddl.parser.mysql.generated.MySqlParser;
+import io.debezium.ddl.parser.mysql.generated.MySqlParserBaseListener;
+import io.debezium.text.ParsingException;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ErrorNode;
+import org.antlr.v4.runtime.tree.ParseTreeListener;
+import org.antlr.v4.runtime.tree.TerminalNode;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+/**
+ * Parse-tree listener for {@link TiDBAntlrDdlParser}. It owns a list of the Debezium MySQL
+ * statement listeners and forwards every enter/exit/terminal/error event to them through {@link
+ * ProxyParseTreeListenerUtil}, collecting any {@link ParsingException} raised on the way. Events
+ * that occur inside a routine body ({@code BEGIN ... END}) are not forwarded.
+ */
+public class TiDBAntlrDdlParserListener extends MySqlParserBaseListener
+        implements AntlrDdlParserListener {
+    /** Delegates that receive the forwarded parse-tree events, in registration order. */
+    private final List<ParseTreeListener> listeners = new CopyOnWriteArrayList<>();
+
+    /** Flag for skipping phase. */
+    private boolean skipNodes;
+
+    /**
+     * Count of skipped nodes. Each enter event during skipping phase will increase the counter and
+     * each exit event will decrease it. When counter will be decreased to 0, the skipping phase
+     * will end.
+     */
+    private int skippedNodesCount = 0;
+
+    /** Collection of caught exceptions. */
+    private final Collection<ParsingException> errors = new ArrayList<>();
+
+    /**
+     * Registers the statement listeners that handle the supported DDL statements.
+     *
+     * @param parser the parser handed to every delegate listener
+     */
+    public TiDBAntlrDdlParserListener(TiDBAntlrDdlParser parser) {
+        // initialize listeners
+        listeners.add(new CreateAndAlterDatabaseParserListener(parser));
+        listeners.add(new DropDatabaseParserListener(parser));
+        listeners.add(new CreateTableParserListener(parser, listeners));
+        listeners.add(new AlterTableParserListener(parser, listeners));
+        listeners.add(new DropTableParserListener(parser));
+        listeners.add(new RenameTableParserListener(parser));
+        listeners.add(new TruncateTableParserListener(parser));
+        listeners.add(new CreateViewParserListener(parser, listeners));
+        listeners.add(new AlterViewParserListener(parser, listeners));
+        listeners.add(new DropViewParserListener(parser));
+        listeners.add(new CreateUniqueIndexParserListener(parser));
+        listeners.add(new SetStatementParserListener(parser));
+        listeners.add(new UseStatementParserListener(parser));
+    }
+
+    /**
+     * Returns all caught errors during tree walk.
+     *
+     * @return list of Parsing exceptions
+     */
+    @Override
+    public Collection<ParsingException> getErrors() {
+        return errors;
+    }
+
+    /** Counts the node while skipping; otherwise forwards the enter event to the delegates. */
+    @Override
+    public void enterEveryRule(ParserRuleContext ctx) {
+        if (skipNodes) {
+            skippedNodesCount++;
+        } else {
+            ProxyParseTreeListenerUtil.delegateEnterRule(ctx, listeners, errors);
+        }
+    }
+
+    /**
+     * Unwinds the skip counter while skipping and ends the skipping phase once it is back at
+     * zero; otherwise forwards the exit event to the delegates.
+     */
+    @Override
+    public void exitEveryRule(ParserRuleContext ctx) {
+        if (skipNodes) {
+            if (skippedNodesCount == 0) {
+                // back in the node where skipping started
+                skipNodes = false;
+            } else {
+                // going up in a tree, means decreasing a number of skipped nodes
+                skippedNodesCount--;
+            }
+        } else {
+            ProxyParseTreeListenerUtil.delegateExitRule(ctx, listeners, errors);
+        }
+    }
+
+    /** Forwards the error node to the delegates. */
+    @Override
+    public void visitErrorNode(ErrorNode node) {
+        ProxyParseTreeListenerUtil.visitErrorNode(node, listeners, errors);
+    }
+
+    /** Forwards the terminal node to the delegates. */
+    @Override
+    public void visitTerminal(TerminalNode node) {
+        ProxyParseTreeListenerUtil.visitTerminal(node, listeners, errors);
+    }
+
+    /** Starts the skipping phase when a routine body is entered. */
+    @Override
+    public void enterRoutineBody(MySqlParser.RoutineBodyContext ctx) {
+        // this is a grammar rule for BEGIN ... END part of statements. Skip it.
+        skipNodes = true;
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java
new file mode 100644
index 00000000000..00d4d052f8c
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java
@@ -0,0 +1,313 @@
+package io.debezium.connector.tidb;
+
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters;
+
+import io.debezium.antlr.AntlrDdlParserListener;
+import io.debezium.antlr.DataTypeResolver;
+import io.debezium.connector.mysql.MySqlSystemVariables;
+import io.debezium.connector.mysql.antlr.MySqlAntlrDdlParser;
+import io.debezium.connector.tidb.Listeners.TiDBAntlrDdlParserListener;
+import io.debezium.ddl.parser.mysql.generated.MySqlLexer;
+import io.debezium.ddl.parser.mysql.generated.MySqlParser;
+import io.debezium.relational.SystemVariables;
+import io.debezium.relational.Tables;
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import java.sql.Types;
+import java.util.Arrays;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+/**
+ * DDL parser for TiDB built on Debezium's {@link MySqlAntlrDdlParser}. It reuses the generated
+ * {@link MySqlLexer} / {@link MySqlParser}, walks the tree with a {@link
+ * TiDBAntlrDdlParserListener}, and maps grammar data types to {@link Types JDBC types} in {@link
+ * #initializeDataTypeResolver()}.
+ */
+public class TiDBAntlrDdlParser extends MySqlAntlrDdlParser {
+    private final ConcurrentMap<String, String> charsetNameForDatabase = new ConcurrentHashMap<>();
+    private final TiDBValueConverters converters;
+    private final Tables.TableFilter tableFilter;
+
+    /** Creates a parser without value converters that accepts every table. */
+    public TiDBAntlrDdlParser() {
+        this(null, Tables.TableFilter.includeAll());
+    }
+
+    /**
+     * Creates a parser that accepts every table.
+     *
+     * @param converters the TiDB value converters; may be {@code null}
+     */
+    public TiDBAntlrDdlParser(TiDBValueConverters converters) {
+        this(converters, Tables.TableFilter.includeAll());
+    }
+
+    /**
+     * Creates a parser with {@code throwErrorsFromTreeWalk = true}, {@code includeViews = false}
+     * and {@code includeComments = false}.
+     *
+     * @param converters the TiDB value converters; may be {@code null}
+     * @param tableFilter the filter stored for this parser
+     */
+    public TiDBAntlrDdlParser(TiDBValueConverters converters, Tables.TableFilter tableFilter) {
+        this(true, false, false, converters, tableFilter);
+    }
+
+    /**
+     * Creates a parser.
+     *
+     * @param throwErrorsFromTreeWalk currently unused, see the note in the body
+     * @param includeViews currently unused, see the note in the body
+     * @param includeComments currently unused, see the note in the body
+     * @param converters the TiDB value converters; may be {@code null}
+     * @param tableFilter the filter stored for this parser
+     */
+    public TiDBAntlrDdlParser(
+            boolean throwErrorsFromTreeWalk,
+            boolean includeViews,
+            boolean includeComments,
+            TiDBValueConverters converters,
+            Tables.TableFilter tableFilter) {
+        // NOTE(review): there is no explicit super(...) call, so Java invokes the superclass
+        // no-arg constructor and throwErrorsFromTreeWalk, includeViews and includeComments are
+        // ignored. The original call was left disabled:
+        //   super(throwErrorsFromTreeWalk, includeViews, includeComments);
+        // converters and tableFilter are likewise only stored in this subclass's private fields.
+        // Confirm whether all five arguments must reach MySqlAntlrDdlParser before merging.
+        systemVariables = new MySqlSystemVariables();
+        this.converters = converters;
+        this.tableFilter = tableFilter;
+    }
+
+    /** Parses from the grammar's {@code root} rule. */
+    @Override
+    protected ParseTree parseTree(MySqlParser parser) {
+        return parser.root();
+    }
+
+    /** Creates the TiDB listener that receives the parse-tree walk events. */
+    @Override
+    protected AntlrDdlParserListener createParseTreeWalkerListener() {
+        return new TiDBAntlrDdlParserListener(this);
+    }
+
+    @Override
+    protected MySqlLexer createNewLexerInstance(CharStream charStreams) {
+        return new MySqlLexer(charStreams);
+    }
+
+    @Override
+    protected MySqlParser createNewParserInstance(CommonTokenStream commonTokenStream) {
+        return new MySqlParser(commonTokenStream);
+    }
+
+    @Override
+    protected SystemVariables createNewSystemVariablesInstance() {
+        return new MySqlSystemVariables();
+    }
+
+    @Override
+    protected boolean isGrammarInUpperCase() {
+        return true;
+    }
+
+    /**
+     * Builds the resolver that maps each data-type grammar context and its token sequence to a
+     * JDBC type code, including optional suffix tokens and default length/scale where given.
+     *
+     * @return the populated resolver
+     */
+    @Override
+    protected DataTypeResolver initializeDataTypeResolver() {
+        DataTypeResolver.Builder dataTypeResolverBuilder = new DataTypeResolver.Builder();
+
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.StringDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.CHAR),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.CHAR, MySqlParser.VARYING),
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.VARCHAR),
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.TINYTEXT),
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.TEXT),
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.MEDIUMTEXT),
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.LONGTEXT),
+                        new DataTypeResolver.DataTypeEntry(Types.NCHAR, MySqlParser.NCHAR),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.NVARCHAR, MySqlParser.NCHAR, MySqlParser.VARYING),
+                        new DataTypeResolver.DataTypeEntry(Types.NVARCHAR, MySqlParser.NVARCHAR),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.CHAR, MySqlParser.CHAR, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.VARCHAR, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.TINYTEXT, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.TEXT, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.MEDIUMTEXT, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.LONGTEXT, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.NCHAR, MySqlParser.NCHAR, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.NVARCHAR, MySqlParser.NVARCHAR, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.CHARACTER),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.VARCHAR, MySqlParser.CHARACTER, MySqlParser.VARYING)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.NationalStringDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(
+                                        Types.NVARCHAR, MySqlParser.NATIONAL, MySqlParser.VARCHAR)
+                                .setSuffixTokens(MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                        Types.NCHAR, MySqlParser.NATIONAL, MySqlParser.CHARACTER)
+                                .setSuffixTokens(MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(
+                                        Types.NVARCHAR, MySqlParser.NCHAR, MySqlParser.VARCHAR)
+                                .setSuffixTokens(MySqlParser.BINARY)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.NationalVaryingStringDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.NVARCHAR,
+                                MySqlParser.NATIONAL,
+                                MySqlParser.CHAR,
+                                MySqlParser.VARYING),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.NVARCHAR,
+                                MySqlParser.NATIONAL,
+                                MySqlParser.CHARACTER,
+                                MySqlParser.VARYING)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.DimensionDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.TINYINT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.INT1)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.SMALLINT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.INT2)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.MEDIUMINT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT3)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.MIDDLEINT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INTEGER)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT4)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.BIGINT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.INT8)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.REAL, MySqlParser.REAL)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.DOUBLE, MySqlParser.DOUBLE)
+                                .setSuffixTokens(
+                                        MySqlParser.PRECISION,
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.DOUBLE, MySqlParser.FLOAT8)
+                                .setSuffixTokens(
+                                        MySqlParser.PRECISION,
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.FLOAT, MySqlParser.FLOAT)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.FLOAT, MySqlParser.FLOAT4)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL),
+                        new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.DECIMAL)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL)
+                                .setDefaultLengthScaleDimension(10, 0),
+                        new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.DEC)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL)
+                                .setDefaultLengthScaleDimension(10, 0),
+                        new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.FIXED)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL)
+                                .setDefaultLengthScaleDimension(10, 0),
+                        new DataTypeResolver.DataTypeEntry(Types.NUMERIC, MySqlParser.NUMERIC)
+                                .setSuffixTokens(
+                                        MySqlParser.SIGNED,
+                                        MySqlParser.UNSIGNED,
+                                        MySqlParser.ZEROFILL)
+                                .setDefaultLengthScaleDimension(10, 0),
+                        new DataTypeResolver.DataTypeEntry(Types.BIT, MySqlParser.BIT),
+                        new DataTypeResolver.DataTypeEntry(Types.TIME, MySqlParser.TIME),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.TIMESTAMP_WITH_TIMEZONE, MySqlParser.TIMESTAMP),
+                        new DataTypeResolver.DataTypeEntry(Types.TIMESTAMP, MySqlParser.DATETIME),
+                        new DataTypeResolver.DataTypeEntry(Types.BINARY, MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(Types.VARBINARY, MySqlParser.VARBINARY),
+                        new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.BLOB),
+                        new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.YEAR)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.SimpleDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.DATE, MySqlParser.DATE),
+                        new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.TINYBLOB),
+                        new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.MEDIUMBLOB),
+                        new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.LONGBLOB),
+                        new DataTypeResolver.DataTypeEntry(Types.BOOLEAN, MySqlParser.BOOL),
+                        new DataTypeResolver.DataTypeEntry(Types.BOOLEAN, MySqlParser.BOOLEAN),
+                        new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.SERIAL)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.CollectionDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.ENUM)
+                                .setSuffixTokens(MySqlParser.BINARY),
+                        new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.SET)
+                                .setSuffixTokens(MySqlParser.BINARY)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.SpatialDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.OTHER, MySqlParser.GEOMETRYCOLLECTION),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.GEOMCOLLECTION),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.LINESTRING),
+                        new DataTypeResolver.DataTypeEntry(
+                                Types.OTHER, MySqlParser.MULTILINESTRING),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.MULTIPOINT),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.MULTIPOLYGON),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.POINT),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.POLYGON),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.JSON),
+                        new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.GEOMETRY)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.LongVarbinaryDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.LONG)
+                                .setSuffixTokens(MySqlParser.VARBINARY)));
+        dataTypeResolverBuilder.registerDataTypes(
+                MySqlParser.LongVarcharDataTypeContext.class.getCanonicalName(),
+                Arrays.asList(
+                        new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.LONG)
+                                .setSuffixTokens(MySqlParser.VARCHAR)));
+
+        return dataTypeResolverBuilder.build();
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java
new file mode 100644
index 00000000000..79b08f8f35e
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java
@@ -0,0 +1,50 @@
+package io.debezium.connector.tidb;
+
+import io.debezium.connector.AbstractSourceInfo;
+import io.debezium.connector.mysql.MySqlOffsetContext;
+import io.debezium.data.Envelope;
+import io.debezium.pipeline.source.spi.EventMetadataProvider;
+import io.debezium.pipeline.spi.OffsetContext;
+import io.debezium.schema.DataCollectionId;
+import io.debezium.util.Collect;
+import org.apache.kafka.connect.data.Struct;
+ +import java.time.Instant; +import java.util.Map; + +import static org.apache.flink.cdc.connectors.tidb.source.offset.TiDBSourceInfo.COMMIT_VERSION_KEY; + +public class TiDBEventMetadataProvider implements EventMetadataProvider { + @Override + public Instant getEventTimestamp( + DataCollectionId source, OffsetContext offset, Object key, Struct value) { + if (value == null) { + return null; + } + final Struct sourceInfo = value.getStruct(Envelope.FieldName.SOURCE); + if (sourceInfo == null) { + return null; + } + final Long timestamp = sourceInfo.getInt64(AbstractSourceInfo.TIMESTAMP_KEY); + return timestamp == null ? null : Instant.ofEpochMilli(timestamp); + } + + @Override + public Map getEventSourcePosition( + DataCollectionId source, OffsetContext offset, Object key, Struct value) { + if (value == null) { + return null; + } + final Struct sourceInfo = value.getStruct(Envelope.FieldName.SOURCE); + if (sourceInfo == null) { + return null; + } + return Collect.hashMapOf(COMMIT_VERSION_KEY, sourceInfo.getString(COMMIT_VERSION_KEY)); + } + + @Override + public String getTransactionId( + DataCollectionId source, OffsetContext offset, Object key, Struct value) { + return ((MySqlOffsetContext) offset).getTransactionId(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java new file mode 100644 index 00000000000..699168f3f5d --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java @@ -0,0 +1,50 @@ +package io.debezium.connector.tidb; + +import io.debezium.pipeline.spi.Partition; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; + +/** Debezium {@link Partition} for TiDB; equality and hash code are based on the server name. */ +public class TiDBPartition implements Partition { + private final String serverName; + +
public TiDBPartition(String serverName) { + this.serverName = serverName; + } + + @Override + public Map getSourcePartition() { + return Collections.emptyMap(); + } + + @Override + public Map getLoggingContext() { + return Partition.super.getLoggingContext(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final io.debezium.connector.tidb.TiDBPartition other = + (io.debezium.connector.tidb.TiDBPartition) obj; + return Objects.equals(serverName, other.serverName); + } + + @Override + public int hashCode() { + // Must agree with equals(), which compares serverName only. + return Objects.hashCode(serverName); + } + + @Override + public String toString() { + return super.toString(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java new file mode 100644 index 00000000000..4d6175f09a7 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java @@ -0,0 +1,27 @@ +package io.debezium.connector.tidb; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; + +import io.debezium.connector.common.CdcSourceTaskContext; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; + +public class TiDBTaskContext extends CdcSourceTaskContext { + private final TiDBDatabaseSchema schema; + private final TopicSelector topicSelector; + + public TiDBTaskContext(TiDBConnectorConfig config, TiDBDatabaseSchema schema) { + super(config.getContextName(), config.getLogicalName(), schema::tableIds); + this.schema = schema; + topicSelector = TidbTopicSelector.defaultSelector(config); + } + + public TiDBDatabaseSchema getSchema() { + return schema; + } + + public
TopicSelector getTopicSelector() { + return topicSelector; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java new file mode 100644 index 00000000000..aab12292f2b --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java @@ -0,0 +1,47 @@ +package io.debezium.connector.tidb; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import io.debezium.annotation.ThreadSafe; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; + +/** + * Factory methods for {@link TopicSelector}s that name a table's topic by joining the prefix, the + * table's catalog and the table name with the selector's delimiter. + */ +@ThreadSafe +public class TidbTopicSelector { + + /** + * Get the default topic selector logic, which uses a '.' delimiter character when needed. + * + * @param prefix the name of the prefix to be used for all topics; may not be null and must not + * terminate in the {@code delimiter} + * @param heartbeatPrefix the name of the prefix to be used for all heartbeat topics; may not be + * null and must not terminate in the {@code delimiter} + * @return the topic selector; never null + * @deprecated use {@link #defaultSelector(TiDBConnectorConfig)} instead + */ + @Deprecated + public static TopicSelector defaultSelector(String prefix, String heartbeatPrefix) { + return TopicSelector.defaultSelector( + prefix, + heartbeatPrefix, + ".", + (t, pref, delimiter) -> String.join(delimiter, pref, t.catalog(), t.table())); + } + + /** + * Get the default topic selector logic built from the given connector configuration. + * + * @param connectorConfig the connector configuration handed to {@link TopicSelector} + * @return the topic selector + */ + public static TopicSelector defaultSelector(TiDBConnectorConfig connectorConfig) { + return TopicSelector.defaultSelector( + connectorConfig, + (tableId, prefix, delimiter) -> + String.join(delimiter, prefix, tableId.catalog(), tableId.table())); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java deleted file mode 100644 index fc468e68512..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.cdc.connectors.tidb.table.utils.UriHostMapping; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; -import org.apache.flink.configuration.Configuration; - -import org.tikv.common.ConfigUtils; -import org.tikv.common.TiConfiguration; - -import java.util.Map; -import java.util.Optional; - -/** Configurations for {@link TiDBSource}. 
*/ -public class TDBSourceOptions { - - private TDBSourceOptions() {} - - public static final ConfigOption DATABASE_NAME = - ConfigOptions.key("database-name") - .stringType() - .noDefaultValue() - .withDescription("Database name of the TiDB server to monitor."); - - public static final ConfigOption TABLE_NAME = - ConfigOptions.key("table-name") - .stringType() - .noDefaultValue() - .withDescription("Table name of the TiDB database to monitor."); - - public static final ConfigOption SCAN_STARTUP_MODE = - ConfigOptions.key("scan.startup.mode") - .stringType() - .defaultValue("initial") - .withDescription( - "Optional startup mode for TiDB CDC consumer, valid enumerations are " - + "\"initial\", \"latest-offset\""); - - public static final ConfigOption PD_ADDRESSES = - ConfigOptions.key("pd-addresses") - .stringType() - .noDefaultValue() - .withDescription("TiKV cluster's PD address"); - - public static final ConfigOption HOST_MAPPING = - ConfigOptions.key("host-mapping") - .stringType() - .noDefaultValue() - .withDescription( - "TiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 
192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9."); - public static final ConfigOption TIKV_GRPC_TIMEOUT = - ConfigOptions.key(ConfigUtils.TIKV_GRPC_TIMEOUT) - .longType() - .noDefaultValue() - .withDescription("TiKV GRPC timeout in ms"); - - public static final ConfigOption TIKV_GRPC_SCAN_TIMEOUT = - ConfigOptions.key(ConfigUtils.TIKV_GRPC_SCAN_TIMEOUT) - .longType() - .noDefaultValue() - .withDescription("TiKV GRPC scan timeout in ms"); - - public static final ConfigOption TIKV_BATCH_GET_CONCURRENCY = - ConfigOptions.key(ConfigUtils.TIKV_BATCH_GET_CONCURRENCY) - .intType() - .noDefaultValue() - .withDescription("TiKV GRPC batch get concurrency"); - - public static final ConfigOption TIKV_BATCH_SCAN_CONCURRENCY = - ConfigOptions.key(ConfigUtils.TIKV_BATCH_SCAN_CONCURRENCY) - .intType() - .noDefaultValue() - .withDescription("TiKV GRPC batch scan concurrency"); - - public static TiConfiguration getTiConfiguration( - final String pdAddrsStr, final String hostMapping, final Map options) { - final Configuration configuration = Configuration.fromMap(options); - - final TiConfiguration tiConf = TiConfiguration.createDefault(pdAddrsStr); - Optional.of(new UriHostMapping(hostMapping)).ifPresent(tiConf::setHostMapping); - configuration.getOptional(TIKV_GRPC_TIMEOUT).ifPresent(tiConf::setTimeout); - configuration.getOptional(TIKV_GRPC_SCAN_TIMEOUT).ifPresent(tiConf::setScanTimeout); - configuration - .getOptional(TIKV_BATCH_GET_CONCURRENCY) - .ifPresent(tiConf::setBatchGetConcurrency); - - configuration - .getOptional(TIKV_BATCH_SCAN_CONCURRENCY) - .ifPresent(tiConf::setBatchScanConcurrency); - return tiConf; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java deleted file mode 100644 index fa74f69ba88..00000000000 --- 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.cdc.connectors.tidb.table.StartupOptions; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; - -import org.tikv.common.TiConfiguration; - -/** A builder to build a SourceFunction which can read snapshot and continue to read CDC events. */ -public class TiDBSource { - - public static Builder builder() { - return new Builder<>(); - } - - /** Builder class of {@link TiDBSource}. */ - public static class Builder { - private String database; - private String tableName; - private StartupOptions startupOptions = StartupOptions.initial(); - private TiConfiguration tiConf; - - private TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; - private TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; - - /** Database name to be monitored. 
*/ - public Builder database(String database) { - this.database = database; - return this; - } - - /** TableName name to be monitored. */ - public Builder tableName(String tableName) { - this.tableName = tableName; - return this; - } - - /** The deserializer used to convert from consumed snapshot event from TiKV. */ - public Builder snapshotEventDeserializer( - TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema) { - this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; - return this; - } - - /** The deserializer used to convert from consumed change event from TiKV. */ - public Builder changeEventDeserializer( - TiKVChangeEventDeserializationSchema changeEventDeserializationSchema) { - this.changeEventDeserializationSchema = changeEventDeserializationSchema; - return this; - } - - /** Specifies the startup options. */ - public Builder startupOptions(StartupOptions startupOptions) { - this.startupOptions = startupOptions; - return this; - } - - /** TIDB config. 
*/ - public Builder tiConf(TiConfiguration tiConf) { - this.tiConf = tiConf; - return this; - } - - public RichParallelSourceFunction build() { - - return new TiKVRichParallelSourceFunction<>( - snapshotEventDeserializationSchema, - changeEventDeserializationSchema, - tiConf, - startupOptions.startupMode, - database, - tableName); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java deleted file mode 100644 index bf652624fdf..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.common.annotation.PublicEvolving; -import org.apache.flink.util.Collector; - -import org.tikv.kvproto.Cdcpb.Event.Row; - -import java.io.Serializable; - -/** - * The deserialization schema describes how to turn the TiKV Change Event into data types - * (Java/Scala objects) that are processed by Flink. - * - * @param The type created by the deserialization schema. - */ -@PublicEvolving -public interface TiKVChangeEventDeserializationSchema - extends Serializable, ResultTypeQueryable { - - /** Deserialize the TiDB record. */ - void deserialize(Row record, Collector out) throws Exception; -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java deleted file mode 100644 index 16c130b38c8..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.common.state.CheckpointListener; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.base.LongSerializer; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.connectors.tidb.metrics.TiDBSourceMetrics; -import org.apache.flink.cdc.connectors.tidb.table.StartupMode; -import org.apache.flink.cdc.connectors.tidb.table.utils.TableKeyRangeUtils; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.util.Collector; -import org.apache.flink.util.Preconditions; - -import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.tikv.cdc.CDCClient; -import org.tikv.common.TiConfiguration; -import org.tikv.common.TiSession; -import org.tikv.common.key.RowKey; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.common.meta.TiTimestamp; 
-import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Coprocessor; -import org.tikv.kvproto.Kvrpcpb; -import org.tikv.shade.com.google.protobuf.ByteString; -import org.tikv.txn.KVClient; - -import java.util.List; -import java.util.Objects; -import java.util.TreeMap; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; - -/** - * The source implementation for TiKV that read snapshot events first and then read the change - * event. - */ -public class TiKVRichParallelSourceFunction extends RichParallelSourceFunction - implements CheckpointListener, CheckpointedFunction, ResultTypeQueryable { - - private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(TiKVRichParallelSourceFunction.class); - private static final long SNAPSHOT_VERSION_EPOCH = -1L; - private static final long STREAMING_VERSION_START_EPOCH = 0L; - - private final TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; - private final TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; - private final TiConfiguration tiConf; - private final StartupMode startupMode; - private final String database; - private final String tableName; - - /** Task local variables. 
*/ - private transient TiSession session = null; - - private transient Coprocessor.KeyRange keyRange = null; - private transient CDCClient cdcClient = null; - private transient SourceFunction.SourceContext sourceContext = null; - private transient volatile long resolvedTs = -1L; - private transient TreeMap prewrites = null; - private transient TreeMap commits = null; - private transient BlockingQueue committedEvents = null; - private transient OutputCollector outputCollector; - - private transient boolean running = true; - private transient ExecutorService executorService; - private transient TiDBSourceMetrics sourceMetrics; - - /** offset state. */ - private transient ListState offsetState; - - private static final long CLOSE_TIMEOUT = 30L; - - public TiKVRichParallelSourceFunction( - TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema, - TiKVChangeEventDeserializationSchema changeEventDeserializationSchema, - TiConfiguration tiConf, - StartupMode startupMode, - String database, - String tableName) { - this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; - this.changeEventDeserializationSchema = changeEventDeserializationSchema; - this.tiConf = tiConf; - this.startupMode = startupMode; - this.database = database; - this.tableName = tableName; - } - - @Override - public void open(final Configuration config) throws Exception { - super.open(config); - session = TiSession.create(tiConf); - TiTableInfo tableInfo = session.getCatalog().getTable(database, tableName); - if (tableInfo == null) { - throw new RuntimeException( - String.format("Table %s.%s does not exist.", database, tableName)); - } - long tableId = tableInfo.getId(); - keyRange = - TableKeyRangeUtils.getTableKeyRange( - tableId, - getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks(), - getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()); - cdcClient = new CDCClient(session, keyRange); - prewrites = new TreeMap<>(); - commits = new TreeMap<>(); 
- // cdc event will lose if pull cdc event block when region split - // use queue to separate read and write to ensure pull event unblock. - // since sink jdbc is slow, 5000W queue size may be safe size. - committedEvents = new LinkedBlockingQueue<>(); - outputCollector = new OutputCollector<>(); - resolvedTs = - startupMode == StartupMode.INITIAL - ? SNAPSHOT_VERSION_EPOCH - : STREAMING_VERSION_START_EPOCH; - ThreadFactory threadFactory = - new ThreadFactoryBuilder() - .setNameFormat( - "tidb-source-function-" - + getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()) - .build(); - executorService = Executors.newSingleThreadExecutor(threadFactory); - final MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - sourceMetrics = new TiDBSourceMetrics(metricGroup); - sourceMetrics.registerMetrics(); - } - - @Override - public void run(final SourceFunction.SourceContext ctx) throws Exception { - sourceContext = ctx; - outputCollector.context = sourceContext; - - if (startupMode == StartupMode.INITIAL) { - synchronized (sourceContext.getCheckpointLock()) { - readSnapshotEvents(); - } - } else { - LOG.info("Skip snapshot read"); - resolvedTs = session.getTimestamp().getVersion(); - } - - LOG.info("start read change events"); - cdcClient.start(resolvedTs); - running = true; - readChangeEvents(); - } - - private void handleRow(final Cdcpb.Event.Row row) { - if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { - // Don't handle index key for now - return; - } - LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); - switch (row.getType()) { - case COMMITTED: - prewrites.put(RowKeyWithTs.ofStart(row), row); - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case COMMIT: - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case PREWRITE: - prewrites.put(RowKeyWithTs.ofStart(row), row); - break; - case ROLLBACK: - prewrites.remove(RowKeyWithTs.ofStart(row)); - break; - default: - LOG.warn("Unsupported row type:" + 
row.getType()); - } - } - - protected void readSnapshotEvents() throws Exception { - LOG.info("read snapshot events"); - try (KVClient scanClient = session.createKVClient()) { - long startTs = session.getTimestamp().getVersion(); - ByteString start = keyRange.getStart(); - while (true) { - final List segment = - scanClient.scan(start, keyRange.getEnd(), startTs); - - if (segment.isEmpty()) { - resolvedTs = startTs; - break; - } - - for (final Kvrpcpb.KvPair pair : segment) { - if (TableKeyRangeUtils.isRecordKey(pair.getKey().toByteArray())) { - snapshotEventDeserializationSchema.deserialize(pair, outputCollector); - reportMetrics(0L, startTs); - } - } - - start = - RowKey.toRawKey(segment.get(segment.size() - 1).getKey()) - .next() - .toByteString(); - } - } - } - - protected void readChangeEvents() throws Exception { - LOG.info("read change event from resolvedTs:{}", resolvedTs); - // child thread to sink committed rows. - executorService.execute( - () -> { - while (running) { - try { - Cdcpb.Event.Row committedRow = committedEvents.take(); - changeEventDeserializationSchema.deserialize( - committedRow, outputCollector); - // use startTs of row as messageTs, use commitTs of row as fetchTs - reportMetrics(committedRow.getStartTs(), committedRow.getCommitTs()); - } catch (Exception e) { - e.printStackTrace(); - } - } - }); - while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { - for (int i = 0; i < 1000; i++) { - final Cdcpb.Event.Row row = cdcClient.get(); - if (row == null) { - break; - } - handleRow(row); - } - resolvedTs = cdcClient.getMaxResolvedTs(); - if (commits.size() > 0) { - flushRows(resolvedTs); - } - } - } - - protected void flushRows(final long timestamp) throws Exception { - Preconditions.checkState(sourceContext != null, "sourceContext shouldn't be null"); - synchronized (sourceContext) { - while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { - final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); - final 
Cdcpb.Event.Row prewriteRow = - prewrites.remove(RowKeyWithTs.ofStart(commitRow)); - // if pull cdc event block when region split, cdc event will lose. - committedEvents.offer(prewriteRow); - } - } - } - - @Override - public void cancel() { - try { - running = false; - if (cdcClient != null) { - cdcClient.close(); - } - if (executorService != null) { - executorService.shutdown(); - if (!executorService.awaitTermination(CLOSE_TIMEOUT, TimeUnit.SECONDS)) { - LOG.warn( - "Failed to close the tidb source function in {} seconds.", - CLOSE_TIMEOUT); - } - } - } catch (final Exception e) { - LOG.error("Unable to close cdcClient", e); - } - } - - @Override - public void snapshotState(final FunctionSnapshotContext context) throws Exception { - LOG.info( - "snapshotState checkpoint: {} at resolvedTs: {}", - context.getCheckpointId(), - resolvedTs); - flushRows(resolvedTs); - offsetState.clear(); - offsetState.add(resolvedTs); - } - - @Override - public void initializeState(final FunctionInitializationContext context) throws Exception { - LOG.info("initialize checkpoint"); - offsetState = - context.getOperatorStateStore() - .getListState( - new ListStateDescriptor<>( - "resolvedTsState", LongSerializer.INSTANCE)); - if (context.isRestored()) { - for (final Long offset : offsetState.get()) { - resolvedTs = offset; - LOG.info("Restore State from resolvedTs: {}", resolvedTs); - return; - } - } else { - resolvedTs = 0; - LOG.info("Initialize State from resolvedTs: {}", resolvedTs); - } - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - // do nothing - } - - @Override - public TypeInformation getProducedType() { - return snapshotEventDeserializationSchema.getProducedType(); - } - - // --------------------------------------- - // static Utils classes - // --------------------------------------- - private static class RowKeyWithTs implements Comparable { - private final long timestamp; - private final RowKey rowKey; - - private 
RowKeyWithTs(final long timestamp, final RowKey rowKey) { - this.timestamp = timestamp; - this.rowKey = rowKey; - } - - private RowKeyWithTs(final long timestamp, final byte[] key) { - this(timestamp, RowKey.decode(key)); - } - - @Override - public int compareTo(final RowKeyWithTs that) { - int res = Long.compare(this.timestamp, that.timestamp); - if (res == 0) { - res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); - } - if (res == 0) { - res = Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); - } - return res; - } - - @Override - public int hashCode() { - return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); - } - - @Override - public boolean equals(final Object thatObj) { - if (thatObj instanceof RowKeyWithTs) { - final RowKeyWithTs that = (RowKeyWithTs) thatObj; - return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); - } - return false; - } - - static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); - } - - static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); - } - } - - private static class OutputCollector implements Collector { - - private SourceFunction.SourceContext context; - - @Override - public void collect(T record) { - context.collect(record); - } - - @Override - public void close() { - // do nothing - } - } - - private void reportMetrics(long messageTs, long fetchTs) { - long now = System.currentTimeMillis(); - // record the latest process time - sourceMetrics.recordProcessTime(now); - long messageTimestamp = TiTimestamp.extractPhysical(messageTs); - long fetchTimestamp = TiTimestamp.extractPhysical(fetchTs); - if (messageTimestamp > 0L) { - // report fetch delay - if (fetchTimestamp >= messageTimestamp) { - sourceMetrics.recordFetchDelay(fetchTimestamp - messageTimestamp); - } - // report emit delay - 
sourceMetrics.recordEmitDelay(now - messageTimestamp); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java deleted file mode 100644 index a0a43658181..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.common.annotation.PublicEvolving; -import org.apache.flink.util.Collector; - -import org.tikv.kvproto.Kvrpcpb.KvPair; - -import java.io.Serializable; - -/** - * The deserialization schema describes how to turn the TiKV snapshot event into data types - * (Java/Scala objects) that are processed by Flink. - * - * @param The type created by the deserialization schema. 
- */ -@PublicEvolving -public interface TiKVSnapshotEventDeserializationSchema - extends Serializable, ResultTypeQueryable { - - /** Deserialize the TiDB record. */ - void deserialize(KvPair record, Collector out) throws Exception; -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java index 1f32c0f3411..aff0d124269 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java @@ -17,7 +17,6 @@ package org.apache.flink.cdc.connectors.tidb.metrics; -import org.apache.flink.cdc.connectors.tidb.TiKVRichParallelSourceFunction; import org.apache.flink.metrics.Gauge; import org.apache.flink.metrics.MetricGroup; @@ -25,15 +24,15 @@ import static org.apache.flink.runtime.metrics.MetricNames.CURRENT_FETCH_EVENT_TIME_LAG; import static org.apache.flink.runtime.metrics.MetricNames.SOURCE_IDLE_TIME; -/** A collection class for handling metrics in {@link TiKVRichParallelSourceFunction}. */ +/** A collection class for handling metrics in the TiDB source. */ public class TiDBSourceMetrics { private final MetricGroup metricGroup; /** - * The last record processing time, which is updated after {@link - * TiKVRichParallelSourceFunction} fetches a batch of data. It's mainly used to report metrics - * sourceIdleTime for sourceIdleTime = System.currentTimeMillis() - processTime. + * The last record processing time, which is updated after the source fetches a batch of data.
+ * It's mainly used to report metrics sourceIdleTime for sourceIdleTime = + * System.currentTimeMillis() - processTime. */ private long processTime = 0L; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java new file mode 100644 index 00000000000..62572da20b9 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionFactory; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; +import org.apache.flink.cdc.connectors.base.source.assigner.splitter.ChunkSplitter; +import org.apache.flink.cdc.connectors.base.source.assigner.state.ChunkSplitterState; +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnectionPoolFactory; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBScanFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBSourceFetchTaskContext; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBStreamFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBSchema; +import org.apache.flink.cdc.connectors.tidb.source.splitter.TiDBChunkSplitter; +import org.apache.flink.cdc.connectors.tidb.utils.TableDiscoveryUtils; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.TableId; +import io.debezium.relational.history.TableChanges; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
javax.annotation.Nullable;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** The {@link JdbcDataSourceDialect} implementation for the TiDB data source. */
+public class TiDBDialect implements JdbcDataSourceDialect {
+    private static final Logger LOG = LoggerFactory.getLogger(TiDBDialect.class);
+
+    private static final String QUOTED_CHARACTER = "`";
+    private static final long serialVersionUID = 1L;
+
+    private final TiDBSourceConfig sourceConfig;
+    private transient TiDBSchema tiDBSchema;
+    @Nullable private TiDBStreamFetchTask streamFetchTask;
+
+    public TiDBDialect(TiDBSourceConfig sourceConfig) {
+        this.sourceConfig = sourceConfig;
+    }
+
+    @Override
+    public String getName() {
+        return "TiDB";
+    }
+
+    @Override
+    public Offset displayCurrentOffset(JdbcSourceConfig sourceConfig) {
+        try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) {
+            return TiDBUtils.currentBinlogOffset(jdbcConnection);
+        } catch (Exception e) {
+            throw new FlinkRuntimeException("Read the binlog offset error", e);
+        }
+    }
+
+    @Override
+    public boolean isDataCollectionIdCaseSensitive(JdbcSourceConfig sourceConfig) {
+        try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) {
+            return TiDBConnectionUtils.isTableIdCaseInsensitive(jdbcConnection);
+        } catch (SQLException e) {
+            throw new FlinkRuntimeException("Error reading TiDB variables: " + e.getMessage(), e);
+        }
+    }
+
+    @Override
+    public ChunkSplitter createChunkSplitter(JdbcSourceConfig sourceConfig) {
+        return new TiDBChunkSplitter(
+                sourceConfig, this, ChunkSplitterState.NO_SPLITTING_TABLE_STATE);
+    }
+
+    @Override
+    public ChunkSplitter createChunkSplitter(
+            JdbcSourceConfig sourceConfig, ChunkSplitterState chunkSplitterState) {
+        return new TiDBChunkSplitter(sourceConfig, this, chunkSplitterState);
+    }
+
+    @Override
+    public FetchTask.Context createFetchTaskContext(JdbcSourceConfig sourceConfig) {
+        return new TiDBSourceFetchTaskContext(sourceConfig, this, openJdbcConnection());
+    }
+
+    @Override
+    public void notifyCheckpointComplete(long checkpointId, Offset offset) throws Exception {
+        if (streamFetchTask != null) {
+            streamFetchTask.commitCurrentOffset(offset);
+        }
+    }
+
+    @Override
+    public boolean isIncludeDataCollection(JdbcSourceConfig sourceConfig, TableId tableId) {
+        // Apply the same table filters that discoverDataCollections() hands to the discovery.
+        return sourceConfig.getTableFilters().dataCollectionFilter().isIncluded(tableId);
+    }
+
+    @Override
+    public List<TableId> discoverDataCollections(JdbcSourceConfig sourceConfig) {
+        try (JdbcConnection jdbc = openJdbcConnection(sourceConfig)) {
+            List<TableId> tableIds =
+                    TableDiscoveryUtils.listTables(
+                            sourceConfig.getDatabaseList().get(0),
+                            jdbc,
+                            sourceConfig.getTableFilters());
+            if (tableIds.isEmpty()) {
+                throw new FlinkRuntimeException(
+                        "No tables discovered for the given tables:" + sourceConfig.getTableList());
+            }
+            return tableIds;
+        } catch (SQLException e) {
+            throw new FlinkRuntimeException("Error to discover tables:" + e.getMessage(), e);
+        }
+    }
+
+    @Override
+    public Map<TableId, TableChanges.TableChange> discoverDataCollectionSchemas(
+            JdbcSourceConfig sourceConfig) {
+        final List<TableId> capturedTableIds = discoverDataCollections(sourceConfig);
+
+        try (JdbcConnection jdbc = openJdbcConnection(sourceConfig)) {
+            // fetch table schemas
+            Map<TableId, TableChanges.TableChange> tableSchemas = new HashMap<>();
+            for (TableId tableId : capturedTableIds) {
+                TableChanges.TableChange tableSchema = queryTableSchema(jdbc, tableId);
+                tableSchemas.put(tableId, tableSchema);
+            }
+            return tableSchemas;
+        } catch (Exception e) {
+            throw new FlinkRuntimeException(
+                    "Error to discover table schemas: " + e.getMessage(), e);
+        }
+    }
+
+    @Override
+    public JdbcConnection openJdbcConnection(JdbcSourceConfig sourceConfig) {
+        TiDBSourceConfig tiDBSourceConfig = (TiDBSourceConfig) sourceConfig;
+        TiDBConnectorConfig dbzConfig = tiDBSourceConfig.getDbzConnectorConfig();
+
+        JdbcConnection jdbc =
+                new TiDBConnection(
+                        dbzConfig.getJdbcConfig(),
+                        new JdbcConnectionFactory(sourceConfig, getPooledDataSourceFactory()),
+                        QUOTED_CHARACTER,
+                        QUOTED_CHARACTER);
+        try {
+            jdbc.connect();
+        } catch (Exception e) {
+            LOG.error("Failed to open TiDB connection", e);
+            throw new FlinkRuntimeException(e);
+        }
+        return jdbc;
+    }
+
+    public TiDBConnection openJdbcConnection() {
+        return (TiDBConnection) openJdbcConnection(sourceConfig);
+    }
+
+    @Override
+    public JdbcConnectionPoolFactory getPooledDataSourceFactory() {
+        return new TiDBConnectionPoolFactory();
+    }
+
+    @Override
+    public TableChanges.TableChange queryTableSchema(JdbcConnection jdbc, TableId tableId) {
+        if (tiDBSchema == null) {
+            tiDBSchema =
+                    new TiDBSchema(sourceConfig, isDataCollectionIdCaseSensitive(sourceConfig));
+        }
+        return tiDBSchema.getTableSchema(jdbc, tableId);
+    }
+
+    @Override
+    public FetchTask<SourceSplitBase> createFetchTask(SourceSplitBase sourceSplitBase) {
+        if (sourceSplitBase.isSnapshotSplit()) {
+            return new TiDBScanFetchTask(sourceSplitBase.asSnapshotSplit());
+        } else {
+            this.streamFetchTask = new TiDBStreamFetchTask(sourceSplitBase.asStreamSplit());
+            return this.streamFetchTask;
+        }
+    }
+
+    @Override
+    public void close() throws IOException {
+        JdbcDataSourceDialect.super.close();
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java
new file mode 100644
index 00000000000..a1e70c7a68f
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source;
+
+import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfigFactory;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource;
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory;
+import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.table.catalog.ObjectPath;
+
+import org.tikv.common.TiConfiguration;
+
+import java.time.Duration;
+import java.util.Map;
+import java.util.Properties;
+
+import static org.apache.flink.cdc.common.utils.Preconditions.checkNotNull;
+
+/** The builder for {@link TiDBIncrementalSource}; {@code T} is the emitted record type. */
+public class TiDBSourceBuilder<T> {
+    private final TiDBSourceConfigFactory configFactory = new TiDBSourceConfigFactory();
+    private DebeziumDeserializationSchema<T> deserializer;
+
+    private TiDBSourceBuilder() {}
+
+    public TiDBSourceBuilder<T> startupOptions(StartupOptions startupOptions) {
+        this.configFactory.startupOptions(startupOptions);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> hostname(String hostname) {
+        this.configFactory.hostname(hostname);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> port(int port) {
+        this.configFactory.port(port);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> driverClassName(String driverClassName) {
+        this.configFactory.driverClassName(driverClassName);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> databaseList(String... databaseList) {
+        this.configFactory.databaseList(databaseList);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> tableList(String... tableList) {
+        this.configFactory.tableList(tableList);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> username(String username) {
+        this.configFactory.username(username);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> password(String password) {
+        this.configFactory.password(password);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> jdbcProperties(Properties properties) {
+        this.configFactory.jdbcProperties(properties);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> tikvProperties(Properties properties) {
+        this.configFactory.tikvProperties(properties);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> serverTimeZone(String timeZone) {
+        this.configFactory.serverTimeZone(timeZone);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> connectTimeout(Duration connectTimeout) {
+        this.configFactory.connectTimeout(connectTimeout);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> connectionPoolSize(int connectionPoolSize) {
+        this.configFactory.connectionPoolSize(connectionPoolSize);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> connectMaxRetries(int connectMaxRetries) {
+        this.configFactory.connectMaxRetries(connectMaxRetries);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> chunkKeyColumn(String chunkKeyColumn) {
+        this.configFactory.chunkKeyColumn(chunkKeyColumn);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> chunkKeyColumns(Map<ObjectPath, String> chunkKeyColumns) {
+        this.configFactory.chunkKeyColumns(chunkKeyColumns);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> pdAddresses(String pdAddresses) {
+        this.configFactory.pdAddresses(pdAddresses);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> hostMapping(String hostMapping) {
+        this.configFactory.hostMapping(hostMapping);
+        return this;
+    }
+
+    /**
+     * The split size (number of rows) of a table snapshot; captured tables are split into
+     * multiple splits when reading the snapshot of a table.
+     */
+    public TiDBSourceBuilder<T> splitSize(int splitSize) {
+        this.configFactory.splitSize(splitSize);
+        return this;
+    }
+
+    /** The maximum fetch size per poll when reading a table snapshot. */
+    public TiDBSourceBuilder<T> fetchSize(int fetchSize) {
+        this.configFactory.fetchSize(fetchSize);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> splitMetaGroupSize(int splitMetaGroupSize) {
+        this.configFactory.splitMetaGroupSize(splitMetaGroupSize);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> distributionFactorUpper(double distributionFactorUpper) {
+        this.configFactory.distributionFactorUpper(distributionFactorUpper);
+        return this;
+    }
+
+    /**
+     * The lower bound of the split key even-distribution factor; the factor is used to
+     * determine whether the table is evenly distributed or not.
+     */
+    public TiDBSourceBuilder<T> distributionFactorLower(double distributionFactorLower) {
+        this.configFactory.distributionFactorLower(distributionFactorLower);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> scanNewlyAddedTableEnabled(boolean scanNewlyAddedTableEnabled) {
+        this.configFactory.scanNewlyAddedTableEnabled(scanNewlyAddedTableEnabled);
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
+        this.deserializer = deserializer;
+        return this;
+    }
+
+    public TiDBSourceBuilder<T> tiConfiguration(TiConfiguration tiConfiguration) {
+        this.configFactory.tiConfiguration(tiConfiguration);
+        return this;
+    }
+
+    public TiDBIncrementalSource<T> build() {
+        EventOffsetFactory offsetFactory = new EventOffsetFactory();
+        TiDBDialect dialect = new TiDBDialect(configFactory.create(0));
+        return new TiDBIncrementalSource<>(
+                configFactory, checkNotNull(deserializer), offsetFactory, dialect);
+    }
+
+    /** The {@link JdbcIncrementalSource} implementation for TiDB. */
+    public static class TiDBIncrementalSource<T> extends JdbcIncrementalSource<T> {
+        public TiDBIncrementalSource(
+                JdbcSourceConfigFactory configFactory,
+                DebeziumDeserializationSchema<T> deserializationSchema,
+                EventOffsetFactory offsetFactory,
+                TiDBDialect dataSourceDialect) {
+            super(configFactory, deserializationSchema, offsetFactory, dataSourceDialect);
+        }
+
+        public static <T> TiDBSourceBuilder<T> builder() {
+            return new TiDBSourceBuilder<>();
+        }
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java
new file mode 100644
index 00000000000..67c010d3168
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java
@@ -0,0 +1,422 @@
+/*
+ * Licensed to the
Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.config;
+
+import org.apache.flink.cdc.connectors.tidb.source.offset.TiDBSourceInfoStructMaker;
+
+import io.debezium.config.CommonConnectorConfig;
+import io.debezium.config.Configuration;
+import io.debezium.config.EnumeratedValue;
+import io.debezium.config.Field;
+import io.debezium.connector.SourceInfoStructMaker;
+import io.debezium.connector.mysql.MySqlConnectorConfig;
+import io.debezium.jdbc.JdbcValueConverters;
+import io.debezium.relational.ColumnFilterMode;
+import io.debezium.relational.RelationalDatabaseConnectorConfig;
+import io.debezium.relational.TableId;
+import io.debezium.relational.Tables;
+import org.apache.kafka.common.config.ConfigDef;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/** The {@link RelationalDatabaseConnectorConfig} used by the TiDB CDC connector. */
+public class TiDBConnectorConfig extends RelationalDatabaseConnectorConfig {
+    private static final Logger LOGGER = LoggerFactory.getLogger(TiDBConnectorConfig.class);
+
+    protected static final String LOGICAL_NAME = "tidb_cdc_connector";
+    protected static final int DEFAULT_SNAPSHOT_FETCH_SIZE = Integer.MIN_VALUE;
+    private final boolean readOnlyConnection = true;
+    protected static final List<String> BUILT_IN_DB_NAMES =
+            Collections.unmodifiableList(
+                    Arrays.asList("information_schema", "mysql", "tidb", "LBACSYS", "ORAAUDITOR"));
+    private final TiDBSourceConfig sourceConfig;
+
+    public static final Field READ_ONLY_CONNECTION =
+            Field.create("read.only")
+                    .withDisplayName("Read only connection")
+                    .withType(ConfigDef.Type.BOOLEAN)
+                    .withDefault(false)
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.LOW)
+                    .withDescription(
+                            "Switched connector to use alternative methods to deliver signals to Debezium instead of writing to signaling table");
+
+    public static final Field BIGINT_UNSIGNED_HANDLING_MODE =
+            Field.create("bigint.unsigned.handling.mode")
+                    .withDisplayName("BIGINT UNSIGNED Handling")
+                    .withEnum(BigIntUnsignedHandlingMode.class, BigIntUnsignedHandlingMode.LONG)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 27))
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "Specify how BIGINT UNSIGNED columns should be represented in change events, including: "
+                                    + "'precise' uses java.math.BigDecimal to represent values, which are encoded in the change events using a binary representation and Kafka Connect's 'org.apache.kafka.connect.data.Decimal' type; "
+                                    + "'long' (the default) represents values using Java's 'long', which may not offer the precision but will be far easier to use in consumers.");
+
+    public static final Field ENABLE_TIME_ADJUSTER =
+            Field.create("enable.time.adjuster")
+                    .withDisplayName("Enable Time Adjuster")
+                    .withType(ConfigDef.Type.BOOLEAN)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 22))
+                    .withDefault(true)
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.LOW)
+                    .withDescription(
+                            "MySQL allows user to insert year value as either 2-digit or 4-digit. In case of two digit the value is automatically mapped into 1970 - 2069. "
+                                    + "false - delegates the implicit conversion to the database; "
+                                    + "true - (the default) Debezium makes the conversion");
+
+    public enum BigIntUnsignedHandlingMode implements EnumeratedValue {
+        /**
+         * Represent {@code BIGINT UNSIGNED} values as precise {@link BigDecimal} values, which are
+         * represented in change events in a binary form. This is precise but difficult to use.
+         */
+        PRECISE("precise"),
+
+        /**
+         * Represent {@code BIGINT UNSIGNED} values as {@code long} values. This may be less
+         * precise but is far easier to use.
+         */
+        LONG("long");
+
+        private final String value;
+
+        private BigIntUnsignedHandlingMode(String value) {
+            this.value = value;
+        }
+
+        @Override
+        public String getValue() {
+            return value;
+        }
+
+        public JdbcValueConverters.BigIntUnsignedMode asBigIntUnsignedMode() {
+            switch (this) {
+                case LONG:
+                    return JdbcValueConverters.BigIntUnsignedMode.LONG;
+                case PRECISE:
+                default:
+                    return JdbcValueConverters.BigIntUnsignedMode.PRECISE;
+            }
+        }
+
+        /**
+         * Determine if the supplied value is one of the predefined options.
+         *
+         * @param value the configuration property value; a null value yields null
+         * @return the matching option, or null if no match is found
+         */
+        public static BigIntUnsignedHandlingMode parse(String value) {
+            if (value == null) {
+                return null;
+            }
+            value = value.trim();
+            for (BigIntUnsignedHandlingMode option : BigIntUnsignedHandlingMode.values()) {
+                if (option.getValue().equalsIgnoreCase(value)) {
+                    return option;
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Parse the supplied value, falling back to the default value when it does not match.
+         *
+         * @param value the configuration property value; a null value falls back to the default
+         * @param defaultValue the default value; may be null
+         * @return the matching option, or null if neither the value nor the default matches
+         */
+        public static BigIntUnsignedHandlingMode parse(String value, String defaultValue) {
+            BigIntUnsignedHandlingMode mode = parse(value);
+            if (mode == null && defaultValue != null) {
+                mode = parse(defaultValue);
+            }
+            return mode;
+        }
+    }
+
+    @Override
+    public String getContextName() {
+        return "TiDB";
+    }
+
+    @Override
+    public String getConnectorName() {
+        return "TiDB";
+    }
+
+    public String databaseName() {
+        return getConfig().getString(DATABASE_NAME);
+    }
+
+    public TiDBConnectorConfig(TiDBSourceConfig sourceConfig) {
+        super(
+                Configuration.from(sourceConfig.getDbzProperties()),
+                LOGICAL_NAME,
+                Tables.TableFilter.fromPredicate(
+                        tableId ->
+                                "mysql".equalsIgnoreCase(sourceConfig.getCompatibleMode())
+                                        ? !BUILT_IN_DB_NAMES.contains(tableId.catalog())
+                                        : !BUILT_IN_DB_NAMES.contains(tableId.schema())),
+                TableId::identifier,
+                DEFAULT_SNAPSHOT_FETCH_SIZE,
+                "mysql".equalsIgnoreCase(sourceConfig.getCompatibleMode())
+                        ? ColumnFilterMode.CATALOG
+                        : ColumnFilterMode.SCHEMA);
+        this.sourceConfig = sourceConfig;
+    }
+
+    public TiDBSourceConfig getSourceConfig() {
+        return sourceConfig;
+    }
+
+    @Override
+    protected SourceInfoStructMaker getSourceInfoStructMaker(Version version) {
+        return new TiDBSourceInfoStructMaker();
+    }
+
+    public static final Field SERVER_NAME =
+            RelationalDatabaseConnectorConfig.SERVER_NAME.withValidation(
+                    CommonConnectorConfig::validateServerNameIsDifferentFromHistoryTopicName);
+
+    public boolean isReadOnlyConnection() {
+        return readOnlyConnection;
+    }
+
+    /** The predefined secure connection modes, identified by their configuration values. */
+    public enum SecureConnectionMode implements EnumeratedValue {
+        /** Establish an unencrypted connection. */
+        DISABLED("disabled"),
+
+        /**
+         * Establish a secure (encrypted) connection if the server supports secure connections. Fall
+         * back to an unencrypted connection otherwise.
+         */
+        PREFERRED("preferred"),
+        /**
+         * Establish a secure connection if the server supports secure connections. The connection
+         * attempt fails if a secure connection cannot be established.
+         */
+        REQUIRED("required"),
+        /**
+         * Like REQUIRED, but additionally verify the server TLS certificate against the configured
+         * Certificate Authority (CA) certificates. The connection attempt fails if no valid
+         * matching CA certificates are found.
+         */
+        VERIFY_CA("verify_ca"),
+        /**
+         * Like VERIFY_CA, but additionally verify that the server certificate matches the host to
+         * which the connection is attempted.
+         */
+        VERIFY_IDENTITY("verify_identity");
+
+        private final String value;
+
+        private SecureConnectionMode(String value) {
+            this.value = value;
+        }
+
+        @Override
+        public String getValue() {
+            return value;
+        }
+
+        /**
+         * Determine if the supplied value is one of the predefined options.
+         *
+         * @param value the configuration property value; a null value yields null
+         * @return the matching option, or null if no match is found
+         */
+        public static SecureConnectionMode parse(String value) {
+            if (value == null) {
+                return null;
+            }
+            value = value.trim();
+            for (SecureConnectionMode option : SecureConnectionMode.values()) {
+                if (option.getValue().equalsIgnoreCase(value)) {
+                    return option;
+                }
+            }
+            return null;
+        }
+
+        /**
+         * Parse the supplied value, falling back to the default value when it does not match.
+         *
+         * @param value the configuration property value; a null value falls back to the default
+         * @param defaultValue the default value; may be null
+         * @return the matching option, or null if neither the value nor the default matches
+         */
+        public static SecureConnectionMode parse(String value, String defaultValue) {
+            SecureConnectionMode mode = parse(value);
+            if (mode == null && defaultValue != null) {
+                mode = parse(defaultValue);
+            }
+            return mode;
+        }
+    }
+
+    public static final Field SSL_MODE =
+            Field.create("database.ssl.mode")
+                    .withDisplayName("SSL mode")
+                    .withEnum(
+                            MySqlConnectorConfig.SecureConnectionMode.class,
+                            MySqlConnectorConfig.SecureConnectionMode.DISABLED)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 0))
+                    .withWidth(ConfigDef.Width.MEDIUM)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "Whether to use an encrypted connection to MySQL. Options include "
+                                    + "'disabled' (the default) to use an unencrypted connection; "
+                                    + "'preferred' to establish a secure (encrypted) connection if the server supports secure connections, "
+                                    + "but fall back to an unencrypted connection otherwise; "
+                                    + "'required' to use a secure (encrypted) connection, and fail if one cannot be established; "
+                                    + "'verify_ca' like 'required' but additionally verify the server TLS certificate against the configured Certificate Authority "
+                                    + "(CA) certificates, or fail if no valid matching CA certificates are found; or "
+                                    + "'verify_identity' like 'verify_ca' but additionally verify that the server certificate matches the host to which the connection is attempted.");
+
+    public static final Field SSL_KEYSTORE =
+            Field.create("database.ssl.keystore")
+                    .withDisplayName("SSL Keystore")
+                    .withType(ConfigDef.Type.STRING)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 1))
+                    .withWidth(ConfigDef.Width.LONG)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "The location of the key store file. "
+                                    + "This is optional and can be used for two-way authentication between the client and the MySQL Server.");
+
+    public static final Field SSL_KEYSTORE_PASSWORD =
+            Field.create("database.ssl.keystore.password")
+                    .withDisplayName("SSL Keystore Password")
+                    .withType(ConfigDef.Type.PASSWORD)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 2))
+                    .withWidth(ConfigDef.Width.MEDIUM)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "The password for the key store file. "
+                                    + "This is optional and only needed if 'database.ssl.keystore' is configured.");
+
+    public static final Field SSL_TRUSTSTORE =
+            Field.create("database.ssl.truststore")
+                    .withDisplayName("SSL Truststore")
+                    .withType(ConfigDef.Type.STRING)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 3))
+                    .withWidth(ConfigDef.Width.LONG)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "The location of the trust store file for the server certificate verification.");
+
+    public static final Field SSL_TRUSTSTORE_PASSWORD =
+            Field.create("database.ssl.truststore.password")
+                    .withDisplayName("SSL Truststore Password")
+                    .withType(ConfigDef.Type.PASSWORD)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 4))
+                    .withWidth(ConfigDef.Width.MEDIUM)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "The password for the trust store file. "
+                                    + "Used to check the integrity of the truststore, and unlock the truststore.");
+
+    public static final Field CONNECTION_TIMEOUT_MS =
+            Field.create("connect.timeout.ms")
+                    .withDisplayName("Connection Timeout (ms)")
+                    .withType(ConfigDef.Type.INT)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED, 1))
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "Maximum time to wait after trying to connect to the database before timing out, given in milliseconds. Defaults to 30 seconds (30,000 ms).")
+                    .withDefault(30 * 1000)
+                    .withValidation(Field::isPositiveInteger);
+
+    public static final Field EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE =
+            Field.create("event.deserialization.failure.handling.mode")
+                    .withDisplayName("Event deserialization failure handling")
+                    .withEnum(
+                            EventProcessingFailureHandlingMode.class,
+                            EventProcessingFailureHandlingMode.FAIL)
+                    .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 21))
+                    .withValidation(
+                            TiDBConnectorConfig
+                                    ::validateEventDeserializationFailureHandlingModeNotSet)
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "Specify how failures during deserialization of binlog events (i.e. when encountering a corrupted event) should be handled, including: "
+                                    + "'fail' (the default) an exception indicating the problematic event and its binlog position is raised, causing the connector to be stopped; "
+                                    + "'warn' the problematic event and its binlog position will be logged and the event will be skipped; "
+                                    + "'ignore' the problematic event will be skipped.");
+
+    public static final Field INCONSISTENT_SCHEMA_HANDLING_MODE =
+            Field.create("inconsistent.schema.handling.mode")
+                    .withDisplayName("Inconsistent schema failure handling")
+                    .withEnum(
+                            EventProcessingFailureHandlingMode.class,
+                            EventProcessingFailureHandlingMode.FAIL)
+                    .withGroup(Field.createGroupEntry(Field.Group.ADVANCED, 2))
+                    .withValidation(
+                            TiDBConnectorConfig::validateInconsistentSchemaHandlingModeNotIgnore)
+                    .withWidth(ConfigDef.Width.SHORT)
+                    .withImportance(ConfigDef.Importance.MEDIUM)
+                    .withDescription(
+                            "Specify how binlog events that belong to a table missing from internal schema representation (i.e. internal representation is not consistent with database) should be handled, including: "
+                                    + "'fail' (the default) an exception indicating the problematic event and its binlog position is raised, causing the connector to be stopped; "
+                                    + "'warn' the problematic event and its binlog position will be logged and the event will be skipped; "
+                                    + "'skip' the problematic event will be skipped.");
+
+    private static int validateEventDeserializationFailureHandlingModeNotSet(
+            Configuration config, Field field, Field.ValidationOutput problems) {
+        final String modeName =
+                config.asMap().get(EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name());
+        if (modeName != null) {
+            LOGGER.warn(
+                    "Configuration option '{}' is renamed to '{}'",
+                    EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name(),
+                    EVENT_PROCESSING_FAILURE_HANDLING_MODE.name());
+            if (EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING.equals(
+                    modeName)) {
+                LOGGER.warn(
+                        "Value '{}' of configuration option '{}' is deprecated and should be replaced with '{}'",
+                        EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING,
+                        EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name(),
+                        EventProcessingFailureHandlingMode.SKIP.getValue());
+            }
+        }
+        return 0;
+    }
+
+    private static int validateInconsistentSchemaHandlingModeNotIgnore(
+            Configuration config, Field field, Field.ValidationOutput problems) {
+        final String modeName = config.getString(INCONSISTENT_SCHEMA_HANDLING_MODE);
+        if (EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING.equals(
+                modeName)) {
+            LOGGER.warn(
+                    "Value '{}' of configuration option '{}' is deprecated and should be replaced with '{}'",
+                    EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING,
+                    INCONSISTENT_SCHEMA_HANDLING_MODE.name(),
+                    EventProcessingFailureHandlingMode.SKIP.getValue());
+        }
+        return 0;
+    }
+}
diff --git
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java new file mode 100644 index 00000000000..f6b5e9debf9 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.table.catalog.ObjectPath; + +import io.debezium.config.Configuration; +import org.tikv.common.TiConfiguration; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +public class TiDBSourceConfig extends JdbcSourceConfig { + private static final long serialVersionUID = 1L; + private final String compatibleMode; + private final String pdAddresses; + + private final String hostMapping; + private TiConfiguration tiConfiguration; + private final Properties jdbcProperties; + private Map chunkKeyColumns; + + public TiDBSourceConfig( + String compatibleMode, + StartupOptions startupOptions, + List databaseList, + List tableList, + String pdAddresses, + String hostMapping, + int splitSize, + int splitMetaGroupSize, + TiConfiguration tiConfiguration, + double distributionFactorUpper, + double distributionFactorLower, + boolean includeSchemaChanges, + boolean closeIdleReaders, + Properties jdbcProperties, + Configuration dbzConfiguration, + String driverClassName, + String hostname, + int port, + String username, + String password, + int fetchSize, + String serverTimeZone, + Duration connectTimeout, + int connectMaxRetries, + int connectionPoolSize, + String chunkKeyColumn, + Map chunkKeyColumns, + boolean skipSnapshotBackfill, + boolean isScanNewlyAddedTableEnabled, + boolean assignUnboundedChunkFirst) { + super( + startupOptions, + databaseList, + null, + tableList, + splitSize, + splitMetaGroupSize, + distributionFactorUpper, + distributionFactorLower, + includeSchemaChanges, + closeIdleReaders, + jdbcProperties, + dbzConfiguration, + driverClassName, + hostname, + port, + username, + password, + fetchSize, + serverTimeZone, + connectTimeout, + connectMaxRetries, + connectionPoolSize, + 
chunkKeyColumn, + skipSnapshotBackfill, + isScanNewlyAddedTableEnabled, + assignUnboundedChunkFirst); + this.compatibleMode = compatibleMode; + this.pdAddresses = pdAddresses; + this.hostMapping = hostMapping; + this.jdbcProperties = jdbcProperties; + this.tiConfiguration = tiConfiguration; + this.chunkKeyColumns = chunkKeyColumns; + } + + public String getCompatibleMode() { + return compatibleMode; + } + + public String getPdAddresses() { + return pdAddresses; + } + + public String getHostMapping() { + return hostMapping; + } + + public Properties getJdbcProperties() { + return this.jdbcProperties; + } + + public TiConfiguration getTiConfiguration() { + return this.tiConfiguration; + } + + public Map getChunkKeyColumns() { + return this.chunkKeyColumns; + } + + @Override + public TiDBConnectorConfig getDbzConnectorConfig() { + return new TiDBConnectorConfig(this); + } + + public StartupOptions getStartupOptions() { + return startupOptions; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java new file mode 100644 index 00000000000..12c68c3987e --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfigFactory; +import org.apache.flink.table.catalog.ObjectPath; + +import io.debezium.config.Configuration; +import org.tikv.common.TiConfiguration; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import static org.apache.flink.cdc.common.utils.Preconditions.checkNotNull; +import static org.apache.flink.cdc.connectors.base.utils.EnvironmentUtils.checkSupportCheckpointsAfterTasksFinished; + +/** A factory to initialize {@link TiDBSourceConfig}. 
*/ +@SuppressWarnings("UnusedReturnValue") +public class TiDBSourceConfigFactory extends JdbcSourceConfigFactory { + private static final long serialVersionUID = 1L; + private String compatibleMode; + private String driverClassName = "com.mysql.cj.jdbc.Driver"; + private String pdAddresses; + + private String hostMapping; + private TiConfiguration tiConfiguration; + private Properties tikvProperties; + private Properties jdbcProperties; + private Map chunkKeyColumns = new HashMap<>(); + + public TiDBSourceConfigFactory compatibleMode(String compatibleMode) { + this.compatibleMode = compatibleMode; + return this; + } + + public TiDBSourceConfigFactory chunkKeyColumn(ObjectPath objectPath, String chunkKeyColumn) { + this.chunkKeyColumns.put(objectPath, chunkKeyColumn); + return this; + } + + public TiDBSourceConfigFactory chunkKeyColumns(Map chunkKeyColumns) { + this.chunkKeyColumns.putAll(chunkKeyColumns); + return this; + } + + public TiDBSourceConfigFactory driverClassName(String driverClassName) { + this.driverClassName = driverClassName; + return this; + } + + public TiDBSourceConfigFactory pdAddresses(String pdAddresses) { + this.pdAddresses = pdAddresses; + return this; + } + + public TiDBSourceConfigFactory hostMapping(String hostMapping) { + this.hostMapping = hostMapping; + return this; + } + + public TiDBSourceConfigFactory tikvProperties(Properties tikvProperties) { + this.tikvProperties = tikvProperties; + return this; + } + + public TiDBSourceConfigFactory jdbcProperties(Properties jdbcProperties) { + this.jdbcProperties = jdbcProperties; + return this; + } + + public TiDBSourceConfigFactory tiConfiguration(TiConfiguration tiConfiguration) { + this.tiConfiguration = tiConfiguration; + return this; + } + + @Override + public TiDBSourceConfig create(int subtask) { + checkSupportCheckpointsAfterTasksFinished(closeIdleReaders); + Properties props = new Properties(); + props.setProperty("database.server.name", "tidb_cdc"); + 
props.setProperty("database.hostname", checkNotNull(hostname)); + props.setProperty("database.port", String.valueOf(port)); + props.setProperty("database.user", checkNotNull(username)); + props.setProperty("database.password", checkNotNull(password)); + props.setProperty("database.dbname", checkNotNull(databaseList.get(0))); + props.setProperty("database.connect.timeout.ms", String.valueOf(connectTimeout.toMillis())); + + // table filter + // props.put("database.include.list", String.join(",", databaseList)); + if (tableList != null) { + props.put("table.include.list", String.join(",", tableList)); + } + // value converter + props.put("decimal.handling.mode", "precise"); + props.put("time.precision.mode", "adaptive_time_microseconds"); + props.put("binary.handling.mode", "bytes"); + + if (jdbcProperties != null) { + props.putAll(jdbcProperties); + } + + if (tikvProperties != null) { + props.putAll(tikvProperties); + } + + Configuration dbzConfiguration = Configuration.from(props); + return new TiDBSourceConfig( + compatibleMode, + startupOptions, + databaseList, + tableList, + pdAddresses, + hostMapping, + splitSize, + splitMetaGroupSize, + tiConfiguration, + distributionFactorUpper, + distributionFactorLower, + includeSchemaChanges, + closeIdleReaders, + props, + dbzConfiguration, + driverClassName, + hostname, + port, + username, + password, + fetchSize, + serverTimeZone, + connectTimeout, + connectMaxRetries, + connectionPoolSize, + chunkKeyColumn, + chunkKeyColumns, + skipSnapshotBackfill, + scanNewlyAddedTableEnabled, + assignUnboundedChunkFirst); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java new file mode 100644 index 00000000000..bb36951656b --- /dev/null +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions; +import org.apache.flink.cdc.connectors.tidb.utils.UriHostMapping; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; + +import org.tikv.common.TiConfiguration; + +import java.time.Duration; +import java.util.Map; +import java.util.Optional; + +public class TiDBSourceOptions extends JdbcSourceOptions { + + public static final ConfigOption TiDB_PORT = + ConfigOptions.key("port") + .intType() + .defaultValue(4000) + .withDescription("Integer port number of the TiDB database server."); + + public static final ConfigOption PD_ADDRESSES = + ConfigOptions.key("pd-addresses") + .stringType() + .noDefaultValue() + .withDescription("TiDB pd-server addresses"); + + public static final ConfigOption HEARTBEAT_INTERVAL = + ConfigOptions.key("heartbeat.interval.ms") + 
.durationType() + .defaultValue(Duration.ofSeconds(30)) + .withDescription( + "Optional interval of sending heartbeat event for tracing the latest available replication slot offsets"); + + public static final ConfigOption TABLE_LIST = + ConfigOptions.key("table-list") + .stringType() + .noDefaultValue() + .withDescription( + "List of full names of tables, separated by commas, e.g. \"db1.table1, db2.table2\"."); + + public static final ConfigOption HOST_MAPPING = + ConfigOptions.key("host-mapping") + .stringType() + .noDefaultValue() + .withDescription( + "TiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9."); + + public static final ConfigOption JDBC_DRIVER = + ConfigOptions.key("jdbc.driver") + .stringType() + .defaultValue("com.mysql.cj.jdbc.Driver") + .withDescription( + "JDBC driver class name, use 'com.mysql.cj.jdbc.Driver' by default."); + + public static TiConfiguration getTiConfiguration( + final String pdAddrsStr, final String hostMapping, final Map options) { + final Configuration configuration = Configuration.fromMap(options); + + final TiConfiguration tiConf = TiConfiguration.createDefault(pdAddrsStr); + Optional.of(new UriHostMapping(hostMapping)).ifPresent(tiConf::setHostMapping); + // todo add more config to tidb + return tiConf; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java new file mode 100644 index 00000000000..ab06e8f7a4e --- /dev/null +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java @@ -0,0 +1,526 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.connection; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.jdbc.JdbcConfiguration; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.Column; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import io.debezium.relational.history.TableChanges; +import io.debezium.schema.SchemaChangeEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.sql.Types; +import 
java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.Set; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.regex.Pattern; + +public class TiDBConnection extends JdbcConnection { + private static final Logger LOG = LoggerFactory.getLogger(TiDBConnection.class); + + private static final Properties DEFAULT_JDBC_PROPERTIES = initializeDefaultJdbcProperties(); + private static final String MYSQL_URL_PATTERN = + "jdbc:mysql://${hostname}:${port}/?connectTimeout=${connectTimeout}"; + private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE "; + private static final int TYPE_BINARY_FLOAT = 100; + private static final int TYPE_BINARY_DOUBLE = 101; + private static final int TYPE_TIMESTAMP_WITH_TIME_ZONE = -101; + private static final int TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE = -102; + private static final int TYPE_INTERVAL_YEAR_TO_MONTH = -103; + private static final int TYPE_INTERVAL_DAY_TO_SECOND = -104; + private static final char quote = '`'; + private static final String QUOTED_CHARACTER = "`"; + + public TiDBConnection( + String hostname, + Integer port, + String user, + String password, + Duration timeout, + String jdbcDriver, + Properties jdbcProperties, + ClassLoader classLoader) { + super( + config(hostname, port, user, password, timeout), + JdbcConnection.patternBasedFactory( + formatJdbcUrl(jdbcDriver, jdbcProperties), jdbcDriver, classLoader), + quote + "", + quote + ""); + } + + public TiDBConnection( + JdbcConfiguration config, + ConnectionFactory connectionFactory, + String openingQuoteCharacter, + String closingQuoteCharacter) { + super(config, connectionFactory, openingQuoteCharacter, closingQuoteCharacter); + } + + public TiDBConnection( + JdbcConfiguration config, + 
ConnectionFactory connectionFactory, + Supplier classLoaderSupplier, + String openingQuoteCharacter, + String closingQuoteCharacter) { + super( + config, + connectionFactory, + classLoaderSupplier, + openingQuoteCharacter, + closingQuoteCharacter); + } + + protected TiDBConnection( + JdbcConfiguration config, + ConnectionFactory connectionFactory, + Operations initialOperations, + Supplier classLoaderSupplier, + String openingQuotingChar, + String closingQuotingChar) { + super( + config, + connectionFactory, + initialOperations, + classLoaderSupplier, + openingQuotingChar, + closingQuotingChar); + } + + private static JdbcConfiguration config( + String hostname, Integer port, String user, String password, Duration timeout) { + return JdbcConfiguration.create() + .with("hostname", hostname) + .with("port", port) + .with("user", user) + .with("password", password) + .with("connectTimeout", timeout == null ? 30000 : timeout.toMillis()) + .build(); + } + + private static String formatJdbcUrl(String jdbcDriver, Properties jdbcProperties) { + Properties combinedProperties = new Properties(); + combinedProperties.putAll(DEFAULT_JDBC_PROPERTIES); + if (jdbcProperties != null) { + combinedProperties.putAll(jdbcProperties); + } + StringBuilder jdbcUrlStringBuilder = new StringBuilder(MYSQL_URL_PATTERN); + combinedProperties.forEach( + (key, value) -> { + jdbcUrlStringBuilder.append("&").append(key).append("=").append(value); + }); + return jdbcUrlStringBuilder.toString(); + } + + private static Properties initializeDefaultJdbcProperties() { + Properties defaultJdbcProperties = new Properties(); + defaultJdbcProperties.setProperty("useInformationSchema", "true"); + defaultJdbcProperties.setProperty("nullCatalogMeansCurrent", "false"); + defaultJdbcProperties.setProperty("useUnicode", "true"); + defaultJdbcProperties.setProperty("zeroDateTimeBehavior", "convertToNull"); + defaultJdbcProperties.setProperty("characterEncoding", "UTF-8"); + 
defaultJdbcProperties.setProperty("characterSetResults", "UTF-8"); + return defaultJdbcProperties; + } + + public long getCurrentTimestampS() throws SQLException { + try { + long globalTimestamp = getGlobalTimestamp(); + LOG.info("Global timestamp: {}", globalTimestamp); + return Long.parseLong(String.valueOf(globalTimestamp).substring(0, 10)); + } catch (Exception e) { + LOG.warn("Failed to get global timestamp, use local timestamp instead"); + } + return getCurrentTimestamp() + .orElseThrow(IllegalStateException::new) + .toInstant() + .getEpochSecond(); + } + + private long getGlobalTimestamp() throws SQLException { + return querySingleValue( + connection(), "SELECT CURRENT_TIMESTAMP FROM DUAL", ps -> {}, rs -> rs.getLong(1)); + } + + @Override + public Optional getCurrentTimestamp() throws SQLException { + return queryAndMap( + "SELECT LOCALTIMESTAMP FROM DUAL", + rs -> rs.next() ? Optional.of(rs.getTimestamp(1)) : Optional.empty()); + } + + @Override + protected String[] supportedTableTypes() { + return new String[] {"TABLE"}; + } + + @Override + public String quotedTableIdString(TableId tableId) { + return tableId.toQuotedString(quote); + } + + public void readSchemaForCapturedTables( + Tables tables, + String databaseCatalog, + String schemaNamePattern, + Tables.ColumnNameFilter columnFilter, + boolean removeTablesNotFoundInJdbc, + Set capturedTables) + throws SQLException { + + Set tableIdsBefore = new HashSet<>(tables.tableIds()); + + DatabaseMetaData metadata = connection().getMetaData(); + Map> columnsByTable = new HashMap<>(); + + for (TableId tableId : capturedTables) { + try (ResultSet columnMetadata = + metadata.getColumns( + databaseCatalog, schemaNamePattern, tableId.table(), null)) { + while (columnMetadata.next()) { + // add all whitelisted columns + readTableColumn(columnMetadata, tableId, columnFilter) + .ifPresent( + column -> { + columnsByTable + .computeIfAbsent(tableId, t -> new ArrayList<>()) + .add(column.create()); + }); + } + } + } + + 
// Read the metadata for the primary keys ... + for (Map.Entry> tableEntry : columnsByTable.entrySet()) { + // First get the primary key information, which must be done for *each* table ... + List pkColumnNames = readPrimaryKeyNames(metadata, tableEntry.getKey()); + + // Then define the table ... + List columns = tableEntry.getValue(); + Collections.sort(columns); + tables.overwriteTable(tableEntry.getKey(), columns, pkColumnNames, null); + } + + if (removeTablesNotFoundInJdbc) { + // Remove any definitions for tables that were not found in the database metadata ... + tableIdsBefore.removeAll(columnsByTable.keySet()); + tableIdsBefore.forEach(tables::removeTable); + } + } + + @Override + protected int resolveNativeType(String typeName) { + String upperCaseTypeName = typeName.toUpperCase(); + if (upperCaseTypeName.startsWith("JSON")) { + return Types.VARCHAR; + } + if (upperCaseTypeName.startsWith("NCHAR")) { + return Types.NCHAR; + } + if (upperCaseTypeName.startsWith("NVARCHAR2")) { + return Types.NVARCHAR; + } + if (upperCaseTypeName.startsWith("TIMESTAMP")) { + if (upperCaseTypeName.contains("WITH TIME ZONE")) { + return TYPE_TIMESTAMP_WITH_TIME_ZONE; + } + if (upperCaseTypeName.contains("WITH LOCAL TIME ZONE")) { + return TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + return Types.TIMESTAMP; + } + if (upperCaseTypeName.startsWith("INTERVAL")) { + if (upperCaseTypeName.contains("TO MONTH")) { + return TYPE_INTERVAL_YEAR_TO_MONTH; + } + if (upperCaseTypeName.contains("TO SECOND")) { + return TYPE_INTERVAL_DAY_TO_SECOND; + } + } + return Column.UNSET_INT_VALUE; + } + + public String readSystemVariable(String variable) throws SQLException { + return querySingleValue( + connection(), + "SHOW VARIABLES LIKE ?", + ps -> ps.setString(1, variable), + rs -> rs.getString("VALUE")); + } + + @Override + protected int resolveJdbcType(int metadataJdbcType, int nativeType) { + switch (metadataJdbcType) { + case TYPE_BINARY_FLOAT: + return Types.REAL; + case TYPE_BINARY_DOUBLE: + 
return Types.DOUBLE; + case TYPE_TIMESTAMP_WITH_TIME_ZONE: + case TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE: + case TYPE_INTERVAL_YEAR_TO_MONTH: + case TYPE_INTERVAL_DAY_TO_SECOND: + return Types.OTHER; + default: + return nativeType == Column.UNSET_INT_VALUE ? metadataJdbcType : nativeType; + } + } + + public List getTables(String dbPattern, String tbPattern) throws SQLException { + return listTables( + db -> Pattern.matches(dbPattern, db), + tableId -> Pattern.matches(tbPattern, tableId.table())); + } + + private List listTables( + Predicate databaseFilter, Tables.TableFilter tableFilter) throws SQLException { + List tableIds = new ArrayList<>(); + DatabaseMetaData metaData = connection().getMetaData(); + ResultSet rs = metaData.getCatalogs(); + List dbList = new ArrayList<>(); + while (rs.next()) { + String db = rs.getString("TABLE_CAT"); + if (databaseFilter.test(db)) { + dbList.add(db); + } + } + for (String db : dbList) { + + rs = metaData.getTables(db, null, null, supportedTableTypes()); + while (rs.next()) { + TableId tableId = new TableId(db, null, rs.getString("TABLE_NAME")); + if (tableFilter.isIncluded(tableId)) { + tableIds.add(tableId); + } + } + } + return tableIds; + } + + // 新的readSchema + public void readTiDBSchema( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + Tables tables, + String databaseCatalog, + String schemaNamePattern, + Tables.TableFilter tableFilter, + Tables.ColumnNameFilter columnFilter, + boolean removeTablesNotFoundInJdbc) + throws SQLException { + // Before we make any changes, get the copy of the set of table IDs ... + Set tableIdsBefore = new HashSet<>(tables.tableIds()); + + // Read the metadata for the table columns ... 
+ DatabaseMetaData metadata = connection().getMetaData(); + + // Find regular and materialized views as they cannot be snapshotted + final Set viewIds = new HashSet<>(); + final Set tableIds = new HashSet<>(); + + int totalTables = 0; + try (final ResultSet rs = + metadata.getTables( + databaseCatalog, schemaNamePattern, null, supportedTableTypes())) { + while (rs.next()) { + final String catalogName = resolveCatalogName(rs.getString(1)); + final String schemaName = rs.getString(2); + final String tableName = rs.getString(3); + final String tableType = rs.getString(4); + if (isTableType(tableType)) { + totalTables++; + TableId tableId = new TableId(catalogName, schemaName, tableName); + if (tableFilter == null || tableFilter.isIncluded(tableId)) { + tableIds.add(tableId); + } + } else { + TableId tableId = new TableId(catalogName, schemaName, tableName); + viewIds.add(tableId); + } + } + } + + Map> columnsByTable = new HashMap<>(); + if (totalTables == tableIds.size()) { + columnsByTable = + getColumnsDetailsWithTableChange( + config, + databaseSchema, + databaseCatalog, + schemaNamePattern, + null, + tableFilter, + columnFilter, + metadata, + viewIds); + // LOGGER.info("connection readSchema:", columnsByTable); + } else { + for (TableId includeTable : tableIds) { + Map> cols = + getColumnsDetailsWithTableChange( + config, + databaseSchema, + databaseCatalog, + schemaNamePattern, + null, + tableFilter, + columnFilter, + metadata, + viewIds); + columnsByTable.putAll(cols); + } + } + + // Read the metadata for the primary keys ... + for (Map.Entry> tableEntry : columnsByTable.entrySet()) { + // First get the primary key information, which must be done for *each* table ... + List pkColumnNames = + readPrimaryKeyOrUniqueIndexNames(metadata, tableEntry.getKey()); + + // Then define the table ... 
+ List columns = tableEntry.getValue(); + Collections.sort(columns); + String defaultCharsetName = null; // JDBC does not expose character sets + tables.overwriteTable(tableEntry.getKey(), columns, pkColumnNames, defaultCharsetName); + } + + if (removeTablesNotFoundInJdbc) { + // Remove any definitions for tables that were not found in the database metadata ... + tableIdsBefore.removeAll(columnsByTable.keySet()); + tableIdsBefore.forEach(tables::removeTable); + } + } + + protected Map> getColumnsDetailsWithTableChange( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + String databaseCatalog, + String schemaNamePattern, + String tableName, + Tables.TableFilter tableFilter, + Tables.ColumnNameFilter columnFilter, + DatabaseMetaData metadata, + final Set viewIds) + throws SQLException { + Map> columnsByTable = new HashMap<>(); + try (ResultSet columnMetadata = + metadata.getColumns(databaseCatalog, schemaNamePattern, tableName, null)) { + while (columnMetadata.next()) { + String catalogName = resolveCatalogName(columnMetadata.getString(1)); + String schemaName = columnMetadata.getString(2); + String metaTableName = columnMetadata.getString(3); + TableId tableId = new TableId(catalogName, schemaName, metaTableName); + + // exclude views and non-captured tables + if (viewIds.contains(tableId) + || (tableFilter != null && !tableFilter.isIncluded(tableId))) { + continue; + } + TableChanges.TableChange tableChange = + readTableSchema(config, databaseSchema, tableId); + if (tableChange != null) { + ArrayList columns = new ArrayList<>(tableChange.getTable().columns()); + columnsByTable.put(tableId, columns); + } + } + } + return columnsByTable; + } + + private TableChanges.TableChange readTableSchema( + TiDBConnectorConfig connectorConfig, + TiDBDatabaseSchema databaseSchema, + TableId tableId) { + final Map tableChangeMap = new HashMap<>(); + String showCreateTable = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + final TiDBPartition partition = new 
TiDBPartition(connectorConfig.getLogicalName()); + buildSchemaByShowCreateTable( + connectorConfig, databaseSchema, partition, this, tableId, tableChangeMap); + return tableChangeMap.get(tableId); + } + + private void buildSchemaByShowCreateTable( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + TiDBPartition partition, + JdbcConnection jdbc, + TableId tableId, + Map tableChangeMap) { + final String sql = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + try { + jdbc.query( + sql, + rs -> { + if (rs.next()) { + final String ddl = rs.getString(2); + parseSchemaByDdl( + config, + databaseSchema, + partition, + ddl, + tableId, + tableChangeMap); + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException( + String.format("Failed to read schema for table %s by running %s", tableId, sql), + e); + } + } + + private void parseSchemaByDdl( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + TiDBPartition partition, + String ddl, + TableId tableId, + Map tableChangeMap) { + final EventOffsetContext offsetContext = EventOffsetContext.initial(config); + List schemaChangeEvents = + databaseSchema.parseSnapshotDdl( + partition, ddl, tableId.catalog(), offsetContext, Instant.now()); + for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) { + for (TableChanges.TableChange tableChange : schemaChangeEvent.getTableChanges()) { + tableChangeMap.put(tableId, tableChange); + } + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java similarity index 50% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java rename to 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java index 1b2be59b1f8..7a826ebaeab 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java @@ -15,28 +15,19 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table.utils; +package org.apache.flink.cdc.connectors.tidb.source.connection; -import org.apache.flink.configuration.ConfigurationUtils; +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +public class TiDBConnectionPoolFactory extends JdbcConnectionPoolFactory { + private static final String MYSQL_URL_PATTERN = + "jdbc:mysql://%s:%s/?useUnicode=true&useSSL=false&useInformationSchema=true&nullCatalogMeansCurrent=false&zeroDateTimeBehavior=convertToNull&characterEncoding=UTF-8&characterSetResults=UTF-8"; -import java.util.Map; - -/** A utility class to print configuration of connectors. */ -public class OptionUtils { - - private static final Logger LOG = LoggerFactory.getLogger(OptionUtils.class); - - /** Utility class can not be instantiated. 
*/ - private OptionUtils() {} - - public static void printOptions(String identifier, Map config) { - Map hideMap = ConfigurationUtils.hideSensitiveValues(config); - LOG.info("Print {} connector configuration:", identifier); - for (String key : hideMap.keySet()) { - LOG.info("{} = {}", key, hideMap.get(key)); - } + @Override + public String getJdbcUrl(JdbcSourceConfig sourceConfig) { + String hostName = sourceConfig.getHostname(); + int port = sourceConfig.getPort(); + return String.format(MYSQL_URL_PATTERN, hostName, port); } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java new file mode 100644 index 00000000000..49a4a1aff06 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.flink.cdc.connectors.tidb.source.converter;

import io.debezium.annotation.Immutable;
import io.debezium.connector.mysql.MySqlDefaultValueConverter;
import io.debezium.connector.mysql.MySqlValueConverters;
import io.debezium.relational.Column;
import io.debezium.relational.DefaultValueConverter;
import io.debezium.relational.ValueConverter;
import io.debezium.util.Collect;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.sql.Timestamp;
import java.sql.Types;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses the textual default-value expression of a column into a Java value and then runs it
 * through the column's {@link ValueConverter} obtained from {@link TiDBValueConverters}.
 *
 * <p>NOTE(review): the structure appears to follow Debezium's {@link MySqlDefaultValueConverter};
 * confirm before diverging from it.
 */
public class TiDBDefaultValueConverter implements DefaultValueConverter {

    // Log under this class' own name so messages are attributed to the TiDB connector.
    private static final Logger LOGGER = LoggerFactory.getLogger(TiDBDefaultValueConverter.class);

    /** Timestamps with a zero year, month or day part and a zero time part. */
    private static final Pattern EPOCH_EQUIVALENT_TIMESTAMP =
            Pattern.compile(
                    "(\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}) (00:00:00(\\.\\d{1,6})?)");

    /** Dates with a zero year, month or day part. */
    private static final Pattern EPOCH_EQUIVALENT_DATE =
            Pattern.compile("\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}");

    private static final String EPOCH_TIMESTAMP = "1970-01-01 00:00:00";

    private static final String EPOCH_DATE = "1970-01-01";

    /** Splits a "date time[.fraction]" literal; group 2 is the time portion. */
    private static final Pattern TIMESTAMP_PATTERN =
            Pattern.compile("([0-9]*-[0-9]*-[0-9]*) ([0-9]*:[0-9]*:[0-9]*(\\.([0-9]*))?)");

    /** Matches {@code _charset'literal'}; group 1 is the literal without the introducer. */
    private static final Pattern CHARSET_INTRODUCER_PATTERN =
            Pattern.compile("^_[A-Za-z0-9]+'(.*)'$");

    /** First run of non-digit characters; compiled once instead of on every call. */
    private static final Pattern NON_NUMERIC_RUN = Pattern.compile("[^\\d]+");

    private static final int MAX_MONTH = 12;

    private static final int MAX_DAY = 31;

    /** JDBC types whose default expression is trimmed before conversion. */
    @Immutable
    private static final Set<Integer> TRIM_DATA_TYPES =
            Collect.unmodifiableSet(
                    Types.TINYINT,
                    Types.INTEGER,
                    Types.DATE,
                    Types.TIMESTAMP,
                    Types.TIMESTAMP_WITH_TIMEZONE,
                    Types.TIME,
                    Types.BOOLEAN,
                    Types.BIT,
                    Types.NUMERIC,
                    Types.DECIMAL,
                    Types.FLOAT,
                    Types.DOUBLE,
                    Types.REAL);

    /** JDBC types for which the literals {@code true}/{@code false} are mapped to 1/0. */
    @Immutable
    private static final Set<Integer> NUMBER_DATA_TYPES =
            Collect.unmodifiableSet(
                    Types.BIT,
                    Types.TINYINT,
                    Types.SMALLINT,
                    Types.INTEGER,
                    Types.BIGINT,
                    Types.FLOAT,
                    Types.REAL,
                    Types.DOUBLE,
                    Types.NUMERIC,
                    Types.DECIMAL);

    private static final DateTimeFormatter ISO_LOCAL_DATE_WITH_OPTIONAL_TIME =
            new DateTimeFormatterBuilder()
                    .append(DateTimeFormatter.ISO_LOCAL_DATE)
                    .optionalStart()
                    .appendLiteral(" ")
                    .append(DateTimeFormatter.ISO_LOCAL_TIME)
                    .optionalEnd()
                    .toFormatter();

    private final TiDBValueConverters converters;

    public TiDBDefaultValueConverter(TiDBValueConverters converters) {
        this.converters = converters;
    }

    /**
     * Converts a default-value expression into the value produced by the column's converter.
     *
     * @param column the column definition
     * @param defaultValueExpression the default value as text; may be null
     * @return empty when the expression converts to null, the unconverted logical value when no
     *     schema builder exists for the column, otherwise the converter's (nullable) result
     */
    @Override
    public Optional<Object> parseDefaultValue(Column column, String defaultValueExpression) {
        Object logicalDefaultValue = convert(column, defaultValueExpression);
        if (logicalDefaultValue == null) {
            return Optional.empty();
        }

        final SchemaBuilder schemaBuilder = converters.schemaBuilder(column);
        if (schemaBuilder == null) {
            return Optional.of(logicalDefaultValue);
        }
        final Schema schema = schemaBuilder.build();

        // A Field is required to look up the column's value converter. Its index is never read
        // while converting a default value, so -1 is used as a placeholder.
        final Field field = new Field(column.name(), -1, schema);
        final ValueConverter valueConverter = converters.converter(column, field);

        return Optional.ofNullable(valueConverter.convert(logicalDefaultValue));
    }

    /**
     * Converts a default-value expression into a Java value chosen by the column's JDBC type.
     *
     * @param column the column definition
     * @param value the default value as text; may be null
     * @return the converted value, the (possibly trimmed/stripped) text for JDBC types without a
     *     dedicated conversion, or null when {@code value} is null
     */
    public Object convert(Column column, String value) {
        if (value == null) {
            return null;
        }

        // trim non varchar data types before converting
        if (TRIM_DATA_TYPES.contains(column.jdbcType())) {
            value = value.trim();
        }

        // strip character set introducer on default value expressions
        value = stripCharacterSetIntroducer(value);

        // boolean is also INT(1) or TINYINT(1)
        if (NUMBER_DATA_TYPES.contains(column.jdbcType())
                && ("true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value))) {
            if (Types.DECIMAL == column.jdbcType() || Types.NUMERIC == column.jdbcType()) {
                return convertToDecimal(column, value.equalsIgnoreCase("true") ? "1" : "0");
            }
            return value.equalsIgnoreCase("true") ? 1 : 0;
        }

        switch (column.jdbcType()) {
            case Types.DATE:
                return convertToLocalDate(column, value);
            case Types.TIMESTAMP:
                return convertToLocalDateTime(column, value);
            case Types.TIMESTAMP_WITH_TIMEZONE:
                return convertToTimestamp(column, value);
            case Types.TIME:
                return convertToDuration(column, value);
            case Types.BOOLEAN:
                return convertToBoolean(value);
            case Types.BIT:
                return convertToBits(column, value);
            case Types.NUMERIC:
            case Types.DECIMAL:
                return convertToDecimal(column, value);
            case Types.FLOAT:
            case Types.DOUBLE:
            case Types.REAL:
                return convertToDouble(value);
            default:
                return value;
        }
    }

    /**
     * Converts a string into a {@link LocalDate}. Zero-like values ("0" or a date/timestamp with
     * a zero part) become null for optional columns and 1970-01-01 otherwise; unparsable values
     * are logged and handled the same way.
     *
     * @param column the column definition
     * @param value the text to convert; never null
     * @return the converted value, or null for an optional column with a zero/invalid default
     */
    private Object convertToLocalDate(Column column, String value) {
        final boolean zero =
                EPOCH_EQUIVALENT_DATE.matcher(value).matches()
                        || EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches()
                        || "0".equals(value);

        if (zero && column.isOptional()) {
            return null;
        }
        if (zero) {
            value = EPOCH_DATE;
        }

        try {
            return LocalDate.from(ISO_LOCAL_DATE_WITH_OPTIONAL_TIME.parse(value));
        } catch (Exception e) {
            LOGGER.warn(
                    "Invalid default value '{}' for date column '{}'; {}",
                    value,
                    column.name(),
                    e.getMessage());
            if (column.isOptional()) {
                return null;
            } else {
                return LocalDate.from(ISO_LOCAL_DATE_WITH_OPTIONAL_TIME.parse(EPOCH_DATE));
            }
        }
    }

    /**
     * Converts a string into a {@link LocalDateTime}. If the column allows null and the default
     * is a zero-like timestamp (or "0"), null is returned; otherwise the zero-like value is
     * replaced with 1970-01-01 00:00:00. Unparsable values are logged and handled the same way.
     *
     * @param column the column definition describing the {@code data} value; never null
     * @param value the string object to be converted into a {@link LocalDateTime} type
     * @return the converted value
     */
    private Object convertToLocalDateTime(Column column, String value) {
        final boolean matches =
                EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() || "0".equals(value);
        if (matches) {
            if (column.isOptional()) {
                return null;
            }

            value = EPOCH_TIMESTAMP;
        }

        try {
            return LocalDateTime.from(timestampFormat(column.length()).parse(value));
        } catch (Exception e) {
            LOGGER.warn(
                    "Invalid default value '{}' for datetime column '{}'; {}",
                    value,
                    column.name(),
                    e.getMessage());
            if (column.isOptional()) {
                return null;
            } else {
                return LocalDateTime.from(timestampFormat(column.length()).parse(EPOCH_TIMESTAMP));
            }
        }
    }

    /**
     * Converts a string for a TIMESTAMP WITH TIME ZONE column. If the column allows null and the
     * default is a zero-like timestamp, "0" or the epoch literal, null is returned; for a
     * non-optional column the epoch is returned instead.
     *
     * <p>NOTE(review): the epoch branch yields a {@link Timestamp} while the regular branch
     * yields a zoned date-time in the system default zone; confirm the downstream converter
     * accepts both.
     *
     * @param column the column definition describing the {@code data} value; never null
     * @param value the string object to be converted
     * @return the converted value
     */
    private Object convertToTimestamp(Column column, String value) {
        final boolean matches =
                EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches()
                        || "0".equals(value)
                        || EPOCH_TIMESTAMP.equals(value);
        if (matches) {
            if (column.isOptional()) {
                return null;
            }

            return Timestamp.from(Instant.EPOCH);
        }
        value = cleanTimestamp(value);
        return Timestamp.valueOf(value).toInstant().atZone(ZoneId.systemDefault());
    }

    /**
     * Converts a string object for an object type of {@link Duration}. When the text is a full
     * "date time" literal only its time portion is converted.
     *
     * @param column the column definition describing the {@code data} value; never null
     * @param value the string object to be converted into a {@link Duration} type
     * @return the converted value
     */
    private Object convertToDuration(Column column, String value) {
        Matcher matcher = TIMESTAMP_PATTERN.matcher(value);
        if (matcher.matches()) {
            value = matcher.group(2);
        }
        return MySqlValueConverters.stringToDuration(value);
    }

    /**
     * Converts a string object for an expected JDBC type of {@link Types#DOUBLE}.
     *
     * @param value the string object to be converted into a {@link Types#DOUBLE} type
     * @return the converted value
     * @throws NumberFormatException if the text is not a parsable double
     */
    private Object convertToDouble(String value) {
        return Double.parseDouble(value);
    }

    /**
     * Converts a string object for an expected JDBC type of {@link Types#DECIMAL}, rounding
     * half-up to the column's scale when one is defined.
     *
     * @param column the column definition describing the {@code data} value; never null
     * @param value the string object to be converted into a {@link Types#DECIMAL} type
     * @return the converted value
     */
    private Object convertToDecimal(Column column, String value) {
        return column.scale().isPresent()
                ? new BigDecimal(value).setScale(column.scale().get(), RoundingMode.HALF_UP)
                : new BigDecimal(value);
    }

    /**
     * Converts a string object for an expected JDBC type of {@link Types#BIT}: a byte array for
     * columns longer than one bit, a boolean otherwise.
     *
     * @param column the column definition describing the {@code data} value; never null
     * @param value the string object to be converted into a {@link Types#BIT} type
     * @return the converted value
     */
    private Object convertToBits(Column column, String value) {
        if (column.length() > 1) {
            return convertToBits(value);
        }
        return convertToBit(value);
    }

    /** Interprets the text as a number (non-zero is true), falling back to boolean parsing. */
    private Object convertToBit(String value) {
        try {
            return Short.parseShort(value) != 0;
        } catch (NumberFormatException ignored) {
            // Not numeric: treat as a textual boolean literal.
            return Boolean.parseBoolean(value);
        }
    }

    /** Packs a string of binary digits into bytes, consuming eight digits at a time from the right. */
    private Object convertToBits(String value) {
        int nums = value.length() / Byte.SIZE + (value.length() % Byte.SIZE == 0 ? 0 : 1);
        byte[] bytes = new byte[nums];
        for (int i = 0; i < nums; i++) {
            int s = Math.max(0, value.length() - Byte.SIZE);
            int e = value.length();
            bytes[nums - i - 1] = (byte) Integer.parseInt(value.substring(s, e), 2);
            value = value.substring(0, s);
        }
        return bytes;
    }

    /**
     * Converts a string object for an expected JDBC type of {@link Types#BOOLEAN}.
     *
     * @param value the string object to be converted into a {@link Types#BOOLEAN} type
     * @return the converted value
     */
    private Object convertToBoolean(String value) {
        try {
            return Integer.parseInt(value) != 0;
        } catch (NumberFormatException ignored) {
            // Not numeric: treat as a textual boolean literal.
            return Boolean.parseBoolean(value);
        }
    }

    /**
     * Builds a formatter for "yyyy-MM-dd[ HH:mm:ss...]" with missing time fields defaulting to
     * zero; when {@code length} is positive an additional fraction of up to {@code length}
     * digits is appended.
     */
    private DateTimeFormatter timestampFormat(int length) {
        final DateTimeFormatterBuilder dtf =
                new DateTimeFormatterBuilder()
                        .appendPattern("yyyy-MM-dd")
                        .optionalStart()
                        .appendLiteral(" ")
                        .append(DateTimeFormatter.ISO_LOCAL_TIME)
                        .optionalEnd()
                        .parseDefaulting(ChronoField.HOUR_OF_DAY, 0)
                        .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0)
                        .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0);
        if (length > 0) {
            dtf.appendFraction(ChronoField.MICRO_OF_SECOND, 0, length, true);
        }
        return dtf.toFormatter();
    }

    /**
     * Clean input timestamp to yyyy-mm-dd hh:mm:ss[.fffffffff] format.
     *
     * @param s input timestamp
     * @return cleaned timestamp
     * @throws IllegalArgumentException if {@code s} is null or its date part cannot be parsed
     */
    private String cleanTimestamp(String s) {
        if (s == null) {
            throw new IllegalArgumentException("null string");
        }

        s = s.trim();

        // clean first dash
        s = replaceFirstNonNumericSubstring(s, 0, '-');
        // clean second dash
        s = replaceFirstNonNumericSubstring(s, s.indexOf('-') + 1, '-');
        // clean dividing space
        s = replaceFirstNonNumericSubstring(s, s.indexOf('-', s.indexOf('-') + 1) + 1, ' ');
        if (s.indexOf(' ') != -1) {
            // clean first colon
            s = replaceFirstNonNumericSubstring(s, s.indexOf(' ') + 1, ':');
            if (s.indexOf(':') != -1) {
                // clean second colon
                s = replaceFirstNonNumericSubstring(s, s.indexOf(':') + 1, ':');
            }
        }

        // Locate the date separators
        int firstDash = s.indexOf('-');
        int secondDash = s.indexOf('-', firstDash + 1);
        int dividingSpace = s.indexOf(' ');

        // Locate the time separators
        int firstColon = s.indexOf(':', dividingSpace + 1);
        int secondColon = s.indexOf(':', firstColon + 1);
        int period = s.indexOf('.', secondColon + 1);

        int year = 0;
        int month = 0;
        int day = 0;
        int hour = 0;
        int minute = 0;
        int second = 0;

        // Get the date
        int len = s.length();
        boolean parsedDate = false;
        if (firstDash > 0 && secondDash > firstDash) {
            year = Integer.parseInt(s.substring(0, firstDash));
            month = Integer.parseInt(s.substring(firstDash + 1, secondDash));
            if (dividingSpace != -1) {
                day = Integer.parseInt(s.substring(secondDash + 1, dividingSpace));
            } else {
                day = Integer.parseInt(s.substring(secondDash + 1, len));
            }

            if ((month >= 1 && month <= MAX_MONTH) && (day >= 1 && day <= MAX_DAY)) {
                parsedDate = true;
            }
        }
        if (!parsedDate) {
            throw new IllegalArgumentException("Cannot parse the date from " + s);
        }

        // Get the time. Hour, minute, second and colons are all optional
        if (dividingSpace != -1 && dividingSpace < len - 1) {
            if (firstColon == -1) {
                hour = Integer.parseInt(s.substring(dividingSpace + 1, len));
            } else {
                hour = Integer.parseInt(s.substring(dividingSpace + 1, firstColon));
                if (firstColon < len - 1) {
                    if (secondColon == -1) {
                        minute = Integer.parseInt(s.substring(firstColon + 1, len));
                    } else {
                        minute = Integer.parseInt(s.substring(firstColon + 1, secondColon));
                        if (secondColon < len - 1) {
                            if (period == -1) {
                                second = Integer.parseInt(s.substring(secondColon + 1, len));
                            } else {
                                second = Integer.parseInt(s.substring(secondColon + 1, period));
                            }
                        }
                    }
                }
            }
        }

        // Locale.ROOT keeps the digits ASCII regardless of the JVM default locale, which is what
        // Timestamp.valueOf expects.
        StringBuilder cleanedTimestamp =
                new StringBuilder(
                        String.format(
                                Locale.ROOT,
                                "%04d-%02d-%02d %02d:%02d:%02d",
                                year,
                                month,
                                day,
                                hour,
                                minute,
                                second));

        if (period != -1 && period < len - 1) {
            cleanedTimestamp.append(".").append(s.substring(period + 1));
        }

        return cleanedTimestamp.toString();
    }

    /**
     * Replace the first non-numeric substring at or after {@code startIndex}.
     *
     * @param s the original string
     * @param startIndex the beginning index, inclusive
     * @param c the new character
     * @return {@code s} with the first run of non-digit characters at or after {@code startIndex}
     *     replaced by {@code c}; unchanged if there is no such run
     */
    private String replaceFirstNonNumericSubstring(String s, int startIndex, char c) {
        String head = s.substring(0, startIndex);
        String rest = s.substring(startIndex);
        return head + NON_NUMERIC_RUN.matcher(rest).replaceFirst(Character.toString(c));
    }

    /** Returns the quoted literal of a {@code _charset'literal'} expression, else the input. */
    private String stripCharacterSetIntroducer(String value) {
        final Matcher matcher = CHARSET_INTRODUCER_PATTERN.matcher(value);
        return !matcher.matches() ? value : matcher.group(1);
    }
}
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.converter; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.util.FlinkRuntimeException; + +import com.github.shyiko.mysql.binlog.event.deserialization.json.JsonBinary; +import io.debezium.DebeziumException; +import io.debezium.config.CommonConnectorConfig; +import io.debezium.connector.mysql.MySqlConnection; +import io.debezium.connector.mysql.MySqlGeometry; +import io.debezium.connector.mysql.MySqlUnsignedIntegerConverter; +import io.debezium.connector.mysql.antlr.MySqlAntlrDdlParser; +import io.debezium.data.Json; +import io.debezium.data.geometry.Geometry; +import io.debezium.data.geometry.Point; +import io.debezium.jdbc.JdbcValueConverters; +import io.debezium.jdbc.TemporalPrecisionMode; +import io.debezium.relational.Column; +import io.debezium.relational.ValueConverter; +import io.debezium.time.Year; +import io.debezium.util.Strings; +import org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.errors.ConnectException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteOrder; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; +import java.sql.Date; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.Duration; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAdjuster; +import java.util.List; +import java.util.regex.Pattern; + +/** JdbcValueConverters for tiDB. 
*/ +public class TiDBValueConverters extends JdbcValueConverters { + + @FunctionalInterface + public static interface ParsingErrorHandler { + void error(String message, Exception exception); + } + + private static final Logger LOGGER = LoggerFactory.getLogger(TiDBValueConverters.class); + /** Used to parse values of TIME columns. Format: 000:00:00.000000. */ + private static final Pattern TIME_FIELD_PATTERN = + Pattern.compile("(\\-?[0-9]*):([0-9]*):([0-9]*)(\\.([0-9]*))?"); + + /** Used to parse values of DATE columns. Format: 000-00-00. */ + private static final Pattern DATE_FIELD_PATTERN = Pattern.compile("([0-9]*)-([0-9]*)-([0-9]*)"); + + /** Used to parse values of TIMESTAMP columns. Format: 000-00-00 00:00:00.000. */ + private static final Pattern TIMESTAMP_FIELD_PATTERN = + Pattern.compile("([0-9]*)-([0-9]*)-([0-9]*) .*"); + + public static Temporal adjustTemporal(Temporal temporal) { + if (temporal.isSupported(ChronoField.YEAR)) { + int year = temporal.get(ChronoField.YEAR); + if (0 <= year && year <= 69) { + temporal = temporal.plus(2000, ChronoUnit.YEARS); + } else if (70 <= year && year <= 99) { + temporal = temporal.plus(1900, ChronoUnit.YEARS); + } + } + return temporal; + } + + // todo + public TiDBValueConverters(TiDBConnectorConfig connectorConfig) { + super( + connectorConfig.getDecimalMode(), + connectorConfig.getTemporalPrecisionMode(), + ZoneOffset.UTC, + x -> x, + BigIntUnsignedMode.PRECISE, + connectorConfig.binaryHandlingMode()); + } + + public TiDBValueConverters( + DecimalMode decimalMode, + TemporalPrecisionMode temporalPrecisionMode, + BigIntUnsignedMode bigIntUnsignedMode, + CommonConnectorConfig.BinaryHandlingMode binaryMode) { + this( + decimalMode, + temporalPrecisionMode, + bigIntUnsignedMode, + binaryMode, + x -> x, + TiDBValueConverters::defaultParsingErrorHandler); + } + + public TiDBValueConverters( + DecimalMode decimalMode, + TemporalPrecisionMode temporalPrecisionMode, + BigIntUnsignedMode bigIntUnsignedMode, + 
CommonConnectorConfig.BinaryHandlingMode binaryMode, + TemporalAdjuster adjuster, + ParsingErrorHandler parsingErrorHandler) { + super( + decimalMode, + temporalPrecisionMode, + ZoneOffset.UTC, + adjuster, + bigIntUnsignedMode, + binaryMode); + // this.parsingErrorHandler = parsingErrorHandler; + } + + @Override + protected ByteOrder byteOrderOfBitType() { + return ByteOrder.BIG_ENDIAN; + } + + @Override + public SchemaBuilder schemaBuilder(Column column) { + // Handle a few MySQL-specific types based upon how they are handled by the MySQL binlog + // client ... + String typeName = column.typeName().toUpperCase(); + if (matches(typeName, "JSON")) { + return Json.builder(); + } + if (matches(typeName, "POINT")) { + return Point.builder(); + } + if (matches(typeName, "GEOMETRY") + || matches(typeName, "LINESTRING") + || matches(typeName, "POLYGON") + || matches(typeName, "MULTIPOINT") + || matches(typeName, "MULTILINESTRING") + || matches(typeName, "MULTIPOLYGON") + || isGeometryCollection(typeName)) { + return Geometry.builder(); + } + if (matches(typeName, "YEAR")) { + return Year.builder(); + } + if (matches(typeName, "ENUM")) { + String commaSeparatedOptions = extractEnumAndSetOptionsAsString(column); + return io.debezium.data.Enum.builder(commaSeparatedOptions); + } + if (matches(typeName, "SET")) { + String commaSeparatedOptions = extractEnumAndSetOptionsAsString(column); + return io.debezium.data.EnumSet.builder(commaSeparatedOptions); + } + if (matches(typeName, "SMALLINT UNSIGNED") + || matches(typeName, "SMALLINT UNSIGNED ZEROFILL") + || matches(typeName, "INT2 UNSIGNED") + || matches(typeName, "INT2 UNSIGNED ZEROFILL")) { + // In order to capture unsigned SMALLINT 16-bit data source, INT32 will be required to + // safely capture all valid values + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return SchemaBuilder.int32(); + } + if (matches(typeName, "INT UNSIGNED") + || matches(typeName, "INT 
UNSIGNED ZEROFILL") + || matches(typeName, "INT4 UNSIGNED") + || matches(typeName, "INT4 UNSIGNED ZEROFILL")) { + // In order to capture unsigned INT 32-bit data source, INT64 will be required to safely + // capture all valid values + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return SchemaBuilder.int64(); + } + if (matches(typeName, "BIGINT UNSIGNED") + || matches(typeName, "BIGINT UNSIGNED ZEROFILL") + || matches(typeName, "INT8 UNSIGNED") + || matches(typeName, "INT8 UNSIGNED ZEROFILL")) { + switch (super.bigIntUnsignedMode) { + case LONG: + return SchemaBuilder.int64(); + case PRECISE: + // In order to capture unsigned INT 64-bit data source, + // org.apache.kafka.connect.data.Decimal:Byte will be required to safely capture + // all valid values with scale of 0 + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return Decimal.builder(0); + } + } + + // Otherwise, let the base class handle it ... + return super.schemaBuilder(column); + } + + @Override + public ValueConverter converter(Column column, Field fieldDefn) { + // Handle a few MySQL-specific types based upon how they are handled by the MySQL binlog + // client ... 
+ String typeName = column.typeName().toUpperCase(); + if (matches(typeName, "JSON")) { + return (data) -> convertJson(column, fieldDefn, data); + } + if (matches(typeName, "GEOMETRY") + || matches(typeName, "LINESTRING") + || matches(typeName, "POLYGON") + || matches(typeName, "MULTIPOINT") + || matches(typeName, "MULTILINESTRING") + || matches(typeName, "MULTIPOLYGON") + || isGeometryCollection(typeName)) { + return (data -> convertGeometry(column, fieldDefn, data)); + } + if (matches(typeName, "POINT")) { + // backwards compatibility + return (data -> convertPoint(column, fieldDefn, data)); + } + if (matches(typeName, "YEAR")) { + return (data) -> convertYearToInt(column, fieldDefn, data); + } + if (matches(typeName, "ENUM")) { + // Build up the character array based upon the column's type ... + List options = extractEnumAndSetOptions(column); + return (data) -> convertEnumToString(options, column, fieldDefn, data); + } + if (matches(typeName, "SET")) { + // Build up the character array based upon the column's type ... 
+ List options = extractEnumAndSetOptions(column); + return (data) -> convertSetToString(options, column, fieldDefn, data); + } + if (matches(typeName, "TINYINT UNSIGNED") + || matches(typeName, "TINYINT UNSIGNED ZEROFILL") + || matches(typeName, "INT1 UNSIGNED") + || matches(typeName, "INT1 UNSIGNED ZEROFILL")) { + // Convert TINYINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedTinyint(column, fieldDefn, data); + } + if (matches(typeName, "SMALLINT UNSIGNED") + || matches(typeName, "SMALLINT UNSIGNED ZEROFILL") + || matches(typeName, "INT2 UNSIGNED") + || matches(typeName, "INT2 UNSIGNED ZEROFILL")) { + // Convert SMALLINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedSmallint(column, fieldDefn, data); + } + if (matches(typeName, "MEDIUMINT UNSIGNED") + || matches(typeName, "MEDIUMINT UNSIGNED ZEROFILL") + || matches(typeName, "INT3 UNSIGNED") + || matches(typeName, "INT3 UNSIGNED ZEROFILL") + || matches(typeName, "MIDDLEINT UNSIGNED") + || matches(typeName, "MIDDLEINT UNSIGNED ZEROFILL")) { + // Convert MEDIUMINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedMediumint(column, fieldDefn, data); + } + if (matches(typeName, "INT UNSIGNED") + || matches(typeName, "INT UNSIGNED ZEROFILL") + || matches(typeName, "INT4 UNSIGNED") + || matches(typeName, "INT4 UNSIGNED ZEROFILL")) { + // Convert INT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedInt(column, fieldDefn, data); + } + if (matches(typeName, "BIGINT UNSIGNED") + || matches(typeName, "BIGINT UNSIGNED ZEROFILL") + || matches(typeName, "INT8 UNSIGNED") + || matches(typeName, "INT8 UNSIGNED ZEROFILL")) { + switch (super.bigIntUnsignedMode) { + case LONG: + return (data) -> convertBigInt(column, fieldDefn, data); + case PRECISE: + // Convert BIGINT 
UNSIGNED internally from SIGNED to UNSIGNED based on the + // boundary settings + return (data) -> convertUnsignedBigint(column, fieldDefn, data); + } + } + + // We have to convert bytes encoded in the column's character set ... + switch (column.jdbcType()) { + case Types.CHAR: // variable-length + case Types.VARCHAR: // variable-length + case Types.LONGVARCHAR: // variable-length + case Types.CLOB: // variable-length + case Types.NCHAR: // fixed-length + case Types.NVARCHAR: // fixed-length + case Types.LONGNVARCHAR: // fixed-length + case Types.NCLOB: // fixed-length + case Types.DATALINK: + case Types.SQLXML: + Charset charset = charsetFor(column); + if (charset != null) { + logger.debug("Using {} charset by default for column: {}", charset, column); + return (data) -> convertString(column, fieldDefn, charset, data); + } + logger.warn( + "Using UTF-8 charset by default for column without charset: {}", column); + return (data) -> convertString(column, fieldDefn, StandardCharsets.UTF_8, data); + case Types.TIME: + if (adaptiveTimeMicrosecondsPrecisionMode) { + return (data) -> convertTime(column, fieldDefn, data); + } + case Types.TIMESTAMP: + return ((ValueConverter) + (data -> convertTimestampToLocalDateTime(column, fieldDefn, data))) + .and(super.converter(column, fieldDefn)); + default: + break; + } + + // Otherwise, let the base class handle it ... + return super.converter(column, fieldDefn); + } + + protected Object convertJson(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "{}", + (r) -> { + if (data instanceof byte[]) { + if (((byte[]) data).length == 0) { + r.deliver(column.isOptional() ? 
null : "{}"); + } else { + try { + r.deliver(JsonBinary.parseAsString((byte[]) data)); + } catch (IOException var5) { + throw new FlinkRuntimeException("tidbvalueConverters error"); + // this.parsingErrorHandler.error( + // "Failed to parse and read + // a JSON value on '" + // + column + // + "' value " + // + + // Arrays.toString((byte[]) data), + // var5); + // r.deliver(column.isOptional() ? + // null : "{}"); + } + } + } else if (data instanceof String) { + r.deliver(data); + } + }); + } + + protected Object convertPoint(Column column, Field fieldDefn, Object data) { + MySqlGeometry empty = MySqlGeometry.createEmpty(); + return this.convertValue( + column, + fieldDefn, + data, + Geometry.createValue(fieldDefn.schema(), empty.getWkb(), empty.getSrid()), + (r) -> { + if (data instanceof byte[]) { + MySqlGeometry mySqlGeometry = MySqlGeometry.fromBytes((byte[]) data); + if (!mySqlGeometry.isPoint()) { + throw new ConnectException( + "Failed to parse and read a value of type POINT on " + column); + } + + r.deliver( + Point.createValue( + fieldDefn.schema(), + mySqlGeometry.getWkb(), + mySqlGeometry.getSrid())); + } + }); + } + + protected Object convertYearToInt(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + Object mutData = data; + if (data instanceof java.time.Year) { + r.deliver( + adjustTemporal( + java.time.Year.of( + ((java.time.Year) data).getValue())) + .get(ChronoField.YEAR)); + } else if (data instanceof Date) { + r.deliver(((Date) data).getYear() + 1900); + } else if (data instanceof String) { + mutData = Integer.valueOf((String) data); + } + + if (mutData instanceof Number) { + r.deliver( + adjustTemporal(java.time.Year.of(((Number) mutData).intValue())) + .get(ChronoField.YEAR)); + } + }); + } + + protected Object convertEnumToString( + List options, Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if 
(data instanceof String) { + r.deliver(data); + } else if (data instanceof Integer) { + if (options != null) { + int value = (Integer) data; + if (value == 0) { + r.deliver(""); + } + + int index = value - 1; + if (index < options.size() && index >= 0) { + r.deliver(options.get(index)); + } + } else { + r.deliver((Object) null); + } + } + }); + } + + protected Object convertSetToString( + List options, Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if (data instanceof String) { + r.deliver(data); + } else if (data instanceof Long) { + long indexes = (Long) data; + r.deliver(this.convertSetValue(column, indexes, options)); + } + }); + } + + protected String convertSetValue(Column column, long indexes, List options) { + StringBuilder sb = new StringBuilder(); + int index = 0; + boolean first = true; + + for (int optionLen = options.size(); indexes != 0L; indexes >>>= 1) { + if (indexes % 2L != 0L) { + if (first) { + first = false; + } else { + sb.append(','); + } + + if (index < optionLen) { + sb.append((String) options.get(index)); + } else { + this.logger.warn("Found unexpected index '{}' on column {}", index, column); + } + } + + ++index; + } + + return sb.toString(); + } + + protected Object convertUnsignedBigint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + if (data instanceof BigDecimal) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + (BigDecimal) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + new BigDecimal(((Number) data).toString()))); + } else if (data instanceof String) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + new BigDecimal((String) data))); + } else { + r.deliver(this.convertNumeric(column, fieldDefn, data)); + } + }); + } + + protected Charset charsetFor(Column column) { + 
String mySqlCharsetName = column.charsetName(); + if (mySqlCharsetName == null) { + logger.warn("Column is missing a character set: {}", column); + return null; + } + String encoding = MySqlConnection.getJavaEncodingForMysqlCharSet(mySqlCharsetName); + if (encoding == null) { + logger.debug( + "Column uses MySQL character set '{}', which has no mapping to a Java character set, will try it in lowercase", + mySqlCharsetName); + encoding = + MySqlConnection.getJavaEncodingForMysqlCharSet(mySqlCharsetName.toLowerCase()); + } + if (encoding == null) { + logger.warn( + "Column uses MySQL character set '{}', which has no mapping to a Java character set", + mySqlCharsetName); + } else { + try { + return Charset.forName(encoding); + } catch (IllegalCharsetNameException e) { + logger.error( + "Unable to load Java charset '{}' for column with MySQL character set '{}'", + encoding, + mySqlCharsetName); + } + } + return null; + } + + protected boolean matches(String upperCaseTypeName, String upperCaseMatch) { + if (upperCaseTypeName == null) { + return false; + } else { + return upperCaseMatch.equals(upperCaseTypeName) + || upperCaseTypeName.startsWith(upperCaseMatch + "("); + } + } + + protected Object convertDurationToMicroseconds(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + try { + if (data instanceof Duration) { + r.deliver(((Duration) data).toNanos() / 1000L); + } + } catch (IllegalArgumentException var3) { + } + }); + } + + protected Object convertUnsignedInt(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + if (data instanceof Long) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedInteger((Long) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedInteger( + ((Number) data).longValue())); + } else { + r.deliver(this.convertBigInt(column, fieldDefn, 
data)); + } + }); + } + + protected Object convertUnsignedMediumint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + if (data instanceof Integer) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedMediumint( + (Integer) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedMediumint( + ((Number) data).intValue())); + } else { + r.deliver(this.convertInteger(column, fieldDefn, data)); + } + }); + } + + protected Object convertUnsignedSmallint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + if (data instanceof Integer) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedSmallint( + (Integer) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedSmallint( + ((Number) data).intValue())); + } else { + r.deliver(this.convertInteger(column, fieldDefn, data)); + } + }); + } + + protected Object convertUnsignedTinyint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + Short.valueOf((short) 0), + (r) -> { + if (data instanceof Short) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedTinyint((Short) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedTinyint( + ((Number) data).shortValue())); + } else { + r.deliver(this.convertSmallInt(column, fieldDefn, data)); + } + }); + } + + protected Object convertGeometry(Column column, Field fieldDefn, Object data) { + MySqlGeometry empty = MySqlGeometry.createEmpty(); + return this.convertValue( + column, + fieldDefn, + data, + Geometry.createValue(fieldDefn.schema(), empty.getWkb(), empty.getSrid()), + (r) -> { + if (data instanceof byte[] && data instanceof byte[]) { + MySqlGeometry mySqlGeometry = MySqlGeometry.fromBytes((byte[]) data); 
+ r.deliver( + Geometry.createValue( + fieldDefn.schema(), + mySqlGeometry.getWkb(), + mySqlGeometry.getSrid())); + } + }); + } + + protected boolean isGeometryCollection(String upperCaseTypeName) { + if (upperCaseTypeName == null) { + return false; + } else { + return upperCaseTypeName.equals("GEOMETRYCOLLECTION") + || upperCaseTypeName.equals("GEOMCOLLECTION") + || upperCaseTypeName.endsWith(".GEOMCOLLECTION"); + } + } + + protected String extractEnumAndSetOptionsAsString(Column column) { + return Strings.join(",", this.extractEnumAndSetOptions(column)); + } + + protected List extractEnumAndSetOptions(Column column) { + return MySqlAntlrDdlParser.extractEnumAndSetOptions(column.enumValues()); + } + + public static void defaultParsingErrorHandler(String message, Exception exception) { + throw new DebeziumException(message, exception); + } + + protected Object convertString( + Column column, Field fieldDefn, Charset columnCharset, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if (data instanceof byte[]) { + r.deliver(new String((byte[]) data, columnCharset)); + } else if (data instanceof String) { + r.deliver(data); + } + }); + } + + protected Object convertTimestampToLocalDateTime(Column column, Field fieldDefn, Object data) { + if (data == null && !fieldDefn.schema().isOptional()) { + return null; + } else { + return !(data instanceof Timestamp) ? 
data : ((Timestamp) data).toLocalDateTime(); + } + } + + @Override + protected Object convertTime(Column column, Field fieldDefn, Object data) { + if (data instanceof String) { + data = Strings.asDuration((String) data); + } + return super.convertTime(column, fieldDefn, data); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java new file mode 100644 index 00000000000..38a1bc4e3eb --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.data.Envelope; +import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.relational.RelationalChangeRecordEmitter; +import io.debezium.util.Clock; + +import java.io.Serializable; + +public class EventEmitter extends RelationalChangeRecordEmitter { + private final Envelope.Operation operation; + private final Object[] before; + private final Object[] after; + + public EventEmitter( + TiDBPartition partition, + OffsetContext offsetContext, + Clock clock, + Envelope.Operation operation, + Serializable[] before, + Serializable[] after) { + super(partition, offsetContext, clock); + this.operation = operation; + this.before = before; + this.after = after; + } + + @Override + protected Object[] getOldColumnValues() { + return before; + } + + @Override + protected Object[] getNewColumnValues() { + return after; + } + + @Override + public Envelope.Operation getOperation() { + return operation; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java new file mode 100644 index 00000000000..43a87707408 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.pipeline.ErrorHandler; +import io.debezium.pipeline.source.spi.StreamingChangeEventSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +public class EventSourceReader + implements StreamingChangeEventSource { + private static final Logger LOG = LoggerFactory.getLogger(EventSourceReader.class); + + public EventSourceReader( + TiDBConnectorConfig dbzConnectorConfig, + JdbcSourceEventDispatcher eventDispatcher, + ErrorHandler errorHandler, + TiDBSourceFetchTaskContext taskContext, + StreamSplit split) {} + + @Override + public void init() throws InterruptedException { + StreamingChangeEventSource.super.init(); + } + + @Override + public void execute( + ChangeEventSourceContext context, + TiDBPartition partition, + EventOffsetContext offsetContext) + throws InterruptedException {} + + @Override + public boolean executeIteration( + ChangeEventSourceContext context, + TiDBPartition partition, + 
EventOffsetContext offsetContext) + throws InterruptedException { + return StreamingChangeEventSource.super.executeIteration(context, partition, offsetContext); + } + + @Override + public void commitOffset(Map offset) { + StreamingChangeEventSource.super.commitOffset(offset); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java similarity index 65% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java index 07a4284212b..7d5b251fa8b 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java @@ -15,14 +15,21 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table; +package org.apache.flink.cdc.connectors.tidb.source.fetch; -/** - * Startup modes for the TiDB CDC Consumer. 
- * - * @see StartupOptions - */ -public enum StartupMode { - INITIAL, - LATEST_OFFSET, +import io.debezium.pipeline.source.spi.ChangeEventSource; + +public class StoppableChangeEventSourceContext + implements ChangeEventSource.ChangeEventSourceContext { + + private volatile boolean isRunning = true; + + public void stopChangeEventSource() { + isRunning = false; + } + + @Override + public boolean isRunning() { + return isRunning; + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java new file mode 100644 index 00000000000..c55a1ee20c2 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkKind; +import org.apache.flink.cdc.connectors.base.source.reader.external.AbstractScanFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.pipeline.EventDispatcher; +import io.debezium.pipeline.source.AbstractSnapshotChangeEventSource; +import io.debezium.pipeline.source.spi.ChangeEventSource; +import io.debezium.pipeline.source.spi.SnapshotProgressListener; +import io.debezium.pipeline.spi.ChangeRecordEmitter; +import io.debezium.pipeline.spi.SnapshotResult; +import io.debezium.relational.RelationalSnapshotChangeEventSource; +import io.debezium.relational.SnapshotChangeRecordEmitter; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.util.Clock; +import io.debezium.util.ColumnUtils; +import io.debezium.util.Strings; +import io.debezium.util.Threads; +import org.apache.kafka.connect.errors.ConnectException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.time.Duration; + +public class TiDBScanFetchTask extends AbstractScanFetchTask { + private static 
final Logger LOG = LoggerFactory.getLogger(TiDBScanFetchTask.class); + + public TiDBScanFetchTask(SnapshotSplit split) { + super(split); + } + + @Override + protected void executeBackfillTask(Context context, StreamSplit backfillStreamSplit) + throws Exception { + + // just for test + TiDBSourceFetchTaskContext ctx = (TiDBSourceFetchTaskContext) context; + final EventOffset currentOffset = + EventOffset.of( + ((TiDBSourceFetchTaskContext) context).getOffsetContext().getOffset()); + JdbcSourceEventDispatcher dispatcher = ctx.getEventDispatcher(); + dispatcher.dispatchWatermarkEvent( + ctx.getPartition().getSourcePartition(), + backfillStreamSplit, + currentOffset, + WatermarkKind.END); + } + + /** 创建并执行一个 TiDBSnapshotSplitReadTask */ + @Override + protected void executeDataSnapshot(Context context) throws Exception { + TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; + TiDBSnapshotSplitReadTask tiDBSnapshotSplitReadTask = + new TiDBSnapshotSplitReadTask( + sourceFetchContext.getDbzConnectorConfig(), + sourceFetchContext.getOffsetContext(), + sourceFetchContext.getSnapshotChangeEventSourceMetrics(), + sourceFetchContext.getConnection(), + sourceFetchContext.getDatabaseSchema(), + sourceFetchContext.getEventDispatcher(), + snapshotSplit); + TiDBSnapshotSplitChangeEventSourceContext tiDBSnapshotSplitChangeEventSourceContext = + new TiDBSnapshotSplitChangeEventSourceContext(); + SnapshotResult snapshotResult = + tiDBSnapshotSplitReadTask.execute( + tiDBSnapshotSplitChangeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); + + if (!snapshotResult.isCompletedOrSkipped()) { + taskRunning = false; + throw new IllegalStateException( + String.format("Read snapshot for tidb split %s fail", snapshotResult)); + } + } + + /** A wrapped task to fetch snapshot split of table. 
负责从TiDB读取快照分片 */ + public static class TiDBSnapshotSplitReadTask + extends AbstractSnapshotChangeEventSource { + + private static final Logger LOG = LoggerFactory.getLogger(TiDBSnapshotSplitReadTask.class); + private static final Duration LOG_INTERVAL = Duration.ofMillis(10_000); + private final TiDBConnectorConfig connectorConfig; + private final TiDBDatabaseSchema databaseSchema; + + private final TiDBConnection jdbcConnection; + + private final JdbcSourceEventDispatcher dispatcher; + private final Clock clock; + + private final SnapshotSplit snapshotSplit; + + private final EventOffsetContext offsetContext; + private final SnapshotProgressListener snapshotProgressListener; + + public TiDBSnapshotSplitReadTask( + TiDBConnectorConfig connectorConfig, + EventOffsetContext previousOffset, + SnapshotProgressListener snapshotProgressListener, + TiDBConnection jdbcConnection, + TiDBDatabaseSchema databaseSchema, + JdbcSourceEventDispatcher dispatcher, + SnapshotSplit snapshotSplit) { + super(connectorConfig, snapshotProgressListener); + this.connectorConfig = connectorConfig; + this.databaseSchema = databaseSchema; + this.jdbcConnection = jdbcConnection; + this.dispatcher = dispatcher; + this.snapshotSplit = snapshotSplit; + this.offsetContext = previousOffset; + this.snapshotProgressListener = snapshotProgressListener; + this.clock = Clock.SYSTEM; + } + + @Override + public SnapshotResult execute( + ChangeEventSource.ChangeEventSourceContext context, + TiDBPartition partition, + EventOffsetContext previousOffset) + throws InterruptedException { + // todo 返回为null + AbstractSnapshotChangeEventSource.SnapshottingTask snapshottingTask = + getSnapshottingTask(partition, previousOffset); + final TiDBSnapshotContext ctx; + try { + ctx = prepare(partition); + } catch (Exception e) { + LOG.error("Failed to initialize snapshot context.", e); + throw new RuntimeException(e); + } + try { + return doExecute(context, previousOffset, ctx, snapshottingTask); + } catch 
(InterruptedException e) { + LOG.warn("Snapshot was interrupted before completion"); + throw e; + } catch (Exception e) { + LOG.warn("Snapshot was interrupted before completion"); + throw new RuntimeException(e); + } + } + + private static class TiDBSnapshotContext + extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext< + TiDBPartition, EventOffsetContext> { + + public TiDBSnapshotContext(TiDBPartition partition) throws SQLException { + super(partition, ""); + } + } + + @Override + protected SnapshotResult doExecute( + ChangeEventSourceContext context, + EventOffsetContext previousOffset, + SnapshotContext snapshotContext, + SnapshottingTask snapshottingTask) // 没有调用这个参数 + throws Exception { + final TiDBSnapshotContext ctx = (TiDBSnapshotContext) snapshotContext; + ctx.offset = offsetContext; + createDataEvents(ctx, snapshotSplit.getTableId()); + + return SnapshotResult.completed(ctx.offset); + } + + private void createDataEvents(TiDBSnapshotContext snapshotContext, TableId tableId) + throws Exception { + EventDispatcher.SnapshotReceiver snapshotReceiver = + dispatcher.getSnapshotChangeEventReceiver(); + LOG.debug("Snapshotting table {}", tableId); + createDataEventsForTable( + snapshotContext, snapshotReceiver, databaseSchema.tableFor(tableId)); + snapshotReceiver.completeSnapshot(); + } + + private void createDataEventsForTable( + TiDBSnapshotContext snapshotContext, + EventDispatcher.SnapshotReceiver snapshotReceiver, + Table table) + throws InterruptedException { + + long exportStart = clock.currentTimeInMillis(); + LOG.info( + "Exporting data from split '{}' of table {}", + snapshotSplit.splitId(), + table.id()); + + final String selectSql = + TiDBUtils.buildSplitScanQuery( + snapshotSplit.getTableId(), + snapshotSplit.getSplitKeyType(), + snapshotSplit.getSplitStart() == null, + snapshotSplit.getSplitEnd() == null); + LOG.info( + "For split '{}' of table {} using select statement: '{}'", + snapshotSplit.splitId(), + table.id(), + selectSql); 
+ + try (PreparedStatement selectStatement = + TiDBUtils.readTableSplitDataStatement( + jdbcConnection, + selectSql, + snapshotSplit.getSplitStart() == null, + snapshotSplit.getSplitEnd() == null, + snapshotSplit.getSplitStart(), + snapshotSplit.getSplitEnd(), + snapshotSplit.getSplitKeyType().getFieldCount(), + connectorConfig.getQueryFetchSize()); + ResultSet rs = selectStatement.executeQuery()) { + ColumnUtils.ColumnArray columnArray = ColumnUtils.toArray(rs, table); + long rows = 0; + Threads.Timer logTimer = getTableScanLogTimer(); + + while (rs.next()) { + rows++; + final Object[] row = + jdbcConnection.rowToArray(table, databaseSchema, rs, columnArray); + if (logTimer.expired()) { + long stop = clock.currentTimeInMillis(); + LOG.info( + "Exported {} records for split '{}' after {}", + rows, + snapshotSplit.splitId(), + Strings.duration(stop - exportStart)); + snapshotProgressListener.rowsScanned( + snapshotContext.partition, table.id(), rows); + logTimer = getTableScanLogTimer(); + } + dispatcher.dispatchSnapshotEvent( + snapshotContext.partition, + table.id(), + getChangeRecordEmitter(snapshotContext, table.id(), row), + snapshotReceiver); + } + LOG.info( + "Finished exporting {} records for split '{}', total duration '{}'", + rows, + snapshotSplit.splitId(), + Strings.duration(clock.currentTimeInMillis() - exportStart)); + } catch (SQLException e) { + throw new ConnectException("Snapshotting of table " + table.id() + " failed", e); + } + } + + protected ChangeRecordEmitter getChangeRecordEmitter( + TiDBSnapshotContext snapshotContext, TableId tableId, Object[] row) { + snapshotContext.offset.event(tableId, clock.currentTime()); + return new SnapshotChangeRecordEmitter<>( + snapshotContext.partition, snapshotContext.offset, row, clock); + } + + private Threads.Timer getTableScanLogTimer() { + return Threads.timer(clock, LOG_INTERVAL); + } + + @Override + protected SnapshottingTask getSnapshottingTask( + TiDBPartition partition, EventOffsetContext 
previousOffset) { + return new SnapshottingTask(false, true); + } + + @Override + protected TiDBSnapshotContext prepare(TiDBPartition partition) throws Exception { + return new TiDBSnapshotContext(partition); + } + } + + public class TiDBSnapshotSplitChangeEventSourceContext + implements ChangeEventSource.ChangeEventSourceContext { + + public void finished() { + taskRunning = false; + } + + @Override + public boolean isRunning() { + return taskRunning; + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java new file mode 100644 index 00000000000..67fc3eccea7 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.WatermarkDispatcher; +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.JdbcSourceFetchTaskContext; +import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.handler.TiDBErrorHandler; +import org.apache.flink.cdc.connectors.tidb.source.handler.TiDBSchemaChangeEventHandler; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetUtils; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.table.types.logical.RowType; + +import io.debezium.connector.base.ChangeEventQueue; +import io.debezium.connector.tidb.TiDBEventMetadataProvider; +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.connector.tidb.TiDBTaskContext; +import io.debezium.pipeline.DataChangeEvent; +import io.debezium.pipeline.ErrorHandler; +import io.debezium.pipeline.metrics.DefaultChangeEventSourceMetricsFactory; +import io.debezium.pipeline.metrics.SnapshotChangeEventSourceMetrics; +import 
io.debezium.pipeline.metrics.spi.ChangeEventSourceMetricsFactory; +import io.debezium.pipeline.source.spi.EventMetadataProvider; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import io.debezium.schema.TopicSelector; +import org.apache.kafka.connect.source.SourceRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TiDBSourceFetchTaskContext extends JdbcSourceFetchTaskContext { + + private static final Logger LOG = LoggerFactory.getLogger(TiDBSourceFetchTaskContext.class); + + private TiDBTaskContext tidbTaskContext; + + private final TiDBConnection connection; + private TiDBDatabaseSchema tiDBDatabaseSchema; + private EventOffsetContext offsetContext; + private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; + private TopicSelector topicSelector; + private JdbcSourceEventDispatcher dispatcher; + private TiDBPartition tiDBPartition; + private ChangeEventQueue queue; + private ErrorHandler errorHandler; + private EventMetadataProvider metadataProvider; + + public TiDBSourceFetchTaskContext( + JdbcSourceConfig sourceConfig, + JdbcDataSourceDialect dataSourceDialect, + TiDBConnection connection) { + super(sourceConfig, dataSourceDialect); + this.connection = connection; + this.metadataProvider = new TiDBEventMetadataProvider(); + } + + @Override + public void configure(SourceSplitBase sourceSplitBase) { + final TiDBConnectorConfig connectorConfig = getDbzConnectorConfig(); + final boolean tableIdCaseInsensitive = + dataSourceDialect.isDataCollectionIdCaseSensitive(sourceConfig); + TopicSelector topicSelector = + TopicSelector.defaultSelector( + connectorConfig, + (tableId, prefix, delimiter) -> + String.join(delimiter, prefix, tableId.identifier())); + try { + this.tiDBDatabaseSchema = + TiDBUtils.newSchema( + connection, connectorConfig, topicSelector, tableIdCaseInsensitive); + } catch (Exception e) { + throw new RuntimeException("Failed to 
initialize TiDBSchema", e); + } + + this.tiDBPartition = new TiDBPartition(connectorConfig.getLogicalName()); + this.tidbTaskContext = new TiDBTaskContext(connectorConfig, tiDBDatabaseSchema); + this.offsetContext = + loadStartingOffsetState( + new EventOffsetContext.Loader(connectorConfig), sourceSplitBase); + this.queue = + new ChangeEventQueue.Builder() + .pollInterval(connectorConfig.getPollInterval()) + .maxBatchSize(connectorConfig.getMaxBatchSize()) + .maxQueueSize(connectorConfig.getMaxQueueSize()) + .maxQueueSizeInBytes(connectorConfig.getMaxQueueSizeInBytes()) + .loggingContextSupplier( + () -> + tidbTaskContext.configureLoggingContext( + "tidb-cdc-connector-task")) + // do not buffer any element, we use signal event + // .buffering() + .build(); + this.errorHandler = + new TiDBErrorHandler( + (TiDBConnectorConfig) sourceConfig.getDbzConnectorConfig(), queue); + this.dispatcher = + new JdbcSourceEventDispatcher<>( + connectorConfig, + topicSelector, + tiDBDatabaseSchema, + queue, + connectorConfig.getTableFilters().dataCollectionFilter(), + DataChangeEvent::new, + metadataProvider, + schemaNameAdjuster, + new TiDBSchemaChangeEventHandler()); + + ChangeEventSourceMetricsFactory metricsFactory = + new DefaultChangeEventSourceMetricsFactory<>(); + this.snapshotChangeEventSourceMetrics = + metricsFactory.getSnapshotMetrics(tidbTaskContext, queue, metadataProvider); + } + + public TiDBConnection getConnection() { + return connection; + } + + @Override + public ChangeEventQueue getQueue() { + return queue; + } + + @Override + public Tables.TableFilter getTableFilter() { + return this.sourceConfig.getTableFilters().dataCollectionFilter(); + } + + @Override + public Offset getStreamOffset(SourceRecord record) { + return new EventOffset(record.sourceOffset()); + } + + @Override + public void close() throws Exception { + this.connection.close(); + } + + @Override + public TiDBDatabaseSchema getDatabaseSchema() { + return tiDBDatabaseSchema; + } + + @Override + 
public boolean isRecordBetween(SourceRecord record, Object[] splitStart, Object[] splitEnd) { + if (this.offsetContext.isSnapshotRunning()) { + RowType splitKeyType = + getSplitType(getDatabaseSchema().tableFor(this.getTableId(record))); + Object[] key = + SourceRecordUtils.getSplitKey(splitKeyType, record, getSchemaNameAdjuster()); + return SourceRecordUtils.splitKeyRangeContains(key, splitStart, splitEnd); + } else { + EventOffset newOffset = new EventOffset(record.sourceOffset()); + return SourceRecordUtils.splitKeyRangeContains( + new EventOffset[] {newOffset}, splitStart, splitEnd); + } + } + + @Override + public RowType getSplitType(Table table) { + return TiDBUtils.getSplitType(table); + } + + @Override + public ErrorHandler getErrorHandler() { + return errorHandler; + } + + @Override + public JdbcSourceEventDispatcher getEventDispatcher() { + return dispatcher; + } + + @Override + public WatermarkDispatcher getWaterMarkDispatcher() { + return null; + } + + @Override + public EventOffsetContext getOffsetContext() { + return offsetContext; + } + + @Override + public TiDBPartition getPartition() { + return tiDBPartition; + } + + @Override + public TiDBConnectorConfig getDbzConnectorConfig() { + return (TiDBConnectorConfig) super.getDbzConnectorConfig(); + } + + public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { + return snapshotChangeEventSourceMetrics; + } + + private EventOffsetContext loadStartingOffsetState( + EventOffsetContext.Loader loader, SourceSplitBase sourceSplitBase) { + Offset offset = + sourceSplitBase.isSnapshotSplit() + ? 
new EventOffsetFactory() + .createInitialOffset() // get an offset for starting snapshot + : sourceSplitBase.asStreamSplit().getStartingOffset(); + + return EventOffsetUtils.getEventOffsetContext(loader, offset); + } + + public TiDBSourceFetchTaskContext getTaskContext() { + return this; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java new file mode 100644 index 00000000000..e46ad7df2e0 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +public class TiDBStreamFetchTask implements FetchTask { + private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamFetchTask.class); + private final StreamSplit split; + private volatile boolean taskRunning = false; + private volatile boolean stopped = false; + + public TiDBStreamFetchTask(StreamSplit split) { + this.split = split; + } + + @Override + public void execute(Context context) throws Exception { + if (stopped) { + LOG.debug( + "StreamFetchTask for split: {} is already stopped and can not be executed", + split); + return; + } else { + LOG.debug("execute StreamFetchTask for split: {}", split); + } + taskRunning = true; + TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; + sourceFetchContext.getOffsetContext().preSnapshotCompletion(); + + EventSourceReader eventSourceReader = + new EventSourceReader( + sourceFetchContext.getDbzConnectorConfig(), + sourceFetchContext.getEventDispatcher(), + sourceFetchContext.getErrorHandler(), + sourceFetchContext.getTaskContext(), + split); + StoppableChangeEventSourceContext changeEventSourceContext = + new StoppableChangeEventSourceContext(); + eventSourceReader.execute( + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); + } + + public void commitCurrentOffset(@Nullable Offset offsetToCommit) {} + + @Override + public boolean isRunning() { + return taskRunning; + } + + @Override + public SourceSplitBase getSplit() { + return split; + } + + @Override + 
public void close() { + LOG.debug("stopping StreamFetchTask for split: {}", split); + // if (CDCEventSource != null) { + // ((StoppableChangeEventSourceContext) + // (CDCEventSource.context)).stopChangeEventSource(); + // } + stopped = false; + taskRunning = false; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java new file mode 100644 index 00000000000..ab8f9d57953 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.handler; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import com.github.shyiko.mysql.binlog.network.ServerException; +import io.debezium.DebeziumException; +import io.debezium.connector.base.ChangeEventQueue; +import io.debezium.connector.mysql.MySqlConnector; +import io.debezium.pipeline.ErrorHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.EOFException; +import java.sql.SQLException; + +public class TiDBErrorHandler extends ErrorHandler { + private static final Logger LOG = LoggerFactory.getLogger(TiDBErrorHandler.class); + private static final String SQL_CODE_TOO_MANY_CONNECTIONS = "08004"; + + public TiDBErrorHandler(TiDBConnectorConfig connectorConfig, ChangeEventQueue queue) { + super(MySqlConnector.class, connectorConfig, queue); + } + + protected boolean isRetriable(Throwable throwable) { + LOG.info("start tidb errorHandler : {}", throwable.getClass()); + if (throwable instanceof SQLException) { + final SQLException sql = (SQLException) throwable; + return SQL_CODE_TOO_MANY_CONNECTIONS.equals(sql.getSQLState()); + } else if (throwable instanceof ServerException) { + final ServerException sql = (ServerException) throwable; + return SQL_CODE_TOO_MANY_CONNECTIONS.equals(sql.getSqlState()); + } else if (throwable instanceof EOFException) { + // Retry with reading binlog error + return throwable.getMessage().contains("Failed to read next byte from position"); + } else if (throwable instanceof DebeziumException && throwable.getCause() != null) { + return isRetriable(throwable.getCause()); + } + return false; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java similarity index 63% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java index b688c824854..49cf6f2ad0e 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java @@ -15,18 +15,19 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table; +package org.apache.flink.cdc.connectors.tidb.source.handler; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.common.types.DataType; +import org.apache.flink.cdc.connectors.base.relational.handler.SchemaChangeEventHandler; -import java.io.Serializable; +import io.debezium.schema.SchemaChangeEvent; -/** - * Runtime converter that converts objects of TiKV into objects of Flink Table & SQL internal data - * structures. 
- */ -@FunctionalInterface -public interface TiKVDeserializationRuntimeConverter extends Serializable { - Object convert(Object object, TiTableInfo tableInfo, DataType dataType) throws Exception; +import java.util.HashMap; +import java.util.Map; + +public class TiDBSchemaChangeEventHandler implements SchemaChangeEventHandler { + + @Override + public Map parseSource(SchemaChangeEvent event) { + return new HashMap<>(); + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java new file mode 100644 index 00000000000..2650b4d941b --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; + +import org.tikv.common.meta.TiTimestamp; + +import javax.annotation.Nonnull; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class EventOffset extends Offset { + public static final String TIMESTAMP_KEY = "timestamp"; + // TimeStamp Oracle from pd + public static final String COMMIT_VERSION_KEY = "commit_version"; + + public static final EventOffset INITIAL_OFFSET = + new EventOffset(Collections.singletonMap(TIMESTAMP_KEY, "0")); + public static final EventOffset NO_STOPPING_OFFSET = new EventOffset(Long.MAX_VALUE); + + public EventOffset(Map offset) { + Map offsetMap = new HashMap<>(); + for (Map.Entry entry : offset.entrySet()) { + offsetMap.put( + entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString()); + } + this.offset = offsetMap; + } + + public EventOffset(@Nonnull String timestamp, String commitVersion) { + Map offsetMap = new HashMap<>(); + offsetMap.put(TIMESTAMP_KEY, timestamp); + if (commitVersion != null) { + offsetMap.put(COMMIT_VERSION_KEY, commitVersion); + } + this.offset = offsetMap; + } + + public EventOffset(long binlogEpochMill) { + Map offsetMap = new HashMap<>(); + offsetMap.put(TIMESTAMP_KEY, String.valueOf(binlogEpochMill)); + offsetMap.put( + COMMIT_VERSION_KEY, + String.valueOf(new TiTimestamp(binlogEpochMill, 0).getVersion())); + this.offset = offsetMap; + } + + public String getTimestamp() { + return offset.get(TIMESTAMP_KEY); + } + + public String getCommitVersion() { + if (offset.get(COMMIT_VERSION_KEY) == null) { + String timestamp = getTimestamp(); + // timestamp to commit version. 
+ return String.valueOf(new TiTimestamp(Long.parseLong(timestamp), 0).getVersion()); + } + return offset.get(COMMIT_VERSION_KEY); + } + + @Override + public int compareTo(@Nonnull Offset o) { + EventOffset that = (EventOffset) o; + + int flag; + flag = compareLong(getTimestamp(), that.getTimestamp()); + if (flag != 0) { + return flag; + } + return compareLong(getCommitVersion(), that.getCommitVersion()); + } + + private int compareLong(String a, String b) { + if (a == null && b == null) { + return 0; + } + if (a == null) { + return -1; + } + if (b == null) { + return 1; + } + return Long.compare(Long.parseLong(a), Long.parseLong(b)); + } + + public static EventOffset of(Map offsetMap) { + Map offsetStrMap = new HashMap<>(); + for (Map.Entry entry : offsetMap.entrySet()) { + offsetStrMap.put( + entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString()); + } + return new EventOffset(offsetStrMap); + } + + public static long getStartTs(Offset offset) { + if (offset.getOffset().get(COMMIT_VERSION_KEY) != null) { + return Long.parseLong(offset.getOffset().get(COMMIT_VERSION_KEY)); + } else { + return new TiTimestamp(Long.parseLong(offset.getOffset().get(TIMESTAMP_KEY)), 0) + .getVersion(); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java new file mode 100644 index 00000000000..8953b8f8985 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import io.debezium.connector.AbstractSourceInfo; +import io.debezium.connector.SnapshotRecord; +import io.debezium.connector.mysql.MySqlReadOnlyIncrementalSnapshotContext; +import io.debezium.pipeline.source.snapshot.incremental.IncrementalSnapshotContext; +import io.debezium.pipeline.source.snapshot.incremental.SignalBasedIncrementalSnapshotContext; +import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.txmetadata.TransactionContext; +import io.debezium.relational.TableId; +import io.debezium.schema.DataCollectionId; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; +import org.tikv.common.meta.TiTimestamp; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.COMMIT_VERSION_KEY; +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.TIMESTAMP_KEY; + +public class EventOffsetContext implements OffsetContext { + private static final String SNAPSHOT_COMPLETED_KEY = "snapshot_completed"; + + private final Schema 
sourceInfoSchema; + private final TiDBSourceInfo sourceInfo; + private final TransactionContext transactionContext; + private final IncrementalSnapshotContext incrementalSnapshotContext; + private boolean snapshotCompleted; + private String commitVersion; + private String timestamp; + + public EventOffsetContext( + boolean snapshot, + boolean snapshotCompleted, + TransactionContext transactionContext, + IncrementalSnapshotContext incrementalSnapshotContext, + TiDBSourceInfo sourceInfo) { + this.sourceInfo = sourceInfo; + this.sourceInfoSchema = sourceInfo.schema(); + this.snapshotCompleted = snapshotCompleted; + + this.transactionContext = transactionContext; + this.incrementalSnapshotContext = incrementalSnapshotContext; + + if (this.snapshotCompleted) { + postSnapshotCompletion(); + } else { + sourceInfo.setSnapshot(snapshot ? SnapshotRecord.TRUE : SnapshotRecord.FALSE); + } + } + + public static EventOffsetContext initial(TiDBConnectorConfig config) { + return new EventOffsetContext( + false, + false, + new TransactionContext(), + new SignalBasedIncrementalSnapshotContext<>(), + new TiDBSourceInfo(config)); + } + + @Override + public Map getOffset() { + HashMap offset = new HashMap<>(); + if (timestamp != null) { + offset.put(TIMESTAMP_KEY, timestamp); + } + + if (commitVersion != null) { + offset.put(COMMIT_VERSION_KEY, commitVersion); + } + if (sourceInfo.isSnapshot()) { + if (!snapshotCompleted) { + offset.put(AbstractSourceInfo.SNAPSHOT_KEY, true); + } + return offset; + } else { + return incrementalSnapshotContext.store(transactionContext.store(offset)); + } + } + + public void databaseEvent(String database, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.databaseEvent(database); + sourceInfo.tableEvent((TableId) null); + } + + public void tableEvent(String database, Set tableIds, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.databaseEvent(database); + sourceInfo.tableEvent(tableIds); + } + + @Override + 
public Schema getSourceInfoSchema() { + return sourceInfoSchema.schema(); + } + + @Override + public Struct getSourceInfo() { + return sourceInfo.struct(); + } + + @Override + public boolean isSnapshotRunning() { + return sourceInfo.isSnapshot() && !snapshotCompleted; + } + + @Override + public void markLastSnapshotRecord() { + sourceInfo.setSnapshot(SnapshotRecord.LAST); + } + + @Override + public void preSnapshotStart() { + sourceInfo.setSnapshot(SnapshotRecord.TRUE); + snapshotCompleted = false; + } + + @Override + public void preSnapshotCompletion() { + snapshotCompleted = true; + } + + @Override + public void postSnapshotCompletion() { + snapshotCompleted = true; + } + + @Override + public void event(DataCollectionId collectionId, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.tableEvent((TableId) collectionId); + } + + @Override + public TransactionContext getTransactionContext() { + return transactionContext; + } + + public void setCheckpoint(Instant timestamp, String commitVersion) { + this.timestamp = String.valueOf(timestamp.toEpochMilli()); + if (commitVersion == null) { + commitVersion = + String.valueOf(new TiTimestamp(timestamp.toEpochMilli(), 0).getVersion()); + } + this.commitVersion = commitVersion; + } + + public static class Loader implements OffsetContext.Loader { + + private final TiDBConnectorConfig connectorConfig; + + public Loader(TiDBConnectorConfig connectorConfig) { + this.connectorConfig = connectorConfig; + } + + @SuppressWarnings("unchecked") + @Override + public EventOffsetContext load(Map offset) { + boolean snapshot = + Boolean.TRUE.equals(offset.get(TiDBSourceInfo.SNAPSHOT_KEY)) + || "true".equals(offset.get(TiDBSourceInfo.SNAPSHOT_KEY)); + boolean snapshotCompleted = + Boolean.TRUE.equals(offset.get(SNAPSHOT_COMPLETED_KEY)) + || "true".equals(offset.get(SNAPSHOT_COMPLETED_KEY)); + IncrementalSnapshotContext incrementalSnapshotContext; + if (connectorConfig.isReadOnlyConnection()) { + 
incrementalSnapshotContext = MySqlReadOnlyIncrementalSnapshotContext.load(offset); + } else { + incrementalSnapshotContext = SignalBasedIncrementalSnapshotContext.load(offset); + } + final EventOffsetContext offsetContext = + new EventOffsetContext( + snapshot, + snapshotCompleted, + TransactionContext.load(offset), + incrementalSnapshotContext, + new TiDBSourceInfo(connectorConfig)); + String timestamp = (String) offset.get(TIMESTAMP_KEY); + offsetContext.setCheckpoint( + timestamp == null + ? Instant.now() + : Instant.ofEpochMilli(Long.parseLong(timestamp)), + (String) offset.get(COMMIT_VERSION_KEY)); + return offsetContext; + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java new file mode 100644 index 00000000000..6ad3d9ebc5c --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory; + +import java.util.Map; + +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.NO_STOPPING_OFFSET; + +public class EventOffsetFactory extends OffsetFactory { + + @Override + public Offset newOffset(Map offset) { + return new EventOffset(offset); + } + + @Override + public Offset newOffset(String filename, Long position) { + throw new UnsupportedOperationException(); + } + + @Override + public Offset newOffset(Long position) { + return new EventOffset(position); + } + + @Override + public Offset createTimestampOffset(long timestampMillis) { + return new EventOffset(timestampMillis); + } + + @Override + public Offset createInitialOffset() { + return EventOffset.INITIAL_OFFSET; + } + + @Override + public Offset createNoStoppingOffset() { + return NO_STOPPING_OFFSET; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java new file mode 100644 index 00000000000..bfe971e26ea --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; + +import io.debezium.pipeline.spi.OffsetContext; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +public class EventOffsetUtils { + public static EventOffsetContext getEventOffsetContext( + OffsetContext.Loader loader, Offset offset) { + Map offsetStrMap = + Objects.requireNonNull(offset, "offset is null for the sourceSplitBase") + .getOffset(); + // all the keys happen to be long type for PostgresOffsetContext.Loader.load + Map offsetMap = new HashMap<>(); + for (String key : offsetStrMap.keySet()) { + String value = offsetStrMap.get(key); + if (value != null) { + offsetMap.put(key, value); + } + } + return (EventOffsetContext) loader.load(offsetMap); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java new file mode 100644 index 00000000000..9f460c8421b --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java @@ -0,0 +1,93 
@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.offset;
+
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig;
+
+import io.debezium.connector.common.BaseSourceInfo;
+import io.debezium.relational.TableId;
+
+import java.time.Instant;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Mutable source metadata for TiDB events: commit version, source timestamp, and the database
+ * and table(s) the current event refers to.
+ */
+public class TiDBSourceInfo extends BaseSourceInfo {
+    public static final String COMMIT_VERSION_KEY = "commitVersion";
+
+    /** Stays at {@code -1} until {@link #setCommitVersion(long)} is called. */
+    private Long commitVersion = -1L;
+
+    private Instant sourceTime;
+    private Set<TableId> tableIds;
+    private String databaseName;
+
+    /** Creates source info with an empty table set. */
+    public TiDBSourceInfo(TiDBConnectorConfig config) {
+        super(config);
+        this.tableIds = new HashSet<>();
+    }
+
+    /** Returns the last value given to {@link #setSourceTime(Instant)}, possibly {@code null}. */
+    @Override
+    protected Instant timestamp() {
+        return sourceTime;
+    }
+
+    public void setSourceTime(Instant sourceTime) {
+        this.sourceTime = sourceTime;
+    }
+
+    /** Records the database name; the current table set is left untouched. */
+    public void databaseEvent(String databaseName) {
+        this.databaseName = databaseName;
+    }
+
+    /**
+     * Replaces the current table set with a copy of {@code tableIds}.
+     *
+     * @throws NullPointerException if {@code tableIds} is null
+     */
+    public void tableEvent(Set<TableId> tableIds) {
+        this.tableIds = new HashSet<>(tableIds);
+    }
+
+    /** Replaces the current table set with an immutable singleton holding {@code tableId}. */
+    public void tableEvent(TableId tableId) {
+        this.tableIds = Collections.singleton(tableId);
+    }
+
+    /**
+     * Returns the catalog of the first table the set's iterator yields; falls back to the name
+     * stored by {@link #databaseEvent(String)} when the set is null or empty or that element is
+     * null.
+     */
+    @Override
+    protected String database() {
+        if (tableIds == null || tableIds.isEmpty()) {
+            return databaseName;
+        }
+        final TableId tableId = tableIds.iterator().next();
+        if (tableId == null) {
+            return databaseName;
+        }
+        return tableId.catalog();
+    }
+
+    public Long getCommitVersion() {
+        return commitVersion;
+    }
+
+    public void setCommitVersion(long commitVersion) {
+        this.commitVersion = commitVersion;
+    }
+
+    /**
+     * Returns the table names of all non-null ids joined with {@code ","}, or {@code null} when
+     * no table is set.
+     */
+    public String table() {
+        if (tableIds == null || tableIds.isEmpty()) {
+            return null;
+        }
+        return tableIds.stream()
+                .filter(Objects::nonNull)
+                .map(TableId::table)
+                .collect(Collectors.joining(","));
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java
new file mode 100644
index 00000000000..a493d43bc37
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.offset;
+
+import io.debezium.connector.SourceInfoStructMaker;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaBuilder;
+import org.apache.kafka.connect.data.Struct;
+
+import java.time.Instant;
+
+/** Builds the Kafka Connect {@code source} struct from a {@link TiDBSourceInfo}. */
+public class TiDBSourceInfoStructMaker implements SourceInfoStructMaker<TiDBSourceInfo> {
+    private final Schema schema;
+
+    /**
+     * Declares the source schema. The table name is optional because {@link
+     * TiDBSourceInfo#table()} returns {@code null} when no table is set, and {@link Struct#put}
+     * rejects {@code null} for a required field. The schema-name field is declared but never
+     * populated by {@link #struct(TiDBSourceInfo)}.
+     */
+    public TiDBSourceInfoStructMaker() {
+        this.schema =
+                SchemaBuilder.struct()
+                        .field(TiDBSourceInfo.TABLE_NAME_KEY, Schema.OPTIONAL_STRING_SCHEMA)
+                        .field(TiDBSourceInfo.TIMESTAMP_KEY, Schema.INT64_SCHEMA)
+                        .field(TiDBSourceInfo.DATABASE_NAME_KEY, Schema.OPTIONAL_STRING_SCHEMA)
+                        .field(TiDBSourceInfo.SCHEMA_NAME_KEY, Schema.OPTIONAL_STRING_SCHEMA)
+                        .field(TiDBSourceInfo.COMMIT_VERSION_KEY, Schema.INT64_SCHEMA)
+                        .build();
+    }
+
+    @Override
+    public Schema schema() {
+        return schema;
+    }
+
+    /**
+     * Fills a new struct with table, timestamp (epoch millis, {@code 0} when the source time is
+     * unset), commit version and, when available, the database name.
+     *
+     * @param sourceInfo source metadata to serialize
+     * @return the populated struct
+     */
+    @Override
+    public Struct struct(TiDBSourceInfo sourceInfo) {
+        Struct source = new Struct(schema);
+        source.put(TiDBSourceInfo.TABLE_NAME_KEY, sourceInfo.table());
+        Instant timestamp = sourceInfo.timestamp();
+        long commitVersion = sourceInfo.getCommitVersion();
+        source.put(TiDBSourceInfo.TIMESTAMP_KEY, timestamp != null ? timestamp.toEpochMilli() : 0L);
+        // todo timestamp to commit version.
+        source.put(TiDBSourceInfo.COMMIT_VERSION_KEY, commitVersion);
+        final String database = sourceInfo.database();
+        if (database != null) {
+            source.put(TiDBSourceInfo.DATABASE_NAME_KEY, database);
+        }
+        return source;
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java
new file mode 100644
index 00000000000..c3157801b1a
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.schema;
+
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig;
+import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection;
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBDefaultValueConverter;
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters;
+import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext;
+
+import io.debezium.connector.tidb.TiDBAntlrDdlParser;
+import io.debezium.connector.tidb.TiDBPartition;
+import io.debezium.relational.RelationalDatabaseSchema;
+import io.debezium.relational.RelationalTableFilters;
+import io.debezium.relational.Table;
+import io.debezium.relational.TableId;
+import io.debezium.relational.TableSchemaBuilder;
+import io.debezium.relational.ddl.DdlChanges;
+import io.debezium.relational.ddl.DdlParser;
+import io.debezium.relational.ddl.DdlParserListener;
+import io.debezium.schema.SchemaChangeEvent;
+import io.debezium.schema.TopicSelector;
+import io.debezium.text.MultipleParsingExceptions;
+import io.debezium.text.ParsingException;
+import io.debezium.util.Collect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * TiDB database schema: keeps the table definitions, parses DDL with {@link TiDBAntlrDdlParser}
+ * and turns the parser's events into {@link SchemaChangeEvent}s.
+ */
+public class TiDBDatabaseSchema extends RelationalDatabaseSchema {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(TiDBDatabaseSchema.class);
+
+    /** Statements for which {@link #parseDdl} returns no events without invoking the parser. */
+    private final Set<String> ignoredQueryStatements =
+            Collect.unmodifiableSet("BEGIN", "END", "FLUSH PRIVILEGES");
+
+    private final RelationalTableFilters filters;
+    private final DdlParser ddlParser;
+    private final DdlChanges ddlChanges;
+
+    /**
+     * @param config connector configuration supplying filters, converters registry and key mapper
+     * @param tiDBValueConverters value converters shared by the schema builder and the DDL parser
+     * @param topicSelector topic selector passed to the parent schema
+     * @param tableIdCaseInsensitive passed to the parent schema unchanged
+     */
+    public TiDBDatabaseSchema(
+            TiDBConnectorConfig config,
+            TiDBValueConverters tiDBValueConverters,
+            TopicSelector<TableId> topicSelector,
+            boolean tableIdCaseInsensitive) {
+        super(
+                config,
+                topicSelector,
+                config.getTableFilters().dataCollectionFilter(),
+                config.getColumnFilter(),
+                new TableSchemaBuilder(
+                        tiDBValueConverters,
+                        new TiDBDefaultValueConverter(tiDBValueConverters),
+                        config.schemaNameAdjustmentMode().createAdjuster(),
+                        config.customConverterRegistry(),
+                        config.getSourceInfoStructMaker().schema(),
+                        config.getSanitizeFieldNames(),
+                        false),
+                tableIdCaseInsensitive,
+                config.getKeyMapper());
+
+        // todo change
+        this.ddlParser =
+                new TiDBAntlrDdlParser(
+                        true,
+                        false,
+                        config.isSchemaCommentsHistoryEnabled(),
+                        tiDBValueConverters,
+                        getTableFilter());
+        filters = config.getTableFilters();
+        this.ddlChanges = this.ddlParser.getDdlChanges();
+    }
+
+    /**
+     * Reloads the table definitions through {@code connection.readTiDBSchema(...)} and then
+     * rebuilds every cached table schema.
+     *
+     * @param connection connection used to read the schema
+     * @param config connector configuration forwarded to the connection
+     * @param printReplicaIdentityInfo not read by this method
+     * @return this instance
+     * @throws SQLException if reading the schema fails
+     */
+    public TiDBDatabaseSchema refresh(
+            TiDBConnection connection, TiDBConnectorConfig config, boolean printReplicaIdentityInfo)
+            throws SQLException {
+        // read all the information from the DB
+        connection.readTiDBSchema(config, this, tables(), null, null, getTableFilter(), null, true);
+        // and then refresh the schemas
+        refreshSchemas();
+        return this;
+    }
+
+    /** Clears the cached schemas and rebuilds one for every known table id. */
+    protected void refreshSchemas() {
+        clearSchemas();
+        // Create TableSchema instances for any existing table ...
+        tableIds().forEach(this::refreshSchema);
+    }
+
+    @Override
+    protected void refreshSchema(TableId id) {
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("refreshing DB schema for table '{}'", id);
+        }
+        Table table = tableFor(id);
+        buildAndRegisterSchema(table);
+    }
+
+    /**
+     * Parses DDL captured during the snapshot phase.
+     *
+     * @return the schema change events derived from {@code ddlStatements}; empty when the
+     *     statement is one of the ignored statements
+     */
+    public List<SchemaChangeEvent> parseSnapshotDdl(
+            TiDBPartition partition,
+            String ddlStatements,
+            String databaseName,
+            EventOffsetContext offset,
+            Instant sourceTime) {
+        LOGGER.debug("Processing snapshot DDL '{}' for database '{}'", ddlStatements, databaseName);
+        return parseDdl(partition, ddlStatements, databaseName, offset, sourceTime, true);
+    }
+
+    /**
+     * Runs the DDL parser and converts its events, grouped by database, into schema change
+     * events. When the parser reports no change, a single database-level event carrying the raw
+     * statement is returned.
+     */
+    private List<SchemaChangeEvent> parseDdl(
+            TiDBPartition partition,
+            String ddlStatements,
+            String databaseName,
+            EventOffsetContext offset,
+            Instant sourceTime,
+            boolean snapshot) {
+        final List<SchemaChangeEvent> schemaChangeEvents = new ArrayList<>(3);
+
+        if (ignoredQueryStatements.contains(ddlStatements)) {
+            return schemaChangeEvents;
+        }
+
+        try {
+            this.ddlChanges.reset();
+            this.ddlParser.setCurrentSchema(databaseName);
+            this.ddlParser.parse(ddlStatements, tables());
+        } catch (ParsingException | MultipleParsingExceptions e) {
+            // Parser failures are propagated to the caller unchanged.
+            throw e;
+        }
+
+        if (ddlChanges.isEmpty()) {
+            offset.databaseEvent(databaseName, sourceTime);
+            schemaChangeEvents.add(
+                    SchemaChangeEvent.ofDatabase(
+                            partition, offset, databaseName, ddlStatements, snapshot));
+            return schemaChangeEvents;
+        }
+
+        ddlChanges.getEventsByDatabase(
+                (String dbName, List<DdlParserListener.Event> events) -> {
+                    final String sanitizedDbName = (dbName == null) ? "" : dbName;
+                    if (!acceptableDatabase(dbName)) {
+                        return;
+                    }
+                    final Set<TableId> tableIds = new HashSet<>();
+                    for (DdlParserListener.Event event : events) {
+                        final TableId tableId = getTableId(event);
+                        if (tableId != null) {
+                            tableIds.add(tableId);
+                        }
+                    }
+                    for (DdlParserListener.Event event : events) {
+                        final TableId tableId = getTableId(event);
+                        offset.tableEvent(dbName, tableIds, sourceTime);
+                        // SET statement with multiple variable emits event for each variable.
+                        // We want to emit only one change event
+                        if (event instanceof DdlParserListener.SetVariableEvent
+                                && ((DdlParserListener.SetVariableEvent) event).order() != 0) {
+                            continue;
+                        }
+                        emitChangeEvent(
+                                partition,
+                                offset,
+                                schemaChangeEvents,
+                                sanitizedDbName,
+                                event,
+                                tableId,
+                                changeTypeFor(event),
+                                snapshot);
+                    }
+                });
+        return schemaChangeEvents;
+    }
+
+    /** Maps a parser event to the schema change type it is reported as. */
+    private static SchemaChangeEvent.SchemaChangeEventType changeTypeFor(
+            DdlParserListener.Event event) {
+        if (event instanceof DdlParserListener.TableCreatedEvent) {
+            return SchemaChangeEvent.SchemaChangeEventType.CREATE;
+        }
+        if (event instanceof DdlParserListener.TableAlteredEvent
+                || event instanceof DdlParserListener.TableIndexCreatedEvent
+                || event instanceof DdlParserListener.TableIndexDroppedEvent) {
+            return SchemaChangeEvent.SchemaChangeEventType.ALTER;
+        }
+        if (event instanceof DdlParserListener.TableDroppedEvent) {
+            return SchemaChangeEvent.SchemaChangeEventType.DROP;
+        }
+        // SET statements and every other event are reported at database level.
+        return SchemaChangeEvent.SchemaChangeEventType.DATABASE;
+    }
+
+    /** A null or empty database name is always accepted; otherwise the database filter decides. */
+    private boolean acceptableDatabase(final String databaseName) {
+        // Null/empty is checked first so the filter predicate is never handed a null name.
+        return databaseName == null
+                || databaseName.isEmpty()
+                || filters.databaseFilter().test(databaseName);
+    }
+
+    /** Returns the table id of table and table-index events, {@code null} for anything else. */
+    private TableId getTableId(DdlParserListener.Event event) {
+        if (event instanceof DdlParserListener.TableEvent) {
+            return ((DdlParserListener.TableEvent) event).tableId();
+        } else if (event instanceof DdlParserListener.TableIndexEvent) {
+            return ((DdlParserListener.TableIndexEvent) event).tableId();
+        }
+        return null;
+    }
+
+    /**
+     * Appends one schema change event for {@code event}. An ALTER whose event carries a previous
+     * table id is emitted as a rename; everything else uses the given {@code type}.
+     */
+    private void emitChangeEvent(
+            TiDBPartition partition,
+            EventOffsetContext offset,
+            List<SchemaChangeEvent> schemaChangeEvents,
+            final String sanitizedDbName,
+            DdlParserListener.Event event,
+            TableId tableId,
+            SchemaChangeEvent.SchemaChangeEventType type,
+            boolean snapshot) {
+        SchemaChangeEvent schemaChangeEvent;
+        if (type.equals(SchemaChangeEvent.SchemaChangeEventType.ALTER)
+                && event instanceof DdlParserListener.TableAlteredEvent
+                && ((DdlParserListener.TableAlteredEvent) event).previousTableId() != null) {
+            schemaChangeEvent =
+                    SchemaChangeEvent.ofRename(
+                            partition,
+                            offset,
+                            sanitizedDbName,
+                            null,
+                            event.statement(),
+                            tableId != null ? tableFor(tableId) : null,
+                            ((DdlParserListener.TableAlteredEvent) event).previousTableId());
+        } else {
+            schemaChangeEvent =
+                    SchemaChangeEvent.of(
+                            type,
+                            partition,
+                            offset,
+                            sanitizedDbName,
+                            null,
+                            event.statement(),
+                            tableId != null ? tableFor(tableId) : null,
+                            snapshot);
+        }
+        schemaChangeEvents.add(schemaChangeEvent);
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java
new file mode 100644
index 00000000000..98708857095
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.schema;
+
+import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils;
+
+import org.apache.commons.lang3.StringUtils;
+
+/** Mutable description of a single table column, able to render itself as a DDL fragment. */
+public class TiDBFieldDefinition {
+    private String columnName;
+    private String columnType;
+    private boolean nullable;
+    private boolean key;
+    private String defaultValue;
+    private String extra;
+    private boolean unique;
+
+    public String getColumnName() {
+        return this.columnName;
+    }
+
+    public void setColumnName(String columnName) {
+        this.columnName = columnName;
+    }
+
+    public String getColumnType() {
+        return this.columnType;
+    }
+
+    public void setColumnType(String columnType) {
+        this.columnType = columnType;
+    }
+
+    public void setNullable(boolean nullable) {
+        this.nullable = nullable;
+    }
+
+    /**
+     * Returns the default as a DDL clause ({@code "DEFAULT " + value}), or an empty string when
+     * no default is stored.
+     */
+    public String getDefaultValue() {
+        if (StringUtils.isEmpty(this.defaultValue)) {
+            return "";
+        }
+        return "DEFAULT " + this.defaultValue;
+    }
+
+    public void setDefaultValue(String defaultValue) {
+        this.defaultValue = defaultValue;
+    }
+
+    /** Tells whether the column type text contains {@code unsigned}, ignoring case. */
+    public boolean isUnsigned() {
+        return StringUtils.containsIgnoreCase(this.columnType, "unsigned");
+    }
+
+    public boolean isNullable() {
+        return this.nullable;
+    }
+
+    public boolean isKey() {
+        return this.key;
+    }
+
+    public void setKey(boolean key) {
+        this.key = key;
+    }
+
+    public String getExtra() {
+        return this.extra;
+    }
+
+    public void setExtra(String extra) {
+        this.extra = extra;
+    }
+
+    public boolean isUnique() {
+        return this.unique;
+    }
+
+    public void setUnique(boolean unique) {
+        this.unique = unique;
+    }
+
+    /**
+     * Renders {@code <quoted name> <type> <nullability>}; the nullability part is {@code NOT
+     * NULL} for non-nullable columns and empty otherwise. Default, extra and unique are not
+     * part of the output.
+     */
+    public String toDdl() {
+        final String nullability = this.nullable ? "" : "NOT NULL";
+        final StringBuilder ddl = new StringBuilder();
+        ddl.append(TiDBUtils.quote(this.columnName));
+        ddl.append(" ");
+        ddl.append(this.columnType);
+        ddl.append(" ");
+        ddl.append(nullability);
+        return ddl.toString();
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java
new file mode 100644
index 00000000000..5e20679918b
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.schema;
+
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig;
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig;
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters;
+import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext;
+import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils;
+import org.apache.flink.util.FlinkRuntimeException;
+
+import io.debezium.connector.tidb.TiDBPartition;
+import io.debezium.connector.tidb.TidbTopicSelector;
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.TableId;
+import io.debezium.relational.history.TableChanges.TableChange;
+import io.debezium.schema.SchemaChangeEvent;
+import io.debezium.schema.TopicSelector;
+import org.apache.commons.lang3.StringUtils;
+
+import java.sql.SQLException;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import static org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils.getValueConverters;
+
+/**
+ * Reads table schemas from TiDB over JDBC and caches them per table id. A schema is obtained
+ * from {@code SHOW CREATE TABLE}, falling back to a DDL statement synthesized from {@code DESC}.
+ */
+public class TiDBSchema {
+    private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE ";
+    private static final String DESC_TABLE = "DESC ";
+
+    private final TiDBConnectorConfig connectorConfig;
+    private final TiDBDatabaseSchema databaseSchema;
+    private final SchemasByTableId schemasByTableId;
+
+    /**
+     * @param sourceConfig source configuration providing the Debezium connector config
+     * @param isTableIdCaseInSensitive whether table ids are lower-cased before being used as
+     *     cache keys; also forwarded to the database schema
+     */
+    public TiDBSchema(TiDBSourceConfig sourceConfig, boolean isTableIdCaseInSensitive) {
+        this.connectorConfig = sourceConfig.getDbzConnectorConfig();
+        this.databaseSchema = createTiDBDatabaseSchema(connectorConfig, isTableIdCaseInSensitive);
+        this.schemasByTableId = new SchemasByTableId(isTableIdCaseInSensitive);
+    }
+
+    /**
+     * Returns the cached schema of {@code tableId}, reading and caching it on a cache miss.
+     *
+     * @throws FlinkRuntimeException if the schema cannot be read
+     */
+    public TableChange getTableSchema(JdbcConnection jdbc, TableId tableId) {
+        // read schema from cache first
+        TableChange schema = schemasByTableId.get(tableId);
+        if (schema == null) {
+            schema = readTableSchema(jdbc, tableId);
+            schemasByTableId.put(tableId, schema);
+        }
+        return schema;
+    }
+
+    /**
+     * Creates a {@link TiDBDatabaseSchema} with the default topic selector and value converters.
+     *
+     * @param dbzTiDBConfig connector configuration
+     * @param isTableIdCaseInsensitive passed to the schema as its case-insensitive flag
+     */
+    public static TiDBDatabaseSchema createTiDBDatabaseSchema(
+            TiDBConnectorConfig dbzTiDBConfig, boolean isTableIdCaseInsensitive) {
+        TopicSelector<TableId> topicSelector = TidbTopicSelector.defaultSelector(dbzTiDBConfig);
+        TiDBValueConverters valueConverters = getValueConverters(dbzTiDBConfig);
+        return new TiDBDatabaseSchema(
+                dbzTiDBConfig, valueConverters, topicSelector, isTableIdCaseInsensitive);
+    }
+
+    /** Tries {@code SHOW CREATE TABLE} first and {@code DESC} second; fails when both yield nothing. */
+    private TableChange readTableSchema(JdbcConnection jdbc, TableId tableId) {
+        final Map<TableId, TableChange> tableChangeMap = new HashMap<>();
+        // Only used for the error message below; the query itself is built by the helper.
+        String showCreateTable = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId);
+        final TiDBPartition partition = new TiDBPartition(connectorConfig.getLogicalName());
+        buildSchemaByShowCreateTable(partition, jdbc, tableId, tableChangeMap);
+        if (!tableChangeMap.containsKey(tableId)) {
+            // fallback to desc table
+            String descTable = DESC_TABLE + TiDBUtils.quote(tableId);
+            buildSchemaByDescTable(partition, jdbc, descTable, tableId, tableChangeMap);
+            if (!tableChangeMap.containsKey(tableId)) {
+                throw new FlinkRuntimeException(
+                        String.format(
+                                "Can't obtain schema for table %s by running %s and %s ",
+                                tableId, showCreateTable, descTable));
+            }
+        }
+        return tableChangeMap.get(tableId);
+    }
+
+    /** Parses the DDL found in the second column of the first {@code SHOW CREATE TABLE} row. */
+    private void buildSchemaByShowCreateTable(
+            TiDBPartition partition,
+            JdbcConnection jdbc,
+            TableId tableId,
+            Map<TableId, TableChange> tableChangeMap) {
+        final String sql = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId);
+        try {
+            jdbc.query(
+                    sql,
+                    rs -> {
+                        if (rs.next()) {
+                            final String ddl = rs.getString(2);
+                            parseSchemaByDdl(partition, ddl, tableId, tableChangeMap);
+                        }
+                    });
+        } catch (SQLException e) {
+            throw new FlinkRuntimeException(
+                    String.format("Failed to read schema for table %s by running %s", tableId, sql),
+                    e);
+        }
+    }
+
+    /** Builds a CREATE TABLE statement from the {@code DESC} rows and parses it. */
+    private void buildSchemaByDescTable(
+            TiDBPartition partition,
+            JdbcConnection jdbc,
+            String descTable,
+            TableId tableId,
+            Map<TableId, TableChange> tableChangeMap) {
+        List<TiDBFieldDefinition> fieldMetas = new ArrayList<>();
+        List<String> primaryKeys = new ArrayList<>();
+        try {
+            jdbc.query(
+                    descTable,
+                    rs -> {
+                        while (rs.next()) {
+                            TiDBFieldDefinition meta = new TiDBFieldDefinition();
+                            meta.setColumnName(rs.getString("Field"));
+                            meta.setColumnType(rs.getString("Type"));
+                            meta.setNullable(
+                                    StringUtils.equalsIgnoreCase(rs.getString("Null"), "YES"));
+                            final String keyKind = rs.getString("Key");
+                            meta.setKey("PRI".equalsIgnoreCase(keyKind));
+                            meta.setUnique("UNI".equalsIgnoreCase(keyKind));
+                            meta.setDefaultValue(rs.getString("Default"));
+                            meta.setExtra(rs.getString("Extra"));
+                            if (meta.isKey()) {
+                                primaryKeys.add(meta.getColumnName());
+                            }
+                            fieldMetas.add(meta);
+                        }
+                    });
+            parseSchemaByDdl(
+                    partition,
+                    new TiDBTableDefinition(tableId, fieldMetas, primaryKeys).toDdl(),
+                    tableId,
+                    tableChangeMap);
+        } catch (SQLException e) {
+            throw new FlinkRuntimeException(
+                    String.format(
+                            "Failed to read schema for table %s by running %s", tableId, descTable),
+                    e);
+        }
+    }
+
+    /**
+     * Parses {@code ddl} as snapshot DDL and stores every resulting table change under {@code
+     * tableId}; when several changes are produced the last one wins.
+     */
+    private void parseSchemaByDdl(
+            TiDBPartition partition,
+            String ddl,
+            TableId tableId,
+            Map<TableId, TableChange> tableChangeMap) {
+        final EventOffsetContext offsetContext = EventOffsetContext.initial(connectorConfig);
+        List<SchemaChangeEvent> schemaChangeEvents =
+                databaseSchema.parseSnapshotDdl(
+                        partition, ddl, tableId.catalog(), offsetContext, Instant.now());
+        for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) {
+            for (TableChange tableChange : schemaChangeEvent.getTableChanges()) {
+                tableChangeMap.put(tableId, tableChange);
+            }
+        }
+    }
+
+    /** Table-change cache whose keys are lower-cased when table ids are case-insensitive. */
+    private static class SchemasByTableId {
+
+        private final boolean tableIdCaseInsensitive;
+        private final ConcurrentMap<TableId, TableChange> values;
+
+        public SchemasByTableId(boolean tableIdCaseInsensitive) {
+            this.tableIdCaseInsensitive = tableIdCaseInsensitive;
+            this.values = new ConcurrentHashMap<>();
+        }
+
+        public void clear() {
+            values.clear();
+        }
+
+        public TableChange remove(TableId tableId) {
+            return values.remove(toLowerCaseIfNeeded(tableId));
+        }
+
+        public TableChange get(TableId tableId) {
+            return values.get(toLowerCaseIfNeeded(tableId));
+        }
+
+        public TableChange put(TableId tableId, TableChange updated) {
+            return values.put(toLowerCaseIfNeeded(tableId), updated);
+        }
+
+        private TableId toLowerCaseIfNeeded(TableId tableId) {
+            return tableIdCaseInsensitive ? tableId.toLowercase() : tableId;
+        }
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java
new file mode 100644
index 00000000000..d1eb4ff123b
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.schema;
+
+import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils;
+import org.apache.flink.util.CollectionUtil;
+
+import io.debezium.relational.TableId;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Table id, columns and primary-key column names, renderable as a CREATE TABLE statement. */
+public class TiDBTableDefinition {
+    TableId tableId;
+    List<TiDBFieldDefinition> fieldDefinitions;
+    List<String> primaryKeys;
+
+    /**
+     * @param tableId table the definition describes
+     * @param fieldDefinitions columns, rendered in list order
+     * @param primaryKeys primary-key column names; may be null or empty
+     */
+    public TiDBTableDefinition(
+            TableId tableId,
+            List<TiDBFieldDefinition> fieldDefinitions,
+            List<String> primaryKeys) {
+        this.tableId = tableId;
+        this.fieldDefinitions = fieldDefinitions;
+        this.primaryKeys = primaryKeys;
+    }
+
+    /** Renders the quoted table id, every column and the optional primary-key clause. */
+    public String toDdl() {
+        return String.format(
+                "CREATE TABLE %s (\n\t %s %s );",
+                TiDBUtils.quote(tableId), fieldDefinitions(), pkDefinition());
+    }
+
+    /** Joins the DDL fragment of every column. */
+    private String fieldDefinitions() {
+        return fieldDefinitions.stream()
+                .map(TiDBFieldDefinition::toDdl)
+                .collect(Collectors.joining(", \n\t"));
+    }
+
+    /** Returns {@code ,PRIMARY KEY ( ... )} with quoted names, or an empty string without keys. */
+    private String pkDefinition() {
+        StringBuilder pkDefinition = new StringBuilder();
+        if (!CollectionUtil.isNullOrEmpty(primaryKeys)) {
+            pkDefinition.append(",");
+            pkDefinition.append(
+                    String.format(
+                            "PRIMARY KEY ( %s )",
+                            primaryKeys.stream()
+                                    .map(TiDBUtils::quote)
+                                    .collect(Collectors.joining(","))));
+        }
+        return pkDefinition.toString();
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java
new file mode 100644
index 00000000000..9358bcac2a1
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.source.splitter;
+
+import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig;
+import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect;
+import org.apache.flink.cdc.connectors.base.source.assigner.splitter.JdbcSourceChunkSplitter;
+import org.apache.flink.cdc.connectors.base.source.assigner.state.ChunkSplitterState;
+import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils;
+import org.apache.flink.table.types.DataType;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.Column;
+import io.debezium.relational.TableId;
+
+import java.sql.SQLException;
+
+/** Chunk splitter for TiDB; every dialect-specific query is delegated to {@link TiDBUtils}. */
+public class TiDBChunkSplitter extends JdbcSourceChunkSplitter {
+
+    /** Passes all arguments to the parent splitter unchanged. */
+    public TiDBChunkSplitter(
+            JdbcSourceConfig sourceConfig,
+            JdbcDataSourceDialect dialect,
+            ChunkSplitterState chunkSplitterState) {
+        super(sourceConfig, dialect, chunkSplitterState);
+    }
+
+    /** Delegates to {@code TiDBUtils.queryNextChunkMax} using the split column's name. */
+    @Override
+    protected Object queryNextChunkMax(
+            JdbcConnection jdbc,
+            TableId tableId,
+            Column splitColumn,
+            int chunkSize,
+            Object includedLowerBound)
+            throws SQLException {
+        final String splitColumnName = splitColumn.name();
+        return TiDBUtils.queryNextChunkMax(
+                jdbc, tableId, splitColumnName, chunkSize, includedLowerBound);
+    }
+
+    /** Delegates to {@code TiDBUtils.queryApproximateRowCnt}. */
+    @Override
+    protected Long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId)
+            throws SQLException {
+        final Long approximateRowCount = TiDBUtils.queryApproximateRowCnt(jdbc, tableId);
+        return approximateRowCount;
+    }
+
+    /** Delegates to {@code TiDBUtils.fromDbzColumn}. */
+    @Override
+    protected DataType fromDbzColumn(Column splitColumn) {
+        final DataType splitColumnType = TiDBUtils.fromDbzColumn(splitColumn);
+        return splitColumnType;
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java
deleted file mode 100644
index 05f96e1693c..00000000000
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.types.RowKind; -import org.apache.flink.util.Collector; -import org.apache.flink.util.FlinkRuntimeException; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.key.RowKey; -import org.tikv.kvproto.Cdcpb.Event.Row; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.tikv.common.codec.TableCodec.decodeObjects; - -/** - * Deserialization schema from TiKV Change Event to Flink Table/SQL internal data structure {@link - * RowData}. - */ -public class RowDataTiKVChangeEventDeserializationSchema - extends RowDataTiKVEventDeserializationSchemaBase - implements TiKVChangeEventDeserializationSchema { - - private static final long serialVersionUID = 1L; - - /** TypeInformation of the produced {@link RowData}. 
* */ - private final TypeInformation resultTypeInfo; - - public RowDataTiKVChangeEventDeserializationSchema( - TiConfiguration tiConf, - String database, - String tableName, - TypeInformation resultTypeInfo, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - super(tiConf, database, tableName, metadataConverters, physicalDataType); - this.resultTypeInfo = checkNotNull(resultTypeInfo); - } - - @Override - public void deserialize(Row row, Collector out) throws Exception { - if (tableInfo == null) { - tableInfo = fetchTableInfo(); - } - final RowKey rowKey = RowKey.decode(row.getKey().toByteArray()); - final long handle = rowKey.getHandle(); - Object[] tikvValues; - - switch (row.getOpType()) { - case DELETE: - tikvValues = decodeObjects(row.getOldValue().toByteArray(), handle, tableInfo); - RowData rowDataDelete = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataDelete.setRowKind(RowKind.DELETE); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataDelete, out); - break; - case PUT: - try { - tikvValues = - decodeObjects( - row.getValue().toByteArray(), - RowKey.decode(row.getKey().toByteArray()).getHandle(), - tableInfo); - if (row.getOldValue() == null || row.getOldValue().isEmpty()) { - RowData rowDataUpdateBefore = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataUpdateBefore.setRowKind(RowKind.INSERT); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataUpdateBefore, out); - } else { - RowData rowDataUpdateAfter = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataUpdateAfter.setRowKind(RowKind.UPDATE_AFTER); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataUpdateAfter, out); - } - break; - } catch (final RuntimeException e) { - throw new FlinkRuntimeException( - String.format( - "Fail to deserialize row: %s, table: %s", - row, tableInfo.getId()), - e); - } - default: - throw new IllegalArgumentException("Unknown Row Op Type: " + 
row.getOpType()); - } - } - - @Override - public TypeInformation getProducedType() { - return resultTypeInfo; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java deleted file mode 100644 index 6e9c7dcda61..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.debezium.utils.TemporalConversions; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.GenericArrayData; -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.types.logical.DecimalType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.util.Collector; -import org.apache.flink.util.FlinkRuntimeException; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.TiSession; -import org.tikv.common.meta.TiColumnInfo; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.kvproto.Kvrpcpb; - -import java.io.Serializable; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.sql.Timestamp; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Base class of deserialization schema from TiKV RowValue (Snapshot or Change Event) to Flink - * Table/SQL internal data structure {@link RowData}. - */ -public class RowDataTiKVEventDeserializationSchemaBase implements Serializable { - private static final long serialVersionUID = 1L; - - /** Whether the deserializer needs to handle metadata columns. */ - private final boolean hasMetadata; - - /** Information of the TiKV table. * */ - protected TiTableInfo tableInfo; - - private final TiConfiguration tiConf; - private final String database; - private final String tableName; - - /** - * A wrapped output collector which is used to append metadata columns after physical columns. - */ - private final TiKVAppendMetadataCollector appendMetadataCollector; - - /** - * Runtime converter that converts Tikv {@link Kvrpcpb.KvPair}s into {@link RowData} consisted - * of physical column values. 
- */ - protected final TiKVDeserializationRuntimeConverter physicalConverter; - - public RowDataTiKVEventDeserializationSchemaBase( - TiConfiguration tiConf, - String database, - String tableName, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - this.tiConf = checkNotNull(tiConf); - this.database = checkNotNull(database); - this.tableName = checkNotNull(tableName); - this.hasMetadata = checkNotNull(metadataConverters).length > 0; - this.appendMetadataCollector = new TiKVAppendMetadataCollector(metadataConverters); - this.physicalConverter = createConverter(checkNotNull(physicalDataType)); - } - - protected TiTableInfo fetchTableInfo() { - try (final TiSession session = TiSession.create(tiConf)) { - return session.getCatalog().getTable(database, tableName); - } catch (final Exception e) { - throw new FlinkRuntimeException(e); - } - } - - public void emit( - TiKVMetadataConverter.TiKVRowValue inRecord, - RowData physicalRow, - Collector collector) { - if (!hasMetadata) { - collector.collect(physicalRow); - return; - } - - appendMetadataCollector.row = inRecord; - appendMetadataCollector.outputCollector = collector; - appendMetadataCollector.collect(physicalRow); - } - - // ------------------------------------------------------------------------------------- - // Runtime Converters - // ------------------------------------------------------------------------------------- - - /** Creates a runtime converter which is null safe. */ - protected static TiKVDeserializationRuntimeConverter createConverter(LogicalType type) { - return wrapIntoNullableConverter(createNotNullConverter(type)); - } - - // -------------------------------------------------------------------------------- - // IMPORTANT! We use anonymous classes instead of lambdas for a reason here. It is - // necessary because the maven shade plugin cannot relocate classes in - // SerializedLambdas (MSHADE-260). 
- // -------------------------------------------------------------------------------- - - /** Creates a runtime converter which assuming input object is not null. */ - public static TiKVDeserializationRuntimeConverter createNotNullConverter(LogicalType type) { - - // if no matched user defined converter, fallback to the default converter - switch (type.getTypeRoot()) { - case NULL: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - return null; - } - }; - case BOOLEAN: - return convertToBoolean(); - case TINYINT: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - - return Byte.parseByte(object.toString()); - } - }; - case SMALLINT: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - return Short.parseShort(object.toString()); - } - }; - case INTEGER: - case INTERVAL_YEAR_MONTH: - return convertToInt(); - case BIGINT: - case INTERVAL_DAY_TIME: - return convertToLong(); - case DATE: - return convertToDate(); - case TIME_WITHOUT_TIME_ZONE: - return convertToTime(); - case TIMESTAMP_WITHOUT_TIME_ZONE: - return convertToTimestamp(); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return convertToLocalTimeZoneTimestamp(); - case FLOAT: - return convertToFloat(); - case DOUBLE: - return convertToDouble(); - case CHAR: - case VARCHAR: - return convertToString(); - case BINARY: - case VARBINARY: - return convertToBinary(); - case DECIMAL: - return createDecimalConverter((DecimalType) type); - case ROW: - return createRowConverter((RowType) type); - 
case ARRAY: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo tableInfo, - org.tikv.common.types.DataType dataType) - throws Exception { - String[] strArray = ((String) object).split(","); - StringData[] stringDataArray = new StringData[strArray.length]; - for (int i = 0; i < strArray.length; i++) { - stringDataArray[i] = StringData.fromString(strArray[i]); - } - return new GenericArrayData(stringDataArray); - } - }; - case MAP: - case MULTISET: - case RAW: - default: - throw new UnsupportedOperationException("Unsupported type: " + type); - } - } - - private static TiKVDeserializationRuntimeConverter convertToBoolean() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Boolean) { - return object; - } else if (object instanceof Long) { - return (Long) object == 1; - } else if (object instanceof Byte) { - return (byte) object == 1; - } else if (object instanceof Short) { - return (short) object == 1; - } else { - return Boolean.parseBoolean(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToInt() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Integer) { - return object; - } else if (object instanceof Long) { - return dataType.isUnsigned() - ? 
Integer.valueOf(Short.toUnsignedInt(((Long) object).shortValue())) - : ((Long) object).intValue(); - } else { - return Integer.parseInt(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToLong() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Integer) { - return ((Integer) object).longValue(); - } else if (object instanceof Long) { - return object; - } else { - return Long.parseLong(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToDouble() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Float) { - return ((Float) object).doubleValue(); - } else if (object instanceof Double) { - return object; - } else { - return Double.parseDouble(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToFloat() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Float) { - return object; - } else if (object instanceof Double) { - return ((Double) object).floatValue(); - } else { - return Float.parseFloat(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToDate() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - return 
(int) TemporalConversions.toLocalDate(object).toEpochDay(); - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToTime() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Long) { - return (int) ((Long) object / 1000_000); - } - return TemporalConversions.toLocalTime(object).toSecondOfDay() * 1000; - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToTimestamp() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - - switch (dataType.getType()) { - case TypeTimestamp: - if (object instanceof Timestamp) { - return TimestampData.fromInstant(((Timestamp) object).toInstant()); - } - break; - case TypeDatetime: - if (object instanceof Timestamp) { - return TimestampData.fromLocalDateTime( - ((Timestamp) object).toLocalDateTime()); - } - break; - default: - throw new IllegalArgumentException( - "Unable to convert to TimestampData from unexpected value '" - + object - + "' of type " - + object.getClass().getName()); - } - return object; - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToLocalTimeZoneTimestamp() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Timestamp) { - return TimestampData.fromInstant(((Timestamp) object).toInstant()); - } - throw new IllegalArgumentException( - "Unable to convert to TimestampData from unexpected value '" - + object - + "' of type " - + object.getClass().getName()); - } - }; - } - - private 
static TiKVDeserializationRuntimeConverter convertToString() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof byte[]) { - return StringData.fromBytes((byte[]) object); - } - return StringData.fromString(object.toString()); - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToBinary() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof byte[]) { - return object; - } else if (object instanceof String) { - return ((String) object).getBytes(); - } else if (object instanceof ByteBuffer) { - ByteBuffer byteBuffer = (ByteBuffer) object; - byte[] bytes = new byte[byteBuffer.remaining()]; - byteBuffer.get(bytes); - return bytes; - } else { - throw new UnsupportedOperationException( - "Unsupported BYTES value type: " + object.getClass().getSimpleName()); - } - } - }; - } - - /** Deal with unsigned column's value. */ - public static Object dealUnsignedColumnValue( - org.tikv.common.types.DataType dataType, Object object) { - // For more information about numeric columns with unsigned, please refer link - // https://docs.pingcap.com/tidb/stable/data-type-numeric. 
- switch (dataType.getType()) { - case TypeTiny: - return (short) Byte.toUnsignedInt(((Long) object).byteValue()); - case TypeShort: - return Short.toUnsignedInt(((Long) object).shortValue()); - case TypeInt24: - return (((Long) object).intValue()) & 0xffffff; - case TypeLong: - return Integer.toUnsignedLong(((Long) object).intValue()); - case TypeLonglong: - return new BigDecimal(Long.toUnsignedString(((Long) object))); - default: - return object; - } - } - - private static TiKVDeserializationRuntimeConverter createDecimalConverter( - DecimalType decimalType) { - final int precision = decimalType.getPrecision(); - final int scale = decimalType.getScale(); - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - BigDecimal bigDecimal; - if (object instanceof String) { - bigDecimal = new BigDecimal((String) object); - } else if (object instanceof Long) { - bigDecimal = new BigDecimal((String) object); - } else if (object instanceof Double) { - bigDecimal = BigDecimal.valueOf((Double) object); - } else if (object instanceof BigDecimal) { - bigDecimal = (BigDecimal) object; - } else { - throw new IllegalArgumentException( - "Unable to convert to decimal from unexpected value '" - + object - + "' of type " - + object.getClass()); - } - return DecimalData.fromBigDecimal(bigDecimal, precision, scale); - } - }; - } - - private static TiKVDeserializationRuntimeConverter createRowConverter(RowType rowType) { - final TiKVDeserializationRuntimeConverter[] fieldConverters = - rowType.getFields().stream() - .map(RowType.RowField::getType) - .map(logicType -> createConverter(logicType)) - .toArray(TiKVDeserializationRuntimeConverter[]::new); - final String[] fieldNames = rowType.getFieldNames().toArray(new String[0]); - - return new TiKVDeserializationRuntimeConverter() { - - private static final long 
serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo tableInfo, org.tikv.common.types.DataType dataType) - throws Exception { - int arity = fieldNames.length; - GenericRowData row = new GenericRowData(arity); - for (int i = 0; i < arity; i++) { - String fieldName = fieldNames[i]; - - TiColumnInfo columnInfo = tableInfo.getColumn(fieldName); - if (columnInfo == null) { - row.setField(i, null); - } else { - int offset = columnInfo.getOffset(); - org.tikv.common.types.DataType type = columnInfo.getType(); - Object convertedField = - convertField( - fieldConverters[i], - tableInfo, - type, - ((Object[]) object)[offset]); - row.setField(i, convertedField); - } - } - return row; - } - }; - } - - private static Object convertField( - TiKVDeserializationRuntimeConverter fieldConverter, - TiTableInfo tableInfo, - org.tikv.common.types.DataType dataType, - Object fieldValue) - throws Exception { - if (fieldValue == null) { - return null; - } else { - if (dataType.isUnsigned()) { - fieldValue = dealUnsignedColumnValue(dataType, fieldValue); - } - return fieldConverter.convert(fieldValue, tableInfo, dataType); - } - } - - private static TiKVDeserializationRuntimeConverter wrapIntoNullableConverter( - TiKVDeserializationRuntimeConverter converter) { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) - throws Exception { - if (object == null) { - return null; - } - return converter.convert(object, schema, dataType); - } - }; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java deleted file mode 100644 index be66a72494b..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.util.Collector; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.key.RowKey; -import org.tikv.kvproto.Kvrpcpb.KvPair; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.tikv.common.codec.TableCodec.decodeObjects; - -/** - * Deserialization schema from TiKV Snapshot Event to Flink Table/SQL internal data structure {@link - * RowData}. 
- */ -public class RowDataTiKVSnapshotEventDeserializationSchema - extends RowDataTiKVEventDeserializationSchemaBase - implements TiKVSnapshotEventDeserializationSchema { - - private static final long serialVersionUID = 1L; - - /** TypeInformation of the produced {@link RowData}. * */ - private final TypeInformation resultTypeInfo; - - public RowDataTiKVSnapshotEventDeserializationSchema( - TiConfiguration tiConf, - String database, - String tableName, - TypeInformation resultTypeInfo, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - super(tiConf, database, tableName, metadataConverters, physicalDataType); - this.resultTypeInfo = checkNotNull(resultTypeInfo); - } - - @Override - public TypeInformation getProducedType() { - return resultTypeInfo; - } - - @Override - public void deserialize(KvPair record, Collector out) throws Exception { - if (tableInfo == null) { - tableInfo = fetchTableInfo(); - } - Object[] tikvValues = - decodeObjects( - record.getValue().toByteArray(), - RowKey.decode(record.getKey().toByteArray()).getHandle(), - tableInfo); - - emit( - new TiKVMetadataConverter.TiKVRowValue(record), - (RowData) physicalConverter.convert(tikvValues, tableInfo, null), - out); - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java deleted file mode 100644 index e1dcfa6345c..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import java.util.Objects; - -/** TiDB CDC Source startup options. */ -public final class StartupOptions { - - public final StartupMode startupMode; - - /** - * Performs an initial snapshot on the monitored database tables upon first startup, and - * continue to read the latest CDC events. - */ - public static StartupOptions initial() { - return new StartupOptions(StartupMode.INITIAL); - } - - /** - * Never to perform snapshot on the monitored database tables upon first startup, just read from - * the latest CDC events which means only have the changes since the connector was started. 
-     */
-    public static StartupOptions latest() {
-        return new StartupOptions(StartupMode.LATEST_OFFSET);
-    }
-
-    private StartupOptions(StartupMode startupMode) {
-        this.startupMode = startupMode;
-
-        switch (startupMode) {
-            case INITIAL:
-
-            case LATEST_OFFSET:
-                break;
-
-            default:
-                throw new UnsupportedOperationException(startupMode + " mode is not supported.");
-        }
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
-        StartupOptions that = (StartupOptions) o;
-        return startupMode == that.startupMode;
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(startupMode);
-    }
-}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java
new file mode 100644
index 00000000000..367f3a8f864
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.table;
+
+import org.apache.flink.cdc.debezium.table.DeserializationRuntimeConverter;
+import org.apache.flink.cdc.debezium.table.DeserializationRuntimeConverterFactory;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.types.logical.ArrayType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.LogicalTypeFamily;
+
+import com.esri.core.geometry.ogc.OGCGeometry;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import io.debezium.data.EnumSet;
+import io.debezium.data.geometry.Geometry;
+import io.debezium.data.geometry.Point;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.Struct;
+
+import java.nio.ByteBuffer;
+import java.time.ZoneId;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Supplies the {@link DeserializationRuntimeConverterFactory} holding the user-defined converters
+ * of the TiDB connector for TINYINT, CHAR/VARCHAR and ARRAY columns.
+ */
+public class TiDBDeserializationConverterFactory {
+    /**
+     * Creates the converter factory.
+     *
+     * @return a factory that supplies a converter for the TINYINT, CHAR, VARCHAR and ARRAY type
+     *     roots and {@link Optional#empty()} for every other type root
+     */
+    public static DeserializationRuntimeConverterFactory instance() {
+        return new DeserializationRuntimeConverterFactory() {
+
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public Optional<DeserializationRuntimeConverter> createUserDefinedConverter(
+                    LogicalType logicalType, ZoneId serverTimeZone) {
+                switch (logicalType.getTypeRoot()) {
+                    case TINYINT:
+                        return createTinyIntConverter();
+                    case CHAR:
+                    case VARCHAR:
+                        return createStringConverter();
+                    case ARRAY:
+                        return createArrayConverter((ArrayType) logicalType);
+                    default:
+                        // fallback to default converter
+                        return Optional.empty();
+                }
+            }
+        };
+    }
+
+    /**
+     * Creates the converter for CHAR and VARCHAR columns.
+     *
+     * <p>A value whose schema name is the Debezium {@code Point} or {@code Geometry} logical name
+     * is rendered as a JSON string holding {@code type}, {@code srid} (0 when absent) and either
+     * {@code coordinates} or, for a {@code GeometryCollection}, {@code geometries}. Any other
+     * value is converted with {@code toString()}.
+     */
+    private static Optional<DeserializationRuntimeConverter> createStringConverter() {
+        final ObjectMapper objectMapper = new ObjectMapper();
+        final ObjectWriter objectWriter = objectMapper.writer();
+        return Optional.of(
+                new DeserializationRuntimeConverter() {
+
+                    private static final long serialVersionUID = 1L;
+
+                    @Override
+                    public Object convert(Object dbzObj, Schema schema) throws Exception {
+                        // the Geometry datatype in MySQL will be converted to
+                        // a String with Json format
+                        if (Point.LOGICAL_NAME.equals(schema.name())
+                                || Geometry.LOGICAL_NAME.equals(schema.name())) {
+                            try {
+                                Struct geometryStruct = (Struct) dbzObj;
+                                byte[] wkb = geometryStruct.getBytes("wkb");
+                                String geoJson =
+                                        OGCGeometry.fromBinary(ByteBuffer.wrap(wkb)).asGeoJson();
+                                JsonNode originGeoNode = objectMapper.readTree(geoJson);
+                                Optional<Integer> srid =
+                                        Optional.ofNullable(geometryStruct.getInt32("srid"));
+                                Map<String, Object> geometryInfo = new HashMap<>();
+                                String geometryType = originGeoNode.get("type").asText();
+                                geometryInfo.put("type", geometryType);
+                                if (geometryType.equals("GeometryCollection")) {
+                                    geometryInfo.put("geometries", originGeoNode.get("geometries"));
+                                } else {
+                                    geometryInfo.put(
+                                            "coordinates", originGeoNode.get("coordinates"));
+                                }
+                                geometryInfo.put("srid", srid.orElse(0));
+                                return StringData.fromString(
+                                        objectWriter.writeValueAsString(geometryInfo));
+                            } catch (Exception e) {
+                                throw new IllegalArgumentException(
+                                        String.format(
+                                                "Failed to convert %s to geometry JSON.", dbzObj),
+                                        e);
+                            }
+                        } else {
+                            return StringData.fromString(dbzObj.toString());
+                        }
+                    }
+                });
+    }
+
+    /**
+     * Creates the converter for ARRAY columns.
+     *
+     * @param arrayType the Flink array type of the column
+     * @return a converter that splits a comma-separated SET value into its elements when the
+     *     element type is a character string, otherwise {@link Optional#empty()}
+     */
+    private static Optional<DeserializationRuntimeConverter> createArrayConverter(
+            ArrayType arrayType) {
+        if (hasFamily(arrayType.getElementType(), LogicalTypeFamily.CHARACTER_STRING)) {
+            // only map MySQL SET type to Flink ARRAY type
+            return Optional.of(
+                    new DeserializationRuntimeConverter() {
+                        private static final long serialVersionUID = 1L;
+
+                        @Override
+                        public Object convert(Object dbzObj, Schema schema) throws Exception {
+                            if (EnumSet.LOGICAL_NAME.equals(schema.name())
+                                    && dbzObj instanceof String) {
+                                // for SET datatype in mysql, debezium will always
+                                // return a string split by comma like "a,b,c"
+                                String[] enums = ((String) dbzObj).split(",");
+                                StringData[] elements = new StringData[enums.length];
+                                for (int i = 0; i < enums.length; i++) {
+                                    elements[i] = StringData.fromString(enums[i]);
+                                }
+                                return new GenericArrayData(elements);
+                            } else {
+                                throw new IllegalArgumentException(
+                                        String.format(
+                                                "Unable convert to Flink ARRAY type from unexpected value '%s', "
+                                                        + "only SET type could be converted to ARRAY type for MySQL",
+                                                dbzObj));
+                            }
+                        }
+                    });
+        } else {
+            // otherwise, fallback to default converter
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Creates the converter for TINYINT columns: a {@link Boolean} becomes {@code 1} or {@code 0},
+     * any other value is parsed from its string form by {@link Byte#parseByte(String)}.
+     */
+    private static Optional<DeserializationRuntimeConverter> createTinyIntConverter() {
+
+        return Optional.of(
+                new DeserializationRuntimeConverter() {
+                    private static final long serialVersionUID = 1L;
+
+                    @Override
+                    public Object convert(Object dbzObj, Schema schema) throws Exception {
+                        if (dbzObj instanceof Boolean) {
+                            // Unbox rather than compare references: a Boolean instance that is
+                            // not the canonical Boolean.TRUE must still map to 1.
+                            return (Boolean) dbzObj ? (byte) 1 : (byte) 0;
+                        } else {
+                            return Byte.parseByte(dbzObj.toString());
+                        }
+                    }
+                });
+    }
+
+    /** Returns whether the type root of {@code logicalType} belongs to {@code family}. */
+    private static boolean hasFamily(LogicalType logicalType, LogicalTypeFamily family) {
+        return logicalType.getTypeRoot().getFamilies().contains(family);
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java
new file mode 100644
index 00000000000..1569ea6b155
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.flink.cdc.connectors.tidb.table; + +import org.apache.flink.cdc.debezium.table.MetadataConverter; +import org.apache.flink.cdc.debezium.table.RowDataMetadataConverter; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; + +import io.debezium.connector.AbstractSourceInfo; +import io.debezium.data.Envelope; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; + +/** Defines the supported metadata columns for {@link TiDBTableSource}. */ +public enum TiDBReadableMetadata { + + /** Name of the table that contains the row. */ + TABLE_NAME( + "table_name", + DataTypes.STRING().notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return StringData.fromString( + sourceStruct.getString(AbstractSourceInfo.TABLE_NAME_KEY)); + } + }), + + /** Name of the database that contains the row. */ + DATABASE_NAME( + "database_name", + DataTypes.STRING().notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return StringData.fromString( + sourceStruct.getString(AbstractSourceInfo.DATABASE_NAME_KEY)); + } + }), + + /** + * It indicates the time that the change was made in the database. If the record is read from + * snapshot of the table instead of the change log, the value is always 0.
+ */ + OP_TS( + "op_ts", + DataTypes.TIMESTAMP_LTZ(3).notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return TimestampData.fromEpochMillis( + (Long) sourceStruct.get(AbstractSourceInfo.TIMESTAMP_KEY)); + } + }), + + /** + * It indicates the row kind of the changelog. '+I' means INSERT message, '-D' means DELETE + * message, '-U' means UPDATE_BEFORE message and '+U' means UPDATE_AFTER message. + */ + ROW_KIND( + "row_kind", + DataTypes.STRING().notNull(), + new RowDataMetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(RowData rowData) { + return StringData.fromString(rowData.getRowKind().shortString()); + } + + @Override + public Object read(SourceRecord record) { + throw new UnsupportedOperationException( + "Please call read(RowData rowData) method instead."); + } + }); + + private final String key; + + private final DataType dataType; + + private final MetadataConverter converter; + + TiDBReadableMetadata(String key, DataType dataType, MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.converter = converter; + } + + public String getKey() { + return key; + } + + public DataType getDataType() { + return dataType; + } + + public MetadataConverter getConverter() { + return converter; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java new file mode 100644 index 00000000000..7b6e7ef7f4f --- /dev/null +++
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.table; + +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.cdc.connectors.base.utils.OptionUtils; +import org.apache.flink.cdc.debezium.table.DebeziumOptions; +import org.apache.flink.cdc.debezium.utils.JdbcUrlUtils; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; + +import java.time.Duration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECTION_POOL_SIZE; +import static 
org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_MAX_RETRIES; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_TIMEOUT; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.DATABASE_NAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.HOSTNAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.PASSWORD; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.SERVER_TIME_ZONE; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.TABLE_NAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.USERNAME; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.CHUNK_META_GROUP_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_ENABLED; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_STARTUP_MODE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_STARTUP_TIMESTAMP_MILLIS; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HEARTBEAT_INTERVAL; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HOST_MAPPING; +import static 
org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.PD_ADDRESSES; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TABLE_LIST; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TiDB_PORT; +import static org.apache.flink.cdc.debezium.table.DebeziumOptions.getDebeziumProperties; +import static org.apache.flink.cdc.debezium.utils.ResolvedSchemaUtils.getPhysicalSchema; + +public class TiDBTableFactory implements DynamicTableSourceFactory { + private static final String IDENTIFIER = "tidb-cdc"; + + @Override + public String factoryIdentifier() { + return IDENTIFIER; + } + + @Override + public Set> requiredOptions() { + Set> options = new HashSet<>(); + options.add(HOSTNAME); + options.add(USERNAME); + options.add(PASSWORD); + options.add(PD_ADDRESSES); + options.add(TiDB_PORT); + + return options; + } + + @Override + public Set> optionalOptions() { + Set> options = new HashSet<>(); + options.add(SCAN_STARTUP_MODE); + options.add(SCAN_STARTUP_TIMESTAMP_MILLIS); + + options.add(DATABASE_NAME); + options.add(TABLE_NAME); + options.add(TABLE_LIST); + options.add(CONNECT_TIMEOUT); + options.add(SERVER_TIME_ZONE); + options.add(HOST_MAPPING); + options.add(JDBC_DRIVER); + options.add(HEARTBEAT_INTERVAL); + + // incremental snapshot options + options.add(SCAN_INCREMENTAL_SNAPSHOT_ENABLED); + options.add(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE); + options.add(CHUNK_META_GROUP_SIZE); + options.add(CONNECTION_POOL_SIZE); + options.add(CONNECT_MAX_RETRIES); + options.add(SCAN_SNAPSHOT_FETCH_SIZE); + options.add(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN); + options.add(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND); + options.add(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND); + return options; + } + + private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial"; + private static final String
SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET = "latest-offset"; + private static final String SCAN_STARTUP_MODE_VALUE_SNAPSHOT = "snapshot"; + private static final String SCAN_STARTUP_MODE_VALUE_TIMESTAMP = "timestamp"; + + private static StartupOptions getStartupOptions(ReadableConfig config) { + String modeString = config.get(SCAN_STARTUP_MODE); + Long startupTimestamp = config.get(SCAN_STARTUP_TIMESTAMP_MILLIS); + switch (modeString.toLowerCase()) { + case SCAN_STARTUP_MODE_VALUE_INITIAL: + return StartupOptions.initial(); + case SCAN_STARTUP_MODE_VALUE_SNAPSHOT: + return StartupOptions.snapshot(); + case SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET: + return StartupOptions.latest(); + case SCAN_STARTUP_MODE_VALUE_TIMESTAMP: + return StartupOptions.timestamp(startupTimestamp); + default: + throw new ValidationException( + String.format( + "Invalid value for option '%s'. Supported values are [%s, %s, %s, %s], but was: %s", + SCAN_STARTUP_MODE.key(), + SCAN_STARTUP_MODE_VALUE_INITIAL, + SCAN_STARTUP_MODE_VALUE_SNAPSHOT, + SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET, + SCAN_STARTUP_MODE_VALUE_TIMESTAMP, + modeString)); + } + } + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + + // validate all options except the pass-through JDBC, Debezium and TiKV prefixed keys + helper.validateExcept( + JdbcUrlUtils.PROPERTIES_PREFIX, + DebeziumOptions.DEBEZIUM_OPTIONS_PREFIX, + TiKVOptions.TIKV_OPTIONS_PREFIX); + + final ReadableConfig config = helper.getOptions(); + + String hostname = config.get(HOSTNAME); + String username = config.get(USERNAME); + String password = config.get(PASSWORD); + String databaseName = config.get(DATABASE_NAME); + String tableName = config.get(TABLE_NAME); + String tableList = config.get(TABLE_LIST); + + int port = config.get(TiDB_PORT); + String serverTimeZone = config.get(SERVER_TIME_ZONE); + Duration connectTimeout = config.get(CONNECT_TIMEOUT); + String pdAddresses = config.get(PD_ADDRESSES); + String
hostMapping = config.get(HOST_MAPPING); + String jdbcDriver = config.get(JDBC_DRIVER); + + // incremental snapshot options + boolean enableParallelRead = config.get(SCAN_INCREMENTAL_SNAPSHOT_ENABLED); + int splitSize = config.get(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE); + int splitMetaGroupSize = config.get(CHUNK_META_GROUP_SIZE); + int fetchSize = config.get(SCAN_SNAPSHOT_FETCH_SIZE); + int connectionPoolSize = config.get(CONNECTION_POOL_SIZE); + int connectMaxRetries = config.get(CONNECT_MAX_RETRIES); + String chunkKeyColumn = + config.getOptional(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN).orElse(null); + Map chunkKeyColumns = new HashMap<>(); + if (chunkKeyColumn != null) { + chunkKeyColumns.put(new ObjectPath(databaseName, tableName), chunkKeyColumn); + } + + double distributionFactorUpper = config.get(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND); + double distributionFactorLower = config.get(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND); + + ResolvedSchema physicalSchema = + getPhysicalSchema(context.getCatalogTable().getResolvedSchema()); + + StartupOptions startupOptions = getStartupOptions(config); + + Duration heartbeatInterval = config.get(HEARTBEAT_INTERVAL); + + OptionUtils.printOptions(IDENTIFIER, config.toMap()); + + return new TiDBTableSource( + physicalSchema, + port, + hostname, + databaseName, + tableName, + tableList, + username, + password, + serverTimeZone, + getDebeziumProperties(context.getCatalogTable().getOptions()), + enableParallelRead, + heartbeatInterval, + pdAddresses, + hostMapping, + connectTimeout, + TiKVOptions.getTiKVOptions(context.getCatalogTable().getOptions()), + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcDriver, + startupOptions); + } + + static class TiKVOptions { + private static final String TIKV_OPTIONS_PREFIX = "tikv."; + + public static Map getTiKVOptions(Map properties) { +
Map tikvOptions = new HashMap<>(); + + if (hasTiKVOptions(properties)) { + properties.keySet().stream() + .filter(key -> key.startsWith(TIKV_OPTIONS_PREFIX)) + .forEach( + key -> { + final String value = properties.get(key); + tikvOptions.put(key, value); + }); + } + return tikvOptions; + } + + /** + * Decides if the table options contain any TiKV client properties, i.e. keys that start + * with the prefix 'tikv.'. + */ + private static boolean hasTiKVOptions(Map options) { + return options.keySet().stream().anyMatch(k -> k.startsWith(TIKV_OPTIONS_PREFIX)); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java index f9310462548..e3874d709c0 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java @@ -18,13 +18,19 @@ package org.apache.flink.cdc.connectors.tidb.table; import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; -import org.apache.flink.cdc.connectors.tidb.TiDBSource; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.source.TiDBSourceBuilder; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.MetadataConverter; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import
org.apache.flink.table.catalog.ObjectPath; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.connector.ChangelogMode; import org.apache.flink.table.connector.source.DynamicTableSource; import org.apache.flink.table.connector.source.ScanTableSource; -import org.apache.flink.table.connector.source.SourceFunctionProvider; +import org.apache.flink.table.connector.source.SourceProvider; import org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.DataType; @@ -35,32 +41,51 @@ import javax.annotation.Nullable; +import java.time.Duration; +import java.time.ZoneId; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Objects; +import java.util.Properties; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.flink.util.Preconditions.checkNotNull; -/** - * A {@link DynamicTableSource} that describes how to create a TiDB change event stream from a - * logical description. 
- */ public class TiDBTableSource implements ScanTableSource, SupportsReadingMetadata { - private final ResolvedSchema physicalSchema; - private final String database; + + private final StartupOptions startupOptions; + private final String tableList; private final String tableName; + private final Duration connectTimeout; + private final String jdbcDriver; + private final String serverTimeZone; + private final String pdAddresses; - @Nullable private final String hostMapping; - private final StartupOptions startupOptions; - private final Map options; + private final String hostMapping; + + private final int port; + private final String hostName; + private final String database; + private final String username; + private final String password; + private final Duration heartbeatInterval; - // -------------------------------------------------------------------------------------------- - // Mutable attributes - // -------------------------------------------------------------------------------------------- + // incremental snapshot options + private final int splitSize; + private final int splitMetaGroupSize; + private final int fetchSize; + private final int connectionPoolSize; + private final int connectMaxRetries; + private final double distributionFactorUpper; + private final double distributionFactorLower; + private final String chunkKeyColumn; + private final Map chunkKeyColumns; + + private final Properties jdbcProperties; + private final Map options; + private final boolean enableParallelRead; /** Data type that describes the final output of the source. 
*/ protected DataType producedDataType; @@ -70,20 +95,62 @@ public class TiDBTableSource implements ScanTableSource, SupportsReadingMetadata public TiDBTableSource( ResolvedSchema physicalSchema, + int port, + String hostName, String database, String tableName, + String tableList, + String username, + String password, + String serverTimeZone, + Properties jdbcProperties, + boolean enableParallelRead, + Duration heartbeatInterval, String pdAddresses, String hostMapping, - StartupOptions startupOptions, - Map options) { + Duration connectTimeout, + Map options, + int splitSize, + int splitMetaGroupSize, + int fetchSize, + int connectMaxRetries, + int connectionPoolSize, + double distributionFactorUpper, + double distributionFactorLower, + @Nullable String chunkKeyColumn, + @Nullable Map chunkKeyColumns, + String jdbcDriver, + StartupOptions startupOptions) { this.physicalSchema = physicalSchema; this.database = checkNotNull(database); this.tableName = checkNotNull(tableName); this.pdAddresses = checkNotNull(pdAddresses); + this.port = port; + this.username = username; + this.password = password; + this.serverTimeZone = serverTimeZone; + this.jdbcProperties = jdbcProperties; + this.hostName = hostName; + this.options = options; + + // incremental snapshot options + this.enableParallelRead = enableParallelRead; + this.splitSize = splitSize; + this.splitMetaGroupSize = splitMetaGroupSize; + this.fetchSize = fetchSize; + this.connectMaxRetries = connectMaxRetries; + this.connectionPoolSize = connectionPoolSize; + this.distributionFactorUpper = distributionFactorUpper; + this.distributionFactorLower = distributionFactorLower; + this.chunkKeyColumn = chunkKeyColumn; + this.chunkKeyColumns = chunkKeyColumns; + this.heartbeatInterval = heartbeatInterval; + this.jdbcDriver = jdbcDriver; + this.connectTimeout = connectTimeout; + this.tableList = tableList; this.hostMapping = hostMapping; this.startupOptions = startupOptions; this.producedDataType = 
physicalSchema.toPhysicalRowDataType(); - this.options = options; this.metadataKeys = Collections.emptyList(); } @@ -98,40 +165,59 @@ public ChangelogMode getChangelogMode() { @Override public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) { + // TIDB source builder final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration(pdAddresses, hostMapping, options); + TiDBSourceOptions.getTiConfiguration(pdAddresses, hostMapping, options); + RowType physicalDataType = (RowType) physicalSchema.toPhysicalRowDataType().getLogicalType(); + TypeInformation typeInfo = scanContext.createTypeInformation(producedDataType); - TiKVMetadataConverter[] metadataConverters = getMetadataConverters(); + MetadataConverter[] metadataConverters = getMetadataConverters(); - RowDataTiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema = - new RowDataTiKVSnapshotEventDeserializationSchema( - tiConf, - database, - tableName, - typeInfo, - metadataConverters, - physicalDataType); + DebeziumDeserializationSchema deserializer = + RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType(physicalDataType) + .setMetadataConverters(metadataConverters) + .setResultTypeInfo(typeInfo) + .setServerTimeZone( + serverTimeZone == null + ? ZoneId.systemDefault() + : ZoneId.of(serverTimeZone)) + .setUserDefinedConverterFactory( + TiDBDeserializationConverterFactory.instance()) + .build(); - RowDataTiKVChangeEventDeserializationSchema changeEventDeserializationSchema = - new RowDataTiKVChangeEventDeserializationSchema( - tiConf, - database, - tableName, - typeInfo, - metadataConverters, - physicalDataType); - - TiDBSource.Builder builder = - TiDBSource.builder() - .database(database) - .tableName(tableName) + JdbcIncrementalSource parallelSource = + TiDBSourceBuilder.TiDBIncrementalSource.builder() + .hostname(hostName) + .port(port) + .tiConfiguration(tiConf) + .databaseList(database) + .tableList(database + "\\." 
+ tableName) + .username(username) + .password(password) + .serverTimeZone(serverTimeZone.toString()) + .splitSize(splitSize) + .splitMetaGroupSize(splitMetaGroupSize) + .distributionFactorUpper(distributionFactorUpper) + .distributionFactorLower(distributionFactorLower) + .fetchSize(fetchSize) + .connectTimeout(connectTimeout) + .connectionPoolSize(connectionPoolSize) + .chunkKeyColumn(chunkKeyColumn) + .chunkKeyColumns(chunkKeyColumns) + .driverClassName(jdbcDriver) + .connectMaxRetries(connectMaxRetries) + .jdbcProperties(jdbcProperties) .startupOptions(startupOptions) - .tiConf(tiConf) - .snapshotEventDeserializer(snapshotEventDeserializationSchema) - .changeEventDeserializer(changeEventDeserializationSchema); - return SourceFunctionProvider.of(builder.build(), false); + .pdAddresses(pdAddresses) + .hostMapping(hostMapping) + .deserializer(deserializer) + .build(); + // todo JdbcIncrementalSource parallelSource = + // TiDBSourceBuilder.TiDBIncrementalSource.builder() + return SourceProvider.of(parallelSource); } @Override @@ -139,65 +225,36 @@ public DynamicTableSource copy() { TiDBTableSource source = new TiDBTableSource( physicalSchema, + port, + hostName, database, tableName, + tableList, + username, + password, + serverTimeZone, + jdbcProperties, + enableParallelRead, + heartbeatInterval, pdAddresses, hostMapping, - startupOptions, - options); + connectTimeout, + options, + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcDriver, + startupOptions); source.producedDataType = producedDataType; source.metadataKeys = metadataKeys; - return source; - } - - private TiKVMetadataConverter[] getMetadataConverters() { - if (metadataKeys.isEmpty()) { - return new TiKVMetadataConverter[0]; - } - return metadataKeys.stream() - .map( - key -> - Stream.of( - TiKVReadableMetadata.createTiKVReadableMetadata( - database, tableName)) - 
.filter(m -> m.getKey().equals(key)) - .findFirst() - .orElseThrow(IllegalStateException::new)) - .map(TiKVReadableMetadata::getConverter) - .toArray(TiKVMetadataConverter[]::new); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - TiDBTableSource that = (TiDBTableSource) o; - return Objects.equals(physicalSchema, that.physicalSchema) - && Objects.equals(database, that.database) - && Objects.equals(tableName, that.tableName) - && Objects.equals(pdAddresses, that.pdAddresses) - && Objects.equals(startupOptions, that.startupOptions) - && Objects.equals(options, that.options) - && Objects.equals(producedDataType, that.producedDataType) - && Objects.equals(metadataKeys, that.metadataKeys); - } - - @Override - public int hashCode() { - return Objects.hash( - physicalSchema, - database, - tableName, - pdAddresses, - startupOptions, - options, - producedDataType, - metadataKeys); + return source; } @Override @@ -207,10 +264,10 @@ public String asSummaryString() { @Override public Map listReadableMetadata() { - return Stream.of(TiKVReadableMetadata.createTiKVReadableMetadata(database, tableName)) + return Stream.of(TiDBReadableMetadata.values()) .collect( Collectors.toMap( - TiKVReadableMetadata::getKey, TiKVReadableMetadata::getDataType)); + TiDBReadableMetadata::getKey, TiDBReadableMetadata::getDataType)); } @Override @@ -218,4 +275,21 @@ public void applyReadableMetadata(List metadataKeys, DataType producedDa this.metadataKeys = metadataKeys; this.producedDataType = producedDataType; } + + // TiDBMetadataConverter to MetadataConverter + private MetadataConverter[] getMetadataConverters() { + if (metadataKeys.isEmpty()) { + return new MetadataConverter[0]; + } + + return metadataKeys.stream() + .map( + key -> + Stream.of(TiDBReadableMetadata.values()) + .filter(m -> m.getKey().equals(key)) + .findFirst() + .orElseThrow(IllegalStateException::new)) + 
.map(TiDBReadableMetadata::getConverter) + .toArray(MetadataConverter[]::new); + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java deleted file mode 100644 index b38175b71b5..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.connectors.tidb.table.utils.OptionUtils; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.table.api.ValidationException; -import org.apache.flink.table.catalog.ResolvedSchema; -import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.factories.DynamicTableSourceFactory; -import org.apache.flink.table.factories.FactoryUtil; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.DATABASE_NAME; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.HOST_MAPPING; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.PD_ADDRESSES; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.SCAN_STARTUP_MODE; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TABLE_NAME; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_BATCH_GET_CONCURRENCY; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_BATCH_SCAN_CONCURRENCY; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_GRPC_SCAN_TIMEOUT; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_GRPC_TIMEOUT; -import static org.apache.flink.cdc.debezium.utils.ResolvedSchemaUtils.getPhysicalSchema; - -/** Factory for creating configured instance of {@link TiDBTableSource}. 
*/ -public class TiDBTableSourceFactory implements DynamicTableSourceFactory { - - private static final String IDENTIFIER = "tidb-cdc"; - - @Override - public DynamicTableSource createDynamicTableSource(Context context) { - final FactoryUtil.TableFactoryHelper helper = - FactoryUtil.createTableFactoryHelper(this, context); - - final ReadableConfig config = helper.getOptions(); - String databaseName = config.get(DATABASE_NAME); - String tableName = config.get(TABLE_NAME); - String pdAddresses = config.get(PD_ADDRESSES); - String hostMapping = config.get(HOST_MAPPING); - StartupOptions startupOptions = getStartupOptions(config); - ResolvedSchema physicalSchema = - getPhysicalSchema(context.getCatalogTable().getResolvedSchema()); - - OptionUtils.printOptions(IDENTIFIER, ((Configuration) config).toMap()); - - return new TiDBTableSource( - physicalSchema, - databaseName, - tableName, - pdAddresses, - hostMapping, - startupOptions, - TiKVOptions.getTiKVOptions(context.getCatalogTable().getOptions())); - } - - @Override - public String factoryIdentifier() { - return IDENTIFIER; - } - - @Override - public Set> requiredOptions() { - Set> options = new HashSet<>(); - options.add(DATABASE_NAME); - options.add(TABLE_NAME); - options.add(PD_ADDRESSES); - return options; - } - - @Override - public Set> optionalOptions() { - Set> options = new HashSet<>(); - options.add(SCAN_STARTUP_MODE); - options.add(HOST_MAPPING); - options.add(TIKV_GRPC_TIMEOUT); - options.add(TIKV_GRPC_SCAN_TIMEOUT); - options.add(TIKV_BATCH_GET_CONCURRENCY); - options.add(TIKV_BATCH_SCAN_CONCURRENCY); - return options; - } - - private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial"; - private static final String SCAN_STARTUP_MODE_VALUE_LATEST = "latest-offset"; - - private static StartupOptions getStartupOptions(ReadableConfig config) { - String modeString = config.get(SCAN_STARTUP_MODE); - - switch (modeString.toLowerCase()) { - case SCAN_STARTUP_MODE_VALUE_INITIAL: - return 
StartupOptions.initial(); - - case SCAN_STARTUP_MODE_VALUE_LATEST: - return StartupOptions.latest(); - - default: - throw new ValidationException( - String.format( - "Invalid value for option '%s'. Supported values are [%s, %s], but was: %s", - SCAN_STARTUP_MODE.key(), - SCAN_STARTUP_MODE_VALUE_INITIAL, - SCAN_STARTUP_MODE_VALUE_LATEST, - modeString)); - } - } - - static class TiKVOptions { - private static final String TIKV_OPTIONS_PREFIX = "tikv."; - - public static Map getTiKVOptions(Map properties) { - Map tikvOptions = new HashMap<>(); - - if (hasTiKVOptions(properties)) { - properties.keySet().stream() - .filter(key -> key.startsWith(TIKV_OPTIONS_PREFIX)) - .forEach( - key -> { - final String value = properties.get(key); - tikvOptions.put(key, value); - }); - } - return tikvOptions; - } - - /** - * Decides if the table options contains Debezium client properties that start with prefix - * 'debezium'. - */ - private static boolean hasTiKVOptions(Map options) { - return options.keySet().stream().anyMatch(k -> k.startsWith(TIKV_OPTIONS_PREFIX)); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java deleted file mode 100644 index 43904f3aaaa..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.utils.JoinedRowData; -import org.apache.flink.util.Collector; - -import java.io.Serializable; - -/** Emits a row with physical fields and metadata fields. */ -public class TiKVAppendMetadataCollector implements Collector, Serializable { - - private static final long serialVersionUID = 1L; - - private final TiKVMetadataConverter[] metadataConverters; - - public transient TiKVMetadataConverter.TiKVRowValue row; - public transient Collector outputCollector; - - public TiKVAppendMetadataCollector(TiKVMetadataConverter[] metadataConverters) { - this.metadataConverters = metadataConverters; - } - - @Override - public void collect(RowData physicalRow) { - GenericRowData metaRow = new GenericRowData(metadataConverters.length); - for (int i = 0; i < metadataConverters.length; i++) { - Object meta = metadataConverters[i].read(row); - metaRow.setField(i, meta); - } - RowData outRow = new JoinedRowData(physicalRow.getRowKind(), physicalRow, metaRow); - outputCollector.collect(outRow); - } - - @Override - public void close() { - // nothing to do - } -} diff --git 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java deleted file mode 100644 index 891fffb2943..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.common.annotation.Internal; - -import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Kvrpcpb; - -import java.io.Serializable; - -/** A converter converts TiKV Row metadata into Flink internal data structures. */ -@FunctionalInterface -@Internal -public interface TiKVMetadataConverter extends Serializable { - - Object read(TiKVRowValue row); - - /** TiKV Row Value. 
*/ - class TiKVRowValue { - public boolean isSnapshotRecord; - public Kvrpcpb.KvPair kvPair; - public Cdcpb.Event.Row row; - - public TiKVRowValue(Kvrpcpb.KvPair kvPair) { - this.isSnapshotRecord = true; - this.kvPair = kvPair; - } - - public TiKVRowValue(Cdcpb.Event.Row row) { - this.isSnapshotRecord = false; - this.row = row; - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java deleted file mode 100644 index a617347e283..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.table.api.DataTypes; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.types.DataType; - -import java.util.ArrayList; -import java.util.List; - -/** Defines the supported metadata columns for {@link TiDBTableSource}. */ -public class TiKVReadableMetadata { - - private final String key; - - private final DataType dataType; - - private final TiKVMetadataConverter converter; - - TiKVReadableMetadata(String key, DataType dataType, TiKVMetadataConverter converter) { - this.key = key; - this.dataType = dataType; - this.converter = converter; - } - - public String getKey() { - return key; - } - - public DataType getDataType() { - return dataType; - } - - public TiKVMetadataConverter getConverter() { - return converter; - } - - /** Name of the table that contain the row. */ - public static TiKVReadableMetadata createTableNameMetadata(String tableName) { - return new TiKVReadableMetadata( - "table_name", - DataTypes.STRING().notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - return StringData.fromString(tableName); - } - }); - } - - /** Name of the database that contain the row. */ - public static TiKVReadableMetadata createDatabaseNameMetadata(String database) { - return new TiKVReadableMetadata( - "database_name", - DataTypes.STRING().notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - return StringData.fromString(database); - } - }); - } - - /** - * It indicates the time that the change was made in the database. If the record is read from - * snapshot of the table instead of the change stream, the value is always 0. 
- */ - public static TiKVReadableMetadata createOpTsMetadata() { - return new TiKVReadableMetadata( - "op_ts", - DataTypes.TIMESTAMP_LTZ(3).notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - if (row.isSnapshotRecord) { - // Uses OL as the operation time of snapshot records. - return TimestampData.fromEpochMillis(0L); - } else { - return TimestampData.fromEpochMillis(row.row.getStartTs()); - } - } - }); - } - - public static TiKVReadableMetadata[] createTiKVReadableMetadata( - String database, String tableName) { - List list = new ArrayList<>(); - list.add(createDatabaseNameMetadata(database)); - list.add(createTableNameMetadata(tableName)); - list.add(createOpTsMetadata()); - return list.toArray(new TiKVReadableMetadata[0]); - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java deleted file mode 100644 index a76b787b985..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table.utils; - -import org.apache.flink.util.Preconditions; - -import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableList; - -import org.tikv.common.key.RowKey; -import org.tikv.common.util.KeyRangeUtils; -import org.tikv.kvproto.Coprocessor.KeyRange; - -import java.math.BigInteger; -import java.util.List; - -/** Utils to obtain the keyRange of table. */ -public class TableKeyRangeUtils { - public static KeyRange getTableKeyRange(final long tableId) { - return KeyRangeUtils.makeCoprocRange( - RowKey.createMin(tableId).toByteString(), - RowKey.createBeyondMax(tableId).toByteString()); - } - - public static List getTableKeyRanges(final long tableId, final int num) { - Preconditions.checkArgument(num > 0, "Illegal value of num"); - - if (num == 1) { - return ImmutableList.of(getTableKeyRange(tableId)); - } - - final long delta = - BigInteger.valueOf(Long.MAX_VALUE) - .subtract(BigInteger.valueOf(Long.MIN_VALUE + 1)) - .divide(BigInteger.valueOf(num)) - .longValueExact(); - final ImmutableList.Builder builder = ImmutableList.builder(); - for (int i = 0; i < num; i++) { - final RowKey startKey = - (i == 0) - ? RowKey.createMin(tableId) - : RowKey.toRowKey(tableId, Long.MIN_VALUE + delta * i); - final RowKey endKey = - (i == num - 1) - ? 
RowKey.createBeyondMax(tableId) - : RowKey.toRowKey(tableId, Long.MIN_VALUE + delta * (i + 1)); - builder.add( - KeyRangeUtils.makeCoprocRange(startKey.toByteString(), endKey.toByteString())); - } - return builder.build(); - } - - public static KeyRange getTableKeyRange(final long tableId, final int num, final int idx) { - Preconditions.checkArgument(idx >= 0 && idx < num, "Illegal value of idx"); - return getTableKeyRanges(tableId, num).get(idx); - } - - public static boolean isRecordKey(final byte[] key) { - return key[9] == '_' && key[10] == 'r'; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java new file mode 100644 index 00000000000..1299d527bed --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.flink.cdc.connectors.tidb.utils;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.RelationalTableFilters;
+import io.debezium.relational.TableId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/** Utilities for discovering the tables that a TiDB source should capture. */
+public class TableDiscoveryUtils {
+    private static final Logger LOG = LoggerFactory.getLogger(TableDiscoveryUtils.class);
+
+    /**
+     * Lists the tables of {@code database} that pass the configured table filter.
+     *
+     * @param database catalog name handed to {@link JdbcConnection#readTableNames}
+     * @param jdbc connection used to read the table names
+     * @param tableFilters filters whose data collection filter decides which tables are kept
+     * @return the captured table ids, in no particular order
+     * @throws SQLException if the table names cannot be read
+     */
+    public static List<TableId> listTables(
+            String database, JdbcConnection jdbc, RelationalTableFilters tableFilters)
+            throws SQLException {
+        // Only plain tables are requested; no schema or table name pattern is applied here.
+        Set<TableId> allTableIds =
+                jdbc.readTableNames(database, null, null, new String[] {"TABLE"});
+
+        Set<TableId> capturedTables =
+                allTableIds.stream()
+                        .filter(t -> tableFilters.dataCollectionFilter().isIncluded(t))
+                        .collect(Collectors.toSet());
+        LOG.info("listTables include parameters:database:{}", database);
+        LOG.info(
+                "TiDB captured tables : {} .",
+                capturedTables.stream().map(TableId::toString).collect(Collectors.joining(",")));
+
+        return new ArrayList<>(capturedTables);
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java
new file mode 100644
index 00000000000..29549aa1a72
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.utils;
+
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig;
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters;
+import org.apache.flink.util.FlinkRuntimeException;
+
+import io.debezium.connector.mysql.MySqlSystemVariables;
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.jdbc.JdbcValueConverters;
+import io.debezium.jdbc.TemporalPrecisionMode;
+
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Map;
+
+/** JDBC helpers for reading TiDB server variables and building Debezium value converters. */
+public class TiDBConnectionUtils {
+
+    /**
+     * Tells whether table identifiers should be treated as case-insensitive.
+     *
+     * @param connection connection used to read the server variables
+     * @return {@code false} only when the variable named by {@link
+     *     MySqlSystemVariables#LOWER_CASE_TABLE_NAMES} is {@code "0"}; a missing variable
+     *     therefore also yields {@code true}
+     */
+    public static boolean isTableIdCaseInsensitive(JdbcConnection connection) {
+        return !"0"
+                .equals(
+                        readMySqlSystemVariables(connection)
+                                .get(MySqlSystemVariables.LOWER_CASE_TABLE_NAMES));
+    }
+
+    /**
+     * Reads the server variables returned by {@code SHOW VARIABLES}.
+     *
+     * @param connection connection to run the statement on
+     * @return variable name to value; rows with a null name or value are left out
+     * @throws FlinkRuntimeException if the statement fails
+     */
+    public static Map<String, String> readMySqlSystemVariables(JdbcConnection connection) {
+        return querySystemVariables(connection, "SHOW VARIABLES");
+    }
+
+    /**
+     * Runs {@code statement} and maps column 1 to column 2 for every returned row, skipping rows
+     * in which either column is null.
+     */
+    private static Map<String, String> querySystemVariables(
+            JdbcConnection connection, String statement) {
+        final Map<String, String> variables = new HashMap<>();
+        try {
+            connection.query(
+                    statement,
+                    rs -> {
+                        while (rs.next()) {
+                            String varName = rs.getString(1);
+                            String value = rs.getString(2);
+                            if (varName != null && value != null) {
+                                variables.put(varName, value);
+                            }
+                        }
+                    });
+        } catch (SQLException e) {
+            throw new FlinkRuntimeException("Error reading TiDB variables: " + e.getMessage(), e);
+        }
+
+        return variables;
+    }
+
+    /**
+     * Builds the {@link TiDBValueConverters} described by the given connector configuration.
+     *
+     * <p>The decimal, temporal precision, unsigned BIGINT and binary handling modes are taken
+     * from {@code dbzTiDBConfig}. When the time adjuster option is off, temporal values are
+     * passed through unchanged.
+     *
+     * @param dbzTiDBConfig connector configuration to read the handling modes from
+     * @return converters configured with those modes and the default parsing error handler
+     */
+    public static TiDBValueConverters getValueConverters(TiDBConnectorConfig dbzTiDBConfig) {
+        TemporalPrecisionMode timePrecisionMode = dbzTiDBConfig.getTemporalPrecisionMode();
+        JdbcValueConverters.DecimalMode decimalMode = dbzTiDBConfig.getDecimalMode();
+        String bigIntUnsignedHandlingModeStr =
+                dbzTiDBConfig.getConfig().getString(dbzTiDBConfig.BIGINT_UNSIGNED_HANDLING_MODE);
+        TiDBConnectorConfig.BigIntUnsignedHandlingMode bigIntUnsignedHandlingMode =
+                TiDBConnectorConfig.BigIntUnsignedHandlingMode.parse(bigIntUnsignedHandlingModeStr);
+        JdbcValueConverters.BigIntUnsignedMode bigIntUnsignedMode =
+                bigIntUnsignedHandlingMode.asBigIntUnsignedMode();
+
+        boolean timeAdjusterEnabled =
+                dbzTiDBConfig.getConfig().getBoolean(dbzTiDBConfig.ENABLE_TIME_ADJUSTER);
+
+        return new TiDBValueConverters(
+                decimalMode,
+                timePrecisionMode,
+                bigIntUnsignedMode,
+                dbzTiDBConfig.binaryHandlingMode(),
+                timeAdjusterEnabled ? TiDBValueConverters::adjustTemporal : x -> x,
+                TiDBValueConverters::defaultParsingErrorHandler);
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java
new file mode 100644
index 00000000000..4170986df6c
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java
@@ -0,0 +1,469 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.tidb.utils;
+
+import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig;
+import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection;
+import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters;
+import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset;
+import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.util.FlinkRuntimeException;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.Column;
+import io.debezium.relational.Table;
+import io.debezium.relational.TableId;
+import io.debezium.schema.TopicSelector;
+import org.tikv.common.meta.TiTimestamp;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils.getValueConverters;
+import static org.apache.flink.table.api.DataTypes.FIELD;
+import static org.apache.flink.table.api.DataTypes.ROW;
+
+/**
+ * Static helpers for the TiDB source, including chunk boundary queries, split scan statements
+ * and the mapping from Debezium columns to Flink data types.
+ */
+public class TiDBUtils {
+    // Column type names as compared against Column#typeName() in convertFromColumn.
+    private static final String BIT = "BIT";
+    private static final String TINYINT = "TINYINT";
+    private static final String TINYINT_UNSIGNED = "TINYINT UNSIGNED";
+    private static final String SMALLINT = "SMALLINT";
+    private static final String SMALLINT_UNSIGNED = "SMALLINT UNSIGNED";
+    private static final String MEDIUMINT = "MEDIUMINT";
+    private static final String MEDIUMINT_UNSIGNED = "MEDIUMINT UNSIGNED";
+    private static final String INT = "INT";
+    private static final String INT_UNSIGNED = "INT UNSIGNED";
+    private static final String BIGINT = "BIGINT";
+    private static final String BIGINT_UNSIGNED = "BIGINT UNSIGNED";
+    private static final String FLOAT = "FLOAT";
+    private static final String FLOAT_UNSIGNED = "FLOAT UNSIGNED";
+    private static final String DOUBLE = "DOUBLE";
+    private static final String DOUBLE_UNSIGNED = "DOUBLE UNSIGNED";
+    private static final String DECIMAL = "DECIMAL";
+    private static final String DECIMAL_UNSIGNED = "DECIMAL UNSIGNED";
+    private static final String CHAR = "CHAR";
+    private static final String VARCHAR = "VARCHAR";
+    private static final String TINYTEXT = "TINYTEXT";
+    private static final String MEDIUMTEXT = "MEDIUMTEXT";
+    private static final String TEXT = "TEXT";
+    private static final String LONGTEXT = "LONGTEXT";
+    private static final String DATE = "DATE";
+    private static final String TIME = "TIME";
+    private static final String DATETIME = "DATETIME";
+    private static final String TIMESTAMP = "TIMESTAMP";
+    private static final String YEAR = "YEAR";
+    private static final String BINARY = "BINARY";
+    private static final String VARBINARY = "VARBINARY";
+    private static final String TINYBLOB = "TINYBLOB";
+    private static final String MEDIUMBLOB = "MEDIUMBLOB";
+    private static final String BLOB = "BLOB";
+    private static final String LONGBLOB = "LONGBLOB";
+    private static final String JSON = "JSON";
+    private static final String SET = "SET";
+    private static final String ENUM = "ENUM";
+    private static final String GEOMETRY = "GEOMETRY";
+    private static final String UNKNOWN = "UNKNOWN";
+
+    /**
+     * Finds the upper bound of the next chunk: the maximum of the first {@code chunkSize} values
+     * of the split column that are greater than or equal to {@code includedLowerBound}.
+     *
+     * @param jdbc connection used to quote identifiers and run the query
+     * @param tableId table to scan
+     * @param splitColumnName unquoted name of the split column
+     * @param chunkSize number of rows the inner query is limited to
+     * @param includedLowerBound inclusive lower bound bound to the single {@code ?} parameter
+     * @return the value of {@code MAX(column)} as returned by the driver
+     * @throws SQLException if the query fails or returns no row
+     */
+    public static Object queryNextChunkMax(
+            JdbcConnection jdbc,
+            TableId tableId,
+            String splitColumnName,
+            int chunkSize,
+            Object includedLowerBound)
+            throws SQLException {
+        String quotedColumn = jdbc.quotedColumnIdString(splitColumnName);
+        String query =
+                String.format(
+                        "SELECT MAX(%s) FROM ("
+                                + "SELECT %s FROM %s WHERE %s >= ? ORDER BY %s ASC LIMIT %s"
+                                + ") AS T",
+                        quotedColumn,
+                        quotedColumn,
+                        jdbc.quotedTableIdString(tableId),
+                        quotedColumn,
+                        quotedColumn,
+                        chunkSize);
+        return jdbc.prepareQueryAndMap(
+                query,
+                ps -> ps.setObject(1, includedLowerBound),
+                rs -> {
+                    if (!rs.next()) {
+                        // this should never happen
+                        throw new SQLException(
+                                String.format(
+                                        "No result returned after running query [%s]", query));
+                    }
+                    return rs.getObject(1);
+                });
+    }
+
+    /**
+     * Reads an approximate row count for {@code tableId} from {@code SHOW TABLE STATUS}.
+     *
+     * <p>Side effect: a {@code USE} statement switches the connection to the table's catalog
+     * before the status query runs.
+     *
+     * @param jdbc connection to run both statements on
+     * @param tableId table whose catalog and table name are used
+     * @return the fifth column of the first returned row, read as a long
+     * @throws SQLException if no row is returned or the result has fewer than five columns
+     */
+    public static long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId)
+            throws SQLException {
+        // The statement used to get approximate row count which is less
+        // accurate than COUNT(*), but is more efficient for large table.
+        final String useDatabaseStatement = String.format("USE %s;", quote(tableId.catalog()));
+        // NOTE(review): the table name is used as a LIKE pattern, so '_' and '%' in it act as
+        // wildcards and only the first matching row is read - confirm this is acceptable.
+        final String rowCountQuery = String.format("SHOW TABLE STATUS LIKE '%s';", tableId.table());
+        jdbc.execute(useDatabaseStatement);
+        return jdbc.queryAndMap(
+                rowCountQuery,
+                rs -> {
+                    if (!rs.next() || rs.getMetaData().getColumnCount() < 5) {
+                        throw new SQLException(
+                                String.format(
+                                        "No result returned after running query [%s]",
+                                        rowCountQuery));
+                    }
+                    return rs.getLong(5);
+                });
+    }
+
+    /**
+     * Maps a Debezium column to a Flink data type, marking it NOT NULL unless the column is
+     * optional.
+     *
+     * @throws UnsupportedOperationException if the column's type name has no mapping
+     */
+    public static DataType fromDbzColumn(Column column) {
+        DataType dataType = convertFromColumn(column);
+        if (column.isOptional()) {
+            return dataType;
+        } else {
+            return dataType.notNull();
+        }
+    }
+
+    /**
+     * Maps the column's type name to a nullable Flink data type.
+     *
+     * <p>Unsigned integer types are widened to the next larger Flink type, and {@code BIGINT
+     * UNSIGNED} becomes {@code DECIMAL(20, 0)}.
+     */
+    private static DataType convertFromColumn(Column column) {
+        String typeName = column.typeName();
+        switch (typeName) {
+            case TINYINT:
+                return column.length() == 1 ? DataTypes.BOOLEAN() : DataTypes.TINYINT();
+            case TINYINT_UNSIGNED:
+            case SMALLINT:
+                return DataTypes.SMALLINT();
+            case SMALLINT_UNSIGNED:
+            case INT:
+            case MEDIUMINT:
+                return DataTypes.INT();
+            case INT_UNSIGNED:
+            case MEDIUMINT_UNSIGNED:
+            case BIGINT:
+                return DataTypes.BIGINT();
+            case BIGINT_UNSIGNED:
+                return DataTypes.DECIMAL(20, 0);
+            case FLOAT:
+            case FLOAT_UNSIGNED:
+                return DataTypes.FLOAT();
+            case DOUBLE:
+            case DOUBLE_UNSIGNED:
+                return DataTypes.DOUBLE();
+            case DECIMAL:
+            case DECIMAL_UNSIGNED:
+                return DataTypes.DECIMAL(column.length(), column.scale().orElse(0));
+            case TIME:
+                return column.length() >= 0 ? DataTypes.TIME(column.length()) : DataTypes.TIME();
+            case DATE:
+                return DataTypes.DATE();
+            case DATETIME:
+            case TIMESTAMP:
+                return column.length() >= 0
+                        ? DataTypes.TIMESTAMP(column.length())
+                        : DataTypes.TIMESTAMP();
+            case CHAR:
+                return DataTypes.CHAR(column.length());
+            case VARCHAR:
+                return DataTypes.VARCHAR(column.length());
+            case TINYTEXT:
+            case TEXT:
+            case MEDIUMTEXT:
+            case LONGTEXT:
+                return DataTypes.STRING();
+            case BINARY:
+                return DataTypes.BINARY(column.length());
+            case VARBINARY:
+                return DataTypes.VARBINARY(column.length());
+            case TINYBLOB:
+            case BLOB:
+            case MEDIUMBLOB:
+            case LONGBLOB:
+                return DataTypes.BYTES();
+            default:
+                throw new UnsupportedOperationException(
+                        String.format("Don't support MySQL type '%s' yet.", typeName));
+        }
+    }
+
+    /**
+     * Wraps a database, table or column name in backticks, doubling any backtick inside the name
+     * so that the result is always a single identifier.
+     */
+    public static String quote(String dbOrTableName) {
+        return "`" + dbOrTableName.replace("`", "``") + "`";
+    }
+
+    /** Returns the table id with each part quoted by backticks. */
+    public static String quote(TableId tableId) {
+        return tableId.toQuotedString('`');
+    }
+
+    /**
+     * Prepares {@code sql} and binds the split boundaries to its parameters.
+     *
+     * <p>Nothing is bound when the split is both first and last. For the first split every
+     * {@code splitEnd[i]} is bound at positions {@code i + 1} and {@code i + 1 + primaryKeyNum}.
+     * For the last split every {@code splitStart[i]} is bound at position {@code i + 1}.
+     * Otherwise {@code splitStart[i]} is bound at {@code i + 1} and {@code splitEnd[i]} at both
+     * {@code i + 1 + primaryKeyNum} and {@code i + 1 + 2 * primaryKeyNum}.
+     *
+     * @return the prepared statement; the caller is responsible for closing it
+     * @throws RuntimeException if preparing or binding fails; the statement is closed first
+     */
+    public static PreparedStatement readTableSplitDataStatement(
+            JdbcConnection jdbc,
+            String sql,
+            boolean isFirstSplit,
+            boolean isLastSplit,
+            Object[] splitStart,
+            Object[] splitEnd,
+            int primaryKeyNum,
+            int fetchSize) {
+        PreparedStatement statement = null;
+        try {
+            statement = initStatement(jdbc, sql, fetchSize);
+            if (isFirstSplit && isLastSplit) {
+                return statement;
+            }
+            if (isFirstSplit) {
+                for (int i = 0; i < primaryKeyNum; i++) {
+                    statement.setObject(i + 1, splitEnd[i]);
+                    statement.setObject(i + 1 + primaryKeyNum, splitEnd[i]);
+                }
+            } else if (isLastSplit) {
+                for (int i = 0; i < primaryKeyNum; i++) {
+                    statement.setObject(i + 1, splitStart[i]);
+                }
+            } else {
+                for (int i = 0; i < primaryKeyNum; i++) {
+                    statement.setObject(i + 1, splitStart[i]);
+                    statement.setObject(i + 1 + primaryKeyNum, splitEnd[i]);
+                    statement.setObject(i + 1 + 2 * primaryKeyNum, splitEnd[i]);
+                }
+            }
+            return statement;
+        } catch (Exception e) {
+            // The statement never reaches the caller on this path, so release it here.
+            if (statement != null) {
+                try {
+                    statement.close();
+                } catch (SQLException closeFailure) {
+                    e.addSuppressed(closeFailure);
+                }
+            }
+            throw new RuntimeException("Failed to build the split data read statement.", e);
+        }
+    }
+
+    /**
+     * Turns auto-commit off on the underlying connection, prepares {@code sql} and applies the
+     * fetch size.
+     */
+    private static PreparedStatement initStatement(JdbcConnection jdbc, String sql, int fetchSize)
+            throws SQLException {
+        final Connection connection = jdbc.connection();
+        connection.setAutoCommit(false);
+        final PreparedStatement statement = connection.prepareStatement(sql);
+        statement.setFetchSize(fetchSize);
+        return statement;
+    }
+
+    /**
+     * Builds the {@code SELECT *} query that scans one split of {@code tableId}; its parameters
+     * are bound by {@link #readTableSplitDataStatement}.
+     */
+    public static String buildSplitScanQuery(
+            TableId tableId, RowType pkRowType, boolean isFirstSplit, boolean isLastSplit) {
+        return buildSplitQuery(tableId, pkRowType, isFirstSplit, isLastSplit, -1, true);
+    }
+
+    /**
+     * Builds the query for one split.
+     *
+     * <p>The WHERE clause is absent when the split is both first and last, {@code pk <= ?} for
+     * the first split, {@code pk >= ?} for the last one and {@code pk >= ? AND pk <= ?}
+     * otherwise. When scanning data, splits with an upper bound additionally exclude the row
+     * equal to that bound via {@code NOT (pk = ?)}.
+     */
+    private static String buildSplitQuery(
+            TableId tableId,
+            RowType pkRowType,
+            boolean isFirstSplit,
+            boolean isLastSplit,
+            int limitSize,
+            boolean isScanningData) {
+        final String condition;
+
+        if (isFirstSplit && isLastSplit) {
+            condition = null;
+        } else if (isFirstSplit) {
+            final StringBuilder sql = new StringBuilder();
+            addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ?");
+            if (isScanningData) {
+                sql.append(" AND NOT (");
+                addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ?");
+                sql.append(")");
+            }
+            condition = sql.toString();
+        } else if (isLastSplit) {
+            final StringBuilder sql = new StringBuilder();
+            addPrimaryKeyColumnsToCondition(pkRowType, sql, " >= ?");
+            condition = sql.toString();
+        } else {
+            final StringBuilder sql = new StringBuilder();
+            addPrimaryKeyColumnsToCondition(pkRowType, sql, " >= ?");
+            if (isScanningData) {
+                sql.append(" AND NOT (");
+                addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ?");
+                sql.append(")");
+            }
+            sql.append(" AND ");
+            addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ?");
+            condition = sql.toString();
+        }
+
+        if (isScanningData) {
+            return buildSelectWithRowLimits(
+                    tableId, limitSize, "*", Optional.ofNullable(condition), Optional.empty());
+        } else {
+            final String orderBy =
+                    pkRowType.getFieldNames().stream().collect(Collectors.joining(", "));
+            return buildSelectWithBoundaryRowLimits(
+                    tableId,
+                    limitSize,
+                    getPrimaryKeyColumnsProjection(pkRowType),
+                    getMaxPrimaryKeyColumnsProjection(pkRowType),
+                    Optional.ofNullable(condition),
+                    orderBy);
+        }
+    }
+
+    /**
+     * Appends {@code <column><predicate>} for every field of {@code pkRowType}, joined by {@code
+     * AND}. Column names are backtick-quoted so reserved words and special characters are safe.
+     */
+    private static void addPrimaryKeyColumnsToCondition(
+            RowType pkRowType, StringBuilder sql, String predicate) {
+        for (Iterator<String> fieldNamesIt = pkRowType.getFieldNames().iterator();
+                fieldNamesIt.hasNext(); ) {
+            sql.append(quote(fieldNamesIt.next())).append(predicate);
+            if (fieldNamesIt.hasNext()) {
+                sql.append(" AND ");
+            }
+        }
+    }
+
+    /**
+     * Builds {@code SELECT <maxColumnProjection> FROM (SELECT <projection> FROM <table> [WHERE
+     * <condition>] ORDER BY <orderBy> LIMIT <limit>) T}.
+     */
+    private static String buildSelectWithBoundaryRowLimits(
+            TableId tableId,
+            int limit,
+            String projection,
+            String maxColumnProjection,
+            Optional<String> condition,
+            String orderBy) {
+        final StringBuilder sql = new StringBuilder("SELECT ");
+        sql.append(maxColumnProjection);
+        sql.append(" FROM (");
+        sql.append("SELECT ");
+        sql.append(projection);
+        sql.append(" FROM ");
+        sql.append(quotedTableIdString(tableId));
+        if (condition.isPresent()) {
+            sql.append(" WHERE ").append(condition.get());
+        }
+        sql.append(" ORDER BY ").append(orderBy).append(" LIMIT ").append(limit);
+        sql.append(") T");
+        return sql.toString();
+    }
+
+    /** Returns the table id with each part quoted by backticks. */
+    private static String quotedTableIdString(TableId tableId) {
+        return tableId.toQuotedString('`');
+    }
+
+    private static String buildSelectWithRowLimits(
+            TableId tableId,
+            int limit,
+            String projection,
+            Optional condition,
+            Optional orderBy) {
+        final StringBuilder sql = new StringBuilder("SELECT ");
+        sql.append(projection).append(" FROM ");
+        sql.append(quotedTableIdString(tableId));
+        if (condition.isPresent()) {
+ sql.append(" WHERE ").append(condition.get()); + } + if (orderBy.isPresent()) { + sql.append(" ORDER BY ").append(orderBy.get()); + } + if (limit > 0) { + sql.append(" LIMIT ").append(limit); + } + return sql.toString(); + } + + private static String getPrimaryKeyColumnsProjection(RowType pkRowType) { + StringBuilder sql = new StringBuilder(); + for (Iterator fieldNamesIt = pkRowType.getFieldNames().iterator(); + fieldNamesIt.hasNext(); ) { + sql.append(fieldNamesIt.next()); + if (fieldNamesIt.hasNext()) { + sql.append(" , "); + } + } + return sql.toString(); + } + + private static String getMaxPrimaryKeyColumnsProjection(RowType pkRowType) { + StringBuilder sql = new StringBuilder(); + for (Iterator fieldNamesIt = pkRowType.getFieldNames().iterator(); + fieldNamesIt.hasNext(); ) { + sql.append("MAX(" + fieldNamesIt.next() + ")"); + if (fieldNamesIt.hasNext()) { + sql.append(" , "); + } + } + return sql.toString(); + } + + public static EventOffset currentBinlogOffset(JdbcConnection jdbc) { + final String showMasterStmt = "SHOW MASTER STATUS"; + try { + return jdbc.queryAndMap( + showMasterStmt, + rs -> { + if (rs.next()) { + final long eventPosition = rs.getLong(2); + return new EventOffset( + String.valueOf(TiTimestamp.extractPhysical(eventPosition)), + String.valueOf(eventPosition)); + } else { + throw new FlinkRuntimeException( + "Cannot read the binlog filename and position via '" + + showMasterStmt + + "'. Make sure your server is correctly configured"); + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException( + "Cannot read the binlog filename and position via '" + + showMasterStmt + + "'. 
Make sure your server is correctly configured", + e); + } + } + + public static TiDBDatabaseSchema newSchema( + TiDBConnection connection, + TiDBConnectorConfig config, + TopicSelector topicSelector, + boolean isTableIdCaseSensitive) + throws SQLException { + // Key.KeyMapper customKeysMapper = new CustomeKeyMapper(); + TiDBValueConverters valueConverters = getValueConverters(config); + TiDBDatabaseSchema schema = + new TiDBDatabaseSchema( + config, valueConverters, topicSelector, isTableIdCaseSensitive); + schema.refresh(connection, config, false); + return schema; + } + + public static TiDBDatabaseSchema createTiDBDatabaseSchema( + TiDBConnectorConfig dbzTiDBConfig, + TopicSelector topicSelector, + boolean isTableIdCaseSensitive) { + TiDBValueConverters valueConverters = getValueConverters(dbzTiDBConfig); + TiDBDatabaseSchema tiDBDatabaseSchema = + new TiDBDatabaseSchema( + dbzTiDBConfig, valueConverters, topicSelector, isTableIdCaseSensitive); + return tiDBDatabaseSchema; + } + + public static RowType getSplitType(Table table) { + List primaryKeys = table.primaryKeyColumns(); + if (primaryKeys.isEmpty()) { + throw new ValidationException( + String.format( + "Incremental snapshot for tables requires primary key," + + " but table %s doesn't have primary key.", + table.id())); + } + + // use first field in primary key as the split key + return getSplitType(primaryKeys.get(0)); + } + + public static RowType getSplitType(Column splitColumn) { + return (RowType) + ROW(FIELD(splitColumn.name(), TiDBUtils.fromDbzColumn(splitColumn))) + .getLogicalType(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java similarity index 98% rename from 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java index c1bf04ae8e2..42ccbb087d5 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table.utils; +package org.apache.flink.cdc.connectors.tidb.utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory index 606391e6953..a54a19fce70 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-org.apache.flink.cdc.connectors.tidb.table.TiDBTableSourceFactory \ No newline at end of file +org.apache.flink.cdc.connectors.tidb.table.TiDBTableFactory \ No newline at end of file diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java index d27be8b7d32..f8a08d8d451 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java @@ -17,29 +17,44 @@ package org.apache.flink.cdc.connectors.tidb.table; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.table.api.DataTypes; import org.apache.flink.table.api.Schema; -import org.apache.flink.table.catalog.CatalogTableAdapter; +import org.apache.flink.table.catalog.CatalogTable; import org.apache.flink.table.catalog.Column; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.catalog.UniqueConstraint; import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.factories.FactoryUtilAdapter; +import org.apache.flink.table.factories.FactoryUtil; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.Test; +import org.junit.Test; +import java.time.Duration; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import 
java.util.HashMap; import java.util.Map; +import java.util.Properties; + +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECTION_POOL_SIZE; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_MAX_RETRIES; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.CHUNK_META_GROUP_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.CONNECT_TIMEOUT; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HEARTBEAT_INTERVAL; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; +import static org.junit.Assert.assertEquals; /** Unit tests for TiDB table source factory. 
*/ -class TiDBTableSourceFactoryTest { +public class TiDBTableSourceFactoryTest { private static final ResolvedSchema SCHEMA = new ResolvedSchema( @@ -66,81 +81,132 @@ class TiDBTableSourceFactoryTest { Collections.emptyList(), UniqueConstraint.primaryKey("pk", Collections.singletonList("id"))); - private static final String MY_HOSTNAME = "tidb0:4000"; + private static final String MY_HOSTNAME = "tidb0"; private static final String MY_DATABASE = "inventory"; + private static final String MY_PORT = "4000"; private static final String MY_TABLE = "products"; + private static final String MY_USERNAME = "root"; + private static final String MY_PASSWORD = ""; private static final String PD_ADDRESS = "pd0:2379"; private static final String HOST_MAPPING = "host1:1;host2:2;host3:3"; private static final Map OPTIONS = new HashMap<>(); + private static final Properties PROPERTIES = new Properties(); @Test - void testCommonProperties() { + public void testCommonProperties() { Map properties = getAllOptions(); // validation for source DynamicTableSource actualSource = createTableSource(properties); + System.out.println(actualSource.asSummaryString()); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, + Integer.parseInt(MY_PORT), + MY_HOSTNAME, MY_DATABASE, MY_TABLE, + null, + MY_USERNAME, + MY_PASSWORD, + ZoneId.of("UTC").toString(), + PROPERTIES, + false, + HEARTBEAT_INTERVAL.defaultValue(), PD_ADDRESS, HOST_MAPPING, - StartupOptions.latest(), - OPTIONS); - Assertions.assertThat(actualSource).isEqualTo(expectedSource); + CONNECT_TIMEOUT.defaultValue(), + OPTIONS, + SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE.defaultValue(), + CHUNK_META_GROUP_SIZE.defaultValue(), + SCAN_SNAPSHOT_FETCH_SIZE.defaultValue(), + CONNECT_MAX_RETRIES.defaultValue(), + CONNECTION_POOL_SIZE.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND.defaultValue(), + null, + new HashMap<>(), + JDBC_DRIVER.defaultValue(), + 
StartupOptions.initial()); + assertEquals(expectedSource, actualSource); } @Test - void testOptionalProperties() { + public void testOptionalProperties() { Map properties = getAllOptions(); - properties.put("host-mapping", "host1:1;host2:2;host3:3"); - properties.put("tikv.grpc.timeout_in_ms", "20000"); - properties.put("tikv.grpc.scan_timeout_in_ms", "20000"); - properties.put("tikv.batch_get_concurrency", "4"); - properties.put("tikv.batch_put_concurrency", "4"); - properties.put("tikv.batch_scan_concurrency", "4"); - properties.put("tikv.batch_delete_concurrency", "4"); + properties.put("port", MY_PORT); + properties.put("scan.startup.mode", "initial"); + properties.put("heartbeat.interval.ms", "15213ms"); + // properties.put("server-time-zone", "Asia/Shanghai"); + + Properties dbzProperties = new Properties(); + dbzProperties.put("snapshot.mode", "never"); + dbzProperties.put("offset.flush.interval.ms", "3000"); + dbzProperties.put("tombstones.on.delete", "true"); + dbzProperties.put("test", "test"); - // validation for source DynamicTableSource actualSource = createTableSource(properties); Map options = new HashMap<>(); - options.put("tikv.grpc.timeout_in_ms", "20000"); - options.put("tikv.grpc.scan_timeout_in_ms", "20000"); - options.put("tikv.batch_get_concurrency", "4"); - options.put("tikv.batch_put_concurrency", "4"); - options.put("tikv.batch_scan_concurrency", "4"); - options.put("tikv.batch_delete_concurrency", "4"); + // options.put("tikv.grpc.timeout_in_ms", "20000"); + // options.put("tikv.grpc.scan_timeout_in_ms", "20000"); + // options.put("tikv.batch_get_concurrency", "4"); + // options.put("tikv.batch_put_concurrency", "4"); + // options.put("tikv.batch_scan_concurrency", "4"); + // options.put("tikv.batch_delete_concurrency", "4"); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, + 4111, + MY_HOSTNAME, MY_DATABASE, MY_TABLE, + null, + MY_USERNAME, + MY_PASSWORD, + ZoneId.of("UTC").toString(), + dbzProperties, + false, + 
Duration.ofMillis(15213), PD_ADDRESS, HOST_MAPPING, - StartupOptions.latest(), - options); - Assertions.assertThat(actualSource).isEqualTo(expectedSource); + CONNECT_TIMEOUT.defaultValue(), + OPTIONS, + SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE.defaultValue(), + CHUNK_META_GROUP_SIZE.defaultValue(), + SCAN_SNAPSHOT_FETCH_SIZE.defaultValue(), + CONNECT_MAX_RETRIES.defaultValue(), + CONNECTION_POOL_SIZE.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND.defaultValue(), + null, + null, + JDBC_DRIVER.defaultValue(), + StartupOptions.initial()); + assertEquals(expectedSource, actualSource); } private Map getAllOptions() { Map options = new HashMap<>(); options.put("connector", "tidb-cdc"); options.put("hostname", MY_HOSTNAME); + options.put("port", MY_PORT); options.put("database-name", MY_DATABASE); options.put("table-name", MY_TABLE); options.put("pd-addresses", PD_ADDRESS); - options.put("scan.startup.mode", "latest-offset"); + options.put("username", MY_USERNAME); + options.put("password", MY_PASSWORD); + options.put("host-mapping", HOST_MAPPING); + options.put("scan.incremental.snapshot.enabled", String.valueOf(false)); return options; } private static DynamicTableSource createTableSource( ResolvedSchema schema, Map options) { - return FactoryUtilAdapter.createTableSource( + return FactoryUtil.createTableSource( null, ObjectIdentifier.of("default", "default", "t1"), new ResolvedCatalogTable( - CatalogTableAdapter.of( + CatalogTable.of( Schema.newBuilder().fromResolvedSchema(schema).build(), "mock source", new ArrayList<>(), diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java index 304f3fba8e2..bc700f8100d 100644 --- 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java @@ -17,45 +17,45 @@ package org.apache.flink.cdc.connectors.tidb.table.utils; -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; +import org.apache.flink.cdc.connectors.tidb.utils.UriHostMapping; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.Test; +import org.junit.Test; import org.tikv.common.TiConfiguration; import java.util.HashMap; +import static org.junit.Assert.assertEquals; + /** Unit test for {@link UriHostMapping}. * */ -class UriHostMappingTest { +public class UriHostMappingTest { @Test - void uriHostMappingTest() { + public void uriHostMappingTest() { final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration( + TiDBSourceOptions.getTiConfiguration( "http://0.0.0.0:2347", "host1:1;host2:2;host3:3", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - Assertions.assertThat(uriHostMapping.getHostMapping()) - .hasSize(3) - .containsEntry("host1", "1"); + assertEquals(uriHostMapping.getHostMapping().size(), 3); + assertEquals(uriHostMapping.getHostMapping().get("host1"), "1"); } @Test - void uriHostMappingEmpty() { + public void uriHostMappingEmpty() { final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration("http://0.0.0.0:2347", "", new HashMap<>()); + TiDBSourceOptions.getTiConfiguration("http://0.0.0.0:2347", "", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - Assertions.assertThat(uriHostMapping.getHostMapping()).isNull(); + assertEquals(uriHostMapping.getHostMapping(), null); } @Test - void uriHostMappingError() { 
- Assertions.assertThatThrownBy( - () -> - TDBSourceOptions.getTiConfiguration( - "http://0.0.0.0:2347", - "host1=1;host2=2;host3=3", - new HashMap<>())) - .isExactlyInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid host mapping string: host1=1;host2=2;host3=3"); + public void uriHostMappingError() { + try { + final TiConfiguration tiConf = + TiDBSourceOptions.getTiConfiguration( + "http://0.0.0.0:2347", "host1=1;host2=2;host3=3", new HashMap<>()); + } catch (IllegalArgumentException e) { + assertEquals(e.getMessage(), "Invalid host mapping string: host1=1;host2=2;host3=3"); + } } } From 8940620131f87e0f5fb5eef3540e545be344072c Mon Sep 17 00:00:00 2001 From: wulin Date: Tue, 18 Mar 2025 08:52:53 +0800 Subject: [PATCH 2/5] [FLINK-35459] add splitReader & scanFetch test. [FLINK-35459] add stream fetch. [FLINK-35459] add stream fetch,fixed test [FLINK-35459] fixed TiDBTestBase version fixed conflict fixd git fixed checkstyle fixed rat error. fixed ut test --- .../flink-connector-tidb-cdc/pom.xml | 16 +- .../Listeners/TiDBAntlrDdlParserListener.java | 17 + .../connector/tidb/TiDBAntlrDdlParser.java | 17 + .../tidb/TiDBEventMetadataProvider.java | 17 + .../connector/tidb/TiDBPartition.java | 17 + .../connector/tidb/TiDBTaskContext.java | 17 + .../connector/tidb/TidbTopicSelector.java | 17 + .../tidb/metrics/TiDBSourceMetrics.java | 8 +- .../connectors/tidb/source/TiDBDialect.java | 1 + .../tidb/source/TiDBSourceBuilder.java | 2 + .../source/config/TiDBConnectorConfig.java | 7 +- .../tidb/source/config/TiDBSourceConfig.java | 1 + .../config/TiDBSourceConfigFactory.java | 1 + .../tidb/source/config/TiDBSourceOptions.java | 4 +- .../source/connection/TiDBConnection.java | 1 + .../connection/TiDBConnectionPoolFactory.java | 1 + .../converter/TiDBDefaultValueConverter.java | 11 +- .../source/converter/TiDBValueConverters.java | 4 +- .../tidb/source/fetch/EventEmitter.java | 1 + .../tidb/source/fetch/EventSourceReader.java | 391 +++++++++++++++++- 
.../StoppableChangeEventSourceContext.java | 1 + .../tidb/source/fetch/TiDBScanFetchTask.java | 9 +- .../fetch/TiDBSourceFetchTaskContext.java | 3 +- .../source/fetch/TiDBStreamFetchTask.java | 17 +- .../tidb/source/handler/TiDBErrorHandler.java | 1 + .../handler/TiDBSchemaChangeEventHandler.java | 1 + .../tidb/source/offset/EventOffset.java | 1 + .../source/offset/EventOffsetContext.java | 2 + .../source/offset/EventOffsetFactory.java | 1 + .../tidb/source/offset/EventOffsetUtils.java | 1 + .../tidb/source/offset/TiDBSourceInfo.java | 1 + .../offset/TiDBSourceInfoStructMaker.java | 1 + .../source/schema/TiDBFieldDefinition.java | 1 + .../tidb/source/schema/TiDBSchema.java | 1 + .../source/schema/TiDBTableDefinition.java | 1 + .../source/splitter/TiDBChunkSplitter.java | 1 + .../TiDBDeserializationConverterFactory.java | 1 + .../tidb/table/TiDBTableFactory.java | 7 +- .../tidb/table/TiDBTableSource.java | 76 ++++ .../tidb/utils/TableDiscoveryUtils.java | 2 + .../tidb/utils/TableKeyRangeUtils.java | 75 ++++ .../tidb/utils/TiDBConnectionUtils.java | 1 + .../cdc/connectors/tidb/utils/TiDBUtils.java | 1 + .../cdc/connectors/tidb/TiDBTestBase.java | 29 +- .../tidb/source/TiDBDialectTest.java | 48 +++ .../tidb/source/TiDBSourceExampleTest.java | 138 +++++++ .../source/fetch/TiDBScanFetchTaskTest.java | 348 ++++++++++++++++ .../reader/TiDBStreamSplitReaderTest.java | 170 ++++++++ .../tidb/table/TiDBConnectorITCase.java | 52 ++- .../tidb/table/TiDBConnectorRegionITCase.java | 8 + .../table/TiDBTableSourceFactoryTest.java | 16 +- .../tidb/table/utils/UriHostMappingTest.java | 14 +- .../tidb/testutils/RecordsFormatter.java | 101 +++++ .../src/test/resources/ddl/customer.sql | 51 +++ 54 files changed, 1679 insertions(+), 53 deletions(-) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java create mode 100644 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml index 957ed190e49..e5f330545c5 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml @@ -164,17 +164,23 @@ limitations under the License. 
+ + io.debezium + debezium-connector-mysql + 1.9.8.Final + compile + org.apache.flink flink-cdc-base - 3.4-SNAPSHOT + ${project.version} compile - io.debezium - debezium-connector-mysql - 1.9.8.Final - compile + org.apache.flink + flink-connector-test-util + ${project.version} + test diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java index 7fca9cd3f13..531370e6c42 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb.Listeners; import io.debezium.antlr.AntlrDdlParserListener; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java index 00d4d052f8c..6352666b8d2 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb; import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java index 79b08f8f35e..e1ca467d9fb 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb; import io.debezium.connector.AbstractSourceInfo; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java index 699168f3f5d..f14e595ac65 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb; import io.debezium.pipeline.spi.Partition; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java index 4d6175f09a7..70a61a0eebe 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java index aab12292f2b..5663029c5de 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package io.debezium.connector.tidb; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java index aff0d124269..2fe98827b5f 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java @@ -24,15 +24,15 @@ import static org.apache.flink.runtime.metrics.MetricNames.CURRENT_FETCH_EVENT_TIME_LAG; import static org.apache.flink.runtime.metrics.MetricNames.SOURCE_IDLE_TIME; -/** A collection class for handling metrics in {@link }. */ +/** A collection class for handling metrics in {@link TiKVRichParallelSourceFunction}. */ public class TiDBSourceMetrics { private final MetricGroup metricGroup; /** - * The last record processing time, which is updated after {@link } fetches a batch of data. - * It's mainly used to report metrics sourceIdleTime for sourceIdleTime = - * System.currentTimeMillis() - processTime. + * The last record processing time, which is updated after {@link + * TiKVRichParallelSourceFunction} fetches a batch of data. It's mainly used to report metrics + * sourceIdleTime for sourceIdleTime = System.currentTimeMillis() - processTime. 
*/ private long processTime = 0L; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java index 62572da20b9..d1598fcc0cb 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java @@ -54,6 +54,7 @@ import java.util.List; import java.util.Map; +/** TiDB data source dialect. */ public class TiDBDialect implements JdbcDataSourceDialect { private static final Logger LOG = LoggerFactory.getLogger(TiDBDialect.class); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java index a1e70c7a68f..93f4ff34b60 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java @@ -33,6 +33,7 @@ import static org.apache.flink.cdc.common.utils.Preconditions.checkNotNull; +/** Builder for {@link JdbcIncrementalSource}. 
*/ public class TiDBSourceBuilder { private final TiDBSourceConfigFactory configFactory = new TiDBSourceConfigFactory(); private EventOffsetFactory offsetFactory; @@ -187,6 +188,7 @@ public TiDBIncrementalSource build() { configFactory, checkNotNull(deserializer), offsetFactory, dialect); } + /** TiDB incremental source. */ public static class TiDBIncrementalSource extends JdbcIncrementalSource { public TiDBIncrementalSource( JdbcSourceConfigFactory configFactory, diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java index 67c010d3168..3bd6e481a68 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java @@ -39,6 +39,7 @@ import java.util.Collections; import java.util.List; +/** TiDB connector configuration. */ public class TiDBConnectorConfig extends RelationalDatabaseConnectorConfig { private static final Logger LOGGER = LoggerFactory.getLogger(TiDBConnectorConfig.class); @@ -85,7 +86,8 @@ public class TiDBConnectorConfig extends RelationalDatabaseConnectorConfig { + "false - delegates the implicit conversion to the database" + "true - (the default) Debezium makes the conversion"); - public static enum BigIntUnsignedHandlingMode implements EnumeratedValue { + /** The set of predefined options for the handling mode configuration property. 
*/ + public enum BigIntUnsignedHandlingMode implements EnumeratedValue { /** * Represent {@code BIGINT UNSIGNED} values as precise {@link BigDecimal} values, which are * represented in change events in a binary form. This is precise but difficult to use. @@ -203,7 +205,8 @@ public boolean isReadOnlyConnection() { return readOnlyConnection; } - public static enum SecureConnectionMode implements EnumeratedValue { + /** Whether to use SSL/TLS to connect to the database. */ + public enum SecureConnectionMode implements EnumeratedValue { /** Establish an unencrypted connection. */ DISABLED("disabled"), diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java index f6b5e9debf9..7268a1d9f19 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java @@ -29,6 +29,7 @@ import java.util.Map; import java.util.Properties; +/** The configuration for TiDB source. 
*/ public class TiDBSourceConfig extends JdbcSourceConfig { private static final long serialVersionUID = 1L; private final String compatibleMode; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java index 12c68c3987e..2e6afbf9496 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.flink.cdc.connectors.tidb.source.config; import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfigFactory; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java index bb36951656b..fb8a026e1b6 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.flink.cdc.connectors.tidb.source.config; import org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions; @@ -28,9 +29,10 @@ import java.util.Map; import java.util.Optional; +/** Options for {@link org.apache.flink.cdc.connectors.tidb.table.TiDBTableSource}. */ public class TiDBSourceOptions extends JdbcSourceOptions { - public static final ConfigOption TiDB_PORT = + public static final ConfigOption TIDB_PORT = ConfigOptions.key("port") .intType() .defaultValue(4000) diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java index ab06e8f7a4e..48e4cd0d29c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java @@ -54,6 +54,7 @@ import java.util.function.Supplier; import java.util.regex.Pattern; +/** TiDB JDBC connection. 
*/ public class TiDBConnection extends JdbcConnection { private static final Logger LOG = LoggerFactory.getLogger(TiDBConnection.class); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java index 7a826ebaeab..5253ca99650 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java @@ -20,6 +20,7 @@ import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; +/** The factory to create {@link TiDBConnectionPool}. 
*/ public class TiDBConnectionPoolFactory extends JdbcConnectionPoolFactory { private static final String MYSQL_URL_PATTERN = "jdbc:mysql://%s:%s/?useUnicode=true&useSSL=false&useInformationSchema=true&nullCatalogMeansCurrent=false&zeroDateTimeBehavior=convertToNull&characterEncoding=UTF-8&characterSetResults=UTF-8"; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java index 49a4a1aff06..f94f7db63a1 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java @@ -47,6 +47,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +/** TiDBDefaultValueConverter. */ public class TiDBDefaultValueConverter implements DefaultValueConverter { private static final Logger LOGGER = LoggerFactory.getLogger(MySqlDefaultValueConverter.class); @@ -377,7 +378,7 @@ private DateTimeFormatter timestampFormat(int length) { } /** - * Clean input timestamp to yyyy-mm-dd hh:mm:ss[.fffffffff] format + * Clean input timestamp to yyyy-mm-dd hh:mm:ss[.fffffffff] format. 
* * @param s input timestamp * @return cleaned timestamp @@ -404,8 +405,8 @@ private String cleanTimestamp(String s) { } } - final int MAX_MONTH = 12; - final int MAX_DAY = 31; + final int maxMonth = 12; + final int maxDay = 31; // Parse the date int firstDash = s.indexOf('-'); @@ -436,7 +437,7 @@ private String cleanTimestamp(String s) { day = Integer.parseInt(s.substring(secondDash + 1, len)); } - if ((month >= 1 && month <= MAX_MONTH) && (day >= 1 && day <= MAX_DAY)) { + if ((month >= 1 && month <= maxMonth) && (day >= 1 && day <= maxDay)) { parsedDate = true; } } @@ -482,7 +483,7 @@ private String cleanTimestamp(String s) { } /** - * Replace the first non-numeric substring + * Replace the first non-numeric substring. * * @param s the original string * @param startIndex the beginning index, inclusive diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java index 02e90ec4272..fb2d3d4c353 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java @@ -64,8 +64,9 @@ /** JdbcValueConverters for tiDB. */ public class TiDBValueConverters extends JdbcValueConverters { + /** Handler for parsing errors. 
*/ @FunctionalInterface - public static interface ParsingErrorHandler { + public interface ParsingErrorHandler { void error(String message, Exception exception); } @@ -318,6 +319,7 @@ public ValueConverter converter(Column column, Field fieldDefn) { if (adaptiveTimeMicrosecondsPrecisionMode) { return (data) -> convertTime(column, fieldDefn, data); } + break; case Types.TIMESTAMP: return ((ValueConverter) (data -> convertTimestampToLocalDateTime(column, fieldDefn, data))) diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java index 38a1bc4e3eb..607b2d6a226 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java @@ -25,6 +25,7 @@ import java.io.Serializable; +/** TiDB event emitter. 
*/ public class EventEmitter extends RelationalChangeRecordEmitter { private final Envelope.Operation operation; private final Object[] before; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java index 43a87707408..ce9d7403124 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java @@ -19,31 +19,125 @@ import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkKind; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.utils.TableKeyRangeUtils; +import org.apache.flink.util.Preconditions; + +import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.data.Envelope; import io.debezium.pipeline.ErrorHandler; import io.debezium.pipeline.source.spi.StreamingChangeEventSource; +import io.debezium.relational.TableId; +import io.debezium.relational.TableSchema; +import io.debezium.util.Clock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.tikv.cdc.CDCClient; +import org.tikv.common.TiConfiguration; +import 
org.tikv.common.TiSession; +import org.tikv.common.key.RowKey; +import org.tikv.common.meta.TiColumnInfo; +import org.tikv.common.meta.TiTableInfo; +import org.tikv.kvproto.Cdcpb; +import org.tikv.kvproto.Coprocessor; +import org.tikv.shade.com.google.protobuf.ByteString; +import java.io.Serializable; +import java.time.Instant; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadFactory; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.tikv.common.codec.TableCodec.decodeObjects; +/** TiDB streaming change event source reader. */ public class EventSourceReader implements StreamingChangeEventSource { private static final Logger LOG = LoggerFactory.getLogger(EventSourceReader.class); + private final StreamSplit split; + private final TiDBConnectorConfig connectorConfig; + private final TiConfiguration ticonf; + private final JdbcSourceEventDispatcher eventDispatcher; + private final ErrorHandler errorHandler; + private final TiDBSourceFetchTaskContext taskContext; + private final Map> fieldIndexMap = new HashMap<>(); + public ChangeEventSourceContext context; + + private static final long STREAMING_VERSION_START_EPOCH = 0L; + + /** Task local variables. 
*/ + private transient TiSession session = null; + + private transient Coprocessor.KeyRange keyRange = null; + private transient CDCClient cdcClient = null; + private transient volatile long resolvedTs = -1L; + private transient TreeMap prewrites = null; + private transient TreeMap commits = null; + private transient BlockingQueue committedEvents = null; + private transient TableId tableId; + private transient TiTableInfo tableInfo; + + private transient boolean running = true; + private transient ExecutorService executorService; public EventSourceReader( - TiDBConnectorConfig dbzConnectorConfig, + TiDBConnectorConfig connectorConfig, JdbcSourceEventDispatcher eventDispatcher, ErrorHandler errorHandler, TiDBSourceFetchTaskContext taskContext, - StreamSplit split) {} + StreamSplit split) { + this.connectorConfig = connectorConfig; + this.ticonf = connectorConfig.getSourceConfig().getTiConfiguration(); + this.eventDispatcher = eventDispatcher; + this.errorHandler = errorHandler; + this.taskContext = taskContext; + this.split = split; + } @Override public void init() throws InterruptedException { StreamingChangeEventSource.super.init(); + Set tableIds = this.split.getTableSchemas().keySet(); + if (tableIds.size() != 1) { // the key range below covers exactly one table + throw new IllegalStateException( + "Currently only single table ingest is supported."); + } + session = TiSession.create(ticonf); // opened only after the split is validated + this.tableId = tableIds.stream().findFirst().get(); + this.tableInfo = session.getCatalog().getTable(tableId.catalog(), tableId.table()); + if (tableInfo == null) { + throw new RuntimeException( + String.format( + "Table %s.%s does not exist.", tableId.catalog(), tableId.table())); + } + keyRange = TableKeyRangeUtils.getTableKeyRange(tableInfo.getId(), 1, 0); + cdcClient = new CDCClient(session, keyRange); + prewrites = new TreeMap<>(); + commits = new TreeMap<>(); + // cdc event will lose if pull cdc event block when region split + // use queue to separate read and write to ensure pull event unblock.
+ // since sink jdbc is slow, 5000W queue size may be safe size. + committedEvents = new LinkedBlockingQueue<>(); + resolvedTs = EventOffset.getStartTs(this.split.getStartingOffset()); + ThreadFactory threadFactory = + new ThreadFactoryBuilder().setNameFormat("tidb-source-function-0").build(); + executorService = Executors.newSingleThreadExecutor(threadFactory); } @Override @@ -51,7 +145,247 @@ public void execute( ChangeEventSourceContext context, TiDBPartition partition, EventOffsetContext offsetContext) - throws InterruptedException {} + throws InterruptedException { + this.context = context; + if (connectorConfig.getSourceConfig().getStartupOptions().isSnapshotOnly()) { + LOG.info("Streaming is not enabled in current configuration"); + return; + } + this.taskContext.getDatabaseSchema().assureNonEmptySchema(); + cdcClient.start(resolvedTs); + running = true; + EventOffsetContext effectiveOffsetContext = + offsetContext != null + ? offsetContext + : EventOffsetContext.initial(this.connectorConfig); + try { + EventOffset currentOffset = new EventOffset(effectiveOffsetContext.getOffset()); + if (currentOffset.isBefore(split.getStartingOffset())) { + return; + } + readChangeEvents(partition, effectiveOffsetContext); + } catch (Exception e) { + this.errorHandler.setProducerThrowable(e); + } + } + + protected void readChangeEvents(TiDBPartition partition, EventOffsetContext offsetContext) + throws Exception { + LOG.info("read change event from resolvedTs:{}", resolvedTs); + // child thread to sink committed rows. 
+ executorService.execute( + () -> { + while (running) { + try { + Cdcpb.Event.Row committedRow = committedEvents.take(); + EventOffset currentOffset = new EventOffset(offsetContext.getOffset()); + if (currentOffset.isBefore(split.getStartingOffset())) { + return; + } + if (!EventOffset.NO_STOPPING_OFFSET.equals(split.getEndingOffset()) + && currentOffset.isAtOrAfter(split.getEndingOffset())) { + // send watermark event; + try { + eventDispatcher.dispatchWatermarkEvent( + partition.getSourcePartition(), + split, + currentOffset, + WatermarkKind.END); + } catch (InterruptedException e) { + LOG.error("Send signal event error.", e); + errorHandler.setProducerThrowable( + new RuntimeException( + "Error processing log signal event", e)); + } + ((StoppableChangeEventSourceContext) context) + .stopChangeEventSource(); + return; + } + + final EventOffsetContext localOffsetContext = + new EventOffsetContext.Loader(this.connectorConfig) + .load(currentOffset.getOffset()); + emitChangeEvent(partition, localOffsetContext, committedRow); + // use startTs of row as messageTs, use commitTs of row as fetchTs + } catch (Exception e) { + LOG.error("Read change events error.", e); + } + } + }); + while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { + for (int i = 0; i < 1000; i++) { + final Cdcpb.Event.Row row = cdcClient.get(); + if (row == null) { + break; + } + handleRow(row); + } + resolvedTs = cdcClient.getMaxResolvedTs(); + if (commits.size() > 0) { + flushRows(resolvedTs); + } + } + } + + protected void emitChangeEvent( + TiDBPartition partition, EventOffsetContext offsetContext, final Cdcpb.Event.Row row) + throws Exception { + if (!context.isRunning()) { + LOG.info("sourceContext is not running, skip emit change event."); + return; + } + if (tableId == null) { + LOG.warn("No valid tableId found, skipping log message: {}", row); + return; + } + TableSchema tableSchema = taskContext.getDatabaseSchema().schemaFor(tableId); + if (tableSchema == null) { + LOG.warn("No table 
schema found, skipping log message: {}", row); + return; + } + offsetContext.event(tableSchema.id(), Instant.ofEpochMilli(row.getCommitTs())); + Set fieldIndex = fieldIndexConverter(tableInfo.getColumns(), tableSchema); + + Serializable[] before = null; + Serializable[] after = null; + final RowKey rowKey = RowKey.decode(row.getKey().toByteArray()); + final long handle = rowKey.getHandle(); + Envelope.Operation operation = getOperation(row); + switch (operation) { + case CREATE: + after = + (Serializable[]) + getSerializableObject( + handle, row.getValue(), tableInfo, fieldIndex); + break; + case UPDATE: + before = + (Serializable[]) + getSerializableObject( + handle, row.getOldValue(), tableInfo, fieldIndex); + after = + (Serializable[]) + getSerializableObject( + handle, row.getValue(), tableInfo, fieldIndex); + break; + case DELETE: + before = + (Serializable[]) + getSerializableObject( + handle, row.getOldValue(), tableInfo, fieldIndex); + + break; + default: + LOG.error("Row data opType is not supported,row:{}.", row); + } + eventDispatcher.dispatchDataChangeEvent( + partition, + tableSchema.id(), + new EventEmitter(partition, offsetContext, Clock.SYSTEM, operation, before, after)); + } + + private Object[] getSerializableObject( + long handle, final ByteString value, TiTableInfo tableInfo, Set fieldIndex) { + Object[] serializableObject = new Serializable[fieldIndex.size()]; + try { + if (value == null) { + return null; + } + + Object[] tiKVValueAfter; + if (value != null && !value.isEmpty()) { + tiKVValueAfter = decodeObjects(value.toByteArray(), handle, tableInfo); + } else { + return null; + } + for (int index : fieldIndex) { + serializableObject[index] = tiKVValueAfter[index]; + } + return serializableObject; + } catch (Exception e) { + LOG.error("decode object error", e); + return null; + } + } + + private Set fieldIndexConverter( + List tiColumnInfos, TableSchema tableSchema) { + Map fieldIndex = + fieldIndexMap.computeIfAbsent( + tableSchema, + schema 
-> + IntStream.range(0, schema.valueSchema().fields().size()) + .boxed() + .collect( + Collectors.toMap( + i -> + schema.valueSchema() + .fields() + .get(i) + .name(), + i -> i))); + Set fieldIndexSet = new HashSet<>(); + for (TiColumnInfo tiColumnInfo : tiColumnInfos) { + if (fieldIndex.keySet().stream() + .anyMatch(key -> key.equalsIgnoreCase(tiColumnInfo.getName()))) { + fieldIndexSet.add(tiColumnInfo.getOffset()); + } + } + return fieldIndexSet; + } + + private Envelope.Operation getOperation(final Cdcpb.Event.Row row) { + if (row.getOpType() == Cdcpb.Event.Row.OpType.PUT) { // create ,update + if (!row.getOldValue().isEmpty()) { // bytes getters are never null; a PUT with an old value is an update + return Envelope.Operation.UPDATE; + } else { + return Envelope.Operation.CREATE; + } + } else if (row.getOpType() == Cdcpb.Event.Row.OpType.DELETE) { // delete + return Envelope.Operation.DELETE; + } else { + LOG.error("Row data opType is not supported,row:{}.", row); + return null; + } + } + + protected void flushRows(final long timestamp) throws Exception { + Preconditions.checkState(context != null, "sourceContext shouldn't be null"); + synchronized (context) { + while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { + final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); + final Cdcpb.Event.Row prewriteRow = + prewrites.remove(RowKeyWithTs.ofStart(commitRow)); + // if pull cdc event block when region split, cdc event will lose.
+ committedEvents.offer(prewriteRow); + } + } + } + + private void handleRow(final Cdcpb.Event.Row row) { + if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { + // Don't handle index key for now + return; + } + LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); + switch (row.getType()) { + case COMMITTED: + prewrites.put(RowKeyWithTs.ofStart(row), row); + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case COMMIT: + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case PREWRITE: + prewrites.put(RowKeyWithTs.ofStart(row), row); + break; + case ROLLBACK: + prewrites.remove(RowKeyWithTs.ofStart(row)); + break; + default: + LOG.warn("Unsupported row type:" + row.getType()); + } + } @Override public boolean executeIteration( @@ -66,4 +400,55 @@ public boolean executeIteration( public void commitOffset(Map offset) { StreamingChangeEventSource.super.commitOffset(offset); } + + // --------------------------------------- + // static Utils classes + // --------------------------------------- + private static class RowKeyWithTs implements Comparable { + private final long timestamp; + private final RowKey rowKey; + + private RowKeyWithTs(final long timestamp, final RowKey rowKey) { + this.timestamp = timestamp; + this.rowKey = rowKey; + } + + private RowKeyWithTs(final long timestamp, final byte[] key) { + this(timestamp, RowKey.decode(key)); + } + + @Override + public int compareTo(final RowKeyWithTs that) { + int res = Long.compare(this.timestamp, that.timestamp); + if (res == 0) { + res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); + } + if (res == 0) { + res = Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); + } + return res; + } + + @Override + public int hashCode() { + return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); + } + + @Override + public boolean equals(final Object thatObj) { + if (thatObj instanceof RowKeyWithTs) { + final RowKeyWithTs 
that = (RowKeyWithTs) thatObj; + return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); + } + return false; + } + + static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); + } + + static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); + } + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java index 7d5b251fa8b..1058b114f41 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java @@ -19,6 +19,7 @@ import io.debezium.pipeline.source.spi.ChangeEventSource; +/** A change event source context that can be stopped. 
*/ public class StoppableChangeEventSourceContext implements ChangeEventSource.ChangeEventSourceContext { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java index c55a1ee20c2..8b302e9305c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java @@ -53,6 +53,7 @@ import java.sql.SQLException; import java.time.Duration; +/** A wrapped task to fetch snapshot split of table. */ public class TiDBScanFetchTask extends AbstractScanFetchTask { private static final Logger LOG = LoggerFactory.getLogger(TiDBScanFetchTask.class); @@ -77,7 +78,12 @@ protected void executeBackfillTask(Context context, StreamSplit backfillStreamSp WatermarkKind.END); } - /** 创建并执行一个 TiDBSnapshotSplitReadTask */ + /** + * Execute data snapshot task. + * + * @param context the task context + * @throws Exception exception + */ @Override protected void executeDataSnapshot(Context context) throws Exception { TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; @@ -296,6 +302,7 @@ protected TiDBSnapshotContext prepare(TiDBPartition partition) throws Exception } } + /** Context for snapshotting. 
*/ public class TiDBSnapshotSplitChangeEventSourceContext implements ChangeEventSource.ChangeEventSourceContext { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java index 67fc3eccea7..ed77366d5e1 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java @@ -55,6 +55,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** TiDB source fetch task context. */ public class TiDBSourceFetchTaskContext extends JdbcSourceFetchTaskContext { private static final Logger LOG = LoggerFactory.getLogger(TiDBSourceFetchTaskContext.class); @@ -199,7 +200,7 @@ public JdbcSourceEventDispatcher getEventDispatcher() { @Override public WatermarkDispatcher getWaterMarkDispatcher() { - return null; + return dispatcher; } @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java index e46ad7df2e0..4c7169c4619 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java @@ -27,11 +27,13 @@ import javax.annotation.Nullable; +/** TiDBStreamFetchTask. */ public class TiDBStreamFetchTask implements FetchTask { private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamFetchTask.class); private final StreamSplit split; private volatile boolean taskRunning = false; private volatile boolean stopped = false; + EventSourceReader eventSourceReader; public TiDBStreamFetchTask(StreamSplit split) { this.split = split; @@ -51,13 +53,14 @@ public void execute(Context context) throws Exception { TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; sourceFetchContext.getOffsetContext().preSnapshotCompletion(); - EventSourceReader eventSourceReader = + eventSourceReader = new EventSourceReader( sourceFetchContext.getDbzConnectorConfig(), sourceFetchContext.getEventDispatcher(), sourceFetchContext.getErrorHandler(), sourceFetchContext.getTaskContext(), split); + eventSourceReader.init(); StoppableChangeEventSourceContext changeEventSourceContext = new StoppableChangeEventSourceContext(); eventSourceReader.execute( @@ -66,7 +69,9 @@ public void execute(Context context) throws Exception { sourceFetchContext.getOffsetContext()); } - public void commitCurrentOffset(@Nullable Offset offsetToCommit) {} + public void commitCurrentOffset(@Nullable Offset offsetToCommit) { + // todo + } @Override public boolean isRunning() { @@ -81,10 +86,10 @@ public SourceSplitBase getSplit() { @Override public void close() { LOG.debug("stopping StreamFetchTask for split: {}", split); - // if (CDCEventSource != null) { - // ((StoppableChangeEventSourceContext) - // (CDCEventSource.context)).stopChangeEventSource(); - // } + if (eventSourceReader != null) { + ((StoppableChangeEventSourceContext) (eventSourceReader.context)) + .stopChangeEventSource(); + } stopped = false; 
taskRunning = false; } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java index ab8f9d57953..18840340e1e 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java @@ -30,6 +30,7 @@ import java.io.EOFException; import java.sql.SQLException; +/** TiDBErrorHandler. */ public class TiDBErrorHandler extends ErrorHandler { private static final Logger LOG = LoggerFactory.getLogger(TiDBErrorHandler.class); private static final String SQL_CODE_TOO_MANY_CONNECTIONS = "08004"; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java index 49cf6f2ad0e..d3f11577dcd 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Map; +/** TiDB schema change event handler. 
*/ public class TiDBSchemaChangeEventHandler implements SchemaChangeEventHandler { @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java index 2650b4d941b..2181d09a565 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.Map; +/** The offset of TiDB binlog. */ public class EventOffset extends Offset { public static final String TIMESTAMP_KEY = "timestamp"; // TimeStamp Oracle from pd diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java index 8953b8f8985..1338bc9fcc1 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java @@ -40,6 +40,7 @@ import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.COMMIT_VERSION_KEY; import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.TIMESTAMP_KEY; +/** The offset context for TiDB connector. 
*/ public class EventOffsetContext implements OffsetContext { private static final String SNAPSHOT_COMPLETED_KEY = "snapshot_completed"; @@ -168,6 +169,7 @@ public void setCheckpoint(Instant timestamp, String commitVersion) { this.commitVersion = commitVersion; } + /** The loader for TiDB offset context. */ public static class Loader implements OffsetContext.Loader { private final TiDBConnectorConfig connectorConfig; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java index 6ad3d9ebc5c..6d36cb174f4 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java @@ -24,6 +24,7 @@ import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.NO_STOPPING_OFFSET; +/** The factory class for {@link EventOffset}. 
*/ public class EventOffsetFactory extends OffsetFactory { @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java index bfe971e26ea..9f46a6845b3 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.Objects; +/** Utils for EventOffset. */ public class EventOffsetUtils { public static EventOffsetContext getEventOffsetContext( OffsetContext.Loader loader, Offset offset) { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java index 9f460c8421b..7d5a9ba31db 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java @@ -29,6 +29,7 @@ import java.util.Set; import java.util.stream.Collectors; +/** The source info of TiDB. 
*/ public class TiDBSourceInfo extends BaseSourceInfo { public static final String COMMIT_VERSION_KEY = "commitVersion"; private Long commitVersion = -1L; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java index a493d43bc37..7a8df65ab5e 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java @@ -24,6 +24,7 @@ import java.time.Instant; +/** TiDBSourceInfoStructMaker. */ public class TiDBSourceInfoStructMaker implements SourceInfoStructMaker { private final Schema schema; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java index 98708857095..257d3dba99c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; +/** TiDB field definition. 
*/ public class TiDBFieldDefinition { private String columnName; private String columnType; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java index 5e20679918b..f93581dbd32 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java @@ -44,6 +44,7 @@ import static org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils.getValueConverters; +/** TiDB schema. */ public class TiDBSchema { private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE "; private static final String DESC_TABLE = "DESC "; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java index d1eb4ff123b..83a163166eb 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.stream.Collectors; +/** TiDB table definition. 
*/ public class TiDBTableDefinition { TableId tableId; List fieldDefinitions; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java index 9358bcac2a1..cde0819875c 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java @@ -30,6 +30,7 @@ import java.sql.SQLException; +/** TiDB chunk splitter. */ public class TiDBChunkSplitter extends JdbcSourceChunkSplitter { public TiDBChunkSplitter( diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java index 367f3a8f864..fcbcb3b086b 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java @@ -41,6 +41,7 @@ import java.util.Map; import java.util.Optional; +/** Factory for creating {@link DeserializationRuntimeConverter} for TiDB. 
*/ public class TiDBDeserializationConverterFactory { public static DeserializationRuntimeConverterFactory instance() { return new DeserializationRuntimeConverterFactory() { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java index 7b6e7ef7f4f..9ec64476f7e 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java @@ -59,10 +59,11 @@ import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.PD_ADDRESSES; import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TABLE_LIST; -import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TiDB_PORT; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TIDB_PORT; import static org.apache.flink.cdc.debezium.table.DebeziumOptions.getDebeziumProperties; import static org.apache.flink.cdc.debezium.utils.ResolvedSchemaUtils.getPhysicalSchema; +/** Factory for creating configured instances of {@link TiDBTableSource}. 
*/ public class TiDBTableFactory implements DynamicTableSourceFactory { private static final String IDENTIFIER = "tidb-cdc"; @@ -78,7 +79,7 @@ public Set> requiredOptions() { options.add(USERNAME); options.add(PASSWORD); options.add(PD_ADDRESSES); - options.add(TiDB_PORT); + options.add(TIDB_PORT); return options; } @@ -161,7 +162,7 @@ public DynamicTableSource createDynamicTableSource(Context context) { String tableName = config.get(TABLE_NAME); String tableList = config.get(TABLE_LIST); - int port = config.get(TiDB_PORT); + int port = config.get(TIDB_PORT); String serverTimeZone = config.get(SERVER_TIME_ZONE); Duration connectTimeout = config.get(CONNECT_TIMEOUT); String pdAddresses = config.get(PD_ADDRESSES); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java index e3874d709c0..98c5a38f570 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java @@ -46,12 +46,14 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.flink.util.Preconditions.checkNotNull; +/** TiDB table source. 
*/ public class TiDBTableSource implements ScanTableSource, SupportsReadingMetadata { private final ResolvedSchema physicalSchema; @@ -292,4 +294,78 @@ private MetadataConverter[] getMetadataConverters() { .map(TiDBReadableMetadata::getConverter) .toArray(MetadataConverter[]::new); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TiDBTableSource that = (TiDBTableSource) o; + return port == that.port + && enableParallelRead == that.enableParallelRead + && splitSize == that.splitSize + && splitMetaGroupSize == that.splitMetaGroupSize + && fetchSize == that.fetchSize + && connectionPoolSize == that.connectionPoolSize + && connectMaxRetries == that.connectMaxRetries + && Double.compare(that.distributionFactorUpper, distributionFactorUpper) == 0 + && Double.compare(that.distributionFactorLower, distributionFactorLower) == 0 + && physicalSchema.equals(that.physicalSchema) + && startupOptions.equals(that.startupOptions) + && Objects.equals(tableList, that.tableList) + && Objects.equals(tableName, that.tableName) + && Objects.equals(connectTimeout, that.connectTimeout) + && Objects.equals(jdbcDriver, that.jdbcDriver) + && Objects.equals(serverTimeZone, that.serverTimeZone) + && Objects.equals(pdAddresses, that.pdAddresses) + && Objects.equals(hostMapping, that.hostMapping) + && Objects.equals(hostName, that.hostName) + && Objects.equals(database, that.database) + && Objects.equals(username, that.username) + && Objects.equals(password, that.password) + && Objects.equals(heartbeatInterval, that.heartbeatInterval) + && Objects.equals(chunkKeyColumn, that.chunkKeyColumn) + && Objects.equals(chunkKeyColumns, that.chunkKeyColumns) + && Objects.equals(jdbcProperties, that.jdbcProperties) + && Objects.equals(options, that.options) + && Objects.equals(producedDataType, that.producedDataType) + && Objects.equals(metadataKeys, that.metadataKeys); + } + + @Override + public 
int hashCode() { + return Objects.hash( + physicalSchema, + startupOptions, + tableList, + tableName, + connectTimeout, + jdbcDriver, + serverTimeZone, + pdAddresses, + hostMapping, + port, + hostName, + database, + username, + password, + heartbeatInterval, + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcProperties, + options, + enableParallelRead, + producedDataType, + metadataKeys); + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java index 1299d527bed..0c74c81a8d4 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.flink.cdc.connectors.tidb.utils; import io.debezium.jdbc.JdbcConnection; @@ -28,6 +29,7 @@ import java.util.Set; import java.util.stream.Collectors; +/** Utils for table discovery. 
*/ public class TableDiscoveryUtils { private static final Logger LOG = LoggerFactory.getLogger(TableDiscoveryUtils.class); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java new file mode 100644 index 00000000000..d67ccb81259 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.utils; + +import org.apache.flink.util.Preconditions; + +import org.apache.flink.shaded.guava31.com.google.common.collect.ImmutableList; + +import org.tikv.common.key.RowKey; +import org.tikv.common.util.KeyRangeUtils; +import org.tikv.kvproto.Coprocessor.KeyRange; + +import java.math.BigInteger; +import java.util.List; + +/** Utils to obtain the keyRange of table. 
*/ +public class TableKeyRangeUtils { + public static KeyRange getTableKeyRange(final long tableId) { + return KeyRangeUtils.makeCoprocRange( + RowKey.createMin(tableId).toByteString(), + RowKey.createBeyondMax(tableId).toByteString()); + } + + public static List getTableKeyRanges(final long tableId, final int num) { + Preconditions.checkArgument(num > 0, "Illegal value of num"); + + if (num == 1) { + return ImmutableList.of(getTableKeyRange(tableId)); + } + + final long delta = + BigInteger.valueOf(Long.MAX_VALUE) + .subtract(BigInteger.valueOf(Long.MIN_VALUE + 1)) + .divide(BigInteger.valueOf(num)) + .longValueExact(); + final ImmutableList.Builder builder = ImmutableList.builder(); + for (int i = 0; i < num; i++) { + final RowKey startKey = + (i == 0) + ? RowKey.createMin(tableId) + : RowKey.toRowKey(tableId, Long.MIN_VALUE + delta * i); + final RowKey endKey = + (i == num - 1) + ? RowKey.createBeyondMax(tableId) + : RowKey.toRowKey(tableId, Long.MIN_VALUE + delta * (i + 1)); + builder.add( + KeyRangeUtils.makeCoprocRange(startKey.toByteString(), endKey.toByteString())); + } + return builder.build(); + } + + public static KeyRange getTableKeyRange(final long tableId, final int num, final int idx) { + Preconditions.checkArgument(idx >= 0 && idx < num, "Illegal value of idx"); + return getTableKeyRanges(tableId, num).get(idx); + } + + public static boolean isRecordKey(final byte[] key) { + return key[9] == '_' && key[10] == 'r'; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java index 29549aa1a72..d3f8a5838af 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java @@ -30,6 +30,7 @@ import java.util.HashMap; import java.util.Map; +/** Utils to obtain the connection of TiDB. */ public class TiDBConnectionUtils { public static boolean isTableIdCaseInsensitive(JdbcConnection connection) { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java index 4170986df6c..c796284a5d8 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java @@ -47,6 +47,7 @@ import static org.apache.flink.table.api.DataTypes.FIELD; import static org.apache.flink.table.api.DataTypes.ROW; +/** Utils for TiDB. 
*/ public class TiDBUtils { private static final String BIT = "BIT"; private static final String TINYINT = "TINYINT"; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java index 525fd2a28e4..14b8f6b6b94 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java @@ -17,6 +17,7 @@ package org.apache.flink.cdc.connectors.tidb; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; import org.apache.flink.test.util.AbstractTestBase; import com.alibaba.dcm.DnsCacheManipulator; @@ -32,7 +33,6 @@ import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; import org.testcontainers.containers.output.Slf4jLogConsumer; -import org.testcontainers.junit.jupiter.Container; import org.testcontainers.lifecycle.Startables; import java.net.URL; @@ -70,7 +70,6 @@ public class TiDBTestBase extends AbstractTestBase { public static final Network NETWORK = Network.newNetwork(); - @Container public static final GenericContainer PD = new FixedHostPortGenericContainer<>("pingcap/pd:v6.1.0") .withFileSystemBind("src/test/resources/config/pd.toml", "/pd.toml") @@ -90,7 +89,6 @@ public class TiDBTestBase extends AbstractTestBase { .withStartupTimeout(Duration.ofSeconds(120)) .withLogConsumer(new Slf4jLogConsumer(LOG)); - @Container public static final GenericContainer TIKV = new FixedHostPortGenericContainer<>("pingcap/tikv:v6.1.0") .withFixedExposedPort(TIKV_PORT_ORIGIN, TIKV_PORT_ORIGIN) @@ -108,7 +106,6 @@ public class TiDBTestBase extends 
AbstractTestBase { .withStartupTimeout(Duration.ofSeconds(120)) .withLogConsumer(new Slf4jLogConsumer(LOG)); - @Container public static final GenericContainer TIDB = new GenericContainer<>("pingcap/tidb:v6.1.0") .withExposedPorts(TIDB_PORT) @@ -212,4 +209,28 @@ protected void initializeTidbTable(String sqlFile) { throw new RuntimeException(e); } } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String database, String schemaName, String tableName, int splitSize) { + return getMockTiDBSourceConfigFactory(database, schemaName, tableName, splitSize, false); + } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String database, + String schemaName, + String tableName, + int splitSize, + boolean skipSnapshotBackfill) { + + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(TIDB.getContainerIpAddress()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TIDB_USER); + tiDBSourceConfigFactory.password(TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(database); + tiDBSourceConfigFactory.tableList(database + "." 
+ tableName); + tiDBSourceConfigFactory.splitSize(splitSize); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + return tiDBSourceConfigFactory; + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java new file mode 100644 index 00000000000..d5574a0f276 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; + +import io.debezium.relational.TableId; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** Test for {@link TiDBDialect}. 
*/ +public class TiDBDialectTest extends TiDBTestBase { + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + + @Test + public void testDiscoverDataCollectionsInMultiDatabases() { + initializeTidbTable("customer"); + TiDBSourceConfigFactory configFactoryOfCustomDatabase = + getMockTiDBSourceConfigFactory(databaseName, null, tableName, 10); + + TiDBDialect dialectOfcustomDatabase = + new TiDBDialect(configFactoryOfCustomDatabase.create(0)); + List tableIdsOfcustomDatabase = + dialectOfcustomDatabase.discoverDataCollections( + configFactoryOfCustomDatabase.create(0)); + Assertions.assertThat(tableIdsOfcustomDatabase.get(0).toString()) + .isEqualTo("customer.customers"); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java new file mode 100644 index 00000000000..d1e4161d96f --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.RowRowConverter; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.util.CloseableIterator; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** Tests for TiDB Source based on incremental snapshot framework . 
*/ +public class TiDBSourceExampleTest extends TiDBTestBase { + + private static final String databaseName = "inventory"; + private static final String tableName = "products"; + + @Test + public void testConsumingScanEvents() throws Exception { + final DataType dataType = + DataTypes.ROW( + DataTypes.FIELD("id", DataTypes.BIGINT()), + DataTypes.FIELD("name", DataTypes.STRING()), + DataTypes.FIELD("description", DataTypes.STRING()), + DataTypes.FIELD("weight", DataTypes.FLOAT())); + + initializeTidbTable("inventory"); + + JdbcIncrementalSource tiDBIncrementalSource = + TiDBSourceBuilder.TiDBIncrementalSource.builder() + .hostname(TIDB.getHost()) + .port(TIDB.getMappedPort(TIDB_PORT)) + .username(TiDBTestBase.TIDB_USER) + .password(TiDBTestBase.TIDB_PASSWORD) + .databaseList(databaseName) + .tableList(this.databaseName + "." + this.tableName) + .splitSize(10) + .deserializer(buildRowDataDebeziumDeserializeSchema(dataType)) + .build(); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + CloseableIterator iterator = + env.fromSource( + tiDBIncrementalSource, + WatermarkStrategy.noWatermarks(), + "TiDBParallelSource") + .setParallelism(2) + .executeAndCollect(); // collect record + + String[] snapshotExpectedRecords = + new String[] { + "+I[101, scooter, Small 2-wheel scooter, 3.14]", + "+I[102, car battery, 12V car battery, 8.1]", + "+I[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]", + "+I[104, hammer, 12oz carpenter's hammer, 0.75]", + "+I[105, hammer, 14oz carpenter's hammer, 0.875]", + "+I[106, hammer, 16oz carpenter's hammer, 1.0]", + "+I[107, rocks, box of assorted rocks, 5.3]", + "+I[108, jacket, water resistent black wind breaker, 0.1]", + "+I[109, spare tire, 24 inch spare tire, 22.2]" + }; + + // step-1: consume snapshot data + List snapshotRowDataList = new ArrayList<>(); + for (int i = 0; i < snapshotExpectedRecords.length && iterator.hasNext(); i++) { + 
snapshotRowDataList.add(iterator.next()); + } + + List snapshotActualRecords = formatResult(snapshotRowDataList, dataType); + assertEqualsInAnyOrder(Arrays.asList(snapshotExpectedRecords), snapshotActualRecords); + } + + private DebeziumDeserializationSchema buildRowDataDebeziumDeserializeSchema( + DataType dataType) { + LogicalType logicalType = TypeConversions.fromDataToLogicalType(dataType); + InternalTypeInfo typeInfo = InternalTypeInfo.of(logicalType); + return RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType((RowType) dataType.getLogicalType()) + .setResultTypeInfo(typeInfo) + .build(); + } + + private List formatResult(List records, DataType dataType) { + RowRowConverter rowRowConverter = RowRowConverter.create(dataType); + rowRowConverter.open(Thread.currentThread().getContextClassLoader()); + return records.stream() + .map(rowRowConverter::toExternal) + .map(Object::toString) + .collect(Collectors.toList()); + } + + public static void assertEqualsInAnyOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + assertEqualsInOrder( + expected.stream().sorted().collect(Collectors.toList()), + actual.stream().sorted().collect(Collectors.toList())); + } + + public static void assertEqualsInOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + Assertions.assertThat(expected.size()).isEqualTo(actual.size()); + Assertions.assertThat(expected.toArray(new String[0])) + .isEqualTo(actual.toArray(new String[0])); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java new file mode 100644 index 00000000000..c88d4e4f65b --- /dev/null +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.source.assigner.splitter.ChunkSplitter; +import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.AbstractScanFetchTask; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; +import org.apache.flink.cdc.connectors.base.source.reader.external.IncrementalSourceScanFetcher; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHook; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import 
org.apache.flink.cdc.connectors.tidb.source.TiDBDialect; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.testutils.RecordsFormatter; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.types.DataType; + +import io.debezium.relational.TableId; +import org.apache.kafka.connect.source.SourceRecord; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +/** Tests for {@link TiDBScanFetchTask}. */ +public class TiDBScanFetchTaskTest extends TiDBTestBase { + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + + private static final int USE_POST_LOWWATERMARK_HOOK = 1; + private static final int USE_PRE_HIGHWATERMARK_HOOK = 2; + + @Test + public void testChangingDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." 
+ tableName; + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Hangzhou, 123567891234]", + "+I[111, user_6, Hangzhou, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + List actual = + getDataInSnapshotScan(changingDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testInsertDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." 
+ tableName; + String[] insertDataSql = + new String[] { + "INSERT INTO " + tableId + " VALUES(112, 'user_12','Shanghai','123567891234')", + "INSERT INTO " + tableId + " VALUES(113, 'user_13','Shanghai','123567891234')", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Shanghai, 123567891234]", + "+I[111, user_6, Shanghai, 123567891234]", + "+I[112, user_12, Shanghai, 123567891234]", + "+I[113, user_13, Shanghai, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + List actual = + getDataInSnapshotScan(insertDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testDeleteDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." + tableName; + String[] deleteDataSql = + new String[] { + "DELETE FROM " + tableId + " where id = 101", + "DELETE FROM " + tableId + " where id = 102", + }; + String[] expected = + new String[] { + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Shanghai, 123567891234]", + "+I[111, user_6, Shanghai, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + List actual = + getDataInSnapshotScan(deleteDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testSnapshotScanSkipBackfillWithPostLowWatermark() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." 
+ tableName; + + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + "DELETE FROM " + tableId + " where id = 102", + "INSERT INTO " + tableId + " VALUES(102, 'user_2','hangzhou','123567891234')", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, hangzhou, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Hangzhou, 123567891234]", + "+I[111, user_6, Hangzhou, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + // Change data during [low_watermark, snapshot) will not be captured by snapshotting + List actual = + getDataInSnapshotScan(changingDataSql, USE_POST_LOWWATERMARK_HOOK, true); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testSnapshotScanSkipBackfillWithPreHighWatermark() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "."
+ tableName; + + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + // "DELETE FROM " + tableId + " where id = 102", + // "INSERT INTO " + tableId + " VALUES(102, + // 'user_2',Hangzhou','123567891234')", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Shanghai, 123567891234]", + "+I[111, user_6, Shanghai, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + // Change data during [snapshot, high_watermark) will not be captured by snapshotting + List actual = + getDataInSnapshotScan(changingDataSql, USE_PRE_HIGHWATERMARK_HOOK, true); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + private List getDataInSnapshotScan( + String[] changingDataSql, int hookType, boolean skipSnapshotBackfill) throws Exception { + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(TIDB.getHost()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); + tiDBSourceConfigFactory.password(TiDBTestBase.TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(this.databaseName); + tiDBSourceConfigFactory.tableList(this.databaseName + "."
+ this.tableName); + tiDBSourceConfigFactory.splitSize(10); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + TiDBSourceConfig tiDBSourceConfig = tiDBSourceConfigFactory.create(0); + TiDBDialect tiDBDialect = new TiDBDialect(tiDBSourceConfigFactory.create(0)); + SnapshotPhaseHooks hooks = new SnapshotPhaseHooks(); + + try (TiDBConnection tiDBConnection = tiDBDialect.openJdbcConnection()) { + SnapshotPhaseHook snapshotPhaseHook = + (tidbSourceConfig, split) -> { + tiDBConnection.execute(changingDataSql); + tiDBConnection.commit(); + try { + Thread.sleep(500L); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }; + if (hookType == USE_POST_LOWWATERMARK_HOOK) { + hooks.setPostLowWatermarkAction(snapshotPhaseHook); + } else if (hookType == USE_PRE_HIGHWATERMARK_HOOK) { + hooks.setPreHighWatermarkAction(snapshotPhaseHook); + } + final DataType dataType = + DataTypes.ROW( + DataTypes.FIELD("id", DataTypes.BIGINT()), + DataTypes.FIELD("name", DataTypes.STRING()), + DataTypes.FIELD("address", DataTypes.STRING()), + DataTypes.FIELD("phone_number", DataTypes.STRING())); + List snapshotSplits = getSnapshotSplits(tiDBSourceConfig, tiDBDialect); + + TiDBSourceFetchTaskContext tidbsourceFetchTaskContext = + new TiDBSourceFetchTaskContext(tiDBSourceConfig, tiDBDialect, tiDBConnection); + + return readTableSnapshotSplits( + snapshotSplits, tidbsourceFetchTaskContext, 1, dataType, hooks); + } + } + + private List readTableSnapshotSplits( + List snapshotSplits, + TiDBSourceFetchTaskContext taskContext, + int scanSplitsNum, + DataType dataType, + SnapshotPhaseHooks snapshotPhaseHooks) + throws Exception { + IncrementalSourceScanFetcher sourceScanFetcher = + new IncrementalSourceScanFetcher(taskContext, 0); + + ArrayList result = new ArrayList<>(); + for (int i = 0; i < scanSplitsNum; i++) { + SnapshotSplit sqlSplit = snapshotSplits.get(i); + if (sourceScanFetcher.isFinished()) { + FetchTask fetchTask = + 
taskContext.getDataSourceDialect().createFetchTask(sqlSplit); + ((AbstractScanFetchTask) fetchTask).setSnapshotPhaseHooks(snapshotPhaseHooks); + sourceScanFetcher.submitTask(fetchTask); + } + Iterator res; + while ((res = sourceScanFetcher.pollSplitRecords()) != null) { + while (res.hasNext()) { + SourceRecords sourceRecords = res.next(); + result.addAll(sourceRecords.getSourceRecordList()); + } + } + } + sourceScanFetcher.close(); + + Assertions.assertThat(sourceScanFetcher.getExecutorService()).isNotNull(); + Assertions.assertThat(sourceScanFetcher.getExecutorService().isTerminated()).isTrue(); + + return formatResult(result, dataType); + } + + private List formatResult(List records, DataType dataType) { + final RecordsFormatter formatter = new RecordsFormatter(dataType); + return formatter.format(records); + } + + /** Get snapshot splits. */ + private List getSnapshotSplits( + TiDBSourceConfig sourceConfig, JdbcDataSourceDialect sourceDialect) throws Exception { + List discoverTables = sourceDialect.discoverDataCollections(sourceConfig); + final ChunkSplitter chunkSplitter = sourceDialect.createChunkSplitter(sourceConfig); + chunkSplitter.open(); + + List snapshotSplitList = new ArrayList<>(); + for (TableId table : discoverTables) { + List snapshotSplits = + (List) chunkSplitter.generateSplits(table); + snapshotSplitList.addAll(snapshotSplits); + } + return snapshotSplitList; + } + + public static void assertEqualsInAnyOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + assertEqualsInOrder( + expected.stream().sorted().collect(Collectors.toList()), + actual.stream().sorted().collect(Collectors.toList())); + } + + public static void assertEqualsInOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + Assertions.assertThat(expected.size()).isEqualTo(actual.size()); + Assertions.assertThat(expected.toArray(new String[0])) + .isEqualTo(actual.toArray(new 
String[0])); + } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String hostName, + int port, + String userName, + String password, + String databaseName, + String schemaName, + String tableName, + int splitSize, + boolean skipSnapshotBackfill) { + + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(hostName); + tiDBSourceConfigFactory.port(port); + tiDBSourceConfigFactory.username(userName); + tiDBSourceConfigFactory.password(password); + tiDBSourceConfigFactory.databaseList(databaseName); + tiDBSourceConfigFactory.tableList(schemaName + "." + tableName); + tiDBSourceConfigFactory.splitSize(splitSize); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + return tiDBSourceConfigFactory; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java new file mode 100644 index 00000000000..724386141fe --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.reader; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.source.meta.split.ChangeEventRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.FinishedSnapshotSplitInfo; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceReaderContext; +import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceSplitReader; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.connectors.tidb.source.TiDBDialect; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; +import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition; +import org.apache.flink.connector.testutils.source.reader.TestingReaderContext; + +import io.debezium.relational.TableId; +import io.debezium.relational.history.TableChanges; +import org.apache.kafka.connect.data.Struct; +import 
org.apache.kafka.connect.source.SourceRecord; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import static java.util.Collections.singletonList; + +/** Tests reading a stream split with {@link IncrementalSourceSplitReader}. */ +public class TiDBStreamSplitReaderTest extends TiDBTestBase { + private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamSplitReaderTest.class); + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + private static final String STREAM_SPLIT_ID = "stream-split"; + + private static final int USE_POST_LOWWATERMARK_HOOK = 1; + private static final int USE_PRE_HIGHWATERMARK_HOOK = 2; + private static final int MAX_RETRY_TIMES = 100; + + private TiDBSourceConfig sourceConfig; + private TiDBDialect tiDBDialect; + private EventOffsetFactory cdcEventOffsetFactory; + + @BeforeEach + public void before() { + initializeTidbTable("customer"); + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.pdAddresses( + PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN)); + tiDBSourceConfigFactory.hostname(TIDB.getHost()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); + tiDBSourceConfigFactory.password(TiDBTestBase.TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(this.databaseName); + tiDBSourceConfigFactory.tableList(this.databaseName + "."
+ this.tableName); + tiDBSourceConfigFactory.splitSize(10); + tiDBSourceConfigFactory.skipSnapshotBackfill(true); + tiDBSourceConfigFactory.scanNewlyAddedTableEnabled(true); + this.sourceConfig = tiDBSourceConfigFactory.create(0); + this.tiDBDialect = new TiDBDialect(tiDBSourceConfigFactory.create(0)); + this.cdcEventOffsetFactory = new EventOffsetFactory(); + } + + @Test + public void testStreamSplitReader() throws Exception { + String tableId = databaseName + "." + tableName; + IncrementalSourceReaderContext incrementalSourceReaderContext = + new IncrementalSourceReaderContext(new TestingReaderContext()); + IncrementalSourceSplitReader streamSplitReader = + new IncrementalSourceSplitReader<>( + 0, + tiDBDialect, + sourceConfig, + incrementalSourceReaderContext, + SnapshotPhaseHooks.empty()); + try { + EventOffset startOffset = new EventOffset(Instant.now().toEpochMilli()); + String[] insertDataSql = + new String[] { + "INSERT INTO " + + tableId + + " VALUES(112, 'user_12','Shanghai','123567891234')", + "INSERT INTO " + + tableId + + " VALUES(113, 'user_13','Shanghai','123567891234')", + }; + try (TiDBConnection tiDBConnection = tiDBDialect.openJdbcConnection()) { + tiDBConnection.execute(insertDataSql); + tiDBConnection.commit(); + } + TableId tableIds = new TableId(databaseName, null, tableName); + Map tableSchemas = new HashMap<>(); + tableSchemas.put(tableIds, null); + FinishedSnapshotSplitInfo finishedSnapshotSplitInfo = + new FinishedSnapshotSplitInfo( + tableIds, + STREAM_SPLIT_ID, + new Object[] {startOffset}, + new Object[] {EventOffset.NO_STOPPING_OFFSET}, + startOffset, + cdcEventOffsetFactory); + StreamSplit streamSplit = + new StreamSplit( + STREAM_SPLIT_ID, + startOffset, + cdcEventOffsetFactory.createNoStoppingOffset(), + Collections.singletonList(finishedSnapshotSplitInfo), + tableSchemas, + 0); + Assertions.assertThat(streamSplitReader.canAssignNextSplit()).isTrue(); + streamSplitReader.handleSplitsChanges(new
SplitsAddition<>(singletonList(streamSplit))); + int retry = 0; + int count = 0; + while (retry < MAX_RETRY_TIMES) { + ChangeEventRecords records = (ChangeEventRecords) streamSplitReader.fetch(); + if (records.nextSplit() != null) { + SourceRecords sourceRecords; + while ((sourceRecords = records.nextRecordFromSplit()) != null) { + Iterator iterator = sourceRecords.iterator(); + while (iterator.hasNext()) { + Struct value = (Struct) iterator.next().value(); + String opType = value.getString("op"); + Assertions.assertThat(opType).isEqualTo("c"); + Struct after = (Struct) value.get("after"); + String name = after.getString("name"); + + Assertions.assertThat(name.contains("user")).isTrue(); + if (++count >= insertDataSql.length) { + return; + } + } + } + } else { + break; + } + retry++; + } + Assertions.assertThat(count).isEqualTo(insertDataSql.length); + } finally { + streamSplitReader.close(); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java index 73848a46c8a..dbab19ae8ac 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java @@ -72,12 +72,19 @@ void testConsumingAllEvents() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress()
+ ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -177,12 +184,19 @@ void testDeleteColumn() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -256,12 +270,19 @@ void testAddColumn() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -344,12 +365,19 @@ void testMetadataColumns() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -456,10 +484,18 @@ void testAllDataTypes() throws Throwable { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = 
'%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "column_type_test", "full_types"); @@ -569,10 +605,18 @@ void testTiDBServerTimezone(String timezone) throws Exception { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "column_type_test", "full_types"); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java index 1951f40463d..bc0f1fb58d7 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java @@ -69,10 +69,18 @@ void testRegionChange() throws Exception { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + 
TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "region_switch_test", "t1"); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java index f8a08d8d451..e840bee0790 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java @@ -30,7 +30,8 @@ import org.apache.flink.table.connector.source.DynamicTableSource; import org.apache.flink.table.factories.FactoryUtil; -import org.junit.Test; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; import java.time.Duration; import java.time.ZoneId; @@ -51,7 +52,6 @@ import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.CONNECT_TIMEOUT; import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HEARTBEAT_INTERVAL; import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; -import static org.junit.Assert.assertEquals; /** Unit tests for TiDB table source factory. 
*/ public class TiDBTableSourceFactoryTest { @@ -128,7 +128,7 @@ public void testCommonProperties() { new HashMap<>(), JDBC_DRIVER.defaultValue(), StartupOptions.initial()); - assertEquals(expectedSource, actualSource); + Assertions.assertThat(expectedSource).isEqualTo(actualSource); } @Test @@ -137,6 +137,10 @@ public void testOptionalProperties() { properties.put("port", MY_PORT); properties.put("scan.startup.mode", "initial"); properties.put("heartbeat.interval.ms", "15213ms"); + properties.put("debezium.tombstones.on.delete", "true"); + properties.put("debezium.snapshot.mode", "never"); + properties.put("debezium.offset.flush.interval.ms", "3000"); + properties.put("debezium.test", "test"); // properties.put("server-time-zone", "Asia/Shanghai"); Properties dbzProperties = new Properties(); @@ -156,7 +160,7 @@ public void testOptionalProperties() { TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, - 4111, + 4000, MY_HOSTNAME, MY_DATABASE, MY_TABLE, @@ -179,10 +183,10 @@ public void testOptionalProperties() { SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND.defaultValue(), SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND.defaultValue(), null, - null, + new HashMap<>(), JDBC_DRIVER.defaultValue(), StartupOptions.initial()); - assertEquals(expectedSource, actualSource); + Assertions.assertThat(expectedSource).isEqualTo(actualSource); } private Map getAllOptions() { diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java index bc700f8100d..8051034edf0 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java @@ -20,13 +20,12 @@ import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; import org.apache.flink.cdc.connectors.tidb.utils.UriHostMapping; -import org.junit.Test; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; import org.tikv.common.TiConfiguration; import java.util.HashMap; -import static org.junit.Assert.assertEquals; - /** Unit test for {@link UriHostMapping}. * */ public class UriHostMappingTest { @@ -36,8 +35,8 @@ public void uriHostMappingTest() { TiDBSourceOptions.getTiConfiguration( "http://0.0.0.0:2347", "host1:1;host2:2;host3:3", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - assertEquals(uriHostMapping.getHostMapping().size(), 3); - assertEquals(uriHostMapping.getHostMapping().get("host1"), "1"); + Assertions.assertThat(uriHostMapping.getHostMapping().size()).isEqualTo(3); + Assertions.assertThat(uriHostMapping.getHostMapping().get("host1")).isEqualTo("1"); } @Test @@ -45,7 +44,7 @@ public void uriHostMappingEmpty() { final TiConfiguration tiConf = TiDBSourceOptions.getTiConfiguration("http://0.0.0.0:2347", "", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - assertEquals(uriHostMapping.getHostMapping(), null); + Assertions.assertThat(uriHostMapping.getHostMapping()).isEqualTo(null); } @Test @@ -55,7 +54,8 @@ public void uriHostMappingError() { TiDBSourceOptions.getTiConfiguration( "http://0.0.0.0:2347", "host1=1;host2=2;host3=3", new HashMap<>()); } catch (IllegalArgumentException e) { - assertEquals(e.getMessage(), "Invalid host mapping string: host1=1;host2=2;host3=3"); + Assertions.assertThat(e.getMessage()) + .isEqualTo("Invalid host mapping string: host1=1;host2=2;host3=3"); } } } diff --git 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java new file mode 100644 index 00000000000..8dee3688670 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.testutils; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.RowRowConverter; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.flink.util.Collector; + +import org.apache.kafka.connect.source.SourceRecord; + +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** Formatter that formats the {@link SourceRecord} to String. */ +public class RecordsFormatter { + + private final DataType dataType; + private final ZoneId zoneId; + + private TypeInformation typeInfo; + private DebeziumDeserializationSchema deserializationSchema; + private SimpleCollector collector; + private RowRowConverter rowRowConverter; + + public RecordsFormatter(DataType dataType) { + this(dataType, ZoneId.of("UTC")); + } + + public RecordsFormatter(DataType dataType, ZoneId zoneId) { + this.dataType = dataType; + this.zoneId = zoneId; + this.typeInfo = + (TypeInformation) TypeConversions.fromDataTypeToLegacyInfo(dataType); + this.deserializationSchema = + RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType((RowType) dataType.getLogicalType()) + .setResultTypeInfo(typeInfo) + .build(); + this.collector = new SimpleCollector(); + this.rowRowConverter = RowRowConverter.create(dataType); + rowRowConverter.open(Thread.currentThread().getContextClassLoader()); + } + + public List format(List records) { + records.stream() + // Keep DataChangeEvent only + 
.filter(SourceRecordUtils::isDataChangeRecord) + .forEach( + r -> { + try { + deserializationSchema.deserialize(r, collector); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + return collector.list.stream() + .map(rowRowConverter::toExternal) + .map(Row::toString) + .collect(Collectors.toList()); + } + + private static class SimpleCollector implements Collector { + + private List list = new ArrayList<>(); + + @Override + public void collect(RowData record) { + list.add(record); + } + + @Override + public void close() { + // do nothing + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql new file mode 100644 index 00000000000..c772e28e0ff --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql @@ -0,0 +1,51 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: customer +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE customer; + +USE customer; + +-- Create and populate our users using a single insert with many rows +CREATE TABLE customers ( + id INTEGER NOT NULL PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'flink', + address VARCHAR(1024), + phone_number VARCHAR(512) +); +INSERT INTO customers +VALUES (101,'user_1','Shanghai','123567891234'), + (102,'user_2','Shanghai','123567891234'), + (103,'user_3','Shanghai','123567891234'), + (109,'user_4','Shanghai','123567891234'), + (110,'user_5','Shanghai','123567891234'), + (111,'user_6','Shanghai','123567891234'), + (118,'user_7','Shanghai','123567891234'), + (121,'user_8','Shanghai','123567891234'), + (123,'user_9','Shanghai','123567891234'), + (1009,'user_10','Shanghai','123567891234'), + (1010,'user_11','Shanghai','123567891234'), + (1011,'user_12','Shanghai','123567891234'), + (1012,'user_13','Shanghai','123567891234'), + (1013,'user_14','Shanghai','123567891234'), + (1014,'user_15','Shanghai','123567891234'), + (1015,'user_16','Shanghai','123567891234'), + (1016,'user_17','Shanghai','123567891234'), + (1017,'user_18','Shanghai','123567891234'), + (1018,'user_19','Shanghai','123567891234'), + (1019,'user_20','Shanghai','123567891234'), + (2000,'user_21','Shanghai','123567891234'); \ No newline at end of file From f43a38984d1bbb5c70a5b04355010f1f9d2d84f7 Mon Sep 17 00:00:00 2001 From: Kunni Date: Fri, 20 Mar 2026 17:45:44 +0800 Subject: [PATCH 3/5] [FLINK-38729] Add Flink2 support for Source/Pipeline connector.
(#4322) # Conflicts: # flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java --- .../tidb/TiKVRichParallelSourceFunction.java | 420 ++++++++++++++++++ .../table/TiDBTableSourceFactoryTest.java | 8 +- 2 files changed, 424 insertions(+), 4 deletions(-) create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java new file mode 100644 index 00000000000..16c130b38c8 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java @@ -0,0 +1,420 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb; + +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.base.LongSerializer; +import org.apache.flink.api.java.typeutils.ResultTypeQueryable; +import org.apache.flink.cdc.connectors.tidb.metrics.TiDBSourceMetrics; +import org.apache.flink.cdc.connectors.tidb.table.StartupMode; +import org.apache.flink.cdc.connectors.tidb.table.utils.TableKeyRangeUtils; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.util.Collector; +import org.apache.flink.util.Preconditions; + +import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.tikv.cdc.CDCClient; +import org.tikv.common.TiConfiguration; +import org.tikv.common.TiSession; +import org.tikv.common.key.RowKey; +import org.tikv.common.meta.TiTableInfo; +import org.tikv.common.meta.TiTimestamp; +import org.tikv.kvproto.Cdcpb; +import org.tikv.kvproto.Coprocessor; +import org.tikv.kvproto.Kvrpcpb; +import org.tikv.shade.com.google.protobuf.ByteString; +import org.tikv.txn.KVClient; + +import java.util.List; +import java.util.Objects; +import java.util.TreeMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import 
java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; + +/** + * The source implementation for TiKV that read snapshot events first and then read the change + * event. + */ +public class TiKVRichParallelSourceFunction extends RichParallelSourceFunction + implements CheckpointListener, CheckpointedFunction, ResultTypeQueryable { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(TiKVRichParallelSourceFunction.class); + private static final long SNAPSHOT_VERSION_EPOCH = -1L; + private static final long STREAMING_VERSION_START_EPOCH = 0L; + + private final TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; + private final TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; + private final TiConfiguration tiConf; + private final StartupMode startupMode; + private final String database; + private final String tableName; + + /** Task local variables. */ + private transient TiSession session = null; + + private transient Coprocessor.KeyRange keyRange = null; + private transient CDCClient cdcClient = null; + private transient SourceFunction.SourceContext sourceContext = null; + private transient volatile long resolvedTs = -1L; + private transient TreeMap prewrites = null; + private transient TreeMap commits = null; + private transient BlockingQueue committedEvents = null; + private transient OutputCollector outputCollector; + + private transient boolean running = true; + private transient ExecutorService executorService; + private transient TiDBSourceMetrics sourceMetrics; + + /** offset state. 
*/ + private transient ListState offsetState; + + private static final long CLOSE_TIMEOUT = 30L; + + public TiKVRichParallelSourceFunction( + TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema, + TiKVChangeEventDeserializationSchema changeEventDeserializationSchema, + TiConfiguration tiConf, + StartupMode startupMode, + String database, + String tableName) { + this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; + this.changeEventDeserializationSchema = changeEventDeserializationSchema; + this.tiConf = tiConf; + this.startupMode = startupMode; + this.database = database; + this.tableName = tableName; + } + + @Override + public void open(final Configuration config) throws Exception { + super.open(config); + session = TiSession.create(tiConf); + TiTableInfo tableInfo = session.getCatalog().getTable(database, tableName); + if (tableInfo == null) { + throw new RuntimeException( + String.format("Table %s.%s does not exist.", database, tableName)); + } + long tableId = tableInfo.getId(); + keyRange = + TableKeyRangeUtils.getTableKeyRange( + tableId, + getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks(), + getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()); + cdcClient = new CDCClient(session, keyRange); + prewrites = new TreeMap<>(); + commits = new TreeMap<>(); + // cdc event will lose if pull cdc event block when region split + // use queue to separate read and write to ensure pull event unblock. + // since sink jdbc is slow, 5000W queue size may be safe size. + committedEvents = new LinkedBlockingQueue<>(); + outputCollector = new OutputCollector<>(); + resolvedTs = + startupMode == StartupMode.INITIAL + ? 
SNAPSHOT_VERSION_EPOCH + : STREAMING_VERSION_START_EPOCH; + ThreadFactory threadFactory = + new ThreadFactoryBuilder() + .setNameFormat( + "tidb-source-function-" + + getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()) + .build(); + executorService = Executors.newSingleThreadExecutor(threadFactory); + final MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); + sourceMetrics = new TiDBSourceMetrics(metricGroup); + sourceMetrics.registerMetrics(); + } + + @Override + public void run(final SourceFunction.SourceContext ctx) throws Exception { + sourceContext = ctx; + outputCollector.context = sourceContext; + + if (startupMode == StartupMode.INITIAL) { + synchronized (sourceContext.getCheckpointLock()) { + readSnapshotEvents(); + } + } else { + LOG.info("Skip snapshot read"); + resolvedTs = session.getTimestamp().getVersion(); + } + + LOG.info("start read change events"); + cdcClient.start(resolvedTs); + running = true; + readChangeEvents(); + } + + private void handleRow(final Cdcpb.Event.Row row) { + if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { + // Don't handle index key for now + return; + } + LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); + switch (row.getType()) { + case COMMITTED: + prewrites.put(RowKeyWithTs.ofStart(row), row); + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case COMMIT: + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case PREWRITE: + prewrites.put(RowKeyWithTs.ofStart(row), row); + break; + case ROLLBACK: + prewrites.remove(RowKeyWithTs.ofStart(row)); + break; + default: + LOG.warn("Unsupported row type:" + row.getType()); + } + } + + protected void readSnapshotEvents() throws Exception { + LOG.info("read snapshot events"); + try (KVClient scanClient = session.createKVClient()) { + long startTs = session.getTimestamp().getVersion(); + ByteString start = keyRange.getStart(); + while (true) { + final List segment = + scanClient.scan(start, keyRange.getEnd(), 
startTs); + + if (segment.isEmpty()) { + resolvedTs = startTs; + break; + } + + for (final Kvrpcpb.KvPair pair : segment) { + if (TableKeyRangeUtils.isRecordKey(pair.getKey().toByteArray())) { + snapshotEventDeserializationSchema.deserialize(pair, outputCollector); + reportMetrics(0L, startTs); + } + } + + start = + RowKey.toRawKey(segment.get(segment.size() - 1).getKey()) + .next() + .toByteString(); + } + } + } + + protected void readChangeEvents() throws Exception { + LOG.info("read change event from resolvedTs:{}", resolvedTs); + // child thread to sink committed rows. + executorService.execute( + () -> { + while (running) { + try { + Cdcpb.Event.Row committedRow = committedEvents.take(); + changeEventDeserializationSchema.deserialize( + committedRow, outputCollector); + // use startTs of row as messageTs, use commitTs of row as fetchTs + reportMetrics(committedRow.getStartTs(), committedRow.getCommitTs()); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { + for (int i = 0; i < 1000; i++) { + final Cdcpb.Event.Row row = cdcClient.get(); + if (row == null) { + break; + } + handleRow(row); + } + resolvedTs = cdcClient.getMaxResolvedTs(); + if (commits.size() > 0) { + flushRows(resolvedTs); + } + } + } + + protected void flushRows(final long timestamp) throws Exception { + Preconditions.checkState(sourceContext != null, "sourceContext shouldn't be null"); + synchronized (sourceContext) { + while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { + final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); + final Cdcpb.Event.Row prewriteRow = + prewrites.remove(RowKeyWithTs.ofStart(commitRow)); + // if pull cdc event block when region split, cdc event will lose. 
+ committedEvents.offer(prewriteRow); + } + } + } + + @Override + public void cancel() { + try { + running = false; + if (cdcClient != null) { + cdcClient.close(); + } + if (executorService != null) { + executorService.shutdown(); + if (!executorService.awaitTermination(CLOSE_TIMEOUT, TimeUnit.SECONDS)) { + LOG.warn( + "Failed to close the tidb source function in {} seconds.", + CLOSE_TIMEOUT); + } + } + } catch (final Exception e) { + LOG.error("Unable to close cdcClient", e); + } + } + + @Override + public void snapshotState(final FunctionSnapshotContext context) throws Exception { + LOG.info( + "snapshotState checkpoint: {} at resolvedTs: {}", + context.getCheckpointId(), + resolvedTs); + flushRows(resolvedTs); + offsetState.clear(); + offsetState.add(resolvedTs); + } + + @Override + public void initializeState(final FunctionInitializationContext context) throws Exception { + LOG.info("initialize checkpoint"); + offsetState = + context.getOperatorStateStore() + .getListState( + new ListStateDescriptor<>( + "resolvedTsState", LongSerializer.INSTANCE)); + if (context.isRestored()) { + for (final Long offset : offsetState.get()) { + resolvedTs = offset; + LOG.info("Restore State from resolvedTs: {}", resolvedTs); + return; + } + } else { + resolvedTs = 0; + LOG.info("Initialize State from resolvedTs: {}", resolvedTs); + } + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + // do nothing + } + + @Override + public TypeInformation getProducedType() { + return snapshotEventDeserializationSchema.getProducedType(); + } + + // --------------------------------------- + // static Utils classes + // --------------------------------------- + private static class RowKeyWithTs implements Comparable { + private final long timestamp; + private final RowKey rowKey; + + private RowKeyWithTs(final long timestamp, final RowKey rowKey) { + this.timestamp = timestamp; + this.rowKey = rowKey; + } + + private RowKeyWithTs(final long 
timestamp, final byte[] key) { + this(timestamp, RowKey.decode(key)); + } + + @Override + public int compareTo(final RowKeyWithTs that) { + int res = Long.compare(this.timestamp, that.timestamp); + if (res == 0) { + res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); + } + if (res == 0) { + res = Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); + } + return res; + } + + @Override + public int hashCode() { + return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); + } + + @Override + public boolean equals(final Object thatObj) { + if (thatObj instanceof RowKeyWithTs) { + final RowKeyWithTs that = (RowKeyWithTs) thatObj; + return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); + } + return false; + } + + static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); + } + + static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); + } + } + + private static class OutputCollector implements Collector { + + private SourceFunction.SourceContext context; + + @Override + public void collect(T record) { + context.collect(record); + } + + @Override + public void close() { + // do nothing + } + } + + private void reportMetrics(long messageTs, long fetchTs) { + long now = System.currentTimeMillis(); + // record the latest process time + sourceMetrics.recordProcessTime(now); + long messageTimestamp = TiTimestamp.extractPhysical(messageTs); + long fetchTimestamp = TiTimestamp.extractPhysical(fetchTs); + if (messageTimestamp > 0L) { + // report fetch delay + if (fetchTimestamp >= messageTimestamp) { + sourceMetrics.recordFetchDelay(fetchTimestamp - messageTimestamp); + } + // report emit delay + sourceMetrics.recordEmitDelay(now - messageTimestamp); + } + } +} diff --git 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java index e840bee0790..7a59d69584a 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java @@ -21,14 +21,14 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.table.api.DataTypes; import org.apache.flink.table.api.Schema; -import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.CatalogTableAdapter; import org.apache.flink.table.catalog.Column; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.catalog.UniqueConstraint; import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.FactoryUtilAdapter; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -206,11 +206,11 @@ private Map getAllOptions() { private static DynamicTableSource createTableSource( ResolvedSchema schema, Map options) { - return FactoryUtil.createTableSource( + return FactoryUtilAdapter.createTableSource( null, ObjectIdentifier.of("default", "default", "t1"), new ResolvedCatalogTable( - CatalogTable.of( + CatalogTableAdapter.of( Schema.newBuilder().fromResolvedSchema(schema).build(), "mock source", new ArrayList<>(), From e5808cb04c31428fe22732582a9891a674c09eab Mon Sep 17 
00:00:00 2001 From: wulin Date: Mon, 17 Mar 2025 09:52:07 +0800 Subject: [PATCH 4/5] [FLINK-35459] Use Incremental Source Framework in Flink CDC TiKV Source Connector [FLINK-35459] add splitReader & scanFetch test. [FLINK-35459] add stream fetch. [FLINK-35459] fixed TiDBTestBase version fixed conflict fixd git fixed checkstyle fixed rat error. fixed ut test add .vscode ide is more popular in AI ide fixed spotless fixed split key method error. fixed CatalogTableAdapter error fixed FactoryUtilAdapter fixed reader fixed ut fixed ut error fixed spotless:apply fixed e2e fixed test fixed test fixed review fixed alltype s test fixed all fail ut --- .gitignore | 1 + .../docs/connectors/flink-sources/tidb-cdc.md | 596 +++++------------ .../docs/connectors/flink-sources/tidb-cdc.md | 604 +++++------------- .../tidb/TiDBEventMetadataProvider.java | 7 +- .../connector/tidb/TiDBPartition.java | 2 +- .../tidb/TiKVRichParallelSourceFunction.java | 420 ------------ .../connectors/tidb/source/TiDBDialect.java | 15 +- .../source/config/TiDBConnectorConfig.java | 5 +- .../source/converter/TiDBValueConverters.java | 17 +- .../tidb/source/fetch/EventSourceReader.java | 195 +++--- .../fetch/TiDBSourceFetchTaskContext.java | 9 +- .../source/fetch/TiDBStreamFetchTask.java | 70 +- .../source/offset/EventOffsetContext.java | 2 +- .../TiDBDeserializationConverterFactory.java | 24 + .../tidb/table/TiDBTableFactory.java | 8 + .../tidb/table/TiDBTableSource.java | 7 +- .../cdc/connectors/tidb/utils/TiDBUtils.java | 7 +- .../tikv/common/codec/TiDBRowV2Decoder.java | 66 ++ .../config/TiDBConnectorConfigTest.java | 49 ++ .../source/fetch/TiDBStreamLifecycleTest.java | 81 +++ .../offset/EventOffsetCheckpointTest.java | 66 ++ .../reader/TiDBStreamSplitReaderTest.java | 14 +- .../tidb/table/TiDBConnectorITCase.java | 14 +- .../table/TiDBTableSourceFactoryTest.java | 8 - .../tidb/table/utils/UriHostMappingTest.java | 16 +- .../flink-sql-connector-tidb-cdc/pom.xml | 56 +- 
.../cdc/common/test/utils/JdbcProxy.java | 5 +- .../cdc/connectors/tests/TiDBE2eITCase.java | 4 + .../utils/FlinkContainerTestEnvironment.java | 58 ++ 29 files changed, 973 insertions(+), 1453 deletions(-) delete mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/tikv/common/codec/TiDBRowV2Decoder.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfigTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamLifecycleTest.java create mode 100644 flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetCheckpointTest.java diff --git a/.gitignore b/.gitignore index b6ef706a748..5b8d8691525 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ scalastyle-output.xml .classpath .idea/* +.vscode/* .metadata .settings .project diff --git a/docs/content.zh/docs/connectors/flink-sources/tidb-cdc.md b/docs/content.zh/docs/connectors/flink-sources/tidb-cdc.md index 458a5d4cf45..29070fd1ddd 100644 --- a/docs/content.zh/docs/connectors/flink-sources/tidb-cdc.md +++ b/docs/content.zh/docs/connectors/flink-sources/tidb-cdc.md @@ -26,478 +26,212 @@ under the License. # TiDB CDC Connector -The TiDB CDC connector allows for reading snapshot data and incremental data from TiDB database. This document describes how to setup the TiDB CDC connector to run SQL queries against TiDB databases. 
+TiDB CDC Connector 首先读取 TiDB 表的一致性快照,随后通过 TiKV CDC 持续读取行级变更。Connector 支持 Flink SQL/Table API 和统一的 DataStream Source API。 -Dependencies ------------- - -In order to setup the TiDB CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles. +依赖项 +------ -### Maven dependency +### Maven 依赖 {{< artifact flink-connector-tidb-cdc >}} ### SQL Client JAR -```Download link is available only for stable releases.``` +下载 [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc),并将 JAR 放入 `<FLINK_HOME>/lib/`。 -Download [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc) and put it under `/lib/`. +下载链接仅适用于已发布版本。Connector 构件的 Flink 主版本必须与运行作业的 Flink 主版本一致。 -**Note:** Refer to [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc), more released versions will be available in the Maven central warehouse. 
+创建 TiDB CDC 表 +----------------- -How to create a TiDB CDC table ----------------- +Connector 使用两类连接地址: -The TiDB CDC table can be defined as following: +- `hostname` 和 `port` 连接 TiDB SQL 服务,用于发现表结构和读取快照。 +- `pd-addresses` 连接 PD 和 TiKV,用于读取增量变更。 -```sql --- checkpoint every 3000 milliseconds -Flink SQL> SET 'execution.checkpointing.interval' = '3s'; - --- register a TiDB table 'orders' in Flink SQL -Flink SQL> CREATE TABLE orders ( - order_id INT, - order_date TIMESTAMP(3), - customer_name STRING, - price DECIMAL(10, 5), - product_id INT, - order_status BOOLEAN, - PRIMARY KEY(order_id) NOT ENFORCED - ) WITH ( - 'connector' = 'tidb-cdc', - 'tikv.grpc.timeout_in_ms' = '20000', - 'pd-addresses' = 'localhost:2379', - 'database-name' = 'mydb', - 'table-name' = 'orders' -); - --- read snapshot and binlogs from orders table -Flink SQL> SELECT * FROM orders; -``` +PD 和 TiKV 对外发布的所有地址都必须能被每个 TaskManager 访问。如果集群发布的是 Flink 无法直接解析的内网主机名或地址,请配置 `host-mapping`。 -Connector Options ----------------- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionRequiredDefaultTypeDescription
connectorrequired(none)StringSpecify what connector to use, here should be 'tidb-cdc'.
database-namerequired(none)StringDatabase name of the TiDB server to monitor.
table-namerequired(none)StringTable name of the TiDB database to monitor.
scan.startup.modeoptionalinitialStringOptional startup mode for TiDB CDC consumer, valid enumerations are "initial" and "latest-offset".
pd-addressesrequired(none)StringTiKV cluster's PD address.
host-mappingoptional(none)StringTiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9.
tikv.grpc.timeout_in_msoptional(none)LongTiKV GRPC timeout in ms.
tikv.grpc.scan_timeout_in_msoptional(none)LongTiKV GRPC scan timeout in ms.
tikv.batch_get_concurrencyoptional20IntegerTiKV GRPC batch get concurrency.
tikv.*optional(none)StringPass-through TiDB client's properties.
-
- -Available Metadata ----------------- - -The following format metadata can be exposed as read-only (VIRTUAL) columns in a table definition. - - - - - - - - - - - - - - - - - - - - - - - - - - -
KeyDataTypeDescription
table_nameSTRING NOT NULLName of the table that contain the row.
database_nameSTRING NOT NULLName of the database that contain the row.
op_tsTIMESTAMP_LTZ(3) NOT NULLIt indicates the time that the change was made in the database.
If the record is read from snapshot of the table instead of the binlog, the value is always 0.
- -The extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields: ```sql -CREATE TABLE products ( - db_name STRING METADATA FROM 'database_name' VIRTUAL, - table_name STRING METADATA FROM 'table_name' VIRTUAL, - operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL, +-- 定期执行 checkpoint,使 Source 在故障恢复后能够继续读取。 +SET 'execution.checkpointing.interval' = '3s'; + +CREATE TABLE orders ( order_id INT, - order_date TIMESTAMP(0), + order_date TIMESTAMP(3), customer_name STRING, price DECIMAL(10, 5), product_id INT, order_status BOOLEAN, - PRIMARY KEY(order_id) NOT ENFORCED + PRIMARY KEY (order_id) NOT ENFORCED ) WITH ( 'connector' = 'tidb-cdc', - 'tikv.grpc.timeout_in_ms' = '20000', - 'pd-addresses' = 'localhost:2379', + 'hostname' = 'tidb.example.com', + 'port' = '4000', + 'username' = 'root', + 'password' = '', + 'pd-addresses' = 'pd-0.example.com:2379,pd-1.example.com:2379', 'database-name' = 'mydb', - 'table-name' = 'orders' + 'table-name' = 'orders', + 'scan.startup.mode' = 'initial' ); + +SELECT * FROM orders; ``` -Features +{{< hint info >}} +如果 TiDB 表存在主键,请在 Flink 表中定义相同的主键。下游算子和 Sink 可以借助主键正确处理更新和删除。当前 Connector 为一张捕获表创建一个流读取器,因此每个 Flink SQL Source 表只应对应一张 TiDB 表。 +{{< /hint >}} + +Connector 参数 +-------------- + +| 参数 | 是否必填 | 默认值 | 类型 | 说明 | +| --- | --- | --- | --- | --- | +| `connector` | 是 | 无 | String | 必须为 `tidb-cdc`。 | +| `hostname` | 是 | 无 | String | TiDB SQL 服务的主机名或 IP 地址。 | +| `port` | 是 | `4000` | Integer | TiDB SQL 服务端口。配置对象虽然提供默认值,但 Table Factory 当前将该参数声明为必填。 | +| `username` | 是 | 无 | String | 建立 TiDB JDBC 连接时使用的用户名。 | +| `password` | 是 | 无 | String | 建立 TiDB JDBC 连接时使用的密码。账号没有密码时也需要显式配置空字符串。 | +| `pd-addresses` | 是 | 无 | String | TiKV Client 使用的 PD 地址,多个地址以逗号分隔。 | +| `database-name` | 是 | 无 | String | 捕获表所在的数据库。 | +| `table-name` | 是 | 无 | String | 要捕获的单张表名称。 | +| `scan.startup.mode` | 否 | `initial` | String | 启动模式:`initial`、`snapshot`、`latest-offset` 或 `timestamp`。 | +| `scan.startup.timestamp-millis` | 条件必填 | 无 | 
Long | Epoch 毫秒时间戳;`scan.startup.mode` 为 `timestamp` 时必须配置。 | +| `server-time-zone` | 否 | `UTC` | String | 转换时间类型时使用的 TiDB 会话时区。 | +| `connect.timeout` | 否 | `30s` | Duration | 建立 TiDB JDBC 连接的最长等待时间。 | +| `connect.max-retries` | 否 | `3` | Integer | 建立 TiDB JDBC 连接的最大重试次数。 | +| `connection.pool.size` | 否 | `20` | Integer | JDBC 连接池大小。 | +| `jdbc.driver` | 否 | `com.mysql.cj.jdbc.Driver` | String | 连接 TiDB 使用的 JDBC Driver 类名。 | +| `scan.incremental.snapshot.enabled` | 否 | `true` | Boolean | Factory 可以接收该参数。当前 SQL 运行路径始终构建增量 Source,不会根据该值选择其他实现。 | +| `scan.incremental.snapshot.chunk.size` | 否 | `8096` | Integer | 每个快照 Chunk 的近似行数。 | +| `scan.snapshot.fetch.size` | 否 | `1024` | Integer | 单次快照轮询最多拉取的行数。 | +| `chunk-meta.group.size` | 否 | `1000` | Integer | 每组 Chunk 元数据包含的条目数。 | +| `scan.incremental.snapshot.chunk.key-column` | 否 | 主键第一列 | String | 切分快照 Chunk 使用的列,建议选择可比较且分布均匀的列。 | +| `chunk-key.even-distribution.factor.upper-bound` | 否 | `1000.0` | Double | 判断 Chunk Key 是否均匀分布的上界。 | +| `chunk-key.even-distribution.factor.lower-bound` | 否 | `0.05` | Double | 判断 Chunk Key 是否均匀分布的下界。 | +| `host-mapping` | 否 | 无 | String | 将 TiKV 发布的主机映射为 Flink 可访问的地址,格式为 `内网主机:外部主机;内网主机2:外部主机2`,端口保持不变。 | +| `heartbeat.interval.ms` | 否 | `30s` | Duration | 配置的心跳间隔。当前 TiDB SQL 运行路径会保存该值,但尚未把心跳配置传入 Source Builder。 | +| `table-list` | 否 | 无 | String | Factory 可以接收该参数,但当前 SQL 运行路径通过 `database-name` 和 `table-name` 构造捕获列表,不能用它替代这两个参数。 | + +`jdbc.properties.*`、`debezium.*` 和 `tikv.*` 前缀的参数都能通过表参数校验。当前 SQL 运行路径不会把任意 `tikv.*` 参数复制到 `TiConfiguration`,因此这些参数目前不会产生运行时效果。 + +启动模式 -------- -### Exactly-Once Processing -The TiDB CDC connector is a Flink Source connector which will read database snapshot first and then continues to read change events with **exactly-once processing** even failures happen. 
+- `initial`(默认):读取表快照,然后从对应变更位置继续读取增量事件。 +- `snapshot`:只读取快照,不进入增量读取阶段。 +- `latest-offset`:跳过已有数据,只读取 Source 启动之后产生的变更。 +- `timestamp`:从 `scan.startup.timestamp-millis` 指定的时间开始读取变更。 + +Source Offset 保存在 Flink Checkpoint 状态中。生产环境应开启 Checkpoint,以便 Source 在恢复时从已完成的 Checkpoint 继续读取。 + +Changelog 语义 +-------------- + +TiDB Table Source 声明完整的 Flink Changelog: -### Startup Reading Position +| RowKind | 含义 | +| --- | --- | +| `+I` | 插入数据或快照数据 | +| `-U` | 更新前的数据 | +| `+U` | 更新后的数据 | +| `-D` | 被删除的数据 | -The config option `scan.startup.mode` specifies the startup mode for TiDB CDC consumer. The valid enumerations are: +TiKV 必须为更新和删除事件提供旧行数据。定义了主键的下游 Sink 可以利用这些事件维护最新表状态。 -- `initial` (default): Takes a snapshot of structure and data of captured tables; useful if you want fetch a complete representation of the data from the captured tables. -- `latest-offset`: Takes a snapshot of the structure of captured tables only; useful if only changes happening from now onwards should be fetched. +可用元数据 +---------- -### Multi Thread Reading +元数据列是只读列,必须使用 `METADATA ... VIRTUAL` 声明。 -The TiDB CDC source can work in parallel reading, because there is multiple tasks can receive change events. 
+| Key | 数据类型 | 说明 | +| --- | --- | --- | +| `table_name` | `STRING NOT NULL` | 源表名称。 | +| `database_name` | `STRING NOT NULL` | 源数据库名称。 | +| `op_ts` | `TIMESTAMP_LTZ(3) NOT NULL` | 数据库变更时间;快照记录的时间戳为 `0`。 | +| `row_kind` | `STRING NOT NULL` | Flink RowKind:`+I`、`-U`、`+U` 或 `-D`。 | -### DataStream Source +```sql +CREATE TABLE products ( + db_name STRING METADATA FROM 'database_name' VIRTUAL, + source_table STRING METADATA FROM 'table_name' VIRTUAL, + operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL, + change_kind STRING METADATA FROM 'row_kind' VIRTUAL, + id BIGINT, + name STRING, + weight DECIMAL(10, 3), + PRIMARY KEY (id) NOT ENFORCED +) WITH ( + 'connector' = 'tidb-cdc', + 'hostname' = 'tidb.example.com', + 'port' = '4000', + 'username' = 'root', + 'password' = '', + 'pd-addresses' = 'pd.example.com:2379', + 'database-name' = 'inventory', + 'table-name' = 'products' +); +``` -The TiDB CDC connector can also be a DataStream source. You can create a SourceFunction as the following shows: +DataStream Source +----------------- -### DataStream Source +DataStream API 应使用 `TiDBSourceBuilder.TiDBIncrementalSource` 和 `StreamExecutionEnvironment#fromSource`。旧文档中的 `SourceFunction` 版 `TiDBSource` API 已不属于当前 Connector。 ```java -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.source.TiDBSourceBuilder; +import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.util.Collector; - -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; -import org.apache.flink.cdc.connectors.tidb.TiDBSource; -import 
org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema; -import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema; -import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Kvrpcpb; - -import java.util.HashMap; - -public class TiDBSourceExample { - - public static void main(String[] args) throws Exception { - - SourceFunction tidbSource = - TiDBSource.builder() - .database("mydb") // set captured database - .tableName("products") // set captured table - .tiConf( - TDBSourceOptions.getTiConfiguration( - "localhost:2399", new HashMap<>())) - .snapshotEventDeserializer( - new TiKVSnapshotEventDeserializationSchema() { - @Override - public void deserialize( - Kvrpcpb.KvPair record, Collector out) - throws Exception { - out.collect(record.toString()); - } - - @Override - public TypeInformation getProducedType() { - return BasicTypeInfo.STRING_TYPE_INFO; - } - }) - .changeEventDeserializer( - new TiKVChangeEventDeserializationSchema() { - @Override - public void deserialize( - Cdcpb.Event.Row record, Collector out) - throws Exception { - out.collect(record.toString()); - } - - @Override - public TypeInformation getProducedType() { - return BasicTypeInfo.STRING_TYPE_INFO; - } - }) - .build(); - - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - // enable checkpoint - env.enableCheckpointing(3000); - env.addSource(tidbSource).print().setParallelism(1); +JdbcIncrementalSource<String> source = + TiDBSourceBuilder.TiDBIncrementalSource.<String>builder() + .hostname("tidb.example.com") + .port(4000) + .username("root") + .password("") + .pdAddresses("pd.example.com:2379") + .databaseList("inventory") + .tableList("inventory.products") + .deserializer(new JsonDebeziumDeserializationSchema()) + .build(); - env.execute("Print TiDB Snapshot + Binlog"); - } -} +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.enableCheckpointing(3000L); +env.fromSource(source, 
WatermarkStrategy.noWatermarks(), "TiDB CDC") + .print(); +env.execute("TiDB CDC example"); ``` -Data Type Mapping ----------------- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TiDB typeFlink SQL typeNOTE
TINYINTTINYINT
- SMALLINT
- TINYINT UNSIGNED
SMALLINT
- INT
- MEDIUMINT
- SMALLINT UNSIGNED
INT
- BIGINT
- INT UNSIGNED
BIGINT
BIGINT UNSIGNEDDECIMAL(20, 0)
- FLOAT
-
FLOAT
- REAL
- DOUBLE -
DOUBLE
- NUMERIC(p, s)
- DECIMAL(p, s)
- where p <= 38
-
DECIMAL(p, s)
- NUMERIC(p, s)
- DECIMAL(p, s)
- where 38 < p <= 65
-
STRINGThe precision for DECIMAL data type is up to 65 in TiDB, but the precision for DECIMAL is limited to 38 in Flink. - So if you define a decimal column whose precision is greater than 38, you should map it to STRING to avoid precision loss.
- BOOLEAN
- TINYINT(1)
- BIT(1) -
BOOLEAN
DATEDATE
TIME [(p)]TIME [(p)]
TIMESTAMP [(p)]TIMESTAMP_LTZ [(p)]
DATETIME [(p)]TIMESTAMP [(p)] -
- CHAR(n) - CHAR(n)
- VARCHAR(n) - VARCHAR(n)
- BIT(n) - BINARY(⌈n/8⌉)
- BINARY(n) - BINARY(n)
- TINYTEXT
- TEXT
- MEDIUMTEXT
- LONGTEXT
-
STRING
- TINYBLOB
- BLOB
- MEDIUMBLOB
- LONGBLOB
-
BYTESCurrently, for BLOB data type in TiDB, only the blob whose length isn't greater than 2,147,483,647(2 ** 31 - 1) is supported.
- YEAR - INT
- ENUM - STRING
- JSON - STRINGThe JSON data type will be converted into STRING with JSON format in Flink.
- SET - ARRAY<STRING>As the SET data type in TiDB is a string object that can have zero or more values, - it should always be mapped to an array of string -
-
+数据类型映射 +------------ + +| TiDB 类型 | Flink SQL 类型 | 说明 | +| --- | --- | --- | +| `TINYINT` | `TINYINT` | | +| `TINYINT UNSIGNED`、`SMALLINT` | `SMALLINT` | | +| `SMALLINT UNSIGNED`、`MEDIUMINT`、`MEDIUMINT UNSIGNED`、`INT` | `INT` | | +| `INT UNSIGNED`、`BIGINT` | `BIGINT` | | +| `BIGINT UNSIGNED` | `DECIMAL(20, 0)` | | +| `FLOAT` | `FLOAT` | | +| `REAL`、`DOUBLE` | `DOUBLE` | | +| `NUMERIC(p,s)`、`DECIMAL(p,s)`,且 `p <= 38` | `DECIMAL(p,s)` | | +| `NUMERIC(p,s)`、`DECIMAL(p,s)`,且 `38 < p <= 65` | `STRING` | Flink Decimal 最多支持 38 位精度,使用 `STRING` 可以避免精度丢失。 | +| `BOOLEAN`、`TINYINT(1)`、`BIT(1)` | `BOOLEAN` | | +| `DATE` | `DATE` | | +| `TIME(p)` | `TIME(p)` | | +| `TIMESTAMP(p)` | `TIMESTAMP_LTZ(p)` | 根据 `server-time-zone` 解释。 | +| `DATETIME(p)` | `TIMESTAMP(p)` | | +| `CHAR(n)` | `CHAR(n)` | | +| `VARCHAR(n)` | `VARCHAR(n)` | | +| `BIT(n)` | `BINARY(ceil(n / 8))` | | +| `BINARY(n)` | `BINARY(n)` | | +| `TINYTEXT`、`TEXT`、`MEDIUMTEXT`、`LONGTEXT` | `STRING` | | +| `TINYBLOB`、`BLOB`、`MEDIUMBLOB`、`LONGBLOB` | `BYTES` | 不支持超过 Java 数组最大长度的值。 | +| `YEAR` | `INT` | | +| `ENUM` | `STRING` | | +| `SET` | `ARRAY<STRING>` | 将 SET 值拆分为字符串元素。 | +| `JSON` | `STRING` | 序列化为 JSON 文本。 | {{< top >}} diff --git a/docs/content/docs/connectors/flink-sources/tidb-cdc.md b/docs/content/docs/connectors/flink-sources/tidb-cdc.md index 0e7a78b450f..945d9855fa0 100644 --- a/docs/content/docs/connectors/flink-sources/tidb-cdc.md +++ b/docs/content/docs/connectors/flink-sources/tidb-cdc.md @@ -26,478 +26,212 @@ under the License. # TiDB CDC Connector -The TiDB CDC connector allows for reading snapshot data and incremental data from TiDB database. This document describes how to setup the TiDB CDC connector to run SQL queries against TiDB databases. +The TiDB CDC connector reads a consistent snapshot of a TiDB table and then continuously reads row-level changes from TiKV CDC. It supports the Flink SQL/Table API and the unified DataStream Source API. 
Dependencies ------------ -In order to setup the TiDB CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles. - ### Maven dependency {{< artifact flink-connector-tidb-cdc >}} ### SQL Client JAR -```Download link is available only for stable releases.``` +Download [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc) and place the JAR in `<FLINK_HOME>/lib/`. -Download [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc) and put it under `/lib/`. +The download link is available only for released versions. Use a connector artifact built for the Flink major version running the job. -**Note:** Refer to [flink-sql-connector-tidb-cdc](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc), more released versions will be available in the Maven central warehouse. +Create a TiDB CDC table +----------------------- -How to create a TiDB CDC table ----------------- +The connector uses two endpoints: -The TiDB CDC table can be defined as following: +- `hostname` and `port` connect to the TiDB SQL endpoint for schema discovery and snapshot reads. +- `pd-addresses` connects to PD and TiKV for incremental change reads. 
-```sql --- checkpoint every 3000 milliseconds -Flink SQL> SET 'execution.checkpointing.interval' = '3s'; - --- register a TiDB table 'orders' in Flink SQL -Flink SQL> CREATE TABLE orders ( - order_id INT, - order_date TIMESTAMP(3), - customer_name STRING, - price DECIMAL(10, 5), - product_id INT, - order_status BOOLEAN, - PRIMARY KEY(order_id) NOT ENFORCED - ) WITH ( - 'connector' = 'tidb-cdc', - 'tikv.grpc.timeout_in_ms' = '20000', - 'pd-addresses' = 'localhost:2379', - 'database-name' = 'mydb', - 'table-name' = 'orders' -); - --- read snapshot and binlogs from orders table -Flink SQL> SELECT * FROM orders; -``` +All addresses advertised by PD and TiKV must be reachable from every TaskManager. Use `host-mapping` when the cluster advertises internal host names or addresses that Flink cannot resolve directly. -Connector Options ----------------- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionRequiredDefaultTypeDescription
connectorrequired(none)StringSpecify what connector to use, here should be 'tidb-cdc'.
database-namerequired(none)StringDatabase name of the TiDB server to monitor.
table-namerequired(none)StringTable name of the TiDB database to monitor.
scan.startup.modeoptionalinitialStringOptional startup mode for TiDB CDC consumer, valid enumerations are "initial" and "latest-offset".
pd-addressesrequired(none)StringTiKV cluster's PD address.
host-mappingoptional(none)StringTiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9.
tikv.grpc.timeout_in_msoptional(none)LongTiKV GRPC timeout in ms.
tikv.grpc.scan_timeout_in_msoptional(none)LongTiKV GRPC scan timeout in ms.
tikv.batch_get_concurrencyoptional20IntegerTiKV GRPC batch get concurrency.
tikv.*optional(none)StringPass-through TiDB client's properties.
-
- -Available Metadata ----------------- - -The following format metadata can be exposed as read-only (VIRTUAL) columns in a table definition. - - - - - - - - - - - - - - - - - - - - - - - - - - -
KeyDataTypeDescription
table_nameSTRING NOT NULLName of the table that contain the row.
database_nameSTRING NOT NULLName of the database that contain the row.
op_tsTIMESTAMP_LTZ(3) NOT NULLIt indicates the time that the change was made in the database.
If the record is read from snapshot of the table instead of the binlog, the value is always 0.
- -The extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields: ```sql -CREATE TABLE products ( - db_name STRING METADATA FROM 'database_name' VIRTUAL, - table_name STRING METADATA FROM 'table_name' VIRTUAL, - operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL, +-- Checkpoint periodically so source progress can be recovered after a failure. +SET 'execution.checkpointing.interval' = '3s'; + +CREATE TABLE orders ( order_id INT, - order_date TIMESTAMP(0), + order_date TIMESTAMP(3), customer_name STRING, price DECIMAL(10, 5), product_id INT, order_status BOOLEAN, - PRIMARY KEY(order_id) NOT ENFORCED + PRIMARY KEY (order_id) NOT ENFORCED ) WITH ( 'connector' = 'tidb-cdc', - 'tikv.grpc.timeout_in_ms' = '20000', - 'pd-addresses' = 'localhost:2379', + 'hostname' = 'tidb.example.com', + 'port' = '4000', + 'username' = 'root', + 'password' = '', + 'pd-addresses' = 'pd-0.example.com:2379,pd-1.example.com:2379', 'database-name' = 'mydb', - 'table-name' = 'orders' + 'table-name' = 'orders', + 'scan.startup.mode' = 'initial' ); -``` - -Features --------- -### Exactly-Once Processing - -The TiDB CDC connector is a Flink Source connector which will read database snapshot first and then continues to read change events with **exactly-once processing** even failures happen. - -### Startup Reading Position - -The config option `scan.startup.mode` specifies the startup mode for TiDB CDC consumer. The valid enumerations are: -- `initial` (default): Takes a snapshot of structure and data of captured tables; useful if you want fetch a complete representation of the data from the captured tables. -- `latest-offset`: Takes a snapshot of the structure of captured tables only; useful if only changes happening from now onwards should be fetched. - -### Multi Thread Reading +SELECT * FROM orders; +``` -The TiDB CDC source can work in parallel reading, because there is multiple tasks can receive change events. 
+{{< hint info >}} +Define a primary key in the Flink table whenever the TiDB table has one. The key lets downstream operators and sinks interpret updates and deletes correctly. The connector currently creates one stream reader for one captured table, so use one SQL source table per TiDB table. +{{< /hint >}} + +Connector options +----------------- + +| Option | Required | Default | Type | Description | +| --- | --- | --- | --- | --- | +| `connector` | Yes | (none) | String | Must be `tidb-cdc`. | +| `hostname` | Yes | (none) | String | Host name or IP address of the TiDB SQL endpoint. | +| `port` | Yes | `4000` | Integer | Port of the TiDB SQL endpoint. Although a default exists in the configuration object, the table factory requires this option. | +| `username` | Yes | (none) | String | User name used for the TiDB JDBC connection. | +| `password` | Yes | (none) | String | Password used for the TiDB JDBC connection. Specify an empty string when the account has no password. | +| `pd-addresses` | Yes | (none) | String | Comma-separated PD endpoints used by the TiKV client. | +| `database-name` | Yes | (none) | String | Database containing the captured table. | +| `table-name` | Yes | (none) | String | Name of the single table to capture. | +| `scan.startup.mode` | No | `initial` | String | Startup mode: `initial`, `snapshot`, `latest-offset`, or `timestamp`. | +| `scan.startup.timestamp-millis` | Conditional | (none) | Long | Start timestamp in epoch milliseconds. Required when `scan.startup.mode` is `timestamp`. | +| `server-time-zone` | No | `UTC` | String | TiDB session time zone used when converting temporal values. | +| `connect.timeout` | No | `30s` | Duration | Maximum time to wait when opening a TiDB JDBC connection. | +| `connect.max-retries` | No | `3` | Integer | Maximum number of retries when opening a TiDB JDBC connection. | +| `connection.pool.size` | No | `20` | Integer | JDBC connection pool size. 
| +| `jdbc.driver` | No | `com.mysql.cj.jdbc.Driver` | String | JDBC driver class used to connect to TiDB. | +| `scan.incremental.snapshot.enabled` | No | `true` | Boolean | Accepted by the factory. The current SQL runtime always builds the incremental source and does not branch on this value. | +| `scan.incremental.snapshot.chunk.size` | No | `8096` | Integer | Approximate number of rows in each snapshot chunk. | +| `scan.snapshot.fetch.size` | No | `1024` | Integer | Maximum number of rows fetched by one snapshot poll. | +| `chunk-meta.group.size` | No | `1000` | Integer | Number of chunk metadata entries in each metadata group. | +| `scan.incremental.snapshot.chunk.key-column` | No | first primary-key column | String | Column used to split snapshot chunks. Use a comparable, evenly distributed column when possible. | +| `chunk-key.even-distribution.factor.upper-bound` | No | `1000.0` | Double | Upper bound used to decide whether the chunk key is evenly distributed. | +| `chunk-key.even-distribution.factor.lower-bound` | No | `0.05` | Double | Lower bound used to decide whether the chunk key is evenly distributed. | +| `host-mapping` | No | (none) | String | Maps advertised TiKV hosts to addresses reachable by Flink. Format: `internalHost:externalHost;internalHost2:externalHost2`. The port is preserved. | +| `heartbeat.interval.ms` | No | `30s` | Duration | Configured heartbeat interval. The current TiDB SQL runtime stores this value but does not attach a heartbeat setting to the source builder. | +| `table-list` | No | (none) | String | Accepted by the factory, but the current SQL runtime builds the capture list from `database-name` and `table-name`; do not use it as a replacement for those options. | + +Options prefixed with `jdbc.properties.`, `debezium.`, and `tikv.` are accepted during table validation. In the current SQL runtime, arbitrary `tikv.*` keys are not copied into `TiConfiguration` and therefore have no runtime effect. 
+ +Startup modes +------------- + +- `initial` (default): reads the table snapshot and then continues from the corresponding change position. +- `snapshot`: reads the snapshot only and stops before streaming changes. +- `latest-offset`: skips existing rows and reads changes produced after the source starts. +- `timestamp`: starts the change stream at `scan.startup.timestamp-millis`. + +The source offset is stored in Flink checkpoint state. Enable checkpointing in production so the source can resume from a completed checkpoint after recovery. + +Changelog semantics +------------------- + +The table source declares the full Flink changelog mode: + +| Row kind | Meaning | +| --- | --- | +| `+I` | Insert or snapshot row | +| `-U` | Value before an update | +| `+U` | Value after an update | +| `-D` | Deleted value | + +TiKV must provide the old row value for updates and deletes. Downstream sinks that have a primary key can use these events to maintain the latest table state. + +Available metadata +------------------ + +Metadata columns are read-only and must be declared with `METADATA ... VIRTUAL`. + +| Key | Data type | Description | +| --- | --- | --- | +| `table_name` | `STRING NOT NULL` | Name of the source table. | +| `database_name` | `STRING NOT NULL` | Name of the source database. | +| `op_ts` | `TIMESTAMP_LTZ(3) NOT NULL` | Database change timestamp. Snapshot records use timestamp `0`. | +| `row_kind` | `STRING NOT NULL` | Flink row kind: `+I`, `-U`, `+U`, or `-D`. 
| -### DataStream Source +```sql +CREATE TABLE products ( + db_name STRING METADATA FROM 'database_name' VIRTUAL, + source_table STRING METADATA FROM 'table_name' VIRTUAL, + operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL, + change_kind STRING METADATA FROM 'row_kind' VIRTUAL, + id BIGINT, + name STRING, + weight DECIMAL(10, 3), + PRIMARY KEY (id) NOT ENFORCED +) WITH ( + 'connector' = 'tidb-cdc', + 'hostname' = 'tidb.example.com', + 'port' = '4000', + 'username' = 'root', + 'password' = '', + 'pd-addresses' = 'pd.example.com:2379', + 'database-name' = 'inventory', + 'table-name' = 'products' +); +``` -The TiDB CDC connector can also be a DataStream source. You can create a SourceFunction as the following shows: +DataStream Source +----------------- -### DataStream Source +Use `TiDBSourceBuilder.TiDBIncrementalSource` with `StreamExecutionEnvironment#fromSource`. The legacy `SourceFunction`-based `TiDBSource` API shown in older documentation is not part of the current connector. 
```java -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.source.TiDBSourceBuilder; +import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.util.Collector; - -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; -import org.apache.flink.cdc.connectors.tidb.TiDBSource; -import org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema; -import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema; -import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Kvrpcpb; - -import java.util.HashMap; - -public class TiDBSourceExample { - - public static void main(String[] args) throws Exception { - - SourceFunction tidbSource = - TiDBSource.builder() - .database("mydb") // set captured database - .tableName("products") // set captured table - .tiConf( - TDBSourceOptions.getTiConfiguration( - "localhost:2399", new HashMap<>())) - .snapshotEventDeserializer( - new TiKVSnapshotEventDeserializationSchema() { - @Override - public void deserialize( - Kvrpcpb.KvPair record, Collector out) - throws Exception { - out.collect(record.toString()); - } - - @Override - public TypeInformation getProducedType() { - return BasicTypeInfo.STRING_TYPE_INFO; - } - }) - .changeEventDeserializer( - new TiKVChangeEventDeserializationSchema() { - @Override - public void deserialize( - Cdcpb.Event.Row record, Collector out) - throws Exception { - out.collect(record.toString()); - } - - @Override - public TypeInformation getProducedType() { - return BasicTypeInfo.STRING_TYPE_INFO; - } - }) - .build(); - - 
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - // enable checkpoint - env.enableCheckpointing(3000); - env.addSource(tidbSource).print().setParallelism(1); +JdbcIncrementalSource<String> source = + TiDBSourceBuilder.TiDBIncrementalSource.<String>builder() + .hostname("tidb.example.com") + .port(4000) + .username("root") + .password("") + .pdAddresses("pd.example.com:2379") + .databaseList("inventory") + .tableList("inventory.products") + .deserializer(new JsonDebeziumDeserializationSchema()) + .build(); - env.execute("Print TiDB Snapshot + Binlog"); - } -} +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); +env.enableCheckpointing(3000L); +env.fromSource(source, WatermarkStrategy.noWatermarks(), "TiDB CDC") + .print(); +env.execute("TiDB CDC example"); ``` -Data Type Mapping ----------------- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TiDB typeFlink SQL typeNOTE
TINYINTTINYINT
- SMALLINT
- TINYINT UNSIGNED
SMALLINT
- INT
- MEDIUMINT
- SMALLINT UNSIGNED
INT
- BIGINT
- INT UNSIGNED
BIGINT
BIGINT UNSIGNEDDECIMAL(20, 0)
- FLOAT
-
FLOAT
- REAL
- DOUBLE -
DOUBLE
- NUMERIC(p, s)
- DECIMAL(p, s)
- where p <= 38
-
DECIMAL(p, s)
- NUMERIC(p, s)
- DECIMAL(p, s)
- where 38 < p <= 65
-
STRINGThe precision for DECIMAL data type is up to 65 in TiDB, but the precision for DECIMAL is limited to 38 in Flink. - So if you define a decimal column whose precision is greater than 38, you should map it to STRING to avoid precision loss.
- BOOLEAN
- TINYINT(1)
- BIT(1) -
BOOLEAN
DATEDATE
TIME [(p)]TIME [(p)]
TIMESTAMP [(p)]TIMESTAMP_LTZ [(p)]
DATETIME [(p)]TIMESTAMP [(p)] -
- CHAR(n) - CHAR(n)
- VARCHAR(n) - VARCHAR(n)
- BIT(n) - BINARY(⌈n/8⌉)
- BINARY(n) - BINARY(n)
- TINYTEXT
- TEXT
- MEDIUMTEXT
- LONGTEXT
-
STRING
- TINYBLOB
- BLOB
- MEDIUMBLOB
- LONGBLOB
-
BYTESCurrently, for BLOB data type in TiDB, only the blob whose length isn't greater than 2,147,483,647(2 ** 31 - 1) is supported.
- YEAR - INT
- ENUM - STRING
- JSON - STRINGThe JSON data type will be converted into STRING with JSON format in Flink.
- SET - ARRAY<STRING>As the SET data type in TiDB is a string object that can have zero or more values, - it should always be mapped to an array of string -
-
+Data type mapping +----------------- + +| TiDB type | Flink SQL type | Notes | +| --- | --- | --- | +| `TINYINT` | `TINYINT` | | +| `TINYINT UNSIGNED`, `SMALLINT` | `SMALLINT` | | +| `SMALLINT UNSIGNED`, `MEDIUMINT`, `MEDIUMINT UNSIGNED`, `INT` | `INT` | | +| `INT UNSIGNED`, `BIGINT` | `BIGINT` | | +| `BIGINT UNSIGNED` | `DECIMAL(20, 0)` | | +| `FLOAT` | `FLOAT` | | +| `REAL`, `DOUBLE` | `DOUBLE` | | +| `NUMERIC(p,s)`, `DECIMAL(p,s)`, `p <= 38` | `DECIMAL(p,s)` | | +| `NUMERIC(p,s)`, `DECIMAL(p,s)`, `38 < p <= 65` | `STRING` | Flink decimals support at most 38 digits; use `STRING` to avoid precision loss. | +| `BOOLEAN`, `TINYINT(1)`, `BIT(1)` | `BOOLEAN` | | +| `DATE` | `DATE` | | +| `TIME(p)` | `TIME(p)` | | +| `TIMESTAMP(p)` | `TIMESTAMP_LTZ(p)` | Interpreted using `server-time-zone`. | +| `DATETIME(p)` | `TIMESTAMP(p)` | | +| `CHAR(n)` | `CHAR(n)` | | +| `VARCHAR(n)` | `VARCHAR(n)` | | +| `BIT(n)` | `BINARY(ceil(n / 8))` | | +| `BINARY(n)` | `BINARY(n)` | | +| `TINYTEXT`, `TEXT`, `MEDIUMTEXT`, `LONGTEXT` | `STRING` | | +| `TINYBLOB`, `BLOB`, `MEDIUMBLOB`, `LONGBLOB` | `BYTES` | Values larger than the maximum Java array size are not supported. | +| `YEAR` | `INT` | | +| `ENUM` | `STRING` | | +| `SET` | `ARRAY<STRING>` | Values are split into string elements. | +| `JSON` | `STRING` | Serialized as JSON text. 
| {{< top >}} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java index e1ca467d9fb..396f95a8cb5 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java @@ -53,10 +53,13 @@ public Map getEventSourcePosition( return null; } final Struct sourceInfo = value.getStruct(Envelope.FieldName.SOURCE); - if (source == null) { + if (sourceInfo == null) { return null; } - return Collect.hashMapOf(COMMIT_VERSION_KEY, sourceInfo.getString(COMMIT_VERSION_KEY)); + final Long commitVersion = sourceInfo.getInt64(COMMIT_VERSION_KEY); + return commitVersion == null + ? 
null + : Collect.hashMapOf(COMMIT_VERSION_KEY, String.valueOf(commitVersion)); } @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java index f14e595ac65..ca5eea0fdcd 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java @@ -32,7 +32,7 @@ public TiDBPartition(String serverName) { @Override public Map getSourcePartition() { - return Collections.emptyMap(); + return Collections.singletonMap("server", serverName); } @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java deleted file mode 100644 index 16c130b38c8..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.common.state.CheckpointListener; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.base.LongSerializer; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.connectors.tidb.metrics.TiDBSourceMetrics; -import org.apache.flink.cdc.connectors.tidb.table.StartupMode; -import org.apache.flink.cdc.connectors.tidb.table.utils.TableKeyRangeUtils; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.util.Collector; -import org.apache.flink.util.Preconditions; - -import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.tikv.cdc.CDCClient; -import org.tikv.common.TiConfiguration; -import org.tikv.common.TiSession; -import org.tikv.common.key.RowKey; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.common.meta.TiTimestamp; 
-import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Coprocessor; -import org.tikv.kvproto.Kvrpcpb; -import org.tikv.shade.com.google.protobuf.ByteString; -import org.tikv.txn.KVClient; - -import java.util.List; -import java.util.Objects; -import java.util.TreeMap; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; - -/** - * The source implementation for TiKV that read snapshot events first and then read the change - * event. - */ -public class TiKVRichParallelSourceFunction extends RichParallelSourceFunction - implements CheckpointListener, CheckpointedFunction, ResultTypeQueryable { - - private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(TiKVRichParallelSourceFunction.class); - private static final long SNAPSHOT_VERSION_EPOCH = -1L; - private static final long STREAMING_VERSION_START_EPOCH = 0L; - - private final TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; - private final TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; - private final TiConfiguration tiConf; - private final StartupMode startupMode; - private final String database; - private final String tableName; - - /** Task local variables. 
*/ - private transient TiSession session = null; - - private transient Coprocessor.KeyRange keyRange = null; - private transient CDCClient cdcClient = null; - private transient SourceFunction.SourceContext sourceContext = null; - private transient volatile long resolvedTs = -1L; - private transient TreeMap prewrites = null; - private transient TreeMap commits = null; - private transient BlockingQueue committedEvents = null; - private transient OutputCollector outputCollector; - - private transient boolean running = true; - private transient ExecutorService executorService; - private transient TiDBSourceMetrics sourceMetrics; - - /** offset state. */ - private transient ListState offsetState; - - private static final long CLOSE_TIMEOUT = 30L; - - public TiKVRichParallelSourceFunction( - TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema, - TiKVChangeEventDeserializationSchema changeEventDeserializationSchema, - TiConfiguration tiConf, - StartupMode startupMode, - String database, - String tableName) { - this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; - this.changeEventDeserializationSchema = changeEventDeserializationSchema; - this.tiConf = tiConf; - this.startupMode = startupMode; - this.database = database; - this.tableName = tableName; - } - - @Override - public void open(final Configuration config) throws Exception { - super.open(config); - session = TiSession.create(tiConf); - TiTableInfo tableInfo = session.getCatalog().getTable(database, tableName); - if (tableInfo == null) { - throw new RuntimeException( - String.format("Table %s.%s does not exist.", database, tableName)); - } - long tableId = tableInfo.getId(); - keyRange = - TableKeyRangeUtils.getTableKeyRange( - tableId, - getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks(), - getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()); - cdcClient = new CDCClient(session, keyRange); - prewrites = new TreeMap<>(); - commits = new TreeMap<>(); 
- // cdc event will lose if pull cdc event block when region split - // use queue to separate read and write to ensure pull event unblock. - // since sink jdbc is slow, 5000W queue size may be safe size. - committedEvents = new LinkedBlockingQueue<>(); - outputCollector = new OutputCollector<>(); - resolvedTs = - startupMode == StartupMode.INITIAL - ? SNAPSHOT_VERSION_EPOCH - : STREAMING_VERSION_START_EPOCH; - ThreadFactory threadFactory = - new ThreadFactoryBuilder() - .setNameFormat( - "tidb-source-function-" - + getRuntimeContext().getTaskInfo().getIndexOfThisSubtask()) - .build(); - executorService = Executors.newSingleThreadExecutor(threadFactory); - final MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - sourceMetrics = new TiDBSourceMetrics(metricGroup); - sourceMetrics.registerMetrics(); - } - - @Override - public void run(final SourceFunction.SourceContext ctx) throws Exception { - sourceContext = ctx; - outputCollector.context = sourceContext; - - if (startupMode == StartupMode.INITIAL) { - synchronized (sourceContext.getCheckpointLock()) { - readSnapshotEvents(); - } - } else { - LOG.info("Skip snapshot read"); - resolvedTs = session.getTimestamp().getVersion(); - } - - LOG.info("start read change events"); - cdcClient.start(resolvedTs); - running = true; - readChangeEvents(); - } - - private void handleRow(final Cdcpb.Event.Row row) { - if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { - // Don't handle index key for now - return; - } - LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); - switch (row.getType()) { - case COMMITTED: - prewrites.put(RowKeyWithTs.ofStart(row), row); - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case COMMIT: - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case PREWRITE: - prewrites.put(RowKeyWithTs.ofStart(row), row); - break; - case ROLLBACK: - prewrites.remove(RowKeyWithTs.ofStart(row)); - break; - default: - LOG.warn("Unsupported row type:" + 
row.getType()); - } - } - - protected void readSnapshotEvents() throws Exception { - LOG.info("read snapshot events"); - try (KVClient scanClient = session.createKVClient()) { - long startTs = session.getTimestamp().getVersion(); - ByteString start = keyRange.getStart(); - while (true) { - final List segment = - scanClient.scan(start, keyRange.getEnd(), startTs); - - if (segment.isEmpty()) { - resolvedTs = startTs; - break; - } - - for (final Kvrpcpb.KvPair pair : segment) { - if (TableKeyRangeUtils.isRecordKey(pair.getKey().toByteArray())) { - snapshotEventDeserializationSchema.deserialize(pair, outputCollector); - reportMetrics(0L, startTs); - } - } - - start = - RowKey.toRawKey(segment.get(segment.size() - 1).getKey()) - .next() - .toByteString(); - } - } - } - - protected void readChangeEvents() throws Exception { - LOG.info("read change event from resolvedTs:{}", resolvedTs); - // child thread to sink committed rows. - executorService.execute( - () -> { - while (running) { - try { - Cdcpb.Event.Row committedRow = committedEvents.take(); - changeEventDeserializationSchema.deserialize( - committedRow, outputCollector); - // use startTs of row as messageTs, use commitTs of row as fetchTs - reportMetrics(committedRow.getStartTs(), committedRow.getCommitTs()); - } catch (Exception e) { - e.printStackTrace(); - } - } - }); - while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { - for (int i = 0; i < 1000; i++) { - final Cdcpb.Event.Row row = cdcClient.get(); - if (row == null) { - break; - } - handleRow(row); - } - resolvedTs = cdcClient.getMaxResolvedTs(); - if (commits.size() > 0) { - flushRows(resolvedTs); - } - } - } - - protected void flushRows(final long timestamp) throws Exception { - Preconditions.checkState(sourceContext != null, "sourceContext shouldn't be null"); - synchronized (sourceContext) { - while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { - final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); - final 
Cdcpb.Event.Row prewriteRow = - prewrites.remove(RowKeyWithTs.ofStart(commitRow)); - // if pull cdc event block when region split, cdc event will lose. - committedEvents.offer(prewriteRow); - } - } - } - - @Override - public void cancel() { - try { - running = false; - if (cdcClient != null) { - cdcClient.close(); - } - if (executorService != null) { - executorService.shutdown(); - if (!executorService.awaitTermination(CLOSE_TIMEOUT, TimeUnit.SECONDS)) { - LOG.warn( - "Failed to close the tidb source function in {} seconds.", - CLOSE_TIMEOUT); - } - } - } catch (final Exception e) { - LOG.error("Unable to close cdcClient", e); - } - } - - @Override - public void snapshotState(final FunctionSnapshotContext context) throws Exception { - LOG.info( - "snapshotState checkpoint: {} at resolvedTs: {}", - context.getCheckpointId(), - resolvedTs); - flushRows(resolvedTs); - offsetState.clear(); - offsetState.add(resolvedTs); - } - - @Override - public void initializeState(final FunctionInitializationContext context) throws Exception { - LOG.info("initialize checkpoint"); - offsetState = - context.getOperatorStateStore() - .getListState( - new ListStateDescriptor<>( - "resolvedTsState", LongSerializer.INSTANCE)); - if (context.isRestored()) { - for (final Long offset : offsetState.get()) { - resolvedTs = offset; - LOG.info("Restore State from resolvedTs: {}", resolvedTs); - return; - } - } else { - resolvedTs = 0; - LOG.info("Initialize State from resolvedTs: {}", resolvedTs); - } - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - // do nothing - } - - @Override - public TypeInformation getProducedType() { - return snapshotEventDeserializationSchema.getProducedType(); - } - - // --------------------------------------- - // static Utils classes - // --------------------------------------- - private static class RowKeyWithTs implements Comparable { - private final long timestamp; - private final RowKey rowKey; - - private 
RowKeyWithTs(final long timestamp, final RowKey rowKey) { - this.timestamp = timestamp; - this.rowKey = rowKey; - } - - private RowKeyWithTs(final long timestamp, final byte[] key) { - this(timestamp, RowKey.decode(key)); - } - - @Override - public int compareTo(final RowKeyWithTs that) { - int res = Long.compare(this.timestamp, that.timestamp); - if (res == 0) { - res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); - } - if (res == 0) { - res = Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); - } - return res; - } - - @Override - public int hashCode() { - return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); - } - - @Override - public boolean equals(final Object thatObj) { - if (thatObj instanceof RowKeyWithTs) { - final RowKeyWithTs that = (RowKeyWithTs) thatObj; - return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); - } - return false; - } - - static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); - } - - static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); - } - } - - private static class OutputCollector implements Collector { - - private SourceFunction.SourceContext context; - - @Override - public void collect(T record) { - context.collect(record); - } - - @Override - public void close() { - // do nothing - } - } - - private void reportMetrics(long messageTs, long fetchTs) { - long now = System.currentTimeMillis(); - // record the latest process time - sourceMetrics.recordProcessTime(now); - long messageTimestamp = TiTimestamp.extractPhysical(messageTs); - long fetchTimestamp = TiTimestamp.extractPhysical(fetchTs); - if (messageTimestamp > 0L) { - // report fetch delay - if (fetchTimestamp >= messageTimestamp) { - sourceMetrics.recordFetchDelay(fetchTimestamp - messageTimestamp); - } - // report emit delay - 
sourceMetrics.recordEmitDelay(now - messageTimestamp); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java index d1598fcc0cb..daa61e86051 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java @@ -46,8 +46,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nullable; - import java.io.IOException; import java.sql.SQLException; import java.util.HashMap; @@ -63,7 +61,6 @@ public class TiDBDialect implements JdbcDataSourceDialect { private final TiDBSourceConfig sourceConfig; private transient TiDBSchema tiDBSchema; - @Nullable private TiDBStreamFetchTask streamFetchTask; public TiDBDialect(TiDBSourceConfig sourceConfig) { this.sourceConfig = sourceConfig; @@ -87,7 +84,7 @@ public Offset displayCurrentOffset(JdbcSourceConfig sourceConfig) { @Override public boolean isDataCollectionIdCaseSensitive(JdbcSourceConfig sourceConfig) { try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) { - return TiDBConnectionUtils.isTableIdCaseInsensitive(jdbcConnection); + return !TiDBConnectionUtils.isTableIdCaseInsensitive(jdbcConnection); } catch (SQLException e) { throw new FlinkRuntimeException("Error reading TiDB variables: " + e.getMessage(), e); } @@ -110,13 +107,6 @@ public FetchTask.Context createFetchTaskContext(JdbcSourceConfig sourceConfig) { return new TiDBSourceFetchTaskContext(sourceConfig, this, openJdbcConnection()); } - @Override - public void notifyCheckpointComplete(long checkpointId, Offset offset) 
throws Exception { - if (streamFetchTask != null) { - streamFetchTask.commitCurrentOffset(offset); - } - } - @Override public boolean isIncludeDataCollection(JdbcSourceConfig sourceConfig, TableId tableId) { // temp @@ -203,8 +193,7 @@ public FetchTask createFetchTask(SourceSplitBase sourceSplitBas if (sourceSplitBase.isSnapshotSplit()) { return new TiDBScanFetchTask(sourceSplitBase.asSnapshotSplit()); } else { - this.streamFetchTask = new TiDBStreamFetchTask(sourceSplitBase.asStreamSplit()); - return this.streamFetchTask; + return new TiDBStreamFetchTask(sourceSplitBase.asStreamSplit()); } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java index 3bd6e481a68..c997e4ac19b 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java @@ -64,14 +64,15 @@ public class TiDBConnectorConfig extends RelationalDatabaseConnectorConfig { public static final Field BIGINT_UNSIGNED_HANDLING_MODE = Field.create("bigint.unsigned.handling.mode") .withDisplayName("BIGINT UNSIGNED Handling") - .withEnum(BigIntUnsignedHandlingMode.class, BigIntUnsignedHandlingMode.LONG) + .withEnum(BigIntUnsignedHandlingMode.class, BigIntUnsignedHandlingMode.PRECISE) .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 27)) .withWidth(ConfigDef.Width.SHORT) .withImportance(ConfigDef.Importance.MEDIUM) .withDescription( "Specify how BIGINT UNSIGNED columns should be represented in change events, including:" + "'precise' uses 
java.math.BigDecimal to represent values, which are encoded in the change events using a binary representation and Kafka Connect's 'org.apache.kafka.connect.data.Decimal' type; " - + "'long' (the default) represents values using Java's 'long', which may not offer the precision but will be far easier to use in consumers."); + + "'long' represents values using Java's 'long', which may not offer the precision but will be far easier to use in consumers. " + + "The default is 'precise'."); public static final Field ENABLE_TIME_ADJUSTER = Field.create("enable.time.adjuster") diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java index fb2d3d4c353..61924a29d75 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java @@ -71,6 +71,7 @@ public interface ParsingErrorHandler { } private static final Logger LOGGER = LoggerFactory.getLogger(TiDBValueConverters.class); + /** Used to parse values of TIME columns. Format: 000:00:00.000000. 
*/ private static final Pattern TIME_FIELD_PATTERN = Pattern.compile("(\\-?[0-9]*):([0-9]*):([0-9]*)(\\.([0-9]*))?"); @@ -716,9 +717,23 @@ protected Object convertTimestampToLocalDateTime(Column column, Field fieldDefn, } } + @Override + protected Object convertBit(Column column, Field fieldDefn, Object data) { + if (data instanceof byte[]) { + boolean value = false; + for (byte current : (byte[]) data) { + value |= current != 0; + } + data = value; + } + return super.convertBit(column, fieldDefn, data); + } + @Override protected Object convertTime(Column column, Field fieldDefn, Object data) { - if (data instanceof String) { + if (data instanceof Long) { + data = Duration.ofNanos((Long) data); + } else if (data instanceof String) { data = Strings.asDuration((String) data); } return super.convertTime(column, fieldDefn, data); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java index ce9d7403124..c5a480afc81 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java @@ -19,7 +19,6 @@ import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; -import org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkKind; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; import 
org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; @@ -61,10 +60,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.IntStream; import static org.tikv.common.codec.TableCodec.decodeObjects; +import static org.tikv.common.codec.TiDBRowV2Decoder.decodeObjectsPreservingBinary; /** TiDB streaming change event source reader. */ public class EventSourceReader @@ -77,7 +78,7 @@ public class EventSourceReader private final ErrorHandler errorHandler; private final TiDBSourceFetchTaskContext taskContext; private final Map> fieldIndexMap = new HashMap<>(); - public ChangeEventSourceContext context; + public volatile ChangeEventSourceContext context; private static final long STREAMING_VERSION_START_EPOCH = 0L; @@ -93,8 +94,10 @@ public class EventSourceReader private transient TableId tableId; private transient TiTableInfo tableInfo; - private transient boolean running = true; + private transient volatile boolean running; + private transient volatile Thread executionThread; private transient ExecutorService executorService; + private final AtomicBoolean closed = new AtomicBoolean(false); public EventSourceReader( TiDBConnectorConfig connectorConfig, @@ -111,33 +114,43 @@ public EventSourceReader( } @Override - public void init() throws InterruptedException { - StreamingChangeEventSource.super.init(); - session = TiSession.create(ticonf); - Set tableIds = this.split.getTableSchemas().keySet(); - if (tableIds.isEmpty() && tableIds.size() != 1) { - LOG.error("Currently only single table ingest is supported."); + public synchronized void init() throws InterruptedException { + if (closed.get()) { return; } - this.tableId = tableIds.stream().findFirst().get(); - this.tableInfo = session.getCatalog().getTable(tableId.catalog(), tableId.table()); - if (tableInfo == null) 
{ - throw new RuntimeException( - String.format( - "Table %s.%s does not exist.", tableId.catalog(), tableId.table())); + StreamingChangeEventSource.super.init(); + try { + session = TiSession.create(ticonf); + Set tableIds = this.split.getTableSchemas().keySet(); + if (tableIds.size() != 1) { + throw new IllegalStateException( + "Currently only single table ingest is supported, but found " + + tableIds.size() + + " tables."); + } + this.tableId = tableIds.stream().findFirst().get(); + this.tableInfo = session.getCatalog().getTable(tableId.catalog(), tableId.table()); + if (tableInfo == null) { + throw new RuntimeException( + String.format( + "Table %s.%s does not exist.", tableId.catalog(), tableId.table())); + } + keyRange = TableKeyRangeUtils.getTableKeyRange(tableInfo.getId(), 1, 0); + cdcClient = new CDCClient(session, keyRange); + prewrites = new TreeMap<>(); + commits = new TreeMap<>(); + // cdc event will lose if pull cdc event block when region split + // use queue to separate read and write to ensure pull event unblock. + // since sink jdbc is slow, 5000W queue size may be safe size. + committedEvents = new LinkedBlockingQueue<>(); + resolvedTs = EventOffset.getStartTs(this.split.getStartingOffset()); + ThreadFactory threadFactory = + new ThreadFactoryBuilder().setNameFormat("tidb-source-function-0").build(); + executorService = Executors.newSingleThreadExecutor(threadFactory); + } catch (RuntimeException e) { + close(); + throw e; } - keyRange = TableKeyRangeUtils.getTableKeyRange(tableInfo.getId(), 1, 0); - cdcClient = new CDCClient(session, keyRange); - prewrites = new TreeMap<>(); - commits = new TreeMap<>(); - // cdc event will lose if pull cdc event block when region split - // use queue to separate read and write to ensure pull event unblock. - // since sink jdbc is slow, 5000W queue size may be safe size. 
- committedEvents = new LinkedBlockingQueue<>(); - resolvedTs = EventOffset.getStartTs(this.split.getStartingOffset()); - ThreadFactory threadFactory = - new ThreadFactoryBuilder().setNameFormat("tidb-source-function-0").build(); - executorService = Executors.newSingleThreadExecutor(threadFactory); } @Override @@ -146,26 +159,36 @@ public void execute( TiDBPartition partition, EventOffsetContext offsetContext) throws InterruptedException { - this.context = context; - if (connectorConfig.getSourceConfig().getStartupOptions().isSnapshotOnly()) { - LOG.info("Streaming is not enabled in current configuration"); + if (closed.get()) { return; } - this.taskContext.getDatabaseSchema().assureNonEmptySchema(); - cdcClient.start(resolvedTs); + this.context = context; + this.executionThread = Thread.currentThread(); running = true; - EventOffsetContext effectiveOffsetContext = - offsetContext != null - ? offsetContext - : EventOffsetContext.initial(this.connectorConfig); try { - EventOffset currentOffset = new EventOffset(effectiveOffsetContext.getOffset()); - if (currentOffset.isBefore(split.getStartingOffset())) { + if (connectorConfig.getSourceConfig().getStartupOptions().isSnapshotOnly()) { + LOG.info("Streaming is not enabled in current configuration"); return; } + this.taskContext.getDatabaseSchema().assureNonEmptySchema(); + cdcClient.start(resolvedTs); + EventOffsetContext effectiveOffsetContext = + offsetContext != null + ? 
offsetContext + : EventOffsetContext.initial(this.connectorConfig); readChangeEvents(partition, effectiveOffsetContext); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + if (!closed.get()) { + throw e; + } } catch (Exception e) { - this.errorHandler.setProducerThrowable(e); + if (!closed.get()) { + this.errorHandler.setProducerThrowable(e); + } + } finally { + running = false; + executionThread = null; } } @@ -175,44 +198,22 @@ protected void readChangeEvents(TiDBPartition partition, EventOffsetContext offs // child thread to sink committed rows. executorService.execute( () -> { - while (running) { + while (running && context.isRunning()) { try { Cdcpb.Event.Row committedRow = committedEvents.take(); - EventOffset currentOffset = new EventOffset(offsetContext.getOffset()); - if (currentOffset.isBefore(split.getStartingOffset())) { - return; - } - if (!EventOffset.NO_STOPPING_OFFSET.equals(split.getEndingOffset()) - && currentOffset.isAtOrAfter(split.getEndingOffset())) { - // send watermark event; - try { - eventDispatcher.dispatchWatermarkEvent( - partition.getSourcePartition(), - split, - currentOffset, - WatermarkKind.END); - } catch (InterruptedException e) { - LOG.error("Send signal event error.", e); - errorHandler.setProducerThrowable( - new RuntimeException( - "Error processing log signal event", e)); - } - ((StoppableChangeEventSourceContext) context) - .stopChangeEventSource(); - return; - } - - final EventOffsetContext localOffsetContext = - new EventOffsetContext.Loader(this.connectorConfig) - .load(currentOffset.getOffset()); - emitChangeEvent(partition, localOffsetContext, committedRow); + emitChangeEvent(partition, offsetContext, committedRow); // use startTs of row as messageTs, use commitTs of row as fetchTs + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; } catch (Exception e) { - LOG.error("Read change events error.", e); + if (running && context.isRunning()) { + LOG.error("Read 
change events error.", e); + } } } }); - while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { + while (running && context.isRunning() && resolvedTs >= STREAMING_VERSION_START_EPOCH) { for (int i = 0; i < 1000; i++) { final Cdcpb.Event.Row row = cdcClient.get(); if (row == null) { @@ -294,7 +295,11 @@ private Object[] getSerializableObject( Object[] tiKVValueAfter; if (value != null && !value.isEmpty()) { - tiKVValueAfter = decodeObjects(value.toByteArray(), handle, tableInfo); + byte[] encodedValue = value.toByteArray(); + tiKVValueAfter = + Byte.toUnsignedInt(encodedValue[0]) == org.tikv.common.codec.RowV2.CODEC_VER + ? decodeObjectsPreservingBinary(encodedValue, handle, tableInfo) + : decodeObjects(encodedValue, handle, tableInfo); } else { return null; } @@ -336,7 +341,7 @@ private Set fieldIndexConverter( private Envelope.Operation getOperation(final Cdcpb.Event.Row row) { if (row.getOpType() == Cdcpb.Event.Row.OpType.PUT) { // create ,update - if (row.getValue() != null && row.getOldValue() != null) { + if (row.getValue() != null && !row.getOldValue().isEmpty()) { return Envelope.Operation.UPDATE; } else { return Envelope.Operation.CREATE; @@ -396,9 +401,49 @@ public boolean executeIteration( return StreamingChangeEventSource.super.executeIteration(context, partition, offsetContext); } - @Override - public void commitOffset(Map offset) { - StreamingChangeEventSource.super.commitOffset(offset); + /** Stops event production and releases all TiDB resources. This operation is idempotent. 
*/ + public synchronized void close() { + if (!closed.compareAndSet(false, true)) { + return; + } + + running = false; + ChangeEventSourceContext currentContext = context; + if (currentContext instanceof StoppableChangeEventSourceContext) { + ((StoppableChangeEventSourceContext) currentContext).stopChangeEventSource(); + } + + Thread currentExecutionThread = executionThread; + if (currentExecutionThread != null && currentExecutionThread != Thread.currentThread()) { + currentExecutionThread.interrupt(); + } + + if (cdcClient != null) { + try { + cdcClient.close(); + } catch (RuntimeException e) { + LOG.warn("Failed to close TiDB CDC client.", e); + } finally { + cdcClient = null; + } + } + if (executorService != null) { + executorService.shutdownNow(); + executorService = null; + } + if (session != null) { + try { + session.close(); + } catch (Exception e) { + LOG.warn("Failed to close TiDB session.", e); + } finally { + session = null; + } + } + } + + boolean isClosed() { + return closed.get(); } // --------------------------------------- diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java index ed77366d5e1..216b13b77e2 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java @@ -24,7 +24,7 @@ import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; import 
org.apache.flink.cdc.connectors.base.source.reader.external.JdbcSourceFetchTaskContext; -import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils; +import org.apache.flink.cdc.connectors.base.utils.SplitKeyUtils; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; import org.apache.flink.cdc.connectors.tidb.source.handler.TiDBErrorHandler; @@ -173,12 +173,11 @@ public boolean isRecordBetween(SourceRecord record, Object[] splitStart, Object[ if (this.offsetContext.isSnapshotRunning()) { RowType splitKeyType = getSplitType(getDatabaseSchema().tableFor(this.getTableId(record))); - Object[] key = - SourceRecordUtils.getSplitKey(splitKeyType, record, getSchemaNameAdjuster()); - return SourceRecordUtils.splitKeyRangeContains(key, splitStart, splitEnd); + Object[] key = SplitKeyUtils.getSplitKey(splitKeyType, record, getSchemaNameAdjuster()); + return SplitKeyUtils.splitKeyRangeContains(key, splitStart, splitEnd); } else { EventOffset newOffset = new EventOffset(record.sourceOffset()); - return SourceRecordUtils.splitKeyRangeContains( + return SplitKeyUtils.splitKeyRangeContains( new EventOffset[] {newOffset}, splitStart, splitEnd); } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java index 4c7169c4619..ff2c624c0cb 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java @@ -17,7 +17,6 @@ package 
org.apache.flink.cdc.connectors.tidb.source.fetch; -import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; @@ -25,15 +24,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nullable; - /** TiDBStreamFetchTask. */ public class TiDBStreamFetchTask implements FetchTask { private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamFetchTask.class); private final StreamSplit split; private volatile boolean taskRunning = false; private volatile boolean stopped = false; - EventSourceReader eventSourceReader; + private volatile EventSourceReader eventSourceReader; + private volatile StoppableChangeEventSourceContext changeEventSourceContext; public TiDBStreamFetchTask(StreamSplit split) { this.split = split; @@ -50,27 +48,37 @@ public void execute(Context context) throws Exception { LOG.debug("execute StreamFetchTask for split: {}", split); } taskRunning = true; - TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; - sourceFetchContext.getOffsetContext().preSnapshotCompletion(); - - eventSourceReader = - new EventSourceReader( - sourceFetchContext.getDbzConnectorConfig(), - sourceFetchContext.getEventDispatcher(), - sourceFetchContext.getErrorHandler(), - sourceFetchContext.getTaskContext(), - split); - eventSourceReader.init(); - StoppableChangeEventSourceContext changeEventSourceContext = - new StoppableChangeEventSourceContext(); - eventSourceReader.execute( - changeEventSourceContext, - sourceFetchContext.getPartition(), - sourceFetchContext.getOffsetContext()); - } + try { + TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; + sourceFetchContext.getOffsetContext().preSnapshotCompletion(); - public void 
commitCurrentOffset(@Nullable Offset offsetToCommit) { - // todo + EventSourceReader reader = + new EventSourceReader( + sourceFetchContext.getDbzConnectorConfig(), + sourceFetchContext.getEventDispatcher(), + sourceFetchContext.getErrorHandler(), + sourceFetchContext.getTaskContext(), + split); + StoppableChangeEventSourceContext sourceContext = + new StoppableChangeEventSourceContext(); + this.eventSourceReader = reader; + this.changeEventSourceContext = sourceContext; + if (stopped) { + reader.close(); + return; + } + reader.init(); + if (stopped) { + reader.close(); + return; + } + reader.execute( + sourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); + } finally { + taskRunning = false; + } } @Override @@ -86,11 +94,15 @@ public SourceSplitBase getSplit() { @Override public void close() { LOG.debug("stopping StreamFetchTask for split: {}", split); - if (eventSourceReader != null) { - ((StoppableChangeEventSourceContext) (eventSourceReader.context)) - .stopChangeEventSource(); - } - stopped = false; + stopped = true; taskRunning = false; + StoppableChangeEventSourceContext sourceContext = changeEventSourceContext; + if (sourceContext != null) { + sourceContext.stopChangeEventSource(); + } + EventSourceReader reader = eventSourceReader; + if (reader != null) { + reader.close(); + } } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java index 1338bc9fcc1..e5d3d5afb51 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java @@ -115,7 +115,7 @@ public void tableEvent(String database, Set tableIds, Instant timestamp @Override public Schema getSourceInfoSchema() { - return sourceInfoSchema.schema(); + return sourceInfoSchema; } @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java index fcbcb3b086b..45776656370 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java @@ -21,6 +21,7 @@ import org.apache.flink.cdc.debezium.table.DeserializationRuntimeConverterFactory; import org.apache.flink.table.data.GenericArrayData; import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; import org.apache.flink.table.types.logical.ArrayType; import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.table.types.logical.LogicalTypeFamily; @@ -32,10 +33,12 @@ import io.debezium.data.EnumSet; import io.debezium.data.geometry.Geometry; import io.debezium.data.geometry.Point; +import io.debezium.time.ZonedTimestamp; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; import java.nio.ByteBuffer; +import java.time.Instant; import java.time.ZoneId; import java.util.HashMap; import java.util.Map; @@ -59,6 +62,8 @@ public Optional 
createUserDefinedConverter( return createStringConverter(); case ARRAY: return createArrayConverter((ArrayType) logicalType); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return createTimestampLtzConverter(); default: // fallback to default converter return Optional.empty(); @@ -67,6 +72,25 @@ public Optional createUserDefinedConverter( }; } + private static Optional createTimestampLtzConverter() { + return Optional.of( + new DeserializationRuntimeConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object convert(Object dbzObj, Schema schema) { + if (dbzObj instanceof String) { + Instant instant = + ZonedTimestamp.FORMATTER.parse((String) dbzObj, Instant::from); + return TimestampData.fromInstant(instant); + } + + throw new IllegalArgumentException( + "Unable to convert TIMESTAMP_LTZ from " + dbzObj); + } + }); + } + private static Optional createStringConverter() { final ObjectMapper objectMapper = new ObjectMapper(); final ObjectWriter objectWriter = objectMapper.writer(); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java index 9ec64476f7e..3ab847de512 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java @@ -180,6 +180,14 @@ public DynamicTableSource createDynamicTableSource(Context context) { config.getOptional(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN).orElse(null); Map chunkKeyColumns = new HashMap<>(); if (chunkKeyColumn != null) { + if (databaseName == null || tableName == null) { + throw new 
ValidationException( + String.format( + "Option '%s' requires both '%s' and '%s' to be set.", + SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN.key(), + DATABASE_NAME.key(), + TABLE_NAME.key())); + } chunkKeyColumns.put(new ObjectPath(databaseName, tableName), chunkKeyColumn); } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java index 98c5a38f570..6f15db79336 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java @@ -35,7 +35,6 @@ import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.types.RowKind; import org.tikv.common.TiConfiguration; @@ -158,11 +157,7 @@ public TiDBTableSource( @Override public ChangelogMode getChangelogMode() { - return ChangelogMode.newBuilder() - .addContainedKind(RowKind.INSERT) - .addContainedKind(RowKind.UPDATE_AFTER) - .addContainedKind(RowKind.DELETE) - .build(); + return ChangelogMode.all(); } @Override diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java index c796284a5d8..ea618bec0ae 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java @@ -180,10 +180,13 @@ private static DataType convertFromColumn(Column column) { case DATE: return DataTypes.DATE(); case DATETIME: - case TIMESTAMP: return column.length() >= 0 ? DataTypes.TIMESTAMP(column.length()) - : DataTypes.TIMESTAMP(); + : DataTypes.TIMESTAMP(0); + case TIMESTAMP: + return column.length() >= 0 + ? DataTypes.TIMESTAMP_LTZ(column.length()) + : DataTypes.TIMESTAMP_LTZ(0); case CHAR: return DataTypes.CHAR(column.length()); case VARCHAR: diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/tikv/common/codec/TiDBRowV2Decoder.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/tikv/common/codec/TiDBRowV2Decoder.java new file mode 100644 index 00000000000..cfe400a50e3 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/tikv/common/codec/TiDBRowV2Decoder.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.tikv.common.codec; + +import org.tikv.common.meta.TiColumnInfo; +import org.tikv.common.meta.TiTableInfo; +import org.tikv.common.types.DataType; +import org.tikv.common.types.MySQLType; + +/** Decodes TiDB RowV2 values without converting binary strings to UTF-8 text. */ +public final class TiDBRowV2Decoder { + + private TiDBRowV2Decoder() {} + + public static Object[] decodeObjectsPreservingBinary( + byte[] value, Long handle, TiTableInfo tableInfo) { + if (handle == null && tableInfo.isPkHandle()) { + throw new IllegalArgumentException("when pk is handle, handle cannot be null"); + } + + RowV2 row = RowV2.createNew(value); + Object[] result = new Object[tableInfo.getColumns().size()]; + for (TiColumnInfo column : tableInfo.getColumns()) { + int offset = column.getOffset(); + if (column.isPrimaryKey() && tableInfo.isPkHandle()) { + result[offset] = handle; + continue; + } + + RowV2.ColIDSearchResult searchResult = row.findColID(column.getId()); + if (searchResult.isNull || searchResult.notFound) { + continue; + } + + byte[] columnData = row.getData(searchResult.idx); + result[offset] = + isBinaryString(column.getType()) + ? 
columnData + : RowDecoderV2.decodeCol(columnData, column.getType()); + } + return result; + } + + private static boolean isBinaryString(DataType type) { + MySQLType mysqlType = type.getType(); + return (type.isBinary() || "binary".equalsIgnoreCase(type.getCharset())) + && (mysqlType == MySQLType.TypeString + || mysqlType == MySQLType.TypeVarchar + || mysqlType == MySQLType.TypeVarString); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfigTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfigTest.java new file mode 100644 index 00000000000..cfb4b4949e2 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfigTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import io.debezium.config.Configuration; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +/** Tests for {@link TiDBConnectorConfig}. */ +class TiDBConnectorConfigTest { + + @Test + void testBigIntUnsignedHandlingModeDefaultsToPrecise() { + String configuredMode = + Configuration.empty().getString(TiDBConnectorConfig.BIGINT_UNSIGNED_HANDLING_MODE); + + Assertions.assertThat(TiDBConnectorConfig.BigIntUnsignedHandlingMode.parse(configuredMode)) + .isEqualTo(TiDBConnectorConfig.BigIntUnsignedHandlingMode.PRECISE); + } + + @Test + void testBigIntUnsignedHandlingModeCanBeSetToLong() { + Configuration configuration = + Configuration.create() + .with(TiDBConnectorConfig.BIGINT_UNSIGNED_HANDLING_MODE.name(), "long") + .build(); + + Assertions.assertThat( + TiDBConnectorConfig.BigIntUnsignedHandlingMode.parse( + configuration.getString( + TiDBConnectorConfig.BIGINT_UNSIGNED_HANDLING_MODE))) + .isEqualTo(TiDBConnectorConfig.BigIntUnsignedHandlingMode.LONG); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamLifecycleTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamLifecycleTest.java new file mode 100644 index 00000000000..4e23c51aefb --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamLifecycleTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; + +import io.debezium.relational.TableId; +import org.junit.jupiter.api.Test; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests realtime and idempotent TiDB stream cleanup. 
*/ +class TiDBStreamLifecycleTest { + + @Test + void shouldStopReaderContextWhenClosedRepeatedly() { + EventSourceReader reader = createReader(); + StoppableChangeEventSourceContext context = new StoppableChangeEventSourceContext(); + reader.context = context; + + reader.close(); + reader.close(); + + assertThat(context.isRunning()).isFalse(); + assertThat(reader.isClosed()).isTrue(); + } + + @Test + void shouldAllowClosingTaskBeforeItStarts() { + TiDBStreamFetchTask task = new TiDBStreamFetchTask(createStreamSplit()); + + task.close(); + task.close(); + + assertThat(task.isRunning()).isFalse(); + } + + private EventSourceReader createReader() { + TiDBSourceConfigFactory configFactory = new TiDBSourceConfigFactory(); + configFactory.hostname("localhost"); + configFactory.port(4000); + configFactory.username("root"); + configFactory.password(""); + configFactory.databaseList("inventory"); + configFactory.tableList("inventory.products"); + configFactory.pdAddresses("localhost:2379"); + TiDBSourceConfig sourceConfig = configFactory.create(0); + return new EventSourceReader( + sourceConfig.getDbzConnectorConfig(), null, null, null, createStreamSplit()); + } + + private StreamSplit createStreamSplit() { + return new StreamSplit( + "stream-split", + EventOffset.INITIAL_OFFSET, + EventOffset.NO_STOPPING_OFFSET, + Collections.emptyList(), + Collections.singletonMap(new TableId("inventory", null, "products"), null), + 0); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetCheckpointTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetCheckpointTest.java new file mode 100644 index 00000000000..13781d8c171 --- /dev/null +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetCheckpointTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitSerializer; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplitState; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests that TiDB offsets are retained in Flink checkpoint state. 
*/ +class EventOffsetCheckpointTest { + + private final EventOffsetFactory offsetFactory = new EventOffsetFactory(); + private final SourceSplitSerializer splitSerializer = + new SourceSplitSerializer() { + @Override + public OffsetFactory getOffsetFactory() { + return offsetFactory; + } + }; + + @Test + void shouldRestoreCheckpointedStreamOffset() throws Exception { + StreamSplit streamSplit = + new StreamSplit( + "stream-split", + EventOffset.INITIAL_OFFSET, + EventOffset.NO_STOPPING_OFFSET, + Collections.emptyList(), + Collections.emptyMap(), + 0); + StreamSplitState streamSplitState = new StreamSplitState(streamSplit); + EventOffset checkpointOffset = new EventOffset("1782896898607", "467375724588433411"); + + streamSplitState.setStartingOffset(checkpointOffset); + StreamSplit checkpointSplit = streamSplitState.toSourceSplit(); + + byte[] checkpointBytes = splitSerializer.serialize(checkpointSplit); + SourceSplitBase restored = + splitSerializer.deserialize(splitSerializer.getVersion(), checkpointBytes); + + assertThat(restored.asStreamSplit().getStartingOffset()).isEqualTo(checkpointOffset); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java index 724386141fe..a689acddd7a 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java @@ -29,6 +29,7 @@ import org.apache.flink.cdc.connectors.tidb.source.TiDBDialect; import 
org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; @@ -72,8 +73,12 @@ public class TiDBStreamSplitReaderTest extends TiDBTestBase { public void before() { initializeTidbTable("customer"); TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); - tiDBSourceConfigFactory.pdAddresses( - PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN)); + String pdAddress = PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN); + tiDBSourceConfigFactory + .pdAddresses(pdAddress) + .tiConfiguration( + TiDBSourceOptions.getTiConfiguration( + pdAddress, "", Collections.emptyMap())); tiDBSourceConfigFactory.hostname(TIDB.getHost()); tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); @@ -138,7 +143,7 @@ public void testStreamSplitReader() throws Exception { streamSplitReader.handleSplitsChanges(new SplitsAddition<>(singletonList(streamSplit))); int retry = 0; int count = 0; - while (retry < MAX_RETRY_TIMES) { + while (retry++ < MAX_RETRY_TIMES) { ChangeEventRecords records = (ChangeEventRecords) streamSplitReader.fetch(); if (records.nextSplit() != null) { SourceRecords sourceRecords; @@ -161,8 +166,9 @@ public void testStreamSplitReader() throws Exception { break; } } + Assertions.fail("Timed out waiting for change events from stream split."); } catch (Exception e) { - LOG.error("Stream split read error.", e); + throw new AssertionError("Stream split read error.", e); } finally { streamSplitReader.close(); } diff --git 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java index dbab19ae8ac..e48fa93c08d 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java @@ -422,8 +422,8 @@ void testMetadataColumns() throws Exception { "+I(inventory,products,107,rocks,box of assorted rocks,5.3000000000)", "+I(inventory,products,108,jacket,water resistent black wind breaker,0.1000000000)", "+I(inventory,products,109,spare tire,24 inch spare tire,22.2000000000)", - "+U(inventory,products,106,hammer,18oz carpenter hammer,1.0000000000)", - "-U(inventory,products,106,hammer,16oz carpenter's hammer,1.0000000000)"); + "-U(inventory,products,106,hammer,16oz carpenter's hammer,1.0000000000)", + "+U(inventory,products,106,hammer,18oz carpenter hammer,1.0000000000)"); List actual = TestValuesTableFactory.getRawResultsAsStrings("sink"); assertEqualsInAnyOrder(expected, actual); result.getJobClient().get().cancel().get(); @@ -566,8 +566,8 @@ void testAllDataTypes() throws Throwable { List expected = Arrays.asList( - "+I(1,127,255,32767,65535,8388607,16777215,2147483647,4294967295,2147483647,9223372036854775807,18446744073709551615,Hello World,abc,123.102,123.102,404.4443,123.4567,346,34567892.1,false,true,true,2020-07-17,18:00:22,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17T18:00:22,[101, 26, -17, -65, -67, 8, 57, 15, 72, -17, -65, -67, -17, -65, -67, -17, -65, -67, 54, -17, -65, -67, 62, 123, 116, 0],[4, 4, 4, 4, 4, 4, 4, 4],text,[16],[16],[16],[16],2021,red,[a, 
b],{\"key1\":\"value1\"})", - "+U(1,127,255,32767,65535,8388607,16777215,2147483647,4294967295,2147483647,9223372036854775807,18446744073709551615,Hello World,abc,123.102,123.102,404.4443,123.4567,346,34567892.1,false,true,true,2020-07-17,18:00:22,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17T18:33:22,[101, 26, -17, -65, -67, 8, 57, 15, 72, -17, -65, -67, -17, -65, -67, -17, -65, -67, 54, -17, -65, -67, 62, 123, 116, 0],[4, 4, 4, 4, 4, 4, 4, 4],text,[16],[16],[16],[16],2021,red,[a, b],{\"key1\":\"value1\"})"); + "+I(1,127,255,32767,65535,8388607,16777215,2147483647,4294967295,2147483647,9223372036854775807,18446744073709551615,Hello World,abc,123.102,123.102,404.4443,123.4567,346,34567892.1,false,true,true,2020-07-17,18:00:22,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17T18:00:22,[101, 26, -19, 8, 57, 15, 72, -109, -78, -15, 54, -110, 62, 123, 116, 0],[4, 4, 4, 4, 4, 4, 4, 4],text,[16],[16],[16],[16],2021,red,[a, b],{\"key1\": \"value1\"})", + "+U(1,127,255,32767,65535,8388607,16777215,2147483647,4294967295,2147483647,9223372036854775807,18446744073709551615,Hello World,abc,123.102,123.102,404.4443,123.4567,346,34567892.1,false,true,true,2020-07-17,18:00:22,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17T18:33:22,[101, 26, -19, 8, 57, 15, 72, -109, -78, -15, 54, -110, 62, 123, 116, 0],[4, 4, 4, 4, 4, 4, 4, 4],text,[16],[16],[16],[16],2021,red,[a, b],{\"key1\":\"value1\"})"); List actual = TestValuesTableFactory.getRawResultsAsStrings("sink"); assertEqualsInAnyOrder(expected, actual); @@ -610,7 +610,8 @@ void testTiDBServerTimezone(String timezone) throws Exception { + " 'password' = '%s'," + " 'username' = '%s'," + " 'database-name' = '%s'," - + " 'table-name' = '%s'" + + " 'table-name' = '%s'," + + " 'server-time-zone' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), TIDB.getHost(), @@ -618,7 +619,8 @@ void testTiDBServerTimezone(String timezone) throws Exception { TIDB_PASSWORD, 
TIDB_USER, "column_type_test", - "full_types"); + "full_types", + timezone); String sinkDDL = "CREATE TABLE sink (" diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java index 7a59d69584a..01f415a62bb 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java @@ -98,7 +98,6 @@ public void testCommonProperties() { // validation for source DynamicTableSource actualSource = createTableSource(properties); - System.out.println(actualSource.asSummaryString()); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, @@ -150,13 +149,6 @@ public void testOptionalProperties() { dbzProperties.put("test", "test"); DynamicTableSource actualSource = createTableSource(properties); - Map options = new HashMap<>(); - // options.put("tikv.grpc.timeout_in_ms", "20000"); - // options.put("tikv.grpc.scan_timeout_in_ms", "20000"); - // options.put("tikv.batch_get_concurrency", "4"); - // options.put("tikv.batch_put_concurrency", "4"); - // options.put("tikv.batch_scan_concurrency", "4"); - // options.put("tikv.batch_delete_concurrency", "4"); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java 
index 8051034edf0..56bde64a274 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java @@ -49,13 +49,13 @@ public void uriHostMappingEmpty() { @Test public void uriHostMappingError() { - try { - final TiConfiguration tiConf = - TiDBSourceOptions.getTiConfiguration( - "http://0.0.0.0:2347", "host1=1;host2=2;host3=3", new HashMap<>()); - } catch (IllegalArgumentException e) { - Assertions.assertThat(e.getMessage()) - .isEqualTo("Invalid host mapping string: host1=1;host2=2;host3=3"); - } + Assertions.assertThatThrownBy( + () -> + TiDBSourceOptions.getTiConfiguration( + "http://0.0.0.0:2347", + "host1=1;host2=2;host3=3", + new HashMap<>())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid host mapping string: host1=1;host2=2;host3=3"); } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-sql-connector-tidb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-sql-connector-tidb-cdc/pom.xml index 8a2fb60d0ec..c151e6ad4d2 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-sql-connector-tidb-cdc/pom.xml +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-sql-connector-tidb-cdc/pom.xml @@ -52,16 +52,44 @@ limitations under the License. 
false + io.debezium:debezium-api + io.debezium:debezium-embedded + io.debezium:debezium-core + io.debezium:debezium-ddl-parser + io.debezium:debezium-connector-mysql + org.apache.flink:flink-cdc-common + org.apache.flink:flink-cdc-runtime + org.apache.flink:flink-cdc-base org.apache.flink:flink-connector-debezium org.apache.flink:flink-connector-tidb-cdc - org.tikv:tikv-client-java org.apache.flink:flink-cdc-flink*-compat + org.tikv:tikv-client-java + org.antlr:antlr4-runtime + org.apache.kafka:* + com.zendesk:mysql-binlog-connector-java + com.github.luben:zstd-jni + com.fasterxml.*:* + com.google.guava:* com.google.protobuf:* + com.esri.geometry:esri-geometry-api + com.zaxxer:HikariCP io.grpc:* + commons-codec:commons-codec org.apache.flink:flink-shaded-guava + + + org.apache.kafka:* + + kafka/kafka-version.properties + LICENSE + NOTICE + common/** + + + org.apache.kafka @@ -69,12 +97,36 @@ limitations under the License. org.apache.flink.cdc.connectors.shaded.org.apache.kafka + + org.antlr + + org.apache.flink.cdc.connectors.shaded.org.antlr + + + + com.fasterxml + + org.apache.flink.cdc.connectors.shaded.com.fasterxml + + com.google org.apache.flink.cdc.connectors.shaded.com.google + + com.esri.geometry + + org.apache.flink.cdc.connectors.shaded.com.esri.geometry + + + + com.zaxxer + + org.apache.flink.cdc.connectors.shaded.com.zaxxer + + io.grpc @@ -88,4 +140,4 @@ limitations under the License. 
- \ No newline at end of file + diff --git a/flink-cdc-e2e-tests/flink-cdc-e2e-utils/src/test/java/org/apache/flink/cdc/common/test/utils/JdbcProxy.java b/flink-cdc-e2e-tests/flink-cdc-e2e-utils/src/test/java/org/apache/flink/cdc/common/test/utils/JdbcProxy.java index 82a8723c82f..f07cec7d2b7 100644 --- a/flink-cdc-e2e-tests/flink-cdc-e2e-utils/src/test/java/org/apache/flink/cdc/common/test/utils/JdbcProxy.java +++ b/flink-cdc-e2e-tests/flink-cdc-e2e-utils/src/test/java/org/apache/flink/cdc/common/test/utils/JdbcProxy.java @@ -65,9 +65,10 @@ private void checkResult(List expectedResult, String table, String[] fie results.add(StringUtils.join(result, ",")); } Collections.sort(results); - Collections.sort(expectedResult); + List sortedExpectedResult = new ArrayList<>(expectedResult); + Collections.sort(sortedExpectedResult); // make it easier to check the result - Assertions.assertThat(expectedResult.toArray()).isEqualTo(results.toArray()); + Assertions.assertThat(results).containsExactlyElementsOf(sortedExpectedResult); } } diff --git a/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/TiDBE2eITCase.java b/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/TiDBE2eITCase.java index e76dfbdf918..3a974436137 100644 --- a/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/TiDBE2eITCase.java +++ b/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/TiDBE2eITCase.java @@ -157,6 +157,10 @@ void testTIDBCDC() throws Exception { " 'connector' = 'tidb-cdc',", " 'tikv.grpc.timeout_in_ms' = '20000',", " 'pd-addresses' = '" + PD_SERVICE_NAME + ":" + PD_PORT + "',", + " 'hostname' = '" + TIDB_SERVICE_NAME + "',", + " 'port' = '" + TIDB_PORT + "',", + " 'username' = '" + TIDB_USER + "',", + " 'password' = '" + TIDB_PASSWORD + "',", " 'database-name' = 'inventory',", " 'table-name' = 
'products'", ");", diff --git a/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/utils/FlinkContainerTestEnvironment.java b/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/utils/FlinkContainerTestEnvironment.java index 96173989c38..69cc3a75029 100644 --- a/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/utils/FlinkContainerTestEnvironment.java +++ b/flink-cdc-e2e-tests/flink-cdc-source-e2e-tests/src/test/java/org/apache/flink/cdc/connectors/tests/utils/FlinkContainerTestEnvironment.java @@ -275,18 +275,25 @@ public RestClusterClient getRestClusterClient() { public void waitUntilJobRunning(Duration timeout) { RestClusterClient clusterClient = getRestClusterClient(); Deadline deadline = Deadline.fromNow(timeout); + String lastObservedJobs = "none"; + Exception lastStatusFetchError = null; while (deadline.hasTimeLeft()) { Collection jobStatusMessages; try { jobStatusMessages = clusterClient.listJobs().get(10, TimeUnit.SECONDS); + lastStatusFetchError = null; } catch (Exception e) { + lastStatusFetchError = e; LOG.warn("Error when fetching job status.", e); + pauseBeforeNextJobStatusCheck(); continue; } if (jobStatusMessages != null && !jobStatusMessages.isEmpty()) { + lastObservedJobs = formatJobStatuses(jobStatusMessages); JobStatusMessage message = jobStatusMessages.iterator().next(); JobStatus jobStatus = message.getJobState(); if (jobStatus.isTerminalState()) { + logFlinkContainerLogs(); throw new ValidationException( String.format( "Job has been terminated! JobName: %s, JobID: %s, Status: %s", @@ -297,6 +304,57 @@ public void waitUntilJobRunning(Duration timeout) { return; } } + pauseBeforeNextJobStatusCheck(); + } + + logFlinkContainerLogs(); + String message = + String.format( + "Timed out after %s waiting for a Flink job to reach RUNNING state. 
" + + "Last observed jobs: %s", + timeout, lastObservedJobs); + if (lastStatusFetchError != null) { + throw new ValidationException(message, lastStatusFetchError); + } + throw new ValidationException(message); + } + + private static String formatJobStatuses(Collection jobStatusMessages) { + return jobStatusMessages.stream() + .map( + message -> + String.format( + "JobName=%s, JobID=%s, Status=%s", + message.getJobName(), + message.getJobId(), + message.getJobState())) + .collect(Collectors.joining("; ")); + } + + private static void pauseBeforeNextJobStatusCheck() { + try { + Thread.sleep(200L); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ValidationException("Interrupted while waiting for Flink job status.", e); + } + } + + private void logFlinkContainerLogs() { + logContainerLogs("JobManager", jobManager); + logContainerLogs("TaskManager", taskManager); + } + + private static void logContainerLogs( + String containerName, @Nullable GenericContainer container) { + if (container == null) { + LOG.error("{} container was not created.", containerName); + return; + } + try { + LOG.error("{} logs before test failure:\n{}", containerName, container.getLogs()); + } catch (RuntimeException e) { + LOG.error("Unable to collect {} logs.", containerName, e); } } From ac10cc1b32b6e8a34858552e9b9735a627cea3d1 Mon Sep 17 00:00:00 2001 From: ouyangwulin Date: Fri, 3 Jul 2026 15:29:34 +0800 Subject: [PATCH 5/5] add timeout spotless:apply fixed testStreamSplitReader error fixed streamsplit error --- .../reader/TiDBStreamSplitReaderTest.java | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java index a689acddd7a..7d8d212d430 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java @@ -45,8 +45,8 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.tikv.common.TiConfiguration; -import java.time.Instant; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -61,9 +61,7 @@ public class TiDBStreamSplitReaderTest extends TiDBTestBase { private static final String tableName = "customers"; private static final String STREAM_SPLIT_ID = "stream-split"; - private static final int USE_POST_LOWWATERMARK_HOOK = 1; - private static final int USE_PRE_HIGHWATERMARK_HOOK = 2; - private static final int MAX_RETRY_TIMES = 100; + private static final int MAX_RETRY_TIMES = 600; private TiDBSourceConfig sourceConfig; private TiDBDialect tiDBDialect; @@ -73,12 +71,19 @@ public class TiDBStreamSplitReaderTest extends TiDBTestBase { public void before() { initializeTidbTable("customer"); TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); - String pdAddress = PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN); + String pdHost = PD.getHost(); + String tikvHost = TIKV.getHost(); + String pdAddress = pdHost + ":" + PD.getMappedPort(PD_PORT_ORIGIN); + + String hostMapping = "pd0:" + pdHost + ";tikv0:" + tikvHost; + + TiConfiguration tiConfiguration = + TiDBSourceOptions.getTiConfiguration( + pdAddress, hostMapping, Collections.emptyMap()); tiDBSourceConfigFactory .pdAddresses(pdAddress) - 
.tiConfiguration( - TiDBSourceOptions.getTiConfiguration( - pdAddress, "", Collections.emptyMap())); + .hostMapping(hostMapping) + .tiConfiguration(tiConfiguration); tiDBSourceConfigFactory.hostname(TIDB.getHost()); tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); @@ -89,7 +94,7 @@ public void before() { tiDBSourceConfigFactory.skipSnapshotBackfill(true); tiDBSourceConfigFactory.scanNewlyAddedTableEnabled(true); this.sourceConfig = tiDBSourceConfigFactory.create(0); - this.tiDBDialect = new TiDBDialect(tiDBSourceConfigFactory.create(0)); + this.tiDBDialect = new TiDBDialect(sourceConfig); this.cdcEventOffsetFactory = new EventOffsetFactory(); } @@ -106,7 +111,7 @@ public void testStreamSplitReader() throws Exception { incrementalSourceReaderContext, SnapshotPhaseHooks.empty()); try { - EventOffset startOffset = new EventOffset(Instant.now().toEpochMilli()); + EventOffset startOffset = (EventOffset) tiDBDialect.displayCurrentOffset(sourceConfig); String[] insertDataSql = new String[] { "INSERT INTO " @@ -127,8 +132,8 @@ public void testStreamSplitReader() throws Exception { new FinishedSnapshotSplitInfo( tableIds, STREAM_SPLIT_ID, - new Object[] {startOffset}, - new Object[] {EventOffset.NO_STOPPING_OFFSET}, + null, + null, startOffset, cdcEventOffsetFactory); StreamSplit streamSplit = @@ -138,7 +143,9 @@ public void testStreamSplitReader() throws Exception { cdcEventOffsetFactory.createNoStoppingOffset(), Collections.singletonList(finishedSnapshotSplitInfo), tableSchemas, - 0); + 1, + false, + true); Assertions.assertThat(streamSplitReader.canAssignNextSplit()).isTrue(); streamSplitReader.handleSplitsChanges(new SplitsAddition<>(singletonList(streamSplit))); int retry = 0;