Skip to content

Commit a169f63

Browse files
nj1973 and helensilva14 authored
fix: Row validations fail for values with trailing newline (#1415)
* test: Add tests for data with special characters such as new line
* chore: dos2unix conversion on Spanner DDL file
* test: Add tests for data with special characters such as new line
* fix: Align RStrip behaviour across SQL engines
* test: Add tests for data with special characters such as new line
* test: Add tests for data with special characters such as new line
* Update tests/resources/snowflake_test_tables.sql
  Co-authored-by: Helen Cristina <[email protected]>
* Update tests/resources/sqlserver_test_tables.sql
  Co-authored-by: Helen Cristina <[email protected]>
---------
Co-authored-by: Helen Cristina <[email protected]>
1 parent a752cf5 commit a169f63

16 files changed

+717
-543
lines changed

tests/resources/bigquery_test_tables.sql

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,18 @@ CREATE TABLE `pso_data_validator`.`dvt_tricky_dates` (
655655
, col_dt_high DATE
656656
, col_ts_low DATETIME
657657
, col_ts_epoch DATETIME
658-
, col_ts_high DATETIME);
658+
, col_ts_high DATETIME
659+
) OPTIONS (description='Integration test table used to test potentially difficult Timestamps.');
659660
INSERT INTO `pso_data_validator`.`dvt_tricky_dates` VALUES
660661
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
661662
,DATETIME'1000-01-01 00:00:00',DATETIME'1970-01-01 00:00:00',DATETIME'9999-12-31 23:59:59');
663+
664+
-- BigQuery fixture: strings with an embedded or trailing new line,
-- carriage return or tab, used by the tricky-strings row validation tests.
CREATE OR REPLACE TABLE `pso_data_validator`.`dvt_tricky_strings` (
    id          INT64,
    col_string  STRING,
    col_comment STRING
)
OPTIONS (
    description = 'Integration test table used to test potentially difficult Strings.'
);

-- One row per case; col_comment names the character and where it sits in col_string.
INSERT INTO `pso_data_validator`.`dvt_tricky_strings` VALUES
    (1, 'str\nstr', 'Contains: new line'),
    (2, 'str\n',    'Trailing: new line'),
    (3, 'str\rstr', 'Contains: carriage return'),
    (4, 'str\r',    'Trailing: carriage return'),
    (5, 'str\tstr', 'Contains: tab'),
    (6, 'str\t',    'Trailing: tab');

tests/resources/mysql_test_tables.sql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,15 +554,16 @@ CREATE TABLE `pso_data_validator`.`dvt_many_cols`
554554
) COMMENT 'Integration test table used to test validating many columns.';
555555
INSERT INTO `pso_data_validator`.`dvt_many_cols` (id) values (1);
556556

557-
DROP TABLE `pso_data_validator`.`dvt_tricky_dates`;
557+
DROP TABLE IF EXISTS `pso_data_validator`.`dvt_tricky_dates`;
558558
CREATE TABLE `pso_data_validator`.`dvt_tricky_dates` (
559559
id integer NOT NULL PRIMARY KEY
560560
, col_dt_low date
561561
, col_dt_epoch date
562562
, col_dt_high date
563563
, col_ts_low datetime(0)
564564
, col_ts_epoch datetime(0)
565-
, col_ts_high datetime(0));
565+
, col_ts_high datetime(0)
566+
) COMMENT='Integration test table used to test potentially difficult Timestamps.';
566567
SET time_zone = '+00:00';
567568
INSERT INTO `pso_data_validator`.`dvt_tricky_dates` VALUES
568569
(1,'1000-01-01','1970-01-01','9999-12-31'

tests/resources/oracle_test_tables.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,22 @@ CREATE TABLE pso_data_validator.dvt_tricky_dates (
764764
, col_ts_low TIMESTAMP(0)
765765
, col_ts_epoch TIMESTAMP(0)
766766
, col_ts_high TIMESTAMP(0));
767+
COMMENT ON TABLE pso_data_validator.dvt_tricky_dates IS 'Integration test table used to test potentially difficult Timestamps.';
767768
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
768769
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
769770
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
770771
COMMIT;
772+
773+
-- Oracle fixture: strings with an embedded or trailing new line,
-- carriage return or tab.
-- NOTE(review): the DROP is unguarded, so it presumably errors on a first run
-- when the table does not exist yet; confirm the script runner tolerates that.
DROP TABLE pso_data_validator.dvt_tricky_strings;

CREATE TABLE pso_data_validator.dvt_tricky_strings (
    id          NUMBER(5) NOT NULL PRIMARY KEY,
    col_string  VARCHAR2(20),
    col_comment VARCHAR2(40)
);

COMMENT ON TABLE pso_data_validator.dvt_tricky_strings
    IS 'Integration test table used to test potentially difficult Strings.';

-- Control characters are built with CHR(); col_comment names the character in each row.
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (1, 'str' || CHR(10) || 'str', 'Contains: new line');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (2, 'str' || CHR(10), 'Trailing: new line');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (3, 'str' || CHR(13) || 'str', 'Contains: carriage return');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (4, 'str' || CHR(13), 'Trailing: carriage return');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (5, 'str' || CHR(9) || 'str', 'Contains: tab');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (6, 'str' || CHR(9), 'Trailing: tab');

COMMIT;

tests/resources/postgresql_test_tables.sql

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,19 @@ CREATE TABLE pso_data_validator.dvt_tricky_dates (
846846
, col_ts_low timestamp(0)
847847
, col_ts_epoch timestamp(0)
848848
, col_ts_high timestamp(0));
849+
COMMENT ON TABLE pso_data_validator.dvt_tricky_dates IS 'Integration test table used to test potentially difficult Timestamps.';
849850
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
850851
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
851852
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
852853

854+
-- PostgreSQL fixture: strings with an embedded or trailing new line,
-- carriage return or tab.
DROP TABLE IF EXISTS pso_data_validator.dvt_tricky_strings;

CREATE TABLE pso_data_validator.dvt_tricky_strings (
    id          integer NOT NULL PRIMARY KEY,
    col_string  varchar(20),
    col_comment varchar(40)
);

COMMENT ON TABLE pso_data_validator.dvt_tricky_strings
    IS 'Integration test table used to test potentially difficult Strings.';

-- E'...' literals are used so the backslash escapes become real control characters.
INSERT INTO pso_data_validator.dvt_tricky_strings VALUES
    (1, E'str\nstr', 'Contains: new line'),
    (2, E'str\n',    'Trailing: new line'),
    (3, E'str\rstr', 'Contains: carriage return'),
    (4, E'str\r',    'Trailing: carriage return'),
    (5, E'str\tstr', 'Contains: tab'),
    (6, E'str\t',    'Trailing: tab');
864+

tests/resources/snowflake_test_tables.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,21 @@ CREATE TABLE PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES (
627627
, col_ts_low TIMESTAMP(0)
628628
, col_ts_epoch TIMESTAMP(0)
629629
, col_ts_high TIMESTAMP(0));
630+
COMMENT ON TABLE PSO_DATA_VALIDATOR.PUBLIC.dvt_tricky_dates IS 'Integration test table used to test potentially difficult Timestamps.';
630631
INSERT INTO PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES VALUES
631632
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
632633
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
634+
635+
-- Snowflake fixture: strings with an embedded or trailing new line,
-- carriage return or tab.
DROP TABLE IF EXISTS PSO_DATA_VALIDATOR.PUBLIC.dvt_tricky_strings;

CREATE TABLE PSO_DATA_VALIDATOR.PUBLIC.dvt_tricky_strings (
    id          NUMBER(5) NOT NULL PRIMARY KEY,
    col_string  VARCHAR(20),
    col_comment VARCHAR(40)
);

COMMENT ON TABLE PSO_DATA_VALIDATOR.PUBLIC.dvt_tricky_strings
    IS 'Integration test table used to test potentially difficult Strings.';

-- Rows come from a UNION ALL of single-row SELECTs; CHR() builds each control character.
INSERT INTO PSO_DATA_VALIDATOR.PUBLIC.dvt_tricky_strings
    SELECT 1, 'str' || CHR(10) || 'str', 'Contains: new line'
    UNION ALL
    SELECT 2, 'str' || CHR(10), 'Trailing: new line'
    UNION ALL
    SELECT 3, 'str' || CHR(13) || 'str', 'Contains: carriage return'
    UNION ALL
    SELECT 4, 'str' || CHR(13), 'Trailing: carriage return'
    UNION ALL
    SELECT 5, 'str' || CHR(9) || 'str', 'Contains: tab'
    UNION ALL
    SELECT 6, 'str' || CHR(9), 'Trailing: tab';

tests/resources/sqlserver_test_tables.sql

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,15 @@ CREATE TABLE pso_data_validator.dvt_tricky_dates (
588588
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
589589
(1,'1000-01-01','1970-01-01','9999-12-31'
590590
,'1000-01-01 00:00:00','1970-01-01 00:00:00','9999-12-31 23:59:59');
591+
592+
-- SQL Server fixture: strings with an embedded or trailing new line,
-- carriage return or tab.
DROP TABLE IF EXISTS pso_data_validator.dvt_tricky_strings;

CREATE TABLE pso_data_validator.dvt_tricky_strings (
    id          integer NOT NULL PRIMARY KEY,
    col_string  varchar(20),
    col_comment varchar(40)
);

-- Control characters are built with CHAR() and joined with +;
-- col_comment names the character in each row.
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (1, 'str' + CHAR(10) + 'str', 'Contains: new line');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (2, 'str' + CHAR(10), 'Trailing: new line');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (3, 'str' + CHAR(13) + 'str', 'Contains: carriage return');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (4, 'str' + CHAR(13), 'Trailing: carriage return');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (5, 'str' + CHAR(9) + 'str', 'Contains: tab');
INSERT INTO pso_data_validator.dvt_tricky_strings
    VALUES (6, 'str' + CHAR(9), 'Trailing: tab');

tests/resources/teradata_test_tables.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,3 +695,16 @@ INSERT INTO udf.dvt_tricky_dates VALUES
695695
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59+00:00');
696696
-- col_ts_high value above forced to UTC based on article below, but we still get wrong answer from the test:
697697
-- https://support.teradata.com/knowledge?id=kb_article_view&sys_kb_id=0e81918ac36da9103eb2d88f05013138
698+
699+
-- Teradata fixture: strings with an embedded or trailing new line,
-- carriage return or tab.
-- NOTE(review): the DROP is unguarded, so it presumably errors on a first run
-- when the table does not exist yet; confirm the script runner tolerates that.
DROP TABLE udf.dvt_tricky_strings;

CREATE TABLE udf.dvt_tricky_strings (
    id          INTEGER NOT NULL PRIMARY KEY,
    col_string  VARCHAR(20),
    col_comment VARCHAR(40)
);

COMMENT ON TABLE udf.dvt_tricky_strings
    IS 'Integration test table used to test potentially difficult Strings.';

-- Control characters are built with CHR(); col_comment names the character in each row.
INSERT INTO udf.dvt_tricky_strings
    VALUES (1, 'str' || CHR(10) || 'str', 'Contains: new line');
INSERT INTO udf.dvt_tricky_strings
    VALUES (2, 'str' || CHR(10), 'Trailing: new line');
INSERT INTO udf.dvt_tricky_strings
    VALUES (3, 'str' || CHR(13) || 'str', 'Contains: carriage return');
INSERT INTO udf.dvt_tricky_strings
    VALUES (4, 'str' || CHR(13), 'Trailing: carriage return');
INSERT INTO udf.dvt_tricky_strings
    VALUES (5, 'str' || CHR(9) || 'str', 'Contains: tab');
INSERT INTO udf.dvt_tricky_strings
    VALUES (6, 'str' || CHR(9), 'Trailing: tab');

tests/system/data_sources/test_oracle.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,19 @@ def test_row_validation_tricky_dates_to_bigquery():
943943
)
944944

945945

946+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Row validation of string values containing special characters.

    Runs ``row_validation_test`` on ``dvt_tricky_strings`` with the
    ``bq-conn`` target connection and ``hash="*"``.
    """
    validation_args = {
        "tables": "pso_data_validator.dvt_tricky_strings",
        "tc": "bq-conn",
        "hash": "*",
    }
    row_validation_test(**validation_args)
957+
958+
946959
@mock.patch(
947960
"data_validation.state_manager.StateManager.get_connection_config",
948961
new=mock_get_connection_config,

tests/system/data_sources/test_postgres.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,19 @@ def test_row_validation_tricky_dates_to_bigquery():
10571057
)
10581058

10591059

1060+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Row validation of string values containing special characters.

    Runs ``row_validation_test`` on ``dvt_tricky_strings`` with the
    ``bq-conn`` target connection and ``hash="*"``.
    """
    validation_args = {
        "tables": "pso_data_validator.dvt_tricky_strings",
        "tc": "bq-conn",
        "hash": "*",
    }
    row_validation_test(**validation_args)
1071+
1072+
10601073
@mock.patch(
10611074
"data_validation.state_manager.StateManager.get_connection_config",
10621075
new=mock_get_connection_config,

tests/system/data_sources/test_snowflake.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,19 @@ def test_row_validation_tricky_dates_to_bigquery():
464464
)
465465

466466

467+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Row validation of string values containing special characters.

    The ``tables`` argument uses the ``source=target`` form because the
    Snowflake table name differs from the BigQuery one.
    """
    validation_args = {
        "tables": "PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_STRINGS=pso_data_validator.dvt_tricky_strings",
        "tc": "bq-conn",
        "hash": "*",
    }
    row_validation_test(**validation_args)
478+
479+
467480
@mock.patch(
468481
"data_validation.state_manager.StateManager.get_connection_config",
469482
new=mock_get_connection_config,

tests/system/data_sources/test_spanner.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,19 @@ def test_row_validation_many_columns():
355355
row_validation_many_columns_test()
356356

357357

358+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Row validation of string values containing special characters.

    Runs ``row_validation_test`` on ``dvt_tricky_strings`` with the
    ``bq-conn`` target connection and ``hash="*"``.
    """
    validation_args = {
        "tables": "pso_data_validator.dvt_tricky_strings",
        "tc": "bq-conn",
        "hash": "*",
    }
    row_validation_test(**validation_args)
369+
370+
358371
@mock.patch(
359372
"data_validation.state_manager.StateManager.get_connection_config",
360373
new=mock_get_connection_config,

tests/system/data_sources/test_sql_server.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,22 @@ def test_row_validation_tricky_dates_to_bigquery():
520520
)
521521

522522

523+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Test with string values containing special characters.

    Currently always skipped: the skip reason states that the SQL Server
    version in use does not support rtrim of all whitespace.
    """
    # The reason must name THIS test; it previously named the tricky_dates
    # test (copy-paste slip), which made skip reports misleading.
    pytest.skip(
        "Skipping test_row_validation_tricky_strings_to_bigquery because the version of SQL Server we have does not support rtrim of all whitespace."
    )
    # Not reached while the skip above is in place; kept so the test can be
    # re-enabled by deleting the skip.
    row_validation_test(
        tables="pso_data_validator.dvt_tricky_strings",
        tc="bq-conn",
        hash="*",
    )
537+
538+
523539
@mock.patch(
524540
"data_validation.state_manager.StateManager.get_connection_config",
525541
new=mock_get_connection_config,

tests/system/data_sources/test_teradata.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,19 @@ def test_row_validation_tricky_dates_to_bigquery():
803803
)
804804

805805

806+
@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_strings_to_bigquery():
    """Row validation of string values containing special characters.

    The ``tables`` argument uses the ``source=target`` form because the
    Teradata table lives in ``udf`` rather than ``pso_data_validator``.
    """
    validation_args = {
        "tables": "udf.dvt_tricky_strings=pso_data_validator.dvt_tricky_strings",
        "tc": "bq-conn",
        "hash": "*",
    }
    row_validation_test(**validation_args)
817+
818+
806819
@mock.patch(
807820
"data_validation.state_manager.StateManager.get_connection_config",
808821
new=mock_get_connection_config,

0 commit comments

Comments
 (0)