diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java index 3543fc22a1c..a041589285f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java @@ -53,6 +53,7 @@ public static void register(BuiltinFunctionRepository repository) { repository.register(greater()); repository.register(gte()); repository.register(like()); + repository.register(ilike()); repository.register(notLike()); repository.register(regexp()); } @@ -391,6 +392,12 @@ private static DefaultFunctionResolver like() { impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN)); } + private static DefaultFunctionResolver ilike() { + return define( + BuiltinFunctionName.ILIKE.getName(), + impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING)); + } + private static DefaultFunctionResolver regexp() { return define( BuiltinFunctionName.REGEXP.getName(), diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md index 512b5edbbe6..cb5dff9107e 100644 --- a/docs/user/ppl/functions/condition.md +++ b/docs/user/ppl/functions/condition.md @@ -758,6 +758,71 @@ fetched rows / total rows = 1/1 +-----+ ``` +## CONTAINS + +### Description + +Usage: `field contains 'substring'` returns TRUE if the field value contains the given substring (case-insensitive), FALSE otherwise. + +The `contains` operator is a CloudWatch-style comparison operator that performs case-insensitive substring matching. It is syntactic sugar for an `ilike` comparison with `%substring%` wildcards. + +Syntax: `<field> contains '<substring>'` + +- The left-hand side must be a field reference. +- The right-hand side must be a string literal. 
Using a field reference on the right-hand side will raise a semantic error. +- Matching is case-insensitive. + +**Argument type:** `STRING` +**Return type:** `BOOLEAN` + +### Example + +Basic substring filter: + +```ppl +source=accounts +| where firstname contains 'mbe' +| fields firstname, age +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+-----+ +| firstname | age | +|-----------+-----| +| Amber | 32 | ++-----------+-----+ +``` + +Case-insensitive matching (all of the following are equivalent): + +```ppl ignore +source=accounts | where firstname contains 'mbe' +source=accounts | where firstname CONTAINS 'MBE' +source=accounts | where firstname Contains 'Mbe' +``` + +Combining with other conditions: + +```ppl +source=accounts +| where employer contains 'ami' AND age > 30 +| fields firstname, employer, age +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+----------+-----+ +| firstname | employer | age | +|-----------+----------+-----| +| Amber | Pyrami | 32 | ++-----------+----------+-----+ +``` + ## REGEXP_MATCH ### Description diff --git a/docs/user/ppl/functions/index.md b/docs/user/ppl/functions/index.md index 146288e19dc..cdfbbd201ce 100644 --- a/docs/user/ppl/functions/index.md +++ b/docs/user/ppl/functions/index.md @@ -57,6 +57,7 @@ PPL supports a wide range of built-in functions for data processing and analysis - [EARLIEST](condition.md/#earliest) - [LATEST](condition.md/#latest) - [REGEXP_MATCH](condition.md/#regexp_match) + - [CONTAINS](condition.md/#contains) - [Type Conversion Functions](conversion.md) - [CAST](conversion.md/#cast) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 224ebd1e1f6..a386987e532 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -144,6 +144,34 @@ 
public void testLikeOperatorCaseInsensitive() throws IOException { verifyDataRows(result3, rows("Amber")); } + @Test + public void testContainsOperator() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where firstname contains 'mbe' | fields firstname", + TEST_INDEX_ACCOUNT)); + verifyDataRows(result, rows("Amber"), rows("Chambers")); + + result = + executeQuery( + String.format( + "source=%s | where firstname contains 'zzz' | fields firstname", + TEST_INDEX_ACCOUNT)); + assertEquals(0, result.getInt("total")); + } + + @Test + public void testContainsOperatorCaseInsensitive() throws IOException { + // contains uses ilike semantics - case insensitive + JSONObject result = + executeQuery( + String.format( + "source=%s | where firstname contains 'MBE' | fields firstname", + TEST_INDEX_ACCOUNT)); + verifyDataRows(result, rows("Amber"), rows("Chambers")); + } + @Test public void testIsNullFunction() throws IOException { JSONObject result = diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 index b7dc4b7286d..2248374d8d9 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -413,6 +413,7 @@ REDUCE: 'REDUCE'; // BOOL FUNCTIONS LIKE: 'LIKE'; +CONTAINS: 'CONTAINS'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; BETWEEN: 'BETWEEN'; diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 index cae57b53181..0b8203b7ec2 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 @@ -945,6 +945,7 @@ geoIpProperty | GREATER | NOT_GREATER | REGEXP + | CONTAINS ; singleFieldRelevanceFunctionName diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index b7d097b4b88..6ca25b7e9b7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -68,6 +68,7 @@ public ScriptQueryUnSupportedException(String message) { .put(BuiltinFunctionName.LTE.getName(), new RangeQuery(Comparison.LTE)) .put(BuiltinFunctionName.GTE.getName(), new RangeQuery(Comparison.GTE)) .put(BuiltinFunctionName.LIKE.getName(), new LikeQuery()) + .put(BuiltinFunctionName.ILIKE.getName(), new LikeQuery()) .put(BuiltinFunctionName.MATCH.getName(), new MatchQuery()) .put(BuiltinFunctionName.MATCH_PHRASE.getName(), new MatchPhraseQuery()) .put(BuiltinFunctionName.MATCHPHRASE.getName(), new MatchPhraseQuery()) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 732d3944a68..bbb57729303 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -459,6 +459,7 @@ CAST: 'CAST'; // BOOL FUNCTIONS LIKE: 'LIKE'; ILIKE: 'ILIKE'; +CONTAINS: 'CONTAINS'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; CIDRMATCH: 'CIDRMATCH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 53cb4eda36c..09beb7f9ad9 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1444,6 +1444,7 @@ positionFunctionName | REGEXP | LIKE | ILIKE + | CONTAINS ; singleFieldRelevanceFunctionName @@ -1609,6 +1610,7 @@ searchableKeyWord | ELSE | ARROW | BETWEEN + | CONTAINS | EXISTS | SOURCE | INDEX diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 471c0c2f1c9..c58eca20575 100644 --- 
a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -213,6 +213,18 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) { String operator = ctx.comparisonOperator().getText(); if ("==".equals(operator)) { operator = EQUAL.getName().getFunctionName(); + } else if ("contains".equalsIgnoreCase(operator)) { + UnresolvedExpression left = visit(ctx.left); + UnresolvedExpression right = visit(ctx.right); + if (!(right instanceof Literal) || ((Literal) right).getType() != DataType.STRING) { + throw new SemanticCheckException( + "The right-hand side of 'contains' must be a string literal"); + } + String raw = ((Literal) right).getValue().toString(); + String escaped = raw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_"); + String wrapped = "%" + escaped + "%"; + return new Compare( + ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING)); } else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator) && UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) { operator = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index b316e461889..af10b53defb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -66,6 +66,7 @@ import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.exception.SemanticCheckException; public class AstExpressionBuilderTest extends AstBuilderTest { @Test @@ -227,6 +228,59 @@ public void testLikeOperatorCaseInsensitive() { filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); } + 
@Test + public void testContainsOperatorExpr() { + assertEqual( + "source=t | where a contains 'hello'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + + assertEqual( + "source=t | where message contains 'err'", + filter(relation("t"), compare("ilike", field("message"), stringLiteral("%err%")))); + } + + @Test + public void testContainsOperatorCaseInsensitive() { + assertEqual( + "source=t | where a CONTAINS 'hello'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + + assertEqual( + "source=t | where a Contains 'hello'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + } + + @Test + public void testContainsOperatorNonLiteralRhsThrows() { + assertThrows( + SemanticCheckException.class, + () -> assertEqual("source=t | where a contains b", (Node) null)); + } + + @Test + public void testContainsOperatorEscapesSpecialChars() { + // % must be escaped so it is treated as a literal character, not a wildcard + assertEqual( + "source=t | where a contains '%'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\%%")))); + + // _ must be escaped so it is treated as a literal character, not a single-char wildcard + assertEqual( + "source=t | where a contains '_'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\_%")))); + + // backslash in PPL is written as '\\'; unquotes to \, then escaped to \\ in the pattern + // Java: "source=t | where a contains '\\\\'" produces PPL: source=t | where a contains '\\' + assertEqual( + "source=t | where a contains '\\\\'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\\\%")))); + + // mixed special characters are all escaped + assertEqual( + "source=t | where a contains 'foo%bar_baz'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%foo\\%bar\\_baz%")))); + } + @Test public void testBooleanIsNullFunction() { assertEqual(