elastic · bpintea · May 23, 2025 · May 23, 2025 · May 23, 2025 · May 26, 2025
diff --git a/docs/changelog/128393.yaml b/docs/changelog/128393.yaml
@@ -0,0 +1,6 @@
+pr: 128393
+summary: Pushdown constructs doing case-insensitive regexes
+area: ES|QL
+type: enhancement
+issues:
+ - 127479
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -263,6 +263,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ML_INFERENCE_HUGGING_FACE_CHAT_COMPLETION_ADDED = def(9_078_0_00);
     public static final TransportVersion NODES_STATS_SUPPORTS_MULTI_PROJECT = def(9_079_0_00);
     public static final TransportVersion ML_INFERENCE_HUGGING_FACE_RERANK_ADDED = def(9_080_0_00);
+    public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY = def(9_081_0_00);
     /*
      * STOP! READ THIS FIRST! No, really,
      *        ____ _____ ___  ____  _        ____  _____    _    ____    _____ _   _ ___ ____    _____ ___ ____  ____ _____ _

diff --git a/...a/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java b/...a/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java
@@ -16,11 +16,11 @@ public abstract class AbstractStringPattern implements StringPattern {
 
     private Automaton automaton;
 
-    public abstract Automaton createAutomaton();
+    public abstract Automaton createAutomaton(boolean ignoreCase);
 
     private Automaton automaton() {
         if (automaton == null) {
-            automaton = createAutomaton();
+            automaton = createAutomaton(false);
         }
         return automaton;
     }

diff --git a/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java b/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java
@@ -21,9 +21,10 @@ public RLikePattern(String regexpPattern) {
     }
 
     @Override
-    public Automaton createAutomaton() {
+    public Automaton createAutomaton(boolean ignoreCase) {
+        int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
         return Operations.determinize(
-            new RegExp(regexpPattern, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT).toAutomaton(),
+            new RegExp(regexpPattern, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, matchFlags).toAutomaton(),
             Operations.DEFAULT_DETERMINIZE_WORK_LIMIT
         );
     }

diff --git a/...in/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java b/...in/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java
@@ -10,10 +10,13 @@
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.xpack.esql.core.util.StringUtils;
 
 import java.util.Objects;
 
+import static org.elasticsearch.xpack.esql.core.util.StringUtils.luceneWildcardToRegExp;
+
 /**
  * Similar to basic regex, supporting '?' wildcard for single character (same as regex  ".")
  * and '*' wildcard for multiple characters (same as regex ".*")
@@ -37,8 +40,14 @@ public String pattern() {
     }
 
     @Override
-    public Automaton createAutomaton() {
-        return WildcardQuery.toAutomaton(new Term(null, wildcard), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    public Automaton createAutomaton(boolean ignoreCase) {
+        return ignoreCase
+            ? Operations.determinize(
+                new RegExp(luceneWildcardToRegExp(wildcard), RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, RegExp.CASE_INSENSITIVE)
+                    .toAutomaton(),
+                Operations.DEFAULT_DETERMINIZE_WORK_LIMIT
+            )
+            : WildcardQuery.toAutomaton(new Term(null, wildcard), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     }
 
     @Override

diff --git a/...ck/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java b/...ck/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java
@@ -7,6 +7,7 @@
 package org.elasticsearch.xpack.esql.core.util;
 
 import org.apache.lucene.document.InetAddressPoint;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spell.LevenshteinDistance;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
@@ -178,6 +179,44 @@ public static String wildcardToJavaPattern(String pattern, char escape) {
         return regex.toString();
     }
 
+    /**
+     * Translates a Lucene wildcard pattern to a Lucene RegExp one that matches the same strings.
+     * <p>
+     * Unescaped {@code *} and {@code ?} turn into {@code .*} and {@code .}; every other char is matched literally. Chars that are
+     * RegExp syntax -- including the optional operators {@code ~ # @ & < >} and the double quote -- get backslash-escaped, whether
+     * or not they were (needlessly) escaped in the wildcard. A trailing backslash stands for itself, as in WildcardQuery#toAutomaton.
+     * @param wildcard Lucene wildcard pattern
+     * @return Lucene RegExp pattern
+     */
+    public static String luceneWildcardToRegExp(String wildcard) {
+        // Chars to backslash-escape: RegExp syntax with all optional features on (the output gets parsed with RegExp.ALL), plus
+        // `^` and `$`, which are escaped defensively, as before.
+        final String reserved = "\\*?$()+.[]^{|}\"#&<>@~";
+        StringBuilder regex = new StringBuilder();
+
+        for (int i = 0, wcLen = wildcard.length(); i < wcLen; i++) {
+            char c = wildcard.charAt(i); // this will work chunking through Unicode as long as all values matched are ASCII
+            boolean literal = c == WildcardQuery.WILDCARD_ESCAPE;
+            if (literal && i + 1 < wcLen) {
+                // consume the wildcard escaping, consider the next char
+                c = wildcard.charAt(++i);
+            } // else: no escaping, or "lenient parsing with a trailing \" -- according to WildcardQuery#toAutomaton
+
+            if (literal == false && c == WildcardQuery.WILDCARD_STRING) {
+                regex.append(".*");
+            } else if (literal == false && c == WildcardQuery.WILDCARD_CHAR) {
+                regex.append(".");
+            } else {
+                if (reserved.indexOf(c) >= 0) {
+                    regex.append("\\");
+                }
+                regex.append(c);
+            }
+        }
+
+        return regex.toString();
+    }
+
     /**
      * Translates a like pattern to a Lucene wildcard.
      * This methods pays attention to the custom escape char which gets converted into \ (used by Lucene).

diff --git a/...ugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/StringUtilsTests.java b/...ugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/StringUtilsTests.java
@@ -9,8 +9,11 @@
 
 import org.elasticsearch.test.ESTestCase;
 
+import static org.elasticsearch.xpack.esql.core.util.StringUtils.luceneWildcardToRegExp;
 import static org.elasticsearch.xpack.esql.core.util.StringUtils.wildcardToJavaPattern;
 
+import static org.hamcrest.Matchers.is;
+
 public class StringUtilsTests extends ESTestCase {
 
     public void testNoWildcard() {
@@ -55,4 +58,21 @@ public void testWildcard() {
     public void testEscapedEscape() {
         assertEquals("^\\\\\\\\$", wildcardToJavaPattern("\\\\\\\\", '\\'));
     }
+
+    public void testLuceneWildcardToRegExp() { // each case: Lucene wildcard input -> expected Lucene RegExp output
+        assertThat(luceneWildcardToRegExp(""), is(""));
+        assertThat(luceneWildcardToRegExp("*"), is(".*")); // any-string wildcard
+        assertThat(luceneWildcardToRegExp("?"), is(".")); // single-char wildcard
+        assertThat(luceneWildcardToRegExp("\\\\"), is("\\\\")); // escaped backslash stays an escaped backslash
+        assertThat(luceneWildcardToRegExp("foo?bar"), is("foo.bar"));
+        assertThat(luceneWildcardToRegExp("foo*bar"), is("foo.*bar"));
+        assertThat(luceneWildcardToRegExp("foo\\\\bar"), is("foo\\\\bar"));
+        assertThat(luceneWildcardToRegExp("foo*bar?baz"), is("foo.*bar.baz"));
+        assertThat(luceneWildcardToRegExp("foo\\*bar"), is("foo\\*bar")); // escaped wildcards stay escaped, i.e. literal
+        assertThat(luceneWildcardToRegExp("foo\\?bar\\?"), is("foo\\?bar\\?"));
+        assertThat(luceneWildcardToRegExp("foo\\?bar\\"), is("foo\\?bar\\\\")); // trailing lone backslash is taken literally
+        assertThat(luceneWildcardToRegExp("[](){}^$.|+"), is("\\[\\]\\(\\)\\{\\}\\^\\$\\.\\|\\+")); // regex-special chars get escaped
+        assertThat(luceneWildcardToRegExp("foo\\\uD83D\uDC14bar"), is("foo\uD83D\uDC14bar")); // needless escape is dropped
+        assertThat(luceneWildcardToRegExp("foo\uD83D\uDC14bar"), is("foo\uD83D\uDC14bar")); // surrogate pair passes through unchanged
+    }
 }
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec
@@ -319,3 +319,63 @@ warningRegex:java.lang.IllegalArgumentException: single-value function encounter
 emp_no:integer | job_positions:keyword 
 10025          | Accountant 
 ;
+
+likeWithUpper
+FROM employees
+| KEEP emp_no, first_name
+| SORT emp_no
+| WHERE TO_UPPER(first_name) LIKE "GEOR*"
+;
+
+emp_no:integer |first_name:keyword
+10001          |Georgi
+10055          |Georgy
+;
+
+likeWithLower
+FROM employees
+| KEEP emp_no, first_name
+| SORT emp_no
+| WHERE TO_LOWER(TO_UPPER(first_name)) LIKE "geor*"
+;
+
+emp_no:integer |first_name:keyword
+10001          |Georgi
+10055          |Georgy
+;
+
+rlikeWithUpper
+FROM employees
+| KEEP emp_no, first_name
+| SORT emp_no
+| WHERE TO_UPPER(first_name) RLIKE "GEOR.*"
+;
+
+emp_no:integer |first_name:keyword
+10001          |Georgi
+10055          |Georgy
+;
+
+rlikeWithLower
+FROM employees
+| KEEP emp_no, first_name
+| SORT emp_no
+| WHERE TO_LOWER(TO_UPPER(first_name)) RLIKE "geor.*"
+;
+
+emp_no:integer |first_name:keyword
+10001          |Georgi
+10055          |Georgy
+;
+
+negatedRLikeWithLower
+FROM employees
+| KEEP emp_no, first_name
+| SORT emp_no
+| WHERE TO_LOWER(TO_UPPER(first_name)) NOT RLIKE "geor.*"
+| STATS c = COUNT()
+;
+
+c:long
+88
+;
diff --git a/...l/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java b/...l/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java
@@ -7,10 +7,12 @@
 
 package org.elasticsearch.xpack.esql.expression.function.scalar.string;
 
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
 import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.expression.FoldContext;
@@ -37,6 +39,7 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicat
         EvaluatorMapper,
         TranslationAware.SingleValueTranslationAware {
     public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "RLike", RLike::new);
+    public static final String NAME = "RLIKE";
 
     @FunctionInfo(returnType = "boolean", description = """
         Use `RLIKE` to filter data based on string patterns using using
@@ -52,28 +55,43 @@ Matching special characters (eg. `.`, `*`, `(`...) will require escaping.
         To reduce the overhead of escaping, we suggest using triple quotes strings `\"\"\"`
 
         <<load-esql-example, file=string tag=rlikeEscapingTripleQuotes>>
-        """, operator = "RLIKE", examples = @Example(file = "docs", tag = "rlike"))
+        """, operator = NAME, examples = @Example(file = "docs", tag = "rlike"))
     public RLike(
         Source source,
         @Param(name = "str", type = { "keyword", "text" }, description = "A literal value.") Expression value,
         @Param(name = "pattern", type = { "keyword", "text" }, description = "A regular expression.") RLikePattern pattern
     ) {
-        super(source, value, pattern);
+        this(source, value, pattern, false);
     }
 
     public RLike(Source source, Expression field, RLikePattern rLikePattern, boolean caseInsensitive) {
         super(source, field, rLikePattern, caseInsensitive);
     }
 
     private RLike(StreamInput in) throws IOException {
-        this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), new RLikePattern(in.readString()));
+        this(
+            Source.readFrom((PlanStreamInput) in),
+            in.readNamedWriteable(Expression.class),
+            new RLikePattern(in.readString()),
+            in.getTransportVersion().onOrAfter(TransportVersions.ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY) && in.readBoolean()
+        );
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         source().writeTo(out);
         out.writeNamedWriteable(field());
         out.writeString(pattern().asJavaRegex());
+        if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY)) {
+            out.writeBoolean(caseInsensitive()); // mirrors the version-gated readBoolean() of the StreamInput constructor
+        } else if (caseInsensitive()) {
+            // The peer cannot be notified of the case-insensitive match; silently skipping the flag would produce wrong results.
+            throw new EsqlIllegalArgumentException(
+                NAME + " with case insensitivity is not supported in peer node's version [{}]. Upgrade to version [{}] or newer.",
+                out.getTransportVersion(),
+                TransportVersions.ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY
+            );
+        } // else: case-sensitive match sent to an older peer -- write nothing, as that peer does not read the flag
     }
 
     @Override
@@ -103,7 +121,7 @@ public Boolean fold(FoldContext ctx) {
 
     @Override
     public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
-        return AutomataMatch.toEvaluator(source(), toEvaluator.apply(field()), pattern().createAutomaton());
+        return AutomataMatch.toEvaluator(source(), toEvaluator.apply(field()), pattern().createAutomaton(caseInsensitive()));
     }
 
     @Override
@@ -122,4 +140,9 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
     public Expression singleValueField() {
         return field();
     }
+
+    @Override
+    public String nodeString() { // renders as NAME(<field node string>, "<pattern>", <caseInsensitive flag>)
+        return NAME + "(" + field().nodeString() + ", \"" + pattern().pattern() + "\", " + caseInsensitive() + ")";
+    }
 }