Skip to content

Commit ee216a0

Browse files
committed
ESQL: Fix case insensitive comparisons to "" (elastic#127532)
This fixes the compute engine side of case-insensitive `==`. You can trigger it by writing:
```
FROM foo | WHERE TO_LOWER(field) == ""
```
But *only* when we can't push the comparison to Lucene - like if `field` is not indexed or is a `text` field. Closes elastic#127431
1 parent 86a3a59 commit ee216a0

File tree

5 files changed

+100
-1
lines changed

5 files changed

+100
-1
lines changed

docs/changelog/127532.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127532
2+
summary: Fix case insensitive comparisons to ""
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 127431

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,12 @@ public enum Cap {
912912
* Avoid GROK and DISSECT attributes being removed when resolving fields.
913913
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
914914
*/
915-
KEEP_REGEX_EXTRACT_ATTRIBUTES;
915+
KEEP_REGEX_EXTRACT_ATTRIBUTES,
916+
917+
/**
918+
* Guards a bug fix matching {@code TO_LOWER(f) == ""}.
919+
*/
920+
TO_LOWER_EMPTY_STRING;
916921

917922
private final boolean enabled;
918923

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InsensitiveEquals.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package org.elasticsearch.xpack.esql.expression.predicate.operator.comparison;
88

99
import org.apache.lucene.util.BytesRef;
10+
import org.apache.lucene.util.automaton.Automata;
1011
import org.apache.lucene.util.automaton.Automaton;
1112
import org.apache.lucene.util.automaton.ByteRunAutomaton;
1213
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
@@ -83,6 +84,10 @@ protected TypeResolution resolveType() {
8384
}
8485

8586
public static Automaton automaton(BytesRef val) {
87+
if (val.length == 0) {
88+
// toCaseInsensitiveString doesn't match empty strings properly so let's do it ourselves
89+
return Automata.makeEmptyString();
90+
}
8691
return AutomatonQueries.toCaseInsensitiveString(val.utf8ToString());
8792
}
8893

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/InsensitiveEqualsTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ public void testFold() {
2626
assertTrue(insensitiveEquals(l("foo*"), l("FOO*")).fold(FoldContext.small()));
2727
assertTrue(insensitiveEquals(l("foo?bar"), l("foo?bar")).fold(FoldContext.small()));
2828
assertTrue(insensitiveEquals(l("foo?bar"), l("FOO?BAR")).fold(FoldContext.small()));
29+
assertTrue(insensitiveEquals(l(""), l("")).fold(FoldContext.small()));
30+
2931
assertFalse(insensitiveEquals(l("Foo"), l("fo*")).fold(FoldContext.small()));
3032
assertFalse(insensitiveEquals(l("Fox"), l("fo?")).fold(FoldContext.small()));
3133
assertFalse(insensitiveEquals(l("Foo"), l("*OO")).fold(FoldContext.small()));
@@ -60,6 +62,8 @@ public void testProcess() {
6062
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo*"), BytesRefs.toBytesRef("FOO*")));
6163
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("foo?bar")));
6264
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef("foo?bar"), BytesRefs.toBytesRef("FOO?BAR")));
65+
assertTrue(InsensitiveEquals.process(BytesRefs.toBytesRef(""), BytesRefs.toBytesRef("")));
66+
6367
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("fo*")));
6468
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Fox"), BytesRefs.toBytesRef("fo?")));
6569
assertFalse(InsensitiveEquals.process(BytesRefs.toBytesRef("Foo"), BytesRefs.toBytesRef("*OO")));
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
setup:
3+
- requires:
4+
test_runner_features: [ capabilities ]
5+
capabilities:
6+
- method: POST
7+
path: /_query
8+
parameters: [ ]
9+
capabilities: [ query_monitoring ]
10+
reason: "uses query monitoring"
11+
12+
- do:
13+
bulk:
14+
index: "test"
15+
refresh: true
16+
body:
17+
- { "index": { } }
18+
- { "@timestamp": "2023-10-23T13:55:01.543Z", "message": "" }
19+
- { "index": { } }
20+
- { "@timestamp": "2023-10-23T13:55:01.544Z" }
21+
- { "index": { } }
22+
- { "@timestamp": "2023-10-23T13:55:01.545Z", "message": "a" }
23+
24+
---
25+
keyword equals empty string:
26+
- do:
27+
esql.query:
28+
body:
29+
query: 'FROM test | WHERE message.keyword == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
30+
31+
- match: { columns.0.name: "@timestamp" }
32+
- match: { columns.0.type: "date" }
33+
- length: { values: 1 }
34+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
35+
36+
---
37+
keyword to_lower equals empty string:
38+
- do:
39+
esql.query:
40+
body:
41+
query: 'FROM test | WHERE TO_LOWER(message.keyword) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
42+
43+
- match: { columns.0.name: "@timestamp" }
44+
- match: { columns.0.type: "date" }
45+
- length: { values: 1 }
46+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
47+
48+
---
49+
text equals empty string:
50+
- do:
51+
esql.query:
52+
body:
53+
query: 'FROM test | WHERE message == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
54+
55+
- match: { columns.0.name: "@timestamp" }
56+
- match: { columns.0.type: "date" }
57+
- length: { values: 1 }
58+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }
59+
60+
---
61+
text to_lower equals empty string:
62+
- requires:
63+
test_runner_features: [ capabilities ]
64+
capabilities:
65+
- method: POST
66+
path: /_query
67+
parameters: [ ]
68+
capabilities: [ to_lower_empty_string ]
69+
reason: "bug"
70+
71+
- do:
72+
esql.query:
73+
body:
74+
query: 'FROM test | WHERE TO_LOWER(message) == "" | SORT @timestamp ASC | KEEP @timestamp | LIMIT 10'
75+
76+
- match: { columns.0.name: "@timestamp" }
77+
- match: { columns.0.type: "date" }
78+
- length: { values: 1 }
79+
- match: { values.0.0: 2023-10-23T13:55:01.543Z }

0 commit comments

Comments
 (0)