Skip to content

Commit d01c5bb

Browse files
jimczi authored and mridula-s109 committed
Avoid unnecessary determinization in index pattern conflict checks (elastic#128362)
Starting with Lucene 10, `CharacterRunAutomaton` is no longer determinized automatically. In Elasticsearch 9, we adapted to this by eagerly determinizing automatons early (via `Regex#simpleMatchToAutomaton`). However, this introduced a regression: operations like index template conflict checks, which only require intersection testing, now pay the cost of determinization—an expensive step that wasn’t needed before. In some cases, especially when many wildcard patterns are involved, determinization can even fail due to state explosion. This change removes the unnecessary determinization, restoring the pre-9.0 behavior and allowing valid index templates with many patterns to be registered again.
1 parent e9afc9b commit d01c5bb

File tree

4 files changed

+60
-9
lines changed

4 files changed

+60
-9
lines changed

docs/changelog/128362.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128362
2+
summary: Avoid unnecessary determinization in index pattern conflict checks
3+
area: Indices APIs
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -946,12 +946,15 @@ public static Map<String, List<String>> findConflictingV1Templates(
946946
final String candidateName,
947947
final List<String> indexPatterns
948948
) {
949-
Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
949+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
950+
// Determinization is avoided because it can fail or become very costly due to state explosion.
951+
Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
950952
Map<String, List<String>> overlappingTemplates = new HashMap<>();
951953
for (Map.Entry<String, IndexTemplateMetadata> cursor : project.templates().entrySet()) {
952954
String name = cursor.getKey();
953955
IndexTemplateMetadata template = cursor.getValue();
954-
Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
956+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
957+
Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
955958
if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) {
956959
logger.debug(
957960
"composable template {} and legacy template {} would overlap: {} <=> {}",

server/src/main/java/org/elasticsearch/common/regex/Regex.java

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ public static boolean isSuffixWildcard(String str) {
5959
return isSuffixMatchPattern(str) && str.endsWith(".*");
6060
}
6161

62-
/** Return an {@link Automaton} that matches the given pattern. */
63-
public static Automaton simpleMatchToAutomaton(String pattern) {
62+
/**
63+
* Return a non-determinized {@link Automaton} that matches the given pattern.
64+
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
65+
* that do not require determinism (e.g., checking the intersection of automatons).
66+
*
67+
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
68+
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
69+
*/
70+
public static Automaton simpleMatchToNonDeterminizedAutomaton(String pattern) {
6471
List<Automaton> automata = new ArrayList<>();
6572
int previous = 0;
6673
for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
@@ -69,13 +76,24 @@ public static Automaton simpleMatchToAutomaton(String pattern) {
6976
previous = i + 1;
7077
}
7178
automata.add(Automata.makeString(pattern.substring(previous)));
72-
return Operations.determinize(Operations.concatenate(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
79+
return Operations.concatenate(automata);
80+
}
81+
82+
/** Return a deterministic {@link Automaton} that matches the given pattern. */
83+
public static Automaton simpleMatchToAutomaton(String pattern) {
84+
return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(pattern), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
7385
}
7486

7587
/**
76-
* Return an Automaton that matches the union of the provided patterns.
88+
* Returns a non-determinized {@link Automaton} that matches the union of the given patterns.
89+
*
90+
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
91+
* that do not require determinism (e.g., checking the intersection of automatons).
92+
*
93+
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
94+
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
7795
*/
78-
public static Automaton simpleMatchToAutomaton(String... patterns) {
96+
public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) {
7997
if (patterns.length < 1) {
8098
throw new IllegalArgumentException("There must be at least one pattern, zero given");
8199
}
@@ -88,7 +106,7 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
88106
if (isSuffixWildcard(pattern) && pattern.length() < 1000) {
89107
prefixes.add(new BytesRef(pattern.substring(0, pattern.length() - 1)));
90108
} else if (isSimpleMatchPattern(pattern) || pattern.length() >= 1000) {
91-
automata.add(simpleMatchToAutomaton(pattern));
109+
automata.add(simpleMatchToNonDeterminizedAutomaton(pattern));
92110
} else {
93111
simpleStrings.add(new BytesRef(pattern));
94112
}
@@ -113,7 +131,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
113131
prefixAutomaton.add(Automata.makeAnyString());
114132
automata.add(Operations.concatenate(prefixAutomaton));
115133
}
116-
return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
134+
return Operations.union(automata);
135+
}
136+
137+
/**
138+
* Return a deterministic Automaton that matches the union of the provided patterns.
139+
*/
140+
public static Automaton simpleMatchToAutomaton(String... patterns) {
141+
return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(patterns), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
117142
}
118143

119144
/**

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,20 @@
1010

1111
import org.apache.lucene.util.automaton.Automaton;
1212
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
13+
import org.apache.lucene.util.automaton.Operations;
14+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1315
import org.elasticsearch.test.ESTestCase;
1416

1517
import java.io.IOException;
18+
import java.util.Arrays;
1619
import java.util.Locale;
1720
import java.util.Random;
1821
import java.util.function.Predicate;
1922
import java.util.regex.Pattern;
2023

2124
import static org.elasticsearch.test.LambdaMatchers.falseWith;
2225
import static org.elasticsearch.test.LambdaMatchers.trueWith;
26+
import static org.hamcrest.Matchers.containsString;
2327
import static org.hamcrest.Matchers.equalTo;
2428

2529
public class RegexTests extends ESTestCase {
@@ -250,4 +254,18 @@ public void testThousandsAndLongPattern() throws IOException {
250254
assertTrue(predicate.test(patterns[i]));
251255
}
252256
}
257+
258+
public void testIntersectNonDeterminizedAutomaton() {
259+
// patterns too complex to determinize within the default limit
260+
String[] patterns = randomArray(20, 100, size -> new String[size], () -> "*" + randomAlphanumericOfLength(10) + "*");
261+
Automaton a = Regex.simpleMatchToNonDeterminizedAutomaton(patterns);
262+
assertFalse(a.isDeterministic());
263+
Automaton b = Regex.simpleMatchToNonDeterminizedAutomaton(Arrays.copyOfRange(patterns, patterns.length / 2, patterns.length));
264+
assertFalse(b.isDeterministic());
265+
assertFalse(Operations.isEmpty(Operations.intersection(a, b)));
266+
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> assertMatchesAll(a, "my_test"));
267+
// the run automaton expects a deterministic automaton
268+
assertThat(exc.getMessage(), containsString("deterministic"));
269+
expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(patterns));
270+
}
253271
}

0 commit comments

Comments
 (0)