Skip to content

Commit 3621bc9

Browse files
committed
Add replace command with Calcite
Signed-off-by: Manasvini B S <manasvis@amazon.com>
1 parent 38b4295 commit 3621bc9

File tree

14 files changed

+836
-1
lines changed

14 files changed

+836
-1
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
import org.opensearch.sql.ast.tree.Relation;
8585
import org.opensearch.sql.ast.tree.RelationSubquery;
8686
import org.opensearch.sql.ast.tree.Rename;
87+
import org.opensearch.sql.ast.tree.Replace;
8788
import org.opensearch.sql.ast.tree.Reverse;
8889
import org.opensearch.sql.ast.tree.Rex;
8990
import org.opensearch.sql.ast.tree.Search;
@@ -788,6 +789,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con
788789
return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context));
789790
}
790791

792+
@Override
793+
public LogicalPlan visitReplace(Replace node, AnalysisContext context) {
794+
throw getOnlyForCalciteException("Replace");
795+
}
796+
791797
@Override
792798
public LogicalPlan visitJoin(Join node, AnalysisContext context) {
793799
throw getOnlyForCalciteException("Join");

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import org.opensearch.sql.ast.tree.Relation;
7373
import org.opensearch.sql.ast.tree.RelationSubquery;
7474
import org.opensearch.sql.ast.tree.Rename;
75+
import org.opensearch.sql.ast.tree.Replace;
7576
import org.opensearch.sql.ast.tree.Reverse;
7677
import org.opensearch.sql.ast.tree.Rex;
7778
import org.opensearch.sql.ast.tree.SPath;
@@ -244,6 +245,10 @@ public T visitRename(Rename node, C context) {
244245
return visitChildren(node, context);
245246
}
246247

248+
public T visitReplace(Replace node, C context) {
249+
return visitChildren(node, context);
250+
}
251+
247252
public T visitEval(Eval node, C context) {
248253
return visitChildren(node, context);
249254
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import com.google.common.collect.ImmutableList;
9+
import java.util.HashSet;
10+
import java.util.List;
11+
import java.util.Set;
12+
import java.util.stream.Collectors;
13+
import lombok.EqualsAndHashCode;
14+
import lombok.Getter;
15+
import lombok.Setter;
16+
import lombok.ToString;
17+
import org.opensearch.sql.ast.AbstractNodeVisitor;
18+
import org.opensearch.sql.ast.expression.DataType;
19+
import org.opensearch.sql.ast.expression.Field;
20+
import org.opensearch.sql.ast.expression.Literal;
21+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
22+
23+
@Getter
24+
@Setter
25+
@ToString
26+
@EqualsAndHashCode(callSuper = false)
27+
public class Replace extends UnresolvedPlan {
28+
private final UnresolvedExpression pattern;
29+
private final UnresolvedExpression replacement;
30+
private final List<Field> fieldList;
31+
private UnresolvedPlan child;
32+
33+
public Replace(
34+
UnresolvedExpression pattern, UnresolvedExpression replacement, List<Field> fieldList) {
35+
this.pattern = pattern;
36+
this.replacement = replacement;
37+
this.fieldList = fieldList;
38+
validate();
39+
}
40+
41+
public void validate() {
42+
if (pattern == null) {
43+
throw new IllegalArgumentException("Pattern expression cannot be null in Replace command");
44+
}
45+
if (replacement == null) {
46+
throw new IllegalArgumentException(
47+
"Replacement expression cannot be null in Replace command");
48+
}
49+
50+
// Validate pattern is a string literal
51+
if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) {
52+
throw new IllegalArgumentException("Pattern must be a string literal in Replace command");
53+
}
54+
55+
// Validate replacement is a string literal
56+
if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) {
57+
throw new IllegalArgumentException("Replacement must be a string literal in Replace command");
58+
}
59+
60+
if (fieldList == null || fieldList.isEmpty()) {
61+
throw new IllegalArgumentException(
62+
"Field list cannot be empty in Replace command. Use IN clause to specify the field.");
63+
}
64+
65+
Set<String> uniqueFields = new HashSet<>();
66+
List<String> duplicates =
67+
fieldList.stream()
68+
.map(field -> field.getField().toString())
69+
.filter(fieldName -> !uniqueFields.add(fieldName))
70+
.collect(Collectors.toList());
71+
72+
if (!duplicates.isEmpty()) {
73+
throw new IllegalArgumentException(
74+
String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates)));
75+
}
76+
}
77+
78+
@Override
79+
public Replace attach(UnresolvedPlan child) {
80+
if (null == this.child) {
81+
this.child = child;
82+
} else {
83+
this.child.attach(child);
84+
}
85+
return this;
86+
}
87+
88+
@Override
89+
public List<UnresolvedPlan> getChild() {
90+
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
91+
}
92+
93+
@Override
94+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
95+
return nodeVisitor.visitReplace(this, context);
96+
}
97+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
import org.opensearch.sql.ast.tree.Regex;
113113
import org.opensearch.sql.ast.tree.Relation;
114114
import org.opensearch.sql.ast.tree.Rename;
115+
import org.opensearch.sql.ast.tree.Replace;
115116
import org.opensearch.sql.ast.tree.Rex;
116117
import org.opensearch.sql.ast.tree.SPath;
117118
import org.opensearch.sql.ast.tree.Search;
@@ -144,6 +145,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalciteP
144145

145146
private final CalciteRexNodeVisitor rexVisitor;
146147
private final CalciteAggCallVisitor aggVisitor;
148+
private static final String NEW_FIELD_PREFIX = "new_";
147149

148150
public CalciteRelNodeVisitor() {
149151
this.rexVisitor = new CalciteRexNodeVisitor(this);
@@ -2180,6 +2182,40 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
21802182
}
21812183
}
21822184

2185+
@Override
2186+
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
2187+
visitChildren(node, context);
2188+
2189+
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
2190+
RexNode patternNode = rexVisitor.analyze(node.getPattern(), context);
2191+
RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context);
2192+
2193+
List<RexNode> projectList = new ArrayList<>();
2194+
List<String> newFieldNames = new ArrayList<>();
2195+
2196+
// First add all original fields
2197+
for (String fieldName : fieldNames) {
2198+
RexNode fieldRef = context.relBuilder.field(fieldName);
2199+
projectList.add(fieldRef);
2200+
newFieldNames.add(fieldName);
2201+
}
2202+
2203+
// Then add new fields with replaced content using new_ prefix
2204+
for (Field field : node.getFieldList()) {
2205+
String fieldName = field.getField().toString();
2206+
RexNode fieldRef = context.relBuilder.field(fieldName);
2207+
2208+
RexNode replaceCall =
2209+
context.relBuilder.call(
2210+
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2211+
projectList.add(replaceCall);
2212+
newFieldNames.add(NEW_FIELD_PREFIX + fieldName);
2213+
}
2214+
2215+
context.relBuilder.project(projectList, newFieldNames);
2216+
return context.relBuilder.peek();
2217+
}
2218+
21832219
private void buildParseRelNode(Parse node, CalcitePlanContext context) {
21842220
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
21852221
ParseMethod parseMethod = node.getParseMethod();

docs/category.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
"user/ppl/cmd/rex.rst",
6464
"user/ppl/cmd/stats.rst",
6565
"user/ppl/cmd/timechart.rst",
66-
"user/ppl/cmd/search.rst"
66+
"user/ppl/cmd/search.rst",
67+
"user/ppl/cmd/replace.rst"
6768
]
6869
}

docs/user/ppl/cmd/replace.rst

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
=============
2+
replace
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
| The ``replace`` command replaces text in one or more fields of the search result.
15+
* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country')
16+
* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0')
17+
18+
19+
Version
20+
=======
21+
3.3.0
22+
23+
24+
Syntax
25+
============
26+
replace '<pattern>' WITH '<replacement>' IN <field-name>[, <field-name>]...
27+
28+
Note: This command is only available when the Calcite engine is enabled.
29+
30+
* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
31+
* replacement: mandatory. The text you want to replace with.
32+
* field list: mandatory. One or more field names where the replacement should occur.
33+
34+
35+
Example 1: Replace text in one field
36+
====================================
37+
38+
The example shows replacing text in one field.
39+
40+
PPL query::
41+
42+
os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state;
43+
fetched rows / total rows = 4/4
44+
+-------+-----------+
45+
| state | new_state |
46+
|-------+-----------|
47+
| IL | Illinois |
48+
| TN | TN |
49+
| VA | VA |
50+
| MD | MD |
51+
+-------+-----------+
52+
53+
54+
Example 2: Replace text in multiple fields
55+
==========================================
56+
57+
The example shows replacing text in multiple fields.
58+
59+
PPL query::
60+
61+
os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address;
62+
fetched rows / total rows = 4/4
63+
+-------+----------------------+-----------+----------------------+
64+
| state | address | new_state | new_address |
65+
|-------+----------------------+-----------+----------------------|
66+
| IL | 880 Holmes Lane | Illinois | 880 Holmes Lane |
67+
| TN | 671 Bristol Street | TN | 671 Bristol Street |
68+
| VA | 789 Madison Street | VA | 789 Madison Street |
69+
| MD | 467 Hutchinson Court | MD | 467 Hutchinson Court |
70+
+-------+----------------------+-----------+----------------------+
71+
72+
73+
Example 3: Replace with IN clause and other commands
74+
====================================================
75+
76+
The example shows using replace with other commands.
77+
78+
PPL query::
79+
80+
os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state;
81+
fetched rows / total rows = 3/3
82+
+-------+-----+-----------+
83+
| state | age | new_state |
84+
|-------+-----+-----------|
85+
| IL | 32 | Illinois |
86+
| TN | 36 | TN |
87+
| MD | 33 | MD |
88+
+-------+-----+-----------+
89+
90+
Example 4: Pattern matching with LIKE and replace
91+
=================================================
92+
93+
Since the replace command only supports plain string literals, you can combine it with the LIKE function in a ``where`` command when you need pattern matching.
94+
95+
PPL query::
96+
97+
os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address;
98+
fetched rows / total rows = 1/1
99+
+-----------------+-------+--------+-----+--------+-----------------+
100+
| address | state | gender | age | city | new_address |
101+
|-----------------+-------+--------+-----+--------+-----------------|
102+
| 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane |
103+
+-----------------+-------+--------+-----+--------+-----------------+
104+
105+
Note
106+
====
107+
* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged.

docs/user/ppl/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited
124124

125125
- `trendline command <cmd/trendline.rst>`_
126126

127+
- `replace command <cmd/replace.rst>`_
128+
127129
- `where command <cmd/where.rst>`_
128130

129131
* **Functions**

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
CalciteRegexCommandIT.class,
8989
CalciteRexCommandIT.class,
9090
CalciteRenameCommandIT.class,
91+
CalciteReplaceCommandIT.class,
9192
CalciteResourceMonitorIT.class,
9293
CalciteSearchCommandIT.class,
9394
CalciteSettingsIT.class,

0 commit comments

Comments
 (0)