Skip to content

Commit 8dd8b5c

Browse files
authored
GH-135379: Support limited scalar replacement for replicated uops in the JIT code generator. (GH-135563)
* Use it to support efficient specializations of COPY and SWAP in the JIT.
1 parent a9e66a7 commit 8dd8b5c

File tree

9 files changed

+313
-198
lines changed

9 files changed

+313
-198
lines changed

Include/internal/pycore_uop_ids.h

Lines changed: 176 additions & 171 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 30 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/bytecodes.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4946,8 +4946,7 @@ dummy_func(
49464946
res = PyStackRef_FromPyObjectSteal(res_o);
49474947
}
49484948

4949-
pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
4950-
assert(oparg > 0);
4949+
pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
49514950
top = PyStackRef_DUP(bottom);
49524951
}
49534952

@@ -4980,12 +4979,11 @@ dummy_func(
49804979

49814980
macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP;
49824981

4983-
pure inst(SWAP, (bottom, unused[oparg-2], top --
4982+
pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top --
49844983
bottom, unused[oparg-2], top)) {
49854984
_PyStackRef temp = bottom;
49864985
bottom = top;
49874986
top = temp;
4988-
assert(oparg >= 2);
49894987
}
49904988

49914989
inst(INSTRUMENTED_LINE, ( -- )) {

Python/executor_cases.c.h

Lines changed: 59 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/generated_cases.c.h

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1292,8 +1292,8 @@ uop_optimize(
12921292
for (int pc = 0; pc < length; pc++) {
12931293
int opcode = buffer[pc].opcode;
12941294
int oparg = buffer[pc].oparg;
1295-
if (oparg < _PyUop_Replication[opcode]) {
1296-
buffer[pc].opcode = opcode + oparg + 1;
1295+
if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) {
1296+
buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start;
12971297
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
12981298
}
12991299
else if (is_terminator(&buffer[pc])) {

Tools/cases_generator/analyzer.py

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ class Uop:
180180
properties: Properties
181181
_size: int = -1
182182
implicitly_created: bool = False
183-
replicated = 0
183+
replicated = range(0)
184184
replicates: "Uop | None" = None
185185
# Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
186186
instruction_size: int | None = None
@@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties:
868868
needs_prev=variable_used(op, "prev_instr"),
869869
)
870870

871+
def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
    """Replace the single oparg-sized array in *items* with scalar items.

    An item counts as an oparg-sized array when the text "oparg" occurs in
    its ``size`` expression.  If exactly one such item exists and its size
    can be evaluated for the given *oparg*, it is replaced by that many
    scalar items named ``<name>_0``, ``<name>_1``, ... which copy the
    array's ``peek`` and ``used`` fields and have an empty size.

    Args:
        items: Stack items to expand; the list itself is never mutated.
        oparg: Concrete oparg value for which the array size is computed.

    Returns:
        *items* itself (unchanged) when there is no array, more than one
        array, or the size cannot be evaluated to a non-negative count;
        otherwise a new list with the array replaced by its scalars.
    """
    # Only replace array item with scalar if no more than one item is an array
    array_positions = [
        pos for pos, item in enumerate(items) if "oparg" in item.size
    ]
    if len(array_positions) != 1:
        return items
    index = array_positions[0]
    array = items[index]
    try:
        # Bind oparg as a variable rather than substituting text, so longer
        # identifiers that merely contain "oparg" are not corrupted.  The
        # size is a C expression and may not be valid Python at all; any
        # evaluation failure means "leave the array as it is".
        count = int(eval(array.size, {"__builtins__": {}}, {"oparg": oparg}))
    except (ArithmeticError, NameError, SyntaxError, TypeError, ValueError):
        return items
    if count < 0:
        # A negative length is meaningless; do not silently drop the item.
        return items
    scalars = [
        StackItem(f"{array.name}_{n}", "", array.peek, array.used)
        for n in range(count)
    ]
    return items[:index] + scalars + items[index + 1:]
888+
889+
def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
    """Expand the inputs and outputs of *stack* for a concrete *oparg*.

    Both ``stack.inputs`` and ``stack.outputs`` are passed through
    ``expand`` (inputs first) and the results are assigned back, so *stack*
    is modified in place.  The same object is returned for convenience.
    """
    for side in ("inputs", "outputs"):
        setattr(stack, side, expand(getattr(stack, side), oparg))
    return stack
871893

872894
def make_uop(
873895
name: str,
@@ -887,20 +909,26 @@ def make_uop(
887909
)
888910
for anno in op.annotations:
889911
if anno.startswith("replicate"):
890-
result.replicated = int(anno[10:-1])
912+
text = anno[10:-1]
913+
start, stop = text.split(":")
914+
result.replicated = range(int(start), int(stop))
891915
break
892916
else:
893917
return result
894-
for oparg in range(result.replicated):
918+
for oparg in result.replicated:
895919
name_x = name + "_" + str(oparg)
896920
properties = compute_properties(op)
897921
properties.oparg = False
898-
properties.const_oparg = oparg
922+
stack = analyze_stack(op)
923+
if not variable_used(op, "oparg"):
924+
stack = scalarize_stack(stack, oparg)
925+
else:
926+
properties.const_oparg = oparg
899927
rep = Uop(
900928
name=name_x,
901929
context=op.context,
902930
annotations=op.annotations,
903-
stack=analyze_stack(op),
931+
stack=stack,
904932
caches=analyze_caches(inputs),
905933
local_stores=find_variable_stores(op),
906934
body=op.block,

Tools/cases_generator/parsing.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None:
379379
while anno := self.expect(lx.ANNOTATION):
380380
if anno.text == "replicate":
381381
self.require(lx.LPAREN)
382-
times = self.require(lx.NUMBER)
382+
stop = self.require(lx.NUMBER)
383+
start_text = "0"
384+
if self.expect(lx.COLON):
385+
start_text = stop.text
386+
stop = self.require(lx.NUMBER)
383387
self.require(lx.RPAREN)
384-
annotations.append(f"replicate({times.text})")
388+
annotations.append(f"replicate({start_text}:{stop.text})")
385389
else:
386390
annotations.append(anno.text)
387391
tkn = self.expect(lx.INST)

Tools/cases_generator/uop_metadata_generator.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,8 @@
2424

2525
def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
2626
out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n")
27-
out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n")
27+
out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n")
28+
out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n")
2829
out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n")
2930
out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n")
3031
out.emit("#ifdef NEED_OPCODE_METADATA\n")
@@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None:
3435
out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n")
3536

3637
out.emit("};\n\n")
37-
out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n")
38+
out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n")
3839
for uop in analysis.uops.values():
3940
if uop.replicated:
40-
out.emit(f"[{uop.name}] = {uop.replicated},\n")
41+
assert(uop.replicated.step == 1)
42+
out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n")
4143

4244
out.emit("};\n\n")
4345
out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n")

0 commit comments

Comments
 (0)