From d5afe158bea44785d315d701124ade91875417d9 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 22 Jun 2025 20:18:15 +0900 Subject: [PATCH 1/4] gh-134584: Elimiate redundant refcounting from _BINARY_OP_X_UNICODE --- Include/internal/pycore_uop_ids.h | 90 ++++++++++++++------------ Include/internal/pycore_uop_metadata.h | 16 +++++ Python/bytecodes.c | 22 +++++++ Python/executor_cases.c.h | 42 ++++++++++++ Python/optimizer_cases.c.h | 24 +++++++ 5 files changed, 151 insertions(+), 43 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e19fb..a9432401525ebb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define _STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 +#define _STORE_FAST 524 +#define _STORE_FAST_0 525 +#define _STORE_FAST_1 526 +#define _STORE_FAST_2 527 +#define _STORE_FAST_3 528 +#define _STORE_FAST_4 529 +#define _STORE_FAST_5 530 +#define _STORE_FAST_6 531 +#define _STORE_FAST_7 532 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define _STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 533 +#define _STORE_SUBSCR 534 +#define _STORE_SUBSCR_DICT 535 +#define _STORE_SUBSCR_LIST_INT 536 +#define _SWAP 537 +#define _SWAP_2 538 +#define _SWAP_3 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785817..70387fadd87265 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = HAS_ESCAPES_FLAG, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case _POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38ccfcc..4ec5c08a58c631 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,28 @@ dummy_func( PyStackRef_XCLOSE(value); } + + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550afe..c20960c37b634b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,48 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + _PyFrame_SetStackPointer(frame, stack_pointer); + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 91927180b3509d..9652f4febaf245 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -105,6 +105,30 @@ break; } + case _POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); From 46b423fcd354bd229d6e430860946e1fe166bb18 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 22 Jun 2025 21:17:38 +0900 Subject: [PATCH 2/4] WIP --- Include/internal/pycore_opcode_metadata.h | 2 +- Lib/test/test_capi/test_opt.py | 19 ++++++++++ Lib/test/test_sys.py | 4 +-- Programs/test_frozenmain.h | 2 +- Python/bytecodes.c | 8 ++--- Python/compile.c | 4 +++ Python/executor_cases.c.h | 10 ++++-- Python/generated_cases.c.h | 18 ++++++++-- Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 38 ++++++++++++++++++-- Python/optimizer_cases.c.h | 44 ++++++++++++++++++++--- 11 files changed, 130 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..358c2f44855c22 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1322,7 +1322,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, + [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } }, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 84e864b44b9544..f6679c6e8cc1b0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -5,6 +5,8 @@ import unittest import gc import os +import random +import string import _opcode @@ -2362,6 +2364,23 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_INT", uops) self.assertNotIn("_GUARD_NOS_INT", uops) + def test_store_fast_pop_top_specialize_unicode(self): + def random_str(n): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(n)) + def testfunc(n): + y = random_str(32) + for _ in range(n): + x = y + y # _POP_TOP + x = None # _POP_TOP_NOP + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just # testing it doesn't crash. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 27524d86355b9c..3b5411f8615d62 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1671,7 +1671,7 @@ def func(): INTERPRETER_FRAME = '9PihcP' else: INTERPRETER_FRAME = '9PhcP' - check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'P')) + check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'PP')) # function def func(): pass check(func, size('16Pi')) @@ -1688,7 +1688,7 @@ def bar(cls): check(bar, size('PP')) # generator def get_gen(): yield 1 - check(get_gen(), size('6P4c' + INTERPRETER_FRAME + 'P')) + check(get_gen(), size('6P4c' + INTERPRETER_FRAME + 'PP')) # iterator check(iter('abc'), size('lP')) # callable-iterator diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index dbeedb7ffe0ce6..4e8d43d62c1e3e 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,6 +1,6 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { - 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, + 227,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0, 0,0,0,0,0,243,184,0,0,0,128,0,94,0,82,1, 73,0,116,0,94,0,82,1,73,1,116,1,93,2,33,0, 82,2,52,1,0,0,0,0,0,0,31,0,93,2,33,0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4ec5c08a58c631..d0dfc6d329947c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -773,7 +773,7 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_SUBTRACT_FLOAT; - pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyUnicode_CheckExact(left_o)); @@ -781,15 +781,15 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL); + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); } macro(BINARY_OP_ADD_UNICODE) = - _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_ADD_UNICODE; + _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_ADD_UNICODE + _POP_TOP_UNICODE + _POP_TOP_UNICODE; // This is a subtle one. It's a super-instruction for // BINARY_OP_ADD_UNICODE followed by STORE_FAST diff --git a/Python/compile.c b/Python/compile.c index c04391e682f9ac..d05a67421a5918 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1431,6 +1431,10 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, &optimized_instrs) < 0) { goto error; } + /* Reserve an extra word on the stack to ensure there is space for uops to + pass at least one item on the stack to a subsequent uop. + */ + stackdepth++; /** Assembly **/ co = _PyAssemble_MakeCodeObject(&u->u_metadata, const_cache, consts, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c20960c37b634b..814d2623dc872f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1199,6 +1199,8 @@ _PyStackRef right; _PyStackRef left; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); @@ -1207,16 +1209,18 @@ assert(PyUnicode_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); if (res_o == NULL) { stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); JUMP_TO_ERROR(); } + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..d014f8b5edf327 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -215,6 +215,8 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; // _GUARD_TOS_UNICODE { value = stack_pointer[-1]; @@ -246,13 +248,25 @@ assert(PyUnicode_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); if (res_o == NULL) { JUMP_TO_LABEL(pop_2_error); } + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); } + // _POP_TOP_UNICODE + { + value = r; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + // _POP_TOP_UNICODE + { + value = l; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b1a63e3d2916f..145a8c118d3612 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3f2e2e0351e052..03cbe3c61fadc9 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -318,7 +318,7 @@ dummy_func(void) { } } - op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) { if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -327,10 +327,14 @@ dummy_func(void) { goto error; } res = sym_new_const(ctx, temp); + l = left; + r = right; Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); + l = left; + r = right; } } @@ -561,11 +565,39 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP_UNICODE, (value -- )) { + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + PyObject *const_val = sym_get_const(ctx, value); + if (PyJitRef_IsBorrowed(value) || + (const_val != NULL && _Py_IsImmortal(const_val))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + else if (typ == &PyBool_Type) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + } + op(_SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { JitOptRef temp = bottom; bottom = top; @@ -803,7 +835,7 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 9652f4febaf245..07b1cd2a65ccd9 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -100,6 +100,26 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + PyObject *const_val = sym_get_const(ctx, value); + if (PyJitRef_IsBorrowed(value) || + (const_val != NULL && _Py_IsImmortal(const_val))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + else if (typ == &PyBool_Type) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -124,6 +144,12 @@ } case _POP_TOP_UNICODE: { + JitOptRef value; + value = stack_pointer[-1]; + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -541,6 +567,8 @@ JitOptRef right; JitOptRef left; JitOptRef res; + JitOptRef l; + JitOptRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -551,16 +579,24 @@ goto error; } res = sym_new_const(ctx, temp); + l = left; + r = right; stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); - stack_pointer += -1; + l = left; + r = right; + stack_pointer += 1; } - stack_pointer[-1] = res; + stack_pointer[-3] = res; + stack_pointer[-2] = l; + stack_pointer[-1] = r; break; } @@ -808,7 +844,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; From c90107976ada5b0473a79a98916a874140afb261 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 23 Jun 2025 00:17:15 +0900 Subject: [PATCH 3/4] Address code review --- Python/optimizer_bytecodes.c | 6 ++---- Python/optimizer_cases.c.h | 20 +++++++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 03cbe3c61fadc9..7c8e7fd72c5003 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -327,15 +327,13 @@ dummy_func(void) { goto error; } res = sym_new_const(ctx, temp); - l = left; - r = right; Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); - l = left; - r = right; } + l = left; + r = right; } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 07b1cd2a65ccd9..775e43a8853698 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -579,24 +579,22 @@ goto error; } res = sym_new_const(ctx, temp); - l = left; - r = right; stack_pointer[-2] = res; - stack_pointer[-1] = l; - stack_pointer[0] = r; - stack_pointer += 1; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); - l = left; - r = right; - stack_pointer += 1; + stack_pointer += -1; } - stack_pointer[-3] = res; - stack_pointer[-2] = l; - stack_pointer[-1] = r; + l = left; + r = right; + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); break; } From ea112a555b885f6ab84bd73232adb210b9c65419 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 23 Jun 2025 00:22:33 +0900 Subject: [PATCH 4/4] Address code review --- Lib/test/test_capi/test_opt.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f6679c6e8cc1b0..a12911e0989edd 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2367,19 +2367,20 @@ def testfunc(n): def test_store_fast_pop_top_specialize_unicode(self): def random_str(n): return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(n)) - def testfunc(n): - y = random_str(32) + def testfunc(args): + a, b, n = args + c = '' for _ in range(n): - x = y + y # _POP_TOP - x = None # _POP_TOP_NOP - - testfunc(TIER2_THRESHOLD) + c += a + b + return c - ex = get_first_executor(testfunc) + r0, r1 = random_str(32), random_str(32) + res, ex = self._run_with_optimizer(testfunc, (r0, r1, TIER2_THRESHOLD)) + self.assertAlmostEqual(res, TIER2_THRESHOLD * (r0 + r1)) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_POP_TOP_NOP", uops) + self.assertNotIn("_POP_TOP_UNICODE", uops) def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just