Skip to content

Commit 3dd8dd5

Browse files
committed
Alternative tailcall fix
1 parent 103a5e8 commit 3dd8dd5

File tree

3 files changed

+141
-96
lines changed

3 files changed

+141
-96
lines changed

ext/opcache/jit/ir/ir_ra.c

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -610,8 +610,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
610610
len = ir_bitset_len(ctx->vregs_count + 1);
611611
bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
612612

613-
/* vregs + tmp + fixed + SRATCH + ALL */
614-
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
613+
/* vregs + tmp + fixed + special */
614+
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + IR_REG_SPECIAL_NUM, sizeof(ir_live_interval*));
615615

616616
#ifdef IR_DEBUG
617617
visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
@@ -1262,8 +1262,8 @@ int ir_compute_live_ranges(ir_ctx *ctx)
12621262
/* Compute Live Ranges */
12631263
ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
12641264

1265-
/* vregs + tmp + fixed + SRATCH + ALL */
1266-
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
1265+
/* vregs + tmp + fixed + special */
1266+
ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + IR_REG_SPECIAL_NUM, sizeof(ir_live_interval*));
12671267

12681268
if (!ctx->arena) {
12691269
ctx->arena = ir_arena_create(16 * 1024);
@@ -2036,8 +2036,8 @@ int ir_coalesce(ir_ctx *ctx)
20362036
n--;
20372037
if (n != ctx->vregs_count) {
20382038
j = ctx->vregs_count - n;
2039-
/* vregs + tmp + fixed + SRATCH + ALL */
2040-
for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
2039+
/* vregs + tmp + fixed + special */
2040+
for (i = n + 1; i <= n + IR_REG_NUM + IR_REG_SPECIAL_NUM; i++) {
20412041
ctx->live_intervals[i] = ctx->live_intervals[i + j];
20422042
if (ctx->live_intervals[i]) {
20432043
ctx->live_intervals[i]->vreg = i;
@@ -2804,6 +2804,37 @@ static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_inte
28042804
}
28052805
}
28062806

2807+
/*
 * Map a "special" pseudo-register id to the set of physical registers it
 * stands for.
 *
 * Special ids are the values at or above IR_REG_SCRATCH; they name a whole
 * register set rather than a single register. Callers use the result to
 * exclude (or intersect with) every register the pseudo-register covers.
 *
 * ctx - compilation context; only read for IR_REG_FIXED_SAVED, where the
 *       set comes from ctx->fixed_save_regset.
 * reg - special register id, must be >= IR_REG_SCRATCH.
 *
 * Returns the matching register set. An unknown id triggers IR_ASSERT(0);
 * if assertions are compiled out the function falls back to "all registers"
 * instead of running off the end of a non-void function.
 */
static ir_regset ir_special_reg_regset(ir_ctx *ctx, uint8_t reg)
{
	IR_ASSERT(reg >= IR_REG_SCRATCH);

	switch (reg) {
		case IR_REG_SCRATCH:
			return IR_REGSET_SCRATCH;
		case IR_REG_PRESERVED:
			return IR_REGSET_PRESERVED;
		case IR_REG_PNPRESERVED:
			/* NOTE(review): this case is not guarded by IR_HAVE_PRESERVE_NONE,
			 * unlike IR_REG_PNARGS below - confirm IR_REGSET_PNPRESERVED is
			 * defined on every target. */
			return IR_REGSET_PNPRESERVED;
		case IR_REG_FIXED_SAVED:
			/* Only meaningful when a fixed stack frame is configured. */
			IR_ASSERT(ctx->fixed_stack_frame_size != -1);
			return (ir_regset)ctx->fixed_save_regset;
		case IR_REG_ARGS:
			return IR_REGSET_ARGS;
#ifdef IR_HAVE_FASTCALL
		case IR_REG_FCARGS:
			return IR_REGSET_FCARGS;
#endif
#ifdef IR_HAVE_PRESERVE_NONE
		case IR_REG_PNARGS:
			return IR_REGSET_PNARGS;
#endif
		case IR_REG_ALL:
			return ~0;
		default:
			IR_ASSERT(0);
			break;
	}

	/*
	 * Not reachable for valid ids. Without this return, a build where
	 * IR_ASSERT() expands to nothing would reach the end of a non-void
	 * function and the caller would use an indeterminate value. Treat an
	 * unknown id as covering every register, the conservative choice for
	 * callers that subtract this set from the available registers.
	 */
	return ~0;
}
2837+
28072838
static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
28082839
{
28092840
ir_live_pos freeUntilPos[IR_REG_NUM];
@@ -2846,12 +2877,7 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
28462877
reg = other->reg;
28472878
IR_ASSERT(reg >= 0);
28482879
if (reg >= IR_REG_SCRATCH) {
2849-
if (reg == IR_REG_SCRATCH) {
2850-
available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
2851-
} else {
2852-
IR_ASSERT(reg == IR_REG_ALL);
2853-
available = IR_REGSET_EMPTY;
2854-
}
2880+
available = IR_REGSET_DIFFERENCE(available, ir_special_reg_regset(ctx, reg));
28552881
} else {
28562882
IR_REGSET_EXCL(available, reg);
28572883
}
@@ -2874,14 +2900,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l
28742900
reg = other->reg;
28752901
IR_ASSERT(reg >= 0);
28762902
if (reg >= IR_REG_SCRATCH) {
2877-
ir_regset regset;
2903+
ir_regset regset = IR_REGSET_INTERSECTION(available, ir_special_reg_regset(ctx, reg));
28782904

2879-
if (reg == IR_REG_SCRATCH) {
2880-
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2881-
} else {
2882-
IR_ASSERT(reg == IR_REG_ALL);
2883-
regset = available;
2884-
}
28852905
overlapped = IR_REGSET_UNION(overlapped, regset);
28862906
IR_REGSET_FOREACH(regset, reg) {
28872907
if (next < freeUntilPos[reg]) {
@@ -3087,14 +3107,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
30873107
reg = other->reg;
30883108
IR_ASSERT(reg >= 0);
30893109
if (reg >= IR_REG_SCRATCH) {
3090-
ir_regset regset;
3110+
ir_regset regset = IR_REGSET_INTERSECTION(available, ir_special_reg_regset(ctx, reg));
30913111

3092-
if (reg == IR_REG_SCRATCH) {
3093-
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3094-
} else {
3095-
IR_ASSERT(reg == IR_REG_ALL);
3096-
regset = available;
3097-
}
30983112
IR_REGSET_FOREACH(regset, reg) {
30993113
blockPos[reg] = nextUsePos[reg] = 0;
31003114
} IR_REGSET_FOREACH_END();
@@ -3122,14 +3136,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li
31223136
ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
31233137

31243138
if (overlap) {
3125-
ir_regset regset;
3139+
ir_regset regset = IR_REGSET_INTERSECTION(available, ir_special_reg_regset(ctx, reg));
31263140

3127-
if (reg == IR_REG_SCRATCH) {
3128-
regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3129-
} else {
3130-
IR_ASSERT(reg == IR_REG_ALL);
3131-
regset = available;
3132-
}
31333141
IR_REGSET_FOREACH(regset, reg) {
31343142
if (overlap < nextUsePos[reg]) {
31353143
nextUsePos[reg] = overlap;
@@ -3562,8 +3570,8 @@ static int ir_linear_scan(ir_ctx *ctx)
35623570
ir_merge_to_unhandled(&unhandled, ival);
35633571
}
35643572

3565-
/* vregs + tmp + fixed + SRATCH + ALL */
3566-
for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
3573+
/* vregs + tmp + fixed + special */
3574+
for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + IR_REG_SPECIAL_NUM; j++) {
35673575
ival = ctx->live_intervals[j];
35683576
if (ival) {
35693577
ival->current_range = &ival->range;

ext/opcache/jit/ir/ir_x86.dasc

Lines changed: 72 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,8 +1415,6 @@ op2_const:
14151415
}
14161416
constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
14171417
n = 1;
1418-
IR_FALLTHROUGH;
1419-
case IR_TAILCALL:
14201418
insn = &ctx->ir_base[ref];
14211419
if (insn->inputs_count > 2) {
14221420
constraints->hints[2] = IR_REG_NONE;
@@ -1428,6 +1426,54 @@ op2_const:
14281426
}
14291427
flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
14301428
break;
1429+
case IR_TAILCALL:
1430+
insn = &ctx->ir_base[ref];
1431+
1432+
/* Epilogue may clobber these */
1433+
uint8_t preserved = IR_REG_PRESERVED;
1434+
1435+
/* For TAILCALL we don't need to reserve all scratch registers. */
1436+
uint8_t args = IR_REG_ARGS;
1437+
uint8_t tmp_reg = IR_REG_INT_RET1;
1438+
1439+
#ifdef IR_HAVE_FASTCALL
1440+
if (ir_is_fastcall(ctx, insn)) {
1441+
args = IR_REG_FCARGS;
1442+
}
1443+
#endif
1444+
#ifdef IR_HAVE_PRESERVE_NONE
1445+
if (ir_is_preserve_none(ctx, insn)) {
1446+
args = IR_REG_PNARGS;
1447+
/* See ir_emit_tailcall() */
1448+
tmp_reg = IR_REG_R10;
1449+
preserved = IR_REG_PNPRESERVED;
1450+
}
1451+
#endif
1452+
1453+
if (ctx->fixed_stack_frame_size != -1) {
1454+
preserved = IR_REG_FIXED_SAVED;
1455+
}
1456+
1457+
constraints->tmp_regs[n] = IR_SCRATCH_REG(args, IR_USE_SUB_REF, IR_DEF_SUB_REF);
1458+
n++;
1459+
1460+
/* Reserve tmp register used by ir_emit_arguments() */
1461+
constraints->tmp_regs[n] = IR_SCRATCH_REG(tmp_reg, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
1462+
n++;
1463+
1464+
if (insn->inputs_count > 2) {
1465+
constraints->hints[2] = IR_REG_NONE;
1466+
constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
1467+
}
1468+
1469+
if (!IR_IS_CONST_REF(insn->op2)) {
1470+
/* Make sure that none of these is allocated to op2 */
1471+
constraints->tmp_regs[n] = IR_SCRATCH_REG(preserved, IR_USE_SUB_REF, IR_DEF_SUB_REF);
1472+
n++;
1473+
}
1474+
1475+
flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS;
1476+
break;
14311477
case IR_BINOP_SSE2:
14321478
flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
14331479
break;
@@ -9245,66 +9291,24 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
92459291
{
92469292
ir_backend_data *data = ctx->data;
92479293
dasm_State **Dst = &data->dasm_state;
9248-
int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
9294+
9295+
uint8_t tmp_reg = IR_REG_INT_RET1;
9296+
#ifdef IR_HAVE_PRESERVE_NONE
9297+
if (ir_is_preserve_none(ctx, insn)) {
9298+
/* preserve_none uses IR_REG_INT_RET1 for argument passing, so we can
9299+
* not use it as a tmp reg here. */
9300+
tmp_reg = IR_REG_R10;
9301+
}
9302+
#endif
9303+
9304+
int32_t used_stack = ir_emit_arguments(ctx, def, insn, tmp_reg);
92499305

92509306
if (used_stack != 0) {
92519307
ir_emit_call_ex(ctx, def, insn, used_stack);
92529308
ir_emit_return_void(ctx);
92539309
return;
92549310
}
92559311

9256-
/* Move op2 to a scratch register before epilogue if it's in
9257-
* used_preserved_regs, because it will be overridden. */
9258-
9259-
ir_reg op2_reg = IR_REG_NONE;
9260-
ir_mem mem = IR_MEM_B(IR_REG_NONE);
9261-
if (!IR_IS_CONST_REF(insn->op2)) {
9262-
op2_reg = ctx->regs[def][2];
9263-
9264-
if (op2_reg != IR_REG_NONE) {
9265-
if (IR_REG_SPILLED(op2_reg)) {
9266-
ZEND_ASSERT(0 && "TODO");
9267-
op2_reg = IR_REG_NUM(op2_reg);
9268-
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
9269-
}
9270-
if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg)) {
9271-
ir_reg tmp_reg;
9272-
if (ir_is_preserve_none(ctx, insn)) {
9273-
tmp_reg = IR_REG_R10;
9274-
} else {
9275-
tmp_reg = IR_REG_INT_RET1;
9276-
}
9277-
9278-
| mov Rq(tmp_reg), Rq(op2_reg)
9279-
9280-
op2_reg = tmp_reg;
9281-
}
9282-
} else {
9283-
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
9284-
mem = ir_fuse_load(ctx, def, insn->op2);
9285-
} else {
9286-
mem = ir_ref_spill_slot(ctx, insn->op2);
9287-
}
9288-
ir_reg base = IR_MEM_BASE(mem);
9289-
ir_reg index = IR_MEM_INDEX(mem);
9290-
if ((base != IR_REG_NONE && IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, base)) ||
9291-
(index != IR_REG_NONE && IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, index))) {
9292-
ir_reg tmp_reg;
9293-
if (ir_is_preserve_none(ctx, insn)) {
9294-
tmp_reg = IR_REG_R10;
9295-
} else {
9296-
tmp_reg = IR_REG_INT_RET1;
9297-
}
9298-
9299-
ir_type type = ctx->ir_base[insn->op2].type;
9300-
9301-
| ASM_REG_MEM_OP, mov, type, tmp_reg, mem
9302-
9303-
op2_reg = tmp_reg;
9304-
}
9305-
}
9306-
}
9307-
93089312
ir_emit_epilogue(ctx);
93099313

93109314
if (IR_IS_CONST_REF(insn->op2)) {
@@ -9330,9 +9334,22 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
93309334
|.endif
93319335
}
93329336
} else {
9337+
ir_reg op2_reg = ctx->regs[def][2];
9338+
93339339
if (op2_reg != IR_REG_NONE) {
9340+
if (IR_REG_SPILLED(op2_reg)) {
9341+
op2_reg = IR_REG_NUM(op2_reg);
9342+
ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
9343+
}
93349344
| jmp Ra(op2_reg)
93359345
} else {
9346+
ir_mem mem;
9347+
9348+
if (ir_rule(ctx, insn->op2) & IR_FUSED) {
9349+
mem = ir_fuse_load(ctx, def, insn->op2);
9350+
} else {
9351+
mem = ir_ref_spill_slot(ctx, insn->op2);
9352+
}
93369353
| ASM_TMEM_OP jmp, aword, mem
93379354
}
93389355
}

ext/opcache/jit/ir/ir_x86.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,20 @@ enum _ir_reg {
8484
IR_REG_NUM,
8585
};
8686

87-
#define IR_REG_GP_FIRST IR_REG_R0
88-
#define IR_REG_FP_FIRST IR_REG_XMM0
89-
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
90-
#define IR_REG_FP_LAST (IR_REG_NUM - 1)
91-
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
92-
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
87+
#define IR_REG_GP_FIRST IR_REG_R0
88+
#define IR_REG_FP_FIRST IR_REG_XMM0
89+
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
90+
#define IR_REG_FP_LAST (IR_REG_NUM - 1)
91+
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
92+
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
93+
#define IR_REG_PRESERVED (IR_REG_NUM + 2) /* special name for IR_REGSET_PRESERVED */
94+
#define IR_REG_PNPRESERVED (IR_REG_NUM + 3) /* special name for IR_REGSET_PNPRESERVED */
95+
#define IR_REG_FIXED_SAVED (IR_REG_NUM + 4) /* special name for fixed_save_regset */
96+
#define IR_REG_ARGS (IR_REG_NUM + 5) /* special name for IR_REGSET_ARGS */
97+
#define IR_REG_FCARGS (IR_REG_NUM + 6) /* special name for IR_REGSET_FCARGS */
98+
#define IR_REG_PNARGS (IR_REG_NUM + 7) /* special name for IR_REGSET_PNARGS */
99+
100+
#define IR_REG_SPECIAL_NUM 8
93101

94102
#define IR_REGSET_64BIT 0
95103

@@ -177,6 +185,11 @@ enum _ir_reg {
177185
| IR_REGSET(IR_REG_RBP) \
178186
| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15))
179187

188+
# define IR_REGSET_ARGS \
189+
(IR_REGSET(IR_REG_RDI) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDX) \
190+
| IR_REGSET(IR_REG_RCX) | IR_REGSET(IR_REG_R8) | IR_REGSET(IR_REG_R9) \
191+
| IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM7))
192+
180193
# if __has_attribute(preserve_none)
181194

182195
# define IR_HAVE_PRESERVE_NONE 1
@@ -197,6 +210,13 @@ enum _ir_reg {
197210
# define IR_REG_INT_PNARG12 IR_REG_RAX
198211

199212
# define IR_MAX_REG_ARGS 20 /* IR_REG_INT_PNARGS + IR_REG_FP_ARGS */
213+
214+
# define IR_REGSET_PNARGS \
215+
(IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) | IR_REGSET(IR_REG_RDI) \
216+
| IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDX) | IR_REGSET(IR_REG_RCX) \
217+
| IR_REGSET(IR_REG_R8) | IR_REGSET(IR_REG_R9) | IR_REGSET(IR_REG_R11) \
218+
| IR_REGSET(IR_REG_RAX) | IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM7))
219+
200220
# else /* !preserve_none */
201221
# define IR_MAX_REG_ARGS 14 /* IR_REG_INT_ARGS + IR_REG_FP_ARGS */
202222
# endif

0 commit comments

Comments
 (0)