Commit 6424fbe

x86_64 sysv64 vararg: support structs
1 parent 584035c commit 6424fbe
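
This commit extends the x86_64 SysV va_arg lowering so that struct arguments (newtype wrappers, two-eightbyte ScalarPair values, and memory-class aggregates) are read out of a va_list instead of hitting unreachable!. A rough sketch of the kind of code this targets, assuming a struct type accepted by the VaArgSafe bound on VaListImpl::arg (that trait change is not part of this diff; the names Pair and sum_pair are made up for illustration):

#![feature(c_variadic)]

// A two-eightbyte struct: under the SysV ABI it is classified as one
// integer eightbyte and one SSE eightbyte (a ScalarPair in rustc's
// layout terms), one of the new cases handled by this commit.
#[repr(C)]
#[derive(Copy, Clone)]
struct Pair {
    a: i64,
    b: f64,
}

// Hypothetical C-variadic function written in Rust that pulls a Pair out
// of its va_list; arg::<T>() requires T: VaArgSafe, which is assumed here.
unsafe extern "C" fn sum_pair(_count: i32, mut args: ...) -> f64 {
    let p = args.arg::<Pair>();
    p.a as f64 + p.b
}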

1 file changed: +165 -43 lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

Lines changed: 165 additions & 43 deletions
@@ -1,7 +1,10 @@
-use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

@@ -300,12 +303,16 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // } va_list[1];
     let va_list_addr = list.immediate();

-    let unsigned_int_offset = 4;
-    let ptr_offset = 8;
-    let gp_offset_ptr = va_list_addr;
-    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+    // Peel off any newtype wrappers.
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);

-    let layout = bx.cx.layout_of(target_ty);
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };

     // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
     // in the registers. If not go to step 7.
@@ -317,37 +324,48 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     let mut num_gp_registers = 0;
     let mut num_fp_registers = 0;

+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
     match layout.layout.backend_repr() {
-        BackendRepr::Scalar(scalar) => match scalar.primitive() {
-            Primitive::Int(integer, _is_signed) => {
-                num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
-            }
-            Primitive::Float(float) => {
-                num_fp_registers += float.size().bytes().div_ceil(16) as u32;
-            }
-            Primitive::Pointer(_) => {
-                num_gp_registers += 1;
-            }
-        },
-        BackendRepr::ScalarPair(..)
-        | BackendRepr::SimdVector { .. }
-        | BackendRepr::Memory { .. } => {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
         }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
     };

-    if num_gp_registers == 0 && num_fp_registers == 0 {
-        unreachable!("VaArgSafe is not implemented for ZSTs")
-    }
-
     // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
     // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
     // l->fp_offset > 176 - num_fp * 16 go to step 7.

+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
     let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
     let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

@@ -388,14 +406,87 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
         bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
     let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);

-    let reg_addr = if num_gp_registers > 0 && num_fp_registers > 0 {
-        unreachable!("instances of VaArgSafe cannot use both int and sse registers");
-    } else if num_gp_registers > 0 || num_fp_registers == 1 {
-        let gp_or_fp_offset = if num_gp_registers > 0 { gp_offset_v } else { fp_offset_v };
-        bx.gep(bx.type_i8(), reg_save_area_v, &[gp_or_fp_offset])
-    } else {
-        // assert_eq!(num_sse_registers, 2);
-        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+            }
+            Primitive::Float(_) => bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]);
+                    let reg_hi_addr = bx.gep(bx.type_i8(), reg_lo_addr, &[bx.const_i32(16)]);
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.gep(bx.type_i8(), tmp, &[bx.const_u32(offset as u32)]);
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+                    let fp_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.gep(bx.type_i8(), tmp, &[bx.const_u32(offset as u32)]);
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+                }
+            }
+        }
+        BackendRepr::SimdVector { .. } => {
+            unreachable!("panics in the previous match on `backend_repr`")
+        }
+        BackendRepr::Memory { .. } => {
+            unreachable!("early returns in the previous match on `backend_repr`")
+        }
     };

     // AMD64-ABI 3.5.7p5: Step 5. Set:
@@ -416,9 +507,47 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     bx.br(end);

     bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);

-    let overflow_arg_area_ptr =
-        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset));
+    bx.switch_to_block(end);
+
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    if layout.layout.align.abi.bytes() > 8 {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            Align::from_bytes(8).unwrap(),
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

     let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
     // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
@@ -441,14 +570,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     let overflow_arg_area = bx.gep(bx.type_i8(), overflow_arg_area_v, &[offset]);
     bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);

-    bx.br(end);
-
-    bx.switch_to_block(end);
-
-    let val_type = layout.llvm_type(bx);
-    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
-
-    bx.load(val_type, val_addr, layout.align.abi)
+    mem_addr
 }

 fn emit_xtensa_va_arg<'ll, 'tcx>(

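For reference, the constants the lowering uses (unsigned_int_offset = 4, the overflow_arg_area pointer at byte 8, the register save area pointer at byte 16, and the 16-byte spacing between SSE slots) follow the SysV AMD64 va_list record. A sketch of that layout as a #[repr(C)] Rust struct, purely for illustration (the name VaListSysV64 is invented; the generated code works on raw pointers as shown in the diff above):

// gp_offset is the byte offset into reg_save_area of the next general-purpose
// register argument (6 registers * 8 bytes, so at most 48); fp_offset is the
// offset of the next SSE register argument, starting at 48 and advancing in
// 16-byte slots up to 48 + 8 * 16 = 176. Arguments that do not fit in
// registers are read from overflow_arg_area instead (ABI step 7).
#[repr(C)]
struct VaListSysV64 {
    gp_offset: u32,              // byte offset 0, read via gp_offset_ptr
    fp_offset: u32,              // byte offset 4, read via fp_offset_ptr
    overflow_arg_area: *mut u8,  // byte offset 8, used by x86_64_sysv64_va_arg_from_memory
    reg_save_area: *mut u8,      // byte offset 16, spilled rdi..r9 followed by xmm0..xmm7
}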