Skip to content

Commit 52601bf

Browse files
committed
implement va_arg for x86_64 systemv
Turns out LLVM's `va_arg` is also unreliable for this target.
1 parent 6eef33b commit 52601bf

File tree

3 files changed

+185
-3
lines changed

3 files changed

+185
-3
lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rustc_abi::{Align, Endian, HasDataLayout, Size};
1+
use rustc_abi::{Align, BackendRepr, Endian, ExternAbi, HasDataLayout, Size};
22
use rustc_codegen_ssa::common::IntPredicate;
33
use rustc_codegen_ssa::mir::operand::OperandRef;
44
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
@@ -278,6 +278,177 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
278278
bx.load(val_type, val_addr, layout.align.abi)
279279
}
280280

281+
/// Emit LLVM IR that loads the next variadic argument of type `target_ty` from
/// the `va_list` pointed to by `list`, following the x86-64 System V ABI
/// (https://gitlab.com/x86-psABIs/x86-64-ABI, section 3.5.7).
///
/// Branches on whether the argument was passed in registers (spilled to the
/// `reg_save_area` by the callee prologue) or on the stack (`overflow_arg_area`),
/// computes the address in either case, advances the corresponding `va_list`
/// cursor, and loads the value from the selected address.
fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // The memory layout `va_list_addr` points at, per the ABI:
    //
    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Byte offsets of the fields above: gp_offset at 0, fp_offset at 4,
    // overflow_arg_area at 8, reg_save_area at 16.
    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let layout = bx.cx.layout_of(target_ty);

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _is_signed) => {
                // One GP register per 8 bytes; VaArgSafe ints are <= 8 bytes,
                // so in practice this is 1.
                num_gp_registers += integer.size().bytes().div_ceil(8) as i32;
            }
            rustc_abi::Primitive::Float(float) => {
                // One SSE register per 16 bytes; f64 needs a single XMM slot.
                num_fp_registers += float.size().bytes().div_ceil(16) as i32;
            }
            rustc_abi::Primitive::Pointer(_) => {
                num_gp_registers += 1;
            }
        },
        BackendRepr::ScalarPair(..) => {
            unreachable!("VaArgSafe is not implemented for scalar pairs")
        }
        BackendRepr::SimdVector { .. } => {
            unreachable!("VaArgSafe is not implemented for SIMD vectors")
        }
        BackendRepr::Memory { .. } => { /* do nothing */ }
    };

    if num_gp_registers == 0 && num_fp_registers == 0 {
        unreachable!("VaArgSafe is not implemented for ZSTs")
    }

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    // NOTE(review): gp_offset is loaded at align 8 (start of the 8-aligned
    // va_list struct) while fp_offset at offset 4 can only claim align 4.
    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

    let mut use_regs = bx.const_bool(false);

    if num_gp_registers > 0 {
        // 48 = 6 GP argument registers (rdi, rsi, rdx, rcx, r8, r9) * 8 bytes.
        let max_offset_val = 48i32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_i32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        // 176 = 48 (GP area) + 8 XMM registers * 16 bytes.
        let max_offset_val = 176i32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_i32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);

    let reg_addr = if num_gp_registers > 0 && num_fp_registers > 0 {
        unreachable!("instances of VaArgSafe cannot use both int and sse registers");
    } else if num_gp_registers > 0 || num_fp_registers == 1 {
        // The value fits in a single register, so its address is simply
        // reg_save_area plus the (dynamic) gp/fp offset loaded above.
        let gp_or_fp_offset = if num_gp_registers > 0 { gp_offset_v } else { fp_offset_v };
        bx.gep(bx.type_i8(), reg_save_area_v, &[gp_or_fp_offset])
    } else {
        // assert_eq!(num_sse_registers, 2);
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_i32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_i32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    // Emit code to load the value if it was passed on the stack
    // (overflow area).
    bx.switch_to_block(in_mem);

    let overflow_arg_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    //
    // VaArgSafe types never need this, so no runtime alignment is emitted;
    // this is a compile-time invariant check only.
    if layout.layout.align.abi.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.gep(bx.type_i8(), overflow_arg_area_v, &[offset]);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);

    bx.br(end);

    // Join point: select whichever address the taken branch computed,
    // then load the value itself.
    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}
451+
281452
fn emit_xtensa_va_arg<'ll, 'tcx>(
282453
bx: &mut Builder<'_, 'll, 'tcx>,
283454
list: OperandRef<'tcx, &'ll Value>,
@@ -410,6 +581,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
410581
let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
411582
emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
412583
}
584+
"x86_64" if !target.is_like_darwin => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
413585
"xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
414586
// For all other architecture/OS combinations fall back to using
415587
// the LLVM va_arg instruction.

tests/run-make/c-link-to-rust-va-list-fn/checkrust.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ pub unsafe extern "C" fn check_varargs_4(_: c_double, mut ap: ...) -> usize {
112112
continue_if!(ap.arg::<c_double>() == 8.0);
113113
continue_if!(ap.arg::<c_double>() == 9.0);
114114
continue_if!(ap.arg::<c_double>() == 10.0);
115+
continue_if!(ap.arg::<c_double>() == 11.0);
116+
continue_if!(ap.arg::<c_double>() == 12.0);
117+
continue_if!(ap.arg::<c_double>() == 13.0);
115118
0
116119
}
117120

@@ -137,5 +140,11 @@ pub unsafe extern "C" fn check_varargs_5(_: c_int, mut ap: ...) -> usize {
137140
continue_if!(ap.arg::<c_double>() == 9.0);
138141
continue_if!(ap.arg::<c_int>() == 10);
139142
continue_if!(ap.arg::<c_double>() == 10.0);
143+
continue_if!(ap.arg::<c_int>() == 11);
144+
continue_if!(ap.arg::<c_double>() == 11.0);
145+
continue_if!(ap.arg::<c_int>() == 12);
146+
continue_if!(ap.arg::<c_double>() == 12.0);
147+
continue_if!(ap.arg::<c_int>() == 13);
148+
continue_if!(ap.arg::<c_double>() == 13.0);
140149
0
141150
}

tests/run-make/c-link-to-rust-va-list-fn/test.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ int main(int argc, char* argv[]) {
4141

4242
assert(check_varargs_3(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) == 0);
4343

44-
assert(check_varargs_4(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) == 0);
44+
assert(check_varargs_4(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
45+
13.0) == 0);
4546

4647
assert(check_varargs_5(0, 1.0, 1, 2.0, 2, 3.0, 3, 4.0, 4, 5, 5.0, 6, 6.0, 7, 7.0, 8, 8.0,
47-
9, 9.0, 10, 10.0) == 0);
48+
9, 9.0, 10, 10.0, 11, 11.0, 12, 12.0, 13, 13.0) == 0);
4849

4950
return 0;
5051
}

0 commit comments

Comments
 (0)