1 | | -use rustc_abi::{Align, Endian, HasDataLayout, Size}; |
| 1 | +use rustc_abi::{Align, BackendRepr, Endian, ExternAbi, HasDataLayout, Size}; |
2 | 2 | use rustc_codegen_ssa::common::IntPredicate;
3 | 3 | use rustc_codegen_ssa::mir::operand::OperandRef;
4 | 4 | use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};

@@ -278,6 +278,177 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
278 | 278 | bx.load(val_type, val_addr, layout.align.abi)
279 | 279 | }
280 | 280 |
| 281 | +fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( |
| 282 | + bx: &mut Builder<'_, 'll, 'tcx>, |
| 283 | + list: OperandRef<'tcx, &'ll Value>, |
| 284 | + target_ty: Ty<'tcx>, |
| 285 | +) -> &'ll Value { |
| 286 | + let dl = bx.cx.data_layout(); |
| 287 | + |
| 288 | + // Implementation of the systemv x86_64 ABI calling convention for va_args, see |
| 289 | + // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily |
| 290 | + // based on the one in clang. |
| 291 | + |
| 292 | + // We're able to take some shortcuts because the return type of `va_arg` must implement the |
| 293 | + // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait. |
| 294 | + |
| 295 | + // typedef struct __va_list_tag { |
| 296 | + // unsigned int gp_offset; |
| 297 | + // unsigned int fp_offset; |
| 298 | + // void *overflow_arg_area; |
| 299 | + // void *reg_save_area; |
| 300 | + // } va_list[1]; |
| 301 | + let va_list_addr = list.immediate(); |
| 302 | + |
| 303 | + let unsigned_int_offset = 4; |
| 304 | + let ptr_offset = 8; |
| 305 | + let gp_offset_ptr = va_list_addr; |
| 306 | + let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset)); |
| 307 | + |
| 308 | + let layout = bx.cx.layout_of(target_ty); |
| 309 | + |
| 310 | + // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed |
| 311 | + // in the registers. If not go to step 7. |
| 312 | + |
| 313 | + // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of |
| 314 | + // general purpose registers needed to pass type and num_fp to hold |
| 315 | + // the number of floating point registers needed. |
| 316 | + |
| 317 | + let mut num_gp_registers = 0; |
| 318 | + let mut num_fp_registers = 0; |
| 319 | + |
| 320 | + match layout.layout.backend_repr() { |
| 321 | + BackendRepr::Scalar(scalar) => match scalar.primitive() { |
| 322 | + rustc_abi::Primitive::Int(integer, _is_signed) => { |
| 323 | + num_gp_registers += integer.size().bytes().div_ceil(8) as i32; |
| 324 | + } |
| 325 | + rustc_abi::Primitive::Float(float) => { |
| 326 | + num_fp_registers += float.size().bytes().div_ceil(16) as i32; |
| 327 | + } |
| 328 | + rustc_abi::Primitive::Pointer(_) => { |
| 329 | + num_gp_registers += 1; |
| 330 | + } |
| 331 | + }, |
| 332 | + BackendRepr::ScalarPair(..) => { |
| 333 | + unreachable!("VaArgSafe is not implemented for scalar pairs") |
| 334 | + } |
| 335 | + BackendRepr::SimdVector { .. } => { |
| 336 | + unreachable!("VaArgSafe is not implemented for SIMD vectors") |
| 337 | + } |
| 338 | + BackendRepr::Memory { .. } => { /* do nothing */ } |
| 339 | + }; |
| 340 | + |
| 341 | + if num_gp_registers == 0 && num_fp_registers == 0 { |
| 342 | + unreachable!("VaArgSafe is not implemented for ZSTs") |
| 343 | + } |
| 344 | + |
| 345 | + // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into |
| 346 | + // registers. In the case: l->gp_offset > 48 - num_gp * 8 or |
| 347 | + // l->fp_offset > 176 - num_fp * 16 go to step 7. |
| 348 | + |
| 349 | + let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap()); |
| 350 | + let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap()); |
| 351 | + |
| 352 | + let mut use_regs = bx.const_bool(false); |
| 353 | + |
| 354 | + if num_gp_registers > 0 { |
| 355 | + let max_offset_val = 48i32 - num_gp_registers * 8; |
| 356 | + let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_i32(max_offset_val)); |
| 357 | + use_regs = fits_in_gp; |
| 358 | + } |
| 359 | + |
| 360 | + if num_fp_registers > 0 { |
| 361 | + let max_offset_val = 176i32 - num_fp_registers * 16; |
| 362 | + let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_i32(max_offset_val)); |
| 363 | + use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp }; |
| 364 | + } |
| 365 | + |
| 366 | + let in_reg = bx.append_sibling_block("va_arg.in_reg"); |
| 367 | + let in_mem = bx.append_sibling_block("va_arg.in_mem"); |
| 368 | + let end = bx.append_sibling_block("va_arg.end"); |
| 369 | + |
| 370 | + bx.cond_br(use_regs, in_reg, in_mem); |
| 371 | + |
| 372 | + // Emit code to load the value if it was passed in a register. |
| 373 | + bx.switch_to_block(in_reg); |
| 374 | + |
| 375 | + // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with |
| 376 | + // an offset of l->gp_offset and/or l->fp_offset. This may require |
| 377 | + // copying to a temporary location in case the parameter is passed |
| 378 | + // in different register classes or requires an alignment greater |
| 379 | + // than 8 for general purpose registers and 16 for XMM registers. |
| 380 | + // |
| 381 | + // FIXME(llvm): This really results in shameful code when we end up needing to |
| 382 | + // collect arguments from different places; often what should result in a |
| 383 | + // simple assembling of a structure from scattered addresses has many more |
| 384 | + // loads than necessary. Can we clean this up? |
| 385 | + let reg_save_area_ptr = |
| 386 | + bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset)); |
| 387 | + let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi); |
| 388 | + |
| 389 | + let reg_addr = if num_gp_registers > 0 && num_fp_registers > 0 { |
| 390 | + unreachable!("instances of VaArgSafe cannot use both int and sse registers"); |
| 391 | + } else if num_gp_registers > 0 || num_fp_registers == 1 { |
| 392 | + let gp_or_fp_offset = if num_gp_registers > 0 { gp_offset_v } else { fp_offset_v }; |
| 393 | + bx.gep(bx.type_i8(), reg_save_area_v, &[gp_or_fp_offset]) |
| 394 | + } else { |
| 395 | + // assert_eq!(num_fp_registers, 2); |
| 396 | + unreachable!("all instances of VaArgSafe have an alignment <= 8"); |
| 397 | + }; |
| 398 | + |
| 399 | + // AMD64-ABI 3.5.7p5: Step 5. Set: |
| 400 | + // l->gp_offset = l->gp_offset + num_gp * 8 |
| 401 | + if num_gp_registers > 0 { |
| 402 | + let offset = bx.const_i32(num_gp_registers * 8); |
| 403 | + let sum = bx.add(gp_offset_v, offset); |
| 404 | + bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap()); |
| 405 | + } |
| 406 | + |
| 407 | + // l->fp_offset = l->fp_offset + num_fp * 16. |
| 408 | + if num_fp_registers > 0 { |
| 409 | + let offset = bx.const_i32(num_fp_registers * 16); |
| 410 | + let sum = bx.add(fp_offset_v, offset); |
| 411 | + bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap()); |
| 412 | + } |
| 413 | + |
| 414 | + bx.br(end); |
| 415 | + |
| 416 | + bx.switch_to_block(in_mem); |
| 417 | + |
| 418 | + let overflow_arg_area_ptr = |
| 419 | + bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset)); |
| 420 | + |
| 421 | + let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi); |
| 422 | + // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 |
| 423 | + // byte boundary if alignment needed by type exceeds 8 byte boundary. |
| 424 | + // It isn't stated explicitly in the standard, but in practice we use |
| 425 | + // alignment greater than 16 where necessary. |
| 426 | + if layout.layout.align.abi.bytes() > 8 { |
| 427 | + unreachable!("all instances of VaArgSafe have an alignment <= 8"); |
| 428 | + } |
| 429 | + |
| 430 | + // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. |
| 431 | + let mem_addr = overflow_arg_area_v; |
| 432 | + |
| 433 | + // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: |
| 434 | + // l->overflow_arg_area + sizeof(type). |
| 435 | + // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to |
| 436 | + // an 8 byte boundary. |
| 437 | + let size_in_bytes = layout.layout.size().bytes(); |
| 438 | + let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32); |
| 439 | + let overflow_arg_area = bx.gep(bx.type_i8(), overflow_arg_area_v, &[offset]); |
| 440 | + bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi); |
| 441 | + |
| 442 | + bx.br(end); |
| 443 | + |
| 444 | + bx.switch_to_block(end); |
| 445 | + |
| 446 | + let val_type = layout.llvm_type(bx); |
| 447 | + let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]); |
| 448 | + |
| 449 | + bx.load(val_type, val_addr, layout.align.abi) |
| 450 | +} |
| 451 | + |
281 | 452 | fn emit_xtensa_va_arg<'ll, 'tcx>(
282 | 453 | bx: &mut Builder<'_, 'll, 'tcx>,
283 | 454 | list: OperandRef<'tcx, &'ll Value>,

@@ -410,6 +581,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
410 | 581 | let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
411 | 582 | emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
412 | 583 | }
| 584 | + "x86_64" if !target.is_like_darwin => emit_x86_64_sysv64_va_arg(bx, addr, target_ty), |
413 | 585 | "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
414 | 586 | // For all other architecture/OS combinations fall back to using
415 | 587 | // the LLVM va_arg instruction.
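
The offsets hard-coded above (`unsigned_int_offset = 4`, `ptr_offset = 8`, the `reg_save_area` pointer at byte 16, and the 48/176 limits) all come from the System V x86_64 `va_list` layout. As a reference only, not part of the patch, the state the generated code walks over corresponds to a struct along these lines:

    // Reference sketch (not part of the patch): the `__va_list_tag` layout whose
    // offsets `emit_x86_64_sysv64_va_arg` assumes on x86_64 System V.
    #[repr(C)]
    struct VaListTag {
        // Byte offset into `reg_save_area` of the next unused general purpose register;
        // typically starts at 8 * (GP registers already used by named arguments), and
        // 48 means all six GP argument registers (rdi, rsi, rdx, rcx, r8, r9) are spent.
        gp_offset: u32, // byte offset 0
        // Byte offset of the next unused SSE slot; starts at 48 or later, and
        // 176 = 48 + 8 * 16 means all eight XMM registers are spent.
        fp_offset: u32, // byte offset 4
        // Stack area holding arguments that did not fit in registers.
        overflow_arg_area: *mut core::ffi::c_void, // byte offset 8
        // 176-byte save area: six 8-byte GP slots followed by eight 16-byte SSE slots.
        reg_save_area: *mut core::ffi::c_void, // byte offset 16
    }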
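
As a worked example of the register fast path (values assumed for illustration, not taken from the patch): for an `f64` argument, `num_fp_registers` is 1 (8 bytes rounded up to one 16-byte SSE slot), so the bounds check compiled above is `fp_offset <= 176 - 16 = 160`. If `fp_offset` is currently 48, i.e. no XMM register has been consumed yet, the check passes, the value is loaded from `reg_save_area + 48` (the first XMM slot), and `fp_offset` is bumped to 64. Once `fp_offset` exceeds 160, the comparison fails and the argument is instead read from `overflow_arg_area`, which is then advanced by the argument size rounded up to a multiple of 8 bytes.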
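
For context on what exercises this lowering: on nightly Rust, `VaListImpl::arg::<T>()` (feature `c_variadic`) is what ultimately reaches `emit_va_arg` during codegen, so with this change a function like the following hypothetical one is compiled through `emit_x86_64_sysv64_va_arg` on x86_64 targets that use the System V ABI, rather than through LLVM's `va_arg` instruction:

    #![feature(c_variadic)]

    // Hypothetical example, not from the patch: each `ap.arg::<T>()` below lowers
    // through emit_va_arg, and on x86_64 System V now takes the path added above.
    pub unsafe extern "C" fn sum_i64(count: usize, mut ap: ...) -> i64 {
        let mut total = 0i64;
        for _ in 0..count {
            // i64 implements VaArgSafe and is classified as a single GP register.
            total += ap.arg::<i64>();
        }
        total
    }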