1 | | -use rustc_abi::{Align, Endian, HasDataLayout, Size}; |
| 1 | +use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout}; |
| 2 | +use rustc_codegen_ssa::MemFlags; |
2 | 3 | use rustc_codegen_ssa::common::IntPredicate;
3 | 4 | use rustc_codegen_ssa::mir::operand::OperandRef;
4 | | -use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods}; |
| 5 | +use rustc_codegen_ssa::traits::{ |
| 6 | + BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods, |
| 7 | +}; |
5 | 8 | use rustc_middle::ty::Ty;
6 | 9 | use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
7 | 10 |
@@ -303,6 +306,294 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
303 | 306 | bx.load(val_type, val_addr, layout.align.abi)
304 | 307 | }
305 | 308 |
| 309 | +fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( |
| 310 | + bx: &mut Builder<'_, 'll, 'tcx>, |
| 311 | + list: OperandRef<'tcx, &'ll Value>, |
| 312 | + target_ty: Ty<'tcx>, |
| 313 | +) -> &'ll Value { |
| 314 | + let dl = bx.cx.data_layout(); |
| 315 | + |
| 316 | + // Implementation of the System V x86_64 ABI calling convention for va_args, see |
| 317 | + // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily |
| 318 | + // based on the one in clang. |
| 319 | + |
| 320 | + // We're able to take some shortcuts because the return type of `va_arg` must implement the |
| 321 | + // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait. |
| 322 | + |
| 323 | + // typedef struct __va_list_tag { |
| 324 | + // unsigned int gp_offset; |
| 325 | + // unsigned int fp_offset; |
| 326 | + // void *overflow_arg_area; |
| 327 | + // void *reg_save_area; |
| 328 | + // } va_list[1]; |
| 329 | + let va_list_addr = list.immediate(); |
| 330 | + |
| 331 | + // Peel off any newtype wrappers. |
| 332 | + let layout = { |
| 333 | + let mut layout = bx.cx.layout_of(target_ty); |
| 334 | + |
| 335 | + while let Some((_, inner)) = layout.non_1zst_field(bx.cx) { |
| 336 | + layout = inner; |
| 337 | + } |
| 338 | + |
| 339 | + layout |
| 340 | + }; |
| 341 | + |
| 342 | + // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed |
| 343 | + // in the registers. If not go to step 7. |
| 344 | + |
| 345 | + // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of |
| 346 | + // general purpose registers needed to pass type and num_fp to hold |
| 347 | + // the number of floating point registers needed. |
| 348 | + |
| 349 | + let mut num_gp_registers = 0; |
| 350 | + let mut num_fp_registers = 0; |
| 351 | + |
| 352 | + let mut registers_for_primitive = |p| match p { |
| 353 | + Primitive::Int(integer, _is_signed) => { |
| 354 | + num_gp_registers += integer.size().bytes().div_ceil(8) as u32; |
| 355 | + } |
| 356 | + Primitive::Float(float) => { |
| 357 | + num_fp_registers += float.size().bytes().div_ceil(16) as u32; |
| 358 | + } |
| 359 | + Primitive::Pointer(_) => { |
| 360 | + num_gp_registers += 1; |
| 361 | + } |
| 362 | + }; |
| 363 | + |
| 364 | + match layout.layout.backend_repr() { |
| 365 | + BackendRepr::Scalar(scalar) => { |
| 366 | + registers_for_primitive(scalar.primitive()); |
| 367 | + } |
| 368 | + BackendRepr::ScalarPair(scalar1, scalar2) => { |
| 369 | + registers_for_primitive(scalar1.primitive()); |
| 370 | + registers_for_primitive(scalar2.primitive()); |
| 371 | + } |
| 372 | + BackendRepr::SimdVector { .. } => { |
| 373 | + // Unreachable because no instance of `VaArgSafe` uses a non-scalar `BackendRepr`. |
| 374 | + unreachable!( |
| 375 | + "No x86-64 SysV va_arg implementation for {:?}", |
| 376 | + layout.layout.backend_repr() |
| 377 | + ) |
| 378 | + } |
| 379 | + BackendRepr::Memory { .. } => { |
| 380 | + let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout); |
| 381 | + return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi); |
| 382 | + } |
| 383 | + }; |
| 384 | + |
| 385 | + // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into |
| 386 | + // registers. In the case: l->gp_offset > 48 - num_gp * 8 or |
| 387 | + // l->fp_offset > 176 - num_fp * 16 go to step 7. |
| 388 | + |
| 389 | + let unsigned_int_offset = 4; |
| 390 | + let ptr_offset = 8; |
| 391 | + let gp_offset_ptr = va_list_addr; |
| 392 | + let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset)); |
| 393 | + |
| 394 | + let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap()); |
| 395 | + let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap()); |
| 396 | + |
| 397 | + let mut use_regs = bx.const_bool(false); |
| 398 | + |
| 399 | + if num_gp_registers > 0 { |
| 400 | + let max_offset_val = 48u32 - num_gp_registers * 8; |
| 401 | + let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val)); |
| 402 | + use_regs = fits_in_gp; |
| 403 | + } |
| 404 | + |
| 405 | + if num_fp_registers > 0 { |
| 406 | + let max_offset_val = 176u32 - num_fp_registers * 16; |
| 407 | + let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val)); |
| 408 | + use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp }; |
| 409 | + } |
| 410 | + |
| 411 | + let in_reg = bx.append_sibling_block("va_arg.in_reg"); |
| 412 | + let in_mem = bx.append_sibling_block("va_arg.in_mem"); |
| 413 | + let end = bx.append_sibling_block("va_arg.end"); |
| 414 | + |
| 415 | + bx.cond_br(use_regs, in_reg, in_mem); |
| 416 | + |
| 417 | + // Emit code to load the value if it was passed in a register. |
| 418 | + bx.switch_to_block(in_reg); |
| 419 | + |
| 420 | + // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with |
| 421 | + // an offset of l->gp_offset and/or l->fp_offset. This may require |
| 422 | + // copying to a temporary location in case the parameter is passed |
| 423 | + // in different register classes or requires an alignment greater |
| 424 | + // than 8 for general purpose registers and 16 for XMM registers. |
| 425 | + // |
| 426 | + // FIXME(llvm): This really results in shameful code when we end up needing to |
| 427 | + // collect arguments from different places; often what should result in a |
| 428 | + // simple assembling of a structure from scattered addresses has many more |
| 429 | + // loads than necessary. Can we clean this up? |
| 430 | + let reg_save_area_ptr = |
| 431 | + bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset)); |
| 432 | + let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi); |
| 433 | + |
| 434 | + let reg_addr = match layout.layout.backend_repr() { |
| 435 | + BackendRepr::Scalar(scalar) => match scalar.primitive() { |
| 436 | + Primitive::Int(_, _) | Primitive::Pointer(_) => { |
| 437 | + let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); |
| 438 | + |
| 439 | + // Copy into a temporary if the type is more aligned than the register save area. |
| 440 | + copy_to_temporary_if_more_aligned(bx, reg_addr, layout) |
| 441 | + } |
| 442 | + Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v), |
| 443 | + }, |
| 444 | + BackendRepr::ScalarPair(scalar1, scalar2) => { |
| 445 | + let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false); |
| 446 | + let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false); |
| 447 | + |
| 448 | + let align_lo = layout.field(bx.cx, 0).layout.align().abi; |
| 449 | + let align_hi = layout.field(bx.cx, 1).layout.align().abi; |
| 450 | + |
| 451 | + match (scalar1.primitive(), scalar2.primitive()) { |
| 452 | + (Primitive::Float(_), Primitive::Float(_)) => { |
| 453 | + // SSE registers are spaced 16 bytes apart in the register save |
| 454 | + // area, so we need to collect the two eightbytes together. |
| 455 | + // The ABI isn't explicit about this, but it seems reasonable |
| 456 | + // to assume that the slots are 16-byte aligned, since the stack is |
| 457 | + // naturally 16-byte aligned and the prologue is expected to store |
| 458 | + // all the SSE registers to the RSA. |
| 459 | + let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v); |
| 460 | + let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16)); |
| 461 | + |
| 462 | + let align = layout.layout.align().abi; |
| 463 | + let tmp = bx.alloca(layout.layout.size(), align); |
| 464 | + |
| 465 | + let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); |
| 466 | + let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); |
| 467 | + |
| 468 | + let offset = scalar1.size(bx.cx).align_to(align_hi).bytes(); |
| 469 | + let field0 = tmp; |
| 470 | + let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32)); |
| 471 | + |
| 472 | + bx.store(reg_lo, field0, align); |
| 473 | + bx.store(reg_hi, field1, align); |
| 474 | + |
| 475 | + tmp |
| 476 | + } |
| 477 | + (Primitive::Float(_), _) | (_, Primitive::Float(_)) => { |
| 478 | + let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); |
| 479 | + let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v); |
| 480 | + |
| 481 | + let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() { |
| 482 | + Primitive::Float(_) => (fp_addr, gp_addr), |
| 483 | + Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr), |
| 484 | + }; |
| 485 | + |
| 486 | + let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); |
| 487 | + |
| 488 | + let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); |
| 489 | + let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); |
| 490 | + |
| 491 | + let offset = scalar1.size(bx.cx).align_to(align_hi).bytes(); |
| 492 | + let field0 = tmp; |
| 493 | + let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32)); |
| 494 | + |
| 495 | + bx.store(reg_lo, field0, align_lo); |
| 496 | + bx.store(reg_hi, field1, align_hi); |
| 497 | + |
| 498 | + tmp |
| 499 | + } |
| 500 | + (_, _) => { |
| 501 | + // Two integer/pointer values are just contiguous in memory. |
| 502 | + let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); |
| 503 | + |
| 504 | + // Copy into a temporary if the type is more aligned than the register save area. |
| 505 | + copy_to_temporary_if_more_aligned(bx, reg_addr, layout) |
| 506 | + } |
| 507 | + } |
| 508 | + } |
| 509 | + // The previous match on `BackendRepr` means control flow already escaped. |
| 510 | + BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(), |
| 511 | + }; |
| 512 | + |
| 513 | + // AMD64-ABI 3.5.7p5: Step 5. Set: |
| 514 | + // l->gp_offset = l->gp_offset + num_gp * 8 |
| 515 | + if num_gp_registers > 0 { |
| 516 | + let offset = bx.const_u32(num_gp_registers * 8); |
| 517 | + let sum = bx.add(gp_offset_v, offset); |
| 518 | + bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap()); |
| 519 | + } |
| 520 | + |
| 521 | + // l->fp_offset = l->fp_offset + num_fp * 16. |
| 522 | + if num_fp_registers > 0 { |
| 523 | + let offset = bx.const_u32(num_fp_registers * 16); |
| 524 | + let sum = bx.add(fp_offset_v, offset); |
| 525 | + bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap()); |
| 526 | + } |
| 527 | + |
| 528 | + bx.br(end); |
| 529 | + |
| 530 | + bx.switch_to_block(in_mem); |
| 531 | + let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout); |
| 532 | + bx.br(end); |
| 533 | + |
| 534 | + bx.switch_to_block(end); |
| 535 | + |
| 536 | + let val_type = layout.llvm_type(bx); |
| 537 | + let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]); |
| 538 | + |
| 539 | + bx.load(val_type, val_addr, layout.align.abi) |
| 540 | +} |
| 541 | + |
| 542 | +/// Copy into a temporary if the type is more aligned than the register save area. |
| 543 | +fn copy_to_temporary_if_more_aligned<'ll, 'tcx>( |
| 544 | + bx: &mut Builder<'_, 'll, 'tcx>, |
| 545 | + reg_addr: &'ll Value, |
| 546 | + layout: TyAndLayout<'tcx, Ty<'tcx>>, |
| 547 | +) -> &'ll Value { |
| 548 | + if layout.layout.align.abi.bytes() > 8 { |
| 549 | + let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); |
| 550 | + bx.memcpy( |
| 551 | + tmp, |
| 552 | + layout.layout.align.abi, |
| 553 | + reg_addr, |
| 554 | + Align::from_bytes(8).unwrap(), |
| 555 | + bx.const_u32(layout.layout.size().bytes() as u32), |
| 556 | + MemFlags::empty(), |
| 557 | + ); |
| 558 | + tmp |
| 559 | + } else { |
| 560 | + reg_addr |
| 561 | + } |
| 562 | +} |
| 563 | + |
| 564 | +fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>( |
| 565 | + bx: &mut Builder<'_, 'll, 'tcx>, |
| 566 | + va_list_addr: &'ll Value, |
| 567 | + layout: TyAndLayout<'tcx, Ty<'tcx>>, |
| 568 | +) -> &'ll Value { |
| 569 | + let dl = bx.cx.data_layout(); |
| 570 | + |
| 571 | + let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8)); |
| 572 | + |
| 573 | + let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi); |
| 574 | + // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 |
| 575 | + // byte boundary if alignment needed by type exceeds 8 byte boundary. |
| 576 | + // It isn't stated explicitly in the standard, but in practice we use |
| 577 | + // alignment greater than 16 where necessary. |
| 578 | + if layout.layout.align.abi.bytes() > 8 { |
| 579 | + unreachable!("all instances of VaArgSafe have an alignment <= 8"); |
| 580 | + } |
| 581 | + |
| 582 | + // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. |
| 583 | + let mem_addr = overflow_arg_area_v; |
| 584 | + |
| 585 | + // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: |
| 586 | + // l->overflow_arg_area + sizeof(type). |
| 587 | + // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to |
| 588 | + // an 8 byte boundary. |
| 589 | + let size_in_bytes = layout.layout.size().bytes(); |
| 590 | + let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32); |
| 591 | + let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset); |
| 592 | + bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi); |
| 593 | + |
| 594 | + mem_addr |
| 595 | +} |
| 596 | + |
306 | 597 | fn emit_xtensa_va_arg<'ll, 'tcx>(
307 | 598 | bx: &mut Builder<'_, 'll, 'tcx>,
308 | 599 | list: OperandRef<'tcx, &'ll Value>,
@@ -447,6 +738,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
447 | 738 | AllowHigherAlign::No,
448 | 739 | )
449 | 740 | }
| 741 | + // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64. |
| 742 | + "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty), |
450 | 743 | "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
451 | 744 | // For all other architecture/OS combinations fall back to using
452 | 745 | // the LLVM va_arg instruction.
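To make the ABI steps referenced in the comments easier to follow, here is a minimal host-side sketch of the bookkeeping that the emitted IR performs at runtime, assuming the `__va_list_tag` layout shown in the diff. `VaListTag`, `fits_in_registers`, and `take_from_overflow_area` are illustrative names chosen by the editor, not identifiers from the commit.

// Sketch of the runtime bookkeeping (AMD64-ABI 3.5.7p5); names are hypothetical.
#[repr(C)]
struct VaListTag {
    gp_offset: u32,             // offset into reg_save_area for the next GP slot, at most 48
    fp_offset: u32,             // offset into reg_save_area for the next SSE slot, at most 176
    overflow_arg_area: *mut u8, // stack area for arguments that did not fit in registers
    reg_save_area: *mut u8,     // area where the prologue spilled the argument registers
}

// Step 3: the value can be fetched from registers only if every register class
// it needs still has room; otherwise the lowering falls through to memory.
fn fits_in_registers(list: &VaListTag, num_gp: u32, num_fp: u32) -> bool {
    let gp_ok = num_gp == 0 || list.gp_offset + num_gp * 8 <= 48;
    let fp_ok = num_fp == 0 || list.fp_offset + num_fp * 16 <= 176;
    gp_ok && fp_ok
}

// Steps 8-10: read from overflow_arg_area, then advance it to the next 8-byte boundary.
unsafe fn take_from_overflow_area(list: &mut VaListTag, size: usize) -> *mut u8 {
    let addr = list.overflow_arg_area;
    list.overflow_arg_area = unsafe { addr.add(size.next_multiple_of(8)) };
    addr
}

For the types that implement `VaArgSafe`, the register counts are small: `i32`, `u32`, `i64`, `u64`, and pointers need one general-purpose register (`num_gp = 1`), while `f64` needs one SSE register (`num_fp = 1`). Whenever `fits_in_registers` is false, the generated code branches to the `va_arg.in_mem` block and reads from the overflow area instead.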
|
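For context, a hedged sketch of the kind of Rust code that exercises this lowering: a nightly `c_variadic` function whose `arg::<T>()` calls go through `emit_x86_64_sysv64_va_arg` on x86_64 SysV (and Darwin) targets. `sum_ints` is an illustrative name, not part of the commit.

// Illustrative caller-side example (requires nightly and feature(c_variadic)).
#![feature(c_variadic)]

/// Sums `count` variadic i64 arguments, the way a C va_arg loop would.
pub unsafe extern "C" fn sum_ints(count: usize, mut args: ...) -> i64 {
    let mut total = 0;
    for _ in 0..count {
        // Each arg::<i64>() call is what the va_arg lowering above implements.
        total += unsafe { args.arg::<i64>() };
    }
    total
}

Architectures not special-cased in `emit_va_arg` keep using LLVM's own `va_arg` instruction, as the comment at the bottom of the diff notes.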