diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index 99a4bc1b7d6e8..ec4488a3a84a0 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -979,12 +979,18 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     }
 
     /// Handle the effect an FFI call might have on the state of allocations.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
+    /// If `paranoid` is true, overapproximates the modifications which external code might make
+    /// to memory: We set all reachable allocations as initialized, mark all reachable provenances
+    /// as exposed and overwrite them with `Provenance::WILDCARD`. Otherwise, it just makes sure
+    /// that all allocations are properly set up so that we don't leak whatever was in the uninit
+    /// bytes on FFI call.
     ///
     /// The allocations in `ids` are assumed to be already exposed.
-    pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
+    pub fn prepare_for_native_call(
+        &mut self,
+        ids: Vec<AllocId>,
+        paranoid: bool,
+    ) -> InterpResult<'tcx> {
         let mut done = FxHashSet::default();
         let mut todo = ids;
         while let Some(id) = todo.pop() {
@@ -999,25 +1005,117 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                 continue;
             }
-            // Expose all provenances in this allocation, and add them to `todo`.
+            // Make sure we iterate over everything recursively, preparing the extra alloc info.
             let alloc = self.get_alloc_raw(id)?;
             for prov in alloc.provenance().provenances() {
-                M::expose_provenance(self, prov)?;
+                if paranoid {
+                    // Expose all provenances in this allocation, and add them to `todo`.
+                    M::expose_provenance(self, prov)?;
+                }
                 if let Some(id) = prov.get_alloc_id() {
                     todo.push(id);
                 }
             }
+
             // Also expose the provenance of the interpreter-level allocation, so it can
             // be read by FFI. The `black_box` is defensive programming as LLVM likes
             // to (incorrectly) optimize away ptr2int casts whose result is unused.
-            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-
-            // Prepare for possible write from native code if mutable.
-            if info.mutbl.is_mut() {
-                self.get_alloc_raw_mut(id)?
-                    .0
-                    .prepare_for_native_write()
-                    .map_err(|e| e.to_interp_error(id))?;
+            if paranoid {
+                std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
+                // Prepare for possible write from native code if mutable.
+                if info.mutbl.is_mut() {
+                    self.get_alloc_raw_mut(id)?.0.prepare_for_native_write();
+                }
+            }
+        }
+        interp_ok(())
+    }
+
+    /// Updates the machine state "as if" the accesses given had been performed.
+    /// Used only by Miri for FFI, for taking note of events that were intercepted from foreign
+    /// code and properly (but still conservatively) marking their effects. Remember to call
+    /// `prepare_for_native_call` with `paranoid` set to false first on the same `AllocId`s, or
+    /// some writes may be discarded!
+    ///
+    /// The allocations in `ids` are assumed to be already exposed.
+    pub fn apply_accesses(
+        &mut self,
+        mut ids: Vec<AllocId>,
+        reads: Vec<std::ops::Range<u64>>,
+        writes: Vec<std::ops::Range<u64>>,
+    ) -> InterpResult<'tcx> {
+        /// Helper function to avoid some code duplication over range overlaps.
+        fn get_start_size(
+            rg: std::ops::Range<u64>,
+            alloc_base: u64,
+            alloc_size: u64,
+        ) -> Option<(u64, u64)> {
+            // A bunch of range bounds nonsense that effectively simplifies to
+            // "get the starting point of the overlap and the length from there".
+            // Needs to also account for the allocation being in the middle of the
+            // range or completely containing it.
+            let signed_start = rg.start.cast_signed() - alloc_base.cast_signed();
+            let size_uncapped = if signed_start < 0 {
+                // If this returns, they don't overlap.
+                (signed_start + (rg.end - rg.start).cast_signed()).try_into().ok()?
+            } else {
+                rg.end - rg.start
+            };
+            let start: u64 = signed_start.try_into().unwrap_or(0);
+            let size = std::cmp::min(size_uncapped, alloc_size - start);
+            Some((start, size))
+        }
+
+        let mut done = FxHashSet::default();
+        while let Some(id) = ids.pop() {
+            if !done.insert(id) {
+                continue;
+            }
+            let info = self.get_alloc_info(id);
+
+            // If there is no data behind this pointer, skip this.
+            if !matches!(info.kind, AllocKind::LiveData) {
+                continue;
+            }
+
+            let alloc_base: u64 = {
+                // Keep the alloc here so the borrow checker is happy.
+                let alloc = self.get_alloc_raw(id)?;
+                // No need for black_box trickery since we actually use the address.
+                alloc.get_bytes_unchecked_raw().expose_provenance().try_into().unwrap()
+            };
+            let alloc_size = info.size.bytes();
+
+            // Find reads which overlap with the current allocation.
+            for rg in &reads {
+                if let Some((start, size)) = get_start_size(rg.clone(), alloc_base, alloc_size) {
+                    let alloc = self.get_alloc_raw(id)?;
+                    let prov_map = alloc.provenance();
+                    // Only iterate on the bytes that overlap with the access.
+                    for i in start..start + size {
+                        // We can be conservative and only expose provenances actually read.
+                        if let Some(prov) = prov_map.get(Size::from_bytes(i), self)
+                            && rg.contains(&(alloc_base + i))
+                        {
+                            M::expose_provenance(self, prov)?;
+                            if let Some(id) = prov.get_alloc_id() {
+                                ids.push(id);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Then do the same thing for writes, marking down that a write happened.
+            for rg in &writes {
+                if let Some((start, size)) = get_start_size(rg.clone(), alloc_base, alloc_size)
+                    && self.get_alloc_mutability(id)?.is_mut()
+                {
+                    let alloc_mut = self.get_alloc_raw_mut(id)?.0;
+                    let range =
+                        AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(size) };
+                    alloc_mut.mark_foreign_write(range);
+                }
             }
         }
         interp_ok(())
     }
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index dd55d039794f7..e5b9f21b1436d 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -799,7 +799,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     /// Initialize all previously uninitialized bytes in the entire allocation, and set
     /// provenance of everything to `Wildcard`. Before calling this, make sure all
     /// provenance in this allocation is exposed!
-    pub fn prepare_for_native_write(&mut self) -> AllocResult {
+    pub fn prepare_for_native_write(&mut self) {
         let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
         // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
         for chunk in self.init_mask.range_as_init_chunks(full_range) {
@@ -809,18 +809,23 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
                 uninit_bytes.fill(0);
             }
         }
+        self.mark_foreign_write(full_range);
+    }
+
+    /// Initialise previously uninitialised bytes in the given range, and set provenance of
+    /// everything in it to `Wildcard`. Before calling this, make sure all provenance in this
+    /// range is exposed!
+    pub fn mark_foreign_write(&mut self, range: AllocRange) {
         // Mark everything as initialized now.
-        self.mark_init(full_range, true);
+        self.mark_init(range, true);
 
-        // Set provenance of all bytes to wildcard.
-        self.provenance.write_wildcards(self.len());
+        // Set provenance of affected bytes to wildcard.
+        self.provenance.write_wildcards(range);
 
         // Also expose the provenance of the interpreter-level allocation, so it can
         // be written by FFI. The `black_box` is defensive programming as LLVM likes
         // to (incorrectly) optimize away ptr2int casts whose result is unused.
         std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());
-
-        Ok(())
     }
 
     /// Remove all provenance in the given memory range.
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
index c9525df1f7940..a8f2a33441ff5 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
@@ -213,10 +213,11 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
         Ok(())
     }
-    /// Overwrites all provenance in the allocation with wildcard provenance.
+    /// Overwrites all provenance in the specified range within the allocation
+    /// with wildcard provenance.
     ///
     /// Provided for usage in Miri and panics otherwise.
-    pub fn write_wildcards(&mut self, alloc_size: usize) {
+    pub fn write_wildcards(&mut self, range: AllocRange) {
         assert!(
             Prov::OFFSET_IS_ADDR,
             "writing wildcard provenance is not supported when `OFFSET_IS_ADDR` is false"
         );
@@ -225,9 +226,8 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
 
         // Remove all pointer provenances, then write wildcards into the whole byte range.
         self.ptrs.clear();
-        let last = Size::from_bytes(alloc_size);
         let bytes = self.bytes.get_or_insert_with(Box::default);
-        for offset in Size::ZERO..last {
+        for offset in range.start..range.start + range.size {
             bytes.insert(offset, wildcard);
         }
     }
diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index d2977a55e465f..bc66ba452b1b7 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -471,7 +471,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // for the search within `prepare_for_native_call`.
         let exposed: Vec<AllocId> =
             this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
-        this.prepare_for_native_call(exposed)
+        this.prepare_for_native_call(exposed, true)
     }
 }
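
Reviewer note (not part of the patch): the change splits FFI handling into two phases. Calling `prepare_for_native_call` with `paranoid` set to false only zeroes uninitialized bytes before the foreign call, and `apply_accesses` afterwards marks exposure and initialization only for the address ranges the foreign code actually touched, instead of pessimistically exposing everything reachable up front. The sketch below is a toy model of that calling protocol; the `AccessTracer` type and the addresses are made up and stand in for whatever mechanism records the foreign accesses.

```rust
use std::ops::Range;

/// Hypothetical recorder standing in for whatever intercepts the foreign
/// code's memory accesses; it only collects absolute address ranges.
#[derive(Default)]
struct AccessTracer {
    reads: Vec<Range<u64>>,
    writes: Vec<Range<u64>>,
}

impl AccessTracer {
    fn record_read(&mut self, r: Range<u64>) {
        self.reads.push(r);
    }
    fn record_write(&mut self, r: Range<u64>) {
        self.writes.push(r);
    }
}

fn main() {
    // Phase 1 (before the call): in the real code this corresponds to
    // `prepare_for_native_call(ids, /* paranoid */ false)`, which only zeroes
    // uninitialized bytes so nothing is leaked to the foreign code.
    let mut tracer = AccessTracer::default();

    // The foreign call happens here; the tracer observes two accesses.
    tracer.record_read(0x1000..0x1008);
    tracer.record_write(0x1004..0x1006);

    // Phase 2 (after the call): the recorded ranges would be handed to
    // `apply_accesses(ids, reads, writes)`, which exposes provenance only for
    // bytes that were read and marks wildcard provenance / initialization only
    // for bytes that were written.
    println!("reads = {:?}, writes = {:?}", tracer.reads, tracer.writes);
}
```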
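The least obvious piece of `apply_accesses` is the overlap arithmetic in `get_start_size`. The standalone sketch below (again not part of the patch) restates that computation with plain `u64`/`i64` so it can be run and checked in isolation; the `overlap` name and the test values are illustrative, and a `checked_sub` guard is added here for accesses that start past the end of the allocation.

```rust
use std::ops::Range;

/// Given an absolute address range touched by foreign code and an allocation's
/// base address and size, return the offset into the allocation where the
/// overlap starts and its length, or `None` if they don't overlap.
fn overlap(access: Range<u64>, alloc_base: u64, alloc_size: u64) -> Option<(u64, u64)> {
    // Signed distance from the allocation base to the start of the access.
    let signed_start = access.start as i64 - alloc_base as i64;
    let size_uncapped: u64 = if signed_start < 0 {
        // Access starts below the allocation; only the part at or past the base
        // counts. If even the end is below the base, the conversion fails.
        (signed_start + (access.end - access.start) as i64).try_into().ok()?
    } else {
        access.end - access.start
    };
    let start: u64 = signed_start.try_into().unwrap_or(0);
    // Cap the overlap at the end of the allocation (checked_sub is an extra
    // guard in this sketch for accesses starting past the allocation).
    let size = std::cmp::min(size_uncapped, alloc_size.checked_sub(start)?);
    Some((start, size))
}

fn main() {
    // Allocation at address 100 with 16 bytes.
    assert_eq!(overlap(96..104, 100, 16), Some((0, 4))); // straddles the start
    assert_eq!(overlap(104..108, 100, 16), Some((4, 4))); // fully inside
    assert_eq!(overlap(108..140, 100, 16), Some((8, 8))); // capped at the end
    assert_eq!(overlap(90..98, 100, 16), None); // entirely below: no overlap
}
```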