Skip to content

Commit 4ab9d39

Browse files
committed
fix: memory access dependency computation correctly remembers writes
1 parent abad0c1 commit 4ab9d39

5 files changed

Lines changed: 91 additions & 93 deletions

quil-rs/src/program/scheduling/graph.rs

Lines changed: 83 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,9 @@ impl std::fmt::Display for ScheduledGraphNode {
7070
}
7171
}
7272

73-
/// A MemoryAccessQueue expresses the current state of memory accessors at the time of
74-
/// an instruction's execution.
75-
///
76-
/// Quil uses a multiple-reader, single-writer concurrency model for memory access.
77-
#[derive(Debug, Default, Clone)]
78-
struct MemoryAccessQueue {
79-
pending_capture: Option<ScheduledGraphNode>,
80-
pending_reads: Vec<ScheduledGraphNode>,
81-
pending_write: Option<ScheduledGraphNode>,
82-
}
83-
8473
/// A MemoryAccessDependency expresses a dependency that one node has on another to complete
8574
/// some type of memory access prior to the dependent node's execution.
86-
#[derive(Clone, Debug)]
75+
#[derive(Clone, Copy, Debug)]
8776
struct MemoryAccessDependency {
8877
/// What type of memory access must complete prior to the downstream instruction.
8978
// NOTE: This must remain the first field for ordering to work as expected.
@@ -107,76 +96,70 @@ pub enum ExecutionDependency {
10796
StableOrdering,
10897
}
10998

110-
/// A data structure to be used in the serializing of access to a memory region.
111-
/// This utility helps guarantee strong consistency in a single-writer, multiple-reader model.
99+
/// A [`MemoryAccessQueue`] keeps track of which reads and writes are currently outstanding for a
100+
/// given memory region.
101+
///
102+
/// Quil memory is sequentially consistent; all writes must happen-before subsequent reads, and all
103+
/// reads must happen-before subsequent writes. There is no dependency between reads.
104+
///
105+
/// For the purposes of linearizing memory accesses, there is no difference between
106+
/// [write][MemoryAccessType::Write] and [capture][MemoryAccessType::Capture] accesses; both are
107+
/// considered writes, although they are distinguished.
108+
///
109+
/// This queue keeps track of the most recent write (or capture) access, if any, as well as all
110+
/// reads that have been performed since that write. Every memory access
111+
/// [registered][Self::access_node_and_get_dependencies] after that write access incurs a
112+
/// [dependency][MemoryAccessDependency] on it. When a write (or capture) access is performed, both
113+
/// the prior most recent write and all outstanding reads are registered as a dependency on this new
114+
/// write; this new write becomes the most recent write, with no reads that have been performed
115+
/// since it.
116+
///
117+
/// [read]: MemoryAccessType::Read
118+
/// [write]: MemoryAccessType::Write
119+
/// [writes]: MemoryAccessType::Write
120+
/// [capture]: MemoryAccessType::Capture
121+
/// [captures]: MemoryAccessType::Capture
122+
#[derive(Debug, Default, Clone)]
123+
struct MemoryAccessQueue {
124+
/// The most recent write or capture access, if any.
125+
write: Option<MemoryAccessDependency>,
126+
127+
/// All reads that that have occurred after [`Self::write`] and before any other write or
128+
/// capture access.
129+
reads: Vec<ScheduledGraphNode>,
130+
}
131+
112132
impl MemoryAccessQueue {
113-
/// Register that a node wants access of the given type, while returning which accesses block
114-
/// the requested access.
115-
///
116-
/// Captures and writes may not happen concurrently with any other access; multiple reads may
117-
/// occur concurrently.
133+
/// Register that an access of the provided [type][MemoryAccessType] is being performed to the
134+
/// provided [graph node][ScheduledGraphNode], returning [all the memory accesses that must have
135+
/// completed before this one can happen][MemoryAccessDependency].
118136
///
119-
/// Thus, if the caller requests Read access, and there are no pending captures or writes, then
120-
/// there will be no blocking nodes.
121-
///
122-
/// However, if there is a pending capture or write, that dependency will be expressed in the
123-
/// return value.
124-
///
125-
/// If the caller requests a capture or a write, then all pending calls - reads, writes, and captures -
126-
/// will be returned as "blocking" the capture or write.
127-
///
128-
/// A capture or write remains blocking until the next capture or write.
129-
pub fn get_blocking_nodes(
137+
/// To ensure sequential consistency, neither [writes][] nor [captures][] may happen
138+
/// concurrently with any other accesses; however, multiple reads may happen concurrently.
139+
pub fn access_node_and_get_dependencies(
130140
&mut self,
131141
node_id: ScheduledGraphNode,
132-
access: &MemoryAccessType,
142+
access_type: MemoryAccessType,
133143
) -> Vec<MemoryAccessDependency> {
134-
use MemoryAccessType::*;
135-
136-
let mut result = vec![];
137-
if let Some(node_id) = self.pending_write {
138-
result.push(MemoryAccessDependency {
139-
node_id,
140-
access_type: Write,
141-
});
142-
}
143-
if let Some(node_id) = self.pending_capture {
144-
result.push(MemoryAccessDependency {
145-
node_id,
146-
access_type: Capture,
147-
});
148-
}
149-
150-
self.pending_capture = None;
151-
self.pending_write = None;
152-
153-
match access {
154-
Read => {
155-
self.pending_reads.push(node_id);
156-
}
157-
Capture => {
158-
for upstream_node_id in self.pending_reads.iter() {
159-
result.push(MemoryAccessDependency {
160-
node_id: *upstream_node_id,
161-
access_type: Read,
162-
});
163-
}
164-
165-
self.pending_reads = vec![];
166-
self.pending_capture = Some(node_id);
167-
}
168-
169-
Write => {
170-
for upstream_node_id in self.pending_reads.iter() {
171-
result.push(MemoryAccessDependency {
172-
node_id: *upstream_node_id,
173-
access_type: Read,
174-
});
175-
}
176-
177-
self.pending_reads = vec![];
178-
self.pending_write = Some(node_id);
144+
// Writes must happen before both subsequent reads *and* subsequent writes, so we
145+
// unconditionally register a dependency on the preceding write.
146+
let mut result: Vec<_> = self.write.into_iter().collect();
147+
148+
match access_type {
149+
// If we're doing a write now, then all outstanding reads must finish first.
150+
MemoryAccessType::Write | MemoryAccessType::Capture => {
151+
result.extend(self.reads.drain(..).map(|read_id| MemoryAccessDependency {
152+
access_type: MemoryAccessType::Read,
153+
node_id: read_id,
154+
}));
155+
156+
self.write = Some(MemoryAccessDependency {
157+
access_type,
158+
node_id,
159+
})
179160
}
161+
// Otherwise, save this read so we can serialize it with respect to the next write.
162+
MemoryAccessType::Read => self.reads.push(node_id),
180163
}
181164

182165
result
@@ -313,8 +296,16 @@ impl<'a> ScheduledBasicBlock<'a> {
313296
let mut last_timed_instruction_by_frame: HashMap<FrameIdentifier, PreviousNodes> =
314297
HashMap::new();
315298

316-
// Store memory access reads and writes. Key is memory region name.
317-
// NOTE: this may be refined to serialize by memory region offset rather than by entire region.
299+
// Keep track of memory accesses (reads/writes) to each region, so that they can be
300+
// serialized. This serialization is done by memory region; that is, writes to `region[0]`
301+
// are not allowed to be concurrent with writes to `region[1]`.
302+
//
303+
// This could be refined to serialize writes to each memory region *and offset*, so writes
304+
// to `region[0]` could happen in parallel with writes to `region[1]`, although this would
305+
// require further refinement of [`Instruction::get_memory_accesses`] and the
306+
// [`crate::program::memory::MemoryAccesses`] type. In particular, we would need to capture
307+
// accesses that read from/write to potentially an entire region, such as `LOAD` and
308+
// `STORE`, as well as accesses that only access a statically known index.
318309
let mut pending_memory_access: HashMap<String, MemoryAccessQueue> = HashMap::new();
319310

320311
let extern_signature_map = ExternSignatureMap::try_from(program.extern_pragma_map.clone())
@@ -350,32 +341,33 @@ impl<'a> ScheduledBasicBlock<'a> {
350341
(accesses.writes, MemoryAccessType::Write),
351342
(accesses.captures, MemoryAccessType::Capture),
352343
]
353-
.iter()
344+
.into_iter()
354345
.flat_map(|(regions, access_type)| {
355346
regions
356-
.iter()
347+
.into_iter()
357348
.flat_map(|region| {
358349
pending_memory_access
359-
.entry(region.clone())
350+
.entry(region)
360351
.or_default()
361352
// NOTE: This mutates the underlying `MemoryAccessQueue` by registering
362353
// the instruction node.
363-
.get_blocking_nodes(node, access_type)
354+
.access_node_and_get_dependencies(node, access_type)
364355
})
365-
// Collecting is necessary to avoid "captured variable cannot escape FnMut closure body" errors
356+
// We have to `collect` into a `Vec` to finish our accesses to
357+
// `pending_memory_access`.
366358
.collect::<Vec<_>>()
367359
})
368360
.collect::<Vec<_>>();
369-
let has_memory_dependencies = !memory_dependencies.is_empty();
361+
let no_memory_dependencies = memory_dependencies.is_empty();
370362
for memory_dependency in memory_dependencies {
371363
// Test to make sure that no instructions depend directly on themselves
372364
if memory_dependency.node_id != node {
373365
let execution_dependency =
374366
ExecutionDependency::AwaitMemoryAccess(memory_dependency.access_type);
375367
add_dependency!(graph, memory_dependency.node_id => node, execution_dependency);
376-
// This memory dependency now has an outgoing edge, so it is no longer a trailing classical
377-
// instruction. If the memory dependency is not a classical instruction, this
378-
// has no effect.
368+
// This memory dependency now has an outgoing edge, so it is no longer a
369+
// trailing classical instruction. If the memory dependency is not a classical
370+
// instruction, this has no effect.
379371
trailing_classical_instructions.remove(&memory_dependency.node_id);
380372
}
381373
}
@@ -385,7 +377,7 @@ impl<'a> ScheduledBasicBlock<'a> {
385377
InstructionRole::ClassicalCompute => {
386378
// If this instruction has no memory dependencies, it is a leading classical
387379
// instruction and needs an incoming edge from the block start.
388-
if !has_memory_dependencies {
380+
if no_memory_dependencies {
389381
add_dependency!(graph, ScheduledGraphNode::BlockStart => node, ExecutionDependency::StableOrdering);
390382
}
391383
trailing_classical_instructions.insert(node);
@@ -801,8 +793,9 @@ MUL params2[0] 2 # reads and writes params2
801793
}
802794

803795
// Because memory reading and writing dependencies also apply to RfControl instructions, we
804-
// expect edges from the first load to the first shift-phase (0 -> 1), the first shift-phase
805-
// to the second load (1 -> 2), and the second load to the second shift-phase (2 -> 3).
796+
// expect edges from the first load to the first shift-phase (0 -> 1) as well as to the the
797+
// second load (0 -> 2), from the first shift-phase to the second load (1 -> 2), and from the
798+
// second load to the second shift-phase (2 -> 3).
806799
build_dot_format_snapshot_test_case! {
807800
quantum_write_parameterized_operations,
808801
r#"

quil-rs/src/program/scheduling/snapshots/quil_rs__program__scheduling__graph__graphviz_dot_tests__memory_dependency_array.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ timing"];
2525
"block_0_1" -> "block_0_2" [label="await write"];
2626
"block_0_2" [shape=rectangle, label="[2] MOVE phase[1] 0.1"];
2727
"block_0_2" -> "block_0_3" [label="await write"];
28+
"block_0_2" -> "block_0_4" [label="await write"];
2829
"block_0_3" [shape=rectangle, label="[3] SET-PHASE 0 \"frame0\" (2*pi)*phase[0]"];
2930
"block_0_3" -> "block_0_5" [label="ordering
3031
timing"];

quil-rs/src/program/scheduling/snapshots/quil_rs__program__scheduling__graph__graphviz_dot_tests__memory_dependency_one_variable.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ timing"];
2323
"block_0_0" [shape=rectangle, label="[0] DELAY 2e-8"];
2424
"block_0_1" [shape=rectangle, label="[1] MOVE phase[0] 0.1"];
2525
"block_0_1" -> "block_0_2" [label="await write"];
26+
"block_0_1" -> "block_0_3" [label="await write"];
2627
"block_0_2" [shape=rectangle, label="[2] SET-PHASE 0 \"frame0\" (2*pi)*phase[0]"];
2728
"block_0_2" -> "block_0_4" [label="ordering
2829
timing"];

quil-rs/src/program/scheduling/snapshots/quil_rs__program__scheduling__graph__graphviz_dot_tests__quantum_write_parameterized_operations.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ timing"];
1616
timing"];
1717
"block_0_0" [shape=rectangle, label="[0] LOAD params2[0] params1 integers[0]"];
1818
"block_0_0" -> "block_0_1" [label="await write"];
19+
"block_0_0" -> "block_0_2" [label="await write"];
1920
"block_0_1" [shape=rectangle, label="[1] SHIFT-PHASE 0 \"rf\" params2[0]"];
2021
"block_0_1" -> "block_0_2" [label="await read"];
2122
"block_0_1" -> "block_0_end" [label="ordering

quil-rs/src/program/scheduling/snapshots/quil_rs__program__scheduling__graphviz_dot__tests__graph__parametric_pulses_using_capture_results.snap

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
source: quil-rs/src/program/graphviz_dot.rs
2+
source: quil-rs/src/program/scheduling/graphviz_dot.rs
33
expression: dot_format
44
---
55
digraph {
@@ -23,7 +23,9 @@ timing"];
2323
"block_0_0" -> "block_0_1" [label="await capture
2424
ordering
2525
timing"];
26-
"block_0_0" -> "block_0_3" [label="ordering
26+
"block_0_0" -> "block_0_2" [label="await capture"];
27+
"block_0_0" -> "block_0_3" [label="await capture
28+
ordering
2729
timing"];
2830
"block_0_0" -> "block_0_end" [label="ordering
2931
timing"];
@@ -41,6 +43,7 @@ timing"];
4143
"block_0_3" -> "block_0_4" [label="await capture
4244
ordering
4345
timing"];
46+
"block_0_3" -> "block_0_5" [label="await capture"];
4447
"block_0_3" -> "block_0_end" [label="ordering
4548
timing"];
4649
"block_0_4" [shape=rectangle, label="[4] NONBLOCKING PULSE 0 \"rf\" test(a: ro[0])"];
@@ -52,4 +55,3 @@ timing"];
5255
"block_0_end" [shape=circle, label="end"];
5356
}
5457
}
55-

0 commit comments

Comments
 (0)