@@ -244,61 +244,62 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
244
244
245
245
acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int , 2 >({grid_width, grid_height}, 0 .);
246
246
247
- vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
247
+ vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
248
+
249
+ /*
250
+ * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
251
+ * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
252
+ * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
253
+ * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
254
+ * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
255
+ * if someday we have architectures with widely varying connectivity between different layers in a stack.
256
+ */
248
257
249
258
/*
250
259
* To calculate the accumulative number of inter-die connections we first need to get the number of
251
- * inter-die connection per loaction . To be able to work for the cases that RR Graph is read instead
252
- * of being made from the architecture file, we calculate this number by iterating over RR graph. Once
260
+ * inter-die connections per location. To be able to work for the cases where the RR Graph is read instead
261
+ * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once
253
262
* tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
254
263
* we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
255
- * inter-die connections by adding up the number of inter-die block at that location + the accumulative
256
- * for the block below and left to it. Then, since the accumulative number of inter-die connection to
264
+ * inter-die connections by adding up the number of inter-die connections at that location + the accumulation
265
+ * for the blocks below it and to its left. Then, since the accumulated number of inter-die connections to
257
266
* the block at the lower-left corner of the current block is counted twice, so that part needs to be subtracted once.
258
267
*/
259
268
for (const auto & src_rr_node : rr_graph.nodes ()) {
260
- for (const auto & rr_edge_idx : rr_graph.configurable_edges (src_rr_node)) {
261
- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
262
- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
263
- // We assume that the nodes driving the inter-layer connection or being driven by it
264
- // are not streched across multiple tiles
265
- int src_x = rr_graph.node_xhigh (src_rr_node);
266
- int src_y = rr_graph.node_yhigh (src_rr_node);
267
- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
268
-
269
- tile_num_inter_die_conn[src_x][src_y]++;
270
- }
271
- }
272
-
273
- for (const auto & rr_edge_idx : rr_graph.non_configurable_edges (src_rr_node)) {
274
- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
275
- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
276
- int src_x = rr_graph.node_xhigh (src_rr_node);
277
- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_xlow (src_rr_node) == src_x);
278
- int src_y = rr_graph.node_yhigh (src_rr_node);
279
- VTR_ASSERT (rr_graph.node_ylow (src_rr_node) == src_y && rr_graph.node_ylow (src_rr_node) == src_y);
280
- tile_num_inter_die_conn[src_x][src_y]++;
269
+ for (auto edge_range: {rr_graph.configurable_edges (src_rr_node), rr_graph.non_configurable_edges (src_rr_node)}) {
270
+ for (const auto & rr_edge_idx : edge_range) {
271
+ const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
272
+ if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
273
+ // We assume that the nodes driving the inter-layer connection or being driven by it
274
+ // are not stretched across multiple tiles
275
+ int src_x = rr_graph.node_xhigh (src_rr_node);
276
+ int src_y = rr_graph.node_yhigh (src_rr_node);
277
+ VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
278
+
279
+ tile_num_inter_die_conn[src_x][src_y]++;
280
+ }
281
281
}
282
282
}
283
283
}
284
284
285
+ // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y).
285
286
acc_tile_num_inter_die_conn_[0 ][0 ] = tile_num_inter_die_conn[0 ][0 ];
286
287
// Initialize the first row and column
287
288
for (size_t x = 1 ; x < device_ctx.grid .width (); x++) {
288
- acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] + \
289
+ acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] +
289
290
tile_num_inter_die_conn[x][0 ];
290
291
}
291
292
292
293
for (size_t y = 1 ; y < device_ctx.grid .height (); y++) {
293
- acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] + \
294
+ acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] +
294
295
tile_num_inter_die_conn[0 ][y];
295
296
}
296
297
297
298
for (size_t x_high = 1 ; x_high < device_ctx.grid .width (); x_high++) {
298
299
for (size_t y_high = 1 ; y_high < device_ctx.grid .height (); y_high++) {
299
- acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] + \
300
- acc_tile_num_inter_die_conn_[x_high][y_high-1 ] + \
301
- tile_num_inter_die_conn[x_high][y_high] - \
300
+ acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] +
301
+ acc_tile_num_inter_die_conn_[x_high][y_high-1 ] +
302
+ tile_num_inter_die_conn[x_high][y_high] -
302
303
acc_tile_num_inter_die_conn_[x_high-1 ][y_high-1 ];
303
304
}
304
305
}
@@ -1604,15 +1605,15 @@ float NetCostHandler::get_chanz_cost_factor(const t_bb& bounding_box) {
1604
1605
if (x_low == 0 && y_low == 0 ) {
1605
1606
num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high];
1606
1607
} else if (x_low == 0 ) {
1607
- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1608
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
1608
1609
acc_tile_num_inter_die_conn_[x_high][y_low-1 ];
1609
1610
} else if (y_low == 0 ) {
1610
- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1611
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
1611
1612
acc_tile_num_inter_die_conn_[x_low-1 ][y_high];
1612
1613
} else {
1613
- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1614
- acc_tile_num_inter_die_conn_[x_low-1 ][y_high] - \
1615
- acc_tile_num_inter_die_conn_[x_high][y_low-1 ] + \
1614
+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
1615
+ acc_tile_num_inter_die_conn_[x_low-1 ][y_high] -
1616
+ acc_tile_num_inter_die_conn_[x_high][y_low-1 ] +
1616
1617
acc_tile_num_inter_die_conn_[x_low-1 ][y_low-1 ];
1617
1618
}
1618
1619
0 commit comments