Skip to content

Commit d7ea38a

Browse files
authored
Merge branch 'master' into ueqri-enhanced-heap-for-connection-router
2 parents 93d051d + 387f187 commit d7ea38a

File tree

18 files changed

+127
-42
lines changed

18 files changed

+127
-42
lines changed

doc/src/vpr/command_line_usage.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,14 @@ General Options
200200

201201
.. option:: --device <string>
202202

203-
Specifies which device layout/floorplan to use from the architecture file.
203+
Specifies which device layout/floorplan to use from the architecture file. Valid values are:
204204

205-
``auto`` uses the smallest device satisfying the circuit's resource requirements.
206-
Other values are assumed to be the names of device layouts defined in the :ref:`arch_grid_layout` section of the architecture file.
205+
* ``auto`` VPR uses the smallest device satisfying the circuit's resource requirements. This option will use the ``<auto_layout>`` tag if it is present in the architecture file in order to construct the smallest FPGA that has sufficient resources to fit the design. If the ``<auto_layout>`` tag is not present, the ``auto`` option chooses the smallest device amongst all the architecture file's ``<fixed_layout>`` specifications into which the design can be packed.
206+
* Any string matching the ``name`` attribute of a device layout defined with a ``<fixed_layout>`` tag in the :ref:`arch_grid_layout` section of the architecture file.
207207

208-
.. note:: If the architecture contains both ``<auto_layout>`` and ``<fixed_layout>`` specifications, specifying an ``auto`` device will use the ``<auto_layout>``.
208+
If the value specified is neither ``auto`` nor matches the ``name`` attribute value of a ``<fixed_layout>`` tag, VPR issues an error.
209+
210+
.. note:: If the only layout in the architecture file is a single device specified using ``<fixed_layout>``, it is recommended to always specify the ``--device`` option; this prevents the value ``--device auto`` from interfering with operations supported only for ``<fixed_layout>`` grids.
209211

210212
**Default:** ``auto``
211213

libs/libarchfpga/src/physical_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ class t_pb_graph_pin {
13821382
float tco_max = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the maximum clock to output time */
13831383
t_pb_graph_pin* associated_clock_pin = nullptr; /* For sequential elements, the associated clock */
13841384

1385-
/* This member is used when flat-routing and has_choking_spot are enabled.
1385+
/* This member is used when flat-routing and router_opt_choke_points are enabled.
13861386
* It is used to identify choke points.
13871387
* This is only valid for IPINs, and it only contains the pins that are reachable to the pin by a forwarding path.
13881388
* It doesn't take into account feed-back connection.

vpr/src/base/SetupVPR.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ void SetupVPR(const t_options* options,
318318
vtr::ScopedStartFinishTimer timer("Allocate intra-cluster resources");
319319
// The following two functions should be called when the data structures related to t_pb_graph_node, t_pb_type,
320320
// and t_pb_graph_edge are initialized
321-
alloc_and_load_intra_cluster_resources(routerOpts->has_choking_spot);
321+
alloc_and_load_intra_cluster_resources(routerOpts->has_choke_point);
322322
add_intra_tile_switches();
323323
}
324324

@@ -510,7 +510,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
510510
RouterOpts->max_logged_overused_rr_nodes = Options.max_logged_overused_rr_nodes;
511511
RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report;
512512
RouterOpts->flat_routing = Options.flat_routing;
513-
RouterOpts->has_choking_spot = Options.has_choking_spot;
513+
RouterOpts->has_choke_point = Options.router_opt_choke_points;
514514
RouterOpts->custom_3d_sb_fanin_fanout = Options.custom_3d_sb_fanin_fanout;
515515
RouterOpts->with_timing_analysis = Options.timing_analysis;
516516
}

vpr/src/base/ShowSetup.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,11 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
255255
VTR_LOG("false\n");
256256
}
257257

258-
VTR_LOG("RouterOpts.has_choking_spot: ");
259-
if (RouterOpts.has_choking_spot) {
260-
VTR_LOG("true\n");
258+
VTR_LOG("RouterOpts.choke_points: ");
259+
if (RouterOpts.has_choke_point) {
260+
VTR_LOG("on\n");
261261
} else {
262-
VTR_LOG("false\n");
262+
VTR_LOG("off\n");
263263
}
264264

265265
VTR_ASSERT(GLOBAL == RouterOpts.route_type || DETAILED == RouterOpts.route_type);

vpr/src/base/place_and_route.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
398398

399399
init_route_structs(router_net_list,
400400
router_opts.bb_factor,
401-
router_opts.has_choking_spot,
401+
router_opts.has_choke_point,
402402
is_flat);
403403

404404
restore_routing(best_routing,

vpr/src/base/read_options.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2487,13 +2487,13 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
24872487
.default_value("off")
24882488
.show_in(argparse::ShowIn::HELP_ONLY);
24892489

2490-
route_grp.add_argument(args.has_choking_spot, "--has_choking_spot")
2490+
route_grp.add_argument<bool, ParseOnOff>(args.router_opt_choke_points, "--router_opt_choke_points")
24912491
.help(
24922492
""
2493-
"Some FPGA architectures, due to the lack of full connectivity inside the cluster, may have"
2494-
" a choking spot inside the cluster. Thus, if routing doesn't converge, enabling this option may"
2495-
" help it.")
2496-
.default_value("false")
2493+
"Some FPGA architectures with limited fan-out options within a cluster (e.g. fracturable LUTs with shared pins) do"
2494+
" not converge well in routing unless these fan-out choke points are discovered and optimized for during net routing."
2495+
" This option helps router convergence for such architectures.")
2496+
.default_value("on")
24972497
.show_in(argparse::ShowIn::HELP_ONLY);
24982498

24992499

vpr/src/base/read_options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ struct t_options {
218218
argparse::ArgValue<int> reorder_rr_graph_nodes_threshold;
219219
argparse::ArgValue<int> reorder_rr_graph_nodes_seed;
220220
argparse::ArgValue<bool> flat_routing;
221-
argparse::ArgValue<bool> has_choking_spot;
221+
argparse::ArgValue<bool> router_opt_choke_points;
222222
argparse::ArgValue<int> route_verbosity;
223223
argparse::ArgValue<int> custom_3d_sb_fanin_fanout;
224224

vpr/src/base/read_route.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ bool read_route(const char* route_file, const t_router_opts& router_opts, bool v
107107
const Netlist<>& router_net_list = (flat_router) ? (const Netlist<>&)g_vpr_ctx.atom().nlist : (const Netlist<>&)g_vpr_ctx.clustering().clb_nlist;
108108
init_route_structs(router_net_list,
109109
router_opts.bb_factor,
110-
router_opts.has_choking_spot,
110+
router_opts.has_choke_point,
111111
flat_router);
112112

113113
/*Check dimensions*/

vpr/src/base/vpr_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ struct t_router_opts {
13391339
bool generate_rr_node_overuse_report;
13401340

13411341
bool flat_routing;
1342-
bool has_choking_spot;
1342+
bool has_choke_point;
13431343

13441344
int custom_3d_sb_fanin_fanout = 1;
13451345

vpr/src/place/net_cost_handler.cpp

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@
3131
#include "placer_state.h"
3232
#include "move_utils.h"
3333
#include "place_timing_update.h"
34-
#include "noc_place_utils.h"
3534
#include "vtr_math.h"
35+
#include "vtr_ndmatrix.h"
36+
#include "vtr_ndoffsetmatrix.h"
3637

3738
#include <array>
3839

@@ -53,9 +54,6 @@ constexpr std::array<float, MAX_FANOUT_CROSSING_COUNT> cross_count = {1.0000, 1.
5354
2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410,
5455
2.7671, 2.7933};
5556

56-
57-
58-
5957
/**
6058
* @brief If the moving pin is of type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id"
6159
* @param pin_old_loc Old location of the moving pin
@@ -229,6 +227,70 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
229227
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp);
230228
}
231229
}
230+
231+
if (device_ctx.grid.get_num_layers() > 1) {
232+
alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp);
233+
}
234+
}
235+
236+
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) {
237+
const auto& device_ctx = g_vpr_ctx.device();
238+
const auto& rr_graph = device_ctx.rr_graph;
239+
240+
const size_t grid_height = device_ctx.grid.height();
241+
const size_t grid_width = device_ctx.grid.width();
242+
243+
244+
chanz_place_cost_fac_ = vtr::NdMatrix<float, 4>({grid_width, grid_height, grid_width, grid_height}, 0.);
245+
246+
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
247+
248+
for (const auto& src_rr_node : rr_graph.nodes()) {
249+
for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) {
250+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
251+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
252+
// We assume that the nodes driving the inter-layer connection or being driven by it
253+
// are not streched across multiple tiles
254+
int src_x = rr_graph.node_xhigh(src_rr_node);
255+
int src_y = rr_graph.node_yhigh(src_rr_node);
256+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y);
257+
258+
tile_num_inter_die_conn[src_x][src_y]++;
259+
}
260+
}
261+
262+
for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) {
263+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
264+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
265+
int src_x = rr_graph.node_xhigh(src_rr_node);
266+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x);
267+
int src_y = rr_graph.node_yhigh(src_rr_node);
268+
VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y);
269+
tile_num_inter_die_conn[src_x][src_y]++;
270+
}
271+
}
272+
}
273+
274+
for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) {
275+
for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) {
276+
for (int x_low = 0; x_low <= x_high; x_low++) {
277+
for (int y_low = 0; y_low <= y_high; y_low++) {
278+
int num_inter_die_conn = 0;
279+
for (int x = x_low; x <= x_high; x++) {
280+
for (int y = y_low; y <= y_high; y++) {
281+
num_inter_die_conn += tile_num_inter_die_conn[x][y];
282+
}
283+
}
284+
int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1);
285+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast<float>(num_inter_die_conn);
286+
287+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow(
288+
(double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
289+
(double)place_cost_exp);
290+
}
291+
}
292+
}
293+
}
232294
}
233295

234296
double NetCostHandler::comp_bb_cost(e_cost_methods method) {
@@ -1395,6 +1457,8 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
13951457

13961458
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
13971459

1460+
const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
1461+
13981462
double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
13991463

14001464
/* Could insert a check for xmin == xmax. In that case, assume *
@@ -1413,6 +1477,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14131477
double ncost;
14141478
ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
14151479
ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
1480+
if (is_multi_layer) {
1481+
ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin];
1482+
}
14161483

14171484
return ncost;
14181485
}

0 commit comments

Comments
 (0)