From 9b657d1f420ab27c0594650322b3a18f7eee3d30 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 16 Oct 2024 12:14:35 -0400 Subject: [PATCH 1/7] [vpr][place] add chanz_place_cost_fac_ --- vpr/src/place/net_cost_handler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 65fab00afc2..c5780097a33 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -195,6 +195,7 @@ class NetCostHandler { */ vtr::NdOffsetMatrix chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1] vtr::NdOffsetMatrix chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1] + vtr::NdOffsetMatrix chanz_place_cost_fac_; // [-1...device_ctx.grid.get_num_layers()-1] private: From 89823d7ef7a2bd488d1387a1e8e3d673952a29ac Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 16 Oct 2024 12:22:11 -0400 Subject: [PATCH 2/7] [vpr][place] use chanz_place_cost_fac_ when calculating cost --- vpr/src/place/net_cost_handler.cpp | 5 +++++ vpr/src/place/net_cost_handler.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 539e837090c..be6f7bfb2f9 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1395,6 +1395,8 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id]; + const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1); + double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size()); /* Could insert a check for xmin == xmax. 
In that case, assume * @@ -1413,6 +1415,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { double ncost; ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1]; ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1]; + if (is_multi_layer) { + ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin]; + } return ncost; } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index c5780097a33..984bda6b937 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -195,7 +195,7 @@ class NetCostHandler { */ vtr::NdOffsetMatrix chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1] vtr::NdOffsetMatrix chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1] - vtr::NdOffsetMatrix chanz_place_cost_fac_; // [-1...device_ctx.grid.get_num_layers()-1] + vtr::NdOffsetMatrix chanz_place_cost_fac_; // [-1...device_ctx.grid.get_num_layers()-1] private: From a379afb900175489f6a83c9cd260549ba9c5e0de Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 16 Oct 2024 14:15:03 -0400 Subject: [PATCH 3/7] [vpr][place] add alloc_and_load_for_fast_vertical_cost_update to calculate chanz_place_cost_fac_ --- vpr/src/place/net_cost_handler.cpp | 56 ++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index be6f7bfb2f9..04ef7aaa995 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -55,6 +55,8 @@ constexpr std::array cross_count = {1.0000, 1. 
+static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, + vtr::NdOffsetMatrix& chanz_place_cost_fac); /** * @brief If the moving pin is of type type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id" @@ -229,6 +231,60 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp); } } + + alloc_and_load_for_fast_vertical_cost_update(place_cost_exp, chanz_place_cost_fac_); +} + +static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, vtr::NdOffsetMatrix& chanz_place_cost_fac) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + vtr::NdMatrix tile_num_inter_die_conn({device_ctx.grid.width(), + device_ctx.grid.height()}, 0); + + for (const auto& src_rr_node : rr_graph.nodes()) { + for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) { + const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx); + if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) { + int src_x = rr_graph.node_xhigh(src_rr_node); + int src_y = rr_graph.node_yhigh(src_rr_node); + VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y); + + tile_num_inter_die_conn[src_x][src_y]++; + } + } + + for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) { + const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx); + if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) { + int src_x = rr_graph.node_xhigh(src_rr_node); + VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x); + int src_y = rr_graph.node_yhigh(src_rr_node); + VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y); + 
tile_num_inter_die_conn[src_x][src_y]++; + } + } + } + + for (int x_high = 1; x_high < (int)device_ctx.grid.width(); x_high++) { + for (int y_high = 1; y_high < (int)device_ctx.grid.height(); y_high++) { + for (int x_low = 0; x_low < x_high; x_low++) { + for (int y_low = 0; y_low < y_high; y_low++) { + int num_inter_die_conn = 0; + for (int x = x_low; x <= x_high; x++) { + for (int y = y_low; y <= y_high; y++) { + num_inter_die_conn += tile_num_inter_die_conn[x][y]; + } + } + int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1); + chanz_place_cost_fac[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast(num_inter_die_conn); + + chanz_place_cost_fac[x_high][y_high][x_low][y_low] = pow( + (double)chanz_place_cost_fac[x_high][y_high][x_low][y_low], + (double)place_cost_exp); + } + } + } + } } double NetCostHandler::comp_bb_cost(e_cost_methods method) { From 36386d7edfe055f32528db61e7730142e61f0f29 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 16 Oct 2024 14:28:00 -0400 Subject: [PATCH 4/7] [vpr][place] fix typos --- vpr/src/place/net_cost_handler.cpp | 18 ++++++++++++++---- vpr/src/place/net_cost_handler.h | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 04ef7aaa995..b51a8f21f4a 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -240,6 +240,16 @@ static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, v const auto& rr_graph = device_ctx.rr_graph; vtr::NdMatrix tile_num_inter_die_conn({device_ctx.grid.width(), device_ctx.grid.height()}, 0); + + const int grid_height = device_ctx.grid.height(); + const int grid_width = device_ctx.grid.width(); + + + chanz_place_cost_fac = vtr::NdOffsetMatrix({{{0, grid_width-1}, + {0, grid_height-1}, + {0, grid_width-1}, + {0, grid_height-1}}} + ); for (const auto& src_rr_node : rr_graph.nodes()) { for (const auto& rr_edge_idx : 
rr_graph.configurable_edges(src_rr_node)) { @@ -265,10 +275,10 @@ static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, v } } - for (int x_high = 1; x_high < (int)device_ctx.grid.width(); x_high++) { - for (int y_high = 1; y_high < (int)device_ctx.grid.height(); y_high++) { - for (int x_low = 0; x_low < x_high; x_low++) { - for (int y_low = 0; y_low < y_high; y_low++) { + for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) { + for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) { + for (int x_low = 0; x_low <= x_high; x_low++) { + for (int y_low = 0; y_low <= y_high; y_low++) { int num_inter_die_conn = 0; for (int x = x_low; x <= x_high; x++) { for (int y = y_low; y <= y_high; y++) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 984bda6b937..c8e20aa33d3 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -195,7 +195,7 @@ class NetCostHandler { */ vtr::NdOffsetMatrix chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1] vtr::NdOffsetMatrix chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1] - vtr::NdOffsetMatrix chanz_place_cost_fac_; // [-1...device_ctx.grid.get_num_layers()-1] + vtr::NdOffsetMatrix chanz_place_cost_fac_; // [0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1][0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1] private: From 8cdb6b13503ed76900429f6515faf3218e3d70ee Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 17 Oct 2024 11:27:17 -0400 Subject: [PATCH 5/7] [vpr][place] add comments --- vpr/src/place/net_cost_handler.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index c8e20aa33d3..3048b7637ea 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -195,7 +195,13 @@ class NetCostHandler { */ vtr::NdOffsetMatrix chanx_place_cost_fac_; 
// [-1...device_ctx.grid.width()-1] vtr::NdOffsetMatrix chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1] - vtr::NdOffsetMatrix chanz_place_cost_fac_; // [0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1][0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1] + /** + @brief This data structure functions similarly to the matrices described above + but is applied to 3D connections linking different FPGA layers. It is used in the + placement cost function calculation, where the height of the bounding box is divided + by the average number of inter-die connections within the bounding box. + */ + vtr::NdMatrix chanz_place_cost_fac_; // [0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1][0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1] private: @@ -250,6 +256,18 @@ class NetCostHandler { */ void alloc_and_load_chan_w_factors_for_place_cost_(float place_cost_exp); + /** + * @brief Allocates and loads the chanz_place_cost_fac array with the inverse of + * the average number of inter-die connections between [subhigh] and [sublow]. + * + * @details This is only useful for multi-die FPGAs. The place_cost_exp factor specifies to + * what power the average number of inter-die connections should be taken -- larger numbers make narrower channels more expensive. + * + * @param place_cost_exp It is an exponent to which you take the average number of inter-die connections; + * a higher value would favour areas with more inter-die connections over areas with fewer of those during placement (usually we use 1). + */ + void alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp); + /** * @brief Calculate the new connection delay and timing cost of all the * sink pins affected by moving a specific pin to a new location. 
Also From 3ac38bf0d36cb7dc9977de0c9005a5be129a2492 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 17 Oct 2024 11:32:49 -0400 Subject: [PATCH 6/7] [vpr][place] populate chanz_place_cost_fac_ --- vpr/src/place/net_cost_handler.cpp | 52 ++++++++++++++---------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index b51a8f21f4a..91a27501355 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -31,8 +31,9 @@ #include "placer_state.h" #include "move_utils.h" #include "place_timing_update.h" -#include "noc_place_utils.h" #include "vtr_math.h" +#include "vtr_ndmatrix.h" +#include "vtr_ndoffsetmatrix.h" #include @@ -53,11 +54,6 @@ constexpr std::array cross_count = {1.0000, 1. 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933}; - - -static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, - vtr::NdOffsetMatrix& chanz_place_cost_fac); - /** * @brief If the moving pin is of type type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id" * @param pin_old_loc Old location of the moving pin @@ -232,29 +228,29 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c } } - alloc_and_load_for_fast_vertical_cost_update(place_cost_exp, chanz_place_cost_fac_); + if (device_ctx.grid.get_num_layers() > 1) { + alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp); + } } -static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, vtr::NdOffsetMatrix& chanz_place_cost_fac) { +void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) { const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - vtr::NdMatrix tile_num_inter_die_conn({device_ctx.grid.width(), - device_ctx.grid.height()}, 0); const int grid_height = device_ctx.grid.height(); const int 
grid_width = device_ctx.grid.width(); - chanz_place_cost_fac = vtr::NdOffsetMatrix({{{0, grid_width-1}, - {0, grid_height-1}, - {0, grid_width-1}, - {0, grid_height-1}}} - ); + chanz_place_cost_fac_ = vtr::NdMatrix({grid_width, grid_height, grid_width, grid_height}, 0.); + + vtr::NdMatrix tile_num_inter_die_conn({grid_width, grid_height}, 0.); for (const auto& src_rr_node : rr_graph.nodes()) { for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) { const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx); if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) { + // We assume that the nodes driving the inter-layer connection or being driven by it + // are not stretched across multiple tiles int src_x = rr_graph.node_xhigh(src_rr_node); int src_y = rr_graph.node_yhigh(src_rr_node); VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y); @@ -277,22 +273,22 @@ static void alloc_and_load_for_fast_vertical_cost_update(float place_cost_exp, v for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) { for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) { - for (int x_low = 0; x_low <= x_high; x_low++) { - for (int y_low = 0; y_low <= y_high; y_low++) { - int num_inter_die_conn = 0; - for (int x = x_low; x <= x_high; x++) { - for (int y = y_low; y <= y_high; y++) { - num_inter_die_conn += tile_num_inter_die_conn[x][y]; - } + for (int x_low = 0; x_low <= x_high; x_low++) { + for (int y_low = 0; y_low <= y_high; y_low++) { + int num_inter_die_conn = 0; + for (int x = x_low; x <= x_high; x++) { + for (int y = y_low; y <= y_high; y++) { + num_inter_die_conn += tile_num_inter_die_conn[x][y]; } - int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1); - chanz_place_cost_fac[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast(num_inter_die_conn); - - chanz_place_cost_fac[x_high][y_high][x_low][y_low] = pow( - 
(double)chanz_place_cost_fac[x_high][y_high][x_low][y_low], - (double)place_cost_exp); } + int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1); + chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast(num_inter_die_conn); + + chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow( + (double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low], + (double)place_cost_exp); } + } } } } From abe89ea4c1098cb8552a02eaaab6f4f380987a81 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 17 Oct 2024 12:03:12 -0400 Subject: [PATCH 7/7] [vpr][place] fix grid width/height type --- vpr/src/place/net_cost_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 91a27501355..63fd0bf07fd 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -237,8 +237,8 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_c const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - const int grid_height = device_ctx.grid.height(); - const int grid_width = device_ctx.grid.width(); + const size_t grid_height = device_ctx.grid.height(); + const size_t grid_width = device_ctx.grid.width(); chanz_place_cost_fac_ = vtr::NdMatrix({grid_width, grid_height, grid_width, grid_height}, 0.);