diff --git a/vpr/src/analytical_place/flat_placement_density_manager.cpp b/vpr/src/analytical_place/flat_placement_density_manager.cpp index 93f7021c22..ec22283621 100644 --- a/vpr/src/analytical_place/flat_placement_density_manager.cpp +++ b/vpr/src/analytical_place/flat_placement_density_manager.cpp @@ -59,15 +59,20 @@ static PrimitiveVector calc_bin_underfill(const PrimitiveVector& bin_utilization * The command-line arguments provided by the user. * @param physical_tile_types * A vector of all physical tile types in the architecture. + * @param device_grid + * The current physical device grid of the FPGA. */ static std::vector get_physical_type_target_densities(const std::vector& target_density_arg_strs, - const std::vector& physical_tile_types) { + const std::vector& physical_tile_types, + const DeviceGrid& device_grid) { // Get the target densisty of each physical block type. - // TODO: Create auto feature to automatically select target densities based - // on properties of the architecture. Need to sweep to find reasonable - // values. std::vector phy_ty_target_density(physical_tile_types.size(), 1.0f); + // By default (auto), make the CLB target density 80%, leaving the other + // blocks at 100%. + t_logical_block_type_ptr logic_block_type = infer_logic_block_type(device_grid); + phy_ty_target_density[logic_block_type->index] = 0.8f; + // Set to auto if no user args are provided. if (target_density_arg_strs.size() == 0) return phy_ty_target_density; @@ -123,7 +128,8 @@ FlatPlacementDensityManager::FlatPlacementDensityManager(const APNetlist& ap_net // Get the target densisty of each physical block type. 
std::vector phy_ty_target_densities = get_physical_type_target_densities(target_density_arg_strs, - physical_tile_types); + physical_tile_types, + device_grid); VTR_LOG("Partial legalizer is using target densities:"); for (const t_physical_tile_type& phy_ty : physical_tile_types) { VTR_LOG(" %s:%.1f", phy_ty.name.c_str(), phy_ty_target_densities[phy_ty.index]); diff --git a/vpr/src/base/flat_placement_utils.h b/vpr/src/base/flat_placement_utils.h index 16a4641a01..1772585e92 100644 --- a/vpr/src/base/flat_placement_utils.h +++ b/vpr/src/base/flat_placement_utils.h @@ -6,8 +6,11 @@ * @brief Utility methods for working with flat placements. */ +#include #include +#include "device_grid.h" #include "flat_placement_types.h" +#include "physical_types.h" /** * @brief Returns the manhattan distance (L1 distance) between two flat @@ -17,3 +20,31 @@ inline float get_manhattan_distance(const t_flat_pl_loc& loc_a, const t_flat_pl_loc& loc_b) { return std::abs(loc_a.x - loc_b.x) + std::abs(loc_a.y - loc_b.y) + std::abs(loc_a.layer - loc_b.layer); } + +/** + * @brief Returns the L1 distance something at the given flat location would + * need to move to be within the bounds of a tile at the given tile loc. + */ +inline float get_manhattan_distance_to_tile(const t_flat_pl_loc& src_flat_loc, + const t_physical_tile_loc& tile_loc, + const DeviceGrid& device_grid) { + // Get the bounds of the tile. + // Note: The get_tile_bb function will not work in this case since it + // subtracts 1 from the width and height. + auto tile_type = device_grid.get_physical_type(tile_loc); + float tile_xmin = tile_loc.x - device_grid.get_width_offset(tile_loc); + float tile_xmax = tile_xmin + tile_type->width; + float tile_ymin = tile_loc.y - device_grid.get_height_offset(tile_loc); + float tile_ymax = tile_ymin + tile_type->height; + + // Get the closest point in the bounding box (including the edges) to + // the src_flat_loc. To do this, we project the point in L1 space. 
+ float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax); + float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax); + + // Then compute the L1 distance from the src_flat_loc to the projected + // position. This will be the minimum distance this point needs to move. + float dx = std::abs(proj_x - src_flat_loc.x); + float dy = std::abs(proj_y - src_flat_loc.y); + return dx + dy; +} diff --git a/vpr/src/pack/appack_context.h b/vpr/src/pack/appack_context.h index 4cc7e84fd8..f7da90d023 100644 --- a/vpr/src/pack/appack_context.h +++ b/vpr/src/pack/appack_context.h @@ -56,9 +56,9 @@ struct t_appack_options { // Distance threshold which decides when to use quadratic decay or inverted // sqrt decay. If the distance is less than this threshold, quadratic decay // is used. Inverted sqrt is used otherwise. - static constexpr float dist_th = 1.75f; + static constexpr float dist_th = 2.0f; // Attenuation value at the threshold. - static constexpr float attenuation_th = 0.35f; + static constexpr float attenuation_th = 0.25f; // Using the distance threshold and the attenuation value at that point, we // can compute the other two terms. This is to keep the attenuation function @@ -82,18 +82,28 @@ struct t_appack_options { // search within the cluster's tile. Setting this to a higher number would // allow APPack to search farther away; but may bring in molecules which // do not "want" to be in the cluster. - static constexpr float max_unrelated_tile_distance = 5.0f; + // + // [block_type_index] -> unrelated_tile_distance + std::vector max_unrelated_tile_distance; // Unrelated clustering occurs after all other candidate selection methods - // have failed. This parameter sets how many time we will attempt unrelated - // clustering between failures of unrelated clustering. If this is set to - // 1, and unrelated clustering failed for a cluster, it will not be attempted + // have failed. 
This attempts to cluster in molecules that are not attracted + // (using the packer's heuristics) to the molecules within a given cluster. + // This parameter sets how many times we will attempt unrelated + // clustering between failures of unrelated clustering. If a molecule used + // for unrelated clustering failed to cluster it will not be attempted // again for that cluster (note: if it succeeds, the number of attempts get // reset). // NOTE: A similar option exists in the candidate selector class. This was // duplicated since it is very likely that APPack would need a // different value for this option than the non-APPack flow. - static constexpr int max_unrelated_clustering_attempts = 10; + // + // [block_type_index] -> max_unrelated_attempts + std::vector max_unrelated_clustering_attempts; + // By default, we perform 10 unrelated clustering attempts. This is used + // to aggressively resolve density while adhering to the GP solution as much + // as possible. + static constexpr int default_max_unrelated_clustering_attempts = 10; // TODO: Investigate adding flat placement info to seed selection. }; @@ -122,6 +132,16 @@ struct APPackContext : public Context { logical_block_types, device_grid); } + + // Set the max unrelated tile distances for all logical block types. + // By default, we set this to a low value to only allow unrelated molecules + // that are very close to the cluster being created. + // NOTE: Molecules within the same tile as the centroid are considered to have + // 0 distance. The distance is computed relative to the bounds of the + // tile containing the centroid.
+ appack_options.max_unrelated_tile_distance.resize(logical_block_types.size(), 1.0); + appack_options.max_unrelated_clustering_attempts.resize(logical_block_types.size(), + appack_options.default_max_unrelated_clustering_attempts); } /** diff --git a/vpr/src/pack/appack_max_dist_th_manager.cpp b/vpr/src/pack/appack_max_dist_th_manager.cpp index 4d602bed00..1c224cd55b 100644 --- a/vpr/src/pack/appack_max_dist_th_manager.cpp +++ b/vpr/src/pack/appack_max_dist_th_manager.cpp @@ -29,6 +29,10 @@ static bool has_memory_pbs(const t_pb_type* pb_type); void APPackMaxDistThManager::init(const std::vector& max_dist_ths, const std::vector& logical_block_types, const DeviceGrid& device_grid) { + // Compute the max device distance based on the width and height of the + // device. This is the L1 (manhattan) distance. + max_distance_on_device_ = device_grid.width() + device_grid.height(); + // Automatically set the max distance thresholds. auto_set_max_distance_thresholds(logical_block_types, device_grid); @@ -36,7 +40,7 @@ void APPackMaxDistThManager::init(const std::vector& max_dist_ths, // auto), set the max distance thresholds based on the user-provided strings. VTR_ASSERT(!max_dist_ths.empty()); if (max_dist_ths.size() != 1 || max_dist_ths[0] != "auto") { - set_max_distance_thresholds_from_strings(max_dist_ths, logical_block_types, device_grid); + set_max_distance_thresholds_from_strings(max_dist_ths, logical_block_types); } // Set the initilized flag to true. @@ -57,18 +61,15 @@ void APPackMaxDistThManager::init(const std::vector& max_dist_ths, void APPackMaxDistThManager::auto_set_max_distance_thresholds(const std::vector& logical_block_types, const DeviceGrid& device_grid) { - // Compute the max device distance based on the width and height of the - // device. This is the L1 (manhattan) distance. - float max_device_distance = device_grid.width() + device_grid.height(); // Compute the max distance thresholds of the different logical block types. 
- float default_max_distance_th = std::max(default_max_dist_th_scale_ * max_device_distance, + float default_max_distance_th = std::max(default_max_dist_th_scale_ * max_distance_on_device_, default_max_dist_th_offset_); - float logic_block_max_distance_th = std::max(logic_block_max_dist_th_scale_ * max_device_distance, + float logic_block_max_distance_th = std::max(logic_block_max_dist_th_scale_ * max_distance_on_device_, logic_block_max_dist_th_offset_); - float memory_max_distance_th = std::max(memory_max_dist_th_scale_ * max_device_distance, + float memory_max_distance_th = std::max(memory_max_dist_th_scale_ * max_distance_on_device_, memory_max_dist_th_offset_); - float io_block_max_distance_th = std::max(io_max_dist_th_scale_ * max_device_distance, + float io_block_max_distance_th = std::max(io_max_dist_th_scale_ * max_distance_on_device_, io_max_dist_th_offset_); // Set all logical block types to have the default max distance threshold. @@ -138,8 +139,7 @@ static bool has_memory_pbs(const t_pb_type* pb_type) { void APPackMaxDistThManager::set_max_distance_thresholds_from_strings( const std::vector& max_dist_ths, - const std::vector& logical_block_types, - const DeviceGrid& device_grid) { + const std::vector& logical_block_types) { std::vector lb_type_names; std::unordered_map lb_type_name_to_index; @@ -167,8 +167,7 @@ void APPackMaxDistThManager::set_max_distance_thresholds_from_strings( } // Compute the max distance threshold the user selected. 
- float max_device_distance = device_grid.width() + device_grid.height(); - float logical_block_max_dist_th = std::max(max_device_distance * logical_block_max_dist_th_scale, + float logical_block_max_dist_th = std::max(max_distance_on_device_ * logical_block_max_dist_th_scale, logical_block_max_dist_th_offset); int lb_ty_index = lb_type_name_to_index[lb_name]; diff --git a/vpr/src/pack/appack_max_dist_th_manager.h b/vpr/src/pack/appack_max_dist_th_manager.h index 5dc461b2a6..de0b7d0da2 100644 --- a/vpr/src/pack/appack_max_dist_th_manager.h +++ b/vpr/src/pack/appack_max_dist_th_manager.h @@ -39,12 +39,12 @@ class APPackMaxDistThManager { // This is the default scale and offset. Logical blocks that we do not // recognize as being of the special categories will have this threshold. - static constexpr float default_max_dist_th_scale_ = 0.35f; - static constexpr float default_max_dist_th_offset_ = 15.0f; + static constexpr float default_max_dist_th_scale_ = 0.1f; + static constexpr float default_max_dist_th_offset_ = 10.0f; // Logic blocks (such as CLBs and LABs) tend to have more resources on the // device, thus they have tighter thresholds. This was found to work well. - static constexpr float logic_block_max_dist_th_scale_ = 0.1f; + static constexpr float logic_block_max_dist_th_scale_ = 0.06f; static constexpr float logic_block_max_dist_th_offset_ = 15.0f; // Memory blocks (i.e. blocks that contain pb_types of the memory class) @@ -80,7 +80,7 @@ class APPackMaxDistThManager { const DeviceGrid& device_grid); /** - * @brief Get the max distance threshold of the given lobical block type. + * @brief Get the max distance threshold of the given logical block type. */ inline float get_max_dist_threshold(const t_logical_block_type& logical_block_ty) const { VTR_ASSERT_SAFE_MSG(is_initialized_, @@ -91,6 +91,31 @@ class APPackMaxDistThManager { return logical_block_dist_thresholds_[logical_block_ty.index]; } + /** + * @brief Get the maximum distance possible on the device. 
This is the + * manhattan distance from the bottom-left corner of the device to + * the top-right. + */ + inline float get_max_device_distance() const { + VTR_ASSERT_SAFE_MSG(is_initialized_, + "APPackMaxDistThManager has not been initialized, cannot call this method"); + + return max_distance_on_device_; + } + + /** + * @brief Set the max distance threshold of the given logical block type. + */ + inline void set_max_dist_threshold(const t_logical_block_type& logical_block_ty, + float new_threshold) { + VTR_ASSERT_SAFE_MSG(is_initialized_, + "APPackMaxDistThManager has not been initialized, cannot call this method"); + VTR_ASSERT_SAFE_MSG((size_t)logical_block_ty.index < logical_block_dist_thresholds_.size(), + "Logical block type does not have a max distance threshold"); + + logical_block_dist_thresholds_[logical_block_ty.index] = new_threshold; + } + private: /** * @brief Helper method that initializes the thresholds of all logical @@ -105,8 +130,7 @@ class APPackMaxDistThManager { * strings. */ void set_max_distance_thresholds_from_strings(const std::vector& max_dist_ths, - const std::vector& logical_block_types, - const DeviceGrid& device_grid); + const std::vector& logical_block_types); /// @brief A flag which shows if the thesholds have been computed or not. bool is_initialized_ = false; @@ -114,4 +138,9 @@ class APPackMaxDistThManager { /// @brief The max distance thresholds of all logical blocks in the architecture. /// This is initialized in the constructor and accessed during packing. std::vector logical_block_dist_thresholds_; + + /// @brief This is the maximum manhattan distance possible on the device. This + /// is the distance of traveling from the bottom-left corner of the device + /// to the top right. 
+ float max_distance_on_device_; }; diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp index 60f169c049..2fd4919b27 100644 --- a/vpr/src/pack/greedy_candidate_selector.cpp +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -8,6 +8,7 @@ #include "greedy_candidate_selector.h" #include #include +#include #include #include #include "PreClusterTimingManager.h" @@ -18,15 +19,16 @@ #include "attraction_groups.h" #include "cluster_legalizer.h" #include "cluster_placement.h" +#include "globals.h" #include "greedy_clusterer.h" #include "logic_types.h" +#include "physical_types.h" #include "prepack.h" #include "timing_info.h" #include "vpr_types.h" #include "vtr_assert.h" #include "vtr_ndmatrix.h" #include "vtr_vector.h" -#include "vtr_vector_map.h" /* * @brief Get gain of packing molecule into current cluster. @@ -755,7 +757,9 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( if (allow_unrelated_clustering_ && best_molecule == PackMoleculeId::INVALID()) { const t_appack_options& appack_options = appack_ctx_.appack_options; if (appack_options.use_appack) { - if (num_unrelated_clustering_attempts_ < appack_options.max_unrelated_clustering_attempts) { + t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id); + int cluster_max_attempts = appack_options.max_unrelated_clustering_attempts[cluster_type->index]; + if (num_unrelated_clustering_attempts_ < cluster_max_attempts) { best_molecule = get_unrelated_candidate_for_cluster_appack(cluster_gain_stats, cluster_id, cluster_legalizer); @@ -1101,8 +1105,23 @@ static float get_molecule_gain(PackMoleculeId molecule_id, // Get the position of the molecule t_flat_pl_loc target_loc = get_molecule_pos(molecule_id, prepacker, appack_ctx); + // Get the physical tile location of the flat cluster position. + // TODO: This should really be the closest compatible tile to the cluster + // centroid. 
To do this would require using information from the + // placer which we do not have yet. + t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x, + cluster_gain_stats.flat_cluster_position.y, + cluster_gain_stats.flat_cluster_position.layer); + // Compute the gain attenuatation term. - float dist = get_manhattan_distance(cluster_gain_stats.flat_cluster_position, target_loc); + + // Here we compute the distance we would need to move the molecule from + // its GP solution to go into the tile we think the cluster will go into. + // This returns a distance of 0 if the molecule is already in the same + // tile as the rest of the molecules in the cluster. + float dist = get_manhattan_distance_to_tile(target_loc, + cluster_tile_loc, + g_vpr_ctx.device().grid); float gain_mult = 1.0f; if (dist < appack_options.dist_th) { gain_mult = 1.0f - (appack_options.quad_fac_sqr * dist * dist); @@ -1245,25 +1264,47 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa } // Create a queue of locations to search and a map of visited grid locations. - std::queue search_queue; + std::queue search_queue; vtr::NdMatrix visited({appack_unrelated_clustering_data_.dim_size(0), appack_unrelated_clustering_data_.dim_size(1)}, false); // Push the position of the cluster to the queue. - search_queue.push(cluster_gain_stats.flat_cluster_position); + t_physical_tile_loc cluster_tile_loc(cluster_gain_stats.flat_cluster_position.x, + cluster_gain_stats.flat_cluster_position.y, + cluster_gain_stats.flat_cluster_position.layer); + search_queue.push(cluster_tile_loc); + + // Get the max unrelated tile distance for the block type of this cluster. + t_logical_block_type_ptr cluster_type = cluster_legalizer.get_cluster_type(cluster_id); + float max_dist = appack_ctx_.appack_options.max_unrelated_tile_distance[cluster_type->index]; + + // Keep track of the closest compatible molecule and its distance. 
+ float best_distance = std::numeric_limits::max(); + PackMoleculeId closest_compatible_molecule = PackMoleculeId::INVALID(); while (!search_queue.empty()) { // Pop a position to search from the queue. - const t_flat_pl_loc& node_loc = search_queue.front(); - VTR_ASSERT_SAFE(node_loc.layer == 0); + const t_physical_tile_loc& node_loc = search_queue.front(); + VTR_ASSERT_SAFE(node_loc.layer_num == 0); + + // Get the distance from the cluster to the current tile in tiles. + float dist = std::abs(node_loc.x - cluster_tile_loc.x) + std::abs(node_loc.y - cluster_tile_loc.y); // If this position is too far from the source, skip it. - float dist = get_manhattan_distance(node_loc, cluster_gain_stats.flat_cluster_position); - if (dist > 1) { + if (dist > max_dist) { search_queue.pop(); continue; } + // If the distance from the cluster to the current tile is larger than + // the best molecule's distance plus the farthest distance within the + // 1x1 tile (2.0), there cannot exist a molecule within the tile with a + // better distance than what we have found. + if (dist >= best_distance + 2.0) { + search_queue.pop(); + break; + } + // If this position has been visited, skip it. if (visited[node_loc.x][node_loc.y]) { search_queue.pop(); @@ -1272,6 +1313,10 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa visited[node_loc.x][node_loc.y] = true; // Explore this position from highest number of inputs available to lowest. + // Here, we are trying to find the closest compatible molecule, where we + // break ties based on whoever has more external inputs. 
+ PackMoleculeId best_candidate = PackMoleculeId::INVALID(); + float best_candidate_distance = std::numeric_limits::max(); const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y]; VTR_ASSERT_SAFE(inputs_avail < uc_data.size()); for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { @@ -1289,30 +1334,46 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appa // skip it. if (!cluster_legalizer.is_molecule_compatible(mol_id, cluster_id)) continue; - // Return this molecule as the unrelated candidate. - return mol_id; + + // If this is the best candidate we have seen so far, hold onto it. + // Here, we get the distance needed to move the molecule from its + // GP placement to the current cluster's tile. + t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_); + float mol_dist = get_manhattan_distance_to_tile(mol_pos, + cluster_tile_loc, + g_vpr_ctx.device().grid); + if (mol_dist < best_candidate_distance && mol_dist < best_distance) { + best_candidate = mol_id; + best_candidate_distance = mol_dist; + } } } + // If a candidate could be found, add it as the best found so far. + if (best_candidate.is_valid()) { + closest_compatible_molecule = best_candidate; + best_distance = best_candidate_distance; + } + // Push the neighbors of the position to the queue. // Note: Here, we are using the manhattan distance, so we do not push // the diagonals. We also want to try the direct neighbors first // since they should be closer. 
- if (node_loc.x >= 1.0f) - search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer}); - if (node_loc.x <= visited.dim_size(0) - 2) - search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer}); - if (node_loc.y >= 1.0f) - search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer}); - if (node_loc.y <= visited.dim_size(1) - 2) - search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer}); + if (node_loc.x >= 1) + search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer_num}); + if (node_loc.x <= (int)appack_unrelated_clustering_data_.dim_size(0) - 2) + search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer_num}); + if (node_loc.y >= 1) + search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer_num}); + if (node_loc.y <= (int)appack_unrelated_clustering_data_.dim_size(1) - 2) + search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer_num}); // Pop the position off the queue. search_queue.pop(); } - // No molecule could be found. Return an invalid ID. - return PackMoleculeId::INVALID(); + // Return the closest compatible molecule to the cluster. + return closest_compatible_molecule; } void GreedyCandidateSelector::update_candidate_selector_finalize_cluster( diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 507dcd3e50..7914f97f4a 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -54,6 +54,8 @@ enum class e_packer_state { /// @brief Region constraints: Turns on more attraction groups for all regions /// and increases the pull on these groups. CREATE_ATTRACTION_GROUPS_FOR_ALL_REGIONS_AND_INCREASE_PULL, + /// @brief APPack: Increase the max displacement threshold for overused block types. + AP_INCREASE_MAX_DISPLACEMENT, /// @brief The failure state. FAILURE }; @@ -90,6 +92,8 @@ static bool try_size_device_grid(const t_arch& arch, * The current external pin utilization targets. * @param packer_opts * The options passed into the packer. 
+ * @param appack_ctx + * The APPack context used when AP is turned on. */ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, bool fits_on_device, @@ -98,7 +102,8 @@ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, bool using_balanced_block_type_util, const std::map& block_type_utils, const t_ext_pin_util_targets& external_pin_util_targets, - const t_packer_opts& packer_opts) { + const t_packer_opts& packer_opts, + const APPackContext& appack_ctx) { if (fits_on_device && !floorplan_regions_overfull) { // If everything fits on the device and the floorplan regions are // not overfilled, the next state is success. @@ -142,20 +147,36 @@ static e_packer_state get_next_packer_state(e_packer_state current_packer_state, // density of the block types available. // Check if we can turn on unrelated cluster and/or balanced block type - // utilization. - if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::AUTO && packer_opts.balance_block_type_utilization == e_balance_block_type_util::AUTO) { + // utilization and they have not been turned on already. + if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::AUTO && !using_unrelated_clustering) { + return e_packer_state::SET_UNRELATED_AND_BALANCED; + } + if (packer_opts.balance_block_type_utilization == e_balance_block_type_util::AUTO && !using_balanced_block_type_util) { + return e_packer_state::SET_UNRELATED_AND_BALANCED; + } + } - // Check if they are not already on. If not, set the next state to turn them on. - if (!using_unrelated_clustering || !using_balanced_block_type_util) { - return e_packer_state::SET_UNRELATED_AND_BALANCED; - } + // If APPack is used, we can increase the max distance threshold to create + // a denser clustering. This will cause the packer to not adhere as well to + // the global placement. 
+ if (appack_ctx.appack_options.use_appack) { + for (const auto& p : block_type_utils) { + if (p.second <= 1.0f) + continue; + + // Check if we can increase the max distance threshold for any of the + // overused block types. + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + float max_distance_th = appack_ctx.max_distance_threshold_manager.get_max_dist_threshold(*p.first); + if (max_distance_th < max_device_distance) + return e_packer_state::AP_INCREASE_MAX_DISPLACEMENT; } } // Check if we can increase the target density of the overused block types. // This is a last resort since increasing the target pin density can have // bad affects on quality and routability. - for (auto& p : block_type_utils) { + for (const auto& p : block_type_utils) { const t_ext_pin_util& target_pin_util = external_pin_util_targets.get_pin_util(p.first->name); if (p.second > 1.0f && (target_pin_util.input_pin_util < 1.0f || target_pin_util.output_pin_util < 1.0f)) return e_packer_state::INCREASE_OVERUSED_TARGET_PIN_UTILIZATION; @@ -323,7 +344,8 @@ bool try_pack(const t_packer_opts& packer_opts, balance_block_type_util, block_type_utils, cluster_legalizer.get_target_external_pin_util(), - packer_opts); + packer_opts, + appack_ctx); // Set up for the options used for the next packer state. // NOTE: This must be done here (and not at the start of the next packer @@ -342,6 +364,20 @@ bool try_pack(const t_packer_opts& packer_opts, VTR_ASSERT(balance_block_type_util == false); balance_block_type_util = true; } + if (appack_ctx.appack_options.use_appack) { + // Only do unrelated clustering on the overused type instances. + for (const auto& p : block_type_utils) { + // Any overutilized block types will use the default options. + if (p.second > 1.0f) + continue; + + // Any underutilized block types should not do unrelated clustering. + // We can turn this off by just setting the max attempts to 0. 
+ // TODO: These may become over-utilized in the future. Should + // investigate turning these on if needed. + appack_ctx.appack_options.max_unrelated_clustering_attempts[p.first->index] = 0; + } + } VTR_LOG("Packing failed to fit on device. Re-packing with: unrelated_logic_clustering=%s balance_block_type_util=%s\n", (allow_unrelated_clustering ? "true" : "false"), (balance_block_type_util ? "true" : "false")); @@ -402,6 +438,35 @@ bool try_pack(const t_packer_opts& packer_opts, attraction_groups.set_att_group_pulls(4); break; } + case e_packer_state::AP_INCREASE_MAX_DISPLACEMENT: { + VTR_ASSERT(appack_ctx.appack_options.use_appack); + std::vector block_types_to_increase; + for (const auto& p : block_type_utils) { + if (p.second <= 1.0f) + continue; + + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + float max_distance_th = appack_ctx.max_distance_threshold_manager.get_max_dist_threshold(*p.first); + if (max_distance_th < max_device_distance) + block_types_to_increase.push_back(p.first); + } + + // TODO: Instead of setting to max distance, set to the current threshold, + // multiplied by the overuse. Or maybe just double it. + VTR_LOG("Packing failed to fit on device. 
Increasing the APPack max distance thresholds of block types: "); + for (size_t i = 0; i < block_types_to_increase.size(); i++) { + t_logical_block_type_ptr block_type_ptr = block_types_to_increase[i]; + + float max_device_distance = appack_ctx.max_distance_threshold_manager.get_max_device_distance(); + appack_ctx.max_distance_threshold_manager.set_max_dist_threshold(*block_type_ptr, max_device_distance); + + VTR_LOG("%s", block_type_ptr->name.c_str()); + if (i < block_types_to_increase.size() - 1) + VTR_LOG(", "); + } + VTR_LOG("\n"); + break; + } case e_packer_state::DEFAULT: case e_packer_state::SUCCESS: case e_packer_state::FAILURE: diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index c4cbeb5645..9f6a4316eb 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -1,6 +1,7 @@ #include "clustered_netlist.h" #include "flat_placement_types.h" #include "atom_netlist_fwd.h" +#include "flat_placement_utils.h" #include "physical_types_util.h" #include "place_macro.h" #include "vtr_assert.h" @@ -638,34 +639,6 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_ return centroid; } -/** - * @brief Returns the L1 distance a cluster at the given flat location would - * need to move to be within the bounds of a tile at the given tile loc. - */ -static inline float get_dist_to_tile(const t_flat_pl_loc& src_flat_loc, - const t_physical_tile_loc& tile_loc, - const DeviceGrid& device_grid) { - // Get the bounds of the tile. - // Note: The get_tile_bb function will not work in this case since it - // subtracts 1 from the width and height. 
- auto tile_type = device_grid.get_physical_type(tile_loc); - float tile_xmin = tile_loc.x - device_grid.get_width_offset(tile_loc); - float tile_xmax = tile_xmin + tile_type->width; - float tile_ymin = tile_loc.y - device_grid.get_height_offset(tile_loc); - float tile_ymax = tile_ymin + tile_type->height; - - // Get the closest point in the bounding box (including the edges) to - // the src_flat_loc. To do this, we project the point in L1 space. - float proj_x = std::clamp(src_flat_loc.x, tile_xmin, tile_xmax); - float proj_y = std::clamp(src_flat_loc.y, tile_ymin, tile_ymax); - - // Then compute the L1 distance from the src_flat_loc to the projected - // position. This will be the minimum distance this point needs to move. - float dx = std::abs(proj_x - src_flat_loc.x); - float dy = std::abs(proj_y - src_flat_loc.y); - return dx + dy; -} - /** * @brief Returns the first available sub_tile (both compatible with the given * compressed grid and is empty according the the blk_loc_registry) in @@ -760,7 +733,9 @@ static inline t_pl_loc find_nearest_compatible_loc(const t_flat_pl_loc& src_flat // Note: In compressed space, distances are not what they appear. We are // using the true grid positions to get the truly closest loc. auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc(loc); - float grid_dist = get_dist_to_tile(src_flat_loc, grid_loc, device_grid); + float grid_dist = get_manhattan_distance_to_tile(src_flat_loc, + grid_loc, + device_grid); // If this distance is worst than the best we have seen. // NOTE: This prune is always safe (i.e. it will never remove a better // solution) since this is a spatial graph and our objective is @@ -1580,7 +1555,9 @@ static inline float get_flat_variance(const t_pl_macro& macro, // Get the amount this atom needs to be displaced in order to be // within the same tile as the centroid. 
- float dist = get_dist_to_tile(atom_pos, centroid_grid_loc, g_vpr_ctx.device().grid); + float dist = get_manhattan_distance_to_tile(atom_pos, + centroid_grid_loc, + g_vpr_ctx.device().grid); // Accumulate the variance. variance += (dist * dist); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt index 600428ad62..de446b6643 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_partial_legalizer/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed 
total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time - k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 3.69 vpr 74.85 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 9 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 76644 9 19 896 28 0 558 110 16 16 256 -1 mcnc_medium -1 -1 6604.32 6187 4055 330 2657 1068 74.8 MiB 3.14 0.00 5.5006 5.04382 -83.4196 -5.04382 nan 0.00 0.00159178 0.00140179 0.052728 0.048536 74.8 MiB 3.14 74.8 MiB 1.13 9693 17.4022 2573 4.61939 4635 22418 750276 125848 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.23966 nan -85.8792 -5.23966 0 0 0.13 -1 -1 74.8 MiB 0.25 0.25043 0.231439 31.6 MiB -1 0.05 - k6_frac_N10_40nm.xml des.pre-vpr.blif common 0.98 vpr 75.87 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 256 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 77692 256 245 954 501 0 592 561 22 22 484 -1 mcnc_large -1 -1 7794.07 7693 5185 54 899 4232 75.9 MiB 0.51 0.01 5.23302 4.72031 -822.458 -4.72031 nan 0.00 0.0019465 0.00181915 0.0146274 0.014127 75.9 MiB 0.51 75.9 MiB 0.36 10399 17.5659 2857 4.82601 2368 5414 292191 61927 2.15576e+07 3.23364e+06 1.49107e+06 3080.73 14 47664 245996 -1 5.04732 nan -891.503 -5.04732 0 0 0.17 -1 -1 75.9 MiB 0.14 0.105175 
0.100293 33.7 MiB -1 0.06 - k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 13.53 vpr 105.74 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 295 10 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 108280 10 10 2655 20 0 1258 315 22 22 484 -1 mcnc_large -1 -1 27055.4 24196 24948 3246 18814 2888 105.7 MiB 11.83 0.02 8.14213 6.45814 -63.4873 -6.45814 nan 0.00 0.00723015 0.00601179 0.178647 0.157549 105.7 MiB 11.83 105.7 MiB 3.24 36569 29.0692 9362 7.44197 8387 54711 2280579 294712 2.15576e+07 1.58987e+07 3.51389e+06 7260.09 17 64568 594370 -1 6.99083 nan -66.327 -6.99083 0 0 0.42 -1 -1 105.7 MiB 0.83 0.805444 0.717568 49.7 MiB -1 0.12 - k6_frac_N10_40nm.xml seq.pre-vpr.blif common 3.68 vpr 75.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 41 -1 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 77284 41 35 1006 76 0 591 158 16 16 256 -1 mcnc_medium -1 -1 6788.51 6434 4001 201 1978 1822 75.5 MiB 3.16 0.00 5.22637 4.95486 -145.087 -4.95486 nan 0.00 0.00148664 0.00128788 0.0309401 0.0286989 75.5 MiB 3.16 75.5 MiB 1.05 9852 16.6701 2636 4.46024 3774 18255 562759 99961 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.24035 nan -152.337 -5.24035 0 0 0.13 -1 -1 75.5 MiB 0.21 0.224432 0.206859 31.8 MiB -1 0.04 + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.13 vpr 77.07 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 9 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78920 9 19 896 28 0 597 110 16 16 256 -1 mcnc_medium -1 -1 6923.74 6384 3266 257 2173 836 77.1 MiB 3.53 0.01 5.93826 5.04913 -82.6284 -5.04913 nan 0.00 0.00162746 0.00126185 0.0369838 0.0320011 77.1 MiB 
3.53 77.1 MiB 1.45 9854 16.5336 2619 4.39430 4254 19787 655822 113994 1.05632e+07 4.41931e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.61854 nan -86.9247 -5.61854 0 0 0.19 -1 -1 77.1 MiB 0.25 0.236227 0.206716 33.2 MiB -1 0.05 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 1.13 vpr 77.78 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 79648 256 245 954 501 0 598 554 22 22 484 -1 mcnc_large -1 -1 7759.78 7813 32390 260 5540 26590 77.8 MiB 0.62 0.01 5.3774 4.07795 -783.558 -4.07795 nan 0.00 0.00216885 0.00194181 0.0417738 0.0379768 77.8 MiB 0.62 77.8 MiB 0.36 10841 18.1288 2955 4.94147 2557 5883 360499 76612 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 14 47664 245996 -1 4.6034 nan -875.791 -4.6034 0 0 0.21 -1 -1 77.8 MiB 0.17 0.140155 0.129596 35.2 MiB -1 0.07 + k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 17.36 vpr 108.07 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 287 10 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 110664 10 10 2655 20 0 1394 307 22 22 484 -1 mcnc_large -1 -1 29331.8 25819 17902 2041 13789 2072 108.1 MiB 15.40 0.02 8.02093 6.59208 -64.4571 -6.59208 nan 0.00 0.00521069 0.004042 0.148628 0.1258 108.1 MiB 15.40 108.1 MiB 4.28 37964 27.2339 9782 7.01722 9764 59405 2548944 324595 2.15576e+07 1.54676e+07 3.51389e+06 7260.09 18 64568 594370 -1 6.70317 nan -65.0915 -6.70317 0 0 0.60 -1 -1 108.1 MiB 0.88 0.765326 0.666153 51.7 MiB -1 0.15 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.38 vpr 78.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 41 -1 -1 success v8.0.0-13239-gc574bc5f2 release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-06-28T23:19:15 srivatsan-Precision-Tower-5810 
/home/alex/vtr-verilog-to-routing 80080 41 35 1006 76 0 650 162 16 16 256 -1 mcnc_medium -1 -1 7393.66 7070 4572 193 2379 2000 78.2 MiB 3.75 0.01 5.40605 5.02754 -145.024 -5.02754 nan 0.00 0.00227909 0.00183827 0.0409561 0.0356465 78.2 MiB 3.75 78.2 MiB 1.23 10921 16.8015 2925 4.50000 4695 22078 725102 126903 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.1862 nan -149.801 -5.1862 0 0 0.19 -1 -1 78.2 MiB 0.27 0.25655 0.224304 33.2 MiB -1 0.05