diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp index dd25b531dbd..ca5f72772eb 100644 --- a/vpr/src/analytical_place/analytical_placement_flow.cpp +++ b/vpr/src/analytical_place/analytical_placement_flow.cpp @@ -13,6 +13,7 @@ #include "full_legalizer.h" #include "gen_ap_netlist_from_atoms.h" #include "globals.h" +#include "partial_legalizer.h" #include "partial_placement.h" #include "prepack.h" #include "user_place_constraints.h" @@ -53,6 +54,7 @@ static void print_ap_netlist_stats(const APNetlist& netlist) { VTR_LOG("\t\tAverage Fanout: %.2f\n", average_fanout); VTR_LOG("\t\tHighest Fanout: %zu\n", highest_fanout); VTR_LOG("\tPins: %zu\n", netlist.pins().size()); + VTR_LOG("\n"); } void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { @@ -77,11 +79,16 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { print_ap_netlist_stats(ap_netlist); // Run the Global Placer - // For now, just runs the solver. + // For now, just runs the solver and partial legalizer 10 times arbitrarily. PartialPlacement p_placement(ap_netlist); std::unique_ptr solver = make_analytical_solver(e_analytical_solver::QP_HYBRID, ap_netlist); - solver->solve(0, p_placement); + std::unique_ptr legalizer = make_partial_legalizer(e_partial_legalizer::FLOW_BASED, + ap_netlist); + for (size_t i = 0; i < 10; i++) { + solver->solve(i, p_placement); + legalizer->legalize(p_placement); + } // Verify that the partial placement is valid before running the full // legalizer. diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp index 40b8b34a0bc..3f32255dcb8 100644 --- a/vpr/src/analytical_place/full_legalizer.cpp +++ b/vpr/src/analytical_place/full_legalizer.cpp @@ -2,7 +2,9 @@ * @file * @author Alex Singer * @date September 2024 - * @brief Implements the full legalizer in the AP flow. + * @brief Implements the full legalizer in the AP flow. 
The Full Legalizer + * takes a partial placement and fully legalizes it. This involves + * creating legal clusters and placing them into valid tile sites. */ #include "full_legalizer.h" @@ -33,6 +35,7 @@ #include "vpr_error.h" #include "vpr_types.h" #include "vtr_assert.h" +#include "vtr_geometry.h" #include "vtr_ndmatrix.h" #include "vtr_strong_id.h" #include "vtr_time.h" @@ -126,9 +129,8 @@ class APClusterPlacer { const t_physical_tile_loc& tile_loc, int sub_tile) { const DeviceContext& device_ctx = g_vpr_ctx.device(); - // FIXME: THIS MUST TAKE INTO ACCOUNT THE CONSTRAINTS AS WELL!!! - // - Right now it is just implied. - // - Will work but is unstable. + const FloorplanningContext& floorplanning_ctx = g_vpr_ctx.floorplanning(); + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); const auto& block_locs = g_vpr_ctx.placement().block_locs(); auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry(); VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?"); @@ -141,11 +143,24 @@ class APClusterPlacer { if (device_ctx.grid.get_physical_type(tile_loc)->sub_tiles.size() == 0) return false; VTR_ASSERT(sub_tile >= 0 && sub_tile < device_ctx.grid.get_physical_type(tile_loc)->capacity); - // FIXME: Do this better. - // - May need to try all the sub-tiles in a location. - // - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755 - to_loc.sub_tile = sub_tile; - return try_place_macro(pl_macro, to_loc, blk_loc_registry); + // A constrained cluster may only be placed inside its partition region. + if (is_cluster_constrained(clb_blk_id)) { + const auto& cluster_constraints = floorplanning_ctx.cluster_constraints; + if (!cluster_constraints[clb_blk_id].is_loc_in_part_reg(to_loc)) + return false; + } + // If the location is legal, try to exhaustively place it at this tile + // location. This should try all sub_tiles. 
+ PartitionRegion pr; + vtr::Rect rect(tile_loc.x, tile_loc.y, tile_loc.x, tile_loc.y); + pr.add_to_part_region(Region(rect, to_loc.layer)); + const ClusteredNetlist& clb_nlist = cluster_ctx.clb_nlist; + t_logical_block_type_ptr block_type = clb_nlist.block_type(clb_blk_id); + enum e_pad_loc_type pad_loc_type = g_vpr_ctx.device().pad_loc_type; + // FIXME: This currently ignores the sub_tile. Was running into issues + // with trying to force clusters to specific sub_tiles. + return try_place_macro_exhaustively(pl_macro, pr, block_type, + pad_loc_type, blk_loc_registry); } // This is not the best way of doing things, but its the simplest. Given a @@ -356,10 +371,6 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist, bool placed = ap_cluster_placer.place_cluster(cluster_blk_id, tile_loc, blk_sub_tile); if (placed) continue; - // FIXME: Should now try all sub-tiles at this tile location. - // - May need to try all the sub-tiles in a location. - // - however this may need to be done after. - // - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755 // Add to list of unplaced clusters. unplaced_clusters.push_back(cluster_blk_id); diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp new file mode 100644 index 00000000000..44645878e64 --- /dev/null +++ b/vpr/src/analytical_place/partial_legalizer.cpp @@ -0,0 +1,1107 @@ +/** + * @file + * @author Alex Singer and Robert Luo + * @date October 2024 + * @brief The definitions of the Partial Legalizers used in the AP flow and + * their base class. 
+ */ + +#include "partial_legalizer.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ap_netlist.h" +#include "device_grid.h" +#include "globals.h" +#include "partial_placement.h" +#include "physical_types.h" +#include "primitive_vector.h" +#include "vpr_context.h" +#include "vpr_error.h" +#include "vpr_types.h" +#include "vtr_assert.h" +#include "vtr_geometry.h" +#include "vtr_log.h" +#include "vtr_ndmatrix.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, + const APNetlist& netlist) { + // Based on the partial legalizer type passed in, build the partial legalizer. + switch (legalizer_type) { + case e_partial_legalizer::FLOW_BASED: + return std::make_unique(netlist); + default: + VPR_FATAL_ERROR(VPR_ERROR_AP, + "Unrecognized partial legalizer type"); + break; + } + return nullptr; +} + +/** + * @brief Get the scalar mass of the given model (primitive type). + * + * A model with a higher mass will take up more space in its bin which may force + * more spreading of that type of primitive. + * + * TODO: This will be made more complicated later. Models may be weighted based + * on some factors. + */ +static inline float get_model_mass(const t_model* model) { + // Currently, all models have a mass of one. + (void)model; + return 1.f; +} + +/** + * @brief Get the primitive mass of the given block. + * + * This returns an M-dimensional vector with each entry indicating the mass of + * that primitive type in this block. M is the number of unique models + * (primitive types) in the architecture. 
+ */ +static inline PrimitiveVector get_primitive_mass(APBlockId blk_id, + const APNetlist& netlist) { + PrimitiveVector mass; + const t_pack_molecule* mol = netlist.block_molecule(blk_id); + for (AtomBlockId atom_blk_id : mol->atom_block_ids) { + const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id); + VTR_ASSERT_DEBUG(model->index >= 0); + mass.add_val_to_dim(get_model_mass(model), model->index); + } + return mass; +} + +// This method is being forward-declared due to the double recursion below. +// Eventually this should be made into a non-recursive algorithm for performance, +// however this is not in a performance critical part of the code. +static PrimitiveVector get_primitive_capacity(const t_pb_type* pb_type); + +/** + * @brief Get the amount of primitives this mode can contain. + * + * This is part of a double recursion, since a mode contains primitives which + * themselves have modes. + */ +static PrimitiveVector get_primitive_capacity(const t_mode& mode) { + // Accumulate the capacities of all the pbs in this mode. + PrimitiveVector capacity; + for (int pb_child_idx = 0; pb_child_idx < mode.num_pb_type_children; pb_child_idx++) { + const t_pb_type& pb_type = mode.pb_type_children[pb_child_idx]; + PrimitiveVector pb_capacity = get_primitive_capacity(&pb_type); + // A mode may contain multiple pbs of the same type, multiply the + // capacity. + pb_capacity *= pb_type.num_pb; + capacity += pb_capacity; + } + return capacity; +} + +/** + * @brief Get the amount of primitives this pb can contain. + * + * This is the other part of the double recursion. A pb may have multiple modes. + * Modes are made of pbs. + */ +static PrimitiveVector get_primitive_capacity(const t_pb_type* pb_type) { + // Since a pb cannot be multiple modes at the same time, we do not + // accumulate the capacities of the mode. Instead we need to "mix" the two + // capacities as if the pb could choose either one. 
+ PrimitiveVector capacity; + // If this is a leaf / primitive, create the base PrimitiveVector capacity. + if (pb_type->num_modes == 0) { + const t_model* model = pb_type->model; + VTR_ASSERT(model != nullptr); + VTR_ASSERT_DEBUG(model->index >= 0); + capacity.add_val_to_dim(get_model_mass(model), model->index); + return capacity; + } + // For now, we simply mix the capacities of modes by taking the max of each + // dimension of the capacities. This provides an upper-bound on the amount of + // primitives this pb can contain. + for (int mode = 0; mode < pb_type->num_modes; mode++) { + PrimitiveVector mode_capacity = get_primitive_capacity(pb_type->modes[mode]); + capacity = PrimitiveVector::max(capacity, mode_capacity); + } + return capacity; +} + +/** + * @brief Helper method to get the primitive capacity of the given logical block + * type. + * + * This is the entry point to the double recursion. + */ +static inline PrimitiveVector get_primitive_capacity(const t_logical_block_type& block_type) { + // If this logical block is empty, it cannot contain any primitives. + if (block_type.is_empty()) + return PrimitiveVector(); + // The primitive capacity of a logical block is the primitive capacity of + // its root pb. + return get_primitive_capacity(block_type.pb_type); +} + +/** + * @brief Get the primitive capacity of the given sub_tile. + * + * Sub_tiles may reuse logical blocks between one another, therefore this method + * requires that the capacities of all of the logical blocks have been + * pre-calculated and stored in the given vector. + * + * @param sub_tile The sub_tile to get the capacity of. + * @param logical_block_type_capacities The capacities of all logical block + * types. 
+ */ +static inline PrimitiveVector get_primitive_capacity(const t_sub_tile& sub_tile, + const std::vector& logical_block_type_capacities) { + // Similar to getting the primitive capacity of the pb, sub_tiles have many + // equivalent sites, but it can only be one of them at a time. Need to "mix" + // the capacities of the different sites this sub_tile may be. + PrimitiveVector capacity; + for (t_logical_block_type_ptr block_type : sub_tile.equivalent_sites) { + const PrimitiveVector& block_capacity = logical_block_type_capacities[block_type->index]; + // Currently, we take the max of each primitive dimension as an upper + // bound on the capacity of the sub_tile. + capacity = PrimitiveVector::max(capacity, block_capacity); + } + return capacity; +} + +/** + * @brief Get the primitive capacity of a tile of the given type. + * + * Tiles may reuse logical blocks between one another, therefore this method + * requires that the capacities of all of the logical blocks have been + * pre-calculated and stored in the given vector. + * + * @param tile_type The tile type to get the capacity of. + * @param logical_block_type_capacities The capacities of all logical block + * types. + */ +static inline PrimitiveVector get_primitive_capacity(const t_physical_tile_type& tile_type, + const std::vector& logical_block_type_capacities) { + // Accumulate the capacities of all the sub_tiles in the given tile type. + PrimitiveVector capacity; + for (const t_sub_tile& sub_tile : tile_type.sub_tiles) { + PrimitiveVector sub_tile_capacity = get_primitive_capacity(sub_tile, logical_block_type_capacities); + // A tile may contain many sub_tiles of the same type. Multiply by the + // number of sub_tiles of this type. + sub_tile_capacity *= sub_tile.capacity.total(); + capacity += sub_tile_capacity; + } + return capacity; +} + +/** + * @brief Get the number of models in the device architecture. + * + * FIXME: These are stored in such an annoying way. 
It should be much easier + * to get this information! + */ +static inline size_t get_num_models() { + size_t num_models = 0; + t_model* curr_model = g_vpr_ctx.device().arch->models; + while (curr_model != nullptr) { + num_models++; + curr_model = curr_model->next; + } + curr_model = g_vpr_ctx.device().arch->model_library; + while (curr_model != nullptr) { + num_models++; + curr_model = curr_model->next; + } + return num_models; +} + +/** + * @brief Debug printing method to print the capacities of all logical blocks + * and physical tile types. + */ +static inline void print_capacities(const std::vector& logical_block_type_capacities, + const std::vector& physical_tile_type_capacities, + const std::vector& logical_block_types, + const std::vector& physical_tile_types) { + // Get a linear list of all models. + // TODO: Again, the way these models are stored is so annoying. It would be + // nice if they were already vectors! + std::vector all_models; + t_model* curr_model = g_vpr_ctx.device().arch->models; + while (curr_model != nullptr) { + if (curr_model->index >= (int)all_models.size()) + all_models.resize(curr_model->index + 1); + all_models[curr_model->index] = curr_model; + curr_model = curr_model->next; + } + curr_model = g_vpr_ctx.device().arch->model_library; + while (curr_model != nullptr) { + if (curr_model->index >= (int)all_models.size()) + all_models.resize(curr_model->index + 1); + all_models[curr_model->index] = curr_model; + curr_model = curr_model->next; + } + // Print the capacities. 
+ VTR_LOG("Logical Block Type Capacities:\n"); + VTR_LOG("------------------------------\n"); + VTR_LOG("name\t"); + for (t_model* model : all_models) { + VTR_LOG("%s\t", model->name); + } + VTR_LOG("\n"); + for (const t_logical_block_type& block_type : logical_block_types) { + const PrimitiveVector& capacity = logical_block_type_capacities[block_type.index]; + VTR_LOG("%s\t", block_type.name.c_str()); + for (t_model* model : all_models) { + VTR_LOG("%.2f\t", capacity.get_dim_val(model->index)); + } + VTR_LOG("\n"); + } + VTR_LOG("\n"); + VTR_LOG("Physical Tile Type Capacities:\n"); + VTR_LOG("------------------------------\n"); + VTR_LOG("name\t"); + for (t_model* model : all_models) { + VTR_LOG("%s\t", model->name); + } + VTR_LOG("\n"); + for (const t_physical_tile_type& tile_type : physical_tile_types) { + const PrimitiveVector& capacity = physical_tile_type_capacities[tile_type.index]; + VTR_LOG("%s\t", tile_type.name.c_str()); + for (t_model* model : all_models) { + VTR_LOG("%.2f\t", capacity.get_dim_val(model->index)); + } + VTR_LOG("\n"); + } + VTR_LOG("\n"); +} + +/** + * @brief Helper method to get the direct neighbors of the given bin. + * + * A direct neighbor of a bin is a bin which shares a side with the given bin on + * the tile graph. Corners do not count. + */ +static std::unordered_set get_direct_neighbors_of_bin( + LegalizerBinId bin_id, + const vtr::vector_map& bins, + const vtr::NdMatrix& tile_bin) { + const LegalizerBin& bin = bins[bin_id]; + int bl_x = bin.bounding_box.bottom_left().x(); + int bl_y = bin.bounding_box.bottom_left().y(); + size_t bin_width = bin.bounding_box.width(); + size_t bin_height = bin.bounding_box.height(); + // This is an unfortunate consequence of using double precision to store + // the bounding box. We need to ensure that the bin represents a tile (not + // part of a tile). If it did represent part of a tile, this algorithm + // would need to change. 
+ VTR_ASSERT_DEBUG(static_cast(bl_x) == bin.bounding_box.bottom_left().x() && + static_cast(bl_y) == bin.bounding_box.bottom_left().y() && + static_cast(bin_width) == bin.bounding_box.width() && + static_cast(bin_height) == bin.bounding_box.height()); + + // Add the neighbors. + std::unordered_set neighbor_bin_ids; + // Add unique tiles on left and right sides + for (size_t ty = bl_y; ty < bl_y + bin_height; ty++) { + if (bl_x >= 1) + neighbor_bin_ids.insert(tile_bin[bl_x - 1][ty]); + if (bl_x <= (int)(tile_bin.dim_size(0) - bin_width - 1)) + neighbor_bin_ids.insert(tile_bin[bl_x + bin_width][ty]); + } + // Add unique tiles on the top and bottom + for (size_t tx = bl_x; tx < bl_x + bin_width; tx++) { + if (bl_y >= 1) + neighbor_bin_ids.insert(tile_bin[tx][bl_y - 1]); + if (bl_y <= (int)(tile_bin.dim_size(1) - bin_height - 1)) + neighbor_bin_ids.insert(tile_bin[tx][bl_y + bin_height]); + } + + // A bin cannot be a neighbor with itself. + VTR_ASSERT_DEBUG(neighbor_bin_ids.count(bin_id) == 0); + + return neighbor_bin_ids; +} + +/** + * @brief Get the center point of a rect + */ +static inline vtr::Point get_center_of_rect(vtr::Rect rect) { + return rect.bottom_left() + vtr::Point(rect.width() / 2.0, rect.height() / 2.0); +} + +void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, size_t num_models) { + // Make sure that this bin does not already have neighbors. + VTR_ASSERT_DEBUG(bins_[src_bin_id].neighbors.size() == 0); + + // Bins need to be neighbors to every possible molecule type so things can + // flow properly. + // Perform BFS to find the closest bins of each type. Where closest is in + // manhattan distance. + + // Create the queue and insert the source bin into it. + std::queue q; + q.push(src_bin_id); + // Create visited flags for each bin. Set the source to visited. + vtr::vector_map bin_visited(bins_.size(), false); + bin_visited[src_bin_id] = true; + // Flags to check if a specific model has been found in the given direction. 
+ // In this case, direction is the direction of the largest component of the + // manhattan distance between the source bin and the target bin. + std::vector up_found(num_models, false); + std::vector down_found(num_models, false); + std::vector left_found(num_models, false); + std::vector right_found(num_models, false); + // Flags to check if all models have been found in a given direction. + bool all_up_found = false; + bool all_down_found = false; + bool all_left_found = false; + bool all_right_found = false; + bool all_models_found_in_all_directions = false; + // The center of the source bin. + vtr::Point src_bin_center = get_center_of_rect(bins_[src_bin_id].bounding_box); + // The result will be stored in this set. + std::unordered_set neighbors; + + // Helper method to add a neighbor to the set of neighbors and update the + // found flags for a given direction if this bin is new for a given model + // type. This method returns true if every model has been found in the given + // direction (i.e. dir_found is now all true). + auto add_neighbor_if_new_dir = [&](LegalizerBinId target_bin_id, + std::vector& dir_found) { + bool all_found = true; + // Go through all possible models + for (size_t i = 0; i < num_models; i++) { + // If this model has been found in this direction, continue. + if (dir_found[i]) + continue; + // If this bin has this model in its capacity, we found a neighbor! + if (bins_[target_bin_id].capacity.get_dim_val(i) > 0) { + dir_found[i] = true; + neighbors.insert(target_bin_id); + } else { + all_found = false; + } + } + return all_found; + }; + + // Perform the BFS from the source node until all nodes have been explored + // or all of the models have been found in all directions. + while(!q.empty() && !all_models_found_in_all_directions) { + // Pop the bin from the queue. + LegalizerBinId bin_id = q.front(); + q.pop(); + // Get the direct neighbors of the bin (neighbors that are directly + // touching). 
+ auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_); + for (LegalizerBinId dir_neighbor_bin_id : direct_neighbors) { + // If this neighbor has been visited, do not do anything. + if (bin_visited[dir_neighbor_bin_id]) + continue; + // Get the signed distance from the src bin to the target bin in the + // x and y dimensions. + vtr::Point target_bin_center = get_center_of_rect(bins_[dir_neighbor_bin_id].bounding_box); + double dx = target_bin_center.x() - src_bin_center.x(); + double dy = target_bin_center.y() - src_bin_center.y(); + // Is the target bin above the source bin? + if (!all_up_found && dy >= std::abs(dx)) { + all_up_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, up_found); + } + // Is the target bin below the source bin? + if (!all_down_found && dy <= -std::abs(dx)) { + all_down_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, down_found); + } + // Is the target bin to the right of the source bin? + if (!all_right_found && dx >= std::abs(dy)) { + all_right_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, right_found); + } + // Is the target bin to the left of the source bin? + if (!all_left_found && dx <= -std::abs(dy)) { + all_left_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, left_found); + } + // Mark this bin as visited and push it onto the queue. + bin_visited[dir_neighbor_bin_id] = true; + // FIXME: This may be inefficient since it will do an entire BFS of + // the grid if a neighbor of a given type does not exist in + // a specific direction. Should add a check to see if it is + // worth pushing this bin into the queue. + q.push(dir_neighbor_bin_id); + } + // Check if all of the models have been found in all directions. + all_models_found_in_all_directions = all_up_found && all_down_found && + all_left_found && all_right_found; + } + + // Assign the results into the neighbors of the bin. 
+ bins_[src_bin_id].neighbors.assign(neighbors.begin(), neighbors.end()); +} + +FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist) + : PartialLegalizer(netlist), + // TODO: Pass the device grid in. + tile_bin_({g_vpr_ctx.device().grid.width(), g_vpr_ctx.device().grid.height()}) { + const DeviceGrid& grid = g_vpr_ctx.device().grid; + size_t grid_width = grid.width(); + size_t grid_height = grid.height(); + + // Pre-compute the capacities of all logical blocks in the device. + // logical_block_type::index -> PrimitiveVector + std::vector logical_block_type_capacities(g_vpr_ctx.device().logical_block_types.size()); + for (const t_logical_block_type& logical_block_type : g_vpr_ctx.device().logical_block_types) { + logical_block_type_capacities[logical_block_type.index] = get_primitive_capacity(logical_block_type); + } + // Pre-compute the capacities of all physical tile types in the device. + // physical_tile_type::index -> PrimitiveVector + std::vector physical_tile_type_capacities(g_vpr_ctx.device().physical_tile_types.size()); + for (const t_physical_tile_type& physical_tile_type : g_vpr_ctx.device().physical_tile_types) { + physical_tile_type_capacities[physical_tile_type.index] = get_primitive_capacity(physical_tile_type, logical_block_type_capacities); + } + // Print these capacities. Helpful for debugging. + if (log_verbosity_ > 1) { + print_capacities(logical_block_type_capacities, + physical_tile_type_capacities, + g_vpr_ctx.device().logical_block_types, + g_vpr_ctx.device().physical_tile_types); + } + // Create the bins + // This currently creates 1 bin per tile. + for (size_t x = 0; x < grid_width; x++) { + for (size_t y = 0; y < grid_height; y++) { + // Ignoring 3D placement for now. + t_physical_tile_loc tile_loc = {(int)x, (int)y, 0}; + // Is this the root location? Only create bins for roots. 
+ size_t width_offset = grid.get_width_offset(tile_loc); + size_t height_offset = grid.get_height_offset(tile_loc); + if (width_offset != 0 || height_offset != 0) { + // If this is not a root, point the tile_bin_ lookup to the root + // tile location. + tile_bin_[x][y] = tile_bin_[x - width_offset][y - height_offset]; + continue; + } + // Create the bin + LegalizerBinId new_bin_id = LegalizerBinId(bins_.size()); + LegalizerBin new_bin; + // NOTE: The bounding box from the tile does not make sense in this + // context, making my own here based on the tile size and + // position. + t_physical_tile_type_ptr tile_type = grid.get_physical_type(tile_loc); + int width = tile_type->width; + int height = tile_type->height; + new_bin.bounding_box = vtr::Rect(vtr::Point(x, y), + vtr::Point(x + width, + y + height)); + // The capacity of the bin is the capacity of the tile it represents. + new_bin.capacity = physical_tile_type_capacities[tile_type->index]; + bins_.push_back(std::move(new_bin)); + tile_bin_[x][y] = new_bin_id; + } + } + // Get the number of models in the device. + size_t num_models = get_num_models(); + // Connect the bins. + // TODO: Should create a list of bin IDs to make this more efficient. + for (size_t x = 0; x < grid_width; x++) { + for (size_t y = 0; y < grid_height; y++) { + // Ignoring 3D placement for now. Will likely require modification to + // the solver and legalizer. + t_physical_tile_loc tile_loc = {(int)x, (int)y, 0}; + // Is this the root location? + if (grid.get_width_offset(tile_loc) != 0 || + grid.get_height_offset(tile_loc) != 0) { + continue; + } + // Compute the neighbors of this bin. + compute_neighbors_of_bin(tile_bin_[x][y], num_models); + } + } + // Pre-compute the masses of the APBlocks + for (APBlockId blk_id : netlist.blocks()) { + block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist)); + } + // Initialize the block_bins. 
+ block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID()); +} + +bool FlowBasedLegalizer::verify_bins() const { + // Make sure that every block has a bin. + for (APBlockId blk_id : netlist_.blocks()) { + if (!block_bins_[blk_id].is_valid()) { + VTR_LOG("Bin Verify: Found a block that is not in a bin.\n"); + return false; + } + } + // Make sure that every tile has a bin. + const DeviceGrid& device_grid = g_vpr_ctx.device().grid; + if (tile_bin_.dim_size(0) != device_grid.width() || + tile_bin_.dim_size(1) != device_grid.height()) { + VTR_LOG("Bin Verify: Tile-bin lookup does not contain every tile.\n"); + return false; + } + for (size_t x = 0; x < device_grid.width(); x++) { + for (size_t y = 0; y < device_grid.height(); y++) { + if (!tile_bin_[x][y].is_valid()) { + VTR_LOG("Bin Verify: Found a tile with no bin.\n"); + return false; + } + } + } + // Make sure that every bin has the correct utilization, supply, and demand. + for (const LegalizerBin& bin : bins_) { + PrimitiveVector calc_utilization; + for (APBlockId blk_id : bin.contained_blocks) { + calc_utilization += block_masses_[blk_id]; + } + if (bin.utilization != calc_utilization) { + VTR_LOG("Bin Verify: Found a bin with incorrect utilization.\n"); + return false; + } + PrimitiveVector calc_supply = bin.utilization - bin.capacity; + calc_supply.relu(); + if (bin.supply != calc_supply) { + VTR_LOG("Bin Verify: Found a bin with incorrect supply.\n"); + return false; + } + PrimitiveVector calc_demand = bin.capacity - bin.utilization; + calc_demand.relu(); + if (bin.demand != calc_demand) { + VTR_LOG("Bin Verify: Found a bin with incorrect demand.\n"); + return false; + } + if (!bin.supply.is_non_negative()) { + VTR_LOG("Bin Verify: Found a bin with a negative supply.\n"); + return false; + } + if (!bin.demand.is_non_negative()) { + VTR_LOG("Bin Verify: Found a bin with a negative demand.\n"); + return false; + } + if (!bin.capacity.is_non_negative()) { + VTR_LOG("Bin Verify: Found a bin with a 
negative capacity.\n"); + return false; + } + if (!bin.utilization.is_non_negative()) { + VTR_LOG("Bin Verify: Found a bin with a negative utilization.\n"); + return false; + } + if (bin.neighbors.size() == 0) { + VTR_LOG("Bin Verify: Found a bin with no neighbors.\n"); + return false; + } + } + // Make sure all overfilled bins are actually overfilled. + // TODO: Need to make sure that all non-overfilled bins are actually not + // overfilled. + for (LegalizerBinId bin_id : overfilled_bins_) { + const LegalizerBin& bin = bins_[bin_id]; + if (bin.supply.is_zero()) { + VTR_LOG("Bin Verify: Found an overfilled bin that was not overfilled.\n"); + return false; + } + } + // If all above passed, then the bins are valid. + return true; +} + +void FlowBasedLegalizer::reset_bins() { + // Reset all of the bins by removing all of the contained blocks. + for (LegalizerBin& bin : bins_) { + bin.contained_blocks.clear(); + bin.utilization = PrimitiveVector(); + bin.compute_supply(); + bin.compute_demand(); + } + // Reset the reverse lookup of block_bins_ + std::fill(block_bins_.begin(), block_bins_.end(), LegalizerBinId::INVALID()); + // No bin can be overfilled right now. + overfilled_bins_.clear(); +} + +void FlowBasedLegalizer::import_placement_into_bins(const PartialPlacement& p_placement) { + // TODO: Maybe import the fixed block locations in the constructor and + // then only import the moveable block locations. + for (APBlockId blk_id : netlist_.blocks()) { + size_t x_loc = p_placement.block_x_locs[blk_id]; + size_t y_loc = p_placement.block_y_locs[blk_id]; + LegalizerBinId bin_id = get_bin(x_loc, y_loc); + insert_blk_into_bin(blk_id, bin_id); + } +} + +/** + * @brief Get the location of a block assuming that it is placed within the + * given bin. + * + * This function will return the position of the block in the point within the + * bin's bounding box which is closest to the original position of the block + * (the position in p_placement). 
+ */ +static inline vtr::Point get_block_location_in_bin(APBlockId blk_id, + const LegalizerBin& bin, + const PartialPlacement& p_placement) { + // A block cannot be placed on the right or top sides of the bounding box + // of a bin; however they can be infinitely close to these sides. It is + // arbitrary how close to the edge we place the blocks; opted to place them + // as close as possible. + double epsilon = 0.0001; + double x = std::clamp(p_placement.block_x_locs[blk_id], + bin.bounding_box.bottom_left().x(), + bin.bounding_box.top_right().x() - epsilon); + double y = std::clamp(p_placement.block_y_locs[blk_id], + bin.bounding_box.bottom_left().y(), + bin.bounding_box.top_right().y() - epsilon); + return vtr::Point(x, y); +} + +void FlowBasedLegalizer::export_placement_from_bins(PartialPlacement& p_placement) const { + // Updates the partial placement with the location of the blocks in the bin + // by moving the blocks to the point within the bin closest to where they + // were originally. + // TODO: This should be investigated more. This may put blocks onto the edges + // of bins which may not be ideal. + for (APBlockId blk_id : netlist_.blocks()) { + // Only the moveable block locations should be exported. + if (netlist_.block_mobility(blk_id) == APBlockMobility::FIXED) + continue; + // Project the coordinate of the block in the partial placement to the + // closest point in the bin. + LegalizerBinId bin_id = block_bins_[blk_id]; + VTR_ASSERT_DEBUG(bin_id.is_valid()); + const LegalizerBin& bin = bins_[bin_id]; + // Set the position of the block to the closest position in the bin to + // where the block was. + vtr::Point new_blk_pos = get_block_location_in_bin(blk_id, + bin, + p_placement); + p_placement.block_x_locs[blk_id] = new_blk_pos.x(); + p_placement.block_y_locs[blk_id] = new_blk_pos.y(); + } +} + +// Helper method to compute the phi term in the durav algorithm. 
+static inline float computeMaxMovement(size_t iter) { + return 100 * (iter + 1) * (iter + 1); +} + +/** + * @brief Find the minimum cost moveable block in the src_bin which is + * compatible with the target bin. + * + * Cost is the quadratic movement (distance squared) of the block from its + * original position to the position it would be if it were moved into the bin. + * + * @param src_bin The bin that contains the blocks to move. + * @param target_bin The bin to move blocks to. + * @param block_masses A lookup for the masses of all blocks. + * @param p_placement The placement of the blocks prior to legalization. + * @param netlist The APNetlist for the placement. + * + * @return A pair of the minimum cost moveable block and its cost. + */ +static inline std::pair get_min_cost_block_in_bin( + const LegalizerBin& src_bin, + const LegalizerBin& target_bin, + const vtr::vector_map& block_masses, + const PartialPlacement& p_placement, + const APNetlist& netlist) { + // Get the min cost block and its cost. + APBlockId min_cost_block; + float min_cost = std::numeric_limits::infinity(); + // FIXME: If these were somehow pre-sorted, this can be made much cheaper. + for (APBlockId blk_id : src_bin.contained_blocks) { + // If this block is fixed, it has infinite cost to move. + if (netlist.block_mobility(blk_id) == APBlockMobility::FIXED) + continue; + const PrimitiveVector& block_mass = block_masses[blk_id]; + // Is this block compatible with the target bin? + // If the capacity of the target, projected onto the mass, is less than + // the mass, then the block is not compatible. + // TODO: We may want to add a cost term based on how much space is + // available in the bin? + PrimitiveVector target_capacity = target_bin.capacity; + target_capacity.project(block_mass); + if (target_capacity < block_mass) + continue; + // Compute the quadratic movement (aka cost). 
+ vtr::Point new_block_pos = get_block_location_in_bin(blk_id, + target_bin, + p_placement); + double dx = new_block_pos.x() - p_placement.block_x_locs[blk_id]; + double dy = new_block_pos.y() - p_placement.block_y_locs[blk_id]; + float cost = (dx * dx) + (dy * dy); + // If this movement is the least we have seen, this is the min cost. + // FIXME: We could add a cost weight to the block based on things such + // as timing. So critical blocks are less likely to move. + if (cost < min_cost) { + min_cost = cost; + min_cost_block = blk_id; + } + } + + return std::make_pair(min_cost_block, min_cost); +} + +/** + * @brief Compute the cost of moving a block from the source bin into the + * target bin if a compatible block can be found. + * + * @param src_bin The bin that has blocks to be moved. + * @param target_bin The bin to move the blocks into. + * @param psi Algorithm parameter which represents the maximum + * cost this function can return. This function will + * return inf if the cost is larger than psi. + * @param block_masses A lookup for the masses of all blocks. + * @param p_placement The placement of the blocks prior to legalization. + * @param netlist The APNetlist for the placement. + */ +static inline float compute_cost(const LegalizerBin& src_bin, + const LegalizerBin& target_bin, + float psi, + const vtr::vector_map& block_masses, + const PartialPlacement& p_placement, + const APNetlist& netlist) { + // If the src bin is empty, then there is nothing to move. + if (src_bin.contained_blocks.size() == 0) + return std::numeric_limits::infinity(); + // Get the min cost block in the src bin which is compatible with the target + // bin. + APBlockId min_cost_block; + float min_cost; + std::tie(min_cost_block, min_cost) = get_min_cost_block_in_bin(src_bin, + target_bin, + block_masses, + p_placement, + netlist); + // If no block can be moved to the target bin, return. 
+ if (std::isinf(min_cost)) + return std::numeric_limits::infinity(); + // If the quadratic movement is larger than psi, return infinity. + if (min_cost >= psi) + return std::numeric_limits::infinity(); + // Compute the weight, which is proportional to the number of blocks of the + // same type as the min_cost block in the src bin. + // This weight tries to keep blocks of the same type together. + // This term can be found by taking the L1 norm of the projection of the + // src bin's utilization on the direction of the mass. + PrimitiveVector weight_vec = src_bin.utilization; + weight_vec.project(block_masses[min_cost_block]); + float weight = weight_vec.manhattan_norm(); + // Return the overall cost which is the quadratic movement times the weight. + return weight * min_cost; +} + +std::vector> FlowBasedLegalizer::get_paths(LegalizerBinId src_bin_id, + const PartialPlacement& p_placement, + float psi) { + VTR_LOGV(log_verbosity_ >= 20, "\tGetting paths...\n"); + // Create a visited vector. + vtr::vector_map bin_visited(bins_.size(), false); + bin_visited[src_bin_id] = true; + // Create a cost array. The cost of a path is equal to the cost of its tail + // bin. + vtr::vector_map bin_cost(bins_.size(), 0.f); + // Create a starting path. + std::vector starting_path; + starting_path.push_back(src_bin_id); + // Create a FIFO queue. + std::queue> queue; + queue.push(std::move(starting_path)); + // Create the resulting vector of paths. + // TODO: Can we store this more efficiently as a tree? + std::vector> paths; + // Perform the BFS to search for direct paths to flow the starting bin's + // supply of primitives until it has found sufficient demand. + PrimitiveVector demand; + const PrimitiveVector& starting_bin_supply = bins_[src_bin_id].supply; + while (!queue.empty() && demand < starting_bin_supply) { + // Pop the current bin off the queue. 
+ std::vector &p = queue.front(); + LegalizerBinId tail_bin_id = p.back(); + // Look over its neighbors + for (LegalizerBinId neighbor_bin_id : bins_[tail_bin_id].neighbors) { + // If this bin has already been visited, skip it. + if (bin_visited[neighbor_bin_id]) + continue; + // Compute the cost of moving a block from the tail bin to its + // neighbor. + float cost = compute_cost(bins_[tail_bin_id], + bins_[neighbor_bin_id], + psi, + block_masses_, + p_placement, + netlist_); + // If the cost is infinite, then the path cannot be made to this + // neighbor bin. + if (std::isinf(cost)) + continue; + // Else, a path can be made. + std::vector p_copy(p); + bin_cost[neighbor_bin_id] = bin_cost[tail_bin_id] + cost; + p_copy.push_back(neighbor_bin_id); + bin_visited[neighbor_bin_id] = true; + // Project the demand of the neighbor onto the starting supply to + // get how much of the supply this bin can support. If this + // projection is non-zero, this means that we can move blocks into + // this bin as a target. If not, we can flow through it. + // NOTE: This is different from Darav et al. Their original algorithm + // only terminated paths at empty bins. This caused the algorithm + // to never converge if all bins had 1 block in them. However + // this may impact performance since it stops as soon as it + // finds an open bin which may limit the flow. It also + // prevents the flow. This is something that needs to be + // investigated further... + // FIXME: Perhaps we do not check if it is empty, but check if the + // demand is sufficiently large... + PrimitiveVector neighbor_demand = bins_[neighbor_bin_id].demand; + neighbor_demand.project(starting_bin_supply); + VTR_ASSERT_DEBUG(neighbor_demand.is_non_negative()); + // if (bins_[neighbor_bin_id].contained_blocks.size() == 0) { + if (neighbor_demand.is_non_zero()) { + // Add this to the resulting paths. + paths.push_back(std::move(p_copy)); + // Accumulate the demand. 
+ demand += neighbor_demand; + } else { + // Add this path to the queue. + queue.push(std::move(p_copy)); + } + } + // Pop the path from the queue. This pop is delayed to prevent copying + // the path unnecessarily. This is allowed since this is a FIFO queue. + queue.pop(); + } + + // Helpful debug messages. + VTR_LOGV(log_verbosity_ >= 20, "\t\tSupply of source bin: %.2f\n", + starting_bin_supply.manhattan_norm()); + VTR_LOGV(log_verbosity_ >= 20, "\t\tDemand of all paths from source: %.2f\n", + starting_bin_supply.manhattan_norm()); + + // Sort the paths in increasing order of cost. + std::sort(paths.begin(), paths.end(), [&](const std::vector& a, + const std::vector& b) { + return bin_cost[a.back()] < bin_cost[b.back()]; + }); + + return paths; +} + +void FlowBasedLegalizer::flow_blocks_along_path(const std::vector& path, + const PartialPlacement& p_placement, + float psi) { + // Get the root bin of the path. + VTR_ASSERT(!path.empty()); + LegalizerBinId src_bin_id = path[0]; + // Create a stack and put the src bin on top. + std::stack s; + s.push(src_bin_id); + // Insert the bins in the path into the stack in reverse order (so the last + // bin in the path is on top of the stack). + size_t path_size = path.size(); + for (size_t path_idx = 1; path_idx < path_size; path_idx++) { + LegalizerBinId sink_bin_id = path[path_idx]; + // Check that the cost of moving a block from the source bin to the sink + // bin is non-infinite. According to the paper, this check is needed + // since a previous flow on another path may have made this path not + // necessary anymore. + float cost = compute_cost(bins_[src_bin_id], bins_[sink_bin_id], psi, + block_masses_, p_placement, netlist_); + if (std::isinf(cost)) + return; + src_bin_id = sink_bin_id; + s.push(sink_bin_id); + } + // Congo line the blocks along the path, starting from the tail and moving + // forward. 
+ LegalizerBinId sink_bin_id = s.top(); + s.pop(); + while (!s.empty()) { + src_bin_id = s.top(); + s.pop(); + // Minor change to the algorithm proposed by Darav et al., find the + // closest point in src to sink and move it to sink (instead of sorting + // the whole list which is wasteful). + // TODO: Verify this. This is not the same as what was in the original + // algorithm. + std::pair p = get_min_cost_block_in_bin(bins_[src_bin_id], + bins_[sink_bin_id], + block_masses_, + p_placement, + netlist_); + // Move the block from the src bin to the sink bin. + remove_blk_from_bin(p.first, src_bin_id); + insert_blk_into_bin(p.first, sink_bin_id); + + sink_bin_id = src_bin_id; + } +} + +/** + * @brief Prints the header of the per-iteration status of the flow-based + * legalizer. + */ +static void print_flow_based_legalizer_status_header() { + VTR_LOG("---- ----- ------- ---------\n"); + VTR_LOG("Iter Num Largest Psi\n"); + VTR_LOG(" Overf Bin \n"); + VTR_LOG(" Bins Supply \n"); + VTR_LOG("---- ----- ------- ---------\n"); +} + +/** + * @brief Print the current status of the flow-based legalizer (per-iteration). + */ +static void print_flow_based_legalizer_status(size_t iteration, + size_t num_overfilled_bins, + float largest_overfilled_bin_supply, + float psi) { + // Iteration + VTR_LOG("%4zu", iteration); + + // Num overfilled bins + VTR_LOG(" %5zu", num_overfilled_bins); + + // Largest overfilled bin supply + VTR_LOG(" %7.1f", largest_overfilled_bin_supply); + + // Psi + VTR_LOG(" %9.3e", psi); + + VTR_LOG("\n"); + + fflush(stdout); +} + +/** + * @brief Debug method to print the current number of blocks contained in each + * bin visually. + * + * This method helps to see how the spreading is working. 
+ */ +static void print_flow_based_bin_grid(const vtr::NdMatrix& tile_bin, + const vtr::vector_map& bins) { + for (size_t y = 0; y < tile_bin.dim_size(1); y++) { + for (size_t x = 0; x < tile_bin.dim_size(0); x++) { + const LegalizerBin& bin = bins[tile_bin[x][y]]; + VTR_LOG("%3zu ", bin.contained_blocks.size()); + } + VTR_LOG("\n"); + } + VTR_LOG("\n"); +} + +void FlowBasedLegalizer::legalize(PartialPlacement &p_placement) { + VTR_LOGV(log_verbosity_ >= 10, "Running Flow-Based Legalizer\n"); + + // Reset the bins from the previous iteration and prepare for this iteration. + reset_bins(); + // Import the partial placement into bins. + import_placement_into_bins(p_placement); + // Verify that the placement was imported correctly. + VTR_ASSERT_SAFE(verify_bins()); + + // Print the number of blocks in each bin visually before spreading. + if (log_verbosity_ >= 15) { + VTR_LOG("Bin utilization prior to spreading:\n"); + print_flow_based_bin_grid(tile_bin_, bins_); + } + + // Print the status header to make printing the status clearer. + if (log_verbosity_ >= 10) { + print_flow_based_legalizer_status_header(); + } + + // Run the flow-based spreader. + size_t flowBasedIter = 0; + while (true) { + // If we hit the maximum number of iterations, break. + if (flowBasedIter >= max_num_iterations_) { + VTR_LOGV(log_verbosity_ >= 10, + "Flow-Based legalizer hit max iteration limit.\n"); + break; + } + // If there are no overfilled bins, no more work to do. + if (overfilled_bins_.empty()) { + VTR_LOGV(log_verbosity_ >= 10, + "Flow-Based legalizer has no overfilled tiles. No further spreading needed.\n"); + break; + } + // Compute the max movement. + double psi = computeMaxMovement(flowBasedIter); + // Get the overfilled bins and sort them in increasing order of supply. + // We take the manhattan (L1) norm here since we only care about the total + // amount of overfill in each dimension. 
For example, a bin that has a + // supply of <1, 1> is just as overfilled as a bin of supply <0, 2>. + // The standard L2 norm would give more weigth to <0, 2>. + // NOTE: Although the supply should always be non-negative, we still + // take the absolute value in the norm for completeness. + // TODO: This is a guess. Should investigate other norms. + std::vector overfilled_bins_vec(overfilled_bins_.begin(), overfilled_bins_.end()); + std::sort(overfilled_bins_vec.begin(), overfilled_bins_vec.end(), [&](LegalizerBinId a, LegalizerBinId b) { + return bins_[a].supply.manhattan_norm() < bins_[b].supply.manhattan_norm(); + }); + // Get the paths to flow blocks from the overfilled bins to the under + // filled bins and flow the blocks. + for (LegalizerBinId src_bin_id : overfilled_bins_vec) { + // Get the list of candidate paths based on psi. A path is a list + // of LegalizerBins traversed. + // NOTE: The paths are sorted by increasing cost within the + // getPaths method. + std::vector> paths = get_paths(src_bin_id, + p_placement, + psi); + + VTR_LOGV(log_verbosity_ >= 20, "\tNum paths: %zu\n", paths.size()); + // For each path, flow the blocks along the path. + for (const std::vector& path : paths) { + VTR_LOGV(log_verbosity_ >= 30, "\t\tPath length: %zu\n", path.size()); + // If the bin is no longer overfilled, no need to move any more + // blocks along the paths. + if (!bin_is_overfilled(src_bin_id)) + break; + // Move blocks over the paths. + // NOTE: This will only modify the bins. (actual block + // positions will not change (yet)). + flow_blocks_along_path(path, p_placement, psi); + } + } + + // Print status of the flow based legalizer for debugging. + if (log_verbosity_ >= 10) { + // TODO: Get the total cell displacement for debugging. + print_flow_based_legalizer_status( + flowBasedIter, + overfilled_bins_vec.size(), + bins_[overfilled_bins_vec.back()].supply.manhattan_norm(), + psi); + } + + // Increment the iteration. 
+ flowBasedIter++; + } + VTR_LOGV(log_verbosity_ >= 10, + "Flow-Based Legalizer finished in %zu iterations.\n", + flowBasedIter + 1); + + // Verify that the bins are valid before export. + VTR_ASSERT(verify_bins()); + + // Print the number of blocks in each bin after spreading. + if (log_verbosity_ >= 15) { + VTR_LOG("Bin utilization after spreading:\n"); + print_flow_based_bin_grid(tile_bin_, bins_); + } + + // Export the legalized placement to the partial placement. + export_placement_from_bins(p_placement); +} + diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h new file mode 100644 index 00000000000..09a1ea0e9a3 --- /dev/null +++ b/vpr/src/analytical_place/partial_legalizer.h @@ -0,0 +1,401 @@ +/** + * @file + * @author Alex Singer and Robert Luo + * @date October 2024 + * @brief The declarations of the Partial Legalizer base class which is used + * to define the functionality of all partial legalizers in the AP + * flow. + * + * Partial Legalizers are parts of the flow which take in an illegal Partial + * Placemenent and produce a more legal Partial Placement (according to + * constraints of the architecture). + */ + +#pragma once + +#include +#include +#include +#include "ap_netlist_fwd.h" +#include "primitive_vector.h" +#include "vtr_assert.h" +#include "vtr_geometry.h" +#include "vtr_ndmatrix.h" +#include "vtr_strong_id.h" +#include "vtr_vector_map.h" + +// Forward declarations +class APNetlist; +struct PartialPlacement; + +/** + * @brief Enumeration of all of the partial legalizers currently implemented in + * VPR. + */ +enum class e_partial_legalizer { + FLOW_BASED // Multi-commodity flow-based partial legalizer. +}; + +/** + * @brief The Partial Legalizer base class + * + * This provied functionality that all Partial Legalizers will use. + * + * It provides a standard interface that all Partial Legalizers must implement + * so thet can be used interchangably. 
This makes it very easy to test and + * compare different solvers. + */ +class PartialLegalizer { +public: + virtual ~PartialLegalizer() {} + + /** + * @brief Constructor of the base PartialLegalizer class + * + * Currently just copies the parameters into the class as member varaibles. + */ + PartialLegalizer(const APNetlist& netlist, int log_verbosity = 1) + : netlist_(netlist), + log_verbosity_(log_verbosity) {} + + /** + * @brief Partially legalize the given partial placement. + * + * This method will take in the Partial Placement as input and write a + * more legal solution into this same object. Here we define legal as it + * pertains to the constraints of the device. + * + * This class expects to receive a valid Partial Placement as input and will + * generate a valid Partial Placement. + * + * @param p_placement The placement to legalize. Will be filled with the + * legalized placement. + */ + virtual void legalize(PartialPlacement &p_placement) = 0; + +protected: + + /// @brief The APNetlist the legalizer will be legalizing the placement of. + /// It is implied that the netlist is not being modified during + /// global placement. + const APNetlist& netlist_; + + /// @brief The verbosity of the log statements within the partial legalizer. + /// 0 would be no log messages, 10 would print per-iteration status, + /// 20 would print logs messages within each iteration. + int log_verbosity_; +}; + +/** + * @brief A factory method which creates a Partial Legalizer of the given type. + */ +std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, + const APNetlist& netlist); + +/** + * @brief A strong ID for the bins used in the partial legalizer. + * + * This allows a separation between the legalizers and tiles such that a bin may + * represent multiple tiles. + */ +struct legalizer_bin_tag {}; +typedef vtr::StrongId LegalizerBinId; + +/** + * @brief A bin used to contain blocks in the partial legalizer. 
+ * + * Bins can be thought of as generalized tiles which have a capacity of blocks + * (and their types) and a current utilization of the bin. A bin may represent + * multiple tiles. + * + * The capacity, utilization, supply, and demand of the bin are stored as + * M-dimensional vectors; where M is the number of models (primitives) in the + * device. This allows the bin to quickly know how much of each types of + * primitives it can contain and how much of each type it currently contains. + */ +struct LegalizerBin { + /// @brief The blocks currently contained in this bin. + std::unordered_set contained_blocks; + + /// @brief The maximum mass of each primitive type this bin can contain. + PrimitiveVector capacity; + + /// @brief The current mass of each primitive type this bin contains. + PrimitiveVector utilization; + + /// @brief The current over-utilization of the bin. This is defined as: + /// elementwise_max(utilization - capacity, 0) + PrimitiveVector supply; + + /// @brief The current under-utilization of the bin. This is defined as: + /// elementwise_max(capacity - utilization, 0) + PrimitiveVector demand; + + /// @brief The bounding box of the bin on the device grid. This is the + /// positions on the grid the blocks will exist. + /// + /// For example, if the tile at location (2,3) was turned directly into a + /// bin, the bounding box of that bin would be [(2.0, 3.0), (3.0, 4.0)) + /// Notice the notation here. The left and bottom edges are included in the + /// set. + /// It is implied that blocks cannot be placed on the right or top edges of + /// the bounding box (since then they may be in another bin!). + /// + /// NOTE: This uses a double to match the precision of the positions of + /// APBlocks (which are doubles). The use of a double here also allows + /// bins to represent partial tiles which may be useful. + vtr::Rect bounding_box; + + /// @brief The neighbors of this bin. These are neighboring bins that this + /// bin can flow blocks to. 
+ std::vector neighbors; + + /** + * @brief Helper method to compute the supply of the bin. + */ + void compute_supply() { + supply = utilization - capacity; + supply.relu(); + VTR_ASSERT_DEBUG(supply.is_non_negative()); + } + + /** + * @brief Helper method to compute the demand of the bin. + */ + void compute_demand() { + demand = capacity - utilization; + demand.relu(); + VTR_ASSERT_DEBUG(demand.is_non_negative()); + } +}; + +/** + * @brief A multi-commodity flow-based spreading partial legalizer. + * + * This puts the current blocks into bins based on the given placement. It then + * finds paths from bins that are overfilled to bins that are underfilled. Then + * it flows blocks along these paths. Each iteration, the maximum distance that + * blocks can flow is increased. This tries to spread out blocks by moving them + * the smallest distance that it can. + * + * This technique is a modified version of the algorithm proposed by Darav et + * al. Their algorithm was tailored for their Microsemi FPGA. This code extends + * on their work by generalizing it to any theoretical architecture which can be + * expressed in VPR. + * https://doi.org/10.1145/3289602.3293896 + * + * + * TODO: Make the bin size a parameter for the legalizer somehow. That way we + * can make 1x1 bins for very accurate legalizers and larger (clamped) for + * less accurate legalizers. + */ +class FlowBasedLegalizer : public PartialLegalizer { +private: + /// @brief The maximum number of iterations the legalizer can take. This + /// prevents the legalizer from never converging if there is not + /// enough space to flow blocks. + static constexpr size_t max_num_iterations_ = 100; + + /// @brief A vector of all the bins in the legalizer. + vtr::vector_map bins_; + + /// @brief A reverse lookup between every block and the bin they are + /// currently in. + vtr::vector_map block_bins_; + + /// @brief The mass of each APBlock, represented as a primitive vector. 
+ vtr::vector_map block_masses_; + + /// @brief A lookup that gets the bin that represents every tile (and + /// sub-tile). + vtr::NdMatrix tile_bin_; + + /// @brief A set of overfilled bins. Instead of computing this when needed, + /// this list is maintained whenever a block is moved from one bin to + /// another. + std::unordered_set overfilled_bins_; + + /** + * @brief Returns true if the given bin is overfilled. + */ + inline bool bin_is_overfilled(LegalizerBinId bin_id) const { + VTR_ASSERT_DEBUG(bin_id.is_valid()); + VTR_ASSERT_DEBUG(bins_[bin_id].supply.is_non_negative()); + // By definition, a bin is overfilled if its supply is non-zero. + return bins_[bin_id].supply.is_non_zero(); + } + + /** + * @brief Helper method to insert a block into a bin. + * + * This method maintains all the necessary state of the class and updates + * the bin the block is being inserted into. + * + * This method assumes that the given block is not currently in a bin. + */ + inline void insert_blk_into_bin(APBlockId blk_id, LegalizerBinId bin_id) { + VTR_ASSERT_DEBUG(blk_id.is_valid()); + VTR_ASSERT_DEBUG(bin_id.is_valid()); + // Make sure that this block is not anywhere else. + VTR_ASSERT(block_bins_[blk_id] == LegalizerBinId::INVALID()); + // Insert the block into the bin. + block_bins_[blk_id] = bin_id; + LegalizerBin& bin = bins_[bin_id]; + bin.contained_blocks.insert(blk_id); + // Update the utilization, supply, and demand. + const PrimitiveVector& blk_mass = block_masses_[blk_id]; + bin.utilization += blk_mass; + bin.compute_supply(); + bin.compute_demand(); + // Update the overfilled bins since this bin may have become overfilled. + if (bin_is_overfilled(bin_id)) + overfilled_bins_.insert(bin_id); + } + + /** + * @brief Helper method to remove a block from a bin. + * + * This method maintains all the necessary state of the class and updates + * the bin the block is being removed from. + * + * This method assumes that the given block is currently in the given bin. 
+ */ + inline void remove_blk_from_bin(APBlockId blk_id, LegalizerBinId bin_id) { + VTR_ASSERT_DEBUG(blk_id.is_valid()); + VTR_ASSERT_DEBUG(bin_id.is_valid()); + // Make sure that this block is in this bin. + VTR_ASSERT(block_bins_[blk_id] == bin_id); + LegalizerBin& bin = bins_[bin_id]; + VTR_ASSERT_DEBUG(bin.contained_blocks.count(blk_id) == 1); + // Remove the block from the bin. + block_bins_[blk_id] = LegalizerBinId::INVALID(); + bin.contained_blocks.erase(blk_id); + // Update the utilization, supply, and demand. + const PrimitiveVector& blk_mass = block_masses_[blk_id]; + bin.utilization -= blk_mass; + bin.compute_supply(); + bin.compute_demand(); + // Update the overfilled bins since this bin may no longer be + // overfilled. + if (!bin_is_overfilled(bin_id)) + overfilled_bins_.erase(bin_id); + } + + /** + * @brief Helper method to get the bin at the current device x and y tile + * coordinate. + */ + inline LegalizerBinId get_bin(size_t x, size_t y) const { + VTR_ASSERT_DEBUG(x < tile_bin_.dim_size(0)); + VTR_ASSERT_DEBUG(y < tile_bin_.dim_size(1)); + return tile_bin_[x][y]; + } + + /** + * @brief Computes the neighbors of the given bin. + * + * This is different from the algorithm proposed by Darav et al. + * + * Each bin needs to be connected to every type of block. This is because, + * due to the placement being able to place blocks anywhere on the grid, it + * is possible that any type of block can be in any bin. If a bin has a + * block of a given type and no neighbor of the same type, the algorithm + * will never converge. + * + * It is also important that every bin have many different "directions" that + * it can flow blocks for each block type so it can legalize quickly. + * + * The original paper has a fixed architecture, so it builds the bin graph + * directly for their architecture. 
For VPR, a BFS is performed which finds + * bins in each of the four cardinal directions with the minimum manhattan + * distance for all of the different types of blocks. + * + * @param src_bin_id The bin to compute the neighbors for. + * @param num_models The number of models in the architecture. + */ + void compute_neighbors_of_bin(LegalizerBinId src_bin_id, size_t num_models); + + /** + * @brief Debugging method which verifies that all the bins are valid. + * + * The bins are valid if: + * - All blocks are in bins + * - Every tile is represented by a bin + * - Every bin has the correct utilization, supply, and demand + * - The overfilled bins are correct + */ + bool verify_bins() const; + + /** + * @brief Resets all of the bins from a previous call to partial legalize. + * + * This removes all of the blocks from the bins. + */ + void reset_bins(); + + /** + * @brief Import the given partial placement into bins. + * + * This is called at the beginning of legalize to prepare the bins with the + * current placement. + */ + void import_placement_into_bins(const PartialPlacement& p_placement); + + /** + * @brief Export the placement found from spreading the bins. + * + * This is called at the end of legalize to write back the result of the + * legalizer. + */ + void export_placement_from_bins(PartialPlacement& p_placement) const; + + /** + * @brief Gets paths to flow blocks from the src_bin_id at a maximum cost + * of psi. + * + * @param src_bin_id The bin that all paths will originate from. + * @param p_placement The placement being legalized (used for cost + * calculations). + * @param psi An algorithm parameter that increases over many + * iterations. The "max-cost" a path can be. + */ + std::vector> get_paths(LegalizerBinId src_bin_id, + const PartialPlacement& p_placement, + float psi); + + /** + * @brief Flows the blocks along the given path. + * + * The blocks do a conga line maneuver where blocks move towards the end + * of the path. 
+ * + * @param path The path to flow blocks along. + * @param p_placement The placement being legalized (used for cost + * calculations). + * @param psi An algorithm parameter that increases over many + * iterations. The "max-cost" a path can be. + */ + void flow_blocks_along_path(const std::vector& path, + const PartialPlacement& p_placement, + float psi); + +public: + + /** + * @brief Construcotr for the flow-based legalizer. + * + * Builds all of the bins, computing their capacities based on the device + * description. Builds the connectivity of bins. Computes the mass of all + * blocks in the netlist. + */ + FlowBasedLegalizer(const APNetlist& netlist); + + /** + * @brief Performs flow-based spreading on the given partial placement. + * + * @param p_placement The placmeent to legalize. The result of the partial + * legalizer will be stored in this object. + */ + void legalize(PartialPlacement &p_placement) final; +}; + diff --git a/vpr/src/analytical_place/primitive_vector.h b/vpr/src/analytical_place/primitive_vector.h new file mode 100644 index 00000000000..1dd7c4d5a4b --- /dev/null +++ b/vpr/src/analytical_place/primitive_vector.h @@ -0,0 +1,272 @@ +/** + * @file + * @author Alex Singer + * @date October 2024 + * @brief The declaration of the PrimitiveVector object. + * + * This object is designed to store a sparse M-dimensional vector which can be + * efficiently operated upon. + */ + +#pragma once + +#include +#include + +/** + * @brief A sparse vector class to store an M-dimensional quantity of primitives + * in the context of a legalizer. + * + * This vector is used to represent the capacity of tiles for different + * primitives in a closed form which can be manipulated with math operations. + * + * This vector is also used to represent the "mass" of AP blocks in primitives, + * since an AP block may represent many primitives. 
+ * + * This vector stores floats since it is expected that, due to some heuristics, + * the mass of a block may not be a whole number. + * + * This class contains useful operations to operate and compare different + * Primitive Vectors. + */ +class PrimitiveVector { +private: + /// @brief Storage container for the data of this primitive vector. + /// + /// This is stored as a map since it is assumed that the vector will be + /// quite sparse. This is designed to be a vector which has a dimension + /// for each t_model::index. + /// + /// TODO: Is there a more efficient way to store this sparse info? + /// Perhaps we can just waste the space and use a vector. + std::unordered_map data_; + +public: + /** + * @brief Add the value to the given dimension. + * + * This is a common enough feature to use its own setter. + */ + inline void add_val_to_dim(float val, size_t dim) { + if (data_.count(dim) == 0) + data_[dim] = 0.f; + data_[dim] += val; + } + + /** + * @brief Get the value at the given dimension. + */ + inline float get_dim_val(size_t dim) const { + const auto it = data_.find(dim); + // If there is no data in the dim, return 0. By default the vector is + // empty. + if (it == data_.end()) + return 0.f; + // If there is data at this dimension, return it. + return it->second; + } + + /** + * @brief Set the value at the given dimension. + */ + inline void set_dim_val(size_t dim, float val) { + data_[dim] = val; + } + + /** + * @brief Equality operator between two Primitive Vectors. + * + * Returns true if the dimensions of each vector are equal. + */ + inline bool operator==(const PrimitiveVector& rhs) const { + // Check if every dim in rhs matches this. + for (const auto& p : rhs.data_) { + if (get_dim_val(p.first) != p.second) + return false; + } + // If there is anything in this which is not in rhs, need to check. 
+ for (const auto& p : data_) { + if (rhs.get_dim_val(p.first) != p.second) + return false; + } + return true; + } + + /** + * @brief Inequality operator between two Primitive Vectors. + */ + inline bool operator!=(const PrimitiveVector& rhs) const { + return !operator==(rhs); + } + + /** + * @brief Element-wise accumulation of rhs into this. + */ + inline PrimitiveVector& operator+=(const PrimitiveVector& rhs) { + for (const auto& p : rhs.data_) { + float dim_val = get_dim_val(p.first); + set_dim_val(p.first, dim_val + p.second); + } + return *this; + } + + /** + * @brief Element-wise de-accumulation of rhs into this. + */ + inline PrimitiveVector& operator-=(const PrimitiveVector& rhs) { + for (const auto& p : rhs.data_) { + float dim_val = get_dim_val(p.first); + set_dim_val(p.first, dim_val - p.second); + } + return *this; + } + + /** + * @brief Element-wise subtration of two Primitive Vectors. + */ + inline PrimitiveVector operator-(const PrimitiveVector& rhs) const { + PrimitiveVector res = *this; + res -= rhs; + return res; + } + + /** + * @brief Element-wise multiplication with a scalar. + */ + inline PrimitiveVector& operator*=(float rhs) { + for (auto& p : data_) { + p.second *= rhs; + } + return *this; + } + + /** + * @brief Returns true if any dimension of this vector is less than any + * dimension of rhs; false otherwise. + */ + inline bool operator<(const PrimitiveVector& rhs) const { + // Check for any element of this < rhs + for (const auto& p : data_) { + if (p.second < rhs.get_dim_val(p.first)) + return true; + } + // Check for any element of rhs > this. + // NOTE: This is required since there may be elements in rhs which are + // not in this. + // TODO: This is inneficient. + for (const auto& p : rhs.data_) { + if (p.second > get_dim_val(p.first)) + return true; + } + return false; + } + + /** + * @brief Clamps all dimension of this vector to non-negative values. + * + * If a dimension is negative, the dimension will become 0. 
If the dimension + * is non-negative, it will not change. + */ + inline void relu() { + for (auto& p : data_) { + // TODO: Should remove the zero elements from the map to improve + // efficiency. + if (p.second < 0.f) + p.second = 0.f; + } + } + + /** + * @brief Returns true if all dimensions of this vector are zero. + */ + inline bool is_zero() const { + // NOTE: This can be made cheaper by storing this information at + // creation and updating it if values are added or removed. + for (const auto& p : data_) { + if (p.second != 0.f) + return false; + } + return true; + } + + /** + * @brief Returns true if any dimension of this vector is non-zero. + */ + inline bool is_non_zero() const { + return !is_zero(); + } + + /** + * @brief Returns true if all dimensions of this vector are non-negative. + */ + inline bool is_non_negative() const { + for (const auto& p : data_) { + if (p.second < 0.f) + return false; + } + return true; + } + + /** + * @brief Computes the Manhattan (L1) norm of this vector. + * + * This is the sum of the absolute values of all dimensions. + */ + inline float manhattan_norm() const { + // NOTE: This can be made much cheaper by storing the magnitude as part + // of the class and updating it whenever something is added or + // removed. + float mag = 0.f; + for (const auto& p : data_) { + mag += std::abs(p.second); + } + return mag; + } + + /** + * @brief Project this vector onto the given vector. + * + * This basically just means zeroing all dimensions which are zero in the + * given vector. The given vector does not need to be a unit vector. + * + * Example: Project <12, 32, 8, 2> onto <2, 0, 2, 0> = <12, 0, 8, 0> + */ + inline void project(const PrimitiveVector& dir) { + // For each dimension of this vector, if that dimension is zero in dir + // set the dimension to zero. + for (auto& p : data_) { + // TODO: Instead of zeroing the dimension, it should be removed + // from the map.
+ if (dir.get_dim_val(p.first) == 0.f) + p.second = 0.f; + } + } + + /** + * @brief Clear the sparse vector, which is equivalent to setting it to be + * the zero vector. + */ + inline void clear() { + data_.clear(); + } + + /** + * @brief Compute the elementwise max between two primitive vectors. + */ + static inline PrimitiveVector max(const PrimitiveVector& lhs, + const PrimitiveVector& rhs) { + PrimitiveVector res; + // For each key in rhs, get the max(lhs, rhs) + for (const auto& p : rhs.data_) { + res.set_dim_val(p.first, + std::max(lhs.get_dim_val(p.first), p.second)); + } + // For each key in lhs, get the max(lhs, rhs) + for (const auto& p : lhs.data_) { + res.set_dim_val(p.first, + std::max(p.second, rhs.get_dim_val(p.first))); + } + return res; + } +}; + diff --git a/vpr/test/test_ap_primitive_vector.cpp b/vpr/test/test_ap_primitive_vector.cpp new file mode 100644 index 00000000000..e1f114919bb --- /dev/null +++ b/vpr/test/test_ap_primitive_vector.cpp @@ -0,0 +1,316 @@ +/** + * @file + * @author Alex Singer + * @date October 2024 + * @brief Unit tests for the PrimitiveVector object + * + * Very quick functionality checks to make sure that the methods inside of the + * PrimitiveVector object are working as expected. + */ + +#include "catch2/catch_test_macros.hpp" +#include "primitive_vector.h" + +namespace { + +TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") { + SECTION("Test getters and setters") { + PrimitiveVector vec; + // Default value in the vector should be zero. + REQUIRE(vec.get_dim_val(42) == 0.f); + // Able to set a random dim to a value. + vec.set_dim_val(42, 2.f); + REQUIRE(vec.get_dim_val(42) == 2.f); + // Able to add a value to a dim. + vec.add_val_to_dim(10.f, 42); + REQUIRE(vec.get_dim_val(42) == 12.f); + // Try a negative number. + vec.set_dim_val(0, -2.f); + REQUIRE(vec.get_dim_val(0) == -2.f); + vec.add_val_to_dim(-4.f, 42); + REQUIRE(vec.get_dim_val(42) == 8.f); + // Try setting to zero. 
+ vec.set_dim_val(42, 0.f); + REQUIRE(vec.get_dim_val(42) == 0.f); + + // Test clear method. + vec.clear(); + REQUIRE(vec.get_dim_val(42) == 0.f); + REQUIRE(vec.get_dim_val(0) == 0.f); + } + SECTION("Test operators") { + PrimitiveVector vec1, vec2; + + // Equality: + // Two empty vectors should be equal. + REQUIRE(vec1 == vec2); + vec1.set_dim_val(0, 0.f); + vec1.set_dim_val(1, 1.f); + vec1.set_dim_val(2, 2.f); + // Compare with self. + REQUIRE(vec1 == vec1); + // Set vec2 indirectly to vec1 + vec2.set_dim_val(0, 0.f); + vec2.set_dim_val(1, 1.f); + vec2.set_dim_val(2, 2.f); + REQUIRE(vec1 == vec2); + // Check commutativity + REQUIRE(vec2 == vec1); + // Check copy constructor. + PrimitiveVector vec3 = vec1; + REQUIRE(vec1 == vec3); + // Check strange corner-case where 1 vec has more dims set than another. + PrimitiveVector vec4 = vec1; + vec4.set_dim_val(10, 100.f); + REQUIRE(!(vec4 == vec1)); + REQUIRE(!(vec1 == vec4)); + + // Inequality: + // Set vec2 to not be equal + vec2.set_dim_val(0, 3.f); + REQUIRE(!(vec1 == vec2)); + REQUIRE(vec1 != vec2); + REQUIRE(vec2 != vec1); + vec2.set_dim_val(0, 0.f); + vec2.set_dim_val(3, 3.f); + REQUIRE(!(vec1 == vec2)); + REQUIRE(vec1 != vec2); + // Set a random dim to 0. By default all dims are 0.
+ vec2 = vec1; + vec2.set_dim_val(10, 0.f); + REQUIRE(vec1 == vec2); + + // Accumulation: + vec1.clear(); + REQUIRE(vec1 == PrimitiveVector()); + vec1.set_dim_val(0, 0.f); + vec1.set_dim_val(1, 1.f); + vec1.set_dim_val(2, 2.f); + vec2.clear(); + vec2.set_dim_val(0, 3.f); + vec2.set_dim_val(1, 4.f); + vec2.set_dim_val(2, 5.f); + vec1 += vec2; + PrimitiveVector res; + res.set_dim_val(0, 3.f); + res.set_dim_val(1, 5.f); + res.set_dim_val(2, 7.f); + REQUIRE(vec1 == res); + // accumulate different dims + vec1.clear(); + vec1.set_dim_val(0, 10.f); + vec2.clear(); + vec2.set_dim_val(1, 20.f); + vec1 += vec2; + REQUIRE(vec1.get_dim_val(0) == 10.f); + REQUIRE(vec1.get_dim_val(1) == 20.f); + + // Subtraction: + vec1 -= vec2; + REQUIRE(vec1.get_dim_val(0) == 10.f); + REQUIRE(vec1.get_dim_val(1) == 0.f); + res = vec1; + res -= vec2; + REQUIRE(vec1 - vec2 == res); + + // Element-wise multiplication: + vec1.clear(); + vec1.set_dim_val(0, 0.f); + vec1.set_dim_val(1, 1.f); + vec1.set_dim_val(2, 2.f); + vec1 *= 2.f; + REQUIRE(vec1.get_dim_val(0) == 0.f); + REQUIRE(vec1.get_dim_val(1) == 2.f); + REQUIRE(vec1.get_dim_val(2) == 4.f); + } + SECTION("Test comparitors") { + PrimitiveVector vec1, vec2; + // empty vector. + vec2.set_dim_val(0, 10.f); + vec2.set_dim_val(1, 20.f); + REQUIRE(vec1 < vec2); + // 1D case. + vec1.clear(); + vec2.clear(); + vec1.set_dim_val(0, 1.f); + vec2.set_dim_val(0, 2.f); + REQUIRE(vec1 < vec2); + vec1.set_dim_val(0, 2.f); + REQUIRE(!(vec1 < vec2)); + vec1.set_dim_val(0, 3.f); + REQUIRE(!(vec1 < vec2)); + // 2D case. + vec1.clear(); + vec2.clear(); + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(1, 1.f); + vec2.set_dim_val(0, 2.f); + vec2.set_dim_val(1, 2.f); + REQUIRE(vec1 < vec2); + // NOTE: This is somewhat special. Since 1 dimension is less for vec1 + // it should still be less. 
+ vec1.set_dim_val(0, 3.f); + REQUIRE(vec1 < vec2); + vec1.set_dim_val(1, 3.f); + REQUIRE(!(vec1 < vec2)); + } + SECTION("Test methods") { + PrimitiveVector vec1; + // is_zero: + // The default vector is zero. + REQUIRE(vec1.is_zero()); + // Setting an element of the zero-vector to 0 is still a zero vector. + vec1.set_dim_val(0, 0.f); + REQUIRE(vec1.is_zero()); + vec1.set_dim_val(42, 0.f); + REQUIRE(vec1.is_zero()); + vec1.set_dim_val(42, 1.f); + REQUIRE(!vec1.is_zero()); + REQUIRE(vec1.is_non_zero()); + vec1.set_dim_val(42, 0.f); + REQUIRE(vec1.is_zero()); + REQUIRE(!vec1.is_non_zero()); + + // relu: + vec1.clear(); + // Relu of the zero vector is still the zero vector. + vec1.relu(); + REQUIRE(vec1.is_zero()); + // Relu of a negative vector is the zero vector. + vec1.set_dim_val(0, -1.f); + vec1.set_dim_val(1, -2.f); + vec1.relu(); + REQUIRE(vec1.is_zero()); + // Relu of a positive vector is the same vector. + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(1, 2.f); + PrimitiveVector vec2 = vec1; + vec1.relu(); + REQUIRE(vec1 == vec2); + // Standard Relu test. + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(1, 0.f); + vec1.set_dim_val(2, -4.f); + vec1.set_dim_val(3, 2.f); + vec1.set_dim_val(4, -5.f); + vec2 = vec1; + vec1.relu(); + vec2.set_dim_val(2, 0.f); + vec2.set_dim_val(4, 0.f); + REQUIRE(vec1 == vec2); + + // is_non_negative: + vec1.clear(); + // The zero vector is non-negative. + REQUIRE(vec1.is_non_negative()); + vec1.set_dim_val(0, 0.f); + REQUIRE(vec1.is_non_negative()); + // Positive vector is non-negative + vec1.set_dim_val(0, 1.f); + REQUIRE(vec1.is_non_negative()); + vec1.set_dim_val(1, 2.f); + REQUIRE(vec1.is_non_negative()); + // Negative vector is not non-negative. + vec2.clear(); + vec2.set_dim_val(0, -1.f); + REQUIRE(!vec2.is_non_negative()); + vec2.set_dim_val(1, -2.f); + REQUIRE(!vec2.is_non_negative()); + // Mixed positive and negative vector is not non-negative.
+ vec2.set_dim_val(1, 2.f); + REQUIRE(!vec2.is_non_negative()); + vec2.set_dim_val(0, 1.f); + REQUIRE(vec2.is_non_negative()); + + // manhattan_norm: + vec1.clear(); + // Manhattan norm of the zero vector is zero. + REQUIRE(vec1.manhattan_norm() == 0.f); + // Manhattan norm of a non-negative vector is the sum of its dims. + vec1.set_dim_val(0, 1.f); + REQUIRE(vec1.manhattan_norm() == 1.f); + vec1.set_dim_val(1, 2.f); + vec1.set_dim_val(2, 3.f); + vec1.set_dim_val(3, 4.f); + vec1.set_dim_val(4, 5.f); + REQUIRE(vec1.manhattan_norm() == 15.f); + // Manhattan norm of a negative vector is the sum of the absolute value + // of its dims. + vec2 = vec1; + vec2 *= -1.f; + REQUIRE(vec2.manhattan_norm() == vec1.manhattan_norm()); + + // Projection: + // Basic example: + vec1.clear(); + vec1.set_dim_val(0, 12.f); + vec1.set_dim_val(1, 32.f); + vec1.set_dim_val(2, 8.f); + vec1.set_dim_val(3, 2.f); + vec2.clear(); + vec2.set_dim_val(0, 2.f); + vec2.set_dim_val(2, 2.f); + vec1.project(vec2); + PrimitiveVector res; + res.set_dim_val(0, 12.f); + res.set_dim_val(2, 8.f); + REQUIRE(vec1 == res); + // Projecting onto the same vector again should give the same answer. + vec1.project(vec2); + REQUIRE(vec1 == res); + // Projecting onto the same dimensions should not change the vector. + vec1.clear(); + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(1, 2.f); + vec2.clear(); + vec2.set_dim_val(0, 3.f); + vec2.set_dim_val(1, 4.f); + res = vec1; + vec1.project(vec2); + REQUIRE(vec1 == res); + // Projecting onto higher dimensions should not change the vector. + vec2.set_dim_val(2, 5.f); + res = vec1; + vec1.project(vec2); + REQUIRE(vec1 == res); + + // Max of two vectors: + // The max of the zero vectors is the zero vector. + vec1.clear(); + vec2.clear(); + res = PrimitiveVector::max(vec1, vec2); + REQUIRE(res.is_zero()); + // The max of a non-negative vector with the zero vector is the non- + // negative vector.
+ vec1.set_dim_val(0, 1.f); + res = PrimitiveVector::max(vec1, vec2); + REQUIRE(res == vec1); + res = PrimitiveVector::max(vec2, vec1); + REQUIRE(res == vec1); + // The max of a negative vector with the zero vector is the zero vector. + vec1.set_dim_val(0, -1.f); + res = PrimitiveVector::max(vec1, vec2); + REQUIRE(res.is_zero()); + // Basic test: + // max(<5, 9, 0>, <3, 10, -2>) = <5, 10, 0> + vec1.clear(); + vec1.set_dim_val(0, 5.f); + vec1.set_dim_val(1, 9.f); + vec1.set_dim_val(2, 0.f); + vec2.clear(); + vec2.set_dim_val(0, 3.f); + vec2.set_dim_val(1, 10.f); + vec2.set_dim_val(2, -2.f); + PrimitiveVector golden; + golden.set_dim_val(0, 5.f); + golden.set_dim_val(1, 10.f); + golden.set_dim_val(2, 0.f); + res = PrimitiveVector::max(vec1, vec2); + REQUIRE(res == golden); + res = PrimitiveVector::max(vec2, vec1); + REQUIRE(res == golden); + } +} + +} // namespace + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt deleted file mode 100644 index 8d0f14aa789..00000000000 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt +++ /dev/null @@ -1,2 +0,0 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time 
placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.00 vpr 72.12 MiB -1 -1 0.45 18396 3 0.09 -1 -1 33188 -1 -1 34 99 1 0 success v8.0.0-11429-g78275509a-dirty release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-02T13:22:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73856 99 130 240 229 1 238 264 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 72.1 MiB 0.23 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 72.1 MiB 0.23 72.1 MiB 0.23 34 2886 15 6.79088e+06 1.00605e+06 618332. 2139.56 3.18 0.448255 0.41129 25102 150614 -1 2722 13 619 970 98287 23397 2.47058 2.47058 -148.551 -2.47058 0 0 787024. 
2723.27 0.24 0.09 0.22 -1 -1 0.24 0.0502149 0.0464303 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt index 2b004ff8d4a..b34357d3497 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed 
crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml single_wire.v common 1.86 vpr 70.71 MiB -1 -1 0.14 16260 1 0.02 -1 -1 29996 -1 -1 0 1 0 0 success v8.0.0-11569-g4abbff8da release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-17T15:58:41 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72404 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.7 MiB 0.09 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.7 MiB 0.09 70.7 MiB 0.09 8 14 1 6.79088e+06 0 166176. 575.005 0.37 0.00145994 0.0013718 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.11 0.00 0.08 -1 -1 0.11 0.00115987 0.00113118 -fixed_k6_frac_N8_22nm.xml single_ff.v common 2.32 vpr 70.91 MiB -1 -1 0.12 16324 1 0.02 -1 -1 29972 -1 -1 1 2 0 0 success v8.0.0-11569-g4abbff8da release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-17T15:58:41 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72608 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.09 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.09 70.9 MiB 0.09 20 32 1 6.79088e+06 13472 414966. 1435.87 0.65 0.0013976 0.00133449 22510 95286 -1 40 1 2 2 394 99 1.06752 1.06752 -2.06486 -1.06752 0 0 503264. 
1741.40 0.20 0.00 0.16 -1 -1 0.20 0.0013263 0.00128456 -fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.78 vpr 71.84 MiB -1 -1 0.48 18336 3 0.11 -1 -1 33188 -1 -1 34 99 1 0 success v8.0.0-11569-g4abbff8da release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-17T15:58:41 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73568 99 130 240 229 1 238 264 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.27 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.27 71.8 MiB 0.27 34 2792 13 6.79088e+06 1.00605e+06 618332. 2139.56 3.94 0.383735 0.349796 25102 150614 -1 2694 15 616 987 97889 23015 2.47058 2.47058 -150.612 -2.47058 0 0 787024. 2723.27 0.27 0.11 0.23 -1 -1 0.27 0.0610859 0.0560772 -fixed_k6_frac_N8_22nm.xml diffeq1.v common 24.21 vpr 74.00 MiB -1 -1 0.75 22884 15 0.37 -1 -1 34280 -1 -1 55 162 0 5 success v8.0.0-11569-g4abbff8da release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-17T15:58:41 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 75780 162 96 817 258 1 775 318 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.0 MiB 0.68 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.0 MiB 0.68 74.0 MiB 0.66 72 17155 27 6.79088e+06 2.72096e+06 1.19926e+06 4149.71 18.39 3.30386 3.12634 32302 307853 -1 15386 19 3657 8928 1435723 311083 21.8615 21.8615 -1810.62 -21.8615 0 0 1.50317e+06 5201.28 0.50 0.73 0.55 -1 -1 0.50 0.298787 0.283438 +fixed_k6_frac_N8_22nm.xml single_wire.v common 4.25 vpr 70.91 MiB -1 -1 0.18 16276 1 0.39 -1 -1 29812 -1 -1 0 1 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72608 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 70.9 MiB 0.10 8 14 1 6.79088e+06 0 166176. 
575.005 0.36 0.00138004 0.00129992 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00122838 0.00119736 +fixed_k6_frac_N8_22nm.xml single_ff.v common 4.68 vpr 71.03 MiB -1 -1 0.20 16236 1 0.39 -1 -1 29696 -1 -1 1 2 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72736 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 71.0 MiB 0.10 20 31 1 6.79088e+06 13472 414966. 1435.87 0.63 0.00135413 0.0012936 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.17 0.00 0.14 -1 -1 0.17 0.00127341 0.00123431 +fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.69 vpr 71.67 MiB -1 -1 0.46 18220 3 0.40 -1 -1 33084 -1 -1 40 99 3 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73388 99 130 240 229 1 247 272 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 71.7 MiB 0.21 32 3122 15 6.79088e+06 2.18288e+06 586450. 2029.24 1.84 0.271358 0.247517 24814 144142 -1 2952 30 711 1121 349988 188928 2.0466 2.0466 -154.346 -2.0466 -0.04337 -0.04337 744469. 
2576.02 0.25 0.25 0.22 -1 -1 0.25 0.102379 0.0937273 +fixed_k6_frac_N8_22nm.xml diffeq1.v common 32.17 vpr 74.24 MiB -1 -1 0.75 23104 15 0.61 -1 -1 34204 -1 -1 74 162 0 5 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76024 162 96 817 258 1 797 337 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 74.2 MiB 0.60 60 15916 46 6.79088e+06 2.97693e+06 1.01997e+06 3529.29 24.68 3.56948 3.3772 29998 257685 -1 13617 19 4413 11558 1499556 342325 21.9657 21.9657 -1806.56 -21.9657 0 0 1.27783e+06 4421.56 0.40 0.79 0.44 -1 -1 0.40 0.334496 0.31821 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt deleted file mode 100644 index 445629f9b23..00000000000 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt +++ /dev/null @@ -1,2 +0,0 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est 
place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml diffeq1.v common 20.71 vpr 73.80 MiB -1 -1 0.73 23416 15 0.36 -1 -1 34412 -1 -1 55 162 0 5 success v8.0.0-11429-g78275509a-dirty release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-02T13:22:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 75572 162 96 817 258 1 775 318 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 73.8 MiB 0.66 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 73.8 MiB 0.66 73.8 MiB 0.65 78 16251 19 6.79088e+06 2.72096e+06 1.27783e+06 4421.56 14.97 2.38249 2.26406 33454 332105 -1 15052 16 3634 8698 1424645 305832 21.8615 21.8615 -1844.65 -21.8615 0 0 1.60349e+06 5548.42 0.47 0.67 0.58 -1 -1 0.47 0.264426 0.252264