From 499b266bddb2f629b1c3c3e96e735b6010c0294f Mon Sep 17 00:00:00 2001
From: AlexandreSinger <alex.singer@mail.utoronto.ca>
Date: Mon, 7 Oct 2024 17:21:15 -0400
Subject: [PATCH] [AP] Partial Legalizer

This adds the Partial Legalizer block into VPR. A Partial Legalizer takes
a partial placement (which likely has many overlapping blocks and is
very illegal with blocks in the wrong tiles) and creates a more legal
partial placement. These are often called spreaders since their main
task is to spread blocks away from each other (the solver tends to put
blocks on top of each other).

Added a multi-commodity flow-based spreading partial legalizer based on
the work by Darav et al. This algorithm was modified to work within the
VPR infrastructure on any hypothetical architecture which can be
expressed in VPR.
---
 .../analytical_placement_flow.cpp             |   11 +-
 vpr/src/analytical_place/full_legalizer.cpp   |   37 +-
 .../analytical_place/partial_legalizer.cpp    | 1107 +++++++++++++++++
 vpr/src/analytical_place/partial_legalizer.h  |  401 ++++++
 vpr/src/analytical_place/primitive_vector.h   |  272 ++++
 vpr/test/test_ap_primitive_vector.cpp         |  316 +++++
 .../ch_intrinsics/config/golden_results.txt   |    2 -
 .../basic_ap/config/golden_results.txt        |    8 +-
 .../diffeq1/config/golden_results.txt         |    2 -
 9 files changed, 2133 insertions(+), 23 deletions(-)
 create mode 100644 vpr/src/analytical_place/partial_legalizer.cpp
 create mode 100644 vpr/src/analytical_place/partial_legalizer.h
 create mode 100644 vpr/src/analytical_place/primitive_vector.h
 create mode 100644 vpr/test/test_ap_primitive_vector.cpp
 delete mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt
 delete mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt
diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp
index dd25b531dbd..ca5f72772eb 100644
--- a/vpr/src/analytical_place/analytical_placement_flow.cpp
+++ b/vpr/src/analytical_place/analytical_placement_flow.cpp
@@ -13,6 +13,7 @@
 #include "full_legalizer.h"
 #include "gen_ap_netlist_from_atoms.h"
 #include "globals.h"
+#include "partial_legalizer.h"
 #include "partial_placement.h"
 #include "prepack.h"
 #include "user_place_constraints.h"
@@ -53,6 +54,7 @@ static void print_ap_netlist_stats(const APNetlist& netlist) {
     VTR_LOG("\t\tAverage Fanout: %.2f\n", average_fanout);
     VTR_LOG("\t\tHighest Fanout: %zu\n", highest_fanout);
     VTR_LOG("\tPins: %zu\n", netlist.pins().size());
+    VTR_LOG("\n");
 }
 
 void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
@@ -77,11 +79,16 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
     print_ap_netlist_stats(ap_netlist);
 
     // Run the Global Placer
-    //  For now, just runs the solver.
+    //  For now, just runs the solver and partial legalizer 10 times arbitrarily.
     PartialPlacement p_placement(ap_netlist);
     std::unique_ptr<AnalyticalSolver> solver = make_analytical_solver(e_analytical_solver::QP_HYBRID,
                                                                       ap_netlist);
-    solver->solve(0, p_placement);
+    std::unique_ptr<PartialLegalizer> legalizer = make_partial_legalizer(e_partial_legalizer::FLOW_BASED,
+                                                                         ap_netlist);
+    for (size_t i = 0; i < 10; i++) {
+        solver->solve(i, p_placement);
+        legalizer->legalize(p_placement);
+    }
 
     // Verify that the partial placement is valid before running the full
     // legalizer.
diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp
index 40b8b34a0bc..3f32255dcb8 100644
--- a/vpr/src/analytical_place/full_legalizer.cpp
+++ b/vpr/src/analytical_place/full_legalizer.cpp
@@ -2,7 +2,9 @@
  * @file
  * @author  Alex Singer
  * @date    September 2024
- * @brief   Implements the full legalizer in the AP flow.
+ * @brief   Implements the full legalizer in the AP flow. The Full Legalizer
+ *          takes a partial placement and fully legalizes it. This involves
+ *          creating legal clusters and placing them into valid tile sites.
  */
 
 #include "full_legalizer.h"
@@ -33,6 +35,7 @@
 #include "vpr_error.h"
 #include "vpr_types.h"
 #include "vtr_assert.h"
+#include "vtr_geometry.h"
 #include "vtr_ndmatrix.h"
 #include "vtr_strong_id.h"
 #include "vtr_time.h"
@@ -126,9 +129,8 @@ class APClusterPlacer {
                        const t_physical_tile_loc& tile_loc,
                        int sub_tile) {
         const DeviceContext& device_ctx = g_vpr_ctx.device();
-        // FIXME: THIS MUST TAKE INTO ACCOUNT THE CONSTRAINTS AS WELL!!!
-        //  - Right now it is just implied.
-        //  - Will work but is unstable.
+        const FloorplanningContext& floorplanning_ctx = g_vpr_ctx.floorplanning();
+        const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
         const auto& block_locs = g_vpr_ctx.placement().block_locs();
         auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
         VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
@@ -141,11 +143,24 @@ class APClusterPlacer {
         if (device_ctx.grid.get_physical_type(tile_loc)->sub_tiles.size() == 0)
             return false;
         VTR_ASSERT(sub_tile >= 0 && sub_tile < device_ctx.grid.get_physical_type(tile_loc)->capacity);
-        // FIXME: Do this better.
-        //  - May need to try all the sub-tiles in a location.
-        //  - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755
-        to_loc.sub_tile = sub_tile;
-        return try_place_macro(pl_macro, to_loc, blk_loc_registry);
+        // A constrained cluster may only be placed inside its partition region.
+        if (is_cluster_constrained(clb_blk_id)) {
+            const auto& cluster_constraints = floorplanning_ctx.cluster_constraints;
+            if (!cluster_constraints[clb_blk_id].is_loc_in_part_reg(to_loc))
+                return false;
+        }
+        // If the location is legal, try to exhaustively place it at this tile
+        // location. This should try all sub_tiles.
+        PartitionRegion pr;
+        vtr::Rect<int> rect(tile_loc.x, tile_loc.y, tile_loc.x, tile_loc.y);
+        pr.add_to_part_region(Region(rect, to_loc.layer));
+        const ClusteredNetlist& clb_nlist = cluster_ctx.clb_nlist;
+        t_logical_block_type_ptr block_type = clb_nlist.block_type(clb_blk_id);
+        enum e_pad_loc_type pad_loc_type = g_vpr_ctx.device().pad_loc_type;
+        // FIXME: This currently ignores the sub_tile. Was running into issues
+        //        with trying to force clusters to specific sub_tiles.
+        return try_place_macro_exhaustively(pl_macro, pr, block_type,
+                                            pad_loc_type, blk_loc_registry);
     }
 
     // This is not the best way of doing things, but its the simplest. Given a
@@ -356,10 +371,6 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
         bool placed = ap_cluster_placer.place_cluster(cluster_blk_id, tile_loc, blk_sub_tile);
         if (placed)
             continue;
-        // FIXME: Should now try all sub-tiles at this tile location.
-        //  - May need to try all the sub-tiles in a location.
-        //  - however this may need to be done after.
-        //  - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755
 
         // Add to list of unplaced clusters.
         unplaced_clusters.push_back(cluster_blk_id);
diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp
new file mode 100644
index 00000000000..44645878e64
--- /dev/null
+++ b/vpr/src/analytical_place/partial_legalizer.cpp
@@ -0,0 +1,1107 @@
+/**
+ * @file
+ * @author  Alex Singer and Robert Luo
+ * @date    October 2024
+ * @brief   The definitions of the Partial Legalizers used in the AP flow and
+ *          their base class.
+ */
+
+#include "partial_legalizer.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <limits>
+#include <memory>
+#include <queue>
+#include <stack>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include "ap_netlist.h"
+#include "device_grid.h"
+#include "globals.h"
+#include "partial_placement.h"
+#include "physical_types.h"
+#include "primitive_vector.h"
+#include "vpr_context.h"
+#include "vpr_error.h"
+#include "vpr_types.h"
+#include "vtr_assert.h"
+#include "vtr_geometry.h"
+#include "vtr_log.h"
+#include "vtr_ndmatrix.h"
+#include "vtr_strong_id.h"
+#include "vtr_vector.h"
+#include "vtr_vector_map.h"
+
+// Factory for partial legalizers: builds the concrete legalizer selected by
+// legalizer_type for the given netlist, or reports a fatal error if unknown.
+std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer legalizer_type,
+                                                         const APNetlist& netlist) {
+    // Only the flow-based legalizer exists so far.
+    if (legalizer_type == e_partial_legalizer::FLOW_BASED) {
+        return std::make_unique<FlowBasedLegalizer>(netlist);
+    }
+    // Any other enumerator is not a legalizer this factory knows about.
+    VPR_FATAL_ERROR(VPR_ERROR_AP, "Unrecognized partial legalizer type");
+    // Fallback value so that every control path returns something.
+    return nullptr;
+}
+
+/**
+ * @brief Scalar mass assigned to a model (primitive type).
+ *
+ * Heavier models occupy more room in a bin, which may cause primitives of
+ * that type to be spread further apart.
+ *
+ *  @param model    The model to weigh (currently ignored).
+ *  @return         The mass of the model; every model weighs 1 for now.
+ * TODO: Expected to become more elaborate; models may get per-type weights.
+ */
+static inline float get_model_mass(const t_model* model) {
+    static_cast<void>(model); // Unused until models get distinct weights.
+    return 1.0f;
+}
+
+/**
+ * @brief Get the primitive mass of the given block: an M-dimensional vector
+ *        (M = number of models in the architecture) where each entry is the
+ *        summed mass of the block's atoms of that model. Invalid atom IDs in
+ *        the molecule (assumed to be unused slots) are skipped.
+ */
+static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
+                                                 const APNetlist& netlist) {
+    PrimitiveVector mass;
+    const t_pack_molecule* mol = netlist.block_molecule(blk_id);
+    for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
+        if (!atom_blk_id.is_valid()) continue; // Unused slot in the molecule.
+        const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
+        VTR_ASSERT_DEBUG(model->index >= 0);
+        mass.add_val_to_dim(get_model_mass(model), model->index);
+    }
+    return mass;
+}
+
+// Forward declaration required by the mutual recursion below: a pb_type is
+// made of modes and a mode is made of pb_types. Eventually this should become
+// a non-recursive algorithm, however this code is not performance critical.
+static PrimitiveVector get_primitive_capacity(const t_pb_type* pb_type);
+
+/**
+ * @brief Compute how many primitives a single mode can hold.
+ *
+ * One half of a mutual recursion: a mode is built out of child pbs, and each
+ * child pb may itself have modes.
+ */
+static PrimitiveVector get_primitive_capacity(const t_mode& mode) {
+    PrimitiveVector total;
+    const int num_children = mode.num_pb_type_children;
+    for (int child = 0; child < num_children; ++child) {
+        const t_pb_type& child_type = mode.pb_type_children[child];
+        // The mode instantiates num_pb copies of this child, so scale the
+        // capacity of a single copy before adding it to the running total.
+        PrimitiveVector scaled = get_primitive_capacity(&child_type);
+        scaled *= child_type.num_pb;
+        total += scaled;
+    }
+    return total;
+}
+
+/**
+ * @brief Compute how many primitives a pb of the given type can hold.
+ *
+ * The other half of the mutual recursion: a pb type may offer several modes,
+ * and every mode is composed of child pbs.
+ */
+static PrimitiveVector get_primitive_capacity(const t_pb_type* pb_type) {
+    PrimitiveVector result;
+    const int num_modes = pb_type->num_modes;
+    if (num_modes == 0) {
+        // Base case: a leaf pb is a primitive; its capacity is the mass of
+        // its own model, stored in that model's dimension.
+        const t_model* leaf_model = pb_type->model;
+        VTR_ASSERT(leaf_model != nullptr);
+        VTR_ASSERT_DEBUG(leaf_model->index >= 0);
+        result.add_val_to_dim(get_model_mass(leaf_model), leaf_model->index);
+        return result;
+    }
+    // A pb cannot be in multiple modes at the same time, so the capacities of
+    // its modes are not summed. For now they are mixed by taking the maximum
+    // of each dimension of the capacities, which provides an upper bound on
+    // the amount of primitives this pb can contain.
+    for (int m = 0; m < num_modes; ++m) {
+        PrimitiveVector in_mode = get_primitive_capacity(pb_type->modes[m]);
+        result = PrimitiveVector::max(result, in_mode);
+    }
+    return result;
+}
+
+/**
+ * @brief Compute the primitive capacity of a logical block type.
+ *
+ * Entry point into the mutual recursion over pb types and modes.
+ *  @param block_type   The logical block type to measure.
+ */
+static inline PrimitiveVector get_primitive_capacity(const t_logical_block_type& block_type) {
+    // An empty logical block cannot contain any primitives, so it gets a
+    // default constructed capacity. For every other logical block the
+    // capacity is the primitive capacity of its root pb, which is computed
+    // recursively.
+    return block_type.is_empty() ? PrimitiveVector()
+                                 : get_primitive_capacity(block_type.pb_type);
+}
+
+/**
+ * @brief Compute the primitive capacity of a single sub_tile.
+ *
+ * Logical block types may be shared between sub_tiles, so their capacities
+ * are expected to be computed once up front and passed in.
+ *
+ *  @param sub_tile                         The sub_tile to measure.
+ *  @param logical_block_type_capacities    Pre-computed capacity of every
+ *                                          logical block type, indexed by
+ *                                          the type's index.
+ */
+static inline PrimitiveVector get_primitive_capacity(const t_sub_tile& sub_tile,
+                                                     const std::vector<PrimitiveVector>& logical_block_type_capacities) {
+    // A sub_tile has many equivalent sites, but it can only be one of them
+    // at a time. Just like the modes of a pb, the capacities of the sites
+    // are therefore mixed rather than summed. Currently the maximum of each
+    // primitive dimension is taken, which is an upper bound on the capacity
+    // of the sub_tile.
+    PrimitiveVector combined;
+    for (t_logical_block_type_ptr site : sub_tile.equivalent_sites) {
+        combined = PrimitiveVector::max(combined,
+                                        logical_block_type_capacities[site->index]);
+    }
+    return combined;
+}
+
+/**
+ * @brief Compute the primitive capacity of a tile of the given type.
+ *
+ * Logical block types may be shared between tiles, so their capacities are
+ * expected to be computed once up front and passed in.
+ *
+ *  @param tile_type                        The tile type to measure.
+ *  @param logical_block_type_capacities    Pre-computed capacity of every
+ *                                          logical block type, indexed by
+ *                                          the type's index.
+ */
+static inline PrimitiveVector get_primitive_capacity(const t_physical_tile_type& tile_type,
+                                                     const std::vector<PrimitiveVector>& logical_block_type_capacities) {
+    PrimitiveVector tile_capacity;
+    for (const t_sub_tile& sub_tile : tile_type.sub_tiles) {
+        // Capacity of one instance of this sub_tile...
+        PrimitiveVector scaled = get_primitive_capacity(sub_tile, logical_block_type_capacities);
+        // ...multiplied by how many instances of it the tile holds.
+        scaled *= sub_tile.capacity.total();
+        // Sub_tile capacities accumulate into the total of the tile.
+        tile_capacity += scaled;
+    }
+    return tile_capacity;
+}
+
+/**
+ * @brief Count the models in the device architecture.
+ *
+ * FIXME: The models are awkward to reach (two linked lists hanging off of the
+ *        architecture); this information should be much easier to get.
+ */
+static inline size_t get_num_models() {
+    const auto& arch = g_vpr_ctx.device().arch;
+    size_t count = 0;
+    // The models are kept in two separate singly linked lists. Walk each of
+    // them from head to tail (the tail's next pointer is null) and count
+    // every node that is visited.
+    // First list: arch->models.
+    for (const t_model* m = arch->models; m != nullptr; m = m->next)
+        ++count;
+    // Second list: arch->model_library.
+    for (const t_model* m = arch->model_library; m != nullptr; m = m->next)
+        ++count;
+    return count;
+}
+
+/**
+ * @brief Debug helper which logs the primitive capacity of every logical block
+ *        type and every physical tile type as two tab-separated tables.
+ *
+ *  @param logical_block_type_capacities   Capacities by logical block index.
+ *  @param physical_tile_type_capacities   Capacities by physical tile index.
+ *  @param logical_block_types             The logical block types to print.
+ *  @param physical_tile_types             The physical tile types to print.
+ */
+static inline void print_capacities(const std::vector<PrimitiveVector>& logical_block_type_capacities,
+                                    const std::vector<PrimitiveVector>& physical_tile_type_capacities,
+                                    const std::vector<t_logical_block_type>& logical_block_types,
+                                    const std::vector<t_physical_tile_type>& physical_tile_types) {
+    // Build a table of all models, indexed by model index. The models are
+    // stored in two linked lists, so both lists are walked.
+    // TODO: It would be nicer if the models were already stored in a vector.
+    std::vector<t_model*> models_by_index;
+    auto collect_models = [&models_by_index](t_model* head) {
+        for (t_model* m = head; m != nullptr; m = m->next) {
+            // Grow the table on demand so the model's index is in range.
+            if (m->index >= static_cast<int>(models_by_index.size()))
+                models_by_index.resize(m->index + 1);
+            models_by_index[m->index] = m;
+        }
+    };
+    // Walk both of the model lists.
+    collect_models(g_vpr_ctx.device().arch->models);
+    collect_models(g_vpr_ctx.device().arch->model_library);
+
+    // Logs the header row of a table: one column per model name.
+    auto print_header_row = [&models_by_index]() {
+        VTR_LOG("name\t");
+        for (t_model* m : models_by_index)
+            VTR_LOG("%s\t", m->name);
+        VTR_LOG("\n");
+    };
+    // Logs one table row: a type name followed by its capacity per model.
+    auto print_row = [&models_by_index](const char* name, const PrimitiveVector& capacity) {
+        VTR_LOG("%s\t", name);
+        for (t_model* m : models_by_index)
+            VTR_LOG("%.2f\t", capacity.get_dim_val(m->index));
+        VTR_LOG("\n");
+    };
+
+    // First table: logical block types.
+    VTR_LOG("Logical Block Type Capacities:\n");
+    VTR_LOG("------------------------------\n");
+    print_header_row();
+    for (const t_logical_block_type& block_type : logical_block_types)
+        print_row(block_type.name.c_str(), logical_block_type_capacities[block_type.index]);
+    VTR_LOG("\n");
+
+    // Second table: physical tile types.
+    VTR_LOG("Physical Tile Type Capacities:\n");
+    VTR_LOG("------------------------------\n");
+    print_header_row();
+    for (const t_physical_tile_type& tile_type : physical_tile_types)
+        print_row(tile_type.name.c_str(), physical_tile_type_capacities[tile_type.index]);
+    VTR_LOG("\n");
+}
+
+/**
+ * @brief Helper method to get the direct neighbors of the given bin.
+ *
+ * A direct neighbor of a bin is a bin which shares a side with the given bin on
+ * the tile graph. Corners do not count.
+ *  @param bin_id    The bin to find the direct neighbors of.
+ *  @param bins      All of the bins, indexed by bin ID.
+ *  @param tile_bin  Lookup from a tile's (x, y) to the bin covering that tile.
+ *  @return          The IDs of all bins which touch a side of the given bin.
+ */
+static std::unordered_set<LegalizerBinId> get_direct_neighbors_of_bin(
+                                        LegalizerBinId bin_id,
+                                        const vtr::vector_map<LegalizerBinId, LegalizerBin>& bins,
+                                        const vtr::NdMatrix<LegalizerBinId, 2>& tile_bin) {
+    const LegalizerBin& bin = bins[bin_id];
+    int bl_x = bin.bounding_box.bottom_left().x();
+    int bl_y = bin.bounding_box.bottom_left().y();
+    size_t bin_width = bin.bounding_box.width();
+    size_t bin_height = bin.bounding_box.height();
+    // The bounding box is stored in double precision, so make sure that the
+    // bin represents whole tiles (not part of a tile). If it did represent
+    // part of a tile, this algorithm would need to change.
+    VTR_ASSERT_DEBUG(static_cast<double>(bl_x) == bin.bounding_box.bottom_left().x() &&
+                     static_cast<double>(bl_y) == bin.bounding_box.bottom_left().y() &&
+                     static_cast<double>(bin_width) == bin.bounding_box.width() &&
+                     static_cast<double>(bin_height) == bin.bounding_box.height());
+
+    std::unordered_set<LegalizerBinId> neighbor_bin_ids;
+    // Add unique tiles on left and right sides
+    for (size_t ty = bl_y; ty < bl_y + bin_height; ty++) {
+        if (bl_x >= 1)
+            neighbor_bin_ids.insert(tile_bin[bl_x - 1][ty]);
+        if (bl_x <= (int)(tile_bin.dim_size(0) - bin_width - 1))
+            neighbor_bin_ids.insert(tile_bin[bl_x + bin_width][ty]);
+    }
+    // Add unique tiles on the top and bottom
+    for (size_t tx = bl_x; tx < bl_x + bin_width; tx++) {
+        if (bl_y >= 1)
+            neighbor_bin_ids.insert(tile_bin[tx][bl_y - 1]);
+        if (bl_y <= (int)(tile_bin.dim_size(1) - bin_height - 1))
+            neighbor_bin_ids.insert(tile_bin[tx][bl_y + bin_height]);
+    }
+    // A bin cannot be a neighbor with itself.
+    VTR_ASSERT_DEBUG(neighbor_bin_ids.count(bin_id) == 0);
+    return neighbor_bin_ids;
+}
+
+/** @brief Compute the point at the middle of the given rectangle. */
+static inline vtr::Point<double> get_center_of_rect(vtr::Rect<double> rect) {
+    // Offset from the bottom-left corner to the middle of the rectangle.
+    const vtr::Point<double> half_size(0.5 * rect.width(), 0.5 * rect.height());
+    return rect.bottom_left() + half_size;
+}
+
+void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, size_t num_models) {
+    // Make sure that this bin does not already have neighbors.
+    VTR_ASSERT_DEBUG(bins_[src_bin_id].neighbors.size() == 0);
+
+    // Bins need to be neighbors to every possible molecule type so things can
+    // flow properly. To do so, a BFS over the directly touching bins is
+    // performed from the source bin, recording the first bins found (per
+    // direction) which have capacity for each model.
+
+    // Create the queue and insert the source bin into it.
+    std::queue<LegalizerBinId> q;
+    q.push(src_bin_id);
+    // Create visited flags for each bin. Set the source to visited.
+    vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
+    bin_visited[src_bin_id] = true;
+    // Flags to check if a specific model has been found in the given direction.
+    // In this case, direction is the direction of the largest component of the
+    // manhattan distance between the source bin and the target bin.
+    std::vector<bool> up_found(num_models, false);
+    std::vector<bool> down_found(num_models, false);
+    std::vector<bool> left_found(num_models, false);
+    std::vector<bool> right_found(num_models, false);
+    // Flags to check if all models have been found in a given direction.
+    bool all_up_found = false;
+    bool all_down_found = false;
+    bool all_left_found = false;
+    bool all_right_found = false;
+    bool all_models_found_in_all_directions = false;
+    // The center of the source bin.
+    vtr::Point<double> src_bin_center = get_center_of_rect(bins_[src_bin_id].bounding_box);
+    // The result will be stored in this set.
+    std::unordered_set<LegalizerBinId> neighbors;
+
+    // Helper method to add a neighbor to the set of neighbors and update the
+    // found flags for a given direction if this bin is new for a given model
+    // type. This method returns true if every model has been found in the given
+    // direction (i.e. dir_found is now all true).
+    auto add_neighbor_if_new_dir = [&](LegalizerBinId target_bin_id,
+                                       std::vector<bool>& dir_found) {
+        bool all_found = true;
+        // Go through all possible models
+        for (size_t i = 0; i < num_models; i++) {
+            // If this model has been found in this direction, continue.
+            if (dir_found[i])
+                continue;
+            // If this bin has this model in its capacity, we found a neighbor!
+            if (bins_[target_bin_id].capacity.get_dim_val(i) > 0) {
+                dir_found[i] = true;
+                neighbors.insert(target_bin_id);
+            } else {
+                all_found = false;
+            }
+        }
+        return all_found;
+    };
+
+    // Perform the BFS from the source node until all nodes have been explored
+    // or all of the models have been found in all directions.
+    while(!q.empty() && !all_models_found_in_all_directions) {
+        // Pop the bin from the queue.
+        LegalizerBinId bin_id = q.front();
+        q.pop();
+        // Get the direct neighbors of the bin (neighbors that are directly
+        // touching).
+        auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
+        for (LegalizerBinId dir_neighbor_bin_id : direct_neighbors) {
+            // If this neighbor has been visited, do not do anything.
+            if (bin_visited[dir_neighbor_bin_id])
+                continue;
+            // Get the signed distance from the src bin to the target bin in the
+            // x and y dimensions.
+            vtr::Point<double> target_bin_center = get_center_of_rect(bins_[dir_neighbor_bin_id].bounding_box);
+            double dx = target_bin_center.x() - src_bin_center.x();
+            double dy = target_bin_center.y() - src_bin_center.y();
+            // Is the target bin above the source bin?
+            if (!all_up_found && dy >= std::abs(dx)) {
+                all_up_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, up_found);
+            }
+            // Is the target bin below the source bin?
+            if (!all_down_found && dy <= -std::abs(dx)) {
+                all_down_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, down_found);
+            }
+            // Is the target bin to the right of the source bin?
+            if (!all_right_found && dx >= std::abs(dy)) {
+                all_right_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, right_found);
+            }
+            // Is the target bin to the left of the source bin?
+            if (!all_left_found && dx <= -std::abs(dy)) {
+                all_left_found = add_neighbor_if_new_dir(dir_neighbor_bin_id, left_found);
+            }
+            // Mark this bin as visited and push it onto the queue.
+            bin_visited[dir_neighbor_bin_id] = true;
+            // FIXME: This may be inefficient since it will do an entire BFS of
+            //        the grid if a neighbor of a given type does not exist in
+            //        a specific direction. Should add a check to see if it is
+            //        worth pushing this bin into the queue.
+            q.push(dir_neighbor_bin_id);
+        }
+        // Check if all of the models have been found in all directions.
+        all_models_found_in_all_directions = all_up_found && all_down_found &&
+                                             all_left_found && all_right_found;
+    }
+
+    // Assign the results into the neighbors of the bin.
+    bins_[src_bin_id].neighbors.assign(neighbors.begin(), neighbors.end());
+}
+
+FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
+            : PartialLegalizer(netlist),
+              // TODO: Pass the device grid in.
+              tile_bin_({g_vpr_ctx.device().grid.width(), g_vpr_ctx.device().grid.height()}) {
+    const DeviceGrid& grid = g_vpr_ctx.device().grid;
+    size_t grid_width = grid.width();
+    size_t grid_height = grid.height();
+    // Builds one bin per root tile, connects the bins, and caches block masses.
+    // Pre-compute the capacities of all logical blocks in the device.
+    // logical_block_type::index -> PrimitiveVector
+    std::vector<PrimitiveVector> logical_block_type_capacities(g_vpr_ctx.device().logical_block_types.size());
+    for (const t_logical_block_type& logical_block_type : g_vpr_ctx.device().logical_block_types) {
+        logical_block_type_capacities[logical_block_type.index] = get_primitive_capacity(logical_block_type);
+    }
+    // Pre-compute the capacities of all physical tile types in the device.
+    // physical_tile_type::index -> PrimitiveVector
+    std::vector<PrimitiveVector> physical_tile_type_capacities(g_vpr_ctx.device().physical_tile_types.size());
+    for (const t_physical_tile_type& physical_tile_type : g_vpr_ctx.device().physical_tile_types) {
+        physical_tile_type_capacities[physical_tile_type.index] = get_primitive_capacity(physical_tile_type, logical_block_type_capacities);
+    }
+    // Print these capacities. Helpful for debugging.
+    if (log_verbosity_ > 1) {
+        print_capacities(logical_block_type_capacities,
+                         physical_tile_type_capacities,
+                         g_vpr_ctx.device().logical_block_types,
+                         g_vpr_ctx.device().physical_tile_types);
+    }
+    // Create the bins
+    // This currently creates 1 bin per root tile location.
+    for (size_t x = 0; x < grid_width; x++) {
+        for (size_t y = 0; y < grid_height; y++) {
+            // Ignoring 3D placement for now.
+            t_physical_tile_loc tile_loc = {(int)x, (int)y, 0};
+            // Is this the root location? Only create bins for roots.
+            size_t width_offset = grid.get_width_offset(tile_loc);
+            size_t height_offset = grid.get_height_offset(tile_loc);
+            if (width_offset != 0 || height_offset != 0) {
+                // If this is not a root, point the tile_bin_ lookup to the bin
+                // of the root tile location (visited earlier in this loop).
+                tile_bin_[x][y] = tile_bin_[x - width_offset][y - height_offset];
+                continue;
+            }
+            // Create the bin
+            LegalizerBinId new_bin_id = LegalizerBinId(bins_.size());
+            LegalizerBin new_bin;
+            // NOTE: The bounding box from the tile does not make sense in this
+            //       context, making my own here based on the tile size and
+            //       position.
+            t_physical_tile_type_ptr tile_type = grid.get_physical_type(tile_loc);
+            int width = tile_type->width;
+            int height = tile_type->height;
+            new_bin.bounding_box = vtr::Rect<double>(vtr::Point<double>(x, y),
+                                                     vtr::Point<double>(x + width,
+                                                                        y + height));
+            // The capacity of the bin is the capacity of the tile it represents.
+            new_bin.capacity = physical_tile_type_capacities[tile_type->index];
+            bins_.push_back(std::move(new_bin));
+            tile_bin_[x][y] = new_bin_id;
+        }
+    }
+    // Get the number of models in the device.
+    size_t num_models = get_num_models();
+    // Connect the bins.
+    // TODO: Should create a list of bin IDs to make this more efficient.
+    for (size_t x = 0; x < grid_width; x++) {
+        for (size_t y = 0; y < grid_height; y++) {
+            // Ignoring 3D placement for now. Will likely require modification to
+            // the solver and legalizer.
+            t_physical_tile_loc tile_loc = {(int)x, (int)y, 0};
+            // Is this the root location? Non-root tiles share the root's bin.
+            if (grid.get_width_offset(tile_loc) != 0 ||
+                grid.get_height_offset(tile_loc) != 0) {
+                continue;
+            }
+            // Compute the neighbors of this bin.
+            compute_neighbors_of_bin(tile_bin_[x][y], num_models);
+        }
+    }
+    // Pre-compute the masses of the APBlocks
+    for (APBlockId blk_id : netlist.blocks()) {
+        block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
+    }
+    // Initialize the block_bins; no block has been put into a bin yet.
+    block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
+}
+
+bool FlowBasedLegalizer::verify_bins() const {
+    // Make sure that every block has a bin (any failed check logs and returns false).
+    for (APBlockId blk_id : netlist_.blocks()) {
+        if (!block_bins_[blk_id].is_valid()) {
+            VTR_LOG("Bin Verify: Found a block that is not in a bin.\n");
+            return false;
+        }
+    }
+    // Make sure that every tile has a bin (the lookup must span the whole device grid).
+    const DeviceGrid& device_grid = g_vpr_ctx.device().grid;
+    if (tile_bin_.dim_size(0) != device_grid.width() ||
+        tile_bin_.dim_size(1) != device_grid.height()) {
+        VTR_LOG("Bin Verify: Tile-bin lookup does not contain every tile.\n");
+        return false;
+    }
+    for (size_t x = 0; x < device_grid.width(); x++) {
+        for (size_t y = 0; y < device_grid.height(); y++) {
+            if (!tile_bin_[x][y].is_valid()) {
+                VTR_LOG("Bin Verify: Found a tile with no bin.\n");
+                return false;
+            }
+        }
+    }
+    // Recompute each bin's utilization, supply, and demand and compare to the stored values.
+    for (const LegalizerBin& bin : bins_) {
+        PrimitiveVector calc_utilization;
+        for (APBlockId blk_id : bin.contained_blocks) {
+            calc_utilization += block_masses_[blk_id];
+        }
+        if (bin.utilization != calc_utilization) {
+            VTR_LOG("Bin Verify: Found a bin with incorrect utilization.\n");
+            return false;
+        }
+        PrimitiveVector calc_supply = bin.utilization - bin.capacity;
+        calc_supply.relu();
+        if (bin.supply != calc_supply) {
+            VTR_LOG("Bin Verify: Found a bin with incorrect supply.\n");
+            return false;
+        }
+        PrimitiveVector calc_demand = bin.capacity - bin.utilization;
+        calc_demand.relu();
+        if (bin.demand != calc_demand) {
+            VTR_LOG("Bin Verify: Found a bin with incorrect demand.\n");
+            return false;
+        }
+        if (!bin.supply.is_non_negative()) {
+            VTR_LOG("Bin Verify: Found a bin with a negative supply.\n");
+            return false;
+        }
+        if (!bin.demand.is_non_negative()) {
+            VTR_LOG("Bin Verify: Found a bin with a negative demand.\n");
+            return false;
+        }
+        if (!bin.capacity.is_non_negative()) {
+            VTR_LOG("Bin Verify: Found a bin with a negative capacity.\n");
+            return false;
+        }
+        if (!bin.utilization.is_non_negative()) {
+            VTR_LOG("Bin Verify: Found a bin with a negative utilization.\n");
+            return false;
+        }
+        if (bin.neighbors.size() == 0) {
+            VTR_LOG("Bin Verify: Found a bin with no neighbors.\n");
+            return false;
+        }
+    }
+    // Make sure all overfilled bins are actually overfilled (i.e. have a non-zero supply).
+    // TODO: Need to make sure that all non-overfilled bins are actually not
+    //       overfilled.
+    for (LegalizerBinId bin_id : overfilled_bins_) {
+        const LegalizerBin& bin = bins_[bin_id];
+        if (bin.supply.is_zero()) {
+            VTR_LOG("Bin Verify: Found an overfilled bin that was not overfilled.\n");
+            return false;
+        }
+    }
+    // If all above passed, then the bins are valid.
+    return true;
+}
+
+void FlowBasedLegalizer::reset_bins() {
+    // With every bin about to be emptied, none of them can be overfilled.
+    overfilled_bins_.clear();
+    // Forget which bin each block was in.
+    std::fill(block_bins_.begin(), block_bins_.end(), LegalizerBinId::INVALID());
+    // Empty each bin, then refresh its supply and demand.
+    for (LegalizerBin& emptied_bin : bins_) {
+        emptied_bin.contained_blocks.clear();
+        emptied_bin.utilization = PrimitiveVector();
+        emptied_bin.compute_supply();
+        emptied_bin.compute_demand();
+    }
+}
+
+void FlowBasedLegalizer::import_placement_into_bins(const PartialPlacement& p_placement) {
+    // TODO: Maybe import the fixed block locations in the constructor and
+    //       then only import the moveable block locations.
+    for (APBlockId block : netlist_.blocks()) {
+        // Convert the block's coordinates to the integer location used for the bin lookup.
+        const size_t tile_x = p_placement.block_x_locs[block];
+        const size_t tile_y = p_placement.block_y_locs[block];
+        insert_blk_into_bin(block, get_bin(tile_x, tile_y));
+    }
+}
+
+/**
+ * @brief Compute where a block would sit if it were placed inside the given
+ *        bin.
+ *
+ * The result is the block's position in p_placement clamped into the bin's
+ * bounding box, i.e. the point of the bin nearest to where the block
+ * currently is. The top and right edges are excluded by a small margin.
+ */
+static inline vtr::Point<double> get_block_location_in_bin(APBlockId blk_id,
+                                                           const LegalizerBin& bin,
+                                                           const PartialPlacement& p_placement) {
+    // Blocks may sit arbitrarily close to the right and top sides of a bin's
+    // bounding box but never on them. How close is arbitrary; the upper clamp
+    // bounds are pulled in by a small margin so blocks land as near to those
+    // sides as possible.
+    const double edge_margin = 0.0001;
+    const vtr::Point<double> lower = bin.bounding_box.bottom_left();
+    const vtr::Point<double> upper = bin.bounding_box.top_right();
+    const double clamped_x = std::clamp<double>(p_placement.block_x_locs[blk_id],
+                                                lower.x(), upper.x() - edge_margin);
+    const double clamped_y = std::clamp<double>(p_placement.block_y_locs[blk_id],
+                                                lower.y(), upper.y() - edge_margin);
+    return vtr::Point<double>(clamped_x, clamped_y);
+}
+
+void FlowBasedLegalizer::export_placement_from_bins(PartialPlacement& p_placement) const {
+    // Write the spreading result back into the partial placement: each
+    // moveable block is snapped to the point of its bin that is closest to
+    // the block's position in p_placement.
+    // TODO: This should be investigated more. This may put blocks onto the edges
+    //       of bins which may not be ideal.
+    for (APBlockId block : netlist_.blocks()) {
+        // Fixed blocks keep their positions; only moveable blocks are
+        // exported.
+        const bool is_moveable = netlist_.block_mobility(block) != APBlockMobility::FIXED;
+        if (is_moveable) {
+            // Look up the bin that currently holds this block.
+            const LegalizerBinId owning_bin_id = block_bins_[block];
+            VTR_ASSERT_DEBUG(owning_bin_id.is_valid());
+            // Project the block's coordinates onto that bin and store the
+            // projected point as the block's new position.
+            const vtr::Point<double> snapped = get_block_location_in_bin(block,
+                                                                         bins_[owning_bin_id],
+                                                                         p_placement);
+            p_placement.block_x_locs[block] = snapped.x();
+            p_placement.block_y_locs[block] = snapped.y();
+        }
+    }
+}
+
+// Helper method to compute the psi term (max movement) in the Darav et al. algorithm.
+static inline float computeMaxMovement(size_t iter) {
+    return 100 * (iter + 1) * (iter + 1);
+}
+
+/**
+ * @brief Find the minimum cost moveable block in the src_bin which is
+ *        compatible with the target bin.
+ *
+ * Cost is the quadratic movement (distance squared) of the block from its
+ * original position to the position it would be if it were moved into the bin.
+ *
+ *  @param src_bin          The bin that contains the blocks to move.
+ *  @param target_bin       The bin to move blocks to.
+ *  @param block_masses     A lookup for the masses of all blocks.
+ *  @param p_placement      The placement of the blocks prior to legalization.
+ *  @param netlist          The APNetlist for the placement.
+ *
+ *  @return     A pair of the minimum cost moveable block and its cost (infinity if none).
+ */
+static inline std::pair<APBlockId, float> get_min_cost_block_in_bin(
+                    const LegalizerBin& src_bin,
+                    const LegalizerBin& target_bin,
+                    const vtr::vector_map<APBlockId, PrimitiveVector>& block_masses,
+                    const PartialPlacement& p_placement,
+                    const APNetlist& netlist) {
+    // Get the min cost block and its cost (cost stays infinite until a candidate is found).
+    APBlockId min_cost_block;
+    float min_cost = std::numeric_limits<float>::infinity();
+    // FIXME: If these were somehow pre-sorted, this can be made much cheaper.
+    for (APBlockId blk_id : src_bin.contained_blocks) {
+        // If this block is fixed, it has infinite cost to move.
+        if (netlist.block_mobility(blk_id) == APBlockMobility::FIXED)
+            continue;
+        const PrimitiveVector& block_mass = block_masses[blk_id];
+        // Is this block compatible with the target bin?
+        // If the capacity of the target, projected onto the mass, is less than
+        // the mass, then the block is not compatible.
+        // TODO: We may want to add a cost term based on how much space is
+        //       available in the bin?
+        PrimitiveVector target_capacity = target_bin.capacity;
+        target_capacity.project(block_mass);
+        if (target_capacity < block_mass)
+            continue;
+        // Compute the quadratic movement (aka cost).
+        vtr::Point<double> new_block_pos = get_block_location_in_bin(blk_id,
+                                                                     target_bin,
+                                                                     p_placement);
+        double dx = new_block_pos.x() - p_placement.block_x_locs[blk_id];
+        double dy = new_block_pos.y() - p_placement.block_y_locs[blk_id];
+        float cost = (dx * dx) + (dy * dy);
+        // If this movement is the least we have seen, this is the min cost.
+        // FIXME: We could add a cost weight to the block based on things such
+        //        as timing. So critical blocks are less likely to move.
+        if (cost < min_cost) {
+            min_cost = cost;
+            min_cost_block = blk_id;
+        }
+    }
+
+    return std::make_pair(min_cost_block, min_cost);
+}
+
+/**
+ * @brief Cost of moving one block out of the source bin and into the target
+ *        bin, or infinity when no such move is allowed.
+ *
+ *  @param src_bin          The bin to take a block from.
+ *  @param target_bin       The bin the block would be moved into.
+ *  @param psi              Algorithm parameter bounding the allowed quadratic
+ *                          movement. Infinity is returned when the cheapest
+ *                          block's movement is psi or more.
+ *  @param block_masses     A lookup for the masses of all blocks.
+ *  @param p_placement      The placement of the blocks prior to legalization.
+ *  @param netlist          The APNetlist for the placement.
+ */
+static inline float compute_cost(const LegalizerBin& src_bin,
+                                 const LegalizerBin& target_bin,
+                                 float psi,
+                                 const vtr::vector_map<APBlockId, PrimitiveVector>& block_masses,
+                                 const PartialPlacement& p_placement,
+                                 const APNetlist& netlist) {
+    // Sentinel returned whenever no block may be moved.
+    const float no_move = std::numeric_limits<float>::infinity();
+    // An empty src bin has no block to offer.
+    if (src_bin.contained_blocks.size() == 0)
+        return no_move;
+    // Pick the cheapest moveable block of the src bin that is compatible
+    // with the target bin, together with its quadratic movement.
+    const std::pair<APBlockId, float> candidate = get_min_cost_block_in_bin(src_bin,
+                                                                            target_bin,
+                                                                            block_masses,
+                                                                            p_placement,
+                                                                            netlist);
+    const float movement = candidate.second;
+    // An infinite movement means no block could be moved to the target
+    // bin; a movement of psi or more exceeds what this call allows. Both
+    // cases are reported as an infinite cost.
+    if (std::isinf(movement) || movement >= psi)
+        return no_move;
+    // Weight the movement by how much of the src bin's utilization is of
+    // the same type as the chosen block; this tries to keep blocks of the
+    // same type together.
+    // The weight is the L1 norm of the projection of the src bin's
+    // utilization onto the direction of the chosen block's mass.
+    PrimitiveVector same_type_utilization = src_bin.utilization;
+    same_type_utilization.project(block_masses[candidate.first]);
+    const float weight = same_type_utilization.manhattan_norm();
+    // The overall cost is the quadratic movement times the weight.
+    return weight * movement;
+}
+
+std::vector<std::vector<LegalizerBinId>> FlowBasedLegalizer::get_paths(LegalizerBinId src_bin_id,
+                                                                       const PartialPlacement& p_placement,
+                                                                       float psi) {
+    VTR_LOGV(log_verbosity_ >= 20, "\tGetting paths...\n");
+    // Create a visited vector.
+    vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
+    bin_visited[src_bin_id] = true;
+    // Create a cost array. The cost of a path is equal to the cost of its tail
+    // bin.
+    vtr::vector_map<LegalizerBinId, float> bin_cost(bins_.size(), 0.f);
+    // Create a starting path.
+    std::vector<LegalizerBinId> starting_path;
+    starting_path.push_back(src_bin_id);
+    // Create a FIFO queue.
+    std::queue<std::vector<LegalizerBinId>> queue;
+    queue.push(std::move(starting_path));
+    // Create the resulting vector of paths.
+    // TODO: Can we store this more efficiently as a tree?
+    std::vector<std::vector<LegalizerBinId>> paths;
+    // Perform the BFS to search for direct paths to flow the starting bin's
+    // supply of primitives until it has found sufficient demand.
+    PrimitiveVector demand;
+    const PrimitiveVector& starting_bin_supply = bins_[src_bin_id].supply;
+    while (!queue.empty() && demand < starting_bin_supply) {
+        // Get the path at the front of the queue (it is popped at the end of this iteration).
+        std::vector<LegalizerBinId> &p = queue.front();
+        LegalizerBinId tail_bin_id = p.back();
+        // Look over its neighbors
+        for (LegalizerBinId neighbor_bin_id : bins_[tail_bin_id].neighbors) {
+            // If this bin has already been visited, skip it.
+            if (bin_visited[neighbor_bin_id])
+                continue;
+            // Compute the cost of moving a block from the tail bin to its
+            // neighbor.
+            float cost = compute_cost(bins_[tail_bin_id],
+                                      bins_[neighbor_bin_id],
+                                      psi,
+                                      block_masses_,
+                                      p_placement,
+                                      netlist_);
+            // If the cost is infinite, then the path cannot be made to this
+            // neighbor bin.
+            if (std::isinf(cost))
+                continue;
+            // Else, a path can be made.
+            std::vector<LegalizerBinId> p_copy(p);
+            bin_cost[neighbor_bin_id] = bin_cost[tail_bin_id] + cost;
+            p_copy.push_back(neighbor_bin_id);
+            bin_visited[neighbor_bin_id] = true;
+            // Project the demand of the neighbor onto the starting supply to
+            // get how much of the supply this bin can support. If this
+            // projection is non-zero, this means that we can move blocks into
+            // this bin as a target. If not, we can flow through it.
+            // NOTE: This is different from Darav et al. Their original algorithm
+            //       only terminated paths at empty bins. This caused the algorithm
+            //       to never converge if all bins had 1 block in them. However
+            //       this may impact performance since it stops as soon as it
+            //       finds an open bin which may limit the flow. It also
+            //       prevents the flow. This is something that needs to be
+            //       investigated further...
+            // FIXME: Perhaps we do not check if it is empty, but check if the
+            //        demand is sufficiently large...
+            PrimitiveVector neighbor_demand = bins_[neighbor_bin_id].demand;
+            neighbor_demand.project(starting_bin_supply);
+            VTR_ASSERT_DEBUG(neighbor_demand.is_non_negative());
+            // if (bins_[neighbor_bin_id].contained_blocks.size() == 0) {
+            if (neighbor_demand.is_non_zero()) {
+                // Add this to the resulting paths.
+                paths.push_back(std::move(p_copy));
+                // Accumulate the demand.
+                demand += neighbor_demand;
+            } else {
+                // Add this path to the queue.
+                queue.push(std::move(p_copy));
+            }
+        }
+        // Pop the path from the queue. This pop is delayed to prevent copying
+        // the path unnecessarily. This is allowed since this is a FIFO queue.
+        queue.pop();
+    }
+
+    // Helpful debug messages (supply of the source vs. demand accumulated over the paths).
+    VTR_LOGV(log_verbosity_ >= 20, "\t\tSupply of source bin: %.2f\n",
+              starting_bin_supply.manhattan_norm());
+    VTR_LOGV(log_verbosity_ >= 20, "\t\tDemand of all paths from source: %.2f\n",
+              demand.manhattan_norm());
+
+    // Sort the paths in increasing order of cost.
+    std::sort(paths.begin(), paths.end(), [&](const std::vector<LegalizerBinId>& a,
+                                              const std::vector<LegalizerBinId>& b) {
+        return bin_cost[a.back()] < bin_cost[b.back()];
+    });
+
+    return paths;
+}
+
+void FlowBasedLegalizer::flow_blocks_along_path(const std::vector<LegalizerBinId>& path,
+                                                const PartialPlacement& p_placement,
+                                                float psi) {
+    // Get the root bin of the path.
+    VTR_ASSERT(!path.empty());
+    LegalizerBinId src_bin_id = path[0];
+    // Create a stack and put the src bin on top.
+    std::stack<LegalizerBinId> s;
+    s.push(src_bin_id);
+    // Push the remaining bins of the path onto the stack in path order (so the
+    // last bin in the path is on top of the stack).
+    size_t path_size = path.size();
+    for (size_t path_idx = 1; path_idx < path_size; path_idx++) {
+        LegalizerBinId sink_bin_id = path[path_idx];
+        // Check that the cost of moving a block from the source bin to the sink
+        // bin is non-infinite. According to the paper, this check is needed
+        // since a previous flow on another path may have made this path not
+        // necessary anymore.
+        float cost = compute_cost(bins_[src_bin_id], bins_[sink_bin_id], psi,
+                                  block_masses_, p_placement, netlist_);
+        if (std::isinf(cost))
+            return;
+        src_bin_id = sink_bin_id;
+        s.push(sink_bin_id);
+    }
+    // Conga line the blocks along the path, starting from the tail and moving
+    // forward.
+    LegalizerBinId sink_bin_id = s.top();
+    s.pop();
+    while (!s.empty()) {
+        src_bin_id = s.top();
+        s.pop();
+        // Minor change to the algorithm proposed by Darav et al., find the
+        // closest point in src to sink and move it to sink (instead of sorting
+        // the whole list which is wasteful).
+        // TODO: Verify this. This is not the same as what was in the original
+        //       algorithm.
+        std::pair<APBlockId, float> p = get_min_cost_block_in_bin(bins_[src_bin_id],
+                                                                  bins_[sink_bin_id],
+                                                                  block_masses_,
+                                                                  p_placement,
+                                                                  netlist_);
+        // Move the block from the src bin to the sink bin.
+        remove_blk_from_bin(p.first, src_bin_id);
+        insert_blk_into_bin(p.first, sink_bin_id);
+
+        sink_bin_id = src_bin_id;
+    }
+}
+
+/**
+ * @brief Prints the column header of the per-iteration status table of the
+ *        flow-based legalizer (iteration, overfilled bins, largest supply, psi).
+ */
+static void print_flow_based_legalizer_status_header() {
+    VTR_LOG("---- ----- ------- ---------\n");
+    VTR_LOG("Iter   Num Largest       Psi\n");
+    VTR_LOG("     Overf     Bin          \n");
+    VTR_LOG("      Bins  Supply          \n");
+    VTR_LOG("---- ----- ------- ---------\n");
+}
+
+/**
+ * @brief Print one row of the per-iteration status table of the flow-based legalizer.
+ */
+static void print_flow_based_legalizer_status(size_t iteration,
+                                              size_t num_overfilled_bins,
+                                              float largest_overfilled_bin_supply,
+                                              float psi) {
+    // Iteration (4-wide column)
+    VTR_LOG("%4zu", iteration);
+
+    // Num overfilled bins (5-wide column)
+    VTR_LOG(" %5zu", num_overfilled_bins);
+
+    // Largest overfilled bin supply (7-wide column, one decimal place)
+    VTR_LOG(" %7.1f", largest_overfilled_bin_supply);
+
+    // Psi (9-wide column, scientific notation)
+    VTR_LOG(" %9.3e", psi);
+
+    VTR_LOG("\n");
+
+    fflush(stdout);
+}
+
+/**
+ * @brief Debug helper that logs, for every tile of the grid, the number of
+ *        blocks contained in that tile's bin.
+ *
+ * Useful for eyeballing how well the spreading is working.
+ */
+static void print_flow_based_bin_grid(const vtr::NdMatrix<LegalizerBinId, 2>& tile_bin,
+                                       const vtr::vector_map<LegalizerBinId, LegalizerBin>& bins) {
+    // Each output line is one y-row of the grid; each entry is a tile's bin block count.
+    for (size_t row = 0; row < tile_bin.dim_size(1); ++row) {
+        for (size_t col = 0; col < tile_bin.dim_size(0); ++col) {
+            VTR_LOG("%3zu ", bins[tile_bin[col][row]].contained_blocks.size());
+        }
+        VTR_LOG("\n");
+    }
+    VTR_LOG("\n");
+}
+
+void FlowBasedLegalizer::legalize(PartialPlacement &p_placement) {
+    VTR_LOGV(log_verbosity_ >= 10, "Running Flow-Based Legalizer\n");
+
+    // Reset the bins from the previous iteration and prepare for this iteration.
+    reset_bins();
+    // Import the partial placement into bins.
+    import_placement_into_bins(p_placement);
+    // Verify that the placement was imported correctly.
+    VTR_ASSERT_SAFE(verify_bins());
+
+    // Print the number of blocks in each bin visually before spreading.
+    if (log_verbosity_ >= 15) {
+        VTR_LOG("Bin utilization prior to spreading:\n");
+        print_flow_based_bin_grid(tile_bin_, bins_);
+    }
+
+    // Print the status header to make printing the status clearer.
+    if (log_verbosity_ >= 10) {
+        print_flow_based_legalizer_status_header();
+    }
+
+    // Run the flow-based spreader.
+    size_t flowBasedIter = 0;
+    while (true) {
+        // If we hit the maximum number of iterations, break.
+        if (flowBasedIter >= max_num_iterations_) {
+            VTR_LOGV(log_verbosity_ >= 10,
+                     "Flow-Based legalizer hit max iteration limit.\n");
+            break;
+        }
+        // If there are no overfilled bins, no more work to do.
+        if (overfilled_bins_.empty()) {
+            VTR_LOGV(log_verbosity_ >= 10,
+                     "Flow-Based legalizer has no overfilled tiles. No further spreading needed.\n");
+            break;
+        }
+        // Compute the max movement.
+        double psi = computeMaxMovement(flowBasedIter);
+        // Get the overfilled bins and sort them in increasing order of supply.
+        // We take the manhattan (L1) norm here since we only care about the total
+        // amount of overfill in each dimension. For example, a bin that has a
+        // supply of <1, 1> is just as overfilled as a bin of supply <0, 2>.
+        // The standard L2 norm would give more weight to <0, 2>.
+        // NOTE: Although the supply should always be non-negative, we still
+        //       take the absolute value in the norm for completeness.
+        // TODO: This is a guess. Should investigate other norms.
+        std::vector<LegalizerBinId> overfilled_bins_vec(overfilled_bins_.begin(), overfilled_bins_.end());
+        std::sort(overfilled_bins_vec.begin(), overfilled_bins_vec.end(), [&](LegalizerBinId a, LegalizerBinId b) {
+            return bins_[a].supply.manhattan_norm() < bins_[b].supply.manhattan_norm();
+        });
+        // Get the paths to flow blocks from the overfilled bins to the under
+        // filled bins and flow the blocks.
+        for (LegalizerBinId src_bin_id : overfilled_bins_vec) {
+            // Get the list of candidate paths based on psi. A path is a list
+            // of LegalizerBins traversed.
+            //  NOTE: The paths are sorted by increasing cost within the
+            //        get_paths method.
+            std::vector<std::vector<LegalizerBinId>> paths = get_paths(src_bin_id,
+                                                                       p_placement,
+                                                                       psi);
+
+            VTR_LOGV(log_verbosity_ >= 20, "\tNum paths: %zu\n", paths.size());
+            // For each path, flow the blocks along the path.
+            for (const std::vector<LegalizerBinId>& path : paths) {
+                VTR_LOGV(log_verbosity_ >= 30, "\t\tPath length: %zu\n", path.size());
+                // If the bin is no longer overfilled, no need to move any more
+                // blocks along the paths.
+                if (!bin_is_overfilled(src_bin_id))
+                    break;
+                // Move blocks over the paths.
+                //  NOTE: This will only modify the bins. (actual block
+                //        positions will not change (yet)).
+                flow_blocks_along_path(path, p_placement, psi);
+            }
+        }
+
+        // Print status of the flow based legalizer for debugging.
+        if (log_verbosity_ >= 10) {
+            // TODO: Get the total cell displacement for debugging.
+            print_flow_based_legalizer_status(
+                    flowBasedIter,
+                    overfilled_bins_vec.size(),
+                    bins_[overfilled_bins_vec.back()].supply.manhattan_norm(),
+                    psi);
+        }
+
+        // Increment the iteration (this now equals the number of completed iterations).
+        flowBasedIter++;
+    }
+    VTR_LOGV(log_verbosity_ >= 10,
+             "Flow-Based Legalizer finished in %zu iterations.\n",
+             flowBasedIter);
+
+    // Verify that the bins are valid before export.
+    VTR_ASSERT(verify_bins());
+
+    // Print the number of blocks in each bin after spreading.
+    if (log_verbosity_ >= 15) {
+        VTR_LOG("Bin utilization after spreading:\n");
+        print_flow_based_bin_grid(tile_bin_, bins_);
+    }
+
+    // Export the legalized placement to the partial placement.
+    export_placement_from_bins(p_placement);
+}
+
diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h
new file mode 100644
index 00000000000..09a1ea0e9a3
--- /dev/null
+++ b/vpr/src/analytical_place/partial_legalizer.h
@@ -0,0 +1,401 @@
+/**
+ * @file
+ * @author  Alex Singer and Robert Luo
+ * @date    October 2024
+ * @brief   The declarations of the Partial Legalizer base class which is used
+ *          to define the functionality of all partial legalizers in the AP
+ *          flow.
+ *
+ * Partial Legalizers are parts of the flow which take in an illegal Partial
+ * Placement and produce a more legal Partial Placement (according to
+ * constraints of the architecture).
+ */
+
+#pragma once
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+#include "ap_netlist_fwd.h"
+#include "primitive_vector.h"
+#include "vtr_assert.h"
+#include "vtr_geometry.h"
+#include "vtr_ndmatrix.h"
+#include "vtr_strong_id.h"
+#include "vtr_vector_map.h"
+
+// Forward declarations: this header only refers to these types by reference.
+class APNetlist;
+struct PartialPlacement;
+
+/**
+ * @brief Enumeration of all of the partial legalizers currently implemented in
+ *        VPR. make_partial_legalizer() takes one of these to pick a legalizer.
+ */
+enum class e_partial_legalizer {
+    FLOW_BASED      // Multi-commodity flow-based partial legalizer.
+};
+
+/**
+ * @brief Abstract base class shared by every Partial Legalizer.
+ *
+ * Holds the state that all Partial Legalizers have in common.
+ *
+ * It also fixes the interface each Partial Legalizer has to implement, so
+ * that they can be used interchangeably. This makes it very easy to test and
+ * compare different partial legalizers.
+ */
+class PartialLegalizer {
+public:
+    virtual ~PartialLegalizer() = default;
+
+    /**
+     * @brief Base-class constructor.
+     *
+     * Stores the netlist reference and the verbosity as member variables.
+     */
+    PartialLegalizer(const APNetlist& netlist, int log_verbosity = 1)
+        : netlist_(netlist)
+        , log_verbosity_(log_verbosity) {}
+
+    /**
+     * @brief Partially legalize the given partial placement.
+     *
+     * The Partial Placement is both the input and the output: a more legal
+     * solution is written back into the same object. Legality here refers to
+     * the constraints of the device.
+     *
+     * Implementations expect a valid Partial Placement on entry and must leave
+     * a valid Partial Placement behind.
+     *
+     *  @param p_placement  The placement to legalize. Will be filled with the
+     *                      legalized placement.
+     */
+    virtual void legalize(PartialPlacement& p_placement) = 0;
+
+protected:
+
+    /// @brief The APNetlist whose placement is being legalized. The netlist
+    ///        is assumed to stay unmodified for the duration of global
+    ///        placement.
+    const APNetlist& netlist_;
+
+    /// @brief Controls how much the partial legalizer logs: 0 is silent,
+    ///        10 prints per-iteration status, and 20 additionally prints
+    ///        log messages from within each iteration.
+    int log_verbosity_;
+};
+
+/// @brief A factory method which creates a Partial Legalizer of the given type.
+///        The caller owns the result (unique_ptr). netlist is presumably handed
+///        to the legalizer's constructor -- confirm against the definition.
+std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer legalizer_type,
+                                                         const APNetlist& netlist);
+
+/**
+ * @brief Strongly-typed identifier for a bin of the partial legalizer.
+ *
+ * Keeping bin IDs distinct from tile coordinates decouples the legalizers from
+ * the tiles, so a single bin is free to stand for several tiles.
+ */
+struct legalizer_bin_tag {};
+using LegalizerBinId = vtr::StrongId<legalizer_bin_tag, size_t>;
+
+/**
+ * @brief A bin used to contain blocks in the partial legalizer.
+ *
+ * Bins can be thought of as generalized tiles which have a capacity of blocks
+ * (and their types) and a current utilization of the bin. A bin may represent
+ * multiple tiles.
+ *
+ * The capacity, utilization, supply, and demand of the bin are stored as
+ * M-dimensional vectors, where M is the number of models (primitives) in the
+ * device. This allows the bin to quickly know how much of each type of
+ * primitive it can contain and how much of each type it currently contains.
+ */
+struct LegalizerBin {
+    /// @brief The blocks currently contained in this bin.
+    std::unordered_set<APBlockId> contained_blocks;
+
+    /// @brief The maximum mass of each primitive type this bin can contain.
+    PrimitiveVector capacity;
+
+    /// @brief The current mass of each primitive type this bin contains.
+    PrimitiveVector utilization;
+
+    /// @brief The current over-utilization of the bin. This is defined as:
+    ///             elementwise_max(utilization - capacity, 0)
+    PrimitiveVector supply;
+
+    /// @brief The current under-utilization of the bin. This is defined as:
+    ///             elementwise_max(capacity - utilization, 0)
+    PrimitiveVector demand;
+
+    /// @brief The bounding box of the bin on the device grid. These are the
+    /// positions on the grid where the bin's blocks will exist.
+    ///
+    /// For example, if the tile at location (2,3) was turned directly into a
+    /// bin, the bounding box of that bin would be [(2.0, 3.0), (3.0, 4.0)).
+    /// Notice the half-open notation: the left and bottom edges are included
+    /// in the set.
+    /// It is implied that blocks cannot be placed on the right or top edges of
+    /// the bounding box (since then they may be in another bin!).
+    ///
+    /// NOTE: This uses a double to match the precision of the positions of
+    ///       APBlocks (which are doubles). The use of a double here also allows
+    ///       bins to represent partial tiles which may be useful.
+    vtr::Rect<double> bounding_box;
+
+    /// @brief The neighbors of this bin. These are neighboring bins that this
+    ///        bin can flow blocks to.
+    std::vector<LegalizerBinId> neighbors;
+
+    /**
+     * @brief Recomputes supply from the current utilization and capacity.
+     */
+    void compute_supply() {
+        supply = utilization - capacity;
+        supply.relu();
+        VTR_ASSERT_DEBUG(supply.is_non_negative());
+    }
+
+    /**
+     * @brief Recomputes demand from the current utilization and capacity.
+     */
+    void compute_demand() {
+        demand = capacity - utilization;
+        demand.relu();
+        VTR_ASSERT_DEBUG(demand.is_non_negative());
+    }
+};
+
+/**
+ * @brief A multi-commodity flow-based spreading partial legalizer.
+ *
+ * The blocks are first binned according to the given placement. Paths are then
+ * searched from overfilled bins to underfilled bins, and blocks are flowed
+ * along those paths. The maximum distance a block may flow grows with every
+ * iteration, so blocks get spread out while moving the smallest distance
+ * possible.
+ *
+ * This technique is a modified version of the algorithm proposed by Darav et
+ * al. Their algorithm was tailored for their Microsemi FPGA. This code extends
+ * their work by generalizing it to any theoretical architecture which can be
+ * expressed in VPR.
+ *          https://doi.org/10.1145/3289602.3293896
+ *
+ *
+ * TODO: Make the bin size a parameter for the legalizer somehow. That way we
+ *       can make 1x1 bins for very accurate legalizers and larger (clamped) for
+ *       less accurate legalizers.
+ */
+class FlowBasedLegalizer : public PartialLegalizer {
+private:
+    /// @brief Upper bound on the number of iterations the legalizer may run.
+    ///        Without it the legalizer could fail to ever converge when there
+    ///        is not enough space to flow blocks.
+    static constexpr size_t max_num_iterations_ = 100;
+
+    /// @brief Every bin in the legalizer, indexed by bin ID.
+    vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;
+
+    /// @brief Reverse lookup from each block to the bin it currently sits in
+    ///        (LegalizerBinId::INVALID() while the block is in no bin).
+    vtr::vector_map<APBlockId, LegalizerBinId> block_bins_;
+
+    /// @brief The mass of each APBlock, represented as a primitive vector.
+    vtr::vector_map<APBlockId, PrimitiveVector> block_masses_;
+
+    /// @brief Lookup from every tile (and sub-tile) to the bin that
+    ///        represents it.
+    vtr::NdMatrix<LegalizerBinId, 2> tile_bin_;
+
+    /// @brief The set of overfilled bins. Rather than being recomputed on
+    ///        demand, it is kept up to date whenever a block is inserted
+    ///        into or removed from a bin.
+    std::unordered_set<LegalizerBinId> overfilled_bins_;
+
+    /**
+     * @brief Tells whether the given bin currently holds more than it can fit.
+     */
+    bool bin_is_overfilled(LegalizerBinId bin_id) const {
+        VTR_ASSERT_DEBUG(bin_id.is_valid());
+        VTR_ASSERT_DEBUG(bins_[bin_id].supply.is_non_negative());
+        // A non-zero supply is exactly what it means for a bin to be overfilled.
+        return bins_[bin_id].supply.is_non_zero();
+    }
+
+    /**
+     * @brief Helper method which places a block into a bin.
+     *
+     * All of the bookkeeping of the class is kept consistent: the reverse
+     * lookup, the bin's contents and masses, and the set of overfilled bins.
+     *
+     * The given block must not be in any bin when this is called.
+     */
+    void insert_blk_into_bin(APBlockId blk_id, LegalizerBinId bin_id) {
+        VTR_ASSERT_DEBUG(blk_id.is_valid());
+        VTR_ASSERT_DEBUG(bin_id.is_valid());
+        // The block must not already live in some bin.
+        VTR_ASSERT(block_bins_[blk_id] == LegalizerBinId::INVALID());
+        // Record the block in the bin and in the reverse lookup.
+        LegalizerBin& bin = bins_[bin_id];
+        bin.contained_blocks.insert(blk_id);
+        block_bins_[blk_id] = bin_id;
+        // Account for the block's mass and refresh the derived quantities.
+        bin.utilization += block_masses_[blk_id];
+        bin.compute_supply();
+        bin.compute_demand();
+        // Adding mass may have pushed this bin over its capacity.
+        if (bin_is_overfilled(bin_id)) {
+            overfilled_bins_.insert(bin_id);
+        }
+    }
+
+    /**
+     * @brief Helper method which takes a block out of a bin.
+     *
+     * All of the bookkeeping of the class is kept consistent: the reverse
+     * lookup, the bin's contents and masses, and the set of overfilled bins.
+     *
+     * The given block must currently be in the given bin.
+     */
+    void remove_blk_from_bin(APBlockId blk_id, LegalizerBinId bin_id) {
+        VTR_ASSERT_DEBUG(blk_id.is_valid());
+        VTR_ASSERT_DEBUG(bin_id.is_valid());
+        // The block must really live in this bin.
+        VTR_ASSERT(block_bins_[blk_id] == bin_id);
+        LegalizerBin& bin = bins_[bin_id];
+        VTR_ASSERT_DEBUG(bin.contained_blocks.count(blk_id) == 1);
+        // Drop the block from the bin and from the reverse lookup.
+        bin.contained_blocks.erase(blk_id);
+        block_bins_[blk_id] = LegalizerBinId::INVALID();
+        // Give back the mass of the block, then refresh the quantities
+        // derived from the utilization.
+        bin.utilization -= block_masses_[blk_id];
+        bin.compute_supply();
+        bin.compute_demand();
+        // Removing mass may have brought this bin back within its capacity.
+        if (!bin_is_overfilled(bin_id)) {
+            overfilled_bins_.erase(bin_id);
+        }
+    }
+
+    /**
+     * @brief Helper method which looks up the bin covering the device tile
+     *        at the given x and y coordinate.
+     */
+    LegalizerBinId get_bin(size_t x, size_t y) const {
+        VTR_ASSERT_DEBUG(x < tile_bin_.dim_size(0));
+        VTR_ASSERT_DEBUG(y < tile_bin_.dim_size(1));
+        return tile_bin_[x][y];
+    }
+
+    /**
+     * @brief Computes the neighbors of the given bin.
+     *
+     * This differs from the algorithm proposed by Darav et al.
+     *
+     * Every bin needs a connection for every type of block. The placement is
+     * free to put blocks anywhere on the grid, so any type of block may end
+     * up in any bin. Should a bin hold a block of some type without having a
+     * neighbor of that same type, the algorithm would never be able to
+     * converge.
+     *
+     * Every bin should also have many different "directions" in which it can
+     * flow blocks of each block type, so that it can legalize quickly.
+     *
+     * The original paper has a fixed architecture, so it builds the bin graph
+     * directly for their architecture. For VPR, a BFS is performed which finds
+     * bins in each of the four cardinal directions with the minimum manhattan
+     * distance for all of the different types of blocks.
+     *
+     *  @param src_bin_id   The bin to compute the neighbors for.
+     *  @param num_models   The number of models in the architecture.
+     */
+    void compute_neighbors_of_bin(LegalizerBinId src_bin_id, size_t num_models);
+
+    /**
+     * @brief Debugging method which checks that the bins are consistent.
+     *
+     * The bins are valid if:
+     *  - All blocks are in bins
+     *  - Every tile is represented by a bin
+     *  - Every bin has the correct utilization, supply, and demand
+     *  - The overfilled bins are correct
+     */
+    bool verify_bins() const;
+
+    /**
+     * @brief Clears the bins left over from a previous call to legalize.
+     *
+     * Every block is removed from its bin.
+     */
+    void reset_bins();
+
+    /**
+     * @brief Loads the given partial placement into the bins.
+     *
+     * Called at the start of legalize so that the bins reflect the current
+     * placement.
+     */
+    void import_placement_into_bins(const PartialPlacement& p_placement);
+
+    /**
+     * @brief Writes the placement obtained by spreading the bins back out.
+     *
+     * Called at the end of legalize to store the result of the legalizer in
+     * the given partial placement.
+     */
+    void export_placement_from_bins(PartialPlacement& p_placement) const;
+
+    /**
+     * @brief Finds paths along which blocks can flow out of src_bin_id, each
+     *        with a cost of at most psi.
+     *
+     *  @param src_bin_id   The bin that all paths will originate from.
+     *  @param p_placement  The placement being legalized (used for cost
+     *                      calculations).
+     *  @param psi          An algorithm parameter that increases over many
+     *                      iterations. The "max-cost" a path can be.
+     */
+    std::vector<std::vector<LegalizerBinId>> get_paths(LegalizerBinId src_bin_id,
+                                                       const PartialPlacement& p_placement,
+                                                       float psi);
+
+    /**
+     * @brief Flows the blocks along the given path.
+     *
+     * Blocks do a conga line maneuver towards the end of the path. Only the
+     * bins are modified; actual block positions do not change yet.
+     *
+     *  @param path         The path to flow blocks along.
+     *  @param p_placement  The placement being legalized (used for cost
+     *                      calculations).
+     *  @param psi          An algorithm parameter that increases over many
+     *                      iterations. The "max-cost" a path can be.
+     */
+    void flow_blocks_along_path(const std::vector<LegalizerBinId>& path,
+                                const PartialPlacement& p_placement,
+                                float psi);
+
+public:
+
+    /**
+     * @brief Constructor for the flow-based legalizer.
+     *
+     * Builds all of the bins, computing their capacities based on the device
+     * description. Builds the connectivity of bins. Computes the mass of all
+     * blocks in the netlist.
+     */
+    FlowBasedLegalizer(const APNetlist& netlist);
+
+    /**
+     * @brief Performs flow-based spreading on the given partial placement.
+     *
+     *  @param p_placement  The placement to legalize. The result of the partial
+     *                      legalizer will be stored in this object.
+     */
+    void legalize(PartialPlacement& p_placement) final;
+};
+
diff --git a/vpr/src/analytical_place/primitive_vector.h b/vpr/src/analytical_place/primitive_vector.h
new file mode 100644
index 00000000000..1dd7c4d5a4b
--- /dev/null
+++ b/vpr/src/analytical_place/primitive_vector.h
@@ -0,0 +1,274 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    October 2024
+ * @brief   The declaration of the PrimitiveVector object.
+ *
+ * This object is designed to store a sparse M-dimensional vector which can be
+ * efficiently operated upon.
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <unordered_map>
+
+/**
+ * @brief A sparse vector class to store an M-dimensional quantity of primitives
+ *        in the context of a legalizer.
+ *
+ * This vector is used to represent the capacity of tiles for different
+ * primitives in a closed form which can be manipulated with math operations.
+ *
+ * This vector is also used to represent the "mass" of AP blocks in primitives,
+ * since an AP block may represent many primitives.
+ *
+ * This vector stores floats since it is expected that, due to some heuristics,
+ * the mass of a block may not be a whole number.
+ *
+ * This class contains useful operations to operate on and compare different
+ * Primitive Vectors. Dimensions which have not been set are treated as 0.
+ */
+class PrimitiveVector {
+private:
+    /// @brief Storage container for the data of this primitive vector.
+    ///
+    /// This is stored as a map since it is assumed that the vector will be
+    /// quite sparse. This is designed to be a vector which has a dimension
+    /// for each t_model::index.
+    ///
+    /// TODO: Is there a more efficient way to store this sparse info?
+    ///       Perhaps we can just waste the space and use a vector.
+    std::unordered_map<size_t, float> data_;
+
+public:
+    /**
+     * @brief Add the value to the given dimension.
+     *
+     * This is a common enough feature to use its own setter.
+     */
+    inline void add_val_to_dim(float val, size_t dim) {
+        // operator[] value-initializes a missing dimension to 0.f, so a
+        // single lookup handles both the new and the existing case.
+        data_[dim] += val;
+    }
+
+    /**
+     * @brief Get the value at the given dimension.
+     */
+    inline float get_dim_val(size_t dim) const {
+        const auto it = data_.find(dim);
+        // If there is no data in the dim, return 0. By default the vector is
+        // empty.
+        if (it == data_.end())
+            return 0.f;
+        // If there is data at this dimension, return it.
+        return it->second;
+    }
+
+    /**
+     * @brief Set the value at the given dimension.
+     */
+    inline void set_dim_val(size_t dim, float val) {
+        data_[dim] = val;
+    }
+
+    /**
+     * @brief Equality operator between two Primitive Vectors.
+     *
+     * Returns true if every dimension of the two vectors is equal (unset == 0).
+     */
+    inline bool operator==(const PrimitiveVector& rhs) const {
+        // Check if every dim in rhs matches this.
+        for (const auto& p : rhs.data_) {
+            if (get_dim_val(p.first) != p.second)
+                return false;
+        }
+        // If there is anything in this which is not in rhs, need to check.
+        for (const auto& p : data_) {
+            if (rhs.get_dim_val(p.first) != p.second)
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * @brief Inequality operator between two Primitive Vectors.
+     */
+    inline bool operator!=(const PrimitiveVector& rhs) const {
+        return !operator==(rhs);
+    }
+
+    /**
+     * @brief Element-wise accumulation of rhs into this.
+     */
+    inline PrimitiveVector& operator+=(const PrimitiveVector& rhs) {
+        for (const auto& p : rhs.data_) {
+            // A dimension missing from this vector starts from 0.f.
+            data_[p.first] += p.second;
+        }
+        return *this;
+    }
+
+    /**
+     * @brief Element-wise de-accumulation of rhs into this.
+     */
+    inline PrimitiveVector& operator-=(const PrimitiveVector& rhs) {
+        for (const auto& p : rhs.data_) {
+            // A dimension missing from this vector starts from 0.f.
+            data_[p.first] -= p.second;
+        }
+        return *this;
+    }
+
+    /**
+     * @brief Element-wise subtraction of two Primitive Vectors.
+     */
+    inline PrimitiveVector operator-(const PrimitiveVector& rhs) const {
+        PrimitiveVector res = *this;
+        res -= rhs;
+        return res;
+    }
+
+    /**
+     * @brief Element-wise multiplication with a scalar.
+     */
+    inline PrimitiveVector& operator*=(float rhs) {
+        for (auto& p : data_) {
+            p.second *= rhs;
+        }
+        return *this;
+    }
+
+    /**
+     * @brief Returns true if any dimension of this vector is less than the
+     *        same dimension of rhs; false otherwise (not a strict ordering).
+     */
+    inline bool operator<(const PrimitiveVector& rhs) const {
+        // Check for any element of this < rhs
+        for (const auto& p : data_) {
+            if (p.second < rhs.get_dim_val(p.first))
+                return true;
+        }
+        // Check for any element of rhs > this.
+        // NOTE: This is required since there may be elements in rhs which are
+        //       not in this.
+        // TODO: This is inefficient.
+        for (const auto& p : rhs.data_) {
+            if (p.second > get_dim_val(p.first))
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * @brief Clamps all dimension of this vector to non-negative values.
+     *
+     * If a dimension is negative, the dimension will become 0. If the dimension
+     * is positive, it will not change.
+     */
+    inline void relu() {
+        for (auto& p : data_) {
+            // TODO: Should remove the zero elements from the map to improve
+            //       efficiency.
+            if (p.second < 0.f)
+                p.second = 0.f;
+        }
+    }
+
+    /**
+     * @brief Returns true if all dimensions of this vector are zero.
+     */
+    inline bool is_zero() const {
+        // NOTE: This can be made cheaper by storing this information at
+        //       creation and updating it if values are added or removed.
+        for (const auto& p : data_) {
+            if (p.second != 0.f)
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * @brief Returns true if any dimension of this vector is non-zero.
+     */
+    inline bool is_non_zero() const {
+        return !is_zero();
+    }
+
+    /**
+     * @brief Returns true if all dimensions of this vector are non-negative.
+     */
+    inline bool is_non_negative() const {
+        for (const auto& p : data_) {
+            if (p.second < 0.f)
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * @brief Computes the manhattan (L1) norm of this vector.
+     *
+     * This is the sum of the absolute value of all dimensions.
+     */
+    inline float manhattan_norm() const {
+        // NOTE: This can be made much cheaper by storing the magnitude as part
+        //       of the class and updating it whenever something is added or
+        //       removed.
+        float mag = 0.f;
+        for (const auto& p : data_) {
+            mag += std::abs(p.second);
+        }
+        return mag;
+    }
+
+    /**
+     * @brief Project this vector onto the given vector.
+     *
+     * This basically just means zero-ing all dimension which are zero in the
+     * given vector. The given vector does not need to be a unit vector.
+     *
+     * Example: Project <12, 32, 8, 2> onto <2, 0, 2, 0> = <12, 0, 8, 0>
+     */
+    inline void project(const PrimitiveVector& dir) {
+        // For each dimension of this vector, if that dimension is zero in dir
+        // set the dimension to zero.
+        for (auto& p : data_) {
+            // TODO: Instead of zeroing the dimension, it should be removed
+            //       from the map.
+            if (dir.get_dim_val(p.first) == 0.f)
+                p.second = 0.f;
+        }
+    }
+
+    /**
+     * @brief Clear the sparse vector, which is equivalent to setting it to be
+     *        the zero vector.
+     */
+    inline void clear() {
+        data_.clear();
+    }
+
+    /**
+     * @brief Compute the elementwise max between two primitive vectors.
+     */
+    static inline PrimitiveVector max(const PrimitiveVector& lhs,
+                                      const PrimitiveVector& rhs) {
+        PrimitiveVector res;
+        // For each key in rhs, get the max(lhs, rhs)
+        for (const auto& p : rhs.data_) {
+            res.set_dim_val(p.first,
+                            std::max(lhs.get_dim_val(p.first), p.second));
+        }
+        // For each key in lhs, get the max(lhs, rhs)
+        for (const auto& p : lhs.data_) {
+            res.set_dim_val(p.first,
+                            std::max(p.second, rhs.get_dim_val(p.first)));
+        }
+        return res;
+    }
+};
+
diff --git a/vpr/test/test_ap_primitive_vector.cpp b/vpr/test/test_ap_primitive_vector.cpp
new file mode 100644
index 00000000000..e1f114919bb
--- /dev/null
+++ b/vpr/test/test_ap_primitive_vector.cpp
@@ -0,0 +1,316 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    October 2024
+ * @brief   Unit tests for the PrimitiveVector object
+ *
+ * Very quick functionality checks to make sure that the methods inside of the
+ * PrimitiveVector object are working as expected.
+ */
+
+#include "catch2/catch_test_macros.hpp"
+#include "primitive_vector.h"
+
+namespace {
+
+TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") {
+    SECTION("Test getters and setters") {
+        PrimitiveVector vec;
+        // Default value in the vector should be zero.
+        REQUIRE(vec.get_dim_val(42) == 0.f);
+        // Able to set a random dim to a value.
+        vec.set_dim_val(42, 2.f);
+        REQUIRE(vec.get_dim_val(42) == 2.f);
+        // Able to add a value to a dim.
+        vec.add_val_to_dim(10.f, 42);
+        REQUIRE(vec.get_dim_val(42) == 12.f);
+        // Try a negative number.
+        vec.set_dim_val(0, -2.f);
+        REQUIRE(vec.get_dim_val(0) == -2.f);
+        vec.add_val_to_dim(-4.f, 42);
+        REQUIRE(vec.get_dim_val(42) == 8.f);
+        // Try setting to zero.
+        vec.set_dim_val(42, 0.f);
+        REQUIRE(vec.get_dim_val(42) == 0.f);
+
+        // Test clear method.
+        vec.clear();
+        REQUIRE(vec.get_dim_val(42) == 0.f);
+        REQUIRE(vec.get_dim_val(0) == 0.f);
+    }
+    SECTION("Test operators") {
+        PrimitiveVector vec1, vec2;
+
+        // Equality:
+        // Two empty vectors should be equal.
+        REQUIRE(vec1 == vec2);
+        vec1.set_dim_val(0, 0.f);
+        vec1.set_dim_val(1, 1.f);
+        vec1.set_dim_val(2, 2.f);
+        // Compare with self.
+        REQUIRE(vec1 == vec1);
+        // Set vec2 indirectly to vec 1
+        vec2.set_dim_val(0, 0.f);
+        vec2.set_dim_val(1, 1.f);
+        vec2.set_dim_val(2, 2.f);
+        REQUIRE(vec1 == vec2);
+        // Check commutivity
+        REQUIRE(vec2 == vec1);
+        // Check copy constructor.
+        PrimitiveVector vec3 = vec1;
+        REQUIRE(vec1 == vec3);
+        // Check strange corner-case where 1 vec has more dims set than another.
+        PrimitiveVector vec4 = vec1;
+        vec4.set_dim_val(10, 100.f);
+        REQUIRE(!(vec4 == vec1));
+        REQUIRE(!(vec1 == vec4));
+
+        // Inequality:
+        // Set vec2 to not be equal
+        vec2.set_dim_val(0, 3.f);
+        REQUIRE(!(vec1 == vec2));
+        REQUIRE(vec1 != vec2);
+        REQUIRE(vec2 != vec1);
+        vec2.set_dim_val(0, 0.f);
+        vec2.set_dim_val(3, 3.f);
+        REQUIRE(!(vec1 == vec2));
+        REQUIRE(vec1 != vec2);
+        // Set a random dim to 0. By default all dims are 0.
+        vec2 = vec1;
+        vec2.set_dim_val(10, 0.f);
+        REQUIRE(vec1 == vec2);
+
+        // Accumulation:
+        vec1.clear();
+        REQUIRE(vec1 == PrimitiveVector());
+        vec1.set_dim_val(0, 0.f);
+        vec1.set_dim_val(1, 1.f);
+        vec1.set_dim_val(2, 2.f);
+        vec2.clear();
+        vec2.set_dim_val(0, 3.f);
+        vec2.set_dim_val(1, 4.f);
+        vec2.set_dim_val(2, 5.f);
+        vec1 += vec2;
+        PrimitiveVector res;
+        res.set_dim_val(0, 3.f);
+        res.set_dim_val(1, 5.f);
+        res.set_dim_val(2, 7.f);
+        REQUIRE(vec1 == res);
+        // accumulate different dims
+        vec1.clear();
+        vec1.set_dim_val(0, 10.f);
+        vec2.clear();
+        vec2.set_dim_val(1, 20.f);
+        vec1 += vec2;
+        REQUIRE(vec1.get_dim_val(0) == 10.f);
+        REQUIRE(vec1.get_dim_val(1) == 20.f);
+
+        // Subtraction:
+        vec1 -= vec2;
+        REQUIRE(vec1.get_dim_val(0) == 10.f);
+        REQUIRE(vec1.get_dim_val(1) == 0.f);
+        res = vec1;
+        res -= vec2;
+        REQUIRE(vec1 - vec2 == res);
+
+        // Element-wise multiplication:
+        vec1.clear();
+        vec1.set_dim_val(0, 0.f);
+        vec1.set_dim_val(1, 1.f);
+        vec1.set_dim_val(2, 2.f);
+        vec1 *= 2.f;
+        REQUIRE(vec1.get_dim_val(0) == 0.f);
+        REQUIRE(vec1.get_dim_val(1) == 2.f);
+        REQUIRE(vec1.get_dim_val(2) == 4.f);
+    }
+    SECTION("Test comparitors") {
+        PrimitiveVector vec1, vec2;
+        // empty vector.
+        vec2.set_dim_val(0, 10.f);
+        vec2.set_dim_val(1, 20.f);
+        REQUIRE(vec1 < vec2);
+        // 1D case.
+        vec1.clear();
+        vec2.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec2.set_dim_val(0, 2.f);
+        REQUIRE(vec1 < vec2);
+        vec1.set_dim_val(0, 2.f);
+        REQUIRE(!(vec1 < vec2));
+        vec1.set_dim_val(0, 3.f);
+        REQUIRE(!(vec1 < vec2));
+        // 2D case.
+        vec1.clear();
+        vec2.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(1, 1.f);
+        vec2.set_dim_val(0, 2.f);
+        vec2.set_dim_val(1, 2.f);
+        REQUIRE(vec1 < vec2);
+        // NOTE: This is somewhat special. Since 1 dimension is less for vec1
+        //       it should still be less.
+        vec1.set_dim_val(0, 3.f);
+        REQUIRE(vec1 < vec2);
+        vec1.set_dim_val(1, 3.f);
+        REQUIRE(!(vec1 < vec2));
+    }
+    SECTION("Test methods") {
+        PrimitiveVector vec1;
+        // is_zero:
+        // The default vector is zero.
+        REQUIRE(vec1.is_zero());
+        // Setting an element of the zero-vector to 0 is still a zero vector.
+        vec1.set_dim_val(0, 0.f);
+        REQUIRE(vec1.is_zero());
+        vec1.set_dim_val(42, 0.f);
+        REQUIRE(vec1.is_zero());
+        vec1.set_dim_val(42, 1.f);
+        REQUIRE(!vec1.is_zero());
+        REQUIRE(vec1.is_non_zero());
+        vec1.set_dim_val(42, 0.f);
+        REQUIRE(vec1.is_zero());
+        REQUIRE(!vec1.is_non_zero());
+
+        // relu:
+        vec1.clear();
+        // Relu of the zero vector is still the zero vector.
+        vec1.relu();
+        REQUIRE(vec1.is_zero());
+        // Relu of a negative vector is the zero vector.
+        vec1.set_dim_val(0, -1.f);
+        vec1.set_dim_val(1, -2.f);
+        vec1.relu();
+        REQUIRE(vec1.is_zero());
+        // Relu of a positive vector is the same vector.
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(1, 2.f);
+        PrimitiveVector vec2 = vec1;
+        vec1.relu();
+        REQUIRE(vec1 == vec2);
+        // Standard Relu test.
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(1, 0.f);
+        vec1.set_dim_val(2, -4.f);
+        vec1.set_dim_val(3, 2.f);
+        vec1.set_dim_val(4, -5.f);
+        vec2 = vec1;
+        vec1.relu();
+        vec2.set_dim_val(2, 0.f);
+        vec2.set_dim_val(4, 0.f);
+        REQUIRE(vec1 == vec2);
+
+        // is_non_negative:
+        vec1.clear();
+        // The zero vector is non-negative.
+        REQUIRE(vec1.is_non_negative());
+        vec1.set_dim_val(0, 0.f);
+        REQUIRE(vec1.is_non_negative());
+        // Positive vector is non-negative.
+        vec1.set_dim_val(0, 1.f);
+        REQUIRE(vec1.is_non_negative());
+        vec1.set_dim_val(1, 2.f);
+        REQUIRE(vec1.is_non_negative());
+        // Negative vector is not non-negative.
+        vec2.clear();
+        vec2.set_dim_val(0, -1.f);
+        REQUIRE(!vec2.is_non_negative());
+        vec2.set_dim_val(1, -2.f);
+        REQUIRE(!vec2.is_non_negative());
+        // Mixed positive and negative vector is not non-negative.
+        vec2.set_dim_val(1, 2.f);
+        REQUIRE(!vec2.is_non_negative());
+        vec2.set_dim_val(0, 1.f);
+        REQUIRE(vec2.is_non_negative()); // vec2 is now <1, 2>: no negative dims left.
+
+        // manhattan_norm:
+        vec1.clear();
+        // Manhattan norm of the zero vector is zero.
+        REQUIRE(vec1.manhattan_norm() == 0.f);
+        // Manhattan norm of a non-negative vector is the sum of its dims.
+        vec1.set_dim_val(0, 1.f);
+        REQUIRE(vec1.manhattan_norm() == 1.f);
+        vec1.set_dim_val(1, 2.f);
+        vec1.set_dim_val(2, 3.f);
+        vec1.set_dim_val(3, 4.f);
+        vec1.set_dim_val(4, 5.f);
+        REQUIRE(vec1.manhattan_norm() == 15.f);
+        // Manhattan norm of a negative vector is the sum of the absolute value
+        // of its dims.
+        vec2 = vec1;
+        vec2 *= -1.f;
+        REQUIRE(vec2.manhattan_norm() == vec1.manhattan_norm());
+
+        // Projection:
+        // Basic example:
+        vec1.clear();
+        vec1.set_dim_val(0, 12.f);
+        vec1.set_dim_val(1, 32.f);
+        vec1.set_dim_val(2, 8.f);
+        vec1.set_dim_val(3, 2.f);
+        vec2.clear();
+        vec2.set_dim_val(0, 2.f);
+        vec2.set_dim_val(2, 2.f);
+        vec1.project(vec2);
+        PrimitiveVector res;
+        res.set_dim_val(0, 12.f);
+        res.set_dim_val(2, 8.f);
+        REQUIRE(vec1 == res);
+        // Projecting onto the same vector again should give the same answer.
+        vec1.project(vec2);
+        REQUIRE(vec1 == res);
+        // Projecting onto the same dimensions should not change the vector.
+        vec1.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(1, 2.f);
+        vec2.clear();
+        vec2.set_dim_val(0, 3.f);
+        vec2.set_dim_val(1, 4.f);
+        res = vec1;
+        vec1.project(vec2);
+        REQUIRE(vec1 == res);
+        // Projecting onto higher dimensions should not change the vector.
+        vec2.set_dim_val(2, 5.f);
+        res = vec1;
+        vec1.project(vec2);
+        REQUIRE(vec1 == res);
+
+        // Max of two vectors:
+        // The max of the zero vectors is the zero vector.
+        vec1.clear();
+        vec2.clear();
+        res = PrimitiveVector::max(vec1, vec2);
+        REQUIRE(res.is_zero());
+        // The max of a non-negative vector with the zero vector is the non-
+        // negative vector.
+        vec1.set_dim_val(0, 1.f);
+        res = PrimitiveVector::max(vec1, vec2);
+        REQUIRE(res == vec1);
+        res = PrimitiveVector::max(vec2, vec1);
+        REQUIRE(res == vec1);
+        // The max of a negative vector with the zero vector is the zero vector.
+        vec1.set_dim_val(0, -1.f);
+        res = PrimitiveVector::max(vec1, vec2);
+        REQUIRE(res.is_zero());
+        // Basic test:
+        // max(<5, 9, 0>, <3, 10, -2>) = <5, 10, 0>
+        vec1.clear();
+        vec1.set_dim_val(0, 5.f);
+        vec1.set_dim_val(1, 9.f);
+        vec1.set_dim_val(2, 0.f);
+        vec2.clear();
+        vec2.set_dim_val(0, 3.f);
+        vec2.set_dim_val(1, 10.f);
+        vec2.set_dim_val(2, -2.f);
+        PrimitiveVector golden;
+        golden.set_dim_val(0, 5.f);
+        golden.set_dim_val(1, 10.f);
+        golden.set_dim_val(2, 0.f);
+        res = PrimitiveVector::max(vec1, vec2);
+        REQUIRE(res == golden);
+        res = PrimitiveVector::max(vec2, vec1);
+        REQUIRE(res == golden);
+    }
+}
+
+} // namespace
+
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt
deleted file mode 100644
index 8d0f14aa789..00000000000
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-arch	circuit	script_params	vtr_flow_elapsed_time	vtr_max_mem_stage	vtr_max_mem	error	odin_synth_time	max_odin_mem	parmys_synth_time	max_parmys_mem	abc_depth	abc_synth_time	abc_cec_time	abc_sec_time	max_abc_mem	ace_time	max_ace_mem	num_clb	num_io	num_memories	num_mult	vpr_status	vpr_revision	vpr_build_info	vpr_compiler	vpr_compiled	hostname	rundir	max_vpr_mem	num_primary_inputs	num_primary_outputs	num_pre_packed_nets	num_pre_packed_blocks	num_netlist_clocks	num_post_packed_nets	num_post_packed_blocks	device_width	device_height	device_grid_tiles	device_limiting_resources	device_name	pack_mem	pack_time	placed_wirelength_est	total_swap	accepted_swap	rejected_swap	aborted_swap	place_mem	place_time	place_quench_time	placed_CPD_est	placed_setup_TNS_est	placed_setup_WNS_est	placed_geomean_nonvirtual_intradomain_critical_path_delay_est	place_delay_matrix_lookup_time	place_quench_timing_analysis_time	place_quench_sta_time	place_total_timing_analysis_time	place_total_sta_time	ap_mem	ap_time	ap_full_legalizer_mem	ap_full_legalizer_time	min_chan_width	routed_wirelength	min_chan_width_route_success_iteration	logic_block_area_total	logic_block_area_used	min_chan_width_routing_area_total	min_chan_width_routing_area_per_tile	min_chan_width_route_time	min_chan_width_total_timing_analysis_time	min_chan_width_total_sta_time	crit_path_num_rr_graph_nodes	crit_path_num_rr_graph_edges	crit_path_collapsed_nodes	crit_path_routed_wirelength	crit_path_route_success_iteration	crit_path_total_nets_routed	crit_path_total_connections_routed	crit_path_total_heap_pushes	crit_path_total_heap_pops	critical_path_delay	geomean_nonvirtual_intradomain_critical_path_delay	setup_TNS	setup_WNS	hold_TNS	hold_WNS	crit_path_routing_area_total	crit_path_routing_area_per_tile	router_lookahead_computation_time	crit_path_route_time	crit_path_create_rr_graph_time	crit_path_create_intra_cluster_rr_graph_time	crit_path_tile_lookahead_computation_time	crit_path_router_lookahead_computation_time	
crit_path_total_timing_analysis_time	crit_path_total_sta_time	
-fixed_k6_frac_N8_22nm.xml	ch_intrinsics.v	common	6.00	vpr	72.12 MiB		-1	-1	0.45	18396	3	0.09	-1	-1	33188	-1	-1	34	99	1	0	success	v8.0.0-11429-g78275509a-dirty	release VTR_ASSERT_LEVEL=2	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-02T13:22:58	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	73856	99	130	240	229	1	238	264	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	72.1 MiB	0.23	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	72.1 MiB	0.23	72.1 MiB	0.23	34	2886	15	6.79088e+06	1.00605e+06	618332.	2139.56	3.18	0.448255	0.41129	25102	150614	-1	2722	13	619	970	98287	23397	2.47058	2.47058	-148.551	-2.47058	0	0	787024.	2723.27	0.24	0.09	0.22	-1	-1	0.24	0.0502149	0.0464303	
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt
index 2b004ff8d4a..b34357d3497 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt
@@ -1,5 +1,5 @@
 arch	circuit	script_params	vtr_flow_elapsed_time	vtr_max_mem_stage	vtr_max_mem	error	odin_synth_time	max_odin_mem	parmys_synth_time	max_parmys_mem	abc_depth	abc_synth_time	abc_cec_time	abc_sec_time	max_abc_mem	ace_time	max_ace_mem	num_clb	num_io	num_memories	num_mult	vpr_status	vpr_revision	vpr_build_info	vpr_compiler	vpr_compiled	hostname	rundir	max_vpr_mem	num_primary_inputs	num_primary_outputs	num_pre_packed_nets	num_pre_packed_blocks	num_netlist_clocks	num_post_packed_nets	num_post_packed_blocks	device_width	device_height	device_grid_tiles	device_limiting_resources	device_name	pack_mem	pack_time	placed_wirelength_est	total_swap	accepted_swap	rejected_swap	aborted_swap	place_mem	place_time	place_quench_time	placed_CPD_est	placed_setup_TNS_est	placed_setup_WNS_est	placed_geomean_nonvirtual_intradomain_critical_path_delay_est	place_delay_matrix_lookup_time	place_quench_timing_analysis_time	place_quench_sta_time	place_total_timing_analysis_time	place_total_sta_time	ap_mem	ap_time	ap_full_legalizer_mem	ap_full_legalizer_time	min_chan_width	routed_wirelength	min_chan_width_route_success_iteration	logic_block_area_total	logic_block_area_used	min_chan_width_routing_area_total	min_chan_width_routing_area_per_tile	min_chan_width_route_time	min_chan_width_total_timing_analysis_time	min_chan_width_total_sta_time	crit_path_num_rr_graph_nodes	crit_path_num_rr_graph_edges	crit_path_collapsed_nodes	crit_path_routed_wirelength	crit_path_route_success_iteration	crit_path_total_nets_routed	crit_path_total_connections_routed	crit_path_total_heap_pushes	crit_path_total_heap_pops	critical_path_delay	geomean_nonvirtual_intradomain_critical_path_delay	setup_TNS	setup_WNS	hold_TNS	hold_WNS	crit_path_routing_area_total	crit_path_routing_area_per_tile	router_lookahead_computation_time	crit_path_route_time	crit_path_create_rr_graph_time	crit_path_create_intra_cluster_rr_graph_time	crit_path_tile_lookahead_computation_time	crit_path_router_lookahead_computation_time	
crit_path_total_timing_analysis_time	crit_path_total_sta_time	
-fixed_k6_frac_N8_22nm.xml	single_wire.v	common	1.86	vpr	70.71 MiB		-1	-1	0.14	16260	1	0.02	-1	-1	29996	-1	-1	0	1	0	0	success	v8.0.0-11569-g4abbff8da	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-17T15:58:41	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	72404	1	1	0	2	0	1	2	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	70.7 MiB	0.09	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	70.7 MiB	0.09	70.7 MiB	0.09	8	14	1	6.79088e+06	0	166176.	575.005	0.37	0.00145994	0.0013718	20206	45088	-1	19	1	1	1	194	45	0.7726	nan	-0.7726	-0.7726	0	0	202963.	702.294	0.11	0.00	0.08	-1	-1	0.11	0.00115987	0.00113118	
-fixed_k6_frac_N8_22nm.xml	single_ff.v	common	2.32	vpr	70.91 MiB		-1	-1	0.12	16324	1	0.02	-1	-1	29972	-1	-1	1	2	0	0	success	v8.0.0-11569-g4abbff8da	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-17T15:58:41	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	72608	2	1	3	3	1	3	4	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	70.9 MiB	0.09	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	70.9 MiB	0.09	70.9 MiB	0.09	20	32	1	6.79088e+06	13472	414966.	1435.87	0.65	0.0013976	0.00133449	22510	95286	-1	40	1	2	2	394	99	1.06752	1.06752	-2.06486	-1.06752	0	0	503264.	1741.40	0.20	0.00	0.16	-1	-1	0.20	0.0013263	0.00128456	
-fixed_k6_frac_N8_22nm.xml	ch_intrinsics.v	common	6.78	vpr	71.84 MiB		-1	-1	0.48	18336	3	0.11	-1	-1	33188	-1	-1	34	99	1	0	success	v8.0.0-11569-g4abbff8da	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-17T15:58:41	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	73568	99	130	240	229	1	238	264	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	71.8 MiB	0.27	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	71.8 MiB	0.27	71.8 MiB	0.27	34	2792	13	6.79088e+06	1.00605e+06	618332.	2139.56	3.94	0.383735	0.349796	25102	150614	-1	2694	15	616	987	97889	23015	2.47058	2.47058	-150.612	-2.47058	0	0	787024.	2723.27	0.27	0.11	0.23	-1	-1	0.27	0.0610859	0.0560772	
-fixed_k6_frac_N8_22nm.xml	diffeq1.v	common	24.21	vpr	74.00 MiB		-1	-1	0.75	22884	15	0.37	-1	-1	34280	-1	-1	55	162	0	5	success	v8.0.0-11569-g4abbff8da	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-17T15:58:41	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	75780	162	96	817	258	1	775	318	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	74.0 MiB	0.68	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	74.0 MiB	0.68	74.0 MiB	0.66	72	17155	27	6.79088e+06	2.72096e+06	1.19926e+06	4149.71	18.39	3.30386	3.12634	32302	307853	-1	15386	19	3657	8928	1435723	311083	21.8615	21.8615	-1810.62	-21.8615	0	0	1.50317e+06	5201.28	0.50	0.73	0.55	-1	-1	0.50	0.298787	0.283438	
+fixed_k6_frac_N8_22nm.xml	single_wire.v	common	4.25	vpr	70.91 MiB		-1	-1	0.18	16276	1	0.39	-1	-1	29812	-1	-1	0	1	0	0	success	v8.0.0-11571-g5eb3aa508	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-18T00:28:35	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	72608	1	1	0	2	0	1	2	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	70.9 MiB	0.14	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	70.9 MiB	0.14	70.9 MiB	0.10	8	14	1	6.79088e+06	0	166176.	575.005	0.36	0.00138004	0.00129992	20206	45088	-1	19	1	1	1	194	45	0.7726	nan	-0.7726	-0.7726	0	0	202963.	702.294	0.09	0.00	0.07	-1	-1	0.09	0.00122838	0.00119736	
+fixed_k6_frac_N8_22nm.xml	single_ff.v	common	4.68	vpr	71.03 MiB		-1	-1	0.20	16236	1	0.39	-1	-1	29696	-1	-1	1	2	0	0	success	v8.0.0-11571-g5eb3aa508	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-18T00:28:35	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	72736	2	1	3	3	1	3	4	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	71.0 MiB	0.14	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	71.0 MiB	0.14	71.0 MiB	0.10	20	31	1	6.79088e+06	13472	414966.	1435.87	0.63	0.00135413	0.0012936	22510	95286	-1	35	1	2	2	213	52	0.942216	0.942216	-1.68896	-0.942216	0	0	503264.	1741.40	0.17	0.00	0.14	-1	-1	0.17	0.00127341	0.00123431	
+fixed_k6_frac_N8_22nm.xml	ch_intrinsics.v	common	6.69	vpr	71.67 MiB		-1	-1	0.46	18220	3	0.40	-1	-1	33084	-1	-1	40	99	3	0	success	v8.0.0-11571-g5eb3aa508	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-18T00:28:35	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	73388	99	130	240	229	1	247	272	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	71.7 MiB	0.28	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	71.7 MiB	0.28	71.7 MiB	0.21	32	3122	15	6.79088e+06	2.18288e+06	586450.	2029.24	1.84	0.271358	0.247517	24814	144142	-1	2952	30	711	1121	349988	188928	2.0466	2.0466	-154.346	-2.0466	-0.04337	-0.04337	744469.	2576.02	0.25	0.25	0.22	-1	-1	0.25	0.102379	0.0937273	
+fixed_k6_frac_N8_22nm.xml	diffeq1.v	common	32.17	vpr	74.24 MiB		-1	-1	0.75	23104	15	0.61	-1	-1	34204	-1	-1	74	162	0	5	success	v8.0.0-11571-g5eb3aa508	release VTR_ASSERT_LEVEL=3	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-18T00:28:35	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	76024	162	96	817	258	1	797	337	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	74.2 MiB	1.01	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	74.2 MiB	1.01	74.2 MiB	0.60	60	15916	46	6.79088e+06	2.97693e+06	1.01997e+06	3529.29	24.68	3.56948	3.3772	29998	257685	-1	13617	19	4413	11558	1499556	342325	21.9657	21.9657	-1806.56	-21.9657	0	0	1.27783e+06	4421.56	0.40	0.79	0.44	-1	-1	0.40	0.334496	0.31821	
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt
deleted file mode 100644
index 445629f9b23..00000000000
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-arch	circuit	script_params	vtr_flow_elapsed_time	vtr_max_mem_stage	vtr_max_mem	error	odin_synth_time	max_odin_mem	parmys_synth_time	max_parmys_mem	abc_depth	abc_synth_time	abc_cec_time	abc_sec_time	max_abc_mem	ace_time	max_ace_mem	num_clb	num_io	num_memories	num_mult	vpr_status	vpr_revision	vpr_build_info	vpr_compiler	vpr_compiled	hostname	rundir	max_vpr_mem	num_primary_inputs	num_primary_outputs	num_pre_packed_nets	num_pre_packed_blocks	num_netlist_clocks	num_post_packed_nets	num_post_packed_blocks	device_width	device_height	device_grid_tiles	device_limiting_resources	device_name	pack_mem	pack_time	placed_wirelength_est	total_swap	accepted_swap	rejected_swap	aborted_swap	place_mem	place_time	place_quench_time	placed_CPD_est	placed_setup_TNS_est	placed_setup_WNS_est	placed_geomean_nonvirtual_intradomain_critical_path_delay_est	place_delay_matrix_lookup_time	place_quench_timing_analysis_time	place_quench_sta_time	place_total_timing_analysis_time	place_total_sta_time	ap_mem	ap_time	ap_full_legalizer_mem	ap_full_legalizer_time	min_chan_width	routed_wirelength	min_chan_width_route_success_iteration	logic_block_area_total	logic_block_area_used	min_chan_width_routing_area_total	min_chan_width_routing_area_per_tile	min_chan_width_route_time	min_chan_width_total_timing_analysis_time	min_chan_width_total_sta_time	crit_path_num_rr_graph_nodes	crit_path_num_rr_graph_edges	crit_path_collapsed_nodes	crit_path_routed_wirelength	crit_path_route_success_iteration	crit_path_total_nets_routed	crit_path_total_connections_routed	crit_path_total_heap_pushes	crit_path_total_heap_pops	critical_path_delay	geomean_nonvirtual_intradomain_critical_path_delay	setup_TNS	setup_WNS	hold_TNS	hold_WNS	crit_path_routing_area_total	crit_path_routing_area_per_tile	router_lookahead_computation_time	crit_path_route_time	crit_path_create_rr_graph_time	crit_path_create_intra_cluster_rr_graph_time	crit_path_tile_lookahead_computation_time	crit_path_router_lookahead_computation_time	
crit_path_total_timing_analysis_time	crit_path_total_sta_time	
-fixed_k6_frac_N8_22nm.xml	diffeq1.v	common	20.71	vpr	73.80 MiB		-1	-1	0.73	23416	15	0.36	-1	-1	34412	-1	-1	55	162	0	5	success	v8.0.0-11429-g78275509a-dirty	release VTR_ASSERT_LEVEL=2	GNU 9.4.0 on Linux-4.15.0-213-generic x86_64	2024-10-02T13:22:58	betzgrp-wintermute.eecg.utoronto.ca	/home/singera8/vtr-verilog-to-routing/vtr_flow/tasks	75572	162	96	817	258	1	775	318	17	17	289	-1	unnamed_device	-1	-1	-1	-1	-1	-1	-1	73.8 MiB	0.66	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	73.8 MiB	0.66	73.8 MiB	0.65	78	16251	19	6.79088e+06	2.72096e+06	1.27783e+06	4421.56	14.97	2.38249	2.26406	33454	332105	-1	15052	16	3634	8698	1424645	305832	21.8615	21.8615	-1844.65	-21.8615	0	0	1.60349e+06	5548.42	0.47	0.67	0.58	-1	-1	0.47	0.264426	0.252264