From 919b13eeda01859abb2c9b73c0b63e6a06e2446e Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 30 Nov 2022 09:47:21 -0500 Subject: [PATCH 001/188] Bringing back iterative improvements (1) This reverts commit 8463ea87292d3c38a5227fcbb07670dcbf60b119. --- vpr/src/pack/pack.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a1868c80778..f36693f0111 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -24,6 +24,7 @@ #include "SetupGrid.h" #include "re_cluster.h" +#include "pack_utils.h" /* #define DUMP_PB_GRAPH 1 */ /* #define DUMP_BLIF_INPUT 1 */ From 9845d06c91fc91c8a2d292d79b6b387986535205 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 30 Nov 2022 09:48:08 -0500 Subject: [PATCH 002/188] Bringing back iterative improvements (2) This reverts commit 449477e4570439067df62b3d9bccf0ddee43d303. --- vpr/src/pack/improvement/pack_move_utils.cpp | 460 ++++++++++++++++++ vpr/src/pack/improvement/pack_move_utils.h | 42 ++ vpr/src/pack/improvement/pack_utils.cpp | 147 ++++++ vpr/src/pack/improvement/pack_utils.h | 20 + .../improvement/packing_move_generator.cpp | 175 +++++++ .../pack/improvement/packing_move_generator.h | 59 +++ 6 files changed, 903 insertions(+) create mode 100644 vpr/src/pack/improvement/pack_move_utils.cpp create mode 100644 vpr/src/pack/improvement/pack_move_utils.h create mode 100644 vpr/src/pack/improvement/pack_utils.cpp create mode 100644 vpr/src/pack/improvement/pack_utils.h create mode 100644 vpr/src/pack/improvement/packing_move_generator.cpp create mode 100644 vpr/src/pack/improvement/packing_move_generator.h diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp new file mode 100644 index 00000000000..c98115082fe --- /dev/null +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -0,0 +1,460 @@ +// +// Created by elgammal on 2022-09-13. 
+// +#include "pack_move_utils.h" +#include "globals.h" +#include "cluster_placement.h" +#include "re_cluster_util.h" +#include + +static void calculate_connected_clbs_to_moving_mol(const t_pack_molecule* mol_1, std::vector& connected_blocks); +#if 0 +static void check_net_absorption(const AtomNetId& atom_net_id, + const ClusterBlockId & new_clb, + std::map direct_connections, + bool& previously_absorbed, + bool& newly_absorbed); + +static void update_cutsize_for_net(int& new_cutsize, + bool previously_absorbed, + bool newly_absorbed); +#endif + +#if 0 +int calculate_cutsize_of_clb(ClusterBlockId clb_index) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + //Define the initial conditions + int num_unabsorbed = 0; + + //list the atoms inside the current cluster + for (auto& pin_id : cluster_ctx.clb_nlist.block_pins(clb_index)) { + if (cluster_ctx.clb_nlist.pin_net(pin_id) != ClusterNetId::INVALID()) { + ++num_unabsorbed; + } + } + return num_unabsorbed; +} +#endif + +int calculate_cutsize_change(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // initialize the old and new cut sizes + int change_cutsize = 0; + + // define some temporary + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + std::set net_blocks; + std::map nets_between_old_new_blks; + + for (auto& new_loc : new_locs) { + ClusterBlockId new_block_id = new_loc.new_clb; + ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (!moving_atom) + continue; + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) + continue; + + net_blocks.clear(); + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + if (cur_atom == moving_atom) + continue; + + cur_clb = 
atom_to_cluster(cur_atom); + net_blocks.insert(cur_clb); + } + if (net_blocks.size() == 1 && *(net_blocks.begin()) == old_block_id) + change_cutsize += 1; + else if (net_blocks.size() == 1 && *(net_blocks.begin()) == new_block_id) { + change_cutsize -= 1; + if (nets_between_old_new_blks.find(atom_net) == nets_between_old_new_blks.end()) + nets_between_old_new_blks.insert(std::make_pair(atom_net, 1)); + else + nets_between_old_new_blks[atom_net]++; + } + } + } + } + + for (auto& direct_conn : nets_between_old_new_blks) { + if (direct_conn.second > 1) + change_cutsize += 2; + } + return change_cutsize; +} + +#if 0 +int update_cutsize_after_move(const std::vector& new_locs, + int original_cutsize) { + auto& atom_ctx = g_vpr_ctx.atom(); + int new_cutsize = original_cutsize; + std::map direct_connections; + + //iterate over the molecules that are moving + for(auto new_loc : new_locs) { + //iterate over the atom of a molecule + for (int i_atom = 0; i_atom < new_loc.molecule_size; i_atom++) { + if (new_loc.molecule_to_move->atom_block_ids[i_atom]) { + //iterate over the moving atom pins + for (auto& pin_id : atom_ctx.nlist.block_pins(new_loc.molecule_to_move->atom_block_ids[i_atom])) { + AtomNetId atom_net_id = atom_ctx.nlist.pin_net(pin_id); + + //if this pin is connected to a net + if (atom_net_id) { + ClusterPinId cluster_pin; + bool previously_absorbed, newly_absorbed; + + //check the status of this net (absorbed or not) before and after the proposed move + check_net_absorption(atom_net_id, + new_loc.new_clb, + direct_connections, + previously_absorbed, + newly_absorbed); + + //update the cutsize based on the absorption of a net before and after the move + update_cutsize_for_net(new_cutsize, + previously_absorbed, + newly_absorbed); + } + } + } + } + } + + /* consider the case of swapping two atoms that are directly connected + * + * In this case, the algorithm will minimize the cutsize by one when iterating over the first atom pins and minimize it again + * when 
iterating over the 2nd atom pins. However, the cutsize should remain the same. Hence, + * We are increasing the cutsize by 2 for this specific case + */ + for(auto& direct_conn: direct_connections) { + if(direct_conn.second > 1) { + new_cutsize += 2; + } + } + return new_cutsize; +} +#endif + +t_pack_molecule* pick_molecule_randomly() { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + bool molecule_picked = false; + t_pack_molecule* molecule; + + while (!molecule_picked) { + int rand_num = vtr::irand((int)atom_ctx.nlist.blocks().size() - 1); + AtomBlockId random_atom = AtomBlockId(rand_num); + ClusterBlockId clb_index = atom_to_cluster(random_atom); + if (!clb_index) + continue; + packing_multithreading_ctx.mu.lock(); + if (!packing_multithreading_ctx.clb_in_flight[clb_index]) { + packing_multithreading_ctx.clb_in_flight[clb_index] = true; + packing_multithreading_ctx.mu.unlock(); + } else { + packing_multithreading_ctx.mu.unlock(); + continue; + } + auto rng = atom_ctx.atom_molecules.equal_range(random_atom); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + molecule = kv.second; + molecule_picked = true; + break; + } + } + return molecule; +} + +bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + std::vector connected_blocks; + calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); + if (connected_blocks.empty()) + return false; + + // pick a random clb block from the connected blocks + bool clb2_not_found = true; + ClusterBlockId clb_index_2; + int iteration = 0; + while (clb2_not_found && iteration < 10) { + int rand_num = vtr::irand((int)connected_blocks.size() - 1); + clb_index_2 = connected_blocks[rand_num]; + packing_multithreading_ctx.mu.lock(); + if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + 
clb2_not_found = false; + packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; + } + packing_multithreading_ctx.mu.unlock(); + iteration++; + } + + if (clb2_not_found) + return false; + + //pick a random molecule for the chosen block + std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); + + int rand_num = vtr::irand((int)atom_ids->size() - 1); + auto it = atom_ids->begin(); + std::advance(it, rand_num); + AtomBlockId atom_id = *it; + auto rng = atom_ctx.atom_molecules.equal_range(atom_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + mol_2 = kv.second; + return true; + } + packing_multithreading_ctx.mu.lock(); + packing_multithreading_ctx.clb_in_flight[clb_index_2] = false; + packing_multithreading_ctx.mu.unlock(); + return false; +} + +bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + std::vector connected_blocks; + calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); + if (connected_blocks.empty()) + return false; + + // pick a random clb block from the connected blocks + bool clb2_not_found = true; + ClusterBlockId clb_index_2; + while (clb2_not_found) { + clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; + packing_multithreading_ctx.mu.lock(); + if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + clb2_not_found = false; + packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; + } + packing_multithreading_ctx.mu.unlock(); + } + + //pick a random molecule for the chosen block + std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); + int iteration = 0; + const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); + do { + int rand_num = vtr::irand((int)atom_ids->size() - 1); + auto it = atom_ids->begin(); + std::advance(it, rand_num); + AtomBlockId atom_id = *it; + auto 
rng = atom_ctx.atom_molecules.equal_range(atom_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + mol_2 = kv.second; + const t_pb* pb_2 = atom_ctx.lookup.atom_pb(mol_2->atom_block_ids[mol_2->root]); + if (strcmp(pb_1->pb_graph_node->pb_type->name, pb_2->pb_graph_node->pb_type->name) == 0) + return true; + else + iteration++; + } + } while (iteration < 20); + + return false; +} + +bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + std::vector connected_blocks; + calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); + if (connected_blocks.empty()) + return false; + + // pick a random clb block from the connected blocks + bool clb2_not_found = true; + ClusterBlockId clb_index_2; + while (clb2_not_found) { + clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; + packing_multithreading_ctx.mu.lock(); + if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + clb2_not_found = false; + packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; + } + packing_multithreading_ctx.mu.unlock(); + } + + //pick a random molecule for the chosen block + std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); + int iteration = 0; + const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); + do { + int rand_num = vtr::irand((int)atom_ids->size() - 1); + auto it = atom_ids->begin(); + std::advance(it, rand_num); + AtomBlockId atom_id = *it; + auto rng = atom_ctx.atom_molecules.equal_range(atom_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + mol_2 = kv.second; + const t_pb* pb_2 = atom_ctx.lookup.atom_pb(mol_2->atom_block_ids[mol_2->root]); + if (pb_1->pb_graph_node->pb_type == pb_2->pb_graph_node->pb_type) + return true; + else + iteration++; + } + } while (iteration < 20); + + return false; +} + +bool 
pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + std::vector connected_blocks; + calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); + if (connected_blocks.empty()) + return false; + + int mol_1_size = get_array_size_of_molecule(mol_1); + + // pick a random clb block from the connected blocks + bool clb2_not_found = true; + ClusterBlockId clb_index_2; + while (clb2_not_found) { + clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; + packing_multithreading_ctx.mu.lock(); + if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + clb2_not_found = false; + packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; + } + packing_multithreading_ctx.mu.unlock(); + } + + //pick a random molecule for the chosen block + std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); + int iteration = 0; + do { + int rand_num = vtr::irand((int)atom_ids->size() - 1); + auto it = atom_ids->begin(); + std::advance(it, rand_num); + AtomBlockId atom_id = *it; + auto rng = atom_ctx.atom_molecules.equal_range(atom_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + mol_2 = kv.second; + if (std::abs(mol_1_size - get_array_size_of_molecule(mol_2)) <= 1) + return true; + else + iteration++; + } + } while (iteration < 20); + + return false; +} + +void build_mol_move_description(std::vector& new_locs, + t_pack_molecule* mol_1, + ClusterBlockId clb_index_1, + t_pack_molecule* mol_2, + ClusterBlockId clb_index_2) { + molMoveDescription temp; + temp.molecule_to_move = mol_1; + temp.new_clb = clb_index_2; + temp.molecule_size = get_array_size_of_molecule(mol_1); + new_locs.push_back(temp); + + temp.molecule_to_move = mol_2; + temp.new_clb = clb_index_1; + temp.molecule_size = get_array_size_of_molecule(mol_2); + new_locs.push_back(temp); +} + +bool 
evaluate_move_based_on_cutsize(const std::vector& new_locs) { + int change_in_cutsize = calculate_cutsize_change(new_locs); + if (change_in_cutsize < 0) + return true; + else + return false; +} +/********* static functions ************/ +/***************************************/ +#if 0 +static void check_net_absorption(const AtomNetId& atom_net_id, + const ClusterBlockId & new_clb, + std::map direct_connections, + bool& previously_absorbed, + bool& newly_absorbed) { + auto& atom_ctx = g_vpr_ctx.atom(); + + //check the status of the atom net before the move (absorbed or not) + ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); + if(clb_net_id == ClusterNetId::INVALID()) { + previously_absorbed = true; + } else { + previously_absorbed = false; + } + + //check the status of the atom net after the move (absorbed or not) + newly_absorbed = true; + AtomBlockId atom_block_id; + ClusterBlockId clb_index; + for(auto& net_pin_id : atom_ctx.nlist.net_pins(atom_net_id)) { + atom_block_id = atom_ctx.nlist.pin_block(net_pin_id); + clb_index = atom_ctx.lookup.atom_clb(atom_block_id); + if(clb_index == new_clb) { + if(direct_connections.find(atom_net_id) == direct_connections.end()) { + direct_connections.insert(std::make_pair(atom_net_id, 1)); + } else { + ++direct_connections[atom_net_id]; + } + } + if(clb_index != new_clb) { + newly_absorbed = false; + break; + } + } +} +static void update_cutsize_for_net(int& new_cutsize, bool previously_absorbed, bool newly_absorbed) { + if(previously_absorbed && !newly_absorbed) { + new_cutsize++; + } else if(!previously_absorbed && newly_absorbed) { + new_cutsize--; + } +} +#endif + +static void calculate_connected_clbs_to_moving_mol(const t_pack_molecule* mol_1, std::vector& connected_blocks) { + // get the clb index of the first molecule + ClusterBlockId clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& atom_ctx = g_vpr_ctx.atom(); + + 
t_logical_block_type_ptr block_type_1 = cluster_ctx.clb_nlist.block_type(clb_index_1); + t_logical_block_type_ptr block_type_2; + + AtomNetId cur_net; + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + + // Calculate the connected blocks to the moving molecule + for (auto& atom_id : mol_1->atom_block_ids) { + if (atom_id) { + for (auto& atom_pin : atom_ctx.nlist.block_pins(atom_id)) { + cur_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(cur_net).size() > LARGE_FANOUT_LIMIT) + continue; + for (auto& net_pin : atom_ctx.nlist.net_pins(cur_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + cur_clb = atom_to_cluster(cur_atom); + block_type_2 = cluster_ctx.clb_nlist.block_type(cur_clb); + if (cur_clb != clb_index_1 && block_type_1 == block_type_2) + connected_blocks.push_back(cur_clb); + } + } + } + } +} \ No newline at end of file diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h new file mode 100644 index 00000000000..a597c9966bf --- /dev/null +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -0,0 +1,42 @@ +// +// Created by elgammal on 2022-09-13. 
+// + +#ifndef VTR_PACK_MOVE_UTILS_H +#define VTR_PACK_MOVE_UTILS_H + +#include "vpr_types.h" + +//#define pack_improve_debug + +const int LARGE_FANOUT_LIMIT = 5; + +struct molMoveDescription { + t_pack_molecule* molecule_to_move = nullptr; + int molecule_size = 0; + ClusterBlockId new_clb = INVALID_BLOCK_ID; +}; + +t_pack_molecule* pick_molecule_randomly(); +bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); +bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); +bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); +bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); + +void build_mol_move_description(std::vector& new_locs, + t_pack_molecule* mol_1, + ClusterBlockId clb_index_1, + t_pack_molecule* mol_2, + ClusterBlockId clb_index_2); + +bool evaluate_move_based_on_cutsize(const std::vector& new_locs); + +int calculate_cutsize_change(const std::vector& new_locs); + +#if 0 +int calculate_cutsize_of_clb(ClusterBlockId clb_index); +int update_cutsize_after_move(const std::vector& new_locs, + int original_cutsize); +#endif + +#endif //VTR_PACK_MOVE_UTILS_H diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp new file mode 100644 index 00000000000..ed04abc2ea8 --- /dev/null +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -0,0 +1,147 @@ +// +// Created by elgammal on 2022-07-27. 
+// + +#include "pack_utils.h" +#include "re_cluster.h" +#include "re_cluster_util.h" +#include "globals.h" +#include "clustered_netlist_fwd.h" +#include "move_utils.h" +#include "cluster_placement.h" +#include "packing_move_generator.h" +#include "pack_move_utils.h" +#include "string.h" +#include "vtr_time.h" +//#include +#include +void try_n_packing_moves(int n, std::string move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); +void init_multithreading_locks(); + +std::mutex apply_mu; + +void init_multithreading_locks() { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + auto& helper_ctx = g_vpr_ctx.cl_helper(); + + packing_multithreading_ctx.mu.lock(); + packing_multithreading_ctx.clb_in_flight.resize(helper_ctx.total_clb_num, false); + packing_multithreading_ctx.mu.unlock(); +} + +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + +#ifdef pack_improve_debug + vtr::ScopedFinishTimer lookup_timer("Building CLB atoms lookup"); +#endif + + atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); + + for (auto atom_blk_id : atom_ctx.nlist.blocks()) { + ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); + + atoms_lookup[clb_index].insert(atom_blk_id); + } +} + +void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, int) { + /* + * auto& cluster_ctx = g_vpr_ctx.clustering(); + * auto& atom_ctx = g_vpr_ctx.atom(); + */ + t_pack_iterative_stats pack_stats; + + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + init_clb_atoms_lookup(helper_ctx.atoms_lookup); + +#ifdef pack_improve_debug + float propose_sec = 0; + float evaluate_sec = 0; + float apply_suc_sec = 0; + float apply_fail_sec = 0; +#endif + + unsigned int total_num_moves = packer_opts.pack_num_moves; + //unsigned int num_threads = std::thread::hardware_concurrency(); + const int num_threads = 
1; + unsigned int moves_per_thread = total_num_moves / num_threads; + std::thread my_threads[num_threads]; + + init_multithreading_locks(); + + for (unsigned int i = 0; i < num_threads - 1; i++) { + my_threads[i] = std::thread(try_n_packing_moves, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); + } + my_threads[num_threads - 1] = std::thread(try_n_packing_moves, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); + + for (int i = 0; i < num_threads; i++) + my_threads[i].join(); + + VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", + packer_opts.pack_move_type.c_str(), + packer_opts.pack_num_moves, + pack_stats.good_moves, + pack_stats.legal_moves); +} + +void try_n_packing_moves(int n, std::string move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + bool is_proposed, is_valid, is_successful; + std::vector new_locs; + int num_good_moves = 0; + int num_legal_moves = 0; + + std::unique_ptr move_generator; + if (strcmp(move_type.c_str(), "randomSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeSwap") == 0) + move_generator = std::make_unique(); + + else { + VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); + VTR_LOG("Packing iterative improvement is aborted\n"); + return; + } + + for (int i = 0; i < n; i++) { + 
new_locs.clear(); + is_proposed = move_generator->propose_move(new_locs); + if (!is_proposed) + continue; + + is_valid = move_generator->evaluate_move(new_locs); + if (!is_valid) { + packing_multithreading_ctx.mu.lock(); + packing_multithreading_ctx.clb_in_flight[new_locs[0].new_clb] = false; + packing_multithreading_ctx.clb_in_flight[new_locs[1].new_clb] = false; + packing_multithreading_ctx.mu.unlock(); + continue; + } else + num_good_moves++; + + apply_mu.lock(); + is_successful = move_generator->apply_move(new_locs, clustering_data); + apply_mu.unlock(); + if (is_successful) + num_legal_moves++; + + packing_multithreading_ctx.mu.lock(); + packing_multithreading_ctx.clb_in_flight[new_locs[0].new_clb] = false; + packing_multithreading_ctx.clb_in_flight[new_locs[1].new_clb] = false; + packing_multithreading_ctx.mu.unlock(); + } + + pack_stats.mu.lock(); + pack_stats.good_moves += num_good_moves; + pack_stats.legal_moves += num_legal_moves; + pack_stats.mu.unlock(); +} diff --git a/vpr/src/pack/improvement/pack_utils.h b/vpr/src/pack/improvement/pack_utils.h new file mode 100644 index 00000000000..17e25d5fa00 --- /dev/null +++ b/vpr/src/pack/improvement/pack_utils.h @@ -0,0 +1,20 @@ +// +// Created by elgammal on 2022-07-27. +// + +#ifndef VTR_PACK_UTILS_H +#define VTR_PACK_UTILS_H +#include "cluster_util.h" + +struct t_pack_iterative_stats { + int good_moves = 0; + int legal_moves = 0; + std::mutex mu; +}; +void iteratively_improve_packing(const t_packer_opts& packer_opts, + t_clustering_data& clustering_data, + int verbosity); + +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); + +#endif //VTR_PACK_UTILS_H diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp new file mode 100644 index 00000000000..98956bc8860 --- /dev/null +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -0,0 +1,175 @@ +// +// Created by elgammal on 2022-07-28. 
+// + +#include "packing_move_generator.h" +#include "re_cluster.h" +#include +#include "re_cluster_util.h" +#include "pack_move_utils.h" + +const int MAX_ITERATIONS = 100; + +/******************* Packing move base class ************************/ +/********************************************************************/ +bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data) { + if (new_locs.size() == 1) { + //We need to move a molecule to an existing CLB + return (move_mol_to_existing_cluster(new_locs[0].molecule_to_move, + new_locs[1].new_clb, + true, + 0, + clustering_data)); + } else if (new_locs.size() == 2) { + //We need to swap two molecules + return (swap_two_molecules(new_locs[0].molecule_to_move, + new_locs[1].molecule_to_move, + true, + 0, + clustering_data)); + } else { + //We have a more complicated move (moving multiple molecules at once) + //TODO: This case is not supported yet + return false; + } +} + +/****************** Random packing move class *******************/ +/****************************************************************/ +bool randomPackingSwap::propose_move(std::vector& new_locs) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + t_pack_molecule *mol_1, *mol_2; + ClusterBlockId clb_index_1, clb_index_2; + t_logical_block_type_ptr block_type_1, block_type_2; + int iteration = 0; + bool found = false; + + //pick the 1st molecule randomly + mol_1 = pick_molecule_randomly(); + clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + block_type_1 = cluster_ctx.clb_nlist.block_type(clb_index_1); + + do { + mol_2 = pick_molecule_randomly(); + clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); + block_type_2 = cluster_ctx.clb_nlist.block_type(clb_index_2); + if (block_type_1 == block_type_2 && clb_index_1 != clb_index_2) { + found = true; + build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } + ++iteration; + } while (!found && iteration < 
MAX_ITERATIONS); + + return found; +} + +bool randomPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_cutsize(new_locs)); +} + +/***************** Quasi directed packing move class *******************/ +/***********************************************************************/ +bool quasiDirectedPackingSwap::propose_move(std::vector& new_locs) { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + + t_pack_molecule *mol_1, *mol_2; + ClusterBlockId clb_index_1; + + //pick the 1st molecule randomly + mol_1 = pick_molecule_randomly(); + clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + + //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster + mol_2 = nullptr; + bool found = pick_molecule_connected(mol_1, mol_2); + + if (found) { + ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); + build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } else { + packing_multithreading_ctx.mu.lock(); + packing_multithreading_ctx.clb_in_flight[clb_index_1] = false; + packing_multithreading_ctx.mu.unlock(); + } + return found; +} + +bool quasiDirectedPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_cutsize(new_locs)); +} + +/***************** Quasi directed same type packing move class *******************/ +/*********************************************************************************/ +bool quasiDirectedSameTypePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_cutsize(new_locs)); +} + +bool quasiDirectedSameTypePackingSwap::propose_move(std::vector& new_locs) { + t_pack_molecule *mol_1, *mol_2; + ClusterBlockId clb_index_1; + + //pick the 1st molecule randomly + mol_1 = pick_molecule_randomly(); + clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + + //pick the 2nd molecule from a cluster that is directly 
connected to mol_1 cluster + mol_2 = nullptr; + bool found = pick_molecule_connected_same_type(mol_1, mol_2); + + if (found) { + ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); + build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } + return found; +} + +/***************** Quasi directed compatible type packing move class *******************/ +/*********************************************************************************/ +bool quasiDirectedCompatibleTypePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_cutsize(new_locs)); +} + +bool quasiDirectedCompatibleTypePackingSwap::propose_move(std::vector& new_locs) { + t_pack_molecule *mol_1, *mol_2; + ClusterBlockId clb_index_1; + + //pick the 1st molecule randomly + mol_1 = pick_molecule_randomly(); + clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + + //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster + mol_2 = nullptr; + bool found = pick_molecule_connected_compatible_type(mol_1, mol_2); + + if (found) { + ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); + build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } + return found; +} + +/***************** Quasi directed same size packing move class *******************/ +/*********************************************************************************/ +bool quasiDirectedSameSizePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_cutsize(new_locs)); +} + +bool quasiDirectedSameSizePackingSwap::propose_move(std::vector& new_locs) { + t_pack_molecule *mol_1, *mol_2; + ClusterBlockId clb_index_1; + + //pick the 1st molecule randomly + mol_1 = pick_molecule_randomly(); + clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); + + //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster + 
mol_2 = nullptr; + bool found = pick_molecule_connected_same_size(mol_1, mol_2); + + if (found) { + ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); + build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } + return found; +} \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h new file mode 100644 index 00000000000..4b6dae4b3a6 --- /dev/null +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -0,0 +1,59 @@ +// +// Created by elgammal on 2022-07-28. +// + +#ifndef VTR_PACKINGMOVEGENERATOR_H +#define VTR_PACKINGMOVEGENERATOR_H + +#include "vpr_types.h" +#include "cluster_util.h" +#include "pack_move_utils.h" + +/** + * @brief a base class for packing move generators + * + * This class represents the base class for all move generators. + */ +class packingMoveGenerator { + public: + //Propose + virtual ~packingMoveGenerator() = default; + virtual bool propose_move(std::vector& new_locs) = 0; + virtual bool evaluate_move(const std::vector& new_locs) = 0; + bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data); +}; + +class randomPackingSwap : public packingMoveGenerator { + public: + bool propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; + +class quasiDirectedPackingSwap : public packingMoveGenerator { + public: + bool propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; + +class quasiDirectedSameTypePackingSwap : public packingMoveGenerator { + public: + bool propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; + +class quasiDirectedCompatibleTypePackingSwap : public packingMoveGenerator { + public: + bool propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; + +class quasiDirectedSameSizePackingSwap : public packingMoveGenerator { + bool 
propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; + +class quasiDirectedCompatibleTypeSameSizePackingSwap : public packingMoveGenerator { + bool propose_move(std::vector& new_locs); + bool evaluate_move(const std::vector& new_locs); +}; +#endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From b15bbb9652dd0ada52b55b3368d9bba7d85504e7 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 30 Nov 2022 09:48:59 -0500 Subject: [PATCH 003/188] Bringing back iterative improvement (3) This reverts commit f757f5da5bd99e200f2c3fb6bb7d153861804997. --- vpr/src/pack/pack.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index f36693f0111..d747da710e5 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -267,7 +267,7 @@ bool try_pack(t_packer_opts* packer_opts, /* Use the re-cluster API to edit it */ /******************* Start *************************/ VTR_LOG("Start the iterative improvement process\n"); - //iteratively_improve_packing(*packer_opts, clustering_data, 2); + iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); /* From b804664c12d560d4649a68c2c375649d76580f37 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 30 Nov 2022 14:25:04 -0500 Subject: [PATCH 004/188] update the multithreading and locking techniques for iterative improvement --- vpr/src/base/vpr_context.h | 4 +- vpr/src/pack/cluster_util.cpp | 1 - vpr/src/pack/improvement/pack_move_utils.cpp | 77 ++++++++++--------- vpr/src/pack/improvement/pack_utils.cpp | 30 ++++---- .../improvement/packing_move_generator.cpp | 4 +- 5 files changed, 55 insertions(+), 61 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index c4a10026fc2..b0a4b09c034 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -350,8 +350,8 @@ struct ClusteringHelperContext : public 
Context { * This contain data structures to synchronize multithreading of packing iterative improvement. */ struct PackingMultithreadingContext : public Context { - vtr::vector clb_in_flight; - vtr::vector mu; + //vtr::vector clb_in_flight; + vtr::vector mu; }; /** diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8bc8e87923d..6614facde65 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1674,7 +1674,6 @@ void store_cluster_info_and_free(const t_packer_opts& packer_opts, //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - free_pb_stats_recursive(cur_pb); } /* Free up data structures and requeue used molecules */ diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index c98115082fe..7dd339fe617 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -156,19 +156,15 @@ t_pack_molecule* pick_molecule_randomly() { ClusterBlockId clb_index = atom_to_cluster(random_atom); if (!clb_index) continue; - packing_multithreading_ctx.mu.lock(); - if (!packing_multithreading_ctx.clb_in_flight[clb_index]) { - packing_multithreading_ctx.clb_in_flight[clb_index] = true; - packing_multithreading_ctx.mu.unlock(); + if(packing_multithreading_ctx.mu[clb_index]->try_lock()){ + auto rng = atom_ctx.atom_molecules.equal_range(random_atom); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + molecule = kv.second; + molecule_picked = true; + break; + } } else { - packing_multithreading_ctx.mu.unlock(); - continue; - } - auto rng = atom_ctx.atom_molecules.equal_range(random_atom); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - molecule = kv.second; - molecule_picked = true; - break; + continue; //CLB is already in-flight } } return molecule; @@ -187,15 
+183,12 @@ bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { bool clb2_not_found = true; ClusterBlockId clb_index_2; int iteration = 0; - while (clb2_not_found && iteration < 10) { + while (clb2_not_found && iteration < 20) { int rand_num = vtr::irand((int)connected_blocks.size() - 1); clb_index_2 = connected_blocks[rand_num]; - packing_multithreading_ctx.mu.lock(); - if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; - packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; } - packing_multithreading_ctx.mu.unlock(); iteration++; } @@ -214,9 +207,8 @@ bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { mol_2 = kv.second; return true; } - packing_multithreading_ctx.mu.lock(); - packing_multithreading_ctx.clb_in_flight[clb_index_2] = false; - packing_multithreading_ctx.mu.unlock(); + + packing_multithreading_ctx.mu[clb_index_2]->unlock(); return false; } @@ -232,19 +224,21 @@ bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_mole // pick a random clb block from the connected blocks bool clb2_not_found = true; ClusterBlockId clb_index_2; - while (clb2_not_found) { + int iteration = 0; + while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - packing_multithreading_ctx.mu.lock(); - if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; - packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; } - packing_multithreading_ctx.mu.unlock(); + iteration++; } + if(clb2_not_found) + return false; + //pick a random molecule for the chosen block std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - int iteration = 0; + iteration = 0; const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); do { 
int rand_num = vtr::irand((int)atom_ids->size() - 1); @@ -262,6 +256,7 @@ bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_mole } } while (iteration < 20); + packing_multithreading_ctx.mu[clb_index_2]->unlock(); return false; } @@ -277,19 +272,21 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& // pick a random clb block from the connected blocks bool clb2_not_found = true; ClusterBlockId clb_index_2; - while (clb2_not_found) { + int iteration = 0; + while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - packing_multithreading_ctx.mu.lock(); - if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; - packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; } - packing_multithreading_ctx.mu.unlock(); + iteration++; } + if(clb2_not_found) + return false; + //pick a random molecule for the chosen block std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - int iteration = 0; + iteration = 0; const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); do { int rand_num = vtr::irand((int)atom_ids->size() - 1); @@ -307,6 +304,7 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& } } while (iteration < 20); + packing_multithreading_ctx.mu[clb_index_2]->unlock(); return false; } @@ -324,19 +322,21 @@ bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& // pick a random clb block from the connected blocks bool clb2_not_found = true; ClusterBlockId clb_index_2; - while (clb2_not_found) { + int iteration = 0; + while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - packing_multithreading_ctx.mu.lock(); - if (!packing_multithreading_ctx.clb_in_flight[clb_index_2]) { + 
if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; - packing_multithreading_ctx.clb_in_flight[clb_index_2] = true; } - packing_multithreading_ctx.mu.unlock(); + ++iteration; } + if(clb2_not_found) + return false; + //pick a random molecule for the chosen block std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - int iteration = 0; + iteration = 0; do { int rand_num = vtr::irand((int)atom_ids->size() - 1); auto it = atom_ids->begin(); @@ -352,6 +352,7 @@ bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& } } while (iteration < 20); + packing_multithreading_ctx.mu[clb_index_2]->unlock(); return false; } diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index ed04abc2ea8..8843b9bc846 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -15,7 +15,7 @@ #include "vtr_time.h" //#include #include -void try_n_packing_moves(int n, std::string move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); +void try_n_packing_moves(int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); void init_multithreading_locks(); std::mutex apply_mu; @@ -24,9 +24,10 @@ void init_multithreading_locks() { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); auto& helper_ctx = g_vpr_ctx.cl_helper(); - packing_multithreading_ctx.mu.lock(); - packing_multithreading_ctx.clb_in_flight.resize(helper_ctx.total_clb_num, false); - packing_multithreading_ctx.mu.unlock(); + packing_multithreading_ctx.mu.resize(helper_ctx.total_clb_num); + for(auto& m : packing_multithreading_ctx.mu) { + m = new std::mutex; + } } void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { @@ -76,8 +77,8 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ } my_threads[num_threads - 1] = std::thread(try_n_packing_moves, 
total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - for (int i = 0; i < num_threads; i++) - my_threads[i].join(); + for (auto & my_thread : my_threads) + my_thread.join(); VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", packer_opts.pack_move_type.c_str(), @@ -86,7 +87,7 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ pack_stats.legal_moves); } -void try_n_packing_moves(int n, std::string move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { +void try_n_packing_moves(int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); bool is_proposed, is_valid, is_successful; @@ -120,24 +121,19 @@ void try_n_packing_moves(int n, std::string move_type, t_clustering_data& cluste is_valid = move_generator->evaluate_move(new_locs); if (!is_valid) { - packing_multithreading_ctx.mu.lock(); - packing_multithreading_ctx.clb_in_flight[new_locs[0].new_clb] = false; - packing_multithreading_ctx.clb_in_flight[new_locs[1].new_clb] = false; - packing_multithreading_ctx.mu.unlock(); + packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); + packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); continue; } else num_good_moves++; - apply_mu.lock(); is_successful = move_generator->apply_move(new_locs, clustering_data); - apply_mu.unlock(); if (is_successful) num_legal_moves++; - packing_multithreading_ctx.mu.lock(); - packing_multithreading_ctx.clb_in_flight[new_locs[0].new_clb] = false; - packing_multithreading_ctx.clb_in_flight[new_locs[1].new_clb] = false; - packing_multithreading_ctx.mu.unlock(); + packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); + 
packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); + } pack_stats.mu.lock(); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 98956bc8860..7597eac2323 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -88,9 +88,7 @@ bool quasiDirectedPackingSwap::propose_move(std::vector& new ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); } else { - packing_multithreading_ctx.mu.lock(); - packing_multithreading_ctx.clb_in_flight[clb_index_1] = false; - packing_multithreading_ctx.mu.unlock(); + packing_multithreading_ctx.mu[clb_index_1]->unlock(); } return found; } From 275fb41fb0ff16ea30fa2db5074d523b17cb93ae Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 30 Nov 2022 14:42:19 -0500 Subject: [PATCH 005/188] Free cluster_pb_stats at the end of packing --- vpr/src/pack/pack.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index d747da710e5..5dac8031ae6 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -270,12 +270,10 @@ bool try_pack(t_packer_opts* packer_opts, iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); - /* - * auto& cluster_ctx = g_vpr_ctx.clustering(); - * for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { - * free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); - * } - */ + auto& cluster_ctx = g_vpr_ctx.clustering(); + for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { + free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); + } /******************** End **************************/ //check clustering and output it From 6b237f3d565777a5a713d821bc90e07d910190aa Mon Sep 17 00:00:00 2001 
From: MohamedElgammal Date: Fri, 2 Dec 2022 11:12:15 -0500 Subject: [PATCH 006/188] Change the pointer in pb_graph_node to indices for faster lookup and to allow multithreading --- libs/libarchfpga/src/physical_types.h | 6 +++- vpr/src/base/vpr_types.cpp | 48 +++++++++++++++------------ vpr/src/base/vpr_types.h | 2 ++ vpr/src/pack/cluster_placement.cpp | 46 ++++++++++++++++--------- 4 files changed, 65 insertions(+), 37 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index a4699e2ccd8..746c80f9af9 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1242,7 +1242,11 @@ class t_pb_graph_node { int total_pb_pins; /* only valid for top-level */ void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ - t_cluster_placement_primitive* cluster_placement_primitive; /* pointer to indexing structure useful during packing stage */ + + /* indeces for cluster_placement_primitive in the cluster_placement_stats structure (useful during packing) */ + int cluster_placement_primitive_index; + int cluster_placement_type_index; + int lb_type_index; int* input_pin_class_size; /* Stores the number of pins that belong to a particular input pin class */ int num_input_pin_class; /* number of input pin classes that this pb_graph_node has */ diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index c6c688e97c3..320b7a8360b 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -258,27 +258,9 @@ void t_cluster_placement_stats::move_primitive_to_inflight(int pb_type_index, st * @note that valid status is not changed because if the primitive is not valid, it will get properly collected later */ void t_cluster_placement_stats::insert_primitive_in_valid_primitives(std::pair cluster_placement_primitive) { - int i; - bool success = false; - int null_index = OPEN; - t_cluster_placement_primitive*
input_cluster_placement_primitive = cluster_placement_primitive.second; - - for (i = 0; i < num_pb_types && !success; i++) { - if (valid_primitives[i].empty()) { - null_index = i; - continue; - } - t_cluster_placement_primitive* cur_cluster_placement_primitive = valid_primitives[i].begin()->second; - if (input_cluster_placement_primitive->pb_graph_node->pb_type - == cur_cluster_placement_primitive->pb_graph_node->pb_type) { - success = true; - valid_primitives[i].insert(cluster_placement_primitive); - } - } - if (!success) { - VTR_ASSERT(null_index != OPEN); - valid_primitives[null_index].insert(cluster_placement_primitive); - } + + int pb_type_index = cluster_placement_primitive.second->pb_graph_node->cluster_placement_type_index; + valid_primitives[pb_type_index].insert(cluster_placement_primitive); } void t_cluster_placement_stats::flush_queue(std::unordered_multimap& queue) { @@ -320,4 +302,28 @@ void t_cluster_placement_stats::free_primitives() { delete primitive.second; } } +} + +t_cluster_placement_primitive* t_cluster_placement_stats::get_cluster_placement_primitive_from_pb_graph_node(const t_pb_graph_node* pb_graph_node) { + + auto it = valid_primitives[pb_graph_node->cluster_placement_type_index].find(pb_graph_node->cluster_placement_primitive_index); + if (it != valid_primitives[pb_graph_node->cluster_placement_type_index].end()) + return valid_primitives[pb_graph_node->cluster_placement_type_index][pb_graph_node->cluster_placement_primitive_index]; + + for(auto itr = tried.find(pb_graph_node->cluster_placement_primitive_index); itr != tried.end(); itr++) { + if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } + + for(auto itr = invalid.find(pb_graph_node->cluster_placement_primitive_index); itr != invalid.end(); itr++) { + if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } + + for(auto itr = 
in_flight.find(pb_graph_node->cluster_placement_primitive_index); itr != in_flight.end(); itr++) { + if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } + + return nullptr; } \ No newline at end of file diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 905f14cd442..f8eee40f7ae 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -491,6 +491,8 @@ class t_cluster_placement_stats { */ void free_primitives(); + t_cluster_placement_primitive* get_cluster_placement_primitive_from_pb_graph_node(const t_pb_graph_node* pb_graph_node); + private: std::unordered_multimap in_flight; /// tried; ///pb_type; @@ -208,7 +211,7 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s placement_primitive = new t_cluster_placement_primitive(); placement_primitive->pb_graph_node = pb_graph_node; placement_primitive->valid = true; - pb_graph_node->cluster_placement_primitive = placement_primitive; + pb_graph_node->lb_type_index = lb_type_index; placement_primitive->base_cost = compute_primitive_base_cost(pb_graph_node); bool success = false; @@ -220,10 +223,15 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s * - Check the pb_type of this element with the pb_type of pb_graph_node * - if matched --> insert the primitive */ - for (auto& type_primitives : cluster_placement_stats->valid_primitives) { + for(size_t type_index = 0; type_index < cluster_placement_stats->valid_primitives.size(); type_index++) { + auto& type_primitives = cluster_placement_stats->valid_primitives[type_index]; auto first_elem = type_primitives.find(0); if (first_elem != type_primitives.end() && first_elem->second->pb_graph_node->pb_type == pb_graph_node->pb_type) { - type_primitives.insert({type_primitives.size(), placement_primitive}); + size_t index = type_primitives.size(); + pb_graph_node->cluster_placement_primitive_index = index; 
+ pb_graph_node->cluster_placement_type_index = type_index; + + type_primitives.insert({index, placement_primitive}); success = true; break; } @@ -234,6 +242,8 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s * and insert the placement primitive into the new map with index 0 */ if (!success) { + pb_graph_node->cluster_placement_primitive_index = 0; + pb_graph_node->cluster_placement_type_index = cluster_placement_stats->num_pb_types; cluster_placement_stats->valid_primitives.emplace_back(); cluster_placement_stats->valid_primitives[cluster_placement_stats->valid_primitives.size() - 1].insert({0, placement_primitive}); cluster_placement_stats->num_pb_types++; @@ -245,7 +255,8 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { load_cluster_placement_stats_for_pb_graph_node(cluster_placement_stats, - &pb_graph_node->child_pb_graph_nodes[i][j][k]); + &pb_graph_node->child_pb_graph_nodes[i][j][k], + lb_type_index); } } } @@ -271,7 +282,7 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, cluster_placement_stats->flush_intermediate_queues(); /* commit primitive as used, invalidate it */ - cur = primitive->cluster_placement_primitive; + cur = cluster_placement_stats->valid_primitives[primitive->cluster_placement_type_index][primitive->cluster_placement_primitive_index]; VTR_ASSERT(cur->valid == true); cur->valid = false; @@ -321,11 +332,13 @@ void set_mode_cluster_placement_stats(const t_pb_graph_node* pb_graph_node, int static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node, const float incremental_cost, const bool valid) { + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + int i, j, k; t_cluster_placement_primitive* placement_primitive; if (pb_graph_node->is_primitive()) { /* is primitive */ - placement_primitive = 
(t_cluster_placement_primitive*)pb_graph_node->cluster_placement_primitive; + placement_primitive = helper_ctx.cluster_placement_stats[pb_graph_node->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); if (valid) { placement_primitive->incremental_cost += incremental_cost; } else { @@ -349,18 +362,20 @@ static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node static float try_place_molecule(const t_pack_molecule* molecule, t_pb_graph_node* root, t_pb_graph_node** primitives_list) { + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); int list_size, i; float cost = HUGE_POSITIVE_FLOAT; list_size = get_array_size_of_molecule(molecule); if (primitive_type_feasible(molecule->atom_block_ids[molecule->root], root->pb_type)) { - if (root->cluster_placement_primitive->valid) { + t_cluster_placement_primitive* cur_primitive = helper_ctx.cluster_placement_stats[root->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(root); + if (cur_primitive->valid) { for (i = 0; i < list_size; i++) { primitives_list[i] = nullptr; } - cost = root->cluster_placement_primitive->base_cost - + root->cluster_placement_primitive->incremental_cost; + cost = cur_primitive->base_cost + + cur_primitive->incremental_cost; primitives_list[molecule->root] = root; if (molecule->type == MOLECULE_FORCED_PACK) { if (!expand_forced_pack_molecule_placement(molecule, @@ -392,6 +407,7 @@ static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecul const t_pack_pattern_block* pack_pattern_block, t_pb_graph_node** primitives_list, float* cost) { + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); t_pb_graph_node* pb_graph_node = primitives_list[pack_pattern_block->block_id]; t_pb_graph_node* next_primitive; t_pack_pattern_connections* cur; @@ -435,9 +451,10 @@ static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecul next_primitive = next_pin->parent_node; /* Check for legality of placement, if legal, 
expand from legal placement, if not, return false */ if (molecule->atom_block_ids[next_block->block_id] && primitives_list[next_block->block_id] == nullptr) { - if (next_primitive->cluster_placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], next_primitive->pb_type)) { + t_cluster_placement_primitive* placement_primitive = helper_ctx.cluster_placement_stats[next_primitive->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(next_primitive); + if (placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], next_primitive->pb_type)) { primitives_list[next_block->block_id] = next_primitive; - *cost += next_primitive->cluster_placement_primitive->base_cost + next_primitive->cluster_placement_primitive->incremental_cost; + *cost += placement_primitive->base_cost + placement_primitive->incremental_cost; if (!expand_forced_pack_molecule_placement(molecule, next_block, primitives_list, cost)) { return false; } @@ -579,7 +596,6 @@ bool exists_free_primitive_for_atom_block(t_cluster_placement_stats* cluster_pla /* Look through list of available primitives to see if any valid */ for (i = 0; i < cluster_placement_stats->num_pb_types; i++) { - //for (auto& primitive : cluster_placement_stats->valid_primitives[i]) { if (!cluster_placement_stats->valid_primitives[i].empty() && primitive_type_feasible(blk_id, cluster_placement_stats->valid_primitives[i].begin()->second->pb_graph_node->pb_type)) { for (auto it = cluster_placement_stats->valid_primitives[i].begin(); it != cluster_placement_stats->valid_primitives[i].end();) { if (it->second->valid) From cde1d02349624867e1df6f28643ccbc39fa7bac3 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Fri, 2 Dec 2022 16:01:12 -0500 Subject: [PATCH 007/188] extend cluster_placement_stats to be a vector --- vpr/src/base/vpr_context.h | 6 +++++- vpr/src/pack/cluster.cpp | 6 +++--- vpr/src/pack/cluster_placement.cpp | 6 +++--- 
vpr/src/pack/improvement/pack_utils.cpp | 12 +++++------- vpr/src/pack/improvement/packing_move_generator.cpp | 2 +- vpr/src/pack/improvement/packing_move_generator.h | 2 +- vpr/src/pack/pack.cpp | 2 +- vpr/src/pack/re_cluster_util.cpp | 12 ++++++------ 8 files changed, 25 insertions(+), 23 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index b0a4b09c034..c426949a813 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -316,7 +316,7 @@ struct ClusteringHelperContext : public Context { std::map num_used_type_instances; // Stats keeper for placement information during packing/clustering - t_cluster_placement_stats* cluster_placement_stats; + std::vector cluster_placement_stats; // total number of models in the architecture int num_models; @@ -339,6 +339,10 @@ struct ClusteringHelperContext : public Context { // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. num_clustered_blocks-1] // unordered_set for faster insertion/deletion during the iterative improvement process of packing vtr::vector> atoms_lookup; + ClusteringHelperContext() { + cluster_placement_stats.resize(2); + } + ~ClusteringHelperContext() { delete[] primitives_list; } diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 4f1382a990d..f049d1b7ab7 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -200,7 +200,7 @@ std::map do_clustering(const t_packer_opts& pa check_for_duplicate_inputs (); #endif alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), molecule_head, + &(helper_ctx.cluster_placement_stats[0]), &(helper_ctx.primitives_list), molecule_head, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -247,7 +247,7 @@ std::map do_clustering(const t_packer_opts& pa * stores PartitionRegion information while the cluster is packed*/ 
PartitionRegion temp_cluster_pr; - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + start_new_cluster(helper_ctx.cluster_placement_stats[0], helper_ctx.primitives_list, clb_index, istart, num_used_type_instances, packer_opts.target_device_utilization, @@ -300,7 +300,7 @@ std::map do_clustering(const t_packer_opts& pa /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(clb_index)->index]); cluster_stats.num_unrelated_clustering_attempts = 0; next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), attraction_groups, diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index 32d9c654470..c0cefcedd45 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -338,7 +338,7 @@ static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node t_cluster_placement_primitive* placement_primitive; if (pb_graph_node->is_primitive()) { /* is primitive */ - placement_primitive = helper_ctx.cluster_placement_stats[pb_graph_node->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); + placement_primitive = helper_ctx.cluster_placement_stats[0][pb_graph_node->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); if (valid) { placement_primitive->incremental_cost += incremental_cost; } else { @@ -369,7 +369,7 @@ static float try_place_molecule(const t_pack_molecule* molecule, if (primitive_type_feasible(molecule->atom_block_ids[molecule->root], root->pb_type)) { - t_cluster_placement_primitive* cur_primitive = 
helper_ctx.cluster_placement_stats[root->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(root); + t_cluster_placement_primitive* cur_primitive = helper_ctx.cluster_placement_stats[0][root->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(root); if (cur_primitive->valid) { for (i = 0; i < list_size; i++) { primitives_list[i] = nullptr; @@ -451,7 +451,7 @@ static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecul next_primitive = next_pin->parent_node; /* Check for legality of placement, if legal, expand from legal placement, if not, return false */ if (molecule->atom_block_ids[next_block->block_id] && primitives_list[next_block->block_id] == nullptr) { - t_cluster_placement_primitive* placement_primitive = helper_ctx.cluster_placement_stats[next_primitive->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(next_primitive); + t_cluster_placement_primitive* placement_primitive = helper_ctx.cluster_placement_stats[0][next_primitive->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(next_primitive); if (placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], next_primitive->pb_type)) { primitives_list[next_block->block_id] = next_primitive; *cost += placement_primitive->base_cost + placement_primitive->incremental_cost; diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 8843b9bc846..3087542795b 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -15,11 +15,9 @@ #include "vtr_time.h" //#include #include -void try_n_packing_moves(int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); +void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); void init_multithreading_locks(); -std::mutex apply_mu; 
- void init_multithreading_locks() { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); auto& helper_ctx = g_vpr_ctx.cl_helper(); @@ -73,9 +71,9 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ init_multithreading_locks(); for (unsigned int i = 0; i < num_threads - 1; i++) { - my_threads[i] = std::thread(try_n_packing_moves, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); + my_threads[i] = std::thread(try_n_packing_moves, i, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); } - my_threads[num_threads - 1] = std::thread(try_n_packing_moves, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); + my_threads[num_threads - 1] = std::thread(try_n_packing_moves, num_threads-1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); for (auto & my_thread : my_threads) my_thread.join(); @@ -87,7 +85,7 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ pack_stats.legal_moves); } -void try_n_packing_moves(int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { +void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); bool is_proposed, is_valid, is_successful; @@ -127,7 +125,7 @@ void try_n_packing_moves(int n, const std::string& move_type, t_clustering_data& } else num_good_moves++; - is_successful = move_generator->apply_move(new_locs, clustering_data); + is_successful = move_generator->apply_move(new_locs, clustering_data, thread_num); if (is_successful) num_legal_moves++; diff --git 
a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 7597eac2323..e3749dc6019 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -12,7 +12,7 @@ const int MAX_ITERATIONS = 100; /******************* Packing move base class ************************/ /********************************************************************/ -bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data) { +bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int num_thread = 0) { if (new_locs.size() == 1) { //We need to move a molecule to an existing CLB return (move_mol_to_existing_cluster(new_locs[0].molecule_to_move, diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h index 4b6dae4b3a6..780b4499283 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -20,7 +20,7 @@ class packingMoveGenerator { virtual ~packingMoveGenerator() = default; virtual bool propose_move(std::vector& new_locs) = 0; virtual bool evaluate_move(const std::vector& new_locs) = 0; - bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data); + bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int num_thread); }; class randomPackingSwap : public packingMoveGenerator { diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 5dac8031ae6..1b5767d6c41 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -257,7 +257,7 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats); + free_cluster_placement_stats(helper_ctx.cluster_placement_stats[0]); delete[] 
helper_ctx.primitives_list; ++pack_iteration; diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index b3c1d2c2fa9..84c28e68e7d 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -148,10 +148,10 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; pb->mode = mode; - reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[type->index])); + reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[0][type->index])); set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[type->index]), + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][type->index]), molecule, helper_ctx.primitives_list, pb, @@ -216,14 +216,14 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, //re-build cluster placement stats rebuild_cluster_placement_stats(new_clb, new_clb_atoms); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[block_type->index]))) + if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[0][block_type->index]))) return false; //re-build router_data structure for this cluster if (!is_swap) router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][block_type->index]), molecule, helper_ctx.primitives_list, temp_pb, @@ -285,7 +285,7 @@ void revert_mol_move(const ClusterBlockId& old_clb, auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(old_clb)->index]), + e_block_pack_status 
pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(old_clb)->index]), molecule, helper_ctx.primitives_list, cluster_ctx.clb_nlist.block_pb(old_clb), @@ -637,7 +637,7 @@ static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, auto& cluster_ctx = g_vpr_ctx.clustering(); auto& atom_ctx = g_vpr_ctx.atom(); - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(clb_index)->index]); reset_cluster_placement_stats(cluster_placement_stats); set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); From 6133c4359f4f4d2c3b8c4d959dda88fd2c660a00 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 6 Dec 2022 09:52:49 -0500 Subject: [PATCH 008/188] extending cluster_placement_stats to vector --WIP --- vpr/src/pack/cluster_placement.cpp | 3 +- .../improvement/packing_move_generator.cpp | 8 ++-- .../pack/improvement/packing_move_generator.h | 2 +- vpr/src/pack/re_cluster.cpp | 30 +++++++------ vpr/src/pack/re_cluster.h | 9 ++-- vpr/src/pack/re_cluster_util.cpp | 44 ++++++------------- vpr/src/pack/re_cluster_util.h | 27 ++---------- 7 files changed, 46 insertions(+), 77 deletions(-) diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index c0cefcedd45..ce8bbabd1b6 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -252,8 +252,7 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s } else { // not a primitive, recursively call the function for all its children for (i = 0; i < pb_type->num_modes; i++) { for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { - for (k = 0; k < 
pb_type->modes[i].pb_type_children[j].num_pb; - k++) { + for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { load_cluster_placement_stats_for_pb_graph_node(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], lb_type_index); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index e3749dc6019..c2c79a1866a 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -12,21 +12,23 @@ const int MAX_ITERATIONS = 100; /******************* Packing move base class ************************/ /********************************************************************/ -bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int num_thread = 0) { +bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int thread_id) { if (new_locs.size() == 1) { //We need to move a molecule to an existing CLB return (move_mol_to_existing_cluster(new_locs[0].molecule_to_move, new_locs[1].new_clb, true, 0, - clustering_data)); + clustering_data, + thread_id)); } else if (new_locs.size() == 2) { //We need to swap two molecules return (swap_two_molecules(new_locs[0].molecule_to_move, new_locs[1].molecule_to_move, true, 0, - clustering_data)); + clustering_data, + thread_id)); } else { //We have a more complicated move (moving multiple molecules at once) //TODO: This case is not supported yet diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h index 780b4499283..983f6235761 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -20,7 +20,7 @@ class packingMoveGenerator { virtual ~packingMoveGenerator() = default; virtual bool propose_move(std::vector& new_locs) = 0; virtual bool evaluate_move(const std::vector& new_locs) = 0; - 
bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int num_thread); + bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int thread_id); }; class randomPackingSwap : public packingMoveGenerator { diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 92f52b8f8e5..734b4c8d5b8 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -7,7 +7,8 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, - t_clustering_data& clustering_data) { + t_clustering_data& clustering_data, + int thread_id) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& device_ctx = g_vpr_ctx.device(); @@ -66,14 +67,15 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, verbosity, clustering_data, &router_data, - temp_cluster_pr); + temp_cluster_pr, + thread_id); //Commit or revert the move if (is_created) { commit_mol_move(old_clb, new_clb, during_packing, true); VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); + revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data, thread_id); VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); } @@ -93,7 +95,8 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, const ClusterBlockId& new_clb, bool during_packing, int verbosity, - t_clustering_data& clustering_data) { + t_clustering_data& clustering_data, + int thread_id) { //define local variables bool is_removed, is_added; AtomBlockId root_atom_id = molecule->atom_block_ids[molecule->root]; @@ -128,14 +131,14 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, //Add the atom to the new cluster t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, false, clustering_data, new_router_data); + is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, false, clustering_data, new_router_data, thread_id); //Commit or revert the move if (is_added) { commit_mol_move(old_clb, new_clb, during_packing, false); VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); + revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data, thread_id); VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); } @@ -156,7 +159,8 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, t_pack_molecule* molecule_2, bool during_packing, int verbosity, - t_clustering_data& clustering_data) { + t_clustering_data& clustering_data, + int thread_id) { //define local variables PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; @@ -201,10 +205,10 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); if (!mol_1_success) { - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); @@ -214,12 +218,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, return false; } - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); + mol_2_success = 
pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); if (!mol_2_success) { remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h index 5ca2489aac4..610f09448ad 100644 --- a/vpr/src/pack/re_cluster.h +++ b/vpr/src/pack/re_cluster.h @@ -26,7 +26,8 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, - t_clustering_data& clustering_data); + t_clustering_data& clustering_data, + int thread_id); /** * @brief This function moves a molecule out of its cluster to another cluster that already exists. @@ -41,7 +42,8 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, const ClusterBlockId& new_clb, bool during_packing, int verbosity, - t_clustering_data& clustering_data); + t_clustering_data& clustering_data, + int thread_id); /** * @brief This function swap two molecules between two different clusters. 
@@ -56,5 +58,6 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, t_pack_molecule* molecule_2, bool during_packing, int verbosity, - t_clustering_data& clustering_data); + t_clustering_data& clustering_data, + int thread_id); #endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 84c28e68e7d..aefb67327b6 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -35,8 +35,7 @@ static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, const ClusterBlockId& old_clb, const ClusterBlockId& new_clb); -static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, - const std::unordered_set* clb_atoms); +static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id); static void update_cluster_pb_stats(const t_pack_molecule* molecule, int molecule_size, @@ -114,17 +113,7 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr return (router_data); } -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - const t_logical_block_type_ptr& type, - const int mode, - const int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr) { +bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, const int mode, const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, PartitionRegion& temp_cluster_pr, int thread_id) { auto& atom_ctx = g_vpr_ctx.atom(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); @@ -148,10 +137,10 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, 
e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; pb->mode = mode; - reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[0][type->index])); + reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[thread_id][type->index])); set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][type->index]), + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][type->index]), molecule, helper_ctx.primitives_list, pb, @@ -197,14 +186,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, return (pack_result == BLK_PASSED); } -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - const ClusterBlockId new_clb, - std::unordered_set* new_clb_atoms, - bool during_packing, - bool is_swap, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data) { +bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); @@ -215,15 +197,15 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, t_pb* temp_pb = cluster_ctx.clb_nlist.block_pb(new_clb); //re-build cluster placement stats - rebuild_cluster_placement_stats(new_clb, new_clb_atoms); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[0][block_type->index]))) + rebuild_cluster_placement_stats(new_clb, new_clb_atoms, thread_id); + if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[thread_id][block_type->index]))) return false; //re-build router_data structure for this cluster if (!is_swap) router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, 
new_clb_atoms); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][block_type->index]), + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][block_type->index]), molecule, helper_ctx.primitives_list, temp_pb, @@ -280,12 +262,13 @@ void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, - t_clustering_data& clustering_data) { + t_clustering_data& clustering_data, + int thread_id) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(old_clb)->index]), + e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(old_clb)->index]), molecule, helper_ctx.primitives_list, cluster_ctx.clb_nlist.block_pb(old_clb), @@ -631,13 +614,12 @@ static bool count_children_pbs(const t_pb* pb) { } #endif -static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, - const std::unordered_set* clb_atoms) { +static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.clustering(); auto& atom_ctx = g_vpr_ctx.atom(); - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(clb_index)->index]); + t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(clb_index)->index]); reset_cluster_placement_stats(cluster_placement_stats); set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, 
cluster_ctx.clb_nlist.block_pb(clb_index)->mode); diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index 201321f741a..3a7861ef8f1 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -77,17 +77,7 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, * @param router_data: returns the intra logic block router data. * @param temp_cluster_pr: returns the partition region of the new cluster. */ -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - const t_logical_block_type_ptr& type, - const int mode, - const int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr); +bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, const int mode, const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, PartitionRegion& temp_cluster_pr, int thread_id); /** * @brief A function that packs a molecule into an existing cluster @@ -100,14 +90,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, * (is updated if this function is called during packing, especially intra_lb_routing data member). * @param router_data: returns the intra logic block router data. 
*/ -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - const ClusterBlockId clb_index, - std::unordered_set* clb_atoms, - bool during_packing, - bool is_swap, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data); +bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id); /** * @brief A function that fix the clustered netlist if the move is performed @@ -133,11 +116,7 @@ void commit_mol_move(const ClusterBlockId& old_clb, bool during_packing, bool new_clb_created); -void revert_mol_move(const ClusterBlockId& old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data); +void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, t_clustering_data& clustering_data, int thread_id); bool is_cluster_legal(t_lb_router_data*& router_data); From 424b4a686c5a1aca7d90047da19b02bc865266d5 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 8 Dec 2022 12:24:26 -0500 Subject: [PATCH 009/188] extending cluster_placement_stats to vector --WIP2 --- vpr/src/pack/cluster.cpp | 2 + vpr/src/pack/cluster_placement.cpp | 53 ++++++++++--------------- vpr/src/pack/cluster_placement.h | 3 +- vpr/src/pack/cluster_util.cpp | 2 +- vpr/src/pack/improvement/pack_utils.cpp | 2 +- vpr/src/pack/pack.cpp | 1 + vpr/src/pack/re_cluster_util.cpp | 9 +++-- 7 files changed, 32 insertions(+), 40 deletions(-) diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index f049d1b7ab7..ebd96846360 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -204,6 +204,8 @@ std::map do_clustering(const t_packer_opts& pa clustering_data, net_output_feeds_driving_block_input, 
unclustered_list_head_size, cluster_stats.num_molecules); + helper_ctx.cluster_placement_stats[1] = alloc_and_load_cluster_placement_stats(); + auto primitive_candidate_block_types = identify_primitive_candidate_block_types(); // find the cluster type that has lut primitives auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types); diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index ce8bbabd1b6..63fd4d46c86 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -32,16 +32,12 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_stats* cluster_placement_stats, t_pb_graph_node* pb_graph_node, int lb_type_index); -static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node, +static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_placement_stats, + const t_pb_graph_node* pb_graph_node, float incremental_cost, bool valid); -static float try_place_molecule(const t_pack_molecule* molecule, - t_pb_graph_node* root, - t_pb_graph_node** primitives_list); -static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecule, - const t_pack_pattern_block* pack_pattern_block, - t_pb_graph_node** primitives_list, - float* cost); +static float try_place_molecule(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, t_pb_graph_node* root, t_pb_graph_node** primitives_list); +static bool expand_forced_pack_molecule_placement(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, const t_pack_pattern_block* pack_pattern_block, t_pb_graph_node** primitives_list, float* cost); static t_pb_graph_pin* expand_pack_molecule_pin_edge(int pattern_id, const t_pb_graph_pin* cur_pin, bool forward); @@ -139,7 +135,7 @@ bool get_next_primitive_list(t_cluster_placement_stats* cluster_placement_stats, } /* try place molecule at root location cur */ - 
cost = try_place_molecule(molecule, it->second->pb_graph_node, primitives_list); + cost = try_place_molecule(cluster_placement_stats, molecule, it->second->pb_graph_node, primitives_list); // if the cost is lower than the best, or is equal to the best but this // primitive is more available in the cluster mark it as the best primitive @@ -162,7 +158,7 @@ bool get_next_primitive_list(t_cluster_placement_stats* cluster_placement_stats, } } else { /* populate primitive list with best */ - cost = try_place_molecule(molecule, best->second->pb_graph_node, primitives_list); + cost = try_place_molecule(cluster_placement_stats, molecule, best->second->pb_graph_node, primitives_list); VTR_ASSERT(cost == lowest_cost); /* take out best node and put it in flight */ @@ -281,7 +277,8 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, cluster_placement_stats->flush_intermediate_queues(); /* commit primitive as used, invalidate it */ - cur = cluster_placement_stats->valid_primitives[primitive->cluster_placement_type_index][primitive->cluster_placement_primitive_index]; + //cur = cluster_placement_stats->valid_primitives[primitive->cluster_placement_type_index][primitive->cluster_placement_primitive_index]; + cur = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(primitive); VTR_ASSERT(cur->valid == true); cur->valid = false; @@ -297,7 +294,7 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { if (&pb_graph_node->child_pb_graph_nodes[i][j][k] != skip) { - update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], + update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], incr_cost, (bool)(i == valid_mode)); } } @@ -310,13 +307,13 @@ void commit_primitive(t_cluster_placement_stats* 
cluster_placement_stats, /** * Set mode of cluster */ -void set_mode_cluster_placement_stats(const t_pb_graph_node* pb_graph_node, int mode) { +void set_mode_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* pb_graph_node, int mode) { int i, j, k; for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { if (i != mode) { for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { - update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], 0, false); + update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], 0, false); } } } @@ -328,16 +325,15 @@ void set_mode_cluster_placement_stats(const t_pb_graph_node* pb_graph_node, int * For modes invalidated by pb_graph_node, invalidate primitive * int distance is the distance of current pb_graph_node from original */ -static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node, +static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_placement_stats, + const t_pb_graph_node* pb_graph_node, const float incremental_cost, const bool valid) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - int i, j, k; t_cluster_placement_primitive* placement_primitive; if (pb_graph_node->is_primitive()) { /* is primitive */ - placement_primitive = helper_ctx.cluster_placement_stats[0][pb_graph_node->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); + placement_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); if (valid) { placement_primitive->incremental_cost += incremental_cost; } else { @@ -347,7 +343,7 @@ static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { for (j = 0; j < 
pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { - update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], + update_primitive_cost_or_status(cluster_placement_stats,&pb_graph_node->child_pb_graph_nodes[i][j][k], incremental_cost, valid); } } @@ -358,17 +354,14 @@ static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node /** * Try place molecule at root location, populate primitives list with locations of placement if successful */ -static float try_place_molecule(const t_pack_molecule* molecule, - t_pb_graph_node* root, - t_pb_graph_node** primitives_list) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); +static float try_place_molecule(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, t_pb_graph_node* root, t_pb_graph_node** primitives_list) { int list_size, i; float cost = HUGE_POSITIVE_FLOAT; list_size = get_array_size_of_molecule(molecule); if (primitive_type_feasible(molecule->atom_block_ids[molecule->root], root->pb_type)) { - t_cluster_placement_primitive* cur_primitive = helper_ctx.cluster_placement_stats[0][root->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(root); + t_cluster_placement_primitive* cur_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(root); if (cur_primitive->valid) { for (i = 0; i < list_size; i++) { primitives_list[i] = nullptr; @@ -377,7 +370,7 @@ static float try_place_molecule(const t_pack_molecule* molecule, + cur_primitive->incremental_cost; primitives_list[molecule->root] = root; if (molecule->type == MOLECULE_FORCED_PACK) { - if (!expand_forced_pack_molecule_placement(molecule, + if (!expand_forced_pack_molecule_placement(cluster_placement_stats, molecule, molecule->pack_pattern->root_block, primitives_list, &cost)) { return HUGE_POSITIVE_FLOAT; @@ -402,11 +395,7 @@ static float 
try_place_molecule(const t_pack_molecule* molecule, * Expand molecule at pb_graph_node * Assumes molecule and pack pattern connections have fan-out 1 */ -static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecule, - const t_pack_pattern_block* pack_pattern_block, - t_pb_graph_node** primitives_list, - float* cost) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); +static bool expand_forced_pack_molecule_placement(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, const t_pack_pattern_block* pack_pattern_block, t_pb_graph_node** primitives_list, float* cost) { t_pb_graph_node* pb_graph_node = primitives_list[pack_pattern_block->block_id]; t_pb_graph_node* next_primitive; t_pack_pattern_connections* cur; @@ -450,11 +439,11 @@ static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecul next_primitive = next_pin->parent_node; /* Check for legality of placement, if legal, expand from legal placement, if not, return false */ if (molecule->atom_block_ids[next_block->block_id] && primitives_list[next_block->block_id] == nullptr) { - t_cluster_placement_primitive* placement_primitive = helper_ctx.cluster_placement_stats[0][next_primitive->lb_type_index].get_cluster_placement_primitive_from_pb_graph_node(next_primitive); + t_cluster_placement_primitive* placement_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(next_primitive); if (placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], next_primitive->pb_type)) { primitives_list[next_block->block_id] = next_primitive; *cost += placement_primitive->base_cost + placement_primitive->incremental_cost; - if (!expand_forced_pack_molecule_placement(molecule, next_block, primitives_list, cost)) { + if (!expand_forced_pack_molecule_placement(cluster_placement_stats, molecule, next_block, primitives_list, cost)) { return false; } } else { diff --git 
a/vpr/src/pack/cluster_placement.h b/vpr/src/pack/cluster_placement.h index 8715e611222..fe4529db4d6 100644 --- a/vpr/src/pack/cluster_placement.h +++ b/vpr/src/pack/cluster_placement.h @@ -14,8 +14,7 @@ bool get_next_primitive_list( t_pb_graph_node** primitives_list); void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* primitive); -void set_mode_cluster_placement_stats(const t_pb_graph_node* complex_block, - int mode); +void set_mode_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* pb_graph_node, int mode); void reset_cluster_placement_stats( t_cluster_placement_stats* cluster_placement_stats); diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 6614facde65..78efd2423cd 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -2113,7 +2113,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, pb->mode = j; reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(pb->pb_graph_node, j); + set_mode_cluster_placement_stats(&cluster_placement_stats[type->index], pb->pb_graph_node, j); //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, //which allows all cluster pins to be used. 
This ensures that if we have a large diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 3087542795b..d963884ad45 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -64,7 +64,7 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ unsigned int total_num_moves = packer_opts.pack_num_moves; //unsigned int num_threads = std::thread::hardware_concurrency(); - const int num_threads = 1; + const int num_threads = 2; unsigned int moves_per_thread = total_num_moves / num_threads; std::thread my_threads[num_threads]; diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 1b5767d6c41..29d58a666ad 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -258,6 +258,7 @@ bool try_pack(t_packer_opts* packer_opts, //attraction_groups.reset_attraction_groups(); free_cluster_placement_stats(helper_ctx.cluster_placement_stats[0]); + free_cluster_placement_stats(helper_ctx.cluster_placement_stats[1]); delete[] helper_ctx.primitives_list; ++pack_iteration; diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index aefb67327b6..332aa00754f 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -137,10 +137,11 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; pb->mode = mode; - reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[thread_id][type->index])); - set_mode_cluster_placement_stats(pb->pb_graph_node, mode); + t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[thread_id][type->index]); + reset_cluster_placement_stats(cluster_placement_stats); + set_mode_cluster_placement_stats(cluster_placement_stats, pb->pb_graph_node, mode); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][type->index]), + 
pack_result = try_pack_molecule(cluster_placement_stats, molecule, helper_ctx.primitives_list, pb, @@ -621,7 +622,7 @@ static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, con t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(clb_index)->index]); reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); + set_mode_cluster_placement_stats(cluster_placement_stats, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); for (auto& atom : *clb_atoms) { const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom); From 21d4be7bae70ac092262584dc8e5a2ba4857bfdc Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 8 Dec 2022 13:27:52 -0500 Subject: [PATCH 010/188] Multitreading working -- need some refactoring to be more readable --- vpr/src/base/vpr_context.h | 6 ++++-- vpr/src/pack/cluster.cpp | 6 +++--- vpr/src/pack/cluster_util.cpp | 5 +++++ vpr/src/pack/pack.cpp | 3 ++- vpr/src/pack/re_cluster_util.cpp | 6 +++--- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index c426949a813..6cbb7065d7f 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -322,7 +322,7 @@ struct ClusteringHelperContext : public Context { int num_models; int max_cluster_size; - t_pb_graph_node** primitives_list; + std::vector primitives_list; bool enable_pin_feasibility_filter; int feasible_block_array_size; @@ -341,10 +341,12 @@ struct ClusteringHelperContext : public Context { vtr::vector> atoms_lookup; ClusteringHelperContext() { cluster_placement_stats.resize(2); + primitives_list.resize(2); } ~ClusteringHelperContext() { - delete[] primitives_list; + delete[] primitives_list[0]; + delete[] primitives_list[1]; 
} }; diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index ebd96846360..17d016f0007 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -200,7 +200,7 @@ std::map do_clustering(const t_packer_opts& pa check_for_duplicate_inputs (); #endif alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats[0]), &(helper_ctx.primitives_list), molecule_head, + &(helper_ctx.cluster_placement_stats[0]), &(helper_ctx.primitives_list[0]), molecule_head, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -249,7 +249,7 @@ std::map do_clustering(const t_packer_opts& pa * stores PartitionRegion information while the cluster is packed*/ PartitionRegion temp_cluster_pr; - start_new_cluster(helper_ctx.cluster_placement_stats[0], helper_ctx.primitives_list, + start_new_cluster(helper_ctx.cluster_placement_stats[0], helper_ctx.primitives_list[0], clb_index, istart, num_used_type_instances, packer_opts.target_device_utilization, @@ -344,7 +344,7 @@ std::map do_clustering(const t_packer_opts& pa prev_molecule, next_molecule, num_repeated_molecules, - helper_ctx.primitives_list, + helper_ctx.primitives_list[0], cluster_stats, helper_ctx.total_clb_num, num_models, diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 78efd2423cd..9003da6743e 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -580,6 +580,11 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, *primitives_list = new t_pb_graph_node*[max_molecule_size]; for (int i = 0; i < max_molecule_size; i++) (*primitives_list)[i] = nullptr; + + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + helper_ctx.primitives_list[1] = new t_pb_graph_node*[max_molecule_size]; + for (int i = 0; i < max_molecule_size; i++) + helper_ctx.primitives_list[1][i] = nullptr; } /*****************************************/ diff --git 
a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 29d58a666ad..41b1c4cec53 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -259,7 +259,8 @@ bool try_pack(t_packer_opts* packer_opts, free_cluster_placement_stats(helper_ctx.cluster_placement_stats[0]); free_cluster_placement_stats(helper_ctx.cluster_placement_stats[1]); - delete[] helper_ctx.primitives_list; + delete[] helper_ctx.primitives_list[0]; + delete[] helper_ctx.primitives_list[1]; ++pack_iteration; } diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 332aa00754f..329940c3f98 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -143,7 +143,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ pack_result = try_pack_molecule(cluster_placement_stats, molecule, - helper_ctx.primitives_list, + helper_ctx.primitives_list[thread_id], pb, helper_ctx.num_models, helper_ctx.max_cluster_size, @@ -208,7 +208,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][block_type->index]), molecule, - helper_ctx.primitives_list, + helper_ctx.primitives_list[thread_id], temp_pb, helper_ctx.num_models, helper_ctx.max_cluster_size, @@ -271,7 +271,7 @@ void revert_mol_move(const ClusterBlockId& old_clb, PartitionRegion temp_cluster_pr_original; e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(old_clb)->index]), molecule, - helper_ctx.primitives_list, + helper_ctx.primitives_list[thread_id], cluster_ctx.clb_nlist.block_pb(old_clb), helper_ctx.num_models, helper_ctx.max_cluster_size, From 3ee0bc44c00bba89b34bdc1a804440e262970b97 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 8 Dec 2022 14:54:31 -0500 Subject: [PATCH 011/188] Multithreading is working --- libs/libarchfpga/src/physical_types.h | 2 +- 
vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 6 ++++ vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_context.h | 9 ------ vpr/src/base/vpr_types.cpp | 26 ++++++++---------- vpr/src/base/vpr_types.h | 1 + vpr/src/pack/cluster.cpp | 5 +--- vpr/src/pack/cluster_placement.cpp | 4 +-- vpr/src/pack/cluster_util.cpp | 29 ++++++++++++-------- vpr/src/pack/cluster_util.h | 5 ++-- vpr/src/pack/improvement/pack_move_utils.cpp | 16 +++++------ vpr/src/pack/improvement/pack_utils.cpp | 12 ++++---- vpr/src/pack/pack.cpp | 8 +++--- 14 files changed, 62 insertions(+), 63 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 746c80f9af9..eff91fc970f 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1241,7 +1241,7 @@ class t_pb_graph_node { int total_pb_pins; /* only valid for top-level */ - void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ + void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ /* indeces for cluster_placement_primitive in the cluster_placement_stats structure (useful during packing) */ int cluster_placement_primitive_index; diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 33bd010346e..7b428ff4450 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -573,6 +573,7 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->timing_update_type = Options.timing_update_type; PackerOpts->pack_num_moves = Options.pack_num_moves; + PackerOpts->pack_num_threads = Options.pack_num_threads; PackerOpts->pack_move_type = Options.pack_move_type; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index d0870e54591..d342650a339 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1787,6 +1787,12 @@ argparse::ArgumentParser 
create_arg_parser(std::string prog_name, t_options& arg .default_value("100000") .show_in(argparse::ShowIn::HELP_ONLY); + pack_grp.add_argument(args.pack_num_threads, "--pack_num_threads") + .help( + "The number of threads used in the packing iterative improvement") + .default_value("1") + .show_in(argparse::ShowIn::HELP_ONLY); + pack_grp.add_argument(args.pack_move_type, "--pack_move_type") .help( "The move type used in packing." diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index d1e9e1e1d61..b68815ef0e9 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -100,6 +100,7 @@ struct t_options { argparse::ArgValue pack_verbosity; argparse::ArgValue use_attraction_groups; argparse::ArgValue pack_num_moves; + argparse::ArgValue pack_num_threads; argparse::ArgValue pack_move_type; /* Placement options */ argparse::ArgValue Seed; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 6cbb7065d7f..e193bcdea3b 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -339,15 +339,6 @@ struct ClusteringHelperContext : public Context { // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. 
num_clustered_blocks-1] // unordered_set for faster insertion/deletion during the iterative improvement process of packing vtr::vector> atoms_lookup; - ClusteringHelperContext() { - cluster_placement_stats.resize(2); - primitives_list.resize(2); - } - - ~ClusteringHelperContext() { - delete[] primitives_list[0]; - delete[] primitives_list[1]; - } }; /** diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 320b7a8360b..32c463d0eea 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -258,7 +258,6 @@ void t_cluster_placement_stats::move_primitive_to_inflight(int pb_type_index, st * @note that valid status is not changed because if the primitive is not valid, it will get properly collected later */ void t_cluster_placement_stats::insert_primitive_in_valid_primitives(std::pair cluster_placement_primitive) { - int pb_type_index = cluster_placement_primitive.second->pb_graph_node->cluster_placement_type_index; valid_primitives[pb_type_index].insert(cluster_placement_primitive); } @@ -305,25 +304,24 @@ void t_cluster_placement_stats::free_primitives() { } t_cluster_placement_primitive* t_cluster_placement_stats::get_cluster_placement_primitive_from_pb_graph_node(const t_pb_graph_node* pb_graph_node) { - auto it = valid_primitives[pb_graph_node->cluster_placement_type_index].find(pb_graph_node->cluster_placement_primitive_index); if (it != valid_primitives[pb_graph_node->cluster_placement_type_index].end()) return valid_primitives[pb_graph_node->cluster_placement_type_index][pb_graph_node->cluster_placement_primitive_index]; - for(auto itr = tried.find(pb_graph_node->cluster_placement_primitive_index); itr != tried.end(); itr++) { - if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } + for (auto itr = tried.find(pb_graph_node->cluster_placement_primitive_index); itr != tried.end(); itr++) { + if 
(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } - for(auto itr = invalid.find(pb_graph_node->cluster_placement_primitive_index); itr != invalid.end(); itr++) { - if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } + for (auto itr = invalid.find(pb_graph_node->cluster_placement_primitive_index); itr != invalid.end(); itr++) { + if (itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } - for(auto itr = in_flight.find(pb_graph_node->cluster_placement_primitive_index); itr != in_flight.end(); itr++) { - if(itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } + for (auto itr = in_flight.find(pb_graph_node->cluster_placement_primitive_index); itr != in_flight.end(); itr++) { + if (itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) + return itr->second; + } return nullptr; } \ No newline at end of file diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index f8eee40f7ae..7dec5c49341 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -858,6 +858,7 @@ struct t_packer_opts { e_timing_update_type timing_update_type; bool use_attraction_groups; int pack_num_moves; + int pack_num_threads; std::string pack_move_type; }; diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 17d016f0007..016f7265008 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -199,13 +199,10 @@ std::map do_clustering(const t_packer_opts& pa #if 0 check_for_duplicate_inputs (); #endif - alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats[0]), &(helper_ctx.primitives_list[0]), molecule_head, + alloc_and_init_clustering(packer_opts, 
max_molecule_stats, molecule_head, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); - helper_ctx.cluster_placement_stats[1] = alloc_and_load_cluster_placement_stats(); - auto primitive_candidate_block_types = identify_primitive_candidate_block_types(); // find the cluster type that has lut primitives auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types); diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index 63fd4d46c86..a5db78ba357 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -219,7 +219,7 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s * - Check the pb_type of this element with the pb_type of pb_graph_node * - if matched --> insert the primitive */ - for(size_t type_index = 0; type_index < cluster_placement_stats->valid_primitives.size(); type_index++) { + for (size_t type_index = 0; type_index < cluster_placement_stats->valid_primitives.size(); type_index++) { auto& type_primitives = cluster_placement_stats->valid_primitives[type_index]; auto first_elem = type_primitives.find(0); if (first_elem != type_primitives.end() && first_elem->second->pb_graph_node->pb_type == pb_graph_node->pb_type) { @@ -343,7 +343,7 @@ static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_p for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { - update_primitive_cost_or_status(cluster_placement_stats,&pb_graph_node->child_pb_graph_nodes[i][j][k], + update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], incremental_cost, valid); } } diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 9003da6743e..e445d8585cc 100644 
--- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -494,9 +494,8 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, } /*****************************************/ -void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, +void alloc_and_init_clustering(const t_packer_opts& packer_opts, + const t_molecule_stats& max_molecule_stats, t_pack_molecule* molecules_head, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, @@ -505,6 +504,15 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, /* Allocates the main data structures used for clustering and properly * * initializes them. */ + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + if (packer_opts.pack_num_moves > 0) { + helper_ctx.primitives_list.resize(packer_opts.pack_num_threads); + helper_ctx.cluster_placement_stats.resize(packer_opts.pack_num_threads); + } else { + helper_ctx.primitives_list.resize(1); + helper_ctx.cluster_placement_stats.resize(1); + } + t_molecule_link* next_ptr; t_pack_molecule* cur_molecule; t_pack_molecule** molecule_array; @@ -563,7 +571,8 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, } /* alloc and load cluster placement info */ - *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); + for (int thread_id = 0; thread_id < packer_opts.pack_num_threads; thread_id++) + helper_ctx.cluster_placement_stats[thread_id] = alloc_and_load_cluster_placement_stats(); /* alloc array that will store primitives that a molecule gets placed to, * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list @@ -577,14 +586,12 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, } cur_molecule = cur_molecule->next; } - *primitives_list = new 
t_pb_graph_node*[max_molecule_size]; - for (int i = 0; i < max_molecule_size; i++) - (*primitives_list)[i] = nullptr; - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - helper_ctx.primitives_list[1] = new t_pb_graph_node*[max_molecule_size]; - for (int i = 0; i < max_molecule_size; i++) - helper_ctx.primitives_list[1][i] = nullptr; + for (int thread_id = 0; thread_id < packer_opts.pack_num_threads; thread_id++) { + helper_ctx.primitives_list[thread_id] = new t_pb_graph_node*[max_molecule_size]; + for (int i = 0; i < max_molecule_size; i++) + helper_ctx.primitives_list[thread_id][i] = nullptr; + } } /*****************************************/ diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 1316229abc5..6048bc63d2b 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -147,9 +147,8 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb); -void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, +void alloc_and_init_clustering(const t_packer_opts& packer_opts, + const t_molecule_stats& max_molecule_stats, t_pack_molecule* molecules_head, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index 7dd339fe617..7babeab2e2c 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -156,7 +156,7 @@ t_pack_molecule* pick_molecule_randomly() { ClusterBlockId clb_index = atom_to_cluster(random_atom); if (!clb_index) continue; - if(packing_multithreading_ctx.mu[clb_index]->try_lock()){ + if (packing_multithreading_ctx.mu[clb_index]->try_lock()) { auto rng = atom_ctx.atom_molecules.equal_range(random_atom); for (const 
auto& kv : vtr::make_range(rng.first, rng.second)) { molecule = kv.second; @@ -186,7 +186,7 @@ bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { while (clb2_not_found && iteration < 20) { int rand_num = vtr::irand((int)connected_blocks.size() - 1); clb_index_2 = connected_blocks[rand_num]; - if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { + if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; } iteration++; @@ -227,13 +227,13 @@ bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_mole int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { + if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; } iteration++; } - if(clb2_not_found) + if (clb2_not_found) return false; //pick a random molecule for the chosen block @@ -275,13 +275,13 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { + if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; } iteration++; } - if(clb2_not_found) + if (clb2_not_found) return false; //pick a random molecule for the chosen block @@ -325,13 +325,13 @@ bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; - if(packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { + if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { clb2_not_found = false; } ++iteration; } - if(clb2_not_found) + if (clb2_not_found) return false; //pick a random 
molecule for the chosen block diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index d963884ad45..3bbeaf00281 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -23,7 +23,7 @@ void init_multithreading_locks() { auto& helper_ctx = g_vpr_ctx.cl_helper(); packing_multithreading_ctx.mu.resize(helper_ctx.total_clb_num); - for(auto& m : packing_multithreading_ctx.mu) { + for (auto& m : packing_multithreading_ctx.mu) { m = new std::mutex; } } @@ -63,19 +63,18 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ #endif unsigned int total_num_moves = packer_opts.pack_num_moves; - //unsigned int num_threads = std::thread::hardware_concurrency(); - const int num_threads = 2; + const int num_threads = packer_opts.pack_num_threads; unsigned int moves_per_thread = total_num_moves / num_threads; std::thread my_threads[num_threads]; init_multithreading_locks(); - for (unsigned int i = 0; i < num_threads - 1; i++) { + for (int i = 0; i < (num_threads - 1); i++) { my_threads[i] = std::thread(try_n_packing_moves, i, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); } - my_threads[num_threads - 1] = std::thread(try_n_packing_moves, num_threads-1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); + my_threads[num_threads - 1] = std::thread(try_n_packing_moves, num_threads - 1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - for (auto & my_thread : my_threads) + for (auto& my_thread : my_threads) my_thread.join(); VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", @@ -131,7 +130,6 @@ void try_n_packing_moves(int thread_num, int n, const std::string& 
move_type, t_ packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); - } pack_stats.mu.lock(); diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 41b1c4cec53..8f28eec57cd 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -257,10 +257,10 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats[0]); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats[1]); - delete[] helper_ctx.primitives_list[0]; - delete[] helper_ctx.primitives_list[1]; + for (int thread_id = 0; thread_id < packer_opts->pack_num_threads; thread_id++) { + free_cluster_placement_stats(helper_ctx.cluster_placement_stats[thread_id]); + delete[] helper_ctx.primitives_list[thread_id]; + } ++pack_iteration; } From 99d2cfbf4307fdc486eff4ff6e49be8f157697d2 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 8 Dec 2022 15:05:52 -0500 Subject: [PATCH 012/188] solve a variable size array issue --- vpr/src/pack/improvement/pack_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 3bbeaf00281..e77b1e25729 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -65,7 +65,7 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ unsigned int total_num_moves = packer_opts.pack_num_moves; const int num_threads = packer_opts.pack_num_threads; unsigned int moves_per_thread = total_num_moves / num_threads; - std::thread my_threads[num_threads]; + std::thread* my_threads = new std::thread[num_threads]; init_multithreading_locks(); @@ -74,8 +74,8 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ } my_threads[num_threads - 
1] = std::thread(try_n_packing_moves, num_threads - 1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - for (auto& my_thread : my_threads) - my_thread.join(); + for (int i =0; i < num_threads; i++) + my_threads[i].join(); VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", packer_opts.pack_move_type.c_str(), From 28875dd7d9986a9c69cfdfdec2ebd43f4355f50a Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 8 Dec 2022 15:06:36 -0500 Subject: [PATCH 013/188] fix formatting --- vpr/src/pack/improvement/pack_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index e77b1e25729..336aaf8c27a 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -74,7 +74,7 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ } my_threads[num_threads - 1] = std::thread(try_n_packing_moves, num_threads - 1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - for (int i =0; i < num_threads; i++) + for (int i = 0; i < num_threads; i++) my_threads[i].join(); VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", From a8cd188e39e324ab9983931e050c4c9679417c63 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 9 Jan 2023 15:24:43 -0500 Subject: [PATCH 014/188] Fix all issues with mulithreading --- vpr/src/base/atom_lookup.cpp | 10 ++++++++- vpr/src/base/vpr_context.h | 2 +- vpr/src/pack/cluster_util.cpp | 3 ++- vpr/src/pack/improvement/pack_move_utils.cpp | 8 ++++--- vpr/src/pack/improvement/pack_utils.cpp | 7 ++++--- 
.../improvement/packing_move_generator.cpp | 21 ++++++++++++++++++- 6 files changed, 41 insertions(+), 10 deletions(-) diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index 8b3a45c0098..d53dad95c91 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -2,6 +2,7 @@ #include "vtr_log.h" #include "atom_lookup.h" +#include "globals.h" /* * PB */ @@ -15,11 +16,16 @@ const t_pb* AtomLookup::atom_pb(const AtomBlockId blk_id) const { } AtomBlockId AtomLookup::pb_atom(const t_pb* pb) const { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + packing_multithreading_ctx.lookup_mu.lock(); + auto iter = atom_to_pb_.find(pb); if (iter == atom_to_pb_.inverse_end()) { //Not found + packing_multithreading_ctx.lookup_mu.unlock(); return AtomBlockId::INVALID(); } + packing_multithreading_ctx.lookup_mu.unlock(); return iter->second; } @@ -35,7 +41,8 @@ const t_pb_graph_node* AtomLookup::atom_pb_graph_node(const AtomBlockId blk_id) void AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If either of blk_id or pb are not valid, //remove any mapping - + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + packing_multithreading_ctx.lookup_mu.lock(); if (!blk_id && pb) { //Remove atom_to_pb_.erase(pb); @@ -46,6 +53,7 @@ void AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If both are valid store the mapping atom_to_pb_.update(blk_id, pb); } + packing_multithreading_ctx.lookup_mu.unlock(); } /* diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index e193bcdea3b..d3da6c3ae0b 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -347,8 +347,8 @@ struct ClusteringHelperContext : public Context { * This contain data structures to synchronize multithreading of packing iterative improvement. 
*/ struct PackingMultithreadingContext : public Context { - //vtr::vector clb_in_flight; vtr::vector mu; + std::mutex lookup_mu; }; /** diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index e445d8585cc..d28608b6b51 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -3041,6 +3041,7 @@ void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph /* find location of net driver if exist in clb, NULL otherwise */ // find the driver of the input net connected to the pin being studied const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + // find the id of the atom occupying the input primitive_pb const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); // find the pb block occupied by the driving atom @@ -3699,4 +3700,4 @@ void init_clb_atoms_lookup(vtr::vector* atom_ids = cluster_to_atoms(clb_index_2); iteration = 0; const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); + do { int rand_num = vtr::irand((int)atom_ids->size() - 1); auto it = atom_ids->begin(); @@ -299,11 +300,12 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& const t_pb* pb_2 = atom_ctx.lookup.atom_pb(mol_2->atom_block_ids[mol_2->root]); if (pb_1->pb_graph_node->pb_type == pb_2->pb_graph_node->pb_type) return true; - else + else { iteration++; + break; + } } - } while (iteration < 20); - + } while (iteration < 10); packing_multithreading_ctx.mu[clb_index_2]->unlock(); return false; } diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 336aaf8c27a..190a130ce28 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -113,16 +113,17 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ for (int i = 0; i < n; i++) { new_locs.clear(); is_proposed = move_generator->propose_move(new_locs); - if (!is_proposed) + if (!is_proposed) { 
continue; - + } is_valid = move_generator->evaluate_move(new_locs); if (!is_valid) { packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); continue; - } else + } else { num_good_moves++; + } is_successful = move_generator->apply_move(new_locs, clustering_data, thread_num); if (is_successful) diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index c2c79a1866a..19eadd90a66 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -8,7 +8,7 @@ #include "re_cluster_util.h" #include "pack_move_utils.h" -const int MAX_ITERATIONS = 100; +const int MAX_ITERATIONS = 10; /******************* Packing move base class ************************/ /********************************************************************/ @@ -40,6 +40,8 @@ bool packingMoveGenerator::apply_move(std::vector& new_locs, /****************************************************************/ bool randomPackingSwap::propose_move(std::vector& new_locs) { auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1, clb_index_2; @@ -59,10 +61,15 @@ bool randomPackingSwap::propose_move(std::vector& new_locs) if (block_type_1 == block_type_2 && clb_index_1 != clb_index_2) { found = true; build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } else { + packing_multithreading_ctx.mu[clb_index_2]->unlock(); } ++iteration; } while (!found && iteration < MAX_ITERATIONS); + if(!found) { + packing_multithreading_ctx.mu[clb_index_1]->unlock(); + } return found; } @@ -106,6 +113,8 @@ bool quasiDirectedSameTypePackingSwap::evaluate_move(const std::vector& new_locs) { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + t_pack_molecule *mol_1, 
*mol_2; ClusterBlockId clb_index_1; @@ -120,6 +129,8 @@ bool quasiDirectedSameTypePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } else { + packing_multithreading_ctx.mu[clb_index_1]->unlock(); } return found; } @@ -131,6 +142,8 @@ bool quasiDirectedCompatibleTypePackingSwap::evaluate_move(const std::vector& new_locs) { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -145,6 +158,8 @@ bool quasiDirectedCompatibleTypePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } else { + packing_multithreading_ctx.mu[clb_index_1]->unlock(); } return found; } @@ -156,6 +171,8 @@ bool quasiDirectedSameSizePackingSwap::evaluate_move(const std::vector& new_locs) { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -170,6 +187,8 @@ bool quasiDirectedSameSizePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); + } else { + packing_multithreading_ctx.mu[clb_index_1]->unlock(); } return found; } \ No newline at end of file From 04fbb771fdca85d79309c847e2a1dec3b3f1fe2a Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 9 Jan 2023 15:27:52 -0500 Subject: [PATCH 015/188] Fix formatting --- vpr/src/pack/improvement/packing_move_generator.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 19eadd90a66..bbf99730dbe 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -42,7 +42,6 @@ bool 
randomPackingSwap::propose_move(std::vector& new_locs) auto& cluster_ctx = g_vpr_ctx.clustering(); auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1, clb_index_2; t_logical_block_type_ptr block_type_1, block_type_2; @@ -67,7 +66,7 @@ bool randomPackingSwap::propose_move(std::vector& new_locs) ++iteration; } while (!found && iteration < MAX_ITERATIONS); - if(!found) { + if (!found) { packing_multithreading_ctx.mu[clb_index_1]->unlock(); } return found; From 69aa3fba5dab63db5c145b72779abcc6aff01636 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 10 Jan 2023 11:46:29 -0500 Subject: [PATCH 016/188] Add new moves that are evaluated based on absorbed connections not nets --- vpr/src/pack/improvement/pack_move_utils.cpp | 48 +++++++++++++++++++ vpr/src/pack/improvement/pack_move_utils.h | 2 + vpr/src/pack/improvement/pack_utils.cpp | 27 ++++------- vpr/src/pack/improvement/pack_utils.h | 3 -- .../improvement/packing_move_generator.cpp | 20 ++++++++ .../pack/improvement/packing_move_generator.h | 27 +++++++++-- 6 files changed, 104 insertions(+), 23 deletions(-) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index a0d45e0111b..777cb7b2f52 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -90,6 +90,48 @@ int calculate_cutsize_change(const std::vector& new_locs) { return change_cutsize; } +int absorbed_conn_change(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // initialize the old and new cut sizes + int absorbed_conn_change = 0; + + // define some temporary + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + + for (auto& new_loc : new_locs) { + ClusterBlockId new_block_id = new_loc.new_clb; + ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + + for (auto& 
moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (!moving_atom) + continue; + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) + continue; + + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + if (cur_atom == moving_atom) + continue; + + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) { + absorbed_conn_change++; + + } else if (cur_clb == old_block_id) { + absorbed_conn_change--; + } + } + } + } + } + + return absorbed_conn_change; +} + #if 0 int update_cutsize_after_move(const std::vector& new_locs, int original_cutsize) { @@ -382,6 +424,12 @@ bool evaluate_move_based_on_cutsize(const std::vector& new_l else return false; } + +bool evaluate_move_based_on_connection(const std::vector& new_locs) { + int change_in_absorbed_conn = absorbed_conn_change(new_locs); + + return (change_in_absorbed_conn > 0); +} /********* static functions ************/ /***************************************/ #if 0 diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h index a597c9966bf..f0bc951e5da 100644 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -30,8 +30,10 @@ void build_mol_move_description(std::vector& new_locs, ClusterBlockId clb_index_2); bool evaluate_move_based_on_cutsize(const std::vector& new_locs); +bool evaluate_move_based_on_connection(const std::vector& new_locs); int calculate_cutsize_change(const std::vector& new_locs); +int absorbed_conn_change(const std::vector& new_locs); #if 0 int calculate_cutsize_of_clb(ClusterBlockId clb_index); diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 190a130ce28..4af5938dd88 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ 
b/vpr/src/pack/improvement/pack_utils.cpp @@ -28,23 +28,6 @@ void init_multithreading_locks() { } } -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - -#ifdef pack_improve_debug - vtr::ScopedFinishTimer lookup_timer("Building CLB atoms lookup"); -#endif - - atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); - - for (auto atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); - - atoms_lookup[clb_index].insert(atom_blk_id); - } -} - void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, int) { /* * auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -103,6 +86,16 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ move_generator = std::make_unique(); else if (strcmp(move_type.c_str(), "semiDirectedSameSizeSwap") == 0) move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "randomConnSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedConnSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeConnSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeConnSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeConnSwap") == 0) + move_generator = std::make_unique(); else { VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); diff --git a/vpr/src/pack/improvement/pack_utils.h b/vpr/src/pack/improvement/pack_utils.h index 17e25d5fa00..a5054f61d8b 100644 --- a/vpr/src/pack/improvement/pack_utils.h +++ b/vpr/src/pack/improvement/pack_utils.h @@ -14,7 +14,4 @@ struct t_pack_iterative_stats { void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, 
int verbosity); - -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); - #endif //VTR_PACK_UTILS_H diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index bbf99730dbe..35a08004769 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -190,4 +190,24 @@ bool quasiDirectedSameSizePackingSwap::propose_move(std::vectorunlock(); } return found; +} + +bool randomConnPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_connection(new_locs)); +} + +bool quasiDirectedConnPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_connection(new_locs)); +} + +bool quasiDirectedSameTypeConnPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_connection(new_locs)); +} + +bool quasiDirectedCompatibleTypeConnPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_connection(new_locs)); +} + +bool quasiDirectedSameSizeConnPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_connection(new_locs)); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h index 983f6235761..4f9c567b0db 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -52,8 +52,29 @@ class quasiDirectedSameSizePackingSwap : public packingMoveGenerator { bool evaluate_move(const std::vector& new_locs); }; -class quasiDirectedCompatibleTypeSameSizePackingSwap : public packingMoveGenerator { - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); +/************ Moves that evaluate on abosrbed Connections *********************/ +class randomConnPackingSwap : public randomPackingSwap { + public: + bool 
evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedConnPackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeConnPackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeConnPackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameSizeConnPackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; }; #endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From a943734d0e1dc5ff4a60ca22a062a03e25f4036a Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 11 Jan 2023 16:32:05 -0500 Subject: [PATCH 017/188] change the definition of the Conn moves to be absorbed pins / total number of pins --- vpr/src/pack/improvement/pack_move_utils.cpp | 14 ++++++++------ vpr/src/pack/improvement/pack_move_utils.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index 777cb7b2f52..a94fb61c93a 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -90,11 +90,11 @@ int calculate_cutsize_change(const std::vector& new_locs) { return change_cutsize; } -int absorbed_conn_change(const std::vector& new_locs) { +float absorbed_conn_change(const std::vector& new_locs) { auto& atom_ctx = g_vpr_ctx.atom(); // initialize the old and new cut sizes - int absorbed_conn_change = 0; + float absorbed_conn_change = 0; // define some temporary AtomBlockId cur_atom; @@ -107,24 +107,26 @@ int absorbed_conn_change(const std::vector& new_locs) { for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { if (!moving_atom) 
continue; + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) continue; + double num_pins_in_new = 0; for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { cur_atom = atom_ctx.nlist.pin_block(net_pin); if (cur_atom == moving_atom) - continue; + num_pins_in_new++; cur_clb = atom_to_cluster(cur_atom); if (cur_clb == new_block_id) { - absorbed_conn_change++; - + num_pins_in_new++; } else if (cur_clb == old_block_id) { - absorbed_conn_change--; + num_pins_in_new--; } } + absorbed_conn_change += num_pins_in_new/(float)atom_ctx.nlist.net_pins(atom_net).size(); } } } diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h index f0bc951e5da..040f7370cb5 100644 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -33,7 +33,7 @@ bool evaluate_move_based_on_cutsize(const std::vector& new_l bool evaluate_move_based_on_connection(const std::vector& new_locs); int calculate_cutsize_change(const std::vector& new_locs); -int absorbed_conn_change(const std::vector& new_locs); +float absorbed_conn_change(const std::vector& new_locs); #if 0 int calculate_cutsize_of_clb(ClusterBlockId clb_index); From 3a34e5ae3434d33083a9d30e47043a6341ff6603 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 12 Jan 2023 08:49:27 -0500 Subject: [PATCH 018/188] Fix an issue with calculating the abosorbed connections --- vpr/src/pack/improvement/pack_move_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index a94fb61c93a..efeaa20ae2f 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -107,7 +107,7 @@ float absorbed_conn_change(const std::vector& new_locs) { for (auto& moving_atom : 
new_loc.molecule_to_move->atom_block_ids) { if (!moving_atom) continue; - + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) @@ -126,7 +126,7 @@ float absorbed_conn_change(const std::vector& new_locs) { num_pins_in_new--; } } - absorbed_conn_change += num_pins_in_new/(float)atom_ctx.nlist.net_pins(atom_net).size(); + absorbed_conn_change += num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size(); } } } @@ -428,7 +428,7 @@ bool evaluate_move_based_on_cutsize(const std::vector& new_l } bool evaluate_move_based_on_connection(const std::vector& new_locs) { - int change_in_absorbed_conn = absorbed_conn_change(new_locs); + float change_in_absorbed_conn = absorbed_conn_change(new_locs); return (change_in_absorbed_conn > 0); } From a0e1f8ee7da3f2728a244f3475308c2d2ef35fa9 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Sun, 15 Jan 2023 20:29:06 -0500 Subject: [PATCH 019/188] Build and print a hash table for the count of connections between CLB blocks (maybe useful for some new moves) --- vpr/src/base/vpr_context.h | 5 +++ vpr/src/base/vpr_types.h | 6 +++ vpr/src/pack/improvement/pack_move_utils.cpp | 40 ++++++++++++++++++++ vpr/src/pack/improvement/pack_move_utils.h | 3 ++ vpr/src/pack/improvement/pack_utils.cpp | 3 +- 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index d3da6c3ae0b..57552a9ab73 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -339,6 +339,11 @@ struct ClusteringHelperContext : public Context { // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. 
num_clustered_blocks-1] // unordered_set for faster insertion/deletion during the iterative improvement process of packing vtr::vector> atoms_lookup; + + // An unordered map of the count of connections between different clb blocks + // Only blocks that have connections between each others are added to this hash table + // This may be useful for some type of packing moves. + std::unordered_map, int, pair_hash> clb_conn_counts; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 7dec5c49341..b7e1cd43791 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1793,4 +1793,10 @@ void free_pack_molecules(t_pack_molecule* list_of_pack_molecules); */ void free_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats); +struct pair_hash { + std::size_t operator()(const std::pair& p) const noexcept { + return std::hash()(p.first) ^ (std::hash()(p.second) << 1); + } +}; + #endif diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index efeaa20ae2f..a98a63d1883 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -510,4 +510,44 @@ static void calculate_connected_clbs_to_moving_mol(const t_pack_molecule* mol_1, } } } +} + +/************* CLB-CLB connection count hash table helper functions ***************/ +void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts) { + auto& atom_ctx = g_vpr_ctx.atom(); + + for (auto atom_net : atom_ctx.nlist.nets()) { + if (atom_ctx.nlist.net_pins(atom_net).size() > 7) + continue; + + std::unordered_set clusters; + for (auto atom_pin_it = atom_ctx.nlist.net_pins(atom_net).begin(); atom_pin_it != atom_ctx.nlist.net_pins(atom_net).end(); atom_pin_it++) { + auto clb1 = atom_to_cluster(atom_ctx.nlist.pin_block(*atom_pin_it)); + clusters.insert(clb1); + for (auto atom_pin_it2 = atom_pin_it + 1; atom_pin_it2 != atom_ctx.nlist.net_pins(atom_net).end(); 
atom_pin_it2++) { + auto clb2 = atom_to_cluster(atom_ctx.nlist.pin_block(*atom_pin_it2)); + if (clusters.count(clb2) == 0) { + if (conn_counts.find({clb1, clb2}) == conn_counts.end()) + conn_counts.insert({{clb1, clb2}, 1}); + else + conn_counts[{clb1, clb2}]++; + + clusters.insert(clb2); + } + } + } + } +} + +void print_block_connections(const std::unordered_map, int, pair_hash>& conn_count) { + for (const auto& block_pair_count : conn_count) { + VTR_LOG("Block : %d is connected to Block: %d with %d direct connections.\n", + block_pair_count.first.first, block_pair_count.first.second, block_pair_count.second); + } +} + +std::pair, int> get_max_value_pair(const std::unordered_map, int, pair_hash>& conn_count) { + auto max_iter = std::max_element(conn_count.begin(), conn_count.end(), + [](const auto& a, auto& b) { return a.second < b.second; }); + return *max_iter; } \ No newline at end of file diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h index 040f7370cb5..ebbeafa52d1 100644 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -35,6 +35,9 @@ bool evaluate_move_based_on_connection(const std::vector& ne int calculate_cutsize_change(const std::vector& new_locs); float absorbed_conn_change(const std::vector& new_locs); +void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts); +void print_block_connections(const std::unordered_map, int, pair_hash>& conn_count); +std::pair, int> get_max_value_pair(const std::unordered_map, int, pair_hash>& conn_count); #if 0 int calculate_cutsize_of_clb(ClusterBlockId clb_index); int update_cutsize_after_move(const std::vector& new_locs, diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 4af5938dd88..d01beb819a6 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -37,7 +37,8 @@ void 
iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); init_clb_atoms_lookup(helper_ctx.atoms_lookup); - + init_clb_clb_conn_numbers(helper_ctx.clb_conn_counts); + print_block_connections(helper_ctx.clb_conn_counts); #ifdef pack_improve_debug float propose_sec = 0; float evaluate_sec = 0; From 30ad1e2d0093e6873cbd958e261a8b807f949c2c Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 18 Jan 2023 13:21:51 -0500 Subject: [PATCH 020/188] [WIP] implementing multiple moves and evaluation formulas for packing moves --- vpr/src/base/vpr_context.h | 1 + vpr/src/pack/improvement/pack_move_utils.cpp | 220 +++++++++++++++++- vpr/src/pack/improvement/pack_move_utils.h | 20 +- vpr/src/pack/improvement/pack_utils.cpp | 35 ++- .../improvement/packing_move_generator.cpp | 60 +++++ .../pack/improvement/packing_move_generator.h | 79 +++++++ 6 files changed, 403 insertions(+), 12 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 57552a9ab73..44899566f4a 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -354,6 +354,7 @@ struct ClusteringHelperContext : public Context { struct PackingMultithreadingContext : public Context { vtr::vector mu; std::mutex lookup_mu; + std::mutex apply_mu; }; /** diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index a98a63d1883..161a1059d99 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -90,7 +90,57 @@ int calculate_cutsize_change(const std::vector& new_locs) { return change_cutsize; } -float absorbed_conn_change(const std::vector& new_locs) { +int absorbed_conn_change(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // initialize the old and new cut sizes + int newly_absorbed_conn = 0; + + // define some temporary + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + + 
std::unordered_set moving_atoms; + for (auto& new_loc : new_locs) { + for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { + if (atom) + moving_atoms.insert(atom); + } + } + + for (auto& new_loc : new_locs) { + ClusterBlockId new_block_id = new_loc.new_clb; + ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (!moving_atom) + continue; + + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) + continue; + + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + if (moving_atoms.count(cur_atom)) + continue; + + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) { + newly_absorbed_conn++; + } else if (cur_clb == old_block_id) { + newly_absorbed_conn--; + } + } + } + } + } + + return newly_absorbed_conn; +} + +float absorbed_pin_terminals(const std::vector& new_locs) { auto& atom_ctx = g_vpr_ctx.atom(); // initialize the old and new cut sizes @@ -99,34 +149,118 @@ float absorbed_conn_change(const std::vector& new_locs) { // define some temporary AtomBlockId cur_atom; ClusterBlockId cur_clb; + std::unordered_set moving_atoms; + for (auto& new_loc : new_locs) { + for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { + if (atom) + moving_atoms.insert(atom); + } + } + // iterate over the molecules that will be moving for (auto& new_loc : new_locs) { ClusterBlockId new_block_id = new_loc.new_clb; ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + // iterate over atoms of the moving molecule for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { if (!moving_atom) continue; + // iterate over the atom pins for (auto& atom_pin 
: atom_ctx.nlist.block_pins(moving_atom)) { AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) continue; - double num_pins_in_new = 0; + int num_pins_in_new = 0; + // iterate over the net pins for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { cur_atom = atom_ctx.nlist.pin_block(net_pin); if (cur_atom == moving_atom) num_pins_in_new++; + else if (moving_atoms.count(cur_atom)) { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) + num_pins_in_new--; + else + num_pins_in_new++; + } else { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) { + num_pins_in_new++; + } else if (cur_clb == old_block_id) { + num_pins_in_new--; + } + } + } + absorbed_conn_change += (float)num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size(); + } + } + } - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) { + return absorbed_conn_change; +} + +bool evaluate_move_based_on_terminals(const std::vector& new_locs) { + return absorbed_pin_terminals(new_locs) > 0; +} + +float absorbed_pin_terminals_and_nets(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // initialize the old and new cut sizes + float absorbed_conn_change = 0; + + // define some temporary + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + std::unordered_set moving_atoms; + for (auto& new_loc : new_locs) { + for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { + if (atom) + moving_atoms.insert(atom); + } + } + + // iterate over the molecules that will be moving + for (auto& new_loc : new_locs) { + ClusterBlockId new_block_id = new_loc.new_clb; + ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + + // iterate over atoms of the moving molecule + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (!moving_atom) + continue; + + // iterate over the atom pins + for 
(auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) + continue; + + int num_pins_in_new = 0; + // iterate over the net pins + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + if (cur_atom == moving_atom) num_pins_in_new++; - } else if (cur_clb == old_block_id) { - num_pins_in_new--; + else if (moving_atoms.count(cur_atom)) { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) + num_pins_in_new--; + else + num_pins_in_new++; + } else { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) { + num_pins_in_new++; + } else if (cur_clb == old_block_id) { + num_pins_in_new--; + } } } - absorbed_conn_change += num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size(); + absorbed_conn_change += (float)num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size() + (int)num_pins_in_new / (int)atom_ctx.nlist.net_pins(atom_net).size(); } } } @@ -134,6 +268,78 @@ float absorbed_conn_change(const std::vector& new_locs) { return absorbed_conn_change; } +bool evaluate_move_based_on_terminals_and_nets(const std::vector& new_locs) { + return absorbed_pin_terminals_and_nets(new_locs) > 0; +} + +float abosrbed_terminal_new_formula(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // initialize the old and new cut sizes + float absorbed_conn_change = 0; + + // define some temporary + AtomBlockId cur_atom; + ClusterBlockId cur_clb; + std::unordered_set moving_atoms; + for (auto& new_loc : new_locs) { + for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { + if (atom) + moving_atoms.insert(atom); + } + } + + // iterate over the molecules that will be moving + for (auto& new_loc : new_locs) { + ClusterBlockId new_block_id = new_loc.new_clb; + ClusterBlockId old_block_id = 
atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + + // iterate over atoms of the moving molecule + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (!moving_atom) + continue; + + // iterate over the atom pins + for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) + continue; + + int old_pin_outside = 0; + int new_pin_outside = 0; + // iterate over the net pins + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { + cur_atom = atom_ctx.nlist.pin_block(net_pin); + if (cur_atom == moving_atom) { + old_pin_outside++; + } else if (moving_atoms.count(cur_atom)) { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb == new_block_id) + new_pin_outside++; + else + old_pin_outside++; + } else { + cur_clb = atom_to_cluster(cur_atom); + if (cur_clb != new_block_id) { + new_pin_outside++; + } + if (cur_clb != old_block_id) { + old_pin_outside++; + } + } + } + float terminals = (float)atom_ctx.nlist.net_pins(atom_net).size(); + absorbed_conn_change += (float)old_pin_outside / (terminals - new_pin_outside + 1.) 
- (float)new_pin_outside / (terminals - new_pin_outside + 1.); + } + } + } + + return absorbed_conn_change; +} + +bool evaluate_move_based_on_terminals_new_formula(const std::vector& new_locs) { + return abosrbed_terminal_new_formula(new_locs) > 0; +} #if 0 int update_cutsize_after_move(const std::vector& new_locs, int original_cutsize) { diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h index ebbeafa52d1..ba94139f4fa 100644 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -30,10 +30,26 @@ void build_mol_move_description(std::vector& new_locs, ClusterBlockId clb_index_2); bool evaluate_move_based_on_cutsize(const std::vector& new_locs); +int calculate_cutsize_change(const std::vector& new_locs); + +/* Calculate the change of the absorbed connection */ +/* +ve means more connections are absorbed */ +int absorbed_conn_change(const std::vector& new_locs); bool evaluate_move_based_on_connection(const std::vector& new_locs); -int calculate_cutsize_change(const std::vector& new_locs); -float absorbed_conn_change(const std::vector& new_locs); +/* Calculate the number of abosrbed terminals of a net */ +/* +ve means more terminal are now absorbed */ +float absorbed_pin_terminals(const std::vector& new_locs); +bool evaluate_move_based_on_terminals(const std::vector& new_locs); + +/* Calculate the number of absorbed terminals of a net * + * and add a bonus for absorbing the whole net * + * +ve means more terminals are now absorbed */ +float absorbed_pin_terminals_and_nets(const std::vector& new_locs); +bool evaluate_move_based_on_terminals_and_nets(const std::vector& new_locs); + +float abosrbed_terminal_new_formula(const std::vector& new_locs); +bool evaluate_move_based_on_terminals_new_formula(const std::vector& new_locs); void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts); void print_block_connections(const std::unordered_map, int, 
pair_hash>& conn_count); diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index d01beb819a6..435d5eefd2f 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -37,8 +37,8 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); init_clb_atoms_lookup(helper_ctx.atoms_lookup); - init_clb_clb_conn_numbers(helper_ctx.clb_conn_counts); - print_block_connections(helper_ctx.clb_conn_counts); + //init_clb_clb_conn_numbers(helper_ctx.clb_conn_counts); + //print_block_connections(helper_ctx.clb_conn_counts); #ifdef pack_improve_debug float propose_sec = 0; float evaluate_sec = 0; @@ -97,7 +97,36 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ move_generator = std::make_unique(); else if (strcmp(move_type.c_str(), "semiDirectedSameSizeConnSwap") == 0) move_generator = std::make_unique(); - + else if (strcmp(move_type.c_str(), "randomTerminalSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedTerminalSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "randomTerminalNetSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedTerminalNetSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalNetSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalNetSwap") == 0) 
+ move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalNetSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "randomTerminalNetNewFormulaSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedTerminalNetNewFormulaSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalNetNewFormulaSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalNetNewFormulaSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalNetNewFormulaSwap") == 0) + move_generator = std::make_unique(); else { VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); VTR_LOG("Packing iterative improvement is aborted\n"); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 35a08004769..6e4aa35569c 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -210,4 +210,64 @@ bool quasiDirectedCompatibleTypeConnPackingSwap::evaluate_move(const std::vector bool quasiDirectedSameSizeConnPackingSwap::evaluate_move(const std::vector& new_locs) { return (evaluate_move_based_on_connection(new_locs)); +} + +bool randomTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals(new_locs)); +} + +bool quasiDirectedTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals(new_locs)); +} + +bool quasiDirectedSameTypeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals(new_locs)); +} + +bool quasiDirectedCompatibleTypeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { + return 
(evaluate_move_based_on_terminals(new_locs)); +} + +bool quasiDirectedSameSizeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals(new_locs)); +} + +bool randomTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_and_nets(new_locs)); +} + +bool quasiDirectedTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_and_nets(new_locs)); +} + +bool quasiDirectedSameTypeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_and_nets(new_locs)); +} + +bool quasiDirectedCompatibleTypeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_and_nets(new_locs)); +} + +bool quasiDirectedSameSizeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_and_nets(new_locs)); +} + +bool randomTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_new_formula(new_locs)); +} + +bool quasiDirectedTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_new_formula(new_locs)); +} + +bool quasiDirectedSameTypeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_new_formula(new_locs)); +} + +bool quasiDirectedCompatibleTypeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_new_formula(new_locs)); +} + +bool quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_new_formula(new_locs)); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h 
b/vpr/src/pack/improvement/packing_move_generator.h index 4f9c567b0db..7f044c2d417 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -77,4 +77,83 @@ class quasiDirectedSameSizeConnPackingSwap : public quasiDirectedSameSizePacking public: bool evaluate_move(const std::vector& new_locs) override; }; + +/************ Moves that evaluate on abosrbed Terminals *********************/ +class randomTerminalPackingSwap : public randomPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedTerminalPackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeTerminalPackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeTerminalPackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameSizeTerminalPackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +/************ Moves that evaluate on abosrbed Terminals and nets *********************/ +class randomTerminalNetPackingSwap : public randomPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedTerminalNetPackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeTerminalNetPackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeTerminalNetPackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + 
+class quasiDirectedSameSizeTerminalNetPackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +/************ Moves that evaluate on abosrbed Terminals and nets new formula *********************/ +class randomTerminalNetNewFormulaPackingSwap : public randomPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedTerminalNetNewFormulaPackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeTerminalNetNewFormulaPackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeTerminalNetNewFormulaPackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + #endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From 5153c54d8f505dc0fc0d45429c02043a2055f0bf Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Fri, 20 Jan 2023 14:46:00 -0500 Subject: [PATCH 021/188] update how moves are evaluated --- vpr/src/base/vpr_context.h | 1 - vpr/src/pack/improvement/pack_move_utils.cpp | 57 ++++++++++---------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 44899566f4a..57552a9ab73 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -354,7 +354,6 @@ struct ClusteringHelperContext : public Context { struct PackingMultithreadingContext : public Context { vtr::vector mu; std::mutex lookup_mu; - std::mutex apply_mu; }; /** diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp 
b/vpr/src/pack/improvement/pack_move_utils.cpp index 161a1059d99..b32ec402792 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -150,10 +150,15 @@ float absorbed_pin_terminals(const std::vector& new_locs) { AtomBlockId cur_atom; ClusterBlockId cur_clb; std::unordered_set moving_atoms; + std::unordered_set moving_nets; for (auto& new_loc : new_locs) { for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) + if (atom) { moving_atoms.insert(atom); + for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { + moving_nets.insert(atom_ctx.nlist.pin_net(atom_pin)); + } + } } } @@ -170,6 +175,10 @@ float absorbed_pin_terminals(const std::vector& new_locs) { // iterate over the atom pins for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (!moving_nets.count(atom_net)) + continue; + + moving_nets.erase(atom_net); if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) continue; @@ -177,15 +186,7 @@ float absorbed_pin_terminals(const std::vector& new_locs) { // iterate over the net pins for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (cur_atom == moving_atom) - num_pins_in_new++; - else if (moving_atoms.count(cur_atom)) { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) - num_pins_in_new--; - else - num_pins_in_new++; - } else { + if (!moving_atoms.count(cur_atom)) { cur_clb = atom_to_cluster(cur_atom); if (cur_clb == new_block_id) { num_pins_in_new++; @@ -194,7 +195,7 @@ float absorbed_pin_terminals(const std::vector& new_locs) { } } } - absorbed_conn_change += (float)num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size(); + absorbed_conn_change += (float)(num_pins_in_new) / (float)atom_ctx.nlist.net_pins(atom_net).size(); } } } @@ -239,28 +240,30 @@ float absorbed_pin_terminals_and_nets(const std::vector& new if 
(atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) continue; - int num_pins_in_new = 0; + int num_old_absorbed = 0; + int num_new_absorbed = 0; // iterate over the net pins for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (cur_atom == moving_atom) - num_pins_in_new++; - else if (moving_atoms.count(cur_atom)) { + if (cur_atom == moving_atom) { + num_old_absorbed++; + num_new_absorbed++; + } else if (moving_atoms.count(cur_atom)) { cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) - num_pins_in_new--; - else - num_pins_in_new++; + if (cur_clb == old_block_id) { + num_old_absorbed++; + num_new_absorbed++; + } } else { cur_clb = atom_to_cluster(cur_atom); if (cur_clb == new_block_id) { - num_pins_in_new++; + num_new_absorbed++; } else if (cur_clb == old_block_id) { - num_pins_in_new--; + num_old_absorbed++; } } } - absorbed_conn_change += (float)num_pins_in_new / (float)atom_ctx.nlist.net_pins(atom_net).size() + (int)num_pins_in_new / (int)atom_ctx.nlist.net_pins(atom_net).size(); + absorbed_conn_change += (float)(num_new_absorbed - num_old_absorbed) / (float)atom_ctx.nlist.net_pins(atom_net).size() + (int)(num_new_absorbed) / (int)atom_ctx.nlist.net_pins(atom_net).size() - (int)num_old_absorbed / (int)atom_ctx.nlist.net_pins(atom_net).size(); } } } @@ -311,13 +314,13 @@ float abosrbed_terminal_new_formula(const std::vector& new_l for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { cur_atom = atom_ctx.nlist.pin_block(net_pin); if (cur_atom == moving_atom) { - old_pin_outside++; + //old_pin_outside++; } else if (moving_atoms.count(cur_atom)) { cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) - new_pin_outside++; - else + if (cur_clb == new_block_id) { old_pin_outside++; + new_pin_outside++; + } } else { cur_clb = atom_to_cluster(cur_atom); if (cur_clb != new_block_id) { @@ -329,7 +332,7 @@ float abosrbed_terminal_new_formula(const std::vector& new_l } } 
float terminals = (float)atom_ctx.nlist.net_pins(atom_net).size(); - absorbed_conn_change += (float)old_pin_outside / (terminals - new_pin_outside + 1.) - (float)new_pin_outside / (terminals - new_pin_outside + 1.); + absorbed_conn_change += (float)old_pin_outside / (terminals - old_pin_outside + 1.) - (float)new_pin_outside / (terminals - new_pin_outside + 1.); } } } From 5c8159ad1d9d2c6c8889a6b43051c7fc7eb8b0f8 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 24 Jan 2023 15:29:21 -0500 Subject: [PATCH 022/188] fix the renaming bug to avoid having two pbs with the same name --- vpr/src/pack/cluster_util.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index d28608b6b51..f56e21db51e 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -949,7 +949,6 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl int i; enum e_block_pack_status block_pack_status; t_pb* parent; - t_pb* cur_pb; auto& atom_ctx = g_vpr_ctx.atom(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); @@ -1086,15 +1085,6 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl */ VTR_ASSERT(block_pack_status == BLK_PASSED); if (molecule->is_chain()) { - /* Chained molecules often take up lots of area and are important, - * if a chain is packed in, want to rename logic block to match chain name */ - AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; - cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - while (cur_pb != nullptr) { - free(cur_pb->name); - cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - cur_pb = cur_pb->parent_pb; - } // if this molecule is part of a chain, mark the cluster as having a long chain // molecule. Also check if it's the first molecule in the chain to be packed. 
// If so, update the chain id for this chain of molecules to make sure all From 8e2430d613dbb972f5bb0e4ae6d7b5681c152a49 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 26 Jan 2023 13:24:17 -0500 Subject: [PATCH 023/188] Add a new way to evaluate moves based on the terminals outside --- vpr/src/pack/improvement/pack_move_utils.cpp | 66 +++++++++++++++++++ vpr/src/pack/improvement/pack_move_utils.h | 2 + vpr/src/pack/improvement/pack_utils.cpp | 10 +++ .../improvement/packing_move_generator.cpp | 20 ++++++ .../pack/improvement/packing_move_generator.h | 25 +++++++ 5 files changed, 123 insertions(+) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index b32ec402792..d2f06dbebcd 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -759,4 +759,70 @@ std::pair, int> get_max_value_pair(con auto max_iter = std::max_element(conn_count.begin(), conn_count.end(), [](const auto& a, auto& b) { return a.second < b.second; }); return *max_iter; +} + +bool evaluate_move_based_on_terminals_outside(const std::vector& new_locs) { + auto& atom_ctx = g_vpr_ctx.atom(); + + int pins_absorbed_before, pins_absorbed_after, pins_outside_before, pins_outside_after; + double cost = 0; + std::unordered_set moving_atoms; + + for (auto& new_loc : new_locs) { + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (moving_atom) { + moving_atoms.insert(moving_atom); + } + } + } + + // iterate over moves proposed (a swap is two moves) + for (auto& new_loc : new_locs) { + std::unordered_set moving_nets; + auto cur_clb = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); + // iterate over atoms in the moving molcule + for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { + if (moving_atom) { + // iterate over moving atom pins + for (auto& moving_atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { + auto 
atom_net = atom_ctx.nlist.pin_net(moving_atom_pin); + // Make sure that we didn't count this net before + if (moving_nets.count(atom_net)) + continue; + + moving_nets.insert(atom_net); + pins_absorbed_before = 0; + pins_absorbed_after = 0; + pins_outside_before = 0; + pins_outside_after = 0; + + for (auto& pin : atom_ctx.nlist.net_pins(atom_net)) { + auto atom = atom_ctx.nlist.pin_block(pin); + auto cluster = atom_to_cluster(atom); + if (moving_atoms.count(atom)) { + if (cluster == cur_clb) { + pins_absorbed_before++; + pins_absorbed_after++; + } else { + pins_outside_before++; + pins_outside_after++; + } + } else { + if (cluster == cur_clb) { + pins_absorbed_before++; + pins_outside_after++; + } else { + pins_outside_before++; + if (cluster == new_loc.new_clb) { + pins_absorbed_after++; + } + } + } + } + cost += (double)pins_absorbed_after / (pins_outside_after + 1.) - (double)pins_absorbed_before / (pins_outside_before + 1.); + } + } + } + } + return (cost > 0); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h index ba94139f4fa..981c135af81 100644 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ b/vpr/src/pack/improvement/pack_move_utils.h @@ -51,6 +51,8 @@ bool evaluate_move_based_on_terminals_and_nets(const std::vector& new_locs); bool evaluate_move_based_on_terminals_new_formula(const std::vector& new_locs); +bool evaluate_move_based_on_terminals_outside(const std::vector& new_locs); + void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts); void print_block_connections(const std::unordered_map, int, pair_hash>& conn_count); std::pair, int> get_max_value_pair(const std::unordered_map, int, pair_hash>& conn_count); diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 435d5eefd2f..4d33bd1af62 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -127,6 
+127,16 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ move_generator = std::make_unique(); else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalNetNewFormulaSwap") == 0) move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "randomTerminalOutsideSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedTerminalOutsideSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalOutsideSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalOutsideSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalOutsideSwap") == 0) + move_generator = std::make_unique(); else { VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); VTR_LOG("Packing iterative improvement is aborted\n"); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 6e4aa35569c..8dd69c10885 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -270,4 +270,24 @@ bool quasiDirectedCompatibleTypeTerminalNetNewFormulaPackingSwap::evaluate_move( bool quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { return (evaluate_move_based_on_terminals_new_formula(new_locs)); +} + +bool randomTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_outside(new_locs)); +} + +bool quasiDirectedTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_outside(new_locs)); +} + +bool quasiDirectedSameTypeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { + return 
(evaluate_move_based_on_terminals_outside(new_locs)); +} + +bool quasiDirectedCompatibleTypeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_outside(new_locs)); +} + +bool quasiDirectedSameSizeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_terminals_outside(new_locs)); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h index 7f044c2d417..abbd7653c24 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -156,4 +156,29 @@ class quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap : public quasiDirect bool evaluate_move(const std::vector& new_locs) override; }; +/************ Moves that evaluate on abosrbed Terminals and nets new formula *********************/ +class randomTerminalOutsidePackingSwap : public randomPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedTerminalOutsidePackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeTerminalOutsidePackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeTerminalOutsidePackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameSizeTerminalOutsidePackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; #endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From 18feb40036280f1d01250a7f73caf2ba30396fbb Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Fri, 27 Jan 2023 15:40:51 -0500 Subject: 
[PATCH 024/188] a more generic way to fix the renaming bug --- vpr/src/pack/cluster_util.cpp | 11 +++++++++++ vpr/src/pack/re_cluster.cpp | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index f56e21db51e..1c45cbe0559 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -949,6 +949,7 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl int i; enum e_block_pack_status block_pack_status; t_pb* parent; + t_pb* cur_pb; auto& atom_ctx = g_vpr_ctx.atom(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); @@ -1085,6 +1086,16 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl */ VTR_ASSERT(block_pack_status == BLK_PASSED); if (molecule->is_chain()) { + /* Chained molecules often take up lots of area and are important, +- * if a chain is packed in, want to rename logic block to match chain name */ + AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; + cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + while (cur_pb != nullptr) { + free(cur_pb->name); + cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + cur_pb = cur_pb->parent_pb; + } + // if this molecule is part of a chain, mark the cluster as having a long chain // molecule. Also check if it's the first molecule in the chain to be packed. 
// If so, update the chain id for this chain of molecules to make sure all diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 734b4c8d5b8..68c322dc25e 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -4,6 +4,8 @@ #include "cluster_placement.h" #include "cluster_router.h" +const char* move_suffix = "_m"; + bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, @@ -161,6 +163,8 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, int verbosity, t_clustering_data& clustering_data, int thread_id) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + //define local variables PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; @@ -197,6 +201,11 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, return false; } + t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); + std::string clb_pb_1_name = (std::string)clb_pb_1->name + move_suffix; + t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); + std::string clb_pb_2_name = (std::string)clb_pb_2->name + move_suffix; + //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_1, during_packing, old_1_router_data, clustering_data); @@ -235,6 +244,15 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && mol_2_success); + if(molecule_2->is_chain()) { + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + } + if(molecule_1->is_chain()) { + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + } + //If the move is done after packing not during it, some fixes need to be done on the clustered netlist if (!during_packing) { From ff651efc10b7a9aafcde177c7d2c9b1496578816 Mon Sep 17 00:00:00 2001 From: 
MohamedElgammal Date: Mon, 30 Jan 2023 17:51:45 -0500 Subject: [PATCH 025/188] Hope it is now fixed --- vpr/src/pack/cluster_util.cpp | 3 ++- vpr/src/pack/re_cluster.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 1c45cbe0559..39119368ed5 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1090,12 +1090,13 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl - * if a chain is packed in, want to rename logic block to match chain name */ AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + /* while (cur_pb != nullptr) { free(cur_pb->name); cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); cur_pb = cur_pb->parent_pb; } - + */ // if this molecule is part of a chain, mark the cluster as having a long chain // molecule. Also check if it's the first molecule in the chain to be packed. 
// If so, update the chain id for this chain of molecules to make sure all diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 68c322dc25e..553b6023ddc 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -244,11 +244,13 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && mol_2_success); - if(molecule_2->is_chain()) { + if(molecule_2->is_chain()) + { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if(molecule_1->is_chain()) { + if(molecule_1->is_chain()) + { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } From 93b9b6515cd842926360fb59f90703fe7aaa8232 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 31 Jan 2023 16:49:11 -0500 Subject: [PATCH 026/188] remove unnecesary logging --- vpr/src/pack/cluster_util.cpp | 2 +- vpr/src/pack/pack.cpp | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 39119368ed5..9fa8f8d993c 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -305,7 +305,7 @@ bool check_cluster_legality(const int& verbosity, if (is_cluster_legal) { VTR_LOGV(verbosity > 2, "\tPassed route at end.\n"); } else { - VTR_LOGV(verbosity > 0, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); + VTR_LOGV(verbosity > 2, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); } } else { is_cluster_legal = true; diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 8f28eec57cd..51c0a7d29b3 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -266,13 +266,22 @@ bool try_pack(t_packer_opts* packer_opts, } /* Packing iterative improvement can be done here */ - /* Use the re-cluster API to edit it */ 
/******************* Start *************************/ + auto& cluster_ctx = g_vpr_ctx.clustering(); + /* + for(auto& clb : cluster_ctx.clb_nlist.blocks()) { + VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); + } + */ VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); + /* + for(auto& clb : cluster_ctx.clb_nlist.blocks()) { + VTR_LOG("@@@ block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); + } + */ - auto& cluster_ctx = g_vpr_ctx.clustering(); for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); } From be78ac86fe9fbea85aaeba6f95ceaeaa68649062 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Sun, 5 Feb 2023 20:36:31 -0500 Subject: [PATCH 027/188] WIP fixing the renaming bug by make sure to bring the name back after movement (still need cleaning and testing) --- vpr/src/pack/cluster_util.cpp | 25 ++++++++++----- vpr/src/pack/cluster_util.h | 15 +-------- vpr/src/pack/pack.cpp | 8 ++--- vpr/src/pack/re_cluster.cpp | 52 +++++++++++++++++++++++++++++--- vpr/src/pack/re_cluster_util.cpp | 6 ++-- 5 files changed, 74 insertions(+), 32 deletions(-) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 9fa8f8d993c..f59b70e5cd6 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -6,6 +6,7 @@ #include "vtr_math.h" #include "SetupGrid.h" +#include "string.h" /**********************************/ /* Global variables in clustering */ @@ -305,7 +306,7 @@ bool check_cluster_legality(const int& verbosity, if (is_cluster_legal) { VTR_LOGV(verbosity > 2, "\tPassed route at end.\n"); } else { - VTR_LOGV(verbosity > 2, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); + VTR_LOGV(verbosity > 0, "Failed route at end, repack cluster trying detailed 
routing at each stage.\n"); } } else { is_cluster_legal = true; @@ -944,7 +945,8 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr) { + PartitionRegion& temp_cluster_pr, + bool during_recluster) { int molecule_size, failed_location; int i; enum e_block_pack_status block_pack_status; @@ -1090,13 +1092,19 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl - * if a chain is packed in, want to rename logic block to match chain name */ AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - /* - while (cur_pb != nullptr) { + + if(strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) + VTR_LOG("rename: %s\n", cur_pb->name); + + while (!during_recluster && cur_pb != nullptr) { free(cur_pb->name); cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + if(cur_pb->is_root() && strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) + VTR_LOG("\t %p\n", cur_pb); + //VTR_LOG("$ %s\n", cur_pb->name); cur_pb = cur_pb->parent_pb; } - */ + // if this molecule is part of a chain, mark the cluster as having a long chain // molecule. Also check if it's the first molecule in the chain to be packed. 
// If so, update the chain id for this chain of molecules to make sure all @@ -1229,6 +1237,7 @@ enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_grap VTR_ASSERT(parent_pb->name == nullptr); parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + //VTR_LOG("$$ %s\n", parent_pb->name); parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; set_reset_pb_modes(router_data, parent_pb, true); const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; @@ -1280,6 +1289,7 @@ enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_grap /* try pack to location */ VTR_ASSERT(pb->name == nullptr); pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + //VTR_LOG("$$$ %s\n", pb->name); //Update the atom netlist mappings atom_ctx.lookup.set_atom_clb(blk_id, clb_index); @@ -1537,7 +1547,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.enable_pin_feasibility_filter, packer_opts.feasible_block_array_size, target_ext_pin_util, - temp_cluster_pr); + temp_cluster_pr, false); auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; VTR_ASSERT(blk_id); @@ -2142,7 +2152,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, enable_pin_feasibility_filter, feasible_block_array_size, FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr); + temp_cluster_pr, false); success = (pack_result == BLK_PASSED); } @@ -2154,6 +2164,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, free(pb->name); } pb->name = vtr::strdup(root_atom_name.c_str()); + //VTR_LOG("$$$$ %s\n", pb->name); clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); break; } else { diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 6048bc63d2b..7967e727658 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -199,20 +199,7 @@ void rebuild_attraction_groups(AttractionInfo& 
attraction_groups); void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); -enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - const int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr); +enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, t_pack_molecule* molecule, t_pb_graph_node** primitives_list, t_pb* pb, const int max_models, const int max_cluster_size, const ClusterBlockId clb_index, const int detailed_routing_stage, t_lb_router_data* router_data, int verbosity, bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, PartitionRegion& temp_cluster_pr, bool during_recluster); void try_fill_cluster(const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 51c0a7d29b3..6ba2c83bec7 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -268,19 +268,19 @@ bool try_pack(t_packer_opts* packer_opts, /* Packing iterative improvement can be done here */ /******************* Start *************************/ auto& cluster_ctx = g_vpr_ctx.clustering(); - /* + for(auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); } - */ + VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); - /* + for(auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("@@@ block: %zu --> %s\n", clb, 
cluster_ctx.clb_nlist.block_pb(clb)->name); } - */ + for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 553b6023ddc..8438dbe73e2 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -202,9 +202,13 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, } t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); - std::string clb_pb_1_name = (std::string)clb_pb_1->name + move_suffix; + std::string clb_pb_1_name = (std::string)clb_pb_1->name; t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); - std::string clb_pb_2_name = (std::string)clb_pb_2->name + move_suffix; + std::string clb_pb_2_name = (std::string)clb_pb_2->name; + if(clb_1 == ClusterBlockId(721)) + VTR_LOG("before clb1: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); + if(clb_2 == ClusterBlockId(721)) + VTR_LOG("before clb2: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); @@ -216,6 +220,9 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //Add the atom to the new cluster mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); if (!mol_1_success) { + if(clb_1 == ClusterBlockId(721) || clb_2 == ClusterBlockId(721)) { + VTR_LOG("packing clb2 failed\n"); + } mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, 
thread_id); @@ -224,11 +231,28 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; + /* + if(molecule_2->is_chain()) + { + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + } + if(molecule_1->is_chain()) + { + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + } + */ return false; } - + if(clb_1 == ClusterBlockId(721) || clb_2 == ClusterBlockId(721)) { + VTR_LOG("packing clb2 success, %s\n", cluster_ctx.clb_nlist.block_pb(clb_2)->name); + } mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); if (!mol_2_success) { + if(clb_1 == ClusterBlockId(721)) { + VTR_LOG("packing clb1 failed\n"); + } remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); @@ -239,11 +263,27 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; + /* + if(molecule_2->is_chain()) + { + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + } + if(molecule_1->is_chain()) + { + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + } + */ return false; } + if(clb_2 == ClusterBlockId(721) || clb_1 == ClusterBlockId(721)) { + VTR_LOG("packing clb1 succes, %s\n", cluster_ctx.clb_nlist.block_pb(clb_1)->name); + } //commit the move if succeeded or revert if failed 
VTR_ASSERT(mol_1_success && mol_2_success); + /* if(molecule_2->is_chain()) { free(clb_pb_1->name); @@ -254,7 +294,11 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } - + */ + if(clb_1 == ClusterBlockId(721)) + VTR_LOG("after clb1: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); + if(clb_2 == ClusterBlockId(721)) + VTR_LOG("after clb2: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); //If the move is done after packing not during it, some fixes need to be done on the clustered netlist if (!during_packing) { diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 329940c3f98..8da5370eb10 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -154,7 +154,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ enable_pin_feasibility_filter, 0, FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr); + temp_cluster_pr, false); // If clustering succeeds, add it to the clb netlist if (pack_result == BLK_PASSED) { @@ -220,7 +220,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, //false, helper_ctx.feasible_block_array_size, target_ext_pin_util, - temp_cluster_pr); + temp_cluster_pr, false); // If clustering succeeds, add it to the clb netlist if (pack_result == BLK_PASSED) { @@ -282,7 +282,7 @@ void revert_mol_move(const ClusterBlockId& old_clb, helper_ctx.enable_pin_feasibility_filter, helper_ctx.feasible_block_array_size, helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(old_clb)->name), - temp_cluster_pr_original); + temp_cluster_pr_original, false); VTR_ASSERT(pack_result == BLK_PASSED); //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. 
From 086ffb81c7a2ccd8bef74299418d84316f1b82b7 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Sun, 5 Feb 2023 23:04:27 -0500 Subject: [PATCH 028/188] solve the renaming issue and print a progress bar --- vpr/src/pack/cluster_util.cpp | 18 +++++------ vpr/src/pack/cluster_util.h | 2 +- vpr/src/pack/improvement/pack_utils.cpp | 20 ++++++++++++ vpr/src/pack/pack.cpp | 7 ++-- vpr/src/pack/re_cluster.cpp | 43 +++++++++++++------------ vpr/src/pack/re_cluster_util.cpp | 6 ++-- 6 files changed, 60 insertions(+), 36 deletions(-) diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index f59b70e5cd6..9f4a2fb6741 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -945,8 +945,7 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - bool during_recluster) { + PartitionRegion& temp_cluster_pr) { int molecule_size, failed_location; int i; enum e_block_pack_status block_pack_status; @@ -1089,19 +1088,20 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl VTR_ASSERT(block_pack_status == BLK_PASSED); if (molecule->is_chain()) { /* Chained molecules often take up lots of area and are important, -- * if a chain is packed in, want to rename logic block to match chain name */ + * - * if a chain is packed in, want to rename logic block to match chain name */ AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - + /* // Elgammal debugging if(strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) VTR_LOG("rename: %s\n", cur_pb->name); - - while (!during_recluster && cur_pb != nullptr) { + */ + while (cur_pb != nullptr) { 
free(cur_pb->name); cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + /* // Elgammal debugging if(cur_pb->is_root() && strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) VTR_LOG("\t %p\n", cur_pb); - //VTR_LOG("$ %s\n", cur_pb->name); + */ cur_pb = cur_pb->parent_pb; } @@ -1547,7 +1547,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.enable_pin_feasibility_filter, packer_opts.feasible_block_array_size, target_ext_pin_util, - temp_cluster_pr, false); + temp_cluster_pr); auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; VTR_ASSERT(blk_id); @@ -2152,7 +2152,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, enable_pin_feasibility_filter, feasible_block_array_size, FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, false); + temp_cluster_pr); success = (pack_result == BLK_PASSED); } diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 7967e727658..9521df5f30e 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -199,7 +199,7 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups); void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); -enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, t_pack_molecule* molecule, t_pb_graph_node** primitives_list, t_pb* pb, const int max_models, const int max_cluster_size, const ClusterBlockId clb_index, const int detailed_routing_stage, t_lb_router_data* router_data, int verbosity, bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, PartitionRegion& temp_cluster_pr, bool during_recluster); +enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, t_pack_molecule* molecule, t_pb_graph_node** primitives_list, t_pb* pb, const int 
max_models, const int max_cluster_size, const ClusterBlockId clb_index, const int detailed_routing_stage, t_lb_router_data* router_data, int verbosity, bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, PartitionRegion& temp_cluster_pr); void try_fill_cluster(const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 4d33bd1af62..85d9dafe53c 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -15,6 +15,7 @@ #include "vtr_time.h" //#include #include +void printProgressBar(double progress); void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); void init_multithreading_locks(); @@ -144,6 +145,9 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ } for (int i = 0; i < n; i++) { + if(thread_num == 0 && (i*10)%n == 0){ + printProgressBar(double(i)/n); + } new_locs.clear(); is_proposed = move_generator->propose_move(new_locs); if (!is_proposed) { @@ -171,3 +175,19 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ pack_stats.legal_moves += num_legal_moves; pack_stats.mu.unlock(); } + +#include +#include + +void printProgressBar(double progress) { + int barWidth = 70; + + VTR_LOG("["); + int pos = barWidth * progress; + for (int i = 0; i < barWidth; ++i) { + if (i < pos) VTR_LOG("="); + else if (i == pos) VTR_LOG(">"); + else VTR_LOG(" "); + } + VTR_LOG("] %zu %\n", int(progress * 100.0)); +} diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 6ba2c83bec7..bf01c67e690 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -268,19 +268,20 @@ bool try_pack(t_packer_opts* packer_opts, /* Packing iterative improvement can be done here */ 
/******************* Start *************************/ auto& cluster_ctx = g_vpr_ctx.clustering(); - + /* // Elgammal debugging for(auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); } - + */ VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); + /* // Elgammal debugging for(auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("@@@ block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); } - + */ for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 8438dbe73e2..db440b6c10f 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -205,10 +205,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, std::string clb_pb_1_name = (std::string)clb_pb_1->name; t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); std::string clb_pb_2_name = (std::string)clb_pb_2->name; + /* // Elgammal debugging if(clb_1 == ClusterBlockId(721)) VTR_LOG("before clb1: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); if(clb_2 == ClusterBlockId(721)) VTR_LOG("before clb2: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); + */ //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); @@ -220,9 +222,11 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //Add the atom to the new cluster mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); if (!mol_1_success) { + /* // Elgammal debugging if(clb_1 == 
ClusterBlockId(721) || clb_2 == ClusterBlockId(721)) { VTR_LOG("packing clb2 failed\n"); } + */ mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); @@ -231,28 +235,30 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; - /* - if(molecule_2->is_chain()) - { + + if (molecule_2->is_chain()) { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if(molecule_1->is_chain()) - { + if (molecule_1->is_chain()) { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } - */ + return false; } + /* // Elgammal debugging if(clb_1 == ClusterBlockId(721) || clb_2 == ClusterBlockId(721)) { VTR_LOG("packing clb2 success, %s\n", cluster_ctx.clb_nlist.block_pb(clb_2)->name); } + */ mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); if (!mol_2_success) { + /* //Elgammal debugging if(clb_1 == ClusterBlockId(721)) { VTR_LOG("packing clb1 failed\n"); } + */ remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); @@ -263,43 +269,40 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; - /* - 
if(molecule_2->is_chain()) - { + + if (molecule_2->is_chain()) { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if(molecule_1->is_chain()) - { + if (molecule_1->is_chain()) { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } - */ + return false; } + /* // Elgammal debugging if(clb_2 == ClusterBlockId(721) || clb_1 == ClusterBlockId(721)) { VTR_LOG("packing clb1 succes, %s\n", cluster_ctx.clb_nlist.block_pb(clb_1)->name); } - + */ //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && mol_2_success); - /* - if(molecule_2->is_chain()) - { + + if (molecule_2->is_chain()) { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if(molecule_1->is_chain()) - { + if (molecule_1->is_chain()) { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } - */ + /* //Elgammal debugging if(clb_1 == ClusterBlockId(721)) VTR_LOG("after clb1: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); if(clb_2 == ClusterBlockId(721)) VTR_LOG("after clb2: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); - + */ //If the move is done after packing not during it, some fixes need to be done on the clustered netlist if (!during_packing) { fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 8da5370eb10..329940c3f98 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -154,7 +154,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ enable_pin_feasibility_filter, 0, FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, false); + temp_cluster_pr); // If clustering succeeds, add it to the clb netlist if 
(pack_result == BLK_PASSED) { @@ -220,7 +220,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, //false, helper_ctx.feasible_block_array_size, target_ext_pin_util, - temp_cluster_pr, false); + temp_cluster_pr); // If clustering succeeds, add it to the clb netlist if (pack_result == BLK_PASSED) { @@ -282,7 +282,7 @@ void revert_mol_move(const ClusterBlockId& old_clb, helper_ctx.enable_pin_feasibility_filter, helper_ctx.feasible_block_array_size, helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(old_clb)->name), - temp_cluster_pr_original, false); + temp_cluster_pr_original); VTR_ASSERT(pack_result == BLK_PASSED); //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. From 3d1d5653d3df541480a641b1755d7acb1e04ba36 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Sun, 5 Feb 2023 23:05:09 -0500 Subject: [PATCH 029/188] fix formatting --- vpr/src/pack/improvement/pack_utils.cpp | 27 ++++++++++++++----------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 85d9dafe53c..555e9a994ba 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -145,8 +145,8 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ } for (int i = 0; i < n; i++) { - if(thread_num == 0 && (i*10)%n == 0){ - printProgressBar(double(i)/n); + if (thread_num == 0 && (i * 10) % n == 0) { + printProgressBar(double(i) / n); } new_locs.clear(); is_proposed = move_generator->propose_move(new_locs); @@ -180,14 +180,17 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ #include void printProgressBar(double progress) { - int barWidth = 70; - - VTR_LOG("["); - int pos = barWidth * progress; - for (int i = 0; i < barWidth; ++i) { - if (i < pos) VTR_LOG("="); - else if (i == pos) VTR_LOG(">"); - 
else VTR_LOG(" "); - } - VTR_LOG("] %zu %\n", int(progress * 100.0)); + int barWidth = 70; + + VTR_LOG("["); + int pos = barWidth * progress; + for (int i = 0; i < barWidth; ++i) { + if (i < pos) + VTR_LOG("="); + else if (i == pos) + VTR_LOG(">"); + else + VTR_LOG(" "); + } + VTR_LOG("] %zu %\n", int(progress * 100.0)); } From 1a523d66ce1f94ce838700531507357342f1bef8 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 6 Feb 2023 13:09:09 -0500 Subject: [PATCH 030/188] Fix the bug with more generic way --- vpr/src/pack/re_cluster.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index db440b6c10f..58fc50f9418 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -236,11 +236,13 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - if (molecule_2->is_chain()) { + //if (molecule_2->is_chain()) + { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if (molecule_1->is_chain()) { + //if (molecule_1->is_chain()) + { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } @@ -270,11 +272,13 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - if (molecule_2->is_chain()) { + //if (molecule_2->is_chain()) + { free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if (molecule_1->is_chain()) { + //if (molecule_1->is_chain()) + { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } @@ -289,11 +293,13 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && mol_2_success); - if (molecule_2->is_chain()) { + //if (molecule_2->is_chain()) + { 
free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); } - if (molecule_1->is_chain()) { + //if(molecule_1->is_chain()) + { free(clb_pb_2->name); cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); } From c77ffd5b2f3bf6d60a2cc45d779ab8688a074072 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 6 Feb 2023 16:34:02 -0500 Subject: [PATCH 031/188] update the evaluation function for terminals outside --- vpr/src/pack/improvement/pack_move_utils.cpp | 34 ++++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index d2f06dbebcd..37fa1cb733a 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -764,7 +764,7 @@ std::pair, int> get_max_value_pair(con bool evaluate_move_based_on_terminals_outside(const std::vector& new_locs) { auto& atom_ctx = g_vpr_ctx.atom(); - int pins_absorbed_before, pins_absorbed_after, pins_outside_before, pins_outside_after; + int pins_in1_before, pins_in2_before, pins_in1_after, pins_in2_after, pins_outside_before, pins_outside_after; double cost = 0; std::unordered_set moving_atoms; @@ -777,8 +777,8 @@ bool evaluate_move_based_on_terminals_outside(const std::vector moving_nets; for (auto& new_loc : new_locs) { - std::unordered_set moving_nets; auto cur_clb = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); // iterate over atoms in the moving molcule for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { @@ -786,13 +786,18 @@ bool evaluate_move_based_on_terminals_outside(const std::vector LARGE_FANOUT_LIMIT) + continue; + // Make sure that we didn't count this net before if (moving_nets.count(atom_net)) continue; moving_nets.insert(atom_net); - pins_absorbed_before = 0; - pins_absorbed_after = 0; + pins_in1_before = 0; + 
pins_in2_before = 0; + pins_in1_after = 0; + pins_in2_after = 0; pins_outside_before = 0; pins_outside_after = 0; @@ -801,25 +806,26 @@ bool evaluate_move_based_on_terminals_outside(const std::vector Date: Fri, 10 Feb 2023 11:46:17 -0500 Subject: [PATCH 032/188] [WIP] working on using packing ost functions to evaluate moves --- vpr/src/pack/improvement/packing_cost.cpp | 59 +++++++++++++++++++++++ vpr/src/pack/improvement/packing_cost.h | 23 +++++++++ 2 files changed, 82 insertions(+) create mode 100644 vpr/src/pack/improvement/packing_cost.cpp create mode 100644 vpr/src/pack/improvement/packing_cost.h diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp new file mode 100644 index 00000000000..3d923e31219 --- /dev/null +++ b/vpr/src/pack/improvement/packing_cost.cpp @@ -0,0 +1,59 @@ +#include "packing_cost.h" +#include "re_cluster_util.h" + +bool evaluate_move_based_on_attraction(const std::vector& proposed_moves) { + + float gain = 0; + + // Kepp track of all the moving atoms + std::unordered_set moving_atoms; + for(auto& proposed_move : proposed_moves) { + for(auto& atom : proposed_move.molecule_to_move->atom_block_ids) { + if(atom) + moving_atoms.insert(atom); + } + } + + for(auto& proposed_move : proposed_moves) { + const t_pack_molecule* moving_molecule = proposed_move.molecule_to_move; + ClusterBlockId original_clb = atom_to_cluster(proposed_move.molecule_to_move->atom_block_ids[proposed_move.molecule_to_move->root]); + ClusterBlockId proposed_clb = proposed_move.new_clb; + gain += calculate_molecule_attraction_to_cluster(moving_molecule, proposed_clb); + gain -= calculate_molecule_attraction_to_cluster(moving_molecule, original_clb); + } + return (gain > 0); +} + +float calculate_molecule_attraction_to_cluster(const t_pack_molecule* molecule, + ClusterBlockId clb) { + float attraction = 0; + for (auto& atom : molecule->atom_block_ids) { + if(atom) { + attraction += calculate_atom_attraction_to_cluster(atom, clb); 
+ } + } + return attraction; +} + +float calculate_gain_from_attractions(const t_packing_attraction& attractions, + AtomBlockId atom) { + auto& atom_ctx = g_vpr_ctx.atom(); + + float alpha = 0.75; + float beta = 0.9; + + float gain; + int num_used_pins = atom_ctx.nlist.block_pins(atom).size(); + gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) + / (num_used_pins); + + gain = alpha * attractions.timinggain + (1 - alpha) * gain; + return gain; +} + +float calculate_atom_attraction_to_cluster(AtomBlockId atom, + ClusterBlockId clb) { + t_packing_attraction attraction; + + return calculate_gain_from_attractions(attraction, atom); +} \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_cost.h b/vpr/src/pack/improvement/packing_cost.h new file mode 100644 index 00000000000..eb319431ca6 --- /dev/null +++ b/vpr/src/pack/improvement/packing_cost.h @@ -0,0 +1,23 @@ +#ifndef PACKING_COST_H +#define PACKING_COST_H + +#include "vpr_types.h" +#include "pack_move_utils.h" +struct t_packing_attraction { + float timinggain; + float connectiongain; + float sharinggain; +}; + +float calculate_gain_from_attractions(const t_packing_attraction& attraction, + AtomBlockId atom); + +float calculate_atom_attraction_to_cluster(AtomBlockId atom, + ClusterBlockId clb); + +float calculate_molecule_attraction_to_cluster(const t_pack_molecule* molecule, + ClusterBlockId clb); + +bool evaluate_move_based_on_attraction(const std::vector& proposed_moves); + +#endif \ No newline at end of file From e4fb89d3f873d0e60fa2b4a6b04a275a225701c2 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 13 Feb 2023 16:05:57 -0500 Subject: [PATCH 033/188] Using packing cost function to evaluate moves --- vpr/src/base/vpr_context.h | 4 + vpr/src/pack/cluster.cpp | 16 ++-- vpr/src/pack/improvement/pack_utils.cpp | 12 +++ vpr/src/pack/improvement/packing_cost.cpp | 82 ++++++++++++++++--- vpr/src/pack/improvement/packing_cost.h | 14 ++-- 
.../improvement/packing_move_generator.cpp | 21 +++++ .../pack/improvement/packing_move_generator.h | 26 ++++++ 7 files changed, 149 insertions(+), 26 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 57552a9ab73..94a2538b7aa 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -32,6 +32,7 @@ #include "noc_traffic_flows.h" #include "noc_routing.h" +class SetupTimingInfo; /** * @brief A Context is collection of state relating to a particular part of VPR * @@ -344,6 +345,9 @@ struct ClusteringHelperContext : public Context { // Only blocks that have connections between each others are added to this hash table // This may be useful for some type of packing moves. std::unordered_map, int, pair_hash> clb_conn_counts; + + std::unordered_map net_output_feeds_driving_block_input; + std::shared_ptr timing_info; }; /** diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 016f7265008..cea0869bbc8 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -130,7 +130,7 @@ std::map do_clustering(const t_packer_opts& pa const int verbosity = packer_opts.pack_verbosity; int unclustered_list_head_size; - std::unordered_map net_output_feeds_driving_block_input; + //std::unordered_map net_output_feeds_driving_block_input; cluster_stats.num_molecules_processed = 0; cluster_stats.mols_since_last_print = 0; @@ -153,7 +153,7 @@ std::map do_clustering(const t_packer_opts& pa helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; std::shared_ptr clustering_delay_calc; - std::shared_ptr timing_info; + //std::shared_ptr timing_info; // this data structure tracks the number of Logic Elements (LEs) used. It is // populated only for architectures which has LEs. 
The architecture is assumed @@ -200,7 +200,7 @@ std::map do_clustering(const t_packer_opts& pa check_for_duplicate_inputs (); #endif alloc_and_init_clustering(packer_opts, max_molecule_stats, molecule_head, - clustering_data, net_output_feeds_driving_block_input, + clustering_data, helper_ctx.net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); auto primitive_candidate_block_types = identify_primitive_candidate_block_types(); @@ -220,7 +220,7 @@ std::map do_clustering(const t_packer_opts& pa if (packer_opts.timing_driven) { calc_init_packing_timing(packer_opts, analysis_opts, expected_lowest_cost_pb_gnode, - clustering_delay_calc, timing_info, atom_criticality); + clustering_delay_calc, helper_ctx.timing_info, atom_criticality); } auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); @@ -289,9 +289,9 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven, packer_opts.connection_driven, high_fanout_threshold, - *timing_info, + *(helper_ctx.timing_info), attraction_groups, - net_output_feeds_driving_block_input); + helper_ctx.net_output_feeds_driving_block_input); helper_ctx.total_clb_num++; if (packer_opts.timing_driven) { @@ -353,14 +353,14 @@ std::map do_clustering(const t_packer_opts& pa allow_unrelated_clustering, high_fanout_threshold, is_clock, - timing_info, + helper_ctx.timing_info, router_data, target_ext_pin_util, temp_cluster_pr, block_pack_status, clustering_data.unclustered_list_head, unclustered_list_head_size, - net_output_feeds_driving_block_input, + helper_ctx.net_output_feeds_driving_block_input, primitive_candidate_block_types); } diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index 555e9a994ba..d792e3b934a 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -138,6 +138,18 @@ void 
try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ move_generator = std::make_unique(); else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalOutsideSwap") == 0) move_generator = std::make_unique(); + + else if (strcmp(move_type.c_str(), "randomCostEvaluationSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCostEvaluationSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameTypeCostEvaluationSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeCostEvaluationSwap") == 0) + move_generator = std::make_unique(); + else if (strcmp(move_type.c_str(), "semiDirectedSameSizeCostEvaluationSwap") == 0) + move_generator = std::make_unique(); + else { VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); VTR_LOG("Packing iterative improvement is aborted\n"); diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp index 3d923e31219..127af5dd208 100644 --- a/vpr/src/pack/improvement/packing_cost.cpp +++ b/vpr/src/pack/improvement/packing_cost.cpp @@ -2,34 +2,34 @@ #include "re_cluster_util.h" bool evaluate_move_based_on_attraction(const std::vector& proposed_moves) { - float gain = 0; // Kepp track of all the moving atoms std::unordered_set moving_atoms; - for(auto& proposed_move : proposed_moves) { - for(auto& atom : proposed_move.molecule_to_move->atom_block_ids) { - if(atom) + for (auto& proposed_move : proposed_moves) { + for (auto& atom : proposed_move.molecule_to_move->atom_block_ids) { + if (atom) moving_atoms.insert(atom); } } - for(auto& proposed_move : proposed_moves) { + for (auto& proposed_move : proposed_moves) { const t_pack_molecule* moving_molecule = proposed_move.molecule_to_move; ClusterBlockId original_clb = 
atom_to_cluster(proposed_move.molecule_to_move->atom_block_ids[proposed_move.molecule_to_move->root]); ClusterBlockId proposed_clb = proposed_move.new_clb; - gain += calculate_molecule_attraction_to_cluster(moving_molecule, proposed_clb); - gain -= calculate_molecule_attraction_to_cluster(moving_molecule, original_clb); + gain += calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, proposed_clb); + gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, original_clb); } return (gain > 0); } -float calculate_molecule_attraction_to_cluster(const t_pack_molecule* molecule, - ClusterBlockId clb) { +float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, + const t_pack_molecule* molecule, + ClusterBlockId clb) { float attraction = 0; for (auto& atom : molecule->atom_block_ids) { - if(atom) { - attraction += calculate_atom_attraction_to_cluster(atom, clb); + if (atom) { + attraction += calculate_atom_attraction_to_cluster(moving_atoms, atom, clb); } } return attraction; @@ -45,15 +45,71 @@ float calculate_gain_from_attractions(const t_packing_attraction& attractions, float gain; int num_used_pins = atom_ctx.nlist.block_pins(atom).size(); gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) - / (num_used_pins); + / (num_used_pins); gain = alpha * attractions.timinggain + (1 - alpha) * gain; return gain; } -float calculate_atom_attraction_to_cluster(AtomBlockId atom, +float calculate_atom_attraction_to_cluster(const std::unordered_set& moving_atoms, + AtomBlockId atom, ClusterBlockId clb) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + t_packing_attraction attraction; + for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { + auto net_id = atom_ctx.nlist.pin_net(atom_pin); + + if ((int)atom_ctx.nlist.net_pins(net_id).size() > HIGH_FANOUT_NET_THRESHOLD) + continue; + + int num_internal_connections = 0; + int 
num_stuck_connections = 0; + std::unordered_set connected_pins; + + // calculate sharing gain + auto pins = atom_ctx.nlist.net_pins(net_id); + if (helper_ctx.net_output_feeds_driving_block_input[net_id] != 0) + pins = atom_ctx.nlist.net_sinks(net_id); + + for (auto& pin : pins) { + auto blk_id = atom_ctx.nlist.pin_block(pin); + if (moving_atoms.count(blk_id)) + continue; + + auto cluster = atom_to_cluster(blk_id); + if (cluster == clb) { + attraction.sharinggain++; + num_internal_connections++; + connected_pins.insert(pin); + } else { + num_stuck_connections++; + } + } + + // calculate connection gain + for (auto& connected_pin : connected_pins) { + if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER || (atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK && atom_pin == atom_ctx.nlist.net_driver(net_id))) { + if (num_internal_connections > 1) + attraction.connectiongain -= 1 / (float)(1.5 * num_stuck_connections + 1 + 0.1); + attraction.connectiongain += 1 / (float)(1.5 * num_stuck_connections + 0.1); + } + } + + for (auto& connected_pin : connected_pins) { + if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER) { + float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); + attraction.timinggain = std::max(timinggain, attraction.timinggain); + } else { + VTR_ASSERT(atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK); + if (atom_pin == atom_ctx.nlist.net_driver(net_id)) { + float timinggain = helper_ctx.timing_info->setup_pin_criticality(connected_pin); + attraction.timinggain = std::max(timinggain, attraction.timinggain); + } + } + } + } return calculate_gain_from_attractions(attraction, atom); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_cost.h b/vpr/src/pack/improvement/packing_cost.h index eb319431ca6..79061f7026d 100644 --- a/vpr/src/pack/improvement/packing_cost.h +++ b/vpr/src/pack/improvement/packing_cost.h @@ -4,18 +4,22 @@ #include "vpr_types.h" #include "pack_move_utils.h" 
struct t_packing_attraction { - float timinggain; - float connectiongain; - float sharinggain; + float timinggain = 0; + float connectiongain = 0; + float sharinggain = 0; }; +const int HIGH_FANOUT_NET_THRESHOLD = 5; + float calculate_gain_from_attractions(const t_packing_attraction& attraction, AtomBlockId atom); -float calculate_atom_attraction_to_cluster(AtomBlockId atom, +float calculate_atom_attraction_to_cluster(const std::unordered_set& moving_atoms, + AtomBlockId atom, ClusterBlockId clb); -float calculate_molecule_attraction_to_cluster(const t_pack_molecule* molecule, +float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, + const t_pack_molecule* molecule, ClusterBlockId clb); bool evaluate_move_based_on_attraction(const std::vector& proposed_moves); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index 8dd69c10885..e6378a1106e 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -7,6 +7,7 @@ #include #include "re_cluster_util.h" #include "pack_move_utils.h" +#include "packing_cost.h" const int MAX_ITERATIONS = 10; @@ -290,4 +291,24 @@ bool quasiDirectedCompatibleTypeTerminalOutsidePackingSwap::evaluate_move(const bool quasiDirectedSameSizeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { return (evaluate_move_based_on_terminals_outside(new_locs)); +} + +bool randomCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_attraction(new_locs)); +} + +bool quasiDirectedCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_attraction(new_locs)); +} + +bool quasiDirectedSameTypeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_attraction(new_locs)); +} + +bool 
quasiDirectedCompatibleTypeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_attraction(new_locs)); +} + +bool quasiDirectedSameSizeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { + return (evaluate_move_based_on_attraction(new_locs)); } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h index abbd7653c24..13b399a96c1 100644 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ b/vpr/src/pack/improvement/packing_move_generator.h @@ -181,4 +181,30 @@ class quasiDirectedSameSizeTerminalOutsidePackingSwap : public quasiDirectedSame public: bool evaluate_move(const std::vector& new_locs) override; }; + +/************ Moves that evaluate on Packing cost function *********************/ +class randomCostEvaluationPackingSwap : public randomPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCostEvaluationPackingSwap : public quasiDirectedPackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameTypeCostEvaluationPackingSwap : public quasiDirectedSameTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedCompatibleTypeCostEvaluationPackingSwap : public quasiDirectedCompatibleTypePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; + +class quasiDirectedSameSizeCostEvaluationPackingSwap : public quasiDirectedSameSizePackingSwap { + public: + bool evaluate_move(const std::vector& new_locs) override; +}; #endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From bb598beb4192325dd2720350037bedcc8360af71 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 22 Feb 2023 23:31:30 -0500 Subject: [PATCH 034/188] Fix a bug in the API -- setting the mode for all the pb hierarchy levels not 
just the child node --- vpr/src/pack/re_cluster_util.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 329940c3f98..1bbb2143c09 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -65,10 +65,20 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, t_lb_router_data*& router_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + + for (int i_atom = 0; i_atom < molecule_size; i_atom++) { + if (molecule->atom_block_ids[i_atom]) { + auto it = old_clb_atoms->find(molecule->atom_block_ids[i_atom]); + if (it != old_clb_atoms->end()) + old_clb_atoms->erase(molecule->atom_block_ids[i_atom]); + } + } + + //re-build router_data structure for this cluster if (!router_data_ready) router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - + /* //remove atom from router_data for (int i_atom = 0; i_atom < molecule_size; i_atom++) { if (molecule->atom_block_ids[i_atom]) { @@ -78,6 +88,7 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, old_clb_atoms->erase(molecule->atom_block_ids[i_atom]); } } + */ update_cluster_pb_stats(molecule, molecule_size, old_clb, false); } @@ -100,6 +111,7 @@ void commit_mol_move(const ClusterBlockId& old_clb, t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms) { //build data structures used by intra-logic block router auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& atom_ctx = g_vpr_ctx.atom(); auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); @@ -109,6 +121,11 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr for (auto atom_id : *clb_atoms) { add_atom_as_target(router_data, atom_id); + const t_pb* pb = 
atom_ctx.lookup.atom_pb(atom_id); + while(pb) { + set_reset_pb_modes(router_data, pb, true); + pb = pb->parent_pb; + } } return (router_data); } @@ -203,8 +220,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, return false; //re-build router_data structure for this cluster - if (!is_swap) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); + router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][block_type->index]), molecule, From c256c116af9d61d73bfb8ab26cb504047c11b4c1 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Thu, 23 Feb 2023 11:30:00 -0500 Subject: [PATCH 035/188] Fixing issues with move_mol_to_new_cluster function * update clb-->atoms lookup table * update the cluster pb stats data structure --- vpr/src/pack/re_cluster.cpp | 7 +++++++ vpr/src/pack/re_cluster_util.cpp | 32 ++++++++++++++++---------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 58fc50f9418..434b79a4ee2 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -75,6 +75,13 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, //Commit or revert the move if (is_created) { commit_mol_move(old_clb, new_clb, during_packing, true); + // Update the clb-->atoms lookup table + helper_ctx.atoms_lookup.resize(helper_ctx.total_clb_num); + for (int i_atom = 0; i_atom < molecule_size; ++i_atom) { + if (molecule->atom_block_ids[i_atom]) { + helper_ctx.atoms_lookup[new_clb].insert(molecule->atom_block_ids[i_atom]); + } + } VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data, thread_id); diff --git a/vpr/src/pack/re_cluster_util.cpp 
b/vpr/src/pack/re_cluster_util.cpp index 1bbb2143c09..7837dd74e08 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -65,7 +65,6 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, t_lb_router_data*& router_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { if (molecule->atom_block_ids[i_atom]) { auto it = old_clb_atoms->find(molecule->atom_block_ids[i_atom]); @@ -74,21 +73,10 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, } } - //re-build router_data structure for this cluster if (!router_data_ready) router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - /* - //remove atom from router_data - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i_atom]); - auto it = old_clb_atoms->find(molecule->atom_block_ids[i_atom]); - if (it != old_clb_atoms->end()) - old_clb_atoms->erase(molecule->atom_block_ids[i_atom]); - } - } - */ + update_cluster_pb_stats(molecule, molecule_size, old_clb, false); } @@ -122,7 +110,7 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr for (auto atom_id : *clb_atoms) { add_atom_as_target(router_data, atom_id); const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); - while(pb) { + while (pb) { set_reset_pb_modes(router_data, pb, true); pb = pb->parent_pb; } @@ -130,7 +118,18 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr return (router_data); } -bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, const int mode, const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, PartitionRegion& temp_cluster_pr, int thread_id) { +bool 
start_new_cluster_for_mol(t_pack_molecule* molecule, + const t_logical_block_type_ptr& type, + const int mode, + const int feasible_block_array_size, + bool enable_pin_feasibility_filter, + ClusterBlockId clb_index, + bool during_packing, + int verbosity, + t_clustering_data& clustering_data, + t_lb_router_data** router_data, + PartitionRegion& temp_cluster_pr, + int thread_id) { auto& atom_ctx = g_vpr_ctx.atom(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); @@ -184,6 +183,8 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ pb->name = vtr::strdup(new_name.c_str()); clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); helper_ctx.total_clb_num++; + int molecule_size = get_array_size_of_molecule(molecule); + update_cluster_pb_stats(molecule, molecule_size, clb_index, true); //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. 
if (during_packing) { @@ -657,7 +658,6 @@ void commit_mol_removal(const t_pack_molecule* molecule, t_lb_router_data*& router_data, t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { if (molecule->atom_block_ids[i_atom]) { revert_place_atom_block(molecule->atom_block_ids[i_atom], router_data); From a775f91e662ad4d44cda4b6861c6162c3b24061b Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 28 Feb 2023 00:26:53 -0500 Subject: [PATCH 036/188] commit the version of the packing cost evaluation before updating it --- vpr/src/base/vpr_context.h | 2 + vpr/src/pack/cluster.cpp | 1 + vpr/src/pack/improvement/packing_cost.cpp | 50 +++++++++++++--- vpr/src/pack/pack.cpp | 70 +++++++++++++++++++++-- 4 files changed, 109 insertions(+), 14 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 94a2538b7aa..47fb12956f2 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -348,6 +348,8 @@ struct ClusteringHelperContext : public Context { std::unordered_map net_output_feeds_driving_block_input; std::shared_ptr timing_info; + t_pack_high_fanout_thresholds high_fanout_thresholds; + bool timing_driven; }; /** diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index cea0869bbc8..7111eda41f9 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -151,6 +151,7 @@ std::map do_clustering(const t_packer_opts& pa helper_ctx.enable_pin_feasibility_filter = packer_opts.enable_pin_feasibility_filter; helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; + helper_ctx.timing_driven = packer_opts.timing_driven; std::shared_ptr clustering_delay_calc; //std::shared_ptr timing_info; diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp index 127af5dd208..731267b6727 100644 --- a/vpr/src/pack/improvement/packing_cost.cpp +++ 
b/vpr/src/pack/improvement/packing_cost.cpp @@ -2,24 +2,33 @@ #include "re_cluster_util.h" bool evaluate_move_based_on_attraction(const std::vector& proposed_moves) { + auto& atom_ctx = g_vpr_ctx.atom(); float gain = 0; - // Kepp track of all the moving atoms + // Keep track of all the moving atoms std::unordered_set moving_atoms; + std::unordered_set moving_nets; + for (auto& proposed_move : proposed_moves) { for (auto& atom : proposed_move.molecule_to_move->atom_block_ids) { - if (atom) + if (atom) { moving_atoms.insert(atom); + } } } for (auto& proposed_move : proposed_moves) { const t_pack_molecule* moving_molecule = proposed_move.molecule_to_move; - ClusterBlockId original_clb = atom_to_cluster(proposed_move.molecule_to_move->atom_block_ids[proposed_move.molecule_to_move->root]); + ClusterBlockId original_clb = atom_to_cluster(moving_molecule->atom_block_ids[moving_molecule->root]); ClusterBlockId proposed_clb = proposed_move.new_clb; gain += calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, proposed_clb); gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, original_clb); } + /* + bool good = evaluate_move_based_on_cutsize(proposed_moves); + if(good != (gain>0)) + VTR_LOG("mismatch\n"); + */ return (gain > 0); } @@ -43,7 +52,10 @@ float calculate_gain_from_attractions(const t_packing_attraction& attractions, float beta = 0.9; float gain; - int num_used_pins = atom_ctx.nlist.block_pins(atom).size(); + int num_used_input_pins = atom_ctx.nlist.block_input_pins(atom).size(); + int num_used_output_pins = atom_ctx.nlist.block_output_pins(atom).size(); + int num_used_pins = num_used_input_pins + num_used_output_pins; + gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) / (num_used_pins); @@ -55,6 +67,7 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set AtomBlockId atom, ClusterBlockId clb) { auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = 
g_vpr_ctx.clustering(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); t_packing_attraction attraction; @@ -62,12 +75,13 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { auto net_id = atom_ctx.nlist.pin_net(atom_pin); - if ((int)atom_ctx.nlist.net_pins(net_id).size() > HIGH_FANOUT_NET_THRESHOLD) + if ((int)atom_ctx.nlist.net_pins(net_id).size() > helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb)->name)) continue; - int num_internal_connections = 0; + //int num_internal_connections = 0; int num_stuck_connections = 0; std::unordered_set connected_pins; + bool net_shared = false; // calculate sharing gain auto pins = atom_ctx.nlist.net_pins(net_id); @@ -81,14 +95,30 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set auto cluster = atom_to_cluster(blk_id); if (cluster == clb) { - attraction.sharinggain++; - num_internal_connections++; - connected_pins.insert(pin); + if(!net_shared) { + net_shared = true; + attraction.sharinggain++; + } + if (helper_ctx.timing_driven) { + if (atom_ctx.nlist.pin_type(pin) == PinType::DRIVER) { + float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); + attraction.timinggain = std::max(timinggain, attraction.timinggain); + } else { + VTR_ASSERT(atom_ctx.nlist.pin_type(pin) == PinType::SINK); + if (atom_pin == atom_ctx.nlist.net_driver(net_id)) { + float timinggain = helper_ctx.timing_info->setup_pin_criticality(pin); + attraction.timinggain = std::max(timinggain, attraction.timinggain); + } + } + } } else { num_stuck_connections++; } } + attraction.connectiongain += 1/(float)(0.1 + num_stuck_connections); + + /* // calculate connection gain for (auto& connected_pin : connected_pins) { if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER || (atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK && atom_pin == atom_ctx.nlist.net_driver(net_id))) { @@ -98,6 +128,7 @@ float 
calculate_atom_attraction_to_cluster(const std::unordered_set } } + // calculate timing gain for (auto& connected_pin : connected_pins) { if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER) { float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); @@ -110,6 +141,7 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set } } } + */ } return calculate_gain_from_attractions(attraction, atom); } \ No newline at end of file diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index bf01c67e690..48aa6be783a 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -25,6 +25,7 @@ #include "re_cluster.h" #include "pack_utils.h" +#include "re_cluster_util.h" /* #define DUMP_PB_GRAPH 1 */ /* #define DUMP_BLIF_INPUT 1 */ @@ -115,10 +116,10 @@ bool try_pack(t_packer_opts* packer_opts, } helper_ctx.target_external_pin_util = parse_target_external_pin_util(packer_opts->target_external_pin_util); - t_pack_high_fanout_thresholds high_fanout_thresholds = parse_high_fanout_thresholds(packer_opts->high_fanout_threshold); + helper_ctx.high_fanout_thresholds = parse_high_fanout_thresholds(packer_opts->high_fanout_threshold); VTR_LOG("Packing with pin utilization targets: %s\n", target_external_pin_util_to_string(helper_ctx.target_external_pin_util).c_str()); - VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds_to_string(high_fanout_thresholds).c_str()); + VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds_to_string(helper_ctx.high_fanout_thresholds).c_str()); bool allow_unrelated_clustering = false; if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) { @@ -151,7 +152,7 @@ bool try_pack(t_packer_opts* packer_opts, balance_block_type_util, lb_type_rr_graphs, helper_ctx.target_external_pin_util, - high_fanout_thresholds, + helper_ctx.high_fanout_thresholds, attraction_groups, floorplan_regions_overfull, clustering_data); @@ -268,11 +269,70 @@ bool 
try_pack(t_packer_opts* packer_opts, /* Packing iterative improvement can be done here */ /******************* Start *************************/ auto& cluster_ctx = g_vpr_ctx.clustering(); - /* // Elgammal debugging - for(auto& clb : cluster_ctx.clb_nlist.blocks()) { + // Elgammal debugging + /* + for (auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); } */ + /* + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + t_pack_molecule* mol = rng.first->second; + VTR_LOG("Pack move is starting:\n\n"); + bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); + mol = rng.first->second; + moved = move_mol_to_existing_cluster(mol, + ClusterBlockId(43), + true, + 0, + clustering_data, + 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + mol = rng.first->second; + auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); + t_pack_molecule* mol2 = rng2.first->second; + moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + */ + + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); + t_pack_molecule* mol = rng.first->second; + auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(42)); + t_pack_molecule* mol2 = rng2.first->second; + bool moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! 
:((\n"); + + /* + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + VTR_LOG("\n# block id = %d\n", blk_id); + VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); + for (auto atom : *cluster_to_atoms(blk_id)) { + VTR_LOG("\tatom = %d\n", atom); + for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { + VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); + } + } + } + */ + VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); From e2abf60d18353d74813e4c45ee1a85e0d51fbd4a Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 1 Mar 2023 01:22:41 -0500 Subject: [PATCH 037/188] Fixing the function to evaluate the moves based on the cutsize --- vpr/src/pack/improvement/pack_move_utils.cpp | 79 ++++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index 37fa1cb733a..db540855322 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -40,56 +40,55 @@ int calculate_cutsize_of_clb(ClusterBlockId clb_index) { int calculate_cutsize_change(const std::vector& new_locs) { auto& atom_ctx = g_vpr_ctx.atom(); - // initialize the old and new cut sizes - int change_cutsize = 0; + std::unordered_set moving_atoms; + std::unordered_set moving_nets; + int cutsize_change = 0; - // define some temporary - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - std::set net_blocks; - std::map nets_between_old_new_blks; + auto clb_1 = new_locs[0].new_clb; + auto clb_2 = new_locs[1].new_clb; for (auto& new_loc : new_locs) { - ClusterBlockId new_block_id = new_loc.new_clb; - ClusterBlockId old_block_id = 
atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (!moving_atom) - continue; - for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - net_blocks.clear(); - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (cur_atom == moving_atom) - continue; - - cur_clb = atom_to_cluster(cur_atom); - net_blocks.insert(cur_clb); - } - if (net_blocks.size() == 1 && *(net_blocks.begin()) == old_block_id) - change_cutsize += 1; - else if (net_blocks.size() == 1 && *(net_blocks.begin()) == new_block_id) { - change_cutsize -= 1; - if (nets_between_old_new_blks.find(atom_net) == nets_between_old_new_blks.end()) - nets_between_old_new_blks.insert(std::make_pair(atom_net, 1)); - else - nets_between_old_new_blks[atom_net]++; + for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { + if (atom) { + moving_atoms.insert(atom); + for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { + auto atom_net = atom_ctx.nlist.pin_net(atom_pin); + if (atom_net && atom_ctx.nlist.net_pins(atom_net).size() < LARGE_FANOUT_LIMIT) + moving_nets.insert(atom_net); } } } } - for (auto& direct_conn : nets_between_old_new_blks) { - if (direct_conn.second > 1) - change_cutsize += 2; + for (auto& net_id : moving_nets) { + bool net_has_pin_outside = false; + std::unordered_set clbs_before; + std::unordered_set clbs_after; + + for (auto& pin_id : atom_ctx.nlist.net_pins(net_id)) { + if (net_has_pin_outside) + break; + + auto atom_blk_id = atom_ctx.nlist.pin_block(pin_id); + auto clb = atom_to_cluster(atom_blk_id); + if (moving_atoms.count(atom_blk_id) == 0) { // this atom is NOT one of the moving blocks + clbs_before.insert(clb); + clbs_after.insert(clb); + } else { // this atom is one 
of the moving blocks + clbs_before.insert(clb); + if (clb == clb_1) + clbs_after.insert(clb_2); + else + clbs_after.insert(clb_1); + } + } + if (clbs_before.size() == 1 && clbs_after.size() > 1) + cutsize_change++; + else if (clbs_before.size() > 1 && clbs_after.size() == 1) + cutsize_change--; } - return change_cutsize; + return cutsize_change; } - int absorbed_conn_change(const std::vector& new_locs) { auto& atom_ctx = g_vpr_ctx.atom(); From a8fa3469d0aa9a9d0b6699b66b03e968d3c4a159 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 1 Mar 2023 15:17:54 -0500 Subject: [PATCH 038/188] fix formatting and some debugging code --- vpr/src/pack/improvement/packing_cost.cpp | 56 ++++++------- vpr/src/pack/pack.cpp | 96 +++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp index 731267b6727..4ca0a42ab7b 100644 --- a/vpr/src/pack/improvement/packing_cost.cpp +++ b/vpr/src/pack/improvement/packing_cost.cpp @@ -24,11 +24,11 @@ bool evaluate_move_based_on_attraction(const std::vector& pr gain += calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, proposed_clb); gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, original_clb); } - /* + bool good = evaluate_move_based_on_cutsize(proposed_moves); - if(good != (gain>0)) + if (good != (gain > 0)) VTR_LOG("mismatch\n"); - */ + return (gain > 0); } @@ -95,7 +95,7 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set auto cluster = atom_to_cluster(blk_id); if (cluster == clb) { - if(!net_shared) { + if (!net_shared) { net_shared = true; attraction.sharinggain++; } @@ -116,32 +116,32 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set } } - attraction.connectiongain += 1/(float)(0.1 + num_stuck_connections); + attraction.connectiongain += 1 / (float)(0.1 + num_stuck_connections); /* - // calculate connection gain - for (auto& 
connected_pin : connected_pins) { - if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER || (atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK && atom_pin == atom_ctx.nlist.net_driver(net_id))) { - if (num_internal_connections > 1) - attraction.connectiongain -= 1 / (float)(1.5 * num_stuck_connections + 1 + 0.1); - attraction.connectiongain += 1 / (float)(1.5 * num_stuck_connections + 0.1); - } - } - - // calculate timing gain - for (auto& connected_pin : connected_pins) { - if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER) { - float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); - attraction.timinggain = std::max(timinggain, attraction.timinggain); - } else { - VTR_ASSERT(atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK); - if (atom_pin == atom_ctx.nlist.net_driver(net_id)) { - float timinggain = helper_ctx.timing_info->setup_pin_criticality(connected_pin); - attraction.timinggain = std::max(timinggain, attraction.timinggain); - } - } - } - */ + * // calculate connection gain + * for (auto& connected_pin : connected_pins) { + * if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER || (atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK && atom_pin == atom_ctx.nlist.net_driver(net_id))) { + * if (num_internal_connections > 1) + * attraction.connectiongain -= 1 / (float)(1.5 * num_stuck_connections + 1 + 0.1); + * attraction.connectiongain += 1 / (float)(1.5 * num_stuck_connections + 0.1); + * } + * } + * + * // calculate timing gain + * for (auto& connected_pin : connected_pins) { + * if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER) { + * float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); + * attraction.timinggain = std::max(timinggain, attraction.timinggain); + * } else { + * VTR_ASSERT(atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK); + * if (atom_pin == atom_ctx.nlist.net_driver(net_id)) { + * float timinggain = 
helper_ctx.timing_info->setup_pin_criticality(connected_pin); + * attraction.timinggain = std::max(timinggain, attraction.timinggain); + * } + * } + * } + */ } return calculate_gain_from_attractions(attraction, atom); } \ No newline at end of file diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 48aa6be783a..866c69e5baf 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -271,44 +271,44 @@ bool try_pack(t_packer_opts* packer_opts, auto& cluster_ctx = g_vpr_ctx.clustering(); // Elgammal debugging /* - for (auto& clb : cluster_ctx.clb_nlist.blocks()) { - VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); - } - */ + * for (auto& clb : cluster_ctx.clb_nlist.blocks()) { + * VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); + * } + */ /* - auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - t_pack_molecule* mol = rng.first->second; - VTR_LOG("Pack move is starting:\n\n"); - bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); - mol = rng.first->second; - moved = move_mol_to_existing_cluster(mol, - ClusterBlockId(43), - true, - 0, - clustering_data, - 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - mol = rng.first->second; - auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); - t_pack_molecule* mol2 = rng2.first->second; - moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! 
:((\n"); - */ + * auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + * t_pack_molecule* mol = rng.first->second; + * VTR_LOG("Pack move is starting:\n\n"); + * bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); + * if (moved) + * VTR_LOG("Move is Done :)\n"); + * else + * VTR_LOG("Move failed! :((\n"); + * + * rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); + * mol = rng.first->second; + * moved = move_mol_to_existing_cluster(mol, + * ClusterBlockId(43), + * true, + * 0, + * clustering_data, + * 0); + * if (moved) + * VTR_LOG("Move is Done :)\n"); + * else + * VTR_LOG("Move failed! :((\n"); + * + * + * rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + * mol = rng.first->second; + * auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); + * t_pack_molecule* mol2 = rng2.first->second; + * moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); + * if (moved) + * VTR_LOG("Move is Done :)\n"); + * else + * VTR_LOG("Move failed! :((\n"); + */ auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); t_pack_molecule* mol = rng.first->second; @@ -321,17 +321,17 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Move failed! 
:((\n"); /* - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - VTR_LOG("\n# block id = %d\n", blk_id); - VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); - for (auto atom : *cluster_to_atoms(blk_id)) { - VTR_LOG("\tatom = %d\n", atom); - for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { - VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); - } - } - } - */ + * for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + * VTR_LOG("\n# block id = %d\n", blk_id); + * VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); + * for (auto atom : *cluster_to_atoms(blk_id)) { + * VTR_LOG("\tatom = %d\n", atom); + * for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { + * VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); + * } + * } + * } + */ VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); From d0a9e38472d50eb2d2b650f3b4c1e1feb19d3541 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 1 Mar 2023 17:05:58 -0500 Subject: [PATCH 039/188] update the cost function to evaluate the packing move --- vpr/src/pack/improvement/packing_cost.cpp | 136 ++++++++++------------ vpr/src/pack/improvement/packing_cost.h | 9 +- 2 files changed, 62 insertions(+), 83 deletions(-) diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp index 4ca0a42ab7b..b6ca6723315 100644 --- a/vpr/src/pack/improvement/packing_cost.cpp +++ b/vpr/src/pack/improvement/packing_cost.cpp @@ -7,7 +7,6 @@ bool evaluate_move_based_on_attraction(const std::vector& pr // Keep track of all the moving atoms std::unordered_set 
moving_atoms; - std::unordered_set moving_nets; for (auto& proposed_move : proposed_moves) { for (auto& atom : proposed_move.molecule_to_move->atom_block_ids) { @@ -21,66 +20,42 @@ bool evaluate_move_based_on_attraction(const std::vector& pr const t_pack_molecule* moving_molecule = proposed_move.molecule_to_move; ClusterBlockId original_clb = atom_to_cluster(moving_molecule->atom_block_ids[moving_molecule->root]); ClusterBlockId proposed_clb = proposed_move.new_clb; - gain += calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, proposed_clb); - gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_molecule, original_clb); - } - bool good = evaluate_move_based_on_cutsize(proposed_moves); - if (good != (gain > 0)) - VTR_LOG("mismatch\n"); + std::unordered_set moving_nets; + for (auto& atom : moving_molecule->atom_block_ids) { + if (atom) { + for (auto& pin : atom_ctx.nlist.block_pins(atom)) { + auto net_id = atom_ctx.nlist.pin_net(pin); + if (net_id) + moving_nets.insert(net_id); + } + } + } + gain += calculate_molecule_attraction_to_cluster(moving_atoms, moving_nets, moving_molecule, proposed_clb); + gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_nets, moving_molecule, original_clb); + } return (gain > 0); } float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, + const std::unordered_set& moving_nets, const t_pack_molecule* molecule, ClusterBlockId clb) { - float attraction = 0; - for (auto& atom : molecule->atom_block_ids) { - if (atom) { - attraction += calculate_atom_attraction_to_cluster(moving_atoms, atom, clb); - } - } - return attraction; -} - -float calculate_gain_from_attractions(const t_packing_attraction& attractions, - AtomBlockId atom) { - auto& atom_ctx = g_vpr_ctx.atom(); - - float alpha = 0.75; - float beta = 0.9; - - float gain; - int num_used_input_pins = atom_ctx.nlist.block_input_pins(atom).size(); - int num_used_output_pins = 
atom_ctx.nlist.block_output_pins(atom).size(); - int num_used_pins = num_used_input_pins + num_used_output_pins; - - gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) - / (num_used_pins); - - gain = alpha * attractions.timinggain + (1 - alpha) * gain; - return gain; -} - -float calculate_atom_attraction_to_cluster(const std::unordered_set& moving_atoms, - AtomBlockId atom, - ClusterBlockId clb) { auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + float gain = 0; t_packing_attraction attraction; - for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { - auto net_id = atom_ctx.nlist.pin_net(atom_pin); - + for (auto& net_id : moving_nets) { if ((int)atom_ctx.nlist.net_pins(net_id).size() > helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb)->name)) continue; - //int num_internal_connections = 0; + std::unordered_set connected_moving_blocks; + int num_stuck_connections = 0; - std::unordered_set connected_pins; bool net_shared = false; // calculate sharing gain @@ -90,8 +65,10 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set for (auto& pin : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin); - if (moving_atoms.count(blk_id)) + if (moving_atoms.count(blk_id)) { + connected_moving_blocks.insert(blk_id); continue; + } auto cluster = atom_to_cluster(blk_id); if (cluster == clb) { @@ -100,48 +77,53 @@ float calculate_atom_attraction_to_cluster(const std::unordered_set attraction.sharinggain++; } if (helper_ctx.timing_driven) { - if (atom_ctx.nlist.pin_type(pin) == PinType::DRIVER) { - float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); - attraction.timinggain = std::max(timinggain, attraction.timinggain); - } else { - VTR_ASSERT(atom_ctx.nlist.pin_type(pin) == PinType::SINK); - if (atom_pin == atom_ctx.nlist.net_driver(net_id)) { + if 
(atom_ctx.nlist.pin_type(pin) == PinType::SINK) { + auto net_driver_block = atom_ctx.nlist.net_driver_block(net_id); + if (moving_atoms.count(net_driver_block) != 0) { float timinggain = helper_ctx.timing_info->setup_pin_criticality(pin); attraction.timinggain = std::max(timinggain, attraction.timinggain); } + } else if (atom_ctx.nlist.pin_type(pin) == PinType::DRIVER) { + for (auto& pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto net_sink_block = atom_ctx.nlist.pin_block(pin_id); + if (moving_atoms.count(net_sink_block) != 0) { + float timinggain = helper_ctx.timing_info->setup_pin_criticality(pin_id); + attraction.timinggain = std::max(timinggain, attraction.timinggain); + } + } } } } else { num_stuck_connections++; } } - attraction.connectiongain += 1 / (float)(0.1 + num_stuck_connections); + } + + gain += calculate_gain_from_attractions(attraction, molecule); + + return gain; +} + +float calculate_gain_from_attractions(const t_packing_attraction& attractions, + const t_pack_molecule* molecule) { + auto& atom_ctx = g_vpr_ctx.atom(); + + float alpha = 0.75; + float beta = 0.9; - /* - * // calculate connection gain - * for (auto& connected_pin : connected_pins) { - * if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER || (atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK && atom_pin == atom_ctx.nlist.net_driver(net_id))) { - * if (num_internal_connections > 1) - * attraction.connectiongain -= 1 / (float)(1.5 * num_stuck_connections + 1 + 0.1); - * attraction.connectiongain += 1 / (float)(1.5 * num_stuck_connections + 0.1); - * } - * } - * - * // calculate timing gain - * for (auto& connected_pin : connected_pins) { - * if (atom_ctx.nlist.pin_type(connected_pin) == PinType::DRIVER) { - * float timinggain = helper_ctx.timing_info->setup_pin_criticality(atom_pin); - * attraction.timinggain = std::max(timinggain, attraction.timinggain); - * } else { - * VTR_ASSERT(atom_ctx.nlist.pin_type(connected_pin) == PinType::SINK); - * if (atom_pin == 
atom_ctx.nlist.net_driver(net_id)) { - * float timinggain = helper_ctx.timing_info->setup_pin_criticality(connected_pin); - * attraction.timinggain = std::max(timinggain, attraction.timinggain); - * } - * } - * } - */ + float gain; + int num_used_pins = 0; + for (auto& atom : molecule->atom_block_ids) { + if (atom) { + num_used_pins += atom_ctx.nlist.block_input_pins(atom).size(); + num_used_pins += atom_ctx.nlist.block_output_pins(atom).size(); + } } - return calculate_gain_from_attractions(attraction, atom); + + gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) + / (num_used_pins); + + gain = alpha * attractions.timinggain + (1 - alpha) * gain; + return gain; } \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_cost.h b/vpr/src/pack/improvement/packing_cost.h index 79061f7026d..606d6d0dd38 100644 --- a/vpr/src/pack/improvement/packing_cost.h +++ b/vpr/src/pack/improvement/packing_cost.h @@ -11,14 +11,11 @@ struct t_packing_attraction { const int HIGH_FANOUT_NET_THRESHOLD = 5; -float calculate_gain_from_attractions(const t_packing_attraction& attraction, - AtomBlockId atom); - -float calculate_atom_attraction_to_cluster(const std::unordered_set& moving_atoms, - AtomBlockId atom, - ClusterBlockId clb); +float calculate_gain_from_attractions(const t_packing_attraction& attractions, + const t_pack_molecule* molecule); float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, + const std::unordered_set& moving_nets, const t_pack_molecule* molecule, ClusterBlockId clb); From 618cfb78374c0c89c9f0c175aaffd93d3749787e Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Fri, 17 Mar 2023 21:37:22 -0400 Subject: [PATCH 040/188] fix memory leaks --- vpr/src/pack/improvement/pack_utils.cpp | 11 +++++++++++ vpr/src/pack/pack.cpp | 5 +++++ vpr/src/pack/re_cluster.cpp | 2 +- vpr/src/pack/re_cluster_util.cpp | 11 +++++------ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git 
a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index d792e3b934a..f89d2813590 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -18,6 +18,7 @@ void printProgressBar(double progress); void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); void init_multithreading_locks(); +void free_multithreading_locks(); void init_multithreading_locks() { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); @@ -29,6 +30,13 @@ void init_multithreading_locks() { } } +void free_multithreading_locks() { + auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); + for (auto& m : packing_multithreading_ctx.mu) { + delete m; + } +} + void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, int) { /* * auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -67,6 +75,9 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ packer_opts.pack_num_moves, pack_stats.good_moves, pack_stats.legal_moves); + + delete[] my_threads; + free_multithreading_locks(); } void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 866c69e5baf..96eb486a930 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -354,6 +354,11 @@ bool try_pack(t_packer_opts* packer_opts, // Free Data Structures free_clustering_data(*packer_opts, clustering_data); + for (int i = 0; i < packer_opts->pack_num_threads; i++) { + free_cluster_placement_stats(helper_ctx.cluster_placement_stats[i]); + delete[] helper_ctx.primitives_list[i]; + } + VTR_LOG("\n"); VTR_LOG("Netlist conversion complete.\n"); VTR_LOG("\n"); diff --git a/vpr/src/pack/re_cluster.cpp 
b/vpr/src/pack/re_cluster.cpp index 434b79a4ee2..3a277c05ce2 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -268,7 +268,7 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, VTR_LOG("packing clb1 failed\n"); } */ - remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); + remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, false, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 7837dd74e08..24a77bd6d55 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -205,7 +205,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, return (pack_result == BLK_PASSED); } -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id) { +bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool /*is_swap*/, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); @@ -259,11 +259,9 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, update_cluster_pb_stats(molecule, molecule_size, new_clb, 
true); } - if (!is_swap) { - //Free clustering router data - free_router_data(router_data); - router_data = nullptr; - } + //Free clustering router data + free_router_data(router_data); + router_data = nullptr; return (pack_result == BLK_PASSED); } @@ -675,6 +673,7 @@ void commit_mol_removal(const t_pack_molecule* molecule, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); } + free_router_data(router_data); } bool check_type_and_mode_compitability(const ClusterBlockId& old_clb, From de790460465470598eebd5f46d149c10b294536a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 10 May 2023 19:02:36 -0400 Subject: [PATCH 041/188] create primitive_critical_uniform_move_generator.h/.cpp --- .../place/primitive_critical_uniform_move_generator.cpp | 3 +++ vpr/src/place/primitive_critical_uniform_move_generator.h | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 vpr/src/place/primitive_critical_uniform_move_generator.cpp create mode 100644 vpr/src/place/primitive_critical_uniform_move_generator.h diff --git a/vpr/src/place/primitive_critical_uniform_move_generator.cpp b/vpr/src/place/primitive_critical_uniform_move_generator.cpp new file mode 100644 index 00000000000..21e96f8878c --- /dev/null +++ b/vpr/src/place/primitive_critical_uniform_move_generator.cpp @@ -0,0 +1,3 @@ +// +// Created by amin on 5/10/23. +// diff --git a/vpr/src/place/primitive_critical_uniform_move_generator.h b/vpr/src/place/primitive_critical_uniform_move_generator.h new file mode 100644 index 00000000000..16191f5b161 --- /dev/null +++ b/vpr/src/place/primitive_critical_uniform_move_generator.h @@ -0,0 +1,8 @@ +// +// Created by amin on 5/10/23. 
+// + +#ifndef VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H +#define VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H + +#endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H From 8e341caf2531ce3036f371fa8b654454a0529259 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 11 May 2023 15:56:10 -0400 Subject: [PATCH 042/188] change the name of the file from primitive_*_move* to atom_*_move* --- ... atom_critical_uniform_move_generator.cpp} | 0 .../atom_critical_uniform_move_generator.h | 25 +++++++++++++++++++ ...rimitive_critical_uniform_move_generator.h | 8 ------ 3 files changed, 25 insertions(+), 8 deletions(-) rename vpr/src/place/{primitive_critical_uniform_move_generator.cpp => atom_critical_uniform_move_generator.cpp} (100%) create mode 100644 vpr/src/place/atom_critical_uniform_move_generator.h delete mode 100644 vpr/src/place/primitive_critical_uniform_move_generator.h diff --git a/vpr/src/place/primitive_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp similarity index 100% rename from vpr/src/place/primitive_critical_uniform_move_generator.cpp rename to vpr/src/place/atom_critical_uniform_move_generator.cpp diff --git a/vpr/src/place/atom_critical_uniform_move_generator.h b/vpr/src/place/atom_critical_uniform_move_generator.h new file mode 100644 index 00000000000..b2f99a2a39d --- /dev/null +++ b/vpr/src/place/atom_critical_uniform_move_generator.h @@ -0,0 +1,25 @@ +// +// Created by amin on 5/10/23. 
+// + +#ifndef VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H +#define VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H + +#include "move_generator.h" +#include "timing_place.h" + +/** + * @file + * @author Amin Mohaghegh + * @brief Primitive critical uniform move type + * + * This move picks a random block from the the critical blocks (those with one or more critical nets) + * and moves it (swapping with what's there if necessary) to a random location within rlim units + * away in the x and y dimensions in the compressed block grid. + * + * Returns its choices by filling in affected_blocks. + */ +class AtomCriticalUniformMoveGenerator : public MoveGenerator { + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/); +}; +#endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H diff --git a/vpr/src/place/primitive_critical_uniform_move_generator.h b/vpr/src/place/primitive_critical_uniform_move_generator.h deleted file mode 100644 index 16191f5b161..00000000000 --- a/vpr/src/place/primitive_critical_uniform_move_generator.h +++ /dev/null @@ -1,8 +0,0 @@ -// -// Created by amin on 5/10/23. 
-// - -#ifndef VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H -#define VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H - -#endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H From 0e4db8c9ccf50065eb910de7d4b800df23ba064e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 11 May 2023 17:03:59 -0400 Subject: [PATCH 043/188] write getCriticalAtomBlock under atom_critical_uni* --- .../atom_critical_uniform_move_generator.cpp | 67 ++++++++++++++++++- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index 21e96f8878c..e6bf8040a6b 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -1,3 +1,64 @@ -// -// Created by amin on 5/10/23. -// +#include "atom_critical_uniform_move_generator.h" +#include "globals.h" +#include "place_constraints.h" + +static std::pair getCriticalAtomBlock(); + +e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { + auto& place_ctx = g_vpr_ctx.placement(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = g_placer_ctx.move(); + + ClusterBlockId cluster_blk_id = ClusterBlockId::INVALID(); + AtomBlockId atom_blk_id = AtomBlockId::INVALID(); + std::tie(cluster_blk_id, atom_blk_id) = getCriticalAtomBlock(); + + if(b_from == AtomBlockId::INVALID()) { + return e_create_move::ABORT; // Not a valid block + } + + t_pl_loc from = place_ctx.block_locs[b_from].loc; + auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); + + t_pl_loc to; + + if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, 
b_from)) { + return e_create_move::ABORT; + } + + e_create_move create_move = ::create_move(blocks_affected, b_from, to); + + //Check that all of the blocks affected by the move would still be in a legal floorplan region after the swap + if (!floorplan_legal(blocks_affected)) { + return e_create_move::ABORT; + } + + return create_move; +} + +static std::pair getCriticalAtomBlock() { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_netlist = cluster_ctx.clb_nlist; + const auto& atom_netlist = g_vpr_ctx.atom().nlist; + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + const auto& place_move_ctx = g_placer_ctx.move(); + const auto& place_ctx = g_vpr_ctx.placement(); + /* Pick a random block to be swapped with another random block. */ + // pick it from the highly critical blocks + if (place_move_ctx.highly_crit_pins.size() == 0) { + return std::make_pair(ClusterBlockId::INVALID(), AtomBlockId::INVALID()); //No critical block + } + std::pair crit_cluster_net_pin = place_move_ctx.highly_crit_pins[vtr::irand(place_move_ctx.highly_crit_pins.size() - 1)]; + ClusterBlockId cluster_crit_blk = cluster_netlist.net_driver_block(crit_cluster_net_pin.first); + if (place_ctx.block_locs[cluster_crit_blk].is_fixed) { + return std::make_pair(ClusterBlockId::INVALID(), AtomBlockId::INVALID()); //Block is fixed, cannot move + } + + AtomNetId atom_crit_net = atom_lookup.atom_net(crit_cluster_net_pin.first); + AtomBlockId atom_crit_blk = atom_netlist.net_driver_block(atom_crit_net); + + return std::make_pair(cluster_crit_blk, atom_crit_blk); + +} From 8df2762075422b473d1262a45ba01f174cd7f96d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 11 May 2023 17:34:47 -0400 Subject: [PATCH 044/188] use cluster_blk_id in functions under atom_critical_uniform* --- vpr/src/place/atom_critical_uniform_move_generator.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp 
b/vpr/src/place/atom_critical_uniform_move_generator.cpp index e6bf8040a6b..ab218f7511a 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -13,22 +13,22 @@ e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_m AtomBlockId atom_blk_id = AtomBlockId::INVALID(); std::tie(cluster_blk_id, atom_blk_id) = getCriticalAtomBlock(); - if(b_from == AtomBlockId::INVALID()) { + if(cluster_blk_id == ClusterBlockId::INVALID() || atom_blk_id == AtomBlockId::INVALID()) { return e_create_move::ABORT; // Not a valid block } - t_pl_loc from = place_ctx.block_locs[b_from].loc; - auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); + t_pl_loc from = place_ctx.block_locs[cluster_blk_id].loc; + auto cluster_from_type = cluster_ctx.clb_nlist.block_type(cluster_blk_id); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; - if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { + if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, cluster_blk_id)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, cluster_blk_id, to); //Check that all of the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { From 1ced7770bb492f1b89d0531d076b0afcb9bf89c7 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 18 May 2023 11:45:26 -0400 Subject: [PATCH 045/188] remove total_primitive_count field from pb_type_graph_node since it wasn't used by any routine --- libs/libarchfpga/src/physical_types.h | 4 +--- vpr/src/pack/pb_type_graph.cpp | 13 ------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h 
b/libs/libarchfpga/src/physical_types.h index a4699e2ccd8..141bbd8ebf7 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1248,9 +1248,7 @@ class t_pb_graph_node { int num_input_pin_class; /* number of input pin classes that this pb_graph_node has */ int* output_pin_class_size; /* Stores the number of pins that belong to a particular output pin class */ int num_output_pin_class; /* number of output pin classes that this pb_graph_node has */ - - int total_primitive_count; /* total number of this primitive type in the cluster */ - + /* Interconnect instances for this pb * Only used for power */ diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp index 72467921f21..3bd5b493dc6 100644 --- a/vpr/src/pack/pb_type_graph.cpp +++ b/vpr/src/pack/pb_type_graph.cpp @@ -236,8 +236,6 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, pb_graph_node->num_output_ports = 0; pb_graph_node->num_clock_ports = 0; - pb_graph_node->total_primitive_count = 0; - /* Generate ports for pb graph node */ for (i = 0; i < pb_type->num_ports; i++) { if (pb_type->ports[i].type == IN_PORT && !pb_type->ports[i].is_clock) { @@ -370,17 +368,6 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, &pb_type->modes[i], load_power_structures); } - - // update the total number of primitives of that type - if (pb_graph_node->is_primitive()) { - int total_count = 1; - auto pb_node = pb_graph_node; - while (!pb_node->is_root()) { - total_count *= pb_node->pb_type->num_pb; - pb_node = pb_node->parent_pb_graph_node; - } - pb_graph_node->total_primitive_count = total_count; - } } static void alloc_and_load_pb_graph_pin_sinks(t_pb_graph_node* pb_graph_node) { From e8af9b46f1930d0944cbb4ee08b24be7de09fd7a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 18 May 2023 11:52:24 -0400 Subject: [PATCH 046/188] add primitive num to t_pb_graph_node --- libs/libarchfpga/src/physical_types.h | 3 ++- 
vpr/src/pack/pb_type_graph.cpp | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 141bbd8ebf7..f4770a3ffdf 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1202,6 +1202,7 @@ class t_pb_graph_node { int placement_index; + int primitive_num; /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage * * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to @@ -1248,7 +1249,7 @@ class t_pb_graph_node { int num_input_pin_class; /* number of input pin classes that this pb_graph_node has */ int* output_pin_class_size; /* Stores the number of pins that belong to a particular output pin class */ int num_output_pin_class; /* number of output pin classes that this pb_graph_node has */ - + /* Interconnect instances for this pb * Only used for power */ diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp index 3bd5b493dc6..473b651f7c7 100644 --- a/vpr/src/pack/pb_type_graph.cpp +++ b/vpr/src/pack/pb_type_graph.cpp @@ -49,7 +49,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, t_pb_type* pb_type, const int index, bool load_power_structures, - int& pin_count_in_cluster); + int& pin_count_in_cluster, + int& primitive_num); static void alloc_and_load_pb_graph_pin_sinks(t_pb_graph_node* pb_graph_node); @@ -147,12 +148,14 @@ void alloc_and_load_all_pb_graphs(bool load_power_structures, bool is_flat) { if (type.pb_type) { type.pb_graph_head = new t_pb_graph_node(); int pin_count_in_cluster = 0; + int primitive_num = 0; alloc_and_load_pb_graph(type.pb_graph_head, nullptr, type.pb_type, 0, load_power_structures, - pin_count_in_cluster); + pin_count_in_cluster, + primitive_num); type.pb_graph_head->total_pb_pins = pin_count_in_cluster; 
load_pin_classes_in_pb_graph_head(type.pb_graph_head); if (is_flat) { @@ -225,7 +228,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, t_pb_type* pb_type, const int index, bool load_power_structures, - int& pin_count_in_cluster) { + int& pin_count_in_cluster, + int& primitive_num) { int i, j, k, i_input, i_output, i_clockport; pb_graph_node->placement_index = index; @@ -339,6 +343,11 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, pb_graph_node->pb_node_power->transistor_cnt_pb_children = 0.; } + if (pb_graph_node->is_primitive()) { + pb_graph_node->primitive_num = primitive_num; + primitive_num++; + } + /* Allocate and load child nodes for each mode and create interconnect in each mode */ pb_graph_node->child_pb_graph_nodes = (t_pb_graph_node***)vtr::calloc(pb_type->num_modes, sizeof(t_pb_graph_node**)); @@ -353,7 +362,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node, &pb_type->modes[i].pb_type_children[j], k, load_power_structures, - pin_count_in_cluster); + pin_count_in_cluster, + primitive_num); } } } From 5a72ec03cfa8ff4c5a1c099a88b976a06001a0e2 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 8 Aug 2023 11:52:04 -0400 Subject: [PATCH 047/188] remove debugging code and adding more comments --- vpr/src/pack/re_cluster.cpp | 100 +++++++++---------------------- vpr/src/pack/re_cluster_util.cpp | 27 +++++---- vpr/src/pack/re_cluster_util.h | 58 ++++++++++++++++-- 3 files changed, 95 insertions(+), 90 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 3a277c05ce2..5ca8f7d470e 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -4,8 +4,6 @@ #include "cluster_placement.h" #include "cluster_router.h" -const char* move_suffix = "_m"; - bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, @@ -140,7 +138,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, //Add the atom to the 
new cluster t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, false, clustering_data, new_router_data, thread_id); + is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, clustering_data, new_router_data, thread_id); //Commit or revert the move if (is_added) { @@ -163,7 +161,6 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, return (is_added); } -#if 1 bool swap_two_molecules(t_pack_molecule* molecule_1, t_pack_molecule* molecule_2, bool during_packing, @@ -212,12 +209,6 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, std::string clb_pb_1_name = (std::string)clb_pb_1->name; t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); std::string clb_pb_2_name = (std::string)clb_pb_2->name; - /* // Elgammal debugging - if(clb_1 == ClusterBlockId(721)) - VTR_LOG("before clb1: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); - if(clb_2 == ClusterBlockId(721)) - VTR_LOG("before clb2: %p --> %s\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); - */ //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); @@ -227,15 +218,10 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data, thread_id); if (!mol_1_success) { - /* // Elgammal debugging - if(clb_1 == ClusterBlockId(721) || clb_2 
== ClusterBlockId(721)) { - VTR_LOG("packing clb2 failed\n"); - } - */ - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data, thread_id); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data, thread_id); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); @@ -243,35 +229,21 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - //if (molecule_2->is_chain()) - { - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - } - //if (molecule_1->is_chain()) - { - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - } + + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); return false; } - /* // Elgammal debugging - if(clb_1 == ClusterBlockId(721) || clb_2 == ClusterBlockId(721)) { - VTR_LOG("packing clb2 success, %s\n", cluster_ctx.clb_nlist.block_pb(clb_2)->name); - } - */ - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); + + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data, thread_id); if 
(!mol_2_success) { - /* //Elgammal debugging - if(clb_1 == ClusterBlockId(721)) { - VTR_LOG("packing clb1 failed\n"); - } - */ remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, false, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data, thread_id); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data, thread_id); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data, thread_id); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data, thread_id); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); @@ -279,53 +251,35 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - //if (molecule_2->is_chain()) - { - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - } - //if (molecule_1->is_chain()) - { - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - } + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); return false; } - /* // Elgammal debugging - if(clb_2 == ClusterBlockId(721) || clb_1 == ClusterBlockId(721)) { - VTR_LOG("packing clb1 succes, %s\n", cluster_ctx.clb_nlist.block_pb(clb_1)->name); - } - */ + //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && 
mol_2_success); - //if (molecule_2->is_chain()) - { - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - } - //if(molecule_1->is_chain()) - { - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - } - /* //Elgammal debugging - if(clb_1 == ClusterBlockId(721)) - VTR_LOG("after clb1: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_1), cluster_ctx.clb_nlist.block_pb(clb_1)->name); - if(clb_2 == ClusterBlockId(721)) - VTR_LOG("after clb2: %p --> %s\n\n", cluster_ctx.clb_nlist.block_pb(clb_2), cluster_ctx.clb_nlist.block_pb(clb_2)->name); - */ + //Fix block names + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + + + //If the move is done after packing not during it, some fixes need to be done on the clustered netlist if (!during_packing) { fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); fix_clustered_netlist(molecule_2, molecule_2_size, clb_2, clb_1); } + //Free free_router_data(old_1_router_data); free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; return true; } -#endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 24a77bd6d55..bba2c4a0ea1 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -10,16 +10,14 @@ #include "read_netlist.h" #include -//The name suffix of the new block (if exists) +// The name suffix of the new block (if exists) +// This suffex is useful in preventing duplicate high-level cluster block names const char* name_suffix = "_m"; /******************* Static Functions ********************/ -//static void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* 
gpin); static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); -//static bool count_children_pbs(const t_pb* pb); static void fix_atom_pin_mapping(const AtomBlockId blk); - static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); static void check_net_absorbtion(const AtomNetId atom_net_id, const ClusterBlockId new_clb, @@ -27,21 +25,19 @@ static void check_net_absorbtion(const AtomNetId atom_net_id, ClusterPinId& cluster_pin_id, bool& previously_absorbed, bool& now_abosrbed); - static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); - static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, int molecule_size, const ClusterBlockId& old_clb, const ClusterBlockId& new_clb); - -static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id); - +static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id = 0); static void update_cluster_pb_stats(const t_pack_molecule* molecule, int molecule_size, ClusterBlockId clb_index, bool is_added); + /***************** API functions ***********************/ + ClusterBlockId atom_to_cluster(const AtomBlockId& atom) { auto& atom_ctx = g_vpr_ctx.atom(); return (atom_ctx.lookup.atom_clb(atom)); @@ -120,8 +116,8 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, - const int mode, - const int feasible_block_array_size, + const int& mode, + const int& feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, @@ -205,7 +201,14 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, return (pack_result == BLK_PASSED); } -bool pack_mol_in_existing_cluster(t_pack_molecule* 
molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool /*is_swap*/, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id) { +bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, + int molecule_size, + const ClusterBlockId& new_clb, + std::unordered_set* new_clb_atoms, + bool during_packing, + t_clustering_data& clustering_data, + t_lb_router_data*& router_data, + int thread_id) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index 3a7861ef8f1..986b6aa9f1e 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -77,7 +77,18 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, * @param router_data: returns the intra logic block router data. * @param temp_cluster_pr: returns the partition region of the new cluster. */ -bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, const int mode, const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, PartitionRegion& temp_cluster_pr, int thread_id); +bool start_new_cluster_for_mol(t_pack_molecule* molecule, + const t_logical_block_type_ptr& type, + const int& mode, + const int& feasible_block_array_size, + bool enable_pin_feasibility_filter, + ClusterBlockId clb_index, + bool during_packing, + int verbosity, + t_clustering_data& clustering_data, + t_lb_router_data** router_data, + PartitionRegion& temp_cluster_pr, + int thread_id = 0); /** * @brief A function that packs a molecule into an existing cluster @@ -90,7 +101,14 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_ * (is updated if this function is called during packing, especially 
intra_lb_routing data member). * @param router_data: returns the intra logic block router data. */ -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, const ClusterBlockId new_clb, std::unordered_set* new_clb_atoms, bool during_packing, bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data, int thread_id); +bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, + int molecule_size, + const ClusterBlockId& new_clb, + std::unordered_set* new_clb_atoms, + bool during_packing, + t_clustering_data& clustering_data, + t_lb_router_data*& router_data, + int thread_id = 0); /** * @brief A function that fix the clustered netlist if the move is performed @@ -108,18 +126,43 @@ void fix_clustered_netlist(t_pack_molecule* molecule, /** * @brief A function that commits the molecule move if it is legal * - * @during_packing: true if this function is called during packing, false if it is called during placement - * @new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) */ void commit_mol_move(const ClusterBlockId& old_clb, const ClusterBlockId& new_clb, bool during_packing, bool new_clb_created); -void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, t_clustering_data& clustering_data, int thread_id); +/** + * @brief A function that reverts the molecule move if it is illegal + * + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) + * @params + */ +void revert_mol_move(const ClusterBlockId& old_clb, + t_pack_molecule* molecule, + t_lb_router_data*& old_router_data, + bool during_packing, + t_clustering_data& clustering_data, + int thread_id = 0); + +/** + * + * @brief A function that checks the legality of a cluster by running the intra-cluster routing + * + */ bool is_cluster_legal(t_lb_router_data*& router_data); +/** + * @brief A function that commits the molecule removal if it is legal + * + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) + * @params + */ void commit_mol_removal(const t_pack_molecule* molecule, const int& molecule_size, const ClusterBlockId& old_clb, @@ -127,6 +170,11 @@ void commit_mol_removal(const t_pack_molecule* molecule, t_lb_router_data*& router_data, t_clustering_data& clustering_data); +/** + * + * @brief A function that check that two clusters are of the same type and in the same mode of operation + * + */ bool check_type_and_mode_compitability(const ClusterBlockId& old_clb, const ClusterBlockId& new_clb, int verbosity); From accb38f1b2de72dea7e3bac6f7a23e666a831252 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 8 Aug 2023 16:26:40 -0400 Subject: [PATCH 048/188] prevent double freeing when removing a molecule from cluster --- vpr/src/pack/pack.cpp | 120 +++++++++++++++++++------------ vpr/src/pack/re_cluster_util.cpp | 2 +- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 96eb486a930..43b905df7c7 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -276,39 +276,38 @@ bool try_pack(t_packer_opts* packer_opts, * } */ /* - * auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - * t_pack_molecule* mol = rng.first->second; - * VTR_LOG("Pack move is starting:\n\n"); - * bool 
moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); - * if (moved) - * VTR_LOG("Move is Done :)\n"); - * else - * VTR_LOG("Move failed! :((\n"); - * - * rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); - * mol = rng.first->second; - * moved = move_mol_to_existing_cluster(mol, - * ClusterBlockId(43), - * true, - * 0, - * clustering_data, - * 0); - * if (moved) - * VTR_LOG("Move is Done :)\n"); - * else - * VTR_LOG("Move failed! :((\n"); - * - * - * rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - * mol = rng.first->second; - * auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); - * t_pack_molecule* mol2 = rng2.first->second; - * moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); - * if (moved) - * VTR_LOG("Move is Done :)\n"); - * else - * VTR_LOG("Move failed! :((\n"); - */ + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + t_pack_molecule* mol = rng.first->second; + VTR_LOG("Pack move is starting:\n\n"); + bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); + mol = rng.first->second; + moved = move_mol_to_existing_cluster(mol, + ClusterBlockId(43), + true, + 0, + clustering_data, + 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); + mol = rng.first->second; + auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); + t_pack_molecule* mol2 = rng2.first->second; + moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! 
:((\n"); + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); t_pack_molecule* mol = rng.first->second; @@ -319,19 +318,50 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Move is Done :)\n"); else VTR_LOG("Move failed! :((\n"); +*/ + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); + t_pack_molecule* mol = rng.first->second; + bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(4)); + mol = rng.first->second; + moved = move_mol_to_existing_cluster(mol, + ClusterBlockId(4), + true, + 0, + clustering_data, + 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! :((\n"); + + rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(4)); + mol = rng.first->second; + auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(5)); + t_pack_molecule* mol2 = rng2.first->second; + moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); + if (moved) + VTR_LOG("Move is Done :)\n"); + else + VTR_LOG("Move failed! 
:((\n"); + + + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + VTR_LOG("\n# block id = %d\n", blk_id); + VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); + for (auto atom : *cluster_to_atoms(blk_id)) { + VTR_LOG("\tatom = %d\n", atom); + for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { + VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); + } + } + } - /* - * for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - * VTR_LOG("\n# block id = %d\n", blk_id); - * VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); - * for (auto atom : *cluster_to_atoms(blk_id)) { - * VTR_LOG("\tatom = %d\n", atom); - * for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { - * VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); - * } - * } - * } - */ VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index bba2c4a0ea1..9b7ce2685b9 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -676,7 +676,7 @@ void commit_mol_removal(const t_pack_molecule* molecule, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); } - free_router_data(router_data); + //free_router_data(router_data); } bool check_type_and_mode_compitability(const ClusterBlockId& old_clb, From 08a68c3f87150b4832208069b5fcfc0a8ed04104 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Tue, 12 Sep 
2023 16:53:19 -0400 Subject: [PATCH 049/188] more comments and remove debugging code --- libs/libarchfpga/src/physical_types.h | 4 +++- vpr/src/base/read_options.cpp | 4 ++-- vpr/src/base/vpr_context.h | 10 ++++++++-- vpr/src/pack/pack.cpp | 4 ++-- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index dd7ad403f81..b79bc8a5326 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1273,7 +1273,9 @@ class t_pb_graph_node { void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ - /* indeces for cluster_placement_primitive in the cluster_placement_stats structure (useful during packing) */ + /* Indices for cluster_placement_primitive in the cluster_placement_stats structure (useful during packing) */ + /* Now, we pass these indices instead of passing a pointer (t_cluster_placement_primitive*). */ + /* This is useful especially in case of multi-threaded packing */ int cluster_placement_primitive_index; int cluster_placement_type_index; int lb_type_index; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 48c84f0f0c4..5cc0c2d47e1 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1815,12 +1815,12 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg pack_grp.add_argument(args.pack_num_moves, "--pack_num_moves") .help( "The number of moves that can be tried in packing stage") - .default_value("100000") + .default_value("0") .show_in(argparse::ShowIn::HELP_ONLY); pack_grp.add_argument(args.pack_num_threads, "--pack_num_threads") .help( - "The number of threads used in the packing iterative improvement") + "The number of threads used in the iterative improvement packing (IIP)") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/base/vpr_context.h 
b/vpr/src/base/vpr_context.h index bde9d720bd2..01c2bffa50f 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -325,6 +325,7 @@ struct ClusteringHelperContext : public Context { std::map num_used_type_instances; // Stats keeper for placement information during packing/clustering + // The vector size equals to the number of threads used in the IIP (pack_num_threads) std::vector cluster_placement_stats; // total number of models in the architecture @@ -352,8 +353,10 @@ struct ClusteringHelperContext : public Context { // An unordered map of the count of connections between different clb blocks // Only blocks that have connections between each others are added to this hash table // This may be useful for some type of packing moves. - std::unordered_map, int, pair_hash> clb_conn_counts; + // Currently unused, commented out + //std::unordered_map, int, pair_hash> clb_conn_counts; + // Some packing options. Saving them here instead of passing them to every packing function std::unordered_map net_output_feeds_driving_block_input; std::shared_ptr timing_info; t_pack_high_fanout_thresholds high_fanout_thresholds; @@ -363,10 +366,13 @@ struct ClusteringHelperContext : public Context { /** * @brief State relating to packing multithreading * - * This contain data structures to synchronize multithreading of packing iterative improvement. + * This contain data structures to synchronize multithreading of the iterative improvement packing (IIP). */ struct PackingMultithreadingContext : public Context { + // One lock per cluster vtr::vector mu; + + // lock to synchronize atop_pb lookup access std::mutex lookup_mu; }; diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index d6b74333ec6..b6b3140d6ab 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -318,7 +318,7 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Move is Done :)\n"); else VTR_LOG("Move failed! 
:((\n"); -*/ + auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); t_pack_molecule* mol = rng.first->second; bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); @@ -366,7 +366,7 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Start the iterative improvement process\n"); iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); - + */ /* // Elgammal debugging for(auto& clb : cluster_ctx.clb_nlist.blocks()) { VTR_LOG("@@@ block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); From 15d7ce3e82144b9b97a20133ccc21a85e702e98c Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 13 Sep 2023 13:02:39 -0400 Subject: [PATCH 050/188] Add a guard for packing multithreading --- vpr/src/base/atom_lookup.cpp | 11 ++++++++++- vpr/src/base/vpr_context.h | 7 +++++++ vpr/src/pack/cluster.cpp | 2 -- vpr/src/pack/cluster_util.cpp | 6 +++++- vpr/src/pack/cluster_util.h | 15 ++++++++++++++- vpr/src/pack/pack.cpp | 6 ++++-- 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index d53dad95c91..426eee84e81 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -16,16 +16,21 @@ const t_pb* AtomLookup::atom_pb(const AtomBlockId blk_id) const { } AtomBlockId AtomLookup::pb_atom(const t_pb* pb) const { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); packing_multithreading_ctx.lookup_mu.lock(); - +#endif auto iter = atom_to_pb_.find(pb); if (iter == atom_to_pb_.inverse_end()) { //Not found +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.lookup_mu.unlock(); +#endif return AtomBlockId::INVALID(); } +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.lookup_mu.unlock(); +#endif return iter->second; } @@ -41,8 +46,10 @@ const t_pb_graph_node* AtomLookup::atom_pb_graph_node(const AtomBlockId blk_id) void 
AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If either of blk_id or pb are not valid, //remove any mapping +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); packing_multithreading_ctx.lookup_mu.lock(); +#endif if (!blk_id && pb) { //Remove atom_to_pb_.erase(pb); @@ -53,7 +60,9 @@ void AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If both are valid store the mapping atom_to_pb_.update(blk_id, pb); } +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.lookup_mu.unlock(); +#endif } /* diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 01c2bffa50f..93e5581d4e4 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -33,6 +33,9 @@ #include "noc_traffic_flows.h" #include "noc_routing.h" +//Flag to enable multithreading code for iterative packing (IIP) +#define PACK_MULTITHREADED + class SetupTimingInfo; /** * @brief A Context is collection of state relating to a particular part of VPR @@ -626,8 +629,10 @@ class VprContext : public Context { const NocContext& noc() const { return noc_; } NocContext& mutable_noc() { return noc_; } +#ifdef PACK_MULTITHREADED const PackingMultithreadingContext& packing_multithreading() const { return packing_multithreading_; } PackingMultithreadingContext& mutable_packing_multithreading() { return packing_multithreading_; } +#endif private: DeviceContext device_; @@ -645,7 +650,9 @@ class VprContext : public Context { FloorplanningContext constraints_; NocContext noc_; +#ifdef PACK_MULTITHREADED PackingMultithreadingContext packing_multithreading_; +#endif }; #endif diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 7111eda41f9..f556e0988df 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -130,7 +130,6 @@ std::map do_clustering(const t_packer_opts& pa const int verbosity = packer_opts.pack_verbosity; int unclustered_list_head_size; - 
//std::unordered_map net_output_feeds_driving_block_input; cluster_stats.num_molecules_processed = 0; cluster_stats.mols_since_last_print = 0; @@ -154,7 +153,6 @@ std::map do_clustering(const t_packer_opts& pa helper_ctx.timing_driven = packer_opts.timing_driven; std::shared_ptr clustering_delay_calc; - //std::shared_ptr timing_info; // this data structure tracks the number of Logic Elements (LEs) used. It is // populated only for architectures which has LEs. The architecture is assumed diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index fe5613db91e..578f8922104 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1088,7 +1088,7 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl VTR_ASSERT(block_pack_status == BLK_PASSED); if (molecule->is_chain()) { /* Chained molecules often take up lots of area and are important, - * - * if a chain is packed in, want to rename logic block to match chain name */ + * if a chain is packed in, want to rename logic block to match chain name */ AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; /* // Elgammal debugging @@ -1698,6 +1698,10 @@ void store_cluster_info_and_free(const t_packer_opts& packer_opts, //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); + + // If no more iterative improvements to be run after the initial packing, clear the date for the packed cluster now + if(packer_opts.pack_num_moves == 0) + free_pb_stats_recursive(cur_pb); } /* Free up data structures and requeue used molecules */ diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 0cb25cdcd06..b6e194a3b27 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -199,7 +199,20 
@@ void rebuild_attraction_groups(AttractionInfo& attraction_groups); void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); -enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, t_pack_molecule* molecule, t_pb_graph_node** primitives_list, t_pb* pb, const int max_models, const int max_cluster_size, const ClusterBlockId clb_index, const int detailed_routing_stage, t_lb_router_data* router_data, int verbosity, bool enable_pin_feasibility_filter, const int feasible_block_array_size, t_ext_pin_util max_external_pin_util, PartitionRegion& temp_cluster_pr); +enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, + t_pack_molecule* molecule, + t_pb_graph_node** primitives_list, + t_pb* pb, + const int max_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const int detailed_routing_stage, + t_lb_router_data* router_data, + int verbosity, + bool enable_pin_feasibility_filter, + const int feasible_block_array_size, + t_ext_pin_util max_external_pin_util, + PartitionRegion& temp_cluster_pr); void try_fill_cluster(const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index b6b3140d6ab..23c147c244a 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -373,8 +373,10 @@ bool try_pack(t_packer_opts* packer_opts, } */ - for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { - free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); + if(packer_opts->pack_num_moves != 0) { + for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { + free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); + } } /******************** End **************************/ From 49d3f66e17d377e901bf3bf2a1b19f10c094982c Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Wed, 13 Sep 2023 13:32:45 -0400 Subject: [PATCH 051/188] mor guards for 
multithreading --- vpr/src/base/vpr_context.h | 2 +- vpr/src/pack/improvement/pack_move_utils.cpp | 43 ++++++++++++++++--- vpr/src/pack/improvement/pack_utils.cpp | 20 ++++++++- .../improvement/packing_move_generator.cpp | 41 +++++++++++++++--- 4 files changed, 93 insertions(+), 13 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 93e5581d4e4..7fa9f1f2010 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -34,7 +34,7 @@ #include "noc_routing.h" //Flag to enable multithreading code for iterative packing (IIP) -#define PACK_MULTITHREADED +//#define PACK_MULTITHREADED class SetupTimingInfo; /** diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp index db540855322..68d4766ba7e 100644 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ b/vpr/src/pack/improvement/pack_move_utils.cpp @@ -397,8 +397,9 @@ int update_cutsize_after_move(const std::vector& new_locs, t_pack_molecule* pick_molecule_randomly() { auto& atom_ctx = g_vpr_ctx.atom(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - +#endif bool molecule_picked = false; t_pack_molecule* molecule; @@ -408,24 +409,29 @@ t_pack_molecule* pick_molecule_randomly() { ClusterBlockId clb_index = atom_to_cluster(random_atom); if (!clb_index) continue; +#ifdef PACK_MULTITHREADED if (packing_multithreading_ctx.mu[clb_index]->try_lock()) { +#endif auto rng = atom_ctx.atom_molecules.equal_range(random_atom); for (const auto& kv : vtr::make_range(rng.first, rng.second)) { molecule = kv.second; molecule_picked = true; break; } +#ifdef PACK_MULTITHREADED } else { continue; //CLB is already in-flight } +#endif } return molecule; } bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { auto& atom_ctx = g_vpr_ctx.atom(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - +#endif 
std::vector connected_blocks; calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); if (connected_blocks.empty()) @@ -438,9 +444,13 @@ bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { while (clb2_not_found && iteration < 20) { int rand_num = vtr::irand((int)connected_blocks.size() - 1); clb_index_2 = connected_blocks[rand_num]; +#ifdef PACK_MULTITHREADED if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { +#endif clb2_not_found = false; +#ifdef PACK_MULTITHREADED } +#endif iteration++; } @@ -459,14 +469,17 @@ bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { mol_2 = kv.second; return true; } - +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[clb_index_2]->unlock(); +#endif return false; } bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { auto& atom_ctx = g_vpr_ctx.atom(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif std::vector connected_blocks; calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); @@ -479,9 +492,13 @@ bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_mole int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; +#ifdef PACK_MULTITHREADED if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { +#endif clb2_not_found = false; +#ifdef PACK_MULTITHREADED } +#endif iteration++; } @@ -507,14 +524,17 @@ bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_mole iteration++; } } while (iteration < 20); - +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[clb_index_2]->unlock(); +#endif return false; } bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { auto& atom_ctx = g_vpr_ctx.atom(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = 
g_vpr_ctx.mutable_packing_multithreading(); +#endif std::vector connected_blocks; calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); @@ -527,9 +547,13 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; +#ifdef PACK_MULTITHREADED if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { +#endif clb2_not_found = false; +#ifdef PACK_MULTITHREADED } +#endif iteration++; } @@ -558,13 +582,17 @@ bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& } } } while (iteration < 10); +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[clb_index_2]->unlock(); +#endif return false; } bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { auto& atom_ctx = g_vpr_ctx.atom(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif std::vector connected_blocks; calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); @@ -579,9 +607,13 @@ bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& int iteration = 0; while (clb2_not_found && iteration < 10) { clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; +#ifdef PACK_MULTITHREADED if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { +#endif clb2_not_found = false; +#ifdef PACK_MULTITHREADED } +#endif ++iteration; } @@ -605,8 +637,9 @@ bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& iteration++; } } while (iteration < 20); - +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[clb_index_2]->unlock(); +#endif return false; } diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index f89d2813590..c5f2e1c4606 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ 
b/vpr/src/pack/improvement/pack_utils.cpp @@ -17,9 +17,13 @@ #include void printProgressBar(double progress); void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); + +#ifdef PACK_MULTITHREADED void init_multithreading_locks(); void free_multithreading_locks(); +#endif +#ifdef PACK_MULTITHREADED void init_multithreading_locks() { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); auto& helper_ctx = g_vpr_ctx.cl_helper(); @@ -29,13 +33,16 @@ void init_multithreading_locks() { m = new std::mutex; } } +#endif +#ifdef PACK_MULTITHREADED void free_multithreading_locks() { auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); for (auto& m : packing_multithreading_ctx.mu) { delete m; } } +#endif void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, int) { /* @@ -59,8 +66,9 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ const int num_threads = packer_opts.pack_num_threads; unsigned int moves_per_thread = total_num_moves / num_threads; std::thread* my_threads = new std::thread[num_threads]; - +#ifdef PACK_MULTITHREADED init_multithreading_locks(); +#endif for (int i = 0; i < (num_threads - 1); i++) { my_threads[i] = std::thread(try_n_packing_moves, i, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); @@ -77,11 +85,16 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ pack_stats.legal_moves); delete[] my_threads; +#ifdef PACK_MULTITHREADED free_multithreading_locks(); +#endif } void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif + bool is_proposed, 
is_valid, is_successful; std::vector new_locs; @@ -178,8 +191,10 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ } is_valid = move_generator->evaluate_move(new_locs); if (!is_valid) { +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); +#endif continue; } else { num_good_moves++; @@ -188,9 +203,10 @@ void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_ is_successful = move_generator->apply_move(new_locs, clustering_data, thread_num); if (is_successful) num_legal_moves++; - +#ifdef PACK_MULTITHREADED packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); +#endif } pack_stats.mu.lock(); diff --git a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp index e6378a1106e..fbc1f7d0166 100644 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ b/vpr/src/pack/improvement/packing_move_generator.cpp @@ -41,7 +41,9 @@ bool packingMoveGenerator::apply_move(std::vector& new_locs, /****************************************************************/ bool randomPackingSwap::propose_move(std::vector& new_locs) { auto& cluster_ctx = g_vpr_ctx.clustering(); +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1, clb_index_2; @@ -61,15 +63,21 @@ bool randomPackingSwap::propose_move(std::vector& new_locs) if (block_type_1 == block_type_2 && clb_index_1 != clb_index_2) { found = true; build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } else { + } +#ifdef PACK_MULTITHREADED + else { packing_multithreading_ctx.mu[clb_index_2]->unlock(); } +#endif ++iteration; } while (!found && iteration < MAX_ITERATIONS); +#ifdef PACK_MULTITHREADED if (!found) { 
packing_multithreading_ctx.mu[clb_index_1]->unlock(); } +#endif + return found; } @@ -80,7 +88,9 @@ bool randomPackingSwap::evaluate_move(const std::vector& new /***************** Quasi directed packing move class *******************/ /***********************************************************************/ bool quasiDirectedPackingSwap::propose_move(std::vector& new_locs) { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -96,9 +106,12 @@ bool quasiDirectedPackingSwap::propose_move(std::vector& new if (found) { ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } else { + } +#ifdef PACK_MULTITHREADED + else { packing_multithreading_ctx.mu[clb_index_1]->unlock(); } +#endif return found; } @@ -113,7 +126,9 @@ bool quasiDirectedSameTypePackingSwap::evaluate_move(const std::vector& new_locs) { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -129,9 +144,13 @@ bool quasiDirectedSameTypePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } else { + } +#ifdef PACK_MULTITHREADED + else { packing_multithreading_ctx.mu[clb_index_1]->unlock(); } +#endif + return found; } @@ -142,7 +161,9 @@ bool quasiDirectedCompatibleTypePackingSwap::evaluate_move(const std::vector& new_locs) { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -158,9 +179,13 @@ bool quasiDirectedCompatibleTypePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, 
clb_index_1, mol_2, clb_index_2); - } else { + } +#ifdef PACK_MULTITHREADED + else { packing_multithreading_ctx.mu[clb_index_1]->unlock(); } +#endif + return found; } @@ -171,7 +196,9 @@ bool quasiDirectedSameSizePackingSwap::evaluate_move(const std::vector& new_locs) { +#ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); +#endif t_pack_molecule *mol_1, *mol_2; ClusterBlockId clb_index_1; @@ -187,9 +214,13 @@ bool quasiDirectedSameSizePackingSwap::propose_move(std::vectoratom_block_ids[mol_2->root]); build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } else { + } +#ifdef PACK_MULTITHREADED + else { packing_multithreading_ctx.mu[clb_index_1]->unlock(); } +#endif + return found; } From 3553f5e32b8e4192f8850e4a6da31940f8711f7c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 10:33:49 -0400 Subject: [PATCH 052/188] create place_re_cluster.cpp/.h --- vpr/src/place/place_re_cluster.cpp | 5 +++++ vpr/src/place/place_re_cluster.h | 8 ++++++++ 2 files changed, 13 insertions(+) create mode 100644 vpr/src/place/place_re_cluster.cpp create mode 100644 vpr/src/place/place_re_cluster.h diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp new file mode 100644 index 00000000000..5f9d77a18ec --- /dev/null +++ b/vpr/src/place/place_re_cluster.cpp @@ -0,0 +1,5 @@ +// +// Created by amin on 9/15/23. +// + +#include "place_re_cluster.h" diff --git a/vpr/src/place/place_re_cluster.h b/vpr/src/place/place_re_cluster.h new file mode 100644 index 00000000000..4876c210d9d --- /dev/null +++ b/vpr/src/place/place_re_cluster.h @@ -0,0 +1,8 @@ +// +// Created by amin on 9/15/23. 
+// + +#ifndef VTR_PLACE_RE_CLUSTER_H +#define VTR_PLACE_RE_CLUSTER_H + +#endif //VTR_PLACE_RE_CLUSTER_H From 2623ca8f89473de2e5f222fa0dd5fa4a0ed6956b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 10:41:12 -0400 Subject: [PATCH 053/188] call place_re_cluster after quench --- vpr/src/place/place.cpp | 6 ++++++ vpr/src/place/place_re_cluster.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index b9b1a06b84d..06714cf1ea4 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -69,6 +69,8 @@ #include "noc_place_utils.h" +#include "place_re_cluster.h" + /* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * * compared to the timing cost in the agent's reward function. The reward is calculated as * * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) @@ -483,6 +485,8 @@ void try_place(const Netlist<>& net_list, t_placement_checkpoint placement_checkpoint; t_graph_type graph_directionality; + PlaceReCluster place_re_cluster; + std::shared_ptr timing_info; std::shared_ptr placement_delay_calc; std::unique_ptr place_delay_model; @@ -942,6 +946,8 @@ void try_place(const Netlist<>& net_list, } auto post_quench_timing_stats = timing_ctx.stats; + place_re_cluster.re_cluster(); + //Final timing analysis PlaceCritParams crit_params; crit_params.crit_exponent = state.crit_exponent; diff --git a/vpr/src/place/place_re_cluster.h b/vpr/src/place/place_re_cluster.h index 4876c210d9d..57fe135ac23 100644 --- a/vpr/src/place/place_re_cluster.h +++ b/vpr/src/place/place_re_cluster.h @@ -5,4 +5,11 @@ #ifndef VTR_PLACE_RE_CLUSTER_H #define VTR_PLACE_RE_CLUSTER_H +class PlaceReCluster { + public: + PlaceReCluster() = default; + + void re_cluster(); +}; + #endif //VTR_PLACE_RE_CLUSTER_H From a29e44bfa331611349375fa5cc688ce195a54512 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 11:33:08 
-0400 Subject: [PATCH 054/188] choose random cluster blocks and choose random atoms inside them --- vpr/src/place/place_re_cluster.cpp | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp index 5f9d77a18ec..6182939b63e 100644 --- a/vpr/src/place/place_re_cluster.cpp +++ b/vpr/src/place/place_re_cluster.cpp @@ -3,3 +3,64 @@ // #include "place_re_cluster.h" + +#include "globals.h" +#include "move_utils.h" + +static ClusterBlockId random_cluster(); + +static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id); + +void PlaceReCluster::re_cluster() { + const int num_moves = 2 << 20; + + for (int move_num = 0; move_num < num_moves; ++move_num) { + ClusterBlockId from_cluster_blk_id; + AtomBlockId from_atom_blk_id; + ClusterBlockId to_cluster_blk_id; + AtomBlockId to_atom_blk_id; + + from_cluster_blk_id = random_cluster(); + from_atom_blk_id = random_atom_in_cluster(from_cluster_blk_id); + + + while (true) { + to_cluster_blk_id = random_cluster(); + to_atom_blk_id = random_atom_in_cluster(to_cluster_blk_id); + + if (from_cluster_blk_id != to_cluster_blk_id) { + break; + } + } + } + +} + +static ClusterBlockId random_cluster() { + + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + int rand_id = vtr::irand(cluster_ctx.clb_nlist.blocks().size() - 1); + + return ClusterBlockId(rand_id); + +} + +static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id) { + + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup[cluster_blk_id]; + + int rand_id = vtr::irand(cluster_atoms.size() - 1); + + auto it = cluster_atoms.begin(); + + std::advance(it, rand_id); + + AtomBlockId atom_blk_id = *it; + + return atom_blk_id; + +} + From d9c433a477febe4b3f0837440b5782e4b3c38f04 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 12:42:09 -0400 Subject: [PATCH 055/188] write a basic impl for 
t_pl_atom_loc --- vpr/src/base/vpr_types.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 0a4907556bb..58dcc0df4a3 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -752,6 +752,22 @@ struct hash { }; } // namespace std +struct t_pl_atom_loc { + t_pl_atom_loc() = default; + t_pl_atom_loc(int primitive_id_, int x_, int y_, int sub_tile_, int layer_) + : primitive_id(primitive_id_) + , x(x_) + , y(y_) + , sub_tile(sub_tile_) + , layer(layer_) {} + + int primitive_id = OPEN; + int x = OPEN; + int y = OPEN; + int sub_tile = OPEN; + int layer = OPEN; +} + struct t_place_region { float capacity; /// Date: Fri, 15 Sep 2023 12:48:19 -0400 Subject: [PATCH 056/188] add move transaction data structure for atoms --- vpr/src/place/move_transactions.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 47e06ba808a..4efec641405 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -14,6 +14,12 @@ struct t_pl_moved_block { t_pl_loc new_loc; }; +struct t_pl_moved_atom_block { + AtomBlockId block_num; + t_pl_atom_loc old_loc; + t_pl_atom_loc new_loc; +}; + /* Stores the list of blocks to be moved in a swap during * * placement. 
* * Store the information on the blocks to be moved in a swap during * @@ -40,6 +46,18 @@ struct t_pl_blocks_to_be_moved { std::vector affected_pins; }; +struct t_pl_atom_blocks_to_be_moved { + t_pl_atom_blocks_to_be_moved(size_t max_blocks) + : moved_blocks(max_blocks) {} + + int num_moved_blocks = 0; + std::vector moved_blocks; + std::unordered_set moved_from; + std::unordered_set moved_to; + + std::vector affected_pins; +}; + enum class e_block_move_result { VALID, //Move successful ABORT, //Unable to perform move From 03b9c142fb06aa254d874aafa383610a1e2e45cf Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 13:05:10 -0400 Subject: [PATCH 057/188] add get_atom_mol to pack_utils --- vpr/src/pack/improvement/pack_utils.cpp | 15 +++++++++++++++ vpr/src/pack/improvement/pack_utils.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp index c5f2e1c4606..d9f6dec2663 100644 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ b/vpr/src/pack/improvement/pack_utils.cpp @@ -90,6 +90,21 @@ void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_ #endif } +const t_pack_molecule* get_atom_mol (AtomBlockId atom_blk_id) { + const t_pack_molecule* mol = nullptr; + const auto& atom_mol_map = g_vpr_ctx.atom().atom_molecules; + auto rng = atom_mol_map.equal_range(atom_blk_id); + + for (auto it = rng.first; it != rng.second; ++it) { + mol = it->second; + if (mol->valid) { + break; + } + } + + return mol; +} + void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { #ifdef PACK_MULTITHREADED auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); diff --git a/vpr/src/pack/improvement/pack_utils.h b/vpr/src/pack/improvement/pack_utils.h index a5054f61d8b..b021aef5fc0 100644 --- a/vpr/src/pack/improvement/pack_utils.h +++ 
b/vpr/src/pack/improvement/pack_utils.h @@ -14,4 +14,6 @@ struct t_pack_iterative_stats { void iteratively_improve_packing(const t_packer_opts& packer_opts, t_clustering_data& clustering_data, int verbosity); + +const t_pack_molecule* get_atom_mol (AtomBlockId atom_blk_id); #endif //VTR_PACK_UTILS_H From c764bda78a69698465feaf6ef71ca6328a071b65 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Fri, 15 Sep 2023 17:00:44 -0400 Subject: [PATCH 058/188] Re-clustering API bug fixes --- vpr/src/pack/re_cluster.cpp | 34 ++++++++++++++++++++++++ vpr/src/pack/re_cluster_util.cpp | 45 ++++++++++++++++++-------------- vpr/src/pack/re_cluster_util.h | 34 +++++++++++++++++++----- 3 files changed, 87 insertions(+), 26 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 34e0ada9669..f313874a88f 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -71,6 +71,14 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, //Commit or revert the move if (is_created) { commit_mol_move(old_clb, new_clb, during_packing, true); + // Update the clb-->atoms lookup table + helper_ctx.atoms_lookup.resize(helper_ctx.total_clb_num); + for (int i_atom = 0; i_atom < molecule_size; ++i_atom) { + if (molecule->atom_block_ids[i_atom]) { + helper_ctx.atoms_lookup[new_clb].insert(molecule->atom_block_ids[i_atom]); + } + } + VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); @@ -157,6 +165,9 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, bool during_packing, int verbosity, t_clustering_data& clustering_data) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + //define local variables PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; @@ -193,6 +204,11 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, return false; } 
+ t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); + std::string clb_pb_1_name = (std::string)clb_pb_1->name; + t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); + std::string clb_pb_2_name = (std::string)clb_pb_2->name; + //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_1, during_packing, old_1_router_data, clustering_data); @@ -211,6 +227,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; + + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + return false; } @@ -226,6 +248,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; + + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + return false; } @@ -242,6 +270,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, free_router_data(old_2_router_data); old_1_router_data = nullptr; old_2_router_data = nullptr; + + free(clb_pb_1->name); + cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); + free(clb_pb_2->name); + cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); + return true; } #endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index b3c1d2c2fa9..a76cfd95751 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -10,14 +10,13 @@ #include "read_netlist.h" #include -//The name suffix of the new block 
(if exists) +// The name suffix of the new block (if exists) +// This suffex is useful in preventing duplicate high-level cluster block names const char* name_suffix = "_m"; /******************* Static Functions ********************/ -//static void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* gpin); static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); -//static bool count_children_pbs(const t_pb* pb); static void fix_atom_pin_mapping(const AtomBlockId blk); static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); @@ -42,6 +41,7 @@ static void update_cluster_pb_stats(const t_pack_molecule* molecule, int molecule_size, ClusterBlockId clb_index, bool is_added); + /***************** API functions ***********************/ ClusterBlockId atom_to_cluster(const AtomBlockId& atom) { auto& atom_ctx = g_vpr_ctx.atom(); @@ -66,19 +66,18 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, t_lb_router_data*& router_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - //re-build router_data structure for this cluster - if (!router_data_ready) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - - //remove atom from router_data for (int i_atom = 0; i_atom < molecule_size; i_atom++) { if (molecule->atom_block_ids[i_atom]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i_atom]); auto it = old_clb_atoms->find(molecule->atom_block_ids[i_atom]); if (it != old_clb_atoms->end()) old_clb_atoms->erase(molecule->atom_block_ids[i_atom]); } } + + //re-build router_data structure for this cluster + if (!router_data_ready) + router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); + update_cluster_pb_stats(molecule, molecule_size, old_clb, false); } @@ 
-101,6 +100,7 @@ void commit_mol_move(const ClusterBlockId& old_clb, t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms) { //build data structures used by intra-logic block router auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& atom_ctx = g_vpr_ctx.atom(); auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); @@ -110,14 +110,19 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr for (auto atom_id : *clb_atoms) { add_atom_as_target(router_data, atom_id); + const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); + while(pb) { + set_reset_pb_modes(router_data, pb, true); + pb = pb->parent_pb; + } } return (router_data); } bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, - const int mode, - const int feasible_block_array_size, + const int& mode, + const int& feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, @@ -148,10 +153,11 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; pb->mode = mode; - reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[type->index])); + t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[type->index]); + reset_cluster_placement_stats(cluster_placement_stats); set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[type->index]), + pack_result = try_pack_molecule(cluster_placement_stats, molecule, helper_ctx.primitives_list, pb, @@ -177,6 +183,8 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, pb->name = vtr::strdup(new_name.c_str()); clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); 
helper_ctx.total_clb_num++; + int molecule_size = get_array_size_of_molecule(molecule); + update_cluster_pb_stats(molecule, molecule_size, clb_index, true); //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. if (during_packing) { @@ -220,8 +228,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, return false; //re-build router_data structure for this cluster - if (!is_swap) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); + router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), molecule, @@ -259,11 +266,9 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, update_cluster_pb_stats(molecule, molecule_size, new_clb, true); } - if (!is_swap) { - //Free clustering router data - free_router_data(router_data); - router_data = nullptr; - } + //Free clustering router data + free_router_data(router_data); + router_data = nullptr; return (pack_result == BLK_PASSED); } diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index 201321f741a..d57eaa5f8db 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -79,8 +79,8 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, */ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, - const int mode, - const int feasible_block_array_size, + const int& mode, + const int& feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, @@ -102,8 +102,8 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, */ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, - const ClusterBlockId clb_index, - std::unordered_set* clb_atoms, + const ClusterBlockId& clb_index, + std::unordered_set* new_clb_atoms, bool during_packing, 
bool is_swap, t_clustering_data& clustering_data, @@ -125,22 +125,39 @@ void fix_clustered_netlist(t_pack_molecule* molecule, /** * @brief A function that commits the molecule move if it is legal * - * @during_packing: true if this function is called during packing, false if it is called during placement - * @new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) */ void commit_mol_move(const ClusterBlockId& old_clb, const ClusterBlockId& new_clb, bool during_packing, bool new_clb_created); +/** + * @brief A function that reverts the molecule move if it is illegal + * + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) + * @params + */ void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, t_clustering_data& clustering_data); +/** + * + * @brief A function that checks the legality of a cluster by running the intra-cluster routing + */ bool is_cluster_legal(t_lb_router_data*& router_data); +/** + * @brief A function that commits the molecule removal if it is legal + * + * @params during_packing: true if this function is called during packing, false if it is called during placement + * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) + */ void commit_mol_removal(const t_pack_molecule* molecule, const int& molecule_size, const ClusterBlockId& old_clb, @@ -148,6 +165,11 @@ void commit_mol_removal(const t_pack_molecule* molecule, t_lb_router_data*& router_data, t_clustering_data& clustering_data); +/** + * + * @brief A function that check that two clusters are of the same type and in the same mode of operation + * + */ bool check_type_and_mode_compitability(const ClusterBlockId& old_clb, const ClusterBlockId& new_clb, int verbosity); From 36aaa4ea6288906b4c458e908a46e901f0879903 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 17:11:15 -0400 Subject: [PATCH 059/188] remove ClusterAtomsLookup class --- vpr/src/base/clustered_netlist_utils.cpp | 22 ---------------------- vpr/src/base/clustered_netlist_utils.h | 19 ------------------- 2 files changed, 41 deletions(-) diff --git a/vpr/src/base/clustered_netlist_utils.cpp b/vpr/src/base/clustered_netlist_utils.cpp index a7488d9ba89..797f9dab368 100644 --- a/vpr/src/base/clustered_netlist_utils.cpp +++ b/vpr/src/base/clustered_netlist_utils.cpp @@ -34,25 +34,3 @@ void ClusteredPinAtomPinsLookup::init_lookup(const ClusteredNetlist& clustered_n } } } - -ClusterAtomsLookup::ClusterAtomsLookup() { - init_lookup(); -} - -void ClusterAtomsLookup::init_lookup() { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - cluster_atoms.resize(cluster_ctx.clb_nlist.blocks().size()); - - for (auto atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); - - cluster_atoms[clb_index].push_back(atom_blk_id); - } -} - -std::vector ClusterAtomsLookup::atoms_in_cluster(ClusterBlockId blk_id) { - std::vector atoms = cluster_atoms[blk_id]; - return atoms; -} diff --git a/vpr/src/base/clustered_netlist_utils.h b/vpr/src/base/clustered_netlist_utils.h index 52688f88e47..84bc11998c5 100644 --- a/vpr/src/base/clustered_netlist_utils.h +++ 
b/vpr/src/base/clustered_netlist_utils.h @@ -26,23 +26,4 @@ class ClusteredPinAtomPinsLookup { vtr::vector> clustered_pin_connected_atom_pins_; vtr::vector atom_pin_connected_cluster_pin_; }; - -/* - * This lookup is used to see which atoms are in each cluster block. - * Getting the atoms inside of a cluster is an order k lookup. - * The data is initialized automatically upon creation of the object. - * The class should only be used after the clustered netlist is created. - */ -class ClusterAtomsLookup { - public: - ClusterAtomsLookup(); - std::vector atoms_in_cluster(ClusterBlockId blk_id); - - public: - void init_lookup(); - - private: - //Store the atom ids of the atoms inside each cluster - vtr::vector> cluster_atoms; -}; #endif From 1969e170c7a4418430b2aa851ccae1849e53934f Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 15 Sep 2023 19:16:36 -0400 Subject: [PATCH 060/188] add operator== to t_pl_atom_loc --- vpr/src/base/vpr_types.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 58dcc0df4a3..8f32b9e77af 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -732,7 +732,7 @@ struct t_pl_loc { } friend bool operator==(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile); + return std::tie(lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.x, rhs.y, rhs.sub_tile, rhs.layer); } friend bool operator!=(const t_pl_loc& lhs, const t_pl_loc& rhs) { @@ -766,7 +766,11 @@ struct t_pl_atom_loc { int y = OPEN; int sub_tile = OPEN; int layer = OPEN; -} + + friend bool operator==(const t_pl_atom_loc& lhs, const t_pl_atom_loc& rhs) { + return std::tie(lhs.primitive_id, lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.primitive_id, rhs.x, rhs.y, rhs.sub_tile, rhs.layer); + } +}; struct t_place_region { float capacity; /// Date: Fri, 15 Sep 2023 19:17:39 -0400 Subject: 
[PATCH 061/188] add a method to gridblock to get the atom id from atom loc --- vpr/src/base/vpr_types.cpp | 20 ++++++++++++++++++++ vpr/src/base/vpr_types.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 32c463d0eea..7907ff58825 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -303,6 +303,26 @@ void t_cluster_placement_stats::free_primitives() { } } +AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + t_pl_loc cluster_loc (loc.x, loc.y, loc.sub_tile, loc.layer); + ClusterBlockId cluster_at_loc = block_at_location(cluster_loc); + if (cluster_at_loc == ClusterBlockId::INVALID()) { + return AtomBlockId::INVALID(); + } else { + const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup; + const auto& atom_list = cluster_atoms.at(cluster_at_loc); + for (const auto& atom : atom_list) { + int primitive_pin = atom_lookup.atom_pb_graph_node(atom)->primitive_num; + t_pl_atom_loc atom_loc(primitive_pin, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer); + if (atom_loc == loc) { + return atom; + } + } + return AtomBlockId::INVALID(); + } +} + t_cluster_placement_primitive* t_cluster_placement_stats::get_cluster_placement_primitive_from_pb_graph_node(const t_pb_graph_node* pb_graph_node) { auto it = valid_primitives[pb_graph_node->cluster_placement_type_index].find(pb_graph_node->cluster_placement_primitive_index); if (it != valid_primitives[pb_graph_node->cluster_placement_type_index].end()) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 8f32b9e77af..c662bf85ebe 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -836,6 +836,8 @@ class GridBlock { return grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile]; } + AtomBlockId block_at_location(const t_pl_atom_loc& loc) const; + inline size_t num_blocks_at_location(const 
t_physical_tile_loc& loc) const { return grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.size(); } From 9b35c9aa674cf2e2c3850d1fdf65efc929a855a9 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Sat, 16 Sep 2023 11:39:12 -0400 Subject: [PATCH 062/188] impl for find_affected_blocks for atom blocks --- vpr/src/place/move_utils.cpp | 29 +++++++++++++++++++++++++++++ vpr/src/place/move_utils.h | 2 ++ 2 files changed, 31 insertions(+) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 5a1e9f1cd60..f18b234bd8e 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -66,6 +66,35 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock } } +e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { + + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + e_block_move_result outcome = e_block_move_result::VALID; + + ClusterBlockId from_cluster_block = atom_lookup.atom_clb(b_from); + VTR_ASSERT(from_cluster_block.is_valid()); + + //TODO: Currently, if the atom belong to a cluster that is a part of a macro, we don't move it + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + int imacro = OPEN; + get_imacro_from_iblk(&imacro, from_cluster_block, pl_macros); + if (imacro != OPEN) { + return e_block_move_result::ABORT; + } else { + const auto& grid_blocks = g_vpr_ctx.placement().grid_blocks; + AtomBlockId to_atom = grid_blocks.block_at_location(to_loc); + if (to_atom.is_valid()) { + ClusterBlockId to_cluster_block = atom_lookup.atom_clb(to_atom); + get_imacro_from_iblk(&imacro, to_cluster_block, pl_macros); + if (imacro != OPEN) { + return e_block_move_result::ABORT; + } + } + outcome = record_single_block_swap(atom_blocks_affected, b_from, to_loc); + return outcome; + } +} + e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { /* Finds and set ups the 
affected_blocks array. * Returns abort_swap. */ diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index acb2b0d95b0..8fc95c875be 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -92,6 +92,8 @@ void report_aborted_moves(); e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); +e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); + e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); From a44efb73eb50ab000f55a087dbd8e30650c8a280 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Sat, 16 Sep 2023 17:01:29 -0400 Subject: [PATCH 063/188] add macros for EMPTY_PRIMITIVE_BLOCK_ID --- vpr/src/base/vpr_types.cpp | 9 ++++++--- vpr/src/base/vpr_types.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 7907ff58825..b76ed193450 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -307,9 +307,12 @@ AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { const auto& atom_lookup = g_vpr_ctx.atom().lookup; t_pl_loc cluster_loc (loc.x, loc.y, loc.sub_tile, loc.layer); ClusterBlockId cluster_at_loc = block_at_location(cluster_loc); - if (cluster_at_loc == ClusterBlockId::INVALID()) { - return AtomBlockId::INVALID(); + if (cluster_at_loc == EMPTY_BLOCK_ID) { + return EMPTY_PRIMITIVE_BLOCK_ID; + } else if (cluster_at_loc == INVALID_BLOCK_ID) { + return INVALID_PRIMITIVE_BLOCK_ID; } else { + VTR_ASSERT(cluster_at_loc.is_valid()); const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup; const auto& atom_list = cluster_atoms.at(cluster_at_loc); for (const auto& atom : atom_list) { @@ -319,7 +322,7 @@ 
AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { return atom; } } - return AtomBlockId::INVALID(); + return EMPTY_PRIMITIVE_BLOCK_ID; } } diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index c662bf85ebe..cda9a967ec0 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -91,7 +91,9 @@ enum class ScreenUpdatePriority { /* Defining macros for the placement_ctx t_grid_blocks. Assumes that ClusterBlockId's won't exceed positive 32-bit integers */ constexpr auto EMPTY_BLOCK_ID = ClusterBlockId(-1); +constexpr auto EMPTY_PRIMITIVE_BLOCK_ID = AtomBlockId(-1); constexpr auto INVALID_BLOCK_ID = ClusterBlockId(-2); +constexpr auto INVALID_PRIMITIVE_BLOCK_ID = AtomBlockId(-2); /* * Files From 3f70a1cfae0fdcb1e3497b37c1fdf7514e03d35c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Sat, 16 Sep 2023 17:26:02 -0400 Subject: [PATCH 064/188] add hashing function for t_pl_atom_loc --- vpr/src/base/vpr_types.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index cda9a967ec0..cd59c882ec6 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -774,6 +774,20 @@ struct t_pl_atom_loc { } }; +namespace std { +template<> +struct hash { + std::size_t operator()(const t_pl_atom_loc& v) const noexcept { + std::size_t seed = std::hash{}(v.x); + vtr::hash_combine(seed, v.y); + vtr::hash_combine(seed, v.sub_tile); + vtr::hash_combine(seed, v.layer); + vtr::hash_combine(seed, v.primitive_id); + return seed; + } +}; +} // namespace std + struct t_place_region { float capacity; /// Date: Sat, 16 Sep 2023 17:44:02 -0400 Subject: [PATCH 065/188] write record_block_move for atom blocks --- vpr/src/place/move_transactions.cpp | 29 +++++++++++++++++++++++++++++ vpr/src/place/move_transactions.h | 2 ++ 2 files changed, 31 insertions(+) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 2c7d6dc180d..0a95879ec83 100644 
--- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -3,6 +3,35 @@ #include "globals.h" #include "place_util.h" +e_block_move_result record_block_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId blk, t_pl_atom_loc to) { + auto res = blocks_affected.moved_to.emplace(to); + if (!res.second) { + log_move_abort("duplicate block move to location"); + return e_block_move_result::ABORT; + } + + const auto& place_ctx = g_vpr_ctx.placement(); + + t_pl_atom_loc from = get_atom_loc(blk); + + auto res2 = blocks_affected.moved_from.emplace(from); + if (!res2.second) { + log_move_abort("duplicate block move from location"); + return e_block_move_result::ABORT; + } + + VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); + + // Sets up the blocks moved + int imoved_blk = blocks_affected.num_moved_blocks; + blocks_affected.moved_blocks[imoved_blk].block_num = blk; + blocks_affected.moved_blocks[imoved_blk].old_loc = from; + blocks_affected.moved_blocks[imoved_blk].new_loc = to; + blocks_affected.num_moved_blocks++; + + return e_block_move_result::VALID; +} + //Records that block 'blk' should be moved to the specified 'to' location e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId blk, t_pl_loc to) { auto res = blocks_affected.moved_to.emplace(to); diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 4efec641405..d1b3b6f0363 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -65,6 +65,8 @@ enum class e_block_move_result { INVERT_VALID //Completed inverted move }; +e_block_move_result record_block_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId blk, t_pl_atom_loc to); + e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId blk, t_pl_loc to); void apply_move_blocks(const t_pl_blocks_to_be_moved& 
blocks_affected); From dd0126039f4354c62c9dcc23b605183fde36329f Mon Sep 17 00:00:00 2001 From: amin1377 Date: Sat, 16 Sep 2023 17:45:44 -0400 Subject: [PATCH 066/188] write a (temp) helper function to get the location of an atom --- vpr/src/place/place_util.cpp | 9 +++++++++ vpr/src/place/place_util.h | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 75ff2d2bf12..39e732a4f5f 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -547,3 +547,12 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ return (mac_can_be_placed); } + +t_pl_atom_loc get_atom_loc (AtomBlockId atom) { + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom); + t_pl_loc cluster_loc = g_vpr_ctx.placement().block_locs[cluster_blk].loc; + int primitive_id = atom_lookup.atom_pb_graph_node(atom)->primitive_num; + + return {primitive_id, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer}; +} diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index cc903cf4f71..08ba65f08f5 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -264,4 +264,8 @@ inline bool is_loc_on_chip(t_physical_tile_loc loc) { */ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_legality); +//TODO: The atom loc should be stored in place_ctx -- I am creating this function because I didn't want to create another +// Array in place_ctx. 
+t_pl_atom_loc get_atom_loc (AtomBlockId atom); + #endif From 13998dc29af8d9f4c5101deb32baa340bd594a0a Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 18 Sep 2023 08:07:11 -0400 Subject: [PATCH 067/188] remove unused variable --- vpr/src/pack/re_cluster.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index f313874a88f..3cd99f31267 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -166,8 +166,7 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, int verbosity, t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - + //define local variables PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; From ac3113618ea3388bae3fa0f8b28b777e66fcf425 Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 18 Sep 2023 08:09:40 -0400 Subject: [PATCH 068/188] fix formatting --- vpr/src/pack/re_cluster_util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index a76cfd95751..9f1fb106deb 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -111,7 +111,7 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr for (auto atom_id : *clb_atoms) { add_atom_as_target(router_data, atom_id); const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); - while(pb) { + while (pb) { set_reset_pb_modes(router_data, pb, true); pb = pb->parent_pb; } From e5486f3edfb71b8b2efbdd73c33a82bb4264599b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 09:42:58 -0400 Subject: [PATCH 069/188] set a default number (-1) to pb_graph_node primitive_num --- libs/libarchfpga/src/physical_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index f1faa3cefed..029616d3953 
100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1232,7 +1232,7 @@ class t_pb_graph_node { int placement_index; - int primitive_num; + int primitive_num = OPEN; /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage * * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to From 93be443bb892f090cc0dc40b99f73c656f3360fc Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 10:27:48 -0400 Subject: [PATCH 070/188] rename pb_graph_node_class_range under logical block to primitive_pb_graph_node_class_range --- libs/libarchfpga/src/physical_types.h | 2 +- libs/libarchfpga/src/physical_types_util.cpp | 2 +- vpr/src/pack/pb_type_graph.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 029616d3953..5d4f9e422f2 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -926,7 +926,7 @@ struct t_logical_block_type { std::unordered_map pin_logical_num_to_pb_pin_mapping; /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */ std::unordered_map primitive_pb_pin_to_logical_class_num_mapping; /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */ std::vector primitive_logical_class_inf; /* primitive_logical_class_inf[class_logical_number] -> class */ - std::unordered_map pb_graph_node_class_range; + std::unordered_map primitive_pb_graph_node_class_range; // Is this t_logical_block_type empty? 
bool is_empty() const; diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index f6a7732ca8a..43a0fbc54da 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -965,7 +965,7 @@ t_class_range get_pb_graph_node_class_physical_range(t_physical_tile_type_ptr /* const t_pb_graph_node* pb_graph_node) { VTR_ASSERT(pb_graph_node->is_primitive()); - t_class_range class_range = logical_block->pb_graph_node_class_range.at(pb_graph_node); + t_class_range class_range = logical_block->primitive_pb_graph_node_class_range.at(pb_graph_node); int logical_block_class_offset = sub_tile->primitive_class_range[sub_tile_relative_cap].at(logical_block).low; class_range.low += logical_block_class_offset; diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp index 473b651f7c7..03ad3b2fc03 100644 --- a/vpr/src/pack/pb_type_graph.cpp +++ b/vpr/src/pack/pb_type_graph.cpp @@ -529,7 +529,7 @@ static void add_primitive_logical_classes(t_logical_block_type* logical_block) { } num_added_classes += add_port_logical_classes(logical_block, pb_graph_pins, num_ports, num_pins); } - logical_block->pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, + logical_block->primitive_pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, first_class_num + num_added_classes - 1))); } } From 8a13c7c13d2d2209bda371b36858a02c65f37189 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 10:32:03 -0400 Subject: [PATCH 071/188] add is_atom_compatible to physical types util --- libs/libarchfpga/src/physical_types_util.cpp | 19 +++++++++++++++++++ libs/libarchfpga/src/physical_types_util.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index 43a0fbc54da..da3a6ccc577 100644 --- 
a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -524,6 +524,25 @@ bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_bl return capacity_compatible && is_tile_compatible(physical_tile, logical_block); } +bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num) { + VTR_ASSERT(loc_primitive_num != OPEN); + const t_pb_graph_node* loc_pb_graph_node = nullptr; + for (const auto& primiive_node_class_pair : logical_block->primitive_pb_graph_node_class_range) { + const auto& primitive_node = primiive_node_class_pair.first; + VTR_ASSERT(primitive_node->primitive_num != OPEN); + if (primitive_node->primitive_num == loc_primitive_num) { + loc_pb_graph_node = primitive_node; + break; + } + } + VTR_ASSERT(loc_pb_graph_node != nullptr); + if (loc_pb_graph_node->pb_type == atom_pb_graph_node->pb_type) + return true; + else + return false; + +} + int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_capacity, diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index e27ba096b54..30cadec5962 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -187,6 +187,8 @@ bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_ ///@brief Verifies whether a logical block and a relative placement location is compatible with a given physical tile bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc); +bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num); + /** * @brief Returns the first physical tile type that matches the logical block * From 99076337042852fe4178ac66a0af40408863f2c6 Mon Sep 17 
00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 10:41:53 -0400 Subject: [PATCH 072/188] add place_re_cluster option --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 8 ++++++++ vpr/src/base/read_options.h | 1 + 3 files changed, 10 insertions(+) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 0eb76f86b70..dfc366e1cd0 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -676,6 +676,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->place_constraint_subtile = Options.place_constraint_subtile; PlacerOpts->floorplan_num_horizontal_partitions = Options.floorplan_num_horizontal_partitions; PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; + PlacerOpts->place_re_cluster = Options.place_re_cluster; PlacerOpts->seed = Options.Seed; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 5cc0c2d47e1..9e6a21f6138 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2128,6 +2128,14 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .choices({"move_type", "move_block_type"}) .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.place_re_cluster, "--place_re_cluster") + .help( + "Use this option to determine whether reclustering occurs during placement. """ + "When this option is set to 'on,' the placement stage may result in changes to the clustering of certain clusters. 
" + "Conversely, if the option is set to 'off,' the clustering determined by the packer will remain unchanged") + .default_value("off") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 98f9e084846..7a73d12b4d3 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -143,6 +143,7 @@ struct t_options { argparse::ArgValue place_constraint_subtile; argparse::ArgValue floorplan_num_horizontal_partitions; argparse::ArgValue floorplan_num_vertical_partitions; + argparse::ArgValue place_re_cluster; /*NoC Options*/ argparse::ArgValue noc; From b8b9ec179c84b5f16997759f421e5f2c501a49a7 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 10:44:38 -0400 Subject: [PATCH 073/188] do reclustring if the option is set --- vpr/src/place/place.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 06714cf1ea4..48f53aeffb9 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -946,7 +946,9 @@ void try_place(const Netlist<>& net_list, } auto post_quench_timing_stats = timing_ctx.stats; - place_re_cluster.re_cluster(); + if (placer_opts.place_re_cluster) { + place_re_cluster.re_cluster(); + } //Final timing analysis PlaceCritParams crit_params; From 48f77c80edeff0d1d5c4534d4bd5873a47e9a525 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 10:46:24 -0400 Subject: [PATCH 074/188] remained from adding the new Cl arg --- vpr/src/base/vpr_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index cd59c882ec6..f0c4c38bf2a 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1240,6 +1240,8 @@ struct t_placer_opts { int 
floorplan_num_horizontal_partitions; int floorplan_num_vertical_partitions; + bool place_re_cluster; + /** * @brief Tile types that should be used during delay sampling. * From 0c00cf1cdb3682562bbc59cce8bb16646dffdf5a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 18 Sep 2023 11:05:16 -0400 Subject: [PATCH 075/188] impl is_legal_swap for atoms --- vpr/src/place/move_utils.cpp | 39 ++++++++++++++++++++++++++++++++++++ vpr/src/place/move_utils.h | 2 ++ 2 files changed, 41 insertions(+) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index f18b234bd8e..c70c23b868a 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -469,6 +469,45 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe return outcome; } +bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to) { + //Make sure that the swap_to location is valid + //It must be: + // * on chip, and + // * match the correct block type + // + //Note that we need to explicitly check that the types match, since the device floorplan is not + //(neccessarily) translationally invariant for an arbitrary macro + + const auto& atom_pb = g_vpr_ctx.atom().lookup.atom_pb(blk); + + ClusterBlockId cluster_block = g_vpr_ctx.placement().grid_blocks.block_at_location({to.x, to.y, to.sub_tile, to.layer}); + t_pl_loc cluster_loc (to.x, to.y, to.sub_tile, to.layer); + + if (!is_legal_swap_to_location(cluster_block, cluster_loc)) { + return false; + } + + std::vector logical_blocks; + + if (cluster_block.is_valid() && cluster_block != INVALID_BLOCK_ID) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + auto logical_block = cluster_ctx.clb_nlist.block_type(cluster_block); + logical_blocks.push_back(logical_block); + } else if (cluster_block == EMPTY_BLOCK_ID) { + const auto& physical_tile = g_vpr_ctx.device().grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); + const auto& sub_tile = physical_tile->sub_tiles[to.sub_tile]; + 
logical_blocks = sub_tile.equivalent_sites; + } + + for (const auto& logical_block : logical_blocks) { + if (is_atom_compatible(logical_block, atom_pb->pb_graph_node, to.primitive_id)) { + return true; + } + } + + return false; +} + bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { //Make sure that the swap_to location is valid //It must be: diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 8fc95c875be..94f178339c7 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -108,6 +108,8 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, e_block_move_result identify_macro_self_swap_affected_macros(std::vector& macros, const int imacro, t_pl_offset swap_offset); e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, t_pl_offset swap_offset); +bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to); + bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); std::set determine_locations_emptied_by_move(t_pl_blocks_to_be_moved& blocks_affected); From ea24fbc63e9696e782867482b54e51da1e92334e Mon Sep 17 00:00:00 2001 From: MohamedElgammal Date: Mon, 18 Sep 2023 12:43:26 -0400 Subject: [PATCH 076/188] remove unused parameters and fix formatting --- vpr/src/pack/re_cluster.cpp | 16 ++++++++-------- vpr/src/pack/re_cluster_util.cpp | 3 +-- vpr/src/pack/re_cluster_util.h | 3 +-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 3cd99f31267..fb67f0b37b4 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -136,7 +136,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, //Add the atom to the new cluster t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, false, clustering_data, new_router_data); + is_added = 
pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, clustering_data, new_router_data); //Commit or revert the move if (is_added) { @@ -166,7 +166,7 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, int verbosity, t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - + //define local variables PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; @@ -216,10 +216,10 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); if (!mol_1_success) { - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); @@ -235,12 +235,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, return false; } - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, 
clb_1_atoms, during_packing, clustering_data, old_1_router_data); if (!mol_2_success) { remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, true, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, true, clustering_data, old_2_router_data); + mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); + mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); VTR_ASSERT(mol_1_success && mol_2_success); free_router_data(old_1_router_data); diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 9f1fb106deb..2cdbf8dab53 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -207,10 +207,9 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, - const ClusterBlockId new_clb, + const ClusterBlockId& new_clb, std::unordered_set* new_clb_atoms, bool during_packing, - bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index d57eaa5f8db..e5bf3f89096 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -102,10 +102,9 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, */ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, int molecule_size, - const ClusterBlockId& clb_index, + const 
ClusterBlockId& new_clb, std::unordered_set* new_clb_atoms, bool during_packing, - bool is_swap, t_clustering_data& clustering_data, t_lb_router_data*& router_data); From 176c913f7fa0d2d3956b40ac2aa03962b167a4cf Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 19 Sep 2023 17:11:50 -0400 Subject: [PATCH 077/188] implement record_single_block_swap for atom moves --- vpr/src/place/move_utils.cpp | 43 ++++++++++++++++++++++++++++++++++-- vpr/src/place/move_utils.h | 2 ++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index c70c23b868a..1edffca9bb8 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -90,9 +90,10 @@ e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_bloc return e_block_move_result::ABORT; } } - outcome = record_single_block_swap(atom_blocks_affected, b_from, to_loc); - return outcome; } + + outcome = record_single_block_swap(atom_blocks_affected, b_from, to_loc); + return outcome; } e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { @@ -142,6 +143,44 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte return outcome; } +e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { + + VTR_ASSERT(b_from); + ClusterBlockId cluster_b_from = g_vpr_ctx.atom().lookup.atom_clb(b_from); + + const auto& place_ctx = g_vpr_ctx.placement(); + + if (place_ctx.block_locs[cluster_b_from].is_fixed) { + return e_block_move_result::ABORT; + } + + VTR_ASSERT_SAFE(to_loc.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to_loc.x, to_loc.y, to_loc.layer}))); + + e_block_move_result outcome = e_block_move_result::ABORT; + + AtomBlockId b_to = place_ctx.grid_blocks.block_at_location(to_loc); + + if (b_to == EMPTY_PRIMITIVE_BLOCK_ID) { + outcome = 
record_block_move(blocks_affected, b_from, to_loc); + } else if (b_to != INVALID_PRIMITIVE_BLOCK_ID) { + ClusterBlockId cluster_b_to = g_vpr_ctx.atom().lookup.atom_clb(b_to); + if (!(is_legal_swap_to_location(b_to, to_loc)) || place_ctx.block_locs[cluster_b_to].is_fixed) { + return e_block_move_result::ABORT; + } + + outcome = record_block_move(blocks_affected, b_from, to_loc); + + if (outcome != e_block_move_result::VALID) { + return outcome; + } + + t_pl_atom_loc from_atom_loc = get_atom_loc(b_from); + outcome = record_block_move(blocks_affected, b_to, from_atom_loc); + } + + return outcome; +} + e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { /* Find all the blocks affected when b_from is swapped with b_to. * Returns abort_swap. */ diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 94f178339c7..b7289c0029b 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -96,6 +96,8 @@ e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_bloc e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); +e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); + e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, t_pl_offset swap_offset); From 8c11b1bd4c451fcc0261f2568307a787a7c7c9a4 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 19 Sep 2023 17:24:23 -0400 Subject: [PATCH 078/188] impl create_move for placement reclustering --- vpr/src/place/move_utils.cpp | 13 +++++++++++++ vpr/src/place/move_utils.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/vpr/src/place/move_utils.cpp 
b/vpr/src/place/move_utils.cpp index 1edffca9bb8..bcbed9a17ba 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -33,6 +33,19 @@ void report_aborted_moves() { } } +e_create_move create_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to) { + e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to); + // Currently, for re-clustering during placement, we don't support INVERT + VTR_ASSERT(outcome != e_block_move_result::INVERT || outcome != e_block_move_result::INVERT_VALID); + + if (outcome == e_block_move_result::VALID) { + return e_create_move::VALID; + } else { + VTR_ASSERT(outcome == e_block_move_result::ABORT); + return e_create_move::ABORT; + } +} + e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to); diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index b7289c0029b..9faf7f62a15 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -90,6 +90,8 @@ void log_move_abort(const std::string& reason); //Prints a breif report about aborted move reasons and counts void report_aborted_moves(); +e_create_move create_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to); + e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); From a0880e8bd8b6c486b936d4014cf4adcc9293c9c3 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 19 Sep 2023 17:44:13 -0400 Subject: [PATCH 079/188] floorplan_legal for atom block --- vpr/src/place/place_constraints.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index 
5fbc481fd2b..ffec92923ed 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -65,6 +65,25 @@ void propagate_place_constraints(); void print_macro_constraint_error(const t_pl_macro& pl_macro); +inline bool floorplan_legal(const t_pl_atom_blocks_to_be_moved& blocks_affected) { + bool floorplan_legal; + + for (int i = 0; i < blocks_affected.num_moved_blocks; i++) { + AtomBlockId mv_atom_blk = blocks_affected.moved_blocks[i].block_num; + ClusterBlockId cluster_blk = g_vpr_ctx.atom().lookup.atom_clb(mv_atom_blk); + const t_pl_atom_loc& to_pl_atom_loc = blocks_affected.moved_blocks[i].new_loc; + t_pl_loc to_pl_loc = {to_pl_atom_loc.x, to_pl_atom_loc.y, to_pl_atom_loc.sub_tile, to_pl_atom_loc.layer}; + floorplan_legal = cluster_floorplanning_legal(cluster_blk, to_pl_loc); + if (!floorplan_legal) { +# ifdef VERBOSE + VTR_LOG("Move aborted for block %zu, location tried was x: %d, y: %d, subtile: %d \n", size_t(blocks_affected.moved_blocks[i].block_num), blocks_affected.moved_blocks[i].new_loc.x, blocks_affected.moved_blocks[i].new_loc.y, blocks_affected.moved_blocks[i].new_loc.sub_tile); +# endif + return false; + } + } + return true; +} + inline bool floorplan_legal(const t_pl_blocks_to_be_moved& blocks_affected) { bool floorplan_legal; From ccbf5a3cbaa9656770887922c626cf0c3da51a28 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 13:38:47 -0400 Subject: [PATCH 080/188] impl: cluster_pins_connected_to_atom_pin --- vpr/src/util/vpr_utils.cpp | 42 +++++++++++++++++++++++++++++++++++++- vpr/src/util/vpr_utils.h | 3 +++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 6157c9b980d..fed27deb15d 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -439,7 +439,7 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ } /* Return the net pin which drive the CLB input connected to sink_pb_pin_id, or nullptr 
if none (i.e. driven internally) - * clb: Block in which the the sink pin is located on + * clb: Block in which the sink pin is located on * sink_pb_pin_id: The physical pin index of the sink pin on the block * * Returns a tuple containing @@ -501,6 +501,46 @@ std::tuple find_pb_route_clb_input_net_pin(ClusterBlockI return std::tuple(clb_net_idx, curr_pb_pin_id, clb_net_pin_idx); } +std::vector cluster_pins_connected_to_atom_pin(AtomPinId atom_pin) { + std::vector cluster_pins; + const auto& atom_net_list = g_vpr_ctx.atom().nlist; + const auto& atom_look_up = g_vpr_ctx.atom().lookup; + const auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; + AtomBlockId atom_block_id = atom_net_list.pin_block(atom_pin); + AtomNetId atom_net_id = atom_net_list.pin_net(atom_pin); + ClusterNetId cluster_net_id = atom_look_up.clb_net(atom_net_id); + ClusterBlockId cluster_block_id = atom_look_up.atom_clb(atom_block_id); + if (cluster_net_id == ClusterNetId::INVALID()) { + return cluster_pins; + } + + const auto& atom_pb_graph_pin = g_vpr_ctx.atom().lookup.atom_pin_pb_graph_pin(atom_pin); + int atom_pb_pin_id = atom_pb_graph_pin->pin_count_in_cluster; + std::vector cluster_pb_pin_id; + if (atom_pb_graph_pin->port->type == PORTS::IN_PORT) { + int cluster_pin_id; + int cluster_net_pin_id; + std::tie(cluster_net_id, cluster_pin_id, cluster_net_pin_id) = + find_pb_route_clb_input_net_pin(cluster_block_id, atom_pb_pin_id); + if (cluster_net_id != ClusterNetId::INVALID()) { + VTR_ASSERT(cluster_pin_id != -1 && cluster_net_pin_id != -1); + cluster_pins.push_back(cluster_net_list.net_pin(cluster_net_id, cluster_net_pin_id)); + } + } else { + VTR_ASSERT(atom_pb_graph_pin->port->type == PORTS::OUT_PORT); + std::vector connected_sink_pb_pins; + connected_sink_pb_pins = find_connected_internal_clb_sink_pins(cluster_block_id, atom_pb_pin_id); + for (int sink_pb_pin : connected_sink_pb_pins) { + int net_pin_idx = cluster_net_list.block_pin_net_index(cluster_block_id, sink_pb_pin); + if 
(net_pin_idx != OPEN) { + cluster_pins.push_back(cluster_net_list.net_pin(cluster_net_id, net_pin_idx)); + } + } + } + + return cluster_pins; +} + bool is_clb_external_pin(ClusterBlockId blk_id, int pb_pin_id) { auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 1ba3dcb35b7..5d7eb1767b5 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -152,6 +152,9 @@ std::vector find_clb_pin_sink_atom_pins(ClusterBlockId clb, int logic std::tuple find_pb_route_clb_input_net_pin(ClusterBlockId clb, int sink_pb_route_id); +// Return the cluster pins connected to the atom pin +std::vector cluster_pins_connected_to_atom_pin(AtomPinId atom_pin); + //Returns the port matching name within pb_gnode const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string port_name); From 27f54f466c5ac645cfb13a83fdfeec763007a6ff Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 17:06:41 -0400 Subject: [PATCH 081/188] impl a function to update td and bb costs when a pin of a net is moved --- vpr/src/place/place.cpp | 73 ++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 48f53aeffb9..61ba9ad9b56 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -340,6 +340,16 @@ static int find_affected_nets_and_update_costs( double& bb_delta_c, double& timing_delta_c); +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities& criticalities, + const ClusterNetId& net_id, + const ClusterPinId& pin_id, + const int affected_blk_id, + t_pl_blocks_to_be_moved& blocks_affected, + double& timing_delta_c, + int& num_affected_nets); + static void record_affected_net(const ClusterNetId net, int& num_affected_nets); static void update_net_bb(const ClusterNetId net, @@ -347,6 +357,7 @@ static void 
update_net_bb(const ClusterNetId net, int iblk, const ClusterBlockId blk, const ClusterPinId blk_pin); + static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -1778,25 +1789,15 @@ static int find_affected_nets_and_update_costs( /* Go through all the pins in the moved block. */ for (ClusterPinId blk_pin : cluster_ctx.clb_nlist.block_pins(blk)) { ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(blk_pin); - VTR_ASSERT_SAFE_MSG(net_id, - "Only valid nets should be found in compressed netlist block pins"); - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - //TODO: Do we require anyting special here for global nets? - //"Global nets are assumed to span the whole chip, and do not effect costs." - continue; - - /* Record effected nets */ - record_affected_net(net_id, num_affected_nets); - - /* Update the net bounding boxes. */ - update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); - - if (place_algorithm.is_timing_driven()) { - /* Determine the change in connection delay and timing cost. 
*/ - update_td_delta_costs(delay_model, *criticalities, net_id, - blk_pin, blocks_affected, timing_delta_c); - } + update_net_info_on_pin_move(place_algorithm, + delay_model, + criticalities, + net_id, + blk_pin, + iblk, + blocks_affected, + timing_delta_c, + num_affected_nets); } } @@ -1814,6 +1815,40 @@ static int find_affected_nets_and_update_costs( return num_affected_nets; } +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities& criticalities, + const ClusterNetId& net_id, + const ClusterPinId& pin_id, + const int affected_blk_id, + t_pl_blocks_to_be_moved& blocks_affected, + double& timing_delta_c, + int& num_affected_nets) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + VTR_ASSERT_SAFE_MSG(net_id, + "Only valid nets should be found in compressed netlist block pins"); + + if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + //TODO: Do we require anyting special here for global nets? + //"Global nets are assumed to span the whole chip, and do not effect costs." + return; + } + + ClusterBlockId blk_id = blocks_affected.moved_blocks[affected_blk_id].block_num; + + /* Record effected nets */ + record_affected_net(net_id, num_affected_nets); + + /* Update the net bounding boxes. */ + update_net_bb(net_id, blocks_affected, affected_blk_id, blk_id, pin_id); + + if (place_algorithm.is_timing_driven()) { + /* Determine the change in connection delay and timing cost. */ + update_td_delta_costs(delay_model, criticalities, net_id, + pin_id, blocks_affected, timing_delta_c); + } +} + ///@brief Record effected nets. 
static void record_affected_net(const ClusterNetId net, int& num_affected_nets) { From d54036a51288e9d83deefcac6df164e48d86de4e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 17:10:33 -0400 Subject: [PATCH 082/188] fix a bug with passsing criticalities by pointer --- vpr/src/place/place.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 61ba9ad9b56..6eb4d67caa0 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -342,7 +342,7 @@ static int find_affected_nets_and_update_costs( static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, - const PlacerCriticalities& criticalities, + const PlacerCriticalities* criticalities, const ClusterNetId& net_id, const ClusterPinId& pin_id, const int affected_blk_id, @@ -1817,7 +1817,7 @@ static int find_affected_nets_and_update_costs( static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, - const PlacerCriticalities& criticalities, + const PlacerCriticalities* criticalities, const ClusterNetId& net_id, const ClusterPinId& pin_id, const int affected_blk_id, @@ -1844,7 +1844,7 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. 
*/ - update_td_delta_costs(delay_model, criticalities, net_id, + update_td_delta_costs(delay_model, *criticalities, net_id, pin_id, blocks_affected, timing_delta_c); } } From 0fdc7e08e4c6c44ac34dd4dc2919e69764afed34 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 18:39:57 -0400 Subject: [PATCH 083/188] impl apply_move_blocks for atom blocks affected --- vpr/src/place/move_transactions.cpp | 15 +++++++++++++++ vpr/src/place/move_transactions.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 0a95879ec83..9f2b1f5eaa9 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -62,6 +62,21 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, return e_block_move_result::VALID; } +//Moves the blocks in blocks_affected to their new locations +void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected) { + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + std::set seen_clusters; + for (int blk_idx = 0; blk_idx < blocks_affected.num_moved_blocks; blk_idx++) { + AtomBlockId atom_blk = blocks_affected.moved_blocks[blk_idx].block_num; + ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom_blk); + if (seen_clusters.find(cluster_blk) == seen_clusters.end()) { + seen_clusters.insert(cluster_blk); + place_sync_external_block_connections(cluster_blk); + } + + } +} + //Moves the blocks in blocks_affected to their new locations void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { auto& place_ctx = g_vpr_ctx.mutable_placement(); diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index d1b3b6f0363..23e13849dd8 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -69,6 +69,8 @@ e_block_move_result record_block_move(t_pl_atom_blocks_to_be_moved& blocks_affec e_block_move_result 
record_block_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId blk, t_pl_loc to); +void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected); + void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); From 6a5430a1c6b1fb98494dc29eacc8b1e61415da76 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 18:46:33 -0400 Subject: [PATCH 084/188] impl revert_move_blocks for atom block move --- vpr/src/place/move_transactions.cpp | 13 +++++++++++++ vpr/src/place/move_transactions.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 9f2b1f5eaa9..8ef9ec29ad5 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -130,6 +130,19 @@ void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { } // Finish updating clb for all blocks } +void revert_move_blocks(t_pl_atom_blocks_to_be_moved& blocks_affected) { + const auto& atom_lookup = g_vpr_ctx.atom().lookup; + std::set seen_clusters; + for (int blk_idx = 0; blk_idx < blocks_affected.num_moved_blocks; blk_idx++) { + AtomBlockId atom_blk = blocks_affected.moved_blocks[blk_idx].block_num; + ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom_blk); + if (seen_clusters.find(cluster_blk) == seen_clusters.end()) { + seen_clusters.insert(cluster_blk); + place_sync_external_block_connections(cluster_blk); + } + } +} + //Moves the blocks in blocks_affected to their old locations void revert_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) { auto& place_ctx = g_vpr_ctx.mutable_placement(); diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 23e13849dd8..8359a510619 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -75,6 +75,8 @@ void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); 
void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); +void revert_move_blocks(t_pl_atom_blocks_to_be_moved& blocks_affected); + void revert_move_blocks(t_pl_blocks_to_be_moved& blocks_affected); void clear_move_blocks(t_pl_blocks_to_be_moved& blocks_affected); From 6e41db822d106093cbf206bdbeb50dca5641aba8 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 19:20:36 -0400 Subject: [PATCH 085/188] pass move blocks to driven_by_moved_block instead of the whole data structures --- vpr/src/place/place.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 6eb4d67caa0..ccde0ddb9fc 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -322,7 +322,7 @@ static void invalidate_affected_connections( TimingInfo* timing_info); static bool driven_by_moved_block(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected); + const std::vector& moved_blocks); static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); @@ -2225,13 +2225,13 @@ static void invalidate_affected_connections( //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected) { + const std::vector& moved_blocks) { auto& cluster_ctx = g_vpr_ctx.clustering(); ClusterBlockId net_driver_block = cluster_ctx.clb_nlist.net_driver_block( net); - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - if (net_driver_block == blocks_affected.moved_blocks[iblk].block_num) { + for (const auto& move_blk : moved_blocks) { + if (net_driver_block == move_blk.block_num) { return true; } } From 04bd8a363ef0728cfc0bb332af1d3b24517b622c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 19:25:34 -0400 Subject: [PATCH 086/188] use any_of instead of for loop --- vpr/src/place/place.cpp | 14 +++++++------- 1 file changed, 7 
insertions(+), 7 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index ccde0ddb9fc..cee75622d53 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -2227,15 +2227,15 @@ static void invalidate_affected_connections( static bool driven_by_moved_block(const ClusterNetId net, const std::vector& moved_blocks) { auto& cluster_ctx = g_vpr_ctx.clustering(); - + bool is_driven_by_move_blk; ClusterBlockId net_driver_block = cluster_ctx.clb_nlist.net_driver_block( net); - for (const auto& move_blk : moved_blocks) { - if (net_driver_block == move_blk.block_num) { - return true; - } - } - return false; + + is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { + return net_driver_block == move_blk.block_num; + }); + + return is_driven_by_move_blk; } /* Finds the cost from scratch. Done only when the placement * From 9a0c16ab112076ece59b950df938bdf2b0d4748b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 13 Oct 2023 19:30:44 -0400 Subject: [PATCH 087/188] pass moved_blocks and affected_pins to update_td_delta_costs --- vpr/src/place/place.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index cee75622d53..7bc25fd2538 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -362,6 +362,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, const ClusterPinId pin, + const std::vector& moved_blocks, t_pl_blocks_to_be_moved& blocks_affected, double& delta_timing_cost); @@ -1934,7 +1935,8 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, const ClusterPinId pin, - t_pl_blocks_to_be_moved& blocks_affected, + const std::vector& moved_blocks, + std::vector& affected_pins, double& delta_timing_cost) { auto& 
cluster_ctx = g_vpr_ctx.clustering(); @@ -1964,14 +1966,14 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, /* Record this connection in blocks_affected.affected_pins */ ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin); - blocks_affected.affected_pins.push_back(sink_pin); + affected_pins.push_back(sink_pin); } } else { /* This pin is a net sink on a moved block */ VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); /* Check if this sink's net is driven by a moved block */ - if (!driven_by_moved_block(net, blocks_affected)) { + if (!driven_by_moved_block(net, moved_blocks)) { /* Get the sink pin index in the net */ int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); @@ -1990,7 +1992,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, - connection_timing_cost[net][ipin]; /* Record this connection in blocks_affected.affected_pins */ - blocks_affected.affected_pins.push_back(pin); + affected_pins.push_back(pin); } } } From 191130189911f4d08dc0fab939117b8b1a9ff9ae Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 11:21:44 -0400 Subject: [PATCH 088/188] pass t_pl_moved_block to update_net_bb instead of all t_pl_blocks_to_be_moved --- vpr/src/place/place.cpp | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 7bc25fd2538..6c74e432569 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -352,11 +352,10 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm static void record_affected_net(const ClusterNetId net, int& num_affected_nets); -static void update_net_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin); +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const 
t_pl_moved_block& pl_moved_block); static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, @@ -1835,13 +1834,11 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm return; } - ClusterBlockId blk_id = blocks_affected.moved_blocks[affected_blk_id].block_num; - /* Record effected nets */ record_affected_net(net_id, num_affected_nets); /* Update the net bounding boxes. */ - update_net_bb(net_id, blocks_affected, affected_blk_id, blk_id, pin_id); + update_net_bb(net_id, blk_id, pin_id, pl_moved_block); if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. */ @@ -1870,11 +1867,10 @@ static void record_affected_net(const ClusterNetId net, * Do not update the net cost here since it should only * be updated once per net, not once per pin. */ -static void update_net_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin) { +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block) { auto& cluster_ctx = g_vpr_ctx.clustering(); if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { @@ -1893,11 +1889,11 @@ static void update_net_bb(const ClusterNetId net, //Incremental bounding box update update_bb(net, &ts_bb_coord_new[net], &ts_bb_edge_new[net], - blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].old_loc.y + pl_moved_block.old_loc.x + pin_width_offset, + pl_moved_block.old_loc.y + pin_height_offset, - blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].new_loc.y + pl_moved_block.new_loc.x + pin_width_offset, + pl_moved_block.new_loc.y + pin_height_offset); } } From aee362da52f26e78eef70a42ca373bb013d57c8a Mon Sep 17 00:00:00 2001 From: amin1377 Date: 
Mon, 16 Oct 2023 12:34:10 -0400 Subject: [PATCH 089/188] change the update_net_info_on_pin_move to accept related parameters --- vpr/src/place/place.cpp | 86 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 10 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 6c74e432569..0109f6a8929 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -332,6 +332,14 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew); +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c); + static int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -344,9 +352,11 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const ClusterNetId& net_id, + const ClusterBlockId& blk_id, const ClusterPinId& pin_id, - const int affected_blk_id, - t_pl_blocks_to_be_moved& blocks_affected, + const std::vector& moved_blocks, + const int moving_block_idx, + std::vector& affected_pins, double& timing_delta_c, int& num_affected_nets); @@ -362,7 +372,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const ClusterNetId net, const ClusterPinId pin, const std::vector& moved_blocks, - t_pl_blocks_to_be_moved& blocks_affected, + std::vector& affected_pins, double& delta_timing_cost); static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); @@ -958,7 +968,8 @@ void try_place(const Netlist<>& net_list, auto 
post_quench_timing_stats = timing_ctx.stats; if (placer_opts.place_re_cluster) { - place_re_cluster.re_cluster(); + place_re_cluster.re_cluster(place_delay_model.get(), + placer_criticalities.get()); } //Final timing analysis @@ -1747,6 +1758,52 @@ static e_move_result try_swap(const t_annealing_state* state, return move_outcome; } +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel& delay_model, + const PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c) { + + const auto& atom_look_up = g_vpr_ctx.atom().lookup; + const auto& atom_nlist = g_vpr_ctx.atom().nlist; + const auto& cluster_nlist = g_vpr_ctx.clustering().clb_nlist; + + VTR_ASSERT_SAFE(bb_delta_c == 0.); + VTR_ASSERT_SAFE(timing_delta_c == 0.); + + int num_affected_nets = 0; + + for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { + AtomBlockId atom_blk = blocks_affected.moved_blocks[iblk].block_num; + + for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk)) { + auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); + for (const auto& cluster_pin : cluster_pins) { + ClusterNetId net_id = cluster_nlist.pin_net(cluster_pin); + record_affected_net(net_id, + num_affected_nets); + + + } + } + } + + /* Now update the bounding box costs (since the net bounding * + * boxes are up-to-date). The cost is only updated once per net. */ + for (int inet_affected = 0; inet_affected < num_affected_nets; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + + proposed_net_cost[net_id] = get_net_cost(net_id, + &ts_bb_coord_new[net_id]); + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; + } + + return num_affected_nets; +} + /** * @brief Find all the nets and pins affected by this swap and update costs. 
* @@ -1793,9 +1850,11 @@ static int find_affected_nets_and_update_costs( delay_model, criticalities, net_id, + blk, blk_pin, + blocks_affected.moved_blocks, iblk, - blocks_affected, + blocks_affected.affected_pins, timing_delta_c, num_affected_nets); } @@ -1819,9 +1878,11 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const ClusterNetId& net_id, + const ClusterBlockId& blk_id, const ClusterPinId& pin_id, - const int affected_blk_id, - t_pl_blocks_to_be_moved& blocks_affected, + const std::vector& moved_blocks, + const int moving_block_idx, + std::vector& affected_pins, double& timing_delta_c, int& num_affected_nets) { const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -1838,12 +1899,17 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm record_affected_net(net_id, num_affected_nets); /* Update the net bounding boxes. */ - update_net_bb(net_id, blk_id, pin_id, pl_moved_block); + update_net_bb(net_id, blk_id, pin_id, moved_blocks[moving_block_idx]); if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. 
*/ - update_td_delta_costs(delay_model, *criticalities, net_id, - pin_id, blocks_affected, timing_delta_c); + update_td_delta_costs(delay_model, + *criticalities, + net_id, + pin_id, + moved_blocks, + affected_pins, + timing_delta_c); } } From 2da67de90b7883c28ab9a8a048ee6f63307a8bb1 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 12:36:09 -0400 Subject: [PATCH 090/188] fix a typo in find_affected_nets_and_update_costs parameter list --- vpr/src/place/place.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 0109f6a8929..838c1e4853c 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1760,7 +1760,7 @@ static e_move_result try_swap(const t_annealing_state* state, static int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, - const PlaceDelayModel& delay_model, + const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_pl_atom_blocks_to_be_moved& blocks_affected, double& bb_delta_c, double& timing_delta_c); From 199c12fb3a1c103f40fd0ca1d1f5c85a467380f1 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 12:51:12 -0400 Subject: [PATCH 091/188] add is_src_moving parameter to update_td_delta_costs instead of determining it in the function --- vpr/src/place/place.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 838c1e4853c..0c72dd3bb3d 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -371,9 +371,9 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, const ClusterPinId pin, - const std::vector& moved_blocks, std::vector& affected_pins, - double& delta_timing_cost); + double& delta_timing_cost, + bool is_src_moving); static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const 
t_noc_opts& noc_opts); @@ -1997,9 +1997,9 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, const ClusterPinId pin, - const std::vector& moved_blocks, std::vector& affected_pins, - double& delta_timing_cost) { + double& delta_timing_cost, + bool is_src_moving) { auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& connection_delay = g_placer_ctx.timing().connection_delay; @@ -2035,7 +2035,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); /* Check if this sink's net is driven by a moved block */ - if (!driven_by_moved_block(net, moved_blocks)) { + if (!is_src_moving) { /* Get the sink pin index in the net */ int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); From 59f6e3e1b3b67d13cbcbdc90e6d6faf1c8ff139b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 14:12:44 -0400 Subject: [PATCH 092/188] remove unused parameters from update_net_info_on_pin_move and pass is_src_moving to it --- vpr/src/place/place.cpp | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 0c72dd3bb3d..d8d4e7b32a4 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -351,14 +351,13 @@ static int find_affected_nets_and_update_costs( static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - const ClusterNetId& net_id, const ClusterBlockId& blk_id, const ClusterPinId& pin_id, - const std::vector& moved_blocks, - const int moving_block_idx, + const t_pl_moved_block& moving_blk_inf, std::vector& affected_pins, double& timing_delta_c, - int& num_affected_nets); + int& num_affected_nets, + bool is_src_moving); static void record_affected_net(const ClusterNetId net, int& 
num_affected_nets); @@ -1835,28 +1834,33 @@ static int find_affected_nets_and_update_costs( double& timing_delta_c) { VTR_ASSERT_SAFE(bb_delta_c == 0.); VTR_ASSERT_SAFE(timing_delta_c == 0.); - auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; int num_affected_nets = 0; /* Go through all the blocks moved. */ for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { + const auto& moving_block_inf = blocks_affected.moved_blocks[iblk]; + auto& affected_pins = blocks_affected.affected_pins; ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block. */ - for (ClusterPinId blk_pin : cluster_ctx.clb_nlist.block_pins(blk)) { - ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(blk_pin); + for (ClusterPinId blk_pin : clb_nlsit.block_pins(blk)) { + bool is_src_moving = false; + if (clb_nlsit.pin_type(blk_pin) == PinType::SINK) { + ClusterNetId net_id = clb_nlsit.pin_net(blk_pin); + is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); + } update_net_info_on_pin_move(place_algorithm, delay_model, criticalities, - net_id, blk, blk_pin, - blocks_affected.moved_blocks, - iblk, - blocks_affected.affected_pins, + moving_block_inf, + affected_pins, timing_delta_c, - num_affected_nets); + num_affected_nets, + is_src_moving); } } @@ -1877,15 +1881,15 @@ static int find_affected_nets_and_update_costs( static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - const ClusterNetId& net_id, const ClusterBlockId& blk_id, const ClusterPinId& pin_id, - const std::vector& moved_blocks, - const int moving_block_idx, + const t_pl_moved_block& moving_blk_inf, std::vector& affected_pins, double& timing_delta_c, - int& num_affected_nets) { + int& num_affected_nets, + bool is_src_moving) { const auto& cluster_ctx = g_vpr_ctx.clustering(); + const ClusterNetId 
net_id = cluster_ctx.clb_nlist.pin_net(pin_id); VTR_ASSERT_SAFE_MSG(net_id, "Only valid nets should be found in compressed netlist block pins"); @@ -1899,7 +1903,7 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm record_affected_net(net_id, num_affected_nets); /* Update the net bounding boxes. */ - update_net_bb(net_id, blk_id, pin_id, moved_blocks[moving_block_idx]); + update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. */ @@ -1907,9 +1911,9 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm *criticalities, net_id, pin_id, - moved_blocks, affected_pins, - timing_delta_c); + timing_delta_c, + is_src_moving); } } From dc896f1af8662156c2e2185591b43357c37d218e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 14:59:27 -0400 Subject: [PATCH 093/188] write driven_by_moved_block for atom net --- vpr/src/place/place.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d8d4e7b32a4..907c45a05eb 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -321,6 +321,9 @@ static void invalidate_affected_connections( NetPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); +static bool driven_by_moved_block(const AtomNetId net, + const std::vector& moved_blocks); + static bool driven_by_moved_block(const ClusterNetId net, const std::vector& moved_blocks); @@ -2291,12 +2294,26 @@ static void invalidate_affected_connections( } } +static bool driven_by_moved_block(const AtomNetId net, + const std::vector& moved_blocks) { + const auto& atom_nlist = g_vpr_ctx.atom().nlist; + bool is_driven_by_move_blk; + AtomBlockId net_driver_block = atom_nlist.net_driver_block( + net); + + is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const 
auto& move_blk) { + return net_driver_block == move_blk.block_num; + }); + + return is_driven_by_move_blk; +} + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const ClusterNetId net, const std::vector& moved_blocks) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; bool is_driven_by_move_blk; - ClusterBlockId net_driver_block = cluster_ctx.clb_nlist.net_driver_block( + ClusterBlockId net_driver_block = clb_nlist.net_driver_block( net); is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { From ac9d1b0305a0cef641a8201e0d01ca6b273172da Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 15:23:49 -0400 Subject: [PATCH 094/188] impl find_affected_nets_and_update_costs for changing atom loc --- vpr/src/place/place.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 907c45a05eb..a127affcfd0 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1770,22 +1770,43 @@ static int find_affected_nets_and_update_costs( const auto& atom_look_up = g_vpr_ctx.atom().lookup; const auto& atom_nlist = g_vpr_ctx.atom().nlist; - const auto& cluster_nlist = g_vpr_ctx.clustering().clb_nlist; + const auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; VTR_ASSERT_SAFE(bb_delta_c == 0.); VTR_ASSERT_SAFE(timing_delta_c == 0.); int num_affected_nets = 0; + std::vector affected_pins; + for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - AtomBlockId atom_blk = blocks_affected.moved_blocks[iblk].block_num; + AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; + ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); + const auto& atom_old_loc = blocks_affected.moved_blocks[iblk].old_loc; + const auto& atom_new_loc = 
blocks_affected.moved_blocks[iblk].new_loc; - for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk)) { + for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk_id)) { auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); for (const auto& cluster_pin : cluster_pins) { - ClusterNetId net_id = cluster_nlist.pin_net(cluster_pin); - record_affected_net(net_id, - num_affected_nets); + bool is_src_moving = false; + if (atom_nlist.pin_type(atom_pin) == PinType::SINK) { + AtomNetId net_id = atom_nlist.pin_net(atom_pin); + is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); + } + t_pl_moved_block move_cluster_inf; + move_cluster_inf.block_num = cluster_blk_id; + move_cluster_inf.old_loc = t_pl_loc(atom_old_loc.x, atom_old_loc.y, atom_old_loc.sub_tile, atom_old_loc.layer); + move_cluster_inf.new_loc = t_pl_loc(atom_new_loc.x, atom_new_loc.y, atom_new_loc.sub_tile, atom_new_loc.layer); + update_net_info_on_pin_move(place_algorithm, + delay_model, + criticalities, + cluster_blk_id, + cluster_pin, + move_cluster_inf, + affected_pins, + timing_delta_c, + num_affected_nets, + is_src_moving); } From 4491881b0884568271418197620ec7c4d6ad7cff Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 15:29:33 -0400 Subject: [PATCH 095/188] add a constructor to t_pl_moved_block --- vpr/src/place/move_transactions.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 8359a510619..5fd68efe1b5 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -9,7 +9,10 @@ * old_loc: the location the block is moved from * * new_loc: the location the block is moved to */ struct t_pl_moved_block { - ClusterBlockId block_num; + t_pl_moved_block() = default; + t_pl_moved_block(ClusterBlockId block_num, const t_pl_loc& old_loc, const t_pl_loc& new_loc) + : block_num(block_num), old_loc(old_loc), new_loc(new_loc) {} + 
ClusterBlockId block_num = ClusterBlockId::INVALID(); t_pl_loc old_loc; t_pl_loc new_loc; }; From 17536625f903380e9714585a157f80a8593ce425 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 15:30:30 -0400 Subject: [PATCH 096/188] add place_algorithm to re_cluster --- vpr/src/place/place.cpp | 3 ++- vpr/src/place/place_re_cluster.h | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index a127affcfd0..58130684935 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -970,7 +970,8 @@ void try_place(const Netlist<>& net_list, auto post_quench_timing_stats = timing_ctx.stats; if (placer_opts.place_re_cluster) { - place_re_cluster.re_cluster(place_delay_model.get(), + place_re_cluster.re_cluster(placer_opts.place_algorithm, + place_delay_model.get(), placer_criticalities.get()); } diff --git a/vpr/src/place/place_re_cluster.h b/vpr/src/place/place_re_cluster.h index 57fe135ac23..fb5dc5cda29 100644 --- a/vpr/src/place/place_re_cluster.h +++ b/vpr/src/place/place_re_cluster.h @@ -5,11 +5,16 @@ #ifndef VTR_PLACE_RE_CLUSTER_H #define VTR_PLACE_RE_CLUSTER_H +#include "pack_utils.h" +#include "timing_place.h" + class PlaceReCluster { public: PlaceReCluster() = default; - void re_cluster(); + void re_cluster(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities); }; #endif //VTR_PLACE_RE_CLUSTER_H From 2fca2e0df6631d52a8c487e255649d2e8e7a880a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:27:14 -0400 Subject: [PATCH 097/188] add net_cost_handler to place dir --- vpr/src/place/net_cost_handler.cpp | 0 vpr/src/place/net_cost_handler.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 vpr/src/place/net_cost_handler.cpp create mode 100644 vpr/src/place/net_cost_handler.h diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h new file mode 100644 index 00000000000..e69de29bb2d From 9c7d2d436a966d96017ec0d0479f73a3b49cc244 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:38:48 -0400 Subject: [PATCH 098/188] add driven_by_moved block to net_list_handler.cpp --- vpr/src/place/net_cost_handler.cpp | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index e69de29bb2d..d6ac6d52bc9 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -0,0 +1,40 @@ +#include "net_cost_handler.h" + + +static bool driven_by_moved_block(const AtomNetId net, + const std::vector& moved_blocks); + +static bool driven_by_moved_block(const ClusterNetId net, + const std::vector& moved_blocks); + + + +//Returns true if 'net' is driven by one of the blocks in 'blocks_affected' +static bool driven_by_moved_block(const AtomNetId net, + const std::vector& moved_blocks) { + const auto& atom_nlist = g_vpr_ctx.atom().nlist; + bool is_driven_by_move_blk; + AtomBlockId net_driver_block = atom_nlist.net_driver_block( + net); + + is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { + return net_driver_block == move_blk.block_num; + }); + + return is_driven_by_move_blk; +} + +//Returns true if 'net' is driven by one of the blocks in 'blocks_affected' +static bool driven_by_moved_block(const ClusterNetId net, + const std::vector& moved_blocks) { + auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + bool is_driven_by_move_blk; + ClusterBlockId net_driver_block = clb_nlist.net_driver_block( + net); + + is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { + return net_driver_block == move_blk.block_num; + }); + + return 
is_driven_by_move_blk; +} \ No newline at end of file From 48fa5b51aad1e2a8c252d92c892d56f83f9eefdd Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:39:49 -0400 Subject: [PATCH 099/188] add find_affected_nets_and_update_costs for both cluster and atom nets --- vpr/src/place/net_cost_handler.cpp | 144 ++++++++++++++++++++++++++++- vpr/src/place/net_cost_handler.h | 20 ++++ 2 files changed, 163 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index d6ac6d52bc9..a06b3824fbb 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -37,4 +37,146 @@ static bool driven_by_moved_block(const ClusterNetId net, }); return is_driven_by_move_blk; -} \ No newline at end of file +} + +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c) { + + const auto& atom_look_up = g_vpr_ctx.atom().lookup; + const auto& atom_nlist = g_vpr_ctx.atom().nlist; + const auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; + + VTR_ASSERT_SAFE(bb_delta_c == 0.); + VTR_ASSERT_SAFE(timing_delta_c == 0.); + + int num_affected_nets = 0; + + std::vector affected_pins; + + for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { + AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; + ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); + const auto& atom_old_loc = blocks_affected.moved_blocks[iblk].old_loc; + const auto& atom_new_loc = blocks_affected.moved_blocks[iblk].new_loc; + + for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk_id)) { + auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); + for (const auto& cluster_pin : cluster_pins) { + bool is_src_moving = false; + if (atom_nlist.pin_type(atom_pin) 
== PinType::SINK) { + AtomNetId net_id = atom_nlist.pin_net(atom_pin); + is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); + } + t_pl_moved_block move_cluster_inf; + move_cluster_inf.block_num = cluster_blk_id; + move_cluster_inf.old_loc = t_pl_loc(atom_old_loc.x, atom_old_loc.y, atom_old_loc.sub_tile, atom_old_loc.layer); + move_cluster_inf.new_loc = t_pl_loc(atom_new_loc.x, atom_new_loc.y, atom_new_loc.sub_tile, atom_new_loc.layer); + update_net_info_on_pin_move(place_algorithm, + delay_model, + criticalities, + cluster_blk_id, + cluster_pin, + move_cluster_inf, + affected_pins, + timing_delta_c, + num_affected_nets, + is_src_moving); + + + } + } + } + + /* Now update the bounding box costs (since the net bounding * + * boxes are up-to-date). The cost is only updated once per net. */ + for (int inet_affected = 0; inet_affected < num_affected_nets; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + + proposed_net_cost[net_id] = get_net_cost(net_id, + &ts_bb_coord_new[net_id]); + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; + } + + return num_affected_nets; +} + +/** + * @brief Find all the nets and pins affected by this swap and update costs. + * + * Find all the nets affected by this swap and update the bounding box (wiring) + * costs. This cost function doesn't depend on the timing info. + * + * Find all the connections affected by this swap and update the timing cost. + * For a connection to be affected, it not only needs to be on or driven by + * a block, but it also needs to have its delay changed. Otherwise, it will + * not be added to the affected_pins structure. + * + * For more, see update_td_delta_costs(). + * + * The timing costs are calculated by getting the new connection delays, + * multiplied by the connection criticalities returned by the timing + * analyzer. These timing costs are stored in the proposed_* data structures. 
+ * + * The change in the bounding box cost is stored in `bb_delta_c`. + * The change in the timing cost is stored in `timing_delta_c`. + * + * @return The number of affected nets. + */ +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_pl_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c) { + VTR_ASSERT_SAFE(bb_delta_c == 0.); + VTR_ASSERT_SAFE(timing_delta_c == 0.); + auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; + + int num_affected_nets = 0; + + /* Go through all the blocks moved. */ + for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { + const auto& moving_block_inf = blocks_affected.moved_blocks[iblk]; + auto& affected_pins = blocks_affected.affected_pins; + ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; + + /* Go through all the pins in the moved block. */ + for (ClusterPinId blk_pin : clb_nlsit.block_pins(blk)) { + bool is_src_moving = false; + if (clb_nlsit.pin_type(blk_pin) == PinType::SINK) { + ClusterNetId net_id = clb_nlsit.pin_net(blk_pin); + is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); + } + update_net_info_on_pin_move(place_algorithm, + delay_model, + criticalities, + blk, + blk_pin, + moving_block_inf, + affected_pins, + timing_delta_c, + num_affected_nets, + is_src_moving); + } + } + + /* Now update the bounding box costs (since the net bounding * + * boxes are up-to-date). The cost is only updated once per net. 
*/ + for (int inet_affected = 0; inet_affected < num_affected_nets; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + + proposed_net_cost[net_id] = get_net_cost(net_id, + &ts_bb_coord_new[net_id]); + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; + } + + return num_affected_nets; +} diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index e69de29bb2d..cc01ae4f60e 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -0,0 +1,20 @@ +#pragma once +#include "place_delay_model.h" +#include "timing_place.h" +#include "move_transactions.h" + +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c); + +static int find_affected_nets_and_update_costs( + const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_pl_blocks_to_be_moved& blocks_affected, + double& bb_delta_c, + double& timing_delta_c); From 7ae6bbd1890e72506c641e8a5f815f7ea00308ab Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:41:25 -0400 Subject: [PATCH 100/188] add update_net_bb to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index a06b3824fbb..35d61c6806d 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -7,6 +7,11 @@ static bool driven_by_moved_block(const AtomNetId net, static bool driven_by_moved_block(const ClusterNetId net, const std::vector& moved_blocks); +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& 
pl_moved_block); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' @@ -39,6 +44,43 @@ static bool driven_by_moved_block(const ClusterNetId net, return is_driven_by_move_blk; } +/** + * @brief Update the net bounding boxes. + * + * Do not update the net cost here since it should only + * be updated once per net, not once per pin. + */ +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { + //For small nets brute-force bounding box update is faster + + if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net + get_non_updateable_bb(net, &ts_bb_coord_new[net]); + } + } else { + //For large nets, update bounding box incrementally + int iblk_pin = tile_pin_index(blk_pin); + + t_physical_tile_type_ptr blk_type = physical_tile_type(blk); + int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; + int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; + + //Incremental bounding box update + update_bb(net, &ts_bb_coord_new[net], &ts_bb_edge_new[net], + pl_moved_block.old_loc.x + pin_width_offset, + pl_moved_block.old_loc.y + + pin_height_offset, + pl_moved_block.new_loc.x + pin_width_offset, + pl_moved_block.new_loc.y + + pin_height_offset); + } +} + static int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, From 7adad23d9ab88d56080ec3dc557166d896f9d1b4 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:43:52 -0400 Subject: [PATCH 101/188] add update_td_delta_costs and record_affected_net to net_list_handler --- vpr/src/place/net_cost_handler.cpp | 119 +++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 35d61c6806d..023f0488e6c 
100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -12,6 +12,16 @@ static void update_net_bb(const ClusterNetId& net, const ClusterPinId& blk_pin, const t_pl_moved_block& pl_moved_block); +static void update_td_delta_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& criticalities, + const ClusterNetId net, + const ClusterPinId pin, + std::vector& affected_pins, + double& delta_timing_cost, + bool is_src_moving); + +static void record_affected_net(const ClusterNetId net, int& num_affected_nets); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' @@ -81,6 +91,115 @@ static void update_net_bb(const ClusterNetId& net, } } +/** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. + * + * Assumes that the blocks have been moved to the proposed new locations. + * Otherwise, the routine comp_td_single_connection_delay() will not be + * able to calculate the most up to date connection delay estimation value. + * + * If the moved pin is a driver pin, then all the sink connections that are + * driven by this driver pin are considered. + * + * If the moved pin is a sink pin, then it is the only pin considered. But + * in some cases, the sink is already accounted for if it is also driven + * by a driver pin located on a moved block. Computing it again would double + * count its affect on the total timing cost change (delta_timing_cost). + * + * It is possible for some connections to have unchanged delays. For instance, + * if we are using a dx/dy delay model, this could occur if a sink pin moved + * to a new position with the same dx/dy from its net's driver pin. + * + * We skip these connections with unchanged delay values as their delay need + * not be updated. 
Their timing costs also do not require any update, since + * the criticalities values are always kept stale/unchanged during an block + * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) + * + * This is also done to minimize the number of timing node/edge invalidations + * for incremental static timing analysis (incremental STA). + */ +static void update_td_delta_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& criticalities, + const ClusterNetId net, + const ClusterPinId pin, + std::vector& affected_pins, + double& delta_timing_cost, + bool is_src_moving) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + const auto& connection_delay = g_placer_ctx.timing().connection_delay; + auto& connection_timing_cost = g_placer_ctx.mutable_timing().connection_timing_cost; + auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay; + auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost; + + if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { + /* This pin is a net driver on a moved block. */ + /* Recompute all point to point connection delays for the net sinks. 
*/ + for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); + ipin++) { + float temp_delay = comp_td_single_connection_delay(delay_model, net, + ipin); + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + continue; + } + + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; + + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; + delta_timing_cost += proposed_connection_timing_cost[net][ipin] + - connection_timing_cost[net][ipin]; + + /* Record this connection in blocks_affected.affected_pins */ + ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin); + affected_pins.push_back(sink_pin); + } + } else { + /* This pin is a net sink on a moved block */ + VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); + + /* Check if this sink's net is driven by a moved block */ + if (!is_src_moving) { + /* Get the sink pin index in the net */ + int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); + + float temp_delay = comp_td_single_connection_delay(delay_model, net, + ipin); + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + return; + } + + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; + + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; + delta_timing_cost += proposed_connection_timing_cost[net][ipin] + - connection_timing_cost[net][ipin]; + + /* Record this connection in blocks_affected.affected_pins */ + affected_pins.push_back(pin); + } + } +} + +///@brief Record effected nets. +static void record_affected_net(const ClusterNetId net, + int& num_affected_nets) { + /* Record effected nets. */ + if (proposed_net_cost[net] < 0.) { + /* Net not marked yet. 
*/ + ts_nets_to_update[num_affected_nets] = net; + num_affected_nets++; + + /* Flag to say we've marked this net. */ + proposed_net_cost[net] = 1.; + } +} + static int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, From 627e2fe57fd03b977f734734d5c0e70822f85b3f Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:46:40 -0400 Subject: [PATCH 102/188] add update_net_info_on_pin_move to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 54 ++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 023f0488e6c..883749a7dab 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -22,6 +22,17 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, static void record_affected_net(const ClusterNetId net, int& num_affected_nets); +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + const ClusterBlockId& blk_id, + const ClusterPinId& pin_id, + const t_pl_moved_block& moving_blk_inf, + std::vector& affected_pins, + double& timing_delta_c, + int& num_affected_nets, + bool is_src_moving); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' @@ -200,7 +211,46 @@ static void record_affected_net(const ClusterNetId net, } } -static int find_affected_nets_and_update_costs( +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + const ClusterBlockId& blk_id, + const ClusterPinId& pin_id, + const t_pl_moved_block& moving_blk_inf, + std::vector& affected_pins, + double& timing_delta_c, + int& num_affected_nets, + bool is_src_moving) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const 
ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); + VTR_ASSERT_SAFE_MSG(net_id, + "Only valid nets should be found in compressed netlist block pins"); + + if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + //TODO: Do we require anyting special here for global nets? + //"Global nets are assumed to span the whole chip, and do not effect costs." + return; + } + + /* Record effected nets */ + record_affected_net(net_id, num_affected_nets); + + /* Update the net bounding boxes. */ + update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); + + if (place_algorithm.is_timing_driven()) { + /* Determine the change in connection delay and timing cost. */ + update_td_delta_costs(delay_model, + *criticalities, + net_id, + pin_id, + affected_pins, + timing_delta_c, + is_src_moving); + } +} + +int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -289,7 +339,7 @@ static int find_affected_nets_and_update_costs( * * @return The number of affected nets. 
*/ -static int find_affected_nets_and_update_costs( +int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, From 6ece0f4c28cfb3ea93b0b57edf235d55e138b271 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 16:47:06 -0400 Subject: [PATCH 103/188] remove static identifiers from find_affected_nets_and_update_costs --- vpr/src/place/net_cost_handler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index cc01ae4f60e..06d30293831 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -3,7 +3,7 @@ #include "timing_place.h" #include "move_transactions.h" -static int find_affected_nets_and_update_costs( +int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -11,7 +11,7 @@ static int find_affected_nets_and_update_costs( double& bb_delta_c, double& timing_delta_c); -static int find_affected_nets_and_update_costs( +int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, From 113c4e1379718e74e0dd4d7e0ca2fc814d012e92 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 17:04:36 -0400 Subject: [PATCH 104/188] add local variables related to net cost to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 883749a7dab..c4d5a2072fb 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1,4 +1,34 @@ #include "net_cost_handler.h" +#include "globals.h" +#include "move_utils.h" + +/* Flags for the states of the bounding box. 
* + * Stored as char for memory efficiency. */ +#define NOT_UPDATED_YET 'N' +#define UPDATED_ONCE 'U' +#define GOT_FROM_SCRATCH 'S' + +/* Cost of a net, and a temporary cost of a net used during move assessment. */ +static vtr::vector net_cost, proposed_net_cost; + +/* [0...cluster_ctx.clb_nlist.nets().size()-1] * + * A flag array to indicate whether the specific bounding box has been updated * + * in this particular swap or not. If it has been updated before, the code * + * must use the updated data, instead of the out-of-date data passed into the * + * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET * + * indicates that the net has not been updated before, UPDATED_ONCE indicated * + * that the net has been updated once, if it is going to be updated again, the * + * values from the previous update must be used. GOT_FROM_SCRATCH is only * + * applicable for nets larger than SMALL_NETS and it indicates that the * + * particular bounding box cannot be updated incrementally before, hence the * + * bounding box is got from scratch, so the bounding box would definitely be * + * right, DO NOT update again. */ +static vtr::vector bb_updated_before; + +/* The following arrays are used by the try_swap function for speed. 
*/ +/* [0...cluster_ctx.clb_nlist.nets().size()-1] */ +static vtr::vector ts_bb_coord_new, ts_bb_edge_new; +static std::vector ts_nets_to_update; static bool driven_by_moved_block(const AtomNetId net, From eec039bebc6dd157111e3c4f926c902f5b633438 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 17:43:08 -0400 Subject: [PATCH 105/188] add alloc_and_load_for_fast_cost_update and its vars to place_util --- vpr/src/place/place_util.cpp | 130 +++++++++++++++++++++++++++++++++++ vpr/src/place/place_util.h | 2 + 2 files changed, 132 insertions(+) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 39e732a4f5f..cfd6a67571b 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -9,6 +9,30 @@ #include "draw_global.h" #include "place_constraints.h" +/* Expected crossing counts for nets with different #'s of pins. From * + * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). * + * Multiplied to bounding box of a net to better estimate wire length * + * for higher fanout nets. Each entry is the correction factor for the * + * fanout index-1 */ +static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, + 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, + 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, + 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, + 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, + 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, + 2.7933}; + +/* The arrays below are used to precompute the inverse of the average * + * number of tracks per channel between [subhigh] and [sublow]. Access * + * them as chan?_place_cost_fac[subhigh][sublow]. 
They are used to * + * speed up the computation of the cost function that takes the length * + * of the net bounding box in each dimension, divided by the average * + * number of tracks in that direction; for other cost functions they * + * will never be used. * + */ +static vtr::NdMatrix chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2] +static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2] + /* File-scope routines */ static GridBlock init_grid_blocks(); @@ -556,3 +580,109 @@ t_pl_atom_loc get_atom_loc (AtomBlockId atom) { return {primitive_id, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer}; } + +void alloc_and_load_for_fast_cost_update(float place_cost_exp) { + /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * + * arrays with the inverse of the average number of tracks per channel * + * between [subhigh] and [sublow]. This is only useful for the cost * + * function that takes the length of the net bounding box in each * + * dimension divided by the average number of tracks in that direction. * + * For other cost functions, you don't have to bother calling this * + * routine; when using the cost function described above, however, you * + * must always call this routine after you call init_chan and before * + * you do any placement cost determination. The place_cost_exp factor * + * specifies to what power the width of the channel should be taken -- * + * larger numbers make narrower channels more expensive. */ + + auto& device_ctx = g_vpr_ctx.device(); + + /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * + * subhigh must be greater than or equal to sublow, we only need to * + * allocate storage for the lower half of a matrix. 
*/ + + //chanx_place_cost_fac = new float*[(device_ctx.grid.height())]; + //for (size_t i = 0; i < device_ctx.grid.height(); i++) + // chanx_place_cost_fac[i] = new float[(i + 1)]; + + //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)]; + //for (size_t i = 0; i < device_ctx.grid.width(); i++) + // chany_place_cost_fac[i] = new float[(i + 1)]; + + chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1}); + chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1}); + + /* First compute the number of tracks between channel high and channel * + * low, inclusive, in an efficient manner. */ + + chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0]; + + for (size_t high = 1; high < device_ctx.grid.height(); high++) { + chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high]; + for (size_t low = 0; low < high; low++) { + chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] + + device_ctx.chan_width.x_list[high]; + } + } + + /* Now compute the inverse of the average number of tracks per channel * + * between high and low. The cost function divides by the average * + * number of tracks per channel, so by storing the inverse I convert * + * this to a faster multiplication. Take this final number to the * + * place_cost_exp power -- numbers other than one mean this is no * + * longer a simple "average number of tracks"; it is some power of * + * that, allowing greater penalization of narrow channels. */ + + for (size_t high = 0; high < device_ctx.grid.height(); high++) + for (size_t low = 0; low <= high; low++) { + /* Since we will divide the wiring cost by the average channel * + * capacity between high and low, having only 0 width channels * + * will result in infinite wiring capacity normalization * + * factor, and extremely bad placer behaviour. Hence we change * + * this to a small (1 track) channel capacity instead. 
*/ + if (chanx_place_cost_fac[high][low] == 0.0f) { + VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low); + chanx_place_cost_fac[high][low] = 1.0f; + } + + chanx_place_cost_fac[high][low] = (high - low + 1.) + / chanx_place_cost_fac[high][low]; + chanx_place_cost_fac[high][low] = pow( + (double)chanx_place_cost_fac[high][low], + (double)place_cost_exp); + } + + /* Now do the same thing for the y-directed channels. First get the * + * number of tracks between channel high and channel low, inclusive. */ + + chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0]; + + for (size_t high = 1; high < device_ctx.grid.width(); high++) { + chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high]; + for (size_t low = 0; low < high; low++) { + chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] + + device_ctx.chan_width.y_list[high]; + } + } + + /* Now compute the inverse of the average number of tracks per channel * + * between high and low. Take to specified power. */ + + for (size_t high = 0; high < device_ctx.grid.width(); high++) + for (size_t low = 0; low <= high; low++) { + /* Since we will divide the wiring cost by the average channel * + * capacity between high and low, having only 0 width channels * + * will result in infinite wiring capacity normalization * + * factor, and extremely bad placer behaviour. Hence we change * + * this to a small (1 track) channel capacity instead. */ + if (chany_place_cost_fac[high][low] == 0.0f) { + VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low); + chany_place_cost_fac[high][low] = 1.0f; + } + + chany_place_cost_fac[high][low] = (high - low + 1.) 
+ / chany_place_cost_fac[high][low]; + chany_place_cost_fac[high][low] = pow( + (double)chany_place_cost_fac[high][low], + (double)place_cost_exp); + } +} diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 08ba65f08f5..8edc3f18c39 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -268,4 +268,6 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ // Array in place_ctx. t_pl_atom_loc get_atom_loc (AtomBlockId atom); +void alloc_and_load_for_fast_cost_update(float place_cost_exp); + #endif From b1619885749250ebe6c773c7ea2efad211f383b4 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 16 Oct 2023 17:44:01 -0400 Subject: [PATCH 106/188] add wirelength_crossing_count and get_net_bounding_box_cost to place_util --- vpr/src/place/place_util.cpp | 37 ++++++++++++++++++++++++++++++++++++ vpr/src/place/place_util.h | 4 ++++ 2 files changed, 41 insertions(+) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index cfd6a67571b..0713757a2b2 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -686,3 +686,40 @@ void alloc_and_load_for_fast_cost_update(float place_cost_exp) { (double)place_cost_exp); } } + +double wirelength_crossing_count(size_t fanout) { + /* Get the expected "crossing count" of a net, based on its number * + * of pins. Extrapolate for very large nets. */ + + if (fanout > 50) { + return 2.7933 + 0.02616 * (fanout - 50); + } else { + return cross_count[fanout - 1]; + } +} + +double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { + /* Finds the cost due to one net by looking at its coordinate bounding * + * box. */ + + double ncost, crossing; + auto& cluster_ctx = g_vpr_ctx.clustering(); + + crossing = wirelength_crossing_count( + cluster_ctx.clb_nlist.net_pins(net_id).size()); + + /* Could insert a check for xmin == xmax. In that case, assume * + * connection will be made with no bends and hence no x-cost. 
* + * Same thing for y-cost. */ + + /* Cost = wire length along channel * cross_count / average * + * channel capacity. Do this for x, then y direction and add. */ + + ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing + * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; + + ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing + * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; + + return (ncost); +} diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 8edc3f18c39..c3e7ad9daa5 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -270,4 +270,8 @@ t_pl_atom_loc get_atom_loc (AtomBlockId atom); void alloc_and_load_for_fast_cost_update(float place_cost_exp); +double wirelength_crossing_count(size_t fanout); + +double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bb_ptr); + #endif From f04a17619fa841aa3c6a419f6c2756bce9ec46b9 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 10:38:49 -0400 Subject: [PATCH 107/188] add init_net_cost_structs --- vpr/src/place/net_cost_handler.cpp | 21 +++++++++++++++++++-- vpr/src/place/net_cost_handler.h | 9 +++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index c4d5a2072fb..68639ff0336 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1,7 +1,11 @@ #include "net_cost_handler.h" #include "globals.h" +#include "placer_globals.h" #include "move_utils.h" +using std::max; +using std::min; + /* Flags for the states of the bounding box. * * Stored as char for memory efficiency. 
*/ #define NOT_UPDATED_YET 'N' @@ -339,7 +343,7 @@ int find_affected_nets_and_update_costs( inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = get_net_cost(net_id, + proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, &ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } @@ -414,10 +418,23 @@ int find_affected_nets_and_update_costs( inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = get_net_cost(net_id, + proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, &ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } return num_affected_nets; } + + + + +void init_net_cost_structs(size_t num_nets) { + net_cost.resize(num_nets, -1.); + proposed_net_cost.resize(num_nets, -1.); + /* Used to store costs for moves not yet made and to indicate when a net's * + * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * + * been recomputed. 
*/ + bb_updated_before.resize(num_nets, NOT_UPDATED_YET); + +} diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 06d30293831..28398908b5b 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -2,6 +2,12 @@ #include "place_delay_model.h" #include "timing_place.h" #include "move_transactions.h" +#include "place_util.h" + +enum e_cost_methods { + NORMAL, + CHECK +}; int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, @@ -18,3 +24,6 @@ int find_affected_nets_and_update_costs( t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, double& timing_delta_c); + + +void init_net_cost_structs(size_t num_nets); From 2633c6a26ee85741e0816fffe61881ee52d23cf5 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 10:48:33 -0400 Subject: [PATCH 108/188] add get_non_updateable_bb, update_bb, get_bb_from_scratch, get_net_wirelength_estimate, and recompute_bb_cost to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 401 +++++++++++++++++++++++++++++ 1 file changed, 401 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 68639ff0336..4c3c110c3f1 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -2,6 +2,8 @@ #include "globals.h" #include "placer_globals.h" #include "move_utils.h" +#include "place_timing_update.h" +#include "noc_place_utils.h" using std::max; using std::min; @@ -67,6 +69,16 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm int& num_affected_nets, bool is_src_moving); +static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); + +static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew); + +static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges); + +static double get_net_wirelength_estimate(ClusterNetId net_id, 
t_bb* bbptr); + +static double recompute_bb_cost(); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' @@ -284,6 +296,395 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm } } +/* Finds the bounding box of a net and stores its coordinates in the * + * bb_coord_new data structure. This routine should only be called * + * for small nets, since it does not determine enough information for * + * the bounding box to be updated incrementally later. * + * Currently assumes channels on both sides of the CLBs forming the * + * edges of the bounding box can be used. Essentially, I am assuming * + * the pins always lie on the outside of the bounding box. */ +static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { + //TODO: account for multiple physical pin instances per logical pin + + int xmax, ymax, xmin, ymin, x, y; + int pnum; + + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + auto& device_ctx = g_vpr_ctx.device(); + + ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); + pnum = net_pin_to_tile_pin_index(net_id, 0); + + x = place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + xmin = x; + ymin = y; + xmax = x; + ymax = y; + + for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + bnum = cluster_ctx.clb_nlist.pin_block(pin_id); + pnum = tile_pin_index(pin_id); + x = place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + if (x < xmin) { + xmin = x; + } else if (x > xmax) { + xmax = x; + } + + if (y < ymin) { + ymin = y; + } else if (y > ymax) { + ymax = y; + } + } + + /* Now I've found the coordinates of the bounding box. 
There are no * + * channels beyond device_ctx.grid.width()-2 and * + * device_ctx.grid.height() - 2, so I want to clip to that. As well,* + * since I'll always include the channel immediately below and the * + * channel immediately to the left of the bounding box, I want to * + * clip to 1 in both directions as well (since minimum channel index * + * is 0). See route_common.cpp for a channel diagram. */ + + bb_coord_new->xmin = max(min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new->ymin = max(min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new->xmax = max(min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new->ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels +} + +static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew) { + /* Updates the bounding box of a net by storing its coordinates in * + * the bb_coord_new data structure and the number of blocks on each * + * edge in the bb_edge_new data structure. This routine should only * + * be called for large nets, since it has some overhead relative to * + * just doing a brute force bounding box calculation. The bounding * + * box coordinate and edge information for inet must be valid before * + * this routine is called. * + * Currently assumes channels on both sides of the CLBs forming the * + * edges of the bounding box can be used. Essentially, I am assuming * + * the pins always lie on the outside of the bounding box. * + * The x and y coordinates are the pin's x and y coordinates. */ + /* IO blocks are considered to be one cell in for simplicity. 
*/ + //TODO: account for multiple physical pin instances per logical pin + const t_bb *curr_bb_edge, *curr_bb_coord; + + auto& device_ctx = g_vpr_ctx.device(); + auto& place_move_ctx = g_placer_ctx.move(); + + xnew = max(min(xnew, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + ynew = max(min(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + xold = max(min(xold, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + yold = max(min(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + + /* Check if the net had been updated before. */ + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + /* The net had been updated from scratch, DO NOT update again! */ + return; + } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + /* The net had NOT been updated before, could use the old values */ + curr_bb_coord = &place_move_ctx.bb_coords[net_id]; + curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id]; + bb_updated_before[net_id] = UPDATED_ONCE; + } else { + /* The net had been updated before, must use the new values */ + curr_bb_coord = bb_coord_new; + curr_bb_edge = bb_edge_new; + } + + /* Check if I can update the bounding box incrementally. */ + + if (xnew < xold) { /* Move to left. */ + + /* Update the xmax fields for coordinates and number of edges first. */ + + if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */ + if (curr_bb_edge->xmax == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + bb_updated_before[net_id] = GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new->xmax = curr_bb_edge->xmax - 1; + bb_coord_new->xmax = curr_bb_coord->xmax; + } + } else { /* Move to left, old postion was not at xmax. */ + bb_coord_new->xmax = curr_bb_coord->xmax; + bb_edge_new->xmax = curr_bb_edge->xmax; + } + + /* Now do the xmin fields for coordinates and number of edges. 
*/ + + if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */ + bb_coord_new->xmin = xnew; + bb_edge_new->xmin = 1; + } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */ + bb_coord_new->xmin = xnew; + bb_edge_new->xmin = curr_bb_edge->xmin + 1; + } else { /* Xmin unchanged. */ + bb_coord_new->xmin = curr_bb_coord->xmin; + bb_edge_new->xmin = curr_bb_edge->xmin; + } + /* End of move to left case. */ + + } else if (xnew > xold) { /* Move to right. */ + + /* Update the xmin fields for coordinates and number of edges first. */ + + if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */ + if (curr_bb_edge->xmin == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + bb_updated_before[net_id] = GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new->xmin = curr_bb_edge->xmin - 1; + bb_coord_new->xmin = curr_bb_coord->xmin; + } + } else { /* Move to right, old position was not at xmin. */ + bb_coord_new->xmin = curr_bb_coord->xmin; + bb_edge_new->xmin = curr_bb_edge->xmin; + } + + /* Now do the xmax fields for coordinates and number of edges. */ + + if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */ + bb_coord_new->xmax = xnew; + bb_edge_new->xmax = 1; + } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */ + bb_coord_new->xmax = xnew; + bb_edge_new->xmax = curr_bb_edge->xmax + 1; + } else { /* Xmax unchanged. */ + bb_coord_new->xmax = curr_bb_coord->xmax; + bb_edge_new->xmax = curr_bb_edge->xmax; + } + /* End of move to right case. */ + + } else { /* xnew == xold -- no x motion. */ + bb_coord_new->xmin = curr_bb_coord->xmin; + bb_coord_new->xmax = curr_bb_coord->xmax; + bb_edge_new->xmin = curr_bb_edge->xmin; + bb_edge_new->xmax = curr_bb_edge->xmax; + } + + /* Now account for the y-direction motion. */ + + if (ynew < yold) { /* Move down. */ + + /* Update the ymax fields for coordinates and number of edges first. */ + + if (yold == curr_bb_coord->ymax) { /* Old position at ymax. 
*/ + if (curr_bb_edge->ymax == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + bb_updated_before[net_id] = GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new->ymax = curr_bb_edge->ymax - 1; + bb_coord_new->ymax = curr_bb_coord->ymax; + } + } else { /* Move down, old postion was not at ymax. */ + bb_coord_new->ymax = curr_bb_coord->ymax; + bb_edge_new->ymax = curr_bb_edge->ymax; + } + + /* Now do the ymin fields for coordinates and number of edges. */ + + if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */ + bb_coord_new->ymin = ynew; + bb_edge_new->ymin = 1; + } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */ + bb_coord_new->ymin = ynew; + bb_edge_new->ymin = curr_bb_edge->ymin + 1; + } else { /* ymin unchanged. */ + bb_coord_new->ymin = curr_bb_coord->ymin; + bb_edge_new->ymin = curr_bb_edge->ymin; + } + /* End of move down case. */ + + } else if (ynew > yold) { /* Moved up. */ + + /* Update the ymin fields for coordinates and number of edges first. */ + + if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */ + if (curr_bb_edge->ymin == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + bb_updated_before[net_id] = GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new->ymin = curr_bb_edge->ymin - 1; + bb_coord_new->ymin = curr_bb_coord->ymin; + } + } else { /* Moved up, old position was not at ymin. */ + bb_coord_new->ymin = curr_bb_coord->ymin; + bb_edge_new->ymin = curr_bb_edge->ymin; + } + + /* Now do the ymax fields for coordinates and number of edges. */ + + if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */ + bb_coord_new->ymax = ynew; + bb_edge_new->ymax = 1; + } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */ + bb_coord_new->ymax = ynew; + bb_edge_new->ymax = curr_bb_edge->ymax + 1; + } else { /* ymax unchanged. */ + bb_coord_new->ymax = curr_bb_coord->ymax; + bb_edge_new->ymax = curr_bb_edge->ymax; + } + /* End of move up case. */ + + } else { /* ynew == yold -- no y motion. 
*/ + bb_coord_new->ymin = curr_bb_coord->ymin; + bb_coord_new->ymax = curr_bb_coord->ymax; + bb_edge_new->ymin = curr_bb_edge->ymin; + bb_edge_new->ymax = curr_bb_edge->ymax; + } + + if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + bb_updated_before[net_id] = UPDATED_ONCE; + } +} + +/* This routine finds the bounding box of each net from scratch (i.e. * + * from only the block location information). It updates both the * + * coordinate and number of pins on each edge information. It * + * should only be called when the bounding box information is not valid. */ +static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges) { + int pnum, x, y, xmin, xmax, ymin, ymax; + int xmin_edge, xmax_edge, ymin_edge, ymax_edge; + + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + + ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); + pnum = net_pin_to_tile_pin_index(net_id, 0); + VTR_ASSERT(pnum >= 0); + x = place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + x = max(min(x, grid.width() - 2), 1); + y = max(min(y, grid.height() - 2), 1); + + xmin = x; + ymin = y; + xmax = x; + ymax = y; + xmin_edge = 1; + ymin_edge = 1; + xmax_edge = 1; + ymax_edge = 1; + + for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + bnum = cluster_ctx.clb_nlist.pin_block(pin_id); + pnum = tile_pin_index(pin_id); + x = place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. 
* + * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and * + * I always take all channels impinging on the bounding box to be within * + * that bounding box. Hence, this "movement" of IO blocks does not affect * + * the which channels are included within the bounding box, and it * + * simplifies the code a lot. */ + + x = max(min(x, grid.width() - 2), 1); //-2 for no perim channels + y = max(min(y, grid.height() - 2), 1); //-2 for no perim channels + + if (x == xmin) { + xmin_edge++; + } + if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */ + xmax_edge++; + } else if (x < xmin) { + xmin = x; + xmin_edge = 1; + } else if (x > xmax) { + xmax = x; + xmax_edge = 1; + } + + if (y == ymin) { + ymin_edge++; + } + if (y == ymax) { + ymax_edge++; + } else if (y < ymin) { + ymin = y; + ymin_edge = 1; + } else if (y > ymax) { + ymax = y; + ymax_edge = 1; + } + } + + /* Copy the coordinates and number on edges information into the proper * + * structures. */ + coords->xmin = xmin; + coords->xmax = xmax; + coords->ymin = ymin; + coords->ymax = ymax; + + num_on_edges->xmin = xmin_edge; + num_on_edges->xmax = xmax_edge; + num_on_edges->ymin = ymin_edge; + num_on_edges->ymax = ymax_edge; +} + +static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) { + /* WMF: Finds the estimate of wirelength due to one net by looking at * + * its coordinate bounding box. */ + + double ncost, crossing; + auto& cluster_ctx = g_vpr_ctx.clustering(); + + crossing = wirelength_crossing_count( + cluster_ctx.clb_nlist.net_pins(net_id).size()); + + /* Could insert a check for xmin == xmax. In that case, assume * + * connection will be made with no bends and hence no x-cost. * + * Same thing for y-cost. */ + + /* Cost = wire length along channel * cross_count / average * + * channel capacity. Do this for x, then y direction and add. 
*/ + + ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; + + ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; + + return (ncost); +} + +static double recompute_bb_cost() { + /* Recomputes the cost to eliminate roundoff that may have accrued. * + * This routine does as little work as possible to compute this new * + * cost. */ + + double cost = 0; + + auto& cluster_ctx = g_vpr_ctx.clustering(); + + for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ + /* Bounding boxes don't have to be recomputed; they're correct. */ + cost += net_cost[net_id]; + } + } + + return (cost); +} + int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, From e50de51de51cd63a3ab4aca92adb6603512d2580 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 10:50:48 -0400 Subject: [PATCH 109/188] add comp_bb_cost, update_move_nets, reset_move_nets, recompute_costs_from_scratch to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 138 +++++++++++++++++++++++++++++ vpr/src/place/net_cost_handler.h | 11 +++ 2 files changed, 149 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 4c3c110c3f1..22d86b14d9d 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -827,8 +827,146 @@ int find_affected_nets_and_update_costs( return num_affected_nets; } +/* Finds the cost from scratch. Done only when the placement * + * has been radically changed (i.e. after initial placement). * + * Otherwise find the cost change incrementally. If method * + * check is NORMAL, we find bounding boxes that are updateable * + * for the larger nets. If method is CHECK, all bounding boxes * + * are found via the non_updateable_bb routine, to provide a * + * cost which can be used to check the correctness of the * + * other routine. 
*/ +double comp_bb_cost(e_cost_methods method) { + double cost = 0; + double expected_wirelength = 0.0; + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = g_placer_ctx.mutable_move(); + + for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ + /* Small nets don't use incremental updating on their bounding boxes, * + * so they can use a fast bounding box calculator. */ + if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET + && method == NORMAL) { + get_bb_from_scratch(net_id, &place_move_ctx.bb_coords[net_id], + &place_move_ctx.bb_num_on_edges[net_id]); + } else { + get_non_updateable_bb(net_id, + &place_move_ctx.bb_coords[net_id]); + } + + net_cost[net_id] = get_net_bounding_box_cost(net_id, + &place_move_ctx.bb_coords[net_id]); + cost += net_cost[net_id]; + if (method == CHECK) + expected_wirelength += get_net_wirelength_estimate(net_id, + &place_move_ctx.bb_coords[net_id]); + } + } + + if (method == CHECK) { + VTR_LOG("\n"); + VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n", + expected_wirelength); + } + return cost; +} + +void update_move_nets(int num_nets_affected) { + /* update net cost functions and reset flags. */ + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = g_placer_ctx.mutable_move(); + + for (int inet_affected = 0; inet_affected < num_nets_affected; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + + place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id]; + if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) + place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id]; + net_cost[net_id] = proposed_net_cost[net_id]; + /* negative proposed_net_cost value is acting as a flag. 
*/ + proposed_net_cost[net_id] = -1; + bb_updated_before[net_id] = NOT_UPDATED_YET; + } +} + +void reset_move_nets(int num_nets_affected) { + /* Reset the net cost function flags first. */ + for (int inet_affected = 0; inet_affected < num_nets_affected; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + proposed_net_cost[net_id] = -1; + bb_updated_before[net_id] = NOT_UPDATED_YET; + } +} + +void recompute_costs_from_scratch(const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_placer_costs* costs) { + double new_bb_cost = recompute_bb_cost(); + if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n", + new_bb_cost, costs->bb_cost); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + costs->bb_cost = new_bb_cost; + + if (placer_opts.place_algorithm.is_timing_driven()) { + double new_timing_cost = 0.; + comp_td_costs(delay_model, *criticalities, &new_timing_cost); + if (fabs( + new_timing_cost + - costs->timing_cost) + > costs->timing_cost * ERROR_TOL) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", + new_timing_cost, costs->timing_cost, ERROR_TOL); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + costs->timing_cost = new_timing_cost; + } else { + VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); + + costs->cost = new_bb_cost * costs->bb_cost_norm; + } + + if (noc_opts.noc) { + double new_noc_aggregate_bandwidth_cost = 0.; + double new_noc_latency_cost = 0.; + recompute_noc_costs(new_noc_aggregate_bandwidth_cost, new_noc_latency_cost); + + if (fabs( + new_noc_aggregate_bandwidth_cost + - costs->noc_aggregate_bandwidth_cost) + > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) { + std::string msg = vtr::string_fmt( + "in 
recompute_costs_from_scratch: new_noc_aggregate_bandwidth_cost = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n", + new_noc_aggregate_bandwidth_cost, costs->noc_aggregate_bandwidth_cost, ERROR_TOL); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + costs->noc_aggregate_bandwidth_cost = new_noc_aggregate_bandwidth_cost; + + // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. + // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) + if (new_noc_latency_cost > MIN_EXPECTED_NOC_LATENCY_COST) { + if (fabs( + new_noc_latency_cost + - costs->noc_latency_cost) + > costs->noc_latency_cost * ERROR_TOL) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_noc_latency_cost = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n", + new_noc_latency_cost, costs->noc_latency_cost, ERROR_TOL); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + } + costs->noc_latency_cost = new_noc_latency_cost; + } +} void init_net_cost_structs(size_t num_nets) { net_cost.resize(num_nets, -1.); diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 28398908b5b..eb83b711d76 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -25,5 +25,16 @@ int find_affected_nets_and_update_costs( double& bb_delta_c, double& timing_delta_c); +double comp_bb_cost(e_cost_methods method); + +void update_move_nets(int num_nets_affected); + +void reset_move_nets(int num_nets_affected); + +void recompute_costs_from_scratch(const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_placer_costs* costs); void init_net_cost_structs(size_t num_nets); From c0af5fe3224032e7df0ce468869e9bde6f16e050 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 11:08:45 -0400 Subject: [PATCH 110/188] 
alloc_and_load_for_fast_cost_update & free_fast_cost_update & wirelength_crossing_count & get_net_bounding_box_cost from place_util --- vpr/src/place/place_util.cpp | 169 +---------------------------------- vpr/src/place/place_util.h | 6 -- 2 files changed, 1 insertion(+), 174 deletions(-) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 0713757a2b2..7cc7a1ee68d 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -9,30 +9,6 @@ #include "draw_global.h" #include "place_constraints.h" -/* Expected crossing counts for nets with different #'s of pins. From * - * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). * - * Multiplied to bounding box of a net to better estimate wire length * - * for higher fanout nets. Each entry is the correction factor for the * - * fanout index-1 */ -static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, - 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, - 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, - 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, - 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, - 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, - 2.7933}; - -/* The arrays below are used to precompute the inverse of the average * - * number of tracks per channel between [subhigh] and [sublow]. Access * - * them as chan?_place_cost_fac[subhigh][sublow]. They are used to * - * speed up the computation of the cost function that takes the length * - * of the net bounding box in each dimension, divided by the average * - * number of tracks in that direction; for other cost functions they * - * will never be used. 
* - */ -static vtr::NdMatrix chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2] -static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2] - /* File-scope routines */ static GridBlock init_grid_blocks(); @@ -579,147 +555,4 @@ t_pl_atom_loc get_atom_loc (AtomBlockId atom) { int primitive_id = atom_lookup.atom_pb_graph_node(atom)->primitive_num; return {primitive_id, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer}; -} - -void alloc_and_load_for_fast_cost_update(float place_cost_exp) { - /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * - * arrays with the inverse of the average number of tracks per channel * - * between [subhigh] and [sublow]. This is only useful for the cost * - * function that takes the length of the net bounding box in each * - * dimension divided by the average number of tracks in that direction. * - * For other cost functions, you don't have to bother calling this * - * routine; when using the cost function described above, however, you * - * must always call this routine after you call init_chan and before * - * you do any placement cost determination. The place_cost_exp factor * - * specifies to what power the width of the channel should be taken -- * - * larger numbers make narrower channels more expensive. */ - - auto& device_ctx = g_vpr_ctx.device(); - - /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * - * subhigh must be greater than or equal to sublow, we only need to * - * allocate storage for the lower half of a matrix. 
*/ - - //chanx_place_cost_fac = new float*[(device_ctx.grid.height())]; - //for (size_t i = 0; i < device_ctx.grid.height(); i++) - // chanx_place_cost_fac[i] = new float[(i + 1)]; - - //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)]; - //for (size_t i = 0; i < device_ctx.grid.width(); i++) - // chany_place_cost_fac[i] = new float[(i + 1)]; - - chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1}); - chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1}); - - /* First compute the number of tracks between channel high and channel * - * low, inclusive, in an efficient manner. */ - - chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0]; - - for (size_t high = 1; high < device_ctx.grid.height(); high++) { - chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high]; - for (size_t low = 0; low < high; low++) { - chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] - + device_ctx.chan_width.x_list[high]; - } - } - - /* Now compute the inverse of the average number of tracks per channel * - * between high and low. The cost function divides by the average * - * number of tracks per channel, so by storing the inverse I convert * - * this to a faster multiplication. Take this final number to the * - * place_cost_exp power -- numbers other than one mean this is no * - * longer a simple "average number of tracks"; it is some power of * - * that, allowing greater penalization of narrow channels. */ - - for (size_t high = 0; high < device_ctx.grid.height(); high++) - for (size_t low = 0; low <= high; low++) { - /* Since we will divide the wiring cost by the average channel * - * capacity between high and low, having only 0 width channels * - * will result in infinite wiring capacity normalization * - * factor, and extremely bad placer behaviour. Hence we change * - * this to a small (1 track) channel capacity instead. 
*/ - if (chanx_place_cost_fac[high][low] == 0.0f) { - VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low); - chanx_place_cost_fac[high][low] = 1.0f; - } - - chanx_place_cost_fac[high][low] = (high - low + 1.) - / chanx_place_cost_fac[high][low]; - chanx_place_cost_fac[high][low] = pow( - (double)chanx_place_cost_fac[high][low], - (double)place_cost_exp); - } - - /* Now do the same thing for the y-directed channels. First get the * - * number of tracks between channel high and channel low, inclusive. */ - - chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0]; - - for (size_t high = 1; high < device_ctx.grid.width(); high++) { - chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high]; - for (size_t low = 0; low < high; low++) { - chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] - + device_ctx.chan_width.y_list[high]; - } - } - - /* Now compute the inverse of the average number of tracks per channel * - * between high and low. Take to specified power. */ - - for (size_t high = 0; high < device_ctx.grid.width(); high++) - for (size_t low = 0; low <= high; low++) { - /* Since we will divide the wiring cost by the average channel * - * capacity between high and low, having only 0 width channels * - * will result in infinite wiring capacity normalization * - * factor, and extremely bad placer behaviour. Hence we change * - * this to a small (1 track) channel capacity instead. */ - if (chany_place_cost_fac[high][low] == 0.0f) { - VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low); - chany_place_cost_fac[high][low] = 1.0f; - } - - chany_place_cost_fac[high][low] = (high - low + 1.) - / chany_place_cost_fac[high][low]; - chany_place_cost_fac[high][low] = pow( - (double)chany_place_cost_fac[high][low], - (double)place_cost_exp); - } -} - -double wirelength_crossing_count(size_t fanout) { - /* Get the expected "crossing count" of a net, based on its number * - * of pins. Extrapolate for very large nets. 
*/ - - if (fanout > 50) { - return 2.7933 + 0.02616 * (fanout - 50); - } else { - return cross_count[fanout - 1]; - } -} - -double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { - /* Finds the cost due to one net by looking at its coordinate bounding * - * box. */ - - double ncost, crossing; - auto& cluster_ctx = g_vpr_ctx.clustering(); - - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); - - /* Could insert a check for xmin == xmax. In that case, assume * - * connection will be made with no bends and hence no x-cost. * - * Same thing for y-cost. */ - - /* Cost = wire length along channel * cross_count / average * - * channel capacity. Do this for x, then y direction and add. */ - - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing - * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; - - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing - * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; - - return (ncost); -} +} \ No newline at end of file diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index c3e7ad9daa5..08ba65f08f5 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -268,10 +268,4 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ // Array in place_ctx. 
t_pl_atom_loc get_atom_loc (AtomBlockId atom); -void alloc_and_load_for_fast_cost_update(float place_cost_exp); - -double wirelength_crossing_count(size_t fanout); - -double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bb_ptr); - #endif From 8975b8de0cb02994d483d5500bc74cfb42cfc269 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 11:13:46 -0400 Subject: [PATCH 111/188] move functions related to chanx/y_place_cost_fac to net_cost handler - add initializer/free functions to net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 195 +++++++++++++++++++++++++++++ vpr/src/place/net_cost_handler.h | 10 ++ 2 files changed, 205 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 22d86b14d9d..8dcd567b8c3 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -14,6 +14,30 @@ using std::min; #define UPDATED_ONCE 'U' #define GOT_FROM_SCRATCH 'S' +/* Expected crossing counts for nets with different #'s of pins. From * + * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). * + * Multiplied to bounding box of a net to better estimate wire length * + * for higher fanout nets. Each entry is the correction factor for the * + * fanout index-1 */ +static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, + 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, + 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, + 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, + 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, + 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, + 2.7933}; + +/* The arrays below are used to precompute the inverse of the average * + * number of tracks per channel between [subhigh] and [sublow]. Access * + * them as chan?_place_cost_fac[subhigh][sublow]. 
They are used to * + * speed up the computation of the cost function that takes the length * + * of the net bounding box in each dimension, divided by the average * + * number of tracks in that direction; for other cost functions they * + * will never be used. * + */ +static vtr::NdMatrix chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2] +static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2] + /* Cost of a net, and a temporary cost of a net used during move assessment. */ static vtr::vector net_cost, proposed_net_cost; @@ -79,6 +103,10 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static double recompute_bb_cost(); +static double wirelength_crossing_count(size_t fanout); + +static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' @@ -685,6 +713,44 @@ static double recompute_bb_cost() { return (cost); } +static double wirelength_crossing_count(size_t fanout) { + /* Get the expected "crossing count" of a net, based on its number * + * of pins. Extrapolate for very large nets. */ + + if (fanout > 50) { + return 2.7933 + 0.02616 * (fanout - 50); + } else { + return cross_count[fanout - 1]; + } +} + + +static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { + /* Finds the cost due to one net by looking at its coordinate bounding * + * box. */ + + double ncost, crossing; + auto& cluster_ctx = g_vpr_ctx.clustering(); + + crossing = wirelength_crossing_count( + cluster_ctx.clb_nlist.net_pins(net_id).size()); + + /* Could insert a check for xmin == xmax. In that case, assume * + * connection will be made with no bends and hence no x-cost. * + * Same thing for y-cost. */ + + /* Cost = wire length along channel * cross_count / average * + * channel capacity. Do this for x, then y direction and add. 
*/ + + ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing + * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; + + ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing + * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; + + return (ncost); +} + int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -968,6 +1034,117 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, } } +void alloc_and_load_for_fast_cost_update(float place_cost_exp) { + /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * + * arrays with the inverse of the average number of tracks per channel * + * between [subhigh] and [sublow]. This is only useful for the cost * + * function that takes the length of the net bounding box in each * + * dimension divided by the average number of tracks in that direction. * + * For other cost functions, you don't have to bother calling this * + * routine; when using the cost function described above, however, you * + * must always call this routine after you call init_chan and before * + * you do any placement cost determination. The place_cost_exp factor * + * specifies to what power the width of the channel should be taken -- * + * larger numbers make narrower channels more expensive. */ + + auto& device_ctx = g_vpr_ctx.device(); + + /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * + * subhigh must be greater than or equal to sublow, we only need to * + * allocate storage for the lower half of a matrix. 
*/ + + //chanx_place_cost_fac = new float*[(device_ctx.grid.height())]; + //for (size_t i = 0; i < device_ctx.grid.height(); i++) + // chanx_place_cost_fac[i] = new float[(i + 1)]; + + //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)]; + //for (size_t i = 0; i < device_ctx.grid.width(); i++) + // chany_place_cost_fac[i] = new float[(i + 1)]; + + chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1}); + chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1}); + + /* First compute the number of tracks between channel high and channel * + * low, inclusive, in an efficient manner. */ + + chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0]; + + for (size_t high = 1; high < device_ctx.grid.height(); high++) { + chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high]; + for (size_t low = 0; low < high; low++) { + chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] + + device_ctx.chan_width.x_list[high]; + } + } + + /* Now compute the inverse of the average number of tracks per channel * + * between high and low. The cost function divides by the average * + * number of tracks per channel, so by storing the inverse I convert * + * this to a faster multiplication. Take this final number to the * + * place_cost_exp power -- numbers other than one mean this is no * + * longer a simple "average number of tracks"; it is some power of * + * that, allowing greater penalization of narrow channels. */ + + for (size_t high = 0; high < device_ctx.grid.height(); high++) + for (size_t low = 0; low <= high; low++) { + /* Since we will divide the wiring cost by the average channel * + * capacity between high and low, having only 0 width channels * + * will result in infinite wiring capacity normalization * + * factor, and extremely bad placer behaviour. Hence we change * + * this to a small (1 track) channel capacity instead. 
*/ + if (chanx_place_cost_fac[high][low] == 0.0f) { + VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low); + chanx_place_cost_fac[high][low] = 1.0f; + } + + chanx_place_cost_fac[high][low] = (high - low + 1.) + / chanx_place_cost_fac[high][low]; + chanx_place_cost_fac[high][low] = pow( + (double)chanx_place_cost_fac[high][low], + (double)place_cost_exp); + } + + /* Now do the same thing for the y-directed channels. First get the * + * number of tracks between channel high and channel low, inclusive. */ + + chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0]; + + for (size_t high = 1; high < device_ctx.grid.width(); high++) { + chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high]; + for (size_t low = 0; low < high; low++) { + chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] + + device_ctx.chan_width.y_list[high]; + } + } + + /* Now compute the inverse of the average number of tracks per channel * + * between high and low. Take to specified power. */ + + for (size_t high = 0; high < device_ctx.grid.width(); high++) + for (size_t low = 0; low <= high; low++) { + /* Since we will divide the wiring cost by the average channel * + * capacity between high and low, having only 0 width channels * + * will result in infinite wiring capacity normalization * + * factor, and extremely bad placer behaviour. Hence we change * + * this to a small (1 track) channel capacity instead. */ + if (chany_place_cost_fac[high][low] == 0.0f) { + VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low); + chany_place_cost_fac[high][low] = 1.0f; + } + + chany_place_cost_fac[high][low] = (high - low + 1.) 
+ / chany_place_cost_fac[high][low]; + chany_place_cost_fac[high][low] = pow( + (double)chany_place_cost_fac[high][low], + (double)place_cost_exp); + } +} + +void free_fast_cost_update() { + chanx_place_cost_fac.clear(); + chany_place_cost_fac.clear(); +} + void init_net_cost_structs(size_t num_nets) { net_cost.resize(num_nets, -1.); proposed_net_cost.resize(num_nets, -1.); @@ -977,3 +1154,21 @@ void init_net_cost_structs(size_t num_nets) { bb_updated_before.resize(num_nets, NOT_UPDATED_YET); } + +void free_net_cost_structs() { + vtr::release_memory(net_cost); + vtr::release_memory(proposed_net_cost); + vtr::release_memory(bb_updated_before); +} + +void init_try_swap_net_cost_structs(size_t num_nets) { + ts_bb_coord_new.resize(num_nets, t_bb()); + ts_bb_edge_new.resize(num_nets, t_bb()); + ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID()); +} + +void free_try_swap_net_cost_structs() { + vtr::release_memory(ts_bb_coord_new); + vtr::release_memory(ts_bb_edge_new); + vtr::release_memory(ts_nets_to_update); +} diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index eb83b711d76..67ed61ce700 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -37,4 +37,14 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities, t_placer_costs* costs); +void alloc_and_load_for_fast_cost_update(float place_cost_exp); + +void free_fast_cost_update(); + void init_net_cost_structs(size_t num_nets); + +void free_net_cost_structs(); + +void init_try_swap_net_cost_structs(size_t num_nets); + +void free_try_swap_net_cost_structs(); From 598d1364fbe7771bfd1fe4e00cf726a9b9a87d34 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 11:15:04 -0400 Subject: [PATCH 112/188] remove all the functions moved to net_cost_handler from place.cpp --- vpr/src/place/place.cpp | 1195 +-------------------------------------- 1 file changed, 8 insertions(+), 1187 deletions(-) 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 58130684935..4dea6697d80 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -63,7 +63,6 @@ #include "clustered_netlist_utils.h" -#include "re_cluster.h" #include "re_cluster_util.h" #include "cluster_placement.h" @@ -71,6 +70,8 @@ #include "place_re_cluster.h" +#include "net_cost_handler.h" + /* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * * compared to the timing cost in the agent's reward function. The reward is calculated as * * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) @@ -89,70 +90,22 @@ using std::min; /************** Types and defines local to place.c ***************************/ -/* This defines the error tolerance for floating points variables used in * - * cost computation. 0.01 means that there is a 1% error tolerance. */ -#define ERROR_TOL .01 - /* This defines the maximum number of swap attempts before invoking the * * once-in-a-while placement legality check as well as floating point * * variables round-offs check. */ #define MAX_MOVES_BEFORE_RECOMPUTE 500000 -/* Flags for the states of the bounding box. * - * Stored as char for memory efficiency. */ -#define NOT_UPDATED_YET 'N' -#define UPDATED_ONCE 'U' -#define GOT_FROM_SCRATCH 'S' - /* For comp_cost. NORMAL means use the method that generates updateable * * bounding boxes for speed. CHECK means compute all bounding boxes from * * scratch using a very simple routine to allow checks of the other * * costs. */ -enum e_cost_methods { - NORMAL, - CHECK -}; - constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); /********************** Variables local to place.c ***************************/ -/* Cost of a net, and a temporary cost of a net used during move assessment. 
*/ -static vtr::vector net_cost, proposed_net_cost; - -/* [0...cluster_ctx.clb_nlist.nets().size()-1] * - * A flag array to indicate whether the specific bounding box has been updated * - * in this particular swap or not. If it has been updated before, the code * - * must use the updated data, instead of the out-of-date data passed into the * - * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET * - * indicates that the net has not been updated before, UPDATED_ONCE indicated * - * that the net has been updated once, if it is going to be updated again, the * - * values from the previous update must be used. GOT_FROM_SCRATCH is only * - * applicable for nets larger than SMALL_NETS and it indicates that the * - * particular bounding box cannot be updated incrementally before, hence the * - * bounding box is got from scratch, so the bounding box would definitely be * - * right, DO NOT update again. */ -static vtr::vector bb_updated_before; - -/* The arrays below are used to precompute the inverse of the average * - * number of tracks per channel between [subhigh] and [sublow]. Access * - * them as chan?_place_cost_fac[subhigh][sublow]. They are used to * - * speed up the computation of the cost function that takes the length * - * of the net bounding box in each dimension, divided by the average * - * number of tracks in that direction; for other cost functions they * - * will never be used. * - */ -static vtr::NdMatrix chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2] -static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2] - -/* The following arrays are used by the try_swap function for speed. */ -/* [0...cluster_ctx.clb_nlist.nets().size()-1] */ -static vtr::vector ts_bb_coord_new, ts_bb_edge_new; -static std::vector ts_nets_to_update; - /* These file-scoped variables keep track of the number of swaps * * rejected, accepted or aborted. 
The total number of swap attempts * * is the sum of the three number. */ @@ -161,19 +114,6 @@ static int num_swap_accepted = 0; static int num_swap_aborted = 0; static int num_ts_called = 0; -/* Expected crossing counts for nets with different #'s of pins. From * - * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). * - * Multiplied to bounding box of a net to better estimate wire length * - * for higher fanout nets. Each entry is the correction factor for the * - * fanout index-1 */ -static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, - 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, - 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, - 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, - 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, - 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, - 2.7933}; - std::unique_ptr f_move_stats_file(nullptr, vtr::fclose); @@ -265,15 +205,6 @@ static void free_try_swap_structs(); static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); -static void alloc_and_load_for_fast_cost_update(float place_cost_exp); - -static void free_fast_cost_update(); - -static double comp_bb_cost(e_cost_methods method); - -static void update_move_nets(int num_nets_affected); -static void reset_move_nets(int num_nets_affected); - static e_move_result try_swap(const t_annealing_state* state, t_placer_costs* costs, MoveGenerator& move_generator, @@ -310,8 +241,6 @@ static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t static int count_connections(); -static double recompute_bb_cost(); - static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); @@ -321,72 +250,14 @@ static void invalidate_affected_connections( NetPinTimingInvalidator* 
pin_tedges_invalidator, TimingInfo* timing_info); -static bool driven_by_moved_block(const AtomNetId net, - const std::vector& moved_blocks); - -static bool driven_by_moved_block(const ClusterNetId net, - const std::vector& moved_blocks); - static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); static e_move_result assess_swap(double delta_c, double t); -static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); - -static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew); - -static int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_atom_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c); - -static int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c); - -static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const ClusterBlockId& blk_id, - const ClusterPinId& pin_id, - const t_pl_moved_block& moving_blk_inf, - std::vector& affected_pins, - double& timing_delta_c, - int& num_affected_nets, - bool is_src_moving); - -static void record_affected_net(const ClusterNetId net, int& num_affected_nets); - -static void update_net_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, - const t_pl_moved_block& pl_moved_block); - -static void update_td_delta_costs(const PlaceDelayModel* delay_model, - const PlacerCriticalities& criticalities, - const ClusterNetId net, - const ClusterPinId pin, - std::vector& affected_pins, - double& delta_timing_cost, - bool is_src_moving); - 
static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); -static double get_net_cost(ClusterNetId net_id, t_bb* bb_ptr); - -static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges); - -static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); - static void free_try_swap_arrays(); static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, @@ -420,12 +291,6 @@ static void placement_inner_loop(const t_annealing_state* state, MoveTypeStat& move_type_stat, float timing_bb_factor); -static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_placer_costs* costs); - static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, @@ -1240,72 +1105,6 @@ static void placement_inner_loop(const t_annealing_state* state, stats->calc_iteration_stats(*costs, state->move_lim); } -static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_placer_costs* costs) { - double new_bb_cost = recompute_bb_cost(); - if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n", - new_bb_cost, costs->bb_cost); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - costs->bb_cost = new_bb_cost; - - if (placer_opts.place_algorithm.is_timing_driven()) { - double new_timing_cost = 0.; - comp_td_costs(delay_model, *criticalities, &new_timing_cost); - if (fabs( - new_timing_cost 
- - costs->timing_cost) - > costs->timing_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", - new_timing_cost, costs->timing_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - costs->timing_cost = new_timing_cost; - } else { - VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); - - costs->cost = new_bb_cost * costs->bb_cost_norm; - } - - if (noc_opts.noc) { - double new_noc_aggregate_bandwidth_cost = 0.; - double new_noc_latency_cost = 0.; - recompute_noc_costs(new_noc_aggregate_bandwidth_cost, new_noc_latency_cost); - - if (fabs( - new_noc_aggregate_bandwidth_cost - - costs->noc_aggregate_bandwidth_cost) - > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_aggregate_bandwidth_cost = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n", - new_noc_aggregate_bandwidth_cost, costs->noc_aggregate_bandwidth_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - costs->noc_aggregate_bandwidth_cost = new_noc_aggregate_bandwidth_cost; - - // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. 
- // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) - if (new_noc_latency_cost > MIN_EXPECTED_NOC_LATENCY_COST) { - if (fabs( - new_noc_latency_cost - - costs->noc_latency_cost) - > costs->noc_latency_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_latency_cost = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n", - new_noc_latency_cost, costs->noc_latency_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - } - costs->noc_latency_cost = new_noc_latency_cost; - } -} - /*only count non-global connections */ static int count_connections() { int count = 0; @@ -1400,37 +1199,6 @@ static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t return init_temp; } -static void update_move_nets(int num_nets_affected) { - /* update net cost functions and reset flags. */ - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - - for (int inet_affected = 0; inet_affected < num_nets_affected; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; - - place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id]; - if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) - place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id]; - - net_cost[net_id] = proposed_net_cost[net_id]; - - /* negative proposed_net_cost value is acting as a flag. */ - proposed_net_cost[net_id] = -1; - bb_updated_before[net_id] = NOT_UPDATED_YET; - } -} - -static void reset_move_nets(int num_nets_affected) { - /* Reset the net cost function flags first. */ - for (int inet_affected = 0; inet_affected < num_nets_affected; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = -1; - bb_updated_before[net_id] = NOT_UPDATED_YET; - } -} - /** * @brief Pick some block and moves it to another spot. 
* @@ -1761,333 +1529,6 @@ static e_move_result try_swap(const t_annealing_state* state, return move_outcome; } -static int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_atom_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c) { - - const auto& atom_look_up = g_vpr_ctx.atom().lookup; - const auto& atom_nlist = g_vpr_ctx.atom().nlist; - const auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; - - VTR_ASSERT_SAFE(bb_delta_c == 0.); - VTR_ASSERT_SAFE(timing_delta_c == 0.); - - int num_affected_nets = 0; - - std::vector affected_pins; - - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; - ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); - const auto& atom_old_loc = blocks_affected.moved_blocks[iblk].old_loc; - const auto& atom_new_loc = blocks_affected.moved_blocks[iblk].new_loc; - - for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk_id)) { - auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); - for (const auto& cluster_pin : cluster_pins) { - bool is_src_moving = false; - if (atom_nlist.pin_type(atom_pin) == PinType::SINK) { - AtomNetId net_id = atom_nlist.pin_net(atom_pin); - is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); - } - t_pl_moved_block move_cluster_inf; - move_cluster_inf.block_num = cluster_blk_id; - move_cluster_inf.old_loc = t_pl_loc(atom_old_loc.x, atom_old_loc.y, atom_old_loc.sub_tile, atom_old_loc.layer); - move_cluster_inf.new_loc = t_pl_loc(atom_new_loc.x, atom_new_loc.y, atom_new_loc.sub_tile, atom_new_loc.layer); - update_net_info_on_pin_move(place_algorithm, - delay_model, - criticalities, - cluster_blk_id, - cluster_pin, - move_cluster_inf, - affected_pins, - timing_delta_c, - num_affected_nets, - is_src_moving); - - - } - } 
- } - - /* Now update the bounding box costs (since the net bounding * - * boxes are up-to-date). The cost is only updated once per net. */ - for (int inet_affected = 0; inet_affected < num_affected_nets; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; - - proposed_net_cost[net_id] = get_net_cost(net_id, - &ts_bb_coord_new[net_id]); - bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; - } - - return num_affected_nets; -} - -/** - * @brief Find all the nets and pins affected by this swap and update costs. - * - * Find all the nets affected by this swap and update the bounding box (wiring) - * costs. This cost function doesn't depend on the timing info. - * - * Find all the connections affected by this swap and update the timing cost. - * For a connection to be affected, it not only needs to be on or driven by - * a block, but it also needs to have its delay changed. Otherwise, it will - * not be added to the affected_pins structure. - * - * For more, see update_td_delta_costs(). - * - * The timing costs are calculated by getting the new connection delays, - * multiplied by the connection criticalities returned by the timing - * analyzer. These timing costs are stored in the proposed_* data structures. - * - * The change in the bounding box cost is stored in `bb_delta_c`. - * The change in the timing cost is stored in `timing_delta_c`. - * - * @return The number of affected nets. - */ -static int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c) { - VTR_ASSERT_SAFE(bb_delta_c == 0.); - VTR_ASSERT_SAFE(timing_delta_c == 0.); - auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; - - int num_affected_nets = 0; - - /* Go through all the blocks moved. 
*/ - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - const auto& moving_block_inf = blocks_affected.moved_blocks[iblk]; - auto& affected_pins = blocks_affected.affected_pins; - ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; - - /* Go through all the pins in the moved block. */ - for (ClusterPinId blk_pin : clb_nlsit.block_pins(blk)) { - bool is_src_moving = false; - if (clb_nlsit.pin_type(blk_pin) == PinType::SINK) { - ClusterNetId net_id = clb_nlsit.pin_net(blk_pin); - is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); - } - update_net_info_on_pin_move(place_algorithm, - delay_model, - criticalities, - blk, - blk_pin, - moving_block_inf, - affected_pins, - timing_delta_c, - num_affected_nets, - is_src_moving); - } - } - - /* Now update the bounding box costs (since the net bounding * - * boxes are up-to-date). The cost is only updated once per net. */ - for (int inet_affected = 0; inet_affected < num_affected_nets; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; - - proposed_net_cost[net_id] = get_net_cost(net_id, - &ts_bb_coord_new[net_id]); - bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; - } - - return num_affected_nets; -} - -static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const ClusterBlockId& blk_id, - const ClusterPinId& pin_id, - const t_pl_moved_block& moving_blk_inf, - std::vector& affected_pins, - double& timing_delta_c, - int& num_affected_nets, - bool is_src_moving) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); - VTR_ASSERT_SAFE_MSG(net_id, - "Only valid nets should be found in compressed netlist block pins"); - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - //TODO: Do we require anyting special here for global nets? 
- //"Global nets are assumed to span the whole chip, and do not effect costs." - return; - } - - /* Record effected nets */ - record_affected_net(net_id, num_affected_nets); - - /* Update the net bounding boxes. */ - update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); - - if (place_algorithm.is_timing_driven()) { - /* Determine the change in connection delay and timing cost. */ - update_td_delta_costs(delay_model, - *criticalities, - net_id, - pin_id, - affected_pins, - timing_delta_c, - is_src_moving); - } -} - -///@brief Record effected nets. -static void record_affected_net(const ClusterNetId net, - int& num_affected_nets) { - /* Record effected nets. */ - if (proposed_net_cost[net] < 0.) { - /* Net not marked yet. */ - ts_nets_to_update[num_affected_nets] = net; - num_affected_nets++; - - /* Flag to say we've marked this net. */ - proposed_net_cost[net] = 1.; - } -} - -/** - * @brief Update the net bounding boxes. - * - * Do not update the net cost here since it should only - * be updated once per net, not once per pin. 
- */ -static void update_net_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, - const t_pl_moved_block& pl_moved_block) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { - //For small nets brute-force bounding box update is faster - - if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net - get_non_updateable_bb(net, &ts_bb_coord_new[net]); - } - } else { - //For large nets, update bounding box incrementally - int iblk_pin = tile_pin_index(blk_pin); - - t_physical_tile_type_ptr blk_type = physical_tile_type(blk); - int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; - int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; - - //Incremental bounding box update - update_bb(net, &ts_bb_coord_new[net], &ts_bb_edge_new[net], - pl_moved_block.old_loc.x + pin_width_offset, - pl_moved_block.old_loc.y - + pin_height_offset, - pl_moved_block.new_loc.x + pin_width_offset, - pl_moved_block.new_loc.y - + pin_height_offset); - } -} - -/** - * @brief Calculate the new connection delay and timing cost of all the - * sink pins affected by moving a specific pin to a new location. - * Also calculates the total change in the timing cost. - * - * Assumes that the blocks have been moved to the proposed new locations. - * Otherwise, the routine comp_td_single_connection_delay() will not be - * able to calculate the most up to date connection delay estimation value. - * - * If the moved pin is a driver pin, then all the sink connections that are - * driven by this driver pin are considered. - * - * If the moved pin is a sink pin, then it is the only pin considered. But - * in some cases, the sink is already accounted for if it is also driven - * by a driver pin located on a moved block. Computing it again would double - * count its affect on the total timing cost change (delta_timing_cost). 
- * - * It is possible for some connections to have unchanged delays. For instance, - * if we are using a dx/dy delay model, this could occur if a sink pin moved - * to a new position with the same dx/dy from its net's driver pin. - * - * We skip these connections with unchanged delay values as their delay need - * not be updated. Their timing costs also do not require any update, since - * the criticalities values are always kept stale/unchanged during an block - * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) - * - * This is also done to minimize the number of timing node/edge invalidations - * for incremental static timing analysis (incremental STA). - */ -static void update_td_delta_costs(const PlaceDelayModel* delay_model, - const PlacerCriticalities& criticalities, - const ClusterNetId net, - const ClusterPinId pin, - std::vector& affected_pins, - double& delta_timing_cost, - bool is_src_moving) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - const auto& connection_delay = g_placer_ctx.timing().connection_delay; - auto& connection_timing_cost = g_placer_ctx.mutable_timing().connection_timing_cost; - auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay; - auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost; - - if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { - /* This pin is a net driver on a moved block. */ - /* Recompute all point to point connection delays for the net sinks. 
*/ - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); - ipin++) { - float temp_delay = comp_td_single_connection_delay(delay_model, net, - ipin); - /* If the delay hasn't changed, do not mark this pin as affected */ - if (temp_delay == connection_delay[net][ipin]) { - continue; - } - - /* Calculate proposed delay and cost values */ - proposed_connection_delay[net][ipin] = temp_delay; - - proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; - delta_timing_cost += proposed_connection_timing_cost[net][ipin] - - connection_timing_cost[net][ipin]; - - /* Record this connection in blocks_affected.affected_pins */ - ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin); - affected_pins.push_back(sink_pin); - } - } else { - /* This pin is a net sink on a moved block */ - VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); - - /* Check if this sink's net is driven by a moved block */ - if (!is_src_moving) { - /* Get the sink pin index in the net */ - int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); - - float temp_delay = comp_td_single_connection_delay(delay_model, net, - ipin); - /* If the delay hasn't changed, do not mark this pin as affected */ - if (temp_delay == connection_delay[net][ipin]) { - return; - } - - /* Calculate proposed delay and cost values */ - proposed_connection_delay[net][ipin] = temp_delay; - - proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; - delta_timing_cost += proposed_connection_timing_cost[net][ipin] - - connection_timing_cost[net][ipin]; - - /* Record this connection in blocks_affected.affected_pins */ - affected_pins.push_back(pin); - } - } -} - /** * @brief Updates all the cost normalization factors during the outer * loop iteration of the placement. 
At each temperature change, these @@ -2221,25 +1662,6 @@ static e_move_result assess_swap(double delta_c, double t) { return REJECTED; } -static double recompute_bb_cost() { - /* Recomputes the cost to eliminate roundoff that may have accrued. * - * This routine does as little work as possible to compute this new * - * cost. */ - - double cost = 0; - - auto& cluster_ctx = g_vpr_ctx.clustering(); - - for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Bounding boxes don't have to be recomputed; they're correct. */ - cost += net_cost[net_id]; - } - } - - return (cost); -} - /** * @brief Update the connection_timing_cost values from the temporary * values for all connections that have/haven't changed. @@ -2316,79 +1738,6 @@ static void invalidate_affected_connections( } } -static bool driven_by_moved_block(const AtomNetId net, - const std::vector& moved_blocks) { - const auto& atom_nlist = g_vpr_ctx.atom().nlist; - bool is_driven_by_move_blk; - AtomBlockId net_driver_block = atom_nlist.net_driver_block( - net); - - is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { - return net_driver_block == move_blk.block_num; - }); - - return is_driven_by_move_blk; -} - -//Returns true if 'net' is driven by one of the blocks in 'blocks_affected' -static bool driven_by_moved_block(const ClusterNetId net, - const std::vector& moved_blocks) { - auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - bool is_driven_by_move_blk; - ClusterBlockId net_driver_block = clb_nlist.net_driver_block( - net); - - is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { - return net_driver_block == move_blk.block_num; - }); - - return is_driven_by_move_blk; -} - -/* Finds the cost from scratch. 
Done only when the placement * - * has been radically changed (i.e. after initial placement). * - * Otherwise find the cost change incrementally. If method * - * check is NORMAL, we find bounding boxes that are updateable * - * for the larger nets. If method is CHECK, all bounding boxes * - * are found via the non_updateable_bb routine, to provide a * - * cost which can be used to check the correctness of the * - * other routine. */ -static double comp_bb_cost(e_cost_methods method) { - double cost = 0; - double expected_wirelength = 0.0; - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - - for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Small nets don't use incremental updating on their bounding boxes, * - * so they can use a fast bounding box calculator. */ - if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET - && method == NORMAL) { - get_bb_from_scratch(net_id, &place_move_ctx.bb_coords[net_id], - &place_move_ctx.bb_num_on_edges[net_id]); - } else { - get_non_updateable_bb(net_id, - &place_move_ctx.bb_coords[net_id]); - } - - net_cost[net_id] = get_net_cost(net_id, - &place_move_ctx.bb_coords[net_id]); - cost += net_cost[net_id]; - if (method == CHECK) - expected_wirelength += get_net_wirelength_estimate(net_id, - &place_move_ctx.bb_coords[net_id]); - } - } - - if (method == CHECK) { - VTR_LOG("\n"); - VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n", - expected_wirelength); - } - return cost; -} - /* Allocates the major structures needed only by the placer, primarily for * * computing costs quickly and such. 
*/ static void alloc_and_load_placement_structs(float place_cost_exp, @@ -2449,15 +1798,11 @@ static void alloc_and_load_placement_structs(float place_cost_exp, } } - net_cost.resize(num_nets, -1.); - proposed_net_cost.resize(num_nets, -1.); + init_net_cost_structs(num_nets); + place_move_ctx.bb_coords.resize(num_nets, t_bb()); place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); - /* Used to store costs for moves not yet made and to indicate when a net's * - * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * - * been recomputed. */ - bb_updated_before.resize(num_nets, NOT_UPDATED_YET); alloc_and_load_for_fast_cost_update(place_cost_exp); @@ -2488,13 +1833,11 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc free_placement_macros_structs(); - vtr::release_memory(net_cost); - vtr::release_memory(proposed_net_cost); + free_net_cost_structs(); + vtr::release_memory(place_move_ctx.bb_coords); vtr::release_memory(place_move_ctx.bb_num_on_edges); - vtr::release_memory(bb_updated_before); - free_fast_cost_update(); free_try_swap_structs(); @@ -2511,541 +1854,19 @@ static void alloc_and_load_try_swap_structs() { size_t num_nets = cluster_ctx.clb_nlist.nets().size(); - ts_bb_coord_new.resize(num_nets, t_bb()); - ts_bb_edge_new.resize(num_nets, t_bb()); - ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID()); + init_try_swap_net_cost_structs(num_nets); auto& place_ctx = g_vpr_ctx.mutable_placement(); place_ctx.compressed_block_grids = create_compressed_block_grids(); } static void free_try_swap_structs() { - vtr::release_memory(ts_bb_coord_new); - vtr::release_memory(ts_bb_edge_new); - vtr::release_memory(ts_nets_to_update); + free_try_swap_net_cost_structs(); auto& place_ctx = g_vpr_ctx.mutable_placement(); vtr::release_memory(place_ctx.compressed_block_grids); } -/* This routine finds the bounding box of each net from scratch (i.e. * - * from only the block location information). 
It updates both the * - * coordinate and number of pins on each edge information. It * - * should only be called when the bounding box information is not valid. */ -static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges) { - int pnum, x, y, xmin, xmax, ymin, ymax; - int xmin_edge, xmax_edge, ymin_edge, ymax_edge; - - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - pnum = net_pin_to_tile_pin_index(net_id, 0); - VTR_ASSERT(pnum >= 0); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - - x = max(min(x, grid.width() - 2), 1); - y = max(min(y, grid.height() - 2), 1); - - xmin = x; - ymin = y; - xmax = x; - ymax = y; - xmin_edge = 1; - ymin_edge = 1; - xmax_edge = 1; - ymax_edge = 1; - - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { - bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - - /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. * - * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and * - * I always take all channels impinging on the bounding box to be within * - * that bounding box. Hence, this "movement" of IO blocks does not affect * - * the which channels are included within the bounding box, and it * - * simplifies the code a lot. 
*/ - - x = max(min(x, grid.width() - 2), 1); //-2 for no perim channels - y = max(min(y, grid.height() - 2), 1); //-2 for no perim channels - - if (x == xmin) { - xmin_edge++; - } - if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */ - xmax_edge++; - } else if (x < xmin) { - xmin = x; - xmin_edge = 1; - } else if (x > xmax) { - xmax = x; - xmax_edge = 1; - } - - if (y == ymin) { - ymin_edge++; - } - if (y == ymax) { - ymax_edge++; - } else if (y < ymin) { - ymin = y; - ymin_edge = 1; - } else if (y > ymax) { - ymax = y; - ymax_edge = 1; - } - } - - /* Copy the coordinates and number on edges information into the proper * - * structures. */ - coords->xmin = xmin; - coords->xmax = xmax; - coords->ymin = ymin; - coords->ymax = ymax; - - num_on_edges->xmin = xmin_edge; - num_on_edges->xmax = xmax_edge; - num_on_edges->ymin = ymin_edge; - num_on_edges->ymax = ymax_edge; -} - -static double wirelength_crossing_count(size_t fanout) { - /* Get the expected "crossing count" of a net, based on its number * - * of pins. Extrapolate for very large nets. */ - - if (fanout > 50) { - return 2.7933 + 0.02616 * (fanout - 50); - } else { - return cross_count[fanout - 1]; - } -} - -static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) { - /* WMF: Finds the estimate of wirelength due to one net by looking at * - * its coordinate bounding box. */ - - double ncost, crossing; - auto& cluster_ctx = g_vpr_ctx.clustering(); - - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); - - /* Could insert a check for xmin == xmax. In that case, assume * - * connection will be made with no bends and hence no x-cost. * - * Same thing for y-cost. */ - - /* Cost = wire length along channel * cross_count / average * - * channel capacity. Do this for x, then y direction and add. 
*/ - - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; - - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; - - return (ncost); -} - -static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) { - /* Finds the cost due to one net by looking at its coordinate bounding * - * box. */ - - double ncost, crossing; - auto& cluster_ctx = g_vpr_ctx.clustering(); - - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); - - /* Could insert a check for xmin == xmax. In that case, assume * - * connection will be made with no bends and hence no x-cost. * - * Same thing for y-cost. */ - - /* Cost = wire length along channel * cross_count / average * - * channel capacity. Do this for x, then y direction and add. */ - - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing - * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; - - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing - * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; - - return (ncost); -} - -/* Finds the bounding box of a net and stores its coordinates in the * - * bb_coord_new data structure. This routine should only be called * - * for small nets, since it does not determine enough information for * - * the bounding box to be updated incrementally later. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. 
*/ -static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { - //TODO: account for multiple physical pin instances per logical pin - - int xmax, ymax, xmin, ymin, x, y; - int pnum; - - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& device_ctx = g_vpr_ctx.device(); - - ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - pnum = net_pin_to_tile_pin_index(net_id, 0); - - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - - xmin = x; - ymin = y; - xmax = x; - ymax = y; - - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { - bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - - if (x < xmin) { - xmin = x; - } else if (x > xmax) { - xmax = x; - } - - if (y < ymin) { - ymin = y; - } else if (y > ymax) { - ymax = y; - } - } - - /* Now I've found the coordinates of the bounding box. There are no * - * channels beyond device_ctx.grid.width()-2 and * - * device_ctx.grid.height() - 2, so I want to clip to that. As well,* - * since I'll always include the channel immediately below and the * - * channel immediately to the left of the bounding box, I want to * - * clip to 1 in both directions as well (since minimum channel index * - * is 0). See route_common.cpp for a channel diagram. 
*/ - - bb_coord_new->xmin = max(min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymin = max(min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - bb_coord_new->xmax = max(min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels -} - -static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew) { - /* Updates the bounding box of a net by storing its coordinates in * - * the bb_coord_new data structure and the number of blocks on each * - * edge in the bb_edge_new data structure. This routine should only * - * be called for large nets, since it has some overhead relative to * - * just doing a brute force bounding box calculation. The bounding * - * box coordinate and edge information for inet must be valid before * - * this routine is called. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. * - * The x and y coordinates are the pin's x and y coordinates. */ - /* IO blocks are considered to be one cell in for simplicity. */ - //TODO: account for multiple physical pin instances per logical pin - const t_bb *curr_bb_edge, *curr_bb_coord; - - auto& device_ctx = g_vpr_ctx.device(); - auto& place_move_ctx = g_placer_ctx.move(); - - xnew = max(min(xnew, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - ynew = max(min(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - xold = max(min(xold, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - yold = max(min(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - - /* Check if the net had been updated before. 
*/ - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { - /* The net had been updated from scratch, DO NOT update again! */ - return; - } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) { - /* The net had NOT been updated before, could use the old values */ - curr_bb_coord = &place_move_ctx.bb_coords[net_id]; - curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id]; - bb_updated_before[net_id] = UPDATED_ONCE; - } else { - /* The net had been updated before, must use the new values */ - curr_bb_coord = bb_coord_new; - curr_bb_edge = bb_edge_new; - } - - /* Check if I can update the bounding box incrementally. */ - - if (xnew < xold) { /* Move to left. */ - - /* Update the xmax fields for coordinates and number of edges first. */ - - if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */ - if (curr_bb_edge->xmax == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; - return; - } else { - bb_edge_new->xmax = curr_bb_edge->xmax - 1; - bb_coord_new->xmax = curr_bb_coord->xmax; - } - } else { /* Move to left, old postion was not at xmax. */ - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmax = curr_bb_edge->xmax; - } - - /* Now do the xmin fields for coordinates and number of edges. */ - - if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */ - bb_coord_new->xmin = xnew; - bb_edge_new->xmin = 1; - } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */ - bb_coord_new->xmin = xnew; - bb_edge_new->xmin = curr_bb_edge->xmin + 1; - } else { /* Xmin unchanged. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_edge_new->xmin = curr_bb_edge->xmin; - } - /* End of move to left case. */ - - } else if (xnew > xold) { /* Move to right. */ - - /* Update the xmin fields for coordinates and number of edges first. */ - - if (xold == curr_bb_coord->xmin) { /* Old position at xmin. 
*/ - if (curr_bb_edge->xmin == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; - return; - } else { - bb_edge_new->xmin = curr_bb_edge->xmin - 1; - bb_coord_new->xmin = curr_bb_coord->xmin; - } - } else { /* Move to right, old position was not at xmin. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_edge_new->xmin = curr_bb_edge->xmin; - } - - /* Now do the xmax fields for coordinates and number of edges. */ - - if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */ - bb_coord_new->xmax = xnew; - bb_edge_new->xmax = 1; - } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */ - bb_coord_new->xmax = xnew; - bb_edge_new->xmax = curr_bb_edge->xmax + 1; - } else { /* Xmax unchanged. */ - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmax = curr_bb_edge->xmax; - } - /* End of move to right case. */ - - } else { /* xnew == xold -- no x motion. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmin = curr_bb_edge->xmin; - bb_edge_new->xmax = curr_bb_edge->xmax; - } - - /* Now account for the y-direction motion. */ - - if (ynew < yold) { /* Move down. */ - - /* Update the ymax fields for coordinates and number of edges first. */ - - if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */ - if (curr_bb_edge->ymax == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; - return; - } else { - bb_edge_new->ymax = curr_bb_edge->ymax - 1; - bb_coord_new->ymax = curr_bb_coord->ymax; - } - } else { /* Move down, old postion was not at ymax. */ - bb_coord_new->ymax = curr_bb_coord->ymax; - bb_edge_new->ymax = curr_bb_edge->ymax; - } - - /* Now do the ymin fields for coordinates and number of edges. 
*/ - - if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */ - bb_coord_new->ymin = ynew; - bb_edge_new->ymin = 1; - } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */ - bb_coord_new->ymin = ynew; - bb_edge_new->ymin = curr_bb_edge->ymin + 1; - } else { /* ymin unchanged. */ - bb_coord_new->ymin = curr_bb_coord->ymin; - bb_edge_new->ymin = curr_bb_edge->ymin; - } - /* End of move down case. */ - - } else if (ynew > yold) { /* Moved up. */ - - /* Update the ymin fields for coordinates and number of edges first. */ - - if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */ - if (curr_bb_edge->ymin == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; - return; - } else { - bb_edge_new->ymin = curr_bb_edge->ymin - 1; - bb_coord_new->ymin = curr_bb_coord->ymin; - } - } else { /* Moved up, old position was not at ymin. */ - bb_coord_new->ymin = curr_bb_coord->ymin; - bb_edge_new->ymin = curr_bb_edge->ymin; - } - - /* Now do the ymax fields for coordinates and number of edges. */ - - if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */ - bb_coord_new->ymax = ynew; - bb_edge_new->ymax = 1; - } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */ - bb_coord_new->ymax = ynew; - bb_edge_new->ymax = curr_bb_edge->ymax + 1; - } else { /* ymax unchanged. */ - bb_coord_new->ymax = curr_bb_coord->ymax; - bb_edge_new->ymax = curr_bb_edge->ymax; - } - /* End of move up case. */ - - } else { /* ynew == yold -- no y motion. 
*/ - bb_coord_new->ymin = curr_bb_coord->ymin; - bb_coord_new->ymax = curr_bb_coord->ymax; - bb_edge_new->ymin = curr_bb_edge->ymin; - bb_edge_new->ymax = curr_bb_edge->ymax; - } - - if (bb_updated_before[net_id] == NOT_UPDATED_YET) { - bb_updated_before[net_id] = UPDATED_ONCE; - } -} - -static void free_fast_cost_update() { - chanx_place_cost_fac.clear(); - chany_place_cost_fac.clear(); -} - -static void alloc_and_load_for_fast_cost_update(float place_cost_exp) { - /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * - * arrays with the inverse of the average number of tracks per channel * - * between [subhigh] and [sublow]. This is only useful for the cost * - * function that takes the length of the net bounding box in each * - * dimension divided by the average number of tracks in that direction. * - * For other cost functions, you don't have to bother calling this * - * routine; when using the cost function described above, however, you * - * must always call this routine after you call init_chan and before * - * you do any placement cost determination. The place_cost_exp factor * - * specifies to what power the width of the channel should be taken -- * - * larger numbers make narrower channels more expensive. */ - - auto& device_ctx = g_vpr_ctx.device(); - - /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * - * subhigh must be greater than or equal to sublow, we only need to * - * allocate storage for the lower half of a matrix. 
*/ - - //chanx_place_cost_fac = new float*[(device_ctx.grid.height())]; - //for (size_t i = 0; i < device_ctx.grid.height(); i++) - // chanx_place_cost_fac[i] = new float[(i + 1)]; - - //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)]; - //for (size_t i = 0; i < device_ctx.grid.width(); i++) - // chany_place_cost_fac[i] = new float[(i + 1)]; - - chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1}); - chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1}); - - /* First compute the number of tracks between channel high and channel * - * low, inclusive, in an efficient manner. */ - - chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0]; - - for (size_t high = 1; high < device_ctx.grid.height(); high++) { - chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high]; - for (size_t low = 0; low < high; low++) { - chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] - + device_ctx.chan_width.x_list[high]; - } - } - - /* Now compute the inverse of the average number of tracks per channel * - * between high and low. The cost function divides by the average * - * number of tracks per channel, so by storing the inverse I convert * - * this to a faster multiplication. Take this final number to the * - * place_cost_exp power -- numbers other than one mean this is no * - * longer a simple "average number of tracks"; it is some power of * - * that, allowing greater penalization of narrow channels. */ - - for (size_t high = 0; high < device_ctx.grid.height(); high++) - for (size_t low = 0; low <= high; low++) { - /* Since we will divide the wiring cost by the average channel * - * capacity between high and low, having only 0 width channels * - * will result in infinite wiring capacity normalization * - * factor, and extremely bad placer behaviour. Hence we change * - * this to a small (1 track) channel capacity instead. 
*/ - if (chanx_place_cost_fac[high][low] == 0.0f) { - VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low); - chanx_place_cost_fac[high][low] = 1.0f; - } - - chanx_place_cost_fac[high][low] = (high - low + 1.) - / chanx_place_cost_fac[high][low]; - chanx_place_cost_fac[high][low] = pow( - (double)chanx_place_cost_fac[high][low], - (double)place_cost_exp); - } - - /* Now do the same thing for the y-directed channels. First get the * - * number of tracks between channel high and channel low, inclusive. */ - - chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0]; - - for (size_t high = 1; high < device_ctx.grid.width(); high++) { - chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high]; - for (size_t low = 0; low < high; low++) { - chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] - + device_ctx.chan_width.y_list[high]; - } - } - - /* Now compute the inverse of the average number of tracks per channel * - * between high and low. Take to specified power. */ - - for (size_t high = 0; high < device_ctx.grid.width(); high++) - for (size_t low = 0; low <= high; low++) { - /* Since we will divide the wiring cost by the average channel * - * capacity between high and low, having only 0 width channels * - * will result in infinite wiring capacity normalization * - * factor, and extremely bad placer behaviour. Hence we change * - * this to a small (1 track) channel capacity instead. */ - if (chany_place_cost_fac[high][low] == 0.0f) { - VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low); - chany_place_cost_fac[high][low] = 1.0f; - } - - chany_place_cost_fac[high][low] = (high - low + 1.) 
- / chany_place_cost_fac[high][low]; - chany_place_cost_fac[high][low] = pow( - (double)chany_place_cost_fac[high][low], - (double)place_cost_exp); - } -} - static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, From 0d1b0396591654d484f1c00c960fef9dc1bf6ce1 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 15:43:13 -0400 Subject: [PATCH 113/188] add ERROR_TOL to placer globals --- vpr/src/place/placer_globals.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vpr/src/place/placer_globals.h b/vpr/src/place/placer_globals.h index 5e927d3b59c..195a6fc7890 100644 --- a/vpr/src/place/placer_globals.h +++ b/vpr/src/place/placer_globals.h @@ -7,4 +7,8 @@ #pragma once #include "placer_context.h" +/* This defines the error tolerance for floating points variables used in * + * cost computation. 0.01 means that there is a 1% error tolerance. */ +#define ERROR_TOL .01 + extern PlacerContext g_placer_ctx; From b0ed53f226d4cf7eb884e3a526721e6f630312f5 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 15:45:28 -0400 Subject: [PATCH 114/188] impl swap atoms in place re cluster --- vpr/src/place/place_re_cluster.cpp | 48 +++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp index 6182939b63e..93149c9c727 100644 --- a/vpr/src/place/place_re_cluster.cpp +++ b/vpr/src/place/place_re_cluster.cpp @@ -6,14 +6,26 @@ #include "globals.h" #include "move_utils.h" +#include "net_cost_handler.h" static ClusterBlockId random_cluster(); static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id); -void PlaceReCluster::re_cluster() { +static bool swap_atoms(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + AtomBlockId from_atom_blk_id, + AtomBlockId 
to_atom_blk_id); + +void PlaceReCluster::re_cluster(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities) { const int num_moves = 2 << 20; + t_pl_atom_blocks_to_be_moved blocks_affected(g_vpr_ctx.atom().nlist.blocks().size()); + for (int move_num = 0; move_num < num_moves; ++move_num) { ClusterBlockId from_cluster_blk_id; AtomBlockId from_atom_blk_id; @@ -32,8 +44,42 @@ void PlaceReCluster::re_cluster() { break; } } + + if(!swap_atoms(place_algorithm, delay_model, criticalities, blocks_affected, from_atom_blk_id, to_atom_blk_id)) { + revert_move_blocks(blocks_affected); + } } +} + +static bool swap_atoms (const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + t_pl_atom_blocks_to_be_moved& blocks_affected, + AtomBlockId from_atom_blk_id, + AtomBlockId to_atom_blk_id) { + + double delta_c = 0; //Change in cost due to this swap. + double bb_delta_c = 0; //Change in the bounding box (wiring) cost. + double timing_delta_c = 0; //Change in the timing cost (delay * criticality). 
+ + const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); + + e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); + + if (!floorplan_legal(blocks_affected)) { + return false; + } + + apply_move_blocks(blocks_affected); + + int num_nets_affected = find_affected_nets_and_update_costs( + place_algorithm, delay_model, criticalities, blocks_affected, + bb_delta_c, timing_delta_c); + + + + } static ClusterBlockId random_cluster() { From 9c24775bcf82572c7f0ac215cecb4f5f2f370d52 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 15:57:40 -0400 Subject: [PATCH 115/188] fix parameter names of t_pl_moved_block --- vpr/src/place/move_transactions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 5fd68efe1b5..0ec0972dec3 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -10,8 +10,8 @@ * new_loc: the location the block is moved to */ struct t_pl_moved_block { t_pl_moved_block() = default; - t_pl_moved_block(ClusterBlockId block_num, const t_pl_loc& old_loc, const t_pl_loc& new_loc) - : block_num(block_num), old_loc(old_loc), new_loc(new_loc) {} + t_pl_moved_block(ClusterBlockId block_num_, const t_pl_loc& old_loc_, const t_pl_loc& new_loc_) + : block_num(block_num_), old_loc(old_loc_), new_loc(new_loc_) {} ClusterBlockId block_num = ClusterBlockId::INVALID(); t_pl_loc old_loc; t_pl_loc new_loc; From 60eee08d5d62412fa78828c4a4f1db057b8c9c5d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 16:47:01 -0400 Subject: [PATCH 116/188] fix a bug with passing loc to get_physical_type --- vpr/src/place/atom_critical_uniform_move_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index ab218f7511a..05263b5e9ef 100644 --- 
a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -19,7 +19,7 @@ e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_m t_pl_loc from = place_ctx.block_locs[cluster_blk_id].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(cluster_blk_id); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; From daf4352d76643a42c20f868929397bcd51473d67 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 17:52:27 -0400 Subject: [PATCH 117/188] remove ERROR_TOL and add it to place.cpp and net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 4 ++++ vpr/src/place/place.cpp | 4 ++++ vpr/src/place/placer_globals.h | 4 ---- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 8dcd567b8c3..670dec1ea25 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -14,6 +14,10 @@ using std::min; #define UPDATED_ONCE 'U' #define GOT_FROM_SCRATCH 'S' +/* This defines the error tolerance for floating points variables used in * + * cost computation. 0.01 means that there is a 1% error tolerance. */ +#define ERROR_TOL .01 + /* Expected crossing counts for nets with different #'s of pins. From * * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). 
* * Multiplied to bounding box of a net to better estimate wire length * diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index ae131058030..85d67f1e531 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -90,6 +90,10 @@ using std::min; /************** Types and defines local to place.c ***************************/ +/* This defines the error tolerance for floating points variables used in * + * cost computation. 0.01 means that there is a 1% error tolerance. */ +#define ERROR_TOL .01 + /* This defines the maximum number of swap attempts before invoking the * * once-in-a-while placement legality check as well as floating point * * variables round-offs check. */ diff --git a/vpr/src/place/placer_globals.h b/vpr/src/place/placer_globals.h index 195a6fc7890..5e927d3b59c 100644 --- a/vpr/src/place/placer_globals.h +++ b/vpr/src/place/placer_globals.h @@ -7,8 +7,4 @@ #pragma once #include "placer_context.h" -/* This defines the error tolerance for floating points variables used in * - * cost computation. 0.01 means that there is a 1% error tolerance. 
*/ -#define ERROR_TOL .01 - extern PlacerContext g_placer_ctx; From d2a9c06a2166374f032fa3bc659137de58220243 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 18:40:36 -0400 Subject: [PATCH 118/188] comment unused vars in place_re_cluster --- vpr/src/place/place_re_cluster.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp index 93149c9c727..5bbfe8a7218 100644 --- a/vpr/src/place/place_re_cluster.cpp +++ b/vpr/src/place/place_re_cluster.cpp @@ -56,16 +56,16 @@ static bool swap_atoms (const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, t_pl_atom_blocks_to_be_moved& blocks_affected, - AtomBlockId from_atom_blk_id, - AtomBlockId to_atom_blk_id) { + AtomBlockId /* from_atom_blk_id */, + AtomBlockId /* to_atom_blk_id */) { double delta_c = 0; //Change in cost due to this swap. double bb_delta_c = 0; //Change in the bounding box (wiring) cost. double timing_delta_c = 0; //Change in the timing cost (delay * criticality). 
- const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); +// const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); - e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); +// e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); if (!floorplan_legal(blocks_affected)) { return false; @@ -77,8 +77,8 @@ static bool swap_atoms (const t_place_algorithm& place_algorithm, place_algorithm, delay_model, criticalities, blocks_affected, bb_delta_c, timing_delta_c); - - + // TODO:dummy return just to remove warnings + return (num_nets_affected + delta_c) == 0; } @@ -94,7 +94,7 @@ static ClusterBlockId random_cluster() { static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); +// const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup[cluster_blk_id]; From c9b951dddafd5a79f4ca436ae224b31c39f08d15 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 18:41:18 -0400 Subject: [PATCH 119/188] comment unused vars in net_cost_handler and atom_critical... 
--- vpr/src/place/atom_critical_uniform_move_generator.cpp | 1 - vpr/src/place/net_cost_handler.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index 05263b5e9ef..3801cd7d315 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -7,7 +7,6 @@ static std::pair getCriticalAtomBlock(); e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.move(); ClusterBlockId cluster_blk_id = ClusterBlockId::INVALID(); AtomBlockId atom_blk_id = AtomBlockId::INVALID(); diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 670dec1ea25..19df5aee79b 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -765,7 +765,6 @@ int find_affected_nets_and_update_costs( const auto& atom_look_up = g_vpr_ctx.atom().lookup; const auto& atom_nlist = g_vpr_ctx.atom().nlist; - const auto& clb_nlsit = g_vpr_ctx.clustering().clb_nlist; VTR_ASSERT_SAFE(bb_delta_c == 0.); VTR_ASSERT_SAFE(timing_delta_c == 0.); From c747efb7170be1af6b2e6a681291153939991b95 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 18:41:59 -0400 Subject: [PATCH 120/188] remove assigning invalid to cluster block id --- vpr/src/place/move_transactions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 0ec0972dec3..cb37c4b97b3 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -12,7 +12,7 @@ struct t_pl_moved_block { t_pl_moved_block() 
= default; t_pl_moved_block(ClusterBlockId block_num_, const t_pl_loc& old_loc_, const t_pl_loc& new_loc_) : block_num(block_num_), old_loc(old_loc_), new_loc(new_loc_) {} - ClusterBlockId block_num = ClusterBlockId::INVALID(); + ClusterBlockId block_num; t_pl_loc old_loc; t_pl_loc new_loc; }; From 83a7b025d3c170fbbf64330d1e9313ce5ca89671 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 17 Oct 2023 19:05:29 -0400 Subject: [PATCH 121/188] use the interface for propose move of AtomCriticalUniformMoveGenerator --- vpr/src/place/atom_critical_uniform_move_generator.cpp | 3 ++- vpr/src/place/atom_critical_uniform_move_generator.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index 3801cd7d315..08c8b39d957 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -4,7 +4,8 @@ static std::pair getCriticalAtomBlock(); -e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { +e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, + float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) { auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/place/atom_critical_uniform_move_generator.h b/vpr/src/place/atom_critical_uniform_move_generator.h index b2f99a2a39d..2e934a11bbd 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.h +++ b/vpr/src/place/atom_critical_uniform_move_generator.h @@ -20,6 +20,7 @@ * Returns its choices by filling in affected_blocks. 
*/ class AtomCriticalUniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, + float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) override; }; #endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H From cb21aaf7d5a7705ed92c5b172eb80998df6f2c66 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 10:50:54 -0400 Subject: [PATCH 122/188] solve conflicts with api debug branch dir: libarchfpga and src/base --- libs/libarchfpga/src/physical_types.h | 10 ++----- vpr/src/base/SetupVPR.cpp | 1 - vpr/src/base/atom_lookup.cpp | 19 +----------- vpr/src/base/clustered_netlist_utils.cpp | 22 ++++++++++++++ vpr/src/base/clustered_netlist_utils.h | 19 ++++++++++++ vpr/src/base/read_options.cpp | 8 +---- vpr/src/base/read_options.h | 1 - vpr/src/base/vpr_context.h | 37 +++++------------------- vpr/src/base/vpr_types.cpp | 23 +++++++++++++-- vpr/src/pack/re_cluster.cpp | 4 ++- vpr/src/pack/re_cluster.h | 9 ++---- vpr/src/pack/re_cluster_util.cpp | 6 ---- vpr/src/pack/re_cluster_util.h | 4 --- 13 files changed, 80 insertions(+), 83 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 38241eaafdf..f0c66e6e11e 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1272,14 +1272,8 @@ class t_pb_graph_node { int total_pb_pins; /* only valid for top-level */ - void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ - - /* Indices for cluster_placement_primitive in the cluster_placement_stats structure (useful during packing) */ - /* Now, we pass these indices instead of passing a 
pointer (t_cluster_placement_primitive*). */ - /* This is useful especially in case of multi-threaded packing */ - int cluster_placement_primitive_index; - int cluster_placement_type_index; - int lb_type_index; + void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */ + t_cluster_placement_primitive* cluster_placement_primitive; /* pointer to indexing structure useful during packing stage */ int* input_pin_class_size; /* Stores the number of pins that belong to a particular input pin class */ int num_input_pin_class; /* number of input pin classes that this pb_graph_node has */ diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 50b9adfd9a5..3f3761e1ad9 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -584,7 +584,6 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->timing_update_type = Options.timing_update_type; PackerOpts->pack_num_moves = Options.pack_num_moves; - PackerOpts->pack_num_threads = Options.pack_num_threads; PackerOpts->pack_move_type = Options.pack_move_type; } diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index 426eee84e81..8b3a45c0098 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -2,7 +2,6 @@ #include "vtr_log.h" #include "atom_lookup.h" -#include "globals.h" /* * PB */ @@ -16,21 +15,11 @@ const t_pb* AtomLookup::atom_pb(const AtomBlockId blk_id) const { } AtomBlockId AtomLookup::pb_atom(const t_pb* pb) const { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - packing_multithreading_ctx.lookup_mu.lock(); -#endif auto iter = atom_to_pb_.find(pb); if (iter == atom_to_pb_.inverse_end()) { //Not found -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.lookup_mu.unlock(); -#endif return AtomBlockId::INVALID(); } -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.lookup_mu.unlock(); -#endif return iter->second; } 
@@ -46,10 +35,7 @@ const t_pb_graph_node* AtomLookup::atom_pb_graph_node(const AtomBlockId blk_id) void AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If either of blk_id or pb are not valid, //remove any mapping -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - packing_multithreading_ctx.lookup_mu.lock(); -#endif + if (!blk_id && pb) { //Remove atom_to_pb_.erase(pb); @@ -60,9 +46,6 @@ void AtomLookup::set_atom_pb(const AtomBlockId blk_id, const t_pb* pb) { //If both are valid store the mapping atom_to_pb_.update(blk_id, pb); } -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.lookup_mu.unlock(); -#endif } /* diff --git a/vpr/src/base/clustered_netlist_utils.cpp b/vpr/src/base/clustered_netlist_utils.cpp index 797f9dab368..a7488d9ba89 100644 --- a/vpr/src/base/clustered_netlist_utils.cpp +++ b/vpr/src/base/clustered_netlist_utils.cpp @@ -34,3 +34,25 @@ void ClusteredPinAtomPinsLookup::init_lookup(const ClusteredNetlist& clustered_n } } } + +ClusterAtomsLookup::ClusterAtomsLookup() { + init_lookup(); +} + +void ClusterAtomsLookup::init_lookup() { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + cluster_atoms.resize(cluster_ctx.clb_nlist.blocks().size()); + + for (auto atom_blk_id : atom_ctx.nlist.blocks()) { + ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); + + cluster_atoms[clb_index].push_back(atom_blk_id); + } +} + +std::vector ClusterAtomsLookup::atoms_in_cluster(ClusterBlockId blk_id) { + std::vector atoms = cluster_atoms[blk_id]; + return atoms; +} diff --git a/vpr/src/base/clustered_netlist_utils.h b/vpr/src/base/clustered_netlist_utils.h index 84bc11998c5..52688f88e47 100644 --- a/vpr/src/base/clustered_netlist_utils.h +++ b/vpr/src/base/clustered_netlist_utils.h @@ -26,4 +26,23 @@ class ClusteredPinAtomPinsLookup { vtr::vector> clustered_pin_connected_atom_pins_; vtr::vector atom_pin_connected_cluster_pin_; }; + 
+/* + * This lookup is used to see which atoms are in each cluster block. + * Getting the atoms inside of a cluster is an order k lookup. + * The data is initialized automatically upon creation of the object. + * The class should only be used after the clustered netlist is created. + */ +class ClusterAtomsLookup { + public: + ClusterAtomsLookup(); + std::vector atoms_in_cluster(ClusterBlockId blk_id); + + public: + void init_lookup(); + + private: + //Store the atom ids of the atoms inside each cluster + vtr::vector> cluster_atoms; +}; #endif diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 569772cd052..75a65f78799 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1815,13 +1815,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg pack_grp.add_argument(args.pack_num_moves, "--pack_num_moves") .help( "The number of moves that can be tried in packing stage") - .default_value("0") - .show_in(argparse::ShowIn::HELP_ONLY); - - pack_grp.add_argument(args.pack_num_threads, "--pack_num_threads") - .help( - "The number of threads used in the iterative improvement packing (IIP)") - .default_value("1") + .default_value("100000") .show_in(argparse::ShowIn::HELP_ONLY); pack_grp.add_argument(args.pack_move_type, "--pack_move_type") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 10714d8fd3e..86f5d81651e 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -100,7 +100,6 @@ struct t_options { argparse::ArgValue pack_verbosity; argparse::ArgValue use_attraction_groups; argparse::ArgValue pack_num_moves; - argparse::ArgValue pack_num_threads; argparse::ArgValue pack_move_type; /* Placement options */ argparse::ArgValue Seed; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 26f06d5dbc0..82e7be31249 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -33,10 +33,6 @@ #include 
"noc_traffic_flows.h" #include "noc_routing.h" -//Flag to enable multithreading code for iterative packing (IIP) -//#define PACK_MULTITHREADED - -class SetupTimingInfo; /** * @brief A Context is collection of state relating to a particular part of VPR * @@ -328,14 +324,13 @@ struct ClusteringHelperContext : public Context { std::map num_used_type_instances; // Stats keeper for placement information during packing/clustering - // The vector size equals to the number of threads used in the IIP (pack_num_threads) - std::vector cluster_placement_stats; + t_cluster_placement_stats* cluster_placement_stats; // total number of models in the architecture int num_models; int max_cluster_size; - std::vector primitives_list; + t_pb_graph_node** primitives_list; bool enable_pin_feasibility_filter; int feasible_block_array_size; @@ -352,31 +347,19 @@ struct ClusteringHelperContext : public Context { // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. num_clustered_blocks-1] // unordered_set for faster insertion/deletion during the iterative improvement process of packing vtr::vector> atoms_lookup; - - // An unordered map of the count of connections between different clb blocks - // Only blocks that have connections between each others are added to this hash table - // This may be useful for some type of packing moves. - // Currently unused, commented out - //std::unordered_map, int, pair_hash> clb_conn_counts; - - // Some packing options. Saving them here instead of passing them to every packing function - std::unordered_map net_output_feeds_driving_block_input; - std::shared_ptr timing_info; - t_pack_high_fanout_thresholds high_fanout_thresholds; - bool timing_driven; + ~ClusteringHelperContext() { + delete[] primitives_list; + } }; /** * @brief State relating to packing multithreading * - * This contain data structures to synchronize multithreading of the iterative improvement packing (IIP). 
+ * This contain data structures to synchronize multithreading of packing iterative improvement. */ struct PackingMultithreadingContext : public Context { - // One lock per cluster - vtr::vector mu; - - // lock to synchronize atop_pb lookup access - std::mutex lookup_mu; + vtr::vector clb_in_flight; + vtr::vector mu; }; /** @@ -635,10 +618,8 @@ class VprContext : public Context { const NocContext& noc() const { return noc_; } NocContext& mutable_noc() { return noc_; } -#ifdef PACK_MULTITHREADED const PackingMultithreadingContext& packing_multithreading() const { return packing_multithreading_; } PackingMultithreadingContext& mutable_packing_multithreading() { return packing_multithreading_; } -#endif private: DeviceContext device_; @@ -656,9 +637,7 @@ class VprContext : public Context { FloorplanningContext constraints_; NocContext noc_; -#ifdef PACK_MULTITHREADED PackingMultithreadingContext packing_multithreading_; -#endif }; #endif diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index b76ed193450..c6dae8df2c7 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -258,8 +258,27 @@ void t_cluster_placement_stats::move_primitive_to_inflight(int pb_type_index, st * @note that valid status is not changed because if the primitive is not valid, it will get properly collected later */ void t_cluster_placement_stats::insert_primitive_in_valid_primitives(std::pair cluster_placement_primitive) { - int pb_type_index = cluster_placement_primitive.second->pb_graph_node->cluster_placement_type_index; - valid_primitives[pb_type_index].insert(cluster_placement_primitive); + int i; + bool success = false; + int null_index = OPEN; + t_cluster_placement_primitive* input_cluster_placement_primitive = cluster_placement_primitive.second; + + for (i = 0; i < num_pb_types && !success; i++) { + if (valid_primitives[i].empty()) { + null_index = i; + continue; + } + t_cluster_placement_primitive* cur_cluster_placement_primitive = 
valid_primitives[i].begin()->second; + if (input_cluster_placement_primitive->pb_graph_node->pb_type + == cur_cluster_placement_primitive->pb_graph_node->pb_type) { + success = true; + valid_primitives[i].insert(cluster_placement_primitive); + } + } + if (!success) { + VTR_ASSERT(null_index != OPEN); + valid_primitives[null_index].insert(cluster_placement_primitive); + } } void t_cluster_placement_stats::flush_queue(std::unordered_multimap& queue) { diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 8f786ae67cb..04e7ae23d3e 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -162,6 +162,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, return (is_added); } +#if 1 bool swap_two_molecules(t_pack_molecule* molecule_1, t_pack_molecule* molecule_2, bool during_packing, @@ -229,7 +230,7 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - + free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); free(clb_pb_2->name); @@ -289,3 +290,4 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, return true; } +#endif diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h index 610f09448ad..5ca2489aac4 100644 --- a/vpr/src/pack/re_cluster.h +++ b/vpr/src/pack/re_cluster.h @@ -26,8 +26,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, - t_clustering_data& clustering_data, - int thread_id); + t_clustering_data& clustering_data); /** * @brief This function moves a molecule out of its cluster to another cluster that already exists. @@ -42,8 +41,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, const ClusterBlockId& new_clb, bool during_packing, int verbosity, - t_clustering_data& clustering_data, - int thread_id); + t_clustering_data& clustering_data); /** * @brief This function swap two molecules between two different clusters. 
@@ -58,6 +56,5 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, t_pack_molecule* molecule_2, bool during_packing, int verbosity, - t_clustering_data& clustering_data, - int thread_id); + t_clustering_data& clustering_data); #endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index a6d2653635c..c22b72d5750 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -149,15 +149,9 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; pb->mode = mode; -<<<<<<< HEAD - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[thread_id][type->index]); - reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_placement_stats, pb->pb_graph_node, mode); -======= t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[type->index]); reset_cluster_placement_stats(cluster_placement_stats); set_mode_cluster_placement_stats(pb->pb_graph_node, mode); ->>>>>>> f84a79291df0319f3b0d0d6bf2bc989091c36541 pack_result = try_pack_molecule(cluster_placement_stats, molecule, diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index b523257a91b..2d001932c26 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -134,10 +134,6 @@ void commit_mol_move(const ClusterBlockId& old_clb, bool during_packing, bool new_clb_created); -<<<<<<< HEAD - -======= ->>>>>>> f84a79291df0319f3b0d0d6bf2bc989091c36541 /** * @brief A function that reverts the molecule move if it is illegal * From cabe15ec3b3c98cb59195ab3be1bee52d4065378 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 10:51:51 -0400 Subject: [PATCH 123/188] remove dir vpr/src/pack/improvements --- vpr/src/pack/improvement/pack_move_utils.cpp | 866 ------------------ vpr/src/pack/improvement/pack_move_utils.h | 65 -- 
vpr/src/pack/improvement/pack_utils.cpp | 250 ----- vpr/src/pack/improvement/pack_utils.h | 19 - vpr/src/pack/improvement/packing_cost.cpp | 129 --- vpr/src/pack/improvement/packing_cost.h | 24 - .../improvement/packing_move_generator.cpp | 345 ------- .../pack/improvement/packing_move_generator.h | 210 ----- 8 files changed, 1908 deletions(-) delete mode 100644 vpr/src/pack/improvement/pack_move_utils.cpp delete mode 100644 vpr/src/pack/improvement/pack_move_utils.h delete mode 100644 vpr/src/pack/improvement/pack_utils.cpp delete mode 100644 vpr/src/pack/improvement/pack_utils.h delete mode 100644 vpr/src/pack/improvement/packing_cost.cpp delete mode 100644 vpr/src/pack/improvement/packing_cost.h delete mode 100644 vpr/src/pack/improvement/packing_move_generator.cpp delete mode 100644 vpr/src/pack/improvement/packing_move_generator.h diff --git a/vpr/src/pack/improvement/pack_move_utils.cpp b/vpr/src/pack/improvement/pack_move_utils.cpp deleted file mode 100644 index 68d4766ba7e..00000000000 --- a/vpr/src/pack/improvement/pack_move_utils.cpp +++ /dev/null @@ -1,866 +0,0 @@ -// -// Created by elgammal on 2022-09-13. 
-// -#include "pack_move_utils.h" -#include "globals.h" -#include "cluster_placement.h" -#include "re_cluster_util.h" -#include - -static void calculate_connected_clbs_to_moving_mol(const t_pack_molecule* mol_1, std::vector& connected_blocks); -#if 0 -static void check_net_absorption(const AtomNetId& atom_net_id, - const ClusterBlockId & new_clb, - std::map direct_connections, - bool& previously_absorbed, - bool& newly_absorbed); - -static void update_cutsize_for_net(int& new_cutsize, - bool previously_absorbed, - bool newly_absorbed); -#endif - -#if 0 -int calculate_cutsize_of_clb(ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - //Define the initial conditions - int num_unabsorbed = 0; - - //list the atoms inside the current cluster - for (auto& pin_id : cluster_ctx.clb_nlist.block_pins(clb_index)) { - if (cluster_ctx.clb_nlist.pin_net(pin_id) != ClusterNetId::INVALID()) { - ++num_unabsorbed; - } - } - return num_unabsorbed; -} -#endif - -int calculate_cutsize_change(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - std::unordered_set moving_atoms; - std::unordered_set moving_nets; - int cutsize_change = 0; - - auto clb_1 = new_locs[0].new_clb; - auto clb_2 = new_locs[1].new_clb; - - for (auto& new_loc : new_locs) { - for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) { - moving_atoms.insert(atom); - for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { - auto atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_net && atom_ctx.nlist.net_pins(atom_net).size() < LARGE_FANOUT_LIMIT) - moving_nets.insert(atom_net); - } - } - } - } - - for (auto& net_id : moving_nets) { - bool net_has_pin_outside = false; - std::unordered_set clbs_before; - std::unordered_set clbs_after; - - for (auto& pin_id : atom_ctx.nlist.net_pins(net_id)) { - if (net_has_pin_outside) - break; - - auto atom_blk_id = atom_ctx.nlist.pin_block(pin_id); - auto clb = atom_to_cluster(atom_blk_id); - if 
(moving_atoms.count(atom_blk_id) == 0) { // this atom is NOT one of the moving blocks - clbs_before.insert(clb); - clbs_after.insert(clb); - } else { // this atom is one of the moving blocks - clbs_before.insert(clb); - if (clb == clb_1) - clbs_after.insert(clb_2); - else - clbs_after.insert(clb_1); - } - } - if (clbs_before.size() == 1 && clbs_after.size() > 1) - cutsize_change++; - else if (clbs_before.size() > 1 && clbs_after.size() == 1) - cutsize_change--; - } - return cutsize_change; -} -int absorbed_conn_change(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // initialize the old and new cut sizes - int newly_absorbed_conn = 0; - - // define some temporary - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - - std::unordered_set moving_atoms; - for (auto& new_loc : new_locs) { - for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) - moving_atoms.insert(atom); - } - } - - for (auto& new_loc : new_locs) { - ClusterBlockId new_block_id = new_loc.new_clb; - ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (!moving_atom) - continue; - - for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (moving_atoms.count(cur_atom)) - continue; - - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) { - newly_absorbed_conn++; - } else if (cur_clb == old_block_id) { - newly_absorbed_conn--; - } - } - } - } - } - - return newly_absorbed_conn; -} - -float absorbed_pin_terminals(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // initialize the old and new cut sizes - float 
absorbed_conn_change = 0; - - // define some temporary - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - std::unordered_set moving_atoms; - std::unordered_set moving_nets; - for (auto& new_loc : new_locs) { - for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) { - moving_atoms.insert(atom); - for (auto& atom_pin : atom_ctx.nlist.block_pins(atom)) { - moving_nets.insert(atom_ctx.nlist.pin_net(atom_pin)); - } - } - } - } - - // iterate over the molecules that will be moving - for (auto& new_loc : new_locs) { - ClusterBlockId new_block_id = new_loc.new_clb; - ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - - // iterate over atoms of the moving molecule - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (!moving_atom) - continue; - - // iterate over the atom pins - for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (!moving_nets.count(atom_net)) - continue; - - moving_nets.erase(atom_net); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - int num_pins_in_new = 0; - // iterate over the net pins - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (!moving_atoms.count(cur_atom)) { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) { - num_pins_in_new++; - } else if (cur_clb == old_block_id) { - num_pins_in_new--; - } - } - } - absorbed_conn_change += (float)(num_pins_in_new) / (float)atom_ctx.nlist.net_pins(atom_net).size(); - } - } - } - - return absorbed_conn_change; -} - -bool evaluate_move_based_on_terminals(const std::vector& new_locs) { - return absorbed_pin_terminals(new_locs) > 0; -} - -float absorbed_pin_terminals_and_nets(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // initialize the old and new cut sizes - float 
absorbed_conn_change = 0; - - // define some temporary - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - std::unordered_set moving_atoms; - for (auto& new_loc : new_locs) { - for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) - moving_atoms.insert(atom); - } - } - - // iterate over the molecules that will be moving - for (auto& new_loc : new_locs) { - ClusterBlockId new_block_id = new_loc.new_clb; - ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - - // iterate over atoms of the moving molecule - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (!moving_atom) - continue; - - // iterate over the atom pins - for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - int num_old_absorbed = 0; - int num_new_absorbed = 0; - // iterate over the net pins - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (cur_atom == moving_atom) { - num_old_absorbed++; - num_new_absorbed++; - } else if (moving_atoms.count(cur_atom)) { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == old_block_id) { - num_old_absorbed++; - num_new_absorbed++; - } - } else { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) { - num_new_absorbed++; - } else if (cur_clb == old_block_id) { - num_old_absorbed++; - } - } - } - absorbed_conn_change += (float)(num_new_absorbed - num_old_absorbed) / (float)atom_ctx.nlist.net_pins(atom_net).size() + (int)(num_new_absorbed) / (int)atom_ctx.nlist.net_pins(atom_net).size() - (int)num_old_absorbed / (int)atom_ctx.nlist.net_pins(atom_net).size(); - } - } - } - - return absorbed_conn_change; -} - -bool evaluate_move_based_on_terminals_and_nets(const std::vector& new_locs) { - return 
absorbed_pin_terminals_and_nets(new_locs) > 0; -} - -float abosrbed_terminal_new_formula(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // initialize the old and new cut sizes - float absorbed_conn_change = 0; - - // define some temporary - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - std::unordered_set moving_atoms; - for (auto& new_loc : new_locs) { - for (auto& atom : new_loc.molecule_to_move->atom_block_ids) { - if (atom) - moving_atoms.insert(atom); - } - } - - // iterate over the molecules that will be moving - for (auto& new_loc : new_locs) { - ClusterBlockId new_block_id = new_loc.new_clb; - ClusterBlockId old_block_id = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - - // iterate over atoms of the moving molecule - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (!moving_atom) - continue; - - // iterate over the atom pins - for (auto& atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - AtomNetId atom_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - int old_pin_outside = 0; - int new_pin_outside = 0; - // iterate over the net pins - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - if (cur_atom == moving_atom) { - //old_pin_outside++; - } else if (moving_atoms.count(cur_atom)) { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb == new_block_id) { - old_pin_outside++; - new_pin_outside++; - } - } else { - cur_clb = atom_to_cluster(cur_atom); - if (cur_clb != new_block_id) { - new_pin_outside++; - } - if (cur_clb != old_block_id) { - old_pin_outside++; - } - } - } - float terminals = (float)atom_ctx.nlist.net_pins(atom_net).size(); - absorbed_conn_change += (float)old_pin_outside / (terminals - old_pin_outside + 1.) 
- (float)new_pin_outside / (terminals - new_pin_outside + 1.); - } - } - } - - return absorbed_conn_change; -} - -bool evaluate_move_based_on_terminals_new_formula(const std::vector& new_locs) { - return abosrbed_terminal_new_formula(new_locs) > 0; -} -#if 0 -int update_cutsize_after_move(const std::vector& new_locs, - int original_cutsize) { - auto& atom_ctx = g_vpr_ctx.atom(); - int new_cutsize = original_cutsize; - std::map direct_connections; - - //iterate over the molecules that are moving - for(auto new_loc : new_locs) { - //iterate over the atom of a molecule - for (int i_atom = 0; i_atom < new_loc.molecule_size; i_atom++) { - if (new_loc.molecule_to_move->atom_block_ids[i_atom]) { - //iterate over the moving atom pins - for (auto& pin_id : atom_ctx.nlist.block_pins(new_loc.molecule_to_move->atom_block_ids[i_atom])) { - AtomNetId atom_net_id = atom_ctx.nlist.pin_net(pin_id); - - //if this pin is connected to a net - if (atom_net_id) { - ClusterPinId cluster_pin; - bool previously_absorbed, newly_absorbed; - - //check the status of this net (absorbed or not) before and after the proposed move - check_net_absorption(atom_net_id, - new_loc.new_clb, - direct_connections, - previously_absorbed, - newly_absorbed); - - //update the cutsize based on the absorption of a net before and after the move - update_cutsize_for_net(new_cutsize, - previously_absorbed, - newly_absorbed); - } - } - } - } - } - - /* consider the case of swapping two atoms that are directly connected - * - * In this case, the algorithm will minimize the cutsize by one when iterating over the first atom pins and minimize it again - * when iterating over the 2nd atom pins. However, the cutsize should remain the same. 
Hence, - * We are increasing the cutsize by 2 for this specific case - */ - for(auto& direct_conn: direct_connections) { - if(direct_conn.second > 1) { - new_cutsize += 2; - } - } - return new_cutsize; -} -#endif - -t_pack_molecule* pick_molecule_randomly() { - auto& atom_ctx = g_vpr_ctx.atom(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - bool molecule_picked = false; - t_pack_molecule* molecule; - - while (!molecule_picked) { - int rand_num = vtr::irand((int)atom_ctx.nlist.blocks().size() - 1); - AtomBlockId random_atom = AtomBlockId(rand_num); - ClusterBlockId clb_index = atom_to_cluster(random_atom); - if (!clb_index) - continue; -#ifdef PACK_MULTITHREADED - if (packing_multithreading_ctx.mu[clb_index]->try_lock()) { -#endif - auto rng = atom_ctx.atom_molecules.equal_range(random_atom); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - molecule = kv.second; - molecule_picked = true; - break; - } -#ifdef PACK_MULTITHREADED - } else { - continue; //CLB is already in-flight - } -#endif - } - return molecule; -} - -bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { - auto& atom_ctx = g_vpr_ctx.atom(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - std::vector connected_blocks; - calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); - if (connected_blocks.empty()) - return false; - - // pick a random clb block from the connected blocks - bool clb2_not_found = true; - ClusterBlockId clb_index_2; - int iteration = 0; - while (clb2_not_found && iteration < 20) { - int rand_num = vtr::irand((int)connected_blocks.size() - 1); - clb_index_2 = connected_blocks[rand_num]; -#ifdef PACK_MULTITHREADED - if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { -#endif - clb2_not_found = false; -#ifdef PACK_MULTITHREADED - } -#endif - iteration++; - } - - if (clb2_not_found) 
- return false; - - //pick a random molecule for the chosen block - std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - - int rand_num = vtr::irand((int)atom_ids->size() - 1); - auto it = atom_ids->begin(); - std::advance(it, rand_num); - AtomBlockId atom_id = *it; - auto rng = atom_ctx.atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - mol_2 = kv.second; - return true; - } -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.mu[clb_index_2]->unlock(); -#endif - return false; -} - -bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { - auto& atom_ctx = g_vpr_ctx.atom(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - std::vector connected_blocks; - calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); - if (connected_blocks.empty()) - return false; - - // pick a random clb block from the connected blocks - bool clb2_not_found = true; - ClusterBlockId clb_index_2; - int iteration = 0; - while (clb2_not_found && iteration < 10) { - clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; -#ifdef PACK_MULTITHREADED - if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { -#endif - clb2_not_found = false; -#ifdef PACK_MULTITHREADED - } -#endif - iteration++; - } - - if (clb2_not_found) - return false; - - //pick a random molecule for the chosen block - std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - iteration = 0; - const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); - do { - int rand_num = vtr::irand((int)atom_ids->size() - 1); - auto it = atom_ids->begin(); - std::advance(it, rand_num); - AtomBlockId atom_id = *it; - auto rng = atom_ctx.atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - mol_2 = kv.second; - const t_pb* pb_2 = 
atom_ctx.lookup.atom_pb(mol_2->atom_block_ids[mol_2->root]); - if (strcmp(pb_1->pb_graph_node->pb_type->name, pb_2->pb_graph_node->pb_type->name) == 0) - return true; - else - iteration++; - } - } while (iteration < 20); -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.mu[clb_index_2]->unlock(); -#endif - return false; -} - -bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { - auto& atom_ctx = g_vpr_ctx.atom(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - std::vector connected_blocks; - calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); - if (connected_blocks.empty()) - return false; - - // pick a random clb block from the connected blocks - bool clb2_not_found = true; - ClusterBlockId clb_index_2; - int iteration = 0; - while (clb2_not_found && iteration < 10) { - clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; -#ifdef PACK_MULTITHREADED - if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { -#endif - clb2_not_found = false; -#ifdef PACK_MULTITHREADED - } -#endif - iteration++; - } - - if (clb2_not_found) - return false; - - //pick a random molecule for the chosen block - std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - iteration = 0; - const t_pb* pb_1 = atom_ctx.lookup.atom_pb(mol_1->atom_block_ids[mol_1->root]); - - do { - int rand_num = vtr::irand((int)atom_ids->size() - 1); - auto it = atom_ids->begin(); - std::advance(it, rand_num); - AtomBlockId atom_id = *it; - auto rng = atom_ctx.atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - mol_2 = kv.second; - const t_pb* pb_2 = atom_ctx.lookup.atom_pb(mol_2->atom_block_ids[mol_2->root]); - if (pb_1->pb_graph_node->pb_type == pb_2->pb_graph_node->pb_type) - return true; - else { - iteration++; - break; - } - } - } while (iteration < 10); -#ifdef PACK_MULTITHREADED - 
packing_multithreading_ctx.mu[clb_index_2]->unlock(); -#endif - return false; -} - -bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& mol_2) { - auto& atom_ctx = g_vpr_ctx.atom(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - std::vector connected_blocks; - calculate_connected_clbs_to_moving_mol(mol_1, connected_blocks); - if (connected_blocks.empty()) - return false; - - int mol_1_size = get_array_size_of_molecule(mol_1); - - // pick a random clb block from the connected blocks - bool clb2_not_found = true; - ClusterBlockId clb_index_2; - int iteration = 0; - while (clb2_not_found && iteration < 10) { - clb_index_2 = connected_blocks[vtr::irand((int)connected_blocks.size() - 1)]; -#ifdef PACK_MULTITHREADED - if (packing_multithreading_ctx.mu[clb_index_2]->try_lock()) { -#endif - clb2_not_found = false; -#ifdef PACK_MULTITHREADED - } -#endif - ++iteration; - } - - if (clb2_not_found) - return false; - - //pick a random molecule for the chosen block - std::unordered_set* atom_ids = cluster_to_atoms(clb_index_2); - iteration = 0; - do { - int rand_num = vtr::irand((int)atom_ids->size() - 1); - auto it = atom_ids->begin(); - std::advance(it, rand_num); - AtomBlockId atom_id = *it; - auto rng = atom_ctx.atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - mol_2 = kv.second; - if (std::abs(mol_1_size - get_array_size_of_molecule(mol_2)) <= 1) - return true; - else - iteration++; - } - } while (iteration < 20); -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.mu[clb_index_2]->unlock(); -#endif - return false; -} - -void build_mol_move_description(std::vector& new_locs, - t_pack_molecule* mol_1, - ClusterBlockId clb_index_1, - t_pack_molecule* mol_2, - ClusterBlockId clb_index_2) { - molMoveDescription temp; - temp.molecule_to_move = mol_1; - temp.new_clb = clb_index_2; - temp.molecule_size = 
get_array_size_of_molecule(mol_1); - new_locs.push_back(temp); - - temp.molecule_to_move = mol_2; - temp.new_clb = clb_index_1; - temp.molecule_size = get_array_size_of_molecule(mol_2); - new_locs.push_back(temp); -} - -bool evaluate_move_based_on_cutsize(const std::vector& new_locs) { - int change_in_cutsize = calculate_cutsize_change(new_locs); - if (change_in_cutsize < 0) - return true; - else - return false; -} - -bool evaluate_move_based_on_connection(const std::vector& new_locs) { - float change_in_absorbed_conn = absorbed_conn_change(new_locs); - - return (change_in_absorbed_conn > 0); -} -/********* static functions ************/ -/***************************************/ -#if 0 -static void check_net_absorption(const AtomNetId& atom_net_id, - const ClusterBlockId & new_clb, - std::map direct_connections, - bool& previously_absorbed, - bool& newly_absorbed) { - auto& atom_ctx = g_vpr_ctx.atom(); - - //check the status of the atom net before the move (absorbed or not) - ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); - if(clb_net_id == ClusterNetId::INVALID()) { - previously_absorbed = true; - } else { - previously_absorbed = false; - } - - //check the status of the atom net after the move (absorbed or not) - newly_absorbed = true; - AtomBlockId atom_block_id; - ClusterBlockId clb_index; - for(auto& net_pin_id : atom_ctx.nlist.net_pins(atom_net_id)) { - atom_block_id = atom_ctx.nlist.pin_block(net_pin_id); - clb_index = atom_ctx.lookup.atom_clb(atom_block_id); - if(clb_index == new_clb) { - if(direct_connections.find(atom_net_id) == direct_connections.end()) { - direct_connections.insert(std::make_pair(atom_net_id, 1)); - } else { - ++direct_connections[atom_net_id]; - } - } - if(clb_index != new_clb) { - newly_absorbed = false; - break; - } - } -} -static void update_cutsize_for_net(int& new_cutsize, bool previously_absorbed, bool newly_absorbed) { - if(previously_absorbed && !newly_absorbed) { - new_cutsize++; - } else 
if(!previously_absorbed && newly_absorbed) { - new_cutsize--; - } -} -#endif - -static void calculate_connected_clbs_to_moving_mol(const t_pack_molecule* mol_1, std::vector& connected_blocks) { - // get the clb index of the first molecule - ClusterBlockId clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - - t_logical_block_type_ptr block_type_1 = cluster_ctx.clb_nlist.block_type(clb_index_1); - t_logical_block_type_ptr block_type_2; - - AtomNetId cur_net; - AtomBlockId cur_atom; - ClusterBlockId cur_clb; - - // Calculate the connected blocks to the moving molecule - for (auto& atom_id : mol_1->atom_block_ids) { - if (atom_id) { - for (auto& atom_pin : atom_ctx.nlist.block_pins(atom_id)) { - cur_net = atom_ctx.nlist.pin_net(atom_pin); - if (atom_ctx.nlist.net_pins(cur_net).size() > LARGE_FANOUT_LIMIT) - continue; - for (auto& net_pin : atom_ctx.nlist.net_pins(cur_net)) { - cur_atom = atom_ctx.nlist.pin_block(net_pin); - cur_clb = atom_to_cluster(cur_atom); - block_type_2 = cluster_ctx.clb_nlist.block_type(cur_clb); - if (cur_clb != clb_index_1 && block_type_1 == block_type_2) - connected_blocks.push_back(cur_clb); - } - } - } - } -} - -/************* CLB-CLB connection count hash table helper functions ***************/ -void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts) { - auto& atom_ctx = g_vpr_ctx.atom(); - - for (auto atom_net : atom_ctx.nlist.nets()) { - if (atom_ctx.nlist.net_pins(atom_net).size() > 7) - continue; - - std::unordered_set clusters; - for (auto atom_pin_it = atom_ctx.nlist.net_pins(atom_net).begin(); atom_pin_it != atom_ctx.nlist.net_pins(atom_net).end(); atom_pin_it++) { - auto clb1 = atom_to_cluster(atom_ctx.nlist.pin_block(*atom_pin_it)); - clusters.insert(clb1); - for (auto atom_pin_it2 = atom_pin_it + 1; atom_pin_it2 != atom_ctx.nlist.net_pins(atom_net).end(); atom_pin_it2++) { - auto clb2 = 
atom_to_cluster(atom_ctx.nlist.pin_block(*atom_pin_it2)); - if (clusters.count(clb2) == 0) { - if (conn_counts.find({clb1, clb2}) == conn_counts.end()) - conn_counts.insert({{clb1, clb2}, 1}); - else - conn_counts[{clb1, clb2}]++; - - clusters.insert(clb2); - } - } - } - } -} - -void print_block_connections(const std::unordered_map, int, pair_hash>& conn_count) { - for (const auto& block_pair_count : conn_count) { - VTR_LOG("Block : %d is connected to Block: %d with %d direct connections.\n", - block_pair_count.first.first, block_pair_count.first.second, block_pair_count.second); - } -} - -std::pair, int> get_max_value_pair(const std::unordered_map, int, pair_hash>& conn_count) { - auto max_iter = std::max_element(conn_count.begin(), conn_count.end(), - [](const auto& a, auto& b) { return a.second < b.second; }); - return *max_iter; -} - -bool evaluate_move_based_on_terminals_outside(const std::vector& new_locs) { - auto& atom_ctx = g_vpr_ctx.atom(); - - int pins_in1_before, pins_in2_before, pins_in1_after, pins_in2_after, pins_outside_before, pins_outside_after; - double cost = 0; - std::unordered_set moving_atoms; - - for (auto& new_loc : new_locs) { - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (moving_atom) { - moving_atoms.insert(moving_atom); - } - } - } - - // iterate over moves proposed (a swap is two moves) - std::unordered_set moving_nets; - for (auto& new_loc : new_locs) { - auto cur_clb = atom_to_cluster(new_loc.molecule_to_move->atom_block_ids[new_loc.molecule_to_move->root]); - // iterate over atoms in the moving molcule - for (auto& moving_atom : new_loc.molecule_to_move->atom_block_ids) { - if (moving_atom) { - // iterate over moving atom pins - for (auto& moving_atom_pin : atom_ctx.nlist.block_pins(moving_atom)) { - auto atom_net = atom_ctx.nlist.pin_net(moving_atom_pin); - if (atom_ctx.nlist.net_pins(atom_net).size() > LARGE_FANOUT_LIMIT) - continue; - - // Make sure that we didn't count this net before - if 
(moving_nets.count(atom_net)) - continue; - - moving_nets.insert(atom_net); - pins_in1_before = 0; - pins_in2_before = 0; - pins_in1_after = 0; - pins_in2_after = 0; - pins_outside_before = 0; - pins_outside_after = 0; - - for (auto& pin : atom_ctx.nlist.net_pins(atom_net)) { - auto atom = atom_ctx.nlist.pin_block(pin); - auto cluster = atom_to_cluster(atom); - if (moving_atoms.count(atom)) { - if (cluster == cur_clb) { - pins_in1_before++; - pins_in2_after++; - } else { - pins_in2_before++; - pins_in1_after++; - } - } else { - if (cluster == cur_clb) { - pins_in1_before++; - pins_in1_after++; - } else if (cluster == new_loc.new_clb) { - pins_in2_before++; - pins_in2_after++; - } else { - pins_outside_before++; - pins_outside_after++; - } - } - } - cost += (double)std::max(pins_in1_after, pins_in2_after) / (pins_outside_after + std::min(pins_in1_after, pins_in2_after) + 1.) - (double)std::max(pins_in1_before, pins_in2_before) / (pins_outside_before + std::min(pins_in1_before, pins_in2_before) + 1.); - } - } - } - } - return (cost > 0); -} \ No newline at end of file diff --git a/vpr/src/pack/improvement/pack_move_utils.h b/vpr/src/pack/improvement/pack_move_utils.h deleted file mode 100644 index 981c135af81..00000000000 --- a/vpr/src/pack/improvement/pack_move_utils.h +++ /dev/null @@ -1,65 +0,0 @@ -// -// Created by elgammal on 2022-09-13. 
-// - -#ifndef VTR_PACK_MOVE_UTILS_H -#define VTR_PACK_MOVE_UTILS_H - -#include "vpr_types.h" - -//#define pack_improve_debug - -const int LARGE_FANOUT_LIMIT = 5; - -struct molMoveDescription { - t_pack_molecule* molecule_to_move = nullptr; - int molecule_size = 0; - ClusterBlockId new_clb = INVALID_BLOCK_ID; -}; - -t_pack_molecule* pick_molecule_randomly(); -bool pick_molecule_connected(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); -bool pick_molecule_connected_same_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); -bool pick_molecule_connected_compatible_type(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); -bool pick_molecule_connected_same_size(t_pack_molecule* mol_1, t_pack_molecule*& mol_2); - -void build_mol_move_description(std::vector& new_locs, - t_pack_molecule* mol_1, - ClusterBlockId clb_index_1, - t_pack_molecule* mol_2, - ClusterBlockId clb_index_2); - -bool evaluate_move_based_on_cutsize(const std::vector& new_locs); -int calculate_cutsize_change(const std::vector& new_locs); - -/* Calculate the change of the absorbed connection */ -/* +ve means more connections are absorbed */ -int absorbed_conn_change(const std::vector& new_locs); -bool evaluate_move_based_on_connection(const std::vector& new_locs); - -/* Calculate the number of abosrbed terminals of a net */ -/* +ve means more terminal are now absorbed */ -float absorbed_pin_terminals(const std::vector& new_locs); -bool evaluate_move_based_on_terminals(const std::vector& new_locs); - -/* Calculate the number of absorbed terminals of a net * - * and add a bonus for absorbing the whole net * - * +ve means more terminals are now absorbed */ -float absorbed_pin_terminals_and_nets(const std::vector& new_locs); -bool evaluate_move_based_on_terminals_and_nets(const std::vector& new_locs); - -float abosrbed_terminal_new_formula(const std::vector& new_locs); -bool evaluate_move_based_on_terminals_new_formula(const std::vector& new_locs); - -bool evaluate_move_based_on_terminals_outside(const 
std::vector& new_locs); - -void init_clb_clb_conn_numbers(std::unordered_map, int, pair_hash>& conn_counts); -void print_block_connections(const std::unordered_map, int, pair_hash>& conn_count); -std::pair, int> get_max_value_pair(const std::unordered_map, int, pair_hash>& conn_count); -#if 0 -int calculate_cutsize_of_clb(ClusterBlockId clb_index); -int update_cutsize_after_move(const std::vector& new_locs, - int original_cutsize); -#endif - -#endif //VTR_PACK_MOVE_UTILS_H diff --git a/vpr/src/pack/improvement/pack_utils.cpp b/vpr/src/pack/improvement/pack_utils.cpp deleted file mode 100644 index d9f6dec2663..00000000000 --- a/vpr/src/pack/improvement/pack_utils.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// -// Created by elgammal on 2022-07-27. -// - -#include "pack_utils.h" -#include "re_cluster.h" -#include "re_cluster_util.h" -#include "globals.h" -#include "clustered_netlist_fwd.h" -#include "move_utils.h" -#include "cluster_placement.h" -#include "packing_move_generator.h" -#include "pack_move_utils.h" -#include "string.h" -#include "vtr_time.h" -//#include -#include -void printProgressBar(double progress); -void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats); - -#ifdef PACK_MULTITHREADED -void init_multithreading_locks(); -void free_multithreading_locks(); -#endif - -#ifdef PACK_MULTITHREADED -void init_multithreading_locks() { - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - auto& helper_ctx = g_vpr_ctx.cl_helper(); - - packing_multithreading_ctx.mu.resize(helper_ctx.total_clb_num); - for (auto& m : packing_multithreading_ctx.mu) { - m = new std::mutex; - } -} -#endif - -#ifdef PACK_MULTITHREADED -void free_multithreading_locks() { - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); - for (auto& m : packing_multithreading_ctx.mu) { - delete m; - } -} -#endif - -void iteratively_improve_packing(const 
t_packer_opts& packer_opts, t_clustering_data& clustering_data, int) { - /* - * auto& cluster_ctx = g_vpr_ctx.clustering(); - * auto& atom_ctx = g_vpr_ctx.atom(); - */ - t_pack_iterative_stats pack_stats; - - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - init_clb_atoms_lookup(helper_ctx.atoms_lookup); - //init_clb_clb_conn_numbers(helper_ctx.clb_conn_counts); - //print_block_connections(helper_ctx.clb_conn_counts); -#ifdef pack_improve_debug - float propose_sec = 0; - float evaluate_sec = 0; - float apply_suc_sec = 0; - float apply_fail_sec = 0; -#endif - - unsigned int total_num_moves = packer_opts.pack_num_moves; - const int num_threads = packer_opts.pack_num_threads; - unsigned int moves_per_thread = total_num_moves / num_threads; - std::thread* my_threads = new std::thread[num_threads]; -#ifdef PACK_MULTITHREADED - init_multithreading_locks(); -#endif - - for (int i = 0; i < (num_threads - 1); i++) { - my_threads[i] = std::thread(try_n_packing_moves, i, moves_per_thread, packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - } - my_threads[num_threads - 1] = std::thread(try_n_packing_moves, num_threads - 1, total_num_moves - (moves_per_thread * (num_threads - 1)), packer_opts.pack_move_type, std::ref(clustering_data), std::ref(pack_stats)); - - for (int i = 0; i < num_threads; i++) - my_threads[i].join(); - - VTR_LOG("\n### Iterative packing stats: \n\tpack move type = %s\n\ttotal pack moves = %zu\n\tgood pack moves = %zu\n\tlegal pack moves = %zu\n\n", - packer_opts.pack_move_type.c_str(), - packer_opts.pack_num_moves, - pack_stats.good_moves, - pack_stats.legal_moves); - - delete[] my_threads; -#ifdef PACK_MULTITHREADED - free_multithreading_locks(); -#endif -} - -const t_pack_molecule* get_atom_mol (AtomBlockId atom_blk_id) { - const t_pack_molecule* mol = nullptr; - const auto& atom_mol_map = g_vpr_ctx.atom().atom_molecules; - auto rng = atom_mol_map.equal_range(atom_blk_id); - - for (auto it = rng.first; it != rng.second; 
++it) { - mol = it->second; - if (mol->valid) { - break; - } - } - - return mol; -} - -void try_n_packing_moves(int thread_num, int n, const std::string& move_type, t_clustering_data& clustering_data, t_pack_iterative_stats& pack_stats) { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - - bool is_proposed, is_valid, is_successful; - std::vector new_locs; - int num_good_moves = 0; - int num_legal_moves = 0; - - std::unique_ptr move_generator; - if (strcmp(move_type.c_str(), "randomSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "randomConnSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedConnSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeConnSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeConnSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeConnSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "randomTerminalSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedTerminalSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), 
"semiDirectedCompatibleTypeTerminalSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "randomTerminalNetSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedTerminalNetSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalNetSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalNetSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalNetSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "randomTerminalNetNewFormulaSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedTerminalNetNewFormulaSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalNetNewFormulaSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalNetNewFormulaSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalNetNewFormulaSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "randomTerminalOutsideSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedTerminalOutsideSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeTerminalOutsideSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeTerminalOutsideSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeTerminalOutsideSwap") == 
0) - move_generator = std::make_unique(); - - else if (strcmp(move_type.c_str(), "randomCostEvaluationSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCostEvaluationSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameTypeCostEvaluationSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedCompatibleTypeCostEvaluationSwap") == 0) - move_generator = std::make_unique(); - else if (strcmp(move_type.c_str(), "semiDirectedSameSizeCostEvaluationSwap") == 0) - move_generator = std::make_unique(); - - else { - VTR_LOG("Packing move type (%s) is not correct!\n", move_type.c_str()); - VTR_LOG("Packing iterative improvement is aborted\n"); - return; - } - - for (int i = 0; i < n; i++) { - if (thread_num == 0 && (i * 10) % n == 0) { - printProgressBar(double(i) / n); - } - new_locs.clear(); - is_proposed = move_generator->propose_move(new_locs); - if (!is_proposed) { - continue; - } - is_valid = move_generator->evaluate_move(new_locs); - if (!is_valid) { -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); - packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); -#endif - continue; - } else { - num_good_moves++; - } - - is_successful = move_generator->apply_move(new_locs, clustering_data, thread_num); - if (is_successful) - num_legal_moves++; -#ifdef PACK_MULTITHREADED - packing_multithreading_ctx.mu[new_locs[0].new_clb]->unlock(); - packing_multithreading_ctx.mu[new_locs[1].new_clb]->unlock(); -#endif - } - - pack_stats.mu.lock(); - pack_stats.good_moves += num_good_moves; - pack_stats.legal_moves += num_legal_moves; - pack_stats.mu.unlock(); -} - -#include -#include - -void printProgressBar(double progress) { - int barWidth = 70; - - VTR_LOG("["); - int pos = barWidth * progress; - for (int i = 0; i < barWidth; ++i) { - if (i < pos) - VTR_LOG("="); - else if (i == pos) - 
VTR_LOG(">"); - else - VTR_LOG(" "); - } - VTR_LOG("] %zu %\n", int(progress * 100.0)); -} diff --git a/vpr/src/pack/improvement/pack_utils.h b/vpr/src/pack/improvement/pack_utils.h deleted file mode 100644 index b021aef5fc0..00000000000 --- a/vpr/src/pack/improvement/pack_utils.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// Created by elgammal on 2022-07-27. -// - -#ifndef VTR_PACK_UTILS_H -#define VTR_PACK_UTILS_H -#include "cluster_util.h" - -struct t_pack_iterative_stats { - int good_moves = 0; - int legal_moves = 0; - std::mutex mu; -}; -void iteratively_improve_packing(const t_packer_opts& packer_opts, - t_clustering_data& clustering_data, - int verbosity); - -const t_pack_molecule* get_atom_mol (AtomBlockId atom_blk_id); -#endif //VTR_PACK_UTILS_H diff --git a/vpr/src/pack/improvement/packing_cost.cpp b/vpr/src/pack/improvement/packing_cost.cpp deleted file mode 100644 index b6ca6723315..00000000000 --- a/vpr/src/pack/improvement/packing_cost.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include "packing_cost.h" -#include "re_cluster_util.h" - -bool evaluate_move_based_on_attraction(const std::vector& proposed_moves) { - auto& atom_ctx = g_vpr_ctx.atom(); - float gain = 0; - - // Keep track of all the moving atoms - std::unordered_set moving_atoms; - - for (auto& proposed_move : proposed_moves) { - for (auto& atom : proposed_move.molecule_to_move->atom_block_ids) { - if (atom) { - moving_atoms.insert(atom); - } - } - } - - for (auto& proposed_move : proposed_moves) { - const t_pack_molecule* moving_molecule = proposed_move.molecule_to_move; - ClusterBlockId original_clb = atom_to_cluster(moving_molecule->atom_block_ids[moving_molecule->root]); - ClusterBlockId proposed_clb = proposed_move.new_clb; - - std::unordered_set moving_nets; - for (auto& atom : moving_molecule->atom_block_ids) { - if (atom) { - for (auto& pin : atom_ctx.nlist.block_pins(atom)) { - auto net_id = atom_ctx.nlist.pin_net(pin); - if (net_id) - moving_nets.insert(net_id); - } - } - } - gain += 
calculate_molecule_attraction_to_cluster(moving_atoms, moving_nets, moving_molecule, proposed_clb); - gain -= calculate_molecule_attraction_to_cluster(moving_atoms, moving_nets, moving_molecule, original_clb); - } - - return (gain > 0); -} - -float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, - const std::unordered_set& moving_nets, - const t_pack_molecule* molecule, - ClusterBlockId clb) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - float gain = 0; - t_packing_attraction attraction; - - for (auto& net_id : moving_nets) { - if ((int)atom_ctx.nlist.net_pins(net_id).size() > helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb)->name)) - continue; - - std::unordered_set connected_moving_blocks; - - int num_stuck_connections = 0; - bool net_shared = false; - - // calculate sharing gain - auto pins = atom_ctx.nlist.net_pins(net_id); - if (helper_ctx.net_output_feeds_driving_block_input[net_id] != 0) - pins = atom_ctx.nlist.net_sinks(net_id); - - for (auto& pin : pins) { - auto blk_id = atom_ctx.nlist.pin_block(pin); - if (moving_atoms.count(blk_id)) { - connected_moving_blocks.insert(blk_id); - continue; - } - - auto cluster = atom_to_cluster(blk_id); - if (cluster == clb) { - if (!net_shared) { - net_shared = true; - attraction.sharinggain++; - } - if (helper_ctx.timing_driven) { - if (atom_ctx.nlist.pin_type(pin) == PinType::SINK) { - auto net_driver_block = atom_ctx.nlist.net_driver_block(net_id); - if (moving_atoms.count(net_driver_block) != 0) { - float timinggain = helper_ctx.timing_info->setup_pin_criticality(pin); - attraction.timinggain = std::max(timinggain, attraction.timinggain); - } - } else if (atom_ctx.nlist.pin_type(pin) == PinType::DRIVER) { - for (auto& pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto net_sink_block = atom_ctx.nlist.pin_block(pin_id); - if 
(moving_atoms.count(net_sink_block) != 0) { - float timinggain = helper_ctx.timing_info->setup_pin_criticality(pin_id); - attraction.timinggain = std::max(timinggain, attraction.timinggain); - } - } - } - } - } else { - num_stuck_connections++; - } - } - attraction.connectiongain += 1 / (float)(0.1 + num_stuck_connections); - } - - gain += calculate_gain_from_attractions(attraction, molecule); - - return gain; -} - -float calculate_gain_from_attractions(const t_packing_attraction& attractions, - const t_pack_molecule* molecule) { - auto& atom_ctx = g_vpr_ctx.atom(); - - float alpha = 0.75; - float beta = 0.9; - - float gain; - int num_used_pins = 0; - for (auto& atom : molecule->atom_block_ids) { - if (atom) { - num_used_pins += atom_ctx.nlist.block_input_pins(atom).size(); - num_used_pins += atom_ctx.nlist.block_output_pins(atom).size(); - } - } - - gain = ((1 - beta) * attractions.sharinggain + beta * attractions.connectiongain) - / (num_used_pins); - - gain = alpha * attractions.timinggain + (1 - alpha) * gain; - return gain; -} \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_cost.h b/vpr/src/pack/improvement/packing_cost.h deleted file mode 100644 index 606d6d0dd38..00000000000 --- a/vpr/src/pack/improvement/packing_cost.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef PACKING_COST_H -#define PACKING_COST_H - -#include "vpr_types.h" -#include "pack_move_utils.h" -struct t_packing_attraction { - float timinggain = 0; - float connectiongain = 0; - float sharinggain = 0; -}; - -const int HIGH_FANOUT_NET_THRESHOLD = 5; - -float calculate_gain_from_attractions(const t_packing_attraction& attractions, - const t_pack_molecule* molecule); - -float calculate_molecule_attraction_to_cluster(const std::unordered_set& moving_atoms, - const std::unordered_set& moving_nets, - const t_pack_molecule* molecule, - ClusterBlockId clb); - -bool evaluate_move_based_on_attraction(const std::vector& proposed_moves); - -#endif \ No newline at end of file diff --git 
a/vpr/src/pack/improvement/packing_move_generator.cpp b/vpr/src/pack/improvement/packing_move_generator.cpp deleted file mode 100644 index fbc1f7d0166..00000000000 --- a/vpr/src/pack/improvement/packing_move_generator.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// -// Created by elgammal on 2022-07-28. -// - -#include "packing_move_generator.h" -#include "re_cluster.h" -#include -#include "re_cluster_util.h" -#include "pack_move_utils.h" -#include "packing_cost.h" - -const int MAX_ITERATIONS = 10; - -/******************* Packing move base class ************************/ -/********************************************************************/ -bool packingMoveGenerator::apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int thread_id) { - if (new_locs.size() == 1) { - //We need to move a molecule to an existing CLB - return (move_mol_to_existing_cluster(new_locs[0].molecule_to_move, - new_locs[1].new_clb, - true, - 0, - clustering_data, - thread_id)); - } else if (new_locs.size() == 2) { - //We need to swap two molecules - return (swap_two_molecules(new_locs[0].molecule_to_move, - new_locs[1].molecule_to_move, - true, - 0, - clustering_data, - thread_id)); - } else { - //We have a more complicated move (moving multiple molecules at once) - //TODO: This case is not supported yet - return false; - } -} - -/****************** Random packing move class *******************/ -/****************************************************************/ -bool randomPackingSwap::propose_move(std::vector& new_locs) { - auto& cluster_ctx = g_vpr_ctx.clustering(); -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - t_pack_molecule *mol_1, *mol_2; - ClusterBlockId clb_index_1, clb_index_2; - t_logical_block_type_ptr block_type_1, block_type_2; - int iteration = 0; - bool found = false; - - //pick the 1st molecule randomly - mol_1 = pick_molecule_randomly(); - clb_index_1 = 
atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - block_type_1 = cluster_ctx.clb_nlist.block_type(clb_index_1); - - do { - mol_2 = pick_molecule_randomly(); - clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); - block_type_2 = cluster_ctx.clb_nlist.block_type(clb_index_2); - if (block_type_1 == block_type_2 && clb_index_1 != clb_index_2) { - found = true; - build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } -#ifdef PACK_MULTITHREADED - else { - packing_multithreading_ctx.mu[clb_index_2]->unlock(); - } -#endif - ++iteration; - } while (!found && iteration < MAX_ITERATIONS); - -#ifdef PACK_MULTITHREADED - if (!found) { - packing_multithreading_ctx.mu[clb_index_1]->unlock(); - } -#endif - - return found; -} - -bool randomPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_cutsize(new_locs)); -} - -/***************** Quasi directed packing move class *******************/ -/***********************************************************************/ -bool quasiDirectedPackingSwap::propose_move(std::vector& new_locs) { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - t_pack_molecule *mol_1, *mol_2; - ClusterBlockId clb_index_1; - - //pick the 1st molecule randomly - mol_1 = pick_molecule_randomly(); - clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - - //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster - mol_2 = nullptr; - bool found = pick_molecule_connected(mol_1, mol_2); - - if (found) { - ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); - build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } -#ifdef PACK_MULTITHREADED - else { - packing_multithreading_ctx.mu[clb_index_1]->unlock(); - } -#endif - return found; -} - -bool quasiDirectedPackingSwap::evaluate_move(const std::vector& new_locs) { - return 
(evaluate_move_based_on_cutsize(new_locs)); -} - -/***************** Quasi directed same type packing move class *******************/ -/*********************************************************************************/ -bool quasiDirectedSameTypePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_cutsize(new_locs)); -} - -bool quasiDirectedSameTypePackingSwap::propose_move(std::vector& new_locs) { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - t_pack_molecule *mol_1, *mol_2; - ClusterBlockId clb_index_1; - - //pick the 1st molecule randomly - mol_1 = pick_molecule_randomly(); - clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - - //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster - mol_2 = nullptr; - bool found = pick_molecule_connected_same_type(mol_1, mol_2); - - if (found) { - ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); - build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } -#ifdef PACK_MULTITHREADED - else { - packing_multithreading_ctx.mu[clb_index_1]->unlock(); - } -#endif - - return found; -} - -/***************** Quasi directed compatible type packing move class *******************/ -/*********************************************************************************/ -bool quasiDirectedCompatibleTypePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_cutsize(new_locs)); -} - -bool quasiDirectedCompatibleTypePackingSwap::propose_move(std::vector& new_locs) { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - t_pack_molecule *mol_1, *mol_2; - ClusterBlockId clb_index_1; - - //pick the 1st molecule randomly - mol_1 = pick_molecule_randomly(); - clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - - //pick the 2nd 
molecule from a cluster that is directly connected to mol_1 cluster - mol_2 = nullptr; - bool found = pick_molecule_connected_compatible_type(mol_1, mol_2); - - if (found) { - ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); - build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } -#ifdef PACK_MULTITHREADED - else { - packing_multithreading_ctx.mu[clb_index_1]->unlock(); - } -#endif - - return found; -} - -/***************** Quasi directed same size packing move class *******************/ -/*********************************************************************************/ -bool quasiDirectedSameSizePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_cutsize(new_locs)); -} - -bool quasiDirectedSameSizePackingSwap::propose_move(std::vector& new_locs) { -#ifdef PACK_MULTITHREADED - auto& packing_multithreading_ctx = g_vpr_ctx.mutable_packing_multithreading(); -#endif - - t_pack_molecule *mol_1, *mol_2; - ClusterBlockId clb_index_1; - - //pick the 1st molecule randomly - mol_1 = pick_molecule_randomly(); - clb_index_1 = atom_to_cluster(mol_1->atom_block_ids[mol_1->root]); - - //pick the 2nd molecule from a cluster that is directly connected to mol_1 cluster - mol_2 = nullptr; - bool found = pick_molecule_connected_same_size(mol_1, mol_2); - - if (found) { - ClusterBlockId clb_index_2 = atom_to_cluster(mol_2->atom_block_ids[mol_2->root]); - build_mol_move_description(new_locs, mol_1, clb_index_1, mol_2, clb_index_2); - } -#ifdef PACK_MULTITHREADED - else { - packing_multithreading_ctx.mu[clb_index_1]->unlock(); - } -#endif - - return found; -} - -bool randomConnPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_connection(new_locs)); -} - -bool quasiDirectedConnPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_connection(new_locs)); -} - -bool 
quasiDirectedSameTypeConnPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_connection(new_locs)); -} - -bool quasiDirectedCompatibleTypeConnPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_connection(new_locs)); -} - -bool quasiDirectedSameSizeConnPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_connection(new_locs)); -} - -bool randomTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals(new_locs)); -} - -bool quasiDirectedTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals(new_locs)); -} - -bool quasiDirectedSameTypeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals(new_locs)); -} - -bool quasiDirectedCompatibleTypeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals(new_locs)); -} - -bool quasiDirectedSameSizeTerminalPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals(new_locs)); -} - -bool randomTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_and_nets(new_locs)); -} - -bool quasiDirectedTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_and_nets(new_locs)); -} - -bool quasiDirectedSameTypeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_and_nets(new_locs)); -} - -bool quasiDirectedCompatibleTypeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_and_nets(new_locs)); -} - -bool quasiDirectedSameSizeTerminalNetPackingSwap::evaluate_move(const std::vector& new_locs) { - return 
(evaluate_move_based_on_terminals_and_nets(new_locs)); -} - -bool randomTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_new_formula(new_locs)); -} - -bool quasiDirectedTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_new_formula(new_locs)); -} - -bool quasiDirectedSameTypeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_new_formula(new_locs)); -} - -bool quasiDirectedCompatibleTypeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_new_formula(new_locs)); -} - -bool quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_new_formula(new_locs)); -} - -bool randomTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_outside(new_locs)); -} - -bool quasiDirectedTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_outside(new_locs)); -} - -bool quasiDirectedSameTypeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_outside(new_locs)); -} - -bool quasiDirectedCompatibleTypeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_outside(new_locs)); -} - -bool quasiDirectedSameSizeTerminalOutsidePackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_terminals_outside(new_locs)); -} - -bool randomCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_attraction(new_locs)); -} - -bool quasiDirectedCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { - 
return (evaluate_move_based_on_attraction(new_locs)); -} - -bool quasiDirectedSameTypeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_attraction(new_locs)); -} - -bool quasiDirectedCompatibleTypeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_attraction(new_locs)); -} - -bool quasiDirectedSameSizeCostEvaluationPackingSwap::evaluate_move(const std::vector& new_locs) { - return (evaluate_move_based_on_attraction(new_locs)); -} \ No newline at end of file diff --git a/vpr/src/pack/improvement/packing_move_generator.h b/vpr/src/pack/improvement/packing_move_generator.h deleted file mode 100644 index 13b399a96c1..00000000000 --- a/vpr/src/pack/improvement/packing_move_generator.h +++ /dev/null @@ -1,210 +0,0 @@ -// -// Created by elgammal on 2022-07-28. -// - -#ifndef VTR_PACKINGMOVEGENERATOR_H -#define VTR_PACKINGMOVEGENERATOR_H - -#include "vpr_types.h" -#include "cluster_util.h" -#include "pack_move_utils.h" - -/** - * @brief a base class for packing move generators - * - * This class represents the base class for all move generators. 
- */ -class packingMoveGenerator { - public: - //Propose - virtual ~packingMoveGenerator() = default; - virtual bool propose_move(std::vector& new_locs) = 0; - virtual bool evaluate_move(const std::vector& new_locs) = 0; - bool apply_move(std::vector& new_locs, t_clustering_data& clustering_data, int thread_id); -}; - -class randomPackingSwap : public packingMoveGenerator { - public: - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); -}; - -class quasiDirectedPackingSwap : public packingMoveGenerator { - public: - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); -}; - -class quasiDirectedSameTypePackingSwap : public packingMoveGenerator { - public: - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); -}; - -class quasiDirectedCompatibleTypePackingSwap : public packingMoveGenerator { - public: - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); -}; - -class quasiDirectedSameSizePackingSwap : public packingMoveGenerator { - bool propose_move(std::vector& new_locs); - bool evaluate_move(const std::vector& new_locs); -}; - -/************ Moves that evaluate on abosrbed Connections *********************/ -class randomConnPackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedConnPackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeConnPackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeConnPackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeConnPackingSwap : public quasiDirectedSameSizePackingSwap { 
- public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -/************ Moves that evaluate on abosrbed Terminals *********************/ -class randomTerminalPackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedTerminalPackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeTerminalPackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeTerminalPackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeTerminalPackingSwap : public quasiDirectedSameSizePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -/************ Moves that evaluate on abosrbed Terminals and nets *********************/ -class randomTerminalNetPackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedTerminalNetPackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeTerminalNetPackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeTerminalNetPackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeTerminalNetPackingSwap : public quasiDirectedSameSizePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -/************ Moves that evaluate on abosrbed Terminals and nets new formula *********************/ -class 
randomTerminalNetNewFormulaPackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedTerminalNetNewFormulaPackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeTerminalNetNewFormulaPackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeTerminalNetNewFormulaPackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeTerminalNetNewFormulaPackingSwap : public quasiDirectedSameSizePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -/************ Moves that evaluate on abosrbed Terminals and nets new formula *********************/ -class randomTerminalOutsidePackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedTerminalOutsidePackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeTerminalOutsidePackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeTerminalOutsidePackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeTerminalOutsidePackingSwap : public quasiDirectedSameSizePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -/************ Moves that evaluate on Packing cost function *********************/ -class randomCostEvaluationPackingSwap : public randomPackingSwap { - public: - bool evaluate_move(const 
std::vector& new_locs) override; -}; - -class quasiDirectedCostEvaluationPackingSwap : public quasiDirectedPackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameTypeCostEvaluationPackingSwap : public quasiDirectedSameTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedCompatibleTypeCostEvaluationPackingSwap : public quasiDirectedCompatibleTypePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; - -class quasiDirectedSameSizeCostEvaluationPackingSwap : public quasiDirectedSameSizePackingSwap { - public: - bool evaluate_move(const std::vector& new_locs) override; -}; -#endif //VTR_PACKINGMOVEGENERATOR_H \ No newline at end of file From 2613b425e1ff27544c86d7257fd3e6822a44cd1e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 11:25:36 -0400 Subject: [PATCH 124/188] add api debug fixes to files under vpr/src/pack --- vpr/src/base/vpr_types.cpp | 23 ----- vpr/src/base/vpr_types.h | 3 - vpr/src/pack/cluster.cpp | 24 +++--- vpr/src/pack/cluster_placement.cpp | 86 +++++++++--------- vpr/src/pack/cluster_placement.h | 3 +- vpr/src/pack/cluster_util.cpp | 49 +++-------- vpr/src/pack/cluster_util.h | 5 +- vpr/src/pack/pack.cpp | 134 +++-------------------------- vpr/src/pack/pb_type_graph.cpp | 2 +- vpr/src/pack/re_cluster.cpp | 25 ++---- vpr/src/pack/re_cluster_util.cpp | 41 +++++---- vpr/src/pack/re_cluster_util.h | 17 +--- 12 files changed, 114 insertions(+), 298 deletions(-) diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index c6dae8df2c7..74d6447cb41 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -343,27 +343,4 @@ AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { } return EMPTY_PRIMITIVE_BLOCK_ID; } -} - -t_cluster_placement_primitive* t_cluster_placement_stats::get_cluster_placement_primitive_from_pb_graph_node(const 
t_pb_graph_node* pb_graph_node) { - auto it = valid_primitives[pb_graph_node->cluster_placement_type_index].find(pb_graph_node->cluster_placement_primitive_index); - if (it != valid_primitives[pb_graph_node->cluster_placement_type_index].end()) - return valid_primitives[pb_graph_node->cluster_placement_type_index][pb_graph_node->cluster_placement_primitive_index]; - - for (auto itr = tried.find(pb_graph_node->cluster_placement_primitive_index); itr != tried.end(); itr++) { - if (itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } - - for (auto itr = invalid.find(pb_graph_node->cluster_placement_primitive_index); itr != invalid.end(); itr++) { - if (itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } - - for (auto itr = in_flight.find(pb_graph_node->cluster_placement_primitive_index); itr != in_flight.end(); itr++) { - if (itr->second->pb_graph_node->cluster_placement_type_index == pb_graph_node->cluster_placement_type_index) - return itr->second; - } - - return nullptr; } \ No newline at end of file diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index c02759c8bd5..6eae3415cef 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -493,8 +493,6 @@ class t_cluster_placement_stats { */ void free_primitives(); - t_cluster_placement_primitive* get_cluster_placement_primitive_from_pb_graph_node(const t_pb_graph_node* pb_graph_node); - private: std::unordered_multimap in_flight; /// tried; /// do_clustering(const t_packer_opts& pa const int verbosity = packer_opts.pack_verbosity; int unclustered_list_head_size; + std::unordered_map net_output_feeds_driving_block_input; cluster_stats.num_molecules_processed = 0; cluster_stats.mols_since_last_print = 0; @@ -150,9 +151,9 @@ std::map do_clustering(const t_packer_opts& pa helper_ctx.enable_pin_feasibility_filter = 
packer_opts.enable_pin_feasibility_filter; helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; - helper_ctx.timing_driven = packer_opts.timing_driven; std::shared_ptr clustering_delay_calc; + std::shared_ptr timing_info; // this data structure tracks the number of Logic Elements (LEs) used. It is // populated only for architectures which has LEs. The architecture is assumed @@ -198,8 +199,9 @@ std::map do_clustering(const t_packer_opts& pa #if 0 check_for_duplicate_inputs (); #endif - alloc_and_init_clustering(packer_opts, max_molecule_stats, molecule_head, - clustering_data, helper_ctx.net_output_feeds_driving_block_input, + alloc_and_init_clustering(max_molecule_stats, + &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), molecule_head, + clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); auto primitive_candidate_block_types = identify_primitive_candidate_block_types(); @@ -219,7 +221,7 @@ std::map do_clustering(const t_packer_opts& pa if (packer_opts.timing_driven) { calc_init_packing_timing(packer_opts, analysis_opts, expected_lowest_cost_pb_gnode, - clustering_delay_calc, helper_ctx.timing_info, atom_criticality); + clustering_delay_calc, timing_info, atom_criticality); } auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); @@ -245,7 +247,7 @@ std::map do_clustering(const t_packer_opts& pa * stores PartitionRegion information while the cluster is packed*/ PartitionRegion temp_cluster_pr; - start_new_cluster(helper_ctx.cluster_placement_stats[0], helper_ctx.primitives_list[0], + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, clb_index, istart, num_used_type_instances, packer_opts.target_device_utilization, @@ -288,9 +290,9 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven, packer_opts.connection_driven, 
high_fanout_threshold, - *(helper_ctx.timing_info), + *timing_info, attraction_groups, - helper_ctx.net_output_feeds_driving_block_input); + net_output_feeds_driving_block_input); helper_ctx.total_clb_num++; if (packer_opts.timing_driven) { @@ -298,7 +300,7 @@ std::map do_clustering(const t_packer_opts& pa /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[0][cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); cluster_stats.num_unrelated_clustering_attempts = 0; next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), attraction_groups, @@ -340,7 +342,7 @@ std::map do_clustering(const t_packer_opts& pa prev_molecule, next_molecule, num_repeated_molecules, - helper_ctx.primitives_list[0], + helper_ctx.primitives_list, cluster_stats, helper_ctx.total_clb_num, num_models, @@ -352,14 +354,14 @@ std::map do_clustering(const t_packer_opts& pa allow_unrelated_clustering, high_fanout_threshold, is_clock, - helper_ctx.timing_info, + timing_info, router_data, target_ext_pin_util, temp_cluster_pr, block_pack_status, clustering_data.unclustered_list_head, unclustered_list_head_size, - helper_ctx.net_output_feeds_driving_block_input, + net_output_feeds_driving_block_input, primitive_candidate_block_types); } diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index a5db78ba357..c0458154cf1 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -30,14 +30,17 @@ /*Local Function Declaration */ /****************************************/ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node* pb_graph_node, - int lb_type_index); 
-static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_placement_stats, - const t_pb_graph_node* pb_graph_node, + t_pb_graph_node* pb_graph_node); +static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node, float incremental_cost, bool valid); -static float try_place_molecule(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, t_pb_graph_node* root, t_pb_graph_node** primitives_list); -static bool expand_forced_pack_molecule_placement(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, const t_pack_pattern_block* pack_pattern_block, t_pb_graph_node** primitives_list, float* cost); +static float try_place_molecule(const t_pack_molecule* molecule, + t_pb_graph_node* root, + t_pb_graph_node** primitives_list); +static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecule, + const t_pack_pattern_block* pack_pattern_block, + t_pb_graph_node** primitives_list, + float* cost); static t_pb_graph_pin* expand_pack_molecule_pin_edge(int pattern_id, const t_pb_graph_pin* cur_pin, bool forward); @@ -63,8 +66,7 @@ t_cluster_placement_stats* alloc_and_load_cluster_placement_stats() { if (!is_empty_type(&type)) { cluster_placement_stats_list[type.index].curr_molecule = nullptr; load_cluster_placement_stats_for_pb_graph_node(&cluster_placement_stats_list[type.index], - type.pb_graph_head, - type.index); + type.pb_graph_head); } } return cluster_placement_stats_list; @@ -135,7 +137,7 @@ bool get_next_primitive_list(t_cluster_placement_stats* cluster_placement_stats, } /* try place molecule at root location cur */ - cost = try_place_molecule(cluster_placement_stats, molecule, it->second->pb_graph_node, primitives_list); + cost = try_place_molecule(molecule, it->second->pb_graph_node, primitives_list); // if the cost is lower than the best, or is equal to the best but this // primitive is more available in the cluster mark it as the best primitive @@ 
-158,7 +160,7 @@ bool get_next_primitive_list(t_cluster_placement_stats* cluster_placement_stats, } } else { /* populate primitive list with best */ - cost = try_place_molecule(cluster_placement_stats, molecule, best->second->pb_graph_node, primitives_list); + cost = try_place_molecule(molecule, best->second->pb_graph_node, primitives_list); VTR_ASSERT(cost == lowest_cost); /* take out best node and put it in flight */ @@ -197,8 +199,7 @@ void reset_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_ * Adds backward link from pb_graph_node to cluster_placement_primitive */ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node* pb_graph_node, - int lb_type_index) { + t_pb_graph_node* pb_graph_node) { int i, j, k; t_cluster_placement_primitive* placement_primitive; const t_pb_type* pb_type = pb_graph_node->pb_type; @@ -207,7 +208,7 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s placement_primitive = new t_cluster_placement_primitive(); placement_primitive->pb_graph_node = pb_graph_node; placement_primitive->valid = true; - pb_graph_node->lb_type_index = lb_type_index; + pb_graph_node->cluster_placement_primitive = placement_primitive; placement_primitive->base_cost = compute_primitive_base_cost(pb_graph_node); bool success = false; @@ -219,15 +220,10 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s * - Check the pb_type of this element with the pb_type of pb_graph_node * - if matched --> insert the primitive */ - for (size_t type_index = 0; type_index < cluster_placement_stats->valid_primitives.size(); type_index++) { - auto& type_primitives = cluster_placement_stats->valid_primitives[type_index]; + for (auto& type_primitives : cluster_placement_stats->valid_primitives) { auto first_elem = type_primitives.find(0); if (first_elem != type_primitives.end() && first_elem->second->pb_graph_node->pb_type == 
pb_graph_node->pb_type) { - size_t index = type_primitives.size(); - pb_graph_node->cluster_placement_primitive_index = index; - pb_graph_node->cluster_placement_type_index = type_index; - - type_primitives.insert({index, placement_primitive}); + type_primitives.insert({type_primitives.size(), placement_primitive}); success = true; break; } @@ -238,8 +234,6 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s * and insert the placement primitive into the new map with index 0 */ if (!success) { - pb_graph_node->cluster_placement_primitive_index = 0; - pb_graph_node->cluster_placement_type_index = cluster_placement_stats->num_pb_types; cluster_placement_stats->valid_primitives.emplace_back(); cluster_placement_stats->valid_primitives[cluster_placement_stats->valid_primitives.size() - 1].insert({0, placement_primitive}); cluster_placement_stats->num_pb_types++; @@ -248,10 +242,10 @@ static void load_cluster_placement_stats_for_pb_graph_node(t_cluster_placement_s } else { // not a primitive, recursively call the function for all its children for (i = 0; i < pb_type->num_modes; i++) { for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { - for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { + for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; + k++) { load_cluster_placement_stats_for_pb_graph_node(cluster_placement_stats, - &pb_graph_node->child_pb_graph_nodes[i][j][k], - lb_type_index); + &pb_graph_node->child_pb_graph_nodes[i][j][k]); } } } @@ -277,8 +271,7 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, cluster_placement_stats->flush_intermediate_queues(); /* commit primitive as used, invalidate it */ - //cur = cluster_placement_stats->valid_primitives[primitive->cluster_placement_type_index][primitive->cluster_placement_primitive_index]; - cur = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(primitive); + cur = 
primitive->cluster_placement_primitive; VTR_ASSERT(cur->valid == true); cur->valid = false; @@ -294,7 +287,7 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { if (&pb_graph_node->child_pb_graph_nodes[i][j][k] != skip) { - update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], + update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], incr_cost, (bool)(i == valid_mode)); } } @@ -307,13 +300,13 @@ void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, /** * Set mode of cluster */ -void set_mode_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* pb_graph_node, int mode) { +void set_mode_cluster_placement_stats(const t_pb_graph_node* pb_graph_node, int mode) { int i, j, k; for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { if (i != mode) { for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { - update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], 0, false); + update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], 0, false); } } } @@ -325,15 +318,14 @@ void set_mode_cluster_placement_stats(t_cluster_placement_stats* cluster_placeme * For modes invalidated by pb_graph_node, invalidate primitive * int distance is the distance of current pb_graph_node from original */ -static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_placement_stats, - const t_pb_graph_node* pb_graph_node, +static void update_primitive_cost_or_status(const t_pb_graph_node* pb_graph_node, const float incremental_cost, const bool valid) { int i, j, k; 
t_cluster_placement_primitive* placement_primitive; if (pb_graph_node->is_primitive()) { /* is primitive */ - placement_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(pb_graph_node); + placement_primitive = (t_cluster_placement_primitive*)pb_graph_node->cluster_placement_primitive; if (valid) { placement_primitive->incremental_cost += incremental_cost; } else { @@ -343,7 +335,7 @@ static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_p for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { - update_primitive_cost_or_status(cluster_placement_stats, &pb_graph_node->child_pb_graph_nodes[i][j][k], + update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], incremental_cost, valid); } } @@ -354,23 +346,24 @@ static void update_primitive_cost_or_status(t_cluster_placement_stats* cluster_p /** * Try place molecule at root location, populate primitives list with locations of placement if successful */ -static float try_place_molecule(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, t_pb_graph_node* root, t_pb_graph_node** primitives_list) { +static float try_place_molecule(const t_pack_molecule* molecule, + t_pb_graph_node* root, + t_pb_graph_node** primitives_list) { int list_size, i; float cost = HUGE_POSITIVE_FLOAT; list_size = get_array_size_of_molecule(molecule); if (primitive_type_feasible(molecule->atom_block_ids[molecule->root], root->pb_type)) { - t_cluster_placement_primitive* cur_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(root); - if (cur_primitive->valid) { + if (root->cluster_placement_primitive->valid) { for (i = 0; i < list_size; i++) { primitives_list[i] = nullptr; } - cost = cur_primitive->base_cost - + 
cur_primitive->incremental_cost; + cost = root->cluster_placement_primitive->base_cost + + root->cluster_placement_primitive->incremental_cost; primitives_list[molecule->root] = root; if (molecule->type == MOLECULE_FORCED_PACK) { - if (!expand_forced_pack_molecule_placement(cluster_placement_stats, molecule, + if (!expand_forced_pack_molecule_placement(molecule, molecule->pack_pattern->root_block, primitives_list, &cost)) { return HUGE_POSITIVE_FLOAT; @@ -395,7 +388,10 @@ static float try_place_molecule(t_cluster_placement_stats* cluster_placement_sta * Expand molecule at pb_graph_node * Assumes molecule and pack pattern connections have fan-out 1 */ -static bool expand_forced_pack_molecule_placement(t_cluster_placement_stats* cluster_placement_stats, const t_pack_molecule* molecule, const t_pack_pattern_block* pack_pattern_block, t_pb_graph_node** primitives_list, float* cost) { +static bool expand_forced_pack_molecule_placement(const t_pack_molecule* molecule, + const t_pack_pattern_block* pack_pattern_block, + t_pb_graph_node** primitives_list, + float* cost) { t_pb_graph_node* pb_graph_node = primitives_list[pack_pattern_block->block_id]; t_pb_graph_node* next_primitive; t_pack_pattern_connections* cur; @@ -439,11 +435,10 @@ static bool expand_forced_pack_molecule_placement(t_cluster_placement_stats* clu next_primitive = next_pin->parent_node; /* Check for legality of placement, if legal, expand from legal placement, if not, return false */ if (molecule->atom_block_ids[next_block->block_id] && primitives_list[next_block->block_id] == nullptr) { - t_cluster_placement_primitive* placement_primitive = cluster_placement_stats->get_cluster_placement_primitive_from_pb_graph_node(next_primitive); - if (placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], next_primitive->pb_type)) { + if (next_primitive->cluster_placement_primitive->valid && primitive_type_feasible(molecule->atom_block_ids[next_block->block_id], 
next_primitive->pb_type)) { primitives_list[next_block->block_id] = next_primitive; - *cost += placement_primitive->base_cost + placement_primitive->incremental_cost; - if (!expand_forced_pack_molecule_placement(cluster_placement_stats, molecule, next_block, primitives_list, cost)) { + *cost += next_primitive->cluster_placement_primitive->base_cost + next_primitive->cluster_placement_primitive->incremental_cost; + if (!expand_forced_pack_molecule_placement(molecule, next_block, primitives_list, cost)) { return false; } } else { @@ -584,6 +579,7 @@ bool exists_free_primitive_for_atom_block(t_cluster_placement_stats* cluster_pla /* Look through list of available primitives to see if any valid */ for (i = 0; i < cluster_placement_stats->num_pb_types; i++) { + //for (auto& primitive : cluster_placement_stats->valid_primitives[i]) { if (!cluster_placement_stats->valid_primitives[i].empty() && primitive_type_feasible(blk_id, cluster_placement_stats->valid_primitives[i].begin()->second->pb_graph_node->pb_type)) { for (auto it = cluster_placement_stats->valid_primitives[i].begin(); it != cluster_placement_stats->valid_primitives[i].end();) { if (it->second->valid) diff --git a/vpr/src/pack/cluster_placement.h b/vpr/src/pack/cluster_placement.h index fe4529db4d6..8715e611222 100644 --- a/vpr/src/pack/cluster_placement.h +++ b/vpr/src/pack/cluster_placement.h @@ -14,7 +14,8 @@ bool get_next_primitive_list( t_pb_graph_node** primitives_list); void commit_primitive(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* primitive); -void set_mode_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats, const t_pb_graph_node* pb_graph_node, int mode); +void set_mode_cluster_placement_stats(const t_pb_graph_node* complex_block, + int mode); void reset_cluster_placement_stats( t_cluster_placement_stats* cluster_placement_stats); diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 578f8922104..0e12305dc70 100644 
--- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -6,7 +6,6 @@ #include "vtr_math.h" #include "SetupGrid.h" -#include "string.h" /**********************************/ /* Global variables in clustering */ @@ -495,8 +494,9 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, } /*****************************************/ -void alloc_and_init_clustering(const t_packer_opts& packer_opts, - const t_molecule_stats& max_molecule_stats, +void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, + t_cluster_placement_stats** cluster_placement_stats, + t_pb_graph_node*** primitives_list, t_pack_molecule* molecules_head, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, @@ -505,15 +505,6 @@ void alloc_and_init_clustering(const t_packer_opts& packer_opts, /* Allocates the main data structures used for clustering and properly * * initializes them. */ - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - if (packer_opts.pack_num_moves > 0) { - helper_ctx.primitives_list.resize(packer_opts.pack_num_threads); - helper_ctx.cluster_placement_stats.resize(packer_opts.pack_num_threads); - } else { - helper_ctx.primitives_list.resize(1); - helper_ctx.cluster_placement_stats.resize(1); - } - t_molecule_link* next_ptr; t_pack_molecule* cur_molecule; t_pack_molecule** molecule_array; @@ -572,8 +563,7 @@ void alloc_and_init_clustering(const t_packer_opts& packer_opts, } /* alloc and load cluster placement info */ - for (int thread_id = 0; thread_id < packer_opts.pack_num_threads; thread_id++) - helper_ctx.cluster_placement_stats[thread_id] = alloc_and_load_cluster_placement_stats(); + *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); /* alloc array that will store primitives that a molecule gets placed to, * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list @@ -587,12 +577,9 @@ 
void alloc_and_init_clustering(const t_packer_opts& packer_opts, } cur_molecule = cur_molecule->next; } - - for (int thread_id = 0; thread_id < packer_opts.pack_num_threads; thread_id++) { - helper_ctx.primitives_list[thread_id] = new t_pb_graph_node*[max_molecule_size]; - for (int i = 0; i < max_molecule_size; i++) - helper_ctx.primitives_list[thread_id][i] = nullptr; - } + *primitives_list = new t_pb_graph_node*[max_molecule_size]; + for (int i = 0; i < max_molecule_size; i++) + (*primitives_list)[i] = nullptr; } /*****************************************/ @@ -1091,20 +1078,11 @@ enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_pl * if a chain is packed in, want to rename logic block to match chain name */ AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - /* // Elgammal debugging - if(strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) - VTR_LOG("rename: %s\n", cur_pb->name); - */ while (cur_pb != nullptr) { free(cur_pb->name); cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - /* // Elgammal debugging - if(cur_pb->is_root() && strcmp(atom_ctx.nlist.block_name(chain_root_blk_id).c_str(), "sv_chip2_hierarchy_no_mem.v_fltr_4_left.inst_fltr_compute_h3^ADD~334-0[0]") == 0) - VTR_LOG("\t %p\n", cur_pb); - */ cur_pb = cur_pb->parent_pb; } - // if this molecule is part of a chain, mark the cluster as having a long chain // molecule. Also check if it's the first molecule in the chain to be packed. 
// If so, update the chain id for this chain of molecules to make sure all @@ -1237,7 +1215,6 @@ enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_grap VTR_ASSERT(parent_pb->name == nullptr); parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - //VTR_LOG("$$ %s\n", parent_pb->name); parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; set_reset_pb_modes(router_data, parent_pb, true); const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; @@ -1289,7 +1266,6 @@ enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_grap /* try pack to location */ VTR_ASSERT(pb->name == nullptr); pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - //VTR_LOG("$$$ %s\n", pb->name); //Update the atom netlist mappings atom_ctx.lookup.set_atom_clb(blk_id, clb_index); @@ -1698,10 +1674,7 @@ void store_cluster_info_and_free(const t_packer_opts& packer_opts, //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - - // If no more iterative improvements to be run after the initial packing, clear the date for the packed cluster now - if(packer_opts.pack_num_moves == 0) - free_pb_stats_recursive(cur_pb); + free_pb_stats_recursive(cur_pb); } /* Free up data structures and requeue used molecules */ @@ -2141,7 +2114,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, pb->mode = j; reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(&cluster_placement_stats[type->index], pb->pb_graph_node, j); + set_mode_cluster_placement_stats(pb->pb_graph_node, j); //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, //which allows all cluster pins to be used. 
This ensures that if we have a large @@ -2168,7 +2141,6 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, free(pb->name); } pb->name = vtr::strdup(root_atom_name.c_str()); - //VTR_LOG("$$$$ %s\n", pb->name); clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); break; } else { @@ -3058,7 +3030,6 @@ void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph /* find location of net driver if exist in clb, NULL otherwise */ // find the driver of the input net connected to the pin being studied const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - // find the id of the atom occupying the input primitive_pb const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); // find the pb block occupied by the driving atom @@ -3717,4 +3688,4 @@ void init_clb_atoms_lookup(vtr::vector& net_output_feeds_driving_block_input, diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index 23c147c244a..252dc37a98d 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -24,8 +24,6 @@ #include "SetupGrid.h" #include "re_cluster.h" -#include "pack_utils.h" -#include "re_cluster_util.h" /* #define DUMP_PB_GRAPH 1 */ /* #define DUMP_BLIF_INPUT 1 */ @@ -116,10 +114,10 @@ bool try_pack(t_packer_opts* packer_opts, } helper_ctx.target_external_pin_util = parse_target_external_pin_util(packer_opts->target_external_pin_util); - helper_ctx.high_fanout_thresholds = parse_high_fanout_thresholds(packer_opts->high_fanout_threshold); + t_pack_high_fanout_thresholds high_fanout_thresholds = parse_high_fanout_thresholds(packer_opts->high_fanout_threshold); VTR_LOG("Packing with pin utilization targets: %s\n", target_external_pin_util_to_string(helper_ctx.target_external_pin_util).c_str()); - VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds_to_string(helper_ctx.high_fanout_thresholds).c_str()); + VTR_LOG("Packing with high fanout thresholds: %s\n", 
high_fanout_thresholds_to_string(high_fanout_thresholds).c_str()); bool allow_unrelated_clustering = false; if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) { @@ -152,7 +150,7 @@ bool try_pack(t_packer_opts* packer_opts, balance_block_type_util, lb_type_rr_graphs, helper_ctx.target_external_pin_util, - helper_ctx.high_fanout_thresholds, + high_fanout_thresholds, attraction_groups, floorplan_regions_overfull, clustering_data); @@ -258,126 +256,25 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - for (int thread_id = 0; thread_id < packer_opts->pack_num_threads; thread_id++) { - free_cluster_placement_stats(helper_ctx.cluster_placement_stats[thread_id]); - delete[] helper_ctx.primitives_list[thread_id]; - } + free_cluster_placement_stats(helper_ctx.cluster_placement_stats); + delete[] helper_ctx.primitives_list; ++pack_iteration; } /* Packing iterative improvement can be done here */ + /* Use the re-cluster API to edit it */ /******************* Start *************************/ - auto& cluster_ctx = g_vpr_ctx.clustering(); - // Elgammal debugging - /* - * for (auto& clb : cluster_ctx.clb_nlist.blocks()) { - * VTR_LOG("### block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); - * } - */ - /* - auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - t_pack_molecule* mol = rng.first->second; - VTR_LOG("Pack move is starting:\n\n"); - bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(55)); - mol = rng.first->second; - moved = move_mol_to_existing_cluster(mol, - ClusterBlockId(43), - true, - 0, - clustering_data, - 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! 
:((\n"); - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(44)); - mol = rng.first->second; - auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(77)); - t_pack_molecule* mol2 = rng2.first->second; - moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - - auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); - t_pack_molecule* mol = rng.first->second; - auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(42)); - t_pack_molecule* mol2 = rng2.first->second; - bool moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - auto rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(3)); - t_pack_molecule* mol = rng.first->second; - bool moved = move_mol_to_new_cluster(mol, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(4)); - mol = rng.first->second; - moved = move_mol_to_existing_cluster(mol, - ClusterBlockId(4), - true, - 0, - clustering_data, - 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! :((\n"); - - rng = atom_ctx.atom_molecules.equal_range(AtomBlockId(4)); - mol = rng.first->second; - auto rng2 = atom_ctx.atom_molecules.equal_range(AtomBlockId(5)); - t_pack_molecule* mol2 = rng2.first->second; - moved = swap_two_molecules(mol, mol2, true, 0, clustering_data, 0); - if (moved) - VTR_LOG("Move is Done :)\n"); - else - VTR_LOG("Move failed! 
:((\n"); - - - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - VTR_LOG("\n# block id = %d\n", blk_id); - VTR_LOG("type = %d\n atoms:\n ", cluster_ctx.clb_nlist.block_type(blk_id)->index); - for (auto atom : *cluster_to_atoms(blk_id)) { - VTR_LOG("\tatom = %d\n", atom); - for (auto atom_pin : atom_ctx.nlist.block_pins(atom)) { - VTR_LOG("\t\tatom_pin = %d, type = %d, atom_net=%d, cluster_net=%d\n", atom_pin, atom_ctx.nlist.pin_type(atom_pin), atom_ctx.nlist.pin_net(atom_pin), atom_ctx.lookup.clb_net(atom_ctx.nlist.pin_net(atom_pin))); - } - } - } - - VTR_LOG("Start the iterative improvement process\n"); - iteratively_improve_packing(*packer_opts, clustering_data, 2); + //iteratively_improve_packing(*packer_opts, clustering_data, 2); VTR_LOG("the iterative improvement process is done\n"); - */ - /* // Elgammal debugging - for(auto& clb : cluster_ctx.clb_nlist.blocks()) { - VTR_LOG("@@@ block: %zu --> %s\n", clb, cluster_ctx.clb_nlist.block_pb(clb)->name); - } - */ - if(packer_opts->pack_num_moves != 0) { - for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { - free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); - } - } + /* + * auto& cluster_ctx = g_vpr_ctx.clustering(); + * for (auto& blk_id : g_vpr_ctx.clustering().clb_nlist.blocks()) { + * free_pb_stats_recursive(cluster_ctx.clb_nlist.block_pb(blk_id)); + * } + */ /******************** End **************************/ //check clustering and output it @@ -386,11 +283,6 @@ bool try_pack(t_packer_opts* packer_opts, // Free Data Structures free_clustering_data(*packer_opts, clustering_data); - for (int i = 0; i < packer_opts->pack_num_threads; i++) { - free_cluster_placement_stats(helper_ctx.cluster_placement_stats[i]); - delete[] helper_ctx.primitives_list[i]; - } - VTR_LOG("\n"); VTR_LOG("Netlist conversion complete.\n"); VTR_LOG("\n"); diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp index 03ad3b2fc03..473b651f7c7 100644 --- a/vpr/src/pack/pb_type_graph.cpp 
+++ b/vpr/src/pack/pb_type_graph.cpp @@ -529,7 +529,7 @@ static void add_primitive_logical_classes(t_logical_block_type* logical_block) { } num_added_classes += add_port_logical_classes(logical_block, pb_graph_pins, num_ports, num_pins); } - logical_block->primitive_pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, + logical_block->pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, first_class_num + num_added_classes - 1))); } } diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 04e7ae23d3e..fb67f0b37b4 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -7,8 +7,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, bool during_packing, int verbosity, - t_clustering_data& clustering_data, - int thread_id) { + t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& device_ctx = g_vpr_ctx.device(); @@ -67,8 +66,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, verbosity, clustering_data, &router_data, - temp_cluster_pr, - thread_id); + temp_cluster_pr); //Commit or revert the move if (is_created) { @@ -83,7 +81,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data, thread_id); + revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); } @@ -103,8 +101,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, const ClusterBlockId& new_clb, bool during_packing, int verbosity, - t_clustering_data& clustering_data, - int thread_id) { + t_clustering_data& clustering_data) { //define local variables bool is_removed, is_added; AtomBlockId root_atom_id = molecule->atom_block_ids[molecule->root]; @@ -146,7 +143,7 @@ bool move_mol_to_existing_cluster(t_pack_molecule* molecule, commit_mol_move(old_clb, new_clb, during_packing, false); VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data, thread_id); + revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); VTR_LOGV(verbosity > 4, "Atom:%zu move failed. Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); } @@ -230,7 +227,6 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, old_1_router_data = nullptr; old_2_router_data = nullptr; - free(clb_pb_1->name); cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); free(clb_pb_2->name); @@ -241,7 +237,7 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); if (!mol_2_success) { - remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, false, old_2_router_data); + remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, 
old_1_router_data); mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); @@ -263,21 +259,12 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, //commit the move if succeeded or revert if failed VTR_ASSERT(mol_1_success && mol_2_success); - //Fix block names - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - - //If the move is done after packing not during it, some fixes need to be done on the clustered netlist if (!during_packing) { fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); fix_clustered_netlist(molecule_2, molecule_2_size, clb_2, clb_1); } - //Free free_router_data(old_1_router_data); free_router_data(old_2_router_data); old_1_router_data = nullptr; diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index c22b72d5750..2cdbf8dab53 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -18,6 +18,7 @@ const char* name_suffix = "_m"; static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); static void fix_atom_pin_mapping(const AtomBlockId blk); + static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); static void check_net_absorbtion(const AtomNetId atom_net_id, const ClusterBlockId new_clb, @@ -25,19 +26,23 @@ static void check_net_absorbtion(const AtomNetId atom_net_id, ClusterPinId& cluster_pin_id, bool& previously_absorbed, bool& now_abosrbed); + static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); + static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, int molecule_size, const ClusterBlockId& old_clb, const ClusterBlockId& new_clb); 
-static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id = 0); + +static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, + const std::unordered_set* clb_atoms); + static void update_cluster_pb_stats(const t_pack_molecule* molecule, int molecule_size, ClusterBlockId clb_index, bool is_added); /***************** API functions ***********************/ - ClusterBlockId atom_to_cluster(const AtomBlockId& atom) { auto& atom_ctx = g_vpr_ctx.atom(); return (atom_ctx.lookup.atom_clb(atom)); @@ -124,8 +129,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - int thread_id) { + PartitionRegion& temp_cluster_pr) { auto& atom_ctx = g_vpr_ctx.atom(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); @@ -155,7 +159,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, pack_result = try_pack_molecule(cluster_placement_stats, molecule, - helper_ctx.primitives_list[thread_id], + helper_ctx.primitives_list, pb, helper_ctx.num_models, helper_ctx.max_cluster_size, @@ -207,8 +211,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, std::unordered_set* new_clb_atoms, bool during_packing, t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - int thread_id) { + t_lb_router_data*& router_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); @@ -219,16 +222,16 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, t_pb* temp_pb = cluster_ctx.clb_nlist.block_pb(new_clb); //re-build cluster placement stats - rebuild_cluster_placement_stats(new_clb, new_clb_atoms, thread_id); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[thread_id][block_type->index]))) + 
rebuild_cluster_placement_stats(new_clb, new_clb_atoms); + if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[block_type->index]))) return false; //re-build router_data structure for this cluster router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][block_type->index]), + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), molecule, - helper_ctx.primitives_list[thread_id], + helper_ctx.primitives_list, temp_pb, helper_ctx.num_models, helper_ctx.max_cluster_size, @@ -281,15 +284,14 @@ void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, - t_clustering_data& clustering_data, - int thread_id) { + t_clustering_data& clustering_data) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(old_clb)->index]), + e_block_pack_status pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(old_clb)->index]), molecule, - helper_ctx.primitives_list[thread_id], + helper_ctx.primitives_list, cluster_ctx.clb_nlist.block_pb(old_clb), helper_ctx.num_models, helper_ctx.max_cluster_size, @@ -633,14 +635,15 @@ static bool count_children_pbs(const t_pb* pb) { } #endif -static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, const std::unordered_set* clb_atoms, int thread_id) { +static void rebuild_cluster_placement_stats(const ClusterBlockId& clb_index, + const std::unordered_set* clb_atoms) { auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& cluster_ctx = g_vpr_ctx.clustering(); auto& atom_ctx = g_vpr_ctx.atom(); - 
t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[thread_id][cluster_ctx.clb_nlist.block_type(clb_index)->index]); + t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_placement_stats, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); + set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); for (auto& atom : *clb_atoms) { const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom); @@ -659,6 +662,7 @@ void commit_mol_removal(const t_pack_molecule* molecule, t_lb_router_data*& router_data, t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + for (int i_atom = 0; i_atom < molecule_size; i_atom++) { if (molecule->atom_block_ids[i_atom]) { revert_place_atom_block(molecule->atom_block_ids[i_atom], router_data); @@ -676,7 +680,6 @@ void commit_mol_removal(const t_pack_molecule* molecule, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); } - //free_router_data(router_data); } bool check_type_and_mode_compitability(const ClusterBlockId& old_clb, diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index 2d001932c26..e5bf3f89096 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -87,8 +87,7 @@ bool start_new_cluster_for_mol(t_pack_molecule* molecule, int verbosity, t_clustering_data& clustering_data, t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - int thread_id = 0); + PartitionRegion& temp_cluster_pr); /** * @brief A function 
that packs a molecule into an existing cluster @@ -107,8 +106,7 @@ bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, std::unordered_set* new_clb_atoms, bool during_packing, t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - int thread_id = 0); + t_lb_router_data*& router_data); /** * @brief A function that fix the clustered netlist if the move is performed @@ -145,16 +143,11 @@ void revert_mol_move(const ClusterBlockId& old_clb, t_pack_molecule* molecule, t_lb_router_data*& old_router_data, bool during_packing, - t_clustering_data& clustering_data, - int thread_id = 0); + t_clustering_data& clustering_data); /** * * @brief A function that checks the legality of a cluster by running the intra-cluster routing -<<<<<<< HEAD - * -======= ->>>>>>> f84a79291df0319f3b0d0d6bf2bc989091c36541 */ bool is_cluster_legal(t_lb_router_data*& router_data); @@ -163,10 +156,6 @@ bool is_cluster_legal(t_lb_router_data*& router_data); * * @params during_packing: true if this function is called during packing, false if it is called during placement * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) -<<<<<<< HEAD - * @params -======= ->>>>>>> f84a79291df0319f3b0d0d6bf2bc989091c36541 */ void commit_mol_removal(const t_pack_molecule* molecule, const int& molecule_size, From 4076172765d4635f69e5dfb6b89f5e59e68dd9c1 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 11:32:49 -0400 Subject: [PATCH 125/188] fix a typo in a field name --- vpr/src/pack/pb_type_graph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp index 473b651f7c7..7ed4c397027 100644 --- a/vpr/src/pack/pb_type_graph.cpp +++ b/vpr/src/pack/pb_type_graph.cpp @@ -529,8 +529,8 @@ static void add_primitive_logical_classes(t_logical_block_type* logical_block) { } num_added_classes += add_port_logical_classes(logical_block, pb_graph_pins, num_ports, num_pins); } - logical_block->pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, - first_class_num + num_added_classes - 1))); + logical_block->primitive_pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num, + first_class_num + num_added_classes - 1))); } } From 3a16d9b2680e7f63e536e5c0a15b28b97858524d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 11:35:35 -0400 Subject: [PATCH 126/188] comment an error in place_recluster --- vpr/src/place/place_re_cluster.cpp | 6 +++--- vpr/src/place/place_re_cluster.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp index 5bbfe8a7218..62215623d6a 100644 --- a/vpr/src/place/place_re_cluster.cpp +++ b/vpr/src/place/place_re_cluster.cpp @@ -67,9 +67,9 @@ static bool swap_atoms (const t_place_algorithm& place_algorithm, // e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); - if (!floorplan_legal(blocks_affected)) { - return false; - } +// if (!floorplan_legal(blocks_affected)) { 
+// return false; +// } apply_move_blocks(blocks_affected); diff --git a/vpr/src/place/place_re_cluster.h b/vpr/src/place/place_re_cluster.h index fb5dc5cda29..63cd227775c 100644 --- a/vpr/src/place/place_re_cluster.h +++ b/vpr/src/place/place_re_cluster.h @@ -5,7 +5,6 @@ #ifndef VTR_PLACE_RE_CLUSTER_H #define VTR_PLACE_RE_CLUSTER_H -#include "pack_utils.h" #include "timing_place.h" class PlaceReCluster { From 1daf8b949a989433623b714e60eb7fc1c90477b0 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 11:40:30 -0400 Subject: [PATCH 127/188] add algorithm lib to vtr_vec_id --- libs/libvtrutil/src/vtr_vec_id_set.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libvtrutil/src/vtr_vec_id_set.h b/libs/libvtrutil/src/vtr_vec_id_set.h index 7207225932c..10dc10e0f0a 100644 --- a/libs/libvtrutil/src/vtr_vec_id_set.h +++ b/libs/libvtrutil/src/vtr_vec_id_set.h @@ -2,6 +2,7 @@ #define VTR_SET_H #include +#include namespace vtr { From e8bc42c86cdc6eafb3b9d19039b6434d2d228571 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 18 Oct 2023 11:46:39 -0400 Subject: [PATCH 128/188] make format --- libs/libarchfpga/src/physical_types_util.cpp | 1 - vpr/src/base/read_options.cpp | 3 ++- vpr/src/base/vpr_types.cpp | 2 +- .../atom_critical_uniform_move_generator.cpp | 10 +++----- .../atom_critical_uniform_move_generator.h | 3 +-- vpr/src/place/move_transactions.cpp | 1 - vpr/src/place/move_transactions.h | 4 ++- vpr/src/place/move_utils.cpp | 4 +-- vpr/src/place/net_cost_handler.cpp | 14 +++-------- vpr/src/place/place.cpp | 1 - vpr/src/place/place_constraints.h | 2 +- vpr/src/place/place_re_cluster.cpp | 25 ++++++------------- vpr/src/place/place_util.cpp | 2 +- vpr/src/place/place_util.h | 2 +- vpr/src/util/vpr_utils.cpp | 3 +-- 15 files changed, 27 insertions(+), 50 deletions(-) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index da3a6ccc577..ce7821502e8 100644 --- 
a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -540,7 +540,6 @@ bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph return true; else return false; - } int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 75a65f78799..9b6e88e6809 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2124,7 +2124,8 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg place_grp.add_argument(args.place_re_cluster, "--place_re_cluster") .help( - "Use this option to determine whether reclustering occurs during placement. """ + "Use this option to determine whether reclustering occurs during placement. " + "" "When this option is set to 'on,' the placement stage may result in changes to the clustering of certain clusters. " "Conversely, if the option is set to 'off,' the clustering determined by the packer will remain unchanged") .default_value("off") diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 74d6447cb41..f770f7c097a 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -324,7 +324,7 @@ void t_cluster_placement_stats::free_primitives() { AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { const auto& atom_lookup = g_vpr_ctx.atom().lookup; - t_pl_loc cluster_loc (loc.x, loc.y, loc.sub_tile, loc.layer); + t_pl_loc cluster_loc(loc.x, loc.y, loc.sub_tile, loc.layer); ClusterBlockId cluster_at_loc = block_at_location(cluster_loc); if (cluster_at_loc == EMPTY_BLOCK_ID) { return EMPTY_PRIMITIVE_BLOCK_ID; diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index 08c8b39d957..7ba5e2115e2 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ 
b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -2,10 +2,9 @@ #include "globals.h" #include "place_constraints.h" -static std::pair getCriticalAtomBlock(); +static std::pair getCriticalAtomBlock(); -e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, - float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) { +e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) { auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -13,7 +12,7 @@ e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_m AtomBlockId atom_blk_id = AtomBlockId::INVALID(); std::tie(cluster_blk_id, atom_blk_id) = getCriticalAtomBlock(); - if(cluster_blk_id == ClusterBlockId::INVALID() || atom_blk_id == AtomBlockId::INVALID()) { + if (cluster_blk_id == ClusterBlockId::INVALID() || atom_blk_id == AtomBlockId::INVALID()) { return e_create_move::ABORT; // Not a valid block } @@ -38,7 +37,7 @@ e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_m return create_move; } -static std::pair getCriticalAtomBlock() { +static std::pair getCriticalAtomBlock() { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& cluster_netlist = cluster_ctx.clb_nlist; const auto& atom_netlist = g_vpr_ctx.atom().nlist; @@ -60,5 +59,4 @@ static std::pair getCriticalAtomBlock() { AtomBlockId atom_crit_blk = atom_netlist.net_driver_block(atom_crit_net); return std::make_pair(cluster_crit_blk, atom_crit_blk); - } diff --git a/vpr/src/place/atom_critical_uniform_move_generator.h b/vpr/src/place/atom_critical_uniform_move_generator.h index 2e934a11bbd..4cfd8b31c84 100644 --- 
a/vpr/src/place/atom_critical_uniform_move_generator.h +++ b/vpr/src/place/atom_critical_uniform_move_generator.h @@ -20,7 +20,6 @@ * Returns its choices by filling in affected_blocks. */ class AtomCriticalUniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, - float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) override; + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) override; }; #endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 8ef9ec29ad5..e2474582ede 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -73,7 +73,6 @@ void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected) { seen_clusters.insert(cluster_blk); place_sync_external_block_connections(cluster_blk); } - } } diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index cb37c4b97b3..0a164862395 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -11,7 +11,9 @@ struct t_pl_moved_block { t_pl_moved_block() = default; t_pl_moved_block(ClusterBlockId block_num_, const t_pl_loc& old_loc_, const t_pl_loc& new_loc_) - : block_num(block_num_), old_loc(old_loc_), new_loc(new_loc_) {} + : block_num(block_num_) + , old_loc(old_loc_) + , new_loc(new_loc_) {} ClusterBlockId block_num; t_pl_loc old_loc; t_pl_loc new_loc; diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 19e265c847a..cf82c7c56b3 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -80,7 +80,6 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, 
ClusterBlock } e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; e_block_move_result outcome = e_block_move_result::VALID; @@ -157,7 +156,6 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte } e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { - VTR_ASSERT(b_from); ClusterBlockId cluster_b_from = g_vpr_ctx.atom().lookup.atom_clb(b_from); @@ -533,7 +531,7 @@ bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to) { const auto& atom_pb = g_vpr_ctx.atom().lookup.atom_pb(blk); ClusterBlockId cluster_block = g_vpr_ctx.placement().grid_blocks.block_at_location({to.x, to.y, to.sub_tile, to.layer}); - t_pl_loc cluster_loc (to.x, to.y, to.sub_tile, to.layer); + t_pl_loc cluster_loc(to.x, to.y, to.sub_tile, to.layer); if (!is_legal_swap_to_location(cluster_block, cluster_loc)) { return false; diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 19df5aee79b..660b348726a 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -64,7 +64,6 @@ static vtr::vector bb_updated_before; static vtr::vector ts_bb_coord_new, ts_bb_edge_new; static std::vector ts_nets_to_update; - static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks); @@ -111,8 +110,6 @@ static double wirelength_crossing_count(size_t fanout); static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr); - - //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks) { @@ -728,7 +725,6 @@ static double wirelength_crossing_count(size_t fanout) { } } - static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { /* Finds the 
cost due to one net by looking at its coordinate bounding * * box. */ @@ -762,7 +758,6 @@ int find_affected_nets_and_update_costs( t_pl_atom_blocks_to_be_moved& blocks_affected, double& bb_delta_c, double& timing_delta_c) { - const auto& atom_look_up = g_vpr_ctx.atom().lookup; const auto& atom_nlist = g_vpr_ctx.atom().nlist; @@ -779,7 +774,7 @@ int find_affected_nets_and_update_costs( const auto& atom_old_loc = blocks_affected.moved_blocks[iblk].old_loc; const auto& atom_new_loc = blocks_affected.moved_blocks[iblk].new_loc; - for (const AtomPinId& atom_pin: atom_nlist.block_pins(atom_blk_id)) { + for (const AtomPinId& atom_pin : atom_nlist.block_pins(atom_blk_id)) { auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); for (const auto& cluster_pin : cluster_pins) { bool is_src_moving = false; @@ -801,8 +796,6 @@ int find_affected_nets_and_update_costs( timing_delta_c, num_affected_nets, is_src_moving); - - } } } @@ -814,7 +807,7 @@ int find_affected_nets_and_update_costs( ClusterNetId net_id = ts_nets_to_update[inet_affected]; proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, - &ts_bb_coord_new[net_id]); + &ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } @@ -889,7 +882,7 @@ int find_affected_nets_and_update_costs( ClusterNetId net_id = ts_nets_to_update[inet_affected]; proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, - &ts_bb_coord_new[net_id]); + &ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } @@ -1155,7 +1148,6 @@ void init_net_cost_structs(size_t num_nets) { * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * * been recomputed. 
*/ bb_updated_before.resize(num_nets, NOT_UPDATED_YET); - } void free_net_cost_structs() { diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 85d67f1e531..7c930d71f5f 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1829,7 +1829,6 @@ static void alloc_and_load_placement_structs(float place_cost_exp, place_move_ctx.bb_coords.resize(num_nets, t_bb()); place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); - alloc_and_load_for_fast_cost_update(place_cost_exp); alloc_and_load_try_swap_structs(); diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index fe0a21cf633..8ca39656f37 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -71,7 +71,7 @@ inline bool floorplan_legal(const t_pl_atom_blocks_to_be_moved& blocks_affected) for (int i = 0; i < blocks_affected.num_moved_blocks; i++) { AtomBlockId mv_atom_blk = blocks_affected.moved_blocks[i].block_num; ClusterBlockId cluster_blk = g_vpr_ctx.atom().lookup.atom_clb(mv_atom_blk); - const t_pl_atom_loc& to_pl_atom_loc = blocks_affected.moved_blocks[i].new_loc; + const t_pl_atom_loc& to_pl_atom_loc = blocks_affected.moved_blocks[i].new_loc; t_pl_loc to_pl_loc = {to_pl_atom_loc.x, to_pl_atom_loc.y, to_pl_atom_loc.sub_tile, to_pl_atom_loc.layer}; floorplan_legal = cluster_floorplanning_legal(cluster_blk, to_pl_loc); if (!floorplan_legal) { diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp index 62215623d6a..96eca2a059f 100644 --- a/vpr/src/place/place_re_cluster.cpp +++ b/vpr/src/place/place_re_cluster.cpp @@ -35,7 +35,6 @@ void PlaceReCluster::re_cluster(const t_place_algorithm& place_algorithm, from_cluster_blk_id = random_cluster(); from_atom_blk_id = random_atom_in_cluster(from_cluster_blk_id); - while (true) { to_cluster_blk_id = random_cluster(); to_atom_blk_id = random_atom_in_cluster(to_cluster_blk_id); @@ -45,31 +44,29 @@ void PlaceReCluster::re_cluster(const 
t_place_algorithm& place_algorithm, } } - if(!swap_atoms(place_algorithm, delay_model, criticalities, blocks_affected, from_atom_blk_id, to_atom_blk_id)) { + if (!swap_atoms(place_algorithm, delay_model, criticalities, blocks_affected, from_atom_blk_id, to_atom_blk_id)) { revert_move_blocks(blocks_affected); } } - } -static bool swap_atoms (const t_place_algorithm& place_algorithm, +static bool swap_atoms(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId /* from_atom_blk_id */, AtomBlockId /* to_atom_blk_id */) { - double delta_c = 0; //Change in cost due to this swap. double bb_delta_c = 0; //Change in the bounding box (wiring) cost. double timing_delta_c = 0; //Change in the timing cost (delay * criticality). -// const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); + // const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); -// e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); + // e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); -// if (!floorplan_legal(blocks_affected)) { -// return false; -// } + // if (!floorplan_legal(blocks_affected)) { + // return false; + // } apply_move_blocks(blocks_affected); @@ -79,22 +76,18 @@ static bool swap_atoms (const t_place_algorithm& place_algorithm, // TODO:dummy return just to remove warnings return (num_nets_affected + delta_c) == 0; - } static ClusterBlockId random_cluster() { - const auto& cluster_ctx = g_vpr_ctx.clustering(); int rand_id = vtr::irand(cluster_ctx.clb_nlist.blocks().size() - 1); return ClusterBlockId(rand_id); - } static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id) { - -// const auto& cluster_ctx = g_vpr_ctx.clustering(); + // const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup[cluster_blk_id]; @@ -107,6 +100,4 @@ 
static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id) { AtomBlockId atom_blk_id = *it; return atom_blk_id; - } - diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 7cc7a1ee68d..1e3452bacca 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -548,7 +548,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ return (mac_can_be_placed); } -t_pl_atom_loc get_atom_loc (AtomBlockId atom) { +t_pl_atom_loc get_atom_loc(AtomBlockId atom) { const auto& atom_lookup = g_vpr_ctx.atom().lookup; ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom); t_pl_loc cluster_loc = g_vpr_ctx.placement().block_locs[cluster_blk].loc; diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 08ba65f08f5..bf0a0ea0537 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -266,6 +266,6 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ //TODO: The atom loc should be stored in place_ctx -- I am creating this function because I didn't want to create another // Array in place_ctx. 
-t_pl_atom_loc get_atom_loc (AtomBlockId atom); +t_pl_atom_loc get_atom_loc(AtomBlockId atom); #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index fed27deb15d..d753a3604ee 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -520,8 +520,7 @@ std::vector cluster_pins_connected_to_atom_pin(AtomPinId atom_pin) if (atom_pb_graph_pin->port->type == PORTS::IN_PORT) { int cluster_pin_id; int cluster_net_pin_id; - std::tie(cluster_net_id, cluster_pin_id, cluster_net_pin_id) = - find_pb_route_clb_input_net_pin(cluster_block_id, atom_pb_pin_id); + std::tie(cluster_net_id, cluster_pin_id, cluster_net_pin_id) = find_pb_route_clb_input_net_pin(cluster_block_id, atom_pb_pin_id); if (cluster_net_id != ClusterNetId::INVALID()) { VTR_ASSERT(cluster_pin_id != -1 && cluster_net_pin_id != -1); cluster_pins.push_back(cluster_net_list.net_pin(cluster_net_id, cluster_net_pin_id)); From b04e309cb203b9f49eb0ce5f05179e8a600f5acc Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 09:06:00 -0400 Subject: [PATCH 129/188] fix placement debugging messages to show double numbers as long f instead of f --- vpr/src/place/place.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 7c930d71f5f..3e7057c129c 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1302,7 +1302,7 @@ static e_move_result try_swap(const t_annealing_state* state, } LOG_MOVE_STATS_PROPOSED(t, blocks_affected); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %lf, bb_cost %lf, timing cost %lf\n", costs->cost, costs->bb_cost, costs->timing_cost); e_move_result move_outcome = e_move_result::ABORTED; @@ -1374,8 +1374,8 @@ static e_move_result try_swap(const 
t_annealing_state* state, /* Take delta_c as a combination of timing and wiring cost. In * addition to `timing_tradeoff`, we normalize the cost values */ VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %f, bb_cost_norm %f, timing_tradeoff %f, " - "timing_delta_c %f, timing_cost_norm %f\n", + "\t\tMove bb_delta_c %lf, bb_cost_norm %lf, timing_tradeoff %f, " + "timing_delta_c %lf, timing_cost_norm %lf\n", bb_delta_c, costs->bb_cost_norm, timing_tradeoff, @@ -1387,8 +1387,7 @@ static e_move_result try_swap(const t_annealing_state* state, } else { VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %f, bb_cost_norm %f, timing_tradeoff %f, " - "timing_delta_c %f, timing_cost_norm %f\n", + "\t\tMove bb_delta_c %lf, bb_cost_norm %lf\n", bb_delta_c, costs->bb_cost_norm); delta_c = bb_delta_c * costs->bb_cost_norm; @@ -1547,7 +1546,7 @@ static e_move_result try_swap(const t_annealing_state* state, // greatly slow the placer, but can debug some issues. check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); #endif - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %lf, bb_cost %lf, timing cost %lf\n", costs->cost, costs->bb_cost, costs->timing_cost); return move_outcome; } @@ -1667,7 +1666,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { static e_move_result assess_swap(double delta_c, double t) { /* Returns: 1 -> move accepted, 0 -> rejected. 
*/ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %f delta_c is %f\n", t, delta_c); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %lf delta_c is %lf\n", t, delta_c); if (delta_c <= 0) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); return ACCEPTED; From b94880d4834195cab9acc318d00d457fee5f0eac Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 09:37:13 -0400 Subject: [PATCH 130/188] change lf to e --- vpr/src/place/place.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 3e7057c129c..dd1644fc65f 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1302,7 +1302,7 @@ static e_move_result try_swap(const t_annealing_state* state, } LOG_MOVE_STATS_PROPOSED(t, blocks_affected); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %lf, bb_cost %lf, timing cost %lf\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost); e_move_result move_outcome = e_move_result::ABORTED; @@ -1374,8 +1374,8 @@ static e_move_result try_swap(const t_annealing_state* state, /* Take delta_c as a combination of timing and wiring cost. 
In * addition to `timing_tradeoff`, we normalize the cost values */ VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %lf, bb_cost_norm %lf, timing_tradeoff %f, " - "timing_delta_c %lf, timing_cost_norm %lf\n", + "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " + "timing_delta_c %e, timing_cost_norm %e\n", bb_delta_c, costs->bb_cost_norm, timing_tradeoff, @@ -1387,7 +1387,7 @@ static e_move_result try_swap(const t_annealing_state* state, } else { VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %lf, bb_cost_norm %lf\n", + "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", bb_delta_c, costs->bb_cost_norm); delta_c = bb_delta_c * costs->bb_cost_norm; @@ -1546,7 +1546,7 @@ static e_move_result try_swap(const t_annealing_state* state, // greatly slow the placer, but can debug some issues. check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); #endif - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %lf, bb_cost %lf, timing cost %lf\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost); return move_outcome; } @@ -1666,7 +1666,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { static e_move_result assess_swap(double delta_c, double t) { /* Returns: 1 -> move accepted, 0 -> rejected. 
*/ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %lf delta_c is %lf\n", t, delta_c); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); if (delta_c <= 0) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); return ACCEPTED; From 042a8ed36ea47095d81dbf8a58ef266fc66a5e01 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 10:44:23 -0400 Subject: [PATCH 131/188] tmp: add some couts for dubbing --- vpr/src/place/net_cost_handler.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 660b348726a..510519c3c15 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -220,7 +220,9 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay; auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost; + std::cout << "update_td_delta_costs: net=" << size_t(net) << " pin=" << size_t(pin) << " is_src_moving=" << is_src_moving << std::endl; if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { + std::cout << "\tPin type: " << "DRIVER" << std::endl; /* This pin is a net driver on a moved block. */ /* Recompute all point to point connection delays for the net sinks. 
*/ for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); @@ -229,13 +231,15 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { + std::cout << "\t\ttmp delay is equal to connection delay: " << temp_delay << std::endl; continue; } /* Calculate proposed delay and cost values */ proposed_connection_delay[net][ipin] = temp_delay; - + std::cout << "\t\tproposed connection delay: " << proposed_connection_delay[net][ipin] << " criticality: " << criticalities.criticality(net, ipin) << std::endl; proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; + std::cout << "\t\tProposed connection delay: " << proposed_connection_timing_cost[net][ipin]<< " connection delay:" << connection_timing_cost[net][ipin] << std::endl; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; @@ -244,6 +248,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, affected_pins.push_back(sink_pin); } } else { + std::cout << "\tPin type: " << "DRIVER" << std::endl; /* This pin is a net sink on a moved block */ VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); @@ -256,18 +261,22 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { + std::cout << "\t\ttmp delay is equal to connection delay: " << temp_delay << std::endl; return; } /* Calculate proposed delay and cost values */ proposed_connection_delay[net][ipin] = temp_delay; - + std::cout << "\t\tproposed connection delay: " << proposed_connection_delay[net][ipin] << " criticality: " << criticalities.criticality(net, ipin) << std::endl; proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * 
temp_delay; + std::cout << "\t\tProposed connection delay: " << proposed_connection_timing_cost[net][ipin]<< " connection delay:" << connection_timing_cost[net][ipin] << std::endl; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; /* Record this connection in blocks_affected.affected_pins */ affected_pins.push_back(pin); + } else { + std::cout << "\t\t Pin's source is moving" << std::endl; } } } From cd1ef4d31aca42267f7dd229d4ecd54ed5b2a305 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 15:06:48 -0400 Subject: [PATCH 132/188] Revert "tmp: add some couts for dubbing" This reverts commit 042a8ed36ea47095d81dbf8a58ef266fc66a5e01. --- vpr/src/place/net_cost_handler.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 510519c3c15..660b348726a 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -220,9 +220,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay; auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost; - std::cout << "update_td_delta_costs: net=" << size_t(net) << " pin=" << size_t(pin) << " is_src_moving=" << is_src_moving << std::endl; if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { - std::cout << "\tPin type: " << "DRIVER" << std::endl; /* This pin is a net driver on a moved block. */ /* Recompute all point to point connection delays for the net sinks. 
*/ for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); @@ -231,15 +229,13 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { - std::cout << "\t\ttmp delay is equal to connection delay: " << temp_delay << std::endl; continue; } /* Calculate proposed delay and cost values */ proposed_connection_delay[net][ipin] = temp_delay; - std::cout << "\t\tproposed connection delay: " << proposed_connection_delay[net][ipin] << " criticality: " << criticalities.criticality(net, ipin) << std::endl; + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; - std::cout << "\t\tProposed connection delay: " << proposed_connection_timing_cost[net][ipin]<< " connection delay:" << connection_timing_cost[net][ipin] << std::endl; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; @@ -248,7 +244,6 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, affected_pins.push_back(sink_pin); } } else { - std::cout << "\tPin type: " << "DRIVER" << std::endl; /* This pin is a net sink on a moved block */ VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); @@ -261,22 +256,18 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { - std::cout << "\t\ttmp delay is equal to connection delay: " << temp_delay << std::endl; return; } /* Calculate proposed delay and cost values */ proposed_connection_delay[net][ipin] = temp_delay; - std::cout << "\t\tproposed connection delay: " << proposed_connection_delay[net][ipin] << " criticality: " << criticalities.criticality(net, ipin) << std::endl; + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * 
temp_delay; - std::cout << "\t\tProposed connection delay: " << proposed_connection_timing_cost[net][ipin]<< " connection delay:" << connection_timing_cost[net][ipin] << std::endl; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; /* Record this connection in blocks_affected.affected_pins */ affected_pins.push_back(pin); - } else { - std::cout << "\t\t Pin's source is moving" << std::endl; } } } From f0bb77dd9d054fdd924f11192c3ab93ecf230d39 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 15:08:48 -0400 Subject: [PATCH 133/188] set the capacity instead of creating actual elements --- vpr/src/place/move_transactions.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 0a164862395..10db37acb3e 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -41,7 +41,9 @@ struct t_pl_moved_atom_block { * graph. 
*/ struct t_pl_blocks_to_be_moved { t_pl_blocks_to_be_moved(size_t max_blocks) - : moved_blocks(max_blocks) {} + { + moved_blocks.reserve(max_blocks); + } int num_moved_blocks = 0; std::vector moved_blocks; From 8b8027e9895e98e3514cfbcec5fa991a1ac0e7c5 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 15:12:11 -0400 Subject: [PATCH 134/188] push back to blocks affected --- vpr/src/place/move_transactions.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index e2474582ede..1c8dfb6bcbd 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -54,9 +54,8 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, // Sets up the blocks moved int imoved_blk = blocks_affected.num_moved_blocks; - blocks_affected.moved_blocks[imoved_blk].block_num = blk; - blocks_affected.moved_blocks[imoved_blk].old_loc = from; - blocks_affected.moved_blocks[imoved_blk].new_loc = to; + VTR_ASSERT_SAFE(imoved_blk == int(blocks_affected.moved_blocks.size())); + blocks_affected.moved_blocks.emplace_back(blk, from, to); blocks_affected.num_moved_blocks++; return e_block_move_result::VALID; From 312ca5479124e7a23e455014c116084d156ad0a9 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 15:54:30 -0400 Subject: [PATCH 135/188] clear moved_blocks once swap is done --- vpr/src/place/move_transactions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 1c8dfb6bcbd..2dd0c31d49c 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -174,6 +174,7 @@ void clear_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) { //For run-time we just reset num_moved_blocks to zero, but do not free the blocks_affected //array to avoid memory allocation + blocks_affected.moved_blocks.clear(); 
blocks_affected.num_moved_blocks = 0; blocks_affected.affected_pins.clear(); From 463d61045a058cf1bee4ec7f63a2575a2b2539b7 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 17:41:22 -0400 Subject: [PATCH 136/188] add comments for physical_types.h --- libs/libarchfpga/src/physical_types.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index f0c66e6e11e..17b2ca9b115 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -926,7 +926,7 @@ struct t_logical_block_type { std::unordered_map pin_logical_num_to_pb_pin_mapping; /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */ std::unordered_map primitive_pb_pin_to_logical_class_num_mapping; /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */ std::vector primitive_logical_class_inf; /* primitive_logical_class_inf[class_logical_number] -> class */ - std::unordered_map primitive_pb_graph_node_class_range; + std::unordered_map primitive_pb_graph_node_class_range; /* primitive_pb_graph_node_class_range[primitive_pb_graph_node ptr] -> class range for that primitive*/ // Is this t_logical_block_type empty? bool is_empty() const; @@ -1232,7 +1232,13 @@ class t_pb_graph_node { int placement_index; + /* + * There is a root-level pb_graph_node assigned to each logical type. Each logical type can contain multiple primitives. + * If this pb_graph_node is associated with a primitive, a unique number is assigned to it within the logical block level. 
+ */ int primitive_num = OPEN; + + /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage * * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to From 472b279ca969a1f13475beead6725dc1cde684a5 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 19:01:19 -0400 Subject: [PATCH 137/188] comment on is_atom_compatible --- libs/libarchfpga/src/physical_types_util.cpp | 13 +++++++++---- libs/libarchfpga/src/physical_types_util.h | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index ce7821502e8..42cbcb5bd67 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -527,15 +527,20 @@ bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_bl bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num) { VTR_ASSERT(loc_primitive_num != OPEN); const t_pb_graph_node* loc_pb_graph_node = nullptr; - for (const auto& primiive_node_class_pair : logical_block->primitive_pb_graph_node_class_range) { - const auto& primitive_node = primiive_node_class_pair.first; - VTR_ASSERT(primitive_node->primitive_num != OPEN); + /** + * Iterate over the data structure that maps primitive_pb_graph_node to their respective class range, + * and retrieve the primitive_pb_graph_node from that map. If the primitive number assigned to that + * primitive_pb_graph_node is equal to loc_primitive_num, then we have found the desired primitive_pb_graph_node. 
+ */ + for (const auto& primitive_node_class_pair : logical_block->primitive_pb_graph_node_class_range) { + const auto& primitive_node = primitive_node_class_pair.first; + VTR_ASSERT_SAFE(primitive_node->primitive_num != OPEN); if (primitive_node->primitive_num == loc_primitive_num) { loc_pb_graph_node = primitive_node; break; } } - VTR_ASSERT(loc_pb_graph_node != nullptr); + VTR_ASSERT_SAFE(loc_pb_graph_node != nullptr); if (loc_pb_graph_node->pb_type == atom_pb_graph_node->pb_type) return true; else diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index 30cadec5962..4ce32fdff5c 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -187,6 +187,7 @@ bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_ ///@brief Verifies whether a logical block and a relative placement location is compatible with a given physical tile bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc); +///@brief Verifies whether the given atom_pb_graph_node can be mapped to the primitive that loc_primitive_num is pointing to. 
bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num); /** From 25d52677539573400491ce92f7ec1972c667c2f0 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 19:02:43 -0400 Subject: [PATCH 138/188] fix the help of place_re_cluster parameter --- vpr/src/base/read_options.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 9b6e88e6809..d154d5828c7 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2126,7 +2126,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .help( "Use this option to determine whether reclustering occurs during placement. " "" - "When this option is set to 'on,' the placement stage may result in changes to the clustering of certain clusters. " + "When this option is set to 'on,' the placement stage may change some clusters. " "Conversely, if the option is set to 'off,' the clustering determined by the packer will remain unchanged") .default_value("off") .show_in(argparse::ShowIn::HELP_ONLY); From 2bc377d04da2d39d50998a1bbdacd392539eb742 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 19:09:04 -0400 Subject: [PATCH 139/188] add comment on atom block_at_location --- vpr/src/base/vpr_types.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index f770f7c097a..2a7aa4a4358 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -322,6 +322,10 @@ void t_cluster_placement_stats::free_primitives() { } } +/** + * @brief Get the atom block id at the given location. Since we currently don't have any array to retrieve this information directly, + * we first find the cluster mapped to that location, and then find the atom inside that cluster that is mapped to the given location. 
+ */ AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { const auto& atom_lookup = g_vpr_ctx.atom().lookup; t_pl_loc cluster_loc(loc.x, loc.y, loc.sub_tile, loc.layer); From 21559aa13df70692ebc0c496ff5b493b38153ee0 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 19:11:41 -0400 Subject: [PATCH 140/188] make format --- libs/libarchfpga/src/physical_types.h | 7 +++---- vpr/src/place/move_transactions.h | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 17b2ca9b115..1eaa5cd3abf 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -923,9 +923,9 @@ struct t_logical_block_type { std::vector equivalent_tiles; ///>List of physical tiles at which one could ///>place this type of netlist block. - std::unordered_map pin_logical_num_to_pb_pin_mapping; /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */ - std::unordered_map primitive_pb_pin_to_logical_class_num_mapping; /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */ - std::vector primitive_logical_class_inf; /* primitive_logical_class_inf[class_logical_number] -> class */ + std::unordered_map pin_logical_num_to_pb_pin_mapping; /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */ + std::unordered_map primitive_pb_pin_to_logical_class_num_mapping; /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */ + std::vector primitive_logical_class_inf; /* primitive_logical_class_inf[class_logical_number] -> class */ std::unordered_map primitive_pb_graph_node_class_range; /* primitive_pb_graph_node_class_range[primitive_pb_graph_node ptr] -> class range for that primitive*/ // Is this t_logical_block_type empty? 
@@ -1238,7 +1238,6 @@ class t_pb_graph_node { */ int primitive_num = OPEN; - /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage * * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 10db37acb3e..28fd0c8ad79 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -40,8 +40,7 @@ struct t_pl_moved_atom_block { * incrementally invalidate parts of the timing * * graph. */ struct t_pl_blocks_to_be_moved { - t_pl_blocks_to_be_moved(size_t max_blocks) - { + t_pl_blocks_to_be_moved(size_t max_blocks) { moved_blocks.reserve(max_blocks); } From 1a975ad59838d445af917b29b0b08148c0e2218e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 19 Oct 2023 19:29:49 -0400 Subject: [PATCH 141/188] make the constructor explicit --- vpr/src/place/move_transactions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 28fd0c8ad79..d1073d578e3 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -40,7 +40,7 @@ struct t_pl_moved_atom_block { * incrementally invalidate parts of the timing * * graph. */ struct t_pl_blocks_to_be_moved { - t_pl_blocks_to_be_moved(size_t max_blocks) { + explicit t_pl_blocks_to_be_moved(size_t max_blocks) { moved_blocks.reserve(max_blocks); } From 9e27014ed5c8c7e899ecd5bce57fcf6979606427 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 20 Oct 2023 20:03:05 -0400 Subject: [PATCH 142/188] check if pb_graph_node benlongs to the logical type. 
If it doesn't return false --- libs/libarchfpga/src/physical_types_util.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index 42cbcb5bd67..caaaa69c313 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -527,6 +527,16 @@ bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_bl bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num) { VTR_ASSERT(loc_primitive_num != OPEN); const t_pb_graph_node* loc_pb_graph_node = nullptr; + + // Check whether the atom + const t_pb_graph_node* parent_pb_graph_node = atom_pb_graph_node->parent_pb_graph_node; + while (parent_pb_graph_node->parent_pb_graph_node != nullptr) { + parent_pb_graph_node = parent_pb_graph_node->parent_pb_graph_node; + } + + if (logical_block->pb_graph_head != parent_pb_graph_node) { + return false; + } /** * Iterate over the data structure that maps primitive_pb_graph_node to their respective class range, * and retrieve the primitive_pb_graph_node from that map. 
If the primitive number assigned to that From fba5a66ebac1e6d728c1e0b54003902466836eaa Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 20 Oct 2023 20:08:54 -0400 Subject: [PATCH 143/188] add some comments and fix atom is_legal_swap_to_location --- vpr/src/place/move_utils.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index cf82c7c56b3..a023702f76e 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -528,22 +528,30 @@ bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to) { //Note that we need to explicitly check that the types match, since the device floorplan is not //(neccessarily) translationally invariant for an arbitrary macro + const auto& place_ctx = g_vpr_ctx.placement(); + const auto& atom_lookup = g_vpr_ctx.atom().lookup; const auto& atom_pb = g_vpr_ctx.atom().lookup.atom_pb(blk); - ClusterBlockId cluster_block = g_vpr_ctx.placement().grid_blocks.block_at_location({to.x, to.y, to.sub_tile, to.layer}); - t_pl_loc cluster_loc(to.x, to.y, to.sub_tile, to.layer); + ClusterBlockId from_cluster_block = atom_lookup.atom_clb(blk); + t_pl_loc to_cluster_loc(to.x, to.y, to.sub_tile, to.layer); + ClusterBlockId to_cluster_block = place_ctx.grid_blocks.block_at_location(to_cluster_loc); + - if (!is_legal_swap_to_location(cluster_block, cluster_loc)) { + // If the clusters cannot be swapped return false + if (!is_legal_swap_to_location(from_cluster_block, to_cluster_loc)) { return false; } + // Check legality issues specific to atoms std::vector logical_blocks; - if (cluster_block.is_valid() && cluster_block != INVALID_BLOCK_ID) { + // If there is already a block at the destination, the only logical block there is the logical block of that particular cluster. + // If there isn't any, all logical blocks compatible to that sub_tile should be considered. 
+ if (to_cluster_block.is_valid() && to_cluster_block != INVALID_BLOCK_ID) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto logical_block = cluster_ctx.clb_nlist.block_type(cluster_block); + auto logical_block = cluster_ctx.clb_nlist.block_type(to_cluster_block); logical_blocks.push_back(logical_block); - } else if (cluster_block == EMPTY_BLOCK_ID) { + } else if (to_cluster_block == EMPTY_BLOCK_ID) { const auto& physical_tile = g_vpr_ctx.device().grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); const auto& sub_tile = physical_tile->sub_tiles[to.sub_tile]; logical_blocks = sub_tile.equivalent_sites; From b1bdfd11273872558e78b784b293fde299ae7363 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 20 Oct 2023 20:11:50 -0400 Subject: [PATCH 144/188] comment on move_transaction & move_utils --- vpr/src/place/move_transactions.h | 16 +++++++++++++++- vpr/src/place/move_utils.h | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index d1073d578e3..548f51eb0ae 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -25,7 +25,7 @@ struct t_pl_moved_atom_block { t_pl_atom_loc new_loc; }; -/* Stores the list of blocks to be moved in a swap during * +/* Stores the list of cluster blocks to be moved in a swap during * * placement. * * Store the information on the blocks to be moved in a swap during * * placement, in the form of array of structs instead of struct with * @@ -52,6 +52,20 @@ struct t_pl_blocks_to_be_moved { std::vector affected_pins; }; +/* Stores the list of atom blocks to be moved in a swap during * + * placement. * + * Store the information on the blocks to be moved in a swap during * + * placement, in the form of array of structs instead of struct with * + * arrays for cache efficiently * + * + * num_moved_blocks: total number of blocks moved when * + * swapping two blocks. 
* + * moved blocks: a list of moved blocks data structure with * + * information on the move. * + * [0...max_blocks-1] * + * affected_pins: pins affected by this move (used to * + * incrementally invalidate parts of the timing * + * graph. */ struct t_pl_atom_blocks_to_be_moved { t_pl_atom_blocks_to_be_moved(size_t max_blocks) : moved_blocks(max_blocks) {} diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index d69e13fbbd0..fc3d8535c29 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -94,10 +94,21 @@ e_create_move create_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlo e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); +// Update atom_blocks_affects with the information about blocks which will be moved if b_from is to be moved to t_loc. Return Valid if the move is legal. +// Currently, this function is much more limited compare to cluster one. It only supports single block move. e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); +/** + * @brief Find the blocks that will be affected by a move of b_from to to_loc + * @param blocks_affected + * @param b_from + * @param to + * @return e_block_move_result ABORT if either of the the moving blocks are already stored, or either of the blocks are fixed, to location is not + * compatible, etc. INVERT if the "from" block is a single block and the "to" block is a macro. VALID otherwise. 
+ */ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); +// Update blocks affected if neither b_from nor the block at to_loc (if there is any) is part of a macro e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); @@ -112,8 +123,20 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, e_block_move_result identify_macro_self_swap_affected_macros(std::vector& macros, const int imacro, t_pl_offset swap_offset); e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, t_pl_offset swap_offset); +/** + * @brief Check whether the "to" location is legal for the given "blk" + * @param blk + * @param to + * @return + */ bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to); +/** + * @brief Check whether the "to" location is legal for the given "blk" + * @param blk + * @param to + * @return + */ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); std::set determine_locations_emptied_by_move(t_pl_blocks_to_be_moved& blocks_affected); From b4458039d28ec783e895f2d0981f5dca969023d7 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 20 Oct 2023 20:32:15 -0400 Subject: [PATCH 145/188] experiment: allocate moved_blocks to max_num_blocks --- vpr/src/place/move_transactions.cpp | 5 ++++- vpr/src/place/move_transactions.h | 5 ++--- vpr/src/place/net_cost_handler.cpp | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 2dd0c31d49c..b3a8a8746e5 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -54,6 +54,9 @@ e_block_move_result 
record_block_move(t_pl_blocks_to_be_moved& blocks_affected, // Sets up the blocks moved int imoved_blk = blocks_affected.num_moved_blocks; + blocks_affected.moved_blocks[imoved_blk].block_num = blk; + blocks_affected.moved_blocks[imoved_blk].old_loc = from; + blocks_affected.moved_blocks[imoved_blk].new_loc = to; VTR_ASSERT_SAFE(imoved_blk == int(blocks_affected.moved_blocks.size())); blocks_affected.moved_blocks.emplace_back(blk, from, to); blocks_affected.num_moved_blocks++; @@ -174,7 +177,7 @@ void clear_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) { //For run-time we just reset num_moved_blocks to zero, but do not free the blocks_affected //array to avoid memory allocation - blocks_affected.moved_blocks.clear(); + blocks_affected.num_moved_blocks = 0; blocks_affected.affected_pins.clear(); diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 548f51eb0ae..e854b742d15 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -40,9 +40,8 @@ struct t_pl_moved_atom_block { * incrementally invalidate parts of the timing * * graph. 
*/ struct t_pl_blocks_to_be_moved { - explicit t_pl_blocks_to_be_moved(size_t max_blocks) { - moved_blocks.reserve(max_blocks); - } + explicit t_pl_blocks_to_be_moved(size_t max_blocks) + : moved_blocks(max_blocks) {} int num_moved_blocks = 0; std::vector moved_blocks; diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 660b348726a..e2e8d8a3c21 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -68,6 +68,7 @@ static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks); static bool driven_by_moved_block(const ClusterNetId net, + const int num_blocks, const std::vector& moved_blocks); static void update_net_bb(const ClusterNetId& net, @@ -127,15 +128,19 @@ static bool driven_by_moved_block(const AtomNetId net, //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const ClusterNetId net, + const int num_blocks, const std::vector& moved_blocks) { auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - bool is_driven_by_move_blk; + bool is_driven_by_move_blk = false; ClusterBlockId net_driver_block = clb_nlist.net_driver_block( net); - is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { - return net_driver_block == move_blk.block_num; - }); + for (int block_num = 0; block_num < num_blocks; block_num++) { + if (net_driver_block == moved_blocks[block_num].block_num) { + is_driven_by_move_blk = true; + break; + } + } return is_driven_by_move_blk; } @@ -860,7 +865,9 @@ int find_affected_nets_and_update_costs( bool is_src_moving = false; if (clb_nlsit.pin_type(blk_pin) == PinType::SINK) { ClusterNetId net_id = clb_nlsit.pin_net(blk_pin); - is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); + is_src_moving = driven_by_moved_block(net_id, + blocks_affected.num_moved_blocks, + blocks_affected.moved_blocks); } 
update_net_info_on_pin_move(place_algorithm, delay_model, From b05d57b65c49c45250fd2b5dfe0ea02d8fd0b8fc Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 10 Nov 2023 18:03:10 -0500 Subject: [PATCH 146/188] debug update_bb --- vpr/src/place/net_cost_handler.cpp | 38 +++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 191b68b9881..61b5c967c8b 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -73,11 +73,10 @@ static bool driven_by_moved_block(const ClusterNetId net, const int num_blocks, const std::vector& moved_blocks); -static void update_net_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin); +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block); static void update_net_layer_bb(const ClusterNetId net, const t_pl_blocks_to_be_moved& blocks_affected, @@ -238,11 +237,10 @@ static bool driven_by_moved_block(const ClusterNetId net, * Do not update the net cost here since it should only * be updated once per net, not once per pin. 
*/ -static void update_net_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin) { +static void update_net_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block) { auto& cluster_ctx = g_vpr_ctx.clustering(); if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { @@ -263,20 +261,16 @@ static void update_net_bb(const ClusterNetId net, int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; //Incremental bounding box update - t_physical_tile_loc pin_old_loc( - blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset, - blocks_affected.moved_blocks[iblk].old_loc.layer); - t_physical_tile_loc pin_new_loc( - blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset, - blocks_affected.moved_blocks[iblk].new_loc.layer); update_bb(net, ts_bb_edge_new[net], ts_bb_coord_new[net], ts_layer_sink_pin_count[size_t(net)], - pin_old_loc, - pin_new_loc, + {pl_moved_block.old_loc.x + pin_width_offset, + pl_moved_block.old_loc.y + pin_height_offset, + pl_moved_block.old_loc.layer}, + {pl_moved_block.new_loc.x + pin_width_offset, + pl_moved_block.new_loc.y + pin_height_offset, + pl_moved_block.new_loc.layer}, src_pin); } } @@ -1670,6 +1664,8 @@ int find_affected_nets_and_update_costs( /* Go through all the blocks moved. */ for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { + const auto& moving_block_inf = blocks_affected.moved_blocks[iblk]; + auto& affected_pins = blocks_affected.affected_pins; ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block. */ @@ -1688,7 +1684,7 @@ int find_affected_nets_and_update_costs( /* Update the net bounding boxes. 
*/ if (cube_bb) { - update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); + update_net_bb(net_id, blk, blk_pin, moving_block_inf); } else { update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin); } From 71186a9a828bd6f6a27bac59e6890faaca57af29 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 10 Nov 2023 18:19:48 -0500 Subject: [PATCH 147/188] fix find_affected_nets_and_update_costs --- vpr/src/place/net_cost_handler.cpp | 51 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 61b5c967c8b..1bf1bb9f55f 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -451,8 +451,14 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm /* Record effected nets */ record_affected_net(net_id, num_affected_nets); + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + /* Update the net bounding boxes. */ - update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); + if (cube_bb) { + update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); + } else { + update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin); + } if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. */ @@ -1656,7 +1662,7 @@ int find_affected_nets_and_update_costs( double& timing_delta_c) { VTR_ASSERT_SAFE(bb_delta_c == 0.); VTR_ASSERT_SAFE(timing_delta_c == 0.); - auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; int num_affected_nets = 0; @@ -1669,31 +1675,24 @@ int find_affected_nets_and_update_costs( ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; /* Go through all the pins in the moved block. 
*/ - for (ClusterPinId blk_pin : cluster_ctx.clb_nlist.block_pins(blk)) { - ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(blk_pin); - VTR_ASSERT_SAFE_MSG(net_id, - "Only valid nets should be found in compressed netlist block pins"); - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - //TODO: Do we require anyting special here for global nets? - //"Global nets are assumed to span the whole chip, and do not effect costs." - continue; - - /* Record effected nets */ - record_affected_net(net_id, num_affected_nets); - - /* Update the net bounding boxes. */ - if (cube_bb) { - update_net_bb(net_id, blk, blk_pin, moving_block_inf); - } else { - update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin); - } - - if (place_algorithm.is_timing_driven()) { - /* Determine the change in connection delay and timing cost. */ - update_td_delta_costs(delay_model, *criticalities, net_id, - blk_pin, blocks_affected, timing_delta_c); + for (ClusterPinId blk_pin : clb_nlist.block_pins(blk)) { + bool is_src_moving = false; + if (clb_nlist.pin_type(blk_pin) == PinType::SINK) { + ClusterNetId net_id = clb_nlist.pin_net(blk_pin); + is_src_moving = driven_by_moved_block(net_id, + blocks_affected.num_moved_blocks, + blocks_affected.moved_blocks); } + update_net_info_on_pin_move(place_algorithm, + delay_model, + criticalities, + blk, + blk_pin, + moving_block_inf, + affected_pins, + timing_delta_c, + num_affected_nets, + is_src_moving); } } From f94c577f878aaaec223dbb9c2bddf1e2adaba85a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 10 Nov 2023 18:26:49 -0500 Subject: [PATCH 148/188] fix update_net_layer_bb --- vpr/src/place/net_cost_handler.cpp | 40 +++++++++++++----------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 1bf1bb9f55f..488166e98e8 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -78,11 +78,10 @@ static void 
update_net_bb(const ClusterNetId& net, const ClusterPinId& blk_pin, const t_pl_moved_block& pl_moved_block); -static void update_net_layer_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin); +static void update_net_layer_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block); static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, @@ -275,11 +274,10 @@ static void update_net_bb(const ClusterNetId& net, } } -static void update_net_layer_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin) { +static void update_net_layer_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block) { auto& cluster_ctx = g_vpr_ctx.clustering(); if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { @@ -293,28 +291,24 @@ static void update_net_layer_bb(const ClusterNetId net, } else { //For large nets, update bounding box incrementally int iblk_pin = tile_pin_index(blk_pin); + bool src_pin = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER; t_physical_tile_type_ptr blk_type = physical_tile_type(blk); int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; //Incremental bounding box update - t_physical_tile_loc pin_old_loc( - blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset, - blocks_affected.moved_blocks[iblk].old_loc.layer); - t_physical_tile_loc pin_new_loc( - blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset, - 
blocks_affected.moved_blocks[iblk].new_loc.layer); - auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin); update_layer_bb(net, layer_ts_bb_edge_new[net], layer_ts_bb_coord_new[net], ts_layer_sink_pin_count[size_t(net)], - pin_old_loc, - pin_new_loc, - pin_dir == e_pin_type::DRIVER); + {pl_moved_block.old_loc.x + pin_width_offset, + pl_moved_block.old_loc.y + pin_height_offset, + pl_moved_block.old_loc.layer}, + {pl_moved_block.new_loc.x + pin_width_offset, + pl_moved_block.new_loc.y + pin_height_offset, + pl_moved_block.new_loc.layer}, + src_pin); } } @@ -457,7 +451,7 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm if (cube_bb) { update_net_bb(net_id, blk_id, pin_id, moving_blk_inf); } else { - update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin); + update_net_layer_bb(net_id, blk_id, pin_id, moving_blk_inf); } if (place_algorithm.is_timing_driven()) { From df7fdd44dc046cca2cc0d269fb565f47e152388b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 10 Nov 2023 18:27:36 -0500 Subject: [PATCH 149/188] fix comp_bb_cost --- vpr/src/place/net_cost_handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 488166e98e8..dea8a8cbc11 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1719,7 +1719,7 @@ int find_affected_nets_and_update_costs( * are found via the non_updateable_bb routine, to provide a * * cost which can be used to check the correctness of the * * other routine. 
*/ -static double comp_bb_cost(e_cost_methods method) { +double comp_bb_cost(e_cost_methods method) { double cost = 0; double expected_wirelength = 0.0; auto& cluster_ctx = g_vpr_ctx.clustering(); From 9989f2fba203a3d593db1e074735a02a2d3da4ed Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 10 Nov 2023 19:06:03 -0500 Subject: [PATCH 150/188] remove pushing back to blocks_affected --- vpr/src/place/move_transactions.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index b3a8a8746e5..0af26c8ff76 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -57,8 +57,6 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, blocks_affected.moved_blocks[imoved_blk].block_num = blk; blocks_affected.moved_blocks[imoved_blk].old_loc = from; blocks_affected.moved_blocks[imoved_blk].new_loc = to; - VTR_ASSERT_SAFE(imoved_blk == int(blocks_affected.moved_blocks.size())); - blocks_affected.moved_blocks.emplace_back(blk, from, to); blocks_affected.num_moved_blocks++; return e_block_move_result::VALID; From 22f9237e7c1ea5189f5c7dec2274984cabeef94c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 08:31:51 -0500 Subject: [PATCH 151/188] change bbptr to bb --- vpr/src/place/net_cost_handler.cpp | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index dea8a8cbc11..d3b60ac8da8 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -178,16 +178,16 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, std::vector& coords, vtr::NdMatrixProxy layer_pin_sink_count); -static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr); +static double get_net_cost(ClusterNetId net_id, const t_bb& bb); static double get_net_layer_cost(ClusterNetId /* net_id */, - const 
std::vector& bbptr, + const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count); -static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr); +static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb); static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, - const std::vector& bbptr, + const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count); static double recompute_bb_cost(); @@ -1390,7 +1390,7 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, } } -static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) { +static double get_net_cost(ClusterNetId net_id, const t_bb& bb) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ @@ -1407,17 +1407,17 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ - ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing - * chanx_place_cost_fac[bbptr.ymax][bbptr.ymin - 1]; + ncost = (bb.xmax - bb.xmin + 1) * crossing + * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; - ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing - * chany_place_cost_fac[bbptr.xmax][bbptr.xmin - 1]; + ncost += (bb.ymax - bb.ymin + 1) * crossing + * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; return (ncost); } static double get_net_layer_cost(ClusterNetId /* net_id */, - const std::vector& bbptr, + const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ @@ -1440,17 +1440,17 @@ static double get_net_layer_cost(ClusterNetId /* net_id */, /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. 
*/ - ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing - * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1]; + ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing + * chanx_place_cost_fac[bb[layer_num].ymax][bb[layer_num].ymin - 1]; - ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing - * chany_place_cost_fac[bbptr[layer_num].xmax][bbptr[layer_num].xmin - 1]; + ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing + * chany_place_cost_fac[bb[layer_num].xmax][bb[layer_num].xmin - 1]; } return (ncost); } -static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr) { +static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { /* WMF: Finds the estimate of wirelength due to one net by looking at * * its coordinate bounding box. */ @@ -1467,15 +1467,15 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ - ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing; + ncost = (bb.xmax - bb.xmin + 1) * crossing; - ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing; + ncost += (bb.ymax - bb.ymin + 1) * crossing; return (ncost); } static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, - const std::vector& bbptr, + const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count) { /* WMF: Finds the estimate of wirelength due to one net by looking at * * its coordinate bounding box. */ @@ -1498,9 +1498,9 @@ static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. 
*/ - ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing; + ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing; - ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing; + ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing; } return (ncost); From e34d848e7ec2a7d11b0a901fe96c95d09bc01d12 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 08:34:53 -0500 Subject: [PATCH 152/188] change bb pointer to const ref in get_net_bounding_box_cost --- vpr/src/place/net_cost_handler.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index d3b60ac8da8..5e8c929d78e 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -194,7 +194,7 @@ static double recompute_bb_cost(); static double wirelength_crossing_count(size_t fanout); -static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr); +static double get_net_bounding_box_cost(ClusterNetId net_id, const t_bb& bb); //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const AtomNetId net, @@ -1536,7 +1536,7 @@ static double wirelength_crossing_count(size_t fanout) { } } -static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { +static double get_net_bounding_box_cost(ClusterNetId net_id, const t_bb& bb) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ @@ -1553,11 +1553,11 @@ static double get_net_bounding_box_cost(ClusterNetId net_id, t_bb* bbptr) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. 
*/ - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing - * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; + ncost = (bb.xmax - bb.xmin + 1) * crossing + * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing - * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; + ncost += (bb.ymax - bb.ymin + 1) * crossing + * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; return (ncost); } @@ -1618,7 +1618,7 @@ int find_affected_nets_and_update_costs( ClusterNetId net_id = ts_nets_to_update[inet_affected]; proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, - &ts_bb_coord_new[net_id]); + ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } From 6a77e8050940e89d41900d28a92e79dd60cf9f55 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 08:46:43 -0500 Subject: [PATCH 153/188] comment on ts_bb_coord_new and layer_ts_bb_coord_new usage --- vpr/src/place/net_cost_handler.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 5e8c929d78e..175ee4a6efa 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -60,10 +60,24 @@ static vtr::vector net_cost, proposed_net_cost; static vtr::vector bb_updated_before; /* The following arrays are used by the try_swap function for speed. */ -/* [0...cluster_ctx.clb_nlist.nets().size()-1] */ + +/** + * The wire length estimation is based on the bounding box of the net. In the case of the 2D architecture, + * we use a 3D BB with the z-dimension (layer) set to 1. In the case of 3D architecture, there 2 types of bounding box: + * 3D and per-layer. The type is determined at the beginning of the placement and stored in the placement context. + * + * + * If the bonding box is of the type 3D, ts_bb_coord_new and ts_bb_edge_new are used. Otherwise, layer_ts_bb_edge_new and + * layer_ts_bb_coord_new are used. 
+ */ + +/* [0...cluster_ctx.clb_nlist.nets().size()-1] -> 3D bounding box*/ static vtr::vector ts_bb_coord_new, ts_bb_edge_new; +/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers] -> 2D bonding box on a layer*/ static vtr::vector> layer_ts_bb_edge_new, layer_ts_bb_coord_new; +/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers] -> number of sink pins on a layer*/ static vtr::Matrix ts_layer_sink_pin_count; +/* [0...num_afftected_nets] -> net_id of the affected nets */ static std::vector ts_nets_to_update; static bool driven_by_moved_block(const AtomNetId net, From 49d33b8bc64747cdfe29192a508a7fc523ab3549 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 08:49:00 -0500 Subject: [PATCH 154/188] fix a type --- vpr/src/place/net_cost_handler.cpp | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 175ee4a6efa..b48293a409f 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -118,13 +118,13 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm int& num_affected_nets, bool is_src_moving); -static void get_non_updateable_bb(ClusterNetId net_id, +static void get_non_updatable_bb(ClusterNetId net_id, t_bb& bb_coord_new, vtr::NdMatrixProxy num_sink_pin_layer); -static void get_non_updateable_layer_bb(ClusterNetId net_id, - std::vector& bb_coord_new, - vtr::NdMatrixProxy num_sink_layer); +static void get_non_updatable_layer_bb(ClusterNetId net_id, + std::vector& bb_coord_new, + vtr::NdMatrixProxy num_sink_layer); static void update_bb(ClusterNetId net_id, t_bb& bb_edge_new, @@ -260,9 +260,9 @@ static void update_net_bb(const ClusterNetId& net, //For small nets brute-force bounding box update is faster if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net - get_non_updateable_bb(net, - ts_bb_coord_new[net], - 
ts_layer_sink_pin_count[size_t(net)]); + get_non_updatable_bb(net, + ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)]); } } else { //For large nets, update bounding box incrementally @@ -298,9 +298,9 @@ static void update_net_layer_bb(const ClusterNetId& net, //For small nets brute-force bounding box update is faster if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net - get_non_updateable_layer_bb(net, - layer_ts_bb_coord_new[net], - ts_layer_sink_pin_count[size_t(net)]); + get_non_updatable_layer_bb(net, + layer_ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)]); } } else { //For large nets, update bounding box incrementally @@ -487,9 +487,9 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm * Currently assumes channels on both sides of the CLBs forming the * * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. */ -static void get_non_updateable_bb(ClusterNetId net_id, - t_bb& bb_coord_new, - vtr::NdMatrixProxy num_sink_pin_layer) { +static void get_non_updatable_bb(ClusterNetId net_id, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer) { //TODO: account for multiple physical pin instances per logical pin int xmax, ymax, xmin, ymin, x, y, layer; @@ -554,9 +554,9 @@ static void get_non_updateable_bb(ClusterNetId net_id, bb_coord_new.ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels } -static void get_non_updateable_layer_bb(ClusterNetId net_id, - std::vector& bb_coord_new, - vtr::NdMatrixProxy num_sink_layer) { +static void get_non_updatable_layer_bb(ClusterNetId net_id, + std::vector& bb_coord_new, + vtr::NdMatrixProxy num_sink_layer) { //TODO: account for multiple physical pin instances per logical pin auto& device_ctx = g_vpr_ctx.device(); @@ -1750,9 +1750,9 @@ double comp_bb_cost(e_cost_methods method) { place_move_ctx.bb_num_on_edges[net_id], 
place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } else { - get_non_updateable_bb(net_id, - place_move_ctx.bb_coords[net_id], - place_move_ctx.num_sink_pin_layer[size_t(net_id)]); + get_non_updatable_bb(net_id, + place_move_ctx.bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]); @@ -1787,9 +1787,9 @@ double comp_layer_bb_cost(e_cost_methods method) { place_move_ctx.layer_bb_coords[net_id], place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } else { - get_non_updateable_layer_bb(net_id, - place_move_ctx.layer_bb_coords[net_id], - place_move_ctx.num_sink_pin_layer[size_t(net_id)]); + get_non_updatable_layer_bb(net_id, + place_move_ctx.layer_bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } net_cost[net_id] = get_net_layer_cost(net_id, From fa45f03aefb06a6253153caf955a90ea4651e510 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 10:12:27 -0500 Subject: [PATCH 155/188] remove an unused function - comment net_cost_handler helper functions --- vpr/src/place/net_cost_handler.cpp | 227 +++++++++++++++++++++++++---- 1 file changed, 195 insertions(+), 32 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index b48293a409f..04320c13d6e 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -80,23 +80,61 @@ static vtr::Matrix ts_layer_sink_pin_count; /* [0...num_afftected_nets] -> net_id of the affected nets */ static std::vector ts_nets_to_update; + +/** + * @param net + * @param moved_blocks + * @return True if the driver block of the net is among the moving blocks + */ static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks); +/** + * @param net + * @param moved_blocks + * @return True if the driver block of the net is among the moving blocks + */ static bool driven_by_moved_block(const ClusterNetId net, const int 
num_blocks, const std::vector& moved_blocks); - +/** + * @brief Update the bounding box (3D) of the net connected to blk_pin. The old and new locations of the pin are + * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. + * @param net + * @param blk + * @param blk_pin + * @param pl_moved_block + */ static void update_net_bb(const ClusterNetId& net, const ClusterBlockId& blk, const ClusterPinId& blk_pin, const t_pl_moved_block& pl_moved_block); +/** + * @brief Update the bounding box (per-layer) of the net connected to blk_pin. The old and new locations of the pin are + * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. + * @param net + * @param blk + * @param blk_pin + * @param pl_moved_block + */ static void update_net_layer_bb(const ClusterNetId& net, const ClusterBlockId& blk, const ClusterPinId& blk_pin, const t_pl_moved_block& pl_moved_block); +/** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. 
+ * @param delay_model + * @param criticalities + * @param net + * @param pin + * @param affected_pins Store the sink pins which delays are changed due to moving the block + * @param delta_timing_cost + * @param is_src_moving True if "pin" is a sink pin and its driver is among the moving blocks + */ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -105,8 +143,27 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, double& delta_timing_cost, bool is_src_moving); +/** + * @brief if "net" is not already stored as an affected net, mark it in ts_nets_to_update and increment num_affected_nets + * @param net + * @param num_affected_nets + */ static void record_affected_net(const ClusterNetId net, int& num_affected_nets); +/** + * @brief Call suitable function based on the bounding box type to update the bounding box of the net connected to pin_id. Also, + * call the function to update timing information if the placement algorithm is timing-driven. 
+ * @param place_algorithm + * @param delay_model + * @param criticalities + * @param blk_id + * @param pin_id + * @param moving_blk_inf + * @param affected_pins + * @param timing_delta_c + * @param num_affected_nets + * @param is_src_moving + */ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -118,14 +175,37 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm int& num_affected_nets, bool is_src_moving); +/** + * @brief Calculate the 3D bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and + * store them in bb_coord_new + * @param net_id + * @param bb_coord_new + * @param num_sink_pin_layer Store the number of sink pins of "net_id" on each layer + */ static void get_non_updatable_bb(ClusterNetId net_id, t_bb& bb_coord_new, vtr::NdMatrixProxy num_sink_pin_layer); +/** + * @brief Calculate the per-layer bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and + * store them in bb_coord_new + * @param net_id + * @param bb_coord_new + * @param num_sink_layer + */ static void get_non_updatable_layer_bb(ClusterNetId net_id, std::vector& bb_coord_new, vtr::NdMatrixProxy num_sink_layer); +/** + * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of the pin + * @param bb_edge_new + * @param bb_coord_new + * @param num_sink_pin_layer_new + * @param pin_old_loc + * @param pin_new_loc + * @param src_pin + */ static void update_bb(ClusterNetId net_id, t_bb& bb_edge_new, t_bb& bb_coord_new, @@ -134,6 +214,15 @@ static void update_bb(ClusterNetId net_id, t_physical_tile_loc pin_new_loc, bool src_pin); +/** + * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of the pin + * @param bb_edge_new + * @param bb_coord_new + * @param num_sink_pin_layer_new + * @param 
pin_old_loc + * @param pin_new_loc + * @param src_pin + */ static void update_layer_bb(ClusterNetId net_id, std::vector& bb_edge_new, std::vector& bb_coord_new, @@ -142,6 +231,18 @@ static void update_layer_bb(ClusterNetId net_id, t_physical_tile_loc pin_new_loc, bool is_output_pin); +/** + * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if + * the pin under consideration is not changing layer. + * @param net_id + * @param pin_old_loc + * @param pin_new_loc + * @param curr_bb_edge + * @param curr_bb_coord + * @param bb_pin_sink_count_new + * @param bb_edge_new + * @param bb_coord_new + */ static inline void update_bb_same_layer(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, const t_physical_tile_loc& pin_new_loc, @@ -151,6 +252,18 @@ static inline void update_bb_same_layer(ClusterNetId net_id, std::vector& bb_edge_new, std::vector& bb_coord_new); +/** +* @brief This function is called in update_layer_bb to update the net's bounding box incrementally if +* the pin under consideration change layer. 
+ * @param net_id + * @param pin_old_loc + * @param pin_new_loc + * @param curr_bb_edge + * @param curr_bb_coord + * @param bb_pin_sink_count_new + * @param bb_edge_new + * @param bb_coord_new + */ static inline void update_bb_layer_changed(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, const t_physical_tile_loc& pin_new_loc, @@ -159,7 +272,15 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, vtr::NdMatrixProxy bb_pin_sink_count_new, std::vector& bb_edge_new, std::vector& bb_coord_new); - +/** + * @brief If the moving pin is of type type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id" + * @param net_id + * @param pin_old_loc + * @param pin_new_loc + * @param curr_layer_pin_sink_count + * @param bb_pin_sink_count_new + * @param is_output_pin + */ static void update_bb_pin_sink_count(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, const t_physical_tile_loc& pin_new_loc, @@ -167,6 +288,18 @@ static void update_bb_pin_sink_count(ClusterNetId net_id, vtr::NdMatrixProxy bb_pin_sink_count_new, bool is_output_pin); +/** + * @brief Update the data structure for large nets that keep track of + * the number of blocks on each edge of the bounding box. + * @param net_id + * @param bb_edge_new + * @param bb_coord_new + * @param bb_layer_pin_sink_count + * @param old_num_block_on_edge + * @param old_edge_coord + * @param new_num_block_on_edge + * @param new_edge_coord + */ static inline void update_bb_edge(ClusterNetId net_id, std::vector& bb_edge_new, std::vector& bb_coord_new, @@ -176,40 +309,96 @@ static inline void update_bb_edge(ClusterNetId net_id, int& new_num_block_on_edge, int& new_edge_coord); +/** + * @brief When BB is being updated incrementally, the pin is moving to a new layer, and the BB is of the type "per-layer, + * use this function to update the BB on the new layer. 
+ * @param new_pin_loc + * @param bb_edge_old + * @param bb_coord_old + * @param bb_edge_new + * @param bb_coord_new + */ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, const t_2D_bb& bb_edge_old, const t_2D_bb& bb_coord_old, t_2D_bb& bb_edge_new, t_2D_bb& bb_coord_new); +/** + * @brief Calculate the 3D BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures. + * @param net_id + * @param coords + * @param num_on_edges + * @param num_sink_pin_layer + */ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges, vtr::NdMatrixProxy num_sink_pin_layer); +/** + * @brief Calculate the per-layer BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures. + * @param net_id + * @param coords + * @param num_on_edges + * @param num_sink_pin_layer + */ static void get_layer_bb_from_scratch(ClusterNetId net_id, std::vector& num_on_edges, std::vector& coords, vtr::NdMatrixProxy layer_pin_sink_count); +/** + * @brief Given the 3D BB, calculate the wire-length cost of the net + * @param net_id + * @param bb + * @return + */ static double get_net_cost(ClusterNetId net_id, const t_bb& bb); +/** + * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer + * and return the sum of the costs + * @param net_id + * @param bb + * @return + */ static double get_net_layer_cost(ClusterNetId /* net_id */, const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count); +/** + * @brief Given the 3D BB, calculate the wire-length estimate of the net + * @param net_id + * @param bb + * @return + */ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb); +/** + * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer + * and return the sum of the lengths + * @param net_id + * @param bb + * @return + */ static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, const 
std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count); +/** + * @brief To mitigate round-off errors, every once in a while, the costs of nets are being added from scrath. + * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again. + * @return + */ static double recompute_bb_cost(); +/** + * @brief To get the wirelength cost/est, BB perimiter is multiplied by a factor. This function returns that factor which is a function of net's fan-out. + * @return double + */ static double wirelength_crossing_count(size_t fanout); -static double get_net_bounding_box_cost(ClusterNetId net_id, const t_bb& bb); - //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks) { @@ -1550,32 +1739,6 @@ static double wirelength_crossing_count(size_t fanout) { } } -static double get_net_bounding_box_cost(ClusterNetId net_id, const t_bb& bb) { - /* Finds the cost due to one net by looking at its coordinate bounding * - * box. */ - - double ncost, crossing; - auto& cluster_ctx = g_vpr_ctx.clustering(); - - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); - - /* Could insert a check for xmin == xmax. In that case, assume * - * connection will be made with no bends and hence no x-cost. * - * Same thing for y-cost. */ - - /* Cost = wire length along channel * cross_count / average * - * channel capacity. Do this for x, then y direction and add. 
*/ - - ncost = (bb.xmax - bb.xmin + 1) * crossing - * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; - - ncost += (bb.ymax - bb.ymin + 1) * crossing - * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; - - return (ncost); -} - int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -1631,8 +1794,8 @@ int find_affected_nets_and_update_costs( inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = get_net_bounding_box_cost(net_id, - ts_bb_coord_new[net_id]); + proposed_net_cost[net_id] = get_net_cost(net_id, + ts_bb_coord_new[net_id]); bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } From 2de0eca143ea2cb87fc3da2e2b7edb47e587ba6a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 10:45:13 -0500 Subject: [PATCH 156/188] get net cost based on bb type --- vpr/src/place/net_cost_handler.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 04320c13d6e..fd291eb2743 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1756,6 +1756,8 @@ int find_affected_nets_and_update_costs( std::vector affected_pins; + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); @@ -1794,8 +1796,15 @@ int find_affected_nets_and_update_costs( inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = get_net_cost(net_id, - ts_bb_coord_new[net_id]); + if (cube_bb) { + proposed_net_cost[net_id] = get_net_cost(net_id, + ts_bb_coord_new[net_id]); + } else { + proposed_net_cost[net_id] = get_net_layer_cost(net_id, + layer_ts_bb_coord_new[net_id], + 
ts_layer_sink_pin_count[size_t(net_id)]); + } + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } From 6abf300cbc2379353fb743cbea655d26e3abadee Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 11:16:19 -0500 Subject: [PATCH 157/188] impl a function to set bb delta c --- vpr/src/place/net_cost_handler.cpp | 57 ++++++++++++++---------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index fd291eb2743..faac083c666 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -399,6 +399,11 @@ static double recompute_bb_cost(); */ static double wirelength_crossing_count(size_t fanout); +/** + * @breif Calculate the wire-length cost of nets affected by moving the blocks and set bb_delta_c to the total cost change. + */ +static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c); + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const AtomNetId net, const std::vector& moved_blocks) { @@ -1739,6 +1744,25 @@ static double wirelength_crossing_count(size_t fanout) { } } +static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c) { + for (int inet_affected = 0; inet_affected < num_affected_nets; + inet_affected++) { + ClusterNetId net_id = ts_nets_to_update[inet_affected]; + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + + if (cube_bb) { + proposed_net_cost[net_id] = get_net_cost(net_id, + ts_bb_coord_new[net_id]); + } else { + proposed_net_cost[net_id] = get_net_layer_cost(net_id, + layer_ts_bb_coord_new[net_id], + ts_layer_sink_pin_count[size_t(net_id)]); + } + + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; + } +} + int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -1756,8 +1780,6 @@ int find_affected_nets_and_update_costs( 
std::vector affected_pins; - const auto& cube_bb = g_vpr_ctx.placement().cube_bb; - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); @@ -1792,21 +1814,8 @@ int find_affected_nets_and_update_costs( /* Now update the bounding box costs (since the net bounding * * boxes are up-to-date). The cost is only updated once per net. */ - for (int inet_affected = 0; inet_affected < num_affected_nets; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; + set_bb_delta_cost(num_affected_nets, bb_delta_c); - if (cube_bb) { - proposed_net_cost[net_id] = get_net_cost(net_id, - ts_bb_coord_new[net_id]); - } else { - proposed_net_cost[net_id] = get_net_layer_cost(net_id, - layer_ts_bb_coord_new[net_id], - ts_layer_sink_pin_count[size_t(net_id)]); - } - - bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; - } return num_affected_nets; } @@ -1878,21 +1887,7 @@ int find_affected_nets_and_update_costs( /* Now update the bounding box costs (since the net bounding * * boxes are up-to-date). The cost is only updated once per net. 
 */ - for (int inet_affected = 0; inet_affected < num_affected_nets; - inet_affected++) { - ClusterNetId net_id = ts_nets_to_update[inet_affected]; - - if (cube_bb) { - proposed_net_cost[net_id] = get_net_cost(net_id, - ts_bb_coord_new[net_id]); - } else { - proposed_net_cost[net_id] = get_net_layer_cost(net_id, - layer_ts_bb_coord_new[net_id], - ts_layer_sink_pin_count[size_t(net_id)]); - } - - bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; - } + set_bb_delta_cost(num_affected_nets, bb_delta_c); return num_affected_nets; } From d20732b20083844e3820591435f57b52e5a9790b Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 13 Nov 2023 14:09:45 -0500 Subject: [PATCH 158/188] comment functions in net_cost_handler.h --- vpr/src/place/net_cost_handler.h | 88 ++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 63026677961..dc16f342b6f 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -9,6 +9,17 @@ enum e_cost_methods { CHECK }; +/** + * @brief Update the wire length and timing cost of the blocks (ts and proposed_* data structures) and set + * the delta costs in bb_delta_c and timing_delta_c. This function is used when the moving blocks are atoms + * @param place_algorithm + * @param delay_model + * @param criticalities + * @param blocks_affected + * @param bb_delta_c + * @param timing_delta_c + * @return + */ int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -17,6 +28,17 @@ int find_affected_nets_and_update_costs( double& bb_delta_c, double& timing_delta_c); +/** + * @brief Update the wire length and timing cost of the blocks (ts and proposed_* data structures) and set + * the delta costs in bb_delta_c and timing_delta_c. 
This function is used when the moving blocks are clusters + * @param place_algorithm + * @param delay_model + * @param criticalities + * @param blocks_affected + * @param bb_delta_c + * @param timing_delta_c + * @return + */ int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -25,29 +47,95 @@ int find_affected_nets_and_update_costs( double& bb_delta_c, double& timing_delta_c); +/** + * @brief Finds the bb cost from scratch (based on 3D BB). Done only when the placement * +* has been radically changed (i.e. after initial placement). * +* Otherwise find the cost change incrementally. If method * +* check is NORMAL, we find bounding boxes that are updatable * +* for the larger nets. If method is CHECK, all bounding boxes * +* are found via the non_updateable_bb routine, to provide a * +* cost which can be used to check the correctness of the * +* other routine. * + * @param method + * @return + */ double comp_bb_cost(e_cost_methods method); +/** + * @brief Finds the bb cost from scratch (based on per-layer BB). Done only when the placement * +* has been radically changed (i.e. after initial placement). * +* Otherwise find the cost change incrementally. If method * +* check is NORMAL, we find bounding boxes that are updateable * +* for the larger nets. If method is CHECK, all bounding boxes * +* are found via the non_updateable_bb routine, to provide a * +* cost which can be used to check the correctness of the * +* other routine. * + * @param method + * @return + */ double comp_layer_bb_cost(e_cost_methods method); +/** + * @brief update net cost data structures (in placer context and net_cost in .cpp file) and reset flags (proposed_net_cost and bb_updated_before). 
+ * @param num_nets_affected + * @param cube_bb + */ void update_move_nets(int num_nets_affected, const bool cube_bb); +/** + * @brief Reset the net cost function flags (proposed_net_cost and bb_updated_before) + * @param num_nets_affected + */ void reset_move_nets(int num_nets_affected); +/** + * @brief re-calculates different terms of the cost function (wire-length, timing, NoC) and update "costs" accordingly. It is important to note that + * in this function bounding box and connection delays are not calculated from scratch. However, it iterated over nets and add their costs from beginning. + * @param placer_opts + * @param noc_opts + * @param delay_model + * @param criticalities + * @param costs + */ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_placer_costs* costs); +/** + * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac + * arrays with the inverse of the average number of tracks per channel + * between [subhigh] and [sublow]. + * @param place_cost_exp + */ void alloc_and_load_for_fast_cost_update(float place_cost_exp); +/** + * @brief Frees the chanx_place_cost_fac and chany_place_cost_fac arrays. + */ void free_fast_cost_update(); +/** + * @brief Resize net_cost, proposed_net_cost, and bb_updated_before data structures to accommodate all nets. + * @param num_nets + */ void init_net_cost_structs(size_t num_nets); +/** + * @brief Free net_cost, proposed_net_cost, and bb_updated_before data structures. + */ void free_net_cost_structs(); +/** + * @brief Resize (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update to accommodate all nets. 
+ * @param num_nets + * @param cube_bb + */ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb); +/** + * @brief Free (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update data structures. + */ void free_try_swap_net_cost_structs(); From 113efec8ac228cef600afd0e83ea27d16ed1d3a9 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 14 Nov 2023 11:51:11 -0500 Subject: [PATCH 159/188] expr: add static inline for update_net_info_on_pin_move --- vpr/src/place/net_cost_handler.cpp | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index faac083c666..02df5b18f86 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -164,16 +164,16 @@ static void record_affected_net(const ClusterNetId net, int& num_affected_nets); * @param num_affected_nets * @param is_src_moving */ -static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const ClusterBlockId& blk_id, - const ClusterPinId& pin_id, - const t_pl_moved_block& moving_blk_inf, - std::vector& affected_pins, - double& timing_delta_c, - int& num_affected_nets, - bool is_src_moving); +static inline void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + const ClusterBlockId& blk_id, + const ClusterPinId& pin_id, + const t_pl_moved_block& moving_blk_inf, + std::vector& affected_pins, + double& timing_delta_c, + int& num_affected_nets, + bool is_src_moving); /** * @brief Calculate the 3D bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and @@ -629,16 +629,16 @@ static void record_affected_net(const ClusterNetId net, } } -static void update_net_info_on_pin_move(const 
t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const ClusterBlockId& blk_id, - const ClusterPinId& pin_id, - const t_pl_moved_block& moving_blk_inf, - std::vector& affected_pins, - double& timing_delta_c, - int& num_affected_nets, - bool is_src_moving) { +static inline void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + const ClusterBlockId& blk_id, + const ClusterPinId& pin_id, + const t_pl_moved_block& moving_blk_inf, + std::vector& affected_pins, + double& timing_delta_c, + int& num_affected_nets, + bool is_src_moving) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); VTR_ASSERT_SAFE_MSG(net_id, From d4e726fe227aca47fb108d6f4665e08322b1d42d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 16 Nov 2023 10:23:31 -0500 Subject: [PATCH 160/188] remove unused access to cube_bb --- vpr/src/place/net_cost_handler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 02df5b18f86..ad182bb9f89 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1855,8 +1855,6 @@ int find_affected_nets_and_update_costs( int num_affected_nets = 0; - const auto& cube_bb = g_vpr_ctx.placement().cube_bb; - /* Go through all the blocks moved. 
*/ for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { const auto& moving_block_inf = blocks_affected.moved_blocks[iblk]; From dbbe5f6c499edfbf96bea0d5e0fe432fd5c60f1c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 16 Nov 2023 15:04:56 -0500 Subject: [PATCH 161/188] reverse inlining of update_net_info_on_pin_move --- vpr/src/place/net_cost_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ad182bb9f89..81c0faccc3b 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -164,7 +164,7 @@ static void record_affected_net(const ClusterNetId net, int& num_affected_nets); * @param num_affected_nets * @param is_src_moving */ -static inline void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const ClusterBlockId& blk_id, @@ -629,7 +629,7 @@ static void record_affected_net(const ClusterNetId net, } } -static inline void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, +static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const ClusterBlockId& blk_id, From deb8297093642b60afeb6d893f21a4bae57bd683 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:26:31 -0500 Subject: [PATCH 162/188] inline the operator overloading for atom_loc --- libs/libvtrutil/src/vtr_vec_id_set.h | 1 - vpr/src/base/vpr_types.h | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/libs/libvtrutil/src/vtr_vec_id_set.h b/libs/libvtrutil/src/vtr_vec_id_set.h index 10dc10e0f0a..7207225932c 100644 --- a/libs/libvtrutil/src/vtr_vec_id_set.h +++ b/libs/libvtrutil/src/vtr_vec_id_set.h @@ -2,7 +2,6 @@ #define 
VTR_SET_H #include -#include namespace vtr { diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 64a131e6ac3..df7e6b5c42a 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -811,12 +811,12 @@ struct t_pl_atom_loc { int y = OPEN; int sub_tile = OPEN; int layer = OPEN; - - friend bool operator==(const t_pl_atom_loc& lhs, const t_pl_atom_loc& rhs) { - return std::tie(lhs.primitive_id, lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.primitive_id, rhs.x, rhs.y, rhs.sub_tile, rhs.layer); - } }; +inline bool operator==(const t_pl_atom_loc& lhs, const t_pl_atom_loc& rhs) { + return std::tie(lhs.primitive_id, lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.primitive_id, rhs.x, rhs.y, rhs.sub_tile, rhs.layer); +} + namespace std { template<> struct hash { From f04759054fac5327f1bebbdaf7b259c703a62c4a Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:29:30 -0500 Subject: [PATCH 163/188] use string constructor instead of casting char* to string --- vpr/src/pack/re_cluster.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index fb67f0b37b4..f75277eb1b5 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -204,9 +204,9 @@ bool swap_two_molecules(t_pack_molecule* molecule_1, } t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); - std::string clb_pb_1_name = (std::string)clb_pb_1->name; + std::string clb_pb_1_name = std::string(clb_pb_1->name); t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); - std::string clb_pb_2_name = (std::string)clb_pb_2->name; + std::string clb_pb_2_name = std::string(clb_pb_2->name); //remove the molecule from its current cluster remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); From 569096a6414576b7e0340bbc2938bcb67f507d75 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:33:26 -0500 Subject: [PATCH 164/188] 
pass primitive types by value --- vpr/src/pack/re_cluster_util.cpp | 4 ++-- vpr/src/pack/re_cluster_util.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 2cdbf8dab53..a8886dc0615 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -121,8 +121,8 @@ t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, - const int& mode, - const int& feasible_block_array_size, + const int mode, + const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h index e5bf3f89096..4ebb1ed1f52 100644 --- a/vpr/src/pack/re_cluster_util.h +++ b/vpr/src/pack/re_cluster_util.h @@ -79,8 +79,8 @@ void remove_mol_from_cluster(const t_pack_molecule* molecule, */ bool start_new_cluster_for_mol(t_pack_molecule* molecule, const t_logical_block_type_ptr& type, - const int& mode, - const int& feasible_block_array_size, + const int mode, + const int feasible_block_array_size, bool enable_pin_feasibility_filter, ClusterBlockId clb_index, bool during_packing, From f19d64a8559b9881bee52bca6de7810a45660614 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:35:26 -0500 Subject: [PATCH 165/188] use empty method instead of comparing to zero --- vpr/src/place/atom_critical_uniform_move_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/atom_critical_uniform_move_generator.cpp b/vpr/src/place/atom_critical_uniform_move_generator.cpp index 7ba5e2115e2..17343412558 100644 --- a/vpr/src/place/atom_critical_uniform_move_generator.cpp +++ b/vpr/src/place/atom_critical_uniform_move_generator.cpp @@ -46,7 +46,7 @@ static std::pair getCriticalAtomBlock() { const auto& place_ctx = 
g_vpr_ctx.placement(); /* Pick a random block to be swapped with another random block. */ // pick it from the highly critical blocks - if (place_move_ctx.highly_crit_pins.size() == 0) { + if (place_move_ctx.highly_crit_pins.empty()) { return std::make_pair(ClusterBlockId::INVALID(), AtomBlockId::INVALID()); //No critical block } std::pair crit_cluster_net_pin = place_move_ctx.highly_crit_pins[vtr::irand(place_move_ctx.highly_crit_pins.size() - 1)]; From 85c53dc7ec4561d65e0d626cc3f3f2577e14c20c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:38:42 -0500 Subject: [PATCH 166/188] set the upper bound of the loop outside of the loop body --- vpr/src/place/move_transactions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 0af26c8ff76..534ac56fa3c 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -66,7 +66,8 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected) { const auto& atom_lookup = g_vpr_ctx.atom().lookup; std::set seen_clusters; - for (int blk_idx = 0; blk_idx < blocks_affected.num_moved_blocks; blk_idx++) { + const int num_moved_blocks = blocks_affected.num_moved_blocks; + for (int blk_idx = 0; blk_idx < num_moved_blocks; blk_idx++) { AtomBlockId atom_blk = blocks_affected.moved_blocks[blk_idx].block_num; ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom_blk); if (seen_clusters.find(cluster_blk) == seen_clusters.end()) { From 4e410cec09836dd6e8538b97bbc6d8e582c984cf Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:43:22 -0500 Subject: [PATCH 167/188] impl of atom revert_move_blocks was wrong --- vpr/src/place/move_transactions.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/vpr/src/place/move_transactions.cpp 
b/vpr/src/place/move_transactions.cpp index 534ac56fa3c..1c7648c6c3f 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -131,16 +131,7 @@ void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { } void revert_move_blocks(t_pl_atom_blocks_to_be_moved& blocks_affected) { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - std::set seen_clusters; - for (int blk_idx = 0; blk_idx < blocks_affected.num_moved_blocks; blk_idx++) { - AtomBlockId atom_blk = blocks_affected.moved_blocks[blk_idx].block_num; - ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom_blk); - if (seen_clusters.find(cluster_blk) == seen_clusters.end()) { - seen_clusters.insert(cluster_blk); - place_sync_external_block_connections(cluster_blk); - } - } + //TODO: this function needs to be implemented } //Moves the blocks in blocks_affected to their old locations From 8e28ea94e070d8573721a4a739067895cbef1757 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 09:51:57 -0500 Subject: [PATCH 168/188] use enum class instead of defining net update status --- vpr/src/place/net_cost_handler.cpp | 71 ++++++++++++++++-------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 81c0faccc3b..7cc2ee7fec4 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -10,9 +10,12 @@ using std::min; /* Flags for the states of the bounding box. * * Stored as char for memory efficiency. */ -#define NOT_UPDATED_YET 'N' -#define UPDATED_ONCE 'U' -#define GOT_FROM_SCRATCH 'S' + +enum class NetUpdateState { + NOT_UPDATED_YET, + UPDATED_ONCE, + GOT_FROM_SCRATCH +}; /* This defines the error tolerance for floating points variables used in * * cost computation. 0.01 means that there is a 1% error tolerance. 
*/ @@ -57,7 +60,7 @@ static vtr::vector net_cost, proposed_net_cost; * particular bounding box cannot be updated incrementally before, hence the * * bounding box is got from scratch, so the bounding box would definitely be * * right, DO NOT update again. */ -static vtr::vector bb_updated_before; +static vtr::vector bb_updated_before; /* The following arrays are used by the try_swap function for speed. */ @@ -453,7 +456,7 @@ static void update_net_bb(const ClusterNetId& net, if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { //For small nets brute-force bounding box update is faster - if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net + if (bb_updated_before[net] == NetUpdateState::NOT_UPDATED_YET) { //Only once per-net get_non_updatable_bb(net, ts_bb_coord_new[net], ts_layer_sink_pin_count[size_t(net)]); @@ -491,7 +494,7 @@ static void update_net_layer_bb(const ClusterNetId& net, if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { //For small nets brute-force bounding box update is faster - if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net + if (bb_updated_before[net] == NetUpdateState::NOT_UPDATED_YET) { //Only once per-net get_non_updatable_layer_bb(net, layer_ts_bb_coord_new[net], ts_layer_sink_pin_count[size_t(net)]); @@ -849,18 +852,18 @@ static void update_bb(ClusterNetId net_id, pin_old_loc.y = max(min(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels /* Check if the net had been updated before. */ - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { /* The net had been updated from scratch, DO NOT update again! */ return; } - vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? 
place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new; + vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new; - if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id]; curr_bb_coord = &place_move_ctx.bb_coords[net_id]; - bb_updated_before[net_id] = UPDATED_ONCE; + bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE; } else { /* The net had been updated before, must use the new values */ curr_bb_coord = &bb_coord_new; @@ -876,7 +879,7 @@ static void update_bb(ClusterNetId net_id, if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */ if (curr_bb_edge->xmax == 1) { get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; return; } else { bb_edge_new.xmax = curr_bb_edge->xmax - 1; @@ -908,7 +911,7 @@ static void update_bb(ClusterNetId net_id, if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */ if (curr_bb_edge->xmin == 1) { get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; return; } else { bb_edge_new.xmin = curr_bb_edge->xmin - 1; @@ -949,7 +952,7 @@ static void update_bb(ClusterNetId net_id, if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. 
*/ if (curr_bb_edge->ymax == 1) { get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; return; } else { bb_edge_new.ymax = curr_bb_edge->ymax - 1; @@ -981,7 +984,7 @@ static void update_bb(ClusterNetId net_id, if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */ if (curr_bb_edge->ymin == 1) { get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; return; } else { bb_edge_new.ymin = curr_bb_edge->ymin - 1; @@ -1028,8 +1031,8 @@ static void update_bb(ClusterNetId net_id, } } - if (bb_updated_before[net_id] == NOT_UPDATED_YET) { - bb_updated_before[net_id] = UPDATED_ONCE; + if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { + bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE; } } @@ -1064,18 +1067,18 @@ static void update_layer_bb(ClusterNetId net_id, pin_old_loc.y = max(min(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels /* Check if the net had been updated before. */ - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { /* The net had been updated from scratch, DO NOT update again! */ return; } - const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; + const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? 
place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; - if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id]; curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id]; - bb_updated_before[net_id] = UPDATED_ONCE; + bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE; } else { /* The net had been updated before, must use the new values */ curr_bb_edge = &bb_edge_new; @@ -1118,8 +1121,8 @@ static void update_layer_bb(ClusterNetId net_id, bb_coord_new); } - if (bb_updated_before[net_id] == NOT_UPDATED_YET) { - bb_updated_before[net_id] = UPDATED_ONCE; + if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { + bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE; } } @@ -1150,7 +1153,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id, curr_bb_coord[layer_num].xmax, bb_edge_new[layer_num].xmax, bb_coord_new[layer_num].xmax); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1173,7 +1176,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id, curr_bb_coord[layer_num].xmin, bb_edge_new[layer_num].xmin, bb_coord_new[layer_num].xmin); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1197,7 +1200,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id, curr_bb_coord[layer_num].ymax, bb_edge_new[layer_num].ymax, bb_coord_new[layer_num].ymax); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1220,7 +1223,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id, curr_bb_coord[layer_num].ymin, bb_edge_new[layer_num].ymin, 
bb_coord_new[layer_num].ymin); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1260,7 +1263,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, curr_bb_coord[old_layer_num].xmax, bb_edge_new[old_layer_num].xmax, bb_coord_new[old_layer_num].xmax); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } else if (x_old == curr_bb_coord[old_layer_num].xmin) { @@ -1272,7 +1275,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, curr_bb_coord[old_layer_num].xmin, bb_edge_new[old_layer_num].xmin, bb_coord_new[old_layer_num].xmin); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1286,7 +1289,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, curr_bb_coord[old_layer_num].ymax, bb_edge_new[old_layer_num].ymax, bb_coord_new[old_layer_num].ymax); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } else if (y_old == curr_bb_coord[old_layer_num].ymin) { @@ -1298,7 +1301,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, curr_bb_coord[old_layer_num].ymin, bb_edge_new[old_layer_num].ymin, bb_coord_new[old_layer_num].ymin); - if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { return; } } @@ -1339,7 +1342,7 @@ static inline void update_bb_edge(ClusterNetId net_id, bb_edge_new, bb_coord_new, bb_layer_pin_sink_count); - bb_updated_before[net_id] = GOT_FROM_SCRATCH; + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; return; } else { new_num_block_on_edge = old_num_block_on_edge - 1; @@ -2008,7 +2011,7 @@ void update_move_nets(int num_nets_affected, /* negative proposed_net_cost value 
is acting as a flag. */ proposed_net_cost[net_id] = -1; - bb_updated_before[net_id] = NOT_UPDATED_YET; + bb_updated_before[net_id] = NetUpdateState::NOT_UPDATED_YET; } } @@ -2018,7 +2021,7 @@ void reset_move_nets(int num_nets_affected) { inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; proposed_net_cost[net_id] = -1; - bb_updated_before[net_id] = NOT_UPDATED_YET; + bb_updated_before[net_id] = NetUpdateState::NOT_UPDATED_YET; } } @@ -2205,7 +2208,7 @@ void init_net_cost_structs(size_t num_nets) { /* Used to store costs for moves not yet made and to indicate when a net's * * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * * been recomputed. */ - bb_updated_before.resize(num_nets, NOT_UPDATED_YET); + bb_updated_before.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); } void free_net_cost_structs() { From b2eb2454b40aebff5178b38f52251d84509f79de Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 10:26:26 -0500 Subject: [PATCH 169/188] use num_layers variable as upper bound of a loop --- vpr/src/place/net_cost_handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 7cc2ee7fec4..ec821899329 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -758,7 +758,7 @@ static void get_non_updatable_layer_bb(ClusterNetId net_id, auto& device_ctx = g_vpr_ctx.device(); int num_layers = device_ctx.grid.get_num_layers(); - for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int layer_num = 0; layer_num < num_layers; layer_num++) { num_sink_layer[layer_num] = 0; } From 16f8fff3b66e3522c84abfdb54f0b70d55811c3d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 10:28:11 -0500 Subject: [PATCH 170/188] store num_moved_blocks before the for loop --- vpr/src/place/place_constraints.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index 8ca39656f37..7a9fdeafdb1 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -67,8 +67,8 @@ void print_macro_constraint_error(const t_pl_macro& pl_macro); inline bool floorplan_legal(const t_pl_atom_blocks_to_be_moved& blocks_affected) { bool floorplan_legal; - - for (int i = 0; i < blocks_affected.num_moved_blocks; i++) { + const int num_moved_blocks = blocks_affected.num_moved_blocks; + for (int i = 0; i < num_moved_blocks; i++) { AtomBlockId mv_atom_blk = blocks_affected.moved_blocks[i].block_num; ClusterBlockId cluster_blk = g_vpr_ctx.atom().lookup.atom_clb(mv_atom_blk); const t_pl_atom_loc& to_pl_atom_loc = blocks_affected.moved_blocks[i].new_loc; @@ -86,8 +86,8 @@ inline bool floorplan_legal(const t_pl_atom_blocks_to_be_moved& blocks_affected) inline bool floorplan_legal(const t_pl_blocks_to_be_moved& blocks_affected) { bool floorplan_legal; - - for (int i = 0; i < blocks_affected.num_moved_blocks; i++) { + const int num_moved_blocks = blocks_affected.num_moved_blocks; + for (int i = 0; i < num_moved_blocks; i++) { floorplan_legal = cluster_floorplanning_legal(blocks_affected.moved_blocks[i].block_num, blocks_affected.moved_blocks[i].new_loc); if (!floorplan_legal) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tMove aborted for block %zu, location tried was x: %d, y: %d, subtile: %d \n", size_t(blocks_affected.moved_blocks[i].block_num), blocks_affected.moved_blocks[i].new_loc.x, blocks_affected.moved_blocks[i].new_loc.y, blocks_affected.moved_blocks[i].new_loc.sub_tile); From 321344fed9a3e67f3e7a304eed24d33438d24411 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 22 Nov 2023 10:53:34 -0500 Subject: [PATCH 171/188] add std algorithm back since std sort is used --- libs/libvtrutil/src/vtr_vec_id_set.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libvtrutil/src/vtr_vec_id_set.h 
b/libs/libvtrutil/src/vtr_vec_id_set.h index 7207225932c..10dc10e0f0a 100644 --- a/libs/libvtrutil/src/vtr_vec_id_set.h +++ b/libs/libvtrutil/src/vtr_vec_id_set.h @@ -2,6 +2,7 @@ #define VTR_SET_H #include +#include namespace vtr { From 574b6fc07cc10f14fdf5c17784aae27a11fec31f Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 5 Jun 2024 09:52:38 -0400 Subject: [PATCH 172/188] [vpr] fix the bugs after merge was mastered --- vpr/src/base/vpr_types.h | 2 +- vpr/src/place/net_cost_handler.cpp | 89 +++++++++++++++--------------- vpr/src/place/net_cost_handler.h | 8 +-- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 0a29d86ed26..867ed756f32 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -790,7 +790,7 @@ struct t_pl_loc { } friend bool operator==(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return std::tie(lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.x, rhs.y, rhs.sub_tile, rhs.layer); + return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile); } friend bool operator!=(const t_pl_loc& lhs, const t_pl_loc& rhs) { diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ec821899329..f852c799ebc 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -4,6 +4,7 @@ #include "move_utils.h" #include "place_timing_update.h" #include "noc_place_utils.h" +#include "vtr_math.h" using std::max; using std::min; @@ -2026,68 +2027,66 @@ void reset_move_nets(int num_nets_affected) { } void recompute_costs_from_scratch(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_placer_costs* costs) { + const t_noc_opts& noc_opts, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_placer_costs* costs) { + auto check_and_print_cost = 
[](double new_cost, + double old_cost, + const std::string& cost_name) { + if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n", + cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + }; + double new_bb_cost = recompute_bb_cost(); - if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n", - new_bb_cost, costs->bb_cost); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost"); costs->bb_cost = new_bb_cost; if (placer_opts.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, &new_timing_cost); - if (fabs( - new_timing_cost - - costs->timing_cost) - > costs->timing_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", - new_timing_cost, costs->timing_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost"); costs->timing_cost = new_timing_cost; } else { VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); - costs->cost = new_bb_cost * costs->bb_cost_norm; } if (noc_opts.noc) { - double new_noc_aggregate_bandwidth_cost = 0.; - double new_noc_latency_cost = 0.; - recompute_noc_costs(new_noc_aggregate_bandwidth_cost, new_noc_latency_cost); - - if (fabs( - new_noc_aggregate_bandwidth_cost - - costs->noc_aggregate_bandwidth_cost) - > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_aggregate_bandwidth_cost = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n", - 
new_noc_aggregate_bandwidth_cost, costs->noc_aggregate_bandwidth_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - costs->noc_aggregate_bandwidth_cost = new_noc_aggregate_bandwidth_cost; + NocCostTerms new_noc_cost; + recompute_noc_costs(new_noc_cost); + + check_and_print_cost(new_noc_cost.aggregate_bandwidth, + costs->noc_cost_terms.aggregate_bandwidth, + "noc_aggregate_bandwidth"); + costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth; // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) - if (new_noc_latency_cost > MIN_EXPECTED_NOC_LATENCY_COST) { - if (fabs( - new_noc_latency_cost - - costs->noc_latency_cost) - > costs->noc_latency_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_latency_cost = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n", - new_noc_latency_cost, costs->noc_latency_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency, + costs->noc_cost_terms.latency, + "noc_latency_cost"); + } + costs->noc_cost_terms.latency = new_noc_cost.latency; + + if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency_overrun, + costs->noc_cost_terms.latency_overrun, + "noc_latency_overrun_cost"); + } + costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun; + + if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { + check_and_print_cost(new_noc_cost.congestion, + costs->noc_cost_terms.congestion, + "noc_congestion_cost"); } - costs->noc_latency_cost = new_noc_latency_cost; + costs->noc_cost_terms.congestion = new_noc_cost.congestion; } } diff --git a/vpr/src/place/net_cost_handler.h 
b/vpr/src/place/net_cost_handler.h index dc16f342b6f..53d19c47ced 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -99,10 +99,10 @@ void reset_move_nets(int num_nets_affected); * @param costs */ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_placer_costs* costs); + const t_noc_opts& noc_opts, + const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + t_placer_costs* costs); /** * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac From 2a76d7980c261f0849375a73a3576b4b33c98fde Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 15:24:48 -0400 Subject: [PATCH 173/188] [vpr][place] update bb_from_scratch to update layer too --- vpr/src/place/net_cost_handler.cpp | 32 ++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index f852c799ebc..03ac70db404 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1396,8 +1396,8 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges, vtr::NdMatrixProxy num_sink_pin_layer) { - int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax; - int xmin_edge, xmax_edge, ymin_edge, ymax_edge; + int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax, layer_min, layer_max; + int xmin_edge, xmax_edge, ymin_edge, ymax_edge, layer_min_edge, layer_max_edge; auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); @@ -1411,18 +1411,25 @@ static void get_bb_from_scratch(ClusterNetId net_id, + physical_tile_type(bnum)->pin_width_offset[pnum]; y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + pin_layer = place_ctx.block_locs[bnum].loc.layer; x = max(min(x, grid.width() - 2), 1); 
y = max(min(y, grid.height() - 2), 1); + pin_layer = max(min(pin_layer, grid.get_num_layers() - 1), 0); xmin = x; ymin = y; + layer_min = pin_layer; xmax = x; ymax = y; + layer_max = pin_layer; + xmin_edge = 1; ymin_edge = 1; + layer_min_edge = 1; xmax_edge = 1; ymax_edge = 1; + layer_max_edge = 1; for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { num_sink_pin_layer[layer_num] = 0; @@ -1446,6 +1453,7 @@ static void get_bb_from_scratch(ClusterNetId net_id, x = max(min(x, grid.width() - 2), 1); //-2 for no perim channels y = max(min(y, grid.height() - 2), 1); //-2 for no perim channels + pin_layer = max(min(pin_layer, grid.get_num_layers() - 1), 0); if (x == xmin) { xmin_edge++; @@ -1473,6 +1481,19 @@ static void get_bb_from_scratch(ClusterNetId net_id, ymax_edge = 1; } + if (pin_layer == layer_min) { + layer_min_edge++; + } + if (pin_layer == layer_max) { + layer_max_edge++; + } else if (pin_layer < layer_min) { + layer_min = pin_layer; + layer_min_edge = 1; + } else if (pin_layer > layer_max) { + layer_max = pin_layer; + layer_max_edge = 1; + } + num_sink_pin_layer[pin_layer]++; } @@ -1482,11 +1503,18 @@ static void get_bb_from_scratch(ClusterNetId net_id, coords.xmax = xmax; coords.ymin = ymin; coords.ymax = ymax; + coords.layer_min = layer_min; + coords.layer_max = layer_max; + VTR_ASSERT_DEBUG(layer_min >= 0 && layer_min < device_ctx.grid.get_num_layers()); + VTR_ASSERT_DEBUG(layer_max >= 0 && layer_max < device_ctx.grid.get_num_layers()); + num_on_edges.xmin = xmin_edge; num_on_edges.xmax = xmax_edge; num_on_edges.ymin = ymin_edge; num_on_edges.ymax = ymax_edge; + num_on_edges.layer_min = layer_min_edge; + num_on_edges.layer_max = layer_max_edge; } /* This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. 
* From a6d2fa40927c70f7be3acb3ab5414ad4bc8a6740 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 15:31:14 -0400 Subject: [PATCH 174/188] [vpr][place] update get_non_updatable_bb to update layer --- vpr/src/place/net_cost_handler.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 03ac70db404..4a4e5cfbb66 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -686,11 +686,11 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. */ static void get_non_updatable_bb(ClusterNetId net_id, - t_bb& bb_coord_new, - vtr::NdMatrixProxy num_sink_pin_layer) { + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer) { //TODO: account for multiple physical pin instances per logical pin - int xmax, ymax, xmin, ymin, x, y, layer; + int xmax, ymax, layer_max, xmin, ymin, layer_min, x, y, layer; int pnum; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -704,11 +704,14 @@ static void get_non_updatable_bb(ClusterNetId net_id, + physical_tile_type(bnum)->pin_width_offset[pnum]; y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + layer = place_ctx.block_locs[bnum].loc.layer; xmin = x; ymin = y; + layer_min = layer; xmax = x; ymax = y; + layer_max = layer; for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { num_sink_pin_layer[layer_num] = 0; @@ -735,6 +738,12 @@ static void get_non_updatable_bb(ClusterNetId net_id, ymax = y; } + if (layer < layer_min) { + layer_min = layer; + } else if (layer > layer_max) { + layer_max = layer; + } + num_sink_pin_layer[layer]++; } @@ -748,8 +757,10 @@ static void get_non_updatable_bb(ClusterNetId net_id, bb_coord_new.xmin = max(min(xmin, device_ctx.grid.width() 
- 2), 1); //-2 for no perim channels bb_coord_new.ymin = max(min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.layer_min = max(min(layer_min, device_ctx.grid.get_num_layers() - 1), 0); bb_coord_new.xmax = max(min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels bb_coord_new.ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.layer_max = max(min(layer_max, device_ctx.grid.get_num_layers() - 1), 0); } static void get_non_updatable_layer_bb(ClusterNetId net_id, From 8a8345e02f6e72d6a0dcdfccb0f70143daa97c4f Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 15:36:30 -0400 Subject: [PATCH 175/188] [vpr][place] initialize the num_sink_layer to zero per layer --- vpr/src/place/net_cost_handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 4a4e5cfbb66..a94153bcde2 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -770,7 +770,7 @@ static void get_non_updatable_layer_bb(ClusterNetId net_id, auto& device_ctx = g_vpr_ctx.device(); int num_layers = device_ctx.grid.get_num_layers(); - for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { num_sink_layer[layer_num] = 0; } From 655409eb840ea96ece0989dc7e5a4b7bdca435a4 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 15:40:40 -0400 Subject: [PATCH 176/188] [vpr][place] update update_bb to update layer info --- vpr/src/place/net_cost_handler.cpp | 73 +++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index a94153bcde2..80ac56c4959 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -860,8 +860,10 @@ static void 
update_bb(ClusterNetId net_id, pin_new_loc.x = max(min(pin_new_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels pin_new_loc.y = max(min(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + pin_new_loc.layer_num = max(min(pin_new_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0); pin_old_loc.x = max(min(pin_old_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels pin_old_loc.y = max(min(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + pin_old_loc.layer_num = max(min(pin_old_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0); /* Check if the net had been updated before. */ if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) { @@ -897,7 +899,7 @@ static void update_bb(ClusterNetId net_id, bb_edge_new.xmax = curr_bb_edge->xmax - 1; bb_coord_new.xmax = curr_bb_coord->xmax; } - } else { /* Move to left, old postion was not at xmax. */ + } else { /* Move to left, old position was not at xmax. 
*/ bb_coord_new.xmax = curr_bb_coord->xmax; bb_edge_new.xmax = curr_bb_edge->xmax; } @@ -1041,6 +1043,75 @@ static void update_bb(ClusterNetId net_id, num_sink_pin_layer_new[pin_new_loc.layer_num] = (curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1; } } + + if (pin_new_loc.layer_num < pin_old_loc.layer_num) { + if (pin_old_loc.layer_num == curr_bb_coord->layer_max) { + if (curr_bb_edge->layer_max == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new.layer_max = curr_bb_edge->layer_max - 1; + bb_coord_new.layer_max = curr_bb_coord->layer_max; + } + } else { + bb_coord_new.layer_max = curr_bb_coord->layer_max; + bb_edge_new.layer_max = curr_bb_edge->layer_max; + } + + + if (pin_new_loc.layer_num < curr_bb_coord->layer_min) { + bb_coord_new.layer_min = pin_new_loc.layer_num; + bb_edge_new.layer_min = 1; + } else if (pin_new_loc.layer_num == curr_bb_coord->layer_min) { + bb_coord_new.layer_min = pin_new_loc.layer_num; + bb_edge_new.layer_min = curr_bb_edge->layer_min + 1; + } else { + bb_coord_new.layer_min = curr_bb_coord->layer_min; + bb_edge_new.layer_min = curr_bb_edge->layer_min; + } + + } else if (pin_new_loc.layer_num > pin_old_loc.layer_num) { + + + if (pin_old_loc.layer_num == curr_bb_coord->layer_min) { + if (curr_bb_edge->layer_min == 1) { + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); + bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH; + return; + } else { + bb_edge_new.layer_min = curr_bb_edge->layer_min - 1; + bb_coord_new.layer_min = curr_bb_coord->layer_min; + } + } else { + bb_coord_new.layer_min = curr_bb_coord->layer_min; + bb_edge_new.layer_min = curr_bb_edge->layer_min; + } + + if (pin_new_loc.layer_num > curr_bb_coord->layer_max) { + bb_coord_new.layer_max = pin_new_loc.layer_num; + bb_edge_new.layer_max = 1; + } else if (pin_new_loc.layer_num == 
curr_bb_coord->layer_max) { + bb_coord_new.layer_max = pin_new_loc.layer_num; + bb_edge_new.layer_max = curr_bb_edge->layer_max + 1; + } else { + bb_coord_new.layer_max = curr_bb_coord->layer_max; + bb_edge_new.layer_max = curr_bb_edge->layer_max; + } + + + } else { + bb_coord_new.layer_min = curr_bb_coord->layer_min; + bb_coord_new.layer_max = curr_bb_coord->layer_max; + bb_edge_new.layer_min = curr_bb_edge->layer_min; + bb_edge_new.layer_max = curr_bb_edge->layer_max; + } + + } else { + bb_coord_new.layer_min = curr_bb_coord->layer_min; + bb_coord_new.layer_max = curr_bb_coord->layer_max; + bb_edge_new.layer_min = curr_bb_edge->layer_min; + bb_edge_new.layer_max = curr_bb_edge->layer_max; } if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { From 38e6b31bb32b0b6f84321938f9691ef76fbad229 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 18:15:06 -0400 Subject: [PATCH 177/188] [vpr][place] get pin direction from get_pin_type_... --- vpr/src/place/net_cost_handler.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 80ac56c4959..4b1785ace73 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -122,10 +122,11 @@ static void update_net_bb(const ClusterNetId& net, * @param blk_pin * @param pl_moved_block */ -static void update_net_layer_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, - const t_pl_moved_block& pl_moved_block); +static void update_net_layer_bb(const ClusterNetId net, + const t_pl_blocks_to_be_moved& blocks_affected, + int iblk, + const ClusterBlockId blk, + const ClusterPinId blk_pin); /** * @brief Calculate the new connection delay and timing cost of all the @@ -503,12 +504,13 @@ static void update_net_layer_bb(const ClusterNetId& net, } else { //For large nets, update bounding box incrementally int iblk_pin = tile_pin_index(blk_pin); - 
bool src_pin = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER; t_physical_tile_type_ptr blk_type = physical_tile_type(blk); int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; + auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin); + //Incremental bounding box update update_layer_bb(net, layer_ts_bb_edge_new[net], @@ -520,7 +522,7 @@ static void update_net_layer_bb(const ClusterNetId& net, {pl_moved_block.new_loc.x + pin_width_offset, pl_moved_block.new_loc.y + pin_height_offset, pl_moved_block.new_loc.layer}, - src_pin); + pin_dir == e_pin_type::DRIVER); } } From ccffd514ddaaf8e9e4107ad32e66513c18e67cda Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 12 Jun 2024 18:18:29 -0400 Subject: [PATCH 178/188] [vpr][place] fix update_net_layer declaration --- vpr/src/place/net_cost_handler.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 4b1785ace73..4bb1ac66471 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -122,11 +122,10 @@ static void update_net_bb(const ClusterNetId& net, * @param blk_pin * @param pl_moved_block */ -static void update_net_layer_bb(const ClusterNetId net, - const t_pl_blocks_to_be_moved& blocks_affected, - int iblk, - const ClusterBlockId blk, - const ClusterPinId blk_pin); +static void update_net_layer_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block); /** * @brief Calculate the new connection delay and timing cost of all the From b9589642e12ec87130fba88e959267364a3f6a96 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 20 Jun 2024 11:45:02 -0400 Subject: [PATCH 179/188] [vpr][place] remove the parts for reclustering --- libs/libarchfpga/src/physical_types_util.cpp | 33 ----- libs/libarchfpga/src/physical_types_util.h | 
3 - vpr/src/base/SetupVPR.cpp | 1 - vpr/src/base/read_options.cpp | 9 -- vpr/src/base/read_options.h | 1 - vpr/src/base/vpr_types.cpp | 27 ---- vpr/src/base/vpr_types.h | 39 ------ .../atom_critical_uniform_move_generator.cpp | 62 --------- .../atom_critical_uniform_move_generator.h | 25 ---- vpr/src/place/move_transactions.cpp | 48 ------- vpr/src/place/move_transactions.h | 38 ------ vpr/src/place/move_utils.cpp | 126 ------------------ vpr/src/place/move_utils.h | 17 --- vpr/src/place/net_cost_handler.cpp | 81 ----------- vpr/src/place/net_cost_handler.h | 19 --- vpr/src/place/place.cpp | 11 -- vpr/src/place/place_constraints.h | 19 --- vpr/src/place/place_re_cluster.cpp | 103 -------------- vpr/src/place/place_re_cluster.h | 19 --- vpr/src/place/place_util.cpp | 9 -- vpr/src/place/place_util.h | 5 - vpr/src/util/vpr_utils.cpp | 39 ------ vpr/src/util/vpr_utils.h | 3 - 23 files changed, 737 deletions(-) delete mode 100644 vpr/src/place/atom_critical_uniform_move_generator.cpp delete mode 100644 vpr/src/place/atom_critical_uniform_move_generator.h delete mode 100644 vpr/src/place/place_re_cluster.cpp delete mode 100644 vpr/src/place/place_re_cluster.h diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index caaaa69c313..43a0fbc54da 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -524,39 +524,6 @@ bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_bl return capacity_compatible && is_tile_compatible(physical_tile, logical_block); } -bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num) { - VTR_ASSERT(loc_primitive_num != OPEN); - const t_pb_graph_node* loc_pb_graph_node = nullptr; - - // Check whether the atom - const t_pb_graph_node* parent_pb_graph_node = atom_pb_graph_node->parent_pb_graph_node; - while 
(parent_pb_graph_node->parent_pb_graph_node != nullptr) { - parent_pb_graph_node = parent_pb_graph_node->parent_pb_graph_node; - } - - if (logical_block->pb_graph_head != parent_pb_graph_node) { - return false; - } - /** - * Iterate over the data structure that maps primitive_pb_graph_node to their respective class range, - * and retrieve the primitive_pb_graph_node from that map. If the primitive number assigned to that - * primitive_pb_graph_node is equal to loc_primitive_num, then we have found the desired primitive_pb_graph_node. - */ - for (const auto& primitive_node_class_pair : logical_block->primitive_pb_graph_node_class_range) { - const auto& primitive_node = primitive_node_class_pair.first; - VTR_ASSERT_SAFE(primitive_node->primitive_num != OPEN); - if (primitive_node->primitive_num == loc_primitive_num) { - loc_pb_graph_node = primitive_node; - break; - } - } - VTR_ASSERT_SAFE(loc_pb_graph_node != nullptr); - if (loc_pb_graph_node->pb_type == atom_pb_graph_node->pb_type) - return true; - else - return false; -} - int get_physical_pin_at_sub_tile_location(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_capacity, diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index 0a2118db4df..4d9c3013682 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -187,9 +187,6 @@ bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_ ///@brief Verifies whether a logical block and a relative placement location is compatible with a given physical tile bool is_sub_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_type_ptr logical_block, int sub_tile_loc); -///@brief Verifies whether the given atom_pb_graph_node can be mapped to the primitive that loc_primitive_num is pointing to. 
-bool is_atom_compatible(t_logical_block_type_ptr logical_block, const t_pb_graph_node* atom_pb_graph_node, int loc_primitive_num); - /** * @brief Returns the first physical tile type that matches the logical block * diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index f00a8a3a162..d05f7d5d7da 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -693,7 +693,6 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->place_constraint_subtile = Options.place_constraint_subtile; PlacerOpts->floorplan_num_horizontal_partitions = Options.floorplan_num_horizontal_partitions; PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; - PlacerOpts->place_re_cluster = Options.place_re_cluster; PlacerOpts->seed = Options.Seed; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index d67201da59c..454caae26cb 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2200,15 +2200,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"move_type", "move_block_type"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.place_re_cluster, "--place_re_cluster") - .help( - "Use this option to determine whether reclustering occurs during placement. " - "" - "When this option is set to 'on,' the placement stage may change some clusters. 
" - "Conversely, if the option is set to 'off,' the clustering determined by the packer will remain unchanged") - .default_value("off") - .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.placer_debug_block, "--placer_debug_block") .help( " Controls when placer debugging is enabled for blocks.\n" diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 841de9dfe7d..a2dcc49d244 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -151,7 +151,6 @@ struct t_options { argparse::ArgValue place_constraint_subtile; argparse::ArgValue floorplan_num_horizontal_partitions; argparse::ArgValue floorplan_num_vertical_partitions; - argparse::ArgValue place_re_cluster; argparse::ArgValue placer_debug_block; argparse::ArgValue placer_debug_net; diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 1642561db37..475a9a30d19 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -557,31 +557,4 @@ void t_cluster_placement_stats::free_primitives() { delete primitive.second; } } -} - -/** - * @brief Get the atom block id at the given location. Since we currently don't have any array to retrieve this information directly, - * we first find the cluster mapped to that location, and then find the atom inside that cluster that is mapped to the given location. 
- */ -AtomBlockId GridBlock::block_at_location(const t_pl_atom_loc& loc) const { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - t_pl_loc cluster_loc(loc.x, loc.y, loc.sub_tile, loc.layer); - ClusterBlockId cluster_at_loc = block_at_location(cluster_loc); - if (cluster_at_loc == EMPTY_BLOCK_ID) { - return EMPTY_PRIMITIVE_BLOCK_ID; - } else if (cluster_at_loc == INVALID_BLOCK_ID) { - return INVALID_PRIMITIVE_BLOCK_ID; - } else { - VTR_ASSERT(cluster_at_loc.is_valid()); - const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup; - const auto& atom_list = cluster_atoms.at(cluster_at_loc); - for (const auto& atom : atom_list) { - int primitive_pin = atom_lookup.atom_pb_graph_node(atom)->primitive_num; - t_pl_atom_loc atom_loc(primitive_pin, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer); - if (atom_loc == loc) { - return atom; - } - } - return EMPTY_PRIMITIVE_BLOCK_ID; - } } \ No newline at end of file diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 4cf11b10a82..9cf9119329b 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -98,9 +98,7 @@ enum class ScreenUpdatePriority { /* Defining macros for the placement_ctx t_grid_blocks. 
Assumes that ClusterBlockId's won't exceed positive 32-bit integers */ constexpr auto EMPTY_BLOCK_ID = ClusterBlockId(-1); -constexpr auto EMPTY_PRIMITIVE_BLOCK_ID = AtomBlockId(-1); constexpr auto INVALID_BLOCK_ID = ClusterBlockId(-2); -constexpr auto INVALID_PRIMITIVE_BLOCK_ID = AtomBlockId(-2); /* * Files @@ -814,40 +812,6 @@ struct hash { }; } // namespace std -struct t_pl_atom_loc { - t_pl_atom_loc() = default; - t_pl_atom_loc(int primitive_id_, int x_, int y_, int sub_tile_, int layer_) - : primitive_id(primitive_id_) - , x(x_) - , y(y_) - , sub_tile(sub_tile_) - , layer(layer_) {} - - int primitive_id = OPEN; - int x = OPEN; - int y = OPEN; - int sub_tile = OPEN; - int layer = OPEN; -}; - -inline bool operator==(const t_pl_atom_loc& lhs, const t_pl_atom_loc& rhs) { - return std::tie(lhs.primitive_id, lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.primitive_id, rhs.x, rhs.y, rhs.sub_tile, rhs.layer); -} - -namespace std { -template<> -struct hash { - std::size_t operator()(const t_pl_atom_loc& v) const noexcept { - std::size_t seed = std::hash{}(v.x); - vtr::hash_combine(seed, v.y); - vtr::hash_combine(seed, v.sub_tile); - vtr::hash_combine(seed, v.layer); - vtr::hash_combine(seed, v.primitive_id); - return seed; - } -}; -} // namespace std - struct t_place_region { float capacity; /// getCriticalAtomBlock(); - -e_create_move AtomCriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) { - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - ClusterBlockId cluster_blk_id = ClusterBlockId::INVALID(); - AtomBlockId atom_blk_id = AtomBlockId::INVALID(); - std::tie(cluster_blk_id, atom_blk_id) = getCriticalAtomBlock(); - - if (cluster_blk_id == ClusterBlockId::INVALID() || atom_blk_id == AtomBlockId::INVALID()) { - return e_create_move::ABORT; // Not a 
valid block - } - - t_pl_loc from = place_ctx.block_locs[cluster_blk_id].loc; - auto cluster_from_type = cluster_ctx.clb_nlist.block_type(cluster_blk_id); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); - VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); - - t_pl_loc to; - - if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, cluster_blk_id)) { - return e_create_move::ABORT; - } - - e_create_move create_move = ::create_move(blocks_affected, cluster_blk_id, to); - - //Check that all of the blocks affected by the move would still be in a legal floorplan region after the swap - if (!floorplan_legal(blocks_affected)) { - return e_create_move::ABORT; - } - - return create_move; -} - -static std::pair getCriticalAtomBlock() { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& cluster_netlist = cluster_ctx.clb_nlist; - const auto& atom_netlist = g_vpr_ctx.atom().nlist; - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - const auto& place_move_ctx = g_placer_ctx.move(); - const auto& place_ctx = g_vpr_ctx.placement(); - /* Pick a random block to be swapped with another random block. 
*/ - // pick it from the highly critical blocks - if (place_move_ctx.highly_crit_pins.empty()) { - return std::make_pair(ClusterBlockId::INVALID(), AtomBlockId::INVALID()); //No critical block - } - std::pair crit_cluster_net_pin = place_move_ctx.highly_crit_pins[vtr::irand(place_move_ctx.highly_crit_pins.size() - 1)]; - ClusterBlockId cluster_crit_blk = cluster_netlist.net_driver_block(crit_cluster_net_pin.first); - if (place_ctx.block_locs[cluster_crit_blk].is_fixed) { - return std::make_pair(ClusterBlockId::INVALID(), AtomBlockId::INVALID()); //Block is fixed, cannot move - } - - AtomNetId atom_crit_net = atom_lookup.atom_net(crit_cluster_net_pin.first); - AtomBlockId atom_crit_blk = atom_netlist.net_driver_block(atom_crit_net); - - return std::make_pair(cluster_crit_blk, atom_crit_blk); -} diff --git a/vpr/src/place/atom_critical_uniform_move_generator.h b/vpr/src/place/atom_critical_uniform_move_generator.h deleted file mode 100644 index 4cfd8b31c84..00000000000 --- a/vpr/src/place/atom_critical_uniform_move_generator.h +++ /dev/null @@ -1,25 +0,0 @@ -// -// Created by amin on 5/10/23. -// - -#ifndef VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H -#define VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H - -#include "move_generator.h" -#include "timing_place.h" - -/** - * @file - * @author Amin Mohaghegh - * @brief Primitive critical uniform move type - * - * This move picks a random block from the the critical blocks (those with one or more critical nets) - * and moves it (swapping with what's there if necessary) to a random location within rlim units - * away in the x and y dimensions in the compressed block grid. - * - * Returns its choices by filling in affected_blocks. 
- */ -class AtomCriticalUniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /* proposed_action */, float rlim, const t_placer_opts& /* placer_opts */, const PlacerCriticalities* /* criticalities */) override; -}; -#endif //VTR_PRIMITIVE_CRITICAL_UNIFORM_MOVE_GENERATOR_H diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index c80b2fc3488..f36dd4d5e39 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -3,35 +3,6 @@ #include "globals.h" #include "place_util.h" -e_block_move_result record_block_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId blk, t_pl_atom_loc to) { - auto res = blocks_affected.moved_to.emplace(to); - if (!res.second) { - log_move_abort("duplicate block move to location"); - return e_block_move_result::ABORT; - } - - const auto& place_ctx = g_vpr_ctx.placement(); - - t_pl_atom_loc from = get_atom_loc(blk); - - auto res2 = blocks_affected.moved_from.emplace(from); - if (!res2.second) { - log_move_abort("duplicate block move from location"); - return e_block_move_result::ABORT; - } - - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); - - // Sets up the blocks moved - int imoved_blk = blocks_affected.num_moved_blocks; - blocks_affected.moved_blocks[imoved_blk].block_num = blk; - blocks_affected.moved_blocks[imoved_blk].old_loc = from; - blocks_affected.moved_blocks[imoved_blk].new_loc = to; - blocks_affected.num_moved_blocks++; - - return e_block_move_result::VALID; -} - //Records that block 'blk' should be moved to the specified 'to' location e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId blk, t_pl_loc to) { auto res = blocks_affected.moved_to.emplace(to); @@ -62,21 +33,6 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, return 
e_block_move_result::VALID; } -//Moves the blocks in blocks_affected to their new locations -void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected) { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - std::set seen_clusters; - const int num_moved_blocks = blocks_affected.num_moved_blocks; - for (int blk_idx = 0; blk_idx < num_moved_blocks; blk_idx++) { - AtomBlockId atom_blk = blocks_affected.moved_blocks[blk_idx].block_num; - ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom_blk); - if (seen_clusters.find(cluster_blk) == seen_clusters.end()) { - seen_clusters.insert(cluster_blk); - place_sync_external_block_connections(cluster_blk); - } - } -} - //Moves the blocks in blocks_affected to their new locations void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -135,10 +91,6 @@ void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { } // Finish updating clb for all blocks } -void revert_move_blocks(t_pl_atom_blocks_to_be_moved& blocks_affected) { - //TODO: this function needs to be implemented -} - //Moves the blocks in blocks_affected to their old locations void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { auto& place_ctx = g_vpr_ctx.mutable_placement(); diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 61f84316c94..27dd2b1b3c6 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -19,12 +19,6 @@ struct t_pl_moved_block { t_pl_loc new_loc; }; -struct t_pl_moved_atom_block { - AtomBlockId block_num; - t_pl_atom_loc old_loc; - t_pl_atom_loc new_loc; -}; - /* Stores the list of cluster blocks to be moved in a swap during * * placement. 
* * Store the information on the blocks to be moved in a swap during * @@ -51,32 +45,6 @@ struct t_pl_blocks_to_be_moved { std::vector affected_pins; }; -/* Stores the list of atom blocks to be moved in a swap during * - * placement. * - * Store the information on the blocks to be moved in a swap during * - * placement, in the form of array of structs instead of struct with * - * arrays for cache efficiently * - * - * num_moved_blocks: total number of blocks moved when * - * swapping two blocks. * - * moved blocks: a list of moved blocks data structure with * - * information on the move. * - * [0...max_blocks-1] * - * affected_pins: pins affected by this move (used to * - * incrementally invalidate parts of the timing * - * graph. */ -struct t_pl_atom_blocks_to_be_moved { - t_pl_atom_blocks_to_be_moved(size_t max_blocks) - : moved_blocks(max_blocks) {} - - int num_moved_blocks = 0; - std::vector moved_blocks; - std::unordered_set moved_from; - std::unordered_set moved_to; - - std::vector affected_pins; -}; - enum class e_block_move_result { VALID, //Move successful ABORT, //Unable to perform move @@ -84,18 +52,12 @@ enum class e_block_move_result { INVERT_VALID //Completed inverted move }; -e_block_move_result record_block_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId blk, t_pl_atom_loc to); - e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId blk, t_pl_loc to); -void apply_move_blocks(const t_pl_atom_blocks_to_be_moved& blocks_affected); - void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); -void revert_move_blocks(t_pl_atom_blocks_to_be_moved& blocks_affected); - void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); void clear_move_blocks(t_pl_blocks_to_be_moved& blocks_affected); diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 1cbd7ef0f2d..3ec00c26970 100644 
--- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -38,19 +38,6 @@ void report_aborted_moves() { } } -e_create_move create_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to) { - e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to); - // Currently, for re-clustering during placement, we don't support INVERT - VTR_ASSERT(outcome != e_block_move_result::INVERT || outcome != e_block_move_result::INVERT_VALID); - - if (outcome == e_block_move_result::VALID) { - return e_create_move::VALID; - } else { - VTR_ASSERT(outcome == e_block_move_result::ABORT); - return e_create_move::ABORT; - } -} - e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to); @@ -84,35 +71,6 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock } } -e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - e_block_move_result outcome = e_block_move_result::VALID; - - ClusterBlockId from_cluster_block = atom_lookup.atom_clb(b_from); - VTR_ASSERT(from_cluster_block.is_valid()); - - //TODO: Currently, if the atom belong to a cluster that is a part of a macro, we don't move it - const auto& pl_macros = g_vpr_ctx.placement().pl_macros; - int imacro = OPEN; - get_imacro_from_iblk(&imacro, from_cluster_block, pl_macros); - if (imacro != OPEN) { - return e_block_move_result::ABORT; - } else { - const auto& grid_blocks = g_vpr_ctx.placement().grid_blocks; - AtomBlockId to_atom = grid_blocks.block_at_location(to_loc); - if (to_atom.is_valid()) { - ClusterBlockId to_cluster_block = atom_lookup.atom_clb(to_atom); - get_imacro_from_iblk(&imacro, to_cluster_block, pl_macros); - if (imacro != OPEN) { - return e_block_move_result::ABORT; - 
} - } - } - - outcome = record_single_block_swap(atom_blocks_affected, b_from, to_loc); - return outcome; -} - e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { /* Finds and set ups the affected_blocks array. * Returns abort_swap. */ @@ -160,43 +118,6 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte return outcome; } -e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc) { - VTR_ASSERT(b_from); - ClusterBlockId cluster_b_from = g_vpr_ctx.atom().lookup.atom_clb(b_from); - - const auto& place_ctx = g_vpr_ctx.placement(); - - if (place_ctx.block_locs[cluster_b_from].is_fixed) { - return e_block_move_result::ABORT; - } - - VTR_ASSERT_SAFE(to_loc.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to_loc.x, to_loc.y, to_loc.layer}))); - - e_block_move_result outcome = e_block_move_result::ABORT; - - AtomBlockId b_to = place_ctx.grid_blocks.block_at_location(to_loc); - - if (b_to == EMPTY_PRIMITIVE_BLOCK_ID) { - outcome = record_block_move(blocks_affected, b_from, to_loc); - } else if (b_to != INVALID_PRIMITIVE_BLOCK_ID) { - ClusterBlockId cluster_b_to = g_vpr_ctx.atom().lookup.atom_clb(b_to); - if (!(is_legal_swap_to_location(b_to, to_loc)) || place_ctx.block_locs[cluster_b_to].is_fixed) { - return e_block_move_result::ABORT; - } - - outcome = record_block_move(blocks_affected, b_from, to_loc); - - if (outcome != e_block_move_result::VALID) { - return outcome; - } - - t_pl_atom_loc from_atom_loc = get_atom_loc(b_from); - outcome = record_block_move(blocks_affected, b_to, from_atom_loc); - } - - return outcome; -} - e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { /* Find all the blocks affected when b_from is swapped with b_to. * Returns abort_swap. 
*/ @@ -524,53 +445,6 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe return outcome; } -bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to) { - //Make sure that the swap_to location is valid - //It must be: - // * on chip, and - // * match the correct block type - // - //Note that we need to explicitly check that the types match, since the device floorplan is not - //(neccessarily) translationally invariant for an arbitrary macro - - const auto& place_ctx = g_vpr_ctx.placement(); - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - const auto& atom_pb = g_vpr_ctx.atom().lookup.atom_pb(blk); - - ClusterBlockId from_cluster_block = atom_lookup.atom_clb(blk); - t_pl_loc to_cluster_loc(to.x, to.y, to.sub_tile, to.layer); - ClusterBlockId to_cluster_block = place_ctx.grid_blocks.block_at_location(to_cluster_loc); - - - // If the clusters cannot be swapped return false - if (!is_legal_swap_to_location(from_cluster_block, to_cluster_loc)) { - return false; - } - - // Check legality issues specific to atoms - std::vector logical_blocks; - - // If there is already a block at the destination, the only logical block there is the logical block of that particular cluster. - // If there isn't any, all logical blocks compatible to that sub_tile should be considered. 
- if (to_cluster_block.is_valid() && to_cluster_block != INVALID_BLOCK_ID) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto logical_block = cluster_ctx.clb_nlist.block_type(to_cluster_block); - logical_blocks.push_back(logical_block); - } else if (to_cluster_block == EMPTY_BLOCK_ID) { - const auto& physical_tile = g_vpr_ctx.device().grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); - const auto& sub_tile = physical_tile->sub_tiles[to.sub_tile]; - logical_blocks = sub_tile.equivalent_sites; - } - - for (const auto& logical_block : logical_blocks) { - if (is_atom_compatible(logical_block, atom_pb->pb_graph_node, to.primitive_id)) { - return true; - } - } - - return false; -} - bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { //Make sure that the swap_to location is valid //It must be: diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 3978f546b38..24cce99769f 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -93,14 +93,8 @@ void log_move_abort(std::string_view reason); //Prints a breif report about aborted move reasons and counts void report_aborted_moves(); -e_create_move create_move(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to); - e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); -// Update atom_blocks_affects with the information about blocks which will be moved if b_from is to be moved to t_loc. Return Valid if the move is legal. -// Currently, this function is much more limited compare to cluster one. It only supports single block move. 
-e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); - /** * @brief Find the blocks that will be affected by a move of b_from to to_loc * @param blocks_affected @@ -111,9 +105,6 @@ e_block_move_result find_affected_blocks(t_pl_atom_blocks_to_be_moved& atom_bloc */ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); -// Update blocks affected if neither b_from nor the block at to_loc (if there is any) is part of a macro -e_block_move_result record_single_block_swap(t_pl_atom_blocks_to_be_moved& blocks_affected, AtomBlockId b_from, t_pl_atom_loc to_loc); - e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, t_pl_offset swap_offset); @@ -126,14 +117,6 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, e_block_move_result identify_macro_self_swap_affected_macros(std::vector& macros, const int imacro, t_pl_offset swap_offset); e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, t_pl_offset swap_offset); -/** - * @brief Check whether the "to" location is legal for the given "blk" - * @param blk - * @param to - * @return - */ -bool is_legal_swap_to_location(AtomBlockId blk, t_pl_atom_loc to); - /** * @brief Check whether the "to" location is legal for the given "blk" * @param blk diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 4bb1ac66471..6bb24208503 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -84,15 +84,6 @@ static vtr::Matrix ts_layer_sink_pin_count; /* [0...num_afftected_nets] -> net_id of the affected nets */ static std::vector ts_nets_to_update; - -/** - 
* @param net - * @param moved_blocks - * @return True if the driver block of the net is among the moving blocks - */ -static bool driven_by_moved_block(const AtomNetId net, - const std::vector& moved_blocks); - /** * @param net * @param moved_blocks @@ -408,21 +399,6 @@ static double wirelength_crossing_count(size_t fanout); */ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c); -//Returns true if 'net' is driven by one of the blocks in 'blocks_affected' -static bool driven_by_moved_block(const AtomNetId net, - const std::vector& moved_blocks) { - const auto& atom_nlist = g_vpr_ctx.atom().nlist; - bool is_driven_by_move_blk; - AtomBlockId net_driver_block = atom_nlist.net_driver_block( - net); - - is_driven_by_move_blk = std::any_of(moved_blocks.begin(), moved_blocks.end(), [&net_driver_block](const auto& move_blk) { - return net_driver_block == move_blk.block_num; - }); - - return is_driven_by_move_blk; -} - //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const ClusterNetId net, const int num_blocks, @@ -1878,63 +1854,6 @@ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c) { } } -int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_atom_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c) { - const auto& atom_look_up = g_vpr_ctx.atom().lookup; - const auto& atom_nlist = g_vpr_ctx.atom().nlist; - - VTR_ASSERT_SAFE(bb_delta_c == 0.); - VTR_ASSERT_SAFE(timing_delta_c == 0.); - - int num_affected_nets = 0; - - std::vector affected_pins; - - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - AtomBlockId atom_blk_id = blocks_affected.moved_blocks[iblk].block_num; - ClusterBlockId cluster_blk_id = atom_look_up.atom_clb(atom_blk_id); - const auto& atom_old_loc = 
blocks_affected.moved_blocks[iblk].old_loc; - const auto& atom_new_loc = blocks_affected.moved_blocks[iblk].new_loc; - - for (const AtomPinId& atom_pin : atom_nlist.block_pins(atom_blk_id)) { - auto cluster_pins = cluster_pins_connected_to_atom_pin(atom_pin); - for (const auto& cluster_pin : cluster_pins) { - bool is_src_moving = false; - if (atom_nlist.pin_type(atom_pin) == PinType::SINK) { - AtomNetId net_id = atom_nlist.pin_net(atom_pin); - is_src_moving = driven_by_moved_block(net_id, blocks_affected.moved_blocks); - } - t_pl_moved_block move_cluster_inf; - move_cluster_inf.block_num = cluster_blk_id; - move_cluster_inf.old_loc = t_pl_loc(atom_old_loc.x, atom_old_loc.y, atom_old_loc.sub_tile, atom_old_loc.layer); - move_cluster_inf.new_loc = t_pl_loc(atom_new_loc.x, atom_new_loc.y, atom_new_loc.sub_tile, atom_new_loc.layer); - update_net_info_on_pin_move(place_algorithm, - delay_model, - criticalities, - cluster_blk_id, - cluster_pin, - move_cluster_inf, - affected_pins, - timing_delta_c, - num_affected_nets, - is_src_moving); - } - } - } - - /* Now update the bounding box costs (since the net bounding * - * boxes are up-to-date). The cost is only updated once per net. */ - set_bb_delta_cost(num_affected_nets, bb_delta_c); - - - return num_affected_nets; -} - /** * @brief Find all the nets and pins affected by this swap and update costs. * diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 53d19c47ced..4019c19c7eb 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -9,25 +9,6 @@ enum e_cost_methods { CHECK }; -/** - * @brief Update the wire length and timing cost of the blocks (ts and proposed_* data structures) and set - * the delta costs in bb_delta_c and timing_delta_c. 
This functions is used when the moving bocks are atoms - * @param place_algorithm - * @param delay_model - * @param criticalities - * @param blocks_affected - * @param bb_delta_c - * @param timing_delta_c - * @return - */ -int find_affected_nets_and_update_costs( - const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - t_pl_atom_blocks_to_be_moved& blocks_affected, - double& bb_delta_c, - double& timing_delta_c); - /** * @brief Update the wire length and timing cost of the blocks (ts and proposed_* data structures) and set * the delta costs in bb_delta_c and timing_delta_c. This functions is used when the moving bocks are clusters diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 1a7d4dace74..a83614c68ae 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -64,13 +64,10 @@ #include "clustered_netlist_utils.h" -#include "re_cluster_util.h" #include "cluster_placement.h" #include "noc_place_utils.h" -#include "place_re_cluster.h" - #include "net_cost_handler.h" /* define the RL agent's reward function factor constant. 
This factor controls the weight of bb cost * @@ -400,8 +397,6 @@ void try_place(const Netlist<>& net_list, t_placement_checkpoint placement_checkpoint; - PlaceReCluster place_re_cluster; - std::shared_ptr timing_info; std::shared_ptr placement_delay_calc; std::unique_ptr place_delay_model; @@ -887,12 +882,6 @@ void try_place(const Netlist<>& net_list, } auto post_quench_timing_stats = timing_ctx.stats; - if (placer_opts.place_re_cluster) { - place_re_cluster.re_cluster(placer_opts.place_algorithm, - place_delay_model.get(), - placer_criticalities.get()); - } - //Final timing analysis PlaceCritParams crit_params; crit_params.crit_exponent = state.crit_exponent; diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index 7a9fdeafdb1..526e9d23ebb 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -65,25 +65,6 @@ void propagate_place_constraints(); void print_macro_constraint_error(const t_pl_macro& pl_macro); -inline bool floorplan_legal(const t_pl_atom_blocks_to_be_moved& blocks_affected) { - bool floorplan_legal; - const int num_moved_blocks = blocks_affected.num_moved_blocks; - for (int i = 0; i < num_moved_blocks; i++) { - AtomBlockId mv_atom_blk = blocks_affected.moved_blocks[i].block_num; - ClusterBlockId cluster_blk = g_vpr_ctx.atom().lookup.atom_clb(mv_atom_blk); - const t_pl_atom_loc& to_pl_atom_loc = blocks_affected.moved_blocks[i].new_loc; - t_pl_loc to_pl_loc = {to_pl_atom_loc.x, to_pl_atom_loc.y, to_pl_atom_loc.sub_tile, to_pl_atom_loc.layer}; - floorplan_legal = cluster_floorplanning_legal(cluster_blk, to_pl_loc); - if (!floorplan_legal) { -# ifdef VERBOSE - VTR_LOG("Move aborted for block %zu, location tried was x: %d, y: %d, subtile: %d \n", size_t(blocks_affected.moved_blocks[i].block_num), blocks_affected.moved_blocks[i].new_loc.x, blocks_affected.moved_blocks[i].new_loc.y, blocks_affected.moved_blocks[i].new_loc.sub_tile); -# endif - return false; - } - } - return true; -} - inline 
bool floorplan_legal(const t_pl_blocks_to_be_moved& blocks_affected) { bool floorplan_legal; const int num_moved_blocks = blocks_affected.num_moved_blocks; diff --git a/vpr/src/place/place_re_cluster.cpp b/vpr/src/place/place_re_cluster.cpp deleted file mode 100644 index 96eca2a059f..00000000000 --- a/vpr/src/place/place_re_cluster.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// -// Created by amin on 9/15/23. -// - -#include "place_re_cluster.h" - -#include "globals.h" -#include "move_utils.h" -#include "net_cost_handler.h" - -static ClusterBlockId random_cluster(); - -static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id); - -static bool swap_atoms(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - t_pl_atom_blocks_to_be_moved& blocks_affected, - AtomBlockId from_atom_blk_id, - AtomBlockId to_atom_blk_id); - -void PlaceReCluster::re_cluster(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities) { - const int num_moves = 2 << 20; - - t_pl_atom_blocks_to_be_moved blocks_affected(g_vpr_ctx.atom().nlist.blocks().size()); - - for (int move_num = 0; move_num < num_moves; ++move_num) { - ClusterBlockId from_cluster_blk_id; - AtomBlockId from_atom_blk_id; - ClusterBlockId to_cluster_blk_id; - AtomBlockId to_atom_blk_id; - - from_cluster_blk_id = random_cluster(); - from_atom_blk_id = random_atom_in_cluster(from_cluster_blk_id); - - while (true) { - to_cluster_blk_id = random_cluster(); - to_atom_blk_id = random_atom_in_cluster(to_cluster_blk_id); - - if (from_cluster_blk_id != to_cluster_blk_id) { - break; - } - } - - if (!swap_atoms(place_algorithm, delay_model, criticalities, blocks_affected, from_atom_blk_id, to_atom_blk_id)) { - revert_move_blocks(blocks_affected); - } - } -} - -static bool swap_atoms(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - 
t_pl_atom_blocks_to_be_moved& blocks_affected, - AtomBlockId /* from_atom_blk_id */, - AtomBlockId /* to_atom_blk_id */) { - double delta_c = 0; //Change in cost due to this swap. - double bb_delta_c = 0; //Change in the bounding box (wiring) cost. - double timing_delta_c = 0; //Change in the timing cost (delay * criticality). - - // const auto& to_atom_loc = get_atom_loc(to_atom_blk_id); - - // e_create_move create_move = ::create_move(blocks_affected, from_atom_blk_id, to_atom_loc); - - // if (!floorplan_legal(blocks_affected)) { - // return false; - // } - - apply_move_blocks(blocks_affected); - - int num_nets_affected = find_affected_nets_and_update_costs( - place_algorithm, delay_model, criticalities, blocks_affected, - bb_delta_c, timing_delta_c); - - // TODO:dummy return just to remove warnings - return (num_nets_affected + delta_c) == 0; -} - -static ClusterBlockId random_cluster() { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - - int rand_id = vtr::irand(cluster_ctx.clb_nlist.blocks().size() - 1); - - return ClusterBlockId(rand_id); -} - -static AtomBlockId random_atom_in_cluster(ClusterBlockId cluster_blk_id) { - // const auto& cluster_ctx = g_vpr_ctx.clustering(); - - const auto& cluster_atoms = g_vpr_ctx.cl_helper().atoms_lookup[cluster_blk_id]; - - int rand_id = vtr::irand(cluster_atoms.size() - 1); - - auto it = cluster_atoms.begin(); - - std::advance(it, rand_id); - - AtomBlockId atom_blk_id = *it; - - return atom_blk_id; -} diff --git a/vpr/src/place/place_re_cluster.h b/vpr/src/place/place_re_cluster.h deleted file mode 100644 index 63cd227775c..00000000000 --- a/vpr/src/place/place_re_cluster.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// Created by amin on 9/15/23. 
-// - -#ifndef VTR_PLACE_RE_CLUSTER_H -#define VTR_PLACE_RE_CLUSTER_H - -#include "timing_place.h" - -class PlaceReCluster { - public: - PlaceReCluster() = default; - - void re_cluster(const t_place_algorithm& place_algorithm, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities); -}; - -#endif //VTR_PLACE_RE_CLUSTER_H diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 1fe4e8bca7b..52b9fdeb3d1 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -473,15 +473,6 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ return (mac_can_be_placed); } -t_pl_atom_loc get_atom_loc(AtomBlockId atom) { - const auto& atom_lookup = g_vpr_ctx.atom().lookup; - ClusterBlockId cluster_blk = atom_lookup.atom_clb(atom); - t_pl_loc cluster_loc = g_vpr_ctx.placement().block_locs[cluster_blk].loc; - int primitive_id = atom_lookup.atom_pb_graph_node(atom)->primitive_num; - - return {primitive_id, cluster_loc.x, cluster_loc.y, cluster_loc.sub_tile, cluster_loc.layer}; -} - NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest) : aggregate_bandwidth(agg_bw) , latency(lat) diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index e54203dec26..36c544ef344 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -383,9 +383,4 @@ inline bool is_loc_on_chip(t_physical_tile_loc loc) { * require to check for all legality constraints. */ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_legality); - -//TODO: The atom loc should be stored in place_ctx -- I am creating this function because I didn't want to create another -// Array in place_ctx. 
-t_pl_atom_loc get_atom_loc(AtomBlockId atom); - #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index c4249a0cb09..ca95ff85eb0 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -497,45 +497,6 @@ std::tuple find_pb_route_clb_input_net_pin(ClusterBlockI return std::tuple(clb_net_idx, curr_pb_pin_id, clb_net_pin_idx); } -std::vector cluster_pins_connected_to_atom_pin(AtomPinId atom_pin) { - std::vector cluster_pins; - const auto& atom_net_list = g_vpr_ctx.atom().nlist; - const auto& atom_look_up = g_vpr_ctx.atom().lookup; - const auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; - AtomBlockId atom_block_id = atom_net_list.pin_block(atom_pin); - AtomNetId atom_net_id = atom_net_list.pin_net(atom_pin); - ClusterNetId cluster_net_id = atom_look_up.clb_net(atom_net_id); - ClusterBlockId cluster_block_id = atom_look_up.atom_clb(atom_block_id); - if (cluster_net_id == ClusterNetId::INVALID()) { - return cluster_pins; - } - - const auto& atom_pb_graph_pin = g_vpr_ctx.atom().lookup.atom_pin_pb_graph_pin(atom_pin); - int atom_pb_pin_id = atom_pb_graph_pin->pin_count_in_cluster; - std::vector cluster_pb_pin_id; - if (atom_pb_graph_pin->port->type == PORTS::IN_PORT) { - int cluster_pin_id; - int cluster_net_pin_id; - std::tie(cluster_net_id, cluster_pin_id, cluster_net_pin_id) = find_pb_route_clb_input_net_pin(cluster_block_id, atom_pb_pin_id); - if (cluster_net_id != ClusterNetId::INVALID()) { - VTR_ASSERT(cluster_pin_id != -1 && cluster_net_pin_id != -1); - cluster_pins.push_back(cluster_net_list.net_pin(cluster_net_id, cluster_net_pin_id)); - } - } else { - VTR_ASSERT(atom_pb_graph_pin->port->type == PORTS::OUT_PORT); - std::vector connected_sink_pb_pins; - connected_sink_pb_pins = find_connected_internal_clb_sink_pins(cluster_block_id, atom_pb_pin_id); - for (int sink_pb_pin : connected_sink_pb_pins) { - int net_pin_idx = cluster_net_list.block_pin_net_index(cluster_block_id, sink_pb_pin); - if (net_pin_idx != 
OPEN) { - cluster_pins.push_back(cluster_net_list.net_pin(cluster_net_id, net_pin_idx)); - } - } - } - - return cluster_pins; -} - bool is_clb_external_pin(ClusterBlockId blk_id, int pb_pin_id) { auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 38482e27b21..9382660142c 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -153,9 +153,6 @@ std::vector find_clb_pin_sink_atom_pins(ClusterBlockId clb, int logic std::tuple find_pb_route_clb_input_net_pin(ClusterBlockId clb, int sink_pb_route_id); -// Return the cluster pins connected to the atom pin -std::vector cluster_pins_connected_to_atom_pin(AtomPinId atom_pin); - //Returns the port matching name within pb_gnode const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name); From 5adfa2bc782613b0af0ff2e373e3c83517b5151c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 20 Jun 2024 11:57:05 -0400 Subject: [PATCH 180/188] [vpr][place] add a library --- vpr/src/place/place.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index a83614c68ae..6cb794a95e3 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -55,6 +55,7 @@ #include "VprTimingGraphResolver.h" #include "timing_util.h" #include "timing_info.h" +#include "concrete_timing_info.h" #include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" From 94b51b93cd8624eca91277c73f2d53467d0d2241 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Wed, 26 Jun 2024 09:46:50 -0400 Subject: [PATCH 181/188] [vpr][place] add more comments to net_cost_handler.h --- vpr/src/place/move_utils.h | 8 +-- vpr/src/place/net_cost_handler.cpp | 28 +--------- vpr/src/place/net_cost_handler.h | 85 +++++++++++++++++++----------- 3 files changed, 60 insertions(+), 61 deletions(-) diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 24cce99769f..a16bcb240d0 100644 --- 
a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -97,9 +97,9 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock /** * @brief Find the blocks that will be affected by a move of b_from to to_loc - * @param blocks_affected - * @param b_from - * @param to + * @param blocks_affected Loaded by this routine and returned via reference; it lists the blocks etc. moved + * @param b_from Id of the cluster-level block to be moved + * @param to Where b_from will be moved to * @return e_block_move_result ABORT if either of the the moving blocks are already stored, or either of the blocks are fixed, to location is not * compatible, etc. INVERT if the "from" block is a single block and the "to" block is a macro. VALID otherwise. */ @@ -121,7 +121,7 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe * @brief Check whether the "to" location is legal for the given "blk" * @param blk * @param to - * @return + * @return True if this would be a legal move, false otherwise */ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 6bb24208503..196b1d87df4 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1854,28 +1854,6 @@ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c) { } } -/** - * @brief Find all the nets and pins affected by this swap and update costs. - * - * Find all the nets affected by this swap and update the bounding box (wiring) - * costs. This cost function doesn't depend on the timing info. - * - * Find all the connections affected by this swap and update the timing cost. - * For a connection to be affected, it not only needs to be on or driven by - * a block, but it also needs to have its delay changed. Otherwise, it will - * not be added to the affected_pins structure. - * - * For more, see update_td_delta_costs(). 
- * - * The timing costs are calculated by getting the new connection delays, - * multiplied by the connection criticalities returned by the timing - * analyzer. These timing costs are stored in the proposed_* data structures. - * - * The change in the bounding box cost is stored in `bb_delta_c`. - * The change in the timing cost is stored in `timing_delta_c`. - * - * @return The number of affected nets. - */ int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -2258,11 +2236,7 @@ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb) { layer_ts_bb_coord_new.resize(num_nets, std::vector(num_layers, t_2D_bb())); } - ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)}); - for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) { - auto& elem = ts_layer_sink_pin_count.get(flat_idx); - elem = OPEN; - } + ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)}, OPEN); ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID()); } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 4019c19c7eb..609cc80ea9d 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -4,21 +4,43 @@ #include "move_transactions.h" #include "place_util.h" +/** + * @brief The method used to calculate palcement cost + * NORMAL: Compute cost efficiently using incremental techniques. + * CHECK: Brute-force cost computation; useful to validate the more complex incremental cost update code. + */ enum e_cost_methods { NORMAL, CHECK }; /** - * @brief Update the wire length and timing cost of the blocks (ts and proposed_* data structures) and set - * the delta costs in bb_delta_c and timing_delta_c. This functions is used when the moving bocks are clusters + * @brief Find all the nets and pins affected by this swap and update costs. 
+ * + * Find all the nets affected by this swap and update the bounding box (wiring) + * costs. This cost function doesn't depend on the timing info. + * + * Find all the connections affected by this swap and update the timing cost. + * For a connection to be affected, it not only needs to be on or driven by + * a block, but it also needs to have its delay changed. Otherwise, it will + * not be added to the affected_pins structure. + * + * For more, see update_td_delta_costs(). + * + * The timing costs are calculated by getting the new connection delays, + * multiplied by the connection criticalities returned by the timing + * analyzer. These timing costs are stored in the proposed_* data structures. + * + * The change in the bounding box cost is stored in `bb_delta_c`. + * The change in the timing cost is stored in `timing_delta_c`. + * * @param place_algorithm * @param delay_model * @param criticalities * @param blocks_affected * @param bb_delta_c * @param timing_delta_c - * @return + * @return The number of affected nets. */ int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, @@ -29,37 +51,36 @@ int find_affected_nets_and_update_costs( double& timing_delta_c); /** - * @brief Finds the bb cost from scratch (based on 3D BB). Done only when the placement * -* has been radically changed (i.e. after initial placement). * -* Otherwise find the cost change incrementally. If method * -* check is NORMAL, we find bounding boxes that are updatable * -* for the larger nets. If method is CHECK, all bounding boxes * -* are found via the non_updateable_bb routine, to provide a * -* cost which can be used to check the correctness of the * -* other routine. * + * @brief Finds the bb cost from scratch (based on 3D BB). + * Done only when the placement has been radically changed + * (i.e. after initial placement). Otherwise find the cost + * change incrementally. 
If method check is NORMAL, we find + * bounding boxes that are updatable for the larger nets. + * If method is CHECK, all bounding boxes are found via the + * non_updateable_bb routine, to provide a cost which can be + * used to check the correctness of the other routine. * @param method - * @return + * @return The bounding box cost of the placement, computed by the 3D method. */ double comp_bb_cost(e_cost_methods method); /** - * @brief Finds the bb cost from scratch (based on per-layer BB). Done only when the placement * -* has been radically changed (i.e. after initial placement). * -* Otherwise find the cost change incrementally. If method * -* check is NORMAL, we find bounding boxes that are updateable * -* for the larger nets. If method is CHECK, all bounding boxes * -* are found via the non_updateable_bb routine, to provide a * -* cost which can be used to check the correctness of the * -* other routine. * + * @brief Finds the bb cost from scratch (based on per-layer BB). + * Done only when the placement has been radically changed + * (i.e. after initial placement). Otherwise find the cost change + * incrementally. If method check is NORMAL, we find bounding boxes + * that are updateable for the larger nets. If method is CHECK, all + * bounding boxes are found via the non_updateable_bb routine, to provide + * a cost which can be used to check the correctness of the other routine. * @param method - * @return + * @return The placement bounding box cost, computed by the per layer method. */ double comp_layer_bb_cost(e_cost_methods method); /** * @brief update net cost data structures (in placer context and net_cost in .cpp file) and reset flags (proposed_net_cost and bb_updated_before). - * @param num_nets_affected - * @param cube_bb + * @param num_nets_affected The number of nets affected by the move. It is used to determine the index up to which elements in ts_nets_to_update are valid. 
+ * @param cube_bb True if we should use the 3D bounding box (cube_bb), false otherwise. */ void update_move_nets(int num_nets_affected, const bool cube_bb); @@ -72,12 +93,13 @@ void reset_move_nets(int num_nets_affected); /** * @brief re-calculates different terms of the cost function (wire-length, timing, NoC) and update "costs" accordingly. It is important to note that - * in this function bounding box and connection delays are not calculated from scratch. However, it iterated over nets and add their costs from beginning. + * in this function bounding box and connection delays are not calculated from scratch. However, it iterates over all nets and connections and updates + * their costs by a complete summation, rather than incrementally. * @param placer_opts * @param noc_opts * @param delay_model * @param criticalities - * @param costs + * @param costs passed by reference and computed by this routine (i.e. returned by reference) */ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, @@ -89,7 +111,8 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * arrays with the inverse of the average number of tracks per channel * between [subhigh] and [sublow]. - * @param place_cost_exp + * @param place_cost_exp It is an exponent to which you take the average inverse channel + * capacity; a higher value would favour wider channels more over narrower channels during placement (usually we use 1). */ void alloc_and_load_for_fast_cost_update(float place_cost_exp); @@ -100,7 +123,7 @@ void free_fast_cost_update(); /** * @brief Resize net_cost, proposed_net_cost, and bb_updated_before data structures to accommodate all nets. - * @param num_nets + * @param num_nets Number of nets in the netlist (clustered currently) that the placement engine uses. 
*/ void init_net_cost_structs(size_t num_nets); @@ -110,9 +133,11 @@ void init_net_cost_structs(size_t num_nets); void free_net_cost_structs(); /** - * @brief Resize (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update to accommodate all nets. - * @param num_nets - * @param cube_bb + * @brief Resize temporary storage data structures needed to determine which nets are affected by a move and data needed per net + * about where their terminals are in order to quickly (incrementally) update their wirelength costs. These data structures are + * (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update. + * @param num_nets Number of nets in the netlist used by the placement engine (currently clustered netlist) + * @param cube_bb True if the 3D bounding box should be used, false otherwise. */ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb); From 7ae2112ae957ea96d4c3bf43a341b4a3bfa23f1c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 4 Jul 2024 17:11:50 -0400 Subject: [PATCH 182/188] add more comments on net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 143 +++++++++++++++-------------- vpr/src/place/net_cost_handler.h | 4 +- vpr/src/place/place.cpp | 4 +- 3 files changed, 76 insertions(+), 75 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 196b1d87df4..e13a1707e25 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -126,8 +126,8 @@ static void update_net_layer_bb(const ClusterNetId& net, * @param criticalities * @param net * @param pin - * @param affected_pins Store the sink pins which delays are changed due to moving the block - * @param delta_timing_cost + * @param affected_pins Updated by this routine to store the sink pins whose delays are changed due to moving the block + * @param delta_timing_cost Computed by this routine and returned by reference. 
* @param is_src_moving True if "pin" is a sink pin and its driver is among the moving blocks */ static void update_td_delta_costs(const PlaceDelayModel* delay_model, @@ -140,24 +140,24 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, /** * @brief if "net" is not already stored as an affected net, mark it in ts_nets_to_update and increment num_affected_nets - * @param net - * @param num_affected_nets + * @param net ID of a net affected by a move + * @param num_affected_nets Incremented if this is a new net affected, and returned via reference. */ static void record_affected_net(const ClusterNetId net, int& num_affected_nets); /** * @brief Call suitable function based on the bounding box type to update the bounding box of the net connected to pin_id. Also, * call the function to update timing information if the placement algorithm is timing-driven. - * @param place_algorithm - * @param delay_model - * @param criticalities - * @param blk_id - * @param pin_id - * @param moving_blk_inf - * @param affected_pins - * @param timing_delta_c - * @param num_affected_nets - * @param is_src_moving + * @param place_algorithm Placement algorithm + * @param delay_model Timing delay model used by placer + * @param criticalities Connections timing criticalities + * @param blk_id Block ID of that the moving pin blongs to. + * @param pin_id Pin ID of the moving pin + * @param moving_blk_inf Data structure that holds information, e.g., old location and new locatoin, about all moving blocks + * @param affected_pins Netlist pins which are affected, in terms placement cost, by the proposed move. + * @param timing_delta_c Timing cost change based on the proposed move + * @param num_affected_nets A pointer to the first free element of ts_nets_to_update. If a new net is added, the pointer should be increamented. + * @param is_src_moving Is the moving pin the source of a net. 
*/ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, @@ -173,8 +173,8 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm /** * @brief Calculate the 3D bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and * store them in bb_coord_new - * @param net_id - * @param bb_coord_new + * @param net_id ID of the net for which the bounding box is requested + * @param bb_coord_new Computed by this function and returned by reference. * @param num_sink_pin_layer Store the number of sink pins of "net_id" on each layer */ static void get_non_updatable_bb(ClusterNetId net_id, @@ -184,22 +184,22 @@ static void get_non_updatable_bb(ClusterNetId net_id, /** * @brief Calculate the per-layer bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and * store them in bb_coord_new - * @param net_id - * @param bb_coord_new - * @param num_sink_layer + * @param net_id ID of the net for which the bounding box is requested + * @param bb_coord_new Computed by this function and returned by reference. 
+ * @param num_sink_layer Store the number of sink pins of "net_id" on each layer */ static void get_non_updatable_layer_bb(ClusterNetId net_id, std::vector& bb_coord_new, vtr::NdMatrixProxy num_sink_layer); /** - * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of the pin - * @param bb_edge_new - * @param bb_coord_new - * @param num_sink_pin_layer_new - * @param pin_old_loc - * @param pin_new_loc - * @param src_pin + * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of a pin on that net + * @param bb_edge_new Number of blocks on the edges of the bounding box + * @param bb_coord_new Coordinates of the bounding box + * @param num_sink_pin_layer_new Number of sinks of the given net on each layer + * @param pin_old_loc The old location of the moving pin + * @param pin_new_loc The new location of the moving pin + * @param src_pin Is the moving pin driving the net */ static void update_bb(ClusterNetId net_id, t_bb& bb_edge_new, @@ -210,13 +210,13 @@ static void update_bb(ClusterNetId net_id, bool src_pin); /** - * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of the pin - * @param bb_edge_new - * @param bb_coord_new - * @param num_sink_pin_layer_new - * @param pin_old_loc - * @param pin_new_loc - * @param src_pin + * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of a pin on that net + * @param bb_edge_new Number of blocks on the edges of the bounding box + * @param bb_coord_new Coordinates of the bounding box + * @param num_sink_pin_layer_new Number of sinks of the given net on each layer + * @param pin_old_loc The old location of the moving pin + * @param pin_new_loc The new location of the moving pin + * @param is_output_pin Is the moving pin of the type output */ static void update_layer_bb(ClusterNetId net_id, std::vector& bb_edge_new, @@ -229,14 +229,14 @@ 
static void update_layer_bb(ClusterNetId net_id, /** * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if * the pin under consideration is not changing layer. - * @param net_id - * @param pin_old_loc - * @param pin_new_loc - * @param curr_bb_edge - * @param curr_bb_coord - * @param bb_pin_sink_count_new - * @param bb_edge_new - * @param bb_coord_new + * @param net_id ID of the net which the moving pin belongs to + * @param pin_old_loc Old location of the moving pin + * @param pin_new_loc New location of the moving pin + * @param curr_bb_edge The current known number of blocks of the net on bounding box edges + * @param curr_bb_coord The current known boudning box of the net + * @param bb_pin_sink_count_new The updated number of net's sinks on each layer + * @param bb_edge_new The new bb edge calculated by this function + * @param bb_coord_new The new bb calculated by this function */ static inline void update_bb_same_layer(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, @@ -250,14 +250,14 @@ static inline void update_bb_same_layer(ClusterNetId net_id, /** * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if * the pin under consideration change layer. 
- * @param net_id - * @param pin_old_loc - * @param pin_new_loc - * @param curr_bb_edge - * @param curr_bb_coord - * @param bb_pin_sink_count_new - * @param bb_edge_new - * @param bb_coord_new + * @param net_id ID of the net which the moving pin belongs to + * @param pin_old_loc Old location of the moving pin + * @param pin_new_loc New location of the moving pin + * @param curr_bb_edge The current known number of blocks of the net on bounding box edges + * @param curr_bb_coord The current known boudning box of the net + * @param bb_pin_sink_count_new The updated number of net's sinks on each layer + * @param bb_edge_new The new bb edge calculated by this function + * @param bb_coord_new The new bb calculated by this function */ static inline void update_bb_layer_changed(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, @@ -269,12 +269,12 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, std::vector& bb_coord_new); /** * @brief If the moving pin is of type type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id" - * @param net_id - * @param pin_old_loc - * @param pin_new_loc - * @param curr_layer_pin_sink_count - * @param bb_pin_sink_count_new - * @param is_output_pin + * @param net_id ID of the net which the moving pin belongs to + * @param pin_old_loc Old location of the moving pin + * @param pin_new_loc New location of the moving pin + * @param curr_layer_pin_sink_count Updated number of sinks of the net on each layer + * @param bb_pin_sink_count_new The updated number of net's sinks on each layer + * @param is_output_pin Is the moving pin of the type output */ static void update_bb_pin_sink_count(ClusterNetId net_id, const t_physical_tile_loc& pin_old_loc, @@ -286,14 +286,15 @@ static void update_bb_pin_sink_count(ClusterNetId net_id, /** * @brief Update the data structure for large nets that keep track of * the number of blocks on each edge of the bounding box. 
- * @param net_id - * @param bb_edge_new - * @param bb_coord_new - * @param bb_layer_pin_sink_count - * @param old_num_block_on_edge - * @param old_edge_coord - * @param new_num_block_on_edge - * @param new_edge_coord + * @param net_id ID of the net which the moving pin belongs to + * @param bb_edge_new The new bb edge calculated by this function + * @param bb_coord_new The new bb calculated by this function + * @param bb_layer_pin_sink_count The updated number of net's sinks on each layer + * @param old_num_block_on_edge The current known number of blocks of the net on bounding box edges + * @param old_edge_coord The current known boudning box of the net + * @param new_num_block_on_edge The new bb calculated by this function + * @param new_edge_coord The new bb edge calculated by this function + * */ static inline void update_bb_edge(ClusterNetId net_id, std::vector& bb_edge_new, @@ -307,11 +308,11 @@ static inline void update_bb_edge(ClusterNetId net_id, /** * @brief When BB is being updated incrementally, the pin is moving to a new layer, and the BB is of the type "per-layer, * use this function to update the BB on the new layer. - * @param new_pin_loc - * @param bb_edge_old - * @param bb_coord_old - * @param bb_edge_new - * @param bb_coord_new + * @param new_pin_loc New location of the pin + * @param bb_edge_old bb_edge prior to moving the pin + * @param bb_coord_old bb_coord prior to moving the pin + * @param bb_edge_new New bb edge calculated by this function + * @param bb_coord_new new bb coord calculated by this function */ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, const t_2D_bb& bb_edge_old, @@ -395,7 +396,7 @@ static double recompute_bb_cost(); static double wirelength_crossing_count(size_t fanout); /** - * @breif Calculate the wire-length cost of nets affected by moving the blocks and set bb_delta_c to the total cost change. 
+ * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks indicated in the blocks_affected data structure. */ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c); @@ -2098,7 +2099,7 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, } } -void alloc_and_load_for_fast_cost_update(float place_cost_exp) { +void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) { /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac * * arrays with the inverse of the average number of tracks per channel * * between [subhigh] and [sublow]. This is only useful for the cost * @@ -2204,7 +2205,7 @@ void alloc_and_load_for_fast_cost_update(float place_cost_exp) { } } -void free_fast_cost_update() { +void free_chan_w_factors_for_place_cost () { chanx_place_cost_fac.clear(); chany_place_cost_fac.clear(); } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 609cc80ea9d..7c84a881566 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -114,12 +114,12 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, * @param place_cost_exp It is an exponent to which you take the average inverse channel * capacity; a higher value would favour wider channels more over narrower channels during placement (usually we use 1). */ -void alloc_and_load_for_fast_cost_update(float place_cost_exp); +void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp); /** * @brief Frees the chanx_place_cost_fac and chany_place_cost_fac arrays. */ -void free_fast_cost_update(); +void free_chan_w_factors_for_place_cost (); /** * @brief Resize net_cost, proposed_net_cost, and bb_updated_before data structures to accommodate all nets. 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d7df6635923..536a6a6c477 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1922,7 +1922,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, elem = OPEN; } - alloc_and_load_for_fast_cost_update(place_cost_exp); + alloc_and_load_chan_w_factors_for_place_cost (place_cost_exp); alloc_and_load_try_swap_structs(cube_bb); @@ -1962,7 +1962,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc place_move_ctx.num_sink_pin_layer.clear(); - free_fast_cost_update(); + free_chan_w_factors_for_place_cost (); free_try_swap_structs(); From 7537c277f88c54e41fbdf27ed8da9f64eefc0478 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 4 Jul 2024 17:13:02 -0400 Subject: [PATCH 183/188] [vpr][place] remove block comment sysntax --- vpr/src/place/net_cost_handler.cpp | 75 ++++++++++++++++-------------- vpr/src/util/vpr_utils.cpp | 4 +- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index e13a1707e25..2c64fd5a227 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -9,24 +9,29 @@ using std::max; using std::min; -/* Flags for the states of the bounding box. * - * Stored as char for memory efficiency. */ - +/** + * @brief for the states of the bounding box. + * Stored as char for memory efficiency. + */ enum class NetUpdateState { NOT_UPDATED_YET, UPDATED_ONCE, GOT_FROM_SCRATCH }; -/* This defines the error tolerance for floating points variables used in * - * cost computation. 0.01 means that there is a 1% error tolerance. */ +/** + * @brief The error tolerance due to round off for the total cost computation. + * When we check it from scratch vs. incrementally. 0.01 means that there is a 1% error tolerance. + */ #define ERROR_TOL .01 -/* Expected crossing counts for nets with different #'s of pins. From * - * ICCAD 94 pp. 
690 - 695 (with linear interpolation applied by me). * - * Multiplied to bounding box of a net to better estimate wire length * - * for higher fanout nets. Each entry is the correction factor for the * - * fanout index-1 */ +/** + * @brief Crossing counts for nets with different #'s of pins. From + * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). + * Multiplied to bounding box of a net to better estimate wire length + * for higher fanout nets. Each entry is the correction factor for the + * fanout index-1 + */ static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, @@ -35,33 +40,35 @@ static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933}; -/* The arrays below are used to precompute the inverse of the average * - * number of tracks per channel between [subhigh] and [sublow]. Access * - * them as chan?_place_cost_fac[subhigh][sublow]. They are used to * - * speed up the computation of the cost function that takes the length * - * of the net bounding box in each dimension, divided by the average * - * number of tracks in that direction; for other cost functions they * - * will never be used. * +/** + * @brief Matrices below are used to precompute the inverse of the average + * number of tracks per channel between [subhigh] and [sublow]. Access + * them as chan?_place_cost_fac[subhigh][sublow]. They are used to + * speed up the computation of the cost function that takes the length + * of the net bounding box in each dimension, divided by the average + * number of tracks in that direction; for other cost functions they + * will never be used. 
*/ -static vtr::NdMatrix chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2] -static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2] +static vtr::NdMatrix chanx_place_cost_fac({0, 0}); // [0...device_ctx.grid.width()-2] +static vtr::NdMatrix chany_place_cost_fac({0, 0}); // [0...device_ctx.grid.height()-2] /* Cost of a net, and a temporary cost of a net used during move assessment. */ static vtr::vector net_cost, proposed_net_cost; -/* [0...cluster_ctx.clb_nlist.nets().size()-1] * - * A flag array to indicate whether the specific bounding box has been updated * - * in this particular swap or not. If it has been updated before, the code * - * must use the updated data, instead of the out-of-date data passed into the * - * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET * - * indicates that the net has not been updated before, UPDATED_ONCE indicated * - * that the net has been updated once, if it is going to be updated again, the * - * values from the previous update must be used. GOT_FROM_SCRATCH is only * - * applicable for nets larger than SMALL_NETS and it indicates that the * - * particular bounding box cannot be updated incrementally before, hence the * - * bounding box is got from scratch, so the bounding box would definitely be * - * right, DO NOT update again. */ -static vtr::vector bb_updated_before; +/** * + * @brief Flag array to indicate whether the specific bounding box has been updated + * in this particular swap or not. If it has been updated before, the code + * must use the updated data, instead of the out-of-date data passed into the + * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET + * indicates that the net has not been updated before, UPDATED_ONCE indicated + * that the net has been updated once, if it is going to be updated again, the + * values from the previous update must be used. 
GOT_FROM_SCRATCH is only + * applicable for nets larger than SMALL_NETS and it indicates that the + * particular bounding box is not incrementally updated, and hence the + * bounding box is got from scratch, so the bounding box would definitely be + * right, DO NOT update again. + */ +static vtr::vector bb_updated_before; // [0...cluster_ctx.clb_nlist.nets().size()-1] /* The following arrays are used by the try_swap function for speed. */ @@ -77,9 +84,9 @@ static vtr::vector bb_updated_before; /* [0...cluster_ctx.clb_nlist.nets().size()-1] -> 3D bounding box*/ static vtr::vector ts_bb_coord_new, ts_bb_edge_new; -/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers] -> 2D bonding box on a layer*/ +/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers-1] -> 2D bonding box on a layer*/ static vtr::vector> layer_ts_bb_edge_new, layer_ts_bb_coord_new; -/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers] -> number of sink pins on a layer*/ +/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers-1] -> number of sink pins on a layer*/ static vtr::Matrix ts_layer_sink_pin_count; /* [0...num_afftected_nets] -> net_id of the affected nets */ static std::vector ts_nets_to_update; diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 48704d87971..6f47cf100cb 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -434,8 +434,8 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ return AtomPinId::INVALID(); } -/* Return the net pin which drive the CLB input connected to sink_pb_pin_id, or nullptr if none (i.e. driven internally) - * clb: Block in which the sink pin is located on +/* Return the net pin which drives the CLB input connected to sink_pb_pin_id, or nullptr if none (i.e. 
driven internally) + * clb: Block on which the sink pin is located * sink_pb_pin_id: The physical pin index of the sink pin on the block * * Returns a tuple containing From 165931cd5762b045f4f44994acd20387e017f862 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 5 Jul 2024 09:32:53 -0400 Subject: [PATCH 184/188] [vpr][place] apply Vaughn's comments on net_cost_handler --- vpr/src/place/net_cost_handler.cpp | 450 ++++++++++++++--------------- 1 file changed, 225 insertions(+), 225 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 2c64fd5a227..6029e39e93e 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -25,6 +25,8 @@ enum class NetUpdateState { */ #define ERROR_TOL .01 +const int MAX_FANOUT_CROSSING_COUNT = 50; + /** * @brief Crossing counts for nets with different #'s of pins. From * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). @@ -32,7 +34,7 @@ enum class NetUpdateState { * for higher fanout nets. Each entry is the correction factor for the * fanout index-1 */ -static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, +static const float cross_count[MAX_FANOUT_CROSSING_COUNT] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, @@ -52,7 +54,11 @@ static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828, static vtr::NdMatrix chanx_place_cost_fac({0, 0}); // [0...device_ctx.grid.width()-2] static vtr::NdMatrix chany_place_cost_fac({0, 0}); // [0...device_ctx.grid.height()-2] -/* Cost of a net, and a temporary cost of a net used during move assessment. */ +/** + * @brief Cost of a net, and a temporary cost of a net used during move assessment. 
+ * We also use negative cost values in proposed_net_cost as a flag to indicate that + * the cost of a net has not yet been updated. + */ static vtr::vector net_cost, proposed_net_cost; /** * @@ -101,7 +107,8 @@ static bool driven_by_moved_block(const ClusterNetId net, const std::vector& moved_blocks); /** * @brief Update the bounding box (3D) of the net connected to blk_pin. The old and new locations of the pin are - * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. + * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. Do not update the net + * cost here since it should only be updated once per net, not once per pin. * @param net * @param blk * @param blk_pin @@ -112,23 +119,10 @@ static void update_net_bb(const ClusterNetId& net, const ClusterPinId& blk_pin, const t_pl_moved_block& pl_moved_block); -/** - * @brief Update the bounding box (per-layer) of the net connected to blk_pin. The old and new locations of the pin are - * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. - * @param net - * @param blk - * @param blk_pin - * @param pl_moved_block - */ -static void update_net_layer_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, - const t_pl_moved_block& pl_moved_block); - /** * @brief Calculate the new connection delay and timing cost of all the - * sink pins affected by moving a specific pin to a new location. - * Also calculates the total change in the timing cost. + * sink pins affected by moving a specific pin to a new location. Also + * calculates the total change in the timing cost. 
* @param delay_model * @param criticalities * @param net @@ -176,6 +170,30 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm double& timing_delta_c, int& num_affected_nets, bool is_src_moving); + +/** + * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of a pin on that net + * @details Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and the + * number of blocks on each edge in the bb_edge_new data structure. This routine should only be called for large nets, + * since it has some overhead relative to just doing a brute force bounding box calculation. The bounding box coordinate + * and edge information for inet must be valid before this routine is called. Currently assumes channels on both sides of + * the CLBs forming the edges of the bounding box can be used. Essentially, I am assuming the pins always lie on the + * outside of the bounding box. The x and y coordinates are the pin's x and y coordinates. IO blocks are considered to be one + * cell in for simplicity. 
+ * @param bb_edge_new Number of blocks on the edges of the bounding box + * @param bb_coord_new Coordinates of the bounding box + * @param num_sink_pin_layer_new Number of sinks of the given net on each layer + * @param pin_old_loc The old location of the moving pin + * @param pin_new_loc The new location of the moving pin + * @param src_pin Is the moving pin driving the net + */ +static void update_bb(ClusterNetId net_id, + t_bb& bb_edge_new, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer_new, + t_physical_tile_loc pin_old_loc, + t_physical_tile_loc pin_new_loc, + bool src_pin); /** * @brief Calculate the 3D bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and @@ -188,6 +206,26 @@ static void get_non_updatable_bb(ClusterNetId net_id, t_bb& bb_coord_new, vtr::NdMatrixProxy num_sink_pin_layer); + +/** + * @brief Update the bounding box (per-layer) of the net connected to blk_pin. The old and new locations of the pin are + * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. + * @details Finds the bounding box of a net and stores its coordinates in the bb_coord_new + * data structure. This routine should only be called for small nets, since it does not + * determine enough information for the bounding box to be updated incrementally later. + * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box + * can be used. Essentially, I am assuming the pins always lie on the outside of the + * bounding box. 
+ * @param net ID of the net for which the bounding box is requested + * @param blk ID of the moving block + * @param blk_pin ID of the pin connected to the net + * @param pl_moved_block Placement info about the moved block + */ +static void update_net_layer_bb(const ClusterNetId& net, + const ClusterBlockId& blk, + const ClusterPinId& blk_pin, + const t_pl_moved_block& pl_moved_block); + /** * @brief Calculate the per-layer bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and * store them in bb_coord_new @@ -199,25 +237,13 @@ static void get_non_updatable_layer_bb(ClusterNetId net_id, std::vector& bb_coord_new, vtr::NdMatrixProxy num_sink_layer); -/** - * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of a pin on that net - * @param bb_edge_new Number of blocks on the edges of the bounding box - * @param bb_coord_new Coordinates of the bounding box - * @param num_sink_pin_layer_new Number of sinks of the given net on each layer - * @param pin_old_loc The old location of the moving pin - * @param pin_new_loc The new location of the moving pin - * @param src_pin Is the moving pin driving the net - */ -static void update_bb(ClusterNetId net_id, - t_bb& bb_edge_new, - t_bb& bb_coord_new, - vtr::NdMatrixProxy num_sink_pin_layer_new, - t_physical_tile_loc pin_old_loc, - t_physical_tile_loc pin_new_loc, - bool src_pin); /** * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of a pin on that net + * @details Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and + * the number of blocks on each edge in the bb_edge_new data structure. This routine should only be called for + * large nets, since it has some overhead relative to just doing a brute force bounding box calculation. + * The bounding box coordinate and edge information for inet must be valid before this routine is called. 
Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity. * @param bb_edge_new Number of blocks on the edges of the bounding box * @param bb_coord_new Coordinates of the bounding box * @param num_sink_pin_layer_new Number of sinks of the given net on each layer @@ -234,8 +260,8 @@ static void update_layer_bb(ClusterNetId net_id, bool is_output_pin); /** - * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if - * the pin under consideration is not changing layer. +* @brief This function is called in update_layer_bb to update the net's bounding box incrementally if +* the pin under consideration changes layer. * @param net_id ID of the net which the moving pin belongs to * @param pin_old_loc Old location of the moving pin * @param pin_new_loc New location of the moving pin @@ -245,18 +271,55 @@ static void update_layer_bb(ClusterNetId net_id, * @param bb_edge_new The new bb edge calculated by this function * @param bb_coord_new The new bb calculated by this function */ -static inline void update_bb_same_layer(ClusterNetId net_id, - const t_physical_tile_loc& pin_old_loc, - const t_physical_tile_loc& pin_new_loc, - const std::vector& curr_bb_edge, - const std::vector& curr_bb_coord, - vtr::NdMatrixProxy bb_pin_sink_count_new, - std::vector& bb_edge_new, - std::vector& bb_coord_new); +static inline void update_bb_layer_changed(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new); + +/** + * @brief Calculate the per-layer BB of a large net from scratch and 
update coord, edge, and num_sink_pin_layer data structures. + * @details This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. from + * only the block location information). It updates the coordinate, number of pins on each edge information, and the + * number of sinks on each layer. It should only be called when the bounding box information is not valid. + * @param net_id ID of the net which the moving pin belongs to + * @param coords Bounding box coordinates of the net. It is calculated in this function + * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. + * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function. + */ +static void get_layer_bb_from_scratch(ClusterNetId net_id, + std::vector& num_on_edges, + std::vector& coords, + vtr::NdMatrixProxy layer_pin_sink_count); /** -* @brief This function is called in update_layer_bb to update the net's bounding box incrementally if -* the pin under consideration change layer. 
+ * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer + * and return the sum of the costs + * @param net_id ID of the net which cost is requested + * @param bb Per-layer bounding box of the net + * @return Wirelength cost of the net + */ +static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */, + const std::vector& bb, + const vtr::NdMatrixProxy layer_pin_sink_count); + +/** + * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer + * and return the sum of the lengths + * @param net_id ID of the net which wirelength estimate is requested + * @param bb Bounding box of the net + * @return Wirelength estimate of the net + */ +static double get_net_wirelength_from_layer_bb(ClusterNetId /* net_id */, + const std::vector& bb, + const vtr::NdMatrixProxy layer_pin_sink_count); + +/** + * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if + * the pin under consideration is not changing layer. 
* @param net_id ID of the net which the moving pin belongs to * @param pin_old_loc Old location of the moving pin * @param pin_new_loc New location of the moving pin @@ -266,25 +329,24 @@ static inline void update_bb_same_layer(ClusterNetId net_id, * @param bb_edge_new The new bb edge calculated by this function * @param bb_coord_new The new bb calculated by this function */ -static inline void update_bb_layer_changed(ClusterNetId net_id, - const t_physical_tile_loc& pin_old_loc, - const t_physical_tile_loc& pin_new_loc, - const std::vector& curr_bb_edge, - const std::vector& curr_bb_coord, - vtr::NdMatrixProxy bb_pin_sink_count_new, - std::vector& bb_edge_new, - std::vector& bb_coord_new); +static inline void update_bb_same_layer(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new); + /** * @brief If the moving pin is of type type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id" - * @param net_id ID of the net which the moving pin belongs to * @param pin_old_loc Old location of the moving pin * @param pin_new_loc New location of the moving pin * @param curr_layer_pin_sink_count Updated number of sinks of the net on each layer * @param bb_pin_sink_count_new The updated number of net's sinks on each layer * @param is_output_pin Is the moving pin of the type output */ -static void update_bb_pin_sink_count(ClusterNetId net_id, - const t_physical_tile_loc& pin_old_loc, +static void update_bb_pin_sink_count(const t_physical_tile_loc& pin_old_loc, const t_physical_tile_loc& pin_new_loc, const vtr::NdMatrixProxy curr_layer_pin_sink_count, vtr::NdMatrixProxy bb_pin_sink_count_new, @@ -292,7 +354,9 @@ static void update_bb_pin_sink_count(ClusterNetId net_id, /** * @brief Update the data structure for large 
nets that keep track of - * the number of blocks on each edge of the bounding box. + * the number of blocks on each edge of the bounding box. If the moving block + * is the only block on one of the edges, the bounding box is calculated from scratch. + * Since this function is used for large nets, it updates the bounding box incrementally. * @param net_id ID of the net which the moving pin belongs to * @param bb_edge_new The new bb edge calculated by this function * @param bb_coord_new The new bb calculated by this function @@ -329,84 +393,62 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, /** * @brief Calculate the 3D BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures. - * @param net_id - * @param coords - * @param num_on_edges - * @param num_sink_pin_layer + * @details This routine finds the bounding box of each net from scratch (i.e. from only the block location information). It updates both the + * coordinate and number of pins on each edge information. It should only be called when the bounding box + * information is not valid. + * @param net_id ID of the net which the moving pin belongs to + * @param coords Bounding box coordinates of the net. It is calculated in this function + * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. + * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function. */ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges, vtr::NdMatrixProxy num_sink_pin_layer); -/** - * @brief Calculate the per-layer BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures. 
- * @param net_id - * @param coords - * @param num_on_edges - * @param num_sink_pin_layer - */ -static void get_layer_bb_from_scratch(ClusterNetId net_id, - std::vector& num_on_edges, - std::vector& coords, - vtr::NdMatrixProxy layer_pin_sink_count); - /** * @brief Given the 3D BB, calculate the wire-length cost of the net - * @param net_id - * @param bb - * @return + * @param net_id ID of the net which cost is requested + * @param bb Bounding box of the net + * @return Wirelength cost of the net */ static double get_net_cost(ClusterNetId net_id, const t_bb& bb); -/** - * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer - * and return the sum of the costs - * @param net_id - * @param bb - * @return - */ -static double get_net_layer_cost(ClusterNetId /* net_id */, - const std::vector& bb, - const vtr::NdMatrixProxy layer_pin_sink_count); + /** * @brief Given the 3D BB, calculate the wire-length estimate of the net - * @param net_id - * @param bb - * @return + * @param net_id ID of the net which wirelength estimate is requested + * @param bb Bounding box of the net + * @return Wirelength estimate of the net */ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb); -/** - * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer - * and return the sum of the lengths - * @param net_id - * @param bb - * @return - */ -static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, - const std::vector& bb, - const vtr::NdMatrixProxy layer_pin_sink_count); + /** - * @brief To mitigate round-off errors, every once in a while, the costs of nets are being added from scrath. + * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch. * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again. 
- * @return + * @return Total bb (wirelength) cost for the placement */ static double recompute_bb_cost(); /** - * @brief To get the wirelength cost/est, BB perimiter is multiplied by a factor. This function returns that factor which is a function of net's fan-out. - * @return double + * @brief To get the wirelength cost/est, BB perimeter is multiplied by a factor to approximately correct for the half-perimeter + * bounding box wirelength's underestimate of wiring for nets with fanout greater than 2. + * @return Multiplicative wirelength correction factor */ static double wirelength_crossing_count(size_t fanout); /** * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks indicated in the blocks_affected data structure. + * @param num_affected_nets Number of valid elements in ts_bb_coord_new + * @param bb_delta_c Cost difference after and before moving the block */ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c); +/******************************* End of Function definitions ************************************/ + //Returns true if 'net' is driven by one of the blocks in 'blocks_affected' static bool driven_by_moved_block(const ClusterNetId net, const int num_blocks, @@ -426,12 +468,6 @@ static bool driven_by_moved_block(const ClusterNetId net, return is_driven_by_move_blk; } -/** - * @brief Update the net bounding boxes. - * - * Do not update the net cost here since it should only - * be updated once per net, not once per pin. - */ static void update_net_bb(const ClusterNetId& net, const ClusterBlockId& blk, const ClusterPinId& blk_pin, @@ -509,35 +545,6 @@ static void update_net_layer_bb(const ClusterNetId& net, } } -/** - * @brief Calculate the new connection delay and timing cost of all the - * sink pins affected by moving a specific pin to a new location. - * Also calculates the total change in the timing cost. 
- * - * Assumes that the blocks have been moved to the proposed new locations. - * Otherwise, the routine comp_td_single_connection_delay() will not be - * able to calculate the most up to date connection delay estimation value. - * - * If the moved pin is a driver pin, then all the sink connections that are - * driven by this driver pin are considered. - * - * If the moved pin is a sink pin, then it is the only pin considered. But - * in some cases, the sink is already accounted for if it is also driven - * by a driver pin located on a moved block. Computing it again would double - * count its affect on the total timing cost change (delta_timing_cost). - * - * It is possible for some connections to have unchanged delays. For instance, - * if we are using a dx/dy delay model, this could occur if a sink pin moved - * to a new position with the same dx/dy from its net's driver pin. - * - * We skip these connections with unchanged delay values as their delay need - * not be updated. Their timing costs also do not require any update, since - * the criticalities values are always kept stale/unchanged during an block - * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) - * - * This is also done to minimize the number of timing node/edge invalidations - * for incremental static timing analysis (incremental STA). - */ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -545,6 +552,42 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, std::vector& affected_pins, double& delta_timing_cost, bool is_src_moving) { + +/** +/** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. 
+ * + /** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. + * + * Assumes that the blocks have been moved to the proposed new locations. + * Otherwise, the routine comp_td_single_connection_delay() will not be + * able to calculate the most up to date connection delay estimation value. + * + * If the moved pin is a driver pin, then all the sink connections that are + * driven by this driver pin are considered. + * + * If the moved pin is a sink pin, then it is the only pin considered. But + * in some cases, the sink is already accounted for if it is also driven + * by a driver pin located on a moved block. Computing it again would double + * count its affect on the total timing cost change (delta_timing_cost). + * + * It is possible for some connections to have unchanged delays. For instance, + * if we are using a dx/dy delay model, this could occur if a sink pin moved + * to a new position with the same dx/dy from its net's driver pin. + * + * We skip these connections with unchanged delay values as their delay need + * not be updated. Their timing costs also do not require any update, since + * the criticalities values are always kept stale/unchanged during an block + * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) + * + * This is also done to minimize the number of timing node/edge invalidations + * for incremental static timing analysis (incremental STA). + */ auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& connection_delay = g_placer_ctx.timing().connection_delay; @@ -663,13 +706,6 @@ static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm } } -/* Finds the bounding box of a net and stores its coordinates in the * - * bb_coord_new data structure. 
This routine should only be called * - * for small nets, since it does not determine enough information for * - * the bounding box to be updated incrementally later. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. */ static void get_non_updatable_bb(ClusterNetId net_id, t_bb& bb_coord_new, vtr::NdMatrixProxy num_sink_pin_layer) { @@ -823,18 +859,6 @@ static void update_bb(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool src_pin) { - /* Updates the bounding box of a net by storing its coordinates in * - * the bb_coord_new data structure and the number of blocks on each * - * edge in the bb_edge_new data structure. This routine should only * - * be called for large nets, since it has some overhead relative to * - * just doing a brute force bounding box calculation. The bounding * - * box coordinate and edge information for inet must be valid before * - * this routine is called. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. * - * The x and y coordinates are the pin's x and y coordinates. */ - /* IO blocks are considered to be one cell in for simplicity. 
*/ //TODO: account for multiple physical pin instances per logical pin const t_bb *curr_bb_edge, *curr_bb_coord; @@ -1085,14 +1109,14 @@ static void update_bb(ClusterNetId net_id, } - } else { + } else {//pin_new_loc.layer_num == pin_old_loc.layer_num bb_coord_new.layer_min = curr_bb_coord->layer_min; bb_coord_new.layer_max = curr_bb_coord->layer_max; bb_edge_new.layer_min = curr_bb_edge->layer_min; bb_edge_new.layer_max = curr_bb_edge->layer_max; } - } else { + } else {// num_layers == 1 bb_coord_new.layer_min = curr_bb_coord->layer_min; bb_coord_new.layer_max = curr_bb_coord->layer_max; bb_edge_new.layer_min = curr_bb_edge->layer_min; @@ -1111,20 +1135,6 @@ static void update_layer_bb(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool is_output_pin) { - /* Updates the bounding box of a net by storing its coordinates in * - * the bb_coord_new data structure and the number of blocks on each * - * edge in the bb_edge_new data structure. This routine should only * - * be called for large nets, since it has some overhead relative to * - * just doing a brute force bounding box calculation. The bounding * - * box coordinate and edge information for inet must be valid before * - * this routine is called. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. * - * The x and y coordinates are the pin's x and y coordinates. */ - /* IO blocks are considered to be one cell in for simplicity. */ - //TODO: account for multiple physical pin instances per logical pin - const std::vector*curr_bb_edge, *curr_bb_coord; auto& device_ctx = g_vpr_ctx.device(); auto& place_move_ctx = g_placer_ctx.move(); @@ -1142,6 +1152,7 @@ static void update_layer_bb(ClusterNetId net_id, const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? 
 place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; + const std::vector*curr_bb_edge, *curr_bb_coord; if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id]; curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id]; @@ -1155,8 +1166,7 @@ /* Check if I can update the bounding box incrementally. */ - update_bb_pin_sink_count(net_id, - pin_old_loc, + update_bb_pin_sink_count(pin_old_loc, pin_new_loc, curr_layer_pin_sink_count, bb_pin_sink_count_new, @@ -1322,6 +1332,11 @@ int new_layer_num = pin_new_loc.layer_num; VTR_ASSERT_SAFE(old_layer_num != new_layer_num); + /* + This function is called when BB per layer is used and when the moving block is moving from one layer to another. + Thus, we need to update the bounding box on both "from" and "to" layer. Here, we update the bounding box on "from" or + "old_layer". Then, "add_block_to_bb" is called to update the bounding box on the new layer. 
+ */ if (x_old == curr_bb_coord[old_layer_num].xmax) { update_bb_edge(net_id, bb_edge_new, @@ -1381,13 +1396,12 @@ static inline void update_bb_layer_changed(ClusterNetId net_id, bb_coord_new[new_layer_num]); } -static void update_bb_pin_sink_count(ClusterNetId /* net_id */, - const t_physical_tile_loc& pin_old_loc, +static void update_bb_pin_sink_count(const t_physical_tile_loc& pin_old_loc, const t_physical_tile_loc& pin_new_loc, const vtr::NdMatrixProxy curr_layer_pin_sink_count, vtr::NdMatrixProxy bb_pin_sink_count_new, bool is_output_pin) { - VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1); + VTR_ASSERT_SAFE(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin); for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) { bb_pin_sink_count_new[layer_num] = curr_layer_pin_sink_count[layer_num]; } @@ -1426,6 +1440,12 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, int x_new = new_pin_loc.x; int y_new = new_pin_loc.y; + /* + This function is called to only update the bounding box on the new layer from a block + moving to this layer from another layer. Thus, we only need to assess the effect of this + new block on the edges. + */ + if (x_new > bb_coord_old.xmax) { bb_edge_new.xmax = 1; bb_coord_new.xmax = x_new; @@ -1455,10 +1475,6 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, } } -/* This routine finds the bounding box of each net from scratch (i.e. * - * from only the block location information). It updates both the * - * coordinate and number of pins on each edge information. It * - * should only be called when the bounding box information is not valid. 
*/ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges, @@ -1473,7 +1489,7 @@ static void get_bb_from_scratch(ClusterNetId net_id, ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); pnum = net_pin_to_tile_pin_index(net_id, 0); - VTR_ASSERT(pnum >= 0); + VTR_ASSERT_SAFE(pnum >= 0); x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; y = place_ctx.block_locs[bnum].loc.y @@ -1584,10 +1600,6 @@ static void get_bb_from_scratch(ClusterNetId net_id, num_on_edges.layer_max = layer_max_edge; } -/* This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. * - * from only the block location information). It updates the * - * coordinate, number of pins on each edge information, and the number of sinks on each layer. It* - * should only be called when the bounding box information is not valid. */ static void get_layer_bb_from_scratch(ClusterNetId net_id, std::vector& num_on_edges, std::vector& coords, @@ -1611,7 +1623,7 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); int pnum_src = net_pin_to_tile_pin_index(net_id, 0); - VTR_ASSERT(pnum_src >= 0); + VTR_ASSERT_SAFE(pnum_src >= 0); int x_src = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum_src]; int y_src = place_ctx.block_locs[bnum].loc.y @@ -1620,6 +1632,9 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, x_src = max(min(x_src, grid.width() - 2), 1); y_src = max(min(y_src, grid.height() - 2), 1); + // TODO: Currently we are assuming that crossing can only happen from OPIN. Because of that, + // when per-layer bounding box is used, we want the bounding box on each layer to also include + // the location of source since the connection on each layer starts from that location. 
for (int layer_num = 0; layer_num < num_layers; layer_num++) { xmin[layer_num] = x_src; ymin[layer_num] = y_src; @@ -1635,7 +1650,7 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, bnum = cluster_ctx.clb_nlist.pin_block(pin_id); int pnum = tile_pin_index(pin_id); int layer = place_ctx.block_locs[bnum].loc.layer; - VTR_ASSERT(layer >= 0 && layer < num_layers); + VTR_ASSERT_SAFE(layer >= 0 && layer < num_layers); num_sink_pin_layer[layer]++; int x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; @@ -1723,7 +1738,7 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bb) { return (ncost); } -static double get_net_layer_cost(ClusterNetId /* net_id */, +static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */, const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count) { /* Finds the cost due to one net by looking at its coordinate bounding * @@ -1738,6 +1753,11 @@ static double get_net_layer_cost(ClusterNetId /* net_id */, if (layer_pin_sink_count[layer_num] == 0) { continue; } + /* + adjust the bounding box half perimeter by the wirelength correction + factor based on terminal count, which is 1 for the source + the number + of sinks on this layer. + */ crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1); /* Could insert a check for xmin == xmax. In that case, assume * @@ -1758,9 +1778,6 @@ static double get_net_layer_cost(ClusterNetId /* net_id */, } static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { - /* WMF: Finds the estimate of wirelength due to one net by looking at * - * its coordinate bounding box. 
*/ - double ncost, crossing; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -1781,7 +1798,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { return (ncost); } -static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, +static double get_net_wirelength_from_layer_bb(ClusterNetId /* net_id */, const std::vector& bb, const vtr::NdMatrixProxy layer_pin_sink_count) { /* WMF: Finds the estimate of wirelength due to one net by looking at * @@ -1792,7 +1809,7 @@ static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, int num_layers = g_vpr_ctx.device().grid.get_num_layers(); for (int layer_num = 0; layer_num < num_layers; layer_num++) { - VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN); + VTR_ASSERT_SAFE (layer_pin_sink_count[layer_num] != OPEN); if (layer_pin_sink_count[layer_num] == 0) { continue; } @@ -1814,10 +1831,6 @@ static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, } static double recompute_bb_cost() { - /* Recomputes the cost to eliminate roundoff that may have accrued. * - * This routine does as little work as possible to compute this new * - * cost. */ - double cost = 0; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -1836,8 +1849,8 @@ static double wirelength_crossing_count(size_t fanout) { /* Get the expected "crossing count" of a net, based on its number * * of pins. Extrapolate for very large nets. 
*/ - if (fanout > 50) { - return 2.7933 + 0.02616 * (fanout - 50); + if (fanout > MAX_FANOUT_CROSSING_COUNT) { + return 2.7933 + 0.02616 * (fanout - MAX_FANOUT_CROSSING_COUNT); } else { return cross_count[fanout - 1]; } @@ -1853,7 +1866,7 @@ static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c) { proposed_net_cost[net_id] = get_net_cost(net_id, ts_bb_coord_new[net_id]); } else { - proposed_net_cost[net_id] = get_net_layer_cost(net_id, + proposed_net_cost[net_id] = get_net_layer_bb_wire_cost(net_id, layer_ts_bb_coord_new[net_id], ts_layer_sink_pin_count[size_t(net_id)]); } @@ -1910,14 +1923,6 @@ int find_affected_nets_and_update_costs( return num_affected_nets; } -/* Finds the cost from scratch. Done only when the placement * - * has been radically changed (i.e. after initial placement). * - * Otherwise find the cost change incrementally. If method * - * check is NORMAL, we find bounding boxes that are updateable * - * for the larger nets. If method is CHECK, all bounding boxes * - * are found via the non_updateable_bb routine, to provide a * - * cost which can be used to check the correctness of the * - * other routine. 
*/ double comp_bb_cost(e_cost_methods method) { double cost = 0; double expected_wirelength = 0.0; @@ -1977,12 +1982,12 @@ double comp_layer_bb_cost(e_cost_methods method) { place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } - net_cost[net_id] = get_net_layer_cost(net_id, + net_cost[net_id] = get_net_layer_bb_wire_cost(net_id, place_move_ctx.layer_bb_coords[net_id], place_move_ctx.num_sink_pin_layer[size_t(net_id)]); cost += net_cost[net_id]; if (method == CHECK) - expected_wirelength += get_net_layer_wirelength_estimate(net_id, + expected_wirelength += get_net_wirelength_from_layer_bb(net_id, place_move_ctx.layer_bb_coords[net_id], place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } @@ -2026,7 +2031,7 @@ void update_move_nets(int num_nets_affected, net_cost[net_id] = proposed_net_cost[net_id]; - /* negative proposed_net_cost value is acting as a flag. */ + /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */ proposed_net_cost[net_id] = -1; bb_updated_before[net_id] = NetUpdateState::NOT_UPDATED_YET; } @@ -2121,17 +2126,11 @@ void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) { auto& device_ctx = g_vpr_ctx.device(); - /* Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since * - * subhigh must be greater than or equal to sublow, we only need to * - * allocate storage for the lower half of a matrix. */ - - //chanx_place_cost_fac = new float*[(device_ctx.grid.height())]; - //for (size_t i = 0; i < device_ctx.grid.height(); i++) - // chanx_place_cost_fac[i] = new float[(i + 1)]; - - //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)]; - //for (size_t i = 0; i < device_ctx.grid.width(); i++) - // chany_place_cost_fac[i] = new float[(i + 1)]; + /* + Access arrays below as chan?_place_cost_fac[subhigh][sublow]. 
Since subhigh must be greater than or + equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway + for simplicity so we can use the vtr utility matrix functions. + */ chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1}); chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1}); @@ -2244,6 +2243,7 @@ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb) { layer_ts_bb_coord_new.resize(num_nets, std::vector(num_layers, t_2D_bb())); } + /*This initialize the whole matrix to OPEN which is an invalid value*/ ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)}, OPEN); ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID()); From fbc9452df9c0730d9a38624369d413af1e26c143 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 5 Jul 2024 09:39:18 -0400 Subject: [PATCH 185/188] [vpr][place] apply Vaughn's comments on place.cpp --- vpr/src/place/net_cost_handler.cpp | 4 ++-- vpr/src/place/net_cost_handler.h | 7 +++++-- vpr/src/place/place.cpp | 10 ++-------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 6029e39e93e..f78ea1266ff 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -2216,7 +2216,7 @@ void free_chan_w_factors_for_place_cost () { chany_place_cost_fac.clear(); } -void init_net_cost_structs(size_t num_nets) { +void init_place_move_structs(size_t num_nets) { net_cost.resize(num_nets, -1.); proposed_net_cost.resize(num_nets, -1.); /* Used to store costs for moves not yet made and to indicate when a net's * @@ -2225,7 +2225,7 @@ void init_net_cost_structs(size_t num_nets) { bb_updated_before.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); } -void free_net_cost_structs() { +void free_place_move_structs() { vtr::release_memory(net_cost); vtr::release_memory(proposed_net_cost); 
vtr::release_memory(bb_updated_before); diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 7c84a881566..57c64cadca5 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -6,6 +6,9 @@ /** * @brief The method used to calculate palcement cost + * @details For comp_cost. NORMAL means use the method that generates updateable bounding boxes for speed. + * CHECK means compute all bounding boxes from scratch using a very simple routine to allow checks + * of the other costs. * NORMAL: Compute cost efficiently using incremental techniques. * CHECK: Brute-force cost computation; useful to validate the more complex incremental cost update code. */ @@ -125,12 +128,12 @@ void free_chan_w_factors_for_place_cost (); * @brief Resize net_cost, proposed_net_cost, and bb_updated_before data structures to accommodate all nets. * @param num_nets Number of nets in the netlist (clustered currently) that the placement engine uses. */ -void init_net_cost_structs(size_t num_nets); +void init_place_move_structs(size_t num_nets); /** * @brief Free net_cost, proposed_net_cost, and bb_updated_before data structures. */ -void free_net_cost_structs(); +void free_place_move_structs(); /** * @brief Resize temporary storage data structures needed to determine which nets are affected by a move and data needed per net diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 536a6a6c477..4e7f448c34b 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -98,12 +98,6 @@ static constexpr double ERROR_TOL = .01; * variables round-offs check. */ static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; -/* For comp_cost. NORMAL means use the method that generates updateable * - * bounding boxes for speed. CHECK means compute all bounding boxes from * - * scratch using a very simple routine to allow checks of the other * - * costs. 
- */ - constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); @@ -1905,7 +1899,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, } } - init_net_cost_structs(num_nets); + init_place_move_structs(num_nets); if (cube_bb) { place_move_ctx.bb_coords.resize(num_nets, t_bb()); @@ -1951,7 +1945,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc free_placement_macros_structs(); - free_net_cost_structs(); + free_place_move_structs(); vtr::release_memory(place_move_ctx.bb_coords); vtr::release_memory(place_move_ctx.bb_num_on_edges); From c9afd34b8bb3b3de2ec415ef98d75a918aa2f3e0 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 5 Jul 2024 09:47:26 -0400 Subject: [PATCH 186/188] [vpr][place] fix typos in comments --- vpr/src/place/net_cost_handler.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index f78ea1266ff..4a7453fd540 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -240,10 +240,13 @@ static void get_non_updatable_layer_bb(ClusterNetId net_id, /** * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of a pin on that net - * @details /* Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and + * @details Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and * the number of blocks on each edge in the bb_edge_new data structure. This routine should only be called for * large nets, since it has some overhead relative to just doing a brute force bounding box calculation. - * The bounding box coordinate and edge information for inet must be valid before this routine is called. 
Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity. + * The bounding box coordinate and edge information for inet must be valid before this routine is called. + * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. + * Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates + * are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity. * @param bb_edge_new Number of blocks on the edges of the bounding box * @param bb_coord_new Coordinates of the bounding box * @param num_sink_pin_layer_new Number of sinks of the given net on each layer @@ -441,7 +444,8 @@ static double recompute_bb_cost(); static double wirelength_crossing_count(size_t fanout); /** - * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks indicated in the blocks_affected data structure. + * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks + * indicated in the blocks_affected data structure. * @param num_affected_nets Number of valid elements in ts_bb_coord_new * @param bb_delta_c Cost difference after and before moving the block */ @@ -880,7 +884,8 @@ static void update_bb(ClusterNetId net_id, return; } - vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new; + vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? 
+ place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new; if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ @@ -1150,7 +1155,8 @@ static void update_layer_bb(ClusterNetId net_id, return; } - const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; + const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? + place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; const std::vector*curr_bb_edge, *curr_bb_coord; if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) { From 3f1f8e236eeb0b4a5885fa87a3ab229254477b18 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 12 Jul 2024 07:42:54 -0400 Subject: [PATCH 187/188] fix comment format issue --- vpr/src/place/net_cost_handler.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 4a7453fd540..888f2bea130 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -556,18 +556,8 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, std::vector& affected_pins, double& delta_timing_cost, bool is_src_moving) { - -/** -/** - * @brief Calculate the new connection delay and timing cost of all the - * sink pins affected by moving a specific pin to a new location. - * Also calculates the total change in the timing cost. - * + /** - * @brief Calculate the new connection delay and timing cost of all the - * sink pins affected by moving a specific pin to a new location. - * Also calculates the total change in the timing cost. - * * Assumes that the blocks have been moved to the proposed new locations. 
* Otherwise, the routine comp_td_single_connection_delay() will not be * able to calculate the most up to date connection delay estimation value. From fd9fce8064404c1abd16c3f382d0cd9915b1d97c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 12 Jul 2024 08:34:25 -0400 Subject: [PATCH 188/188] [vpr][place] put comments on net_cost_handler file --- vpr/src/place/net_cost_handler.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 888f2bea130..ad0f643ceb4 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1,3 +1,28 @@ +/** + * @file net_cost_handler.cpp + * @brief This file contains the implementation of functions used to update placement cost when a new move is proposed/committed. + * + * VPR placement cost consists of three terms which represent wirelength, timing, and NoC cost. + * + * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) approach is used. In this approach, + * half of the perimeter of the bounding box which contains all terminals of the net is multiplied by a correction factor, + * and the resulting number is considered as an estimation of the bounding box. + * + * Currently, we have two types of bounding boxes: 3D bounding box (or Cube BB) and per-layer bounding box. + * If the FPGA grid is a 2D structure, a Cube bounding box is used, which will always have the z direction equal to 1. For 3D architectures, + * the user can specify the type of bounding box. If no type is specified, the RR graph is analyzed. If all inter-die connections happen from OPINs, + * the Cube bounding box is chosen; otherwise, the per-layer bounding box is chosen. In the Cube bounding box, when a net is stretched across multiple layers, + * the edges of the bounding box are determined by all of the blocks on all layers. 
+ * When the per-layer bounding box is used, a separate bounding box for each layer is created, and the wirelength estimation for each layer is calculated. + * To get the total wirelength of a net, the wirelength estimation on all layers is summed up. For more details, please refer to Amin Mohaghegh's MASc thesis. + * + * For timing estimation, the placement delay model is used. For 2D architectures, you can think of the placement delay model as a 2D array indexed by dx and dy. + * To get a delay estimation of a connection (from a source to a sink), first, dx and dy between these two points should be calculated, + * and these two numbers are the indices to access this 2D array. By default, the placement delay model is created by iterating over the router lookahead + * to get the minimum cost for each dx and dy. + * + * @date July 12, 2024 + */ #include "net_cost_handler.h" #include "globals.h" #include "placer_globals.h"