Merge pull request #2944 from AlexandreSinger/feature-ap-solver

amin1377 · web-flow · commit b3d969402093 · 2025-03-21T08:51:37.000-04:00
[AP][Solver] Supporting Unfixed Blocks
diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp
@@ -12,6 +12,8 @@
 #include <memory>
 #include <utility>
 #include <vector>
+#include "device_grid.h"
+#include "flat_placement_types.h"
 #include "partial_placement.h"
 #include "ap_netlist.h"
 #include "vpr_error.h"
@@ -36,14 +38,16 @@
 #endif // EIGEN_INSTALLED
 
 std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type,
-                                                         const APNetlist& netlist) {
+                                                         const APNetlist& netlist,
+                                                         const DeviceGrid& device_grid) {
     // Based on the solver type passed in, build the solver.
     switch (solver_type) {
         case e_analytical_solver::QP_HYBRID:
 #ifdef EIGEN_INSTALLED
-            return std::make_unique<QPHybridSolver>(netlist);
+            return std::make_unique<QPHybridSolver>(netlist, device_grid);
 #else
             (void)netlist;
+            (void)device_grid;
             VPR_FATAL_ERROR(VPR_ERROR_AP,
                             "QP Hybrid Solver requires the Eigen library");
             break;
@@ -64,8 +68,11 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist)
     // row ID from [0, num_moveable_blocks) for each moveable block in the
     // netlist.
     num_moveable_blocks_ = 0;
+    num_fixed_blocks_ = 0;
     size_t current_row_id = 0;
     for (APBlockId blk_id : netlist.blocks()) {
+        if (netlist.block_mobility(blk_id) == APBlockMobility::FIXED)
+            num_fixed_blocks_++;
         if (netlist.block_mobility(blk_id) != APBlockMobility::MOVEABLE)
             continue;
         APRowId new_row_id = APRowId(current_row_id);
@@ -155,10 +162,10 @@ void QPHybridSolver::init_linear_system() {
     }
 
     // Initialize the linear system with zeros.
-    size_t num_variables = num_moveable_blocks_ + num_star_nodes;
-    A_sparse = Eigen::SparseMatrix<double>(num_variables, num_variables);
-    b_x = Eigen::VectorXd::Zero(num_variables);
-    b_y = Eigen::VectorXd::Zero(num_variables);
+    num_variables_ = num_moveable_blocks_ + num_star_nodes;
+    A_sparse = Eigen::SparseMatrix<double>(num_variables_, num_variables_);
+    b_x = Eigen::VectorXd::Zero(num_variables_);
+    b_y = Eigen::VectorXd::Zero(num_variables_);
 
     // Create a list of triplets that will be used to create the sparse
     // coefficient matrix. This is the method recommended by Eigen to initialize
@@ -254,7 +261,54 @@ void QPHybridSolver::update_linear_system_with_anchors(
     }
 }
 
+void QPHybridSolver::init_guesses(const DeviceGrid& device_grid) {
+    // With no fixed blocks there is no known position to start from, so the
+    // guess for every variable is initialized to the center of the device.
+    if (num_fixed_blocks_ == 0) {
+        guess_x = Eigen::VectorXd::Constant(num_variables_, device_grid.width() / 2.0);
+        guess_y = Eigen::VectorXd::Constant(num_variables_, device_grid.height() / 2.0);
+        return;
+    }
+
+    // Otherwise, compute the centroid of all fixed blocks in the netlist.
+    t_flat_pl_loc centroid({0.0f, 0.0f, 0.0f});
+    unsigned num_blks_summed = 0;
+    for (APBlockId blk_id : netlist_.blocks()) {
+        // Only fixed blocks contribute to the centroid, since these are the
+        // only blocks with positions that we know.
+        if (netlist_.block_mobility(blk_id) != APBlockMobility::FIXED)
+            continue;
+        // Get the flat location of the fixed block (asserted fixed in x, y, layer).
+        APFixedBlockLoc fixed_blk_loc = netlist_.block_loc(blk_id);
+        VTR_ASSERT_SAFE(fixed_blk_loc.x != APFixedBlockLoc::UNFIXED_DIM);
+        VTR_ASSERT_SAFE(fixed_blk_loc.y != APFixedBlockLoc::UNFIXED_DIM);
+        VTR_ASSERT_SAFE(fixed_blk_loc.layer_num != APFixedBlockLoc::UNFIXED_DIM);
+        t_flat_pl_loc flat_blk_loc;
+        flat_blk_loc.x = fixed_blk_loc.x;
+        flat_blk_loc.y = fixed_blk_loc.y;
+        flat_blk_loc.layer = fixed_blk_loc.layer_num;
+        // Accumulate the block's location into the running sum.
+        centroid += flat_blk_loc;
+        num_blks_summed++;
+    }
+    // Divide the sum by the number of fixed blocks to turn it into the centroid.
+    VTR_ASSERT_SAFE(num_blks_summed == num_fixed_blocks_);
+    centroid /= static_cast<float>(num_blks_summed);
+
+    // Set every guess to the centroid's x and y; its layer is not used here.
+    guess_x = Eigen::VectorXd::Constant(num_variables_, centroid.x);
+    guess_y = Eigen::VectorXd::Constant(num_variables_, centroid.y);
+}
+
 void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
+    // In the first iteration, if the number of fixed blocks is 0, set the
+    // placement to be equal to the guess. The solver below will just set the
+    // solution to the zero vector if we do not set it to the guess directly.
+    if (iteration == 0 && num_fixed_blocks_ == 0) {
+        store_solution_into_placement(guess_x, guess_y, p_placement);
+        return;
+    }
+
     // Create a temporary linear system which will contain the original linear
     // system which may be updated to include the anchor points.
     Eigen::SparseMatrix<double> A_sparse_diff = Eigen::SparseMatrix<double>(A_sparse);
@@ -280,14 +334,24 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
     cg.compute(A_sparse_diff);
     VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at compute!");
     // Use the solver to solve for x and y using the constant vectors
-    // TODO: Use solve with guess to make this faster. Use the previous placement
-    //       as a guess.
-    Eigen::VectorXd x = cg.solve(b_x_diff);
+    Eigen::VectorXd x = cg.solveWithGuess(b_x_diff, guess_x);
     VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_x!");
-    Eigen::VectorXd y = cg.solve(b_y_diff);
+    Eigen::VectorXd y = cg.solveWithGuess(b_y_diff, guess_y);
     VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!");
 
     // Write the results back into the partial placement object.
+    store_solution_into_placement(x, y, p_placement);
+
+    // Update the guess. The guess for the next iteration is the solution in
+    // this iteration.
+    guess_x = x;
+    guess_y = y;
+}
+
+void QPHybridSolver::store_solution_into_placement(const Eigen::VectorXd& x_soln,
+                                                   const Eigen::VectorXd& y_soln,
+                                                   PartialPlacement& p_placement) {
+
     // NOTE: The first [0, num_moveable_blocks_) rows always represent the
     //       moveable APBlocks. The star nodes always come after and are ignored
     //       in the solution.
@@ -296,8 +360,23 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
         APBlockId blk_id = row_id_to_blk_id_[row_id];
         VTR_ASSERT_DEBUG(blk_id.is_valid());
         VTR_ASSERT_DEBUG(netlist_.block_mobility(blk_id) == APBlockMobility::MOVEABLE);
-        p_placement.block_x_locs[blk_id] = x[row_id_idx];
-        p_placement.block_y_locs[blk_id] = y[row_id_idx];
+        // Due to the iterative nature of CG, it is possible for the solver to
+        // overstep 0 and return a negative number by an incredibly small margin.
+        // Clamp the number to 0 in this case.
+        // TODO: Should investigate good bounds on this, the bounds below were
+        //       chosen since any difference higher than 1e-9 would concern me.
+        double x_pos = x_soln[row_id_idx];
+        if (x_pos < 0.0) {
+            VTR_ASSERT_SAFE(std::abs(x_pos) < negative_soln_tolerance_);
+            x_pos = 0.0;
+        }
+        double y_pos = y_soln[row_id_idx];
+        if (y_pos < 0.0) {
+            VTR_ASSERT_SAFE(std::abs(y_pos) < negative_soln_tolerance_);
+            y_pos = 0.0;
+        }
+        p_placement.block_x_locs[blk_id] = x_pos;
+        p_placement.block_y_locs[blk_id] = y_pos;
     }
 }
 
diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h
@@ -9,7 +9,8 @@
 #pragma once
 
 #include <memory>
-#include "ap_netlist_fwd.h"
+#include "ap_netlist.h"
+#include "device_grid.h"
 #include "vtr_strong_id.h"
 #include "vtr_vector.h"
 
@@ -98,6 +99,9 @@ class AnalyticalSolver {
     ///        when allocating matrices.
     size_t num_moveable_blocks_ = 0;
 
+    /// @brief The number of fixed blocks in the netlist.
+    size_t num_fixed_blocks_ = 0;
+
     /// @brief A lookup between a moveable APBlock and its linear ID from
     ///        [0, num_moveable_blocks). Fixed blocks will return an invalid row
     ///        ID. This is useful when knowing which row in the matrix
@@ -114,7 +118,8 @@ class AnalyticalSolver {
  * @brief A factory method which creates an Analytical Solver of the given type.
  */
 std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type,
-                                                         const APNetlist& netlist);
+                                                         const APNetlist& netlist,
+                                                         const DeviceGrid& device_grid);
 
 // The Eigen library is used to solve matrix equations in the following solvers.
 // The solver cannot be built if Eigen is not installed.
@@ -170,6 +175,14 @@ class QPHybridSolver : public AnalyticalSolver {
     ///        weights to grow slower.
     static constexpr double anchor_weight_exp_fac_ = 5.0;
 
+    /// @brief Due to the iterative nature of Conjugate Gradient method, the
+    ///        solver may overstep 0 to give a slightly negative solution. This
+    ///        is ok, and we can just clamp the position to 0. However, negative
+    ///        values that are too large may be indicative of an issue in the
+    ///        formulation. This value is how negative we tolerate the positions
+    ///        to be.
+    static constexpr double negative_soln_tolerance_ = 1e-9;
+
     /**
      * @brief Initializes the linear system of Ax = b_x and Ay = b_y based on
      *        the APNetlist and the fixed APBlock locations.
@@ -180,6 +193,14 @@ class QPHybridSolver : public AnalyticalSolver {
      */
     void init_linear_system();
 
+    /**
+     * @brief Initializes the guesses which will be used in the solver.
+     *
+     * The guesses will be used as starting points for the CG solver. The better
+     * these guesses are, the faster the solver will converge.
+     */
+    void init_guesses(const DeviceGrid& device_grid);
+
     /**
      * @brief Helper method to update the linear system with anchors to the
      *        current partial placement.
@@ -209,6 +230,14 @@ class QPHybridSolver : public AnalyticalSolver {
                                            PartialPlacement& p_placement,
                                            unsigned iteration);
 
+    /**
+     * @brief Store the x and y solutions in Eigen's vectors into the partial
+     *        placement object.
+     */
+    void store_solution_into_placement(const Eigen::VectorXd& x_soln,
+                                       const Eigen::VectorXd& y_soln,
+                                       PartialPlacement& p_placement);
+
     // The following variables represent the linear system without any anchor
     // points. These are filled in the constructor and never modified.
     // When the anchor-points are taken into consideration, the diagonal of the
@@ -224,19 +253,31 @@ class QPHybridSolver : public AnalyticalSolver {
     Eigen::VectorXd b_x;
     /// @brief The constant vector in the y dimension for the linear system.
     Eigen::VectorXd b_y;
+    /// @brief The number of variables in the solver. This is the sum of the
+    ///        number of moveable blocks in the netlist and the number of star
+    ///        nodes that exist.
+    size_t num_variables_ = 0;
+
+    /// @brief The current guess for the x positions of the blocks.
+    Eigen::VectorXd guess_x;
+    /// @brief The current guess for the y positions of the blocks.
+    Eigen::VectorXd guess_y;
 
   public:
     /**
      * @brief Constructor of the QPHybridSolver
      *
      * Initializes internal data and constructs the initial linear system.
      */
-    QPHybridSolver(const APNetlist& netlist)
+    QPHybridSolver(const APNetlist& netlist, const DeviceGrid& device_grid)
         : AnalyticalSolver(netlist) {
         // Initializing the linear system only depends on the netlist and fixed
         // block locations. Both are provided by the netlist, allowing this to
         // be initialized in the constructor.
         init_linear_system();
+
+        // Initialize the guesses for the first iteration.
+        init_guesses(device_grid);
     }
 
     /**
diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp
@@ -77,7 +77,8 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type,
     // Build the solver.
     VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the solver...\n");
     solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID,
-                                     ap_netlist_);
+                                     ap_netlist_,
+                                     device_grid);
 
     // Build the density manager used by the partial legalizer.
     VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the density manager...\n");
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/config.txt
@@ -0,0 +1,51 @@
+###############################################################################
+# Configuration file for running Verilog benchmarks through the AP flow.
+#
+# This task provides no fixed blocks for the circuits; each circuit is only
+# fixed to a specific device size. The device sizes here were chosen to match
+# the device sizes of the default VTR flow.
+###############################################################################
+
+# Path to directory of circuits to use
+circuits_dir=benchmarks/verilog
+
+# Path to directory of architectures to use
+archs_dir=arch/timing
+
+# Add architectures to list to sweep
+arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml
+
+# Add circuits to list to sweep
+circuit_list_add=boundtop.v
+circuit_list_add=ch_intrinsics.v
+circuit_list_add=or1200.v
+circuit_list_add=spree.v
+circuit_list_add=stereovision3.v
+
+# Constrain the circuits to their devices
+circuit_constraint_list_add=(stereovision3.v,    device=vtr_extra_small)
+circuit_constraint_list_add=(ch_intrinsics.v,    device=vtr_extra_small)
+circuit_constraint_list_add=(spree.v,            device=vtr_extra_small)
+circuit_constraint_list_add=(boundtop.v,         device=vtr_extra_small)
+circuit_constraint_list_add=(or1200.v,           device=vtr_small)
+
+# Constrain the circuits to their channel widths
+#       1.3 * minW
+circuit_constraint_list_add=(stereovision3.v,    route_chan_width=44)
+circuit_constraint_list_add=(ch_intrinsics.v,    route_chan_width=52)
+circuit_constraint_list_add=(spree.v,            route_chan_width=78)
+circuit_constraint_list_add=(boundtop.v,         route_chan_width=50)
+circuit_constraint_list_add=(or1200.v,           route_chan_width=118)
+
+# Parse info and how to parse
+parse_file=vpr_fixed_chan_width.txt
+
+# How to parse QoR info
+qor_parse_file=qor_ap_fixed_chan_width.txt
+
+# Pass requirements
+pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt
+
+# Pass the script params while writing the vpr constraints.
+script_params=-track_memory_usage -crit_path_router_iterations 100 --analytical_place --route
+
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/golden_results.txt