Skip to content

Commit c9e6075

Browse files
authoredMar 20, 2025··
Merge pull request #2942 from AlexandreSinger/feature-ap-partial-legalizer
[AP][GlobalPlacement] Improved Partial Legalizer Legality
2 parents ccb2396 + ce50295 commit c9e6075

18 files changed

+1487
-275
lines changed
 

‎vpr/src/analytical_place/analytical_solver.cpp

+10-35
Original file line numberDiff line numberDiff line change
@@ -236,41 +236,17 @@ void QPHybridSolver::init_linear_system() {
236236
A_sparse.setFromTriplets(tripletList.begin(), tripletList.end());
237237
}
238238

239-
/**
240-
* @brief Helper method to update the linear system with anchors to the current
241-
* partial placement.
242-
*
243-
* For each moveable block (with row = i) in the netlist:
244-
* A[i][i] = A[i][i] + coeff_pseudo_anchor;
245-
* b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor;
246-
* Where coeff_pseudo_anchor grows with each iteration.
247-
*
248-
* This is basically a fast way of adding a connection between all moveable
249-
* blocks in the netlist and their target fixed placement location.
250-
*
251-
* See add_connection_to_system.
252-
*
253-
* @param A_sparse_diff The ceofficient matrix to update.
254-
* @param b_x_diff The x-dimension constant vector to update.
255-
* @param b_y_diff The y-dimension constant vector to update.
256-
* @param p_placement The location the moveable blocks should be anchored
257-
* to.
258-
* @param num_moveable_blocks The number of moveable blocks in the netlist.
259-
* @param row_id_to_blk_id Lookup for the row id from the APBlock Id.
260-
* @param iteration The current iteration of the Global Placer.
261-
*/
262-
static inline void update_linear_system_with_anchors(Eigen::SparseMatrix<double>& A_sparse_diff,
263-
Eigen::VectorXd& b_x_diff,
264-
Eigen::VectorXd& b_y_diff,
265-
PartialPlacement& p_placement,
266-
size_t num_moveable_blocks,
267-
vtr::vector<APRowId, APBlockId> row_id_to_blk_id,
268-
unsigned iteration) {
239+
void QPHybridSolver::update_linear_system_with_anchors(
240+
Eigen::SparseMatrix<double>& A_sparse_diff,
241+
Eigen::VectorXd& b_x_diff,
242+
Eigen::VectorXd& b_y_diff,
243+
PartialPlacement& p_placement,
244+
unsigned iteration) {
269245
// Anchor weights grow exponentially with iteration.
270-
double coeff_pseudo_anchor = 0.01 * std::exp((double)iteration / 5);
271-
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks; row_id_idx++) {
246+
double coeff_pseudo_anchor = anchor_weight_mult_ * std::exp((double)iteration / anchor_weight_exp_fac_);
247+
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
272248
APRowId row_id = APRowId(row_id_idx);
273-
APBlockId blk_id = row_id_to_blk_id[row_id];
249+
APBlockId blk_id = row_id_to_blk_id_[row_id];
274250
double pseudo_w = coeff_pseudo_anchor;
275251
A_sparse_diff.coeffRef(row_id_idx, row_id_idx) += pseudo_w;
276252
b_x_diff(row_id_idx) += pseudo_w * p_placement.block_x_locs[blk_id];
@@ -289,8 +265,7 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
289265
// anchor-points (fixed block positions).
290266
if (iteration != 0) {
291267
update_linear_system_with_anchors(A_sparse_diff, b_x_diff, b_y_diff,
292-
p_placement, num_moveable_blocks_,
293-
row_id_to_blk_id_, iteration);
268+
p_placement, iteration);
294269
}
295270
// Verify that the constant vectors are valid.
296271
VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!");

‎vpr/src/analytical_place/analytical_solver.h

+44
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,21 @@ class QPHybridSolver : public AnalyticalSolver {
155155
/// sparse.
156156
static constexpr size_t star_num_pins_threshold = 3;
157157

158+
// The following constants are used to configure the anchor weighting.
159+
// The weights of anchors grow exponentially each iteration by the following
160+
// function:
161+
// anchor_w = anchor_weight_mult_ * e^(iter / anchor_weight_exp_fac_)
162+
// The numbers below were empirically found to work well.
163+
164+
/// @brief Multiplier for the anchor weight. The smaller this number is, the
165+
/// weaker the anchors will be at the start.
166+
static constexpr double anchor_weight_mult_ = 0.001;
167+
168+
/// @brief Factor for controlling the growth of the exponential term in the
169+
/// weight factor function. Larger numbers will cause the anchor
170+
/// weights to grow slower.
171+
static constexpr double anchor_weight_exp_fac_ = 5.0;
172+
158173
/**
159174
* @brief Initializes the linear system of Ax = b_x and Ay = b_y based on
160175
* the APNetlist and the fixed APBlock locations.
@@ -165,6 +180,35 @@ class QPHybridSolver : public AnalyticalSolver {
165180
*/
166181
void init_linear_system();
167182

183+
/**
184+
* @brief Helper method to update the linear system with anchors to the
185+
* current partial placement.
186+
*
187+
* For each moveable block (with row = i) in the netlist:
188+
* A[i][i] = A[i][i] + coeff_pseudo_anchor;
189+
* b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor;
190+
* Where coeff_pseudo_anchor grows with each iteration.
191+
*
192+
* This is basically a fast way of adding a connection between all moveable
193+
* blocks in the netlist and their target fixed placement location.
194+
*
195+
* See add_connection_to_system.
196+
*
197+
* @param A_sparse_diff The coefficient matrix to update.
198+
* @param b_x_diff The x-dimension constant vector to update.
199+
* @param b_y_diff The y-dimension constant vector to update.
200+
* @param p_placement The location the moveable blocks should be
201+
* anchored to.
202+
* @note The number of moveable blocks is read from the num_moveable_blocks_ member.
203+
* @note The APBlockId of each row is read from the row_id_to_blk_id_ member.
204+
* @param iteration The current iteration of the Global Placer.
205+
*/
206+
void update_linear_system_with_anchors(Eigen::SparseMatrix<double>& A_sparse_diff,
207+
Eigen::VectorXd& b_x_diff,
208+
Eigen::VectorXd& b_y_diff,
209+
PartialPlacement& p_placement,
210+
unsigned iteration);
211+
168212
// The following variables represent the linear system without any anchor
169213
// points. These are filled in the constructor and never modified.
170214
// When the anchor-points are taken into consideration, the diagonal of the

‎vpr/src/analytical_place/flat_placement_bins.h

-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ class FlatPlacementBins {
111111
inline const vtr::Rect<double>& bin_region(FlatPlacementBinId bin_id) const {
112112
VTR_ASSERT(bin_id.is_valid());
113113
return bin_region_[bin_id];
114-
;
115114
}
116115

117116
/**

‎vpr/src/analytical_place/flat_placement_density_manager.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ FlatPlacementDensityManager::FlatPlacementDensityManager(const APNetlist& ap_net
8080
auto tile_type = device_grid.get_physical_type(tile_loc);
8181
int tw = tile_type->width;
8282
int th = tile_type->height;
83+
VTR_ASSERT_SAFE(tw != 0 && th != 0);
8384
vtr::Rect<double> new_bin_region(vtr::Point<double>(x, y),
8485
vtr::Point<double>(x + tw,
8586
y + th));
@@ -162,6 +163,10 @@ void FlatPlacementDensityManager::remove_block_from_bin(APBlockId blk_id,
162163
}
163164

164165
void FlatPlacementDensityManager::import_placement_into_bins(const PartialPlacement& p_placement) {
166+
// Empty the bins such that all blocks are no longer within the bins.
167+
empty_bins();
168+
169+
// Insert each block in the netlist into their bin based on their placement.
165170
// TODO: Maybe import the fixed block locations in the constructor and then
166171
// only import the moveable block locations.
167172
for (APBlockId blk_id : ap_netlist_.blocks()) {
@@ -215,9 +220,9 @@ void FlatPlacementDensityManager::empty_bins() {
215220
// Reset all of the bins and their utilizations.
216221
for (FlatPlacementBinId bin_id : bins_.bins()) {
217222
bins_.remove_all_blocks_from_bin(bin_id);
218-
bin_utilization_[bin_id] = PrimitiveVector();
219-
bin_overfill_[bin_id] = calc_bin_overfill(bin_utilization_[bin_id], bin_capacity_[bin_id]);
220-
bin_underfill_[bin_id] = calc_bin_underfill(bin_utilization_[bin_id], bin_capacity_[bin_id]);
223+
bin_utilization_[bin_id].clear();
224+
bin_overfill_[bin_id].clear();
225+
bin_underfill_[bin_id] = bin_capacity_[bin_id];
221226
}
222227
// Once all the bins are reset, all bins should be empty; therefore no bins
223228
// are overfilled.

‎vpr/src/analytical_place/flat_placement_density_manager.h

+3
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ class FlatPlacementDensityManager {
185185
* @brief Import the given flat placement into the bins.
186186
*
187187
* This will place AP blocks into the bins that they are placed over.
188+
*
189+
* This will reset the bins before importing the placement. Anything inside
190+
* the bins will be removed.
188191
*/
189192
void import_placement_into_bins(const PartialPlacement& p_placement);
190193

‎vpr/src/analytical_place/flat_placement_mass_calculator.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ static void print_capacities(const std::vector<PrimitiveVector>& logical_block_t
234234
VTR_LOG("\n");
235235
}
236236
VTR_LOG("\n");
237+
// TODO: Print the masses of each model.
237238
}
238239

239240
FlatPlacementMassCalculator::FlatPlacementMassCalculator(const APNetlist& ap_netlist,

‎vpr/src/analytical_place/global_placer.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@
1313
#include "analytical_solver.h"
1414
#include "ap_flow_enums.h"
1515
#include "ap_netlist.h"
16+
#include "ap_netlist_fwd.h"
1617
#include "atom_netlist.h"
1718
#include "device_grid.h"
19+
#include "flat_placement_bins.h"
1820
#include "flat_placement_density_manager.h"
21+
#include "globals.h"
1922
#include "partial_legalizer.h"
2023
#include "partial_placement.h"
2124
#include "physical_types.h"
25+
#include "primitive_vector.h"
2226
#include "vpr_error.h"
2327
#include "vtr_log.h"
2428
#include "vtr_time.h"
@@ -90,9 +94,74 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type,
9094
partial_legalizer_ = make_partial_legalizer(partial_legalizer_type,
9195
ap_netlist_,
9296
density_manager_,
97+
prepacker,
9398
log_verbosity_);
9499
}
95100

101+
/**
102+
* @brief Helper method to print the statistics on the given partial placement.
103+
*/
104+
static void print_placement_stats(const PartialPlacement& p_placement,
105+
const APNetlist& ap_netlist,
106+
FlatPlacementDensityManager& density_manager) {
107+
// Print the placement HPWL
108+
VTR_LOG("\tPlacement HPWL: %f\n", p_placement.get_hpwl(ap_netlist));
109+
110+
// Print density information. Need to reset the density manager to ensure
111+
// the data is valid.
112+
density_manager.import_placement_into_bins(p_placement);
113+
114+
// Print the number of overfilled bins.
115+
size_t num_overfilled_bins = density_manager.get_overfilled_bins().size();
116+
VTR_LOG("\tNumber of overfilled bins: %zu\n", num_overfilled_bins);
117+
118+
// Print the average overfill
119+
float total_overfill = 0.0f;
120+
for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
121+
total_overfill += density_manager.get_bin_overfill(bin_id).manhattan_norm();
122+
}
123+
float avg_overfill = 0.0f;
124+
if (num_overfilled_bins != 0)
125+
avg_overfill = total_overfill / static_cast<float>(num_overfilled_bins);
126+
VTR_LOG("\tAverage overfill magnitude: %f\n", avg_overfill);
127+
128+
// Print the number of overfilled tiles per type.
129+
const auto& physical_tile_types = g_vpr_ctx.device().physical_tile_types;
130+
const auto& device_grid = g_vpr_ctx.device().grid;
131+
std::vector<unsigned> overfilled_tiles_by_type(physical_tile_types.size(), 0);
132+
for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
133+
const auto& bin_region = density_manager.flat_placement_bins().bin_region(bin_id);
134+
auto tile_loc = t_physical_tile_loc((int)bin_region.xmin(),
135+
(int)bin_region.ymin(),
136+
0);
137+
auto tile_type = device_grid.get_physical_type(tile_loc);
138+
overfilled_tiles_by_type[tile_type->index]++;
139+
}
140+
VTR_LOG("\tOverfilled bins by tile type:\n");
141+
for (size_t type_idx = 0; type_idx < physical_tile_types.size(); type_idx++) {
142+
VTR_LOG("\t\t%10s: %zu\n",
143+
physical_tile_types[type_idx].name.c_str(),
144+
overfilled_tiles_by_type[type_idx]);
145+
}
146+
147+
// Count the number of blocks that were placed in a bin which they cannot
148+
// physically be placed into (according to their mass).
149+
unsigned num_misplaced_blocks = 0;
150+
for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
151+
for (APBlockId ap_blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) {
152+
// Get the blk mass and project it onto the capacity of its bin.
153+
PrimitiveVector blk_mass = density_manager.mass_calculator().get_block_mass(ap_blk_id);
154+
PrimitiveVector projected_mass = blk_mass;
155+
projected_mass.project(density_manager.get_bin_capacity(bin_id));
156+
// If the projected mass does not match its match, this implies that
157+
// there this block does not belong in this bin.
158+
if (projected_mass != blk_mass)
159+
num_misplaced_blocks++;
160+
}
161+
}
162+
VTR_LOG("\tNumber of blocks in an incompatible bin: %zu\n", num_misplaced_blocks);
163+
}
164+
96165
/**
97166
* @brief Helper method to print the header of the per-iteration status updates
98167
* of the global placer.
@@ -177,6 +246,13 @@ PartialPlacement SimPLGlobalPlacer::place() {
177246
if (hpwl_relative_gap < target_hpwl_relative_gap_)
178247
break;
179248
}
249+
250+
// Print some statistics on the final placement.
251+
VTR_LOG("Placement after Global Placement:\n");
252+
print_placement_stats(p_placement,
253+
ap_netlist_,
254+
*density_manager_);
255+
180256
// Return the placement from the final iteration.
181257
// TODO: investigate saving the best solution found so far. It should be
182258
// cheap to save a copy of the PartialPlacement object.

‎vpr/src/analytical_place/global_placer.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ class SimPLGlobalPlacer : public GlobalPlacer {
116116
/// lower-bound placements. The placer will stop if the difference
117117
/// between the two bounds, normalized to the upper-bound, is smaller
118118
/// than this number.
119-
static constexpr double target_hpwl_relative_gap_ = 0.10;
119+
/// This number was empirically found to work well.
120+
static constexpr double target_hpwl_relative_gap_ = 0.05;
120121

121122
/// @brief The solver which generates the lower-bound placement.
122123
std::unique_ptr<AnalyticalSolver> solver_;
+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date March 2025
5+
* @brief Implementation of a model grouper class which groups models together
6+
* which must be legalized together in a flat placement.
7+
*/
8+
9+
#include "model_grouper.h"
10+
#include <queue>
11+
#include <unordered_map>
12+
#include <unordered_set>
13+
#include <vector>
14+
#include "cad_types.h"
15+
#include "logic_types.h"
16+
#include "prepack.h"
17+
#include "vtr_assert.h"
18+
#include "vtr_log.h"
19+
20+
/**
21+
* @brief Recursive helper function which gets the models in the given pattern
22+
* block.
23+
*
24+
* @param pattern_block
25+
* The pattern block to get the models of.
26+
* @param models
27+
* A set of the models found so far.
28+
* @param block_visited
29+
* A vector of flags for each pattern block to signify which blocks have
30+
* been visited.
31+
*/
32+
static void get_pattern_models_recurr(t_pack_pattern_block* pattern_block,
33+
std::unordered_set<int>& models,
34+
std::vector<bool>& block_visited) {
35+
// If the pattern block is invalid or this block has been visited, return.
36+
if (pattern_block == nullptr || block_visited[pattern_block->block_id]) {
37+
return;
38+
}
39+
40+
// Mark this block as visited and insert its model into the models vector.
41+
block_visited[pattern_block->block_id] = true;
42+
models.insert(pattern_block->pb_type->model->index);
43+
44+
// Go through this block's connections and get their pattern models.
45+
t_pack_pattern_connections* connection = pattern_block->connections;
46+
while (connection != nullptr) {
47+
get_pattern_models_recurr(connection->from_block, models, block_visited);
48+
get_pattern_models_recurr(connection->to_block, models, block_visited);
49+
connection = connection->next;
50+
}
51+
}
52+
53+
/**
54+
* @brief Entry point into the recursive function above. Gets the models in
55+
* the given pack pattern.
56+
*/
57+
static std::unordered_set<int> get_pattern_models(const t_pack_patterns& pack_pattern) {
58+
std::unordered_set<int> models_in_pattern;
59+
60+
// Initialize the visited flags for each block to false.
61+
std::vector<bool> block_visited(pack_pattern.num_blocks, false);
62+
// Begin the recursion with the root block.
63+
get_pattern_models_recurr(pack_pattern.root_block, models_in_pattern, block_visited);
64+
65+
return models_in_pattern;
66+
}
67+
68+
/**
 * @brief Constructs the model grouper and forms the model groups.
 *
 *  @param prepacker
 *      Provides the pack patterns used to decide which models are connected.
 *  @param user_models
 *      Head of the linked list of user models.
 *  @param library_models
 *      Head of the linked list of library models.
 *  @param log_verbosity
 *      Debug information is printed when this is 20 or higher.
 */
ModelGrouper::ModelGrouper(const Prepacker& prepacker,
                           t_model* user_models,
                           t_model* library_models,
                           int log_verbosity) {
    /**
     * Group the models together based on their pack patterns. If model A and
     * model B form a pattern, and model B and model C form a pattern, then
     * models A, B, and C are in a group together.
     *
     * An efficient way to find this is to represent this problem as a graph,
     * where each node is a model and each edge is a relationship where a model
     * is in a pack pattern with another model. We can then perform BFS to find
     * the connected sub-graphs which will be the groups.
     */

    // Get the number of models and record each model's name for debug
    // printing. Both linked lists (library and user) are walked.
    // TODO: Clean up the models vectors in VTR.
    std::unordered_map<int, char*> model_name;
    unsigned num_models = 0;
    auto record_models = [&](t_model* list_head) {
        for (t_model* model = list_head; model != nullptr; model = model->next) {
            model_name[model->index] = model->name;
            num_models++;
        }
    };
    record_models(library_models);
    record_models(user_models);

    // Create an adjacency list for the edges. An edge is formed where two
    // models share a pack pattern together.
    // NOTE: The adjacency list is indexed by model index, so every model
    //       index is expected to be in the range [0, num_models).
    std::vector<std::unordered_set<int>> adj_list(num_models);
    for (const t_pack_patterns& pack_pattern : prepacker.get_all_pack_patterns()) {
        // Get the models within this pattern.
        auto models_in_pattern = get_pattern_models(pack_pattern);
        VTR_ASSERT_SAFE(!models_in_pattern.empty());
        // The assert above may not be active in every build; a pattern without
        // models creates no edges, so skip it rather than dereference begin()
        // of an empty set below.
        if (models_in_pattern.empty())
            continue;

        // Debug print the models within the pattern.
        if (log_verbosity >= 20) {
            VTR_LOG("Pattern: %s\n\t", pack_pattern.name);
            for (int model_idx : models_in_pattern) {
                VTR_LOG("%s ", model_name[model_idx]);
            }
            VTR_LOG("\n");
        }

        // Connect each of the models to the first model in the pattern. Since
        // we only care if there exist a path from each model to another, we do
        // not need to connect the models in a clique.
        int first_model_idx = *models_in_pattern.begin();
        for (int model_idx : models_in_pattern) {
            VTR_ASSERT_SAFE(model_idx >= 0 && model_idx < static_cast<int>(num_models));
            adj_list[model_idx].insert(first_model_idx);
            adj_list[first_model_idx].insert(model_idx);
        }
    }

    // Perform BFS to group the models.
    VTR_LOGV(log_verbosity >= 20,
             "Finding model groups...\n");
    std::queue<int> node_queue;
    model_group_id_.resize(num_models, ModelGroupId::INVALID());
    for (int model_idx = 0; model_idx < static_cast<int>(num_models); model_idx++) {
        // If this model is already in a group, skip it.
        if (model_group_id_[model_idx].is_valid()) {
            // The group ID is a strong ID object; convert it explicitly to an
            // integer before handing it to the printf-style logger.
            VTR_LOGV(log_verbosity >= 20,
                     "\t(%d -> %zu)\n", model_idx, size_t(model_group_id_[model_idx]));
            continue;
        }

        // This model starts a new group; the new ID is the next free one.
        ModelGroupId group_id = ModelGroupId(group_ids_.size());
        // Put the model in this group and push to the queue.
        model_group_id_[model_idx] = group_id;
        node_queue.push(model_idx);

        while (!node_queue.empty()) {
            // Pop a node from the queue, and explore its neighbors.
            int node_model_idx = node_queue.front();
            node_queue.pop();
            for (int neighbor_model_idx : adj_list[node_model_idx]) {
                // If this neighbor is already in this group, skip it.
                if (model_group_id_[neighbor_model_idx].is_valid()) {
                    VTR_ASSERT_SAFE(model_group_id_[neighbor_model_idx] == group_id);
                    continue;
                }
                // Put the neighbor in this group and push it to the queue.
                model_group_id_[neighbor_model_idx] = group_id;
                node_queue.push(neighbor_model_idx);
            }
        }

        VTR_LOGV(log_verbosity >= 20,
                 "\t(%d -> %zu)\n", model_idx, size_t(model_group_id_[model_idx]));
        group_ids_.push_back(group_id);
    }

    // Create a lookup between each group and the models it contains.
    groups_.resize(group_ids_.size());
    for (int model_idx = 0; model_idx < static_cast<int>(num_models); model_idx++) {
        groups_[model_group_id_[model_idx]].push_back(model_idx);
    }

    // Debug printing for each group.
    if (log_verbosity >= 20) {
        for (ModelGroupId group_id : groups()) {
            const std::vector<int>& group = groups_[group_id];
            VTR_LOG("Group %zu:\n", size_t(group_id));
            VTR_LOG("\tSize = %zu\n", group.size());
            VTR_LOG("\tContained models:\n");
            for (int model_idx : group) {
                VTR_LOG("\t\t%s\n", model_name[model_idx]);
            }
        }
    }
}
+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date March 2025
5+
* @brief Declaration of a model grouper class which groups together models
6+
* that must be legalized together in a flat placement.
7+
*/
8+
9+
#pragma once
10+
11+
#include <vector>
12+
#include "vtr_assert.h"
13+
#include "vtr_range.h"
14+
#include "vtr_strong_id.h"
15+
#include "vtr_vector.h"
16+
#include "vtr_vector_map.h"
17+
18+
// Forward declarations.
19+
class Prepacker;
20+
struct t_model;
21+
22+
/// @brief Tag for the ModelGroupId
23+
struct model_group_id_tag;
24+
25+
/// @brief A unique ID of a group of models created by the ModelGrouper class.
26+
typedef vtr::StrongId<model_group_id_tag, size_t> ModelGroupId;
27+
28+
/**
29+
* @brief A manager class for grouping together models that must be legalized
30+
* together in a flat placement due to how they form molecules with each
31+
* other.
32+
*
33+
* When performing legalization of a flat placement, it is desirable to split
34+
* the problem into independent legalization problems. We cannot place all of
35+
* the blocks of different model types independently since some blocks are made
36+
* of multiple different types of models. We wish to find the minimum number of
37+
* models that we need to legalize at the same time.
38+
*
39+
* This class groups models together based on the pack patterns that they can
40+
* form in the prepacker. If model A and model B can form a pack pattern, and
41+
* model B and model C can form a pack pattern, then models A, B, and C form a
42+
* group and must be legalized together.
43+
*
44+
* This class also manages what models each group contains and the group of each
45+
* model, where the user can use IDs to get relavent information.
46+
*/
47+
class ModelGrouper {
48+
public:
49+
// Iterator for the model group IDs
50+
typedef typename vtr::vector_map<ModelGroupId, ModelGroupId>::const_iterator group_iterator;
51+
52+
// Range for the model group IDs
53+
typedef typename vtr::Range<group_iterator> group_range;
54+
55+
public:
56+
ModelGrouper() = delete;
57+
58+
/**
59+
* @brief Constructor for the model grouper class. Groups are formed here.
60+
*
61+
* @param prepacker
62+
* The prepacker used to create molecules in the flat placement. This
63+
* provides the pack patterns for forming the groups.
64+
* @param user_models
65+
* Linked list of user-provided models.
66+
* @param library_models
67+
* Linked list of library models.
68+
* @param log_verbosity
69+
* The verbosity of log messages in the grouper class.
70+
*/
71+
ModelGrouper(const Prepacker& prepacker,
72+
t_model* user_models,
73+
t_model* library_models,
74+
int log_verbosity);
75+
76+
/**
77+
* @brief Returns a list of all valid group IDs.
78+
*/
79+
inline group_range groups() const {
80+
return vtr::make_range(group_ids_.begin(), group_ids_.end());
81+
}
82+
83+
/**
84+
* @brief Gets the group ID of the given model.
85+
*/
86+
inline ModelGroupId get_model_group_id(int model_index) const {
87+
VTR_ASSERT_SAFE_MSG(model_index < (int)model_group_id_.size(),
88+
"Model index outside of range for model_group_id_");
89+
ModelGroupId group_id = model_group_id_[model_index];
90+
VTR_ASSERT_SAFE_MSG(group_id.is_valid(),
91+
"Model is not in a group");
92+
return group_id;
93+
}
94+
95+
/**
96+
* @brief Gets the models in the given group.
97+
*/
98+
inline const std::vector<int>& get_models_in_group(ModelGroupId group_id) const {
99+
VTR_ASSERT_SAFE_MSG(group_id.is_valid(),
100+
"Invalid group id");
101+
VTR_ASSERT_SAFE_MSG(groups_[group_id].size() != 0,
102+
"Group is empty");
103+
return groups_[group_id];
104+
}
105+
106+
private:
107+
/// @brief List of all group IDs.
108+
vtr::vector_map<ModelGroupId, ModelGroupId> group_ids_;
109+
110+
/// @brief A lookup between models and the group ID that contains them.
111+
std::vector<ModelGroupId> model_group_id_;
112+
113+
/// @brief A lookup between each group ID and the models in that group.
114+
vtr::vector<ModelGroupId, std::vector<int>> groups_;
115+
};

‎vpr/src/analytical_place/partial_legalizer.cpp

+643-206
Large diffs are not rendered by default.

‎vpr/src/analytical_place/partial_legalizer.h

+232
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@
1313

1414
#pragma once
1515

16+
#include <functional>
1617
#include <memory>
1718
#include <vector>
1819
#include "ap_netlist_fwd.h"
1920
#include "flat_placement_bins.h"
2021
#include "flat_placement_density_manager.h"
22+
#include "model_grouper.h"
2123
#include "primitive_vector.h"
24+
#include "vtr_geometry.h"
25+
#include "vtr_prefix_sum.h"
2226
#include "vtr_vector.h"
2327

2428
// Forward declarations
@@ -90,6 +94,7 @@ class PartialLegalizer {
9094
std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer legalizer_type,
9195
const APNetlist& netlist,
9296
std::shared_ptr<FlatPlacementDensityManager> density_manager,
97+
const Prepacker& prepacker,
9398
int log_verbosity);
9499

95100
/**
@@ -240,6 +245,97 @@ class FlowBasedLegalizer : public PartialLegalizer {
240245
void legalize(PartialPlacement& p_placement) final;
241246
};
242247

248+
/**
249+
* @brief A cluster of flat placement bins.
250+
*/
251+
typedef typename std::vector<FlatPlacementBinId> FlatPlacementBinCluster;
252+
253+
/**
254+
* @brief Enum for the direction of a partition.
255+
*/
256+
enum class e_partition_dir {
257+
VERTICAL,
258+
HORIZONTAL
259+
};
260+
261+
/**
262+
* @brief Spatial window used to spread the blocks contained within.
263+
*
264+
* This window's region is identified and grown until it has enough space to
265+
* accommodate the blocks stored within. This window is then successively
266+
* partitioned until it is small enough (blocks are not too dense).
267+
*/
268+
struct SpreadingWindow {
269+
/// @brief The blocks contained within this window.
270+
std::vector<APBlockId> contained_blocks;
271+
272+
/// @brief The 2D region of space that this window covers.
273+
vtr::Rect<double> region;
274+
};
275+
276+
/**
277+
* @brief Struct to hold the information from partitioning a window. Contains
278+
* the two window partitions and some information about how they were
279+
* generated.
280+
*/
281+
struct PartitionedWindow {
282+
/// @brief The direction of the partition.
283+
e_partition_dir partition_dir;
284+
285+
/// @brief The position that the parent window was split at.
286+
double pivot_pos;
287+
288+
/// @brief The lower window. This is the left partition when the direction
289+
/// is vertical, and the bottom partition when the direction is
290+
/// horizontal.
291+
SpreadingWindow lower_window;
292+
293+
/// @brief The upper window. This is the right partition when the direction
294+
/// is vertical, and the top partition when the direction is
295+
/// horizontal.
296+
SpreadingWindow upper_window;
297+
};
298+
299+
/**
300+
* @brief Wrapper class around the prefix sum class which creates a prefix sum
301+
* for each model type and has helper methods for getting the sums over
302+
* regions.
303+
*/
304+
class PerModelPrefixSum2D {
305+
public:
306+
PerModelPrefixSum2D() = default;
307+
308+
/**
309+
* @brief Construct prefix sums for each of the models in the architecture.
310+
*
311+
* Uses the density manager to get the size of the placeable region.
312+
*
313+
* The lookup is a lambda used to populate the prefix sum. It provides
314+
* the model index, x, and y to be populated.
315+
*/
316+
PerModelPrefixSum2D(const FlatPlacementDensityManager& density_manager,
317+
t_model* user_models,
318+
t_model* library_models,
319+
std::function<float(int, size_t, size_t)> lookup);
320+
321+
/**
322+
* @brief Get the sum for a given model over the given region.
323+
*/
324+
float get_model_sum(int model_index,
325+
const vtr::Rect<double>& region) const;
326+
327+
/**
328+
* @brief Get the multi-dimensional sum over the given model indices over
329+
* the given region.
330+
*/
331+
PrimitiveVector get_sum(const std::vector<int>& model_indices,
332+
const vtr::Rect<double>& region) const;
333+
334+
private:
335+
/// @brief Per-Model Prefix Sums
336+
std::vector<vtr::PrefixSum2D<float>> model_prefix_sum_;
337+
};
338+
243339
/**
244340
* @brief A bi-partitioning spreading partial legalizer.
245341
*
@@ -258,6 +354,19 @@ class FlowBasedLegalizer : public PartialLegalizer {
258354
* GPlace3.0: https://doi.org/10.1145/3233244
259355
*/
260356
class BiPartitioningPartialLegalizer : public PartialLegalizer {
357+
private:
358+
/// @brief The maximum gap between overfilled bins we can have in a flat
359+
/// placement bin cluster. For example, if this is set to 1, we will
360+
/// allow two overfilled bins to be clustered together if they only
361+
/// have 1 non-overfilled bin of gap between them.
362+
/// The rationale behind this is that it allows us to predict that the windows
363+
/// created for each cluster will overlap if they are within some gap distance.
364+
/// Increasing this number too much may cluster bins together too much and
365+
/// create large windows; decreasing this number will put more pressure on
366+
/// the window generation code, which can increase window size and runtime.
367+
/// TODO: Should this be distance instead of number of bins?
368+
static constexpr int max_bin_cluster_gap_ = 1;
369+
261370
public:
262371
/**
263372
* @brief Constructor for the bi-partitioning partial legalizer.
@@ -267,6 +376,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
267376
*/
268377
BiPartitioningPartialLegalizer(const APNetlist& netlist,
269378
std::shared_ptr<FlatPlacementDensityManager> density_manager,
379+
const Prepacker& prepacker,
270380
int log_verbosity);
271381

272382
/**
@@ -278,8 +388,130 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
278388
*/
279389
void legalize(PartialPlacement& p_placement) final;
280390

391+
private:
392+
// ========================================================================
393+
// Identifying spreading windows
394+
// ========================================================================
395+
396+
/**
397+
* @brief Identify spreading windows which contain overfilled bins in the
398+
* given model group on the device and do not overlap.
399+
*
400+
* This process is split into 4 stages:
401+
* 1) Overfilled bins are identified and clustered.
402+
* 2) Grow windows around the overfilled bin clusters. These windows
403+
* will grow until there is just enough space to accommodate the blocks
404+
* within the window (capacity of the window is larger than the utilization).
405+
* 3) Merge overlapping windows.
406+
* 4) Move the blocks within these window regions from their bins into
407+
* their windows. This updates the current utilization of bins, making
408+
* spreading easier.
409+
*
410+
* We identify non-overlapping windows for different model groups independently
411+
* for a few reasons:
412+
* - Each model group, by design, can be spread independent of each other.
413+
* This reduces the problem size by the number of groups.
414+
* - Without model groups, one block placed on the wrong side of the chip
415+
* may create a window the size of the entire chip! This would rip up and
416+
* spread all the blocks in the chip, which is very expensive.
417+
* - This allows us to ignore block models which are already in legal
418+
* positions.
419+
*/
420+
std::vector<SpreadingWindow> identify_non_overlapping_windows(ModelGroupId group_id);
421+
422+
/**
423+
* @brief Identifies clusters of overfilled bins for the given model group.
424+
*
425+
* This locates clusters of overfilled bins which are within a given
426+
* distance from each other.
427+
*/
428+
std::vector<FlatPlacementBinCluster> get_overfilled_bin_clusters(ModelGroupId group_id);
429+
430+
/**
431+
* @brief Creates and grows minimum spanning windows around the given
432+
* overfilled bin clusters.
433+
*
434+
* Here, minimum means that the windows are just large enough such that the
435+
* capacity of the bins within the window is larger than the utilization for
436+
* the given model group.
437+
*/
438+
std::vector<SpreadingWindow> get_min_windows_around_clusters(
439+
const std::vector<FlatPlacementBinCluster>& overfilled_bin_clusters,
440+
ModelGroupId group_id);
441+
442+
/**
443+
* @brief Merges overlapping windows in the given vector of windows.
444+
*
445+
* The resulting merged windows are stored in the given windows object.
446+
*/
447+
void merge_overlapping_windows(std::vector<SpreadingWindow>& windows);
448+
449+
/**
450+
* @brief Moves the blocks out of their bins and into their window.
451+
*
452+
* Only blocks in the given model group will be moved.
453+
*/
454+
void move_blocks_into_windows(std::vector<SpreadingWindow>& non_overlapping_windows,
455+
ModelGroupId group_id);
456+
457+
// ========================================================================
458+
// Spreading blocks over windows
459+
// ========================================================================
460+
461+
/**
462+
* @brief Spread the blocks over each of the given non-overlapping windows.
463+
*
464+
* The partial placement solution from the solver is used to decide which
465+
* window partition to put a block into. The model group this window is
466+
* spreading over can make it more efficient to make decisions.
467+
*/
468+
void spread_over_windows(std::vector<SpreadingWindow>& non_overlapping_windows,
469+
const PartialPlacement& p_placement,
470+
ModelGroupId group_id);
471+
472+
/**
473+
* @brief Partition the given window into two sub-windows.
474+
*
475+
* We return extra information about how the window was created; for example,
476+
* the direction of the partition (vertical / horizontal) and the position
477+
* of the cut.
478+
*/
479+
PartitionedWindow partition_window(SpreadingWindow& window);
480+
481+
/**
482+
* @brief Partition the blocks in the given window into the partitioned
483+
* windows.
484+
*
485+
* This is kept separate from splitting the physical window region for
486+
* cleanliness. After this point, the window will not have any atoms in
487+
* it.
488+
*/
489+
void partition_blocks_in_window(SpreadingWindow& window,
490+
PartitionedWindow& partitioned_window,
491+
ModelGroupId group_id,
492+
const PartialPlacement& p_placement);
493+
494+
/**
495+
* @brief Move the blocks out of the given windows and put them back into
496+
* the correct bin according to the window that contains them.
497+
*/
498+
void move_blocks_out_of_windows(std::vector<SpreadingWindow>& finished_windows);
499+
281500
private:
282501
/// @brief The density manager which manages the capacity and utilization
283502
/// of regions of the device.
284503
std::shared_ptr<FlatPlacementDensityManager> density_manager_;
504+
505+
/// @brief Grouper object which handles grouping together models which must
506+
/// be spread together. Models are grouped based on the pack patterns
507+
/// that they can form with each other.
508+
ModelGrouper model_grouper_;
509+
510+
/// @brief The prefix sum for the capacity of the device, as given by the
511+
/// density manager. We will need to get the capacity of 2D regions
512+
/// of the device very often for this partial legalizer. This data
513+
/// structure greatly improves the time complexity of this operation.
514+
///
515+
/// This is populated in the constructor and not modified.
516+
PerModelPrefixSum2D capacity_prefix_sum_;
285517
};

‎vpr/src/analytical_place/primitive_vector.h

+93-17
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010

1111
#pragma once
1212

13+
#include <cmath>
1314
#include <cstdlib>
1415
#include <unordered_map>
16+
#include <vector>
17+
#include "vtr_log.h"
1518

1619
/**
1720
* @brief A sparse vector class to store an M-dimensional quantity of primitives
@@ -48,9 +51,24 @@ class PrimitiveVector {
4851
* This is a common enough feature to use its own setter.
4952
*/
5053
inline void add_val_to_dim(float val, size_t dim) {
51-
if (data_.count(dim) == 0)
52-
data_[dim] = 0.f;
53-
data_[dim] += val;
54+
auto it = data_.find(dim);
55+
if (it == data_.end())
56+
data_.insert({dim, val});
57+
else {
58+
it->second += val;
59+
}
60+
}
61+
62+
/**
63+
* @brief Subtract the value from the given dimension.
64+
*/
65+
inline void subtract_val_from_dim(float val, size_t dim) {
66+
auto it = data_.find(dim);
67+
if (it == data_.end())
68+
data_.insert({dim, -1.0f * val});
69+
else {
70+
it->second -= val;
71+
}
5472
}
5573

5674
/**
@@ -104,19 +122,26 @@ class PrimitiveVector {
104122
*/
105123
inline PrimitiveVector& operator+=(const PrimitiveVector& rhs) {
106124
for (const auto& p : rhs.data_) {
107-
float dim_val = get_dim_val(p.first);
108-
set_dim_val(p.first, dim_val + p.second);
125+
add_val_to_dim(p.second, p.first);
109126
}
110127
return *this;
111128
}
112129

130+
/**
131+
* @brief Element-wise addition of this with rhs.
132+
*/
133+
inline PrimitiveVector operator+(const PrimitiveVector& rhs) const {
134+
PrimitiveVector res = *this;
135+
res += rhs;
136+
return res;
137+
}
138+
113139
/**
114140
* @brief Element-wise de-accumulation of rhs into this.
115141
*/
116142
inline PrimitiveVector& operator-=(const PrimitiveVector& rhs) {
117143
for (const auto& p : rhs.data_) {
118-
float dim_val = get_dim_val(p.first);
119-
set_dim_val(p.first, dim_val - p.second);
144+
subtract_val_from_dim(p.second, p.first);
120145
}
121146
return *this;
122147
}
@@ -140,6 +165,25 @@ class PrimitiveVector {
140165
return *this;
141166
}
142167

168+
/**
169+
* @brief Element-wise division with a scalar.
170+
*/
171+
inline PrimitiveVector& operator/=(float rhs) {
172+
for (auto& p : data_) {
173+
p.second /= rhs;
174+
}
175+
return *this;
176+
}
177+
178+
/**
179+
* @brief Element-wise division with a scalar.
180+
*/
181+
inline PrimitiveVector operator/(float rhs) const {
182+
PrimitiveVector res = *this;
183+
res /= rhs;
184+
return res;
185+
}
186+
143187
/**
144188
* @brief Returns true if any dimension of this vector is less than any
145189
* dimension of rhs; false otherwise.
@@ -168,12 +212,11 @@ class PrimitiveVector {
168212
* is positive, it will not change.
169213
*/
170214
inline void relu() {
171-
for (auto& p : data_) {
172-
// TODO: Should remove the zero elements from the map to improve
173-
// efficiency.
174-
if (p.second < 0.f)
175-
p.second = 0.f;
176-
}
215+
std::erase_if(data_, [](const std::pair<size_t, float>& p) {
216+
// Note: we erase the numbers from the map to improve the performance
217+
// of future operations on this vector.
218+
return p.second <= 0.0f;
219+
});
177220
}
178221

179222
/**
@@ -234,12 +277,36 @@ class PrimitiveVector {
234277
inline void project(const PrimitiveVector& dir) {
235278
// For each dimension of this vector, if that dimension is zero in dir
236279
// set the dimension to zero.
280+
std::erase_if(data_, [&](const std::pair<size_t, float>& p) {
281+
return dir.get_dim_val(p.first) == 0.0f;
282+
});
283+
}
284+
285+
/**
286+
* @brief Gets the non-zero dimensions of this vector.
287+
*/
288+
inline std::vector<int> get_non_zero_dims() const {
289+
std::vector<int> non_zero_dims;
237290
for (auto& p : data_) {
238-
// TODO: Instead of zeroing the dimension, it should be removed
239-
// from the map.
240-
if (dir.get_dim_val(p.first) == 0.f)
241-
p.second = 0.f;
291+
if (p.second != 0.0f)
292+
non_zero_dims.push_back(p.first);
242293
}
294+
return non_zero_dims;
295+
}
296+
297+
/**
298+
* @brief Returns true if this and other do not share any non-zero dimensions.
299+
*/
300+
inline bool are_dims_disjoint(const PrimitiveVector& other) const {
301+
for (const auto& p : other.data_) {
302+
// If this and other both have a shared dimension, then they are not
303+
// disjoint.
304+
if (p.second != 0.0f && get_dim_val(p.first) != 0.0f) {
305+
return false;
306+
}
307+
}
308+
// If they do not share any non-zero dimensions, then they are disjoint.
309+
return true;
243310
}
244311

245312
/**
@@ -268,4 +335,13 @@ class PrimitiveVector {
268335
}
269336
return res;
270337
}
338+
339+
/**
340+
* @brief Debug printing method.
341+
*/
342+
inline void print() const {
343+
for (const auto& p : data_) {
344+
VTR_LOG("(%zu, %f)\n", p.first, p.second);
345+
}
346+
}
271347
};

‎vpr/src/pack/appack_context.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,16 @@ struct t_appack_options {
6262
// We use the following gain attenuation function:
6363
// attenuation = { 1 - (quad_fac * d)^2 if d < dist_th
6464
// { 1 / sqrt(d - sqrt_offset) if d >= dist_th
65+
// The numbers below were empirically found to work well.
66+
6567
// Distance threshold which decides when to use quadratic decay or inverted
6668
// sqrt decay. If the distance is less than this threshold, quadratic decay
6769
// is used. Inverted sqrt is used otherwise.
68-
float dist_th = 1.0f;
70+
float dist_th = 5.0f;
6971
// Horizontal offset to the inverted sqrt decay.
70-
float sqrt_offset = -2.9f;
72+
float sqrt_offset = -1.1f;
7173
// Scaling factor for the quadratic decay term.
72-
float quad_fac = 0.7f;
74+
float quad_fac = 0.1543f;
7375

7476
// =========== Candidate selection distance ============================ //
7577
// When selecting candidates, what distance from the cluster will we

‎vpr/src/pack/prepack.h

+7
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,13 @@ class Prepacker {
286286
return chain_info_.size();
287287
}
288288

289+
/**
290+
* @brief Get a list of all the pack patterns in the architecture.
291+
*/
292+
inline const std::vector<t_pack_patterns>& get_all_pack_patterns() const {
293+
return list_of_pack_patterns;
294+
}
295+
289296
private:
290297
/**
291298
* Pre-pack atoms in netlist to molecules

‎vpr/test/test_ap_primitive_vector.cpp

+55
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* PrimitiveVector object are working as expected.
99
*/
1010

11+
#include <algorithm>
1112
#include "catch2/catch_test_macros.hpp"
1213
#include "primitive_vector.h"
1314

@@ -310,6 +311,60 @@ TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") {
310311
res = PrimitiveVector::max(vec2, vec1);
311312
REQUIRE(res == golden);
312313
}
314+
315+
SECTION("Test more operators and methods") {
316+
PrimitiveVector vec1, vec2;
317+
318+
// Subtract value from dimension
319+
vec1.set_dim_val(0, 5.f);
320+
vec1.subtract_val_from_dim(3.f, 0);
321+
REQUIRE(vec1.get_dim_val(0) == 2.f);
322+
323+
// Element-wise addition operator
324+
vec1.clear();
325+
vec1.set_dim_val(0, 1.f);
326+
vec1.set_dim_val(1, 2.f);
327+
vec2.clear();
328+
vec2.set_dim_val(0, 3.f);
329+
vec2.set_dim_val(1, 4.f);
330+
PrimitiveVector vec_sum = vec1 + vec2;
331+
REQUIRE(vec_sum.get_dim_val(0) == 4.f);
332+
REQUIRE(vec_sum.get_dim_val(1) == 6.f);
333+
334+
// Element-wise division operator
335+
vec1.clear();
336+
vec1.set_dim_val(0, 10.f);
337+
vec1.set_dim_val(1, 20.f);
338+
vec1 /= 2.f;
339+
REQUIRE(vec1.get_dim_val(0) == 5.f);
340+
REQUIRE(vec1.get_dim_val(1) == 10.f);
341+
342+
// Element-wise division operator (const)
343+
vec1.clear();
344+
vec1.set_dim_val(0, 10.f);
345+
vec1.set_dim_val(1, 20.f);
346+
PrimitiveVector vec_div = vec1 / 2.f;
347+
REQUIRE(vec_div.get_dim_val(0) == 5.f);
348+
REQUIRE(vec_div.get_dim_val(1) == 10.f);
349+
350+
// Get non-zero dimensions
351+
vec1.clear();
352+
vec1.set_dim_val(0, 1.f);
353+
vec1.set_dim_val(2, 3.f);
354+
std::vector<int> non_zero_dims = vec1.get_non_zero_dims();
355+
REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 0) != non_zero_dims.end());
356+
REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 2) != non_zero_dims.end());
357+
REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 1) == non_zero_dims.end());
358+
359+
// Test orthogonal vectors
360+
vec1.clear();
361+
vec2.clear();
362+
vec1.set_dim_val(0, 1.f);
363+
vec2.set_dim_val(1, 2.f);
364+
REQUIRE(vec1.are_dims_disjoint(vec2));
365+
vec2.set_dim_val(0, 3.f);
366+
REQUIRE(!vec1.are_dims_disjoint(vec2));
367+
}
313368
}
314369

315370
} // namespace
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time
2-
k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 6.15 vpr 74.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 9 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76312 9 19 897 28 0 768 114 16 16 256 -1 mcnc_medium -1 -1 7446 10050 1525 5847 2678 74.5 MiB 1.98 0.01 5.22187 -85.9445 -5.22187 nan 0.05 0.00204197 0.00165471 0.0922108 0.0778153 74.5 MiB 1.98 74.5 MiB 1.87 12280 16.0104 3195 4.16558 8207 35340 1711962 391448 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 35 28900 206586 -1 5.82297 nan -93.0212 -5.82297 0 0 0.19 -1 -1 74.5 MiB 0.60 0.268738 0.231571 74.5 MiB -1 0.05
3-
k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.87 vpr 75.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77752 256 245 954 501 0 711 554 22 22 484 -1 mcnc_large -1 -1 8904 66500 1807 14947 49746 75.9 MiB 0.88 0.01 4.19633 -806.67 -4.19633 nan 0.07 0.00209601 0.00184942 0.0749397 0.0672821 75.9 MiB 0.88 75.9 MiB 0.87 12620 17.7496 3382 4.75668 3608 8619 480767 96513 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 15 47664 245996 -1 4.54897 nan -867.702 -4.54897 0 0 0.22 -1 -1 75.9 MiB 0.19 0.170591 0.156391 75.9 MiB -1 0.07
4-
k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 19.24 vpr 103.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 289 10 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 106224 10 10 2659 20 0 2320 309 22 22 484 -1 mcnc_large -1 -1 33337 60861 15622 40285 4954 103.7 MiB 7.37 0.03 7.08906 -67.526 -7.08906 nan 0.15 0.00509718 0.00406142 0.35604 0.28949 103.7 MiB 7.37 103.7 MiB 7.07 48698 20.9905 12433 5.35905 17466 71913 3700066 508136 2.15576e+07 1.55754e+07 3.51389e+06 7260.09 20 64568 594370 -1 7.09981 nan -68.5294 -7.09981 0 0 0.63 -1 -1 103.7 MiB 1.13 0.742979 0.632564 103.7 MiB -1 0.15
5-
k6_frac_N10_40nm.xml seq.pre-vpr.blif common 5.15 vpr 75.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 85 41 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77640 41 35 1006 76 0 827 161 16 16 256 -1 mcnc_medium -1 -1 8073 13708 1574 6075 6059 75.8 MiB 1.93 0.01 5.2078 -150.175 -5.2078 nan 0.05 0.00241319 0.00198256 0.0910059 0.0770604 75.8 MiB 1.93 75.8 MiB 1.81 13112 15.8549 3429 4.14631 6281 26105 949531 164260 1.05632e+07 4.58099e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.48717 nan -159.221 -5.48717 0 0 0.19 -1 -1 75.8 MiB 0.33 0.222488 0.193946 75.8 MiB -1 0.05
1+
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time
2+
k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.94 vpr 74.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 80 9 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76564 9 19 897 28 0 624 108 16 16 256 -1 mcnc_medium -1 -1 10315 6596 9617 1559 5516 2542 74.8 MiB 1.83 0.01 6.75959 5.07271 -83.5391 -5.07271 nan 0.05 0.00162447 0.001265 0.077793 0.0643277 74.8 MiB 1.83 74.8 MiB 1.37 11052 17.7400 2817 4.52167 5101 22566 851127 138852 1.05632e+07 4.31152e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.37355 nan -88.7113 -5.37355 0 0 0.20 -1 -1 74.8 MiB 0.27 0.190594 0.164391 74.8 MiB -1 0.05
3+
k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.43 vpr 75.06 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 256 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76860 256 245 954 501 0 589 560 22 22 484 -1 mcnc_large -1 -1 10234 7797 51314 1070 11670 38574 75.1 MiB 0.67 0.01 6.53248 4.02447 -785.149 -4.02447 nan 0.07 0.00226809 0.00205398 0.0618196 0.0559082 75.1 MiB 0.67 75.1 MiB 0.37 10533 17.8829 2862 4.85908 2507 5465 336298 76364 2.15576e+07 3.17975e+06 1.49107e+06 3080.73 19 47664 245996 -1 4.35047 nan -842.961 -4.35047 0 0 0.22 -1 -1 75.1 MiB 0.19 0.175627 0.161726 75.1 MiB -1 0.07
4+
k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 18.05 vpr 102.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 283 10 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 104988 10 10 2659 20 0 1537 303 22 22 484 -1 mcnc_large -1 -1 38269 26758 56238 15119 35900 5219 102.5 MiB 7.03 0.02 10.0331 6.59208 -63.1998 -6.59208 nan 0.16 0.00681329 0.00553283 0.410131 0.342368 102.5 MiB 7.03 102.5 MiB 4.80 40340 26.2459 10213 6.64476 10566 57669 2722491 354615 2.15576e+07 1.5252e+07 3.51389e+06 7260.09 18 64568 594370 -1 6.59758 nan -64.3078 -6.59758 0 0 0.64 -1 -1 102.5 MiB 0.98 0.800154 0.691255 102.5 MiB -1 0.16
5+
k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.80 vpr 75.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 41 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77428 41 35 1006 76 0 667 163 16 16 256 -1 mcnc_medium -1 -1 11495 7037 12623 1276 5735 5612 75.6 MiB 1.80 0.01 6.34209 4.94158 -140.443 -4.94158 nan 0.05 0.00182801 0.00144126 0.0708206 0.0592281 75.6 MiB 1.80 75.6 MiB 1.33 11301 16.9430 2961 4.43928 4738 21343 723412 125961 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.29948 nan -148.755 -5.29948 0 0 0.19 -1 -1 75.6 MiB 0.26 0.191646 0.16645 75.6 MiB -1 0.05
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time
2-
k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.74 vpr 74.21 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 9 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 75996 9 19 897 28 0 768 114 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.66 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.66 74.2 MiB 1.55 17094 22.2868 4573 5.96219 5603 20605 894991 145381 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 18 28900 206586 -1 6.8999 nan -108.582 -6.8999 0 0 0.19 -1 -1 74.2 MiB 0.30 0.11634 0.103759 74.2 MiB -1 0.05
3-
k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.23 vpr 74.90 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76700 256 245 954 501 0 711 554 22 22 484 -1 mcnc_large -1 -1 -1 -1 -1 -1 -1 74.9 MiB 0.48 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.9 MiB 0.48 74.9 MiB 0.47 14934 21.0042 3961 5.57103 3454 8241 562985 107042 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 15 47664 245996 -1 5.95192 nan -973.234 -5.95192 0 0 0.22 -1 -1 74.9 MiB 0.20 0.0953982 0.0888954 74.9 MiB -1 0.07
4-
k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.68 vpr 75.69 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 85 41 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77504 41 35 1006 76 0 827 161 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 75.7 MiB 1.57 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.7 MiB 1.57 75.7 MiB 1.44 19170 23.1802 5187 6.27207 6058 23325 1081692 174542 1.05632e+07 4.58099e+06 1.26944e+06 4958.75 18 28900 206586 -1 6.76552 nan -194.633 -6.76552 0 0 0.20 -1 -1 75.7 MiB 0.35 0.128528 0.115002 75.7 MiB -1 0.06
1+
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time
2+
k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.57 vpr 74.60 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 80 9 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76392 9 19 897 28 0 624 108 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 -1 74.6 MiB 1.65 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.6 MiB 1.65 74.6 MiB 1.48 14371 23.0674 3784 6.07384 4075 16657 665456 103737 1.05632e+07 4.31152e+06 1.26944e+06 4958.75 17 28900 206586 -1 6.63192 nan -103.794 -6.63192 0 0 0.19 -1 -1 74.6 MiB 0.27 0.120295 0.107523 74.6 MiB -1 0.05
3+
k6_frac_N10_40nm.xml des.pre-vpr.blif common 1.93 vpr 75.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 256 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77456 256 245 954 501 0 589 560 22 22 484 -1 mcnc_large -1 -1 -1 -1 -1 -1 -1 -1 75.6 MiB 0.38 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.6 MiB 0.38 75.6 MiB 0.37 12828 21.7793 3449 5.85569 2290 4763 363294 72848 2.15576e+07 3.17975e+06 1.49107e+06 3080.73 12 47664 245996 -1 6.32147 nan -1032.91 -6.32147 0 0 0.22 -1 -1 75.6 MiB 0.16 0.08541 0.0798207 75.6 MiB -1 0.07
4+
k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.34 vpr 75.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 41 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77332 41 35 1006 76 0 667 163 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 -1 75.5 MiB 1.46 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.5 MiB 1.46 75.5 MiB 1.27 15928 23.8801 4303 6.45127 4201 18009 720686 116311 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 16 28900 206586 -1 6.42149 nan -177.756 -6.42149 0 0 0.20 -1 -1 75.5 MiB 0.28 0.122598 0.110096 75.5 MiB -1 0.05

0 commit comments

Comments
 (0)
Please sign in to comment.