Skip to content

Commit 14f7914

Browse files
[APPack] Flat-Placement Informed Unrelated Clustering
Used flat placement information provided by APPack to try to select better unrelated candidates. This searches for candidates as close as possible to the flat placement position of the cluster. There are two parameters that control how this is performed: 1) max_unrelated_tile_distance decides how far the algorithm will search for unrelated candidates. The algorithm first checks for candidates in the same tile as the cluster, and then searches farther and farther out. 2) max_unrelated_clustering_attempts decides how many failed unrelated clustering attempts are allowed for a cluster. This matches the option of the same name in the candidate selector class, but it was made separate since it will likely need a different value for APPack.
1 parent b3d9694 commit 14f7914

File tree

5 files changed

+324
-36
lines changed

5 files changed

+324
-36
lines changed

vpr/src/pack/appack_context.h

+30-5
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ struct t_appack_options {
5353
CENTROID, /**< The location of the cluster is the centroid of the molecules which have been packed into it. */
5454
SEED /**< The location of the cluster is the location of the first molecule packed into it. */
5555
};
56-
e_cl_loc_ty cluster_location_ty = e_cl_loc_ty::CENTROID;
56+
static constexpr e_cl_loc_ty cluster_location_ty = e_cl_loc_ty::CENTROID;
5757

5858
// =========== Candidate gain attenuation ============================== //
5959
// These terms are used to update the gain of a given candidate based on
@@ -67,11 +67,11 @@ struct t_appack_options {
6767
// Distance threshold which decides when to use quadratic decay or inverted
6868
// sqrt decay. If the distance is less than this threshold, quadratic decay
6969
// is used. Inverted sqrt is used otherwise.
70-
float dist_th = 5.0f;
70+
static constexpr float dist_th = 5.0f;
7171
// Horizontal offset to the inverted sqrt decay.
72-
float sqrt_offset = -1.1f;
72+
static constexpr float sqrt_offset = -1.1f;
7373
// Scaling factor for the quadratic decay term.
74-
float quad_fac = 0.1543f;
74+
static constexpr float quad_fac = 0.1543f;
7575

7676
// =========== Candidate selection distance ============================ //
7777
// When selecting candidates, what distance from the cluster will we
@@ -81,7 +81,32 @@ struct t_appack_options {
8181
// types of molecules / clusters. For example, CLBs vs DSPs
8282
float max_candidate_distance = std::numeric_limits<float>::max();
8383

84-
// TODO: Investigate adding flat placement info to unrelated clustering.
84+
// =========== Unrelated clustering ==================================== //
85+
// After searching for candidates by connectivity and timing, the user may
86+
// turn on unrelated clustering, which will allow molecules which are
87+
// unrelated to the cluster being created to be attempted to be packed in.
88+
// APPack uses flat placement information to decide which unrelated
89+
// molecules to try.
90+
91+
// APPack will search for unrelated molecules in the tile which contains
92+
// the flat location of the cluster. It will then look farther out, tile
93+
// by tile. This parameter is the maximum distance from the cluster's tile
94+
// that APPack will search. Setting this to 0 would only allow APPack to
95+
// search within the cluster's tile. Setting this to a higher number would
96+
// allow APPack to search farther away; but may bring in molecules which
97+
// do not "want" to be in the cluster.
98+
static constexpr float max_unrelated_tile_distance = 1.0f;
99+
100+
// Unrelated clustering occurs after all other candidate selection methods
101+
// have failed. This parameter sets how many times we will attempt unrelated
102+
// clustering between failures of unrelated clustering. If this is set to
103+
// 1, and unrelated clustering failed for a cluster, it will not be attempted
104+
// again for that cluster (note: if it succeeds, the number of attempts gets
105+
// reset).
106+
// NOTE: A similar option exists in the candidate selector class. This was
107+
// duplicated since it is very likely that APPack would need a
108+
// different value for this option than the non-APPack flow.
109+
static constexpr int max_unrelated_clustering_attempts = 2;
85110

86111
// TODO: Investigate adding flat placement info to seed selection.
87112
};

vpr/src/pack/greedy_candidate_selector.cpp

+195-31
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "greedy_candidate_selector.h"
99
#include <algorithm>
1010
#include <cmath>
11+
#include <queue>
1112
#include <vector>
1213
#include "appack_context.h"
1314
#include "flat_placement_types.h"
@@ -16,13 +17,12 @@
1617
#include "attraction_groups.h"
1718
#include "cluster_legalizer.h"
1819
#include "cluster_placement.h"
19-
#include "globals.h"
2020
#include "greedy_clusterer.h"
2121
#include "prepack.h"
2222
#include "timing_info.h"
23-
#include "vpr_context.h"
2423
#include "vpr_types.h"
2524
#include "vtr_assert.h"
25+
#include "vtr_ndmatrix.h"
2626
#include "vtr_vector.h"
2727

2828
/*
@@ -105,44 +105,93 @@ GreedyCandidateSelector::GreedyCandidateSelector(
105105
, timing_info_(timing_info)
106106
, appack_ctx_(appack_ctx)
107107
, rng_(0) {
108-
// Initialize the list of molecules to pack, the clustering data, and the
109-
// net info.
110108

111-
// Initialize unrelated clustering data.
109+
// Initialize unrelated clustering data if unrelated clustering is enabled.
112110
if (allow_unrelated_clustering_) {
113-
/* alloc and load list of molecules to pack */
114-
unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1);
111+
initialize_unrelated_clustering_data(max_molecule_stats);
112+
}
113+
114+
/* TODO: This is memory inefficient, fix if causes problems */
115+
/* Store stats on nets used by packed block, useful for determining transitively connected blocks
116+
* (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */
117+
clb_inter_blk_nets_.resize(atom_netlist.blocks().size());
118+
}
119+
120+
void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molecule_stats& max_molecule_stats) {
121+
// Create a sorted list of molecules, sorted on decreasing molecule base
122+
// gain. (Highest gain).
123+
std::vector<PackMoleculeId> molecules_vector;
124+
molecules_vector.assign(prepacker_.molecules().begin(), prepacker_.molecules().end());
125+
std::stable_sort(molecules_vector.begin(),
126+
molecules_vector.end(),
127+
[&](PackMoleculeId a_id, PackMoleculeId b_id) {
128+
const t_pack_molecule& a = prepacker_.get_molecule(a_id);
129+
const t_pack_molecule& b = prepacker_.get_molecule(b_id);
130+
131+
return a.base_gain > b.base_gain;
132+
});
133+
134+
if (appack_ctx_.appack_options.use_appack) {
135+
/**
136+
* For APPack, we build a spatial data structure where for each 1x1 grid
137+
* position on the FPGA, we maintain lists of molecule candidates.
138+
* The lists are in order of number of used external pins by the molecule.
139+
* Within each list, the molecules are sorted by their base gain.
140+
*/
141+
// Get the max x, y, and layer from the flat placement.
142+
t_flat_pl_loc max_loc({0.0f, 0.0f, 0.0f});
143+
for (PackMoleculeId mol_id : molecules_vector) {
144+
t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_);
145+
max_loc.x = std::max(max_loc.x, mol_pos.x);
146+
max_loc.y = std::max(max_loc.y, mol_pos.y);
147+
max_loc.layer = std::max(max_loc.layer, mol_pos.layer);
148+
}
115149

116-
// Create a sorted list of molecules, sorted on decreasing molecule base
117-
// gain. (Highest gain).
118-
std::vector<PackMoleculeId> molecules_vector;
119-
molecules_vector.assign(prepacker.molecules().begin(), prepacker.molecules().end());
120-
std::stable_sort(molecules_vector.begin(),
121-
molecules_vector.end(),
122-
[&](PackMoleculeId a_id, PackMoleculeId b_id) {
123-
const t_pack_molecule& a = prepacker.get_molecule(a_id);
124-
const t_pack_molecule& b = prepacker.get_molecule(b_id);
150+
VTR_ASSERT_MSG(max_loc.layer == 0,
151+
"APPack unrelated clustering does not support 3D "
152+
"FPGAs yet");
153+
154+
// Initialize the data structure with empty arrays with enough space
155+
// for each molecule.
156+
size_t flat_grid_width = max_loc.x + 1;
157+
size_t flat_grid_height = max_loc.y + 1;
158+
appack_unrelated_clustering_data_ =
159+
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2>({flat_grid_width,
160+
flat_grid_height});
161+
for (size_t x = 0; x < flat_grid_width; x++) {
162+
for (size_t y = 0; y < flat_grid_height; y++) {
163+
appack_unrelated_clustering_data_[x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
164+
}
165+
}
166+
167+
// Fill the grid with molecule information.
168+
for (PackMoleculeId mol_id : molecules_vector) {
169+
t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_);
125170

126-
return a.base_gain > b.base_gain;
127-
});
171+
//Figure out how many external inputs are used by this molecule
172+
t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_);
173+
int ext_inps = molecule_stats.num_used_ext_inputs;
174+
175+
//Insert the molecule into the unclustered lists by number of external inputs
176+
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x][mol_pos.y];
177+
tile_uc_data[ext_inps].push_back(mol_id);
178+
}
179+
} else {
180+
/* alloc and load list of molecules to pack */
181+
unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1);
128182

129183
// Push back each molecule into the unrelated clustering data vector
130184
// for their external inputs. This creates individual sorted lists of
131185
// molecules for each number of used external inputs.
132186
for (PackMoleculeId mol_id : molecules_vector) {
133187
//Figure out how many external inputs are used by this molecule
134-
t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(mol_id, atom_netlist);
188+
t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_);
135189
int ext_inps = molecule_stats.num_used_ext_inputs;
136190

137191
//Insert the molecule into the unclustered lists by number of external inputs
138192
unrelated_clustering_data_[ext_inps].push_back(mol_id);
139193
}
140194
}
141-
142-
/* TODO: This is memory inefficient, fix if causes problems */
143-
/* Store stats on nets used by packed block, useful for determining transitively connected blocks
144-
* (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */
145-
clb_inter_blk_nets_.resize(atom_netlist.blocks().size());
146195
}
147196

148197
GreedyCandidateSelector::~GreedyCandidateSelector() {
@@ -281,6 +330,7 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success(
281330
// Whenever a new molecule has been clustered, reset the number of
282331
// unrelated clustering attempts.
283332
num_unrelated_clustering_attempts_ = 0;
333+
// cluster_gain_stats.unrelated_molecules_attempted.clear();
284334

285335
// If using APPack, update the position of the cluster based on the addition
286336
// of this molecule.
@@ -673,15 +723,23 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
673723
// If we are allowing unrelated clustering and no molecule has been found,
674724
// get unrelated candidate for cluster.
675725
if (allow_unrelated_clustering_ && best_molecule == PackMoleculeId::INVALID()) {
676-
if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) {
677-
best_molecule = get_unrelated_candidate_for_cluster(cluster_id,
678-
cluster_legalizer);
679-
num_unrelated_clustering_attempts_++;
680-
VTR_LOGV(best_molecule && log_verbosity_ > 2,
681-
"\tFound unrelated molecule to cluster\n");
726+
const t_appack_options& appack_options = appack_ctx_.appack_options;
727+
if (appack_options.use_appack) {
728+
if (num_unrelated_clustering_attempts_ < appack_options.max_unrelated_clustering_attempts) {
729+
best_molecule = get_unrelated_candidate_for_cluster_appack(cluster_gain_stats,
730+
cluster_id,
731+
cluster_legalizer);
732+
num_unrelated_clustering_attempts_++;
733+
}
682734
} else {
683-
num_unrelated_clustering_attempts_ = 0;
735+
if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) {
736+
best_molecule = get_unrelated_candidate_for_cluster(cluster_id,
737+
cluster_legalizer);
738+
num_unrelated_clustering_attempts_++;
739+
}
684740
}
741+
VTR_LOGV(best_molecule && log_verbosity_ > 2,
742+
"\tFound unrelated molecule to cluster\n");
685743
} else {
686744
VTR_LOGV(!best_molecule && log_verbosity_ > 2,
687745
"\tNo related molecule found and unrelated clustering disabled\n");
@@ -1154,6 +1212,112 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster(
11541212
return PackMoleculeId::INVALID();
11551213
}
11561214

1215+
PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appack(
1216+
ClusterGainStats& cluster_gain_stats,
1217+
LegalizationClusterId cluster_id,
1218+
const ClusterLegalizer& cluster_legalizer) {
1219+
1220+
/**
1221+
* For APPack, we want to find a close candidate with the highest number
1222+
* of available inputs which could be packed into the given cluster.
1223+
* We will search for candidates in a BFS manner, where we will search in
1224+
* the same 1x1 grid location of the cluster for a compatible candidate, and
1225+
* will then search out if none can be found.
1226+
*
1227+
* Here, a molecule is compatible if:
1228+
* - It has not been clustered already
1229+
* - The number of inputs it has available is less than or equal to the
1230+
* number of inputs available in the cluster.
1231+
* - It has not tried to be packed in this cluster before.
1232+
* - It is compatible with the cluster.
1233+
*/
1234+
1235+
VTR_ASSERT_MSG(allow_unrelated_clustering_,
1236+
"Cannot get unrelated candidates when unrelated clustering "
1237+
"is disabled");
1238+
1239+
VTR_ASSERT_MSG(appack_ctx_.appack_options.use_appack,
1240+
"APPack is disabled, cannot get unrelated clusters using "
1241+
"flat placement information");
1242+
1243+
// The cluster will likely have more inputs available than a single molecule
1244+
// would have available (clusters have more pins). Clamp the inputs available
1245+
// to the max number of inputs a molecule could have.
1246+
size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id);
1247+
VTR_ASSERT_SAFE(!appack_unrelated_clustering_data_.empty());
1248+
size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0].size() - 1;
1249+
if (inputs_avail >= max_molecule_inputs_avail) {
1250+
inputs_avail = max_molecule_inputs_avail;
1251+
}
1252+
1253+
// Create a queue of locations to search and a map of visited grid locations.
1254+
std::queue<t_flat_pl_loc> search_queue;
1255+
vtr::NdMatrix<bool, 2> visited({appack_unrelated_clustering_data_.dim_size(0),
1256+
appack_unrelated_clustering_data_.dim_size(1)},
1257+
false);
1258+
// Push the position of the cluster to the queue.
1259+
search_queue.push(cluster_gain_stats.flat_cluster_position);
1260+
1261+
while (!search_queue.empty()) {
1262+
// Pop a position to search from the queue.
1263+
const t_flat_pl_loc& node_loc = search_queue.front();
1264+
VTR_ASSERT_SAFE(node_loc.layer == 0);
1265+
1266+
// If this position is too far from the source, skip it.
1267+
float dist = get_manhattan_distance(node_loc, cluster_gain_stats.flat_cluster_position);
1268+
if (dist > 1) {
1269+
search_queue.pop();
1270+
continue;
1271+
}
1272+
1273+
// If this position has been visited, skip it.
1274+
if (visited[node_loc.x][node_loc.y]) {
1275+
search_queue.pop();
1276+
continue;
1277+
}
1278+
visited[node_loc.x][node_loc.y] = true;
1279+
1280+
// Explore this position from highest number of inputs available to lowest.
1281+
const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y];
1282+
VTR_ASSERT_SAFE(inputs_avail < uc_data.size());
1283+
for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
1284+
// Get the molecule by the number of external inputs.
1285+
for (PackMoleculeId mol_id : uc_data[ext_inps]) {
1286+
// If this molecule has been clustered, skip it.
1287+
if (cluster_legalizer.is_mol_clustered(mol_id))
1288+
continue;
1289+
// If this molecule has tried to be packed before and failed
1290+
// do not try it. This also means that this molecule may be
1291+
// related to this cluster in some way.
1292+
if (cluster_gain_stats.mol_failures.find(mol_id) != cluster_gain_stats.mol_failures.end())
1293+
continue;
1294+
// If this molecule is not compatible with the current cluster
1295+
// skip it.
1296+
if (!cluster_legalizer.is_molecule_compatible(mol_id, cluster_id))
1297+
continue;
1298+
// Return this molecule as the unrelated candidate.
1299+
return mol_id;
1300+
}
1301+
}
1302+
1303+
// Push the neighbors of the position to the queue.
1304+
if (node_loc.x >= 1.0f)
1305+
search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer});
1306+
if (node_loc.x <= visited.dim_size(0) - 2)
1307+
search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer});
1308+
if (node_loc.y >= 1.0f)
1309+
search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer});
1310+
if (node_loc.y <= visited.dim_size(1) - 2)
1311+
search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer});
1312+
1313+
// Pop the position off the queue.
1314+
search_queue.pop();
1315+
}
1316+
1317+
// No molecule could be found. Return an invalid ID.
1318+
return PackMoleculeId::INVALID();
1319+
}
1320+
11571321
void GreedyCandidateSelector::update_candidate_selector_finalize_cluster(
11581322
ClusterGainStats& cluster_gain_stats,
11591323
LegalizationClusterId cluster_id) {

0 commit comments

Comments
 (0)