|
8 | 8 | #include "greedy_candidate_selector.h"
|
9 | 9 | #include <algorithm>
|
10 | 10 | #include <cmath>
|
| 11 | +#include <queue> |
11 | 12 | #include <vector>
|
12 | 13 | #include "appack_context.h"
|
13 | 14 | #include "flat_placement_types.h"
|
|
16 | 17 | #include "attraction_groups.h"
|
17 | 18 | #include "cluster_legalizer.h"
|
18 | 19 | #include "cluster_placement.h"
|
19 |
| -#include "globals.h" |
20 | 20 | #include "greedy_clusterer.h"
|
21 | 21 | #include "prepack.h"
|
22 | 22 | #include "timing_info.h"
|
23 |
| -#include "vpr_context.h" |
24 | 23 | #include "vpr_types.h"
|
25 | 24 | #include "vtr_assert.h"
|
| 25 | +#include "vtr_ndmatrix.h" |
26 | 26 | #include "vtr_vector.h"
|
27 | 27 |
|
28 | 28 | /*
|
@@ -105,44 +105,93 @@ GreedyCandidateSelector::GreedyCandidateSelector(
|
105 | 105 | , timing_info_(timing_info)
|
106 | 106 | , appack_ctx_(appack_ctx)
|
107 | 107 | , rng_(0) {
|
108 |
| - // Initialize the list of molecules to pack, the clustering data, and the |
109 |
| - // net info. |
110 | 108 |
|
111 |
| - // Initialize unrelated clustering data. |
| 109 | + // Initialize unrelated clustering data if unrelated clustering is enabled. |
112 | 110 | if (allow_unrelated_clustering_) {
|
113 |
| - /* alloc and load list of molecules to pack */ |
114 |
| - unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1); |
| 111 | + initialize_unrelated_clustering_data(max_molecule_stats); |
| 112 | + } |
| 113 | + |
| 114 | + /* TODO: This is memory inefficient, fix if causes problems */ |
| 115 | + /* Store stats on nets used by packed block, useful for determining transitively connected blocks |
| 116 | + * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ |
| 117 | + clb_inter_blk_nets_.resize(atom_netlist.blocks().size()); |
| 118 | +} |
| 119 | + |
| 120 | +void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molecule_stats& max_molecule_stats) { |
| 121 | + // Create a sorted list of molecules, sorted on decreasing molecule base |
| 122 | + // gain. (Highest gain). |
| 123 | + std::vector<PackMoleculeId> molecules_vector; |
| 124 | + molecules_vector.assign(prepacker_.molecules().begin(), prepacker_.molecules().end()); |
| 125 | + std::stable_sort(molecules_vector.begin(), |
| 126 | + molecules_vector.end(), |
| 127 | + [&](PackMoleculeId a_id, PackMoleculeId b_id) { |
| 128 | + const t_pack_molecule& a = prepacker_.get_molecule(a_id); |
| 129 | + const t_pack_molecule& b = prepacker_.get_molecule(b_id); |
| 130 | + |
| 131 | + return a.base_gain > b.base_gain; |
| 132 | + }); |
| 133 | + |
| 134 | + if (appack_ctx_.appack_options.use_appack) { |
| 135 | + /** |
| 136 | + * For APPack, we build a spatial data structure where for each 1x1 grid |
| 137 | + * position on the FPGA, we maintain lists of molecule candidates. |
| 138 | + * The lists are in order of number of used external pins by the molecule. |
| 139 | + * Within each list, the molecules are sorted by their base gain. |
| 140 | + */ |
| 141 | + // Get the max x, y, and layer from the flat placement. |
| 142 | + t_flat_pl_loc max_loc({0.0f, 0.0f, 0.0f}); |
| 143 | + for (PackMoleculeId mol_id : molecules_vector) { |
| 144 | + t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_); |
| 145 | + max_loc.x = std::max(max_loc.x, mol_pos.x); |
| 146 | + max_loc.y = std::max(max_loc.y, mol_pos.y); |
| 147 | + max_loc.layer = std::max(max_loc.layer, mol_pos.layer); |
| 148 | + } |
115 | 149 |
|
116 |
| - // Create a sorted list of molecules, sorted on decreasing molecule base |
117 |
| - // gain. (Highest gain). |
118 |
| - std::vector<PackMoleculeId> molecules_vector; |
119 |
| - molecules_vector.assign(prepacker.molecules().begin(), prepacker.molecules().end()); |
120 |
| - std::stable_sort(molecules_vector.begin(), |
121 |
| - molecules_vector.end(), |
122 |
| - [&](PackMoleculeId a_id, PackMoleculeId b_id) { |
123 |
| - const t_pack_molecule& a = prepacker.get_molecule(a_id); |
124 |
| - const t_pack_molecule& b = prepacker.get_molecule(b_id); |
| 150 | + VTR_ASSERT_MSG(max_loc.layer == 0, |
| 151 | + "APPack unrelated clustering does not support 3D " |
| 152 | + "FPGAs yet"); |
| 153 | + |
| 154 | + // Initialize the data structure with empty arrays with enough space |
| 155 | + // for each molecule. |
| 156 | + size_t flat_grid_width = max_loc.x + 1; |
| 157 | + size_t flat_grid_height = max_loc.y + 1; |
| 158 | + appack_unrelated_clustering_data_ = |
| 159 | + vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2>({flat_grid_width, |
| 160 | + flat_grid_height}); |
| 161 | + for (size_t x = 0; x < flat_grid_width; x++) { |
| 162 | + for (size_t y = 0; y < flat_grid_height; y++) { |
| 163 | + appack_unrelated_clustering_data_[x][y].resize(max_molecule_stats.num_used_ext_pins + 1); |
| 164 | + } |
| 165 | + } |
| 166 | + |
| 167 | + // Fill the grid with molecule information. |
| 168 | + for (PackMoleculeId mol_id : molecules_vector) { |
| 169 | + t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_); |
125 | 170 |
|
126 |
| - return a.base_gain > b.base_gain; |
127 |
| - }); |
| 171 | + //Figure out how many external inputs are used by this molecule |
| 172 | + t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_); |
| 173 | + int ext_inps = molecule_stats.num_used_ext_inputs; |
| 174 | + |
| 175 | + //Insert the molecule into the unclustered lists by number of external inputs |
| 176 | + auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x][mol_pos.y]; |
| 177 | + tile_uc_data[ext_inps].push_back(mol_id); |
| 178 | + } |
| 179 | + } else { |
| 180 | + /* alloc and load list of molecules to pack */ |
| 181 | + unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1); |
128 | 182 |
|
129 | 183 | // Push back the each molecule into the unrelated clustering data vector
|
130 | 184 | // for their external inputs. This creates individual sorted lists of
|
131 | 185 | // molecules for each number of used external inputs.
|
132 | 186 | for (PackMoleculeId mol_id : molecules_vector) {
|
133 | 187 | //Figure out how many external inputs are used by this molecule
|
134 |
| - t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(mol_id, atom_netlist); |
| 188 | + t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_); |
135 | 189 | int ext_inps = molecule_stats.num_used_ext_inputs;
|
136 | 190 |
|
137 | 191 | //Insert the molecule into the unclustered lists by number of external inputs
|
138 | 192 | unrelated_clustering_data_[ext_inps].push_back(mol_id);
|
139 | 193 | }
|
140 | 194 | }
|
141 |
| - |
142 |
| - /* TODO: This is memory inefficient, fix if causes problems */ |
143 |
| - /* Store stats on nets used by packed block, useful for determining transitively connected blocks |
144 |
| - * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ |
145 |
| - clb_inter_blk_nets_.resize(atom_netlist.blocks().size()); |
146 | 195 | }
|
147 | 196 |
|
148 | 197 | GreedyCandidateSelector::~GreedyCandidateSelector() {
|
@@ -281,6 +330,7 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success(
|
281 | 330 | // Whenever a new molecule has been clustered, reset the number of
|
282 | 331 | // unrelated clustering attempts.
|
283 | 332 | num_unrelated_clustering_attempts_ = 0;
|
| 333 | + // cluster_gain_stats.unrelated_molecules_attempted.clear(); |
284 | 334 |
|
285 | 335 | // If using APPack, update the position of the cluster based on the addition
|
286 | 336 | // of this molecule.
|
@@ -673,15 +723,23 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
|
673 | 723 | // If we are allowing unrelated clustering and no molecule has been found,
|
674 | 724 | // get unrelated candidate for cluster.
|
675 | 725 | if (allow_unrelated_clustering_ && best_molecule == PackMoleculeId::INVALID()) {
|
676 |
| - if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) { |
677 |
| - best_molecule = get_unrelated_candidate_for_cluster(cluster_id, |
678 |
| - cluster_legalizer); |
679 |
| - num_unrelated_clustering_attempts_++; |
680 |
| - VTR_LOGV(best_molecule && log_verbosity_ > 2, |
681 |
| - "\tFound unrelated molecule to cluster\n"); |
| 726 | + const t_appack_options& appack_options = appack_ctx_.appack_options; |
| 727 | + if (appack_options.use_appack) { |
| 728 | + if (num_unrelated_clustering_attempts_ < appack_options.max_unrelated_clustering_attempts) { |
| 729 | + best_molecule = get_unrelated_candidate_for_cluster_appack(cluster_gain_stats, |
| 730 | + cluster_id, |
| 731 | + cluster_legalizer); |
| 732 | + num_unrelated_clustering_attempts_++; |
| 733 | + } |
682 | 734 | } else {
|
683 |
| - num_unrelated_clustering_attempts_ = 0; |
| 735 | + if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) { |
| 736 | + best_molecule = get_unrelated_candidate_for_cluster(cluster_id, |
| 737 | + cluster_legalizer); |
| 738 | + num_unrelated_clustering_attempts_++; |
| 739 | + } |
684 | 740 | }
|
| 741 | + VTR_LOGV(best_molecule && log_verbosity_ > 2, |
| 742 | + "\tFound unrelated molecule to cluster\n"); |
685 | 743 | } else {
|
686 | 744 | VTR_LOGV(!best_molecule && log_verbosity_ > 2,
|
687 | 745 | "\tNo related molecule found and unrelated clustering disabled\n");
|
@@ -1154,6 +1212,112 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster(
|
1154 | 1212 | return PackMoleculeId::INVALID();
|
1155 | 1213 | }
|
1156 | 1214 |
|
| 1215 | +PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appack( |
| 1216 | + ClusterGainStats& cluster_gain_stats, |
| 1217 | + LegalizationClusterId cluster_id, |
| 1218 | + const ClusterLegalizer& cluster_legalizer) { |
| 1219 | + |
| 1220 | + /** |
| 1221 | + * For APPack, we want to find a close candidate with the highest number |
| 1222 | + * of available inputs which could be packed into the given cluster. |
| 1223 | + * We will search for candidates in a BFS manner, where we will search in |
| 1224 | + * the same 1x1 grid location of the cluster for a compatible candidate, and |
| 1225 | + * will then search out if none can be found. |
| 1226 | + * |
| 1227 | + * Here, a molecule is compatible if: |
| 1228 | + * - It has not been clustered already |
| 1229 | + * - The number of inputs it has available is less than or equal to the |
| 1230 | + * number of inputs available in the cluster. |
| 1231 | + * - It has not tried to be packed in this cluster before. |
| 1232 | + * - It is compatible with the cluster. |
| 1233 | + */ |
| 1234 | + |
| 1235 | + VTR_ASSERT_MSG(allow_unrelated_clustering_, |
| 1236 | + "Cannot get unrelated candidates when unrelated clustering " |
| 1237 | + "is disabled"); |
| 1238 | + |
| 1239 | + VTR_ASSERT_MSG(appack_ctx_.appack_options.use_appack, |
| 1240 | + "APPack is disabled, cannot get unrelated clusters using " |
| 1241 | + "flat placement information"); |
| 1242 | + |
| 1243 | + // The cluster will likely have more inputs available than a single molecule |
| 1244 | + // would have available (clusters have more pins). Clamp the inputs available |
| 1245 | + // to the max number of inputs a molecule could have. |
| 1246 | + size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id); |
| 1247 | + VTR_ASSERT_SAFE(!appack_unrelated_clustering_data_.empty()); |
| 1248 | + size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0].size() - 1; |
| 1249 | + if (inputs_avail >= max_molecule_inputs_avail) { |
| 1250 | + inputs_avail = max_molecule_inputs_avail; |
| 1251 | + } |
| 1252 | + |
| 1253 | + // Create a queue of locations to search and a map of visited grid locations. |
| 1254 | + std::queue<t_flat_pl_loc> search_queue; |
| 1255 | + vtr::NdMatrix<bool, 2> visited({appack_unrelated_clustering_data_.dim_size(0), |
| 1256 | + appack_unrelated_clustering_data_.dim_size(1)}, |
| 1257 | + false); |
| 1258 | + // Push the position of the cluster to the queue. |
| 1259 | + search_queue.push(cluster_gain_stats.flat_cluster_position); |
| 1260 | + |
| 1261 | + while (!search_queue.empty()) { |
| 1262 | + // Pop a position to search from the queue. |
| 1263 | + const t_flat_pl_loc& node_loc = search_queue.front(); |
| 1264 | + VTR_ASSERT_SAFE(node_loc.layer == 0); |
| 1265 | + |
| 1266 | + // If this position is too far from the source, skip it. |
| 1267 | + float dist = get_manhattan_distance(node_loc, cluster_gain_stats.flat_cluster_position); |
| 1268 | + if (dist > 1) { |
| 1269 | + search_queue.pop(); |
| 1270 | + continue; |
| 1271 | + } |
| 1272 | + |
| 1273 | + // If this position has been visited, skip it. |
| 1274 | + if (visited[node_loc.x][node_loc.y]) { |
| 1275 | + search_queue.pop(); |
| 1276 | + continue; |
| 1277 | + } |
| 1278 | + visited[node_loc.x][node_loc.y] = true; |
| 1279 | + |
| 1280 | + // Explore this position from highest number of inputs available to lowest. |
| 1281 | + const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y]; |
| 1282 | + VTR_ASSERT_SAFE(inputs_avail < uc_data.size()); |
| 1283 | + for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { |
| 1284 | + // Get the molecule by the number of external inputs. |
| 1285 | + for (PackMoleculeId mol_id : uc_data[ext_inps]) { |
| 1286 | + // If this molecule has been clustered, skip it. |
| 1287 | + if (cluster_legalizer.is_mol_clustered(mol_id)) |
| 1288 | + continue; |
| 1289 | + // If this molecule has tried to be packed before and failed |
| 1290 | + // do not try it. This also means that this molecule may be |
| 1291 | + // related to this cluster in some way. |
| 1292 | + if (cluster_gain_stats.mol_failures.find(mol_id) != cluster_gain_stats.mol_failures.end()) |
| 1293 | + continue; |
| 1294 | + // If this molecule is not compatible with the current cluster |
| 1295 | + // skip it. |
| 1296 | + if (!cluster_legalizer.is_molecule_compatible(mol_id, cluster_id)) |
| 1297 | + continue; |
| 1298 | + // Return this molecule as the unrelated candidate. |
| 1299 | + return mol_id; |
| 1300 | + } |
| 1301 | + } |
| 1302 | + |
| 1303 | + // Push the neighbors of the position to the queue. |
| 1304 | + if (node_loc.x >= 1.0f) |
| 1305 | + search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer}); |
| 1306 | + if (node_loc.x <= visited.dim_size(0) - 2) |
| 1307 | + search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer}); |
| 1308 | + if (node_loc.y >= 1.0f) |
| 1309 | + search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer}); |
| 1310 | + if (node_loc.y <= visited.dim_size(1) - 2) |
| 1311 | + search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer}); |
| 1312 | + |
| 1313 | + // Pop the position off the queue. |
| 1314 | + search_queue.pop(); |
| 1315 | + } |
| 1316 | + |
| 1317 | + // No molecule could be found. Return an invalid ID. |
| 1318 | + return PackMoleculeId::INVALID(); |
| 1319 | +} |
| 1320 | + |
1157 | 1321 | void GreedyCandidateSelector::update_candidate_selector_finalize_cluster(
|
1158 | 1322 | ClusterGainStats& cluster_gain_stats,
|
1159 | 1323 | LegalizationClusterId cluster_id) {
|
|
0 commit comments