|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +""" This module generates and parses large runs that sweep |
| 4 | +multiple parameters. """ |
| 5 | +import itertools |
| 6 | +import os |
| 7 | +import sys |
| 8 | +import csv |
| 9 | +import pandas as pd |
| 10 | +import numpy as np |
| 11 | +from scipy import stats |
| 12 | + |
# Parameter sweep definition: each key is a command-line flag and each value is
# the list of settings to try for it. generate_combinations() emits one config
# line per element of the Cartesian product of these lists.
PARAMS_DICT = {
    "--seed": [1, 2],
    "--place_algorithm": ["criticality_timing"],
    "--place_agent_epsilon": [0.3],
}

# When True, generate_avg_seed_csv() keeps only "circuit", "arch", the
# PARAMS_DICT keys and the metrics listed in PARSED_METRICS.
KEEP_METRICS_ONLY = True
PARSED_METRICS = ["num_io", "num_LAB"]
| 23 | + |
| 24 | + |
def safe_gmean(series):
    """Return the geometric mean of the non-zero, non-missing values of a series.

    Zeros are ignored (a single zero would force the geometric mean to zero)
    and missing values are dropped before averaging.

    Args:
        series: pandas Series of numeric values.

    Returns:
        The geometric mean of the remaining values, or NaN when no usable
        value remains.
    """
    usable = series.replace({0: np.nan}).dropna()
    if usable.empty:
        # Nothing to average: return NaN explicitly instead of relying on how
        # scipy treats empty input.
        return np.nan
    return stats.gmean(usable)
| 29 | + |
| 30 | + |
def generate_combinations(params_dict=None):
    """Build one config line per combination of the swept parameter values.

    Args:
        params_dict: Mapping from command-line flag to the list of values to
            sweep for it. Defaults to the module-level PARAMS_DICT.

    Returns:
        A list of newline-terminated strings of the form
        "script_params_list_add=<flag> <value> <flag> <value> ...", one per
        element of the Cartesian product of the value lists, in
        itertools.product order.
    """
    if params_dict is None:
        params_dict = PARAMS_DICT

    flags = list(params_dict)
    lines = []
    for combination in itertools.product(*params_dict.values()):
        params_str = " ".join(
            f"{flag} {value}" for flag, value in zip(flags, combination)
        )
        lines.append(f"script_params_list_add={params_str}\n")
    return lines
| 42 | + |
| 43 | + |
def parse_results(input_path):
    """Convert the newest run's parse_results.txt into CSV/XLSX summaries.

    Looks under ``input_path`` for the ``runXXX`` directory with the largest
    number, rewrites its tab-separated ``parse_results.txt`` as
    ``full_res.csv`` (the ``script_params`` column is replaced by one column
    per PARAMS_DICT key), then generates ``avg_seed.csv``, ``geomean_res.csv``
    and ``summary.xlsx`` in the same run directory.

    Args:
        input_path: Directory that contains the ``runXXX`` directories.

    Raises:
        SystemExit: With status 1, after printing a message, when no run
            directory or no parse_results.txt is found, or when the file has
            no ``script_params`` header column.
    """
    # Only real directories named run<digits> qualify as run directories.
    run_dirs = [
        entry
        for entry in os.listdir(input_path)
        if entry.startswith("run")
        and entry[3:].isdigit()
        and os.path.isdir(os.path.join(input_path, entry))
    ]
    if not run_dirs:
        print("No runXXX directories found in the specified input path.")
        sys.exit(1)

    largest_run_path = os.path.join(
        input_path, max(run_dirs, key=lambda entry: int(entry[3:]))
    )
    parse_results_path = os.path.join(largest_run_path, "parse_results.txt")
    full_res_csv_path = os.path.join(largest_run_path, "full_res.csv")

    if not os.path.exists(parse_results_path):
        print("File parse_results.txt not found.")
        sys.exit(1)

    param_keys = list(PARAMS_DICT)

    with open(parse_results_path, "r", newline="", encoding="utf-8") as txt_file:
        reader = csv.reader(txt_file, delimiter="\t")

        # Validate the header before creating full_res.csv so that a bad input
        # file does not leave a truncated output behind.
        headers = next(reader, None)
        if not headers or "script_params" not in headers:
            print("Column script_params not found in parse_results.txt.")
            sys.exit(1)
        script_params_index = headers.index("script_params")

        with open(full_res_csv_path, "w", newline="", encoding="utf-8") as csv_file:
            writer = csv.writer(csv_file)

            # Replace the single script_params column by one column per
            # swept parameter.
            writer.writerow(
                headers[:script_params_index]
                + param_keys
                + headers[script_params_index + 1 :]
            )

            for row in reader:
                if not row:
                    # Blank line (e.g. trailing newline): nothing to convert.
                    continue
                script_params_dict = parse_script_params(row[script_params_index])
                writer.writerow(
                    row[:script_params_index]
                    + [script_params_dict.get(key, "") for key in param_keys]
                    + row[script_params_index + 1 :]
                )

    print(f"Converted parse_results.txt to {full_res_csv_path}")

    # avg_seed.csv is always written; the averaging itself only happens when
    # a --seed column exists.
    generate_avg_seed_csv(full_res_csv_path, largest_run_path)
    print("Generated average seed results")

    # The geometric means are computed from the seed-averaged results.
    generate_geomean_res_csv(
        os.path.join(largest_run_path, "avg_seed.csv"), largest_run_path
    )
    print("Generated geometric average results over all the circuits")

    generate_xlsx(largest_run_path)
    print("Generated xlsx that merges all the result csv files")
| 107 | + |
| 108 | + |
def generate_xlsx(largest_run_path):
    """Merge the result CSV files of a run into a single Excel workbook.

    Reads full_res.csv, avg_seed.csv and geomean_res.csv from
    ``largest_run_path`` and writes them to summary.xlsx in that directory as
    the sheets "Full res", "Avg. seeds" and "Summary" respectively.

    Args:
        largest_run_path: Run directory holding the three CSV files.
    """
    sheets = (
        ("Full res", "full_res.csv"),
        ("Avg. seeds", "avg_seed.csv"),
        ("Summary", "geomean_res.csv"),
    )
    workbook_path = os.path.join(largest_run_path, "summary.xlsx")

    # pylint: disable=abstract-class-instantiated
    with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
        for sheet_name, csv_name in sheets:
            # One sheet per CSV file, without the dataframe index column.
            frame = pd.read_csv(os.path.join(largest_run_path, csv_name))
            frame.to_excel(writer, sheet_name=sheet_name, index=False)
| 129 | + |
| 130 | + |
def parse_script_params(script_params, params_dict=None):
    """Extract the swept parameter values from a script_params string.

    The string is split on "_" and scanned left to right. Whenever the tokens
    at the current position spell one of the known keys (keys may themselves
    contain "_"), every following token up to the next known key, or the end
    of the string, is joined back with "_" and stored as that key's value.
    Tokens that belong to no key are skipped.

    Args:
        script_params: The script_params field of one parse_results.txt row.
        params_dict: Mapping whose keys are the flags to look for. Defaults
            to the module-level PARAMS_DICT.

    Returns:
        A dict with one entry per key of ``params_dict``; keys that do not
        appear in ``script_params`` map to the empty string.
    """
    if params_dict is None:
        params_dict = PARAMS_DICT

    parsed_params = {key: "" for key in params_dict}
    parts = script_params.split("_")

    # Split each key once, and try longer keys first so that a key whose
    # tokens are a prefix of another key's tokens cannot shadow it.
    key_tokens = sorted(
        ((key, key.split("_")) for key in params_dict),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    def match_key(position):
        """Return (key, token_count) for the key starting at position, if any."""
        for key, tokens in key_tokens:
            if parts[position : position + len(tokens)] == tokens:
                return key, len(tokens)
        return None, 0

    i = 0
    while i < len(parts):
        key, key_length = match_key(i)
        if key is None:
            i += 1
            continue

        # The value runs until the next recognised key or the end of input.
        value_start = i + key_length
        j = value_start
        while j < len(parts) and match_key(j)[0] is None:
            j += 1

        parsed_params[key] = "_".join(parts[value_start:j])
        i = j

    return parsed_params
| 163 | + |
| 164 | + |
def generate_avg_seed_csv(full_res_csv_path, output_dir):
    """Average the results over the seeds and write them to avg_seed.csv.

    When KEEP_METRICS_ONLY is set, only the "circuit" and "arch" columns, the
    PARAMS_DICT keys and the PARSED_METRICS columns are kept. If a "--seed"
    column is present, rows are grouped by circuit, arch and every other swept
    parameter found in the file, and the numeric columns are averaged within
    each group; otherwise the (possibly filtered) table is written unchanged.

    Args:
        full_res_csv_path: Path of the CSV file holding the per-run results.
        output_dir: Directory in which avg_seed.csv is written.
    """
    df = pd.read_csv(full_res_csv_path)

    if KEEP_METRICS_ONLY:
        kept_columns = {"circuit", "arch", *PARAMS_DICT, *PARSED_METRICS}
        # Filter while preserving the column order of the input file.
        df = df[[col for col in df.columns if col in kept_columns]]

    if "--seed" in df.columns:
        grouping_keys = ["circuit", "arch"] + [
            key for key in PARAMS_DICT if key != "--seed" and key in df.columns
        ]

        # dropna=False keeps the rows whose parameter value is empty (read
        # back as NaN); by default groupby would silently discard them.
        df_grouped = (
            df.groupby(grouping_keys, dropna=False)
            .mean(numeric_only=True)
            .reset_index()
            # The averaged seed value is meaningless, so it is not reported.
            .drop(columns=["--seed"], errors="ignore")
        )
    else:
        df_grouped = df

    df_grouped.to_csv(os.path.join(output_dir, "avg_seed.csv"), index=False)
| 199 | + |
| 200 | + |
def generate_geomean_res_csv(full_res_csv_path, output_dir):
    """Write the geometric mean of every metric over the circuits.

    Rows are grouped by the swept parameters other than "--seed" that are
    present in the input file; within each group every numeric column is
    reduced with safe_gmean and every non-numeric column is blanked. The
    "circuit" and "arch" columns are removed from the output. When no swept
    parameter column is available, a single row covering the whole file is
    produced.

    Args:
        full_res_csv_path: Path of the CSV file to summarise (parse_results()
            passes the seed-averaged avg_seed.csv here).
        output_dir: Directory in which geomean_res.csv is written.
    """
    df = pd.read_csv(full_res_csv_path)

    # Only group by parameters that really exist as columns; a missing one
    # would otherwise raise a KeyError inside groupby.
    param_columns = [
        key for key in PARAMS_DICT if key != "--seed" and key in df.columns
    ]
    non_param_columns = [col for col in df.columns if col not in param_columns]

    def _gmean_or_blank(column):
        """Geometric mean for numeric columns, empty string for the others."""
        if pd.api.types.is_numeric_dtype(column):
            return safe_gmean(column)
        return ""

    if param_columns:
        # pylint: disable=no-member
        geomean_df = (
            # dropna=False keeps groups whose parameter value is empty (NaN).
            df.groupby(param_columns, dropna=False)
            .agg({col: _gmean_or_blank for col in non_param_columns})
            .reset_index()
        )
    else:
        # groupby() rejects an empty key list, so summarise the whole table
        # as one row instead.
        geomean_df = pd.DataFrame(
            [{col: _gmean_or_blank(df[col]) for col in non_param_columns}]
        )

    # Per-circuit identifiers have no meaning once circuits are merged.
    geomean_df = geomean_df.drop(columns=["circuit", "arch"], errors="ignore")

    geomean_df.to_csv(os.path.join(output_dir, "geomean_res.csv"), index=False)
| 226 | + |
| 227 | + |
def main():
    """Command-line entry point.

    Expects two arguments, an option and a directory path:

    * ``--generate``: append one line per parameter combination to
      ``<path_to_directory>/config/config.txt``, creating the config
      directory when needed.
    * ``--parse``: parse the results found under ``<path_to_directory>``.

    Any other option, or too few arguments, prints a message and exits with
    status 1.
    """
    if len(sys.argv) < 3:
        print("Usage: script.py <option> <path_to_directory>")
        sys.exit(1)

    option, directory_path = sys.argv[1], sys.argv[2]

    if option == "--parse":
        parse_results(directory_path)
        return

    if option != "--generate":
        print("Invalid option. Use --generate or --parse")
        sys.exit(1)

    # --generate: build the lines first, then append them to the config file.
    new_lines = generate_combinations()

    config_path = os.path.join(directory_path, "config", "config.txt")
    os.makedirs(os.path.dirname(config_path), exist_ok=True)

    with open(config_path, "a") as config_file:
        config_file.writelines(new_lines)

    print(f"Appended lines to {config_path}")


if __name__ == "__main__":
    main()
0 commit comments