diff --git a/src/autogluon/bench/eval/aggregate/aggregate.py b/src/autogluon/bench/eval/aggregate/aggregate.py
new file mode 100644
index 00000000..6b8b0335
--- /dev/null
+++ b/src/autogluon/bench/eval/aggregate/aggregate.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import logging
+from typing import List
+
+from autogluon.bench.eval.benchmark_context.output_suite_context import OutputSuiteContext
+from autogluon.common.savers import save_pd
+
+logger = logging.getLogger(__name__)
+
+
+def aggregate(
+    s3_bucket: str,
+    module: str,
+    benchmark_name: str,
+    artifacts: List[str] | None = None,
+    constraint: str | None = None,
+    include_infer_speed: bool = False,
+    mode: str = "ray",
+) -> None:
+    """
+    Aggregates objects across an agbenchmark. Functionality depends on artifact specified:
+
+    Params:
+    -------
+    s3_bucket: str
+        Name of the relevant s3_bucket
+    module: str
+        The name of the relevant autogluon module:
+        can be one of ['tabular', 'timeseries', 'multimodal']
+    benchmark_name: str
+        The name of the relevant benchmark that was run
+    artifacts: List[str]
+        The desired artifact to be aggregated can be one of ['results', 'learning_curves']
+        Defaults to ["results"] when not provided.
+    constraint: str
+        Name of constraint used in benchmark
+    include_infer_speed: bool
+        Include inference speed in aggregation.
+    mode: str
+        Can be one of ['seq', 'ray'].
+        If seq, runs sequentially.
+        If ray, utilizes parallelization.
+    """
+    # None sentinel avoids a shared mutable default argument.
+    if artifacts is None:
+        artifacts = ["results"]
+
+    # Validate before doing any work so a bad artifact name cannot trigger a partial aggregation.
+    valid_artifacts = ["results", "learning_curves"]
+    unknown_artifacts = [artifact for artifact in artifacts if artifact not in valid_artifacts]
+    if unknown_artifacts:
+        raise ValueError(f"Unknown artifacts: {unknown_artifacts}")
+
+    result_path = f"{module}/{benchmark_name}"
+    path_prefix = f"s3://{s3_bucket}/{result_path}/"
+    contains = f".{constraint}." if constraint else None
+
+    output_suite_context = OutputSuiteContext(
+        path=path_prefix,
+        contains=contains,
+        include_infer_speed=include_infer_speed,
+        mode=mode,
+    )
+
+    if "results" in artifacts:
+        aggregated_results_name = f"results_automlbenchmark_{constraint}_{benchmark_name}.csv"
+        results_df = output_suite_context.aggregate_results()
+        artifact_path = f"s3://{s3_bucket}/aggregated/{result_path}/{aggregated_results_name}"
+        save_pd.save(path=artifact_path, df=results_df)
+        logger.info(f"Aggregated results output saved to {artifact_path}!")
+
+    if "learning_curves" in artifacts:
+        artifact_path = f"s3://{s3_bucket}/aggregated/{result_path}/learning_curves"
+        output_suite_context.aggregate_learning_curves(save_path=artifact_path)
+        logger.info(f"Aggregated learning curves output saved to {artifact_path}!")
diff --git a/src/autogluon/bench/eval/aggregate/results.py b/src/autogluon/bench/eval/aggregate/results.py
deleted file mode 100644
index 2c14da64..00000000
--- a/src/autogluon/bench/eval/aggregate/results.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import logging
-
-from autogluon.bench.eval.benchmark_context.output_suite_context import OutputSuiteContext
-from autogluon.common.savers import save_pd
-
-logger = logging.getLogger(__name__)
-
-
-def aggregate_results(s3_bucket, s3_prefix, version_name, constraint, include_infer_speed=False, mode="ray"):
-    contains = f".{constraint}." if constraint else None
-    result_path = f"{s3_prefix}{version_name}/"
-    path_prefix = f"s3://{s3_bucket}/{result_path}"
-
-    aggregated_results_name = f"results_automlbenchmark_{constraint}_{version_name}.csv"
-
-    output_suite_context = OutputSuiteContext(
-        path=path_prefix,
-        contains=contains,
-        include_infer_speed=include_infer_speed,
-        mode=mode,
-    )
-    results_df = output_suite_context.aggregate_results()
-
-    print(results_df)
-
-    save_path = f"s3://{s3_bucket}/aggregated/{result_path}{aggregated_results_name}"
-    save_pd.save(path=save_path, df=results_df)
-    logger.info(f"Aggregated results saved to {save_path}!")
diff --git a/src/autogluon/bench/eval/benchmark_context/output_context.py b/src/autogluon/bench/eval/benchmark_context/output_context.py
index 7a88587a..199ed03e 100644
--- a/src/autogluon/bench/eval/benchmark_context/output_context.py
+++ b/src/autogluon/bench/eval/benchmark_context/output_context.py
@@ -1,3 +1,4 @@
+import logging
 import zipfile
 from io import BytesIO, TextIOWrapper
 from typing import Optional, Set, Union
@@ -6,9 +7,12 @@
 import numpy as np
 import pandas as pd
 
+from autogluon.bench.eval.benchmark_context.utils import get_s3_paths
 from autogluon.common.loaders import load_pd, load_pkl
 from autogluon.common.utils.s3_utils import s3_path_to_bucket_prefix
 
+logger = logging.getLogger(__name__)
+
 
 class OutputContext:
     def __init__(self, path):
@@ -49,6 +53,10 @@ def path_results(self):
     def path_leaderboard(self):
         return self.path + "leaderboard.csv"
 
+    @property
+    def path_learning_curves(self):
+        return self.path + "learning_curves/"
+
     @property
     def path_model_failures(self):
         return self.path + "model_failures.csv"
@@ -99,6 +107,44 @@ def load_results(
     def load_leaderboard(self) -> pd.DataFrame:
         return load_pd.load(self.path_leaderboard)
 
+    def load_learning_curves(self, save_path: str, suffix: str = "learning_curves.json") -> None:
+        """
+        Params:
+        -------
+        save_path: str
+            the path to save all learning curve artifacts
+        suffix: str
+            the suffix matching all learning curves files
+        """
+        try:
+            # copy_s3_file method not yet in stable release of autogluon
+            from autogluon.common.utils.s3_utils import copy_s3_file
+        except ImportError as err:
+            raise ImportError(
+                "Install AutoGluon from source to get access to copy_s3_file from autogluon.common.utils.s3_utils"
+            ) from err
+
+        path = self.path_learning_curves
+        all_curves = get_s3_paths(path_prefix=path, suffix=suffix)
+
+        for origin_path in all_curves:
+            dataset, fold = self.get_dataset_fold(origin_path)
+            destination_path = f"{save_path}/{dataset}/{fold}/learning_curves.json"
+            copy_s3_file(origin_path=origin_path, destination_path=destination_path)
+
+    def get_dataset_fold(self, path_str: str) -> tuple[str, str]:
+        parts = path_str.rstrip("/").split("/")
+
+        if len(parts) < 3:
+            raise ValueError(
+                f"Improper folder dimensions at {path_str}. Expected following path structure: .../dataset/fold/learning_curves.json"
+            )
+
+        # path pattern: .../dataset/fold/learning_curves.json
+        dataset, fold, _ = parts[-3:]
+
+        return dataset, fold
+
     def load_model_failures(self) -> pd.DataFrame:
         """Load and return the raw model failures file"""
         return load_pd.load(self.path_model_failures)
diff --git a/src/autogluon/bench/eval/benchmark_context/output_suite_context.py b/src/autogluon/bench/eval/benchmark_context/output_suite_context.py
index f05b8824..0624a601 100644
--- a/src/autogluon/bench/eval/benchmark_context/output_suite_context.py
+++ b/src/autogluon/bench/eval/benchmark_context/output_suite_context.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import copy
+import logging
 import os
 from typing import List, Optional, Set
 
@@ -12,6 +13,8 @@
 from autogluon.bench.eval.benchmark_context.utils import get_s3_paths
 from autogluon.common.loaders import load_pd
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_COLUMNS_TO_KEEP = [
     "id",
     "task",
@@ -175,6 +178,13 @@ def get_zeroshot_metadata_size_bytes(self, allow_exception=False) -> List[int]:
             allow_exception=allow_exception,
         )
 
+    def load_learning_curves(self, save_path: str) -> list[None]:
+        return self._loop_func(
+            func=OutputContext.load_learning_curves,
+            input_list=self.output_contexts,
+            kwargs=dict(save_path=save_path),
+        )
+
     def filter_failures(self):
         amlb_info_list = self.get_amlb_info()
         output_contexts_valid = []
@@ -206,6 +216,9 @@ def aggregate_results(self, results_list: List[pd.DataFrame] | None = None) -> p
         results_df = pd.concat(results_list, ignore_index=True)
         return results_df
 
+    def aggregate_learning_curves(self, save_path: str) -> None:
+        self.load_learning_curves(save_path=save_path)
+
     def load_leaderboards(self) -> List[pd.DataFrame]:
         if self.num_contexts == 0:
             raise AssertionError("Empty output_contexts!")
diff --git a/src/autogluon/bench/eval/benchmark_context/utils.py b/src/autogluon/bench/eval/benchmark_context/utils.py
index a235d12f..a2b5e844 100644
--- a/src/autogluon/bench/eval/benchmark_context/utils.py
+++ b/src/autogluon/bench/eval/benchmark_context/utils.py
@@ -1,8 +1,31 @@
+from __future__ import annotations
+
 from autogluon.common.loaders import load_s3
 from autogluon.common.utils import s3_utils
 
 
-def get_s3_paths(path_prefix: str, contains=None, suffix=None):
+def get_s3_paths(path_prefix: str, contains: str | None = None, suffix: str | None = None) -> list[str]:
+    """
+    Gets all s3 paths in the path_prefix that contain 'contains'
+    and end with 'suffix.'
+
+    Params:
+    -------
+    path_prefix: str
+        The path prefix.
+    contains : Optional[str], default = None
+        Can be specified to limit the returned outputs.
+        For example, by specifying the constraint, such as ".1h8c."
+    suffix: str, default = None
+        Can be specified to limit the returned outputs.
+        For example, by specifying "leaderboard.csv" only objects ending
+        with this suffix will be included
+        If no suffix provided, will save all files in artifact directory.
+
+    Returns:
+    --------
+    List[str]: All s3 paths that adhere to the conditions passed in.
+    """
     bucket, prefix = s3_utils.s3_path_to_bucket_prefix(path_prefix)
     objects = load_s3.list_bucket_prefix_suffix_contains_s3(
         bucket=bucket, prefix=prefix, suffix=suffix, contains=contains
diff --git a/src/autogluon/bench/eval/scripts/aggregate_amlb_results.py b/src/autogluon/bench/eval/scripts/aggregate_amlb_results.py
index de757a9e..8dbd2e81 100644
--- a/src/autogluon/bench/eval/scripts/aggregate_amlb_results.py
+++ b/src/autogluon/bench/eval/scripts/aggregate_amlb_results.py
@@ -1,6 +1,8 @@
+from typing import List
+
 import typer
 
-from autogluon.bench.eval.aggregate.results import aggregate_results
+from autogluon.bench.eval.aggregate.aggregate import aggregate
 
 app = typer.Typer()
@@ -12,6 +14,9 @@ def aggregate_amlb_results(
     benchmark_name: str = typer.Argument(
         help="Folder name of benchmark run in which all objects with path 'scores/results.csv' get aggregated."
     ),
+    artifact: List[str] = typer.Option(
+        ["results"], help="What should be saved, can be any of ['results', 'learning_curves'], default='results'"
+    ),
     constraint: str = typer.Option(
         None,
         help="Name of constraint used in benchmark, refer to https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml. Not applicable when `module==multimodal`",
@@ -20,17 +25,51 @@
     mode: str = typer.Option("ray", help='Aggregation mode: "seq" or "ray".'),
 ):
     """
-    Finds "scores/results.csv" under s3://<s3_bucket>/<module>/<benchmark_name> recursively with the constraint if provided,
-    and append all results into one file at s3://<s3_bucket>/aggregated/<module>/<benchmark_name>/results_automlbenchmark_<constraint>_<benchmark_name>.csv
+    Aggregates objects across an agbenchmark. Functionality depends on artifact specified:
+
+    Params:
+    -------
+    s3_bucket: str
+        Name of the relevant s3_bucket
+    module: str
+        The name of the relevant autogluon module: can be one of ['tabular', 'timeseries', 'multimodal']
+    benchmark_name: str
+        The name of the relevant benchmark that was run
+    artifact: List[str]
+        The desired artifact(s) to be aggregated can be one of ['results', 'learning_curves']
+    constraint: str
+        Name of constraint used in benchmark
+    include_infer_speed: bool
+        Include inference speed in aggregation.
+    mode: str
+        Can be one of ['seq', 'ray'].
+        If seq, runs sequentially.
+        If ray, utilizes parallelization.
+
+    Artifact Outcomes: ['results', 'learning_curves']
+        results:
+            Finds "scores/results.csv" under s3://<s3_bucket>/<module>/<benchmark_name> recursively with the constraint if provided,
+            and append all results into one file at s3://<s3_bucket>/aggregated/<module>/<benchmark_name>/results_automlbenchmark_<constraint>_<benchmark_name>.csv
+
+        Example:
+            agbench aggregate-amlb-results autogluon-benchmark-metrics tabular ag_tabular_20230629T140546 --constraint test
+
+        learning_curves:
+            Finds specified learning_curves.json files under s3://<s3_bucket>/<module>/<benchmark_name> recursively with the constraint if provided,
+            and stores all artifacts in common directory at s3://<s3_bucket>/aggregated/<module>/<benchmark_name>/
+
+        Example:
+            agbench aggregate-amlb-results autogluon-benchmark-metrics tabular ag_bench_learning_curves_20240802T163522 --artifact learning_curves --constraint toy
 
-    Example:
-    agbench aggregate-amlb-results autogluon-benchmark-metrics tabular ag_tabular_20230629T140546 --constraint test
+    # to generate both
+    agbench aggregate-amlb-results autogluon-benchmark-metrics tabular ag_bench_learning_curves_20240802T163522 --artifact results --artifact learning_curves --constraint toy
     """
-    aggregate_results(
+    aggregate(
         s3_bucket=s3_bucket,
-        s3_prefix=f"{module}/",
-        version_name=benchmark_name,
+        module=module,
+        benchmark_name=benchmark_name,
+        artifacts=artifact,
         constraint=constraint,
         include_infer_speed=include_infer_speed,
         mode=mode,
     )