diff --git a/conda_forge_tick/auto_tick.py b/conda_forge_tick/auto_tick.py
index 908e97a9e..b411b7b86 100644
--- a/conda_forge_tick/auto_tick.py
+++ b/conda_forge_tick/auto_tick.py
@@ -9,7 +9,7 @@
 import traceback
 import typing
 from dataclasses import dataclass
-from typing import Literal, cast
+from typing import AnyStr, Literal, cast
 from urllib.error import URLError
 from uuid import uuid4

@@ -41,6 +41,7 @@
 )
 from conda_forge_tick.lazy_json_backends import (
     LazyJson,
+    does_key_exist_in_hashmap,
     get_all_keys_for_hashmap,
     lazy_json_transaction,
     remove_key_for_hashmap,
@@ -50,7 +51,6 @@
     PR_LIMIT,
     load_migrators,
 )
-from conda_forge_tick.migration_runner import run_migration
 from conda_forge_tick.migrators import MigrationYaml, Migrator, Version
 from conda_forge_tick.migrators.version import VersionMigrationError
 from conda_forge_tick.os_utils import eval_cmd
@@ -69,6 +69,7 @@
     sanitize_string,
 )

+from .migration_runner import run_migration
 from .migrators_types import MigrationUidTypedDict
 from .models.pr_json import PullRequestData, PullRequestInfoSpecial, PullRequestState

@@ -627,7 +628,7 @@ def run(
     return migration_run_data["migrate_return_value"], pr_lazy_json


-def _compute_time_per_migrator(mctx, migrators):
+def _compute_time_per_migrator(migrators):
     # we weight each migrator by the number of available nodes to migrate
     num_nodes = []
     for migrator in tqdm.tqdm(migrators, ncols=80, desc="computing time per migrator"):
@@ -757,6 +758,7 @@ def _run_migrator_on_feedstock_branch(
     except (github3.GitHubError, github.GithubException) as e:
         # TODO: pull this down into run() - also check the other exceptions
+        # TODO: continue here, after that run locally and add tests, backend should be injected into run
         if hasattr(e, "msg") and e.msg == "Repository was archived so is read-only.":
             attrs["archived"] = True
         else:
@@ -847,10 +849,11 @@ def _run_migrator_on_feedstock_branch(
     return good_prs, break_loop


-def _is_migrator_done(_mg_start, good_prs, time_per, pr_limit):
+def _is_migrator_done(
+    _mg_start, good_prs, time_per, pr_limit, git_backend: GitPlatformBackend
+):
     curr_time = time.time()
-    backend = github_backend()
-    api_req = backend.get_api_requests_left()
+    api_req = git_backend.get_api_requests_left()

     if curr_time - START_TIME > TIMEOUT:
         logger.info(
@@ -886,8 +889,25 @@

 def _run_migrator(
-    migrator, mctx, temp, time_per, dry_run, git_backend: GitPlatformBackend
-):
+    migrator: Migrator,
+    mctx: MigratorSessionContext,
+    temp: list[AnyStr],
+    time_per: float,
+    git_backend: GitPlatformBackend,
+    package: str | None = None,
+) -> int:
+    """
+    Run a migrator.
+
+    :param migrator: The migrator to run.
+    :param mctx: The migrator session context.
+    :param temp: The list of temporary files.
+    :param time_per: The time limit of this migrator.
+    :param git_backend: The GitPlatformBackend instance to use.
+    :param package: The package to update, if None, all packages are updated.
+
+    :return: The number of "good" PRs created by the migrator.
+    """
     _mg_start = time.time()

     migrator_name = get_migrator_name(migrator)
@@ -909,6 +929,15 @@ def _run_migrator(

     possible_nodes = list(migrator.order(effective_graph, mctx.graph))

+    if package:
+        if package not in possible_nodes:
+            logger.info(
+                f"Package {package} is not a candidate for migration of {migrator_name}. "
+                f"If you want to investigate this, run the make-migrators command."
+            )
+            return 0
+        possible_nodes = [package]
+
     # version debugging info
     if isinstance(migrator, Version):
         print("possible version migrations:", flush=True)
@@ -941,7 +970,9 @@ def _run_migrator(
             flush=True,
         )

-        if _is_migrator_done(_mg_start, good_prs, time_per, migrator.pr_limit):
+        if _is_migrator_done(
+            _mg_start, good_prs, time_per, migrator.pr_limit, git_backend
+        ):
             return 0

     for node_name in possible_nodes:
@@ -958,7 +989,9 @@ def _run_migrator(
         ):
             # Don't let CI timeout, break ahead of the timeout so we make certain
             # to write to the repo
-            if _is_migrator_done(_mg_start, good_prs, time_per, migrator.pr_limit):
+            if _is_migrator_done(
+                _mg_start, good_prs, time_per, migrator.pr_limit, git_backend
+            ):
                 break

         base_branches = migrator.get_possible_feedstock_branches(attrs)
@@ -1028,8 +1061,7 @@ def _run_migrator(
                 os.chdir(BOT_HOME_DIR)

     # Write graph partially through
-    if not dry_run:
-        dump_graph(mctx.graph)
+    dump_graph(mctx.graph)

     with filter_reprinted_lines("rm-tmp"):
         for f in glob.glob("/tmp/*"):
@@ -1054,18 +1086,26 @@ def _setup_limits():
         resource.setrlimit(resource.RLIMIT_AS, (limit_int, limit_int))


-def _update_nodes_with_bot_rerun(gx: nx.DiGraph):
-    """Go through all the open PRs and check if they are rerun"""
+def _update_nodes_with_bot_rerun(gx: nx.DiGraph, package: str | None = None):
+    """
+    Go through all the open PRs and check if they are rerun
+
+    :param gx: the dependency graph
+    :param package: the package to update, if None, all packages are updated
+    """

     print("processing bot-rerun labels", flush=True)

-    for i, (name, node) in enumerate(gx.nodes.items()):
+    nodes = gx.nodes.items() if not package else [(package, gx.nodes[package])]
+
+    for i, (name, node) in enumerate(nodes):
         # logger.info(
         #     f"node: {i} memory usage: "
         #     f"{psutil.Process().memory_info().rss // 1024 ** 2}MB",
         # )
         with node["payload"] as payload:
             if payload.get("archived", False):
+                logger.debug(f"skipping archived package {name}")
                 continue
             with payload["pr_info"] as pri, payload["version_pr_info"] as vpri:
                 # reset bad
@@ -1115,12 +1155,21 @@ def _filter_ignored_versions(attrs, version):
         return version


-def _update_nodes_with_new_versions(gx):
-    """Updates every node with it's new version (when available)"""
+def _update_nodes_with_new_versions(gx: nx.DiGraph, package: str | None = None):
+    """
+    Updates every node with its new version (when available)
+
+    :param gx: the dependency graph
+    :param package: the package to update, if None, all packages are updated
+    """

     print("updating nodes with new versions", flush=True)

-    version_nodes = get_all_keys_for_hashmap("versions")
+    if package and not does_key_exist_in_hashmap("versions", package):
+        logger.warning(f"Package {package} not found in versions hashmap")
+        return
+
+    version_nodes = get_all_keys_for_hashmap("versions") if not package else [package]

     for node in version_nodes:
         version_data = LazyJson(f"versions/{node}.json").data
@@ -1146,13 +1195,35 @@ def _update_nodes_with_new_versions(gx):
                 vpri["new_version"] = version_from_data


-def _remove_closed_pr_json():
+def _remove_closed_pr_json(package: str | None = None):
+    """
+    Remove the pull request information for closed PRs.
+
+    :param package: The package to remove the PR information for. If None, all PR information is removed. If you pass
+    a package, closed pr_json files are not removed because this would require iterating all pr_json files.
+    """
     print("collapsing closed PR json", flush=True)

+    if package:
+        pr_info_nodes = (
+            [package] if does_key_exist_in_hashmap("pr_info", package) else []
+        )
+        version_pr_info_nodes = (
+            [package] if does_key_exist_in_hashmap("version_pr_info", package) else []
+        )
+
+        if not pr_info_nodes:
+            logger.warning(f"Package {package} not found in pr_info hashmap")
+        if not version_pr_info_nodes:
+            logger.warning(f"Package {package} not found in version_pr_info hashmap")
+    else:
+        pr_info_nodes = get_all_keys_for_hashmap("pr_info")
+        version_pr_info_nodes = get_all_keys_for_hashmap("version_pr_info")
+
     # first we go from nodes to pr json and update the pr info and remove the data
     name_nodes = [
-        ("pr_info", get_all_keys_for_hashmap("pr_info")),
-        ("version_pr_info", get_all_keys_for_hashmap("version_pr_info")),
+        ("pr_info", pr_info_nodes),
+        ("version_pr_info", version_pr_info_nodes),
     ]
     for name, nodes in name_nodes:
         for node in nodes:
@@ -1185,6 +1256,11 @@ def _remove_closed_pr_json():

     # at this point, any json blob referenced in the pr info is state != closed
     # so we can remove anything that is empty or closed
+    if package:
+        logger.info(
+            "Since you requested a run for a specific package, we are not removing closed pr_json files."
+        )
+        return
     nodes = get_all_keys_for_hashmap("pr_json")
     for node in nodes:
         pr = LazyJson(f"pr_json/{node}.json")
@@ -1195,22 +1271,22 @@
         )


-def _update_graph_with_pr_info():
-    _remove_closed_pr_json()
+def _update_graph_with_pr_info(package: str | None = None):
+    _remove_closed_pr_json(package)
     gx = load_existing_graph()
-    _update_nodes_with_bot_rerun(gx)
-    _update_nodes_with_new_versions(gx)
+    _update_nodes_with_bot_rerun(gx, package)
+    _update_nodes_with_new_versions(gx, package)
     dump_graph(gx)


-def main(ctx: CliContext) -> None:
+def main(ctx: CliContext, package: str | None = None) -> None:
     global START_TIME
     START_TIME = time.time()

     _setup_limits()

     with fold_log_lines("updating graph with PR info"):
-        _update_graph_with_pr_info()
+        _update_graph_with_pr_info(package)
         deploy(ctx, dirs_to_deploy=["version_pr_info", "pr_json", "pr_info"])

     # record tmp dir so we can be sure to clean it later
@@ -1229,8 +1305,8 @@ def main(ctx: CliContext) -> None:
         graph=gx,
         smithy_version=smithy_version,
         pinning_version=pinning_version,
-        dry_run=ctx.dry_run,
     )
+    # TODO: this does not support --online
     migrators = load_migrators()

     # compute the time per migrator
@@ -1241,7 +1317,6 @@ def main(ctx: CliContext) -> None:
         time_per_migrator,
         tot_time_per_migrator,
     ) = _compute_time_per_migrator(
-        mctx,
         migrators,
     )
     for i, migrator in enumerate(migrators):
@@ -1265,7 +1340,7 @@ def main(ctx: CliContext) -> None:

     for mg_ind, migrator in enumerate(migrators):
         good_prs = _run_migrator(
-            migrator, mctx, temp, time_per_migrator[mg_ind], ctx.dry_run, git_backend
+            migrator, mctx, temp, time_per_migrator[mg_ind], git_backend, package
         )
         if good_prs > 0:
             pass
@@ -1280,5 +1355,5 @@ def main(ctx: CliContext) -> None:
     #         ],
     #     )

-    logger.info("API Calls Remaining: %d", github_backend().get_api_requests_left())
+    logger.info(f"API Calls Remaining: {git_backend.get_api_requests_left()}")
     logger.info("Done")
diff --git a/conda_forge_tick/cli.py b/conda_forge_tick/cli.py
index d57660935..57e47ab8d 100644
--- a/conda_forge_tick/cli.py
+++ b/conda_forge_tick/cli.py
@@ -151,11 +151,20 @@ def update_upstream_versions(


 @main.command(name="auto-tick")
+@click.argument(
+    "package",
+    required=False,
+)
 @pass_context
-def auto_tick(ctx: CliContext) -> None:
+def auto_tick(ctx: CliContext, package: str | None) -> None:
+    """
+    Run the main bot logic that runs all migrations, updates the graph accordingly, and opens the corresponding PRs.
+
+    If PACKAGE is given, only run the bot for that package, otherwise run the bot for all packages.
+    """
     from . import auto_tick

-    auto_tick.main(ctx)
+    auto_tick.main(ctx, package=package)


 @main.command(name="make-status-report")
diff --git a/conda_forge_tick/contexts.py b/conda_forge_tick/contexts.py
index 7933f3250..3d1057326 100644
--- a/conda_forge_tick/contexts.py
+++ b/conda_forge_tick/contexts.py
@@ -27,7 +27,6 @@ class MigratorSessionContext:
     graph: DiGraph = None
     smithy_version: str = ""
     pinning_version: str = ""
-    dry_run: bool = True


 @dataclass(frozen=True)
diff --git a/conda_forge_tick/lazy_json_backends.py b/conda_forge_tick/lazy_json_backends.py
index 6a56ca666..98b865574 100644
--- a/conda_forge_tick/lazy_json_backends.py
+++ b/conda_forge_tick/lazy_json_backends.py
@@ -630,6 +630,17 @@ def get_all_keys_for_hashmap(name):
     return backend.hkeys(name)


+def does_key_exist_in_hashmap(name: str, key: str) -> bool:
+    """
+    Check if a key exists in a hashmap, using the primary backend.
+    :param name: The hashmap name.
+    :param key: The key to check.
+    :return: True if the key exists, False otherwise.
+    """
+    backend = LAZY_JSON_BACKENDS[CF_TICK_GRAPH_DATA_PRIMARY_BACKEND]()
+    return backend.hexists(name, key)
+
+
 @contextlib.contextmanager
 def lazy_json_transaction():
     try:
diff --git a/conda_forge_tick/make_migrators.py b/conda_forge_tick/make_migrators.py
index fa5171bb4..099cd0a34 100644
--- a/conda_forge_tick/make_migrators.py
+++ b/conda_forge_tick/make_migrators.py
@@ -706,7 +706,6 @@ def create_migration_yaml_creator(

 def initialize_migrators(
     gx: nx.DiGraph,
-    dry_run: bool = False,
 ) -> MutableSequence[Migrator]:
     migrators: List[Migrator] = []

@@ -832,10 +831,7 @@ def load_migrators(skip_paused: bool = True) -> MutableSequence[Migrator]:
 def main(ctx: CliContext) -> None:
     gx = load_existing_graph()

-    migrators = initialize_migrators(
-        gx,
-        dry_run=ctx.dry_run,
-    )
+    migrators = initialize_migrators(gx)

     with (
         fold_log_lines("dumping migrators to JSON"),
         lazy_json_override_backends(
diff --git a/conda_forge_tick/status_report.py b/conda_forge_tick/status_report.py
index 46b51e6a3..f0d026f39 100644
--- a/conda_forge_tick/status_report.py
+++ b/conda_forge_tick/status_report.py
@@ -402,7 +402,6 @@ def main() -> None:
         graph=gx,
         smithy_version=smithy_version,
         pinning_version=pinning_version,
-        dry_run=False,
     )

     migrators = load_migrators(skip_paused=False)
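
Note (editorial, not part of the patch): with the cli.py change above, the auto-tick command now accepts an optional PACKAGE argument so the bot can be pointed at a single feedstock. The snippet below is a minimal sketch of how the new does_key_exist_in_hashmap helper and the package-scoped guards it enables are expected to behave; the package name "some-feedstock" is purely illustrative, while the hashmap names ("versions", "pr_info", "version_pr_info") are the ones used in the patch.

    # Sketch only: exercising the helper added in lazy_json_backends.py.
    from conda_forge_tick.lazy_json_backends import does_key_exist_in_hashmap

    package = "some-feedstock"  # illustrative key, not a real feedstock

    # _update_nodes_with_new_versions() short-circuits when the requested
    # package has no entry in the "versions" hashmap.
    if not does_key_exist_in_hashmap("versions", package):
        print(f"{package} has no cached version data; nothing to update")

    # _remove_closed_pr_json() applies the same existence check to both
    # PR-info hashmaps instead of iterating every key.
    for name in ("pr_info", "version_pr_info"):
        nodes = [package] if does_key_exist_in_hashmap(name, package) else []
        print(name, "->", nodes or "not found")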