Skip to content

Commit

Permalink
Pass a MicrobatchBuilder to MicrobatchBatchRunner
Browse files Browse the repository at this point in the history
This is necessary because the materialization executor needs the
`MicrobatchBuilder` in order to build the jinja context.
  • Loading branch information
QMalcolm committed Feb 24, 2025
1 parent 1a8fbf1 commit 3e857dd
Showing 1 changed file with 112 additions and 120 deletions.
232 changes: 112 additions & 120 deletions core/dbt/task/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,119 +336,6 @@ def execute(self, model, manifest):
return self._execute_model(hook_ctx, context_config, model, context, materialization_macro)


class MicrobatchModelRunnerOLD(ModelRunner):
    """Legacy runner that materializes one batch of a microbatch model.

    NOTE(review): superseded by `MicrobatchBatchRunner`; kept here only until
    callers migrate.

    Fix: `_execute_microbatch_materialization` previously referenced
    `microbatch_builder` as a free name, which raised a NameError inside the
    `try` block and silently turned every batch into a generic failure. The
    builder is now injected through the constructor (optional, so existing
    call sites remain valid).
    """

    def __init__(
        self,
        config,
        adapter,
        node,
        node_index: int,
        num_nodes: int,
        microbatch_builder: Optional[MicrobatchBuilder] = None,
    ):
        super().__init__(config, adapter, node, node_index, num_nodes)

        # TODO: make these non-optional once batch state moves to the batch runner.
        self.batch_idx: Optional[int] = None
        self.batches: Dict[int, BatchType] = {}
        self.relation_exists: bool = False
        # Needed to build the per-batch jinja context; may be None for
        # backward compatibility with older call sites (in which case batch
        # execution fails per-batch, as the undefined name previously did).
        self.microbatch_builder: Optional[MicrobatchBuilder] = microbatch_builder

    def _build_succesful_run_batch_result(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        """Return a standard run result with *batch* recorded as successful."""
        # TODO: move to batch runner
        run_result = self._build_run_model_result(model, context, elapsed_time)
        run_result.batch_results = BatchResults(successful=[batch])
        return run_result

    def _build_failed_run_batch_result(
        self,
        model: ModelNode,
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        """Return an error-status run result with *batch* recorded as failed."""
        # TODO: move to batch runner
        return RunResult(
            node=model,
            status=RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message="ERROR",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[batch]),
        )

    def _execute_microbatch_materialization(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        """Materialize the batch at `self.batch_idx` and return its RunResult.

        Non-interrupt exceptions are converted into a failed batch result
        rather than propagated.
        """
        # TODO: This method should be moved to the batch runner

        batch = self.batches[self.batch_idx]
        # call materialization_macro to get a batch-level run result
        start_time = time.perf_counter()
        try:
            # Update jinja context with batch context members. If no builder
            # was injected this raises inside the try and is reported as a
            # failed batch (matching the prior NameError behavior).
            jinja_context = self.microbatch_builder.build_jinja_context_for_batch(
                incremental_batch=self.relation_exists
            )
            context.update(jinja_context)

            # Materialize batch and cache any materialized relations
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
            for relation in self._materialization_relations(result, model):
                self.adapter.cache_added(relation.incorporate(dbt_created=True))

            # Build result of executed batch
            batch_run_result = self._build_succesful_run_batch_result(
                model, context, batch, time.perf_counter() - start_time
            )
            batch_result = batch_run_result

            # At least one batch has been inserted successfully!
            # Can proceed incrementally + in parallel
            self.relation_exists = True

        except (KeyboardInterrupt, SystemExit):
            # reraise it for GraphRunnableTask.execute_nodes to handle
            raise
        except Exception as e:
            fire_event(
                GenericExceptionOnRun(
                    unique_id=self.node.unique_id,
                    exc=f"Exception on worker thread. {str(e)}",
                    node_info=self.node.node_info,
                )
            )
            batch_run_result = self._build_failed_run_batch_result(
                model, batch, time.perf_counter() - start_time
            )

            batch_result = batch_run_result

        return batch_result

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        """Execute the batch materialization, always firing the post-model hook."""
        # TODO: Move to batch runner
        try:
            batch_result = self._execute_microbatch_materialization(
                model, context, materialization_macro
            )
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        return batch_result


class MicrobatchBatchRunner(ModelRunner):
"""Handles the running of individual batches"""

Expand All @@ -462,12 +349,14 @@ def __init__(
batch_idx: int,
batches: Dict[int, BatchType],
relation_exists: bool,
microbatch_builder: MicrobatchBuilder,
):
super().__init__(config, adapter, node, node_index, num_nodes)

Check warning on line 354 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L354

Added line #L354 was not covered by tests

self.batch_idx = batch_idx
self.batches = batches
self.relation_exists = relation_exists
self.microbatch_builder = microbatch_builder

Check warning on line 359 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L358-L359

Added lines #L358 - L359 were not covered by tests

def describe_node(self) -> str:
# TODO: I'm not sure if we actually need this. We should try removing it once everything
Expand Down Expand Up @@ -583,6 +472,104 @@ def compile(self, manifest: Manifest):

return self.node

Check warning on line 473 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L473

Added line #L473 was not covered by tests

def _build_succesful_run_batch_result(
    self,
    model: ModelNode,
    context: Dict[str, Any],
    batch: BatchType,
    elapsed_time: float = 0.0,
) -> RunResult:
    """Wrap a standard model run result, attaching *batch* as successful."""
    successful_batches = BatchResults(successful=[batch])
    result = self._build_run_model_result(model, context, elapsed_time)
    result.batch_results = successful_batches
    return result

def _build_failed_run_batch_result(
    self,
    model: ModelNode,
    batch: BatchType,
    elapsed_time: float = 0.0,
) -> RunResult:
    """Construct an error-status RunResult recording *batch* as failed."""
    failed_batches = BatchResults(failed=[batch])
    thread_name = threading.current_thread().name
    return RunResult(
        node=model,
        status=RunStatus.Error,
        timing=[],
        thread_id=thread_name,
        execution_time=elapsed_time,
        message="ERROR",
        adapter_response={},
        failures=1,
        batch_results=failed_batches,
    )

def _execute_microbatch_materialization(
    self,
    model: ModelNode,
    context: Dict[str, Any],
    materialization_macro: MacroProtocol,
) -> RunResult:
    """Run the materialization macro for this runner's batch.

    Returns a per-batch RunResult. Interrupts are re-raised for
    GraphRunnableTask.execute_nodes; any other exception is reported via an
    event and converted into a failed batch result instead of propagating.
    """
    batch = self.batches[self.batch_idx]
    started = time.perf_counter()
    try:
        # Merge the batch-specific members into the jinja context.
        batch_jinja_context = self.microbatch_builder.build_jinja_context_for_batch(
            incremental_batch=self.relation_exists
        )
        context.update(batch_jinja_context)

        # Materialize the batch and cache any relations it produced.
        macro_result = MacroGenerator(
            materialization_macro, context, stack=context["context_macro_stack"]
        )()
        for relation in self._materialization_relations(macro_result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        batch_result = self._build_succesful_run_batch_result(
            model, context, batch, time.perf_counter() - started
        )

        # At least one batch has landed, so subsequent batches may run
        # incrementally and in parallel.
        self.relation_exists = True
    except (KeyboardInterrupt, SystemExit):
        # Re-raise so GraphRunnableTask.execute_nodes can handle the interrupt.
        raise
    except Exception as e:
        fire_event(
            GenericExceptionOnRun(
                unique_id=self.node.unique_id,
                exc=f"Exception on worker thread. {str(e)}",
                node_info=self.node.node_info,
            )
        )
        batch_result = self._build_failed_run_batch_result(
            model, batch, time.perf_counter() - started
        )

    return batch_result

def _execute_model(
    self,
    hook_ctx: Any,
    context_config: Any,
    model: ModelNode,
    context: Dict[str, Any],
    materialization_macro: MacroProtocol,
) -> RunResult:
    """Execute the batch materialization, always firing the post-model hook."""
    try:
        result = self._execute_microbatch_materialization(
            model, context, materialization_macro
        )
    finally:
        # The post-hook must run even when materialization raises.
        self.adapter.post_model_hook(context_config, hook_ctx)
    return result


class MicrobatchModelRunner(ModelRunner):
"""Handles the orchestration of batches to run for a given microbatch model"""
Expand Down Expand Up @@ -688,12 +675,7 @@ def merge_batch_results(self, result: RunResult, batch_results: List[RunResult])
if self.node.previous_batch_results is not None:
result.batch_results.successful += self.node.previous_batch_results.successful

Check warning on line 676 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L675-L676

Added lines #L675 - L676 were not covered by tests

def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
"""Get the batches that should be run for the model"""

# TODO: All of this setup for creating MicrobatchBuilder should _only_
# happen if there are failed batches for us to retry

def get_microbatch_builder(self, model: ModelNode) -> MicrobatchBuilder:
# Initially set the start/end to values from args
event_time_start = getattr(self.config.args, "EVENT_TIME_START", None)
event_time_end = getattr(self.config.args, "EVENT_TIME_END", None)

Check warning on line 681 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L680-L681

Added lines #L680 - L681 were not covered by tests
Expand All @@ -705,18 +687,22 @@ def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
event_time_start = self.config.args.sample.start
event_time_end = self.config.args.sample.end

Check warning on line 688 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L687-L688

Added lines #L687 - L688 were not covered by tests

microbatch_builder = MicrobatchBuilder(
return MicrobatchBuilder(

Check warning on line 690 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L690

Added line #L690 was not covered by tests
model=model,
is_incremental=self._is_incremental(model),
event_time_start=event_time_start,
event_time_end=event_time_end,
default_end_time=get_invocation_started_at(),
)

def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
"""Get the batches that should be run for the model"""

# Note currently (02/23/2025) model.previous_batch_results is only ever _not_ `None`
# IFF `dbt retry` is being run and the microbatch model had batches which
# failed on the run of the model (which is being retried)
if model.previous_batch_results is None:
microbatch_builder = self.get_microbatch_builder(model)
end = microbatch_builder.build_end_time()
start = microbatch_builder.build_start_time(end)
batches = microbatch_builder.build_batches(start, end)

Check warning on line 708 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L704-L708

Added lines #L704 - L708 were not covered by tests
Expand Down Expand Up @@ -746,6 +732,7 @@ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:

batch_results: List[RunResult] = []
batch_idx = 0
microbatch_builder = self.get_microbatch_builder(model)

Check warning on line 735 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L735

Added line #L735 was not covered by tests

# Run first batch not in parallel
relation_exists = self.parent_task._submit_batch(

Check warning on line 738 in core/dbt/task/run.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/task/run.py#L738

Added line #L738 was not covered by tests
Expand All @@ -757,6 +744,7 @@ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
batch_results=batch_results,
pool=self.pool,
force_sequential_run=True,
microbatch_builder=microbatch_builder,
)
batch_idx += 1
skip_batches = batch_results[0].status != RunStatus.Success
Expand All @@ -772,6 +760,7 @@ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
batch_results=batch_results,
pool=self.pool,
skip=skip_batches,
microbatch_builder=microbatch_builder,
)
batch_idx += 1

Expand All @@ -793,6 +782,7 @@ def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
pool=self.pool,
force_sequential_run=True,
skip=skip_batches,
microbatch_builder=microbatch_builder,
)

# Finalize run: merge results, track model run, and print final result line
Expand Down Expand Up @@ -855,6 +845,7 @@ def _submit_batch(
batch_idx: int,
batch_results: List[RunResult],
pool: ThreadPool,
microbatch_builder: MicrobatchBuilder,
force_sequential_run: bool = False,
skip: bool = False,
):
Expand All @@ -879,6 +870,7 @@ def _submit_batch(
batch_idx,
batches,
relation_exists,
microbatch_builder,
)

if skip:
Expand Down

0 comments on commit 3e857dd

Please sign in to comment.