diff --git a/.changes/unreleased/Fixes-20250305-110257.yaml b/.changes/unreleased/Fixes-20250305-110257.yaml new file mode 100644 index 00000000000..2d119600066 --- /dev/null +++ b/.changes/unreleased/Fixes-20250305-110257.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fixes parsing errors when using the new YAML format for snapshots +time: 2025-03-05T11:02:57.829685Z +custom: + Author: amardatar + Issue: "11164" diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 179daf642c7..37e8e9a3141 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -7,6 +7,7 @@ AnySourceFile, ParseFileType, SchemaSourceFile, + SourceFile, parse_file_type_to_parser, ) from dbt.contracts.graph.manifest import Manifest @@ -344,7 +345,11 @@ def remove_node_in_saved(self, source_file, unique_id): if node.patch_path: file_id = node.patch_path # it might be changed... then what? - if file_id not in self.file_diff["deleted"] and file_id in self.saved_files: + if ( + file_id not in self.file_diff["deleted"] + and file_id in self.saved_files + and source_file.parse_file_type in parse_file_type_to_key + ): # Schema files should already be updated if this comes from a node, # but this code is also called when updating groups and exposures. # This might save the old schema file element, so when the schema file @@ -391,10 +396,10 @@ def update_fixture_in_saved(self, new_source_file, old_source_file): self.saved_files[new_source_file.file_id] = deepcopy(new_source_file) self.add_to_pp_files(new_source_file) - def remove_mssat_file(self, source_file): + def remove_mssat_file(self, source_file: AnySourceFile): # nodes [unique_ids] -- SQL files # There should always be a node for a SQL file - if not source_file.nodes: + if not isinstance(source_file, SourceFile) or not source_file.nodes: return # There is generally only 1 node for SQL files, except for macros and snapshots for unique_id in source_file.nodes: diff --git a/tests/functional/snapshots/fixtures.py b/tests/functional/snapshots/fixtures.py index df0c29b8fa5..0b591ae1a80 100644 --- a/tests/functional/snapshots/fixtures.py +++ b/tests/functional/snapshots/fixtures.py @@ -307,6 +307,16 @@ - not_null """ +snapshots_pg__source_snapshot_yml = """ +snapshots: + - name: snapshot_source + relation: "source('information_schema', 'tables')" + config: + unique_key: "table_schema || '-' || table_name" + strategy: check + check_cols: all +""" + snapshots_pg__snapshot_mod_yml = """ snapshots: - name: snapshot_actual @@ -417,3 +427,23 @@ select * from {{target.database}}.{{schema}}.seed {% endsnapshot %} """ + + +sources_pg__source_yml = """ +sources: + - name: information_schema + database: dbt + schema: information_schema + tables: + - name: tables +""" + +sources_pg__source_mod_yml = """ +sources: + - name: information_schema + database: dbt + schema: information_schema + tables: + - name: tables + description: tables +""" diff --git a/tests/functional/snapshots/test_basic_snapshot.py b/tests/functional/snapshots/test_basic_snapshot.py index 5300f921971..df01bd2f5dd 100644 --- a/tests/functional/snapshots/test_basic_snapshot.py +++ b/tests/functional/snapshots/test_basic_snapshot.py @@ -23,8 +23,11 @@ snapshots_pg__snapshot_no_target_schema_sql, snapshots_pg__snapshot_sql, snapshots_pg__snapshot_yml, + snapshots_pg__source_snapshot_yml, snapshots_pg_custom__snapshot_sql, snapshots_pg_custom_namespaced__snapshot_sql, + sources_pg__source_mod_yml, + sources_pg__source_yml, ) snapshots_check_col__snapshot_sql = """ @@ -382,20 +385,24 @@ class BasicYaml(Basic): def snapshots(self): """Overrides the same function in Basic to use the YAML method of defining a snapshot.""" - return {"snapshot.yml": snapshots_pg__snapshot_yml} + return { + "snapshot.yml": snapshots_pg__snapshot_yml, + "source_snapshot.yml": snapshots_pg__source_snapshot_yml, + } @pytest.fixture(scope="class") def models(self): """Overrides the same function in Basic to use a modified version of schema.yml without snapshot config.""" return { + "sources.yml": sources_pg__source_yml, "ref_snapshot.sql": models__ref_snapshot_sql, } class TestBasicSnapshotYaml(BasicYaml): def test_basic_snapshot_yaml(self, project): - snapshot_setup(project, num_snapshot_models=1) + snapshot_setup(project, num_snapshot_models=2) class TestYamlSnapshotPartialParsing(BasicYaml): @@ -406,6 +413,12 @@ def test_snapshot_partial_parsing(self, project): snapshot = manifest.nodes[snapshot_id] assert snapshot.meta["owner"] == "a_owner" + # change an unrelated source file + write_file(sources_pg__source_mod_yml, "models", "sources.yml") + manifest = run_dbt(["parse"]) + snapshot = manifest.nodes[snapshot_id] + assert snapshot.meta["owner"] == "a_owner" + # change snapshot yml file and re-parse write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml") manifest = run_dbt(["parse"]) diff --git a/tests/unit/parser/test_partial.py b/tests/unit/parser/test_partial.py index 969da7ef567..7b19a5c0f38 100644 --- a/tests/unit/parser/test_partial.py +++ b/tests/unit/parser/test_partial.py @@ -13,17 +13,47 @@ SchemaSourceFile, SourceFile, ) +from dbt.contracts.graph.manifest import Manifest +from dbt.contracts.graph.nodes import SnapshotNode, SourceDefinition from dbt.node_types import NodeType from dbt.parser.partial import PartialParsing from dbt.tests.util import safe_set_invocation_context from tests.unit.utils import normalize -from tests.unit.utils.manifest import make_generic_test, make_model +from tests.unit.utils.manifest import ( + make_generic_test, + make_manifest, + make_model, + make_singular_test, + make_source, + make_source_snapshot, +) PROJECT_NAME = "my_test" @pytest.fixture -def files() -> Dict[str, BaseSourceFile]: +def singular_test(): + st = make_singular_test( + PROJECT_NAME, "my_singular_test", "select 1 where false", path="tests/my_singular_test.sql" + ) + st.patch_path = "my_test://tests/tests.yml" + return st + + +@pytest.fixture +def source() -> SourceDefinition: + return make_source(PROJECT_NAME, "my_source", "my_source_table", path="models/schema.yml") + + +@pytest.fixture +def source_snapshot(source) -> SnapshotNode: + return make_source_snapshot( + PROJECT_NAME, "my_test_source_snapshot", source, path="models/schema.yml" + ) + + +@pytest.fixture +def files(singular_test, source, source_snapshot) -> Dict[str, BaseSourceFile]: project_root = "/users/root" sql_model_file = SourceFile( path=FilePath( @@ -95,10 +125,46 @@ def files() -> Dict[str, BaseSourceFile]: {"name": "python_model", "description": "python"}, {"name": "not_null", "model": "test.my_test.test_my_model"}, ], + "sources": [ + {"name": source.source_name, "tables": [{"name": source.name}]}, + ], + "snapshots": [ + { + "name": source_snapshot.name, + "relation": f"source('{source.source_name}', '{source.name}')", + }, + ], }, ndp=["model.my_test.my_model"], env_vars={}, data_tests={"models": {"not_null": {"test.my_test.test_my_model": []}}}, + snapshots=[f"snapshot.{PROJECT_NAME}.{source_snapshot.name}"], + sources=[f"source.{PROJECT_NAME}.{source.source_name}.{source.name}"], + ) + singular_test_sql_file = SourceFile( + path=FilePath( + project_root=project_root, + searched_path="tests", + relative_path=f"{singular_test.name}.sql", + modification_time=time.time(), + ), + checksum=FileHash.from_contents("a test"), + project_name=PROJECT_NAME, + parse_file_type=ParseFileType.SingularTest, + nodes=[f"test.{PROJECT_NAME}.{singular_test.name}"], + env_vars=[], + ) + singular_test_yml_file = SourceFile( + path=FilePath( + project_root=project_root, + searched_path="tests", + relative_path="tests.yml", + modification_time=time.time(), + ), + checksum=FileHash.from_contents("a test"), + project_name=PROJECT_NAME, + parse_file_type=ParseFileType.Schema, + env_vars=[], ) return { schema_file.file_id: schema_file, @@ -106,11 +172,13 @@ def files() -> Dict[str, BaseSourceFile]: sql_model_file_untouched.file_id: sql_model_file_untouched, python_model_file.file_id: python_model_file, python_model_file_untouched.file_id: python_model_file_untouched, + singular_test_sql_file.file_id: singular_test_sql_file, + singular_test_yml_file.file_id: singular_test_yml_file, } @pytest.fixture -def nodes() -> List[NodeType]: +def nodes(singular_test, source, source_snapshot) -> List[NodeType]: patch_path = "my_test://" + normalize("models/schema.yml") my_model = make_model(PROJECT_NAME, "my_model", "", patch_path=patch_path) return [ @@ -121,9 +189,17 @@ def nodes() -> List[NodeType]: PROJECT_NAME, "python_model_untouched", "", language="python", patch_path=patch_path ), make_generic_test(PROJECT_NAME, "test", my_model, {}), + singular_test, + source, + source_snapshot, ] +@pytest.fixture +def manifest(files, nodes, source) -> Manifest: + return make_manifest(files=files, nodes=nodes, sources=[source]) + + @pytest.fixture def partial_parsing(manifest, files): safe_set_invocation_context() @@ -189,7 +265,7 @@ def test_schedule_macro_nodes_for_parsing_basic(partial_parsing): } -def test_schedule_nodes_for_parsing_versioning(partial_parsing) -> None: +def test_schedule_nodes_for_parsing_versioning(partial_parsing, source) -> None: # Modify schema file to add versioning schema_file_id = "my_test://" + normalize("models/schema.yml") partial_parsing.new_files[schema_file_id].checksum = FileHash.from_contents("changed") @@ -205,6 +281,9 @@ def test_schedule_nodes_for_parsing_versioning(partial_parsing) -> None: {"name": "python_model", "description": "python"}, {"name": "not_null", "model": "test.my_test.test_my_model"}, ], + "sources": [ + {"name": source.source_name, "tables": [{"name": source.name}]}, + ], } with mock.patch.object( partial_parsing, "schedule_referencing_nodes_for_parsing" @@ -225,6 +304,15 @@ def partial_parsing(self, manifest, files): saved_files["my_test://models/python_model_untouched.py"].checksum = ( FileHash.from_contents("something new") ) + saved_files["my_test://models/schema.yml"].dfy["sources"][0]["tables"][0][ + "description" + ] = "test" + saved_files["my_test://models/schema.yml"].checksum = FileHash.from_contents( + "something new" + ) + saved_files["my_test://tests/my_singular_test.sql"].checksum = FileHash.from_contents( + "something new" + ) return PartialParsing(manifest, saved_files) def test_build_file_diff_basic(self, partial_parsing): @@ -232,9 +320,23 @@ def test_build_file_diff_basic(self, partial_parsing): assert set(partial_parsing.file_diff["unchanged"]) == { "my_test://models/my_model_untouched.sql", "my_test://models/my_model.sql", - "my_test://models/schema.yml", "my_test://models/python_model.py", + "my_test://tests/tests.yml", } - assert partial_parsing.file_diff["changed"] == [ - "my_test://models/python_model_untouched.py" + assert set(partial_parsing.file_diff["changed"]) == { + "my_test://models/python_model_untouched.py", + "my_test://tests/my_singular_test.sql", + } + assert partial_parsing.file_diff["changed_schema_files"] == [ + "my_test://models/schema.yml", ] + + def test_get_parsing_files(self, partial_parsing): + project_parser_files = partial_parsing.get_parsing_files() + assert project_parser_files == { + PROJECT_NAME: { + "ModelParser": ["my_test://models/python_model_untouched.py"], + "SchemaParser": ["my_test://models/schema.yml"], + "SingularTestParser": ["my_test://tests/my_singular_test.sql"], + }, + } diff --git a/tests/unit/utils/manifest.py b/tests/unit/utils/manifest.py index 0950f68ebb5..24eb2c2159d 100644 --- a/tests/unit/utils/manifest.py +++ b/tests/unit/utils/manifest.py @@ -11,6 +11,7 @@ Owner, QueryParams, RefArgs, + SnapshotConfig, TestConfig, TestMetadata, WhereFilter, @@ -37,6 +38,7 @@ SeedNode, SemanticModel, SingularTestNode, + SnapshotNode, SourceDefinition, UnitTestDefinition, ) @@ -508,6 +510,35 @@ def make_saved_query(pkg: str, name: str, metric: str, path=None): ) +def make_source_snapshot(pkg: str, name: str, source: SourceDefinition, path=None) -> SnapshotNode: + if path is None: + path = "schema.yml" + + return SnapshotNode( + database=source.database, + schema=source.schema, + name=name, + resource_type=NodeType.Snapshot, + package_name=source.package_name, + path=path, + original_file_path=path, + unique_id=f"snapshot.{pkg}.{name}", + fqn=[pkg, "snapshot", name], + alias=None, + checksum="", + sources=[[source.source_name, source.name]], + depends_on=DependsOn( + nodes=[source.unique_id], + macros=[], + ), + config=SnapshotConfig( + strategy="check", + unique_key="'dummy'", + check_cols="all", + ), + ) + + @pytest.fixture def macro_test_unique() -> Macro: return make_macro( @@ -1016,7 +1047,7 @@ def make_manifest( groups: List[Group] = [], macros: List[Macro] = [], metrics: List[Metric] = [], - nodes: List[ModelNode] = [], + nodes: List[ManifestNode] = [], saved_queries: List[SavedQuery] = [], selectors: Dict[str, Any] = {}, semantic_models: List[SemanticModel] = [],