Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing error for projects using the new YAML format for snapshots #11362

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20250305-110257.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Changie changelog entry: records the bug fix for dbt-labs/dbt-core#11164
# (parsing errors when snapshots are defined with the new YAML format).
kind: Fixes
body: Fixes parsing errors when using the new YAML format for snapshots
time: 2025-03-05T11:02:57.829685Z
custom:
Author: amardatar
Issue: "11164"
11 changes: 8 additions & 3 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
AnySourceFile,
ParseFileType,
SchemaSourceFile,
SourceFile,
parse_file_type_to_parser,
)
from dbt.contracts.graph.manifest import Manifest
Expand Down Expand Up @@ -344,7 +345,11 @@ def remove_node_in_saved(self, source_file, unique_id):
if node.patch_path:
file_id = node.patch_path
# it might be changed... then what?
if file_id not in self.file_diff["deleted"] and file_id in self.saved_files:
if (
file_id not in self.file_diff["deleted"]
and file_id in self.saved_files
and source_file.parse_file_type in parse_file_type_to_key
):
# Schema files should already be updated if this comes from a node,
# but this code is also called when updating groups and exposures.
# This might save the old schema file element, so when the schema file
Expand Down Expand Up @@ -391,10 +396,10 @@ def update_fixture_in_saved(self, new_source_file, old_source_file):
self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
self.add_to_pp_files(new_source_file)

def remove_mssat_file(self, source_file):
def remove_mssat_file(self, source_file: AnySourceFile):
# nodes [unique_ids] -- SQL files
# There should always be a node for a SQL file
if not source_file.nodes:
if not isinstance(source_file, SourceFile) or not source_file.nodes:
return
# There is generally only 1 node for SQL files, except for macros and snapshots
for unique_id in source_file.nodes:
Expand Down
30 changes: 30 additions & 0 deletions tests/functional/snapshots/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,16 @@
- not_null
"""

# YAML-defined snapshot whose relation is a source() reference rather than a
# ref() — exercises the new YAML snapshot format against a source table
# (regression fixture for parsing errors; see issue #11164).
# NOTE(review): YAML indentation inside this string appears flattened by the
# diff rendering — confirm nesting against the repository file.
snapshots_pg__source_snapshot_yml = """
snapshots:
- name: snapshot_source
relation: "source('information_schema', 'tables')"
config:
unique_key: "table_schema || '-' || table_name"
strategy: check
check_cols: all
"""

snapshots_pg__snapshot_mod_yml = """
snapshots:
- name: snapshot_actual
Expand Down Expand Up @@ -417,3 +427,23 @@
select * from {{target.database}}.{{schema}}.seed
{% endsnapshot %}
"""


# Source definition for the information_schema.tables relation that the
# YAML snapshot fixture points at via source('information_schema', 'tables').
# NOTE(review): YAML indentation inside this string appears flattened by the
# diff rendering — confirm nesting against the repository file.
sources_pg__source_yml = """
sources:
- name: information_schema
database: dbt
schema: information_schema
tables:
- name: tables
"""

# Same source as sources_pg__source_yml but with a table description added —
# used to trigger a partial-parsing pass from an "unrelated" schema-file
# change without touching the snapshot definition itself.
# NOTE(review): YAML indentation inside this string appears flattened by the
# diff rendering — confirm nesting against the repository file.
sources_pg__source_mod_yml = """
sources:
- name: information_schema
database: dbt
schema: information_schema
tables:
- name: tables
description: tables
"""
17 changes: 15 additions & 2 deletions tests/functional/snapshots/test_basic_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
snapshots_pg__snapshot_no_target_schema_sql,
snapshots_pg__snapshot_sql,
snapshots_pg__snapshot_yml,
snapshots_pg__source_snapshot_yml,
snapshots_pg_custom__snapshot_sql,
snapshots_pg_custom_namespaced__snapshot_sql,
sources_pg__source_mod_yml,
sources_pg__source_yml,
)

snapshots_check_col__snapshot_sql = """
Expand Down Expand Up @@ -382,20 +385,24 @@ class BasicYaml(Basic):
def snapshots(self):
"""Overrides the same function in Basic to use the YAML method of
defining a snapshot."""
return {"snapshot.yml": snapshots_pg__snapshot_yml}
return {
"snapshot.yml": snapshots_pg__snapshot_yml,
"source_snapshot.yml": snapshots_pg__source_snapshot_yml,
}

@pytest.fixture(scope="class")
def models(self):
    """Override Basic's models fixture: ship a sources.yml (so the YAML
    snapshot's source() reference resolves) plus the model that refs the
    snapshot, instead of Basic's schema.yml with snapshot config."""
    project_files = {
        "sources.yml": sources_pg__source_yml,
        "ref_snapshot.sql": models__ref_snapshot_sql,
    }
    return project_files


class TestBasicSnapshotYaml(BasicYaml):
def test_basic_snapshot_yaml(self, project):
snapshot_setup(project, num_snapshot_models=1)
snapshot_setup(project, num_snapshot_models=2)


class TestYamlSnapshotPartialParsing(BasicYaml):
Expand All @@ -406,6 +413,12 @@ def test_snapshot_partial_parsing(self, project):
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "a_owner"

# change an unrelated source file
write_file(sources_pg__source_mod_yml, "models", "sources.yml")
manifest = run_dbt(["parse"])
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "a_owner"

# change snapshot yml file and re-parse
write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml")
manifest = run_dbt(["parse"])
Expand Down
116 changes: 109 additions & 7 deletions tests/unit/parser/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,47 @@
SchemaSourceFile,
SourceFile,
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import SnapshotNode, SourceDefinition
from dbt.node_types import NodeType
from dbt.parser.partial import PartialParsing
from dbt.tests.util import safe_set_invocation_context
from tests.unit.utils import normalize
from tests.unit.utils.manifest import make_generic_test, make_model
from tests.unit.utils.manifest import (
make_generic_test,
make_manifest,
make_model,
make_singular_test,
make_source,
make_source_snapshot,
)

PROJECT_NAME = "my_test"


@pytest.fixture
def files() -> Dict[str, BaseSourceFile]:
def singular_test():
    """A singular test node whose patch (yml) file lives at tests/tests.yml."""
    node = make_singular_test(
        PROJECT_NAME,
        "my_singular_test",
        "select 1 where false",
        path="tests/my_singular_test.sql",
    )
    node.patch_path = "my_test://tests/tests.yml"
    return node


@pytest.fixture
def source() -> SourceDefinition:
    """Source definition parsed from models/schema.yml."""
    source_node = make_source(
        PROJECT_NAME, "my_source", "my_source_table", path="models/schema.yml"
    )
    return source_node


@pytest.fixture
def source_snapshot(source) -> SnapshotNode:
    """YAML-defined snapshot node that snapshots the `source` fixture."""
    snapshot_node = make_source_snapshot(
        PROJECT_NAME, "my_test_source_snapshot", source, path="models/schema.yml"
    )
    return snapshot_node


@pytest.fixture
def files(singular_test, source, source_snapshot) -> Dict[str, BaseSourceFile]:
project_root = "/users/root"
sql_model_file = SourceFile(
path=FilePath(
Expand Down Expand Up @@ -95,22 +125,60 @@ def files() -> Dict[str, BaseSourceFile]:
{"name": "python_model", "description": "python"},
{"name": "not_null", "model": "test.my_test.test_my_model"},
],
"sources": [
{"name": source.source_name, "tables": [{"name": source.name}]},
],
"snapshots": [
{
"name": source_snapshot.name,
"relation": f"source('{source.source_name}', '{source.name}')",
},
],
},
ndp=["model.my_test.my_model"],
env_vars={},
data_tests={"models": {"not_null": {"test.my_test.test_my_model": []}}},
snapshots=[f"snapshot.{PROJECT_NAME}.{source_snapshot.name}"],
sources=[f"source.{PROJECT_NAME}.{source.source_name}.{source.name}"],
)
singular_test_sql_file = SourceFile(
path=FilePath(
project_root=project_root,
searched_path="tests",
relative_path=f"{singular_test.name}.sql",
modification_time=time.time(),
),
checksum=FileHash.from_contents("a test"),
project_name=PROJECT_NAME,
parse_file_type=ParseFileType.SingularTest,
nodes=[f"test.{PROJECT_NAME}.{singular_test.name}"],
env_vars=[],
)
singular_test_yml_file = SourceFile(
path=FilePath(
project_root=project_root,
searched_path="tests",
relative_path="tests.yml",
modification_time=time.time(),
),
checksum=FileHash.from_contents("a test"),
project_name=PROJECT_NAME,
parse_file_type=ParseFileType.Schema,
env_vars=[],
)
return {
schema_file.file_id: schema_file,
sql_model_file.file_id: sql_model_file,
sql_model_file_untouched.file_id: sql_model_file_untouched,
python_model_file.file_id: python_model_file,
python_model_file_untouched.file_id: python_model_file_untouched,
singular_test_sql_file.file_id: singular_test_sql_file,
singular_test_yml_file.file_id: singular_test_yml_file,
}


@pytest.fixture
def nodes() -> List[NodeType]:
def nodes(singular_test, source, source_snapshot) -> List[NodeType]:
patch_path = "my_test://" + normalize("models/schema.yml")
my_model = make_model(PROJECT_NAME, "my_model", "", patch_path=patch_path)
return [
Expand All @@ -121,9 +189,17 @@ def nodes() -> List[NodeType]:
PROJECT_NAME, "python_model_untouched", "", language="python", patch_path=patch_path
),
make_generic_test(PROJECT_NAME, "test", my_model, {}),
singular_test,
source,
source_snapshot,
]


@pytest.fixture
def manifest(files, nodes, source) -> Manifest:
    """Manifest assembled from the files/nodes fixtures, with one source."""
    return make_manifest(sources=[source], nodes=nodes, files=files)


@pytest.fixture
def partial_parsing(manifest, files):
safe_set_invocation_context()
Expand Down Expand Up @@ -189,7 +265,7 @@ def test_schedule_macro_nodes_for_parsing_basic(partial_parsing):
}


def test_schedule_nodes_for_parsing_versioning(partial_parsing) -> None:
def test_schedule_nodes_for_parsing_versioning(partial_parsing, source) -> None:
# Modify schema file to add versioning
schema_file_id = "my_test://" + normalize("models/schema.yml")
partial_parsing.new_files[schema_file_id].checksum = FileHash.from_contents("changed")
Expand All @@ -205,6 +281,9 @@ def test_schedule_nodes_for_parsing_versioning(partial_parsing) -> None:
{"name": "python_model", "description": "python"},
{"name": "not_null", "model": "test.my_test.test_my_model"},
],
"sources": [
{"name": source.source_name, "tables": [{"name": source.name}]},
],
}
with mock.patch.object(
partial_parsing, "schedule_referencing_nodes_for_parsing"
Expand All @@ -225,16 +304,39 @@ def partial_parsing(self, manifest, files):
saved_files["my_test://models/python_model_untouched.py"].checksum = (
FileHash.from_contents("something new")
)
saved_files["my_test://models/schema.yml"].dfy["sources"][0]["tables"][0][
"description"
] = "test"
saved_files["my_test://models/schema.yml"].checksum = FileHash.from_contents(
"something new"
)
saved_files["my_test://tests/my_singular_test.sql"].checksum = FileHash.from_contents(
"something new"
)
return PartialParsing(manifest, saved_files)

def test_build_file_diff_basic(self, partial_parsing):
partial_parsing.build_file_diff()
assert set(partial_parsing.file_diff["unchanged"]) == {
"my_test://models/my_model_untouched.sql",
"my_test://models/my_model.sql",
"my_test://models/schema.yml",
"my_test://models/python_model.py",
"my_test://tests/tests.yml",
}
assert partial_parsing.file_diff["changed"] == [
"my_test://models/python_model_untouched.py"
assert set(partial_parsing.file_diff["changed"]) == {
"my_test://models/python_model_untouched.py",
"my_test://tests/my_singular_test.sql",
}
assert partial_parsing.file_diff["changed_schema_files"] == [
"my_test://models/schema.yml",
]

def test_get_parsing_files(self, partial_parsing):
    """Each changed file is scheduled under exactly one parser: the touched
    python model, the schema file, and the singular test SQL file."""
    expected = {
        PROJECT_NAME: {
            "ModelParser": ["my_test://models/python_model_untouched.py"],
            "SchemaParser": ["my_test://models/schema.yml"],
            "SingularTestParser": ["my_test://tests/my_singular_test.sql"],
        },
    }
    actual = partial_parsing.get_parsing_files()
    assert actual == expected
33 changes: 32 additions & 1 deletion tests/unit/utils/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Owner,
QueryParams,
RefArgs,
SnapshotConfig,
TestConfig,
TestMetadata,
WhereFilter,
Expand All @@ -37,6 +38,7 @@
SeedNode,
SemanticModel,
SingularTestNode,
SnapshotNode,
SourceDefinition,
UnitTestDefinition,
)
Expand Down Expand Up @@ -508,6 +510,35 @@ def make_saved_query(pkg: str, name: str, metric: str, path=None):
)


def make_source_snapshot(pkg: str, name: str, source: SourceDefinition, path=None) -> SnapshotNode:
    """Build a check-strategy SnapshotNode that snapshots *source*.

    The node is placed in the same database/schema/package as the source and
    records its dependency on it both in ``sources`` and ``depends_on``.

    :param pkg: package name used in the node's unique_id and fqn
    :param name: snapshot name
    :param source: the SourceDefinition being snapshotted
    :param path: file path for the node; defaults to "schema.yml"
    """
    node_path = "schema.yml" if path is None else path

    # Fixed check-strategy config; unique_key is a dummy literal since the
    # tests never materialize this snapshot.
    snapshot_config = SnapshotConfig(
        strategy="check",
        unique_key="'dummy'",
        check_cols="all",
    )

    return SnapshotNode(
        database=source.database,
        schema=source.schema,
        name=name,
        resource_type=NodeType.Snapshot,
        package_name=source.package_name,
        path=node_path,
        original_file_path=node_path,
        unique_id=f"snapshot.{pkg}.{name}",
        fqn=[pkg, "snapshot", name],
        alias=None,
        checksum="",
        sources=[[source.source_name, source.name]],
        depends_on=DependsOn(nodes=[source.unique_id], macros=[]),
        config=snapshot_config,
    )


@pytest.fixture
def macro_test_unique() -> Macro:
return make_macro(
Expand Down Expand Up @@ -1016,7 +1047,7 @@ def make_manifest(
groups: List[Group] = [],
macros: List[Macro] = [],
metrics: List[Metric] = [],
nodes: List[ModelNode] = [],
nodes: List[ManifestNode] = [],
saved_queries: List[SavedQuery] = [],
selectors: Dict[str, Any] = {},
semantic_models: List[SemanticModel] = [],
Expand Down
Loading