-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: merge deps.dev events into the `event` table (#2285)
* add: incremental `staging` model for deps.dev
* add: dependency parsing `name`/`namespace` macro
* add: deps.dev dependencies `intermediate` model
* feat: merge deps.dev events into the `event` table
* fix: remove unused `comments`
* fix: use sample data for non `production` environments
- Loading branch information
Showing
4 changed files
with
200 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{# | ||
Derives the namespace portion of an artifact name using per-source naming rules. | ||
Both arguments are SQL expressions (for example column names) that are | ||
interpolated verbatim into the generated CASE expression. | ||
Arguments: | ||
- event_source: SQL expression yielding the source system of the artifact. | ||
- artifact_name: SQL expression yielding the full artifact name. | ||
Rules, evaluated in order: | ||
- 'NPM' and the name contains '/': the second '@'-separated piece of the first | ||
'/'-separated segment (the scope without its leading '@'). | ||
- 'GO' and the name contains '/': the second '/'-separated segment. | ||
- 'MAVEN': the text before the first ':' (the whole name when there is no ':'). | ||
- 'NUGET' and the name contains '.': the text before the first '.'. | ||
- anything else: the artifact name unchanged. | ||
#} | ||
{% macro parse_namespace(event_source, artifact_name) %} | ||
{% set has_slash = "STRPOS(" ~ artifact_name ~ ", '/') > 0" %} | ||
{% set slash_parts = "SPLIT(" ~ artifact_name ~ ", '/')" %} | ||
case | ||
    when {{ event_source }} = 'NPM' and {{ has_slash }} | ||
        then SPLIT({{ slash_parts }}[SAFE_OFFSET(0)], '@')[SAFE_OFFSET(1)] | ||
    when {{ event_source }} = 'GO' and {{ has_slash }} | ||
        then {{ slash_parts }}[SAFE_OFFSET(1)] | ||
    when {{ event_source }} = 'MAVEN' | ||
        then SPLIT({{ artifact_name }}, ':')[SAFE_OFFSET(0)] | ||
    when {{ event_source }} = 'NUGET' and STRPOS({{ artifact_name }}, '.') > 0 | ||
        then SPLIT({{ artifact_name }}, '.')[SAFE_OFFSET(0)] | ||
    else {{ artifact_name }} | ||
end | ||
{% endmacro %} | ||
|
||
{# | ||
Macro to parse the name from the artifact name based on the event source. | ||
Both arguments are SQL expressions (for example column names) that are | ||
interpolated verbatim into the generated CASE expression. | ||
Arguments: | ||
- event_source: SQL expression yielding the source system of the artifact. | ||
- artifact_name: SQL expression yielding the full artifact name. | ||
Rules, evaluated in order: | ||
- 'NPM' and the name contains '/': the second '/'-separated segment. | ||
- 'GO' and the name contains '/': the third '/'-separated segment; when the | ||
path has only two segments the full artifact name is returned instead. | ||
- 'MAVEN': the text between the first and second ':'; when the name has no ':' | ||
the full artifact name is returned instead. | ||
- 'NUGET' and the name contains '.': everything after the first '.'. | ||
- anything else: the artifact name unchanged. | ||
The GO and MAVEN fallbacks mirror the ELSE branch so that a non-NULL artifact | ||
name never produces a NULL parsed name. | ||
#} | ||
{% macro parse_name(event_source, artifact_name) %} | ||
case | ||
    when {{ event_source }} = 'NPM' and STRPOS({{ artifact_name }}, '/') > 0 then | ||
        SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(1)] | ||
    when {{ event_source }} = 'GO' and STRPOS({{ artifact_name }}, '/') > 0 then | ||
        {# SAFE_OFFSET(2) is NULL for two-segment paths; fall back to the full name. #} | ||
        COALESCE(SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(2)], {{ artifact_name }}) | ||
    when {{ event_source }} = 'MAVEN' then | ||
        {# SAFE_OFFSET(1) is NULL when there is no ':'; fall back to the full name. #} | ||
        COALESCE(SPLIT({{ artifact_name }}, ':')[SAFE_OFFSET(1)], {{ artifact_name }}) | ||
    when {{ event_source }} = 'NUGET' and STRPOS({{ artifact_name }}, '.') > 0 then | ||
        REGEXP_REPLACE({{ artifact_name }}, r'^[^.]+\.', '') | ||
    else {{ artifact_name }} | ||
end | ||
{% endmacro %} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
warehouse/dbt/models/intermediate/events/int_events_dependencies.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
-- event_source_name is rendered into the query as the double-quoted literal "DEPS_DEV" | ||
-- and used as the event_source value of every row. | ||
{% set event_source_name = '"DEPS_DEV"' %} | ||
|
||
-- snapshots: rows of stg_deps_dev__dependencies with MinimumDepth = 1, renamed into | ||
-- from_* (the depending package) and to_* (the dependency) columns. | ||
-- Presumably MinimumDepth = 1 selects direct dependencies; confirm against the deps.dev schema. | ||
with snapshots as ( | ||
select | ||
`SnapshotAt` as `time`, | ||
`System` as from_artifact_type, | ||
`Name` as from_artifact_name, | ||
`Version` as from_artifact_version, | ||
`Dependency`.`Name` as to_artifact_name, | ||
`Dependency`.`System` as to_artifact_type, | ||
`Dependency`.`Version` as to_artifact_version, | ||
-- NOTE(review): the lagged column, Dependency.Name, is also one of the partition keys, | ||
-- so every row in a partition carries the same value. previous_to_artifact_name is | ||
-- therefore NULL on the earliest snapshot of a partition and otherwise equal to the | ||
-- current dependency name; it can never hold a different name. | ||
LAG(`Dependency`.`Name`) over ( | ||
partition by `System`, `Name`, `Dependency`.`Name`, `Version`, `Dependency`.`Version` | ||
order by `SnapshotAt` | ||
) as previous_to_artifact_name | ||
from {{ ref('stg_deps_dev__dependencies') }} | ||
where `MinimumDepth` = 1 | ||
), | ||
|
||
-- intermediate: classifies each snapshot row as an event and splits the raw artifact | ||
-- names into name / namespace with the parse_name and parse_namespace macros. | ||
intermediate as ( | ||
select | ||
`time`, | ||
-- ADD_DEPENDENCY: no earlier row exists in the LAG partition defined in snapshots. | ||
-- NOTE(review): that partition includes the dependency name itself, so a non-NULL | ||
-- previous_to_artifact_name always equals to_artifact_name. The REMOVE_DEPENDENCY | ||
-- branch below therefore cannot match, and every non-first row is NO_CHANGE. | ||
-- Detecting removals would need a different signal; TODO confirm the intended rule. | ||
case | ||
when previous_to_artifact_name is null then 'ADD_DEPENDENCY' | ||
when | ||
to_artifact_name is not null and to_artifact_name <> previous_to_artifact_name | ||
then 'REMOVE_DEPENDENCY' | ||
else 'NO_CHANGE' | ||
end as event_type, | ||
{{ event_source_name }} as event_source, | ||
-- The artifact *type* columns are passed as the event_source argument of the parsing | ||
-- macros, so the per-ecosystem rules ('NPM', 'GO', ...) are chosen by package system. | ||
{{ parse_name( | ||
'to_artifact_type', | ||
'to_artifact_name') | ||
}} as to_artifact_name, | ||
{{ parse_namespace( | ||
'to_artifact_type', | ||
'to_artifact_name') | ||
}} as to_artifact_namespace, | ||
to_artifact_type, | ||
{{ parse_name( | ||
'from_artifact_type', | ||
'from_artifact_name') | ||
}} as from_artifact_name, | ||
{{ parse_namespace( | ||
'from_artifact_type', | ||
'from_artifact_name') | ||
}} as from_artifact_namespace, | ||
from_artifact_type, | ||
-- Every event carries a constant amount of 1.0. | ||
1.0 as amount | ||
from snapshots | ||
), | ||
|
||
-- artifact_ids: drops NO_CHANGE rows and attaches identifiers for both ends of the | ||
-- dependency edge. oso_id is defined elsewhere in the project; presumably it derives | ||
-- a stable identifier from its arguments (confirm against the macro). | ||
artifact_ids as ( | ||
    select | ||
        `time`, | ||
        event_type, | ||
        event_source, | ||
        -- dependency (to_*) side | ||
        {{ oso_id('event_source', 'to_artifact_namespace', 'to_artifact_name') }} as to_artifact_id, | ||
        to_artifact_name, | ||
        to_artifact_namespace, | ||
        to_artifact_type, | ||
        {{ oso_id('event_source', 'to_artifact_type') }} as to_artifact_source_id, | ||
        -- depending package (from_*) side | ||
        {{ oso_id('event_source', 'from_artifact_namespace', 'from_artifact_name') }} as from_artifact_id, | ||
        from_artifact_name, | ||
        from_artifact_namespace, | ||
        from_artifact_type, | ||
        {{ oso_id('event_source', 'from_artifact_type') }} as from_artifact_source_id, | ||
        amount | ||
    from intermediate | ||
    where event_type <> 'NO_CHANGE' | ||
), | ||
|
||
-- changes: final event shape. Passes every artifact_ids column through unchanged and | ||
-- adds event_source_id, built by oso_id from the source, time, both artifact ids and | ||
-- types, and the event type. | ||
changes as ( | ||
    select | ||
        `time`, | ||
        event_type, | ||
        event_source, | ||
        to_artifact_id, | ||
        to_artifact_name, | ||
        to_artifact_namespace, | ||
        to_artifact_type, | ||
        to_artifact_source_id, | ||
        from_artifact_id, | ||
        from_artifact_name, | ||
        from_artifact_namespace, | ||
        from_artifact_type, | ||
        from_artifact_source_id, | ||
        amount, | ||
        {{ oso_id('event_source', 'time', 'to_artifact_id', 'to_artifact_type', 'from_artifact_id', 'from_artifact_type', 'event_type') }} as event_source_id | ||
    from artifact_ids | ||
) | ||
|
||
select * from changes |
42 changes: 42 additions & 0 deletions
42
warehouse/dbt/models/staging/deps-dev/stg_deps_dev__dependencies.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
-- Staging model for deps.dev dependency snapshots. | ||
-- Materialized incrementally and partitioned by day on the SnapshotAt timestamp. | ||
-- On the 'production' target it reads the public deps.dev dataset; on every other | ||
-- target it emits a single hard-coded sample row instead. | ||
{{ config( | ||
materialized='incremental', | ||
partition_by={ | ||
'field': 'SnapshotAt', | ||
'data_type': 'timestamp', | ||
'granularity': 'day' | ||
}, | ||
) }} | ||
|
||
{% set is_production = target.name == 'production' %} | ||
|
||
{% if is_production %} | ||
select | ||
    `SnapshotAt`, | ||
    `System`, | ||
    `Name`, | ||
    `Version`, | ||
    `Dependency`, | ||
    `MinimumDepth` | ||
from `bigquery-public-data.deps_dev_v1.Dependencies` | ||
{% if is_incremental() %} | ||
-- Incremental runs only load snapshots newer than what is already stored. | ||
-- The coalesce keeps the filter usable when the existing table is empty: max() is | ||
-- then NULL, and a bare comparison against NULL would reject every source row. | ||
where `SnapshotAt` > ( | ||
    select coalesce(max(`SnapshotAt`), timestamp_seconds(0)) | ||
    from {{ this }} | ||
) | ||
{% endif %} | ||
{% else %} | ||
-- Sample row for non-production targets, shaped like the production columns. | ||
select | ||
    'NPM' as `System`, | ||
    '@example/oso' as `Name`, | ||
    '0.0.0' as `Version`, | ||
    1 as `MinimumDepth`, | ||
    current_timestamp() as `SnapshotAt`, | ||
    struct( | ||
        'NPM' as `System`, | ||
        '@example/oso-dep' as `Name`, | ||
        '0.0.0' as `Version` | ||
    ) as `Dependency` | ||
limit 1 | ||
{% endif %} |