Skip to content

Commit

Permalink
feat: merge deps.dev events into the event table (#2285)
Browse files Browse the repository at this point in the history
* add: incremental `staging` model for deps.dev

* add: dependency parsing `name`/`namespace` macro

* add: deps.dev dependencies `intermediate` model

* feat: merge deps.dev events into the `event` table

* fix: remove unused `comments`

* fix: use sample data for non `production` environments
  • Loading branch information
Jabolol authored Oct 1, 2024
1 parent a0becfe commit cb91442
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 0 deletions.
41 changes: 41 additions & 0 deletions warehouse/dbt/macros/models/deps_dev_artifact_details.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
  parse_namespace(event_source, artifact_name)

  Renders a SQL CASE expression that extracts the namespace portion of a
  package name. Both arguments are SQL expressions (typically column names)
  and are interpolated into the output verbatim.

  Rules, evaluated in order:
    - NPM and the name contains '/': take the segment before the first '/',
      split it on '@', and return the second piece ('@scope/pkg' -> 'scope').
    - GO and the name contains '/': the second '/'-separated segment.
    - MAVEN: the segment before the first ':'.
    - NUGET and the name contains '.': the segment before the first '.'.
    - anything else: the artifact name unchanged.
#}
{% macro parse_namespace(event_source, artifact_name) %}
  {%- set src = event_source -%}
  {%- set pkg = artifact_name -%}
  case
    when {{ src }} = 'NPM' and strpos({{ pkg }}, '/') > 0
      then split(split({{ pkg }}, '/')[safe_offset(0)], '@')[safe_offset(1)]
    when {{ src }} = 'GO' and strpos({{ pkg }}, '/') > 0
      then split({{ pkg }}, '/')[safe_offset(1)]
    when {{ src }} = 'MAVEN'
      then split({{ pkg }}, ':')[safe_offset(0)]
    when {{ src }} = 'NUGET' and strpos({{ pkg }}, '.') > 0
      then split({{ pkg }}, '.')[safe_offset(0)]
    else {{ pkg }}
  end
{% endmacro %}

{#
  Macro to parse the name from the artifact name based on the event source.

  Arguments:
    - event_source: SQL expression holding the event source of the artifact.
    - artifact_name: SQL expression holding the full name of the artifact.

  Returns a SQL expression that evaluates to the name part:
    - NPM and the name contains '/': the segment after the first '/'.
    - GO and the name contains '/': the third '/'-separated segment.
    - MAVEN: the segment after the first ':'.
    - NUGET and the name contains '.': everything after the first '.'.
    - anything else: the artifact name unchanged.

  SAFE_OFFSET yields NULL when the requested segment does not exist (for
  example a GO path with only two segments, or a MAVEN name without ':').
  The COALESCE wrapper makes those cases fall back to the full artifact name,
  the same value the ELSE branch produces, instead of emitting a NULL name.
#}
{% macro parse_name(event_source, artifact_name) %}
coalesce(
  case
    when {{ event_source }} = 'NPM' and STRPOS({{ artifact_name }}, '/') > 0 then
      SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(1)]
    when {{ event_source }} = 'GO' and STRPOS({{ artifact_name }}, '/') > 0 then
      SPLIT({{ artifact_name }}, '/')[SAFE_OFFSET(2)]
    when {{ event_source }} = 'MAVEN' then
      SPLIT({{ artifact_name }}, ':')[SAFE_OFFSET(1)]
    when {{ event_source }} = 'NUGET' and STRPOS({{ artifact_name }}, '.') > 0 then
      REGEXP_REPLACE({{ artifact_name }}, r'^[^.]+\.', '')
    else {{ artifact_name }}
  end,
  {{ artifact_name }}
)
{% endmacro %}
2 changes: 2 additions & 0 deletions warehouse/dbt/models/intermediate/events/int_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ all_events as (
select * from {{ ref('int_zora_contract_invocation_events') }}
union all
select * from {{ ref('int_arbitrum_one_contract_invocation_events') }}
union all
select * from {{ ref('int_events_dependencies') }}
)
union all
select
Expand Down
115 changes: 115 additions & 0 deletions warehouse/dbt/models/intermediate/events/int_events_dependencies.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
-- Turns deps.dev dependency snapshot rows (stg_deps_dev__dependencies) into
-- dependency events. Each output row links a "from" package to a "to"
-- package (its dependency) and carries generated ids for both sides.
-- The final column order matters: int_events unions this model via `select *`.

-- Rendered into the SQL as the double-quoted string literal "DEPS_DEV".
{% set event_source_name = '"DEPS_DEV"' %}

-- snapshots: one row per (package version, dependency version, snapshot),
-- restricted to MinimumDepth = 1 (presumably direct dependencies -- confirm
-- against the deps.dev schema). previous_to_artifact_name is the dependency
-- name from the preceding snapshot of the same partition, or NULL when the
-- row is the first one in its partition.
with snapshots as (
select
`SnapshotAt` as `time`,
`System` as from_artifact_type,
`Name` as from_artifact_name,
`Version` as from_artifact_version,
`Dependency`.`Name` as to_artifact_name,
`Dependency`.`System` as to_artifact_type,
`Dependency`.`Version` as to_artifact_version,
LAG(`Dependency`.`Name`) over (
partition by `System`, `Name`, `Dependency`.`Name`, `Version`, `Dependency`.`Version`
order by `SnapshotAt`
) as previous_to_artifact_name
from {{ ref('stg_deps_dev__dependencies') }}
where `MinimumDepth` = 1
),

-- intermediate: classifies each snapshot row and splits the raw package names
-- into name / namespace with the parse_name and parse_namespace macros.
-- Inside the CASE, to_artifact_name refers to the raw column from snapshots,
-- not to the parsed alias defined further down in this select list.
-- NOTE(review): `Dependency`.`Name` is part of the LAG partition key, so
-- previous_to_artifact_name can only be NULL or equal to to_artifact_name.
-- The REMOVE_DEPENDENCY branch therefore looks unreachable -- confirm intent.
intermediate as (
select
`time`,
case
when previous_to_artifact_name is null then 'ADD_DEPENDENCY'
when
to_artifact_name is not null and to_artifact_name <> previous_to_artifact_name
then 'REMOVE_DEPENDENCY'
else 'NO_CHANGE'
end as event_type,
{{ event_source_name }} as event_source,
{{ parse_name(
'to_artifact_type',
'to_artifact_name')
}} as to_artifact_name,
{{ parse_namespace(
'to_artifact_type',
'to_artifact_name')
}} as to_artifact_namespace,
to_artifact_type,
{{ parse_name(
'from_artifact_type',
'from_artifact_name')
}} as from_artifact_name,
{{ parse_namespace(
'from_artifact_type',
'from_artifact_name')
}} as from_artifact_namespace,
from_artifact_type,
-- Every event carries a constant amount of 1.0.
1.0 as amount
from snapshots
),

-- artifact_ids: drops NO_CHANGE rows and derives ids with the oso_id macro
-- (defined elsewhere): artifact ids from (event_source, namespace, name) and
-- source ids from (event_source, artifact type).
artifact_ids as (
select
`time`,
event_type,
event_source,
{{ oso_id(
'event_source',
'to_artifact_namespace',
'to_artifact_name')
}} as to_artifact_id,
to_artifact_name,
to_artifact_namespace,
to_artifact_type,
{{ oso_id(
'event_source',
'to_artifact_type')
}} as to_artifact_source_id,
{{ oso_id(
'event_source',
'from_artifact_namespace',
'from_artifact_name')
}} as from_artifact_id,
from_artifact_name,
from_artifact_namespace,
from_artifact_type,
{{ oso_id(
'event_source',
'from_artifact_type')
}} as from_artifact_source_id,
amount
from intermediate
where event_type <> 'NO_CHANGE'
),

-- changes: passes every column through and appends event_source_id, built by
-- oso_id from the event source, time, both artifact ids and types, and the
-- event type.
changes as (
select
`time`,
event_type,
event_source,
to_artifact_id,
to_artifact_name,
to_artifact_namespace,
to_artifact_type,
to_artifact_source_id,
from_artifact_id,
from_artifact_name,
from_artifact_namespace,
from_artifact_type,
from_artifact_source_id,
amount,
{{ oso_id(
'event_source',
'time',
'to_artifact_id',
'to_artifact_type',
'from_artifact_id',
'from_artifact_type',
'event_type')
}} as event_source_id
from artifact_ids
)

select * from changes
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{#
  Staging model for deps.dev dependency snapshots.

  - Materialized incrementally and partitioned by day on `SnapshotAt`.
  - On the `production` target it reads the public BigQuery table
    `bigquery-public-data.deps_dev_v1.Dependencies`; on incremental runs only
    rows newer than the latest `SnapshotAt` already stored are selected.
  - On every other target it emits a single hard-coded sample row so that
    downstream models can be built without scanning the public dataset.
#}
{{ config(
  materialized='incremental',
  partition_by={
    'field': 'SnapshotAt',
    'data_type': 'timestamp',
    'granularity': 'day'
  },
) }}

{% set is_production = target.name == 'production' %}

{% if is_production %}
select
  `SnapshotAt`,
  `System`,
  `Name`,
  `Version`,
  `Dependency`,
  `MinimumDepth`
from `bigquery-public-data.deps_dev_v1.Dependencies`
{% if is_incremental() %}
-- max() over an empty target table is NULL, and `SnapshotAt > NULL` would
-- filter out every row; fall back to an early timestamp so an existing but
-- empty table is still back-filled.
where `SnapshotAt` > (
  select coalesce(max(`SnapshotAt`), timestamp('1900-01-01'))
  from {{ this }}
)
{% endif %}
{% else %}
-- Sample row used for non-production targets.
select
  'NPM' as `System`,
  '@example/oso' as `Name`,
  '0.0.0' as `Version`,
  1 as `MinimumDepth`,
  current_timestamp() as `SnapshotAt`,
  struct(
    'NPM' as `System`,
    '@example/oso-dep' as `Name`,
    '0.0.0' as `Version`
  ) as `Dependency`
limit 1
{% endif %}

0 comments on commit cb91442

Please sign in to comment.