name: ci_pipeline
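# Runs on pull_request_target so the workflow executes from the base branch (stg or prd)
# and has access to the repository secrets needed for that environment.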
on:
pull_request_target:
branches:
- stg
- prd
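# DBT_DATASET builds an isolated BigQuery dataset name per PR, run number and commit SHA;
# it is presumably picked up by the profiles.yml at the repo root (DBT_PROFILES_DIR=".").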
env:
DBT_CICD_RUN: "true"
DESTINATION_BRANCH: "${{ github.event.pull_request.base.ref }}"
DBT_DATASET: "ci_${{ github.event.number }}_${{ github.run_number }}_${{ github.sha }}"
DBT_PROFILES_DIR: "."
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
POETRY_VERSION: "2.0.1"
jobs:
ci_pipeline:
permissions:
pull-requests: write
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
id: setup-python
- name: Load cached Poetry installation
id: cached-poetry
uses: actions/cache@v4
with:
path: /home/runner/.local
key: poetry-cache-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}
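# Only (re)install Poetry when the ~/.local cache above missed.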
- name: Install Poetry
if: steps.cached-poetry.outputs.cache-hit != 'true'
uses: snok/install-poetry@v1
with:
installer-parallel: true
version: ${{ env.POETRY_VERSION }}
virtualenvs-create: true
virtualenvs-in-project: true
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
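# Decode the base64-encoded service-account key for the matching environment to
# ./service_account.json; which secret is used depends on the PR's base branch.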
- name: Save stg GCP credentials to runner
run: |
echo "${{ secrets.DBT_CICD_SA_SECRET_KEY_STG_BASE64_ENCODED }}" | base64 --decode >> ./service_account.json && \
export GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/service_account.json"
if: ${{ github.event.pull_request.base.ref == 'stg' }}
- name: Save prd GCP credentials to runner
run: |
echo "${{ secrets.DBT_CICD_SA_SECRET_KEY_PRD_BASE64_ENCODED }}" | base64 --decode >> ./service_account.json && \
export GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/service_account.json"
if: ${{ github.event.pull_request.base.ref == 'prd' }}
- name: Authenticate with GCP
run: |
gcloud auth activate-service-account dbt-cicd@beyond-basics-$DESTINATION_BRANCH.iam.gserviceaccount.com --key-file=./service_account.json && \
gcloud config set project beyond-basics-$DESTINATION_BRANCH
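# The virtualenv cache is keyed on poetry.lock, so Python dependencies are only
# reinstalled when the lock file changes.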
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install python packages
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-ansi
- name: dbt deps
run: poetry run dbt deps
- name: dbt compile
run: poetry run dbt compile --target $DESTINATION_BRANCH
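# Run the pytest tests that need no dbt artifacts (marker `no_deps`) across 5 workers
# (the -n flag suggests pytest-xdist is installed).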
- run: poetry run pytest ./tests/pytest -m no_deps -n 5
- run: poetry run dbt deps
- name: dbt debug
run: poetry run dbt debug --target $DESTINATION_BRANCH
# Create objects in BigQuery and run unit tests
- name: dbt seed
run: poetry run dbt seed --target $DESTINATION_BRANCH
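# `dbt run --empty` builds every model with zero input rows, creating the table/view
# schemas in BigQuery, presumably so the unit tests below have relations to resolve against.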
- name: dbt run --empty
run: poetry run dbt run --empty --target $DESTINATION_BRANCH
- name: Run unit tests
run: poetry run dbt test --select "test_type:unit" --target $DESTINATION_BRANCH
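# The full build below treats warnings as errors (--warn-error), stops at the first
# failure (--fail-fast) and rebuilds incremental models from scratch (--full-refresh);
# unit tests are excluded because they already ran above.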
# dbt build needs to run this early in the pipeline so that `dbt docs generate` (which pre-commit and dbt-coverage depend on) can succeed
- name: dbt build
run: poetry run dbt --warn-error build --fail-fast --full-refresh --exclude "test_type:unit" --target $DESTINATION_BRANCH
- name: dbt docs generate
run: poetry run dbt docs generate --target $DESTINATION_BRANCH
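# dbt-bouncer checks the project's conventions against the dbt artifacts generated above.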
- name: Run `dbt-bouncer`
run: poetry run dbt-bouncer
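# SKIP disables the listed pre-commit hooks; dbt-compile and dbt-docs-generate already
# ran as dedicated steps above.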
- name: pre-commit run -a
run: SKIP=autoflake,dbt-compile,dbt-docs-generate poetry run pre-commit run -a
- name: Generate docs coverage report
id: dbt-docs-coverage-report
run: |
poetry run dbt-coverage compute doc --cov-report coverage-doc.json --cov-format markdown >> coverage-doc.md
sed -i '/# Coverage report/c\# Doc coverage report.' coverage-doc.md
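# Post the doc coverage report as a PR comment; message-id plus refresh-message-position
# keeps a single comment that is refreshed on every run.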
- uses: mshick/add-pr-comment@v2
with:
message-path: coverage-doc.md
message-id: dbt-docs-coverage-report
refresh-message-position: true
- name: Generate test coverage report
id: dbt-test-coverage-report
run: |
poetry run dbt-coverage compute test --cov-report coverage-test.json --cov-format markdown >> coverage-test.md
sed -i '/# Coverage report/c\# Test coverage report.' coverage-test.md
- uses: mshick/add-pr-comment@v2
with:
message-path: coverage-test.md
message-id: dbt-test-coverage-report
refresh-message-position: true
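# Validate the run_results.json artifact produced by the dbt invocations above
# (pytest marker `run_results_json`).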
- run: poetry run pytest ./tests/pytest -m run_results_json -n 5
- name: dbt source freshness
run: poetry run dbt source freshness --target $DESTINATION_BRANCH || true # Source freshness is allowed to fail in CI due to stale sources; the next step runs pytest on sources.json to validate the generated SQL, which is the purpose of this step
- run: poetry run pytest ./tests/pytest -m sources_json -n 5
- name: dbt build incremental models
run: poetry run dbt --warn-error build --fail-fast --select config.materialized:incremental --exclude "test_type:unit" --target $DESTINATION_BRANCH
# Mart Monitor
- run: if [ "$DESTINATION_BRANCH" == "stg" ]; then DBT_CICD_RUN="false" poetry run dbt compile --target $DESTINATION_BRANCH --threads 64; else echo "Only runs for PRs to stg"; fi # Need manifest.json as exists during $DESTINATION_BRANCH runs and not during CI runs
- run: if [ "$DESTINATION_BRANCH" == "stg" ]; then poetry run python ./scripts/mart_monitor_commenter.py --dbt_dataset $DBT_DATASET --pull_request_id ${{ github.event.number }} --target_branch $DESTINATION_BRANCH; else echo "Only runs for PRs to stg"; fi
- name: Ensure ERD can be generated
run: poetry run python ./scripts/generate_marts_erd_diagram.py
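# Build the repository's dev container and check that dependencies install and the dbt
# project compiles inside it, so the container definition stays usable for development.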
dev_container:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Build container and run dbt command
uses: devcontainers/ci@v0.3
env:
DBT_DATASET: "cicd_${{ github.event.number }}_${{ github.run_number }}_${{ github.sha }}_dev_container"
DBT_PROFILES_DIR: "."
DESTINATION_BRANCH: "${{ github.event.pull_request.base.ref }}"
with:
runCmd: |
echo "${{ secrets.DBT_CICD_SA_SECRET_KEY_STG_BASE64_ENCODED }}" | base64 --decode >> ./service_account.json
export GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/service_account.json"
gcloud auth activate-service-account dbt-cicd@beyond-basics-$DESTINATION_BRANCH.iam.gserviceaccount.com --key-file=./service_account.json
gcloud config set project beyond-basics-$DESTINATION_BRANCH
poetry install
poetry run dbt deps
poetry run dbt compile --target $DESTINATION_BRANCH
env: |
DBT_DATASET
DBT_PROFILES_DIR
DESTINATION_BRANCH
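# Build the production Docker image without pushing it and smoke-test it by running
# `dbt parse` inside the container.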
docker:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: dbt-beyond-the-basics
tags: |
type=ref,event=branch
type=raw,value=${{ github.sha }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Determine python version
id: python-version
shell: bash
run: |
export PYTHON_VERSION=$(cat ./.python-version)
echo "PYTHON_VERSION: $PYTHON_VERSION"
echo "PYTHON_VERSION=$PYTHON_VERSION" >> $GITHUB_OUTPUT
- name: Build Docker image
uses: docker/build-push-action@v6
with:
build-args: |
"PYTHON_VERSION=${{ steps.python-version.outputs.PYTHON_VERSION }}"
cache-from: type=gha
cache-to: type=gha,mode=max
context: .
file: ./Dockerfile
load: true
push: false
tags: ${{ steps.meta.outputs.tags }}
- name: Test Docker image
run: |
docker run --rm \
--env DBT_DATASET="test" \
--volume ~/.config/gcloud:/root/.config/gcloud \
dbt-beyond-the-basics:${{ github.sha }} \
dbt parse