Skip to content

Commit

Permalink
Merge branch 'extract-vceregen' into transform-vceregen
Browse files — browse the repository at this point in the history
  • Loading branch information
e-belfer authored Oct 16, 2024
2 parents 7e3c926 + 4158afd commit 069c246
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
- name: Sync internal and public caches
run: |
gcloud storage rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }}
gcloud storage --billing-project=catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }}
zenodo-cache-sync-notify:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Configure gsutil authentication
# Configure gcloud authentication
# hadolint ignore=DL3059
RUN printf '[GoogleCompute]\nservice_account = default' > /etc/boto.cfg

Expand Down
14 changes: 7 additions & 7 deletions docker/gcp_pudl_etl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function run_pudl_etl() {

function save_outputs_to_gcs() {
echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT" && \
gsutil -q -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" && \
gcloud storage --quiet cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" && \
rm -f "$PUDL_OUTPUT/success"
}

Expand All @@ -85,12 +85,12 @@ function upload_to_dist_path() {
# If the old outputs don't exist, these will exit with status 1, so we
# don't && them with the rest of the commands.
echo "Removing old outputs from $GCS_PATH."
gsutil -q -m -u "$GCP_BILLING_PROJECT" rm -r "$GCS_PATH"
gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" rm -r "$GCS_PATH"
echo "Removing old outputs from $AWS_PATH."
aws s3 rm --quiet --recursive "$AWS_PATH"

echo "Copying outputs to $GCS_PATH:" && \
gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "$GCS_PATH" && \
gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "$GCS_PATH" && \
echo "Copying outputs to $AWS_PATH" && \
aws s3 cp --quiet --recursive "$PUDL_OUTPUT/" "$AWS_PATH"
else
Expand All @@ -113,12 +113,12 @@ function distribute_parquet() {
DIST_PATH="$BUILD_REF"
fi
echo "Copying outputs to $PARQUET_BUCKET/$DIST_PATH" && \
gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/$DIST_PATH"
gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/$DIST_PATH"

# If running a tagged release, ALSO update the stable distribution bucket path:
if [[ "$GITHUB_ACTION_TRIGGER" == "push" && "$BUILD_REF" == v20* ]]; then
echo "Copying outputs to $PARQUET_BUCKET/stable" && \
gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/stable"
gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/stable"
fi
fi
}
Expand Down Expand Up @@ -298,13 +298,13 @@ if [[ $ETL_SUCCESS == 0 ]]; then
# If running a tagged release, ensure that outputs can't be accidentally deleted
# It's not clear that an object lock can be applied in S3 with the AWS CLI
if [[ "$GITHUB_ACTION_TRIGGER" == "push" && "$BUILD_REF" == v20* ]]; then
gsutil -m -u catalyst-cooperative-pudl retention temp set "gs://pudl.catalyst.coop/$BUILD_REF/*" 2>&1 | tee -a "$LOGFILE"
gcloud storage --billing-project="catalyst-cooperative-pudl" objects update "gs://pudl.catalyst.coop/$BUILD_REF/*" --temporary-hold 2>&1 | tee -a "$LOGFILE"
GCS_TEMPORARY_HOLD_SUCCESS=${PIPESTATUS[0]}
fi
fi

# This way we also save the logs from latter steps in the script
gsutil -q cp "$LOGFILE" "$PUDL_GCS_OUTPUT"
gcloud storage --quiet cp "$LOGFILE" "$PUDL_GCS_OUTPUT"

# Notify slack about entire pipeline's success or failure;
if [[ $ETL_SUCCESS == 0 && \
Expand Down

0 comments on commit 069c246

Please sign in to comment.