From 6dea3327cd1bf288a73ddb6f7e3aecd2b442b459 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 16 Oct 2024 14:15:54 -0400 Subject: [PATCH 1/4] Add back user project --- .github/workflows/zenodo-cache-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/zenodo-cache-sync.yml b/.github/workflows/zenodo-cache-sync.yml index b073841c14..6956d37ddb 100644 --- a/.github/workflows/zenodo-cache-sync.yml +++ b/.github/workflows/zenodo-cache-sync.yml @@ -66,7 +66,7 @@ jobs: - name: Sync internal and public caches run: | - gcloud storage rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} + gcloud storage -u catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} zenodo-cache-sync-notify: runs-on: ubuntu-latest From 7554a360069d9d5cfbfccc6dfb1fb682efd0638d Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 16 Oct 2024 14:21:18 -0400 Subject: [PATCH 2/4] Update project path --- .github/workflows/zenodo-cache-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/zenodo-cache-sync.yml b/.github/workflows/zenodo-cache-sync.yml index 6956d37ddb..d855aae55d 100644 --- a/.github/workflows/zenodo-cache-sync.yml +++ b/.github/workflows/zenodo-cache-sync.yml @@ -66,7 +66,7 @@ jobs: - name: Sync internal and public caches run: | - gcloud storage -u catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} + gcloud storage --project=catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} zenodo-cache-sync-notify: runs-on: ubuntu-latest From 9ada9f5eec0e005bfb79824433611d66c5402477 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 16 Oct 2024 14:24:50 -0400 Subject: [PATCH 3/4] Update project to billing project --- .github/workflows/zenodo-cache-sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/zenodo-cache-sync.yml b/.github/workflows/zenodo-cache-sync.yml index d855aae55d..ccad618328 100644 --- a/.github/workflows/zenodo-cache-sync.yml +++ b/.github/workflows/zenodo-cache-sync.yml @@ -66,7 +66,7 @@ jobs: - name: Sync internal and public caches run: | - gcloud storage --project=catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} + gcloud storage --billing-project=catalyst-cooperative-pudl rsync -r ${{ env.INTERNAL_ZENODO_CACHE_BUCKET }} ${{ env.PUBLIC_ZENODO_CACHE_BUCKET }} zenodo-cache-sync-notify: runs-on: ubuntu-latest From 4158afd619fe019dec8e9a412ee3a88080d715ee Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 16 Oct 2024 14:48:19 -0400 Subject: [PATCH 4/4] Update dockerfile to replace gsutil with gcloud storage --- docker/Dockerfile | 2 +- docker/gcp_pudl_etl.sh | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 6bd5268a43..0009a70167 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Configure gsutil authentication +# Configure gcloud authentication # hadolint ignore=DL3059 RUN printf '[GoogleCompute]\nservice_account = default' > /etc/boto.cfg diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index d972caa024..8569d92bef 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -71,7 +71,7 @@ function run_pudl_etl() { function save_outputs_to_gcs() { echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT" && \ - gsutil -q -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" && \ + gcloud storage --quiet cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" && \ rm -f "$PUDL_OUTPUT/success" } @@ -85,12 +85,12 @@ function upload_to_dist_path() { # If the old outputs don't exist, these will exit with status 1, so we # don't && them with the rest of the commands. echo "Removing old outputs from $GCS_PATH." - gsutil -q -m -u "$GCP_BILLING_PROJECT" rm -r "$GCS_PATH" + gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" rm -r "$GCS_PATH" echo "Removing old outputs from $AWS_PATH." aws s3 rm --quiet --recursive "$AWS_PATH" echo "Copying outputs to $GCS_PATH:" && \ - gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "$GCS_PATH" && \ + gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "$GCS_PATH" && \ echo "Copying outputs to $AWS_PATH" && \ aws s3 cp --quiet --recursive "$PUDL_OUTPUT/" "$AWS_PATH" else @@ -113,12 +113,12 @@ function distribute_parquet() { DIST_PATH="$BUILD_REF" fi echo "Copying outputs to $PARQUET_BUCKET/$DIST_PATH" && \ - gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/$DIST_PATH" + gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/$DIST_PATH" # If running a tagged release, ALSO update the stable distribution bucket path: if [[ "$GITHUB_ACTION_TRIGGER" == "push" && "$BUILD_REF" == v20* ]]; then echo "Copying outputs to $PARQUET_BUCKET/stable" && \ - gsutil -q -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/stable" + gcloud storage --quiet --billing-project="$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/parquet/*" "$PARQUET_BUCKET/stable" fi fi } @@ -298,13 +298,13 @@ if [[ $ETL_SUCCESS == 0 ]]; then # If running a tagged release, ensure that outputs can't be accidentally deleted # It's not clear that an object lock can be applied in S3 with the AWS CLI if [[ "$GITHUB_ACTION_TRIGGER" == "push" && "$BUILD_REF" == v20* ]]; then - gsutil -m -u catalyst-cooperative-pudl retention temp set "gs://pudl.catalyst.coop/$BUILD_REF/*" 2>&1 | tee -a "$LOGFILE" + gcloud storage --billing-project="catalyst-cooperative-pudl" objects update "gs://pudl.catalyst.coop/$BUILD_REF/*" --temporary-hold 2>&1 | tee -a "$LOGFILE" GCS_TEMPORARY_HOLD_SUCCESS=${PIPESTATUS[0]} fi fi # This way we also save the logs from latter steps in the script -gsutil -q cp "$LOGFILE" "$PUDL_GCS_OUTPUT" +gcloud storage --quiet cp "$LOGFILE" "$PUDL_GCS_OUTPUT" # Notify slack about entire pipeline's success or failure; if [[ $ETL_SUCCESS == 0 && \