From 59e9d47cb71291d025087d888b011a7b72d68722 Mon Sep 17 00:00:00 2001
From: Aarav Navani <arav.navani@gmail.com>
Date: Wed, 14 Aug 2024 12:56:13 -0700
Subject: [PATCH 01/16] fastapi setup

---
 app.py | 102 +++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 29 deletions(-)

diff --git a/app.py b/app.py
index 4f7914a..aab6985 100644
--- a/app.py
+++ b/app.py
@@ -1,34 +1,78 @@
-import json
-import torch
-import nltk
-from typing import Any, Dict, List
+
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Union
 from transformers import pipeline
+import torch
 
-class InferlessPythonModel:
+app = FastAPI()
 
-    def initialize(self):
-        self._classifier = pipeline(
-                "zero-shot-classification",
-                model="facebook/bart-large-mnli",
-                device="cuda",
-                hypothesis_template="This example has to do with topic {}.",
-                multi_label=True,
-            )
-        #self._classifier.to("cuda")
-        
-    def infer(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
-        result = self._classifier(inputs["text"], inputs["candidate_topics"])
-        topics = result["labels"]
-        scores = result["scores"]
-        found_topics = []
-        for topic, score in zip(topics, scores):
-            if score > inputs["zero_shot_threshold"]:
-                found_topics.append(topic)
-        if not found_topics:
-            return {"results": ["No valid topic found."]}
-        return {"results": found_topics}
-        
-    def finalize(self):
-        pass
+# Initialize the zero-shot classification pipeline
+classifier = pipeline(
+    "zero-shot-classification",
+    model="facebook/bart-large-mnli",
+    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
+    hypothesis_template="This example has to do with topic {}.",
+    multi_label=True,
+)
+
+class InferenceData(BaseModel):
+    name: str
+    shape: List[int]
+    data: Union[List[str], List[float]]
+    datatype: str
+
+class InputRequest(BaseModel):
+    inputs: List[InferenceData]
+
+class OutputResponse(BaseModel):
+    modelname: str
+    modelversion: str
+    outputs: List[InferenceData]
+
+@app.post("/validate", response_model=OutputResponse)
+async def restrict_to_topic(input_request: InputRequest):
+    print('make request')
+    text = None
+    candidate_topics = None
+    zero_shot_threshold = 0.5
+    
+    for inp in input_request.inputs:
+        if inp.name == "text":
+            text = inp.data[0]
+        elif inp.name == "candidate_topics":
+            candidate_topics = inp.data
+        elif inp.name == "zero_shot_threshold":
+            zero_shot_threshold = float(inp.data[0])
+    
+    if text is None or candidate_topics is None:
+        raise HTTPException(status_code=400, detail="Invalid input format")
+    
+    # Perform zero-shot classification
+    result = classifier(text, candidate_topics)
+    topics = result["labels"]
+    scores = result["scores"]
+    found_topics = [topic for topic, score in zip(topics, scores) if score > zero_shot_threshold]
     
+    if not found_topics:
+        found_topics = ["No valid topic found."]
     
+    output_data = OutputResponse(
+        modelname="RestrictToTopicModel",
+        modelversion="1",
+        outputs=[
+            InferenceData(
+                name="results",
+                datatype="BYTES",
+                shape=[len(found_topics)],
+                data=found_topics
+            )
+        ]
+    )
+    
+    print(f"Output data: {output_data}")
+    return output_data
+
+# Run the app with uvicorn
+# Save this script as app.py and run with: uvicorn app:app --reload
\ No newline at end of file

From 10298c7f0f55800795776b97b3fc8b11a08fe56b Mon Sep 17 00:00:00 2001
From: Aarav Navani <arav.navani@gmail.com>
Date: Wed, 14 Aug 2024 12:57:55 -0700
Subject: [PATCH 02/16] newline

---
 app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app.py b/app.py
index aab6985..5f39f91 100644
--- a/app.py
+++ b/app.py
@@ -75,4 +75,4 @@ async def restrict_to_topic(input_request: InputRequest):
     return output_data
 
 # Run the app with uvicorn
-# Save this script as app.py and run with: uvicorn app:app --reload
\ No newline at end of file
+# Save this script as app.py and run with: uvicorn app:app --reload

From a62454e697b2b2c07abf52231bef11c854bd4c36 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Tue, 20 Aug 2024 14:36:13 -0700
Subject: [PATCH 03/16] Updated for sagemaker endpoints compatibility

---
 Dockerfile | 37 ++++++++++++++++++++++
 app.py     | 49 +++++++++++++++++++++++------
 serve      | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+), 10 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 serve

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..35c79b5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,37 @@
+# Use an official PyTorch image with CUDA support
+FROM pytorch/pytorch:2.1.2-cuda11.8-cudnn8-runtime
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the pyproject.toml and any other necessary files (e.g., README, LICENSE)
+COPY pyproject.toml .
+COPY README.md .
+COPY LICENSE .
+
+# Install dependencies from the pyproject.toml file
+RUN pip install --upgrade pip setuptools wheel
+RUN pip install .
+
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+
+# Install the necessary packages for the FastAPI app
+RUN pip install fastapi "uvicorn[standard]" gunicorn transformers accelerate huggingface_hub hf-transfer "jinja2>=3.1.0"
+
+# Copy the entire project code into the container
+COPY . /app
+
+# Copy the serve script into the container
+COPY serve /usr/local/bin/serve
+
+# Make the serve script executable
+RUN chmod +x /usr/local/bin/serve
+
+# Set the deployment environment name; the device (cuda or cpu) is detected at runtime by app.py
+ENV env=prod
+
+# Expose the port that the FastAPI app will run on
+EXPOSE 8080
+
+# Set the entrypoint for SageMaker to the serve script
+ENTRYPOINT ["serve"]
diff --git a/app.py b/app.py
index 5f39f91..88cc819 100644
--- a/app.py
+++ b/app.py
@@ -1,21 +1,39 @@
-
-
+import os
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import List, Union
-from transformers import pipeline
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline, ZeroShotClassificationPipeline
 import torch
 
 app = FastAPI()
 
 # Initialize the zero-shot classification pipeline
-classifier = pipeline(
-    "zero-shot-classification",
-    model="facebook/bart-large-mnli",
-    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
-    hypothesis_template="This example has to do with topic {}.",
-    multi_label=True,
-)
+model_save_directory = os.environ.get("MODEL_SAVE_DIRECTORY", "/opt/ml/model")  # same override the serve script reads
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+print(f"Using torch device: {torch_device}")
+
+if os.path.isfile(os.path.join(model_save_directory, "config.json")):  # load locally only when a saved snapshot is really there
+    print(f"Using cached model in {model_save_directory}...")
+    model = AutoModelForSequenceClassification.from_pretrained(model_save_directory)
+    tokenizer = AutoTokenizer.from_pretrained(model_save_directory)
+    classifier = ZeroShotClassificationPipeline(
+        model=model,
+        tokenizer=tokenizer,
+        device=torch.device(torch_device),
+        hypothesis_template="This example has to do with topic {}.",
+        multi_label=True
+    )
+else:  # directory missing or empty: fall back to fetching the model by name
+    print("Downloading model from Hugging Face...")
+    classifier = pipeline(
+        "zero-shot-classification",
+        model="facebook/bart-large-mnli",
+        device=torch.device(torch_device),
+        hypothesis_template="This example has to do with topic {}.",
+        multi_label=True,
+    )
+
 
 class InferenceData(BaseModel):
     name: str
@@ -74,5 +92,16 @@ async def restrict_to_topic(input_request: InputRequest):
     print(f"Output data: {output_data}")
     return output_data
 
+
+# Sagemaker specific endpoints
+@app.get("/ping")
+async def healtchcheck():
+    return {"status": "ok"}
+
+@app.post("/invocations", response_model=OutputResponse)
+async def retrict_to_topic_sagemaker(input_request: InputRequest):
+    return await restrict_to_topic(input_request)
+
+
 # Run the app with uvicorn
 # Save this script as app.py and run with: uvicorn app:app --reload
diff --git a/serve b/serve
new file mode 100644
index 0000000..f0e117a
--- /dev/null
+++ b/serve
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import multiprocessing
+import os
+import signal
+import subprocess
+import sys
+import math
+
+import torch
+from huggingface_hub import snapshot_download
+
+cpu_count = multiprocessing.cpu_count()
+default_worker_count = max(cpu_count // 8,1)
+
+model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', '60')
+model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', default_worker_count))
+model_save_directory = os.environ.get('MODEL_SAVE_DIRECTORY', '/opt/ml/model')
+
+MODEL_NAME = "facebook/bart-large-mnli"
+DEFAULT_REVISION = "d7645e127eaf1aefc7862fd59a17a5aa8558b8ce"
+
+print(f'Model server workers: {model_server_workers}')
+print(f'Model save directory: {model_save_directory}')
+print(f'Model server timeout: {model_server_timeout}')
+
+print(f'CPU count: {cpu_count}')
+
+def sigterm_handler(gunicorn_pid):
+    try:
+        os.kill(gunicorn_pid, signal.SIGTERM)
+    except OSError:
+        pass
+    sys.exit(0)
+
+def load_and_save_model():
+    try:
+        
+        print('Loading the model...')
+        # Ensure the save directory exists
+        if not os.path.exists(model_save_directory):
+            os.makedirs(model_save_directory)
+
+            print("Downloading the model...")
+
+            snapshot_download(
+                MODEL_NAME,
+                local_dir=model_save_directory,
+                ignore_patterns=[
+                    "*.pt",
+                    "*.bin",
+                    "*.pth",
+                    "original/*",
+                ],  # Ensure safetensors
+                revision=DEFAULT_REVISION,
+                force_download=False,
+            )
+        else:
+            print("Model already downloaded.")
+    
+        print('Model loaded and saved successfully.')
+    except Exception as e:
+        print(f'Error loading and saving the model: {e}')
+        sys.exit(1)
+
+def start_server():
+    print(f'Starting the inference server with {model_server_workers} workers.')
+    
+    load_and_save_model()
+
+    try:
+        # Start Gunicorn to serve the FastAPI app
+        gunicorn = subprocess.Popen(['gunicorn',
+                                     '--timeout', str(model_server_timeout),
+                                     '-k', 'uvicorn.workers.UvicornWorker',
+                                     '-b', '0.0.0.0:8080',
+                                     '-w', str(model_server_workers),
+                                     'app:app'])
+
+        signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(gunicorn.pid))
+
+        # Wait for the Gunicorn process to exit
+        gunicorn.wait()
+
+    except Exception as e:
+        print(f'Error starting the inference server: {e}')
+        sys.exit(1)
+
+    print('Inference server exiting')
+
+if __name__ == '__main__':
+    start_server()

From a0395a0eeb305d9bdd3569a3086c3471c8cc2617 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Wed, 21 Aug 2024 11:44:34 -0700
Subject: [PATCH 04/16] wip: adding CI

---
 .github/ecr_sagemaker_publish.yml | 62 +++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 .github/ecr_sagemaker_publish.yml

diff --git a/.github/ecr_sagemaker_publish.yml b/.github/ecr_sagemaker_publish.yml
new file mode 100644
index 0000000..bcc3834
--- /dev/null
+++ b/.github/ecr_sagemaker_publish.yml
@@ -0,0 +1,62 @@
+name: Sagemaker ECR Publish (RC)
+
+on:
+  push:
+    branches:
+      - main
+      - feat/sagemaker-serve
+
+jobs:
+  publish_image:
+    name: Publish Sagemaker Image (Release Candidate)
+    runs-on: ubuntu-latest
+    env:
+      AWS_REGION: us-east-1
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_ECR_REPOSITORY: ${{ vars.AWS_ECR_REPOSITORY }} 
+      WORKING_DIR: "./"
+    steps: 
+    
+      - name: Check out head
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@master
+        with:
+          platforms: linux/amd64
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@master
+        with:
+          platforms: linux/amd64
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-region: ${{ env.AWS_REGION }}
+          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v1
+
+      - name: Build & Push ECR Image
+        uses: docker/build-push-action@v2
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: ${{ env.WORKING_DIR }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          push: true
+          tags: ${{ env.AWS_ECR_REPOSITORY }}:restrict2topic-rc
+
+      - name: Deploy to ECS
+        run: |
+          aws ecs update-service --cluster ${{ env.AWS_ECS_CLUSTER_NAME }} --service ${{ env.AWS_ECS_SERVICE_NAME }} --desired-count ${{ env.AWS_ECS_DESIRED_TASK_COUNT }} --force-new-deployment
+        env:
+          AWS_DEFAULT_REGION: ${{ env.AWS_REGION }}
\ No newline at end of file

From 3b6084c2ecb4ccfa69df2db4c51cbf4c276c966e Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Wed, 21 Aug 2024 11:46:31 -0700
Subject: [PATCH 05/16] moved to workflows folder

---
 .github/{ => workflows}/ecr_sagemaker_publish.yml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/{ => workflows}/ecr_sagemaker_publish.yml (100%)

diff --git a/.github/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
similarity index 100%
rename from .github/ecr_sagemaker_publish.yml
rename to .github/workflows/ecr_sagemaker_publish.yml

From 332b33ac901ee628cae9abe880e0a35df143b456 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 15:44:51 -0700
Subject: [PATCH 06/16] testing ci

---
 .github/workflows/ecr_sagemaker_publish.yml | 44 +++++++++++----------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index bcc3834..539e435 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -6,15 +6,18 @@ on:
       - main
       - feat/sagemaker-serve
 
+# Needed for OIDC / assume role
+permissions:
+  id-token: write  
+  contents: read    
+
 jobs:
   publish_image:
     name: Publish Sagemaker Image (Release Candidate)
     runs-on: ubuntu-latest
     env:
       AWS_REGION: us-east-1
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      AWS_ECR_REPOSITORY: ${{ vars.AWS_ECR_REPOSITORY }} 
+      AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
       WORKING_DIR: "./"
     steps: 
     
@@ -37,26 +40,25 @@ jobs:
         uses: aws-actions/configure-aws-credentials@v4
         with:
           aws-region: ${{ env.AWS_REGION }}
-          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
-          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+          role-to-assume: ${{ env.AWS_CI_ROLE__PROD}}
 
       - name: Login to Amazon ECR
         id: login-ecr
         uses: aws-actions/amazon-ecr-login@v1
 
-      - name: Build & Push ECR Image
-        uses: docker/build-push-action@v2
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          context: ${{ env.WORKING_DIR }}
-          platforms: linux/amd64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          push: true
-          tags: ${{ env.AWS_ECR_REPOSITORY }}:restrict2topic-rc
-
-      - name: Deploy to ECS
-        run: |
-          aws ecs update-service --cluster ${{ env.AWS_ECS_CLUSTER_NAME }} --service ${{ env.AWS_ECS_SERVICE_NAME }} --desired-count ${{ env.AWS_ECS_DESIRED_TASK_COUNT }} --force-new-deployment
-        env:
-          AWS_DEFAULT_REGION: ${{ env.AWS_REGION }}
\ No newline at end of file
+      # - name: Build & Push ECR Image
+      #   uses: docker/build-push-action@v2
+      #   with:
+      #     builder: ${{ steps.buildx.outputs.name }}
+      #     context: ${{ env.WORKING_DIR }}
+      #     platforms: linux/amd64
+      #     cache-from: type=gha
+      #     cache-to: type=gha,mode=max
+      #     push: true
+      #     tags: ${{ env.AWS_ECR_REPOSITORY }}:restrict2topic-rc
+
+      # - name: Deploy to ECS
+      #   run: |
+      #     aws ecs update-service --cluster ${{ env.AWS_ECS_CLUSTER_NAME }} --service ${{ env.AWS_ECS_SERVICE_NAME }} --desired-count ${{ env.AWS_ECS_DESIRED_TASK_COUNT }} --force-new-deployment
+      #   env:
+      #     AWS_DEFAULT_REGION: ${{ env.AWS_REGION }}
\ No newline at end of file

From e64b1558038a4d7d5443b7720d392324354a729d Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 15:51:51 -0700
Subject: [PATCH 07/16] updated masked password field

---
 .github/workflows/ecr_sagemaker_publish.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 539e435..09f4f81 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -44,7 +44,8 @@ jobs:
 
       - name: Login to Amazon ECR
         id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v1
+        uses: aws-actions/amazon-ecr-login@v2
+          mask-password: 'true'
 
       # - name: Build & Push ECR Image
       #   uses: docker/build-push-action@v2

From 676144dcfa9d1eb9f6a9a4587895088c0b6c31d1 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 15:52:30 -0700
Subject: [PATCH 08/16] fix syntax

---
 .github/workflows/ecr_sagemaker_publish.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 09f4f81..2ee7473 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -45,6 +45,7 @@ jobs:
       - name: Login to Amazon ECR
         id: login-ecr
         uses: aws-actions/amazon-ecr-login@v2
+        with:
           mask-password: 'true'
 
       # - name: Build & Push ECR Image

From 9470f48bd7856479f3c6b3d84154aa867a4debe1 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:06:40 -0700
Subject: [PATCH 09/16] push to ecr

---
 .github/workflows/ecr_sagemaker_publish.yml | 26 ++++++++-------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 2ee7473..321a94b 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -48,19 +48,13 @@ jobs:
         with:
           mask-password: 'true'
 
-      # - name: Build & Push ECR Image
-      #   uses: docker/build-push-action@v2
-      #   with:
-      #     builder: ${{ steps.buildx.outputs.name }}
-      #     context: ${{ env.WORKING_DIR }}
-      #     platforms: linux/amd64
-      #     cache-from: type=gha
-      #     cache-to: type=gha,mode=max
-      #     push: true
-      #     tags: ${{ env.AWS_ECR_REPOSITORY }}:restrict2topic-rc
-
-      # - name: Deploy to ECS
-      #   run: |
-      #     aws ecs update-service --cluster ${{ env.AWS_ECS_CLUSTER_NAME }} --service ${{ env.AWS_ECS_SERVICE_NAME }} --desired-count ${{ env.AWS_ECS_DESIRED_TASK_COUNT }} --force-new-deployment
-      #   env:
-      #     AWS_DEFAULT_REGION: ${{ env.AWS_REGION }}
\ No newline at end of file
+      - name: Build & Push ECR Image
+        uses: docker/build-push-action@v2
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: ${{ env.WORKING_DIR }}
+          platforms: linux/amd64
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          push: true
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:restrict2topic-rc
\ No newline at end of file

From 144cfa9d7b3aab60fe025689004178fe01a019ef Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:25:21 -0700
Subject: [PATCH 10/16] test

---
 .github/workflows/ecr_sagemaker_publish.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 321a94b..d8ece2d 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -1,10 +1,19 @@
 name: Sagemaker ECR Publish (RC)
 
+# on manual trigger, with inputs: is_release_candidate
 on:
   push:
     branches:
       - main
       - feat/sagemaker-serve
+  workflow_dispatch:
+    inputs:
+      is_release_candidate:
+        description: 'Is this a release candidate?'
+        required: true
+        default: 'true'
+
+  
 
 # Needed for OIDC / assume role
 permissions:
@@ -19,6 +28,7 @@ jobs:
       AWS_REGION: us-east-1
       AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
       WORKING_DIR: "./"
+      IS_RC: ${{ github.event.inputs.is_release_candidate }}
     steps: 
     
       - name: Check out head
@@ -26,6 +36,9 @@ jobs:
         with:
           persist-credentials: false
 
+      - name: Echo is RC
+        run: echo "Is Release Candidate: ${{ env.IS_RC }}"
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:

From 4cdbcb98f889661b95d717537f615f577d215faf Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:27:20 -0700
Subject: [PATCH 11/16] chore: Update IS_RC default value to 'true' in
 ecr_sagemaker_publish workflow

---
 .github/workflows/ecr_sagemaker_publish.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index d8ece2d..7ac0a1d 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -28,7 +28,7 @@ jobs:
       AWS_REGION: us-east-1
       AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
       WORKING_DIR: "./"
-      IS_RC: ${{ github.event.inputs.is_release_candidate }}
+      IS_RC: ${{ github.event.inputs.is_release_candidate || 'true' }}
     steps: 
     
       - name: Check out head
@@ -36,8 +36,7 @@ jobs:
         with:
           persist-credentials: false
 
-      - name: Echo is RC
-        run: echo "Is Release Candidate: ${{ env.IS_RC }}"
+      - run: echo "Is Release Candidate: ${{ env.IS_RC }}"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master

From cedee36473ac06c4051de99f39415d138f1c0554 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:28:34 -0700
Subject: [PATCH 12/16] fix

---
 .github/workflows/ecr_sagemaker_publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 7ac0a1d..e0c4fed 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -36,7 +36,7 @@ jobs:
         with:
           persist-credentials: false
 
-      - run: echo "Is Release Candidate: ${{ env.IS_RC }}"
+      - run: echo "Is Release Candidate: $IS_RC"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master

From 097a1f2e061818b01441abd177913a89bbae56a8 Mon Sep 17 00:00:00 2001
From: Alejandro Esquivel <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:30:25 -0700
Subject: [PATCH 13/16] Update ecr_sagemaker_publish.yml

---
 .github/workflows/ecr_sagemaker_publish.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index e0c4fed..1e4a0ec 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -36,7 +36,7 @@ jobs:
         with:
           persist-credentials: false
 
-      - run: echo "Is Release Candidate: $IS_RC"
+      - run: echo "Is Release Candidate:$IS_RC"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
@@ -69,4 +69,4 @@ jobs:
           cache-from: type=gha
           cache-to: type=gha,mode=max
           push: true
-          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:restrict2topic-rc
\ No newline at end of file
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:restrict2topic-rc

From 0d65eda8a3650e837a1872af0df77c713855908d Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:37:05 -0700
Subject: [PATCH 14/16] using different tags for rc and prod images

---
 .github/workflows/ecr_sagemaker_publish.yml | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 1e4a0ec..28f3dc5 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -25,10 +25,11 @@ jobs:
     name: Publish Sagemaker Image (Release Candidate)
     runs-on: ubuntu-latest
     env:
+      VALIDATOR_TAG_NAME: restrict2topic
       AWS_REGION: us-east-1
-      AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
       WORKING_DIR: "./"
-      IS_RC: ${{ github.event.inputs.is_release_candidate || 'true' }}
+      AWS_CI_ROLE__PROD: ${{ secrets.AWS_CI_ROLE__PROD }}
+      AWS_ECR_RELEASE_CANDIDATE: ${{ inputs.is_release_candidate || 'true' }}
     steps: 
     
       - name: Check out head
@@ -36,7 +37,17 @@ jobs:
         with:
           persist-credentials: false
 
-      - run: echo "Is Release Candidate:$IS_RC"
+      - run: |
+          if [ ${{ env.AWS_ECR_RELEASE_CANDIDATE }} == 'true' ]; then
+            echo "This is a release candidate."
+            echo "Setting tag to -rc"
+            ECR_TAG=$VALIDATOR_TAG_NAME-rc
+          else
+            echo "This is a production image."
+            ECR_TAG=$VALIDATOR_TAG_NAME
+          fi
+          echo "Setting ECR tag to $ECR_TAG"
+          echo "ECR_TAG=$ECR_TAG" >> "$GITHUB_OUTPUT"
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
@@ -69,4 +80,4 @@ jobs:
           cache-from: type=gha
           cache-to: type=gha,mode=max
           push: true
-          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:restrict2topic-rc
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:${{ env.ECR_TAG }}

From 6569c3ca2c5c1032d23396f51dc579ece45be580 Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:41:09 -0700
Subject: [PATCH 15/16] fix tagging

---
 .github/workflows/ecr_sagemaker_publish.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 28f3dc5..36a8ae0 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -37,7 +37,9 @@ jobs:
         with:
           persist-credentials: false
 
-      - run: |
+      - name: Set ECR Tag
+        id: set-ecr-tag
+        run: |
           if [ ${{ env.AWS_ECR_RELEASE_CANDIDATE }} == 'true' ]; then
             echo "This is a release candidate."
             echo "Setting tag to -rc"
@@ -80,4 +82,4 @@ jobs:
           cache-from: type=gha
           cache-to: type=gha,mode=max
           push: true
-          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:${{ env.ECR_TAG }}
+          tags: 064852979926.dkr.ecr.us-east-1.amazonaws.com/gr-sagemaker-validator-images-prod:${{ steps.set-ecr-tag.outputs.ECR_TAG }}

From 0bb9992d0554bf2a2e05a07015a20303726f8b3d Mon Sep 17 00:00:00 2001
From: Alejandro <ae@alejandro.ltd>
Date: Thu, 22 Aug 2024 16:47:36 -0700
Subject: [PATCH 16/16] cleanup

---
 .github/workflows/ecr_sagemaker_publish.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/ecr_sagemaker_publish.yml b/.github/workflows/ecr_sagemaker_publish.yml
index 36a8ae0..365013c 100644
--- a/.github/workflows/ecr_sagemaker_publish.yml
+++ b/.github/workflows/ecr_sagemaker_publish.yml
@@ -1,11 +1,9 @@
 name: Sagemaker ECR Publish (RC)
 
-# on manual trigger, with inputs: is_release_candidate
 on:
   push:
     branches:
       - main
-      - feat/sagemaker-serve
   workflow_dispatch:
     inputs:
       is_release_candidate:
@@ -13,8 +11,6 @@ on:
         required: true
         default: 'true'
 
-  
-
 # Needed for OIDC / assume role
 permissions:
   id-token: write