Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve build reliability #725

Merged
merged 5 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ on:
RAPIDS_VER:
required: true
type: string
BASE_TAG:
required: true
type: string
NOTEBOOKS_TAG:
required: true
type: string
CUVS_BENCH_TAG:
required: true
type: string
Expand Down Expand Up @@ -88,38 +82,6 @@ jobs:
with:
driver: docker
endpoint: builders
- name: Build base image
uses: docker/build-push-action@v6
with:
context: context
file: Dockerfile
target: base
push: true
pull: true
build-args: |
CUDA_VER=${{ inputs.CUDA_VER }}
LINUX_DISTRO=${{ inputs.LINUX_DISTRO }}
LINUX_DISTRO_VER=${{ inputs.LINUX_DISTRO_VER }}
LINUX_VER=${{ inputs.LINUX_VER }}
PYTHON_VER=${{ inputs.PYTHON_VER }}
RAPIDS_VER=${{ inputs.RAPIDS_VER }}
tags: ${{ inputs.BASE_TAG }}-${{ matrix.ARCH }}
- name: Build notebooks image
uses: docker/build-push-action@v6
with:
context: context
file: Dockerfile
target: notebooks
push: true
pull: true
build-args: |
CUDA_VER=${{ inputs.CUDA_VER }}
LINUX_DISTRO=${{ inputs.LINUX_DISTRO }}
LINUX_DISTRO_VER=${{ inputs.LINUX_DISTRO_VER }}
LINUX_VER=${{ inputs.LINUX_VER }}
PYTHON_VER=${{ inputs.PYTHON_VER }}
RAPIDS_VER=${{ inputs.RAPIDS_VER }}
tags: ${{ inputs.NOTEBOOKS_TAG }}-${{ matrix.ARCH }}
- name: Build cuVS Benchmarks GPU image
uses: docker/build-push-action@v6
with:
Expand All @@ -134,20 +96,20 @@ jobs:
PYTHON_VER=${{ inputs.PYTHON_VER }}
RAPIDS_VER=${{ inputs.RAPIDS_VER }}
tags: ${{ inputs.CUVS_BENCH_TAG }}-${{ matrix.ARCH }}
- name: Build cuVS Benchmarks GPU with datasets image
uses: docker/build-push-action@v6
with:
context: context
file: cuvs-bench/gpu/Dockerfile
target: cuvs-bench-datasets
push: true
pull: true
build-args: |
CUDA_VER=${{ inputs.CUDA_VER }}
LINUX_VER=${{ inputs.LINUX_VER }}
PYTHON_VER=${{ inputs.PYTHON_VER }}
RAPIDS_VER=${{ inputs.RAPIDS_VER }}
tags: ${{ inputs.CUVS_BENCH_DATASETS_TAG }}-${{ matrix.ARCH }}
# - name: Build cuVS Benchmarks GPU with datasets image
# uses: docker/build-push-action@v6
# with:
# context: context
# file: cuvs-bench/gpu/Dockerfile
# target: cuvs-bench-datasets
# push: true
# pull: true
# build-args: |
# CUDA_VER=${{ inputs.CUDA_VER }}
# LINUX_VER=${{ inputs.LINUX_VER }}
# PYTHON_VER=${{ inputs.PYTHON_VER }}
# RAPIDS_VER=${{ inputs.RAPIDS_VER }}
# tags: ${{ inputs.CUVS_BENCH_DATASETS_TAG }}-${{ matrix.ARCH }}
- name: Build cuVS Benchmarks CPU image
if: inputs.BUILD_CUVS_BENCH_CPU_IMAGE
uses: docker/build-push-action@v6
Expand Down
110 changes: 110 additions & 0 deletions .github/workflows/build-rapids-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
name: Build and push image variant

# Reusable workflow: every input below must be supplied by the calling
# workflow through `with:`.
on:
  workflow_call:
    inputs:
      # JSON-encoded list of architectures; parsed with fromJSON() to build
      # the job matrix (one job per architecture).
      ARCHES:
        required: true
        type: string
      # The version/distro inputs below are forwarded unchanged to the
      # Docker build as build-args of the same name.
      CUDA_VER:
        required: true
        type: string
      LINUX_DISTRO:
        required: true
        type: string
      LINUX_DISTRO_VER:
        required: true
        type: string
      LINUX_VER:
        required: true
        type: string
      PYTHON_VER:
        required: true
        type: string
      RAPIDS_VER:
        required: true
        type: string
      # Image tag for the `base` target; "-<ARCH>" is appended when pushing.
      BASE_TAG:
        required: true
        type: string
      # Image tag for the `notebooks` target; "-<ARCH>" is appended when
      # pushing.
      NOTEBOOKS_TAG:
        required: true
        type: string

jobs:
  # Builds and pushes the `base` and `notebooks` image targets, once per
  # architecture listed in inputs.ARCHES.
  build:
    strategy:
      matrix:
        ARCH: ${{ fromJSON(inputs.ARCHES) }}
        # Single-element axes: they do not multiply the matrix, they only
        # add these values to each matrix entry.
        CUDA_VER: ["${{ inputs.CUDA_VER }}"]
        LINUX_VER: ["${{ inputs.LINUX_VER }}"]
        PYTHON_VER: ["${{ inputs.PYTHON_VER }}"]
        RAPIDS_VER: ["${{ inputs.RAPIDS_VER }}"]
      fail-fast: false
    runs-on: "linux-${{ matrix.ARCH }}-cpu4"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so that `git describe --tags` below can find a tag.
          fetch-depth: 0
      - name: Install gha-tools
        run: |
          mkdir -p /tmp/gha-tools
          curl -s -L 'https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz' | tar -xz -C /tmp/gha-tools
          echo "/tmp/gha-tools" >> "${GITHUB_PATH}"
      - name: Clean up condarc for release builds
        # A most-recent tag with no lowercase letters (after dropping its
        # first character) is treated as a release: context/condarc is
        # rewritten to use `rapidsai` instead of `rapidsai-nightly` and lines
        # mentioning `dask/label/dev` are deleted.
        run: |
          GIT_DESCRIBE_TAG="$(git describe --tags --abbrev=0)"
          GIT_DESCRIBE_TAG="${GIT_DESCRIBE_TAG:1}" # remove leading 'v'
          if [[ ! $GIT_DESCRIBE_TAG =~ [a-z] ]]; then
            # Backticks are escaped: unescaped backticks inside double quotes
            # are command substitutions, so bash would try to execute
            # `rapidsai` etc. and drop the names from the message.
            rapids-logger "Most recent tag is for release, adding \`rapidsai\` channel and removing \`rapidsai-nightly\` and \`dask/label/dev\` channels."
            sed -i 's|rapidsai-nightly|rapidsai|;\|dask/label/dev|d' context/condarc
          else
            rapids-logger "Most recent tag is an alpha. Build will use nightly channels."
          fi
      - name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
          password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
      - name: Set up Docker Context for Buildx
        id: buildx-context
        run: |
          docker context create builders
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver: docker
          # Name of the Docker context created in the previous step.
          endpoint: builders
      - name: Build base image
        uses: docker/build-push-action@v6
        with:
          context: context
          file: Dockerfile
          target: base
          push: true
          pull: true
          build-args: |
            CUDA_VER=${{ inputs.CUDA_VER }}
            LINUX_DISTRO=${{ inputs.LINUX_DISTRO }}
            LINUX_DISTRO_VER=${{ inputs.LINUX_DISTRO_VER }}
            LINUX_VER=${{ inputs.LINUX_VER }}
            PYTHON_VER=${{ inputs.PYTHON_VER }}
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
          tags: ${{ inputs.BASE_TAG }}-${{ matrix.ARCH }}
      - name: Build notebooks image
        uses: docker/build-push-action@v6
        with:
          context: context
          file: Dockerfile
          target: notebooks
          push: true
          pull: true
          build-args: |
            CUDA_VER=${{ inputs.CUDA_VER }}
            LINUX_DISTRO=${{ inputs.LINUX_DISTRO }}
            LINUX_DISTRO_VER=${{ inputs.LINUX_DISTRO_VER }}
            LINUX_VER=${{ inputs.LINUX_VER }}
            PYTHON_VER=${{ inputs.PYTHON_VER }}
            RAPIDS_VER=${{ inputs.RAPIDS_VER }}
          tags: ${{ inputs.NOTEBOOKS_TAG }}-${{ matrix.ARCH }}
64 changes: 54 additions & 10 deletions .github/workflows/build-test-publish-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ jobs:
needs:
- checks
- compute-matrix
- build
- build-multiarch-manifest
- build-rapids
- build-rapids-multiarch-manifest
- build-cuvs
- build-cuvs-multiarch-manifest
- test
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down Expand Up @@ -137,13 +139,13 @@ jobs:
export TEST_MATRIX

echo "TEST_MATRIX=$(yq -n -o json 'env(TEST_MATRIX)' | jq -c '{include: .}')" | tee --append "${GITHUB_OUTPUT}"
build:
build-rapids:
needs: [checks, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
secrets: inherit
uses: ./.github/workflows/build-image.yml
uses: ./.github/workflows/build-rapids-image.yml
with:
ARCHES: ${{ toJSON(matrix.ARCHES) }}
CUDA_VER: ${{ matrix.CUDA_VER }}
Expand All @@ -152,7 +154,6 @@ jobs:
LINUX_VER: ${{ matrix.LINUX_VER }}
PYTHON_VER: ${{ matrix.PYTHON_VER }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
BUILD_CUVS_BENCH_CPU_IMAGE: ${{ matrix.BUILD_CUVS_BENCH_CPU_IMAGE }}
BASE_TAG:
"rapidsai/${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}:\
${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}\
Expand All @@ -167,6 +168,22 @@ jobs:
${{ needs.compute-matrix.outputs.ALPHA_TAG }}-\
cuda${{ matrix.CUDA_TAG }}-\
py${{ matrix.PYTHON_VER }}"
build-cuvs:
needs: [checks, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
secrets: inherit
uses: ./.github/workflows/build-cuvs-image.yml
with:
ARCHES: ${{ toJSON(matrix.ARCHES) }}
CUDA_VER: ${{ matrix.CUDA_VER }}
LINUX_DISTRO: ${{ matrix.LINUX_DISTRO }}
LINUX_DISTRO_VER: ${{ matrix.LINUX_DISTRO_VER }}
LINUX_VER: ${{ matrix.LINUX_VER }}
PYTHON_VER: ${{ matrix.PYTHON_VER }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
BUILD_CUVS_BENCH_CPU_IMAGE: ${{ matrix.BUILD_CUVS_BENCH_CPU_IMAGE }}
CUVS_BENCH_TAG:
"rapidsai/${{ needs.compute-matrix.outputs.CUVS_BENCH_IMAGE_REPO }}:\
${{ needs.compute-matrix.outputs.CUVS_BENCH_TAG_PREFIX }}\
Expand All @@ -187,8 +204,8 @@ jobs:
${{ needs.compute-matrix.outputs.RAPIDS_VER }}\
${{ needs.compute-matrix.outputs.ALPHA_TAG }}-\
py${{ matrix.PYTHON_VER }}"
build-multiarch-manifest:
needs: [build, compute-matrix]
build-rapids-multiarch-manifest:
needs: [build-rapids, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
Expand All @@ -206,7 +223,6 @@ jobs:
- name: Create multiarch manifest
shell: bash
env:
CUVS_BENCH_CPU_IMAGE_BUILT: ${{ matrix.BUILD_CUVS_BENCH_CPU_IMAGE }}
BASE_IMAGE_REPO: ${{ needs.compute-matrix.outputs.BASE_IMAGE_REPO }}
BASE_TAG_PREFIX: ${{ needs.compute-matrix.outputs.BASE_TAG_PREFIX }}
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
Expand All @@ -215,6 +231,34 @@ jobs:
PYTHON_VER: ${{ matrix.PYTHON_VER }}
NOTEBOOKS_IMAGE_REPO: ${{ needs.compute-matrix.outputs.NOTEBOOKS_IMAGE_REPO }}
NOTEBOOKS_TAG_PREFIX: ${{ needs.compute-matrix.outputs.NOTEBOOKS_TAG_PREFIX }}
GPUCIBOT_DOCKERHUB_USER: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
GPUCIBOT_DOCKERHUB_TOKEN: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
ARCHES: ${{ toJSON(matrix.ARCHES) }}
run: ci/create-rapids-multiarch-manifest.sh
build-cuvs-multiarch-manifest:
needs: [build-cuvs, compute-matrix]
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
- name: Create multiarch manifest
shell: bash
env:
RAPIDS_VER: ${{ needs.compute-matrix.outputs.RAPIDS_VER }}
ALPHA_TAG: ${{ needs.compute-matrix.outputs.ALPHA_TAG }}
CUDA_TAG: ${{ matrix.CUDA_TAG }}
PYTHON_VER: ${{ matrix.PYTHON_VER }}
CUVS_BENCH_CPU_IMAGE_BUILT: ${{ matrix.BUILD_CUVS_BENCH_CPU_IMAGE }}
CUVS_BENCH_IMAGE_REPO: ${{ needs.compute-matrix.outputs.CUVS_BENCH_IMAGE_REPO }}
CUVS_BENCH_TAG_PREFIX: ${{ needs.compute-matrix.outputs.CUVS_BENCH_TAG_PREFIX }}
CUVS_BENCH_DATASETS_IMAGE_REPO: ${{ needs.compute-matrix.outputs.CUVS_BENCH_DATASETS_IMAGE_REPO }}
Expand All @@ -224,9 +268,9 @@ jobs:
GPUCIBOT_DOCKERHUB_USER: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
GPUCIBOT_DOCKERHUB_TOKEN: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
ARCHES: ${{ toJSON(matrix.ARCHES) }}
run: ci/create-multiarch-manifest.sh
run: ci/create-cuvs-multiarch-manifest.sh
test:
needs: [compute-matrix, build]
needs: [compute-matrix, build-rapids]
if: inputs.run_tests
strategy:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.TEST_MATRIX) }}
Expand Down
13 changes: 10 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ ARG RAPIDS_VER

SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

RUN <<EOF
apt-get update
apt-get install -y wget
wget https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - | tar -xz -C /usr/local/bin
apt-get purge -y --auto-remove wget
rm -rf /var/lib/apt/lists/*
EOF
RUN useradd -rm -d /home/rapids -s /bin/bash -g conda -u 1001 rapids

USER rapids
Expand All @@ -57,7 +64,7 @@ conda config --show-sources
conda list --show-channel-urls

# Install RAPIDS
mamba install -y -n base \
rapids-mamba-retry install -y -n base \
"rapids=${RAPIDS_VER}.*" \
"python=${PYTHON_VER}.*" \
"cuda-version=${CUDA_VER%.*}.*" \
Expand Down Expand Up @@ -90,12 +97,12 @@ COPY --from=dependencies --chown=rapids /test_notebooks_dependencies.yaml test_n
COPY --from=dependencies --chown=rapids /notebooks /home/rapids/notebooks

RUN <<EOF
mamba env update -n base -f test_notebooks_dependencies.yaml
rapids-mamba-retry env update -n base -f test_notebooks_dependencies.yaml
conda clean -afy
EOF

RUN <<EOF
mamba install -y -n base \
rapids-mamba-retry install -y -n base \
"jupyterlab=4" \
dask-labextension \
jupyterlab-nvdashboard
Expand Down
29 changes: 29 additions & 0 deletions ci/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

set -eEuo pipefail

# Authenticate against the DockerHub API and export the resulting token as
# HUB_TOKEN for the tag-existence checks in this file.
#
# Reads:   GPUCIBOT_DOCKERHUB_USER, GPUCIBOT_DOCKERHUB_TOKEN (must be set;
#          `set -u` aborts otherwise).
# Exports: HUB_TOKEN (masked in GitHub Actions logs via ::add-mask::).
#
# The login payload is assembled with `jq --arg` so that credentials
# containing quotes or backslashes still produce valid JSON, and
# `.token // empty` turns a login response without a token into an empty
# string instead of the literal text "null".
HUB_TOKEN=$(
  curl -s -H "Content-Type: application/json" \
    -X POST \
    -d "$(jq -n -c \
      --arg username "$GPUCIBOT_DOCKERHUB_USER" \
      --arg password "$GPUCIBOT_DOCKERHUB_TOKEN" \
      '{username: $username, password: $password}')" \
    https://hub.docker.com/v2/users/login/ | jq -r '.token // empty'
)
# Fail here with an accurate message rather than continuing with an unusable
# token and reporting a misleading error from a later tag check.
if [ -z "$HUB_TOKEN" ]; then
  echo "Error: DockerHub login did not return a token; check GPUCIBOT_DOCKERHUB_USER and GPUCIBOT_DOCKERHUB_TOKEN." >&2
  exit 1
fi
echo "::add-mask::${HUB_TOKEN}"
export HUB_TOKEN

# check_tag_exists REPO TAG
#
# Queries the DockerHub API for ${org}/REPO:TAG using HUB_TOKEN.
# Prints an error and exits the shell with status 1 unless the API answers
# with HTTP 200.
check_tag_exists() {
  local repo="$1" tag="$2"
  local tag_url="https://hub.docker.com/v2/repositories/${org}/${repo}/tags/${tag}/"
  # Declared separately from the assignment so a curl failure is not masked
  # by the exit status of `local`.
  local http_status

  # Discard the response body; only the HTTP status code is captured.
  http_status=$(
    curl -s -o /dev/null -w "%{http_code}" \
      -H "Authorization: JWT $HUB_TOKEN" \
      "$tag_url"
  )

  if [ "$http_status" -ne 200 ]; then
    echo "Error: Required image tag ${repo}:${tag} does not exist. This implies that the image was not built successfully in the build job."
    exit 1
  fi
}

# DockerHub organisation that check_tag_exists looks repositories up in.
export org="rapidsai"
Loading
Loading