diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index b4b85dfd8a..b055ae4ab5 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -83,12 +83,16 @@ jobs: elif [[ "${{ inputs.compiler_family }}" == "cuda" ]] then echo "cuda=cuda${{ inputs.compiler_version }}" >> "$GITHUB_OUTPUT" - echo 'test_runner=["self-hosted", "GPU"]' >> "$GITHUB_OUTPUT" + echo 'test_runner=["self-hosted", "jetstream2", "GPU"]' >> "$GITHUB_OUTPUT" echo 'test_docker_options=--gpus=all' >> "$GITHUB_OUTPUT" case "${{ inputs.compiler_version }}" in 125) echo "image=nvidia/cuda:12.5.0-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; + 124) + echo "image=nvidia/cuda:12.4.1-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; + 122) + echo "image=nvidia/cuda:12.2.2-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; *) echo "Unknown compiler" && exit 1;; esac @@ -250,6 +254,10 @@ jobs: # if: ${{ contains(inputs.config, 'cuda') }} # run: uv pip install --no-deps fastrlock==0.8.2 cupy-cuda12x==12.3.0 + - name: Run nvidia-smi + if: ${{ contains(inputs.config, 'cuda') }} + run: nvidia-smi + ### Python unit tests - name: Run pytest (serial) run: python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 @@ -275,15 +283,15 @@ jobs: # that the GPU kernel is called for at least a few timesteps for these checks to be effective. - name: Run memcheck if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' && !contains(inputs.config, 'mpi')}} - run: compute-sanitizer --tool memcheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + run: compute-sanitizer --tool memcheck --launch-timeout=120 --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 - name: Run racecheck if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' && !contains(inputs.config, 'mpi')}} - run: compute-sanitizer --tool racecheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + run: compute-sanitizer --tool racecheck --launch-timeout=120 --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 - name: Run synccheck if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' && !contains(inputs.config, 'mpi')}} - run: compute-sanitizer --tool synccheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + run: compute-sanitizer --tool synccheck --launch-timeout=120 --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 ### Validation tests - name: Run pytest -m validate (serial) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4b475de6cb..5113a08a52 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -17,7 +17,7 @@ on: jobs: start_action_runners: name: Start - uses: glotzerlab/jetstream2-admin/.github/workflows/start.yaml@98f840ba341f72bf412100f2180d38c06e792b84 # v1.3.1 + uses: glotzerlab/jetstream2-admin/.github/workflows/start.yaml@9ef0cf7995c9209bfa77d45baa124f15df669393 # v1.4.0 secrets: inherit typical: @@ -53,13 +53,13 @@ jobs: - config: [gcc, 14, -py, 313, -nomd, -nohpmc] - config: [gcc, 10, -py, 39, -mpi] - - config: [cuda, 125, -py, 313, -mpi] + - config: [cuda, 124, -py, 313, -mpi] validate: true - - config: [cuda, 125, -py, 313] + - config: [cuda, 124, -py, 313] validate: true - - config: [cuda, 125, -py, 313, -mpi, -debug] + - config: [cuda, 124, -py, 313, -mpi, -debug] release: