# test-linux-rlhf.yml (forked from pytorch/rl; 52 lines, 47 loc, 1.72 KB)
# Runs the RLHF test scripts under .github/unittest/linux_libs/scripts_rlhf/
# on a Linux GPU runner through the shared pytorch/test-infra reusable job.
name: RLHF Tests on Linux

on:
  pull_request:
  push:
    branches:
      - nightly
      - main
      - release/*
  workflow_dispatch:

concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}.
  # On main, each commit gets its own group (keyed by SHA) so that every build completes even if merging happens faster;
  # this makes it easier to discover at which point something broke. Any other ref shares one group per ref.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true

jobs:
  unittests:
    strategy:
      matrix:
        python_version: ["3.9"]
        # NOTE(review): cuda_arch_version is not referenced anywhere below; the
        # script pins CU_VERSION=cu117 and the image is cudagl 11.4.0 — confirm
        # which CUDA version is actually intended.
        cuda_arch_version: ["12.1"]
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      # NOTE(review): hard-coded to the upstream repository; presumably a fork
      # would want its own repository here — confirm against linux_job.yml.
      repository: pytorch/rl
      runner: "linux.g5.4xlarge.nvidia.gpu"
      # gpu-arch-type: cuda
      # gpu-arch-version: "11.7"
      docker-image: "nvidia/cudagl:11.4.0-base"
      timeout: 120
      script: |
        # Fail fast on errors, unset variables and broken pipes for the whole script.
        set -euo pipefail

        # Release branches test against stable PyTorch, everything else against nightly.
        # Glob match on the full ref: a regex such as 'release/*' would also match
        # unrelated refs that merely contain the word "release".
        if [[ "${{ github.ref }}" == refs/heads/release/* ]]; then
          export RELEASE=1
          export TORCH_VERSION=stable
        else
          export RELEASE=0
          export TORCH_VERSION=nightly
        fi

        # Environment consumed by the scripts invoked below.
        export PYTHON_VERSION="${{ matrix.python_version }}"
        export CU_VERSION="cu117"
        export TAR_OPTIONS="--no-same-owner"
        export UPLOAD_CHANNEL="nightly"
        export TF_CPP_MIN_LOG_LEVEL=0
        export TD_GET_DEFAULTS_TO_NONE=1

        # Pipeline stages, in order: environment setup, install, tests, post-processing.
        bash .github/unittest/linux_libs/scripts_rlhf/setup_env.sh
        bash .github/unittest/linux_libs/scripts_rlhf/install.sh
        bash .github/unittest/linux_libs/scripts_rlhf/run_test.sh
        bash .github/unittest/linux_libs/scripts_rlhf/post_process.sh