# run_infer_example.sh
# Example launcher: starts dist_inference_runner.py once per rank
# (ranks 0..WORLD_SIZE-1) as background processes, gives every process the
# same shared options plus its own --cuda-id/--rank pair, and then waits for
# all of them to finish.
#
# Requires bash (uses arrays and a C-style for loop):
#   bash run_infer_example.sh
#
# Replace the XXXX / YYYY placeholders before running.
PATH_TO_MODEL="XXXX"
PATH_TO_INFER_DATA="YYYY"
NUM_LAYERS=12
MAX_LAYERS=96
# budget == max(#batch_size * #seq_length)
BUDGET=10000
# Number of processes to launch. The original passed the same literal (8) to
# --world-size and --pipeline-group-size and launched 8 ranks by hand, so all
# three are derived from this one value to keep them in agreement.
WORLD_SIZE=8

# Options shared by every rank. Kept as an array (not one string) so that each
# value reaches the runner as exactly one argument, even when a path contains
# spaces or glob characters.
ARGS=(
  --model-name "$PATH_TO_MODEL"
  --model-type opt
  --seed 42
  --fp16
  --num-layers "$NUM_LAYERS"
  --max-layers "$MAX_LAYERS"
  --budget "$BUDGET"
  --num-iters 10000000000
  --dist-url tcp://127.0.0.1:9031
  --token-micro-batch-size 2
  --world-size "$WORLD_SIZE"
  --pipeline-group-size "$WORLD_SIZE"
  --data-group-size 1
  --pp-mode pipe_sync_sample_mask_token_pipe
  --infer-data "$PATH_TO_INFER_DATA"
)

#######################################
# Start one runner process per rank and block until all have exited.
# Globals:   ARGS (read), WORLD_SIZE (read)
# Arguments: none
# Returns:   status of the final `wait` (0 once all children were reaped)
#######################################
launch_ranks() {
  (
    # On Ctrl-C, signal every process in the current process group so that no
    # background rank is left running.
    trap 'kill 0' SIGINT
    local rank
    for ((rank = 0; rank < WORLD_SIZE; rank++)); do
      # Rank N is given --cuda-id N, as in the original hand-written list.
      python dist_inference_runner.py "${ARGS[@]}" \
        --cuda-id "$rank" --rank "$rank" &
    done
    wait
  )
}

launch_ranks