
Commit 7e49fbc

LLM: make finetuning examples more common for other models (#10078)
1 parent 90f004b commit 7e49fbc

8 files changed: +27 −19 lines
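
The change swaps the Llama-specific LlamaTokenizer for AutoTokenizer and passes trust_remote_code=True when loading both the model and the tokenizer, so the same finetuning examples also work with base models other than Llama, including checkpoints that ship their own tokenizer or modeling code. A minimal sketch of the generic loading pattern the scripts converge on; the repo id is a placeholder and the AutoModelForCausalLM import path is an assumption about the library, not part of the diff:

import torch
from transformers import AutoTokenizer
# Assumed import path: the examples use the library's own AutoModelForCausalLM wrapper,
# which accepts optimize_model / modules_to_not_convert as shown in the diff.
from bigdl.llm.transformers import AutoModelForCausalLM

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder repo id or local path

# AutoTokenizer resolves the right tokenizer class from the checkpoint's config;
# trust_remote_code=True allows models whose code lives in the checkpoint repo.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    optimize_model=False,
    torch_dtype=torch.bfloat16,
    modules_to_not_convert=["lm_head"],
    trust_remote_code=True,
)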

python/llm/example/GPU/LLM-Finetuning/LoRA/alpaca_lora_finetuning.py

+4 −2

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         model = AutoModelForCausalLM.from_pretrained(
@@ -169,13 +170,14 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )

     print(f"Model loaded on rank {os.environ.get('LOCAL_RANK')}")
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/LoRA/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/QA-LoRA/alpaca_qalora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # Default 4-bit format for qa-lora is sym_int4
@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True,)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/QA-LoRA/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/alpaca_qlora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"
@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (
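
In the QLoRA, QA-LoRA, and ReLoRA scripts the base model is loaded in 4 bit through a BitsAndBytesConfig, and the commit only appends trust_remote_code=True to that call. For orientation, a hedged sketch of what such a config typically looks like using the Hugging Face BitsAndBytesConfig fields; only bnb_4bit_compute_dtype=torch.bfloat16 appears in the diff, the quant type follows the example's own "nf4" comment, and the plain transformers classes stand in for whatever the scripts actually import:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

base_model = "path/or/repo-id-of-base-model"  # placeholder

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize base weights to 4 bit
    bnb_4bit_quant_type="nf4",              # assumed; the QLoRA example's comment prefers "nf4" over "int4"
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches the dtype shown in the diff
)

model = AutoModelForCausalLM.from_pretrained(base_model,
                                             quantization_config=bnb_config,
                                             trust_remote_code=True)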

python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/ReLora/alpaca_relora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -174,6 +174,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # use bnb_config for qlora/qalora/relora, which use 4bit for base model
@@ -184,7 +185,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -199,7 +201,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/ReLora/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
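
Each export_merged_model.py merges the trained adapter back into the base weights via merge_adapter(base_model, tokenizer, adapter_path, output_path). Once exported, the merged checkpoint loads like any ordinary Hugging Face model; a minimal sketch, with the output path as a placeholder:

from transformers import AutoModelForCausalLM, AutoTokenizer

output_path = "./merged-model"  # placeholder: wherever export_merged_model.py wrote the merged weights

# The merged model is a regular checkpoint, so the generic Auto* classes load it;
# trust_remote_code=True mirrors how the examples load the base model.
tokenizer = AutoTokenizer.from_pretrained(output_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(output_path, trust_remote_code=True)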
