@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
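Switching from the model-specific LlamaTokenizer to AutoTokenizer lets the script pick up whatever tokenizer class the checkpoint's config declares, and trust_remote_code=True additionally permits checkpoints that ship their own tokenizer code. A minimal sketch of the resulting call, with a placeholder model id that is not part of this change:

from transformers import AutoTokenizer

# AutoTokenizer resolves the concrete tokenizer class from the checkpoint's config;
# trust_remote_code=True is required for repos that define custom tokenizer classes.
tokenizer = AutoTokenizer.from_pretrained("some-org/custom-model", trust_remote_code=True)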
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"
@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (
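The hunk ends mid-statement; the assignment to tokenizer.pad_token_id continues beyond the diff context shown. As a hedged illustration only, with values assumed from common Llama-style fine-tuning recipes rather than taken from this file, such a setup typically looks like:

# Hypothetical continuation for illustration; the actual values live outside this diff.
tokenizer.pad_token_id = 0          # assumed: reuse the unk token id so padding differs from eos
tokenizer.padding_side = "left"     # assumed: left padding is common for causal-LM batching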