
Commit 7e49fbc

LLM: make finetuning examples more common for other models (#10078)
1 parent 90f004b commit 7e49fbc

8 files changed: +27 −19 lines
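
The change swaps the Llama-specific LlamaTokenizer for AutoTokenizer and passes trust_remote_code=True when loading both the model and the tokenizer, so the same finetuning examples also work with base models other than Llama, including checkpoints that ship their own tokenizer or modeling code. A minimal sketch of the generic loading pattern the scripts converge on; the repo id is a placeholder and the AutoModelForCausalLM import path is an assumption about the library, not part of the diff:

import torch
from transformers import AutoTokenizer
# Assumed import path: the examples use the library's own AutoModelForCausalLM wrapper,
# which accepts optimize_model / modules_to_not_convert as shown in the diff.
from bigdl.llm.transformers import AutoModelForCausalLM

base_model = "meta-llama/Llama-2-7b-hf"  # placeholder repo id or local path

# AutoTokenizer resolves the right tokenizer class from the checkpoint's config;
# trust_remote_code=True allows models whose code lives in the checkpoint repo.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    optimize_model=False,
    torch_dtype=torch.bfloat16,
    modules_to_not_convert=["lm_head"],
    trust_remote_code=True,
)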

python/llm/example/GPU/LLM-Finetuning/LoRA/alpaca_lora_finetuning.py

+4 −2

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         model = AutoModelForCausalLM.from_pretrained(
@@ -169,13 +170,14 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )

     print(f"Model loaded on rank {os.environ.get('LOCAL_RANK')}")
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/LoRA/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/QA-LoRA/alpaca_qalora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # Default 4-bit format for qa-lora is sym_int4
@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True,)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/QA-LoRA/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/alpaca_qlora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -161,6 +161,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # According to the QLoRA paper, using "nf4" could yield better model quality than "int4"
@@ -172,7 +173,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -187,7 +189,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (
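
In the QLoRA, QA-LoRA, and ReLoRA scripts the base model is loaded in 4 bit through a BitsAndBytesConfig, and the commit only appends trust_remote_code=True to that call. For orientation, a hedged sketch of what such a config typically looks like using the Hugging Face BitsAndBytesConfig fields; only bnb_4bit_compute_dtype=torch.bfloat16 appears in the diff, the quant type follows the example's own "nf4" comment, and the plain transformers classes stand in for whatever the scripts actually import:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

base_model = "path/or/repo-id-of-base-model"  # placeholder

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize base weights to 4 bit
    bnb_4bit_quant_type="nf4",              # assumed; the QLoRA example's comment prefers "nf4" over "int4"
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches the dtype shown in the diff
)

model = AutoModelForCausalLM.from_pretrained(base_model,
                                             quantization_config=bnb_config,
                                             trust_remote_code=True)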

python/llm/example/GPU/LLM-Finetuning/QLoRA/alpaca-qlora/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')

python/llm/example/GPU/LLM-Finetuning/ReLora/alpaca_relora_finetuning.py

+5 −3

@@ -39,7 +39,7 @@
 from datasets import load_dataset
 import accelerate

-from transformers import LlamaTokenizer
+from transformers import AutoTokenizer
 from peft import (
     get_peft_model_state_dict,
     set_peft_model_state_dict,
@@ -174,6 +174,7 @@ def train(
             optimize_model=False,
             torch_dtype=torch.bfloat16,
             modules_to_not_convert=["lm_head"],
+            trust_remote_code=True,
         )
     else:
         # use bnb_config for qlora/qalora/relora, which use 4bit for base model
@@ -184,7 +185,8 @@ def train(
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         model = AutoModelForCausalLM.from_pretrained(base_model,
-                                                     quantization_config=bnb_config, )
+                                                     quantization_config=bnb_config,
+                                                     trust_remote_code=True)
         # below is also supported
         # Load the base model from a directory or the HF Hub to 4-bit format
         # model = AutoModelForCausalLM.from_pretrained(
@@ -199,7 +201,7 @@ def train(
     model = model.to(f'xpu:{os.environ.get("LOCAL_RANK", 0)}')
     print(f"Model moved to rank {os.environ.get('LOCAL_RANK')}")

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     print(f"Tokenizer loaded on rank {os.environ.get('LOCAL_RANK')}")

     tokenizer.pad_token_id = (

python/llm/example/GPU/LLM-Finetuning/ReLora/export_merged_model.py

+2 −2

@@ -16,7 +16,7 @@
 import os

 import torch
-from transformers import LlamaTokenizer # noqa: F402
+from transformers import AutoTokenizer
 import argparse

 current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -39,6 +39,6 @@
     adapter_path = args.adapter_path
     output_path = args.output_path

-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     merge_adapter(base_model, tokenizer, adapter_path, output_path)
     print(f'Finish to merge the adapter into the original model and you could find the merged model in {output_path}.')
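
Each export_merged_model.py merges the trained adapter back into the base weights via merge_adapter(base_model, tokenizer, adapter_path, output_path). Once exported, the merged checkpoint loads like any ordinary Hugging Face model; a minimal sketch, with the output path as a placeholder:

from transformers import AutoModelForCausalLM, AutoTokenizer

output_path = "./merged-model"  # placeholder: wherever export_merged_model.py wrote the merged weights

# The merged model is a regular checkpoint, so the generic Auto* classes load it;
# trust_remote_code=True mirrors how the examples load the base model.
tokenizer = AutoTokenizer.from_pretrained(output_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(output_path, trust_remote_code=True)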
