From fb4aa659a2dd354cbaf60c97ee4ec235c447646c Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Mon, 17 Feb 2025 10:52:24 +0200 Subject: [PATCH] Fixed issue with granite-vision QLORA and changes to QLORA training by default --- .../en/fine_tuning_granite_vision_sft_trl.ipynb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/notebooks/en/fine_tuning_granite_vision_sft_trl.ipynb b/notebooks/en/fine_tuning_granite_vision_sft_trl.ipynb index 1bfbfbf7..35425703 100644 --- a/notebooks/en/fine_tuning_granite_vision_sft_trl.ipynb +++ b/notebooks/en/fine_tuning_granite_vision_sft_trl.ipynb @@ -78,7 +78,7 @@ "!pip install -q flash-attn --no-build-isolation\n", "\n", "try:\n", - " from flash_attn.flash_attention import FlashAttention\n", + " import flash_attn\n", " print(\"FlashAttention is installed\")\n", " USE_FLASH_ATTENTION = True\n", "except ImportError:\n", @@ -639,8 +639,8 @@ "source": [ "from transformers import BitsAndBytesConfig\n", "\n", - "USE_QLORA = False\n", - "USE_LORA = False\n", + "USE_QLORA = True\n", + "USE_LORA = True\n", "\n", "if USE_QLORA:\n", " # BitsAndBytesConfig int-4 config\n", @@ -648,7 +648,9 @@ " load_in_4bit=True,\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", - " bnb_4bit_compute_dtype=torch.bfloat16\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " llm_int8_skip_modules=[\"vision_tower\", \"lm_head\"], # Skip problematic modules\n", + " llm_int8_enable_fp32_cpu_offload=True\n", " )\n", "else:\n", " bnb_config = None\n", @@ -693,7 +695,6 @@ " r=8,\n", " lora_alpha=8,\n", " lora_dropout=0.1,\n", - " # target_modules=['down_proj','o_proj','k_proj','q_proj','gate_proj','up_proj','v_proj'],\n", " target_modules=[name for name, _ in model.named_modules() if 'language_model' in name and '_proj' in name],\n", " use_dora=True,\n", " init_lora_weights=\"gaussian\"\n", @@ -1052,7 +1053,8 @@ "outputs": [], "source": [ "if USE_LORA:\n", - " model = model.merge_and_unload().to(torch.bfloat16)" + " from peft import PeftModel\n", + " model = PeftModel.from_pretrained(model, training_args.output_dir)" ] }, {