Add/multi gpu training #98

Open · wants to merge 7 commits into base: main
2 changes: 2 additions & 0 deletions transformerlab/plugins/llama_trainer/main.py
@@ -149,6 +149,8 @@ def format_instruction(mapping):
max_seq_length = int(config["maximum_sequence_length"]) # max sequence length for model and packing of the dataset
print(max_seq_length)

report_to = ["tensorboard"]

if WANDB_LOGGING:
    WANDB_LOGGING, report_to = transformerlab.plugin.test_wandb_login()
    if not WANDB_LOGGING:
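For context, a minimal sketch of how the report_to list assembled above is typically consumed, assuming it is later passed into the Hugging Face TrainingArguments used by the trainer (that part of main.py is not shown in this hunk); output_dir, batch size, and epochs below are placeholder values, not taken from the PR:

from transformers import TrainingArguments

report_to = ["tensorboard"]  # default logger, as set in the diff
# if the wandb login check succeeds, report_to would also include "wandb"

training_args = TrainingArguments(
    output_dir="./output",            # placeholder
    per_device_train_batch_size=4,    # placeholder
    num_train_epochs=1,               # placeholder
    report_to=report_to,              # loggers enabled for this run
)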
145 changes: 145 additions & 0 deletions transformerlab/plugins/llama_trainer_multi_gpu/index.json
@@ -0,0 +1,145 @@
{
"name": "Llama SFT Trainer -- Huggingface TRL (Multi GPU Support)",
"uniqueId": "llama_trainer_multi_gpu",
"description": "A training script adapted from https://www.philschmid.de/instruction-tune-llama-2 for training Llama2 using PeFT",
"plugin-format": "python",
"type": "trainer",
"version": "0.1.0",
"model_architectures": [
"LlamaForCausalLM",
"Qwen2ForCausalLM"
],
"git": "",
"url": "",
"files": [
"main.py",
"setup.sh"
],
"setup-script": "setup.sh",
"parameters": {
"train_device": {
"title": "Training Device",
"type": "string",
"required": true,
"enum": [
"cuda",
"cpu",
"tpu"
],
"default": "cuda"
},
"gpu_ids": {
"title": "GPU IDs to Train",
"type": "string",
"default": "auto"
},
"maximum_sequence_length": {
"title": "Maximum Sequence Length",
"type": "integer",
"default": 2048,
"minimum": 1,
"maximum": 4096
},
"batch_size": {
"title": "Batch Size",
"type": "integer",
"default": 4,
"minimum": 1,
"maximum": 64
},
"learning_rate_schedule": {
"title": "Learning Rate Schedule",
"type": "string",
"enum": [
"constant",
"linear",
"cosine",
"constant_with_warmup"
],
"default": "constant"
},
"learning_rate": {
"title": "Learning Rate",
"type": "number",
"default": 5e-5,
"minimum": 1e-6,
"maximum": 1e+6
},
"num_train_epochs": {
"title": "Number of Training Epochs",
"type": "integer",
"default": 1,
"minimum": 1,
"maximum": 24
},
"max_steps": {
"title": "Max Steps (-1 means no limit)",
"type": "integer",
"default": -1
},
"lora_r": {
"title": "Lora R",
"type": "number",
"minimum": 4,
"maximum": 64,
"multipleOf": 4,
"default": 16
},
"lora_alpha": {
"title": "Lora Alpha",
"type": "number",
"minimum": 4,
"maximum": 128,
"multipleOf": 4,
"default": 32
},
"lora_dropout": {
"title": "Lora Dropout",
"type": "number",
"minimum": 0.05,
"maximum": 0.9,
"default": 0.05
},
"adaptor_name": {
"title": "Adaptor Name",
"type": "string",
"required": true
},
"log_to_wandb": {
"title": "Log to Weights and Biases",
"type": "boolean",
"default": true,
"required": true
}
},
"parameters_ui": {
"maximum_sequence_length": {
"ui:help": "Maximum sequence length for the model. Longer sequences will be truncated. Keep lower to save memory."
},
"train_device": {
"ui:help": "The device to train the model on. Use 'cuda' for Multi GPU Training, 'cpu' for CPU, and 'tpu' for TPU.",
"ui:widget": "AutoCompleteWidget",
"ui:options": {
"multiple": false
}
},
"gpu_ids": {
"ui:help": "Comma separated list of GPU IDs to use for training. Set to 'auto' for all GPUs. Example: 0,1,2,3 for 4 GPUs."
},
"batch_size": {
"ui:help": "The number of sequences processed simultaneously during training. Higher values lower number of iterations but require more memory."
},
"lora_r": {
"ui:widget": "range",
"ui:help": "Rank of the update matrices, expressed in int. Lower rank results in smaller update matrices with fewer trainable parameters."
},
"lora_alpha": {
"ui:widget": "range",
"ui:help": "LoRA scaling factor. Make it a multiple of LoRA R."
},
"log_to_wandb": {
"ui:help": "Log training to Weights and Biases. You must have a Weights and Biases account and API key to use this feature. You need to set the API Key in settings to use this feature."
}
}
}
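To illustrate how the gpu_ids and train_device parameters defined above might be consumed by the plugin's training script, here is a small sketch; resolve_gpu_ids and the config dict are hypothetical helpers for illustration only, not code from this PR:

import os

def resolve_gpu_ids(config):
    # Hypothetical helper: restrict training to the GPUs requested in the
    # plugin config. 'auto' means use every visible GPU.
    gpu_ids = str(config.get("gpu_ids", "auto")).strip()
    if config.get("train_device", "cuda") != "cuda" or gpu_ids == "auto":
        return  # CPU/TPU runs or 'auto' need no restriction
    # Must be set before CUDA is initialized (i.e. before any torch code
    # touches the GPU) for the restriction to take effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids

# Example: train only on GPUs 0 and 2.
resolve_gpu_ids({"train_device": "cuda", "gpu_ids": "0,2"})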