Add/multi gpu training #98

Open · wants to merge 7 commits into base: main
2 changes: 2 additions & 0 deletions transformerlab/plugins/llama_trainer/main.py
@@ -149,6 +149,8 @@ def format_instruction(mapping):
max_seq_length = int(config["maximum_sequence_length"]) # max sequence length for model and packing of the dataset
print(max_seq_length)

report_to = ["tensorboard"]

if WANDB_LOGGING:
    WANDB_LOGGING, report_to = transformerlab.plugin.test_wandb_login()
    if not WANDB_LOGGING:
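For context, a minimal sketch of how the report_to list assembled above is typically consumed, assuming it is later passed into the Hugging Face TrainingArguments used by the trainer (that part of main.py is not shown in this hunk); output_dir, batch size, and epochs below are placeholder values, not taken from the PR:

from transformers import TrainingArguments

report_to = ["tensorboard"]  # default logger, as set in the diff
# if the wandb login check succeeds, report_to would also include "wandb"

training_args = TrainingArguments(
    output_dir="./output",            # placeholder
    per_device_train_batch_size=4,    # placeholder
    num_train_epochs=1,               # placeholder
    report_to=report_to,              # loggers enabled for this run
)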
145 changes: 145 additions & 0 deletions transformerlab/plugins/llama_trainer_multi_gpu/index.json
@@ -0,0 +1,145 @@
{
"name": "Llama SFT Trainer -- Huggingface TRL (Multi GPU Support)",
"uniqueId": "llama_trainer_multi_gpu",
"description": "A training script adapted from https://www.philschmid.de/instruction-tune-llama-2 for training Llama2 using PeFT",
"plugin-format": "python",
"type": "trainer",
"version": "0.1.0",
"model_architectures": [
"LlamaForCausalLM",
"Qwen2ForCausalLM"
],
"git": "",
"url": "",
"files": [
"main.py",
"setup.sh"
],
"setup-script": "setup.sh",
"parameters": {
"train_device": {
"title": "Training Device",
"type": "string",
"required": true,
"enum": [
"cuda",
"cpu",
"tpu"
],
"default": "cuda"
},
"gpu_ids": {
"title": "GPU IDs to Train",
"type": "string",
"default": "auto"
},
"maximum_sequence_length": {
"title": "Maximum Sequence Length",
"type": "integer",
"default": 2048,
"minimum": 1,
"maximum": 4096
},
"batch_size": {
"title": "Batch Size",
"type": "integer",
"default": 4,
"minimum": 1,
"maximum": 64
},
"learning_rate_schedule": {
"title": "Learning Rate Schedule",
"type": "string",
"enum": [
"constant",
"linear",
"cosine",
"constant_with_warmup"
],
"default": "constant"
},
"learning_rate": {
"title": "Learning Rate",
"type": "number",
"default": 5e-5,
"minimum": 1e-6,
"maximum": 1e+6
},
"num_train_epochs": {
"title": "Number of Training Epochs",
"type": "integer",
"default": 1,
"minimum": 1,
"maximum": 24
},
"max_steps": {
"title": "Max Steps (-1 means no limit)",
"type": "integer",
"default": -1
},
"lora_r": {
"title": "Lora R",
"type": "number",
"minimum": 4,
"maximum": 64,
"multipleOf": 4,
"default": 16
},
"lora_alpha": {
"title": "Lora Alpha",
"type": "number",
"minimum": 4,
"maximum": 128,
"multipleOf": 4,
"default": 32
},
"lora_dropout": {
"title": "Lora Dropout",
"type": "number",
"minimum": 0.05,
"maximum": 0.9,
"default": 0.05
},
"adaptor_name": {
"title": "Adaptor Name",
"type": "string",
"required": true
},
"log_to_wandb": {
"title": "Log to Weights and Biases",
"type": "boolean",
"default": true,
"required": true
}
},
"parameters_ui": {
"maximum_sequence_length": {
"ui:help": "Maximum sequence length for the model. Longer sequences will be truncated. Keep lower to save memory."
},
"train_device": {
"ui:help": "The device to train the model on. Use 'cuda' for Multi GPU Training, 'cpu' for CPU, and 'tpu' for TPU.",
"ui:widget": "AutoCompleteWidget",
"ui:options": {
"multiple": false
}
},
"gpu_ids": {
"ui:help": "Comma separated list of GPU IDs to use for training. Set to 'auto' for all GPUs. Example: 0,1,2,3 for 4 GPUs."
},
"batch_size": {
"ui:help": "The number of sequences processed simultaneously during training. Higher values lower number of iterations but require more memory."
},
"lora_r": {
"ui:widget": "range",
"ui:help": "Rank of the update matrices, expressed in int. Lower rank results in smaller update matrices with fewer trainable parameters."
},
"lora_alpha": {
"ui:widget": "range",
"ui:help": "LoRA scaling factor. Make it a multiple of LoRA R."
},
"log_to_wandb": {
"ui:help": "Log training to Weights and Biases. You must have a Weights and Biases account and API key to use this feature. You need to set the API Key in settings to use this feature."
}
}
}
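To illustrate how the gpu_ids and train_device parameters defined above might be consumed by the plugin's training script, here is a small sketch; resolve_gpu_ids and the config dict are hypothetical helpers for illustration only, not code from this PR:

import os

def resolve_gpu_ids(config):
    # Hypothetical helper: restrict training to the GPUs requested in the
    # plugin config. 'auto' means use every visible GPU.
    gpu_ids = str(config.get("gpu_ids", "auto")).strip()
    if config.get("train_device", "cuda") != "cuda" or gpu_ids == "auto":
        return  # CPU/TPU runs or 'auto' need no restriction
    # Must be set before CUDA is initialized (i.e. before any torch code
    # touches the GPU) for the restriction to take effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids

# Example: train only on GPUs 0 and 2.
resolve_gpu_ids({"train_device": "cuda", "gpu_ids": "0,2"})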