PEFT (الضبط الدقيق الفعال في المعاملات) هي مكتبة HuggingFace تتيح تكييف النماذج الكبيرة المُدربة مسبقاً عن طريق تدريب عدد صغير فقط من المعاملات الإضافية، مما يقلل بشكل كبير من استخدام الذاكرة ووقت التدريب مع الحفاظ على الأداء.
التثبيت
# Install the released package from PyPI
pip install peft
# Install together with bitsandbytes for quantization support
pip install peft bitsandbytes
# Install from source (the GitHub repository)
pip install git+https://github.com/huggingface/peft.git
# Verify the installation by printing the installed PEFT version
python -c "import peft; print(peft.__version__)"
LoRA (Low-Rank Adaptation)
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM
# Load the base causal-LM checkpoint that the adapters will be attached to
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
# Configure LoRA
lora_config = LoraConfig(
r=16, # Rank of the low-rank update matrices
lora_alpha=32, # Scaling factor (here 2x the rank)
lora_dropout=0.05, # Dropout for LoRA layers
target_modules=[ # Modules to apply LoRA
"q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",
],
task_type=TaskType.CAUSAL_LM,
bias="none", # Which bias terms to train: "none", "all", or "lora_only"
)
# Wrap model with LoRA adapters
model = get_peft_model(model, lora_config)
# Print trainable parameter count
model.print_trainable_parameters()
# Example output for this config (r=16 on all seven projection modules);
# exact figures depend on the model and PEFT version:
# trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
QLoRA (Quantized LoRA)
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
# TaskType is imported here so the snippet runs on its own
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
import torch
# Load model in 4-bit
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4", # 4-bit NormalFloat quantization
bnb_4bit_compute_dtype=torch.bfloat16, # Dtype used for the matmul computation
bnb_4bit_use_double_quant=True, # Also quantize the quantization constants
)
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.1-8B",
quantization_config=bnb_config,
device_map="auto",
)
# Prepare quantized model for training
model = prepare_model_for_kbit_training(model)
# Apply LoRA on top
lora_config = LoraConfig(
r=16,
lora_alpha=32,
target_modules="all-linear", # Apply to all linear layers
lora_dropout=0.05,
task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, lora_config)
AdaLoRA (Adaptive LoRA)
# TaskType is imported here so the snippet runs on its own
from peft import AdaLoraConfig, TaskType, get_peft_model
# AdaLoRA dynamically adjusts rank per layer
config = AdaLoraConfig(
init_r=12, # Initial rank for all layers
target_r=4, # Target average rank after pruning
beta1=0.85, # EMA coefficient for sensitivity smoothing
beta2=0.85, # EMA coefficient for uncertainty quantification
tinit=200, # Warmup steps before rank pruning starts
tfinal=1000, # Number of final fine-tuning steps after pruning stops (a duration, not a step index)
deltaT=10, # Pruning interval
total_step=3000, # Total training steps; set to your real step count, must exceed tinit + tfinal
lora_alpha=32,
lora_dropout=0.05,
target_modules=["q_proj", "v_proj"],
task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, config)
# In a custom training loop, call model.base_model.update_and_allocate(global_step)
# after each optimizer step so the rank budget is actually re-allocated.
IA3 (Infused Adapter by Inhibiting and Amplifying Inner Activations)
# TaskType is imported here so the snippet runs on its own
from peft import IA3Config, TaskType, get_peft_model
# IA3 learns rescaling vectors (even fewer params than LoRA)
config = IA3Config(
target_modules=["k_proj", "v_proj", "down_proj"],
feedforward_modules=["down_proj"], # Must be a subset of target_modules
task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
# Typically < 0.01% trainable parameters
Prefix Tuning
# TaskType is imported here so the snippet runs on its own
from peft import PrefixTuningConfig, TaskType, get_peft_model
config = PrefixTuningConfig(
num_virtual_tokens=20, # Number of trainable prefix tokens
task_type=TaskType.CAUSAL_LM,
prefix_projection=True, # Use MLP to project prefix
encoder_hidden_size=1024, # Hidden size of the projection MLP
)
model = get_peft_model(model, config)
Prompt Tuning
# TaskType is imported here so the snippet runs on its own
from peft import PromptTuningConfig, PromptTuningInit, TaskType, get_peft_model
config = PromptTuningConfig(
num_virtual_tokens=20, # Number of trainable soft-prompt tokens
task_type=TaskType.CAUSAL_LM,
prompt_tuning_init=PromptTuningInit.TEXT, # Initialize the soft prompt from text
prompt_tuning_init_text="Classify the following text:",
tokenizer_name_or_path="meta-llama/Llama-3.1-8B", # Tokenizer used to encode the init text
)
model = get_peft_model(model, config)
Saving and Loading Adapters
# Save only the adapter weights (small file)
model.save_pretrained("./my-lora-adapter")
# Load adapter onto base model
from peft import PeftModel
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
model = PeftModel.from_pretrained(base_model, "./my-lora-adapter")
# Push adapter to HuggingFace Hub
model.push_to_hub("my-org/llama3-lora-adapter")
# Load from Hub (same call as above, with a Hub repo id instead of a local path)
# NOTE(review): base_model was already wrapped by the local load above; in real
# code, reload a fresh base model before attaching the adapter again - confirm
# against the PEFT docs.
model = PeftModel.from_pretrained(base_model, "my-org/llama3-lora-adapter")
Merging Adapters
# Merge LoRA weights into base model permanently
model = PeftModel.from_pretrained(base_model, "./my-lora-adapter")
merged_model = model.merge_and_unload()
# Save the merged model (full size, no adapter dependency)
merged_model.save_pretrained("./merged-model")
# Alternative to merge_and_unload(): merge in place and keep the adapter so the
# merge can be reverted. Use this on a model that still has its adapter attached.
model.merge_adapter() # Merge into base weights
model.unmerge_adapter() # Unmerge (revert to adapter mode)
Multi-Adapter Inference
from peft import PeftModel
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
# Load first adapter and register it under the name "coding"
model = PeftModel.from_pretrained(base_model, "./adapter-coding", adapter_name="coding")
# Load additional adapters onto the same model under their own names
model.load_adapter("./adapter-writing", adapter_name="writing")
model.load_adapter("./adapter-math", adapter_name="math")
# Switch between adapters at inference time
# (`inputs` is not defined in this snippet; it must be prepared beforehand)
model.set_adapter("coding")
output_code = model.generate(**inputs)
model.set_adapter("writing")
output_text = model.generate(**inputs)
# Combine adapters with weighted merge; the result is registered as "combined"
model.add_weighted_adapter(
adapters=["coding", "writing"],
weights=[0.7, 0.3],
adapter_name="combined",
)
# Activate the newly created combined adapter
model.set_adapter("combined")
Training with HuggingFace Trainer
from transformers import Trainer, TrainingArguments
# Training hyperparameters
training_args = TrainingArguments(
output_dir="./output",
per_device_train_batch_size=4,
gradient_accumulation_steps=4, # 4 x 4 = 16 samples per device per optimizer step
num_train_epochs=3,
learning_rate=2e-4,
bf16=True,
logging_steps=10,
save_steps=200,
remove_unused_columns=False, # Keep all dataset columns instead of dropping unused ones
)
# `dataset` and `data_collator` are not defined in this snippet; they must be
# prepared beforehand.
trainer = Trainer(
model=model, # PEFT-wrapped model
args=training_args,
train_dataset=dataset,
data_collator=data_collator,
)
trainer.train()
PEFT Methods Comparison
| Method | Trainable Params | Memory | Speed | Quality |
|---|---|---|---|---|
| LoRA | ~0.1-1% | Low | Fast | High |
| QLoRA | ~0.1-1% | Very Low | Medium | High |
| AdaLoRA | ~0.1-1% | Low | Medium | High |
| IA3 | ~0.01% | Very Low | Fast | Good |
| Prefix Tuning | ~0.1% | Low | Fast | Good |
| Prompt Tuning | ~0.01% | Very Low | Fast | Moderate |
| Full Fine-Tune | 100% | Very High | Slow | Highest |
خيارات الإعداد الشائعة
| Parameter | الوصف | Typical Value |
|---|---|---|
| `r` | LoRA rank | 8, 16, 32, 64 |
| `lora_alpha` | Scaling factor (usually 2x rank) | 16, 32, 64 |
| `lora_dropout` | Dropout probability | 0.0 - 0.1 |
| `target_modules` | Which layers to adapt | "all-linear" or list |
| `bias` | Train bias params | "none", "all", "lora_only" |
| `task_type` | Task type enum | CAUSAL_LM, SEQ_CLS, SEQ_2_SEQ_LM |