تخطَّ إلى المحتوى

PEFT أوامر

PEFT (الضبط الدقيق الفعال في المعاملات) هي مكتبة HuggingFace تتيح تكييف النماذج الكبيرة المُدربة مسبقاً عن طريق تدريب عدد صغير فقط من المعاملات الإضافية، مما يقلل بشكل كبير من استخدام الذاكرة ووقت التدريب مع الحفاظ على الأداء.

التثبيت

# Install from PyPI
pip install peft

# Install with quantization support
pip install peft bitsandbytes

# Install from source
pip install git+https://github.com/huggingface/peft.git

# Verify
python -c "import peft; print(peft.__version__)"

LoRA (Low-Rank Adaptation)

from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")

# Configure LoRA
lora_config = LoraConfig(
    r=16,                          # Rank
    lora_alpha=32,                 # Scaling factor
    lora_dropout=0.05,             # Dropout for LoRA layers
    target_modules=[               # Modules to apply LoRA
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type=TaskType.CAUSAL_LM,
    bias="none",                   # none, all, or lora_only
)

# Wrap model with LoRA adapters
model = get_peft_model(model, lora_config)

# Print trainable parameter count
model.print_trainable_parameters()
# Output: trainable params: 13M || all params: 8B || trainable%: 0.16%

QLoRA (Quantized LoRA)

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch

# Load model in 4-bit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    quantization_config=bnb_config,
    device_map="auto",
)

# Prepare quantized model for training
model = prepare_model_for_kbit_training(model)

# Apply LoRA on top
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules="all-linear",  # Apply to all linear layers
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)

AdaLoRA (Adaptive LoRA)

from peft import AdaLoraConfig, get_peft_model

# AdaLoRA dynamically adjusts rank per layer
config = AdaLoraConfig(
    init_r=12,              # Initial rank for all layers
    target_r=4,             # Target average rank after pruning
    beta1=0.85,
    beta2=0.85,
    tinit=200,              # Steps before pruning starts
    tfinal=1000,            # Steps when pruning ends
    deltaT=10,              # Pruning interval
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, config)

IA3 (Infused Adapter by Inhibiting and Amplifying Inner Activations)

from peft import IA3Config, get_peft_model

# IA3 learns rescaling vectors (even fewer params than LoRA)
config = IA3Config(
    target_modules=["k_proj", "v_proj", "down_proj"],
    feedforward_modules=["down_proj"],
    task_type=TaskType.CAUSAL_LM,
)

model = get_peft_model(model, config)
model.print_trainable_parameters()
# Typically < 0.01% trainable parameters

Prefix Tuning

from peft import PrefixTuningConfig, get_peft_model

config = PrefixTuningConfig(
    num_virtual_tokens=20,
    task_type=TaskType.CAUSAL_LM,
    prefix_projection=True,   # Use MLP to project prefix
    encoder_hidden_size=1024,
)

model = get_peft_model(model, config)

Prompt Tuning

from peft import PromptTuningConfig, PromptTuningInit, get_peft_model

config = PromptTuningConfig(
    num_virtual_tokens=20,
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text="Classify the following text:",
    tokenizer_name_or_path="meta-llama/Llama-3.1-8B",
)

model = get_peft_model(model, config)

Saving and Loading Adapters

# Save only the adapter weights (small file)
model.save_pretrained("./my-lora-adapter")

# Load adapter onto base model
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
model = PeftModel.from_pretrained(base_model, "./my-lora-adapter")

# Push adapter to HuggingFace Hub
model.push_to_hub("my-org/llama3-lora-adapter")

# Load from Hub
model = PeftModel.from_pretrained(base_model, "my-org/llama3-lora-adapter")

Merging Adapters

# Merge LoRA weights into base model permanently
model = PeftModel.from_pretrained(base_model, "./my-lora-adapter")
merged_model = model.merge_and_unload()

# Save the merged model (full size, no adapter dependency)
merged_model.save_pretrained("./merged-model")

# Merge with different methods
model.merge_adapter()            # Merge into base weights
model.unmerge_adapter()          # Unmerge (revert to adapter mode)

Multi-Adapter Inference

from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")

# Load first adapter
model = PeftModel.from_pretrained(base_model, "./adapter-coding", adapter_name="coding")

# Load additional adapters
model.load_adapter("./adapter-writing", adapter_name="writing")
model.load_adapter("./adapter-math", adapter_name="math")

# Switch between adapters at inference time
model.set_adapter("coding")
output_code = model.generate(**inputs)

model.set_adapter("writing")
output_text = model.generate(**inputs)

# Combine adapters with weighted merge
model.add_weighted_adapter(
    adapters=["coding", "writing"],
    weights=[0.7, 0.3],
    adapter_name="combined",
)
model.set_adapter("combined")

Training with HuggingFace Trainer

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    bf16=True,
    logging_steps=10,
    save_steps=200,
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,        # PEFT-wrapped model
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
)

trainer.train()

PEFT Methods Comparison

MethodTrainable ParamsMemorySpeedQuality
LoRA~0.1-1%LowFastHigh
QLoRA~0.1-1%Very LowMediumHigh
AdaLoRA~0.1-1%LowMediumHigh
IA3~0.01%Very LowFastGood
Prefix Tuning~0.1%LowFastGood
Prompt Tuning~0.01%Very LowFastModerate
Full Fine-Tune100%Very HighSlowHighest

Common Config الخيارات

ParameterالوصفTypical Value
rLoRA rank8, 16, 32, 64
lora_alphaScaling factor (usually 2x rank)16, 32, 64
lora_dropoutDropout probability0.0 - 0.1
target_modulesWhich layers to adapt"all-linear" or list
biasTrain bias params"none", "all", "lora_only"
task_typeTask type enumCAUSAL_LM, SEQ_CLS, SEQ_2_SEQ_LM