تخطَّ إلى المحتوى

أوامر Transformers

توفر مكتبة Hugging Face Transformers آلاف النماذج المُدرَّبة مسبقاً لمعالجة اللغة الطبيعية والرؤية الحاسوبية والصوت والمهام متعددة الوسائط، وتقدم واجهة برمجة تطبيقات موحدة لتحميل النماذج والترميز والتدريب والاستدلال عبر PyTorch وTensorFlow وJAX.

التثبيت

# Install with PyTorch backend
# (the extras specifier is quoted so shells such as zsh do not treat [...] as a glob)
pip install "transformers[torch]"

# Install with all optional dependencies
pip install "transformers[torch,sentencepiece,tokenizers,vision,audio]"

# Install from source
pip install git+https://github.com/huggingface/transformers.git

# Install specific version
pip install transformers==4.46.0

# Verify: print the installed version
python -c "import transformers; print(transformers.__version__)"

Pipeline API (Quick Inference)

from transformers import pipeline

# Text generation
generator = pipeline("text-generation", model="meta-llama/Llama-3.1-8B-Instruct")
result = generator("Explain quantum computing:", max_new_tokens=200)

# Sentiment analysis (no model given, so the task's default checkpoint is used)
classifier = pipeline("sentiment-analysis")
result = classifier("I love this product!")
# [{'label': 'POSITIVE', 'score': 0.9998}]

# Summarization
# Placeholder input: replace with the document you want to summarize.
long_text = (
    "Paste the article or document to summarize here. "
    "The summarizer works best on several paragraphs of text."
)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(long_text, max_length=130, min_length=30)

# Question answering
qa = pipeline("question-answering")
result = qa(question="What is BERT?", context="BERT is a transformer model...")

# Zero-shot classification: labels are supplied at call time
classifier = pipeline("zero-shot-classification")
result = classifier("I need to pay my bill", candidate_labels=["billing", "support", "sales"])

# Image classification
classifier = pipeline("image-classification", model="google/vit-base-patch16-224")
result = classifier("photo.jpg")

# Automatic speech recognition
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
result = asr("audio.mp3")

AutoModel and AutoTokenizer

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint id shared by the tokenizer and the model weights
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in bfloat16 and let device_map="auto" choose their placement
load_options = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

# Encode the prompt as PyTorch tensors on the same device as the model
prompt = "Hello, how are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sample a continuation of at most 100 new tokens
sampling_options = {
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}
outputs = model.generate(**inputs, **sampling_options)

# Convert the first returned sequence back to text, dropping special tokens
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Chat Template

import torch  # required for torch.bfloat16 below
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Conversation as a list of role/content messages
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain transformers in ML."},
]

# Apply chat template; add_generation_prompt=True ends the prompt where the
# assistant's reply is expected to start
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
).to(model.device)

outputs = model.generate(input_ids, max_new_tokens=256)
# Slice off the prompt tokens so only the newly generated reply is decoded
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

Quantization with BitsAndBytes

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 4-bit quantization (QLoRA-style)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
)

# 8-bit quantization
# Passed through quantization_config like the 4-bit case; the bare
# load_in_8bit=True keyword on from_pretrained is deprecated.
bnb_config_8bit = BitsAndBytesConfig(load_in_8bit=True)

model_8bit = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    quantization_config=bnb_config_8bit,
    device_map="auto",
)

Trainer API

from transformers import (
    AutoModelForCausalLM, AutoTokenizer,
    Trainer, TrainingArguments, DataCollatorForLanguageModeling,
)
from datasets import load_dataset

# Load the base (non-instruct) checkpoint and its tokenizer.
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
# Reuse the end-of-sequence token for padding (presumably because this
# tokenizer defines no pad token of its own; verify for your checkpoint).
tokenizer.pad_token = tokenizer.eos_token

# Training split of the Alpaca dataset from the Hub.
dataset = load_dataset("tatsu-lab/alpaca", split="train")

def tokenize(example):
    """Tokenize the "text" field, truncating each sequence to 2048 tokens."""
    return tokenizer(example["text"], truncation=True, max_length=2048)

# batched=True hands tokenize() batches of rows; the original columns are
# removed so only the tokenizer's outputs remain in the dataset.
tokenized = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

training_args = TrainingArguments(
    output_dir="./output",  # checkpoints are written here
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 sequences per device per optimizer step
    num_train_epochs=3,
    learning_rate=2e-5,
    bf16=True,  # bfloat16 mixed-precision training
    logging_steps=10,
    save_steps=500,
    save_total_limit=3,  # older checkpoints beyond this count are deleted
    warmup_steps=100,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    report_to="wandb",  # requires the wandb package and a configured account
    gradient_checkpointing=True,  # recompute activations to save memory
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    # mlm=False: no masked-LM masking, i.e. causal language-modeling batches
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Run training, then write the final model to ./final-model.
trainer.train()
trainer.save_model("./final-model")

Push to Hub

import os

from huggingface_hub import login

# Authenticate
# Read the access token from the HF_TOKEN environment variable instead of
# hard-coding it, so the secret never ends up in source control.
# Set it beforehand, e.g.: export HF_TOKEN=hf_...
login(token=os.environ["HF_TOKEN"])

# Push model and tokenizer to the same repository
model.push_to_hub("my-org/my-model")
tokenizer.push_to_hub("my-org/my-model")

# Push with Trainer
training_args = TrainingArguments(
    output_dir="./output",
    push_to_hub=True,
    hub_model_id="my-org/my-model",
)

Model Parallelism

import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Automatic device placement across GPUs
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    device_map="auto",        # Automatic placement
    torch_dtype=torch.float16,
)

# Custom device map
# Every module that owns weights must be assigned a device, otherwise loading
# fails for the unassigned parameters. The per-layer entries are therefore
# generated for all decoder layers rather than listing only the first few.
config = AutoConfig.from_pretrained("meta-llama/Llama-3.1-8B")
num_layers = config.num_hidden_layers
first_gpu_layers = num_layers // 2  # first half on GPU 0, second half on GPU 1

device_map = {
    "model.embed_tokens": 0,
    # Rotary-embedding buffers are kept with the embeddings.
    # NOTE(review): this module name exists in recent transformers releases;
    # confirm it for the version you run.
    "model.rotary_emb": 0,
    "model.norm": 1,
    "lm_head": 1,
}
device_map.update(
    {
        f"model.layers.{idx}": 0 if idx < first_gpu_layers else 1
        for idx in range(num_layers)
    }
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    device_map=device_map,
)

Generation Config

from transformers import GenerationConfig

# How the next token is chosen at each step
sampling_options = {
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
    "num_beams": 1,
}

# Length limit and the special token ids taken from the tokenizer
stopping_options = {
    "max_new_tokens": 256,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}

gen_config = GenerationConfig(**sampling_options, **stopping_options)

# Generate with the reusable configuration object instead of loose keyword arguments
outputs = model.generate(**inputs, generation_config=gen_config)

Common Auto Classes

| Class | Use Case |
| --- | --- |
| AutoModel | Base model (no head) |
| AutoModelForCausalLM | Text generation (GPT, Llama) |
| AutoModelForSeq2SeqLM | Seq2seq (T5, BART) |
| AutoModelForSequenceClassification | Text classification |
| AutoModelForTokenClassification | NER, POS tagging |
| AutoModelForQuestionAnswering | Extractive QA |
| AutoModelForImageClassification | Image classification |
| AutoModelForSpeechSeq2Seq | Speech-to-text |
| AutoTokenizer | Auto-detect tokenizer |
| AutoProcessor | Multi-modal processing |

أوامر CLI

# Download a model repository from the Hub
huggingface-cli download meta-llama/Llama-3.1-8B-Instruct

# Upload the contents of ./model-dir to the my-org/my-model repository
huggingface-cli upload my-org/my-model ./model-dir

# Log in (stores an access token for later commands)
huggingface-cli login

# Inspect the local cache
huggingface-cli scan-cache

# Delete cached models
huggingface-cli delete-cache