Commandes Transformers
HuggingFace Transformers fournit des milliers de modèles pré-entraînés pour le NLP, la vision par ordinateur, l’audio et les tâches multimodales. La bibliothèque offre des pipelines simples pour l’inférence, des outils d’entraînement et une intégration profonde avec l’écosystème HuggingFace.
Installation
# Install with PyTorch backend
# (the quotes stop shells such as zsh from treating the brackets as a glob pattern)
pip install "transformers[torch]"
# Install with several optional extras
pip install "transformers[torch,sentencepiece,tokenizers,vision,audio]"
# Install from source
pip install git+https://github.com/huggingface/transformers.git
# Install specific version
pip install transformers==4.46.0
# Verify
python -c "import transformers; print(transformers.__version__)"
API Pipeline (inférence rapide)
from transformers import pipeline

# Text generation
generator = pipeline("text-generation", model="meta-llama/Llama-3.1-8B-Instruct")
result = generator("Explain quantum computing:", max_new_tokens=200)

# Sentiment analysis
classifier = pipeline("sentiment-analysis")
result = classifier("I love this product!")
# [{'label': 'POSITIVE', 'score': 0.9998}]

# Summarization
# Placeholder document so the snippet runs as-is; replace it with the text to summarize.
long_text = (
    "Transformers are a family of neural network architectures built around "
    "the attention mechanism. They were first introduced for machine "
    "translation and have since been applied to text, images and audio. "
    "Pretrained checkpoints can be fine-tuned on a downstream task with "
    "comparatively little labelled data, which is why they are widely used."
)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(long_text, max_length=130, min_length=30)

# Question answering
qa = pipeline("question-answering")
result = qa(question="What is BERT?", context="BERT is a transformer model...")

# Zero-shot classification
classifier = pipeline("zero-shot-classification")
result = classifier("I need to pay my bill", candidate_labels=["billing", "support", "sales"])

# Image classification
classifier = pipeline("image-classification", model="google/vit-base-patch16-224")
result = classifier("photo.jpg")

# Automatic speech recognition
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
result = asr("audio.mp3")
AutoModel et AutoTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# One checkpoint name shared by the tokenizer and the model
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Loading options: bfloat16 weights with automatic device placement
load_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

# Encode the prompt as PyTorch tensors, then move them to the model's device
prompt = "Hello, how are you?"
encoded = tokenizer(prompt, return_tensors="pt")
inputs = encoded.to(model.device)

# Sampling settings for generation
sampling_kwargs = {
    "max_new_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}
outputs = model.generate(**inputs, **sampling_kwargs)

# Turn the first returned sequence back into text, dropping special tokens
first_sequence = outputs[0]
response = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(response)
Modèle de chat
import torch  # needed for torch.bfloat16 below
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain transformers in ML."},
]

# Apply chat template
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
).to(model.device)

outputs = model.generate(input_ids, max_new_tokens=256)
# Slice off the prompt tokens so only the newly generated reply is decoded
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
Quantification avec BitsAndBytes
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 4-bit quantization (QLoRA-style)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
)

# 8-bit quantization
# Configured through BitsAndBytesConfig, like the 4-bit case above; passing
# `load_in_8bit=True` directly to from_pretrained() is deprecated.
bnb_config_8bit = BitsAndBytesConfig(load_in_8bit=True)
model_8bit = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    quantization_config=bnb_config_8bit,
    device_map="auto",
)
API Trainer
from transformers import (
    AutoModelForCausalLM, AutoTokenizer,
    Trainer, TrainingArguments, DataCollatorForLanguageModeling,
)
from datasets import load_dataset

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

dataset = load_dataset("tatsu-lab/alpaca", split="train")


def tokenize(example):
    """Tokenize the "text" field, truncating to at most 2048 tokens.

    Called through ``dataset.map(..., batched=True)``, so ``example`` holds a
    batch of rows and ``example["text"]`` is a list of strings.
    """
    return tokenizer(example["text"], truncation=True, max_length=2048)


# Drop the original columns so only the tokenizer outputs remain
tokenized = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-5,
    bf16=True,
    logging_steps=10,
    save_steps=500,
    save_total_limit=3,
    warmup_steps=100,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    report_to="wandb",
    gradient_checkpointing=True,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    # mlm=False selects causal (next-token) language modeling instead of masked LM
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
trainer.save_model("./final-model")
Publier sur le Hub
import os

from huggingface_hub import login
from transformers import TrainingArguments

# Authenticate
# Read the access token from the HF_TOKEN environment variable instead of
# hard-coding a secret in source. When the variable is unset, token=None is
# passed and login() falls back to its own behavior (an interactive prompt,
# per the huggingface_hub documentation).
login(token=os.environ.get("HF_TOKEN"))

# Push model and tokenizer
model.push_to_hub("my-org/my-model")
tokenizer.push_to_hub("my-org/my-model")

# Push with Trainer
training_args = TrainingArguments(
    output_dir="./output",
    push_to_hub=True,
    hub_model_id="my-org/my-model",
)
Parallélisme de modèles
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Automatic device placement across GPUs
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    device_map="auto",  # Automatic placement
    torch_dtype=torch.float16,
)

# Custom device map
# Every module of the model must be assigned a device, so the per-layer
# entries are generated from the checkpoint's layer count rather than listed
# by hand: first half of the decoder layers on GPU 0, second half on GPU 1.
checkpoint = "meta-llama/Llama-3.1-8B"
num_layers = AutoConfig.from_pretrained(checkpoint).num_hidden_layers
layers_on_first_gpu = num_layers // 2

device_map = {
    "model.embed_tokens": 0,
    # NOTE(review): recent Llama implementations expose a shared rotary
    # embedding module under this name -- confirm with model.named_modules()
    # for the installed transformers version.
    "model.rotary_emb": 0,
    "model.norm": 1,
    "lm_head": 1,
}
device_map.update(
    {
        f"model.layers.{idx}": 0 if idx < layers_on_first_gpu else 1
        for idx in range(num_layers)
    }
)

model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map=device_map,
)
Configuration de génération
from transformers import GenerationConfig

# Decoding settings gathered in one mapping, then wrapped in a GenerationConfig
generation_settings = {
    "max_new_tokens": 256,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
    "do_sample": True,
    "num_beams": 1,
    # Special-token ids are taken from the tokenizer
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
}
gen_config = GenerationConfig(**generation_settings)

# Generate with the prepared configuration
outputs = model.generate(**inputs, generation_config=gen_config)
Classes Auto courantes
| Classe | Cas d’utilisation |
|---|---|
| AutoModel | Base model (no head) |
| AutoModelForCausalLM | Text generation (GPT, Llama) |
| AutoModelForSeq2SeqLM | Seq2seq (T5, BART) |
| AutoModelForSequenceClassification | Text classification |
| AutoModelForTokenClassification | NER, POS tagging |
| AutoModelForQuestionAnswering | Extractive QA |
| AutoModelForImageClassification | Image classification |
| AutoModelForSpeechSeq2Seq | Speech-to-text |
| AutoTokenizer | Auto-detect tokenizer |
| AutoProcessor | Multi-modal processing |
Commandes CLI
# Download a model repository from the Hub
huggingface-cli download meta-llama/Llama-3.1-8B-Instruct
# Upload the local ./model-dir folder to the my-org/my-model repository
huggingface-cli upload my-org/my-model ./model-dir
# Log in to the Hub
huggingface-cli login
# Inspect the local cache
huggingface-cli scan-cache
# Delete cached models
huggingface-cli delete-cache