Transformers-Befehle
HuggingFace Transformers bietet Tausende von vortrainierten Modellen für NLP, Computer Vision, Audio und multimodale Aufgaben. Es stellt eine einheitliche API für das Laden von Modellen, Tokenisierung, Training und Inferenz über PyTorch, TensorFlow und JAX bereit.
Installation
# Install with PyTorch backend
# (the extras are quoted so shells such as zsh do not treat [...] as a glob pattern)
pip install "transformers[torch]"
# Install with all optional dependencies
pip install "transformers[torch,sentencepiece,tokenizers,vision,audio]"
# Install from source
pip install git+https://github.com/huggingface/transformers.git
# Install specific version
pip install transformers==4.46.0
# Verify
python -c "import transformers; print(transformers.__version__)"
Pipeline-API (Schnelle Inferenz)
from transformers import pipeline
# Text generation
generator = pipeline("text-generation", model="meta-llama/Llama-3.1-8B-Instruct")
result = generator("Explain quantum computing:", max_new_tokens=200)
# Sentiment analysis
classifier = pipeline("sentiment-analysis")
result = classifier("I love this product!")
# [{'label': 'POSITIVE', 'score': 0.9998}]
# Summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(long_text, max_length=130, min_length=30)
# Question answering
qa = pipeline("question-answering")
result = qa(question="What is BERT?", context="BERT is a transformer model...")
# Zero-shot classification
classifier = pipeline("zero-shot-classification")
result = classifier("I need to pay my bill", candidate_labels=["billing", "support", "sales"])
# Image classification
classifier = pipeline("image-classification", model="google/vit-base-patch16-224")
result = classifier("photo.jpg")
# Automatic speech recognition
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
result = asr("audio.mp3")
AutoModel und AutoTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load model and tokenizer
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map="auto",
)
# Tokenize input
inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
# Generate
outputs = model.generate(
**inputs,
max_new_tokens=100,
temperature=0.7,
top_p=0.9,
do_sample=True,
)
# Decode
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
Chat-Vorlage
# torch is needed for the torch.bfloat16 dtype used below.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain transformers in ML."},
]
# Apply chat template; add_generation_prompt=True is passed so the prompt
# ends where the assistant reply is expected to start.
input_ids = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
).to(model.device)
outputs = model.generate(input_ids, max_new_tokens=256)
# Slice off the prompt tokens so only the newly generated reply is decoded.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
Quantisierung mit BitsAndBytes
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch
# 4-bit quantization (QLoRA-style)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.1-70B-Instruct",
quantization_config=bnb_config,
device_map="auto",
)
# 8-bit quantization
# The setting is passed through BitsAndBytesConfig; the bare load_in_8bit=True
# keyword on from_pretrained is deprecated in favor of quantization_config.
model_8bit = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-70B-Instruct",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
Trainer-API
from transformers import (
AutoModelForCausalLM, AutoTokenizer,
Trainer, TrainingArguments, DataCollatorForLanguageModeling,
)
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
tokenizer.pad_token = tokenizer.eos_token
dataset = load_dataset("tatsu-lab/alpaca", split="train")
def tokenize(example):
    """Tokenize the "text" field of ``example``, truncating to 2048 tokens.

    Relies on the module-level ``tokenizer`` and returns whatever it returns.
    """
    return tokenizer(example["text"], truncation=True, max_length=2048)
tokenized = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)
training_args = TrainingArguments(
output_dir="./output",
per_device_train_batch_size=2,
gradient_accumulation_steps=8,
num_train_epochs=3,
learning_rate=2e-5,
bf16=True,
logging_steps=10,
save_steps=500,
save_total_limit=3,
warmup_steps=100,
weight_decay=0.01,
lr_scheduler_type="cosine",
report_to="wandb",
gradient_checkpointing=True,
push_to_hub=False,
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized,
data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
trainer.save_model("./final-model")
Zum Hub hochladen
from huggingface_hub import login
# Authenticate
login(token="hf_YOUR_TOKEN")
# Push model and tokenizer
model.push_to_hub("my-org/my-model")
tokenizer.push_to_hub("my-org/my-model")
# Push with Trainer
training_args = TrainingArguments(
output_dir="./output",
push_to_hub=True,
hub_model_id="my-org/my-model",
)
Modell-Parallelismus
# Automatic device placement across GPUs
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.1-70B-Instruct",
device_map="auto", # Automatic placement
torch_dtype=torch.float16,
)
# Custom device map
# Every module that holds weights must be assigned a device, otherwise loading
# fails. Llama-3.1-8B has 32 decoder layers: the first half goes to GPU 0,
# the second half to GPU 1.
num_layers = 32
device_map = {
    "model.embed_tokens": 0,
    # NOTE(review): recent Transformers releases keep the rotary embedding as a
    # model-level module; confirm this key against the installed version.
    "model.rotary_emb": 0,
    "model.norm": 1,
    "lm_head": 1,
}
device_map.update(
    {
        f"model.layers.{i}": 0 if i < num_layers // 2 else 1
        for i in range(num_layers)
    }
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    device_map=device_map,
)
Generierungskonfiguration
from transformers import GenerationConfig
gen_config = GenerationConfig(
max_new_tokens=256,
temperature=0.7,
top_p=0.9,
top_k=50,
repetition_penalty=1.1,
do_sample=True,
num_beams=1,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.pad_token_id,
)
outputs = model.generate(**inputs, generation_config=gen_config)
Häufige Auto-Klassen
| Class | Use Case |
|---|---|
| AutoModel | Base model (no head) |
| AutoModelForCausalLM | Text generation (GPT, Llama) |
| AutoModelForSeq2SeqLM | Seq2seq (T5, BART) |
| AutoModelForSequenceClassification | Text classification |
| AutoModelForTokenClassification | NER, POS tagging |
| AutoModelForQuestionAnswering | Extractive QA |
| AutoModelForImageClassification | Image classification |
| AutoModelForSpeechSeq2Seq | Speech-to-text |
| AutoTokenizer | Auto-detect tokenizer |
| AutoProcessor | Multi-modal processing |
CLI-Befehle
# Download model
huggingface-cli download meta-llama/Llama-3.1-8B-Instruct
# Upload model
huggingface-cli upload my-org/my-model ./model-dir
# Login
huggingface-cli login
# Check cache
huggingface-cli scan-cache
# Delete cached models
huggingface-cli delete-cache