Introduction

Fine-tuning transforms general-purpose language models into specialized tools for your domain. While prompting works for many tasks, fine-tuning delivers consistent behavior, lower latency, and reduced token costs when you need the model to reliably follow specific formats, use domain terminology, or exhibit particular reasoning patterns. This guide covers practical fine-tuning strategies: preparing high-quality training data, choosing between full fine-tuning and parameter-efficient methods like LoRA, implementing training loops with proper evaluation, and deploying fine-tuned models in production. Whether you're building a customer service bot that speaks your brand voice or a code assistant trained on your codebase, fine-tuning lets you embed domain knowledge directly into model weights rather than cramming it into prompts.

Data Preparation
```python
from dataclasses import dataclass, field
from enum import Enum
from typing import Any


class DataFormat(Enum):
    """Training data formats."""
    INSTRUCTION = "instruction"  # instruction/input/output
    CHAT = "chat"                # messages format
    COMPLETION = "completion"    # prompt/completion
    PREFERENCE = "preference"    # chosen/rejected pairs


@dataclass
class TrainingExample:
    """A single training example."""
    id: str
    format: DataFormat
    content: dict
    metadata: dict = field(default_factory=dict)


@dataclass
class InstructionExample:
    """Instruction-following format."""
    instruction: str
    input: str = ""
    output: str = ""

    def to_prompt(self, template: str = "alpaca") -> str:
        """Convert to prompt format."""
        if template == "alpaca":
            if self.input:
                return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{self.instruction}

### Input:
{self.input}

### Response:
{self.output}"""
            else:
                return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{self.instruction}

### Response:
{self.output}"""
        elif template == "chatml":
            messages = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{self.instruction}\n{self.input}" if self.input else self.instruction},
                {"role": "assistant", "content": self.output}
            ]
            return self._format_chatml(messages)
        return f"{self.instruction}\n{self.input}\n{self.output}"

    def _format_chatml(self, messages: list[dict]) -> str:
        """Format as ChatML."""
        formatted = ""
        for msg in messages:
            formatted += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
        return formatted


class DatasetPreparer:
    """Prepare datasets for fine-tuning."""

    def __init__(self, tokenizer: Any):
        self.tokenizer = tokenizer

    def prepare_instruction_dataset(
        self,
        examples: list[InstructionExample],
        max_length: int = 2048,
        template: str = "alpaca"
    ) -> list[dict]:
        """Prepare instruction dataset."""
        prepared = []
        for example in examples:
            prompt = example.to_prompt(template)
            # Tokenize
            tokens = self.tokenizer.encode(prompt)
            if len(tokens) > max_length:
                # Skip over-length examples (alternatively, truncate)
                continue
            prepared.append({
                "input_ids": tokens,
                "attention_mask": [1] * len(tokens),
                "labels": tokens  # For causal LM
            })
        return prepared

    def prepare_chat_dataset(
        self,
        conversations: list[list[dict]],
        max_length: int = 2048
    ) -> list[dict]:
        """Prepare chat dataset."""
        prepared = []
        for messages in conversations:
            # Format conversation
            formatted = self._format_conversation(messages)
            # Tokenize
            tokens = self.tokenizer.encode(formatted)
            if len(tokens) > max_length:
                continue
            # Create labels (mask user turns)
            labels = self._create_chat_labels(messages, tokens)
            prepared.append({
                "input_ids": tokens,
                "attention_mask": [1] * len(tokens),
                "labels": labels
            })
        return prepared

    def _format_conversation(self, messages: list[dict]) -> str:
        """Format conversation."""
        formatted = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            formatted += f"<|im_start|>{role}\n{content}<|im_end|>\n"
        return formatted

    def _create_chat_labels(
        self,
        messages: list[dict],
        tokens: list[int]
    ) -> list[int]:
        """Create labels masking user turns."""
        # Simplified: a full implementation tracks each turn's token span
        # and replaces every non-assistant token with -100 so the loss is
        # computed only on assistant responses.
        return tokens


class DataQualityChecker:
    """Check training data quality."""

    def __init__(self, tokenizer: Any):
        self.tokenizer = tokenizer

    def check_example(self, example: InstructionExample) -> dict:
        """Check single example quality."""
        issues = []
        # Check instruction
        if len(example.instruction) < 10:
            issues.append("instruction_too_short")
        if len(example.instruction) > 1000:
            issues.append("instruction_too_long")
        # Check output
        if len(example.output) < 5:
            issues.append("output_too_short")
        # Check for repetition
        if self._has_repetition(example.output):
            issues.append("output_has_repetition")
        # Check token count
        prompt = example.to_prompt()
        tokens = self.tokenizer.encode(prompt)
        return {
            "valid": len(issues) == 0,
            "issues": issues,
            "token_count": len(tokens)
        }

    def _has_repetition(self, text: str, threshold: float = 0.3) -> bool:
        """Check for excessive repetition."""
        words = text.lower().split()
        if len(words) < 10:
            return False
        unique_ratio = len(set(words)) / len(words)
        return unique_ratio < threshold

    def analyze_dataset(
        self,
        examples: list[InstructionExample]
    ) -> dict:
        """Analyze entire dataset."""
        results = [self.check_example(e) for e in examples]
        valid_count = sum(1 for r in results if r["valid"])
        token_counts = [r["token_count"] for r in results]
        issue_counts = {}
        for r in results:
            for issue in r["issues"]:
                issue_counts[issue] = issue_counts.get(issue, 0) + 1
        return {
            "total_examples": len(examples),
            "valid_examples": valid_count,
            "invalid_examples": len(examples) - valid_count,
            "avg_tokens": sum(token_counts) / len(token_counts) if token_counts else 0,
            "max_tokens": max(token_counts) if token_counts else 0,
            "min_tokens": min(token_counts) if token_counts else 0,
            "issue_counts": issue_counts
        }
```
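To see the pieces working together, here is a minimal sketch that runs the quality checker end to end. The `ToyTokenizer` is a stand-in of my own, not part of the code above; any object exposing an `encode` method, such as a Hugging Face tokenizer, slots in the same way:

```python
class ToyTokenizer:
    """Stand-in tokenizer: one fake token id per whitespace-separated word."""
    def encode(self, text: str) -> list[int]:
        return list(range(len(text.split())))


examples = [
    InstructionExample(
        instruction="Summarize the ticket in one sentence.",
        input="Customer reports login failures since the last deploy.",
        output="A customer cannot log in following the latest deployment."
    ),
    InstructionExample(instruction="Hi", output="ok"),  # should fail checks
]

checker = DataQualityChecker(tokenizer=ToyTokenizer())
report = checker.analyze_dataset(examples)
print(report["valid_examples"], report["issue_counts"])
# Expected: 1 valid example; the second is flagged as
# instruction_too_short and output_too_short
```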
Parameter-Efficient Fine-Tuning
```python
from dataclasses import dataclass
from typing import Optional

import torch
import torch.nn as nn


@dataclass
class LoRAConfig:
    """LoRA configuration."""
    r: int = 8            # Rank
    alpha: int = 16       # Scaling factor
    dropout: float = 0.1
    target_modules: Optional[list[str]] = None

    def __post_init__(self):
        if self.target_modules is None:
            self.target_modules = ["q_proj", "v_proj"]


class LoRALayer(nn.Module):
    """LoRA adapter layer."""

    def __init__(
        self,
        in_features: int,
        out_features: int,
        config: LoRAConfig
    ):
        super().__init__()
        self.r = config.r
        self.alpha = config.alpha
        self.scaling = self.alpha / self.r
        # Low-rank matrices
        self.lora_A = nn.Parameter(
            torch.zeros(config.r, in_features)
        )
        self.lora_B = nn.Parameter(
            torch.zeros(out_features, config.r)
        )
        self.dropout = nn.Dropout(config.dropout)
        # Initialize: A gets Kaiming init, B starts at zero so the
        # adapter contributes nothing before training
        nn.init.kaiming_uniform_(self.lora_A, a=5**0.5)
        nn.init.zeros_(self.lora_B)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass."""
        # x: (batch, seq, in_features)
        # lora_A: (r, in_features)
        # lora_B: (out_features, r)
        dropped = self.dropout(x)
        lora_out = dropped @ self.lora_A.T @ self.lora_B.T
        return lora_out * self.scaling


class LoRAModel:
    """Apply LoRA to a model."""

    def __init__(self, model: nn.Module, config: LoRAConfig):
        self.model = model
        self.config = config
        self.lora_layers = {}
        self._apply_lora()

    def _apply_lora(self):
        """Apply LoRA to target modules."""
        for name, module in self.model.named_modules():
            if any(target in name for target in self.config.target_modules):
                if isinstance(module, nn.Linear):
                    lora = LoRALayer(
                        module.in_features,
                        module.out_features,
                        self.config
                    )
                    self.lora_layers[name] = lora
                    # Wrap forward; the factory binds orig and lora_layer
                    # so each wrapped module keeps its own pair
                    original_forward = module.forward

                    def make_forward(orig, lora_layer):
                        def forward(x):
                            return orig(x) + lora_layer(x)
                        return forward

                    module.forward = make_forward(original_forward, lora)

    def get_trainable_params(self) -> list[nn.Parameter]:
        """Get only LoRA parameters."""
        params = []
        for lora in self.lora_layers.values():
            params.extend([lora.lora_A, lora.lora_B])
        return params

    def save_lora(self, path: str):
        """Save LoRA weights."""
        state = {}
        for name, lora in self.lora_layers.items():
            state[f"{name}.lora_A"] = lora.lora_A.data
            state[f"{name}.lora_B"] = lora.lora_B.data
        torch.save(state, path)

    def load_lora(self, path: str):
        """Load LoRA weights."""
        state = torch.load(path)
        for name, lora in self.lora_layers.items():
            lora.lora_A.data = state[f"{name}.lora_A"]
            lora.lora_B.data = state[f"{name}.lora_B"]


@dataclass
class QLoRAConfig(LoRAConfig):
    """QLoRA configuration with quantization."""
    bits: int = 4
    double_quant: bool = True
    quant_type: str = "nf4"


class PEFTTrainer:
    """Parameter-efficient fine-tuning trainer."""

    def __init__(
        self,
        model: nn.Module,
        config: LoRAConfig,
        learning_rate: float = 1e-4
    ):
        # Freeze base weights so only the LoRA matrices receive gradients
        for param in model.parameters():
            param.requires_grad_(False)
        self.lora_model = LoRAModel(model, config)
        self.learning_rate = learning_rate
        # Only train LoRA params
        self.optimizer = torch.optim.AdamW(
            self.lora_model.get_trainable_params(),
            lr=learning_rate
        )

    def train_step(
        self,
        input_ids: torch.Tensor,
        labels: torch.Tensor
    ) -> float:
        """Single training step."""
        self.optimizer.zero_grad()
        # Forward
        outputs = self.lora_model.model(
            input_ids=input_ids,
            labels=labels
        )
        loss = outputs.loss
        # Backward
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def save(self, path: str):
        """Save LoRA weights."""
        self.lora_model.save_lora(path)
```
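As a quick smoke test, the sketch below (a toy example of mine, not part of the API above) applies the adapter to a module whose projections are named to match the default `target_modules`, then counts how few parameters are actually trainable:

```python
# Toy module with Linear layers named to match the default target_modules.
class ToyAttention(nn.Module):
    def __init__(self, dim: int = 64):
        super().__init__()
        self.q_proj = nn.Linear(dim, dim)
        self.v_proj = nn.Linear(dim, dim)

    def forward(self, x):
        return self.q_proj(x) + self.v_proj(x)


model = ToyAttention()
lora_model = LoRAModel(model, LoRAConfig(r=4, alpha=8))

trainable = sum(p.numel() for p in lora_model.get_trainable_params())
total = sum(p.numel() for p in model.parameters()) + trainable
print(f"trainable: {trainable} / {total} ({100 * trainable / total:.1f}%)")

x = torch.randn(2, 10, 64)   # (batch, seq, dim)
out = model(x)               # base output + LoRA deltas
print(out.shape)             # torch.Size([2, 10, 64])
```

Even on this tiny module the adapter is only about 11% of the parameters; on a real transformer, where adapters touch only a couple of projections per block, the trainable fraction is typically well under 1%.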
Training Loop Implementation
```python
import os
from dataclasses import dataclass, field
from typing import Any

import torch
from torch.utils.data import DataLoader


@dataclass
class TrainingConfig:
    """Training configuration."""
    epochs: int = 3
    batch_size: int = 4
    gradient_accumulation_steps: int = 4
    learning_rate: float = 2e-5
    warmup_steps: int = 100
    max_grad_norm: float = 1.0
    eval_steps: int = 500
    save_steps: int = 1000
    logging_steps: int = 10
    output_dir: str = "./output"


@dataclass
class TrainingState:
    """Training state."""
    global_step: int = 0
    epoch: int = 0
    best_loss: float = float("inf")
    train_losses: list[float] = field(default_factory=list)
    eval_losses: list[float] = field(default_factory=list)


class Trainer:
    """Full fine-tuning trainer."""

    def __init__(
        self,
        model: Any,
        tokenizer: Any,
        config: TrainingConfig,
        train_dataset: Any,
        eval_dataset: Any = None
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.config = config
        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset
        self.state = TrainingState()
        # Setup optimizer
        self.optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=config.learning_rate
        )
        # Setup scheduler
        total_steps = (
            len(train_dataset) // config.batch_size
            // config.gradient_accumulation_steps
            * config.epochs
        )
        self.scheduler = self._get_scheduler(total_steps)

    def _get_scheduler(self, total_steps: int):
        """Linear warmup followed by linear decay."""
        from torch.optim.lr_scheduler import LambdaLR

        warmup_steps = max(1, self.config.warmup_steps)

        def lr_lambda(step):
            if step < warmup_steps:
                return step / warmup_steps
            return max(0.0, (total_steps - step) / max(1, total_steps - warmup_steps))

        return LambdaLR(self.optimizer, lr_lambda)

    def train(self) -> TrainingState:
        """Run training."""
        train_loader = DataLoader(
            self.train_dataset,
            batch_size=self.config.batch_size,
            shuffle=True
        )
        self.model.train()
        for epoch in range(self.config.epochs):
            self.state.epoch = epoch
            epoch_loss = 0.0
            for step, batch in enumerate(train_loader):
                loss = self._training_step(batch)
                epoch_loss += loss
                # Gradient accumulation: only update after N micro-batches
                if (step + 1) % self.config.gradient_accumulation_steps == 0:
                    # Clip gradients
                    torch.nn.utils.clip_grad_norm_(
                        self.model.parameters(),
                        self.config.max_grad_norm
                    )
                    self.optimizer.step()
                    self.scheduler.step()
                    self.optimizer.zero_grad()
                    self.state.global_step += 1
                    # Logging (running average over the epoch so far)
                    if self.state.global_step % self.config.logging_steps == 0:
                        avg_loss = epoch_loss / (step + 1)
                        self.state.train_losses.append(avg_loss)
                        print(f"Step {self.state.global_step}: loss={avg_loss:.4f}")
                    # Evaluation
                    if (self.eval_dataset and
                            self.state.global_step % self.config.eval_steps == 0):
                        eval_loss = self.evaluate()
                        self.state.eval_losses.append(eval_loss)
                        if eval_loss < self.state.best_loss:
                            self.state.best_loss = eval_loss
                            self.save_checkpoint("best")
                    # Save checkpoint
                    if self.state.global_step % self.config.save_steps == 0:
                        self.save_checkpoint(f"step_{self.state.global_step}")
        return self.state

    def _training_step(self, batch: dict) -> float:
        """Single training step."""
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        # Scale the loss so accumulated gradients average over micro-batches
        loss = outputs.loss / self.config.gradient_accumulation_steps
        loss.backward()
        return loss.item() * self.config.gradient_accumulation_steps

    def evaluate(self) -> float:
        """Run evaluation."""
        self.model.eval()
        eval_loader = DataLoader(
            self.eval_dataset,
            batch_size=self.config.batch_size
        )
        total_loss = 0.0
        total_steps = 0
        with torch.no_grad():
            for batch in eval_loader:
                outputs = self.model(
                    input_ids=batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    labels=batch["labels"]
                )
                total_loss += outputs.loss.item()
                total_steps += 1
        self.model.train()
        return total_loss / total_steps if total_steps > 0 else 0.0

    def save_checkpoint(self, name: str):
        """Save checkpoint."""
        path = os.path.join(self.config.output_dir, name)
        os.makedirs(path, exist_ok=True)
        # Save model
        self.model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)
        # Save state
        torch.save({
            "global_step": self.state.global_step,
            "epoch": self.state.epoch,
            "best_loss": self.state.best_loss,
            "optimizer": self.optimizer.state_dict(),
            "scheduler": self.scheduler.state_dict()
        }, os.path.join(path, "training_state.pt"))
```
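Wiring the trainer up takes only a model and a list of dict-shaped examples. A minimal sketch, assuming the transformers package is installed and using `gpt2` purely as a small placeholder base model:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model = AutoModelForCausalLM.from_pretrained("gpt2")   # placeholder base model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

texts = ["### Instruction:\nSay hello.\n\n### Response:\nHello!"]  # tiny demo corpus
enc = tokenizer(texts, padding="max_length", max_length=64,
                truncation=True, return_tensors="pt")

labels = enc["input_ids"].clone()
labels[enc["attention_mask"] == 0] = -100  # don't compute loss on padding

# DataLoader-compatible dataset: each item is a dict of fixed-length tensors.
dataset = [{"input_ids": enc["input_ids"][i],
            "attention_mask": enc["attention_mask"][i],
            "labels": labels[i]}
           for i in range(enc["input_ids"].shape[0])]

config = TrainingConfig(epochs=1, batch_size=1,
                        gradient_accumulation_steps=1, warmup_steps=1,
                        logging_steps=1, output_dir="./demo-output")
trainer = Trainer(model, tokenizer, config, train_dataset=dataset)
state = trainer.train()
print(state.global_step, state.train_losses)
```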
Evaluation and Metrics
```python
import math
from dataclasses import dataclass
from enum import Enum
from typing import Any, Optional

import torch


class MetricType(Enum):
    """Types of evaluation metrics."""
    PERPLEXITY = "perplexity"
    ACCURACY = "accuracy"
    BLEU = "bleu"
    ROUGE = "rouge"
    EXACT_MATCH = "exact_match"
    F1 = "f1"


@dataclass
class EvalResult:
    """Evaluation result."""
    metric: MetricType
    value: float
    details: Optional[dict] = None


class Evaluator:
    """Model evaluator."""

    def __init__(self, model: Any, tokenizer: Any):
        self.model = model
        self.tokenizer = tokenizer

    def compute_perplexity(
        self,
        texts: list[str],
        batch_size: int = 8
    ) -> EvalResult:
        """Compute perplexity."""
        self.model.eval()
        total_loss = 0.0
        total_tokens = 0
        with torch.no_grad():
            for i in range(0, len(texts), batch_size):
                batch_texts = texts[i:i + batch_size]
                encodings = self.tokenizer(
                    batch_texts,
                    return_tensors="pt",
                    padding=True,
                    truncation=True
                )
                # Mask padding so it doesn't count toward the loss
                labels = encodings["input_ids"].clone()
                labels[encodings["attention_mask"] == 0] = -100
                outputs = self.model(
                    input_ids=encodings["input_ids"],
                    attention_mask=encodings["attention_mask"],
                    labels=labels
                )
                n_tokens = int(encodings["attention_mask"].sum().item())
                total_loss += outputs.loss.item() * n_tokens
                total_tokens += n_tokens
        avg_loss = total_loss / total_tokens
        perplexity = math.exp(avg_loss)
        return EvalResult(
            metric=MetricType.PERPLEXITY,
            value=perplexity,
            details={"avg_loss": avg_loss, "total_tokens": total_tokens}
        )

    def compute_accuracy(
        self,
        examples: list[tuple[str, str]],  # (input, expected_output)
        generation_config: dict = None
    ) -> EvalResult:
        """Compute exact match accuracy."""
        correct = 0
        total = len(examples)
        gen_config = generation_config or {
            "max_new_tokens": 100,
            "do_sample": False  # greedy decoding for deterministic output
        }
        for input_text, expected in examples:
            generated = self.generate(input_text, **gen_config)
            if generated.strip() == expected.strip():
                correct += 1
        accuracy = correct / total if total > 0 else 0.0
        return EvalResult(
            metric=MetricType.ACCURACY,
            value=accuracy,
            details={"correct": correct, "total": total}
        )

    def compute_rouge(
        self,
        predictions: list[str],
        references: list[str]
    ) -> EvalResult:
        """Compute ROUGE scores."""
        from rouge_score import rouge_scorer

        scorer = rouge_scorer.RougeScorer(
            ["rouge1", "rouge2", "rougeL"],
            use_stemmer=True
        )
        scores = {"rouge1": [], "rouge2": [], "rougeL": []}
        for pred, ref in zip(predictions, references):
            result = scorer.score(ref, pred)
            for key in scores:
                scores[key].append(result[key].fmeasure)
        avg_scores = {k: sum(v) / len(v) for k, v in scores.items()}
        return EvalResult(
            metric=MetricType.ROUGE,
            value=avg_scores["rougeL"],
            details=avg_scores
        )

    def generate(self, prompt: str, **kwargs) -> str:
        """Generate text."""
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.model.generate(
            inputs["input_ids"],
            **kwargs
        )
        # Decode only the newly generated tokens, not the prompt
        generated = self.tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True
        )
        return generated


class BenchmarkRunner:
    """Run standard benchmarks."""

    def __init__(self, evaluator: Evaluator):
        self.evaluator = evaluator

    def run_mmlu(self, subjects: list[str] = None) -> dict:
        """Run MMLU benchmark."""
        # Simplified: a real implementation would load the actual MMLU data
        results = {}
        subjects = subjects or ["all"]
        for subject in subjects:
            # Load subject questions
            questions = self._load_mmlu_subject(subject)
            correct = 0
            for q in questions:
                answer = self._answer_mcq(q)
                if answer == q["correct"]:
                    correct += 1
            results[subject] = correct / len(questions) if questions else 0.0
        return results

    def _load_mmlu_subject(self, subject: str) -> list[dict]:
        """Load MMLU subject questions."""
        # Would load from the actual dataset
        return []

    def _answer_mcq(self, question: dict) -> str:
        """Answer multiple choice question."""
        prompt = f"""Question: {question['question']}
A) {question['choices'][0]}
B) {question['choices'][1]}
C) {question['choices'][2]}
D) {question['choices'][3]}
Answer with just the letter (A, B, C, or D):"""
        response = self.evaluator.generate(
            prompt,
            max_new_tokens=5,
            do_sample=False
        )
        # Extract letter
        for letter in ["A", "B", "C", "D"]:
            if letter in response.upper():
                return letter
        return "A"  # Default
```
Production Fine-Tuning Service
```python
import json
import uuid
from datetime import datetime
from enum import Enum
from typing import Optional

from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel

app = FastAPI()

# In-memory storage for jobs and models (use a database in production)
fine_tuning_jobs: dict = {}
trained_models: dict = {}


class JobStatus(str, Enum):
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"


class CreateJobRequest(BaseModel):
    base_model: str
    training_file: str
    validation_file: Optional[str] = None
    hyperparameters: Optional[dict] = None


class JobResponse(BaseModel):
    job_id: str
    status: JobStatus
    base_model: str
    created_at: str
    finished_at: Optional[str] = None
    trained_model: Optional[str] = None
    error: Optional[str] = None


class InferenceRequest(BaseModel):
    model_id: str
    prompt: str
    max_tokens: int = 100
    temperature: float = 0.7


class TrainingExample(BaseModel):
    instruction: str
    input: Optional[str] = ""
    output: str


class UploadDataRequest(BaseModel):
    examples: list[TrainingExample]
    purpose: str = "fine-tune"


@app.post("/v1/fine-tuning/jobs")
async def create_fine_tuning_job(
    request: CreateJobRequest,
    background_tasks: BackgroundTasks
):
    """Create a fine-tuning job."""
    job_id = f"ftjob-{uuid.uuid4().hex[:8]}"
    job = {
        "job_id": job_id,
        "status": JobStatus.PENDING,
        "base_model": request.base_model,
        "training_file": request.training_file,
        "validation_file": request.validation_file,
        "hyperparameters": request.hyperparameters or {
            "epochs": 3,
            "learning_rate": 2e-5,
            "batch_size": 4
        },
        "created_at": datetime.utcnow().isoformat(),
        "finished_at": None,
        "trained_model": None,
        "error": None
    }
    fine_tuning_jobs[job_id] = job
    # Start training in background
    background_tasks.add_task(run_fine_tuning, job_id)
    return JobResponse(**job)


async def run_fine_tuning(job_id: str):
    """Run fine-tuning job."""
    job = fine_tuning_jobs[job_id]
    job["status"] = JobStatus.RUNNING
    try:
        # Load base model
        # model, tokenizer = load_model(job["base_model"])
        # Load training data
        # train_data = load_training_file(job["training_file"])
        # Prepare dataset
        # dataset = prepare_dataset(train_data, tokenizer)
        # Train
        # trainer = Trainer(model, tokenizer, config, dataset)
        # trainer.train()
        # Save model
        model_id = f"ft:{job['base_model']}:{job_id}"
        # trainer.save_checkpoint(model_id)
        trained_models[model_id] = {
            "base_model": job["base_model"],
            "job_id": job_id,
            "created_at": datetime.utcnow().isoformat()
        }
        job["status"] = JobStatus.COMPLETED
        job["trained_model"] = model_id
        job["finished_at"] = datetime.utcnow().isoformat()
    except Exception as e:
        job["status"] = JobStatus.FAILED
        job["error"] = str(e)
        job["finished_at"] = datetime.utcnow().isoformat()


@app.get("/v1/fine-tuning/jobs/{job_id}")
async def get_fine_tuning_job(job_id: str):
    """Get fine-tuning job status."""
    if job_id not in fine_tuning_jobs:
        raise HTTPException(404, "Job not found")
    return JobResponse(**fine_tuning_jobs[job_id])


@app.get("/v1/fine-tuning/jobs")
async def list_fine_tuning_jobs(limit: int = 20):
    """List fine-tuning jobs."""
    jobs = list(fine_tuning_jobs.values())[-limit:]
    return {"data": [JobResponse(**j) for j in jobs]}


@app.post("/v1/fine-tuning/jobs/{job_id}/cancel")
async def cancel_fine_tuning_job(job_id: str):
    """Cancel a fine-tuning job."""
    if job_id not in fine_tuning_jobs:
        raise HTTPException(404, "Job not found")
    job = fine_tuning_jobs[job_id]
    if job["status"] not in [JobStatus.PENDING, JobStatus.RUNNING]:
        raise HTTPException(400, "Job cannot be cancelled")
    # No separate CANCELLED state in this demo; record as FAILED with a reason
    job["status"] = JobStatus.FAILED
    job["error"] = "Cancelled by user"
    job["finished_at"] = datetime.utcnow().isoformat()
    return JobResponse(**job)


@app.post("/v1/files")
async def upload_training_file(request: UploadDataRequest):
    """Upload training data file."""
    file_id = f"file-{uuid.uuid4().hex[:8]}"
    # Convert to JSONL format
    jsonl_content = "\n".join([
        json.dumps({
            "instruction": e.instruction,
            "input": e.input,
            "output": e.output
        })
        for e in request.examples
    ])
    # Store file (would save to storage in production)
    return {
        "id": file_id,
        "purpose": request.purpose,
        "bytes": len(jsonl_content),
        "created_at": datetime.utcnow().isoformat(),
        "filename": f"{file_id}.jsonl"
    }


@app.post("/v1/completions")
async def create_completion(request: InferenceRequest):
    """Generate completion with fine-tuned model."""
    if request.model_id not in trained_models:
        raise HTTPException(404, "Model not found")
    # Load model and generate
    # model = load_trained_model(request.model_id)
    # response = model.generate(request.prompt, ...)
    return {
        "id": f"cmpl-{uuid.uuid4().hex[:8]}",
        "model": request.model_id,
        "choices": [
            {
                "text": "Generated response placeholder",
                "index": 0,
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30
        }
    }


@app.get("/v1/models")
async def list_models():
    """List available models."""
    models = []
    # Add fine-tuned models
    for model_id, info in trained_models.items():
        models.append({
            "id": model_id,
            "object": "model",
            "created": info["created_at"],
            "owned_by": "user"
        })
    return {"data": models}


@app.delete("/v1/models/{model_id}")
async def delete_model(model_id: str):
    """Delete a fine-tuned model."""
    if model_id not in trained_models:
        raise HTTPException(404, "Model not found")
    del trained_models[model_id]
    return {"id": model_id, "deleted": True}


@app.get("/health")
async def health():
    return {"status": "healthy"}
```
References
- LoRA Paper: https://arxiv.org/abs/2106.09685
- QLoRA Paper: https://arxiv.org/abs/2305.14314
- Hugging Face PEFT: https://huggingface.co/docs/peft
- OpenAI Fine-Tuning: https://platform.openai.com/docs/guides/fine-tuning
Conclusion
Fine-tuning embeds domain knowledge directly into model weights. Start with data preparation—quality matters more than quantity, so invest in cleaning and validating your training examples. Choose between full fine-tuning for maximum customization or parameter-efficient methods like LoRA for faster training and smaller storage. LoRA adds trainable low-rank matrices to frozen model weights, achieving comparable results with a fraction of the parameters. Implement proper training loops with gradient accumulation, learning rate scheduling, and regular evaluation checkpoints. Monitor perplexity on held-out data to detect overfitting early. For production, build APIs that manage fine-tuning jobs, track model versions, and serve inference from trained models. The key insight is that fine-tuning trades flexibility for consistency—a fine-tuned model reliably produces outputs in your desired format without elaborate prompting. Use fine-tuning when you have clear patterns you want the model to learn, sufficient training data, and requirements for consistent behavior that prompting alone cannot achieve.