Introduction
Prompt optimization is the systematic process of improving prompts to achieve better LLM outputs—higher accuracy, more consistent formatting, reduced latency, and lower costs. Unlike ad-hoc prompt engineering, optimization treats prompts as artifacts that can be measured, tested, and iteratively improved. This guide covers the techniques that make prompts more effective: structural patterns that improve clarity, few-shot example selection strategies, automatic prompt optimization with DSPy and similar frameworks, A/B testing methodologies, and production prompt management. Whether you’re optimizing for quality, speed, or cost, these patterns will help you build prompts that perform reliably at scale.

Prompt Structure Patterns
from dataclasses import dataclass, field
from typing import Any, Optional
from abc import ABC, abstractmethod
from enum import Enum
class PromptSection(Enum):
"""Sections of a structured prompt."""
SYSTEM = "system"
CONTEXT = "context"
TASK = "task"
EXAMPLES = "examples"
CONSTRAINTS = "constraints"
OUTPUT_FORMAT = "output_format"
@dataclass
class PromptComponent:
"""A component of a prompt."""
section: PromptSection
content: str
priority: int = 0 # Higher = more important
class StructuredPrompt:
"""Build structured prompts."""
def __init__(self):
self.components: list[PromptComponent] = []
def add_system(self, content: str, priority: int = 100) -> 'StructuredPrompt':
"""Add system instruction."""
self.components.append(PromptComponent(
section=PromptSection.SYSTEM,
content=content,
priority=priority
))
return self
def add_context(self, content: str, priority: int = 80) -> 'StructuredPrompt':
"""Add context information."""
self.components.append(PromptComponent(
section=PromptSection.CONTEXT,
content=content,
priority=priority
))
return self
def add_task(self, content: str, priority: int = 90) -> 'StructuredPrompt':
"""Add task description."""
self.components.append(PromptComponent(
section=PromptSection.TASK,
content=content,
priority=priority
))
return self
def add_examples(self, examples: list[dict], priority: int = 70) -> 'StructuredPrompt':
"""Add few-shot examples."""
formatted = "\n\n".join(
f"Input: {ex['input']}\nOutput: {ex['output']}"
for ex in examples
)
self.components.append(PromptComponent(
section=PromptSection.EXAMPLES,
content=f"Examples:\n{formatted}",
priority=priority
))
return self
def add_constraints(self, constraints: list[str], priority: int = 85) -> 'StructuredPrompt':
"""Add constraints."""
formatted = "\n".join(f"- {c}" for c in constraints)
self.components.append(PromptComponent(
section=PromptSection.CONSTRAINTS,
content=f"Constraints:\n{formatted}",
priority=priority
))
return self
def add_output_format(self, format_spec: str, priority: int = 95) -> 'StructuredPrompt':
"""Add output format specification."""
self.components.append(PromptComponent(
section=PromptSection.OUTPUT_FORMAT,
content=f"Output format:\n{format_spec}",
priority=priority
))
return self
def build(self, max_tokens: Optional[int] = None) -> str:
"""Build the prompt."""
# Sort by priority
sorted_components = sorted(
self.components,
key=lambda c: c.priority,
reverse=True
)
# Build prompt
sections = []
current_tokens = 0
for component in sorted_components:
component_tokens = len(component.content.split()) * 1.3 # Rough estimate
if max_tokens and current_tokens + component_tokens > max_tokens:
continue
sections.append(component.content)
current_tokens += component_tokens
return "\n\n".join(sections)
class PromptTemplate:
"""Template-based prompt construction."""
def __init__(self, template: str):
self.template = template
self.variables: set[str] = self._extract_variables()
def _extract_variables(self) -> set[str]:
"""Extract template variables."""
import re
return set(re.findall(r'\{(\w+)\}', self.template))
def format(self, **kwargs) -> str:
"""Format template with variables."""
missing = self.variables - set(kwargs.keys())
if missing:
raise ValueError(f"Missing variables: {missing}")
return self.template.format(**kwargs)
def partial(self, **kwargs) -> 'PromptTemplate':
"""Partially fill template."""
new_template = self.template
for key, value in kwargs.items():
new_template = new_template.replace(f"{{{key}}}", str(value))
return PromptTemplate(new_template)
class ChainOfThoughtPrompt:
"""Build chain-of-thought prompts."""
def __init__(self, task: str):
self.task = task
self.reasoning_steps: list[str] = []
def add_step(self, step: str) -> 'ChainOfThoughtPrompt':
"""Add reasoning step."""
self.reasoning_steps.append(step)
return self
def build(self) -> str:
"""Build CoT prompt."""
steps = "\n".join(
f"{i+1}. {step}"
for i, step in enumerate(self.reasoning_steps)
)
return f"""{self.task}
Let's think step by step:
{steps}
Now, apply this reasoning to solve the problem."""
class TreeOfThoughtPrompt:
"""Build tree-of-thought prompts."""
def __init__(self, task: str, num_branches: int = 3):
self.task = task
self.num_branches = num_branches
def build_exploration_prompt(self) -> str:
"""Build prompt for exploring options."""
return f"""{self.task}
Generate {self.num_branches} different approaches to solve this problem.
For each approach:
1. Describe the approach
2. List potential advantages
3. List potential challenges
Approaches:"""
def build_evaluation_prompt(self, approaches: list[str]) -> str:
"""Build prompt for evaluating approaches."""
formatted = "\n\n".join(
f"Approach {i+1}: {a}"
for i, a in enumerate(approaches)
)
return f"""Evaluate these approaches for solving: {self.task}
{formatted}
For each approach, rate on a scale of 1-10:
- Feasibility
- Effectiveness
- Efficiency
Then recommend the best approach and explain why."""
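To make the builders concrete, here is a minimal usage sketch. The extraction task, constraints, and example record are invented for illustration; only the classes defined above are assumed.
# Hypothetical usage of the builders defined above.
prompt = (
    StructuredPrompt()
    .add_system("You are a precise data-extraction assistant.")
    .add_task("Extract the company name and funding amount from the article.")
    .add_constraints(["Return null for missing fields", "Never invent values"])
    .add_output_format('{"company": str, "amount_usd": int | null}')
    .add_examples([{
        "input": "Acme raised $5M in seed funding.",
        "output": '{"company": "Acme", "amount_usd": 5000000}'
    }])
    .build(max_tokens=800)
)

# Templates can be partially bound, then finished at request time.
template = PromptTemplate("Summarize {document} for a {audience} audience.")
exec_template = template.partial(audience="executive")  # still expects {document}
print(exec_template.format(document="the Q3 earnings report"))

# Chain-of-thought scaffolding for a multi-step estimate.
cot = (
    ChainOfThoughtPrompt("Estimate the monthly token cost of this workload.")
    .add_step("Identify requests per day and average tokens per request")
    .add_step("Multiply to get the daily token volume")
    .add_step("Apply the per-token price and scale to 30 days")
)
print(cot.build())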
Few-Shot Example Selection
from dataclasses import dataclass, field
from typing import Any, Optional
from abc import ABC, abstractmethod
import numpy as np
@dataclass
class Example:
"""A few-shot example."""
input: str
output: str
embedding: Optional[np.ndarray] = None
metadata: dict = field(default_factory=dict)
class ExampleSelector(ABC):
"""Abstract example selector."""
@abstractmethod
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select examples for query."""
pass
class RandomSelector(ExampleSelector):
"""Random example selection."""
def __init__(self, examples: list[Example], seed: int = 42):
self.examples = examples
self.rng = np.random.RandomState(seed)
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select random examples."""
indices = self.rng.choice(len(self.examples), size=min(k, len(self.examples)), replace=False)
return [self.examples[i] for i in indices]
class SemanticSelector(ExampleSelector):
"""Semantic similarity-based selection."""
def __init__(self, examples: list[Example], embedding_model: Any):
self.examples = examples
self.embedder = embedding_model
# Pre-compute embeddings
for ex in self.examples:
if ex.embedding is None:
ex.embedding = self.embedder.embed(ex.input).vector
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select most similar examples."""
query_embedding = self.embedder.embed(query).vector
# Calculate similarities
similarities = []
for ex in self.examples:
sim = np.dot(query_embedding, ex.embedding) / (
np.linalg.norm(query_embedding) * np.linalg.norm(ex.embedding)
)
similarities.append(sim)
# Get top-k
top_indices = np.argsort(similarities)[-k:][::-1]
return [self.examples[i] for i in top_indices]
class DiversitySelector(ExampleSelector):
"""Select diverse examples."""
def __init__(self, examples: list[Example], embedding_model: Any):
self.examples = examples
self.embedder = embedding_model
for ex in self.examples:
if ex.embedding is None:
ex.embedding = self.embedder.embed(ex.input).vector
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select diverse examples using MMR."""
query_embedding = self.embedder.embed(query).vector
# Calculate query similarities
query_sims = []
for ex in self.examples:
sim = np.dot(query_embedding, ex.embedding) / (
np.linalg.norm(query_embedding) * np.linalg.norm(ex.embedding)
)
query_sims.append(sim)
# MMR selection
selected = []
remaining = list(range(len(self.examples)))
for _ in range(k):
if not remaining:
break
best_idx = None
best_score = -float('inf')
for idx in remaining:
# Relevance to query
relevance = query_sims[idx]
# Max similarity to already selected
if selected:
max_sim = max(
np.dot(self.examples[idx].embedding, self.examples[s].embedding) / (
np.linalg.norm(self.examples[idx].embedding) *
np.linalg.norm(self.examples[s].embedding)
)
for s in selected
)
else:
max_sim = 0
# MMR score
lambda_param = 0.5
score = lambda_param * relevance - (1 - lambda_param) * max_sim
if score > best_score:
best_score = score
best_idx = idx
selected.append(best_idx)
remaining.remove(best_idx)
return [self.examples[i] for i in selected]
class CoverageSelector(ExampleSelector):
"""Select examples that cover different aspects."""
def __init__(self, examples: list[Example], categories: dict[str, list[int]]):
self.examples = examples
self.categories = categories # category -> example indices
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select examples covering different categories."""
selected = []
categories_used = set()
# First, select one from each category
for category, indices in self.categories.items():
if len(selected) >= k:
break
if category not in categories_used and indices:
selected.append(self.examples[indices[0]])
categories_used.add(category)
# Fill remaining slots in listing order with examples not yet selected
selected_ids = {id(ex) for ex in selected}
remaining = [
ex for ex in self.examples
if id(ex) not in selected_ids
]
while len(selected) < k and remaining:
selected.append(remaining.pop(0))
return selected
class AdaptiveSelector(ExampleSelector):
"""Adapt selection based on task performance."""
def __init__(self, examples: list[Example]):
self.examples = examples
self.performance: dict[int, list[float]] = {i: [] for i in range(len(examples))}
def select(self, query: str, k: int = 3) -> list[Example]:
"""Select based on historical performance."""
# Calculate average performance
avg_performance = {}
for idx, scores in self.performance.items():
if scores:
avg_performance[idx] = sum(scores) / len(scores)
else:
avg_performance[idx] = 0.5 # Default
# Sort by performance
sorted_indices = sorted(avg_performance.keys(), key=lambda i: avg_performance[i], reverse=True)
return [self.examples[i] for i in sorted_indices[:k]]
def record_performance(self, example_indices: list[int], score: float):
"""Record performance for examples."""
for idx in example_indices:
self.performance[idx].append(score)
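The selectors only assume an embedding client that exposes embed(text).vector, matching the calls above. The toy embedder below is a stand-in so the snippet runs end to end; in practice you would swap in a real embedding model.
# Illustrative only: a toy embedder standing in for a real embedding model.
import numpy as np
from dataclasses import dataclass

@dataclass
class ToyEmbedding:
    vector: np.ndarray

class ToyEmbedder:
    """Pseudo-embeddings derived from the text hash; not semantic, demo only."""
    def embed(self, text: str) -> ToyEmbedding:
        rng = np.random.RandomState(abs(hash(text)) % (2**32))
        return ToyEmbedding(vector=rng.rand(64))

examples = [
    Example(input="Translate 'hello' to French", output="bonjour"),
    Example(input="Translate 'goodbye' to Spanish", output="adiós"),
    Example(input="Summarize this paragraph", output="A one-sentence summary."),
]

semantic = SemanticSelector(examples, embedding_model=ToyEmbedder())
diverse = DiversitySelector(examples, embedding_model=ToyEmbedder())

query = "Translate 'thank you' to German"
print([ex.input for ex in semantic.select(query, k=2)])
print([ex.input for ex in diverse.select(query, k=2)])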
Automatic Prompt Optimization
from dataclasses import dataclass, field
from typing import Any, Optional, Callable
from abc import ABC, abstractmethod
import numpy as np  # needed by EvolutionaryOptimizer for selection and mutation probability
@dataclass
class OptimizationResult:
"""Result of prompt optimization."""
original_prompt: str
optimized_prompt: str
original_score: float
optimized_score: float
iterations: int
history: list[dict] = field(default_factory=list)
class PromptOptimizer(ABC):
"""Abstract prompt optimizer."""
@abstractmethod
async def optimize(
self,
prompt: str,
eval_fn: Callable,
max_iterations: int = 10
) -> OptimizationResult:
"""Optimize prompt."""
pass
class GradientFreeOptimizer(PromptOptimizer):
"""Optimize prompts without gradients."""
def __init__(self, llm_client: Any):
self.llm = llm_client
async def optimize(
self,
prompt: str,
eval_fn: Callable,
max_iterations: int = 10
) -> OptimizationResult:
"""Optimize using LLM-based refinement."""
current_prompt = prompt
current_score = await eval_fn(current_prompt)
history = [{"prompt": current_prompt, "score": current_score}]
for i in range(max_iterations):
# Generate variations
variations = await self._generate_variations(current_prompt)
# Evaluate variations
best_variation = current_prompt
best_score = current_score
for variation in variations:
score = await eval_fn(variation)
if score > best_score:
best_variation = variation
best_score = score
# Update if improved
if best_score > current_score:
current_prompt = best_variation
current_score = best_score
history.append({"prompt": current_prompt, "score": current_score})
else:
# No improvement, try different strategy
current_prompt = await self._refine_prompt(current_prompt, history)
current_score = await eval_fn(current_prompt)
history.append({"prompt": current_prompt, "score": current_score})
return OptimizationResult(
original_prompt=prompt,
optimized_prompt=current_prompt,
original_score=history[0]["score"],
optimized_score=current_score,
iterations=len(history) - 1,
history=history
)
async def _generate_variations(self, prompt: str) -> list[str]:
"""Generate prompt variations."""
meta_prompt = f"""Generate 3 variations of this prompt that might perform better.
Keep the core task the same but try different:
- Wording and phrasing
- Structure and organization
- Level of detail
- Tone and style
Original prompt:
{prompt}
Variations (separated by ---):"""
response = await self.llm.complete(meta_prompt)
variations = response.content.split('---')
return [v.strip() for v in variations if v.strip()]
async def _refine_prompt(self, prompt: str, history: list[dict]) -> str:
"""Refine prompt based on history."""
history_text = "\n".join(
f"Score {h['score']:.2f}: {h['prompt'][:100]}..."
for h in history[-3:]
)
meta_prompt = f"""Analyze these prompt attempts and their scores, then create an improved version.
History:
{history_text}
Current prompt:
{prompt}
Create an improved prompt that addresses weaknesses in previous attempts:"""
response = await self.llm.complete(meta_prompt)
return response.content.strip()
class DSPyOptimizer(PromptOptimizer):
"""DSPy-style prompt optimization."""
def __init__(self, llm_client: Any):
self.llm = llm_client
async def optimize(
self,
prompt: str,
eval_fn: Callable,
max_iterations: int = 10,
train_examples: list[dict] = None
) -> OptimizationResult:
"""Optimize using DSPy-style compilation."""
# Extract signature from prompt
signature = self._extract_signature(prompt)
# Bootstrap examples
if train_examples:
bootstrapped = await self._bootstrap_examples(signature, train_examples)
else:
bootstrapped = []
# Optimize instruction
best_instruction = await self._optimize_instruction(
signature,
bootstrapped,
eval_fn,
max_iterations
)
# Build optimized prompt
optimized = self._build_prompt(signature, best_instruction, bootstrapped)
original_score = await eval_fn(prompt)
optimized_score = await eval_fn(optimized)
return OptimizationResult(
original_prompt=prompt,
optimized_prompt=optimized,
original_score=original_score,
optimized_score=optimized_score,
iterations=max_iterations,
history=[]
)
def _extract_signature(self, prompt: str) -> dict:
"""Extract input/output signature from prompt."""
# Simplified extraction
return {
"inputs": ["input"],
"outputs": ["output"],
"task": prompt
}
async def _bootstrap_examples(
self,
signature: dict,
train_examples: list[dict]
) -> list[dict]:
"""Bootstrap high-quality examples."""
bootstrapped = []
for example in train_examples[:10]:
# Generate reasoning trace
prompt = f"""Task: {signature['task']}
Input: {example['input']}
Think step by step to produce the output:"""
response = await self.llm.complete(prompt)
bootstrapped.append({
"input": example["input"],
"reasoning": response.content,
"output": example.get("output", "")
})
return bootstrapped
async def _optimize_instruction(
self,
signature: dict,
examples: list[dict],
eval_fn: Callable,
max_iterations: int
) -> str:
"""Optimize the instruction."""
current_instruction = signature["task"]
# Score the starting instruction so weaker candidates cannot displace it
best_score = await eval_fn(self._build_prompt(signature, current_instruction, examples))
for _ in range(max_iterations):
# Generate instruction candidates
candidates = await self._generate_instructions(current_instruction, examples)
# Evaluate each
for candidate in candidates:
test_prompt = self._build_prompt(signature, candidate, examples)
score = await eval_fn(test_prompt)
if score > best_score:
best_score = score
current_instruction = candidate
return current_instruction
async def _generate_instructions(
self,
current: str,
examples: list[dict]
) -> list[str]:
"""Generate instruction candidates."""
prompt = f"""Generate 3 improved versions of this instruction.
Current instruction: {current}
Example inputs/outputs:
{examples[:2]}
Improved instructions (one per line):"""
response = await self.llm.complete(prompt)
return [line.strip() for line in response.content.split('\n') if line.strip()]
def _build_prompt(
self,
signature: dict,
instruction: str,
examples: list[dict]
) -> str:
"""Build prompt from components."""
examples_text = "\n\n".join(
f"Input: {ex['input']}\nOutput: {ex['output']}"
for ex in examples[:3]
)
return f"""{instruction}
Examples:
{examples_text}
Now process the following:
Input: {{input}}
Output:"""
class EvolutionaryOptimizer(PromptOptimizer):
"""Evolutionary prompt optimization."""
def __init__(self, llm_client: Any, population_size: int = 10):
self.llm = llm_client
self.population_size = population_size
async def optimize(
self,
prompt: str,
eval_fn: Callable,
max_iterations: int = 10
) -> OptimizationResult:
"""Optimize using evolutionary algorithm."""
# Initialize population
population = [prompt]
for _ in range(self.population_size - 1):
mutated = await self._mutate(prompt)
population.append(mutated)
history = []
for generation in range(max_iterations):
# Evaluate fitness
fitness = []
for individual in population:
score = await eval_fn(individual)
fitness.append((individual, score))
# Sort by fitness
fitness.sort(key=lambda x: x[1], reverse=True)
# Record best
history.append({
"generation": generation,
"best_score": fitness[0][1],
"best_prompt": fitness[0][0]
})
# Selection (top half)
survivors = [f[0] for f in fitness[:self.population_size // 2]]
# Reproduction
new_population = survivors.copy()
while len(new_population) < self.population_size:
# Crossover
parent1, parent2 = np.random.choice(survivors, 2, replace=False)
child = await self._crossover(parent1, parent2)
# Mutation
if np.random.random() < 0.3:
child = await self._mutate(child)
new_population.append(child)
population = new_population
# Return best
best = max(history, key=lambda h: h["best_score"])
return OptimizationResult(
original_prompt=prompt,
optimized_prompt=best["best_prompt"],
original_score=history[0]["best_score"],
optimized_score=best["best_score"],
iterations=max_iterations,
history=history
)
async def _mutate(self, prompt: str) -> str:
"""Mutate a prompt."""
mutation_prompt = f"""Slightly modify this prompt while keeping its core meaning.
Make one small change (word choice, structure, or detail level).
Original: {prompt}
Modified:"""
response = await self.llm.complete(mutation_prompt)
return response.content.strip()
async def _crossover(self, parent1: str, parent2: str) -> str:
"""Crossover two prompts."""
crossover_prompt = f"""Combine the best elements of these two prompts into one.
Prompt 1: {parent1}
Prompt 2: {parent2}
Combined prompt:"""
response = await self.llm.complete(crossover_prompt)
return response.content.strip()
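Here is a sketch of driving GradientFreeOptimizer end to end. FakeLLM and keyword_eval are placeholders invented for this demo: the optimizer only requires an async client exposing complete(prompt) that returns an object with a .content string, plus an async scoring function.
# Demo harness with stand-in components; replace FakeLLM and keyword_eval
# with a real async LLM client and a task-specific evaluation function.
import asyncio
from dataclasses import dataclass

@dataclass
class FakeResponse:
    content: str

class FakeLLM:
    async def complete(self, prompt: str) -> FakeResponse:
        # Always return three canned candidates so the loop has something to score.
        return FakeResponse(content=(
            "Extract all entities from the text.\n---\n"
            "Extract all entities and return them as JSON.\n---\n"
            "List the entities you find."
        ))

async def keyword_eval(prompt: str) -> float:
    # Toy metric: reward prompts that pin down the output format.
    return 1.0 if "JSON" in prompt else 0.3

async def main():
    optimizer = GradientFreeOptimizer(llm_client=FakeLLM())
    result = await optimizer.optimize(
        prompt="Extract entities from the text.",
        eval_fn=keyword_eval,
        max_iterations=3,
    )
    print(f"{result.original_score:.2f} -> {result.optimized_score:.2f} "
          f"in {result.iterations} iterations")

asyncio.run(main())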
A/B Testing Framework
from dataclasses import dataclass, field
from typing import Any, Optional, Callable
from datetime import datetime
import numpy as np
from scipy import stats
@dataclass
class Variant:
"""A prompt variant."""
id: str
prompt: str
weight: float = 1.0
@dataclass
class ExperimentResult:
"""Result of an A/B test."""
variant_id: str
input: str
output: str
metrics: dict
timestamp: datetime = field(default_factory=datetime.now)
class ABTest:
"""A/B test for prompts."""
def __init__(
self,
name: str,
variants: list[Variant],
metrics: list[str]
):
self.name = name
self.variants = {v.id: v for v in variants}
self.metrics = metrics
self.results: list[ExperimentResult] = []
# Normalize weights
total_weight = sum(v.weight for v in variants)
self.weights = {v.id: v.weight / total_weight for v in variants}
def select_variant(self) -> Variant:
"""Select variant based on weights."""
ids = list(self.weights.keys())
weights = [self.weights[id] for id in ids]
selected_id = np.random.choice(ids, p=weights)
return self.variants[selected_id]
def record_result(
self,
variant_id: str,
input: str,
output: str,
metrics: dict
):
"""Record experiment result."""
self.results.append(ExperimentResult(
variant_id=variant_id,
input=input,
output=output,
metrics=metrics
))
def analyze(self) -> dict:
"""Analyze experiment results."""
analysis = {}
for metric in self.metrics:
metric_analysis = {}
for variant_id in self.variants:
values = [
r.metrics.get(metric, 0)
for r in self.results
if r.variant_id == variant_id
]
if values:
metric_analysis[variant_id] = {
"mean": np.mean(values),
"std": np.std(values),
"count": len(values)
}
# Statistical significance
if len(metric_analysis) == 2:
ids = list(metric_analysis.keys())
values1 = [r.metrics.get(metric, 0) for r in self.results if r.variant_id == ids[0]]
values2 = [r.metrics.get(metric, 0) for r in self.results if r.variant_id == ids[1]]
if len(values1) > 1 and len(values2) > 1:
t_stat, p_value = stats.ttest_ind(values1, values2)
metric_analysis["significance"] = {
"t_statistic": t_stat,
"p_value": p_value,
"significant": p_value < 0.05
}
analysis[metric] = metric_analysis
return analysis
def get_winner(self, metric: str) -> Optional[str]:
"""Get winning variant for metric."""
analysis = self.analyze()
if metric not in analysis:
return None
metric_data = analysis[metric]
# Check significance
if "significance" in metric_data and not metric_data["significance"]["significant"]:
return None # No significant winner
# Find best
best_id = None
best_mean = -float('inf')
for variant_id, data in metric_data.items():
if variant_id == "significance":
continue
if data["mean"] > best_mean:
best_mean = data["mean"]
best_id = variant_id
return best_id
class MultiArmedBandit:
"""Multi-armed bandit for prompt selection."""
def __init__(self, variants: list[Variant], epsilon: float = 0.1):
self.variants = {v.id: v for v in variants}
self.epsilon = epsilon
self.rewards: dict[str, list[float]] = {v.id: [] for v in variants}
def select_variant(self) -> Variant:
"""Select using epsilon-greedy."""
if np.random.random() < self.epsilon:
# Explore
return np.random.choice(list(self.variants.values()))
# Exploit
best_id = None
best_mean = -float('inf')
for variant_id, rewards in self.rewards.items():
mean = np.mean(rewards) if rewards else 0
if mean > best_mean:
best_mean = mean
best_id = variant_id
return self.variants[best_id]
def record_reward(self, variant_id: str, reward: float):
"""Record reward for variant."""
self.rewards[variant_id].append(reward)
class ThompsonSampling:
"""Thompson sampling for prompt selection."""
def __init__(self, variants: list[Variant]):
self.variants = {v.id: v for v in variants}
# Beta distribution parameters
self.alpha: dict[str, float] = {v.id: 1.0 for v in variants}
self.beta: dict[str, float] = {v.id: 1.0 for v in variants}
def select_variant(self) -> Variant:
"""Select using Thompson sampling."""
samples = {}
for variant_id in self.variants:
samples[variant_id] = np.random.beta(
self.alpha[variant_id],
self.beta[variant_id]
)
best_id = max(samples, key=samples.get)
return self.variants[best_id]
def record_result(self, variant_id: str, success: bool):
"""Record result (success/failure)."""
if success:
self.alpha[variant_id] += 1
else:
self.beta[variant_id] += 1
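Putting the framework together: the two variant prompts and the accuracy numbers below are synthetic, but the flow (assign, record, analyze) mirrors how you would route real traffic through ABTest or ThompsonSampling.
# Synthetic experiment: variant prompts and metric values are invented.
import numpy as np

test = ABTest(
    name="summary-prompt-test",
    variants=[
        Variant(id="terse", prompt="Summarize in one sentence."),
        Variant(id="detailed", prompt="Summarize in one sentence, citing the key figure."),
    ],
    metrics=["accuracy"],
)

rng = np.random.RandomState(0)
for _ in range(200):
    variant = test.select_variant()
    # In production the output comes from the LLM and the metric from an evaluator;
    # here we sample a fake accuracy with a small edge for the detailed variant.
    accuracy = rng.normal(0.78 if variant.id == "detailed" else 0.72, 0.05)
    test.record_result(variant.id, input="<doc>", output="<summary>", metrics={"accuracy": accuracy})

print(test.get_winner("accuracy"))  # "detailed" once the difference is significant

# Online alternative: Thompson sampling shifts traffic toward the better variant
# while the experiment is still running.
bandit = ThompsonSampling(list(test.variants.values()))
for _ in range(200):
    chosen = bandit.select_variant()
    success = rng.random() < (0.8 if chosen.id == "detailed" else 0.7)
    bandit.record_result(chosen.id, success=success)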
Production Prompt Management
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional
from datetime import datetime
app = FastAPI()
class PromptVersion:
"""A versioned prompt."""
def __init__(
self,
prompt_id: str,
version: int,
content: str,
metadata: dict = None
):
self.prompt_id = prompt_id
self.version = version
self.content = content
self.metadata = metadata or {}
self.created_at = datetime.now()
self.is_active = False
class PromptRegistry:
"""Registry for managing prompts."""
def __init__(self):
self.prompts: dict[str, dict[int, PromptVersion]] = {}
self.active_versions: dict[str, int] = {}
def register(
self,
prompt_id: str,
content: str,
metadata: dict = None
) -> PromptVersion:
"""Register new prompt version."""
if prompt_id not in self.prompts:
self.prompts[prompt_id] = {}
version = len(self.prompts[prompt_id]) + 1
prompt_version = PromptVersion(
prompt_id=prompt_id,
version=version,
content=content,
metadata=metadata
)
self.prompts[prompt_id][version] = prompt_version
# Auto-activate if first version
if version == 1:
self.activate(prompt_id, version)
return prompt_version
def activate(self, prompt_id: str, version: int):
"""Activate a prompt version."""
if prompt_id not in self.prompts:
raise ValueError(f"Unknown prompt: {prompt_id}")
if version not in self.prompts[prompt_id]:
raise ValueError(f"Unknown version: {version}")
# Deactivate current
if prompt_id in self.active_versions:
current = self.active_versions[prompt_id]
self.prompts[prompt_id][current].is_active = False
# Activate new
self.prompts[prompt_id][version].is_active = True
self.active_versions[prompt_id] = version
def get_active(self, prompt_id: str) -> Optional[PromptVersion]:
"""Get active version of prompt."""
if prompt_id not in self.active_versions:
return None
version = self.active_versions[prompt_id]
return self.prompts[prompt_id][version]
def get_version(self, prompt_id: str, version: int) -> Optional[PromptVersion]:
"""Get specific version."""
if prompt_id not in self.prompts:
return None
return self.prompts[prompt_id].get(version)
def list_versions(self, prompt_id: str) -> list[PromptVersion]:
"""List all versions of a prompt."""
if prompt_id not in self.prompts:
return []
return list(self.prompts[prompt_id].values())
# Global registry
registry = PromptRegistry()
class RegisterRequest(BaseModel):
prompt_id: str
content: str
metadata: Optional[dict] = None
class ActivateRequest(BaseModel):
prompt_id: str
version: int
class PromptResponse(BaseModel):
prompt_id: str
version: int
content: str
is_active: bool
created_at: str
@app.post("/v1/prompts")
async def register_prompt(request: RegisterRequest) -> PromptResponse:
"""Register new prompt version."""
version = registry.register(
prompt_id=request.prompt_id,
content=request.content,
metadata=request.metadata
)
return PromptResponse(
prompt_id=version.prompt_id,
version=version.version,
content=version.content,
is_active=version.is_active,
created_at=version.created_at.isoformat()
)
@app.post("/v1/prompts/activate")
async def activate_prompt(request: ActivateRequest) -> dict:
"""Activate prompt version."""
try:
registry.activate(request.prompt_id, request.version)
return {"status": "activated"}
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@app.get("/v1/prompts/{prompt_id}")
async def get_prompt(prompt_id: str) -> PromptResponse:
"""Get active prompt."""
version = registry.get_active(prompt_id)
if not version:
raise HTTPException(status_code=404, detail="Prompt not found")
return PromptResponse(
prompt_id=version.prompt_id,
version=version.version,
content=version.content,
is_active=version.is_active,
created_at=version.created_at.isoformat()
)
@app.get("/v1/prompts/{prompt_id}/versions")
async def list_versions(prompt_id: str) -> list[PromptResponse]:
"""List all versions."""
versions = registry.list_versions(prompt_id)
return [
PromptResponse(
prompt_id=v.prompt_id,
version=v.version,
content=v.content,
is_active=v.is_active,
created_at=v.created_at.isoformat()
)
for v in versions
]
@app.get("/health")
async def health():
return {"status": "healthy"}
References
- DSPy Framework: https://github.com/stanfordnlp/dspy
- Prompt Engineering Guide: https://www.promptingguide.ai/
- Chain-of-Thought Paper: https://arxiv.org/abs/2201.11903
- Tree of Thoughts Paper: https://arxiv.org/abs/2305.10601
Conclusion
Prompt optimization transforms prompt engineering from art to science. Start with structured prompts—clear sections for system instructions, context, task, examples, constraints, and output format make prompts easier to understand and modify. Few-shot example selection matters more than example count; semantic similarity and diversity-based selection outperform random selection. Automatic optimization with frameworks like DSPy can discover prompts that outperform hand-crafted ones, especially when you have evaluation data. A/B testing is essential for production; use multi-armed bandits or Thompson sampling to balance exploration and exploitation. Version your prompts like code—track changes, maintain rollback capability, and associate prompts with their performance metrics. Monitor prompt performance continuously; model updates and data drift can degrade prompt effectiveness over time. The key insight is that prompts are not static artifacts—they should evolve based on measured performance, and the infrastructure for testing and deploying prompt changes is as important as the prompts themselves.