Introduction

LLMs generate text, but applications need structured data—JSON objects, database records, API payloads. Getting reliable structured output from language models requires more than asking nicely in the prompt. This guide covers practical techniques for structured generation: defining schemas with Pydantic or JSON Schema, using constrained decoding to guarantee valid output, implementing retry logic with validation, and leveraging native structured output features in modern APIs. Whether you’re extracting entities from documents, generating function call arguments, or building data pipelines powered by LLMs, these techniques help ensure your application receives the exact format it expects.

Schema Definition
from enum import Enum
from typing import Literal, Optional

from pydantic import BaseModel, Field, field_validator


class Priority(str, Enum):
    """Task priority levels."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
class ExtractedEntity(BaseModel):
    """An extracted entity."""

    name: str = Field(..., description="Entity name")
    entity_type: str = Field(..., description="Type of entity (person, org, location)")
    confidence: float = Field(..., ge=0, le=1, description="Confidence score 0-1")

    @field_validator("entity_type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        allowed = {"person", "organization", "location", "product", "event"}
        if v.lower() not in allowed:
            raise ValueError(f"entity_type must be one of {allowed}")
        return v.lower()
class TaskExtraction(BaseModel):
    """Extracted task from text."""

    title: str = Field(..., min_length=1, max_length=200)
    description: Optional[str] = Field(None, max_length=1000)
    priority: Priority = Field(default=Priority.MEDIUM)
    due_date: Optional[str] = Field(None, pattern=r"^\d{4}-\d{2}-\d{2}$")
    assignee: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
class SentimentAnalysis(BaseModel):
    """Sentiment analysis result."""

    sentiment: Literal["positive", "negative", "neutral", "mixed"]
    confidence: float = Field(..., ge=0, le=1)
    aspects: list[dict] = Field(default_factory=list)

    model_config = {
        "json_schema_extra": {
            "example": {
                "sentiment": "positive",
                "confidence": 0.92,
                "aspects": [
                    {"aspect": "quality", "sentiment": "positive"},
                    {"aspect": "price", "sentiment": "negative"},
                ],
            }
        }
    }
class SchemaGenerator:
    """Generate JSON Schema from Pydantic models."""

    @staticmethod
    def to_json_schema(model: type[BaseModel]) -> dict:
        """Convert a Pydantic model to JSON Schema."""
        return model.model_json_schema()

    @staticmethod
    def to_openai_schema(model: type[BaseModel]) -> dict:
        """Convert to an OpenAI function-calling schema."""
        schema = model.model_json_schema()
        return {
            "name": model.__name__,
            "description": model.__doc__ or f"Extract {model.__name__}",
            "parameters": {
                "type": "object",
                "properties": schema.get("properties", {}),
                "required": schema.get("required", []),
            },
        }

    @staticmethod
    def to_prompt_schema(model: type[BaseModel]) -> str:
        """Convert to a human-readable schema for prompts."""
        schema = model.model_json_schema()
        properties = schema.get("properties", {})
        required = set(schema.get("required", []))
        lines = ["Expected JSON format:", "{"]
        for name, prop in properties.items():
            req_marker = " (required)" if name in required else " (optional)"
            prop_type = prop.get("type", "any")
            description = prop.get("description", "")
            if "enum" in prop:
                prop_type = f"one of {prop['enum']}"
            lines.append(f'  "{name}": {prop_type}{req_marker} - {description}')
        lines.append("}")
        return "\n".join(lines)
class DynamicSchema:
    """Build schemas dynamically."""

    @staticmethod
    def from_dict(schema_dict: dict) -> type[BaseModel]:
        """Create a Pydantic model from a {field: config} dictionary."""
        from pydantic import create_model

        # Map JSON Schema type names to Python types
        type_map = {
            "string": str,
            "integer": int,
            "number": float,
            "boolean": bool,
            "array": list,
            "object": dict,
        }
        fields = {}
        for name, config in schema_dict.items():
            field_type = config.get("type", str)
            default = config.get("default", ...)
            description = config.get("description", "")
            if isinstance(field_type, str):
                field_type = type_map.get(field_type, str)
            fields[name] = (field_type, Field(default, description=description))
        return create_model("DynamicModel", **fields)

    @staticmethod
    def from_example(example: dict) -> type[BaseModel]:
        """Infer a schema from an example JSON object."""
        from pydantic import create_model

        fields = {}
        for name, value in example.items():
            field_type = type(value)
            if field_type is list and value:
                # Infer the list element type from the first element
                elem_type = type(value[0])
                field_type = list[elem_type]
            fields[name] = (field_type, ...)
        return create_model("InferredModel", **fields)
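To make the pieces concrete, here is a small usage sketch; it assumes the classes above live in the same module and uses Pydantic v2's model_dump():

# Usage sketch (assumes the classes above are defined in this module).
print(SchemaGenerator.to_prompt_schema(TaskExtraction))
print(SchemaGenerator.to_openai_schema(TaskExtraction)["name"])  # "TaskExtraction"

# Infer a model from an example payload, then validate new data with it.
Inferred = DynamicSchema.from_example({"title": "Ship v2", "tags": ["infra"]})
instance = Inferred(title="Ship v2", tags=["infra", "release"])
print(instance.model_dump())  # {'title': 'Ship v2', 'tags': ['infra', 'release']}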
Constrained Generation
from typing import Any, Optional
import json
import re
class JSONConstrainer:
    """Constrain LLM output to valid JSON."""

    def __init__(self, schema: Optional[dict] = None):
        self.schema = schema

    def extract_json(self, text: str) -> Optional[dict]:
        """Extract JSON from LLM output."""
        # Try a direct parse first
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass
        # Fall back to finding JSON embedded in surrounding text
        patterns = [
            r"```json\s*([\s\S]*?)\s*```",  # Markdown code block
            r"```\s*([\s\S]*?)\s*```",      # Generic code block
            r"\{[\s\S]*\}",                 # Raw JSON object
            r"\[[\s\S]*\]",                 # Raw JSON array
        ]
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                try:
                    json_str = match.group(1) if "```" in pattern else match.group(0)
                    return json.loads(json_str)
                except (json.JSONDecodeError, IndexError):
                    continue
        return None
    def repair_json(self, text: str) -> Optional[dict]:
        """Attempt to repair malformed JSON."""
        # Common fixes, applied in order
        repairs = [
            # Add missing quotes to keys
            (r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":'),
            # Replace single quotes with double quotes
            (r"'([^']*)'", r'"\1"'),
            # Remove trailing commas
            (r',\s*([}\]])', r'\1'),
            # Quote bare string values, but leave JSON keywords alone
            (r':\s*(?!true\b|false\b|null\b)([a-zA-Z][a-zA-Z0-9_]*)\s*([,}])', r': "\1"\2'),
        ]
        repaired = text
        for pattern, replacement in repairs:
            repaired = re.sub(pattern, replacement, repaired)
        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            return None
    def validate_schema(self, data: dict) -> tuple[bool, list[str]]:
        """Validate data against the schema."""
        if not self.schema:
            return True, []
        errors = []
        # Check required fields
        required = self.schema.get("required", [])
        for field in required:
            if field not in data:
                errors.append(f"Missing required field: {field}")
        # Check types
        properties = self.schema.get("properties", {})
        for field, value in data.items():
            if field in properties:
                expected_type = properties[field].get("type")
                if not self._check_type(value, expected_type):
                    errors.append(f"Field {field} has wrong type")
        return len(errors) == 0, errors

    def _check_type(self, value: Any, expected: str) -> bool:
        """Check if a value matches the expected JSON Schema type."""
        type_map = {
            "string": str,
            "integer": int,
            "number": (int, float),
            "boolean": bool,
            "array": list,
            "object": dict,
            "null": type(None),
        }
        expected_types = type_map.get(expected)
        if expected_types:
            return isinstance(value, expected_types)
        return True
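A quick usage sketch of the extractor and repairer; the malformed inputs are deliberately contrived:

constrainer = JSONConstrainer(
    schema={"required": ["name"], "properties": {"name": {"type": "string"}}}
)

# Extraction handles raw JSON, fenced code blocks, and JSON embedded in prose.
print(constrainer.extract_json('Here you go:\n```json\n{"name": "Ada"}\n```'))

# Repair fixes unquoted keys, single quotes, and trailing commas.
print(constrainer.repair_json("{name: 'Ada', active: true,}"))

ok, errs = constrainer.validate_schema({"name": "Ada"})
print(ok, errs)  # True []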
class GrammarConstrainer:
    """Constrain generation using a grammar."""

    def __init__(self, grammar: str = ""):
        self.grammar = grammar

    def build_json_grammar(self, schema: dict) -> str:
        """Build a GBNF grammar for JSON output.

        The schema parameter is accepted for future field-level rules; this
        generic grammar guarantees syntactically valid JSON only.
        """
        rules = [
            'root ::= object',
            'object ::= "{" ws members ws "}"',
            'members ::= pair ("," ws pair)*',
            'pair ::= string ":" ws value',
            'value ::= string | number | object | array | "true" | "false" | "null"',
            'array ::= "[" ws (value ("," ws value)*)? ws "]"',
            'string ::= "\\"" [^"\\\\]* "\\""',
            'number ::= "-"? [0-9]+ ("." [0-9]+)?',
            'ws ::= [ \\t\\n]*',
        ]
        return "\n".join(rules)

    def constrain_tokens(
        self,
        current_text: str,
        allowed_tokens: list[str],
    ) -> list[str]:
        """Filter candidate tokens based on the current grammar state."""
        state = self._parse_state(current_text)
        return [token for token in allowed_tokens if self._is_valid_next(state, token)]

    def _parse_state(self, text: str) -> dict:
        """Parse the current JSON structure state."""
        depth = 0
        in_string = False
        expecting = "value"
        prev_char = ""
        for char in text:
            # Toggle string state on unescaped quotes (track the previous char,
            # not the last char of the whole text)
            if char == '"' and prev_char != "\\":
                in_string = not in_string
            elif not in_string:
                if char == "{":
                    depth += 1
                    expecting = "key"
                elif char == "}":
                    depth -= 1
                elif char == ":":
                    expecting = "value"
                elif char == ",":
                    expecting = "key" if depth > 0 else "value"
            prev_char = char
        return {"depth": depth, "in_string": in_string, "expecting": expecting}

    def _is_valid_next(self, state: dict, token: str) -> bool:
        """Check whether a token is valid in the current state."""
        if state["in_string"]:
            return True  # Most tokens are valid inside strings
        if state["expecting"] == "key":
            return token.startswith('"') or token in ["}", " ", "\n"]
        if state["expecting"] == "value":
            return token in ['"', "{", "[", "t", "f", "n", "-"] or token.isdigit()
        return True
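Hand-rolled token filtering like the above is mainly illustrative; in practice a grammar-aware inference runtime enforces the constraint during decoding. Below is a minimal sketch using llama-cpp-python's GBNF support; the model path is a placeholder, and the library must be installed separately:

# Sketch: grammar-constrained decoding with llama-cpp-python.
# Assumes `pip install llama-cpp-python` and a local GGUF model file.
from llama_cpp import Llama, LlamaGrammar

gbnf = GrammarConstrainer().build_json_grammar({})
grammar = LlamaGrammar.from_string(gbnf)

llm = Llama(model_path="./model.gguf")  # placeholder path
result = llm(
    "Extract the task as JSON: finish the report by Friday.",
    grammar=grammar,  # the sampler may only emit tokens the grammar allows
    max_tokens=256,
)
print(result["choices"][0]["text"])  # parseable JSON by construction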
LLM Structured Output
from typing import Any, Generic, Optional, TypeVar
import json

from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)


class StructuredLLM(Generic[T]):
    """LLM wrapper for structured output."""

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o",
        max_retries: int = 3,
    ):
        self.client = client
        self.model = model
        self.max_retries = max_retries
        self.constrainer = JSONConstrainer()
    async def generate(
        self,
        prompt: str,
        output_schema: type[T],
        system_prompt: Optional[str] = None,
    ) -> T:
        """Generate structured output via prompting plus validation."""
        # Build the schema prompt
        schema_prompt = SchemaGenerator.to_prompt_schema(output_schema)
        full_prompt = f"""{prompt}

{schema_prompt}

Return only valid JSON matching this schema."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": full_prompt})
        for attempt in range(self.max_retries):
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0 if attempt == 0 else 0.1,
            )
            content = response.choices[0].message.content
            # Extract and validate JSON
            data = self.constrainer.extract_json(content)
            if data is None:
                data = self.constrainer.repair_json(content)
            if data:
                try:
                    return output_schema(**data)
                except Exception as e:
                    # Feed the error back to the model for the next attempt
                    messages.append({"role": "assistant", "content": content})
                    messages.append({
                        "role": "user",
                        "content": f"Invalid output: {e}. Please fix and return valid JSON.",
                    })
        raise ValueError(f"Failed to generate valid {output_schema.__name__}")
    async def generate_with_functions(self, prompt: str, output_schema: type[T]) -> T:
        """Generate using tool (function) calling."""
        function_schema = SchemaGenerator.to_openai_schema(output_schema)
        # The legacy functions/function_call parameters are deprecated;
        # use the tools API instead.
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            tools=[{"type": "function", "function": function_schema}],
            tool_choice={"type": "function", "function": {"name": function_schema["name"]}},
        )
        tool_call = response.choices[0].message.tool_calls[0]
        data = json.loads(tool_call.function.arguments)
        return output_schema(**data)
    async def generate_with_response_format(self, prompt: str, output_schema: type[T]) -> T:
        """Generate using response_format (OpenAI Structured Outputs)."""
        schema = output_schema.model_json_schema()
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": output_schema.__name__,
                    "schema": schema,
                },
            },
        )
        data = json.loads(response.choices[0].message.content)
        return output_schema(**data)
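A hedged end-to-end sketch, assuming the official OpenAI Python SDK (pip install openai), an OPENAI_API_KEY in the environment, and the TaskExtraction model defined earlier:

import asyncio

from openai import AsyncOpenAI

async def main() -> None:
    llm = StructuredLLM(client=AsyncOpenAI(), model="gpt-4o")
    task = await llm.generate_with_response_format(
        "Extract the task: 'Review the Q3 budget by 2025-10-01, high priority.'",
        TaskExtraction,
    )
    print(task.title, task.priority, task.due_date)

asyncio.run(main())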
class InstructorWrapper:
    """Instructor-style structured output."""

    def __init__(self, client: Any, model: str = "gpt-4o"):
        self.client = client
        self.model = model

    async def create(
        self,
        response_model: type[T],
        messages: list[dict],
        max_retries: int = 3,
    ) -> T:
        """Create a structured response."""
        # Put the schema in the system message
        schema_text = json.dumps(response_model.model_json_schema(), indent=2)
        system_msg = f"""You must respond with valid JSON matching this schema:

{schema_text}

Return only the JSON object, no other text."""
        full_messages = [{"role": "system", "content": system_msg}] + messages
        for attempt in range(max_retries):
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=full_messages,
                temperature=0,
            )
            content = response.choices[0].message.content
            try:
                # Strip a Markdown code fence if present
                if content.startswith("```"):
                    content = content.split("```")[1]
                    if content.startswith("json"):
                        content = content[4:]
                data = json.loads(content.strip())
                return response_model(**data)
            except Exception as e:
                if attempt < max_retries - 1:
                    full_messages.append({"role": "assistant", "content": content})
                    full_messages.append({
                        "role": "user",
                        "content": f"Error: {e}. Please return valid JSON.",
                    })
        raise ValueError("Failed to generate valid response")
Validation and Retry
from dataclasses import dataclass
from enum import Enum
from typing import Any, Callable, Optional

from pydantic import BaseModel


class ValidationStrategy(Enum):
    """Validation strategies."""

    STRICT = "strict"    # Fail on any error
    LENIENT = "lenient"  # Try to fix errors
    PARTIAL = "partial"  # Return partial results


@dataclass
class ValidationResult:
    """Result of validation."""

    is_valid: bool
    data: Any
    errors: list[str]
    warnings: list[str]
class OutputValidator:
    """Validate and fix LLM output."""

    def __init__(
        self,
        schema: type[BaseModel],
        strategy: ValidationStrategy = ValidationStrategy.LENIENT,
    ):
        self.schema = schema
        self.strategy = strategy

    def validate(self, data: dict) -> ValidationResult:
        """Validate data against the schema."""
        errors = []
        warnings = []
        fixed_data = data.copy()
        # Get schema info
        schema_dict = self.schema.model_json_schema()
        properties = schema_dict.get("properties", {})
        required = set(schema_dict.get("required", []))
        # Check required fields
        for field in required:
            if field not in data:
                if self.strategy == ValidationStrategy.STRICT:
                    errors.append(f"Missing required field: {field}")
                else:
                    # Try to fill in a default
                    default = self._get_default(properties.get(field, {}))
                    if default is not None:
                        fixed_data[field] = default
                        warnings.append(f"Added default for {field}")
                    else:
                        errors.append(f"Missing required field: {field}")
        # Validate types
        for field, value in list(fixed_data.items()):
            if field in properties:
                prop = properties[field]
                fixed_value, field_errors = self._validate_field(field, value, prop)
                if fixed_value != value:
                    fixed_data[field] = fixed_value
                    warnings.append(f"Fixed field {field}")
                errors.extend(field_errors)
        # Try to create the model
        try:
            validated = self.schema(**fixed_data)
            return ValidationResult(
                is_valid=True,
                data=validated,
                errors=[],
                warnings=warnings,
            )
        except Exception as e:
            errors.append(str(e))
        return ValidationResult(
            is_valid=False,
            data=fixed_data,
            errors=errors,
            warnings=warnings,
        )

    def _get_default(self, prop: dict) -> Any:
        """Get a default value for a property."""
        if "default" in prop:
            return prop["default"]
        type_defaults = {
            "string": "",
            "integer": 0,
            "number": 0.0,
            "boolean": False,
            "array": [],
            "object": {},
        }
        return type_defaults.get(prop.get("type"))
    def _validate_field(
        self,
        name: str,
        value: Any,
        prop: dict,
    ) -> tuple[Any, list[str]]:
        """Validate and fix a single field."""
        errors = []
        expected_type = prop.get("type")
        # Type coercion
        if expected_type == "string" and not isinstance(value, str):
            value = str(value)
        elif expected_type == "integer" and not isinstance(value, int):
            try:
                value = int(value)
            except (ValueError, TypeError):
                errors.append(f"{name} must be integer")
        elif expected_type == "number" and not isinstance(value, (int, float)):
            try:
                value = float(value)
            except (ValueError, TypeError):
                errors.append(f"{name} must be number")
        elif expected_type == "boolean" and not isinstance(value, bool):
            value = bool(value)
        # Enum validation
        if "enum" in prop and value not in prop["enum"]:
            if self.strategy != ValidationStrategy.STRICT:
                # Try a case-insensitive match before giving up
                for enum_val in prop["enum"]:
                    if str(value).lower() == str(enum_val).lower():
                        value = enum_val
                        break
                else:
                    errors.append(f"{name} must be one of {prop['enum']}")
            else:
                errors.append(f"{name} must be one of {prop['enum']}")
        # Range validation: clamp out-of-range numbers to the schema bounds
        if "minimum" in prop and isinstance(value, (int, float)):
            value = max(value, prop["minimum"])
        if "maximum" in prop and isinstance(value, (int, float)):
            value = min(value, prop["maximum"])
        return value, errors
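A short usage sketch of lenient validation against the SentimentAnalysis model from earlier; the payload's errors are deliberate:

validator = OutputValidator(SentimentAnalysis, ValidationStrategy.LENIENT)

# "POSITIVE" is matched case-insensitively against the allowed values,
# and the out-of-range confidence is clamped to the schema maximum of 1.
result = validator.validate({"sentiment": "POSITIVE", "confidence": 1.7})
print(result.is_valid)  # True
print(result.warnings)  # ['Fixed field sentiment', 'Fixed field confidence']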
class RetryHandler:
    """Handle retries for structured output."""

    def __init__(
        self,
        max_retries: int = 3,
        validator: Optional[OutputValidator] = None,
    ):
        self.max_retries = max_retries
        self.validator = validator

    async def execute_with_retry(
        self,
        generate_fn: Callable,
        prompt: str,
        schema: type[BaseModel],
    ) -> BaseModel:
        """Execute with retry logic."""
        errors_history = []
        for attempt in range(self.max_retries):
            try:
                # Generate
                result = await generate_fn(prompt)
                # Validate
                if self.validator:
                    validation = self.validator.validate(result)
                    if validation.is_valid:
                        return validation.data
                    errors_history.extend(validation.errors)
                    # Fold the errors back into the prompt for the next attempt
                    prompt = self._add_error_context(prompt, result, validation.errors)
                else:
                    return schema(**result)
            except Exception as e:
                errors_history.append(str(e))
                prompt = self._add_error_context(prompt, None, [str(e)])
        raise ValueError(
            f"Failed after {self.max_retries} attempts. Errors: {errors_history}"
        )

    def _add_error_context(
        self,
        prompt: str,
        result: Any,
        errors: list[str],
    ) -> str:
        """Append error context to the prompt."""
        error_text = "\n".join(f"- {e}" for e in errors)
        return f"""{prompt}

Previous attempt had errors:
{error_text}

Please fix these issues and return valid JSON."""
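A self-contained sketch of the retry loop, with a stubbed generate_fn standing in for a real LLM call; it fails once, then succeeds:

import asyncio

attempts = {"n": 0}

async def fake_generate(prompt: str) -> dict:
    # Stub for an LLM call: the first attempt returns an invalid enum value.
    attempts["n"] += 1
    if attempts["n"] == 1:
        return {"sentiment": "enthusiastic", "confidence": 0.9}
    return {"sentiment": "positive", "confidence": 0.9}

handler = RetryHandler(
    max_retries=3,
    validator=OutputValidator(SentimentAnalysis, ValidationStrategy.STRICT),
)
result = asyncio.run(
    handler.execute_with_retry(fake_generate, "Classify: great product!", SentimentAnalysis)
)
print(result.sentiment)  # "positive"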
Production Structured Output Service
from enum import Enum
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel, Field

app = FastAPI()

# Initialize components
# structured_llm = StructuredLLM(client)


class OutputFormat(str, Enum):
    JSON = "json"
    PYDANTIC = "pydantic"
    FUNCTION_CALL = "function_call"


class GenerateRequest(BaseModel):
    prompt: str
    # Named json_schema to avoid shadowing BaseModel's deprecated schema() method
    json_schema: dict = Field(..., description="JSON Schema for output")
    format: OutputFormat = OutputFormat.JSON
    max_retries: int = 3
    strict: bool = False


class GenerateResponse(BaseModel):
    data: dict
    is_valid: bool
    errors: list[str]
    attempts: int
class ExtractEntitiesRequest(BaseModel):
    text: str
    entity_types: list[str] = ["person", "organization", "location"]


class ExtractedEntitiesResponse(BaseModel):
    entities: list[dict]
    text_length: int


class ExtractTasksRequest(BaseModel):
    text: str
    include_metadata: bool = True


class ExtractedTasksResponse(BaseModel):
    tasks: list[dict]
    count: int
@app.post("/v1/generate")
async def generate_structured(request: GenerateRequest) -> GenerateResponse:
"""Generate structured output."""
# Create dynamic schema
schema_model = DynamicSchema.from_dict(request.schema)
# Create validator
strategy = ValidationStrategy.STRICT if request.strict else ValidationStrategy.LENIENT
validator = OutputValidator(schema_model, strategy)
attempts = 0
errors = []
for attempt in range(request.max_retries):
attempts += 1
try:
# Generate (placeholder)
# result = await structured_llm.generate(request.prompt, schema_model)
result = {"placeholder": "data"}
# Validate
validation = validator.validate(result)
if validation.is_valid:
return GenerateResponse(
data=validation.data.dict() if hasattr(validation.data, 'dict') else validation.data,
is_valid=True,
errors=[],
attempts=attempts
)
errors.extend(validation.errors)
except Exception as e:
errors.append(str(e))
return GenerateResponse(
data={},
is_valid=False,
errors=errors,
attempts=attempts
)
@app.post("/v1/extract/entities")
async def extract_entities(request: ExtractEntitiesRequest) -> ExtractedEntitiesResponse:
"""Extract entities from text."""
# Define entity schema
class Entity(BaseModel):
name: str
type: str
confidence: float
class Entities(BaseModel):
entities: list[Entity]
prompt = f"""Extract all entities from this text.
Entity types to extract: {', '.join(request.entity_types)}
Text: {request.text}
Return JSON with extracted entities."""
# Generate (placeholder)
entities = [
{"name": "Example", "type": "organization", "confidence": 0.9}
]
return ExtractedEntitiesResponse(
entities=entities,
text_length=len(request.text)
)
@app.post("/v1/extract/tasks")
async def extract_tasks(request: ExtractTasksRequest) -> ExtractedTasksResponse:
"""Extract tasks from text."""
prompt = f"""Extract all tasks or action items from this text.
Text: {request.text}
Return JSON with extracted tasks including title, description, priority, and due date if mentioned."""
# Generate (placeholder)
tasks = [
{
"title": "Example task",
"description": "Task description",
"priority": "medium",
"due_date": None
}
]
return ExtractedTasksResponse(
tasks=tasks,
count=len(tasks)
)
class AnalyzeSentimentRequest(BaseModel):
    text: str
    include_aspects: bool = True


class SentimentResponse(BaseModel):
    sentiment: str
    confidence: float
    aspects: list[dict]


@app.post("/v1/analyze/sentiment")
async def analyze_sentiment(request: AnalyzeSentimentRequest) -> SentimentResponse:
    """Analyze sentiment of text."""
    prompt = f"""Analyze the sentiment of this text.

Text: {request.text}

Return JSON with overall sentiment (positive/negative/neutral/mixed), confidence score, and aspect-level sentiments."""
    # Generate (placeholder)
    return SentimentResponse(
        sentiment="positive",
        confidence=0.85,
        aspects=[
            {"aspect": "quality", "sentiment": "positive"},
            {"aspect": "service", "sentiment": "neutral"},
        ] if request.include_aspects else [],
    )
class ValidateSchemaRequest(BaseModel):
    data: dict
    # Named json_schema to avoid shadowing BaseModel's deprecated schema() method
    json_schema: dict


class ValidateSchemaResponse(BaseModel):
    is_valid: bool
    errors: list[str]
    fixed_data: Optional[dict]


@app.post("/v1/validate")
async def validate_schema(request: ValidateSchemaRequest) -> ValidateSchemaResponse:
    """Validate data against a schema."""
    # Build a validator from the schema's properties
    schema_model = DynamicSchema.from_dict(
        request.json_schema.get("properties", request.json_schema)
    )
    validator = OutputValidator(schema_model, ValidationStrategy.LENIENT)
    result = validator.validate(request.data)
    return ValidateSchemaResponse(
        is_valid=result.is_valid,
        errors=result.errors,
        fixed_data=result.data if not result.is_valid else None,
    )


@app.get("/health")
async def health():
    return {"status": "healthy"}
References
- OpenAI Structured Outputs: https://platform.openai.com/docs/guides/structured-outputs
- Instructor: https://github.com/jxnl/instructor
- Outlines: https://github.com/outlines-dev/outlines
- Pydantic: https://docs.pydantic.dev/
Conclusion
Structured output transforms LLMs from text generators into data producers. Start with schema definition—Pydantic models provide type safety, validation, and automatic JSON Schema generation. Use the right generation method for your use case: prompt-based for flexibility, function calling for reliability, or native structured output features when available. Implement robust extraction that handles markdown code blocks, repairs common JSON errors, and validates against your schema. Build retry logic that feeds validation errors back to the model for self-correction. For production, combine multiple strategies: try native structured output first, fall back to function calling, then prompt-based with validation. The key insight is that structured output is a contract between your application and the LLM—define it precisely with schemas, enforce it with validation, and handle failures gracefully with retries. This transforms unreliable text generation into dependable data extraction that your application can trust.
