## Introduction

Getting LLMs to output valid JSON, XML, or other structured formats is surprisingly difficult. Models hallucinate extra fields, forget closing brackets, and produce malformed output that breaks downstream systems. Prompt engineering helps but doesn't guarantee validity. This guide covers techniques for reliable structured generation: native JSON mode and structured outputs, constrained decoding with grammar-based approaches, schema validation with automatic repair, and robust pipelines that handle edge cases gracefully.

## Native Structured Outputs

OpenAI-style APIs offer two levels of support: JSON mode (`response_format={"type": "json_object"}`), which guarantees syntactically valid JSON but not schema adherence, and structured outputs, which constrain decoding to a Pydantic model you supply. The client below wraps both.
```python
from typing import Any, Optional, Type, TypeVar

from pydantic import BaseModel, Field
import json

T = TypeVar('T', bound=BaseModel)


class StructuredOutputClient:
    """Client for structured output generation."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate_json(
        self,
        prompt: str,
        system_prompt: Optional[str] = None
    ) -> dict:
        """Generate JSON output using native JSON mode."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    async def generate_typed(
        self,
        prompt: str,
        response_type: Type[T],
        system_prompt: Optional[str] = None
    ) -> T:
        """Generate typed output using structured outputs."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = await self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=response_type
        )
        return response.choices[0].message.parsed


# Example Pydantic models for structured output
class ExtractedEntity(BaseModel):
    """An extracted entity."""
    name: str = Field(description="Name of the entity")
    entity_type: str = Field(description="Type: person, organization, location, etc.")
    confidence: float = Field(ge=0, le=1, description="Confidence score 0-1")


class EntityExtractionResult(BaseModel):
    """Result of entity extraction."""
    entities: list[ExtractedEntity] = Field(description="List of extracted entities")
    text_summary: str = Field(description="Brief summary of the text")


class SentimentAnalysis(BaseModel):
    """Sentiment analysis result."""
    sentiment: str = Field(description="positive, negative, or neutral")
    confidence: float = Field(ge=0, le=1)
    key_phrases: list[str] = Field(description="Key phrases indicating sentiment")
    explanation: str = Field(description="Brief explanation of sentiment")


class CodeReview(BaseModel):
    """Code review result."""
    issues: list[dict] = Field(description="List of issues found")
    suggestions: list[str] = Field(description="Improvement suggestions")
    overall_quality: int = Field(ge=1, le=10, description="Quality score 1-10")
    summary: str = Field(description="Review summary")


# Usage helpers
async def extract_entities(client: StructuredOutputClient, text: str) -> EntityExtractionResult:
    """Extract entities from text."""
    prompt = f"""Extract all named entities from this text:

{text}

Identify people, organizations, locations, and other named entities."""
    return await client.generate_typed(
        prompt=prompt,
        response_type=EntityExtractionResult,
        system_prompt="You are an expert at named entity recognition."
    )


async def analyze_sentiment(client: StructuredOutputClient, text: str) -> SentimentAnalysis:
    """Analyze sentiment of text."""
    prompt = f"""Analyze the sentiment of this text:

{text}"""
    return await client.generate_typed(
        prompt=prompt,
        response_type=SentimentAnalysis,
        system_prompt="You are a sentiment analysis expert."
    )
```
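
To make the wiring concrete, here is a minimal usage sketch. It assumes the official `openai` package with an `OPENAI_API_KEY` in the environment; the input text is illustrative.

```python
# Minimal sketch: driving the helpers above with the official OpenAI SDK.
# Assumes `pip install openai` and OPENAI_API_KEY set in the environment.
import asyncio

from openai import AsyncOpenAI


async def main():
    client = StructuredOutputClient(AsyncOpenAI(), model="gpt-4o-mini")
    result = await extract_entities(
        client, "Ada Lovelace worked with Charles Babbage in London."
    )
    print(result.text_summary)
    for entity in result.entities:
        print(f"{entity.name} ({entity.entity_type}): {entity.confidence:.2f}")


asyncio.run(main())
```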
## Schema-Based Generation

When provider-side schema enforcement isn't available, you can build the JSON Schema yourself, embed it in the prompt, and combine it with JSON mode. The builder below also renders human-readable field instructions for the prompt.
````python
from dataclasses import dataclass
from typing import Any, Optional
import json


@dataclass
class SchemaField:
    """A field in a schema."""
    name: str
    field_type: str
    description: str
    required: bool = True
    enum_values: Optional[list[str]] = None
    default: Any = None


class SchemaBuilder:
    """Build JSON Schemas for structured output."""

    def __init__(self):
        self.fields: list[SchemaField] = []

    def add_string(
        self,
        name: str,
        description: str,
        required: bool = True,
        enum_values: Optional[list[str]] = None
    ) -> "SchemaBuilder":
        """Add a string field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="string",
            description=description,
            required=required,
            enum_values=enum_values
        ))
        return self

    def add_number(
        self,
        name: str,
        description: str,
        required: bool = True
    ) -> "SchemaBuilder":
        """Add a number field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="number",
            description=description,
            required=required
        ))
        return self

    def add_boolean(
        self,
        name: str,
        description: str,
        required: bool = True
    ) -> "SchemaBuilder":
        """Add a boolean field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="boolean",
            description=description,
            required=required
        ))
        return self

    def add_array(
        self,
        name: str,
        description: str,
        item_type: str = "string",
        required: bool = True
    ) -> "SchemaBuilder":
        """Add an array field."""
        self.fields.append(SchemaField(
            name=name,
            field_type=f"array[{item_type}]",
            description=description,
            required=required
        ))
        return self

    def build_json_schema(self) -> dict:
        """Build a JSON Schema dict."""
        properties = {}
        required = []
        for field in self.fields:
            prop = {"description": field.description}
            if field.field_type == "string":
                prop["type"] = "string"
                if field.enum_values:
                    prop["enum"] = field.enum_values
            elif field.field_type == "number":
                prop["type"] = "number"
            elif field.field_type == "boolean":
                prop["type"] = "boolean"
            elif field.field_type.startswith("array"):
                prop["type"] = "array"
                item_type = field.field_type.split("[")[1].rstrip("]")
                prop["items"] = {"type": item_type}
            properties[field.name] = prop
            if field.required:
                required.append(field.name)
        return {
            "type": "object",
            "properties": properties,
            "required": required
        }

    def build_prompt_instructions(self) -> str:
        """Build prompt instructions for the schema."""
        lines = ["Respond with a JSON object containing these fields:"]
        for field in self.fields:
            req = "(required)" if field.required else "(optional)"
            type_hint = field.field_type
            if field.enum_values:
                type_hint = f"one of: {', '.join(field.enum_values)}"
            lines.append(f"- {field.name} ({type_hint}) {req}: {field.description}")
        return "\n".join(lines)


class SchemaEnforcedGenerator:
    """Generate output guided by an explicit schema in the prompt."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate(
        self,
        prompt: str,
        schema: dict,
        instructions: Optional[str] = None
    ) -> dict:
        """Generate output matching the schema."""
        system_prompt = "You are a helpful assistant that always responds with valid JSON."
        if instructions:
            system_prompt += f"\n\n{instructions}"
        # Embed the schema in the prompt
        full_prompt = f"""{prompt}

Respond with JSON matching this schema:

```json
{json.dumps(schema, indent=2)}
```"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": full_prompt}
            ],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)
````
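
The builder is plain Python, so it can be exercised without an API call. A quick sketch of composing a sentiment schema and the instructions it renders:

```python
# Sketch: composing a schema fluently and rendering prompt instructions.
builder = (
    SchemaBuilder()
    .add_string("sentiment", "Overall sentiment",
                enum_values=["positive", "negative", "neutral"])
    .add_number("confidence", "Confidence score between 0 and 1")
    .add_array("key_phrases", "Phrases that drive the sentiment")
)
schema = builder.build_json_schema()  # pass to SchemaEnforcedGenerator.generate

print(builder.build_prompt_instructions())
# Respond with a JSON object containing these fields:
# - sentiment (one of: positive, negative, neutral) (required): Overall sentiment
# - confidence (number) (required): Confidence score between 0 and 1
# - key_phrases (array[string]) (required): Phrases that drive the sentiment
```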
## Output Validation and Repair

Even with JSON mode, treat model output as untrusted: extract the JSON, parse it, validate it against a Pydantic model, and fall back to rule-based and then LLM-assisted repair. The robust generator at the end ties these layers together with retries.
````python
from dataclasses import dataclass
from typing import Any, Optional, Type, TypeVar

from pydantic import BaseModel, ValidationError
import json
import re

T = TypeVar('T', bound=BaseModel)


@dataclass
class ValidationResult:
    """Result of output validation."""
    is_valid: bool
    data: Any = None
    errors: Optional[list[str]] = None
    repaired: bool = False


class OutputValidator:
    """Validate LLM outputs."""

    def validate_json(self, text: str) -> ValidationResult:
        """Validate JSON output."""
        # Try to extract JSON from surrounding prose first
        json_text = self._extract_json(text)
        if not json_text:
            return ValidationResult(
                is_valid=False,
                errors=["No valid JSON found in output"]
            )
        try:
            data = json.loads(json_text)
            return ValidationResult(is_valid=True, data=data)
        except json.JSONDecodeError as e:
            return ValidationResult(
                is_valid=False,
                errors=[f"JSON parse error: {str(e)}"]
            )

    def _extract_json(self, text: str) -> Optional[str]:
        """Extract JSON from text that may contain other content."""
        # Prefer JSON inside fenced code blocks
        code_block_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
        if code_block_match:
            return code_block_match.group(1)
        # Fall back to the outermost JSON object...
        brace_match = re.search(r'\{[\s\S]*\}', text)
        if brace_match:
            return brace_match.group(0)
        # ...or the outermost JSON array
        bracket_match = re.search(r'\[[\s\S]*\]', text)
        if bracket_match:
            return bracket_match.group(0)
        return None

    def validate_pydantic(
        self,
        data: dict,
        model: Type[T]
    ) -> ValidationResult:
        """Validate data against a Pydantic model."""
        try:
            validated = model.model_validate(data)
            return ValidationResult(is_valid=True, data=validated)
        except ValidationError as e:
            errors = [f"{err['loc']}: {err['msg']}" for err in e.errors()]
            return ValidationResult(is_valid=False, errors=errors)


class OutputRepairer:
    """Repair invalid LLM outputs."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    def repair_json(self, text: str) -> Optional[dict]:
        """Attempt to repair malformed JSON with simple heuristics."""
        fixed = text
        # Remove trailing commas before closing braces/brackets
        fixed = re.sub(r',\s*}', '}', fixed)
        fixed = re.sub(r',\s*]', ']', fixed)
        # Add missing quotes around bare object keys
        fixed = re.sub(r'(\{|\,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', fixed)
        # Convert single quotes to double quotes (naive: breaks on
        # apostrophes inside string values)
        fixed = fixed.replace("'", '"')
        try:
            return json.loads(fixed)
        except json.JSONDecodeError:
            return None

    async def repair_with_llm(
        self,
        invalid_output: str,
        schema: dict,
        original_prompt: Optional[str] = None
    ) -> Optional[dict]:
        """Use an LLM to repair invalid output."""
        context = f"Original request:\n{original_prompt}\n\n" if original_prompt else ""
        repair_prompt = f"""{context}The following output is invalid or doesn't match the expected schema.
Please fix it and return valid JSON.

Invalid output:
{invalid_output[:1500]}

Expected schema:
{json.dumps(schema, indent=2)}

Return only the corrected JSON, no explanation."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": repair_prompt}],
            response_format={"type": "json_object"}
        )
        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError:
            return None


class RobustStructuredGenerator:
    """Generate structured output with validation and repair."""

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o-mini",
        max_retries: int = 3
    ):
        self.client = client
        self.model = model
        self.max_retries = max_retries
        self.validator = OutputValidator()
        self.repairer = OutputRepairer(client, model)

    async def generate(
        self,
        prompt: str,
        response_type: Type[T],
        system_prompt: Optional[str] = None
    ) -> tuple[Optional[T], list[str]]:
        """Generate with validation and automatic repair."""
        errors = []
        for attempt in range(self.max_retries):
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})
            try:
                response = await self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    response_format={"type": "json_object"}
                )
                output = response.choices[0].message.content
                # Layer 1: is it parseable JSON at all?
                json_result = self.validator.validate_json(output)
                if not json_result.is_valid:
                    errors.extend(json_result.errors)
                    # Layer 2: rule-based repair
                    repaired = self.repairer.repair_json(output)
                    if repaired:
                        json_result = ValidationResult(is_valid=True, data=repaired, repaired=True)
                    else:
                        continue
                # Layer 3: does it match the Pydantic model?
                pydantic_result = self.validator.validate_pydantic(json_result.data, response_type)
                if pydantic_result.is_valid:
                    return pydantic_result.data, errors
                errors.extend(pydantic_result.errors)
                # Layer 4: LLM-assisted repair against the model's schema
                schema = response_type.model_json_schema()
                repaired = await self.repairer.repair_with_llm(output, schema, prompt)
                if repaired:
                    final_result = self.validator.validate_pydantic(repaired, response_type)
                    if final_result.is_valid:
                        return final_result.data, errors
            except Exception as e:
                errors.append(f"Attempt {attempt + 1} failed: {str(e)}")
        return None, errors
````
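
The extraction and rule-based repair paths can be exercised offline. A sketch on a deliberately messy reply (`RobustStructuredGenerator` runs these same layers, plus retries and LLM repair, against a live client):

````python
# Sketch: extraction plus rule-based repair on a deliberately messy reply.
validator = OutputValidator()
repairer = OutputRepairer(client=None)  # rule-based repair needs no LLM client

messy = "Sure! Here it is:\n```json\n{name: 'Ada', fields: ['math', 'engines'],}\n```"

result = validator.validate_json(messy)
print(result.is_valid)    # False: bare key, single quotes, trailing comma
print(result.errors)      # ["JSON parse error: ..."]

# The heuristics fix all three problems in this case
extracted = validator._extract_json(messy)
print(repairer.repair_json(extracted))
# {'name': 'Ada', 'fields': ['math', 'engines']}
````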
## Constrained Decoding

True constrained decoding masks invalid tokens at sampling time, which requires grammar support in the serving stack (llama.cpp GBNF grammars, Outlines, and similar). Against hosted APIs, the practical substitute is prompt-level constraints plus post-hoc validation, which is what the first generator below implements; the second sketches grammar construction for local models.
```python
from dataclasses import dataclass
from enum import Enum
from typing import Any, Optional
import json


class ConstraintType(Enum):
    """Types of output constraints."""
    REGEX = "regex"
    GRAMMAR = "grammar"
    CHOICES = "choices"
    LENGTH = "length"


@dataclass
class OutputConstraint:
    """A constraint on output generation."""
    constraint_type: ConstraintType
    value: Any
    description: str = ""


class ConstrainedGenerator:
    """Generate output with prompt-level constraints."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate_with_choices(
        self,
        prompt: str,
        choices: list[str]
    ) -> str:
        """Generate output constrained to specific choices."""
        choices_str = ", ".join(f'"{c}"' for c in choices)
        constrained_prompt = f"""{prompt}

You must respond with exactly one of these options: {choices_str}

Respond with only the chosen option, nothing else."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}],
            max_tokens=50
        )
        output = response.choices[0].message.content.strip()
        # Exact match (case-insensitive, ignoring surrounding quotes)
        output_lower = output.lower().strip('"\'')
        for choice in choices:
            if choice.lower() == output_lower:
                return choice
        # Fuzzy match: substring containment in either direction
        for choice in choices:
            if choice.lower() in output_lower or output_lower in choice.lower():
                return choice
        return choices[0]  # Fall back to the first choice

    async def generate_with_length(
        self,
        prompt: str,
        min_length: Optional[int] = None,
        max_length: Optional[int] = None,
        target_length: Optional[int] = None
    ) -> str:
        """Generate output with length constraints."""
        length_instruction = ""
        if target_length:
            length_instruction = f"Your response should be approximately {target_length} words."
        elif min_length and max_length:
            length_instruction = f"Your response should be between {min_length} and {max_length} words."
        elif min_length:
            length_instruction = f"Your response should be at least {min_length} words."
        elif max_length:
            length_instruction = f"Your response should be no more than {max_length} words."
        constrained_prompt = f"""{prompt}

{length_instruction}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}]
        )
        return response.choices[0].message.content

    async def generate_with_format(
        self,
        prompt: str,
        format_template: str,
        placeholders: list[str]
    ) -> dict[str, str]:
        """Generate output matching a format template."""
        placeholders_str = ", ".join(placeholders)
        constrained_prompt = f"""{prompt}

Respond in this exact format:

{format_template}

Where the placeholders are: {placeholders_str}

Respond with JSON containing the placeholder values."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)


class GrammarConstrainedGenerator:
    """Build grammars for decode-time constraints (for local models)."""

    def __init__(self):
        # GBNF-flavoured sketches; the string/value/number nonterminals are
        # assumed to come from a base grammar and are not defined here.
        self.grammars = {
            "json_object": r'"{" (string ":" value ("," string ":" value)*)? "}"',
            "json_array": r'"[" (value ("," value)*)? "]"',
            "boolean": r'"true" | "false"',
            "integer": r'"-"? [0-9]+',
            "float": r'"-"? [0-9]+ ("." [0-9]+)?'
        }

    def build_json_grammar(self, schema: dict) -> str:
        """Build a grammar rule from a JSON Schema (simplified)."""
        rules = []
        if schema.get("type") == "object":
            properties = schema.get("properties", {})
            prop_rules = []
            for name, prop in properties.items():
                prop_type = prop.get("type", "string")
                if prop_type == "string":
                    if "enum" in prop:
                        values = " | ".join(f'"{v}"' for v in prop["enum"])
                        prop_rules.append(f'"{name}": ({values})')
                    else:
                        prop_rules.append(f'"{name}": string')
                elif prop_type == "number":
                    prop_rules.append(f'"{name}": number')
                elif prop_type == "boolean":
                    prop_rules.append(f'"{name}": boolean')
                elif prop_type == "array":
                    prop_rules.append(f'"{name}": array')
            rules.append("root ::= \"{\" " + " \",\" ".join(prop_rules) + " \"}\"")
        return "\n".join(rules)
```
## Production Structured Generation Service

The FastAPI service below exposes the pieces above as HTTP endpoints. It assumes the classes and helpers from the previous sections are importable from the same package; client initialization is left as a startup concern.
```python
from typing import Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field, create_model

app = FastAPI()

# Initialize at startup, e.g. with StructuredOutputClient(AsyncOpenAI())
structured_client = None
robust_generator = None
constrained_generator = None


class JsonGenerationRequest(BaseModel):
    prompt: str
    system_prompt: Optional[str] = None


class TypedGenerationRequest(BaseModel):
    prompt: str
    # "schema" shadows a BaseModel attribute, so accept it via an alias
    json_schema: dict = Field(alias="schema")
    system_prompt: Optional[str] = None


class ChoiceGenerationRequest(BaseModel):
    prompt: str
    choices: list[str]


class LengthConstrainedRequest(BaseModel):
    prompt: str
    min_length: Optional[int] = None
    max_length: Optional[int] = None
    target_length: Optional[int] = None


class EntityExtractionRequest(BaseModel):
    text: str


class SentimentRequest(BaseModel):
    text: str


class ValidationRequest(BaseModel):
    text: str


class RepairRequest(BaseModel):
    text: str
    json_schema: Optional[dict] = Field(default=None, alias="schema")


@app.post("/v1/generate/json")
async def generate_json(request: JsonGenerationRequest):
    """Generate JSON output."""
    result = await structured_client.generate_json(
        request.prompt,
        request.system_prompt
    )
    return {"result": result}


@app.post("/v1/generate/typed")
async def generate_typed(request: TypedGenerationRequest):
    """Generate typed output matching a schema."""
    # Create a dynamic Pydantic model from the JSON Schema
    schema = request.json_schema
    fields = {}
    for name, prop in schema.get("properties", {}).items():
        field_type = str
        if prop.get("type") == "number":
            field_type = float
        elif prop.get("type") == "integer":
            field_type = int
        elif prop.get("type") == "boolean":
            field_type = bool
        elif prop.get("type") == "array":
            field_type = list
        required = name in schema.get("required", [])
        if required:
            fields[name] = (field_type, ...)
        else:
            fields[name] = (Optional[field_type], None)
    DynamicModel = create_model("DynamicModel", **fields)
    result, errors = await robust_generator.generate(
        request.prompt,
        DynamicModel,
        request.system_prompt
    )
    if result:
        return {"result": result.model_dump(), "errors": errors}
    else:
        raise HTTPException(status_code=422, detail={"errors": errors})


@app.post("/v1/generate/choice")
async def generate_choice(request: ChoiceGenerationRequest):
    """Generate output from constrained choices."""
    result = await constrained_generator.generate_with_choices(
        request.prompt,
        request.choices
    )
    return {"result": result, "choices": request.choices}


@app.post("/v1/generate/length-constrained")
async def generate_length_constrained(request: LengthConstrainedRequest):
    """Generate output with length constraints."""
    result = await constrained_generator.generate_with_length(
        request.prompt,
        request.min_length,
        request.max_length,
        request.target_length
    )
    word_count = len(result.split())
    return {
        "result": result,
        "word_count": word_count,
        "constraints": {
            "min_length": request.min_length,
            "max_length": request.max_length,
            "target_length": request.target_length
        }
    }


@app.post("/v1/extract/entities")
async def extract_entities_endpoint(request: EntityExtractionRequest):
    """Extract entities from text."""
    # Delegates to the module-level extract_entities helper defined earlier
    result = await extract_entities(structured_client, request.text)
    return {
        "entities": [e.model_dump() for e in result.entities],
        "summary": result.text_summary
    }


@app.post("/v1/analyze/sentiment")
async def analyze_sentiment_endpoint(request: SentimentRequest):
    """Analyze sentiment of text."""
    result = await analyze_sentiment(structured_client, request.text)
    return {
        "sentiment": result.sentiment,
        "confidence": result.confidence,
        "key_phrases": result.key_phrases,
        "explanation": result.explanation
    }


@app.post("/v1/validate/json")
async def validate_json(request: ValidationRequest):
    """Validate JSON output."""
    validator = OutputValidator()
    result = validator.validate_json(request.text)
    return {
        "is_valid": result.is_valid,
        "data": result.data,
        "errors": result.errors
    }


@app.post("/v1/repair/json")
async def repair_json(request: RepairRequest):
    """Repair invalid JSON."""
    repairer = OutputRepairer(structured_client.client)
    # Try rule-based repair first
    repaired = repairer.repair_json(request.text)
    if repaired:
        return {"repaired": repaired, "method": "simple"}
    # Fall back to LLM repair if a schema was provided
    if request.json_schema:
        repaired = await repairer.repair_with_llm(request.text, request.json_schema)
        if repaired:
            return {"repaired": repaired, "method": "llm"}
    raise HTTPException(status_code=422, detail="Could not repair JSON")


@app.get("/health")
async def health():
    return {"status": "healthy"}
```
## References
- OpenAI Structured Outputs: https://platform.openai.com/docs/guides/structured-outputs
- Instructor Library: https://python.useinstructor.com/
- Outlines (Grammar-based): https://github.com/outlines-dev/outlines
- Pydantic: https://docs.pydantic.dev/
## Conclusion

Reliable structured generation requires multiple layers of defense. Native JSON mode and structured outputs from API providers are the most reliable approach when available, because the model is constrained at the decoding level to produce valid output. Schema-based generation with clear instructions helps when native support isn't available. Validation catches errors that slip through, and repair mechanisms, both rule-based and LLM-assisted, recover from failures gracefully. Constrained decoding with choices, length limits, and format templates handles more specific use cases.

The key insight: never trust raw LLM output for structured data. Always validate, and be prepared to retry or repair. Start with native structured outputs when available, add Pydantic validation for type safety, implement repair mechanisms for robustness, and build retry logic to handle edge cases. The goal is structured data extraction reliable enough that it never breaks your downstream systems.
