Introduction: Getting reliable, structured data from LLMs is one of the most practical challenges in building AI applications. Whether you’re extracting entities from text, generating API parameters, or building data pipelines, you need JSON that actually parses and validates against your schema. This guide covers the evolution of structured output techniques—from prompt engineering hacks to native JSON mode and the Instructor library—with production patterns for handling the inevitable edge cases where models don’t follow instructions perfectly.

OpenAI JSON Mode
from openai import OpenAI
import json
# SDK client constructed with no explicit arguments.
client = OpenAI()

# Basic JSON mode: the request sets response_format to "json_object"; the key
# names themselves are only requested through the prompt text.
extraction_prompt = """Extract the following from this text:
- Person's name
- Company
- Role
- Contact email
Text: "Hi, I'm Sarah Chen, Senior Engineer at TechCorp. Reach me at sarah@techcorp.com"
Return JSON with keys: name, company, role, email"""

response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[
        dict(role="system", content="Extract information and return valid JSON."),
        dict(role="user", content=extraction_prompt),
    ],
    response_format={"type": "json_object"},
)

# The first choice's message content is parsed as the JSON document.
raw_json = response.choices[0].message.content
data = json.loads(raw_json)
print(data)
# {"name": "Sarah Chen", "company": "TechCorp", "role": "Senior Engineer", "email": "sarah@techcorp.com"}
# Structured Outputs with JSON Schema (GPT-4o and later)
from datetime import date

# With "strict": True every key listed under "properties" must also appear in
# "required". An optional value is therefore modelled as a union with "null"
# instead of being left out of "required".
meeting_schema = {
    "type": "object",
    "properties": {
        "attendees": {"type": "array", "items": {"type": "string"}},
        "date": {"type": "string", "description": "ISO date format"},
        "time": {"type": "string", "description": "24-hour format"},
        "topic": {"type": "string"},
        "duration_minutes": {
            "type": ["integer", "null"],
            "description": "Meeting length in minutes; null when not stated",
        },
    },
    "required": ["attendees", "date", "time", "topic", "duration_minutes"],
    "additionalProperties": False,
}

response = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=[
        {
            "role": "system",
            # "tomorrow" in the user message can only become an ISO date if
            # the model is told what today is.
            "content": f"Extract meeting details. Today's date is {date.today().isoformat()}.",
        },
        {
            "role": "user",
            "content": "Schedule a meeting with John tomorrow at 3pm to discuss Q4 planning",
        },
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "meeting",
            "schema": meeting_schema,
            "strict": True,
        },
    },
)

# duration_minutes is always present in the parsed dict; it is None when the
# text gave no duration.
meeting = json.loads(response.choices[0].message.content)
print(meeting)
Function Calling for Structured Output
from openai import OpenAI
import json
client = OpenAI()

# Define the schema as a function: the JSON Schema below describes the
# arguments of a single tool named "extract_product_info".
product_fields = {
    "name": {"type": "string", "description": "Product name"},
    "price": {"type": "number", "description": "Price in USD"},
    "currency": {"type": "string", "enum": ["USD", "EUR", "GBP"]},
    "features": {
        "type": "array",
        "items": {"type": "string"},
        "description": "List of product features",
    },
    "in_stock": {"type": "boolean"},
    "category": {
        "type": "string",
        "enum": ["electronics", "clothing", "home", "other"],
    },
}

extract_product_info_spec = {
    "name": "extract_product_info",
    "description": "Extract product information from text",
    "parameters": {
        "type": "object",
        "properties": product_fields,
        # Only these three keys are mandatory; the rest may be omitted.
        "required": ["name", "price", "category"],
    },
}

tools = [{"type": "function", "function": extract_product_info_spec}]
product_prompt = """Extract product info from:
"The new iPhone 15 Pro is available for $999. Features include A17 chip,
titanium design, and USB-C. Currently in stock."""

# tool_choice names the function explicitly, so the reply is read from
# tool_calls rather than from the message content.
forced_tool = {"type": "function", "function": {"name": "extract_product_info"}}

response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[dict(role="user", content=product_prompt)],
    tools=tools,
    tool_choice=forced_tool,
)

# Parse the function call arguments (a JSON string) into a dict
first_message = response.choices[0].message
tool_call = first_message.tool_calls[0]
product = json.loads(tool_call.function.arguments)
print(product)
# {"name": "iPhone 15 Pro", "price": 999, "currency": "USD",
# "features": ["A17 chip", "titanium design", "USB-C"],
# "in_stock": true, "category": "electronics"}
Pydantic with Instructor
# pip install instructor
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator
from typing import Optional
from enum import Enum
# Patch OpenAI client with Instructor: the wrapped client is the one the
# `response_model=` / `max_retries=` calls below are made on.
client = instructor.from_openai(OpenAI())
class Priority(str, Enum):
    # Urgency levels for a task. The ``str`` mix-in makes every member compare
    # equal to its lowercase value, e.g. Priority.HIGH == "high".
    # Documented with comments rather than a docstring so the class's own
    # metadata stays exactly as before.
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
class Task(BaseModel):
    """A task extracted from natural language."""

    # Only `title` is mandatory; every other field falls back to a default.
    title: str = Field(description="Brief task title")
    description: Optional[str] = Field(None, description="Detailed description")
    assignee: Optional[str] = Field(None, description="Person assigned")
    priority: Priority = Field(default=Priority.MEDIUM)
    due_date: Optional[str] = Field(None, description="Due date in YYYY-MM-DD format")
    tags: list[str] = Field(default_factory=list)

    @field_validator("due_date")
    @classmethod
    def validate_date(cls, v):
        # A missing date passes through untouched; anything else must be
        # readable by strptime with the %Y-%m-%d pattern. The value itself is
        # returned unchanged (it stays a string).
        if v is not None:
            from datetime import datetime

            try:
                datetime.strptime(v, "%Y-%m-%d")
            except ValueError:
                raise ValueError("Date must be in YYYY-MM-DD format")
        return v
# Extract with automatic validation and retry.
# The request says "next Friday", which is relative: the system message states
# today's date and weekday so the model has an anchor for producing the
# YYYY-MM-DD value that Task's due_date validator accepts.
from datetime import date

today = date.today()
task = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[
        {
            "role": "system",
            "content": (
                f"Today's date is {today:%Y-%m-%d} ({today:%A}). "
                "Resolve relative dates against it."
            ),
        },
        {
            "role": "user",
            "content": "Create a high priority task for John to review the Q4 budget by next Friday",
        },
    ],
    response_model=Task,
    max_retries=3,  # Automatically retry on validation failure
)
print(task.model_dump_json(indent=2))
# Complex nested structures
class Address(BaseModel):
    # Postal address; `country` is the only field with a default.
    street: str
    city: str
    state: str
    zip_code: str
    country: str = Field(default="USA")
class Person(BaseModel):
    # `name` and `email` are required; the remaining fields are optional.
    name: str
    email: str
    phone: Optional[str] = Field(default=None)
    address: Optional[Address] = Field(default=None)
    # default_factory gives each instance its own empty list.
    tags: list[str] = Field(default_factory=list)
class Company(BaseModel):
    # Nests a list of Person records and one Address; only `founded_year`
    # may be left out.
    name: str
    industry: str
    employees: list[Person]
    headquarters: Address
    founded_year: Optional[int] = Field(default=None)
# Extract complex nested data: a single call with response_model=Company.
company_prompt = """Extract company info:
TechStartup Inc, a fintech company founded in 2020, is headquartered at
123 Innovation Way, San Francisco, CA 94105. Key employees include
CEO Jane Smith (jane@techstartup.com) and CTO Bob Johnson (bob@techstartup.com)."""

company = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[dict(role="user", content=company_prompt)],
    response_model=Company,
)
print(company.model_dump_json(indent=2))
Handling Extraction Failures
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field, ValidationError
from typing import Optional, Union
import json
# Instructor-wrapped OpenAI client; robust_extract below makes its first
# (response_model=) attempt through it.
client = instructor.from_openai(OpenAI())
class ExtractionResult(BaseModel):
    """Wrapper for extraction with confidence."""

    # Free-form extracted payload.
    data: dict
    # Constrained to the closed interval [0, 1] by ge/le.
    confidence: float = Field(ge=0, le=1, description="Confidence score 0-1")
    # Both lists default to a fresh empty list per instance.
    missing_fields: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
def robust_extract(
    text: str,
    schema: type[BaseModel],
    max_retries: int = 3
) -> Union[BaseModel, dict]:
    """Extract ``schema`` from ``text``, degrading step by step on failure.

    1. Strict attempt through the Instructor client (``response_model=schema``
       with ``max_retries`` retries).
    2. If that fails validation, a plain JSON-mode request that is told the
       schema, followed by ``schema.model_validate`` on the parsed reply.

    Args:
        text: Source text to extract from.
        schema: Pydantic model class describing the wanted structure.
        max_retries: Retry budget handed to the strict attempt.

    Returns:
        A ``schema`` instance when either attempt validates. Otherwise a dict:
        ``{"raw_data", "schema_validation_failed", "original_text"}`` when the
        fallback JSON parsed but did not validate, or
        ``{"error", "original_text"}`` when the fallback itself failed.
    """
    # Instructor signals "retries exhausted" with its own exception type, not
    # with pydantic's ValidationError. Catching only ValidationError would let
    # that exception escape and the fallback below would never run.
    from instructor.exceptions import InstructorRetryException

    # First attempt: strict extraction
    try:
        result = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=[
                {
                    "role": "system",
                    "content": "Extract information matching this schema. If information is missing, use null."
                },
                {"role": "user", "content": text}
            ],
            response_model=schema,
            max_retries=max_retries
        )
        return result
    except (ValidationError, InstructorRetryException) as e:
        print(f"Validation failed: {e}")

    # Fallback: lenient JSON extraction
    try:
        raw_client = OpenAI()
        # The plain client knows nothing about `schema`, so the key names are
        # spelled out in the prompt; otherwise the reply would rarely validate.
        schema_json = json.dumps(schema.model_json_schema(), indent=2)
        response = raw_client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Extract as much information as possible. Return valid JSON "
                        "using the field names of this JSON schema:\n" + schema_json
                    )
                },
                {"role": "user", "content": text}
            ],
            response_format={"type": "json_object"}
        )
        data = json.loads(response.choices[0].message.content)

        # Try to coerce into schema
        try:
            return schema.model_validate(data)
        except ValidationError:
            # Return raw data with warning
            return {
                "raw_data": data,
                "schema_validation_failed": True,
                "original_text": text[:200]
            }
    except Exception as e:
        # Deliberate best-effort boundary: any failure of the fallback is
        # reported as data instead of being raised.
        return {"error": str(e), "original_text": text[:200]}
# Batch extraction with progress
from tqdm import tqdm
def batch_extract(
    texts: list[str],
    schema: type[BaseModel],
    show_progress: bool = True
) -> list[dict]:
    """Extract from multiple texts with error handling.

    Args:
        texts: Input texts, processed in order.
        schema: Pydantic model class passed to ``robust_extract``.
        show_progress: Wrap the iteration in a ``tqdm`` progress bar.

    Returns:
        One dict per input text. ``{"success": True, "data": <model dump>}``
        when ``robust_extract`` returned a model, ``{"success": False,
        "data": <fallback dict>}`` when it returned a dict, and
        ``{"success": False, "error": <message>}`` when it raised. An empty
        ``texts`` yields an empty list.
    """
    results = []
    iterator = tqdm(texts) if show_progress else texts
    for text in iterator:
        try:
            result = robust_extract(text, schema)
            if isinstance(result, BaseModel):
                results.append({"success": True, "data": result.model_dump()})
            else:
                results.append({"success": False, "data": result})
        except Exception as e:
            results.append({"success": False, "error": str(e)})

    # Guard the ratio: with no inputs there is nothing to divide by.
    if results:
        success_rate = sum(1 for r in results if r["success"]) / len(results)
        print(f"Success rate: {success_rate:.1%}")
    else:
        print("Success rate: n/a (no input texts)")
    return results
Streaming Structured Output
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import Iterable
# Instructor-wrapped OpenAI client used by the streaming calls below
# (response_model=..., stream=True).
client = instructor.from_openai(OpenAI())
class SearchResult(BaseModel):
    # One generated search hit; all four fields are required.
    title: str
    url: str
    snippet: str
    relevance_score: float
# Stream SearchResult objects one at a time as they're generated
def stream_extraction(query: str) -> Iterable[SearchResult]:
    """Ask for five search results for ``query`` and hand back the stream.

    The request is made with ``response_model=Iterable[SearchResult]`` and
    ``stream=True``; whatever the client returns for that call is returned
    unchanged for the caller to iterate.
    """
    user_prompt = f"Generate 5 search results for: {query}"
    request = dict(
        model="gpt-4-turbo-preview",
        messages=[{"role": "user", "content": user_prompt}],
        response_model=Iterable[SearchResult],
        stream=True,
    )
    return client.chat.completions.create(**request)
# Process results as they arrive
for result in stream_extraction("python async programming"):
    found_line = f"Found: {result.title} (score: {result.relevance_score})"
    print(found_line)
# Partial streaming for long extractions
from instructor import Partial
class Article(BaseModel):
    # Target structure for the partial-streaming example; every field is
    # required on the complete model.
    title: str
    author: str
    summary: str
    key_points: list[str]
    conclusion: str
# Get partial results during generation: the call is made with
# response_model=Partial[Article] and stream=True, then iterated.
article_stream = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[dict(role="user", content="Summarize the benefits of microservices")],
    response_model=Partial[Article],
    stream=True,
)

for partial_article in article_stream:
    # Access fields as they become available; falsy fields are skipped.
    if partial_article.title:
        print(f"Title: {partial_article.title}")
    if partial_article.key_points:
        points_so_far = len(partial_article.key_points)
        print(f"Points so far: {points_so_far}")
Claude and Other Models
import instructor
from anthropic import Anthropic
from pydantic import BaseModel
# Instructor works with Claude too: the Anthropic client is wrapped the same
# way, and the `messages.create` call below passes `response_model=`.
client = instructor.from_anthropic(Anthropic())
class Sentiment(BaseModel):
    # Result of analysing one piece of text; all fields are required.
    text: str
    sentiment: str  # positive, negative, neutral
    confidence: float
    key_phrases: list[str]
# Single user turn; the reply is returned as a Sentiment via response_model.
sentiment_request = {
    "role": "user",
    "content": "Analyze sentiment: 'This product exceeded my expectations!'",
}

result = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    messages=[sentiment_request],
    response_model=Sentiment,
)
print(result)
# For models without native JSON mode, use prompt engineering
def extract_with_prompt(text: str, schema: dict, generate=None) -> dict:
    """Extract using careful prompting for any model.

    Args:
        text: Source text to extract from.
        schema: JSON-schema-like dict, embedded verbatim in the prompt.
        generate: Optional callable ``prompt -> reply text``. When omitted,
            ``some_llm_client.generate`` is used, as before.

    Returns:
        The parsed JSON value from the model reply.

    Raises:
        json.JSONDecodeError: The reply contains no parseable JSON.
    """
    # Local imports: this snippet's own import block does not bring in json.
    import json
    import re

    schema_str = json.dumps(schema, indent=2)
    prompt = f"""Extract information from the text below and return ONLY valid JSON matching this schema:
Schema:
{schema_str}
Text:
{text}
Important:
- Return ONLY the JSON object, no other text
- Use null for missing values
- Ensure all required fields are present
JSON:"""

    # Works with any model
    if generate is None:
        generate = some_llm_client.generate
    response = generate(prompt)

    # Clean and parse. Models often wrap the JSON in a Markdown fence, and the
    # fence is not always at the very start of the reply, so search for it
    # anywhere instead of only checking the prefix.
    json_str = response.strip()
    fenced = re.search(r"```(?:json)?\s*(.*?)```", json_str, re.DOTALL | re.IGNORECASE)
    if fenced:
        json_str = fenced.group(1).strip()

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Last resort for prose around the payload or an unclosed fence:
        # parse the span from the first "{" to the last "}".
        start, end = json_str.find("{"), json_str.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(json_str[start:end + 1])
Production Patterns
from pydantic import BaseModel, Field
from typing import TypeVar, Generic
from datetime import datetime
import hashlib
# Type variable for the extracted payload; bound to BaseModel so it can only
# stand for pydantic model types.
T = TypeVar("T", bound=BaseModel)
class ExtractionMetadata(BaseModel):
    """Metadata for tracking extractions."""

    # Identity of this extraction and of the model that produced it.
    extraction_id: str
    model: str
    # When it happened and a digest of the input it was run on.
    timestamp: datetime
    input_hash: str
    # Cost/latency figures supplied by the caller.
    tokens_used: int
    latency_ms: float
class ExtractionResponse(BaseModel, Generic[T]):
    """Wrapper with metadata for production use."""

    # Payload: an instance of whichever model type T stands for.
    data: T
    # Bookkeeping recorded alongside the payload.
    metadata: ExtractionMetadata
class ProductionExtractor:
    """Structured extraction with an in-memory cache and per-call metadata.

    Attributes:
        client: Instructor-wrapped OpenAI client.
        model: Model name sent with every request.
        cache: Dict mapping cache keys to finished ExtractionResponse objects.
    """

    def __init__(self, model: str = "gpt-4-turbo-preview"):
        self.client = instructor.from_openai(OpenAI())
        self.model = model
        self.cache = {}  # Use Redis in production

    def extract(
        self,
        text: str,
        schema: type[T],
        use_cache: bool = True
    ) -> ExtractionResponse[T]:
        """Extract ``schema`` from ``text`` with caching and metadata.

        Args:
            text: Source text; sent as the single user message.
            schema: Pydantic model class to extract.
            use_cache: Read from and write to ``self.cache``.

        Returns:
            An ExtractionResponse holding the extracted model and metadata.
            On a cache hit the previously stored response object is returned
            as-is, so its metadata describes the original call.
        """
        import time
        import uuid
        from datetime import timezone

        # Check cache. The key covers the model name and the schema's
        # module-qualified name, so changing self.model or using two
        # same-named schemas from different modules cannot return a stale or
        # wrong-typed entry. md5 serves only as a content fingerprint here.
        input_hash = hashlib.md5(text.encode()).hexdigest()
        cache_key = (
            f"{self.model}:{schema.__module__}.{schema.__qualname__}:{input_hash}"
        )
        if use_cache and cache_key in self.cache:
            return self.cache[cache_key]

        # Extract. perf_counter is monotonic, so the latency cannot go
        # negative or jump when the system clock is adjusted.
        start = time.perf_counter()
        result, completion = self.client.chat.completions.create_with_completion(
            model=self.model,
            messages=[{"role": "user", "content": text}],
            response_model=schema,
            max_retries=3
        )
        latency = (time.perf_counter() - start) * 1000

        # Token usage comes from the raw completion returned next to the
        # parsed model; fall back to 0 when the provider reports none.
        # NOTE(review): with retries this presumably reflects only the final
        # attempt — confirm against the Instructor docs if exact totals matter.
        usage = getattr(completion, "usage", None)
        tokens_used = getattr(usage, "total_tokens", 0) or 0

        # Build response
        response = ExtractionResponse(
            data=result,
            metadata=ExtractionMetadata(
                extraction_id=str(uuid.uuid4()),
                model=self.model,
                # Timezone-aware so stored timestamps are unambiguous.
                timestamp=datetime.now(timezone.utc),
                input_hash=input_hash,
                tokens_used=tokens_used,
                latency_ms=latency
            )
        )

        # Cache
        if use_cache:
            self.cache[cache_key] = response
        return response
# Usage: one extraction into the Person model, then report latency and data.
extractor = ProductionExtractor()
sample_text = "John Smith, CEO of Acme Corp, john@acme.com"
response = extractor.extract(sample_text, Person)

elapsed_ms = response.metadata.latency_ms
print(f"Extracted in {elapsed_ms:.0f}ms")
print(response.data)
References
- OpenAI Structured Outputs: https://platform.openai.com/docs/guides/structured-outputs
- Instructor Library: https://python.useinstructor.com/
- Pydantic: https://docs.pydantic.dev/
- OpenAI Function Calling: https://platform.openai.com/docs/guides/function-calling
Conclusion
Structured output transforms LLMs from text generators into reliable data extraction engines. The combination of JSON mode for guaranteed valid JSON, function calling for schema enforcement, and libraries like Instructor for Pydantic integration gives you multiple tools for different situations. Start with OpenAI’s native structured outputs for the most reliable results, fall back to function calling on models that do not support structured outputs, and use Instructor when you want the full power of Pydantic validation. Always implement retry logic—even the best models occasionally produce invalid output. For production systems, add caching, monitoring, and graceful degradation to handle the edge cases that will inevitably occur at scale.
Discover more from Code, Cloud & Context
Subscribe to get the latest posts sent to your email.

Leave a Reply