Introduction

LLMs generate text, but applications need structured, reliable data. The gap between free-form text and validated output is where many LLM applications fail. Output validation ensures LLM responses meet your application’s requirements—correct schema, valid values, appropriate content, and consistent format. This guide covers practical validation techniques: schema validation with Pydantic, semantic validation for content quality, automatic repair mechanisms for malformed output, and building validation pipelines that catch errors before they reach your users. Whether you’re extracting structured data, generating code, or building conversational agents, robust output validation is essential for production reliability.

Schema Validation
```python
from dataclasses import dataclass
from typing import Any, Optional, TypeVar, Generic
from pydantic import BaseModel, Field, field_validator, ValidationError
import json
import re

T = TypeVar('T', bound=BaseModel)


@dataclass
class ValidationResult(Generic[T]):
    """Result of validation."""
    valid: bool
    data: Optional[T] = None
    errors: Optional[list[str]] = None
    raw_output: Optional[str] = None


class SchemaValidator(Generic[T]):
    """Validate LLM output against a Pydantic schema."""

    def __init__(self, schema: type[T]):
        self.schema = schema

    def validate(self, output: str) -> ValidationResult[T]:
        """Validate output against the schema."""
        # Try to extract JSON from the raw output
        json_str = self._extract_json(output)
        if not json_str:
            return ValidationResult(
                valid=False,
                errors=["No valid JSON found in output"],
                raw_output=output
            )
        try:
            # Parse JSON, then validate against the schema
            data = json.loads(json_str)
            validated = self.schema.model_validate(data)
            return ValidationResult(
                valid=True,
                data=validated,
                raw_output=output
            )
        except json.JSONDecodeError as e:
            return ValidationResult(
                valid=False,
                errors=[f"JSON parse error: {e}"],
                raw_output=output
            )
        except ValidationError as e:
            return ValidationResult(
                valid=False,
                errors=[str(err) for err in e.errors()],
                raw_output=output
            )

    def _extract_json(self, text: str) -> Optional[str]:
        """Extract JSON from text."""
        # Try a fenced code block first
        code_block = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if code_block:
            return code_block.group(1).strip()
        # Then try a bare JSON object
        json_match = re.search(r'\{[\s\S]*\}', text)
        if json_match:
            return json_match.group(0)
        # Then a bare JSON array
        array_match = re.search(r'\[[\s\S]*\]', text)
        if array_match:
            return array_match.group(0)
        return None
```
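A quick sanity check of the bare-object extraction path; `Greeting` is a hypothetical schema and `raw` stands in for a chatty model response:

```python
from pydantic import BaseModel

class Greeting(BaseModel):
    message: str

validator = SchemaValidator(Greeting)
raw = 'Here you go: {"message": "hi"} Anything else?'
result = validator.validate(raw)
assert result.valid and result.data.message == "hi"
```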
Example schemas for common tasks follow: entity extraction, code generation, and structured question answering.

```python
class ExtractedEntity(BaseModel):
    """An extracted entity."""
    name: str = Field(..., min_length=1)
    entity_type: str = Field(..., pattern=r'^(person|organization|location|date|money)$')
    confidence: float = Field(..., ge=0.0, le=1.0)

    @field_validator('name')
    @classmethod
    def name_not_empty(cls, v):
        if not v.strip():
            raise ValueError('Name cannot be empty')
        return v.strip()


class ExtractionResult(BaseModel):
    """Result of entity extraction."""
    entities: list[ExtractedEntity]
    summary: str = Field(..., min_length=10, max_length=500)


class CodeGeneration(BaseModel):
    """Generated code."""
    language: str = Field(..., pattern=r'^(python|javascript|typescript|go|rust)$')
    code: str = Field(..., min_length=1)
    explanation: str
    dependencies: list[str] = Field(default_factory=list)


class StructuredAnswer(BaseModel):
    """A structured answer."""
    answer: str = Field(..., min_length=1)
    confidence: float = Field(..., ge=0.0, le=1.0)
    sources: list[str] = Field(default_factory=list)
    reasoning: Optional[str] = None
```
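Wiring a schema into the validator is then one line; `llm_output` below is a stand-in for raw model text:

```python
llm_output = '{"entities": [{"name": "Ada Lovelace", "entity_type": "person", "confidence": 0.97}], "summary": "A short note about Ada Lovelace."}'

validator = SchemaValidator(ExtractionResult)
result = validator.validate(llm_output)
if result.valid:
    for entity in result.data.entities:
        print(entity.name, entity.entity_type, entity.confidence)
else:
    print("Validation failed:", result.errors)
```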
Semantic Validation
```python
from dataclasses import dataclass
from typing import Any, Optional
from enum import Enum
import json


class ContentQuality(Enum):
    """Content quality levels."""
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INVALID = "invalid"


@dataclass
class SemanticValidationResult:
    """Result of semantic validation."""
    valid: bool
    quality: ContentQuality
    issues: list[str]
    suggestions: list[str]


class SemanticValidator:
    """Validate semantic quality of LLM output."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def validate(
        self,
        output: str,
        context: str,
        requirements: Optional[list[str]] = None
    ) -> SemanticValidationResult:
        """Validate semantic quality."""
        requirements_text = "\n".join(f"- {r}" for r in (requirements or []))
        prompt = f"""Evaluate this LLM output for quality and correctness.

Context/Question: {context}

Output to evaluate:
{output}

Requirements:
{requirements_text if requirements_text else "- Be accurate and relevant"}

Evaluate:
1. Is the output relevant to the context?
2. Is the information accurate (no hallucinations)?
3. Is the output complete?
4. Are there any issues?

Return JSON:
{{
    "valid": true/false,
    "quality": "high"/"medium"/"low"/"invalid",
    "issues": ["list of issues"],
    "suggestions": ["list of improvements"]
}}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        # Assumes the judge returns bare JSON; a JSON response_format hint can make this more robust
        result = json.loads(response.choices[0].message.content)
        return SemanticValidationResult(
            valid=result["valid"],
            quality=ContentQuality(result["quality"]),
            issues=result.get("issues", []),
            suggestions=result.get("suggestions", [])
        )
```
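A minimal driver, assuming the async client from the `openai` v1 SDK and an `OPENAI_API_KEY` in the environment:

```python
import asyncio
from openai import AsyncOpenAI  # assumed SDK

async def main():
    validator = SemanticValidator(AsyncOpenAI())
    result = await validator.validate(
        output="Paris is the capital of France.",
        context="What is the capital of France?",
        requirements=["Answer in one sentence"],
    )
    print(result.quality, result.issues)

asyncio.run(main())
```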
Two more checks follow the same judge pattern: grounding against source documents, and consistency with prior outputs.

```python
class FactualValidator:
    """Validate factual accuracy of output."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def validate(
        self,
        output: str,
        source_documents: list[str]
    ) -> SemanticValidationResult:
        """Check if output is grounded in the source documents."""
        sources_text = "\n---\n".join(source_documents)
        prompt = f"""Check if this output is factually grounded in the source documents.

Source Documents:
{sources_text}

Output to verify:
{output}

For each claim in the output:
1. Is it supported by the sources?
2. Is it a reasonable inference?
3. Is it potentially hallucinated?

Return JSON:
{{
    "valid": true/false,
    "quality": "high"/"medium"/"low"/"invalid",
    "issues": ["list of unsupported claims"],
    "suggestions": ["how to improve grounding"]
}}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        result = json.loads(response.choices[0].message.content)
        return SemanticValidationResult(
            valid=result["valid"],
            quality=ContentQuality(result["quality"]),
            issues=result.get("issues", []),
            suggestions=result.get("suggestions", [])
        )


class ConsistencyValidator:
    """Validate consistency with previous outputs."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def validate(
        self,
        current_output: str,
        previous_outputs: list[str]
    ) -> SemanticValidationResult:
        """Check consistency with the most recent previous outputs."""
        history = "\n---\n".join(previous_outputs[-5:])
        prompt = f"""Check if this output is consistent with previous outputs.

Previous outputs:
{history}

Current output:
{current_output}

Check for:
1. Contradictions with previous statements
2. Inconsistent facts or claims
3. Changed positions without explanation

Return JSON:
{{
    "valid": true/false,
    "quality": "high"/"medium"/"low"/"invalid",
    "issues": ["list of inconsistencies"],
    "suggestions": ["how to resolve"]
}}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        result = json.loads(response.choices[0].message.content)
        return SemanticValidationResult(
            valid=result["valid"],
            quality=ContentQuality(result["quality"]),
            issues=result.get("issues", []),
            suggestions=result.get("suggestions", [])
        )
```
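The judges are independent, so a sketch like this (assuming an initialized async client) can run them concurrently:

```python
import asyncio

async def check_answer(client, answer: str, sources: list[str], history: list[str]) -> bool:
    # Run the factual and consistency judges in parallel
    f_result, c_result = await asyncio.gather(
        FactualValidator(client).validate(answer, sources),
        ConsistencyValidator(client).validate(answer, history),
    )
    return f_result.valid and c_result.valid
```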
Output Repair
```python
from dataclasses import dataclass
from typing import Any, Optional, TypeVar, Generic
from pydantic import BaseModel
import json
import re

T = TypeVar('T', bound=BaseModel)


@dataclass
class RepairResult(Generic[T]):
    """Result of output repair."""
    success: bool
    data: Optional[T] = None
    original: Optional[str] = None
    repaired: Optional[str] = None
    repair_actions: Optional[list[str]] = None


class JSONRepairer:
    """Repair malformed JSON."""

    def repair(self, text: str) -> str:
        """Attempt to repair malformed JSON by applying fixes in sequence."""
        # Extract JSON-like content, then fix common syntax issues
        json_str = self._extract_json_like(text)
        json_str = self._fix_trailing_commas(json_str)
        json_str = self._fix_single_quotes(json_str)
        json_str = self._fix_unquoted_keys(json_str)
        json_str = self._fix_missing_brackets(json_str)
        return json_str

    def _extract_json_like(self, text: str) -> str:
        """Extract JSON-like content."""
        # Find the first { or [
        start = -1
        for i, c in enumerate(text):
            if c in '{[':
                start = i
                break
        if start == -1:
            return text
        # Find the matching closing bracket
        bracket_map = {'{': '}', '[': ']'}
        open_bracket = text[start]
        close_bracket = bracket_map[open_bracket]
        depth = 0
        for i in range(start, len(text)):
            if text[i] == open_bracket:
                depth += 1
            elif text[i] == close_bracket:
                depth -= 1
                if depth == 0:
                    return text[start:i + 1]
        # No matching close found: keep the tail and let
        # _fix_missing_brackets append the missing closers
        return text[start:]

    def _fix_trailing_commas(self, text: str) -> str:
        """Remove trailing commas before } or ]."""
        return re.sub(r',\s*([}\]])', r'\1', text)

    def _fix_single_quotes(self, text: str) -> str:
        """Replace single quotes with double quotes."""
        # Naive replacement (doesn't handle apostrophes or escaped quotes)
        return text.replace("'", '"')

    def _fix_unquoted_keys(self, text: str) -> str:
        """Quote unquoted object keys."""
        return re.sub(r'(\{|,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', text)

    def _fix_missing_brackets(self, text: str) -> str:
        """Append missing closing brackets."""
        text += '}' * (text.count('{') - text.count('}'))
        text += ']' * (text.count('[') - text.count(']'))
        return text
```
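A quick smoke test of the deterministic repairs; the broken string below has single quotes, a trailing comma, and a missing closing brace:

```python
import json

repairer = JSONRepairer()
broken = "{'name': 'Ada', 'tags': ['math', 'computing',]"
fixed = repairer.repair(broken)
print(fixed)  # {"name": "Ada", "tags": ["math", "computing"]}
assert json.loads(fixed)["name"] == "Ada"
```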
When syntax-level fixes aren't enough, an LLM can rewrite the output against the schema:

```python
class LLMRepairer(Generic[T]):
    """Use an LLM to repair output."""

    def __init__(
        self,
        client: Any,
        schema: type[T],
        model: str = "gpt-4o-mini"
    ):
        self.client = client
        self.schema = schema
        self.model = model

    async def repair(
        self,
        output: str,
        errors: list[str]
    ) -> RepairResult[T]:
        """Repair output using the LLM."""
        schema_json = json.dumps(
            self.schema.model_json_schema(),
            indent=2
        )
        prompt = f"""Fix this malformed output to match the required schema.

Original output:
{output}

Validation errors:
{chr(10).join(f'- {e}' for e in errors)}

Required schema:
{schema_json}

Return only the corrected JSON, no explanation."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        repaired = response.choices[0].message.content
        # Validate the repaired output
        try:
            # Extract the JSON portion
            json_match = re.search(r'[\{\[][\s\S]*[\}\]]', repaired)
            if json_match:
                repaired = json_match.group(0)
            data = json.loads(repaired)
            validated = self.schema.model_validate(data)
            return RepairResult(
                success=True,
                data=validated,
                original=output,
                repaired=repaired,
                repair_actions=["LLM repair"]
            )
        except Exception as e:
            return RepairResult(
                success=False,
                original=output,
                repaired=repaired,
                repair_actions=[f"LLM repair failed: {e}"]
            )
```
A combined repairer tries the cheap deterministic pass first and falls back to the LLM:

```python
class OutputRepairer(Generic[T]):
    """Combined repair pipeline."""

    def __init__(
        self,
        client: Any,
        schema: type[T],
        model: str = "gpt-4o-mini"
    ):
        self.json_repairer = JSONRepairer()
        self.llm_repairer = LLMRepairer(client, schema, model)
        self.schema = schema

    async def repair(
        self,
        output: str,
        errors: list[str]
    ) -> RepairResult[T]:
        """Attempt repair using multiple strategies, cheapest first."""
        # Try deterministic JSON repair first
        repaired = self.json_repairer.repair(output)
        try:
            data = json.loads(repaired)
            validated = self.schema.model_validate(data)
            return RepairResult(
                success=True,
                data=validated,
                original=output,
                repaired=repaired,
                repair_actions=["JSON syntax repair"]
            )
        except Exception:
            pass
        # Fall back to LLM repair
        return await self.llm_repairer.repair(output, errors)
```
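Usage is a single call; this sketch assumes the async OpenAI client and the `ExtractionResult` schema defined earlier:

```python
from openai import AsyncOpenAI  # assumed SDK

async def fix_output(raw: str, errors: list[str]) -> ExtractionResult:
    repairer = OutputRepairer(AsyncOpenAI(), ExtractionResult)
    result = await repairer.repair(raw, errors)
    if result.success:
        return result.data
    raise ValueError(f"Repair failed: {result.repair_actions}")
```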
Validation Pipeline
```python
from dataclasses import dataclass
from typing import Any, Optional, TypeVar, Generic, Callable
from pydantic import BaseModel

T = TypeVar('T', bound=BaseModel)


@dataclass
class PipelineResult(Generic[T]):
    """Result of the validation pipeline."""
    valid: bool
    data: Optional[T] = None
    schema_result: Optional[ValidationResult] = None
    semantic_result: Optional[SemanticValidationResult] = None
    repair_result: Optional[RepairResult] = None
    attempts: int = 0


class ValidationPipeline(Generic[T]):
    """Complete validation pipeline."""

    def __init__(
        self,
        schema: type[T],
        client: Any = None,
        max_repair_attempts: int = 2,
        semantic_validation: bool = True
    ):
        self.schema_validator = SchemaValidator(schema)
        self.semantic_validator = SemanticValidator(client) if client else None
        self.repairer = OutputRepairer(client, schema) if client else None
        self.max_repair_attempts = max_repair_attempts
        self.semantic_validation = semantic_validation and client is not None

    async def validate(
        self,
        output: str,
        context: Optional[str] = None,
        requirements: Optional[list[str]] = None
    ) -> PipelineResult[T]:
        """Run the complete validation pipeline."""
        attempts = 0
        current_output = output
        repair_result = None
        while attempts < self.max_repair_attempts:
            attempts += 1
            # Schema validation
            schema_result = self.schema_validator.validate(current_output)
            if schema_result.valid:
                # Semantic validation, if enabled and context is available
                semantic_result = None
                if self.semantic_validation and context:
                    semantic_result = await self.semantic_validator.validate(
                        schema_result.data.model_dump_json(),
                        context,
                        requirements
                    )
                    if not semantic_result.valid:
                        return PipelineResult(
                            valid=False,
                            data=schema_result.data,
                            schema_result=schema_result,
                            semantic_result=semantic_result,
                            attempts=attempts
                        )
                return PipelineResult(
                    valid=True,
                    data=schema_result.data,
                    schema_result=schema_result,
                    semantic_result=semantic_result,
                    repair_result=repair_result,
                    attempts=attempts
                )
            # Attempt repair
            if self.repairer and attempts < self.max_repair_attempts:
                repair_result = await self.repairer.repair(
                    current_output,
                    schema_result.errors
                )
                if repair_result.success:
                    return PipelineResult(
                        valid=True,
                        data=repair_result.data,
                        schema_result=schema_result,
                        repair_result=repair_result,
                        attempts=attempts
                    )
                current_output = repair_result.repaired or current_output
        # All attempts failed
        return PipelineResult(
            valid=False,
            schema_result=schema_result,
            repair_result=repair_result,
            attempts=attempts
        )
```
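Putting it together: a minimal driver, again assuming an async OpenAI client:

```python
import asyncio
from openai import AsyncOpenAI  # assumed SDK

async def main():
    pipeline = ValidationPipeline(StructuredAnswer, client=AsyncOpenAI())
    result = await pipeline.validate(
        '{"answer": "Paris", "confidence": 0.9}',
        context="What is the capital of France?",
    )
    print(result.valid, result.attempts, result.data)

asyncio.run(main())
```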
For end-to-end use, a retrying wrapper feeds validation errors back into the next generation attempt:

```python
class RetryingValidator(Generic[T]):
    """Validator that retries LLM calls on failure."""

    def __init__(
        self,
        client: Any,
        schema: type[T],
        generate_fn: Callable,
        max_retries: int = 3
    ):
        self.client = client
        self.schema = schema
        self.generate_fn = generate_fn
        self.max_retries = max_retries
        self.pipeline = ValidationPipeline(schema, client)

    async def generate_validated(
        self,
        prompt: str,
        context: Optional[str] = None
    ) -> PipelineResult[T]:
        """Generate and validate, retrying with error feedback."""
        errors_so_far = []
        for attempt in range(self.max_retries):
            # Include errors from previous attempts in the prompt
            if errors_so_far:
                error_context = "\n".join(f"- {e}" for e in errors_so_far)
                enhanced_prompt = f"""{prompt}

Previous attempts had these errors:
{error_context}

Please fix these issues in your response."""
            else:
                enhanced_prompt = prompt
            output = await self.generate_fn(enhanced_prompt)
            # Validate
            result = await self.pipeline.validate(output, context)
            if result.valid:
                return result
            # Collect errors for the next attempt
            if result.schema_result and result.schema_result.errors:
                errors_so_far.extend(result.schema_result.errors)
            if result.semantic_result and result.semantic_result.issues:
                errors_so_far.extend(result.semantic_result.issues)
        return result  # Return the last failed result
```
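A concrete `generate_fn` is just an async callable from prompt to text; this sketch wires one up with the assumed OpenAI SDK:

```python
import asyncio
from openai import AsyncOpenAI  # assumed SDK

client = AsyncOpenAI()

async def generate(prompt: str) -> str:
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

validator = RetryingValidator(client, StructuredAnswer, generate)
result = asyncio.run(validator.generate_validated(
    "What is the capital of France? Return JSON with answer and confidence."
))
print(result.valid, result.data)
```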
Production Validation Service
```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional

app = FastAPI()

# Initialize with a real async OpenAI client at startup
client = None


class ValidateSchemaRequest(BaseModel):
    output: str
    schema_name: str


class ValidateSemanticRequest(BaseModel):
    output: str
    context: str
    requirements: Optional[list[str]] = None


class RepairRequest(BaseModel):
    output: str
    schema_name: str
    errors: list[str]


class PipelineRequest(BaseModel):
    output: str
    schema_name: str
    context: Optional[str] = None
    requirements: Optional[list[str]] = None


# Schema registry
SCHEMAS = {
    "entity_extraction": ExtractionResult,
    "code_generation": CodeGeneration,
    "structured_answer": StructuredAnswer
}


@app.post("/v1/validate/schema")
async def validate_schema(request: ValidateSchemaRequest):
    """Validate output against a registered schema."""
    if request.schema_name not in SCHEMAS:
        raise HTTPException(400, f"Unknown schema: {request.schema_name}")
    schema = SCHEMAS[request.schema_name]
    validator = SchemaValidator(schema)
    result = validator.validate(request.output)
    return {
        "valid": result.valid,
        "data": result.data.model_dump() if result.data else None,
        "errors": result.errors
    }


@app.post("/v1/validate/semantic")
async def validate_semantic(request: ValidateSemanticRequest):
    """Validate semantic quality."""
    validator = SemanticValidator(client)
    result = await validator.validate(
        request.output,
        request.context,
        request.requirements
    )
    return {
        "valid": result.valid,
        "quality": result.quality.value,
        "issues": result.issues,
        "suggestions": result.suggestions
    }


@app.post("/v1/repair")
async def repair_output(request: RepairRequest):
    """Repair malformed output."""
    if request.schema_name not in SCHEMAS:
        raise HTTPException(400, f"Unknown schema: {request.schema_name}")
    schema = SCHEMAS[request.schema_name]
    repairer = OutputRepairer(client, schema)
    result = await repairer.repair(request.output, request.errors)
    return {
        "success": result.success,
        "data": result.data.model_dump() if result.data else None,
        "repaired": result.repaired,
        "repair_actions": result.repair_actions
    }


@app.post("/v1/validate/pipeline")
async def validate_pipeline(request: PipelineRequest):
    """Run the complete validation pipeline."""
    if request.schema_name not in SCHEMAS:
        raise HTTPException(400, f"Unknown schema: {request.schema_name}")
    schema = SCHEMAS[request.schema_name]
    pipeline = ValidationPipeline(schema, client)
    result = await pipeline.validate(
        request.output,
        request.context,
        request.requirements
    )
    return {
        "valid": result.valid,
        "data": result.data.model_dump() if result.data else None,
        "attempts": result.attempts,
        "schema_valid": result.schema_result.valid if result.schema_result else None,
        "semantic_valid": result.semantic_result.valid if result.semantic_result else None,
        "repaired": result.repair_result is not None
    }


@app.get("/health")
async def health():
    return {"status": "healthy"}
```
References
- Pydantic Documentation: https://docs.pydantic.dev/
- Instructor Library: https://github.com/jxnl/instructor
- Guardrails AI: https://github.com/guardrails-ai/guardrails
- Outlines Library: https://github.com/outlines-dev/outlines
Conclusion
Output validation is the bridge between LLM text generation and reliable application behavior. Start with schema validation using Pydantic—define exactly what structure you expect and let validation catch malformed responses. Add semantic validation when content quality matters—checking for relevance, accuracy, and consistency. Build repair mechanisms for when validation fails—JSON syntax repair handles common formatting issues, while LLM-based repair can fix more complex problems. Combine these into a validation pipeline that tries multiple strategies before giving up. The key insight is that LLMs are probabilistic—they won’t always produce perfect output on the first try. Robust validation and repair mechanisms turn unreliable text generation into dependable structured data. Invest in validation infrastructure early and your LLM applications will be production-ready from day one.
