## Introduction

Getting LLMs to output valid JSON, XML, or other structured formats is surprisingly difficult. Models hallucinate extra fields, forget closing brackets, and produce malformed output that breaks downstream systems. Prompt engineering helps but doesn't guarantee validity. This guide covers techniques for reliable structured generation: native JSON mode and structured outputs, constrained decoding with grammar-based approaches, schema validation with automatic repair, and robust pipelines that handle edge cases gracefully.

## Native Structured Outputs

OpenAI-style APIs offer two levels of support: JSON mode (`response_format={"type": "json_object"}`), which guarantees syntactically valid JSON but not schema adherence, and structured outputs, which constrain decoding to a Pydantic model you supply. The client below wraps both.
```python
from typing import Any, Optional, Type, TypeVar

from pydantic import BaseModel, Field
import json

T = TypeVar('T', bound=BaseModel)


class StructuredOutputClient:
    """Client for structured output generation."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate_json(
        self,
        prompt: str,
        system_prompt: Optional[str] = None
    ) -> dict:
        """Generate JSON output using native JSON mode."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)

    async def generate_typed(
        self,
        prompt: str,
        response_type: Type[T],
        system_prompt: Optional[str] = None
    ) -> T:
        """Generate typed output using structured outputs."""
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        response = await self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=response_type
        )
        return response.choices[0].message.parsed


# Example Pydantic models for structured output
class ExtractedEntity(BaseModel):
    """An extracted entity."""
    name: str = Field(description="Name of the entity")
    entity_type: str = Field(description="Type: person, organization, location, etc.")
    confidence: float = Field(ge=0, le=1, description="Confidence score 0-1")


class EntityExtractionResult(BaseModel):
    """Result of entity extraction."""
    entities: list[ExtractedEntity] = Field(description="List of extracted entities")
    text_summary: str = Field(description="Brief summary of the text")


class SentimentAnalysis(BaseModel):
    """Sentiment analysis result."""
    sentiment: str = Field(description="positive, negative, or neutral")
    confidence: float = Field(ge=0, le=1)
    key_phrases: list[str] = Field(description="Key phrases indicating sentiment")
    explanation: str = Field(description="Brief explanation of sentiment")


class CodeReview(BaseModel):
    """Code review result."""
    issues: list[dict] = Field(description="List of issues found")
    suggestions: list[str] = Field(description="Improvement suggestions")
    overall_quality: int = Field(ge=1, le=10, description="Quality score 1-10")
    summary: str = Field(description="Review summary")


# Usage helpers
async def extract_entities(client: StructuredOutputClient, text: str) -> EntityExtractionResult:
    """Extract entities from text."""
    prompt = f"""Extract all named entities from this text:

{text}

Identify people, organizations, locations, and other named entities."""
    return await client.generate_typed(
        prompt=prompt,
        response_type=EntityExtractionResult,
        system_prompt="You are an expert at named entity recognition."
    )


async def analyze_sentiment(client: StructuredOutputClient, text: str) -> SentimentAnalysis:
    """Analyze sentiment of text."""
    prompt = f"""Analyze the sentiment of this text:

{text}"""
    return await client.generate_typed(
        prompt=prompt,
        response_type=SentimentAnalysis,
        system_prompt="You are a sentiment analysis expert."
    )
```
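
To make the wiring concrete, here is a minimal usage sketch. It assumes the official `openai` package with an `OPENAI_API_KEY` in the environment; the input text is illustrative.

```python
# Minimal sketch: driving the helpers above with the official OpenAI SDK.
# Assumes `pip install openai` and OPENAI_API_KEY set in the environment.
import asyncio

from openai import AsyncOpenAI


async def main():
    client = StructuredOutputClient(AsyncOpenAI(), model="gpt-4o-mini")
    result = await extract_entities(
        client, "Ada Lovelace worked with Charles Babbage in London."
    )
    print(result.text_summary)
    for entity in result.entities:
        print(f"{entity.name} ({entity.entity_type}): {entity.confidence:.2f}")


asyncio.run(main())
```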
## Schema-Based Generation

When provider-side schema enforcement isn't available, you can build the JSON Schema yourself, embed it in the prompt, and combine it with JSON mode. The builder below also renders human-readable field instructions for the prompt.
````python
from dataclasses import dataclass
from typing import Any, Optional
import json


@dataclass
class SchemaField:
    """A field in a schema."""
    name: str
    field_type: str
    description: str
    required: bool = True
    enum_values: Optional[list[str]] = None
    default: Any = None


class SchemaBuilder:
    """Build JSON Schemas for structured output."""

    def __init__(self):
        self.fields: list[SchemaField] = []

    def add_string(
        self,
        name: str,
        description: str,
        required: bool = True,
        enum_values: Optional[list[str]] = None
    ) -> "SchemaBuilder":
        """Add a string field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="string",
            description=description,
            required=required,
            enum_values=enum_values
        ))
        return self

    def add_number(
        self,
        name: str,
        description: str,
        required: bool = True
    ) -> "SchemaBuilder":
        """Add a number field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="number",
            description=description,
            required=required
        ))
        return self

    def add_boolean(
        self,
        name: str,
        description: str,
        required: bool = True
    ) -> "SchemaBuilder":
        """Add a boolean field."""
        self.fields.append(SchemaField(
            name=name,
            field_type="boolean",
            description=description,
            required=required
        ))
        return self

    def add_array(
        self,
        name: str,
        description: str,
        item_type: str = "string",
        required: bool = True
    ) -> "SchemaBuilder":
        """Add an array field."""
        self.fields.append(SchemaField(
            name=name,
            field_type=f"array[{item_type}]",
            description=description,
            required=required
        ))
        return self

    def build_json_schema(self) -> dict:
        """Build a JSON Schema dict."""
        properties = {}
        required = []
        for field in self.fields:
            prop = {"description": field.description}
            if field.field_type == "string":
                prop["type"] = "string"
                if field.enum_values:
                    prop["enum"] = field.enum_values
            elif field.field_type == "number":
                prop["type"] = "number"
            elif field.field_type == "boolean":
                prop["type"] = "boolean"
            elif field.field_type.startswith("array"):
                prop["type"] = "array"
                item_type = field.field_type.split("[")[1].rstrip("]")
                prop["items"] = {"type": item_type}
            properties[field.name] = prop
            if field.required:
                required.append(field.name)
        return {
            "type": "object",
            "properties": properties,
            "required": required
        }

    def build_prompt_instructions(self) -> str:
        """Build prompt instructions for the schema."""
        lines = ["Respond with a JSON object containing these fields:"]
        for field in self.fields:
            req = "(required)" if field.required else "(optional)"
            type_hint = field.field_type
            if field.enum_values:
                type_hint = f"one of: {', '.join(field.enum_values)}"
            lines.append(f"- {field.name} ({type_hint}) {req}: {field.description}")
        return "\n".join(lines)


class SchemaEnforcedGenerator:
    """Generate output guided by an explicit schema in the prompt."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate(
        self,
        prompt: str,
        schema: dict,
        instructions: Optional[str] = None
    ) -> dict:
        """Generate output matching the schema."""
        system_prompt = "You are a helpful assistant that always responds with valid JSON."
        if instructions:
            system_prompt += f"\n\n{instructions}"
        # Embed the schema in the prompt
        full_prompt = f"""{prompt}

Respond with JSON matching this schema:

```json
{json.dumps(schema, indent=2)}
```"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": full_prompt}
            ],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)
````
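
The builder is plain Python, so it can be exercised without an API call. A quick sketch of composing a sentiment schema and the instructions it renders:

```python
# Sketch: composing a schema fluently and rendering prompt instructions.
builder = (
    SchemaBuilder()
    .add_string("sentiment", "Overall sentiment",
                enum_values=["positive", "negative", "neutral"])
    .add_number("confidence", "Confidence score between 0 and 1")
    .add_array("key_phrases", "Phrases that drive the sentiment")
)
schema = builder.build_json_schema()  # pass to SchemaEnforcedGenerator.generate

print(builder.build_prompt_instructions())
# Respond with a JSON object containing these fields:
# - sentiment (one of: positive, negative, neutral) (required): Overall sentiment
# - confidence (number) (required): Confidence score between 0 and 1
# - key_phrases (array[string]) (required): Phrases that drive the sentiment
```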
## Output Validation and Repair

Even with JSON mode, treat model output as untrusted: extract the JSON, parse it, validate it against a Pydantic model, and fall back to rule-based and then LLM-assisted repair. The robust generator at the end ties these layers together with retries.
````python
from dataclasses import dataclass
from typing import Any, Optional, Type, TypeVar

from pydantic import BaseModel, ValidationError
import json
import re

T = TypeVar('T', bound=BaseModel)


@dataclass
class ValidationResult:
    """Result of output validation."""
    is_valid: bool
    data: Any = None
    errors: Optional[list[str]] = None
    repaired: bool = False


class OutputValidator:
    """Validate LLM outputs."""

    def validate_json(self, text: str) -> ValidationResult:
        """Validate JSON output."""
        # Try to extract JSON from surrounding prose first
        json_text = self._extract_json(text)
        if not json_text:
            return ValidationResult(
                is_valid=False,
                errors=["No valid JSON found in output"]
            )
        try:
            data = json.loads(json_text)
            return ValidationResult(is_valid=True, data=data)
        except json.JSONDecodeError as e:
            return ValidationResult(
                is_valid=False,
                errors=[f"JSON parse error: {str(e)}"]
            )

    def _extract_json(self, text: str) -> Optional[str]:
        """Extract JSON from text that may contain other content."""
        # Prefer JSON inside fenced code blocks
        code_block_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
        if code_block_match:
            return code_block_match.group(1)
        # Fall back to the outermost JSON object...
        brace_match = re.search(r'\{[\s\S]*\}', text)
        if brace_match:
            return brace_match.group(0)
        # ...or the outermost JSON array
        bracket_match = re.search(r'\[[\s\S]*\]', text)
        if bracket_match:
            return bracket_match.group(0)
        return None

    def validate_pydantic(
        self,
        data: dict,
        model: Type[T]
    ) -> ValidationResult:
        """Validate data against a Pydantic model."""
        try:
            validated = model.model_validate(data)
            return ValidationResult(is_valid=True, data=validated)
        except ValidationError as e:
            errors = [f"{err['loc']}: {err['msg']}" for err in e.errors()]
            return ValidationResult(is_valid=False, errors=errors)


class OutputRepairer:
    """Repair invalid LLM outputs."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    def repair_json(self, text: str) -> Optional[dict]:
        """Attempt to repair malformed JSON with simple heuristics."""
        fixed = text
        # Remove trailing commas before closing braces/brackets
        fixed = re.sub(r',\s*}', '}', fixed)
        fixed = re.sub(r',\s*]', ']', fixed)
        # Add missing quotes around bare object keys
        fixed = re.sub(r'(\{|\,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', fixed)
        # Convert single quotes to double quotes (naive: breaks on
        # apostrophes inside string values)
        fixed = fixed.replace("'", '"')
        try:
            return json.loads(fixed)
        except json.JSONDecodeError:
            return None

    async def repair_with_llm(
        self,
        invalid_output: str,
        schema: dict,
        original_prompt: Optional[str] = None
    ) -> Optional[dict]:
        """Use an LLM to repair invalid output."""
        context = f"Original request:\n{original_prompt}\n\n" if original_prompt else ""
        repair_prompt = f"""{context}The following output is invalid or doesn't match the expected schema.
Please fix it and return valid JSON.

Invalid output:
{invalid_output[:1500]}

Expected schema:
{json.dumps(schema, indent=2)}

Return only the corrected JSON, no explanation."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": repair_prompt}],
            response_format={"type": "json_object"}
        )
        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError:
            return None


class RobustStructuredGenerator:
    """Generate structured output with validation and repair."""

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o-mini",
        max_retries: int = 3
    ):
        self.client = client
        self.model = model
        self.max_retries = max_retries
        self.validator = OutputValidator()
        self.repairer = OutputRepairer(client, model)

    async def generate(
        self,
        prompt: str,
        response_type: Type[T],
        system_prompt: Optional[str] = None
    ) -> tuple[Optional[T], list[str]]:
        """Generate with validation and automatic repair."""
        errors = []
        for attempt in range(self.max_retries):
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})
            try:
                response = await self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    response_format={"type": "json_object"}
                )
                output = response.choices[0].message.content
                # Layer 1: is it parseable JSON at all?
                json_result = self.validator.validate_json(output)
                if not json_result.is_valid:
                    errors.extend(json_result.errors)
                    # Layer 2: rule-based repair
                    repaired = self.repairer.repair_json(output)
                    if repaired:
                        json_result = ValidationResult(is_valid=True, data=repaired, repaired=True)
                    else:
                        continue
                # Layer 3: does it match the Pydantic model?
                pydantic_result = self.validator.validate_pydantic(json_result.data, response_type)
                if pydantic_result.is_valid:
                    return pydantic_result.data, errors
                errors.extend(pydantic_result.errors)
                # Layer 4: LLM-assisted repair against the model's schema
                schema = response_type.model_json_schema()
                repaired = await self.repairer.repair_with_llm(output, schema, prompt)
                if repaired:
                    final_result = self.validator.validate_pydantic(repaired, response_type)
                    if final_result.is_valid:
                        return final_result.data, errors
            except Exception as e:
                errors.append(f"Attempt {attempt + 1} failed: {str(e)}")
        return None, errors
````
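
The extraction and rule-based repair paths can be exercised offline. A sketch on a deliberately messy reply (`RobustStructuredGenerator` runs these same layers, plus retries and LLM repair, against a live client):

````python
# Sketch: extraction plus rule-based repair on a deliberately messy reply.
validator = OutputValidator()
repairer = OutputRepairer(client=None)  # rule-based repair needs no LLM client

messy = "Sure! Here it is:\n```json\n{name: 'Ada', fields: ['math', 'engines'],}\n```"

result = validator.validate_json(messy)
print(result.is_valid)    # False: bare key, single quotes, trailing comma
print(result.errors)      # ["JSON parse error: ..."]

# The heuristics fix all three problems in this case
extracted = validator._extract_json(messy)
print(repairer.repair_json(extracted))
# {'name': 'Ada', 'fields': ['math', 'engines']}
````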
## Constrained Decoding

True constrained decoding masks invalid tokens at sampling time, which requires grammar support in the serving stack (llama.cpp GBNF grammars, Outlines, and similar). Against hosted APIs, the practical substitute is prompt-level constraints plus post-hoc validation, which is what the first generator below implements; the second sketches grammar construction for local models.
```python
from dataclasses import dataclass
from enum import Enum
from typing import Any, Optional
import json


class ConstraintType(Enum):
    """Types of output constraints."""
    REGEX = "regex"
    GRAMMAR = "grammar"
    CHOICES = "choices"
    LENGTH = "length"


@dataclass
class OutputConstraint:
    """A constraint on output generation."""
    constraint_type: ConstraintType
    value: Any
    description: str = ""


class ConstrainedGenerator:
    """Generate output with prompt-level constraints."""

    def __init__(self, client: Any, model: str = "gpt-4o-mini"):
        self.client = client
        self.model = model

    async def generate_with_choices(
        self,
        prompt: str,
        choices: list[str]
    ) -> str:
        """Generate output constrained to specific choices."""
        choices_str = ", ".join(f'"{c}"' for c in choices)
        constrained_prompt = f"""{prompt}

You must respond with exactly one of these options: {choices_str}

Respond with only the chosen option, nothing else."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}],
            max_tokens=50
        )
        output = response.choices[0].message.content.strip()
        # Exact match (case-insensitive, ignoring surrounding quotes)
        output_lower = output.lower().strip('"\'')
        for choice in choices:
            if choice.lower() == output_lower:
                return choice
        # Fuzzy match: substring containment in either direction
        for choice in choices:
            if choice.lower() in output_lower or output_lower in choice.lower():
                return choice
        return choices[0]  # Fall back to the first choice

    async def generate_with_length(
        self,
        prompt: str,
        min_length: Optional[int] = None,
        max_length: Optional[int] = None,
        target_length: Optional[int] = None
    ) -> str:
        """Generate output with length constraints."""
        length_instruction = ""
        if target_length:
            length_instruction = f"Your response should be approximately {target_length} words."
        elif min_length and max_length:
            length_instruction = f"Your response should be between {min_length} and {max_length} words."
        elif min_length:
            length_instruction = f"Your response should be at least {min_length} words."
        elif max_length:
            length_instruction = f"Your response should be no more than {max_length} words."
        constrained_prompt = f"""{prompt}

{length_instruction}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}]
        )
        return response.choices[0].message.content

    async def generate_with_format(
        self,
        prompt: str,
        format_template: str,
        placeholders: list[str]
    ) -> dict[str, str]:
        """Generate output matching a format template."""
        placeholders_str = ", ".join(placeholders)
        constrained_prompt = f"""{prompt}

Respond in this exact format:

{format_template}

Where the placeholders are: {placeholders_str}

Respond with JSON containing the placeholder values."""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": constrained_prompt}],
            response_format={"type": "json_object"}
        )
        return json.loads(response.choices[0].message.content)


class GrammarConstrainedGenerator:
    """Build grammars for decode-time constraints (for local models)."""

    def __init__(self):
        # GBNF-flavoured sketches; the string/value/number nonterminals are
        # assumed to come from a base grammar and are not defined here.
        self.grammars = {
            "json_object": r'"{" (string ":" value ("," string ":" value)*)? "}"',
            "json_array": r'"[" (value ("," value)*)? "]"',
            "boolean": r'"true" | "false"',
            "integer": r'"-"? [0-9]+',
            "float": r'"-"? [0-9]+ ("." [0-9]+)?'
        }

    def build_json_grammar(self, schema: dict) -> str:
        """Build a grammar rule from a JSON Schema (simplified)."""
        rules = []
        if schema.get("type") == "object":
            properties = schema.get("properties", {})
            prop_rules = []
            for name, prop in properties.items():
                prop_type = prop.get("type", "string")
                if prop_type == "string":
                    if "enum" in prop:
                        values = " | ".join(f'"{v}"' for v in prop["enum"])
                        prop_rules.append(f'"{name}": ({values})')
                    else:
                        prop_rules.append(f'"{name}": string')
                elif prop_type == "number":
                    prop_rules.append(f'"{name}": number')
                elif prop_type == "boolean":
                    prop_rules.append(f'"{name}": boolean')
                elif prop_type == "array":
                    prop_rules.append(f'"{name}": array')
            rules.append("root ::= \"{\" " + " \",\" ".join(prop_rules) + " \"}\"")
        return "\n".join(rules)
```
## Production Structured Generation Service

The FastAPI service below exposes the pieces above as HTTP endpoints. It assumes the classes and helpers from the previous sections are importable from the same package; client initialization is left as a startup concern.
```python
from typing import Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field, create_model

app = FastAPI()

# Initialize at startup, e.g. with StructuredOutputClient(AsyncOpenAI())
structured_client = None
robust_generator = None
constrained_generator = None


class JsonGenerationRequest(BaseModel):
    prompt: str
    system_prompt: Optional[str] = None


class TypedGenerationRequest(BaseModel):
    prompt: str
    # "schema" shadows a BaseModel attribute, so accept it via an alias
    json_schema: dict = Field(alias="schema")
    system_prompt: Optional[str] = None


class ChoiceGenerationRequest(BaseModel):
    prompt: str
    choices: list[str]


class LengthConstrainedRequest(BaseModel):
    prompt: str
    min_length: Optional[int] = None
    max_length: Optional[int] = None
    target_length: Optional[int] = None


class EntityExtractionRequest(BaseModel):
    text: str


class SentimentRequest(BaseModel):
    text: str


class ValidationRequest(BaseModel):
    text: str


class RepairRequest(BaseModel):
    text: str
    json_schema: Optional[dict] = Field(default=None, alias="schema")


@app.post("/v1/generate/json")
async def generate_json(request: JsonGenerationRequest):
    """Generate JSON output."""
    result = await structured_client.generate_json(
        request.prompt,
        request.system_prompt
    )
    return {"result": result}


@app.post("/v1/generate/typed")
async def generate_typed(request: TypedGenerationRequest):
    """Generate typed output matching a schema."""
    # Create a dynamic Pydantic model from the JSON Schema
    schema = request.json_schema
    fields = {}
    for name, prop in schema.get("properties", {}).items():
        field_type = str
        if prop.get("type") == "number":
            field_type = float
        elif prop.get("type") == "integer":
            field_type = int
        elif prop.get("type") == "boolean":
            field_type = bool
        elif prop.get("type") == "array":
            field_type = list
        required = name in schema.get("required", [])
        if required:
            fields[name] = (field_type, ...)
        else:
            fields[name] = (Optional[field_type], None)
    DynamicModel = create_model("DynamicModel", **fields)
    result, errors = await robust_generator.generate(
        request.prompt,
        DynamicModel,
        request.system_prompt
    )
    if result:
        return {"result": result.model_dump(), "errors": errors}
    else:
        raise HTTPException(status_code=422, detail={"errors": errors})


@app.post("/v1/generate/choice")
async def generate_choice(request: ChoiceGenerationRequest):
    """Generate output from constrained choices."""
    result = await constrained_generator.generate_with_choices(
        request.prompt,
        request.choices
    )
    return {"result": result, "choices": request.choices}


@app.post("/v1/generate/length-constrained")
async def generate_length_constrained(request: LengthConstrainedRequest):
    """Generate output with length constraints."""
    result = await constrained_generator.generate_with_length(
        request.prompt,
        request.min_length,
        request.max_length,
        request.target_length
    )
    word_count = len(result.split())
    return {
        "result": result,
        "word_count": word_count,
        "constraints": {
            "min_length": request.min_length,
            "max_length": request.max_length,
            "target_length": request.target_length
        }
    }


@app.post("/v1/extract/entities")
async def extract_entities_endpoint(request: EntityExtractionRequest):
    """Extract entities from text."""
    # Delegates to the module-level extract_entities helper defined earlier
    result = await extract_entities(structured_client, request.text)
    return {
        "entities": [e.model_dump() for e in result.entities],
        "summary": result.text_summary
    }


@app.post("/v1/analyze/sentiment")
async def analyze_sentiment_endpoint(request: SentimentRequest):
    """Analyze sentiment of text."""
    result = await analyze_sentiment(structured_client, request.text)
    return {
        "sentiment": result.sentiment,
        "confidence": result.confidence,
        "key_phrases": result.key_phrases,
        "explanation": result.explanation
    }


@app.post("/v1/validate/json")
async def validate_json(request: ValidationRequest):
    """Validate JSON output."""
    validator = OutputValidator()
    result = validator.validate_json(request.text)
    return {
        "is_valid": result.is_valid,
        "data": result.data,
        "errors": result.errors
    }


@app.post("/v1/repair/json")
async def repair_json(request: RepairRequest):
    """Repair invalid JSON."""
    repairer = OutputRepairer(structured_client.client)
    # Try rule-based repair first
    repaired = repairer.repair_json(request.text)
    if repaired:
        return {"repaired": repaired, "method": "simple"}
    # Fall back to LLM repair if a schema was provided
    if request.json_schema:
        repaired = await repairer.repair_with_llm(request.text, request.json_schema)
        if repaired:
            return {"repaired": repaired, "method": "llm"}
    raise HTTPException(status_code=422, detail="Could not repair JSON")


@app.get("/health")
async def health():
    return {"status": "healthy"}
```
## References
- OpenAI Structured Outputs: https://platform.openai.com/docs/guides/structured-outputs
- Instructor Library: https://python.useinstructor.com/
- Outlines (Grammar-based): https://github.com/outlines-dev/outlines
- Pydantic: https://docs.pydantic.dev/
## Conclusion

Reliable structured generation requires multiple layers of defense. Native JSON mode and structured outputs from API providers are the most reliable approach when available, because the model is constrained at the decoding level to produce valid output. Schema-based generation with clear instructions helps when native support isn't available. Validation catches errors that slip through, and repair mechanisms, both rule-based and LLM-assisted, recover from failures gracefully. Constrained decoding with choices, length limits, and format templates handles more specific use cases.

The key insight: never trust raw LLM output for structured data. Always validate, and be prepared to retry or repair. Start with native structured outputs when available, add Pydantic validation for type safety, implement repair mechanisms for robustness, and build retry logic to handle edge cases. The goal is structured data extraction reliable enough that it never breaks your downstream systems.
