LLM Output Parsing: From Raw Text to Typed Objects

Introduction: LLMs generate text, but applications need structured data. Parsing LLM output reliably is surprisingly tricky—models don’t always follow instructions, JSON can be malformed, and edge cases abound. This guide covers robust output-parsing strategies: using JSON mode to obtain syntactically valid JSON, validating with Pydantic for type-safe parsing, handling partial and streaming outputs, retrying with error feedback when parsing fails, and building fallback strategies. Together, these patterns let your application handle LLM output gracefully, even when the model misbehaves.

Basic JSON Parsing

import asyncio
import json
import re
from typing import Any, AsyncIterator, Optional

from openai import AsyncOpenAI, OpenAI

# Shared synchronous client used by the request helpers in this file. It is
# constructed with no arguments, so credentials and endpoint come from the
# library's defaults (presumably the OPENAI_API_KEY environment variable -
# TODO confirm).
client = OpenAI()

def extract_json(text: str) -> Optional[Any]:
    """Extract the first JSON value embedded in an LLM response.

    Strategies are tried in order:

    1. Parse the whole text as JSON.
    2. Parse the contents of the first markdown code fence.
    3. Scan for the first ``{`` or ``[`` that begins a decodable JSON
       object/array and decode exactly that value, ignoring what follows.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``None``.

    Returns:
        The decoded value (normally a dict or list), or ``None`` when nothing
        parseable is found.
    """
    if not text:
        return None

    # Try direct parsing first
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Try to find JSON in markdown code blocks
    fenced = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass

    # Decode from each candidate opener in document order. raw_decode stops at
    # the end of the first complete value, so trailing prose or a second JSON
    # value does not break parsing, and an array wrapping objects is returned
    # whole instead of being reduced to one of its inner objects.
    decoder = json.JSONDecoder()
    for opener in re.finditer(r'[\[{]', text):
        try:
            value, _end = decoder.raw_decode(text, opener.start())
        except json.JSONDecodeError:
            continue
        return value

    return None

def get_json_response(prompt: str, model: str = "gpt-4o") -> dict:
    """Ask the model for JSON and return the parsed value.

    The request relies on a system instruction only (no JSON mode), so the
    reply is passed through ``extract_json`` to cope with code fences or
    surrounding prose.

    Args:
        prompt: User message sent to the model.
        model: Chat model name.

    Returns:
        The value produced by ``extract_json`` for the reply.

    Raises:
        ValueError: If no JSON could be extracted from the reply; the message
            includes the first 200 characters of the reply.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "Always respond with valid JSON. No markdown, no explanation."
            },
            {"role": "user", "content": prompt}
        ]
    )

    # The API may return no text content at all; normalise to "" so the
    # failure surfaces as the ValueError below rather than a TypeError.
    text = response.choices[0].message.content or ""
    result = extract_json(text)

    if result is None:
        raise ValueError(f"Could not parse JSON from response: {text[:200]}")

    return result

# Usage
# NOTE: this is a top-level statement, so it issues a live API request via
# get_json_response as soon as the module runs, and propagates the ValueError
# raised there when no JSON can be extracted from the reply.
data = get_json_response(
    "List 3 programming languages with their year of creation as JSON array"
)
print(data)

JSON Mode for Guaranteed Valid JSON

def get_structured_response(
    prompt: str,
    system: str = "You are a helpful assistant that responds in JSON.",
    model: str = "gpt-4o"
) -> dict:
    """Request a JSON object from the model using JSON mode and parse it.

    JSON mode (``response_format={"type": "json_object"}``) constrains the
    model to emit a JSON object, but the reply can still be unusable: it may
    be cut off by the token limit, or carry no text content at all.

    Args:
        prompt: User message sent to the model.
        system: System message. NOTE(review): the API is documented to require
            the word "JSON" somewhere in the messages when JSON mode is on -
            keep that in mind when overriding the default.
        model: Chat model name.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the reply has no text content.
        json.JSONDecodeError: If the reply is not complete, valid JSON
            (for example because it was truncated).
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": prompt}
        ],
        response_format={"type": "json_object"}
    )

    choice = response.choices[0]
    content = choice.message.content
    if content is None:
        # json.loads(None) would raise an opaque TypeError; report the cause.
        raise ValueError(
            f"Model returned no content (finish_reason={choice.finish_reason!r})"
        )

    return json.loads(content)

# Usage - the request is made with JSON mode enabled (see
# get_structured_response); json.loads there still raises if the reply is not
# complete, valid JSON.
result = get_structured_response(
    "Extract the person's name and age from: 'John Smith is 35 years old'"
)
# Illustrative result (actual model output may differ):
# {"name": "John Smith", "age": 35}

# With schema guidance in prompt: the schema is plain prompt text, nothing in
# this call validates the reply against it.
result = get_structured_response(
    """Extract product info. Return JSON with schema:
    {"name": string, "price": number, "in_stock": boolean}
    
    Product: iPhone 15 Pro - $999, currently available""",
    system="Extract structured data as JSON matching the provided schema."
)
# Illustrative result (actual model output may differ):
# {"name": "iPhone 15 Pro", "price": 999, "in_stock": true}

Pydantic for Type-Safe Parsing

from pydantic import BaseModel, Field, ValidationError
from typing import Optional
from enum import Enum

# Closed set of task priorities. The str mixin makes each member a string
# equal to its value, so Priority.LOW == "low".
# (Documented with comments rather than a docstring on purpose: the class is
# used as a Pydantic field type and a docstring could end up in the generated
# JSON schema - presumably as its description; verify before converting.)
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"

# A single to-do item extracted from model output.
# (Documented with comments rather than a docstring so the JSON schema that
# is embedded in prompts stays unchanged.)
class Task(BaseModel):
    # Required, 1-200 characters.
    title: str = Field(..., min_length=1, max_length=200)
    # Optional free-form details.
    description: Optional[str] = None
    # Defaults to MEDIUM when the model omits it.
    priority: Priority = Priority.MEDIUM
    # ISO date string. The pattern is anchored so the WHOLE value must be
    # YYYY-MM-DD; unanchored, the pattern is applied as a search and values
    # such as "due 2024-03-15 or later" would pass validation.
    due_date: Optional[str] = Field(None, pattern=r'^\d{4}-\d{2}-\d{2}$')
    # Fresh list per instance via default_factory.
    tags: list[str] = Field(default_factory=list)

# Wrapper model: a JSON object with a single "tasks" key holding Task items.
# (Comment instead of docstring so the generated JSON schema is unchanged.)
class TaskList(BaseModel):
    tasks: list[Task]

def parse_with_pydantic(
    prompt: str,
    model_class: type[BaseModel],
    llm_model: str = "gpt-4o"
) -> BaseModel:
    """Query the model in JSON mode and validate the reply as ``model_class``.

    The JSON schema of ``model_class`` is embedded in the system message so
    the model knows which shape to produce; the decoded reply is then checked
    by Pydantic.

    Args:
        prompt: User message sent to the model.
        model_class: Pydantic model describing the expected structure.
        llm_model: Chat model name.

    Returns:
        An instance of ``model_class`` built from the reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValidationError: If the decoded data does not satisfy ``model_class``.
    """
    schema_text = json.dumps(model_class.model_json_schema(), indent=2)
    system_message = {
        "role": "system",
        "content": f"Respond with JSON matching this schema:\n{schema_text}"
    }
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model=llm_model,
        messages=[system_message, user_message],
        response_format={"type": "json_object"}
    )

    payload = json.loads(completion.choices[0].message.content)

    # Pydantic performs both validation and conversion to the typed object.
    return model_class.model_validate(payload)

# Usage
# NOTE: runs a live request when the module executes. Validation errors from
# parse_with_pydantic are not caught here.
task_list = parse_with_pydantic(
    """Extract tasks from this text:
    - Fix the login bug (urgent, due tomorrow)
    - Update documentation
    - Review PR #123 (high priority, due 2024-03-15)""",
    TaskList
)

# priority is a Priority enum member; .value is its underlying string.
for task in task_list.tasks:
    print(f"{task.priority.value}: {task.title}")

Retry Logic for Parsing Failures

from functools import wraps
from typing import TypeVar, Type

# Type variable bound to BaseModel: lets helpers that accept a model class
# (Type[T]) declare that they return an instance of that same class (T).
T = TypeVar('T', bound=BaseModel)

class ParsingError(Exception):
    """Raised when model output cannot be turned into the requested structure.

    Attributes:
        raw_output: The text that failed to parse.
        validation_errors: Structured error details; an empty list when none
            were supplied.
    """

    def __init__(self, message: str, raw_output: str, validation_errors: list = None):
        # Only the message is handed to Exception, so str(err) and err.args
        # contain the message alone.
        super().__init__(message)
        self.raw_output = raw_output
        if validation_errors:
            self.validation_errors = validation_errors
        else:
            self.validation_errors = []

def parse_with_retry(
    prompt: str,
    model_class: Type[T],
    max_retries: int = 3,
    llm_model: str = "gpt-4o"
) -> T:
    """Query the model in JSON mode, retrying with error feedback on failure.

    After a failed attempt the model's reply and a description of the error
    are appended to the conversation so the next attempt can correct it.

    Args:
        prompt: User message sent to the model.
        model_class: Pydantic model describing the expected structure.
        max_retries: Total number of attempts (not additional retries); must
            be at least 1.
        llm_model: Chat model name.

    Returns:
        A validated ``model_class`` instance from the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        ParsingError: If every attempt produced invalid JSON or data that
            failed validation. Describes the last failure and is chained to
            the underlying exception.
    """
    # Without this guard the loop would never run and the final `raise` would
    # try to raise None, producing a confusing TypeError.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    schema = model_class.model_json_schema()

    messages = [
        {
            "role": "system",
            "content": f"Respond with JSON matching this schema:\n{json.dumps(schema, indent=2)}"
        },
        {"role": "user", "content": prompt}
    ]

    last_error = None
    last_cause = None

    for attempt in range(max_retries):
        response = client.chat.completions.create(
            model=llm_model,
            messages=messages,
            response_format={"type": "json_object"}
        )

        # The reply may carry no text content; "" keeps json.loads and the
        # feedback message below well-defined.
        raw_output = response.choices[0].message.content or ""

        try:
            data = json.loads(raw_output)
            return model_class.model_validate(data)

        except json.JSONDecodeError as e:
            last_cause = e
            last_error = ParsingError(
                f"Invalid JSON: {e}",
                raw_output
            )

        except ValidationError as e:
            last_cause = e
            last_error = ParsingError(
                f"Validation failed: {e}",
                raw_output,
                e.errors()
            )

        # Add error feedback for retry (pointless after the final attempt)
        if attempt < max_retries - 1:
            messages.append({"role": "assistant", "content": raw_output})
            messages.append({
                "role": "user",
                "content": f"That response had errors: {last_error}. Please fix and try again."
            })

    raise last_error from last_cause

# Usage with automatic retry
# NOTE: runs live requests when the module executes. max_retries is the total
# number of attempts made by parse_with_retry.
try:
    result = parse_with_retry(
        "Extract: John, 25 years old, software engineer",
        Task,  # Deliberately mismatched model; a retry happens only if decoding or validation fails
        max_retries=2
    )
except ParsingError as e:
    print(f"Parsing failed after retries: {e}")
    # raw_output holds the reply from the last failed attempt.
    print(f"Raw output: {e.raw_output[:200]}")

Streaming Output Parsing

import json
from typing import Iterator, Any

class StreamingJSONParser:
    """Incrementally extract complete top-level JSON objects/arrays from a text stream.

    Text is supplied in arbitrary chunks via ``feed``. Characters outside any
    JSON value (prose, whitespace, stray closers) are skipped. Once an opening
    ``{`` or ``[`` is seen, characters are collected until the brackets
    balance again; the collected span is then decoded and yielded. A span that
    balances but does not decode (for example ``[note]``) is discarded so it
    cannot corrupt later values.

    Attributes:
        buffer: Text of the value in progress received in earlier chunks.
        in_string: True while inside a JSON string of the value in progress.
        escape_next: True when the previous character was a backslash inside
            a string.
        brace_count: Number of currently open ``{``.
        bracket_count: Number of currently open ``[``.
    """

    def __init__(self):
        self.buffer = ""
        self.in_string = False
        self.escape_next = False
        self.brace_count = 0
        self.bracket_count = 0

    def _reset(self) -> None:
        """Forget the value in progress and go back to scanning for an opener."""
        self.buffer = ""
        self.in_string = False
        self.escape_next = False
        self.brace_count = 0
        self.bracket_count = 0

    def feed(self, chunk: str) -> Iterator[Any]:
        """Feed a chunk and yield every top-level JSON value it completes.

        This is a generator: the chunk is only consumed while the result is
        iterated, so callers must exhaust it before feeding the next chunk.

        Args:
            chunk: Next piece of streamed text; may split a value anywhere.

        Yields:
            Decoded dicts or lists, in the order they complete.
        """
        # Index in `chunk` where the not-yet-buffered part of the current
        # value starts; slices are appended instead of single characters.
        seg_start = 0

        for index, char in enumerate(chunk):
            if self.brace_count == 0 and self.bracket_count == 0:
                # Outside any value: ignore everything except an opener, so
                # quotes or closers in surrounding prose cannot disturb state.
                if char == '{':
                    self.brace_count = 1
                elif char == '[':
                    self.bracket_count = 1
                else:
                    continue
                seg_start = index
                continue

            if self.escape_next:
                self.escape_next = False
                continue

            if self.in_string:
                if char == '\\':
                    self.escape_next = True
                elif char == '"':
                    self.in_string = False
                continue

            if char == '"':
                self.in_string = True
                continue
            elif char == '{':
                self.brace_count += 1
            elif char == '}':
                self.brace_count -= 1
            elif char == '[':
                self.bracket_count += 1
            elif char == ']':
                self.bracket_count -= 1
            else:
                continue

            if self.brace_count < 0 or self.bracket_count < 0:
                # Mismatched closer such as "[}": this span can never be
                # valid JSON, so drop it and resynchronise.
                self._reset()
                seg_start = index + 1
                continue

            # Check if we have a complete value
            if self.brace_count == 0 and self.bracket_count == 0:
                candidate = self.buffer + chunk[seg_start:index + 1]
                # Reset before yielding so the parser is in a consistent
                # state even if the consumer stops iterating here.
                self._reset()
                seg_start = index + 1
                try:
                    obj = json.loads(candidate)
                except json.JSONDecodeError:
                    continue
                yield obj

        # Keep the unfinished tail of a value for the next chunk.
        if self.brace_count > 0 or self.bracket_count > 0:
            self.buffer += chunk[seg_start:]

async def stream_and_parse(prompt: str) -> AsyncIterator[Any]:
    """Stream an LLM response and yield JSON values as they complete.

    Each streamed text delta is fed to a ``StreamingJSONParser``; whatever
    complete top-level values the parser reports are yielded. Because the
    system prompt asks for a single JSON array, that is normally the whole
    array once its closing bracket has arrived.

    Args:
        prompt: User message sent to the model.

    Yields:
        Decoded JSON values produced by the parser.
    """
    parser = StreamingJSONParser()

    # The module-level `client` is synchronous: its create() result cannot be
    # awaited or iterated with `async for`. Streaming from a coroutine needs
    # the async client.
    async_client = AsyncOpenAI()

    stream = await async_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "Respond with a JSON array of objects."},
            {"role": "user", "content": prompt}
        ],
        stream=True
    )

    async for chunk in stream:
        # Guard against chunks without choices (presumably possible for
        # bookkeeping-only chunks - verify against the API reference).
        if not chunk.choices:
            continue

        content = chunk.choices[0].delta.content
        if content:
            for obj in parser.feed(content):
                yield obj

# Usage
async def process_stream():
    """Example consumer: print every value produced by ``stream_and_parse``."""
    parsed_items = stream_and_parse("List 5 countries with capitals as JSON array")
    async for item in parsed_items:
        # Handle each value here as soon as it is available.
        print(f"Received: {item}")

Fallback Strategies

from typing import Union, Callable

class OutputParser:
    """Parse model output into ``model_class`` using a chain of strategies.

    Order of attempts in ``parse``:

    1. Decode the whole text as JSON and validate it.
    2. Extract embedded JSON with ``extract_json`` and validate it.
    3. Call each registered fallback parser in registration order.
    """

    def __init__(self, model_class: Type[T]):
        """Store the target model class and start with no fallbacks."""
        self.model_class = model_class
        self.fallbacks: list[Callable[[str], T]] = []

    def add_fallback(self, parser: Callable[[str], T]):
        """Register a fallback parser and return ``self`` to allow chaining.

        Args:
            parser: Callable taking the raw text and returning a parsed
                object; it signals failure by raising any exception.
        """
        self.fallbacks.append(parser)
        return self

    def parse(self, text: str) -> T:
        """Parse ``text`` with the first strategy that succeeds.

        Args:
            text: Raw model output.

        Returns:
            The validated model instance, or whatever a fallback returned.

        Raises:
            ParsingError: If every strategy failed. When a fallback raised,
                the last such exception is attached as the cause.
        """
        # Try JSON mode parsing first
        try:
            data = json.loads(text)
            return self.model_class.model_validate(data)
        except (json.JSONDecodeError, ValidationError):
            pass

        # Try extracting JSON from text
        extracted = extract_json(text)
        # Compare against None rather than truthiness: an empty object or
        # array is a legitimate extraction and may validate against a model
        # whose fields all have defaults.
        if extracted is not None:
            try:
                return self.model_class.model_validate(extracted)
            except ValidationError:
                pass

        # Try fallback parsers. They are best-effort, so any exception just
        # moves on to the next one, but the last failure is remembered to aid
        # debugging.
        last_failure = None
        for fallback in self.fallbacks:
            try:
                return fallback(text)
            except Exception as exc:
                last_failure = exc

        raise ParsingError("All parsing strategies failed", text) from last_failure

# Example: Regex fallback for simple structures
# Minimal target model for the fallback example below: a name and an
# integer age, both required.
class Person(BaseModel):
    name: str
    age: int

def regex_person_parser(text: str) -> Person:
    """Fallback parser that pulls a name and an age out of loosely structured text.

    Recognised name forms:
      * key/value: ``name: John Smith``, ``name = John``, ``"name": "John"``
        (value runs to the next quote, comma, semicolon, brace or newline);
      * sentence: ``name is John Smith`` / ``name John`` (a run of
        capitalised words, so following prose such as "and their age" is
        not swallowed).

    Recognised age forms: ``age: 30``, ``age = 30``, ``age 30``, ``age is 30``.

    Args:
        text: Raw model output.

    Returns:
        A ``Person`` built from the extracted fields.

    Raises:
        ValueError: If a name or an age cannot be found.
    """
    # Key/value form first; it is the least ambiguous.
    name_match = re.search(
        r'''\bname["']?\s*[:=]\s*["']?([^",;\n}]+)''',
        text,
        re.IGNORECASE,
    )
    if not name_match:
        # Sentence form. Only the keywords are case-insensitive; the name
        # itself must be capitalised so the capture stops at ordinary words.
        name_match = re.search(
            r"\b(?i:name)(?:\s+(?i:is))?\s+"
            r"([A-Z][A-Za-z'\-]*(?:[ \t]+[A-Z][A-Za-z'\-]*)*)",
            text,
        )

    # \b on both sides keeps words like "page" or "agent" from matching, and
    # the optional "is" covers sentences such as "their age is 30".
    age_match = re.search(
        r'''\bage\b(?:\s+is)?[\s:="']*(\d+)''',
        text,
        re.IGNORECASE,
    )

    # Trim whitespace and a closing quote left over from quoted values.
    name = name_match.group(1).strip().strip("'\"").strip() if name_match else ""

    if not name or not age_match:
        raise ValueError("Could not extract person data")

    return Person(
        name=name,
        age=int(age_match.group(1))
    )

# Usage
parser = OutputParser(Person)
parser.add_fallback(regex_person_parser)

# Works with JSON: handled by the first strategy (direct decode + validation)
result = parser.parse('{"name": "John", "age": 30}')

# Plain text contains no JSON, so both JSON strategies fail and the text is
# handed to the registered fallback; the outcome depends on whether
# regex_person_parser's patterns match this sentence (ParsingError otherwise).
result = parser.parse("The person's name is John and their age is 30")

Production Parser Service

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Any

# FastAPI application instance; the @app.post / @app.get decorators below
# register their handlers on it.
app = FastAPI()

# Request body for POST /parse.
class ParseRequest(BaseModel):
    # User message forwarded to the model.
    prompt: str
    # NOTE(review): a field named "schema" presumably shadows an attribute
    # that BaseModel itself defines and may trigger a Pydantic warning -
    # confirm, and consider an aliased field if so.
    schema: dict  # JSON Schema
    # Chat model name passed through to the API call.
    model: str = "gpt-4o"
    # Extra attempts after the first one: the handler loops
    # max_retries + 1 times.
    max_retries: int = 2

# Response body for POST /parse.
class ParseResponse(BaseModel):
    # Decoded JSON value from the model's reply (not validated against the
    # requested schema by the handler).
    data: Any
    # 1-based number of the attempt that succeeded.
    attempts: int
    # The reply text exactly as returned by the model.
    raw_output: str

@app.post("/parse", response_model=ParseResponse)
async def parse_output(request: ParseRequest):
    """Ask the model for JSON matching ``request.schema`` and return it.

    The schema is only embedded in the system prompt; the reply is checked
    for JSON well-formedness, not for conformance to the schema. On invalid
    JSON the reply and the decode error are appended to the conversation and
    the request is retried, up to ``request.max_retries`` additional times.

    Returns:
        ParseResponse with the decoded data, the 1-based attempt number that
        succeeded, and the raw reply text.

    Raises:
        HTTPException: 422 when no attempt produced valid JSON.
    """
    messages = [
        {
            "role": "system",
            "content": f"Respond with JSON matching this schema:\n{json.dumps(request.schema, indent=2)}"
        },
        {"role": "user", "content": request.prompt}
    ]

    # A negative value would skip the loop entirely and end in the generic
    # 500 below; treat it as "no retries" so one attempt is always made.
    # NOTE(review): there is no upper bound on client-supplied retries.
    max_retries = max(request.max_retries, 0)

    for attempt in range(max_retries + 1):
        # `client` is the synchronous SDK client. Calling it directly inside
        # this coroutine would block the event loop for the whole request, so
        # the call is run in a worker thread instead.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=request.model,
            messages=messages,
            response_format={"type": "json_object"},
        )

        # The reply may carry no text content; "" makes that case follow the
        # normal invalid-JSON path instead of raising TypeError.
        raw_output = response.choices[0].message.content or ""

        try:
            data = json.loads(raw_output)
        except json.JSONDecodeError as e:
            if attempt < max_retries:
                messages.append({"role": "assistant", "content": raw_output})
                messages.append({
                    "role": "user",
                    "content": f"Invalid JSON: {e}. Please fix."
                })
                continue

            raise HTTPException(
                status_code=422,
                detail=f"Failed to parse after {attempt + 1} attempts"
            ) from e

        # Basic schema validation
        # In production, use jsonschema library

        return ParseResponse(
            data=data,
            attempts=attempt + 1,
            raw_output=raw_output
        )

    # Not expected to be reached: every iteration returns, retries or raises.
    raise HTTPException(status_code=500, detail="Unexpected error")

# Health check
@app.get("/health")
async def health():
    """Report a fixed "healthy" status; performs no checks of its own."""
    status_payload = {"status": "healthy"}
    return status_payload

References

Pydantic: https://docs.pydantic.dev/
OpenAI JSON Mode: https://platform.openai.com/docs/guides/text-generation/json-mode
Instructor: https://github.com/jxnl/instructor
Outlines: https://github.com/outlines-dev/outlines

Conclusion

Robust output parsing is essential for production LLM applications. Use JSON mode when available—it constrains the model to emit syntactically valid JSON (as long as the response is not cut off by the token limit) and eliminates most parsing headaches. JSON mode does not enforce your schema, however, so combine it with Pydantic for type-safe parsing with automatic validation. Implement retry logic that feeds errors back to the model for self-correction. Add fallback parsers for graceful degradation when structured parsing fails. For streaming applications, use incremental JSON parsing to process data as it arrives. The goal is resilient parsing that handles the inherent variability of LLM outputs while providing clean, typed data to your application logic.

Discover more from Code, Cloud & Context

Subscribe to get the latest posts sent to your email.

Searching in

Code, Cloud & Context

Categories

Archives

A sample text widget