Handle rate limits and transient failures gracefully with exponential backoff.
Code Snippet
```python
import time
import random
from functools import wraps

from openai import OpenAI, RateLimitError, APIError

client = OpenAI()  # reads OPENAI_API_KEY from the environment


def retry_with_backoff(max_retries=3, base_delay=1):
    """Decorator for retrying LLM calls with exponential backoff."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except RateLimitError:
                    if attempt == max_retries - 1:
                        raise
                    # Exponential backoff plus jitter, so throttled clients
                    # don't all retry at the same instant
                    delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                    print(f"Rate limited. Retrying in {delay:.2f}s...")
                    time.sleep(delay)
                except APIError as e:
                    if attempt == max_retries - 1:
                        raise
                    # Transient server errors get the same exponential treatment
                    delay = base_delay * (2 ** attempt)
                    print(f"API error: {e}. Retrying in {delay:.2f}s...")
                    time.sleep(delay)
        return wrapper
    return decorator


@retry_with_backoff(max_retries=5, base_delay=2)
def call_llm(prompt: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```
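With the decorator applied, callers need no retry logic of their own; a quick sanity check (the prompt is illustrative):

```python
if __name__ == "__main__":
    # Rate limits and transient API errors are retried transparently
    print(call_llm("Explain exponential backoff in one sentence."))
```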
Why This Helps
- Handles rate limits automatically instead of surfacing them as hard failures
- Improves reliability of LLM integrations against transient API errors
- Backing off, rather than hammering the API, keeps retry traffic from cascading into further failures
How to Test
- Simulate rate limit errors and confirm the call eventually succeeds (see the sketch below)
- Verify exponential backoff timing by capturing the delays passed to time.sleep
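A minimal pytest-style sketch of both checks, assuming the openai v1 SDK and the retry_with_backoff decorator above in scope; fake_rate_limit_error is a hypothetical helper whose constructor call mirrors how the SDK builds status errors from an httpx response:

```python
from unittest.mock import patch

import httpx
from openai import RateLimitError


def fake_rate_limit_error():
    # Hypothetical helper: builds the error the way the openai v1 SDK raises it,
    # from a 429 httpx response (constructor signature assumed from APIStatusError)
    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
    response = httpx.Response(429, request=request)
    return RateLimitError("rate limited", response=response, body=None)


def test_retries_then_succeeds():
    calls = {"n": 0}

    @retry_with_backoff(max_retries=3, base_delay=1)
    def flaky():
        calls["n"] += 1
        if calls["n"] < 3:
            raise fake_rate_limit_error()
        return "ok"

    with patch("time.sleep") as fake_sleep:  # avoid real waiting in tests
        assert flaky() == "ok"

    # Two failures -> two sleeps, with exponentially growing delays
    delays = [c.args[0] for c in fake_sleep.call_args_list]
    assert len(delays) == 2
    assert 1 <= delays[0] <= 2  # base_delay * 2**0 + jitter in [0, 1]
    assert 2 <= delays[1] <= 3  # base_delay * 2**1 + jitter in [0, 1]
```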
When to Use
All production LLM integrations. Essential for high-volume applications.
Performance/Security Notes
The rate-limit path above already adds jitter; without it, clients throttled at the same moment all retry at the same moment (the thundering-herd problem). Replace the print calls with real logging so retries are visible in monitoring; a sketch of both follows.
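One common variant is "full jitter", where the entire delay is randomized up to the exponential cap. The sketch below shows it alongside the standard logging module in place of print; the logger name and max_delay cap are illustrative choices, not part of the original snippet:

```python
import logging
import random

logger = logging.getLogger("llm.retry")  # logger name is illustrative


def full_jitter_delay(attempt: int, base_delay: float = 1.0, max_delay: float = 30.0) -> float:
    """Full jitter: sleep a uniform random amount up to the exponential cap.

    Spreads concurrent clients' retries across the whole window instead of
    clustering them just above the deterministic backoff value.
    """
    cap = min(max_delay, base_delay * (2 ** attempt))
    return random.uniform(0, cap)


# Inside the except block, instead of print(...) + time.sleep(...):
#     delay = full_jitter_delay(attempt, base_delay)
#     logger.warning("rate limited; retry %d in %.2fs", attempt + 1, delay)
#     time.sleep(delay)
```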
Try this tip in your next project and share your results in the comments!