Theory is great, but you’re here to build things. Let’s create a real AI agent—a code review assistant that can analyze pull requests, find issues, and explain its reasoning.
This isn’t a toy example. I’ve deployed variations of this in production. You’ll see the full architecture, edge cases, and the decisions that matter.
Series Navigation: Part 1: GenAI Intro → Part 2: LLMs → Part 3: Frameworks → Part 4: Agentic AI → Part 5: Building Agents (You are here) → Part 6: Enterprise
What We’re Building
A code review agent that:
- Analyzes code changes in a PR
- Identifies bugs, security issues, and style problems
- Suggests specific improvements with code examples
- Explains reasoning for each suggestion
- Uses tools to gather context (file contents, diffs, git history, codebase search)
Architecture Overview
┌──────────────────────────────────────────────────────────────────┐
│ CODE REVIEW AGENT │
├──────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ ANALYZE │ → │ REVIEW │ → │ SYNTHESIZE │ │
│ │ (Gather │ │ (Find │ │ (Create │ │
│ │ context) │ │ issues) │ │ report) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │ │ │ │
│ └──────────────────┴──────────────────┘ │
│ ↓ │
│ ┌───────────┐ │
│ │ TOOLS │ │
│ └───────────┘ │
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
│ │read_file│ │git_blame│ │ search │ │get_diff │ │
│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
│ │
└──────────────────────────────────────────────────────────────────┘
Step 1: Define the Tools
# tools.py
import subprocess
import os
from pathlib import Path
from typing import Optional
class CodeReviewTools:
    """Tools for the code review agent.

    Every public method returns a JSON-serialisable dict with a boolean
    ``"success"`` key. Failures are reported through an ``"error"`` message
    instead of an exception so the result can be handed straight back to the
    model.
    """

    # Upper bound, in seconds, for every git/grep subprocess.
    _TIMEOUT_SECONDS = 30

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)

    @staticmethod
    def _failure(message: str) -> dict:
        """Build the standard error payload."""
        return {"error": message, "success": False}

    def _run(self, argv: list) -> subprocess.CompletedProcess:
        """Run *argv* inside the repository, capturing stdout/stderr as text."""
        return subprocess.run(
            argv,
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            timeout=self._TIMEOUT_SECONDS,
        )

    def read_file(self, filepath: str, start_line: Optional[int] = None,
                  end_line: Optional[int] = None) -> dict:
        """Read a file from the repository.

        Args:
            filepath: Path relative to the repository root.
            start_line: First line to return (1-indexed). Defaults to line 1.
            end_line: Last line to return (inclusive). Defaults to the end
                of the file.

        Returns:
            On success: ``filepath``, ``content`` (the requested lines) and
            ``total_lines`` (line count of the whole file, so the caller can
            tell how much lies outside the requested range).
        """
        try:
            # Resolve both sides so "..", absolute paths and symlinks cannot
            # point outside the repository.
            root = self.repo_path.resolve()
            full_path = (root / filepath).resolve()
        except (OSError, ValueError, TypeError, RuntimeError) as e:
            return self._failure(str(e))
        # Check containment before touching the filesystem, so nothing about
        # paths outside the repository is revealed.
        if not full_path.is_relative_to(root):
            return self._failure("Path traversal not allowed")
        if not full_path.is_file():
            return self._failure(f"File not found: {filepath}")
        try:
            content = full_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            return self._failure(str(e))

        lines = content.split("\n")
        total_lines = len(lines)
        if start_line is not None or end_line is not None:
            first = max((start_line or 1) - 1, 0)
            last = total_lines if end_line is None else max(end_line, 0)
            content = "\n".join(lines[first:last])
        return {
            "success": True,
            "filepath": filepath,
            "content": content,
            "total_lines": total_lines,
        }

    def get_git_diff(self, base_branch: str = "main") -> dict:
        """Get the diff of the working tree against *base_branch*."""
        # A leading "-" would be parsed by git as an option, not a revision.
        if not base_branch or base_branch.startswith("-"):
            return self._failure(f"Invalid base branch: {base_branch!r}")
        try:
            result = self._run(
                ["git", "diff", "--unified=3", base_branch, "--"]
            )
        except subprocess.TimeoutExpired:
            return self._failure("Git diff timed out")
        except OSError as e:
            return self._failure(str(e))
        if result.returncode != 0:
            return self._failure(
                result.stderr.strip()
                or f"git diff exited with status {result.returncode}"
            )
        return {
            "success": True,
            "diff": result.stdout,
            "files_changed": self._parse_changed_files(result.stdout),
        }

    def _parse_changed_files(self, diff: str) -> list:
        """Extract changed file paths from ``diff --git`` header lines."""
        prefix = "diff --git a/"
        files = []
        for line in diff.split("\n"):
            if not line.startswith("diff --git"):
                continue
            if line.startswith(prefix):
                # Unless the file was renamed the header is
                # "a/<path> b/<path>"; splitting at the midpoint stays correct
                # even when <path> itself contains " b/".
                rest = line[len(prefix):]
                half, odd = divmod(len(rest) - 3, 2)
                if (half > 0 and not odd and rest[half:half + 3] == " b/"
                        and rest[:half] == rest[half + 3:]):
                    files.append(rest[:half])
                    continue
            parts = line.split(" b/")
            if len(parts) > 1:
                files.append(parts[1])
        return files

    def git_blame(self, filepath: str, line_start: int, line_end: int) -> dict:
        """Get git blame for lines *line_start*..*line_end* of *filepath*."""
        try:
            # "--" keeps a filepath that starts with "-" from being read as
            # an option.
            result = self._run(
                ["git", "blame", "-L", f"{line_start},{line_end}", "--", filepath]
            )
        except subprocess.TimeoutExpired:
            return self._failure("Git blame timed out")
        except OSError as e:
            return self._failure(str(e))
        if result.returncode != 0:
            return self._failure(
                result.stderr.strip()
                or f"git blame exited with status {result.returncode}"
            )
        return {"success": True, "blame": result.stdout}

    def search_codebase(self, pattern: str, file_pattern: str = "*",
                        max_results: int = 20) -> dict:
        """Search for a grep pattern in the codebase.

        Args:
            pattern: Pattern in grep syntax.
            file_pattern: Glob restricting which files are searched.
            max_results: Maximum number of matching lines returned.

        Returns:
            On success: ``matches`` (``path:line:text`` strings), ``count``
            (number of matches returned) and ``truncated`` (True when more
            matches existed than were returned).
        """
        try:
            # "-e" and "--" stop a pattern starting with "-" from being
            # parsed as a grep option.
            result = self._run(
                ["grep", "-r", "-n", f"--include={file_pattern}",
                 "-e", pattern, "--", "."]
            )
        except subprocess.TimeoutExpired:
            return self._failure("Search timed out")
        except OSError as e:
            return self._failure(str(e))
        # grep exits 0 on match and 1 on no match; anything higher is an error.
        if result.returncode > 1:
            return self._failure(
                result.stderr.strip()
                or f"grep exited with status {result.returncode}"
            )
        found = [line for line in result.stdout.split("\n") if line]
        matches = found[:max_results]
        return {
            "success": True,
            "matches": matches,
            "count": len(matches),
            "truncated": len(found) > len(matches),
        }

    def get_function_context(self, filepath: str, function_name: str) -> dict:
        """Get the source of a Python function by name.

        This is an indentation-based heuristic, not a parser: the function is
        taken to end at the first non-blank line indented no deeper than its
        ``def`` line. Decorators above the ``def`` are not included.

        Returns:
            On success: ``function`` (source text), ``start_line`` and
            ``end_line`` (1-indexed, inclusive). An error payload is returned
            when the file cannot be read or the function is not found.
        """
        import re

        file_result = self.read_file(filepath)
        if not file_result["success"]:
            return file_result

        # Anchor on the whole identifier so "foo" does not match "foobar".
        header = re.compile(
            rf"^\s*(?:async\s+)?def\s+{re.escape(function_name)}\b"
        )
        lines = file_result["content"].split("\n")
        start = next(
            (i for i, line in enumerate(lines) if header.match(line)), None
        )
        if start is None:
            return self._failure(
                f"Function not found: {function_name} in {filepath}"
            )

        indent_level = len(lines[start]) - len(lines[start].lstrip())
        function_lines = [lines[start]]
        for line in lines[start + 1:]:
            stripped = line.strip()
            if stripped:
                indent = len(line) - len(line.lstrip())
                # A line starting with ")" at the def's own indent closes a
                # multi-line signature rather than ending the function.
                if indent <= indent_level and not stripped.startswith(")"):
                    break
            function_lines.append(line)
        # Blank lines separating this function from the next are not part of it.
        while function_lines and not function_lines[-1].strip():
            function_lines.pop()

        return {
            "success": True,
            "function": "\n".join(function_lines),
            "start_line": start + 1,
            "end_line": start + len(function_lines),
        }
Step 2: Tool Definitions for OpenAI
# tool_definitions.py
# Function-calling schemas advertised to the model. Each "name" must match a
# public method on CodeReviewTools, and every parameter carries a description
# so the model knows what to pass (paths are repo-relative, line numbers are
# 1-indexed and inclusive).
TOOL_DEFINITIONS = [
    {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "Read the contents of a file from the repository. Can optionally read specific line ranges.",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file relative to repository root"
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "Starting line number (1-indexed, optional)"
                    },
                    "end_line": {
                        "type": "integer",
                        "description": "Ending line number (inclusive, optional)"
                    }
                },
                "required": ["filepath"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_git_diff",
            "description": "Get the git diff showing all changes in the current PR/branch compared to main.",
            "parameters": {
                "type": "object",
                "properties": {
                    "base_branch": {
                        "type": "string",
                        "description": "Base branch to compare against (default: main)"
                    }
                }
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_codebase",
            "description": "Search for a pattern across the codebase. Useful for finding related code, usages, or patterns.",
            "parameters": {
                "type": "object",
                "properties": {
                    "pattern": {
                        "type": "string",
                        "description": "Search pattern (grep syntax)"
                    },
                    "file_pattern": {
                        "type": "string",
                        "description": "File pattern to filter (e.g., '*.py', '*.js')"
                    }
                },
                "required": ["pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "git_blame",
            "description": "Get git blame information for specific lines to understand history and authorship.",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file relative to repository root"
                    },
                    "line_start": {
                        "type": "integer",
                        "description": "First line to blame (1-indexed)"
                    },
                    "line_end": {
                        "type": "integer",
                        "description": "Last line to blame (inclusive)"
                    }
                },
                "required": ["filepath", "line_start", "line_end"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_function_context",
            "description": "Get the full implementation of a specific function.",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file relative to repository root"
                    },
                    "function_name": {
                        "type": "string",
                        "description": "Exact name of the function to extract (without 'def' or parentheses)"
                    }
                },
                "required": ["filepath", "function_name"]
            }
        }
    }
]
Step 3: The Agent Core
# agent.py
from openai import OpenAI
import json
from tools import CodeReviewTools
from tool_definitions import TOOL_DEFINITIONS
# Module-level OpenAI client, constructed with default settings when this
# module is imported and shared by the code below.
client = OpenAI()
# System prompt for the reviewer. It defines the reviewer's responsibilities,
# tone, tool-usage expectations and the section layout of the final review.
# CodeReviewAgent stores it as the first message of every conversation.
SYSTEM_PROMPT = """You are an expert code reviewer with 15+ years of experience.
You're reviewing a pull request.
## Your Responsibilities:
1. Identify bugs, security vulnerabilities, and logic errors
2. Point out performance issues and suggest optimizations
3. Ensure code follows best practices and is maintainable
4. Check for proper error handling and edge cases
5. Verify test coverage for changes
## Review Style:
- Be constructive and specific
- Provide code examples for suggestions
- Explain WHY something is an issue, not just WHAT
- Prioritize issues by severity (Critical, High, Medium, Low)
- Acknowledge good code when you see it
## Available Tools:
Use the provided tools to gather context before making judgments.
Don't guess - if you need to see how something is used elsewhere, search for it.
## Output Format:
After gathering sufficient context, provide your review in this format:
### Summary
[Brief overview of changes and overall assessment]
### Critical Issues
[Issues that must be fixed before merge]
### Suggestions
[Improvements that would make the code better]
### Positive Notes
[What was done well]
"""
class CodeReviewAgent:
    """Tool-using agent that reviews the changes in a local repository.

    The agent keeps one running conversation in ``self.messages``. On each
    turn the model either requests tool calls, which are executed and appended
    to the conversation, or answers with the finished review.
    """

    def __init__(self, repo_path: str):
        self.tools = CodeReviewTools(repo_path)
        self.messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        # Hard cap on model round-trips so a confused model cannot loop forever.
        self.max_iterations = 15

    def _call_llm(self, messages, tools):
        """Send one chat-completion request.

        Kept as a separate method so subclasses can override it (for example
        to add retries) without re-implementing the review loop.
        """
        return client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            temperature=0.1  # Low temp for consistent reviews
        )

    def _execute_tool(self, name: str, args: dict) -> str:
        """Execute a tool and return its result as a JSON string.

        Only public callables on ``self.tools`` can be invoked. The name is
        chosen by the model, so underscore-prefixed helpers and dunder methods
        are reported as unknown instead of being called. Bad arguments and
        tool crashes come back as an error payload so the model can correct
        itself instead of the whole review aborting.
        """
        tool_method = None
        if isinstance(name, str) and not name.startswith("_"):
            tool_method = getattr(self.tools, name, None)
        if not callable(tool_method):
            return json.dumps({"error": f"Unknown tool: {name}"})
        try:
            result = tool_method(**args)
        except Exception as exc:  # tool boundary: report, don't crash the loop
            return json.dumps(
                {"error": f"Tool {name} failed: {exc}", "success": False}
            )
        return json.dumps(result, indent=2)

    def review(self, pr_description: str = "") -> str:
        """Run the code review agent.

        Args:
            pr_description: Author-written PR description; may be empty.

        Returns:
            The review text, or a notice when ``max_iterations`` is reached
            before the model produces a final answer.
        """
        # Initial prompt
        user_message = f"""Please review this pull request.
PR Description:
{pr_description if pr_description else "No description provided."}
Start by getting the diff to see what changed, then gather any additional
context you need before providing your review."""
        self.messages.append({"role": "user", "content": user_message})

        for iteration in range(self.max_iterations):
            print(f"\n--- Iteration {iteration + 1} ---")
            response = self._call_llm(self.messages, TOOL_DEFINITIONS)
            message = response.choices[0].message

            # No tool calls - agent is done
            if not message.tool_calls:
                print("Review complete.")
                # content can be None; honour the declared str return type.
                return message.content or ""

            # Process tool calls
            self.messages.append(message)
            for tool_call in message.tool_calls:
                func_name = tool_call.function.name
                try:
                    func_args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError as exc:
                    # Every tool call must be answered, so feed the parse
                    # error back to the model instead of raising.
                    result = json.dumps(
                        {"error": f"Invalid JSON arguments: {exc}",
                         "success": False}
                    )
                else:
                    print(f" Tool: {func_name}({list(func_args.keys()) if isinstance(func_args, dict) else func_args})")
                    result = self._execute_tool(func_name, func_args)
                self.messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": result
                })

        return "Max iterations reached. Partial review may be incomplete."
# Usage
if __name__ == "__main__":
    # Example PR description handed to the agent (placeholder content).
    pr_summary = """
Added user authentication feature:
- New login endpoint
- JWT token generation
- Password validation
"""
    reviewer = CodeReviewAgent("/path/to/repo")
    report = reviewer.review(pr_summary)

    # Print the finished review under a 60-character banner.
    divider = "=" * 60
    print("\n" + divider)
    print("FINAL REVIEW")
    print(divider)
    print(report)
Step 4: Adding Structured Output
# structured_review.py
from pydantic import BaseModel
from typing import Literal
from openai import OpenAI
client = OpenAI()
# Allowed severity labels for a finding, most to least urgent.
Severity = Literal["critical", "high", "medium", "low"]


# One finding extracted from the free-form review. Documented with comments
# rather than a docstring so the generated JSON schema stays unchanged.
class ReviewIssue(BaseModel):
    severity: Severity
    # File the finding refers to.
    file: str
    # Affected lines as text; the GitHub integration in this file reads the
    # integer before the first "-" as the line to comment on.
    line_range: str
    title: str
    description: str
    suggestion: str
    # Optional code example accompanying the suggestion.
    code_example: str | None = None
# Verdicts the structured review may carry.
OverallRating = Literal["approve", "request_changes", "needs_discussion"]


# Structured form of a complete review: summary, verdict, findings and praise.
# Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.
class CodeReview(BaseModel):
    summary: str
    overall_rating: OverallRating
    issues: list[ReviewIssue]
    positive_notes: list[str]
def get_structured_review(review_text: str) -> CodeReview:
    """Convert free-form review to structured format.

    Args:
        review_text: The review as plain text.

    Returns:
        The review parsed into a ``CodeReview``.

    Raises:
        ValueError: If the response carries no parsed object (for example
            because the model refused), so callers never receive ``None``
            where a ``CodeReview`` is promised.
    """
    response = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "Convert the code review into structured JSON format."
            },
            {
                "role": "user",
                "content": f"Convert this review:\n\n{review_text}"
            }
        ],
        response_format=CodeReview
    )
    message = response.choices[0].message
    if message.parsed is None:
        reason = getattr(message, "refusal", None) or "no parsed content returned"
        raise ValueError(f"Could not structure review: {reason}")
    return message.parsed
Step 5: Integration with GitHub
# github_integration.py
import os
from github import Github
from agent import CodeReviewAgent
from structured_review import get_structured_review
def _first_line_number(line_range: str):
    """Return the first positive integer found in *line_range*, or None."""
    import re

    match = re.search(r"\d+", line_range or "")
    if match is None:
        return None
    number = int(match.group())
    return number if number >= 1 else None


def review_pull_request(repo_name: str, pr_number: int):
    """Review a GitHub PR and post comments.

    Posts one top-level review carrying the summary, then an inline comment
    for every critical/high issue. Inline comments are best-effort: an issue
    whose line cannot be determined, or that GitHub rejects, is skipped with a
    printed notice so the remaining comments are still posted.

    Args:
        repo_name: Repository in ``owner/name`` form.
        pr_number: Pull request number.

    Returns:
        The structured review that was posted.
    """
    # Local import: only this function needs the exception type.
    from github import GithubException

    # GitHub setup
    gh = Github(os.environ["GITHUB_TOKEN"])
    repo = gh.get_repo(repo_name)
    pr = repo.get_pull(pr_number)

    # Clone or update repo locally
    local_path = f"/tmp/repos/{repo_name.replace('/', '_')}"
    # (Git clone/pull logic here)
    # Checkout PR branch
    # (Git checkout logic here)

    # Run agent
    agent = CodeReviewAgent(local_path)
    review_text = agent.review(pr.body or "")

    # Structure the review
    structured = get_structured_review(review_text)

    # Post review to GitHub. "needs_discussion" is neither an approval nor a
    # demand for changes, so it is posted as a plain COMMENT review.
    event_by_rating = {
        "approve": "APPROVE",
        "request_changes": "REQUEST_CHANGES",
    }
    review_event = event_by_rating.get(structured.overall_rating, "COMMENT")

    # Create main review comment
    pr.create_review(
        body=f"## AI Code Review\n\n{structured.summary}",
        event=review_event
    )

    # Post inline comments for each issue
    blocking = [
        issue for issue in structured.issues
        if issue.severity in ("critical", "high")
    ]
    if blocking:
        # Fetch the head commit once instead of once per issue.
        head_commit = pr.get_commits().reversed[0]
    for issue in blocking:
        line = _first_line_number(issue.line_range)
        if line is None:
            print(f"Skipping inline comment for {issue.file}: "
                  f"no line number in {issue.line_range!r}")
            continue
        try:
            # Post as inline comment on the specific file/line
            pr.create_review_comment(
                body=f"**{issue.severity.upper()}**: {issue.title}\n\n{issue.description}\n\n**Suggestion:** {issue.suggestion}",
                commit=head_commit,
                path=issue.file,
                line=line
            )
        except GithubException as exc:
            # E.g. the model pointed at a line GitHub will not accept.
            print(f"Could not post inline comment on {issue.file}:{line}: {exc}")

    return structured
# Usage with GitHub Actions
if __name__ == "__main__":
    import sys

    repo = os.environ.get("GITHUB_REPOSITORY")
    if not repo:
        sys.exit("GITHUB_REPOSITORY is not set")

    # Prefer PR_NUMBER from the environment and only fall back to argv[1].
    # Passing sys.argv[1] as the .get() default would evaluate it eagerly and
    # raise IndexError even when PR_NUMBER is set.
    pr_source = os.environ.get("PR_NUMBER") or (
        sys.argv[1] if len(sys.argv) > 1 else None
    )
    if pr_source is None:
        sys.exit("Provide the PR number via PR_NUMBER or as the first argument")
    pr_num = int(pr_source)

    result = review_pull_request(repo, pr_num)
    print(f"Review posted: {result.overall_rating}")
Production Considerations
Rate Limiting and Caching
# caching.py
import hashlib
import json
import redis
from functools import wraps
redis_client = redis.Redis()
def cache_tool_result(ttl_seconds: int = 3600):
    """Cache tool results to reduce redundant calls.

    Results are stored in Redis as JSON under a key derived from the function
    and its arguments, and expire after *ttl_seconds*.

    The key is built from a stable description of the arguments, not their
    default ``repr``. A bound ``self`` would otherwise contribute its memory
    address, so no two instances or processes would ever share an entry.
    Objects without a custom ``__repr__`` are identified by their type plus
    their ``repo_path`` attribute when they have one.

    Caching is best-effort: if Redis is unreachable the wrapped function
    still runs and its result is returned uncached.
    """
    def _stable(value) -> str:
        """Describe *value* identically across instances and processes."""
        if isinstance(value, (str, bytes, int, float, bool, type(None))):
            return repr(value)
        if hasattr(value, "__fspath__"):
            return repr(str(value))
        if isinstance(value, (list, tuple)):
            return "[" + ",".join(_stable(item) for item in value) + "]"
        if isinstance(value, dict):
            pairs = sorted(
                (_stable(key), _stable(item)) for key, item in value.items()
            )
            return "{" + ",".join(f"{k}:{v}" for k, v in pairs) + "}"
        if type(value).__repr__ is not object.__repr__:
            return repr(value)
        scope = getattr(value, "repo_path", "")
        return f"{type(value).__module__}.{type(value).__qualname__}<{scope}>"

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Create cache key from function identity and arguments
            key_data = (
                f"{func.__module__}.{func.__qualname__}:"
                f"{_stable(args)}:{_stable(kwargs)}"
            )
            # md5 only shortens the key here; it is not a security measure.
            cache_key = hashlib.md5(
                key_data.encode(), usedforsecurity=False
            ).hexdigest()

            # Check cache
            try:
                cached = redis_client.get(cache_key)
            except redis.RedisError:
                cached = None
            if cached is not None:
                return json.loads(cached)

            # Execute and cache
            result = func(*args, **kwargs)
            try:
                redis_client.setex(cache_key, ttl_seconds, json.dumps(result))
            except redis.RedisError:
                pass  # cache write failed; the fresh result is still valid
            return result
        return wrapper
    return decorator
# Apply to tools
class CachedCodeReviewTools(CodeReviewTools):
    """CodeReviewTools variant whose file reads are cached for 300 seconds."""

    @cache_tool_result(ttl_seconds=300)
    def read_file(self, filepath, start_line=None, end_line=None):
        # Delegate to the uncached implementation; the decorator handles
        # cache lookup and storage.
        uncached_read = super().read_file
        return uncached_read(filepath, start_line, end_line)
Error Handling and Recovery
# resilient_agent.py
from tenacity import retry, stop_after_attempt, wait_exponential
class ResilientCodeReviewAgent(CodeReviewAgent):
    """Code review agent with retried LLM calls and a diff-only fallback."""

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def _call_llm(self, messages, tools):
        """LLM call with automatic retry.

        Up to 3 attempts with exponential backoff between 4 and 10 seconds.
        Used by the fallback review; the tool-driven loop benefits only if the
        base class routes its requests through ``_call_llm``.

        Args:
            messages: Chat messages to send.
            tools: Tool schemas to offer. When empty or ``None`` the request
                is sent without ``tools``/``tool_choice``.
        """
        request = {"model": "gpt-4o", "messages": messages, "timeout": 60}
        if tools:
            request["tools"] = tools
            request["tool_choice"] = "auto"
        return client.chat.completions.create(**request)

    def review(self, pr_description: str = "") -> str:
        """Review with error recovery.

        Runs the normal tool-driven review. If it raises, the error is
        reported and a simpler diff-only review is produced instead.
        """
        try:
            return super().review(pr_description)
        except Exception as e:
            # Surface the failure instead of swallowing it, then degrade.
            print(f"Tool-assisted review failed ({e!r}); falling back to diff-only review.")
            # Fallback to simpler review without tools
            return self._simple_review(pr_description)

    def _simple_review(self, pr_description: str) -> str:
        """Fallback review using just the diff.

        Returns an explanatory message instead of raising when the diff
        itself cannot be obtained.
        """
        diff = self.tools.get_git_diff()
        if not diff.get("success"):
            return (
                "Review unavailable: could not obtain the diff "
                f"({diff.get('error', 'unknown error')})."
            )
        prompt = f"Review this diff:\n\n{diff['diff'][:15000]}"
        if pr_description:
            prompt = f"PR Description:\n{pr_description}\n\n{prompt}"
        response = self._call_llm(
            [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ],
            None
        )
        return response.choices[0].message.content or ""
Testing the Agent
# test_agent.py
import pytest
from unittest.mock import Mock, patch
from agent import CodeReviewAgent
class TestCodeReviewAgent:
    """Behavioural tests for CodeReviewAgent with the diff tool stubbed.

    NOTE: only ``get_git_diff`` is replaced. ``agent.review()`` still sends
    requests through the OpenAI client, so these tests need API access.
    """

    @staticmethod
    def _review_diff(diff: str) -> str:
        """Run a review with ``get_git_diff`` stubbed to return *diff*."""
        fake_result = {
            "success": True,
            "diff": diff,
            "files_changed": ["api.py"],
        }
        # Patch by dotted path: this module imports only CodeReviewAgent, so
        # the bare name CodeReviewTools is not available here.
        with patch("tools.CodeReviewTools.get_git_diff",
                   return_value=fake_result):
            agent = CodeReviewAgent("/fake/path")
            return agent.review()

    def test_detects_sql_injection(self):
        """Agent should detect SQL injection vulnerability."""
        # Mock the git diff tool to return vulnerable code
        vulnerable_diff = '''
diff --git a/api.py b/api.py
+def get_user(user_id):
+ query = f"SELECT * FROM users WHERE id = {user_id}"
+ return db.execute(query)
'''
        review = self._review_diff(vulnerable_diff)
        assert "sql injection" in review.lower() or "sanitize" in review.lower()

    def test_approves_good_code(self):
        """Agent should approve well-written code."""
        good_diff = '''
diff --git a/api.py b/api.py
+def get_user(user_id: int) -> User | None:
+ """Fetch user by ID with proper error handling."""
+ try:
+ return User.objects.get(id=user_id)
+ except User.DoesNotExist:
+ return None
'''
        review = self._review_diff(good_diff)
        # Should mention positive aspects
        assert "error handling" in review.lower() or "type hint" in review.lower()
Key Takeaways
- Tools are everything: Well-designed tools make agents effective
- Constrain the loop: Set max iterations, timeouts, and fallbacks
- Cache aggressively: Tool calls are often redundant
- Structure outputs: Use Pydantic/JSON schemas for reliable parsing
- Test with mocks: Agent behavior should be testable
- Plan for failure: Always have fallback paths
What’s Next
In Part 6, we’ll cover enterprise deployment—scaling agents, observability, security, cost management, and what the future holds for GenAI in production systems.
References & Further Reading
- OpenAI Function Calling Guide – platform.openai.com
- GitHub Actions for AI – docs.github.com
- PyGithub Documentation – pygithub.readthedocs.io
- Pydantic for Structured Outputs – docs.pydantic.dev
- Tenacity Retry Library – tenacity.readthedocs.io
Built something cool with AI agents? Share it on GitHub or let me know in the comments!
Discover more from Code, Cloud & Context
Subscribe to get the latest posts sent to your email.