Introduction: AI agents become powerful when they can use tools—searching the web, querying databases, calling APIs, executing code. But tool selection is where many agent implementations fail. The agent might choose the wrong tool, call tools with incorrect parameters, or get stuck in loops trying tools that won’t work. This guide covers practical patterns for robust tool selection: defining tools with clear schemas, implementing selection strategies, handling tool execution errors, and building agents that reliably choose and use the right tools for each task.

Tool Definition
from dataclasses import dataclass, field
from typing import Any, Callable, Optional
from enum import Enum
import json
class ParameterType(Enum):
STRING = "string"
INTEGER = "integer"
NUMBER = "number"
BOOLEAN = "boolean"
ARRAY = "array"
OBJECT = "object"
@dataclass
class ToolParameter:
"""Definition of a tool parameter."""
name: str
param_type: ParameterType
description: str
required: bool = True
default: Any = None
    enum: Optional[list[str]] = None
@dataclass
class Tool:
"""A tool that an agent can use."""
name: str
description: str
parameters: list[ToolParameter]
function: Callable
examples: list[dict] = field(default_factory=list)
category: str = "general"
def to_openai_schema(self) -> dict:
"""Convert to OpenAI function calling schema."""
properties = {}
required = []
for param in self.parameters:
prop = {
"type": param.param_type.value,
"description": param.description
}
if param.enum:
prop["enum"] = param.enum
properties[param.name] = prop
if param.required:
required.append(param.name)
return {
"type": "function",
"function": {
"name": self.name,
"description": self.description,
"parameters": {
"type": "object",
"properties": properties,
"required": required
}
}
}
async def execute(self, **kwargs) -> Any:
"""Execute the tool with given parameters."""
# Validate required parameters
for param in self.parameters:
if param.required and param.name not in kwargs:
raise ValueError(f"Missing required parameter: {param.name}")
# Apply defaults
for param in self.parameters:
if param.name not in kwargs and param.default is not None:
kwargs[param.name] = param.default
# Execute
import asyncio
if asyncio.iscoroutinefunction(self.function):
return await self.function(**kwargs)
else:
return self.function(**kwargs)
class ToolRegistry:
"""Registry of available tools."""
def __init__(self):
self.tools: dict[str, Tool] = {}
self.categories: dict[str, list[str]] = {}
def register(self, tool: Tool):
"""Register a tool."""
self.tools[tool.name] = tool
if tool.category not in self.categories:
self.categories[tool.category] = []
self.categories[tool.category].append(tool.name)
def get(self, name: str) -> Optional[Tool]:
"""Get a tool by name."""
return self.tools.get(name)
def get_by_category(self, category: str) -> list[Tool]:
"""Get all tools in a category."""
tool_names = self.categories.get(category, [])
return [self.tools[name] for name in tool_names]
def get_all_schemas(self) -> list[dict]:
"""Get OpenAI schemas for all tools."""
return [tool.to_openai_schema() for tool in self.tools.values()]
def get_schemas_for_category(self, category: str) -> list[dict]:
"""Get schemas for tools in a category."""
tools = self.get_by_category(category)
return [tool.to_openai_schema() for tool in tools]
# Example tool definitions
def create_search_tool() -> Tool:
"""Create a web search tool."""
async def search(query: str, num_results: int = 5) -> list[dict]:
        # Simulated search; a real implementation would call a search API
        return [
            {"title": f"Result {i + 1} for {query}", "url": "https://example.com"}
            for i in range(num_results)
        ]
return Tool(
name="web_search",
description="Search the web for information. Use this when you need current information or facts you don't know.",
parameters=[
ToolParameter(
name="query",
param_type=ParameterType.STRING,
description="The search query"
),
ToolParameter(
name="num_results",
param_type=ParameterType.INTEGER,
description="Number of results to return",
required=False,
default=5
)
],
function=search,
examples=[
{"query": "latest Python version", "num_results": 3}
],
category="search"
)
def create_calculator_tool() -> Tool:
"""Create a calculator tool."""
def calculate(expression: str) -> float:
        # Restrict input to a whitelist of characters before eval;
        # this limits, but does not fully sandbox, what can be evaluated
        allowed = set("0123456789+-*/().^ ")
if not all(c in allowed for c in expression):
raise ValueError("Invalid characters in expression")
expression = expression.replace("^", "**")
return eval(expression)
return Tool(
name="calculator",
description="Perform mathematical calculations. Use for any arithmetic operations.",
parameters=[
ToolParameter(
name="expression",
param_type=ParameterType.STRING,
description="Mathematical expression to evaluate (e.g., '2 + 2', '10 * 5')"
)
],
function=calculate,
examples=[
{"expression": "15 * 23"},
{"expression": "(100 + 50) / 3"}
],
category="math"
)
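With both example tools defined, a quick smoke test shows the registry in action. The sketch below is illustrative: it assumes the classes above are in scope, registers both tools, prints the schemas that would be passed to the model, and executes one tool directly with no LLM involved.

import asyncio

registry = ToolRegistry()
registry.register(create_search_tool())
registry.register(create_calculator_tool())

# Schemas as they would be sent in the model's `tools` parameter
print(json.dumps(registry.get_all_schemas(), indent=2))

# Direct execution, bypassing selection entirely
async def smoke_test():
    result = await registry.get("calculator").execute(expression="15 * 23")
    print(result)  # 345

asyncio.run(smoke_test())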
Tool Selection Strategies
from dataclasses import dataclass
from typing import Any, Optional
import json
import re
@dataclass
class ToolSelection:
"""Result of tool selection."""
tool_name: str
arguments: dict
confidence: float
reasoning: str = ""
class LLMToolSelector:
"""Select tools using LLM function calling."""
def __init__(
self,
client: Any,
model: str = "gpt-4o",
registry: ToolRegistry = None
):
self.client = client
self.model = model
self.registry = registry or ToolRegistry()
async def select(
self,
query: str,
available_tools: list[str] = None
) -> Optional[ToolSelection]:
"""Select the best tool for a query."""
# Get tool schemas
if available_tools:
schemas = [
self.registry.get(name).to_openai_schema()
for name in available_tools
if self.registry.get(name)
]
else:
schemas = self.registry.get_all_schemas()
if not schemas:
return None
response = await self.client.chat.completions.create(
model=self.model,
messages=[{"role": "user", "content": query}],
tools=schemas,
tool_choice="auto"
)
message = response.choices[0].message
if not message.tool_calls:
return None
tool_call = message.tool_calls[0]
return ToolSelection(
tool_name=tool_call.function.name,
arguments=json.loads(tool_call.function.arguments),
confidence=1.0 # LLM doesn't provide confidence
)
class SemanticToolSelector:
"""Select tools using semantic similarity."""
def __init__(
self,
embedding_client: Any,
registry: ToolRegistry
):
self.embedding_client = embedding_client
self.registry = registry
self.tool_embeddings: dict[str, list[float]] = {}
async def build_index(self):
"""Build embeddings for all tools."""
for name, tool in self.registry.tools.items():
# Create rich description
description = f"{tool.name}: {tool.description}"
if tool.examples:
examples_text = " Examples: " + ", ".join(
str(ex) for ex in tool.examples[:3]
)
description += examples_text
response = await self.embedding_client.embeddings.create(
model="text-embedding-3-small",
input=description
)
self.tool_embeddings[name] = response.data[0].embedding
async def select(
self,
query: str,
top_k: int = 3
) -> list[tuple[str, float]]:
"""Select top-k most relevant tools."""
import numpy as np
# Get query embedding
response = await self.embedding_client.embeddings.create(
model="text-embedding-3-small",
input=query
)
query_embedding = np.array(response.data[0].embedding)
# Calculate similarities
similarities = []
for name, embedding in self.tool_embeddings.items():
similarity = np.dot(query_embedding, embedding)
similarities.append((name, similarity))
# Sort by similarity
similarities.sort(key=lambda x: x[1], reverse=True)
return similarities[:top_k]
class HybridToolSelector:
"""Combine semantic and LLM selection."""
def __init__(
self,
llm_selector: LLMToolSelector,
semantic_selector: SemanticToolSelector
):
self.llm_selector = llm_selector
self.semantic_selector = semantic_selector
async def select(self, query: str) -> Optional[ToolSelection]:
"""Select tool using hybrid approach."""
# First, narrow down with semantic search
candidates = await self.semantic_selector.select(query, top_k=5)
candidate_names = [name for name, _ in candidates]
# Then, use LLM to make final selection
selection = await self.llm_selector.select(
query,
available_tools=candidate_names
)
if selection:
# Add semantic confidence
for name, score in candidates:
if name == selection.tool_name:
selection.confidence = score
break
return selection
class RuleBasedToolSelector:
"""Select tools using pattern matching rules."""
def __init__(self, registry: ToolRegistry):
self.registry = registry
        self.rules: list[tuple[re.Pattern, str, float]] = []  # (compiled pattern, tool_name, priority)
    def add_rule(self, pattern: str, tool_name: str, priority: float = 1.0):
        """Add a selection rule."""
        self.rules.append((re.compile(pattern, re.IGNORECASE), tool_name, priority))
def select(self, query: str) -> Optional[ToolSelection]:
"""Select tool based on rules."""
matches = []
for pattern, tool_name, priority in self.rules:
if pattern.search(query):
matches.append((tool_name, priority))
if not matches:
return None
# Return highest priority match
matches.sort(key=lambda x: x[1], reverse=True)
tool_name = matches[0][0]
return ToolSelection(
tool_name=tool_name,
arguments={}, # Rules don't extract arguments
confidence=matches[0][1],
            reasoning="Matched rule pattern"
)
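Because the rule-based selector needs no API client, it is the easiest one to try. A hedged sketch reusing the registry from the previous section (the regex patterns are illustrative; remember that rules return empty arguments, so an LLM or parser still has to extract them):

rule_selector = RuleBasedToolSelector(registry)
rule_selector.add_rule(r"\d+\s*[-+*/]\s*\d+", "calculator", priority=2.0)
rule_selector.add_rule(r"\b(search|find|latest|look up)\b", "web_search", priority=1.0)

selection = rule_selector.select("What is 15 * 23?")
if selection:
    print(selection.tool_name, selection.confidence)  # calculator 2.0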
Tool Execution
from dataclasses import dataclass
from typing import Any, Optional
import asyncio
import traceback
@dataclass
class ToolResult:
"""Result from tool execution."""
tool_name: str
success: bool
result: Any = None
    error: Optional[str] = None
execution_time: float = 0.0
class ToolExecutor:
"""Execute tools with error handling."""
def __init__(
self,
registry: ToolRegistry,
timeout: float = 30.0,
max_retries: int = 2
):
self.registry = registry
self.timeout = timeout
self.max_retries = max_retries
async def execute(
self,
tool_name: str,
arguments: dict
) -> ToolResult:
"""Execute a tool with retries and timeout."""
import time
tool = self.registry.get(tool_name)
if not tool:
return ToolResult(
tool_name=tool_name,
success=False,
error=f"Tool not found: {tool_name}"
)
start_time = time.time()
last_error = None
for attempt in range(self.max_retries + 1):
try:
result = await asyncio.wait_for(
tool.execute(**arguments),
timeout=self.timeout
)
return ToolResult(
tool_name=tool_name,
success=True,
result=result,
execution_time=time.time() - start_time
)
except asyncio.TimeoutError:
last_error = f"Tool execution timed out after {self.timeout}s"
except Exception as e:
last_error = f"{type(e).__name__}: {str(e)}"
# Don't retry on validation errors
if isinstance(e, (ValueError, TypeError)):
break
# Wait before retry
if attempt < self.max_retries:
await asyncio.sleep(1.0 * (attempt + 1))
return ToolResult(
tool_name=tool_name,
success=False,
error=last_error,
execution_time=time.time() - start_time
)
class ToolChain:
"""Execute multiple tools in sequence."""
def __init__(self, executor: ToolExecutor):
self.executor = executor
self.steps: list[tuple[str, dict]] = []
def add_step(self, tool_name: str, arguments: dict):
"""Add a tool execution step."""
self.steps.append((tool_name, arguments))
async def execute(self) -> list[ToolResult]:
"""Execute all steps in sequence."""
results = []
context = {} # Shared context between steps
for tool_name, arguments in self.steps:
# Substitute context variables
resolved_args = self._resolve_arguments(arguments, context)
result = await self.executor.execute(tool_name, resolved_args)
results.append(result)
if not result.success:
break # Stop on first failure
# Add result to context
context[f"step_{len(results)}_result"] = result.result
return results
def _resolve_arguments(self, arguments: dict, context: dict) -> dict:
"""Resolve context references in arguments."""
resolved = {}
for key, value in arguments.items():
if isinstance(value, str) and value.startswith("$"):
# Reference to context variable
context_key = value[1:]
resolved[key] = context.get(context_key, value)
else:
resolved[key] = value
return resolved
class ParallelToolExecutor:
"""Execute multiple tools in parallel."""
def __init__(self, executor: ToolExecutor, max_concurrent: int = 5):
self.executor = executor
self.semaphore = asyncio.Semaphore(max_concurrent)
async def execute_many(
self,
tool_calls: list[tuple[str, dict]]
) -> list[ToolResult]:
"""Execute multiple tools concurrently."""
async def execute_with_semaphore(tool_name: str, arguments: dict):
async with self.semaphore:
return await self.executor.execute(tool_name, arguments)
tasks = [
execute_with_semaphore(name, args)
for name, args in tool_calls
]
return await asyncio.gather(*tasks)
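A short sketch of the executor layer, again reusing the earlier registry: the two calls below are independent, so they can run concurrently through ParallelToolExecutor (for dependent steps, ToolChain with a "$step_1_result"-style reference is the sequential alternative).

async def run_parallel_demo():
    executor = ToolExecutor(registry, timeout=10.0, max_retries=1)
    parallel = ParallelToolExecutor(executor, max_concurrent=2)
    results = await parallel.execute_many([
        ("web_search", {"query": "latest Python version"}),
        ("calculator", {"expression": "15 * 23"}),
    ])
    for r in results:
        print(r.tool_name, r.success, r.result, f"{r.execution_time:.2f}s")

asyncio.run(run_parallel_demo())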
Agent with Tools
from dataclasses import dataclass, field
from typing import Any, Optional
import json
@dataclass
class AgentStep:
"""A step in agent execution."""
thought: str
tool_name: Optional[str] = None
tool_arguments: Optional[dict] = None
tool_result: Optional[Any] = None
observation: str = ""
@dataclass
class AgentResponse:
"""Final agent response."""
answer: str
steps: list[AgentStep] = field(default_factory=list)
total_tokens: int = 0
class ToolAgent:
"""Agent that uses tools to answer questions."""
    SYSTEM_PROMPT = """You are a helpful assistant with access to tools.
Available tools:
{tools}
To use a tool, respond with:
THOUGHT: <your reasoning>
ACTION: <tool name>
ACTION_INPUT: <JSON arguments>
After receiving tool results, continue reasoning or provide final answer:
THOUGHT: <your reasoning>
FINAL_ANSWER: <your answer>
Always think step by step."""
def __init__(
self,
client: Any,
model: str = "gpt-4o",
registry: ToolRegistry = None,
executor: ToolExecutor = None,
max_steps: int = 10
):
self.client = client
self.model = model
self.registry = registry or ToolRegistry()
self.executor = executor or ToolExecutor(self.registry)
self.max_steps = max_steps
async def run(self, query: str) -> AgentResponse:
"""Run the agent on a query."""
# Build system prompt with tool descriptions
tools_desc = "\n".join([
f"- {tool.name}: {tool.description}"
for tool in self.registry.tools.values()
])
system_prompt = self.SYSTEM_PROMPT.format(tools=tools_desc)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": query}
]
steps = []
total_tokens = 0
for _ in range(self.max_steps):
response = await self.client.chat.completions.create(
model=self.model,
messages=messages
)
total_tokens += response.usage.total_tokens
content = response.choices[0].message.content
# Parse response
step = self._parse_response(content)
steps.append(step)
# Check for final answer
if "FINAL_ANSWER:" in content:
answer = content.split("FINAL_ANSWER:")[-1].strip()
return AgentResponse(
answer=answer,
steps=steps,
total_tokens=total_tokens
)
# Execute tool if requested
if step.tool_name:
result = await self.executor.execute(
step.tool_name,
step.tool_arguments or {}
)
step.tool_result = result.result if result.success else result.error
step.observation = str(step.tool_result)
# Add to conversation
messages.append({"role": "assistant", "content": content})
messages.append({
"role": "user",
"content": f"OBSERVATION: {step.observation}"
})
# Max steps reached
return AgentResponse(
answer="I was unable to complete the task within the step limit.",
steps=steps,
total_tokens=total_tokens
)
def _parse_response(self, content: str) -> AgentStep:
"""Parse agent response into structured step."""
thought = ""
tool_name = None
tool_arguments = None
if "THOUGHT:" in content:
thought = content.split("THOUGHT:")[-1].split("ACTION:")[0].strip()
if "ACTION:" in content:
action_part = content.split("ACTION:")[-1]
tool_name = action_part.split("\n")[0].strip()
if "ACTION_INPUT:" in content:
input_part = content.split("ACTION_INPUT:")[-1].strip()
try:
tool_arguments = json.loads(input_part)
except json.JSONDecodeError:
tool_arguments = {"input": input_part}
return AgentStep(
thought=thought,
tool_name=tool_name,
tool_arguments=tool_arguments
)
class FunctionCallingAgent:
"""Agent using native function calling."""
def __init__(
self,
client: Any,
model: str = "gpt-4o",
registry: ToolRegistry = None,
executor: ToolExecutor = None,
max_steps: int = 10
):
self.client = client
self.model = model
self.registry = registry or ToolRegistry()
self.executor = executor or ToolExecutor(self.registry)
self.max_steps = max_steps
async def run(self, query: str) -> AgentResponse:
"""Run agent with function calling."""
messages = [{"role": "user", "content": query}]
tools = self.registry.get_all_schemas()
steps = []
total_tokens = 0
for _ in range(self.max_steps):
response = await self.client.chat.completions.create(
model=self.model,
messages=messages,
tools=tools if tools else None
)
total_tokens += response.usage.total_tokens
message = response.choices[0].message
# No tool calls - final answer
if not message.tool_calls:
return AgentResponse(
answer=message.content or "",
steps=steps,
total_tokens=total_tokens
)
# Process tool calls
messages.append(message)
for tool_call in message.tool_calls:
step = AgentStep(
thought=message.content or "",
tool_name=tool_call.function.name,
tool_arguments=json.loads(tool_call.function.arguments)
)
result = await self.executor.execute(
step.tool_name,
step.tool_arguments
)
step.tool_result = result.result if result.success else result.error
step.observation = str(step.tool_result)
steps.append(step)
messages.append({
"role": "tool",
"tool_call_id": tool_call.id,
"content": step.observation
})
return AgentResponse(
answer="Max steps reached",
steps=steps,
total_tokens=total_tokens
)
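To tie the function-calling agent to a real model, the wiring looks roughly like the sketch below. It assumes the official openai Python SDK (AsyncOpenAI) and an OPENAI_API_KEY in the environment; the query and print formatting are illustrative.

from openai import AsyncOpenAI

async def run_agent_demo():
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    agent = FunctionCallingAgent(
        client=client,
        registry=registry,
        executor=ToolExecutor(registry),
    )
    response = await agent.run("What is 15% of 366?")
    for step in response.steps:
        print(f"{step.tool_name}({step.tool_arguments}) -> {step.observation}")
    print("Answer:", response.answer)

asyncio.run(run_agent_demo())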
Production Tool Service
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, Any
app = FastAPI()
# Initialize components
registry = ToolRegistry()
registry.register(create_search_tool())
registry.register(create_calculator_tool())
executor = ToolExecutor(registry)
llm_selector = None # Initialize with actual client
agent = None # Initialize with actual client
class ToolCallRequest(BaseModel):
tool_name: str
arguments: dict
class SelectToolRequest(BaseModel):
query: str
available_tools: Optional[list[str]] = None
class AgentRequest(BaseModel):
query: str
max_steps: int = 10
@app.get("/v1/tools")
async def list_tools():
"""List all available tools."""
tools = []
for name, tool in registry.tools.items():
tools.append({
"name": name,
"description": tool.description,
"category": tool.category,
"parameters": [
{
"name": p.name,
"type": p.param_type.value,
"description": p.description,
"required": p.required
}
for p in tool.parameters
]
})
return {"tools": tools}
@app.post("/v1/tools/execute")
async def execute_tool(request: ToolCallRequest):
"""Execute a specific tool."""
result = await executor.execute(
request.tool_name,
request.arguments
)
if not result.success:
raise HTTPException(400, result.error)
return {
"tool_name": result.tool_name,
"result": result.result,
"execution_time": result.execution_time
}
@app.post("/v1/tools/select")
async def select_tool(request: SelectToolRequest):
"""Select the best tool for a query."""
selection = await llm_selector.select(
request.query,
request.available_tools
)
if not selection:
return {"selected": None, "message": "No suitable tool found"}
return {
"selected": {
"tool_name": selection.tool_name,
"arguments": selection.arguments,
"confidence": selection.confidence
}
}
@app.post("/v1/agent/run")
async def run_agent(request: AgentRequest):
"""Run the agent on a query."""
response = await agent.run(request.query)
return {
"answer": response.answer,
"steps": [
{
"thought": step.thought,
"tool": step.tool_name,
"arguments": step.tool_arguments,
"result": str(step.tool_result) if step.tool_result else None
}
for step in response.steps
],
"total_tokens": response.total_tokens
}
@app.get("/health")
async def health():
return {"status": "healthy"}
References
- OpenAI Function Calling: https://platform.openai.com/docs/guides/function-calling
- LangChain Tools: https://python.langchain.com/docs/modules/tools/
- Anthropic Tool Use: https://docs.anthropic.com/claude/docs/tool-use
- ReAct Pattern: https://arxiv.org/abs/2210.03629
Conclusion
Effective tool selection is what separates useful AI agents from frustrating ones. Start with clear tool definitions—good descriptions and examples help the LLM understand when to use each tool. Use semantic pre-filtering to narrow candidates before LLM selection when you have many tools. Implement robust execution with timeouts, retries, and proper error handling—tools fail in production. The ReAct pattern (Reason + Act) provides a solid foundation for agents that think before acting. Native function calling is cleaner than parsing text responses when your model supports it. Build tool chains for multi-step workflows and parallel execution for independent operations. Monitor tool usage patterns to identify which tools are most valuable and which need better descriptions. The goal is agents that reliably choose the right tool and handle failures gracefully.