Agent Tool Selection: Building AI Agents That Choose and Use the Right Tools

Introduction: AI agents become powerful when they can use tools—searching the web, querying databases, calling APIs, executing code. But tool selection is where many agent implementations fail. The agent might choose the wrong tool, call tools with incorrect parameters, or get stuck in loops trying tools that won’t work. This guide covers practical patterns for robust tool selection: defining tools with clear schemas, implementing selection strategies, handling tool execution errors, and building agents that reliably choose and use the right tools for each task.

Tool Selection
Tool Selection: Intent Analysis, Tool Matching, Execution

Tool Definition

from dataclasses import dataclass, field
from typing import Any, Callable, Optional
from enum import Enum
import json

class ParameterType(Enum):
    """Type names a tool parameter may declare."""
    # Each member's value is the string written into the "type" field of a
    # parameter when a Tool is converted with to_openai_schema().
    STRING = "string"
    INTEGER = "integer"
    NUMBER = "number"
    BOOLEAN = "boolean"
    ARRAY = "array"
    OBJECT = "object"

@dataclass
class ToolParameter:
    """Definition of a tool parameter."""
    
    # Keyword name under which the value is passed to the tool function.
    name: str
    # Declared type; its .value becomes the schema "type" field.
    param_type: ParameterType
    # Human-readable explanation copied into the generated schema.
    description: str
    # Required parameters are listed in the schema's "required" array and
    # Tool.execute raises ValueError when one is missing.
    required: bool = True
    # Filled in by Tool.execute when the caller omits the argument; a default
    # of None is treated as "no default" and is never injected.
    default: Any = None
    # Optional list of allowed values, emitted as the schema "enum" when non-empty.
    enum: Optional[list[str]] = None

@dataclass
class Tool:
    """A tool that an agent can use.

    Attributes:
        name: Identifier the model uses to request the tool.
        description: Text telling the model when the tool is appropriate.
        parameters: Declared parameters, used for schema generation and for
            required/default handling in ``execute``.
        function: Callable that implements the tool. It may be a plain
            function, a coroutine function, or any callable whose result is
            awaitable.
        examples: Example argument dictionaries for the tool.
        category: Grouping label for the tool.
    """

    name: str
    description: str
    parameters: list[ToolParameter]
    function: Callable
    examples: list[dict] = field(default_factory=list)
    category: str = "general"

    def to_openai_schema(self) -> dict:
        """Convert to OpenAI function calling schema.

        Returns:
            A ``{"type": "function", "function": {...}}`` dictionary whose
            ``parameters`` entry is an object schema built from
            ``self.parameters``.
        """
        properties = {}
        required = []

        for param in self.parameters:
            prop = {
                "type": param.param_type.value,
                "description": param.description,
            }
            # Only emit "enum" when a non-empty list of allowed values exists.
            if param.enum:
                prop["enum"] = param.enum

            properties[param.name] = prop

            if param.required:
                required.append(param.name)

        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            },
        }

    async def execute(self, **kwargs) -> Any:
        """Execute the tool with given parameters.

        Args:
            **kwargs: Arguments forwarded to ``self.function`` by keyword.

        Returns:
            Whatever the underlying function produces. If the call returns an
            awaitable, it is awaited and the awaited value is returned.

        Raises:
            ValueError: If a required parameter is absent from ``kwargs``.
        """
        import inspect

        # Validate required parameters
        for param in self.parameters:
            if param.required and param.name not in kwargs:
                raise ValueError(f"Missing required parameter: {param.name}")

        # Apply defaults (a default of None means "no default")
        for param in self.parameters:
            if param.name not in kwargs and param.default is not None:
                kwargs[param.name] = param.default

        # Call first, then inspect the result: this also covers callables that
        # are not coroutine functions themselves but still return a coroutine
        # (objects with an async __call__, sync wrappers around async code),
        # which would otherwise leak an un-awaited coroutine to the caller.
        result = self.function(**kwargs)
        if inspect.isawaitable(result):
            result = await result
        return result

class ToolRegistry:
    """Registry of available tools.

    Tools are stored by name in ``tools``; ``categories`` maps each category
    label to the names of the tools registered under it.
    """

    def __init__(self):
        self.tools: dict[str, Tool] = {}
        self.categories: dict[str, list[str]] = {}

    def register(self, tool: Tool):
        """Register a tool, replacing any tool already using the same name.

        Args:
            tool: The tool to add.
        """
        # Drop the category entry of a previously registered tool with this
        # name; otherwise re-registration would leave a duplicate (same
        # category) or a stale entry (different category) behind.
        previous = self.tools.get(tool.name)
        if previous is not None:
            previous_names = self.categories.get(previous.category, [])
            if tool.name in previous_names:
                previous_names.remove(tool.name)

        self.tools[tool.name] = tool
        self.categories.setdefault(tool.category, []).append(tool.name)

    def get(self, name: str) -> Optional[Tool]:
        """Get a tool by name, or None when no such tool is registered."""
        return self.tools.get(name)

    def get_by_category(self, category: str) -> list[Tool]:
        """Get all tools in a category (empty list for an unknown category)."""
        tool_names = self.categories.get(category, [])
        return [self.tools[name] for name in tool_names]

    def get_all_schemas(self) -> list[dict]:
        """Get OpenAI schemas for all tools."""
        return [tool.to_openai_schema() for tool in self.tools.values()]

    def get_schemas_for_category(self, category: str) -> list[dict]:
        """Get schemas for tools in a category."""
        tools = self.get_by_category(category)
        return [tool.to_openai_schema() for tool in tools]

# Example tool definitions
def create_search_tool() -> Tool:
    """Build the ``web_search`` tool, backed by a simulated search coroutine."""

    async def search(query: str, num_results: int = 5) -> list[dict]:
        # Stand-in backend: always yields one canned hit mentioning the query.
        hit = {"title": f"Result for {query}", "url": "https://example.com"}
        return [hit]

    query_param = ToolParameter(
        name="query",
        param_type=ParameterType.STRING,
        description="The search query",
    )
    count_param = ToolParameter(
        name="num_results",
        param_type=ParameterType.INTEGER,
        description="Number of results to return",
        required=False,
        default=5,
    )
    sample_calls = [{"query": "latest Python version", "num_results": 3}]

    return Tool(
        name="web_search",
        description="Search the web for information. Use this when you need current information or facts you don't know.",
        parameters=[query_param, count_param],
        function=search,
        examples=sample_calls,
        category="search",
    )

def create_calculator_tool() -> Tool:
    """Create a calculator tool.

    The tool evaluates arithmetic expressions made of numbers, parentheses and
    the operators ``+ - * / // ** ^`` (``^`` is treated as exponentiation).
    """

    def calculate(expression: str) -> float:
        """Evaluate an arithmetic expression without using ``eval``.

        Args:
            expression: The expression text, e.g. ``"(100 + 50) / 3"``.

        Returns:
            The numeric result (an int when the arithmetic stays integral).

        Raises:
            ValueError: If the expression contains disallowed characters, is
                not valid arithmetic, or would produce an excessively large
                integer power.
            ZeroDivisionError: If the expression divides by zero.
        """
        import ast
        import operator

        allowed = set("0123456789+-*/().^ ")
        if not all(c in allowed for c in expression):
            raise ValueError("Invalid characters in expression")

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        # Upper bound on the size of an integer power result; keeps inputs
        # such as "9^9^9^9" from exhausting CPU and memory.
        max_result_bits = 100_000

        def evaluate(node):
            # Walk the parsed tree, permitting only numeric literals and the
            # whitelisted arithmetic operators.
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(left, int)
                    and isinstance(right, int)
                    and right > 0
                    and left.bit_length() * right > max_result_bits
                ):
                    raise ValueError("Exponent too large")
                return binary_ops[type(node.op)](left, right)
            raise ValueError("Unsupported expression")

        # strip(): unlike eval(), ast.parse rejects leading whitespace.
        source = expression.replace("^", "**").strip()
        try:
            tree = ast.parse(source, mode="eval")
        except SyntaxError as exc:
            # Report malformed input as a validation error, like the
            # character check above.
            raise ValueError("Invalid expression") from exc

        return evaluate(tree)

    return Tool(
        name="calculator",
        description="Perform mathematical calculations. Use for any arithmetic operations.",
        parameters=[
            ToolParameter(
                name="expression",
                param_type=ParameterType.STRING,
                description="Mathematical expression to evaluate (e.g., '2 + 2', '10 * 5')"
            )
        ],
        function=calculate,
        examples=[
            {"expression": "15 * 23"},
            {"expression": "(100 + 50) / 3"}
        ],
        category="math"
    )

Tool Selection Strategies

from dataclasses import dataclass
from typing import Any, Optional
import json

@dataclass
class ToolSelection:
    """Result of tool selection."""
    
    # Name of the tool that was chosen.
    tool_name: str
    # Arguments to call the tool with (the rule-based selector leaves this empty).
    arguments: dict
    # Score attached by the selector: the LLM selector always reports 1.0, the
    # hybrid selector overwrites it with the semantic similarity score, and the
    # rule-based selector stores the matched rule's priority here.
    confidence: float
    # Optional free-text explanation of why the tool was picked.
    reasoning: str = ""

class LLMToolSelector:
    """Pick a tool by letting a chat model issue a function call."""

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o",
        registry: ToolRegistry = None
    ):
        self.client = client
        self.model = model
        self.registry = registry or ToolRegistry()

    async def select(
        self,
        query: str,
        available_tools: list[str] = None
    ) -> Optional[ToolSelection]:
        """Ask the model which tool fits ``query``.

        When ``available_tools`` is non-empty, only those names that exist in
        the registry are offered; otherwise every registered tool is offered.
        Returns None when there is nothing to offer or the model makes no
        tool call; otherwise the first tool call is returned with a fixed
        confidence of 1.0.
        """
        if available_tools:
            schemas = []
            for candidate in available_tools:
                tool = self.registry.get(candidate)
                # Names that are not registered are silently skipped.
                if tool:
                    schemas.append(tool.to_openai_schema())
        else:
            schemas = self.registry.get_all_schemas()

        if not schemas:
            return None

        completion = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": query}],
            tools=schemas,
            tool_choice="auto"
        )
        reply = completion.choices[0].message

        if not reply.tool_calls:
            return None

        # Only the first requested call is considered.
        requested = reply.tool_calls[0].function
        return ToolSelection(
            tool_name=requested.name,
            arguments=json.loads(requested.arguments),
            confidence=1.0  # LLM doesn't provide confidence
        )

class SemanticToolSelector:
    """Select tools using semantic similarity.

    Tool descriptions and the query are embedded with the same model and
    ranked by cosine similarity.
    """

    def __init__(
        self,
        embedding_client: Any,
        registry: ToolRegistry,
        embedding_model: str = "text-embedding-3-small"
    ):
        """Store the client, registry and embedding model name.

        Args:
            embedding_client: Client exposing ``embeddings.create(model=, input=)``.
            registry: Registry whose tools are indexed.
            embedding_model: Model name passed to every embedding request.
        """
        self.embedding_client = embedding_client
        self.registry = registry
        self.embedding_model = embedding_model
        self.tool_embeddings: dict[str, list[float]] = {}

    async def build_index(self):
        """Build embeddings for all tools.

        Each tool is embedded from its name, description and up to three
        examples; results are stored in ``self.tool_embeddings`` by tool name.
        """
        for name, tool in self.registry.tools.items():
            # Create rich description
            description = f"{tool.name}: {tool.description}"
            if tool.examples:
                examples_text = " Examples: " + ", ".join(
                    str(ex) for ex in tool.examples[:3]
                )
                description += examples_text

            response = await self.embedding_client.embeddings.create(
                model=self.embedding_model,
                input=description
            )

            self.tool_embeddings[name] = response.data[0].embedding

    async def select(
        self,
        query: str,
        top_k: int = 3
    ) -> list[tuple[str, float]]:
        """Select top-k most relevant tools.

        Args:
            query: Text to match against the indexed tools.
            top_k: Maximum number of results; values below zero yield none.

        Returns:
            ``(tool_name, similarity)`` pairs sorted from most to least
            similar. Similarity is the cosine of the angle between the
            embeddings, as a plain float.
        """
        import numpy as np

        # Get query embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=query
        )
        query_vector = np.asarray(response.data[0].embedding, dtype=float)
        query_norm = np.linalg.norm(query_vector)

        # Normalise by both vector lengths so the ranking reflects direction
        # only; a raw dot product would favour long vectors whenever the
        # embedding backend does not return unit-length vectors.
        similarities = []
        for name, embedding in self.tool_embeddings.items():
            tool_vector = np.asarray(embedding, dtype=float)
            denominator = query_norm * np.linalg.norm(tool_vector)
            if denominator:
                score = float(np.dot(query_vector, tool_vector) / denominator)
            else:
                # A zero vector has no direction; treat it as unrelated.
                score = 0.0
            similarities.append((name, score))

        # Sort by similarity
        similarities.sort(key=lambda item: item[1], reverse=True)

        return similarities[:max(top_k, 0)]

class HybridToolSelector:
    """Two-stage selector: semantic shortlist first, LLM decision second."""

    def __init__(
        self,
        llm_selector: LLMToolSelector,
        semantic_selector: SemanticToolSelector
    ):
        self.llm_selector = llm_selector
        self.semantic_selector = semantic_selector

    async def select(self, query: str) -> Optional[ToolSelection]:
        """Shortlist five tools semantically, then let the LLM choose among them.

        The returned selection's confidence is replaced by the semantic score
        of the chosen tool when that tool appears in the shortlist.
        """
        ranked = await self.semantic_selector.select(query, top_k=5)
        shortlist = [tool_name for tool_name, _score in ranked]

        choice = await self.llm_selector.select(
            query,
            available_tools=shortlist
        )
        if not choice:
            return choice

        # Look up the first shortlist entry matching the chosen tool; the
        # sentinel distinguishes "not shortlisted" from any real score value.
        not_found = object()
        semantic_score = next(
            (score for tool_name, score in ranked if tool_name == choice.tool_name),
            not_found,
        )
        if semantic_score is not not_found:
            choice.confidence = semantic_score

        return choice

class RuleBasedToolSelector:
    """Choose a tool by testing the query against prioritized regex rules."""

    def __init__(self, registry: ToolRegistry):
        self.registry = registry
        # Entries are (compiled pattern, tool_name, priority), in insertion order.
        self.rules: "list[tuple[re.Pattern, str, float]]" = []

    def add_rule(self, pattern: str, tool_name: str, priority: float = 1.0):
        """Compile ``pattern`` case-insensitively and append it as a rule."""
        import re

        compiled = re.compile(pattern, re.IGNORECASE)
        self.rules.append((compiled, tool_name, priority))

    def select(self, query: str) -> Optional[ToolSelection]:
        """Return the highest-priority rule match for ``query``, or None.

        Among equal priorities, the rule added first wins. The selection
        carries no arguments and uses the rule's priority as its confidence.
        """
        hits = [
            (tool_name, priority)
            for pattern, tool_name, priority in self.rules
            if pattern.search(query)
        ]
        if not hits:
            return None

        # Stable descending sort keeps insertion order among ties.
        best_tool, best_priority = sorted(
            hits, key=lambda hit: hit[1], reverse=True
        )[0]

        return ToolSelection(
            tool_name=best_tool,
            arguments={},  # Rules don't extract arguments
            confidence=best_priority,
            reasoning="Matched rule pattern"
        )

Tool Execution

from dataclasses import dataclass
from typing import Any, Optional
import asyncio
import traceback

@dataclass
class ToolResult:
    """Result from tool execution."""
    
    # Name of the tool that was requested (set even when the tool was not found).
    tool_name: str
    # True when the tool call returned normally.
    success: bool
    # Value returned by the tool; left as None on failure.
    result: Any = None
    # Error description on failure; None on success.
    error: Optional[str] = None
    # Elapsed seconds reported by the executor, covering all attempts.
    execution_time: float = 0.0

class ToolExecutor:
    """Execute tools with error handling.

    Wraps ``Tool.execute`` with a per-attempt timeout, retries with a growing
    delay, and converts every failure into a ``ToolResult`` instead of raising.
    """

    def __init__(
        self,
        registry: ToolRegistry,
        timeout: float = 30.0,
        max_retries: int = 2
    ):
        """Store the registry and execution limits.

        Args:
            registry: Registry used to look tools up by name.
            timeout: Seconds allowed for each individual attempt.
            max_retries: Extra attempts after the first one; negative values
                are treated as zero.
        """
        self.registry = registry
        self.timeout = timeout
        self.max_retries = max_retries

    async def execute(
        self,
        tool_name: str,
        arguments: dict
    ) -> ToolResult:
        """Execute a tool with retries and timeout.

        Args:
            tool_name: Name of the registered tool to run.
            arguments: Keyword arguments for the tool; None is treated as an
                empty dict.

        Returns:
            A ``ToolResult`` describing success or the last error seen.
            ``execution_time`` spans all attempts including retry delays.
        """
        import time

        tool = self.registry.get(tool_name)
        if not tool:
            return ToolResult(
                tool_name=tool_name,
                success=False,
                error=f"Tool not found: {tool_name}"
            )

        if arguments is None:
            arguments = {}

        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments while the tool is running.
        start_time = time.perf_counter()
        last_error = None
        # Always make at least one attempt, even if max_retries is negative;
        # otherwise the loop would be skipped and a failure with no error
        # message would be returned.
        retries = max(self.max_retries, 0)

        for attempt in range(retries + 1):
            try:
                # NOTE: the timeout can only interrupt a tool at an await
                # point; a synchronous tool function runs to completion first.
                result = await asyncio.wait_for(
                    tool.execute(**arguments),
                    timeout=self.timeout
                )

                return ToolResult(
                    tool_name=tool_name,
                    success=True,
                    result=result,
                    execution_time=time.perf_counter() - start_time
                )

            except asyncio.TimeoutError:
                last_error = f"Tool execution timed out after {self.timeout}s"

            except Exception as e:
                last_error = f"{type(e).__name__}: {str(e)}"

                # Don't retry on validation errors
                if isinstance(e, (ValueError, TypeError)):
                    break

            # Wait before retry (1s, 2s, ...); no wait after the final attempt
            if attempt < retries:
                await asyncio.sleep(1.0 * (attempt + 1))

        return ToolResult(
            tool_name=tool_name,
            success=False,
            error=last_error,
            execution_time=time.perf_counter() - start_time
        )

class ToolChain:
    """Run a fixed list of tool calls one after another."""

    def __init__(self, executor: ToolExecutor):
        self.executor = executor
        self.steps: list[tuple[str, dict]] = []

    def add_step(self, tool_name: str, arguments: dict):
        """Queue a tool call to run after the steps already added."""
        self.steps.append((tool_name, arguments))

    async def execute(self) -> list[ToolResult]:
        """Run the queued steps in order, stopping after the first failure.

        Each successful result is stored in a shared context under
        ``step_<n>_result`` (n starting at 1) so later steps can reference it
        with a ``$step_<n>_result`` argument value. The failing result, if
        any, is the last item of the returned list.
        """
        outcomes = []
        shared = {}

        for position, (tool_name, raw_arguments) in enumerate(self.steps, start=1):
            call_arguments = self._resolve_arguments(raw_arguments, shared)
            outcome = await self.executor.execute(tool_name, call_arguments)
            outcomes.append(outcome)

            if not outcome.success:
                break

            shared[f"step_{position}_result"] = outcome.result

        return outcomes

    def _resolve_arguments(self, arguments: dict, context: dict) -> dict:
        """Return a copy of ``arguments`` with ``$name`` values looked up in ``context``.

        Only top-level string values starting with ``$`` are treated as
        references; a reference with no matching context entry is left as-is.
        """

        def lookup(value):
            if isinstance(value, str) and value.startswith("$"):
                return context.get(value[1:], value)
            return value

        return {key: lookup(value) for key, value in arguments.items()}

class ParallelToolExecutor:
    """Run several tool calls concurrently with a cap on simultaneous calls."""

    def __init__(self, executor: ToolExecutor, max_concurrent: int = 5):
        self.executor = executor
        self.semaphore = asyncio.Semaphore(max_concurrent)

    async def execute_many(
        self,
        tool_calls: list[tuple[str, dict]]
    ) -> list[ToolResult]:
        """Run every ``(tool_name, arguments)`` pair and gather the results.

        Results are returned in the same order as ``tool_calls``.
        """

        async def run_limited(tool_name: str, arguments: dict):
            # The semaphore is held for the duration of one tool execution.
            async with self.semaphore:
                return await self.executor.execute(tool_name, arguments)

        pending = (run_limited(name, args) for name, args in tool_calls)
        return await asyncio.gather(*pending)

Agent with Tools

from dataclasses import dataclass, field
from typing import Any, Optional
import json

@dataclass
class AgentStep:
    """A step in agent execution."""
    
    # Reasoning text produced by the model for this step.
    thought: str
    # Tool requested in this step, if any.
    tool_name: Optional[str] = None
    # Arguments parsed for the requested tool.
    tool_arguments: Optional[dict] = None
    # The tool's return value on success, or its error string on failure.
    tool_result: Optional[Any] = None
    # String form of tool_result that is fed back to the model.
    observation: str = ""

@dataclass
class AgentResponse:
    """Final agent response."""
    
    # Final answer text (or a fallback message when the step limit is hit).
    answer: str
    # Steps recorded during the run, in order.
    steps: list[AgentStep] = field(default_factory=list)
    # Sum of usage.total_tokens over all model calls in the run.
    total_tokens: int = 0

class ToolAgent:
    """Agent that uses tools to answer questions.

    Drives a text protocol in which the model replies with THOUGHT / ACTION /
    ACTION_INPUT to call a tool, receives an OBSERVATION message with the
    result, and finishes with FINAL_ANSWER.
    """

    # The <...> placeholders tell the model what to put after each marker.
    SYSTEM_PROMPT = """You are a helpful assistant with access to tools.

Available tools:
{tools}

To use a tool, respond with:
THOUGHT: <your reasoning about what to do next>
ACTION: <name of the tool to use>
ACTION_INPUT: <arguments for the tool as a JSON object>

After receiving tool results, continue reasoning or provide final answer:
THOUGHT: <your reasoning about the tool results>
FINAL_ANSWER: <your answer to the user>

Always think step by step."""

    # Sent when a reply contains neither a tool request nor a final answer,
    # so the next model call does not repeat an identical conversation.
    FORMAT_REMINDER = (
        "Your reply did not contain an ACTION or a FINAL_ANSWER. "
        "Respond with THOUGHT/ACTION/ACTION_INPUT to use a tool, "
        "or with THOUGHT/FINAL_ANSWER to finish."
    )

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o",
        registry: ToolRegistry = None,
        executor: ToolExecutor = None,
        max_steps: int = 10
    ):
        """Store the client and configuration.

        Args:
            client: Chat client exposing ``chat.completions.create``.
            model: Model name sent with every request.
            registry: Tool registry; a new empty one is created when omitted.
            executor: Tool executor; one bound to the registry is created
                when omitted.
            max_steps: Maximum number of model calls per run.
        """
        self.client = client
        self.model = model
        self.registry = registry or ToolRegistry()
        self.executor = executor or ToolExecutor(self.registry)
        self.max_steps = max_steps

    async def run(self, query: str) -> AgentResponse:
        """Run the agent on a query.

        Args:
            query: The user's question.

        Returns:
            An ``AgentResponse`` with the final answer, the recorded steps and
            the total token count. If no final answer is produced within
            ``max_steps`` model calls, a fallback answer is returned.
        """
        # Build system prompt with tool descriptions
        tools_desc = "\n".join([
            f"- {tool.name}: {tool.description}"
            for tool in self.registry.tools.values()
        ])

        system_prompt = self.SYSTEM_PROMPT.format(tools=tools_desc)

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query}
        ]

        steps = []
        total_tokens = 0

        for _ in range(self.max_steps):
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages
            )

            # Usage data may be absent; count tokens only when reported.
            usage = getattr(response, "usage", None)
            if usage is not None:
                total_tokens += usage.total_tokens

            # The message content can be None; treat that as an empty reply.
            content = response.choices[0].message.content or ""

            # Parse response
            step = self._parse_response(content)
            steps.append(step)

            # Check for final answer
            if "FINAL_ANSWER:" in content:
                answer = content.split("FINAL_ANSWER:")[-1].strip()
                return AgentResponse(
                    answer=answer,
                    steps=steps,
                    total_tokens=total_tokens
                )

            # Always record the reply so every later call sees new context.
            messages.append({"role": "assistant", "content": content})

            if step.tool_name:
                # Execute the requested tool and feed the outcome back
                result = await self.executor.execute(
                    step.tool_name,
                    step.tool_arguments or {}
                )

                step.tool_result = result.result if result.success else result.error
                step.observation = str(step.tool_result)

                messages.append({
                    "role": "user",
                    "content": f"OBSERVATION: {step.observation}"
                })
            else:
                # Neither a tool request nor a final answer: ask the model to
                # follow the protocol rather than re-sending the same prompt.
                messages.append({
                    "role": "user",
                    "content": self.FORMAT_REMINDER
                })

        # Max steps reached
        return AgentResponse(
            answer="I was unable to complete the task within the step limit.",
            steps=steps,
            total_tokens=total_tokens
        )

    def _parse_response(self, content: str) -> AgentStep:
        """Parse agent response into structured step.

        Args:
            content: Raw model reply.

        Returns:
            An ``AgentStep`` with the thought text and, when an ACTION marker
            is present, the tool name and its arguments. Arguments are always
            a dict: input that is not a JSON object is wrapped as
            ``{"input": ...}``.
        """
        thought = ""
        tool_name = None
        tool_arguments = None

        if "THOUGHT:" in content:
            thought = content.split("THOUGHT:")[-1]
            # Cut the thought at whichever marker follows it.
            thought = thought.split("ACTION:")[0].split("FINAL_ANSWER:")[0].strip()

        if "ACTION:" in content:
            action_part = content.split("ACTION:")[-1]
            tool_name = action_part.split("\n")[0].strip()

            if "ACTION_INPUT:" in content:
                input_part = content.split("ACTION_INPUT:")[-1].strip()
                try:
                    # raw_decode parses the leading JSON value and tolerates
                    # trailing text the model may have added after it.
                    parsed, _end = json.JSONDecoder().raw_decode(input_part)
                except json.JSONDecodeError:
                    tool_arguments = {"input": input_part}
                else:
                    # Tools are called with **arguments, so a non-object JSON
                    # value (string, number, list) is wrapped in a dict.
                    if isinstance(parsed, dict):
                        tool_arguments = parsed
                    else:
                        tool_arguments = {"input": parsed}

        return AgentStep(
            thought=thought,
            tool_name=tool_name,
            tool_arguments=tool_arguments
        )

class FunctionCallingAgent:
    """Agent using native function calling.

    The model requests tools through structured tool calls; each call is
    executed and answered with a ``tool`` role message until the model replies
    without tool calls.
    """

    def __init__(
        self,
        client: Any,
        model: str = "gpt-4o",
        registry: ToolRegistry = None,
        executor: ToolExecutor = None,
        max_steps: int = 10
    ):
        """Store the client and configuration.

        Args:
            client: Chat client exposing ``chat.completions.create``.
            model: Model name sent with every request.
            registry: Tool registry; a new empty one is created when omitted.
            executor: Tool executor; one bound to the registry is created
                when omitted.
            max_steps: Maximum number of model calls per run.
        """
        self.client = client
        self.model = model
        self.registry = registry or ToolRegistry()
        self.executor = executor or ToolExecutor(self.registry)
        self.max_steps = max_steps

    async def run(self, query: str) -> AgentResponse:
        """Run agent with function calling.

        Args:
            query: The user's question.

        Returns:
            An ``AgentResponse`` holding the model's final text, one step per
            executed tool call, and the total token count. Returns the answer
            "Max steps reached" when the model keeps requesting tools.
        """
        messages = [{"role": "user", "content": query}]
        tools = self.registry.get_all_schemas()
        steps = []
        total_tokens = 0

        # Leave "tools" out entirely when no tools are registered instead of
        # sending an explicit None value for it.
        request_kwargs = {"model": self.model, "messages": messages}
        if tools:
            request_kwargs["tools"] = tools

        for _ in range(self.max_steps):
            response = await self.client.chat.completions.create(**request_kwargs)

            # Usage data may be absent; count tokens only when reported.
            usage = getattr(response, "usage", None)
            if usage is not None:
                total_tokens += usage.total_tokens
            message = response.choices[0].message

            # No tool calls - final answer
            if not message.tool_calls:
                return AgentResponse(
                    answer=message.content or "",
                    steps=steps,
                    total_tokens=total_tokens
                )

            # Process tool calls
            messages.append(message)

            for tool_call in message.tool_calls:
                step = AgentStep(
                    thought=message.content or "",
                    tool_name=tool_call.function.name
                )

                # Malformed or non-object arguments are reported back to the
                # model as the tool's output, so the run continues and every
                # tool call still receives a matching tool message.
                parse_error = None
                try:
                    arguments = json.loads(tool_call.function.arguments or "{}")
                except json.JSONDecodeError as exc:
                    arguments = None
                    parse_error = f"Invalid tool arguments (not valid JSON): {exc}"
                else:
                    if not isinstance(arguments, dict):
                        arguments = None
                        parse_error = "Invalid tool arguments (expected a JSON object)"

                if parse_error is None:
                    step.tool_arguments = arguments
                    result = await self.executor.execute(
                        step.tool_name,
                        step.tool_arguments
                    )
                    step.tool_result = result.result if result.success else result.error
                else:
                    step.tool_result = parse_error

                step.observation = str(step.tool_result)
                steps.append(step)

                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": step.observation
                })

        return AgentResponse(
            answer="Max steps reached",
            steps=steps,
            total_tokens=total_tokens
        )

Production Tool Service

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, Any

# FastAPI application that exposes the tool registry, executor, selector and
# agent over HTTP.
app = FastAPI()

# Initialize components
# The registry is populated at import time with the two example tools.
registry = ToolRegistry()
registry.register(create_search_tool())
registry.register(create_calculator_tool())

executor = ToolExecutor(registry)
# Placeholders: both remain None until real objects are assigned. The
# /v1/tools/select and /v1/agent/run handlers call methods on them.
llm_selector = None  # Initialize with actual client
agent = None  # Initialize with actual client

# Request body for POST /v1/tools/execute: the tool to run and its arguments.
class ToolCallRequest(BaseModel):
    tool_name: str
    arguments: dict

# Request body for POST /v1/tools/select: the query and an optional list of
# tool names to choose from.
class SelectToolRequest(BaseModel):
    query: str
    available_tools: Optional[list[str]] = None

# Request body for POST /v1/agent/run.
# NOTE(review): the run_agent handler passes only `query` to the agent, so
# max_steps is accepted but has no effect there.
class AgentRequest(BaseModel):
    query: str
    max_steps: int = 10

@app.get("/v1/tools")
async def list_tools():
    """List all available tools."""

    def describe_parameter(param):
        # Public view of one declared parameter.
        return {
            "name": param.name,
            "type": param.param_type.value,
            "description": param.description,
            "required": param.required
        }

    # One entry per registered tool, in registration order.
    catalog = [
        {
            "name": name,
            "description": tool.description,
            "category": tool.category,
            "parameters": [describe_parameter(p) for p in tool.parameters]
        }
        for name, tool in registry.tools.items()
    ]

    return {"tools": catalog}

@app.post("/v1/tools/execute")
async def execute_tool(request: ToolCallRequest):
    """Execute a specific tool."""

    outcome = await executor.execute(request.tool_name, request.arguments)

    if outcome.success:
        return {
            "tool_name": outcome.tool_name,
            "result": outcome.result,
            "execution_time": outcome.execution_time
        }

    # Every failure (unknown tool, timeout, tool error) is reported as HTTP 400
    # with the executor's error text as the detail.
    raise HTTPException(400, outcome.error)

@app.post("/v1/tools/select")
async def select_tool(request: SelectToolRequest):
    """Select the best tool for a query."""

    # The module-level selector is None until it is configured with a real
    # client; answer 503 rather than failing with an AttributeError.
    if llm_selector is None:
        raise HTTPException(503, "Tool selector is not configured")

    selection = await llm_selector.select(
        request.query,
        request.available_tools
    )

    if not selection:
        return {"selected": None, "message": "No suitable tool found"}

    return {
        "selected": {
            "tool_name": selection.tool_name,
            "arguments": selection.arguments,
            "confidence": selection.confidence
        }
    }

@app.post("/v1/agent/run")
async def run_agent(request: AgentRequest):
    """Run the agent on a query."""

    # The module-level agent is None until it is configured with a real
    # client; answer 503 rather than failing with an AttributeError.
    if agent is None:
        raise HTTPException(503, "Agent is not configured")

    # NOTE(review): request.max_steps is not forwarded; the agent's own
    # max_steps setting applies.
    response = await agent.run(request.query)

    return {
        "answer": response.answer,
        "steps": [
            {
                "thought": step.thought,
                "tool": step.tool_name,
                "arguments": step.tool_arguments,
                # Compare against None so falsy results (0, "", [], False)
                # are still reported instead of being turned into null.
                "result": str(step.tool_result) if step.tool_result is not None else None
            }
            for step in response.steps
        ],
        "total_tokens": response.total_tokens
    }

@app.get("/health")
async def health():
    # Liveness probe: always reports a fixed "healthy" status.
    return dict(status="healthy")

References

Conclusion

Effective tool selection is what separates useful AI agents from frustrating ones. Start with clear tool definitions—good descriptions and examples help the LLM understand when to use each tool. Use semantic pre-filtering to narrow candidates before LLM selection when you have many tools. Implement robust execution with timeouts, retries, and proper error handling—tools fail in production. The ReAct pattern (Reason + Act) provides a solid foundation for agents that think before acting. Native function calling is cleaner than parsing text responses when your model supports it. Build tool chains for multi-step workflows and parallel execution for independent operations. Monitor tool usage patterns to identify which tools are most valuable and which need better descriptions. The goal is agents that reliably choose the right tool and handle failures gracefully.


Discover more from Code, Cloud & Context

Subscribe to get the latest posts sent to your email.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.