Agent Memory Patterns: Building Persistent Context for AI Agents

Introduction

Memory is what transforms a stateless LLM into a persistent, context-aware agent. Without memory, every interaction starts from scratch—the agent forgets previous conversations, learned preferences, and accumulated knowledge. But implementing memory for agents is more complex than simply storing chat history. You need short-term memory for the current task, long-term memory for persistent knowledge, episodic memory for past experiences, and semantic memory for facts and relationships. This guide covers practical memory patterns: working memory for active context, conversation memory with compression, vector-based long-term storage, and memory retrieval strategies that keep agents grounded in relevant context.

[Figure: agent memory system spanning short-term, long-term, and episodic memory]

Working Memory

from dataclasses import dataclass, field
from typing import Any, Optional
from datetime import datetime
from collections import deque

@dataclass
class MemoryItem:
    """A single item in memory."""
    
    content: str
    memory_type: str  # observation, thought, action, result
    timestamp: datetime = field(default_factory=datetime.utcnow)
    importance: float = 0.5
    metadata: dict = field(default_factory=dict)

class WorkingMemory:
    """Short-term working memory for active task context."""
    
    def __init__(self, max_items: int = 20, max_tokens: int = 4000):
        self.items: deque[MemoryItem] = deque(maxlen=max_items)
        self.max_tokens = max_tokens  # soft token budget; trimming is left to the caller
        self.current_goal: Optional[str] = None
        self.scratchpad: dict[str, Any] = {}
    
    def add(self, content: str, memory_type: str, importance: float = 0.5, metadata: Optional[dict] = None):
        """Add item to working memory."""
        
        item = MemoryItem(
            content=content,
            memory_type=memory_type,
            importance=importance,
            metadata=metadata or {}
        )
        
        self.items.append(item)
    
    def add_observation(self, content: str, importance: float = 0.5):
        """Add an observation."""
        self.add(content, "observation", importance)
    
    def add_thought(self, content: str, importance: float = 0.6):
        """Add a thought/reasoning step."""
        self.add(content, "thought", importance)
    
    def add_action(self, action: str, result: Optional[str] = None):
        """Add an action and its result."""
        self.add(action, "action", importance=0.7)
        if result:
            self.add(result, "result", importance=0.6)
    
    def set_goal(self, goal: str):
        """Set the current goal."""
        self.current_goal = goal
    
    def set_scratchpad(self, key: str, value: Any):
        """Store temporary data in scratchpad."""
        self.scratchpad[key] = value
    
    def get_scratchpad(self, key: str, default: Any = None) -> Any:
        """Get data from scratchpad."""
        return self.scratchpad.get(key, default)
    
    def get_context(self, include_scratchpad: bool = True) -> str:
        """Get formatted working memory context."""
        
        lines = []
        
        if self.current_goal:
            lines.append(f"Current Goal: {self.current_goal}")
            lines.append("")
        
        if self.items:
            lines.append("Recent Memory:")
            for item in self.items:
                prefix = {
                    "observation": "[OBS]",
                    "thought": "[THINK]",
                    "action": "[ACT]",
                    "result": "[RES]"
                }.get(item.memory_type, "[?]")
                lines.append(f"  {prefix} {item.content}")
        
        if include_scratchpad and self.scratchpad:
            lines.append("")
            lines.append("Scratchpad:")
            for key, value in self.scratchpad.items():
                lines.append(f"  {key}: {value}")
        
        return "\n".join(lines)
    
    def get_recent(self, n: int = 5, memory_type: Optional[str] = None) -> list[MemoryItem]:
        """Get recent memory items."""
        
        items = list(self.items)
        
        if memory_type:
            items = [i for i in items if i.memory_type == memory_type]
        
        return items[-n:]
    
    def clear(self):
        """Clear working memory."""
        self.items.clear()
        self.current_goal = None
        self.scratchpad.clear()

class FocusedWorkingMemory(WorkingMemory):
    """Working memory with attention-based focus."""
    
    def __init__(self, *args, focus_window: int = 5, **kwargs):
        super().__init__(*args, **kwargs)
        self.focus_window = focus_window
        self.focus_keywords: set[str] = set()
    
    def set_focus(self, keywords: list[str]):
        """Set focus keywords for attention."""
        self.focus_keywords = set(kw.lower() for kw in keywords)
    
    def get_focused_context(self) -> str:
        """Get context with attention to focused items."""
        
        if not self.focus_keywords:
            return self.get_context()
        
        # Score items by relevance to focus
        scored_items = []
        for item in self.items:
            content_lower = item.content.lower()
            relevance = sum(1 for kw in self.focus_keywords if kw in content_lower)
            scored_items.append((item, relevance + item.importance))
        
        # Sort by score and take top items
        scored_items.sort(key=lambda x: x[1], reverse=True)
        focused = [item for item, _ in scored_items[:self.focus_window]]
        
        lines = []
        if self.current_goal:
            lines.append(f"Current Goal: {self.current_goal}")
            lines.append("")
        
        lines.append("Focused Memory:")
        for item in focused:
            prefix = {
                "observation": "[OBS]",
                "thought": "[THINK]",
                "action": "[ACT]",
                "result": "[RES]"
            }.get(item.memory_type, "[?]")
            lines.append(f"  {prefix} {item.content}")
        
        return "\n".join(lines)

Conversation Memory

from dataclasses import dataclass, field
from typing import Any, Optional
from datetime import datetime
from enum import Enum

class MessageRole(Enum):
    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"

@dataclass
class Message:
    """A conversation message."""
    
    role: MessageRole
    content: str
    timestamp: datetime = field(default_factory=datetime.utcnow)
    metadata: dict = field(default_factory=dict)
    
    def to_dict(self) -> dict:
        return {
            "role": self.role.value,
            "content": self.content
        }

class ConversationMemory:
    """Memory for conversation history."""
    
    def __init__(self, max_messages: int = 100):
        self.messages: list[Message] = []
        self.max_messages = max_messages
        self.summaries: list[str] = []
    
    def add_message(self, role: MessageRole, content: str, metadata: Optional[dict] = None):
        """Add a message to history."""
        
        message = Message(
            role=role,
            content=content,
            metadata=metadata or {}
        )
        
        self.messages.append(message)
        
        # Trim if over limit
        if len(self.messages) > self.max_messages:
            self.messages = self.messages[-self.max_messages:]
    
    def add_user_message(self, content: str):
        """Add a user message."""
        self.add_message(MessageRole.USER, content)
    
    def add_assistant_message(self, content: str):
        """Add an assistant message."""
        self.add_message(MessageRole.ASSISTANT, content)
    
    def get_messages(self, limit: Optional[int] = None) -> list[dict]:
        """Get messages in API format."""
        
        messages = self.messages[-limit:] if limit else self.messages
        return [m.to_dict() for m in messages]
    
    def get_last_n(self, n: int) -> list[Message]:
        """Get last n messages."""
        return self.messages[-n:]
    
    def search(self, query: str) -> list[Message]:
        """Search messages by content."""
        
        query_lower = query.lower()
        return [m for m in self.messages if query_lower in m.content.lower()]
    
    def clear(self):
        """Clear conversation history."""
        self.messages.clear()
        self.summaries.clear()

class CompressedConversationMemory(ConversationMemory):
    """Conversation memory with automatic compression."""
    
    def __init__(
        self,
        client: Any,
        max_messages: int = 100,
        compression_threshold: int = 20,
        model: str = "gpt-4o-mini"
    ):
        super().__init__(max_messages)
        self.client = client
        self.compression_threshold = compression_threshold
        self.model = model
    
    async def compress_if_needed(self):
        """Compress old messages if threshold reached."""
        
        if len(self.messages) < self.compression_threshold:
            return
        
        # Take oldest messages to compress
        to_compress = self.messages[:self.compression_threshold // 2]
        
        # Generate summary
        summary = await self._summarize_messages(to_compress)
        self.summaries.append(summary)
        
        # Remove compressed messages
        self.messages = self.messages[self.compression_threshold // 2:]
    
    async def _summarize_messages(self, messages: list[Message]) -> str:
        """Summarize a list of messages."""
        
        conversation_text = "\n".join([
            f"{m.role.value}: {m.content}" for m in messages
        ])
        
        prompt = f"""Summarize this conversation segment concisely, preserving key information:

{conversation_text}

Summary:"""
        
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        
        return response.choices[0].message.content
    
    def get_context_with_summaries(self) -> str:
        """Get full context including summaries."""
        
        parts = []
        
        if self.summaries:
            parts.append("Previous conversation summary:")
            for summary in self.summaries:
                parts.append(f"  {summary}")
            parts.append("")
        
        parts.append("Recent messages:")
        for message in self.messages[-10:]:
            parts.append(f"  {message.role.value}: {message.content[:200]}")
        
        return "\n".join(parts)

class SlidingWindowMemory(ConversationMemory):
    """Memory with sliding window and token budget."""
    
    def __init__(self, token_budget: int = 4000):
        super().__init__()
        self.token_budget = token_budget
    
    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count."""
        return len(text) // 4
    
    def get_messages_within_budget(self) -> list[dict]:
        """Get messages that fit within token budget."""
        
        result = []
        total_tokens = 0
        
        # Start from most recent
        for message in reversed(self.messages):
            msg_tokens = self._estimate_tokens(message.content)
            
            if total_tokens + msg_tokens > self.token_budget:
                break
            
            result.insert(0, message.to_dict())
            total_tokens += msg_tokens
        
        return result
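
A minimal usage sketch for the compressed conversation memory, assuming the openai package's AsyncOpenAI client; any client exposing chat.completions.create works the same way.

import asyncio

from openai import AsyncOpenAI  # assumed client; reads OPENAI_API_KEY from the environment

async def demo():
    memory = CompressedConversationMemory(AsyncOpenAI(), compression_threshold=20)
    
    memory.add_user_message("My name is Ada and I work on compilers.")
    memory.add_assistant_message("Nice to meet you, Ada!")
    await memory.compress_if_needed()  # no-op until 20 messages accumulate; then summarizes the oldest 10
    
    print(memory.get_context_with_summaries())

asyncio.run(demo())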

Long-term Memory

from dataclasses import dataclass, field
from typing import Any, Optional
from datetime import datetime
import numpy as np
import json

@dataclass
class LongTermMemoryItem:
    """An item in long-term memory."""
    
    id: str
    content: str
    embedding: list[float]
    memory_type: str  # fact, experience, preference, skill
    importance: float
    access_count: int = 0
    last_accessed: datetime = field(default_factory=datetime.utcnow)
    created_at: datetime = field(default_factory=datetime.utcnow)
    metadata: dict = field(default_factory=dict)

class VectorLongTermMemory:
    """Long-term memory with vector storage."""
    
    def __init__(
        self,
        embedding_client: Any,
        embedding_model: str = "text-embedding-3-small"
    ):
        self.embedding_client = embedding_client
        self.embedding_model = embedding_model
        self.memories: dict[str, LongTermMemoryItem] = {}
        self.embeddings_matrix: Optional[np.ndarray] = None
        self.id_to_index: dict[str, int] = {}
    
    async def add(
        self,
        content: str,
        memory_type: str,
        importance: float = 0.5,
        metadata: Optional[dict] = None
    ) -> str:
        """Add item to long-term memory."""
        
        import uuid
        memory_id = str(uuid.uuid4())[:12]
        
        # Get embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=content
        )
        embedding = response.data[0].embedding
        
        item = LongTermMemoryItem(
            id=memory_id,
            content=content,
            embedding=embedding,
            memory_type=memory_type,
            importance=importance,
            metadata=metadata or {}
        )
        
        self.memories[memory_id] = item
        self._rebuild_index()
        
        return memory_id
    
    def _rebuild_index(self):
        """Rebuild the embeddings matrix for fast search."""
        
        if not self.memories:
            self.embeddings_matrix = None
            self.id_to_index = {}
            return
        
        embeddings = []
        self.id_to_index = {}
        
        for i, (memory_id, item) in enumerate(self.memories.items()):
            embeddings.append(item.embedding)
            self.id_to_index[memory_id] = i
        
        self.embeddings_matrix = np.array(embeddings)
    
    async def search(
        self,
        query: str,
        top_k: int = 5,
        memory_type: Optional[str] = None,
        min_importance: float = 0.0
    ) -> list[LongTermMemoryItem]:
        """Search long-term memory."""
        
        if not self.memories:
            return []
        
        # Get query embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=query
        )
        query_embedding = np.array(response.data[0].embedding)
        
        # Calculate similarities
        similarities = np.dot(self.embeddings_matrix, query_embedding) / (
            np.linalg.norm(self.embeddings_matrix, axis=1) * np.linalg.norm(query_embedding)
        )
        
        # Get top results
        top_indices = np.argsort(similarities)[::-1]
        
        results = []
        index_to_id = {v: k for k, v in self.id_to_index.items()}
        
        for idx in top_indices:
            memory_id = index_to_id[idx]
            item = self.memories[memory_id]
            
            # Apply filters
            if memory_type and item.memory_type != memory_type:
                continue
            if item.importance < min_importance:
                continue
            
            # Update access stats
            item.access_count += 1
            item.last_accessed = datetime.utcnow()
            
            results.append(item)
            
            if len(results) >= top_k:
                break
        
        return results
    
    def get_by_type(self, memory_type: str) -> list[LongTermMemoryItem]:
        """Get all memories of a specific type."""
        return [m for m in self.memories.values() if m.memory_type == memory_type]
    
    def get_important(self, min_importance: float = 0.7) -> list[LongTermMemoryItem]:
        """Get important memories."""
        return [m for m in self.memories.values() if m.importance >= min_importance]
    
    def forget(self, memory_id: str):
        """Remove a memory."""
        if memory_id in self.memories:
            del self.memories[memory_id]
            self._rebuild_index()
    
    def decay_importance(self, decay_rate: float = 0.01):
        """Decay importance of unused memories."""
        
        now = datetime.utcnow()
        
        for item in self.memories.values():
            days_since_access = (now - item.last_accessed).days
            item.importance *= (1 - decay_rate) ** days_since_access

class SemanticMemory(VectorLongTermMemory):
    """Semantic memory for facts and knowledge."""
    
    async def add_fact(self, fact: str, source: Optional[str] = None, confidence: float = 0.8):
        """Add a fact to semantic memory."""
        
        return await self.add(
            content=fact,
            memory_type="fact",
            importance=confidence,
            metadata={"source": source} if source else {}
        )
    
    async def add_knowledge(self, topic: str, content: str):
        """Add knowledge about a topic."""
        
        return await self.add(
            content=f"{topic}: {content}",
            memory_type="knowledge",
            importance=0.6,
            metadata={"topic": topic}
        )
    
    async def query_facts(self, query: str, top_k: int = 5) -> list[str]:
        """Query facts related to a topic."""
        
        results = await self.search(query, top_k, memory_type="fact")
        return [r.content for r in results]
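
A usage sketch for semantic memory; the facts are invented examples, and AsyncOpenAI is assumed as the embeddings client.

import asyncio

from openai import AsyncOpenAI  # assumed embeddings client

async def demo():
    semantic = SemanticMemory(embedding_client=AsyncOpenAI())
    
    await semantic.add_fact("The user deploys to AWS us-east-1", source="chat", confidence=0.9)
    await semantic.add_knowledge("deployment", "The service ships as a Docker container")
    
    print(await semantic.query_facts("Where does the user deploy?"))

asyncio.run(demo())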

Episodic Memory

from dataclasses import dataclass, field
from typing import Any, Optional
from datetime import datetime
import numpy as np
import json

@dataclass
class Episode:
    """An episode in memory."""
    
    id: str
    title: str
    summary: str
    events: list[dict]
    outcome: str
    lessons_learned: list[str]
    timestamp: datetime = field(default_factory=datetime.utcnow)
    importance: float = 0.5
    embedding: Optional[list[float]] = None
    metadata: dict = field(default_factory=dict)

class EpisodicMemory:
    """Memory for past experiences and episodes."""
    
    def __init__(
        self,
        client: Any,
        embedding_client: Any,
        model: str = "gpt-4o-mini",
        embedding_model: str = "text-embedding-3-small"
    ):
        self.client = client
        self.embedding_client = embedding_client
        self.model = model
        self.embedding_model = embedding_model
        self.episodes: dict[str, Episode] = {}
        self.embeddings_matrix: Optional[np.ndarray] = None
    
    async def record_episode(
        self,
        events: list[dict],
        outcome: str,
        importance: float = 0.5
    ) -> str:
        """Record a new episode from events."""
        
        import uuid
        episode_id = str(uuid.uuid4())[:12]
        
        # Generate summary and lessons
        summary, lessons = await self._analyze_episode(events, outcome)
        
        # Generate title
        title = await self._generate_title(summary)
        
        # Get embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=summary
        )
        embedding = response.data[0].embedding
        
        episode = Episode(
            id=episode_id,
            title=title,
            summary=summary,
            events=events,
            outcome=outcome,
            lessons_learned=lessons,
            importance=importance,
            embedding=embedding
        )
        
        self.episodes[episode_id] = episode
        self._rebuild_index()
        
        return episode_id
    
    async def _analyze_episode(self, events: list[dict], outcome: str) -> tuple[str, list[str]]:
        """Analyze episode to extract summary and lessons."""
        
        events_text = "\n".join([
            f"- {e.get('action', 'Unknown')}: {e.get('result', 'Unknown')}"
            for e in events
        ])
        
        prompt = f"""Analyze this sequence of events and outcome:

Events:
{events_text}

Outcome: {outcome}

Provide:
1. A brief summary (2-3 sentences)
2. Key lessons learned (2-4 bullet points)

Format as JSON:
{{"summary": "...", "lessons": ["...", "..."]}}"""
        
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        result = json.loads(response.choices[0].message.content)
        return result["summary"], result["lessons"]
    
    async def _generate_title(self, summary: str) -> str:
        """Generate a title for the episode."""
        
        prompt = f"Generate a short title (5-7 words) for this episode:\n{summary}"
        
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=20
        )
        
        return response.choices[0].message.content.strip()
    
    def _rebuild_index(self):
        """Rebuild the embeddings index."""
        
        if not self.episodes:
            self.embeddings_matrix = None
            return
        
        # Row order must match list(self.episodes.values()), which recall_similar
        # indexes into; every episode recorded above carries an embedding.
        self.embeddings_matrix = np.array([e.embedding for e in self.episodes.values()])
    
    async def recall_similar(self, situation: str, top_k: int = 3) -> list[Episode]:
        """Recall episodes similar to current situation."""
        
        if not self.episodes:
            return []
        
        # Get situation embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=situation
        )
        query_embedding = np.array(response.data[0].embedding)
        
        # Calculate similarities
        similarities = np.dot(self.embeddings_matrix, query_embedding) / (
            np.linalg.norm(self.embeddings_matrix, axis=1) * np.linalg.norm(query_embedding)
        )
        
        # Get top results
        top_indices = np.argsort(similarities)[::-1][:top_k]
        
        episodes_list = list(self.episodes.values())
        return [episodes_list[i] for i in top_indices]
    
    def get_lessons_for_situation(self, episodes: list[Episode]) -> list[str]:
        """Extract relevant lessons from episodes."""
        
        all_lessons = []
        for episode in episodes:
            all_lessons.extend(episode.lessons_learned)
        
        return list(set(all_lessons))
    
    async def get_advice(self, situation: str) -> str:
        """Get advice based on past episodes."""
        
        similar_episodes = await self.recall_similar(situation, top_k=3)
        
        if not similar_episodes:
            return "No relevant past experiences found."
        
        lessons = self.get_lessons_for_situation(similar_episodes)
        
        context = "\n".join([
            f"Past experience: {e.title}\n  Outcome: {e.outcome}\n  Lessons: {', '.join(e.lessons_learned)}"
            for e in similar_episodes
        ])
        
        prompt = f"""Based on these past experiences:

{context}

Key lessons: {'; '.join(lessons)}

Provide advice for this situation: {situation}"""
        
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        
        return response.choices[0].message.content
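
A usage sketch for episodic memory; the events and outcome are invented, and a single AsyncOpenAI client is assumed for both chat and embeddings.

import asyncio

from openai import AsyncOpenAI  # assumed client

async def demo():
    client = AsyncOpenAI()
    episodic = EpisodicMemory(client, embedding_client=client)
    
    await episodic.record_episode(
        events=[
            {"action": "query_database", "result": "timeout after 30s"},
            {"action": "retry_with_smaller_batch", "result": "success"},
        ],
        outcome="Completed after switching to batched queries",
        importance=0.8,
    )
    
    print(await episodic.get_advice("A database query keeps timing out"))

asyncio.run(demo())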

Unified Memory System

from dataclasses import dataclass
from typing import Any, Optional
import json

@dataclass
class MemoryContext:
    """Combined context from all memory systems."""
    
    working_memory: str
    conversation_context: str
    relevant_knowledge: list[str]
    relevant_episodes: list[str]
    advice: str = ""

class UnifiedMemorySystem:
    """Unified memory system combining all memory types."""
    
    def __init__(
        self,
        client: Any,
        embedding_client: Any,
        model: str = "gpt-4o-mini"
    ):
        self.client = client
        self.model = model
        self.working = WorkingMemory()
        self.conversation = CompressedConversationMemory(client, model=model)
        self.semantic = SemanticMemory(embedding_client)
        self.episodic = EpisodicMemory(client, embedding_client, model=model)
    
    async def get_context(self, query: str) -> MemoryContext:
        """Get unified context from all memory systems."""
        
        # Get working memory context
        working_context = self.working.get_context()
        
        # Get conversation context
        conversation_context = self.conversation.get_context_with_summaries()
        
        # Search semantic memory
        relevant_facts = await self.semantic.search(query, top_k=5)
        knowledge = [f.content for f in relevant_facts]
        
        # Recall relevant episodes
        similar_episodes = await self.episodic.recall_similar(query, top_k=3)
        episodes = [f"{e.title}: {e.summary}" for e in similar_episodes]
        
        # Get advice if episodes found
        advice = ""
        if similar_episodes:
            advice = await self.episodic.get_advice(query)
        
        return MemoryContext(
            working_memory=working_context,
            conversation_context=conversation_context,
            relevant_knowledge=knowledge,
            relevant_episodes=episodes,
            advice=advice
        )
    
    def format_context_for_prompt(self, context: MemoryContext) -> str:
        """Format memory context for inclusion in prompt."""
        
        parts = []
        
        if context.working_memory:
            parts.append("=== Working Memory ===")
            parts.append(context.working_memory)
            parts.append("")
        
        if context.relevant_knowledge:
            parts.append("=== Relevant Knowledge ===")
            for fact in context.relevant_knowledge[:5]:
                parts.append(f"- {fact}")
            parts.append("")
        
        if context.relevant_episodes:
            parts.append("=== Relevant Past Experiences ===")
            for episode in context.relevant_episodes[:3]:
                parts.append(f"- {episode}")
            parts.append("")
        
        if context.advice:
            parts.append("=== Advice from Past Experience ===")
            parts.append(context.advice)
            parts.append("")
        
        return "\n".join(parts)
    
    async def learn_from_interaction(
        self,
        user_message: str,
        assistant_response: str,
        outcome: Optional[str] = None
    ):
        """Learn from an interaction."""
        
        # Add to conversation memory
        self.conversation.add_user_message(user_message)
        self.conversation.add_assistant_message(assistant_response)
        await self.conversation.compress_if_needed()
        
        # Extract facts if present
        facts = await self._extract_facts(user_message, assistant_response)
        for fact in facts:
            await self.semantic.add_fact(fact)
        
        # Record episode if significant
        if outcome:
            await self.episodic.record_episode(
                events=[
                    {"action": "user_query", "result": user_message},
                    {"action": "assistant_response", "result": assistant_response[:500]}
                ],
                outcome=outcome
            )
    
    async def _extract_facts(self, user_message: str, response: str) -> list[str]:
        """Extract factual information from interaction."""
        
        prompt = f"""Extract any factual information from this exchange that should be remembered.
Return as JSON array of facts, or empty array if none.

User: {user_message}
Assistant: {response[:1000]}

Facts (JSON array):"""
        
        result = await self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        
        try:
            data = json.loads(result.choices[0].message.content)
            return data.get("facts", [])
        except (json.JSONDecodeError, AttributeError):
            return []
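
A sketch of a single turn through the unified system; the client, user message, and outcome label are placeholders.

import asyncio

from openai import AsyncOpenAI  # assumed client

async def demo():
    client = AsyncOpenAI()
    memory = UnifiedMemorySystem(client, embedding_client=client)
    
    user_message = "How should I configure the staging environment?"
    context = await memory.get_context(user_message)
    system_context = memory.format_context_for_prompt(context)
    
    # ...call the LLM with system_context prepended to the prompt...
    assistant_response = "(model output)"
    
    await memory.learn_from_interaction(user_message, assistant_response, outcome="answered")

asyncio.run(demo())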

Production Memory Service

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional

app = FastAPI()

# Shared memory system, initialized at service startup (see the startup hook after the routes)
memory_system: Optional[UnifiedMemorySystem] = None

class WorkingMemoryRequest(BaseModel):
    content: str
    memory_type: str
    importance: float = 0.5

class ConversationRequest(BaseModel):
    role: str
    content: str

class SemanticMemoryRequest(BaseModel):
    content: str
    memory_type: str = "fact"
    importance: float = 0.5

class EpisodeRequest(BaseModel):
    events: list[dict]
    outcome: str
    importance: float = 0.5

class QueryRequest(BaseModel):
    query: str
    top_k: int = 5

class ContextRequest(BaseModel):
    query: str
    include_working: bool = True
    include_conversation: bool = True
    include_semantic: bool = True
    include_episodic: bool = True

@app.post("/v1/memory/working/add")
async def add_to_working_memory(request: WorkingMemoryRequest):
    """Add item to working memory."""
    
    memory_system.working.add(
        request.content,
        request.memory_type,
        request.importance
    )
    
    return {"status": "added"}

@app.get("/v1/memory/working")
async def get_working_memory():
    """Get working memory context."""
    
    return {
        "context": memory_system.working.get_context(),
        "item_count": len(memory_system.working.items),
        "current_goal": memory_system.working.current_goal
    }

@app.post("/v1/memory/working/goal")
async def set_goal(goal: str):
    """Set current goal."""
    
    memory_system.working.set_goal(goal)
    return {"status": "goal_set", "goal": goal}

@app.delete("/v1/memory/working")
async def clear_working_memory():
    """Clear working memory."""
    
    memory_system.working.clear()
    return {"status": "cleared"}

@app.post("/v1/memory/conversation/add")
async def add_to_conversation(request: ConversationRequest):
    """Add message to conversation memory."""
    
    try:
        role = MessageRole(request.role)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid role: {request.role}")
    
    memory_system.conversation.add_message(role, request.content)
    
    return {"status": "added", "message_count": len(memory_system.conversation.messages)}

@app.get("/v1/memory/conversation")
async def get_conversation(limit: int = 20):
    """Get conversation history."""
    
    messages = memory_system.conversation.get_messages(limit)
    
    return {
        "messages": messages,
        "total_count": len(memory_system.conversation.messages),
        "summaries": memory_system.conversation.summaries
    }

@app.post("/v1/memory/semantic/add")
async def add_to_semantic_memory(request: SemanticMemoryRequest):
    """Add to semantic memory."""
    
    memory_id = await memory_system.semantic.add(
        request.content,
        request.memory_type,
        request.importance
    )
    
    return {"status": "added", "memory_id": memory_id}

@app.post("/v1/memory/semantic/search")
async def search_semantic_memory(request: QueryRequest):
    """Search semantic memory."""
    
    results = await memory_system.semantic.search(request.query, request.top_k)
    
    return {
        "results": [
            {
                "id": r.id,
                "content": r.content,
                "type": r.memory_type,
                "importance": r.importance
            }
            for r in results
        ]
    }

@app.post("/v1/memory/episodic/record")
async def record_episode(request: EpisodeRequest):
    """Record an episode."""
    
    episode_id = await memory_system.episodic.record_episode(
        request.events,
        request.outcome,
        request.importance
    )
    
    return {"status": "recorded", "episode_id": episode_id}

@app.post("/v1/memory/episodic/recall")
async def recall_episodes(request: QueryRequest):
    """Recall similar episodes."""
    
    episodes = await memory_system.episodic.recall_similar(request.query, request.top_k)
    
    return {
        "episodes": [
            {
                "id": e.id,
                "title": e.title,
                "summary": e.summary,
                "outcome": e.outcome,
                "lessons": e.lessons_learned
            }
            for e in episodes
        ]
    }

@app.post("/v1/memory/context")
async def get_unified_context(request: ContextRequest):
    """Get unified memory context."""
    
    context = await memory_system.get_context(request.query)
    formatted = memory_system.format_context_for_prompt(context)
    
    return {
        "formatted_context": formatted,
        "working_memory": context.working_memory if request.include_working else None,
        "conversation": context.conversation_context if request.include_conversation else None,
        "knowledge": context.relevant_knowledge if request.include_semantic else None,
        "episodes": context.relevant_episodes if request.include_episodic else None,
        "advice": context.advice
    }

@app.get("/health")
async def health():
    return {"status": "healthy"}

Conclusion

Effective agent memory requires multiple complementary systems working together. Working memory maintains the active context for the current task—observations, thoughts, actions, and a scratchpad for temporary data. Conversation memory preserves dialogue history with compression to handle long conversations within token limits. Long-term semantic memory stores facts and knowledge using vector embeddings for efficient retrieval. Episodic memory records past experiences with summaries and lessons learned, enabling agents to learn from history. The unified memory system combines all these components, providing rich context for each interaction. Key patterns include importance-based decay to forget irrelevant information, attention-based focus for working memory, automatic compression for conversation history, and similarity-based retrieval for long-term memories. Start with simple conversation memory, add working memory for task context, implement semantic memory for persistent knowledge, and layer in episodic memory as your agent handles more complex, multi-step tasks.

