Introduction
Memory is what transforms a stateless LLM into a persistent, context-aware agent. Without memory, every interaction starts from scratch—the agent forgets previous conversations, learned preferences, and accumulated knowledge. But implementing memory for agents is more complex than simply storing chat history. You need short-term memory for the current task, long-term memory for persistent knowledge, episodic memory for past experiences, and semantic memory for facts and relationships. This guide covers practical memory patterns: working memory for active context, conversation memory with compression, vector-based long-term storage, and memory retrieval strategies that keep agents grounded in relevant context.

Working Memory
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Optional


@dataclass
class MemoryItem:
    """A single item in memory."""
    content: str
    memory_type: str  # observation, thought, action, result
    timestamp: datetime = field(default_factory=datetime.utcnow)
    importance: float = 0.5
    metadata: dict = field(default_factory=dict)


class WorkingMemory:
    """Short-term working memory for active task context."""

    def __init__(self, max_items: int = 20, max_tokens: int = 4000):
        self.items: deque[MemoryItem] = deque(maxlen=max_items)
        self.max_tokens = max_tokens  # token budget for context (not enforced in this minimal version)
        self.current_goal: Optional[str] = None
        self.scratchpad: dict[str, Any] = {}

    def add(self, content: str, memory_type: str, importance: float = 0.5,
            metadata: Optional[dict] = None):
        """Add item to working memory."""
        item = MemoryItem(
            content=content,
            memory_type=memory_type,
            importance=importance,
            metadata=metadata or {}
        )
        self.items.append(item)

    def add_observation(self, content: str, importance: float = 0.5):
        """Add an observation."""
        self.add(content, "observation", importance)

    def add_thought(self, content: str, importance: float = 0.6):
        """Add a thought/reasoning step."""
        self.add(content, "thought", importance)

    def add_action(self, action: str, result: Optional[str] = None):
        """Add an action and its result."""
        self.add(action, "action", importance=0.7)
        if result:
            self.add(result, "result", importance=0.6)

    def set_goal(self, goal: str):
        """Set the current goal."""
        self.current_goal = goal

    def set_scratchpad(self, key: str, value: Any):
        """Store temporary data in scratchpad."""
        self.scratchpad[key] = value

    def get_scratchpad(self, key: str, default: Any = None) -> Any:
        """Get data from scratchpad."""
        return self.scratchpad.get(key, default)

    def get_context(self, include_scratchpad: bool = True) -> str:
        """Get formatted working memory context."""
        lines = []
        if self.current_goal:
            lines.append(f"Current Goal: {self.current_goal}")
            lines.append("")
        if self.items:
            lines.append("Recent Memory:")
            for item in self.items:
                prefix = {
                    "observation": "[OBS]",
                    "thought": "[THINK]",
                    "action": "[ACT]",
                    "result": "[RES]"
                }.get(item.memory_type, "[?]")
                lines.append(f"  {prefix} {item.content}")
        if include_scratchpad and self.scratchpad:
            lines.append("")
            lines.append("Scratchpad:")
            for key, value in self.scratchpad.items():
                lines.append(f"  {key}: {value}")
        return "\n".join(lines)

    def get_recent(self, n: int = 5, memory_type: Optional[str] = None) -> list[MemoryItem]:
        """Get recent memory items."""
        items = list(self.items)
        if memory_type:
            items = [i for i in items if i.memory_type == memory_type]
        return items[-n:]

    def clear(self):
        """Clear working memory."""
        self.items.clear()
        self.current_goal = None
        self.scratchpad.clear()


class FocusedWorkingMemory(WorkingMemory):
    """Working memory with attention-based focus."""

    def __init__(self, *args, focus_window: int = 5, **kwargs):
        super().__init__(*args, **kwargs)
        self.focus_window = focus_window
        self.focus_keywords: set[str] = set()

    def set_focus(self, keywords: list[str]):
        """Set focus keywords for attention."""
        self.focus_keywords = set(kw.lower() for kw in keywords)

    def get_focused_context(self) -> str:
        """Get context with attention to focused items."""
        if not self.focus_keywords:
            return self.get_context()
        # Score items by relevance to focus
        scored_items = []
        for item in self.items:
            content_lower = item.content.lower()
            relevance = sum(1 for kw in self.focus_keywords if kw in content_lower)
            scored_items.append((item, relevance + item.importance))
        # Sort by score and take top items
        scored_items.sort(key=lambda x: x[1], reverse=True)
        focused = [item for item, _ in scored_items[:self.focus_window]]
        lines = []
        if self.current_goal:
            lines.append(f"Current Goal: {self.current_goal}")
            lines.append("")
        lines.append("Focused Memory:")
        for item in focused:
            prefix = {
                "observation": "[OBS]",
                "thought": "[THINK]",
                "action": "[ACT]",
                "result": "[RES]"
            }.get(item.memory_type, "[?]")
            lines.append(f"  {prefix} {item.content}")
        return "\n".join(lines)
Conversation Memory
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Optional


class MessageRole(Enum):
    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"


@dataclass
class Message:
    """A conversation message."""
    role: MessageRole
    content: str
    timestamp: datetime = field(default_factory=datetime.utcnow)
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        return {
            "role": self.role.value,
            "content": self.content
        }


class ConversationMemory:
    """Memory for conversation history."""

    def __init__(self, max_messages: int = 100):
        self.messages: list[Message] = []
        self.max_messages = max_messages
        self.summaries: list[str] = []

    def add_message(self, role: MessageRole, content: str, metadata: Optional[dict] = None):
        """Add a message to history."""
        message = Message(
            role=role,
            content=content,
            metadata=metadata or {}
        )
        self.messages.append(message)
        # Trim if over limit
        if len(self.messages) > self.max_messages:
            self.messages = self.messages[-self.max_messages:]

    def add_user_message(self, content: str):
        """Add a user message."""
        self.add_message(MessageRole.USER, content)

    def add_assistant_message(self, content: str):
        """Add an assistant message."""
        self.add_message(MessageRole.ASSISTANT, content)

    def get_messages(self, limit: Optional[int] = None) -> list[dict]:
        """Get messages in API format."""
        messages = self.messages[-limit:] if limit else self.messages
        return [m.to_dict() for m in messages]

    def get_last_n(self, n: int) -> list[Message]:
        """Get last n messages."""
        return self.messages[-n:]

    def search(self, query: str) -> list[Message]:
        """Search messages by content."""
        query_lower = query.lower()
        return [m for m in self.messages if query_lower in m.content.lower()]

    def clear(self):
        """Clear conversation history."""
        self.messages.clear()
        self.summaries.clear()


class CompressedConversationMemory(ConversationMemory):
    """Conversation memory with automatic compression."""

    def __init__(
        self,
        client: Any,
        max_messages: int = 100,
        compression_threshold: int = 20,
        model: str = "gpt-4o-mini"
    ):
        super().__init__(max_messages)
        self.client = client
        self.compression_threshold = compression_threshold
        self.model = model

    async def compress_if_needed(self):
        """Compress old messages if threshold reached."""
        if len(self.messages) < self.compression_threshold:
            return
        # Take oldest messages to compress
        to_compress = self.messages[:self.compression_threshold // 2]
        # Generate summary
        summary = await self._summarize_messages(to_compress)
        self.summaries.append(summary)
        # Remove compressed messages
        self.messages = self.messages[self.compression_threshold // 2:]

    async def _summarize_messages(self, messages: list[Message]) -> str:
        """Summarize a list of messages."""
        conversation_text = "\n".join([
            f"{m.role.value}: {m.content}" for m in messages
        ])
        prompt = f"""Summarize this conversation segment concisely, preserving key information:

{conversation_text}

Summary:"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        return response.choices[0].message.content

    def get_context_with_summaries(self) -> str:
        """Get full context including summaries."""
        parts = []
        if self.summaries:
            parts.append("Previous conversation summary:")
            for summary in self.summaries:
                parts.append(f"  {summary}")
            parts.append("")
        parts.append("Recent messages:")
        for message in self.messages[-10:]:
            parts.append(f"  {message.role.value}: {message.content[:200]}")
        return "\n".join(parts)


class SlidingWindowMemory(ConversationMemory):
    """Memory with sliding window and token budget."""

    def __init__(self, token_budget: int = 4000):
        super().__init__()
        self.token_budget = token_budget

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count (rough heuristic: ~4 characters per token for English)."""
        return len(text) // 4

    def get_messages_within_budget(self) -> list[dict]:
        """Get messages that fit within token budget."""
        result = []
        total_tokens = 0
        # Start from most recent
        for message in reversed(self.messages):
            msg_tokens = self._estimate_tokens(message.content)
            if total_tokens + msg_tokens > self.token_budget:
                break
            result.insert(0, message.to_dict())
            total_tokens += msg_tokens
        return result
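
A minimal sketch of the compression flow. It assumes the official openai package (AsyncOpenAI) and an OPENAI_API_KEY in the environment; the messages are invented:

# Illustrative usage of CompressedConversationMemory.
import asyncio
from openai import AsyncOpenAI

async def main():
    memory = CompressedConversationMemory(AsyncOpenAI(), compression_threshold=20)
    memory.add_user_message("My name is Ada and I work on compilers.")
    memory.add_assistant_message("Nice to meet you, Ada! How can I help?")
    await memory.compress_if_needed()  # no-op until 20 messages accumulate
    print(memory.get_context_with_summaries())

asyncio.run(main())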
Long-term Memory
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Optional

import numpy as np


@dataclass
class LongTermMemoryItem:
    """An item in long-term memory."""
    id: str
    content: str
    embedding: list[float]
    memory_type: str  # fact, experience, preference, skill
    importance: float
    access_count: int = 0
    last_accessed: datetime = field(default_factory=datetime.utcnow)
    created_at: datetime = field(default_factory=datetime.utcnow)
    metadata: dict = field(default_factory=dict)


class VectorLongTermMemory:
    """Long-term memory with vector storage."""

    def __init__(
        self,
        embedding_client: Any,
        embedding_model: str = "text-embedding-3-small"
    ):
        self.embedding_client = embedding_client
        self.embedding_model = embedding_model
        self.memories: dict[str, LongTermMemoryItem] = {}
        self.embeddings_matrix: Optional[np.ndarray] = None
        self.id_to_index: dict[str, int] = {}

    async def add(
        self,
        content: str,
        memory_type: str,
        importance: float = 0.5,
        metadata: Optional[dict] = None
    ) -> str:
        """Add item to long-term memory."""
        memory_id = str(uuid.uuid4())[:12]
        # Get embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=content
        )
        embedding = response.data[0].embedding
        item = LongTermMemoryItem(
            id=memory_id,
            content=content,
            embedding=embedding,
            memory_type=memory_type,
            importance=importance,
            metadata=metadata or {}
        )
        self.memories[memory_id] = item
        self._rebuild_index()
        return memory_id

    def _rebuild_index(self):
        """Rebuild the embeddings matrix for fast search."""
        if not self.memories:
            self.embeddings_matrix = None
            self.id_to_index = {}
            return
        embeddings = []
        self.id_to_index = {}
        for i, (memory_id, item) in enumerate(self.memories.items()):
            embeddings.append(item.embedding)
            self.id_to_index[memory_id] = i
        self.embeddings_matrix = np.array(embeddings)

    async def search(
        self,
        query: str,
        top_k: int = 5,
        memory_type: Optional[str] = None,
        min_importance: float = 0
    ) -> list[LongTermMemoryItem]:
        """Search long-term memory."""
        if not self.memories:
            return []
        # Get query embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=query
        )
        query_embedding = np.array(response.data[0].embedding)
        # Cosine similarity against all stored embeddings
        similarities = np.dot(self.embeddings_matrix, query_embedding) / (
            np.linalg.norm(self.embeddings_matrix, axis=1) * np.linalg.norm(query_embedding)
        )
        # Walk results from most to least similar, applying filters
        top_indices = np.argsort(similarities)[::-1]
        results = []
        index_to_id = {v: k for k, v in self.id_to_index.items()}
        for idx in top_indices:
            memory_id = index_to_id[int(idx)]
            item = self.memories[memory_id]
            # Apply filters
            if memory_type and item.memory_type != memory_type:
                continue
            if item.importance < min_importance:
                continue
            # Update access stats
            item.access_count += 1
            item.last_accessed = datetime.utcnow()
            results.append(item)
            if len(results) >= top_k:
                break
        return results

    def get_by_type(self, memory_type: str) -> list[LongTermMemoryItem]:
        """Get all memories of a specific type."""
        return [m for m in self.memories.values() if m.memory_type == memory_type]

    def get_important(self, min_importance: float = 0.7) -> list[LongTermMemoryItem]:
        """Get important memories."""
        return [m for m in self.memories.values() if m.importance >= min_importance]

    def forget(self, memory_id: str):
        """Remove a memory."""
        if memory_id in self.memories:
            del self.memories[memory_id]
            self._rebuild_index()

    def decay_importance(self, decay_rate: float = 0.01):
        """Decay importance of unused memories."""
        now = datetime.utcnow()
        for item in self.memories.values():
            days_since_access = (now - item.last_accessed).days
            item.importance *= (1 - decay_rate) ** days_since_access


class SemanticMemory(VectorLongTermMemory):
    """Semantic memory for facts and knowledge."""

    async def add_fact(self, fact: str, source: Optional[str] = None, confidence: float = 0.8):
        """Add a fact to semantic memory."""
        return await self.add(
            content=fact,
            memory_type="fact",
            importance=confidence,
            metadata={"source": source} if source else {}
        )

    async def add_knowledge(self, topic: str, content: str):
        """Add knowledge about a topic."""
        return await self.add(
            content=f"{topic}: {content}",
            memory_type="knowledge",
            importance=0.6,
            metadata={"topic": topic}
        )

    async def query_facts(self, query: str, top_k: int = 5) -> list[str]:
        """Query facts related to a topic."""
        results = await self.search(query, top_k, memory_type="fact")
        return [r.content for r in results]
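
A quick sketch of storing and retrieving facts. As before, it assumes the openai package for embeddings; the facts themselves are invented:

# Illustrative usage of SemanticMemory.
import asyncio
from openai import AsyncOpenAI

async def main():
    semantic = SemanticMemory(AsyncOpenAI())
    await semantic.add_fact("The user's deployment target is AWS us-east-1",
                            source="conversation", confidence=0.9)
    await semantic.add_knowledge("deployments", "Rollbacks use blue-green switching")
    facts = await semantic.query_facts("Where does the user deploy?")
    print(facts)

asyncio.run(main())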
Episodic Memory
import json
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Optional

import numpy as np


@dataclass
class Episode:
    """An episode in memory."""
    id: str
    title: str
    summary: str
    events: list[dict]
    outcome: str
    lessons_learned: list[str]
    timestamp: datetime = field(default_factory=datetime.utcnow)
    importance: float = 0.5
    embedding: Optional[list[float]] = None
    metadata: dict = field(default_factory=dict)


class EpisodicMemory:
    """Memory for past experiences and episodes."""

    def __init__(
        self,
        client: Any,
        embedding_client: Any,
        model: str = "gpt-4o-mini",
        embedding_model: str = "text-embedding-3-small"
    ):
        self.client = client
        self.embedding_client = embedding_client
        self.model = model
        self.embedding_model = embedding_model
        self.episodes: dict[str, Episode] = {}
        self.embeddings_matrix: Optional[np.ndarray] = None

    async def record_episode(
        self,
        events: list[dict],
        outcome: str,
        importance: float = 0.5
    ) -> str:
        """Record a new episode from events."""
        episode_id = str(uuid.uuid4())[:12]
        # Generate summary and lessons
        summary, lessons = await self._analyze_episode(events, outcome)
        # Generate title
        title = await self._generate_title(summary)
        # Get embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=summary
        )
        embedding = response.data[0].embedding
        episode = Episode(
            id=episode_id,
            title=title,
            summary=summary,
            events=events,
            outcome=outcome,
            lessons_learned=lessons,
            importance=importance,
            embedding=embedding
        )
        self.episodes[episode_id] = episode
        self._rebuild_index()
        return episode_id

    async def _analyze_episode(self, events: list[dict], outcome: str) -> tuple[str, list[str]]:
        """Analyze episode to extract summary and lessons."""
        events_text = "\n".join([
            f"- {e.get('action', 'Unknown')}: {e.get('result', 'Unknown')}"
            for e in events
        ])
        prompt = f"""Analyze this sequence of events and outcome:

Events:
{events_text}

Outcome: {outcome}

Provide:
1. A brief summary (2-3 sentences)
2. Key lessons learned (2-4 bullet points)

Format as JSON:
{{"summary": "...", "lessons": ["...", "..."]}}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        result = json.loads(response.choices[0].message.content)
        return result["summary"], result["lessons"]

    async def _generate_title(self, summary: str) -> str:
        """Generate a title for the episode."""
        prompt = f"Generate a short title (5-7 words) for this episode:\n{summary}"
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=20
        )
        return response.choices[0].message.content.strip()

    def _rebuild_index(self):
        """Rebuild embeddings index."""
        if not self.episodes:
            self.embeddings_matrix = None
            return
        # record_episode always sets an embedding, so matrix rows stay aligned
        # with list(self.episodes.values()) as used in recall_similar
        self.embeddings_matrix = np.array([e.embedding for e in self.episodes.values()])

    async def recall_similar(self, situation: str, top_k: int = 3) -> list[Episode]:
        """Recall episodes similar to current situation."""
        if not self.episodes:
            return []
        # Get situation embedding
        response = await self.embedding_client.embeddings.create(
            model=self.embedding_model,
            input=situation
        )
        query_embedding = np.array(response.data[0].embedding)
        # Cosine similarity against all episode embeddings
        similarities = np.dot(self.embeddings_matrix, query_embedding) / (
            np.linalg.norm(self.embeddings_matrix, axis=1) * np.linalg.norm(query_embedding)
        )
        # Get top results
        top_indices = np.argsort(similarities)[::-1][:top_k]
        episodes_list = list(self.episodes.values())
        return [episodes_list[i] for i in top_indices]

    def get_lessons_for_situation(self, episodes: list[Episode]) -> list[str]:
        """Extract relevant lessons from episodes, deduplicated in order."""
        all_lessons = []
        for episode in episodes:
            all_lessons.extend(episode.lessons_learned)
        return list(dict.fromkeys(all_lessons))

    async def get_advice(self, situation: str) -> str:
        """Get advice based on past episodes."""
        similar_episodes = await self.recall_similar(situation, top_k=3)
        if not similar_episodes:
            return "No relevant past experiences found."
        context = "\n".join([
            f"Past experience: {e.title}\n  Outcome: {e.outcome}\n  Lessons: {', '.join(e.lessons_learned)}"
            for e in similar_episodes
        ])
        prompt = f"""Based on these past experiences:

{context}

Provide advice for this situation: {situation}"""
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300
        )
        return response.choices[0].message.content
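
A minimal sketch of recording and recalling an episode. One AsyncOpenAI client serves as both the chat and embedding client; the deployment scenario is invented:

# Illustrative usage of EpisodicMemory.
import asyncio
from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI()
    episodic = EpisodicMemory(client, client)
    await episodic.record_episode(
        events=[
            {"action": "deploy to staging", "result": "health checks failed"},
            {"action": "roll back and inspect logs", "result": "missing env var found"}
        ],
        outcome="Deployment succeeded after adding the env var",
        importance=0.8
    )
    print(await episodic.get_advice("Staging deploy is failing health checks"))

asyncio.run(main())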
Unified Memory System
import json
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class MemoryContext:
    """Combined context from all memory systems."""
    working_memory: str
    conversation_context: str
    relevant_knowledge: list[str]
    relevant_episodes: list[str]
    advice: str = ""


class UnifiedMemorySystem:
    """Unified memory system combining all memory types."""

    def __init__(
        self,
        client: Any,
        embedding_client: Any,
        model: str = "gpt-4o-mini"
    ):
        self.client = client
        self.model = model
        self.working = WorkingMemory()
        self.conversation = CompressedConversationMemory(client, model=model)
        self.semantic = SemanticMemory(embedding_client)
        self.episodic = EpisodicMemory(client, embedding_client, model=model)

    async def get_context(self, query: str) -> MemoryContext:
        """Get unified context from all memory systems."""
        # Get working memory context
        working_context = self.working.get_context()
        # Get conversation context
        conversation_context = self.conversation.get_context_with_summaries()
        # Search semantic memory
        relevant_facts = await self.semantic.search(query, top_k=5)
        knowledge = [f.content for f in relevant_facts]
        # Recall relevant episodes
        similar_episodes = await self.episodic.recall_similar(query, top_k=3)
        episodes = [f"{e.title}: {e.summary}" for e in similar_episodes]
        # Get advice if episodes found
        advice = ""
        if similar_episodes:
            advice = await self.episodic.get_advice(query)
        return MemoryContext(
            working_memory=working_context,
            conversation_context=conversation_context,
            relevant_knowledge=knowledge,
            relevant_episodes=episodes,
            advice=advice
        )

    def format_context_for_prompt(self, context: MemoryContext) -> str:
        """Format memory context for inclusion in prompt."""
        parts = []
        if context.working_memory:
            parts.append("=== Working Memory ===")
            parts.append(context.working_memory)
            parts.append("")
        if context.relevant_knowledge:
            parts.append("=== Relevant Knowledge ===")
            for fact in context.relevant_knowledge[:5]:
                parts.append(f"- {fact}")
            parts.append("")
        if context.relevant_episodes:
            parts.append("=== Relevant Past Experiences ===")
            for episode in context.relevant_episodes[:3]:
                parts.append(f"- {episode}")
            parts.append("")
        if context.advice:
            parts.append("=== Advice from Past Experience ===")
            parts.append(context.advice)
            parts.append("")
        return "\n".join(parts)

    async def learn_from_interaction(
        self,
        user_message: str,
        assistant_response: str,
        outcome: Optional[str] = None
    ):
        """Learn from an interaction."""
        # Add to conversation memory
        self.conversation.add_user_message(user_message)
        self.conversation.add_assistant_message(assistant_response)
        await self.conversation.compress_if_needed()
        # Extract facts if present
        facts = await self._extract_facts(user_message, assistant_response)
        for fact in facts:
            await self.semantic.add_fact(fact)
        # Record episode if significant
        if outcome:
            await self.episodic.record_episode(
                events=[
                    {"action": "user_query", "result": user_message},
                    {"action": "assistant_response", "result": assistant_response[:500]}
                ],
                outcome=outcome
            )

    async def _extract_facts(self, user_message: str, response: str) -> list[str]:
        """Extract factual information from interaction."""
        prompt = f"""Extract any factual information from this exchange that should be remembered.
Return a JSON object of the form {{"facts": ["...", "..."]}}, with an empty list if there are none.

User: {user_message}
Assistant: {response[:1000]}"""
        result = await self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"}
        )
        try:
            data = json.loads(result.choices[0].message.content)
            return data.get("facts", [])
        except (json.JSONDecodeError, TypeError):
            return []
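
Wiring it together might look like the sketch below. A single AsyncOpenAI client serves both chat and embeddings; the interaction content is invented:

# Illustrative usage of UnifiedMemorySystem.
import asyncio
from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI()
    memory = UnifiedMemorySystem(client, client)
    await memory.learn_from_interaction(
        user_message="I prefer TypeScript for frontend work.",
        assistant_response="Noted, I'll use TypeScript in frontend examples."
    )
    context = await memory.get_context("Which language should the UI use?")
    print(memory.format_context_for_prompt(context))

asyncio.run(main())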
Production Memory Service
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional

app = FastAPI()

# Initialize at startup with actual chat/embedding clients
memory_system: Optional[UnifiedMemorySystem] = None


def get_memory() -> UnifiedMemorySystem:
    """Return the memory system, or fail fast if it is not initialized."""
    if memory_system is None:
        raise HTTPException(status_code=503, detail="Memory system not initialized")
    return memory_system


class WorkingMemoryRequest(BaseModel):
    content: str
    memory_type: str
    importance: float = 0.5


class ConversationRequest(BaseModel):
    role: str
    content: str


class SemanticMemoryRequest(BaseModel):
    content: str
    memory_type: str = "fact"
    importance: float = 0.5


class EpisodeRequest(BaseModel):
    events: list[dict]
    outcome: str
    importance: float = 0.5


class QueryRequest(BaseModel):
    query: str
    top_k: int = 5


class ContextRequest(BaseModel):
    query: str
    include_working: bool = True
    include_conversation: bool = True
    include_semantic: bool = True
    include_episodic: bool = True


@app.post("/v1/memory/working/add")
async def add_to_working_memory(request: WorkingMemoryRequest):
    """Add item to working memory."""
    get_memory().working.add(
        request.content,
        request.memory_type,
        request.importance
    )
    return {"status": "added"}


@app.get("/v1/memory/working")
async def get_working_memory():
    """Get working memory context."""
    memory = get_memory()
    return {
        "context": memory.working.get_context(),
        "item_count": len(memory.working.items),
        "current_goal": memory.working.current_goal
    }


@app.post("/v1/memory/working/goal")
async def set_goal(goal: str):
    """Set current goal (passed as a query parameter)."""
    get_memory().working.set_goal(goal)
    return {"status": "goal_set", "goal": goal}


@app.delete("/v1/memory/working")
async def clear_working_memory():
    """Clear working memory."""
    get_memory().working.clear()
    return {"status": "cleared"}


@app.post("/v1/memory/conversation/add")
async def add_to_conversation(request: ConversationRequest):
    """Add message to conversation memory."""
    memory = get_memory()
    try:
        role = MessageRole(request.role)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid role: {request.role}")
    memory.conversation.add_message(role, request.content)
    return {"status": "added", "message_count": len(memory.conversation.messages)}


@app.get("/v1/memory/conversation")
async def get_conversation(limit: int = 20):
    """Get conversation history."""
    memory = get_memory()
    return {
        "messages": memory.conversation.get_messages(limit),
        "total_count": len(memory.conversation.messages),
        "summaries": memory.conversation.summaries
    }


@app.post("/v1/memory/semantic/add")
async def add_to_semantic_memory(request: SemanticMemoryRequest):
    """Add to semantic memory."""
    memory_id = await get_memory().semantic.add(
        request.content,
        request.memory_type,
        request.importance
    )
    return {"status": "added", "memory_id": memory_id}


@app.post("/v1/memory/semantic/search")
async def search_semantic_memory(request: QueryRequest):
    """Search semantic memory."""
    results = await get_memory().semantic.search(request.query, request.top_k)
    return {
        "results": [
            {
                "id": r.id,
                "content": r.content,
                "type": r.memory_type,
                "importance": r.importance
            }
            for r in results
        ]
    }


@app.post("/v1/memory/episodic/record")
async def record_episode(request: EpisodeRequest):
    """Record an episode."""
    episode_id = await get_memory().episodic.record_episode(
        request.events,
        request.outcome,
        request.importance
    )
    return {"status": "recorded", "episode_id": episode_id}


@app.post("/v1/memory/episodic/recall")
async def recall_episodes(request: QueryRequest):
    """Recall similar episodes."""
    episodes = await get_memory().episodic.recall_similar(request.query, request.top_k)
    return {
        "episodes": [
            {
                "id": e.id,
                "title": e.title,
                "summary": e.summary,
                "outcome": e.outcome,
                "lessons": e.lessons_learned
            }
            for e in episodes
        ]
    }


@app.post("/v1/memory/context")
async def get_unified_context(request: ContextRequest):
    """Get unified memory context."""
    memory = get_memory()
    context = await memory.get_context(request.query)
    formatted = memory.format_context_for_prompt(context)
    return {
        "formatted_context": formatted,
        "working_memory": context.working_memory if request.include_working else None,
        "conversation": context.conversation_context if request.include_conversation else None,
        "knowledge": context.relevant_knowledge if request.include_semantic else None,
        "episodes": context.relevant_episodes if request.include_episodic else None,
        "advice": context.advice
    }


@app.get("/health")
async def health():
    return {"status": "healthy"}
Conclusion
Effective agent memory requires multiple complementary systems working together. Working memory maintains the active context for the current task—observations, thoughts, actions, and a scratchpad for temporary data. Conversation memory preserves dialogue history with compression to handle long conversations within token limits. Long-term semantic memory stores facts and knowledge using vector embeddings for efficient retrieval. Episodic memory records past experiences with summaries and lessons learned, enabling agents to learn from history. The unified memory system combines all these components, providing rich context for each interaction.

Key patterns include importance-based decay to forget irrelevant information, attention-based focus for working memory, automatic compression for conversation history, and similarity-based retrieval for long-term memories. Start with simple conversation memory, add working memory for task context, implement semantic memory for persistent knowledge, and layer in episodic memory as your agent handles more complex, multi-step tasks.
