Introduction: Neither keyword search nor semantic search is perfect alone. Keyword search excels at exact matches and specific terms but misses semantic relationships. Semantic search understands meaning but can miss exact phrases and rare terms. Hybrid search combines both approaches, leveraging the strengths of each to deliver superior retrieval quality. This guide covers practical hybrid search implementation: building keyword and semantic search components, fusion algorithms that combine results intelligently, weighting strategies for different use cases, and production patterns for scalable hybrid retrieval. Whether you’re building RAG systems, document search, or e-commerce search, hybrid approaches consistently outperform single-method retrieval.

Keyword Search Component
from dataclasses import dataclass, field
from typing import Any, Optional
import re
from collections import Counter
import math
@dataclass
class SearchResult:
    """A single hit returned by one of the searchers in this file."""
    # Identifier of the matched document.
    doc_id: str
    # Text of the matched document.
    content: str
    # Relevance score as reported by the searcher that produced the hit.
    score: float
    # Extra per-document data; every instance gets its own empty dict by default.
    metadata: dict = field(default_factory=dict)
class BM25Searcher:
    """In-memory BM25 keyword search.

    ``index`` builds per-document term statistics and ``search`` ranks the
    indexed documents against a query with the BM25 formula.

    Args:
        k1: Term-frequency saturation parameter of BM25.
        b: Strength of document-length normalization; ``0`` disables it.
    """

    # Compiled/built once instead of on every ``_tokenize`` call.
    _TOKEN_PATTERN = re.compile(r'\b\w+\b')
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare',
        'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
        'from', 'as', 'into', 'through', 'during', 'before', 'after',
        'above', 'below', 'between', 'under', 'again', 'further', 'then',
        'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
        'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just',
    })

    def __init__(
        self,
        k1: float = 1.5,
        b: float = 0.75
    ):
        self.k1 = k1
        self.b = b
        self.documents: dict[str, str] = {}
        self.doc_metadata: dict[str, dict] = {}
        self.doc_lengths: dict[str, int] = {}
        self.avg_doc_length: float = 0
        self.term_frequencies: dict[str, dict[str, int]] = {}
        self.doc_frequencies: dict[str, int] = {}
        self.total_docs: int = 0

    def index(self, documents: list[dict]):
        """Replace the current index with ``documents``.

        Args:
            documents: Dicts with an ``"id"`` and a ``"content"`` key and an
                optional ``"metadata"`` dict. When an id occurs more than
                once, the last occurrence wins.

        Raises:
            KeyError: If a document lacks ``"id"`` or ``"content"``.
        """
        # Collapse duplicate ids first so a repeated document is counted once
        # in ``total_docs`` and in the document frequencies.
        unique_docs = {doc["id"]: doc for doc in documents}

        self.documents = {}
        self.doc_metadata = {}
        self.doc_lengths = {}
        self.term_frequencies = {}
        self.doc_frequencies = Counter()

        for doc_id, doc in unique_docs.items():
            content = doc["content"]
            self.documents[doc_id] = content
            # Kept so search results can carry metadata (e.g. for filtering).
            self.doc_metadata[doc_id] = dict(doc.get("metadata") or {})

            tokens = self._tokenize(content)
            self.doc_lengths[doc_id] = len(tokens)

            term_counts = Counter(tokens)
            self.term_frequencies[doc_id] = dict(term_counts)
            # One increment per distinct term in this document.
            self.doc_frequencies.update(term_counts.keys())

        self.total_docs = len(self.documents)
        self.avg_doc_length = sum(self.doc_lengths.values()) / max(1, self.total_docs)

    def search(self, query: str, k: int = 10) -> "list[SearchResult]":
        """Return up to ``k`` documents with a positive BM25 score.

        Args:
            query: Free-text query; tokenized like the indexed content.
            k: Maximum number of results. Values ``<= 0`` yield no results.

        Returns:
            Results ordered by descending score, each carrying a copy of the
            metadata supplied at index time.
        """
        if k <= 0:
            return []

        query_tokens = self._tokenize(query)
        if not query_tokens:
            return []

        scores = {}
        for doc_id in self.documents:
            score = self._score_document(doc_id, query_tokens)
            if score > 0:
                scores[doc_id] = score

        ranked = sorted(
            scores.items(),
            key=lambda item: item[1],
            reverse=True
        )[:k]

        return [
            SearchResult(
                doc_id=doc_id,
                content=self.documents[doc_id],
                score=score,
                metadata=dict(self.doc_metadata.get(doc_id, {}))
            )
            for doc_id, score in ranked
        ]

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase ``text`` and split it into terms.

        Stopwords and single-character tokens are dropped.
        """
        return [
            token
            for token in self._TOKEN_PATTERN.findall(text.lower())
            if len(token) > 1 and token not in self._STOPWORDS
        ]

    def _score_document(
        self,
        doc_id: str,
        query_tokens: list[str]
    ) -> float:
        """Return the BM25 score of document ``doc_id`` for ``query_tokens``.

        A term repeated in the query contributes once per repetition.
        """
        tf_dict = self.term_frequencies[doc_id]
        doc_length = self.doc_lengths[doc_id]
        # ``or 1.0`` only matters when every indexed document is empty; no
        # term can match then, but the division below must still be safe.
        avg_length = self.avg_doc_length or 1.0
        length_norm = 1 - self.b + self.b * doc_length / avg_length

        score = 0.0
        for term in query_tokens:
            tf = tf_dict.get(term, 0)
            df = self.doc_frequencies.get(term, 0)
            if tf == 0 or df == 0:
                # Term absent from this document (or from the whole index).
                continue
            # The "+ 1" inside the log keeps the IDF positive for common terms.
            idf = math.log((self.total_docs - df + 0.5) / (df + 0.5) + 1)
            score += idf * (tf * (self.k1 + 1)) / (tf + self.k1 * length_norm)
        return score
class ElasticsearchSearcher:
    """Elasticsearch-based keyword search.

    Args:
        client: Object exposing an awaitable ``search(index=..., body=...)``.
        index_name: Name of the index to query.
    """

    def __init__(self, client: Any, index_name: str):
        self.client = client
        self.index_name = index_name

    async def search(
        self,
        query: str,
        k: int = 10,
        filters: Optional[dict] = None
    ) -> list[SearchResult]:
        """Search using Elasticsearch.

        Args:
            query: Text matched against the ``title`` (boosted x2) and
                ``content`` fields.
            k: Number of hits requested via the ``size`` parameter.
            filters: Optional ``{field: value}`` exact-match filters. A list,
                tuple or set value matches any of its members.

        Returns:
            One ``SearchResult`` per hit, with the hit's ``_source`` as
            metadata.
        """
        response = await self.client.search(
            index=self.index_name,
            body=self._build_body(query, k, filters)
        )

        results = []
        for hit in response["hits"]["hits"]:
            source = hit["_source"]
            results.append(
                SearchResult(
                    doc_id=hit["_id"],
                    content=source.get("content", ""),
                    score=hit["_score"],
                    metadata=source
                )
            )
        return results

    @staticmethod
    def _build_body(query: str, k: int, filters: Optional[dict]) -> dict:
        """Build the request body for ``search``."""
        bool_query: dict = {
            "must": [
                {
                    "multi_match": {
                        "query": query,
                        "fields": ["title^2", "content"],
                        "type": "best_fields"
                    }
                }
            ]
        }
        if filters:
            # A ``term`` query takes a single value; multi-valued filters
            # need ``terms`` instead.
            bool_query["filter"] = [
                {"terms": {field_name: list(value)}}
                if isinstance(value, (list, tuple, set))
                else {"term": {field_name: value}}
                for field_name, value in filters.items()
            ]
        return {"query": {"bool": bool_query}, "size": k}
Semantic Search Component
from dataclasses import dataclass
from typing import Any, Optional
import numpy as np
class SemanticSearcher:
    """Semantic search using embeddings.

    Args:
        embedding_client: Client exposing an awaitable
            ``embeddings.create(model=..., input=...)``.
        vector_store: Store exposing awaitable ``upsert`` and ``query``.
        model: Embedding model name passed to the client.
    """

    def __init__(
        self,
        embedding_client: Any,
        vector_store: Any,
        model: str = "text-embedding-3-small"
    ):
        self.embedding_client = embedding_client
        self.vector_store = vector_store
        self.model = model

    async def index(self, documents: list[dict]):
        """Embed ``documents`` and upsert them into the vector store.

        Args:
            documents: Dicts with ``"id"`` and ``"content"`` keys and an
                optional ``"metadata"`` dict, which is stored next to the
                content. An empty list is a no-op.
        """
        if not documents:
            # Nothing to embed; skip the embeddings request entirely.
            return

        texts = [doc["content"] for doc in documents]
        embeddings = await self._embed_batch(texts)

        for doc, embedding in zip(documents, embeddings):
            await self.vector_store.upsert(
                id=doc["id"],
                vector=embedding,
                metadata={
                    "content": doc["content"],
                    **doc.get("metadata", {})
                }
            )

    async def search(
        self,
        query: str,
        k: int = 10,
        filters: Optional[dict] = None
    ) -> "list[SearchResult]":
        """Search using semantic similarity.

        Args:
            query: Text to embed and look up.
            k: Passed to the vector store as ``top_k``.
            filters: Passed through unchanged as the store's ``filter``.

        Returns:
            One ``SearchResult`` per entry returned by the vector store.
        """
        query_embedding = await self._embed(query)

        matches = await self.vector_store.query(
            vector=query_embedding,
            top_k=k,
            filter=filters
        )

        return [
            SearchResult(
                doc_id=match["id"],
                content=match["metadata"].get("content", ""),
                score=match["score"],
                metadata=match["metadata"]
            )
            for match in matches
        ]

    async def _embed(self, text: str) -> list[float]:
        """Embed single text."""
        response = await self.embedding_client.embeddings.create(
            model=self.model,
            input=text
        )
        return response.data[0].embedding

    async def _embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed batch of texts; an empty batch returns ``[]`` without a request."""
        if not texts:
            return []
        response = await self.embedding_client.embeddings.create(
            model=self.model,
            input=texts
        )
        return [item.embedding for item in response.data]
class ColBERTSearcher:
    """Late-interaction (ColBERT-style) searcher that delegates to an index."""

    def __init__(
        self,
        model: Any,
        index: Any
    ):
        self.model = model
        self.index = index

    async def search(
        self,
        query: str,
        k: int = 10
    ) -> list[SearchResult]:
        """Encode ``query`` per token and look the embeddings up in the index.

        Returns one ``SearchResult`` for each entry the index returns.
        """
        token_embeddings = await self._encode_query(query)

        # The index is expected to do the late-interaction (MaxSim) scoring.
        hits = await self.index.search(token_embeddings, k=k)

        ranked = []
        for hit in hits:
            ranked.append(
                SearchResult(
                    doc_id=hit["id"],
                    content=hit["content"],
                    score=hit["score"]
                )
            )
        return ranked

    async def _encode_query(self, query: str) -> np.ndarray:
        """Return token embeddings for ``query``.

        Placeholder: always yields zeros and does not use ``self.model``.
        """
        placeholder_shape = (32, 128)  # 32 tokens, 128 dims
        return np.zeros(placeholder_shape)
Result Fusion
from dataclasses import dataclass
from typing import Any, Optional
from enum import Enum
class FusionMethod(Enum):
    """Strategies for combining keyword and semantic result lists.

    The values are the lowercase strings accepted by ``FusionMethod(value)``.
    """
    RRF = "rrf"  # Reciprocal Rank Fusion
    LINEAR = "linear"  # Linear combination
    CONVEX = "convex"  # Convex combination
    # NOTE: ResultFuser.fuse has no branch for LEARNED and raises ValueError.
    LEARNED = "learned"  # Learned fusion
@dataclass
class FusedResult:
    """One document in a fused ranking.

    Attributes:
        doc_id: Identifier of the document.
        content: Text of the document.
        fused_score: Combined score used for the final ordering.
        keyword_score: Score from the keyword side, if any.
        semantic_score: Score from the semantic side, if any.
        metadata: Metadata carried over from the source result, if any.
    """

    doc_id: str
    content: str
    fused_score: float
    # These default to None, so they are annotated as optional.
    keyword_score: Optional[float] = None
    semantic_score: Optional[float] = None
    metadata: Optional[dict] = None
class ResultFuser:
    """Fuse results from multiple search methods.

    Args:
        method: Fusion strategy to apply.
        keyword_weight: Weight of the keyword scores (linear/convex only).
        semantic_weight: Weight of the semantic scores (linear/convex only).
        rrf_k: Constant added to every rank in Reciprocal Rank Fusion.
    """

    def __init__(
        self,
        method: FusionMethod = FusionMethod.RRF,
        keyword_weight: float = 0.5,
        semantic_weight: float = 0.5,
        rrf_k: int = 60
    ):
        self.method = method
        self.keyword_weight = keyword_weight
        self.semantic_weight = semantic_weight
        self.rrf_k = rrf_k

    def fuse(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int = 10
    ) -> list[FusedResult]:
        """Fuse keyword and semantic results into the top ``k``.

        Raises:
            ValueError: If ``self.method`` has no implementation here (this
                includes ``FusionMethod.LEARNED``), or if convex fusion is
                requested with weights that sum to zero.
        """
        if self.method == FusionMethod.RRF:
            return self._rrf_fusion(keyword_results, semantic_results, k)
        elif self.method == FusionMethod.LINEAR:
            return self._linear_fusion(keyword_results, semantic_results, k)
        elif self.method == FusionMethod.CONVEX:
            return self._convex_fusion(keyword_results, semantic_results, k)
        else:
            raise ValueError(f"Unknown fusion method: {self.method}")

    def _rrf_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Reciprocal Rank Fusion.

        Each list contributes ``1 / (rrf_k + rank)`` per document, with ranks
        starting at 1. The configured weights are not used. The reported
        keyword/semantic scores are the raw input scores, or ``None`` when the
        document is missing from that list.
        """
        scores: dict[str, dict] = {}

        for rank, result in enumerate(keyword_results, start=1):
            entry = scores.setdefault(result.doc_id, {
                "content": result.content,
                "rrf_score": 0,
                "keyword_score": result.score,
                "semantic_score": None,
                "metadata": result.metadata
            })
            entry["rrf_score"] += 1 / (self.rrf_k + rank)

        for rank, result in enumerate(semantic_results, start=1):
            entry = scores.get(result.doc_id)
            if entry is None:
                entry = scores[result.doc_id] = {
                    "content": result.content,
                    "rrf_score": 0,
                    "keyword_score": None,
                    "semantic_score": result.score,
                    "metadata": result.metadata
                }
            else:
                entry["semantic_score"] = result.score
                if not entry["metadata"]:
                    # Keyword hits may carry no metadata; don't lose the
                    # semantic side's metadata in that case.
                    entry["metadata"] = result.metadata
            entry["rrf_score"] += 1 / (self.rrf_k + rank)

        ranked = sorted(
            scores.items(),
            key=lambda item: item[1]["rrf_score"],
            reverse=True
        )[:k]

        return [
            FusedResult(
                doc_id=doc_id,
                content=data["content"],
                fused_score=data["rrf_score"],
                keyword_score=data["keyword_score"],
                semantic_score=data["semantic_score"],
                metadata=data["metadata"]
            )
            for doc_id, data in ranked
        ]

    def _linear_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Linear combination of normalized scores using the instance weights."""
        return self._weighted_fusion(
            keyword_results,
            semantic_results,
            k,
            self.keyword_weight,
            self.semantic_weight
        )

    def _convex_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Convex combination: the weights are rescaled to sum to 1.

        Raises:
            ValueError: If the two weights sum to zero.
        """
        total_weight = self.keyword_weight + self.semantic_weight
        if total_weight == 0:
            raise ValueError(
                "Convex fusion requires keyword_weight + semantic_weight != 0"
            )
        # The rescaled weights are passed explicitly rather than written to
        # ``self``, so the instance is never left in a modified state.
        return self._weighted_fusion(
            keyword_results,
            semantic_results,
            k,
            self.keyword_weight / total_weight,
            self.semantic_weight / total_weight
        )

    def _weighted_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int,
        keyword_weight: float,
        semantic_weight: float
    ) -> list[FusedResult]:
        """Combine min-max normalized scores with the given weights.

        A document missing from one list scores 0 on that side. The reported
        keyword/semantic scores are the normalized values.
        """
        keyword_scores = self._normalize_scores(keyword_results)
        semantic_scores = self._normalize_scores(semantic_results)

        merged: dict[str, dict] = {}
        for result in keyword_results:
            merged[result.doc_id] = {
                "content": result.content,
                "keyword_score": keyword_scores.get(result.doc_id, 0),
                "semantic_score": 0,
                "metadata": result.metadata
            }

        for result in semantic_results:
            entry = merged.get(result.doc_id)
            if entry is None:
                merged[result.doc_id] = {
                    "content": result.content,
                    "keyword_score": 0,
                    "semantic_score": semantic_scores.get(result.doc_id, 0),
                    "metadata": result.metadata
                }
                continue
            entry["semantic_score"] = semantic_scores.get(result.doc_id, 0)
            if not entry["metadata"]:
                # Same metadata fallback as in RRF fusion.
                entry["metadata"] = result.metadata

        for entry in merged.values():
            entry["fused_score"] = (
                keyword_weight * entry["keyword_score"] +
                semantic_weight * entry["semantic_score"]
            )

        ranked = sorted(
            merged.items(),
            key=lambda item: item[1]["fused_score"],
            reverse=True
        )[:k]

        return [
            FusedResult(
                doc_id=doc_id,
                content=data["content"],
                fused_score=data["fused_score"],
                keyword_score=data["keyword_score"],
                semantic_score=data["semantic_score"],
                metadata=data["metadata"]
            )
            for doc_id, data in ranked
        ]

    def _normalize_scores(
        self,
        results: list[SearchResult]
    ) -> dict[str, float]:
        """Min-max normalize scores to the [0, 1] range, keyed by ``doc_id``.

        When all scores are equal (including a single result) every document
        maps to 1.0. An empty list yields an empty dict.
        """
        if not results:
            return {}

        raw_scores = [result.score for result in results]
        lowest = min(raw_scores)
        highest = max(raw_scores)

        if highest == lowest:
            return {result.doc_id: 1.0 for result in results}

        span = highest - lowest
        return {
            result.doc_id: (result.score - lowest) / span
            for result in results
        }
Hybrid Search Implementation
from dataclasses import dataclass
from typing import Any, Optional
import asyncio
@dataclass
class HybridSearchConfig:
    """Configuration for hybrid search."""
    # Weights handed to ResultFuser by HybridSearcher. RRF fusion (the default
    # method) ranks by position only and does not use them.
    keyword_weight: float = 0.3
    semantic_weight: float = 0.7
    fusion_method: FusionMethod = FusionMethod.RRF
    # Candidates requested from each searcher before fusion.
    keyword_k: int = 20  # Retrieve more for fusion
    semantic_k: int = 20
    # Number of fused results returned when the caller passes no ``k``.
    final_k: int = 10
class HybridSearcher:
    """Runs a keyword and a semantic search for a query and fuses the hits."""

    def __init__(
        self,
        keyword_searcher: BM25Searcher,
        semantic_searcher: SemanticSearcher,
        config: HybridSearchConfig = None
    ):
        self.keyword_searcher = keyword_searcher
        self.semantic_searcher = semantic_searcher
        self.config = config if config else HybridSearchConfig()
        # The fuser is configured once, from the config given at construction.
        self.fuser = ResultFuser(
            method=self.config.fusion_method,
            keyword_weight=self.config.keyword_weight,
            semantic_weight=self.config.semantic_weight
        )

    async def search(
        self,
        query: str,
        k: int = None,
        filters: dict = None
    ) -> list[FusedResult]:
        """Return the fused top ``k`` results for ``query``.

        A falsy ``k`` (``None`` or ``0``) falls back to ``config.final_k``.
        ``filters`` is forwarded to the semantic searcher and applied to the
        keyword hits by metadata equality.
        """
        limit = k or self.config.final_k

        # Schedule both lookups as tasks, then wait for the pair.
        pending = [
            asyncio.create_task(self._keyword_search(query, filters)),
            asyncio.create_task(
                self.semantic_searcher.search(
                    query,
                    k=self.config.semantic_k,
                    filters=filters
                )
            ),
        ]
        keyword_hits, semantic_hits = await asyncio.gather(*pending)

        return self.fuser.fuse(keyword_hits, semantic_hits, k=limit)

    async def _keyword_search(
        self,
        query: str,
        filters: dict = None
    ) -> list[SearchResult]:
        """Async wrapper around the synchronous keyword searcher.

        The filter runs after retrieval, so fewer than ``config.keyword_k``
        hits may remain.
        """
        hits = self.keyword_searcher.search(query, k=self.config.keyword_k)
        if not filters:
            return hits

        def matches(hit: SearchResult) -> bool:
            # Every filter entry must equal the hit's metadata value.
            return all(
                hit.metadata.get(name) == wanted
                for name, wanted in filters.items()
            )

        return [hit for hit in hits if matches(hit)]
class AdaptiveHybridSearcher:
    """Hybrid searcher with adaptive weighting.

    The keyword/semantic weights are chosen per query by simple heuristics
    and applied through linear fusion.
    """

    # Alphanumeric runs; underscores and punctuation act as separators.
    _TOKEN_PATTERN = re.compile(r"[A-Za-z0-9]+")
    # Splits identifiers such as "ValueError" or "HTTPError" into their parts.
    _CAMEL_PART_PATTERN = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+")
    _TECHNICAL_TERMS = frozenset({
        'error', 'errors', 'exception', 'exceptions', 'api', 'apis',
        'function', 'functions', 'class', 'classes', 'method', 'methods',
    })
    _QUESTION_WORDS = frozenset({'what', 'why', 'how', 'when', 'where', 'which'})

    def __init__(
        self,
        keyword_searcher: "BM25Searcher",
        semantic_searcher: "SemanticSearcher"
    ):
        self.keyword_searcher = keyword_searcher
        self.semantic_searcher = semantic_searcher

    async def search(
        self,
        query: str,
        k: int = 10
    ) -> "list[FusedResult]":
        """Search with adaptive weights based on query.

        Each searcher is asked for ``2 * k`` candidates and the fused top
        ``k`` are returned.
        """
        weights = self._analyze_query(query)

        fuser = ResultFuser(
            method=FusionMethod.LINEAR,
            keyword_weight=weights["keyword"],
            semantic_weight=weights["semantic"]
        )

        keyword_results = self.keyword_searcher.search(query, k=k * 2)
        semantic_results = await self.semantic_searcher.search(query, k=k * 2)
        return fuser.fuse(keyword_results, semantic_results, k)

    def _analyze_query(self, query: str) -> dict[str, float]:
        """Pick fusion weights for ``query``.

        Rules are checked in order; the first match wins:
          1. a double quote anywhere  -> keyword 0.8 / semantic 0.2
          2. a technical term         -> keyword 0.6 / semantic 0.4
          3. a leading question word  -> keyword 0.3 / semantic 0.7
          4. otherwise                -> keyword 0.4 / semantic 0.6

        Terms are matched on whole words (and on the parts of camelCase
        identifiers), not on substrings, so "rapid" does not count as "api"
        and "however" does not count as "how".
        """
        # Exact phrase queries favor keyword search
        if '"' in query:
            return {"keyword": 0.8, "semantic": 0.2}

        tokens = self._TOKEN_PATTERN.findall(query)
        words = {token.lower() for token in tokens}
        for token in tokens:
            # Lets "ValueError" match "error" while "terror" still does not.
            words.update(
                part.lower()
                for part in self._CAMEL_PART_PATTERN.findall(token)
            )

        # Technical terms favor keyword search
        if words & self._TECHNICAL_TERMS:
            return {"keyword": 0.6, "semantic": 0.4}

        # Question queries favor semantic search
        if tokens and tokens[0].lower() in self._QUESTION_WORDS:
            return {"keyword": 0.3, "semantic": 0.7}

        # Default balanced weights
        return {"keyword": 0.4, "semantic": 0.6}
Production Hybrid Search Service
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional
app = FastAPI()
# Initialize components
# These module-level placeholders start as None; the route handlers below call
# methods on them directly, so they must be assigned real instances first.
keyword_searcher: Optional[BM25Searcher] = None # Initialize BM25
semantic_searcher: Optional[SemanticSearcher] = None # Initialize with embedding client
# Rebuilt by the /v1/config handler.
hybrid_searcher: Optional[HybridSearcher] = None
# Request body for POST /v1/index.
class IndexRequest(BaseModel):
    # Each dict is read via its "id" and "content" keys by the searchers;
    # SemanticSearcher.index also stores an optional "metadata" dict.
    documents: list[dict]
# Request body for POST /v1/search.
class SearchRequest(BaseModel):
    query: str
    # Maximum number of results to return.
    k: int = 10
    # Any value other than "keyword" or "semantic" is handled as hybrid.
    method: str = "hybrid" # keyword, semantic, hybrid
    # Weights are only read by the hybrid branch of the handler.
    keyword_weight: Optional[float] = 0.3
    semantic_weight: Optional[float] = 0.7
    # Forwarded by the semantic and hybrid branches; the keyword branch
    # does not use it.
    filters: Optional[dict] = None
# Request body for POST /v1/config.
class ConfigRequest(BaseModel):
    keyword_weight: float = 0.3
    semantic_weight: float = 0.7
    # Converted with FusionMethod(...) by the handler, so it must be one of
    # the enum's string values.
    fusion_method: str = "rrf"
@app.post("/v1/index")
async def index_documents(request: IndexRequest):
    """Index documents for search."""
    # Report missing components as a clear 503 instead of an AttributeError.
    if keyword_searcher is None or semantic_searcher is None:
        raise HTTPException(
            status_code=503,
            detail="Search components are not initialized"
        )

    # Both searchers read doc["id"] and doc["content"]; reject bad input up
    # front so neither index is touched by a partially valid request.
    invalid_positions = [
        position
        for position, doc in enumerate(request.documents)
        if "id" not in doc or "content" not in doc
    ]
    if invalid_positions:
        raise HTTPException(
            status_code=422,
            detail=f"Documents missing 'id' or 'content' at positions: {invalid_positions}"
        )

    # Index for keyword search (BM25Searcher.index rebuilds its index from
    # this batch, whereas the semantic side upserts).
    keyword_searcher.index(request.documents)
    # Index for semantic search
    await semantic_searcher.index(request.documents)
    return {
        "status": "indexed",
        "document_count": len(request.documents)
    }
def _preview(content: str, limit: int = 200) -> str:
    """Return ``content`` cut to ``limit`` characters, adding "..." when cut."""
    return content[:limit] + "..." if len(content) > limit else content


def _request_weight(request: SearchRequest, field_name: str, fallback: float) -> float:
    """Return a weight the client sent explicitly, else ``fallback``.

    A ``null`` weight counts as "not provided".
    """
    # pydantic v2 exposes ``model_fields_set``; v1 exposes ``__fields_set__``.
    provided = getattr(request, "model_fields_set", None)
    if provided is None:
        provided = getattr(request, "__fields_set__", set())
    value = getattr(request, field_name)
    if field_name in provided and value is not None:
        return value
    return fallback


@app.post("/v1/search")
async def search(request: SearchRequest):
    """Search documents."""
    # Report missing components as a clear 503 instead of an AttributeError.
    if keyword_searcher is None or semantic_searcher is None:
        raise HTTPException(
            status_code=503,
            detail="Search components are not initialized"
        )

    if request.method == "keyword":
        results = keyword_searcher.search(request.query, k=request.k)
        return {
            "query": request.query,
            "method": "keyword",
            "results": [
                {
                    "doc_id": r.doc_id,
                    "content": _preview(r.content),
                    "score": r.score
                }
                for r in results
            ]
        }

    if request.method == "semantic":
        results = await semantic_searcher.search(
            request.query,
            k=request.k,
            filters=request.filters
        )
        return {
            "query": request.query,
            "method": "semantic",
            "results": [
                {
                    "doc_id": r.doc_id,
                    "content": _preview(r.content),
                    "score": r.score
                }
                for r in results
            ]
        }

    # hybrid (also the fallback for any other method value)
    # Start from the configuration installed through /v1/config, if any, so
    # the configured fusion method and candidate sizes actually take effect.
    base = hybrid_searcher.config if hybrid_searcher is not None else HybridSearchConfig()
    keyword_weight = _request_weight(request, "keyword_weight", base.keyword_weight)
    semantic_weight = _request_weight(request, "semantic_weight", base.semantic_weight)
    config = HybridSearchConfig(
        keyword_weight=keyword_weight,
        semantic_weight=semantic_weight,
        fusion_method=base.fusion_method,
        keyword_k=base.keyword_k,
        semantic_k=base.semantic_k,
        final_k=base.final_k
    )
    searcher = HybridSearcher(
        keyword_searcher,
        semantic_searcher,
        config
    )
    results = await searcher.search(
        request.query,
        k=request.k,
        filters=request.filters
    )
    return {
        "query": request.query,
        "method": "hybrid",
        # The weights actually used for this request.
        "weights": {
            "keyword": keyword_weight,
            "semantic": semantic_weight
        },
        "results": [
            {
                "doc_id": r.doc_id,
                "content": _preview(r.content),
                "fused_score": r.fused_score,
                "keyword_score": r.keyword_score,
                "semantic_score": r.semantic_score
            }
            for r in results
        ]
    }
@app.post("/v1/config")
async def update_config(request: ConfigRequest):
    """Update search configuration."""
    global hybrid_searcher

    # An unknown name would otherwise escape as ValueError (a 500 response).
    try:
        fusion_method = FusionMethod(request.fusion_method)
    except ValueError:
        allowed = [method.value for method in FusionMethod]
        raise HTTPException(
            status_code=400,
            detail=f"Unknown fusion_method {request.fusion_method!r}; expected one of {allowed}"
        ) from None

    # ResultFuser.fuse has no branch for LEARNED, so a searcher configured
    # with it could never answer a query.
    if fusion_method is FusionMethod.LEARNED:
        raise HTTPException(
            status_code=400,
            detail="fusion_method 'learned' is not implemented"
        )

    config = HybridSearchConfig(
        keyword_weight=request.keyword_weight,
        semantic_weight=request.semantic_weight,
        fusion_method=fusion_method
    )
    hybrid_searcher = HybridSearcher(
        keyword_searcher,
        semantic_searcher,
        config
    )
    return {
        "status": "updated",
        "config": {
            "keyword_weight": request.keyword_weight,
            "semantic_weight": request.semantic_weight,
            "fusion_method": request.fusion_method
        }
    }
# Liveness probe: always reports "healthy" and does not inspect the searchers.
@app.get("/health")
async def health():
    return {"status": "healthy"}
References
- BM25 Algorithm: https://en.wikipedia.org/wiki/Okapi_BM25
- Reciprocal Rank Fusion: https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
- ColBERT: https://arxiv.org/abs/2004.12832
- Hybrid Search in Practice: https://www.pinecone.io/learn/hybrid-search/
Conclusion
Hybrid search delivers the best of both worlds—keyword precision and semantic understanding. Start with a solid BM25 implementation for keyword search; it’s battle-tested and handles exact matches well. Add semantic search using modern embedding models for conceptual similarity. The magic happens in fusion—Reciprocal Rank Fusion (RRF) is a great default because it doesn’t require score normalization and handles different score distributions gracefully. Note that plain RRF is purely rank-based, so the keyword and semantic weights shown above only take effect with linear or convex fusion. Tune your weights based on your use case: technical documentation benefits from higher keyword weights, while conversational queries favor semantic search. Consider adaptive weighting that adjusts based on query characteristics. Run both searches in parallel to minimize latency. The key insight is that users search in different ways—sometimes they know exact terms, sometimes they describe concepts. Hybrid search handles both gracefully, consistently outperforming single-method approaches across diverse query types.
Discover more from Code, Cloud & Context
Subscribe to get the latest posts sent to your email.