Hybrid Search Strategies: Combining Keyword and Semantic Search for Superior Retrieval

Introduction: Neither keyword search nor semantic search is perfect alone. Keyword search excels at exact matches and specific terms but misses semantic relationships. Semantic search understands meaning but can miss exact phrases and rare terms. Hybrid search combines both approaches, leveraging the strengths of each to deliver superior retrieval quality. This guide covers practical hybrid search implementation: building keyword and semantic search components, fusion algorithms that combine results intelligently, weighting strategies for different use cases, and production patterns for scalable hybrid retrieval. Whether you’re building RAG systems, document search, or e-commerce search, hybrid approaches consistently outperform single-method retrieval.

Hybrid Search
Hybrid Search: Keyword Search, Semantic Search, Result Fusion

Keyword Search Component

from dataclasses import dataclass, field
from typing import Any, Optional
import re
from collections import Counter
import math

@dataclass
class SearchResult:
    """A single hit returned by one of the searchers in this module.

    The keyword, semantic and ColBERT searchers all return lists of this
    type, which is what ``ResultFuser`` consumes.
    """
    
    # Identifier of the matched document.
    doc_id: str
    # Text of the matched document.
    content: str
    # Relevance score assigned by the searcher that produced the hit.
    # NOTE(review): scores from different searchers are presumably on
    # different scales; ResultFuser min-max normalizes them before
    # linear fusion.
    score: float
    # Extra per-document data; each instance gets its own empty dict by default.
    metadata: dict = field(default_factory=dict)

class BM25Searcher:
    """In-memory BM25 keyword search.

    Documents are dicts with an ``"id"`` and a ``"content"`` key and an
    optional ``"metadata"`` dict. Calling :meth:`index` replaces any
    previously indexed corpus.

    Args:
        k1: Term-frequency saturation parameter of BM25.
        b: Document-length normalization parameter of BM25.
    """

    # Simplified English stopword list. Built once at class creation instead
    # of on every _tokenize() call.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare',
        'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
        'from', 'as', 'into', 'through', 'during', 'before', 'after',
        'above', 'below', 'between', 'under', 'again', 'further', 'then',
        'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
        'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just',
    })

    # Runs of word characters delimited by word boundaries.
    _TOKEN_PATTERN = re.compile(r'\b\w+\b')

    def __init__(
        self,
        k1: float = 1.5,
        b: float = 0.75
    ):
        self.k1 = k1
        self.b = b
        self.documents: dict[str, str] = {}
        self.doc_metadata: dict[str, dict] = {}
        self.doc_lengths: dict[str, int] = {}
        self.avg_doc_length: float = 0
        self.term_frequencies: dict[str, dict[str, int]] = {}
        self.doc_frequencies: dict[str, int] = Counter()
        self.total_docs: int = 0

    def index(self, documents: list[dict]):
        """Build the index from ``documents``, replacing the previous one.

        If the same ``"id"`` appears more than once, the last occurrence
        wins and the document is counted exactly once, so document
        frequencies and ``total_docs`` stay consistent with what is stored.

        Args:
            documents: Dicts with ``"id"`` and ``"content"`` keys and an
                optional ``"metadata"`` dict.

        Raises:
            KeyError: If a document lacks ``"id"`` or ``"content"``.
        """

        # Collapse duplicate ids up front; otherwise a repeated id would be
        # stored once but counted twice in the corpus statistics.
        unique_docs: dict[str, dict] = {}
        for doc in documents:
            unique_docs[doc["id"]] = doc

        self.documents = {}
        self.doc_metadata = {}
        self.doc_lengths = {}
        self.term_frequencies = {}
        self.doc_frequencies = Counter()

        for doc_id, doc in unique_docs.items():
            content = doc["content"]
            tokens = self._tokenize(content)
            term_counts = Counter(tokens)

            self.documents[doc_id] = content
            self.doc_metadata[doc_id] = dict(doc.get("metadata") or {})
            self.doc_lengths[doc_id] = len(tokens)
            self.term_frequencies[doc_id] = dict(term_counts)

            # Each distinct term contributes one to its document frequency.
            self.doc_frequencies.update(term_counts.keys())

        self.total_docs = len(self.documents)
        self.avg_doc_length = sum(self.doc_lengths.values()) / max(1, self.total_docs)

    def search(self, query: str, k: int = 10) -> list[SearchResult]:
        """Return up to ``k`` documents with a positive BM25 score.

        Results are ordered by descending score; ties keep indexing order.
        Each result carries a copy of the metadata supplied at index time.
        A non-positive ``k`` or a query with no usable tokens yields ``[]``.
        """

        if k <= 0:
            return []

        query_tokens = self._tokenize(query)
        if not query_tokens:
            return []

        scores = {}
        for doc_id in self.documents:
            score = self._score_document(doc_id, query_tokens)
            if score > 0:
                scores[doc_id] = score

        ranked = sorted(
            scores.items(),
            key=lambda item: item[1],
            reverse=True
        )[:k]

        return [
            SearchResult(
                doc_id=doc_id,
                content=self.documents[doc_id],
                score=score,
                # Copy so callers cannot mutate the indexed metadata.
                metadata=dict(self.doc_metadata.get(doc_id, {}))
            )
            for doc_id, score in ranked
        ]

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase ``text`` and split it into terms.

        Stopwords and single-character tokens are dropped.
        """

        return [
            token
            for token in self._TOKEN_PATTERN.findall(text.lower())
            if token not in self._STOPWORDS and len(token) > 1
        ]

    def _score_document(
        self,
        doc_id: str,
        query_tokens: list[str]
    ) -> float:
        """Return the BM25 score of document ``doc_id`` for ``query_tokens``.

        A query term that occurs several times in ``query_tokens`` is
        counted once per occurrence.
        """

        score = 0.0
        doc_length = self.doc_lengths[doc_id]
        tf_dict = self.term_frequencies[doc_id]

        for term in query_tokens:
            tf = tf_dict.get(term, 0)
            if tf == 0:
                # An absent term contributes nothing. Skipping it also avoids
                # a 0 / 0 division when k1 == 0.
                continue

            # IDF; the "+ 1" inside the log keeps it non-negative.
            df = self.doc_frequencies[term]
            idf = math.log((self.total_docs - df + 0.5) / (df + 0.5) + 1)

            # TF with length normalization. tf > 0 implies the corpus has
            # at least one token, so avg_doc_length is non-zero here.
            tf_normalized = (tf * (self.k1 + 1)) / (
                tf + self.k1 * (1 - self.b + self.b * doc_length / self.avg_doc_length)
            )

            score += idf * tf_normalized

        return score

class ElasticsearchSearcher:
    """Keyword search delegated to an Elasticsearch index.

    Args:
        client: Object exposing an awaitable ``search(index=..., body=...)``
            method. NOTE(review): presumably an async Elasticsearch client;
            confirm against the caller.
        index_name: Name of the index to query.
    """

    def __init__(self, client: Any, index_name: str):
        self.client = client
        self.index_name = index_name

    @staticmethod
    def _filter_clause(field_name: str, value: Any) -> dict:
        """Build one filter clause for ``field_name``.

        A list, tuple or set matches any of its values via ``terms``; any
        other value is an exact ``term`` match.
        """

        if isinstance(value, (list, tuple, set)):
            return {"terms": {field_name: list(value)}}
        return {"term": {field_name: value}}

    async def search(
        self,
        query: str,
        k: int = 10,
        filters: Optional[dict] = None
    ) -> list[SearchResult]:
        """Run a ``multi_match`` query and return the hits.

        Args:
            query: Free-text query, matched against ``title`` (boosted x2)
                and ``content``.
            k: Maximum number of hits requested (the ``size`` of the query).
            filters: Optional mapping of field name to required value(s),
                applied as non-scoring ``filter`` clauses.

        Returns:
            One ``SearchResult`` per hit, in the order returned by the client.
        """

        bool_query: dict = {
            "must": [
                {
                    "multi_match": {
                        "query": query,
                        "fields": ["title^2", "content"],
                        "type": "best_fields"
                    }
                }
            ]
        }

        if filters:
            # Distinct loop names: the original reused "k", shadowing the
            # result-size parameter inside the comprehension.
            bool_query["filter"] = [
                self._filter_clause(field_name, value)
                for field_name, value in filters.items()
            ]

        response = await self.client.search(
            index=self.index_name,
            body={"query": {"bool": bool_query}, "size": k}
        )

        results = []
        for hit in response["hits"]["hits"]:
            # Tolerate hits without a stored source or with a null score.
            source = hit.get("_source") or {}
            raw_score = hit.get("_score")
            results.append(
                SearchResult(
                    doc_id=hit["_id"],
                    content=source.get("content", ""),
                    score=raw_score if raw_score is not None else 0.0,
                    metadata=source
                )
            )
        return results

Semantic Search Component

from dataclasses import dataclass
from typing import Any, Optional
import numpy as np

class SemanticSearcher:
    """Semantic search backed by an embedding client and a vector store.

    Args:
        embedding_client: Object exposing an awaitable
            ``embeddings.create(model=..., input=...)`` whose response has
            a ``data`` list of items with an ``embedding`` attribute.
        vector_store: Object exposing awaitable ``upsert(id=..., vector=...,
            metadata=...)`` and ``query(vector=..., top_k=..., filter=...)``.
        model: Embedding model name passed to the client.
        batch_size: Maximum number of texts sent per embedding request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(
        self,
        embedding_client: Any,
        vector_store: Any,
        model: str = "text-embedding-3-small",
        batch_size: int = 100
    ):
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        self.embedding_client = embedding_client
        self.vector_store = vector_store
        self.model = model
        self.batch_size = batch_size

    async def index(self, documents: list[dict]):
        """Embed ``documents`` and upsert them into the vector store.

        Each document needs ``"id"`` and ``"content"``; an optional
        ``"metadata"`` dict is stored alongside. An empty list is a no-op.
        """

        if not documents:
            # Nothing to embed; avoid a pointless request with empty input.
            return

        texts = [doc["content"] for doc in documents]
        embeddings = await self._embed_batch(texts)

        for doc, embedding in zip(documents, embeddings):
            await self.vector_store.upsert(
                id=doc["id"],
                vector=embedding,
                metadata={
                    **(doc.get("metadata") or {}),
                    # Written last so a user-supplied "content" metadata key
                    # cannot replace the text that search() returns.
                    "content": doc["content"]
                }
            )

    async def search(
        self,
        query: str,
        k: int = 10,
        filters: Optional[dict] = None
    ) -> list[SearchResult]:
        """Return up to ``k`` nearest neighbours of ``query``.

        ``filters`` is forwarded unchanged to the vector store's ``filter``
        argument.
        """

        query_embedding = await self._embed(query)

        matches = await self.vector_store.query(
            vector=query_embedding,
            top_k=k,
            filter=filters
        )

        return [
            SearchResult(
                doc_id=match["id"],
                content=match["metadata"].get("content", ""),
                score=match["score"],
                metadata=match["metadata"]
            )
            for match in matches
        ]

    async def _embed(self, text: str) -> list[float]:
        """Embed a single text and return its vector."""

        response = await self.embedding_client.embeddings.create(
            model=self.model,
            input=text
        )

        return response.data[0].embedding

    async def _embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed ``texts`` in chunks of at most ``batch_size``.

        Vectors are returned in the order the client returns them.
        NOTE(review): this assumes the client preserves input order within
        a request; confirm for the client in use.
        """

        embeddings: list[list[float]] = []

        for start in range(0, len(texts), self.batch_size):
            response = await self.embedding_client.embeddings.create(
                model=self.model,
                input=texts[start:start + self.batch_size]
            )
            embeddings.extend(item.embedding for item in response.data)

        return embeddings

class ColBERTSearcher:
    """Late-interaction (ColBERT-style) search over an injected index.

    The query is turned into per-token embeddings and handed to ``index``,
    which does the scoring and returns the matches.
    """

    def __init__(
        self,
        model: Any,
        index: Any
    ):
        self.model, self.index = model, index

    async def search(
        self,
        query: str,
        k: int = 10
    ) -> list[SearchResult]:
        """Encode ``query`` and return the index's top ``k`` matches."""

        token_vectors = await self._encode_query(query)

        # The index is expected to yield dicts with "id", "content", "score".
        hits = await self.index.search(
            token_vectors,
            k=k
        )

        found = []
        for hit in hits:
            found.append(
                SearchResult(
                    doc_id=hit["id"],
                    content=hit["content"],
                    score=hit["score"]
                )
            )
        return found

    async def _encode_query(self, query: str) -> np.ndarray:
        """Return token embeddings for ``query``.

        Placeholder: ignores ``query`` and ``self.model`` and returns an
        all-zero array of 32 tokens by 128 dimensions.
        """

        placeholder_shape = (32, 128)  # 32 tokens, 128 dims
        return np.zeros(placeholder_shape)

Result Fusion

from dataclasses import dataclass
from typing import Any, Optional
from enum import Enum

class FusionMethod(Enum):
    """Strategies for merging keyword and semantic result lists.

    The string values are what ``FusionMethod(value)`` accepts when the
    method is parsed from configuration.
    """
    
    RRF = "rrf"  # Reciprocal Rank Fusion: combines by rank position
    LINEAR = "linear"  # Linear combination: weighted sum of normalized scores
    CONVEX = "convex"  # Convex combination: as linear, with weights rescaled to sum to 1
    # Learned fusion: declared here, but ResultFuser.fuse has no branch for it
    # and raises ValueError when it is selected.
    LEARNED = "learned"

@dataclass
class FusedResult:
    """One document in the fused ranking.

    The optional fields default to ``None``; their annotations say so
    explicitly instead of declaring ``float``/``dict`` with a ``None``
    default.
    """

    # Identifier of the document.
    doc_id: str
    # Text of the document.
    content: str
    # Combined score used to order the fused ranking.
    fused_score: float
    # Per-source scores as reported by the fuser, if any.
    keyword_score: Optional[float] = None
    semantic_score: Optional[float] = None
    # Metadata carried over from the source result, if any.
    metadata: Optional[dict] = None

class ResultFuser:
    """Merge keyword and semantic result lists into one ranking.

    Args:
        method: Fusion strategy. RRF, LINEAR and CONVEX are implemented.
        keyword_weight: Weight of the keyword score (LINEAR and CONVEX only).
        semantic_weight: Weight of the semantic score (LINEAR and CONVEX only).
        rrf_k: Rank-damping constant of Reciprocal Rank Fusion.

    Note:
        RRF ranks purely by position and ignores both weights.
    """

    def __init__(
        self,
        method: FusionMethod = FusionMethod.RRF,
        keyword_weight: float = 0.5,
        semantic_weight: float = 0.5,
        rrf_k: int = 60
    ):
        self.method = method
        self.keyword_weight = keyword_weight
        self.semantic_weight = semantic_weight
        self.rrf_k = rrf_k

    def fuse(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int = 10
    ) -> list[FusedResult]:
        """Fuse the two result lists and return the top ``k`` documents.

        Raises:
            ValueError: If ``self.method`` has no implementation (this
                includes ``FusionMethod.LEARNED``), or if CONVEX fusion is
                requested with weights that sum to zero.
        """

        if self.method == FusionMethod.RRF:
            return self._rrf_fusion(keyword_results, semantic_results, k)
        elif self.method == FusionMethod.LINEAR:
            return self._linear_fusion(keyword_results, semantic_results, k)
        elif self.method == FusionMethod.CONVEX:
            return self._convex_fusion(keyword_results, semantic_results, k)
        else:
            raise ValueError(f"Unknown fusion method: {self.method}")

    def _rrf_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Reciprocal Rank Fusion.

        Every appearance of a document at 1-based rank ``r`` adds
        ``1 / (rrf_k + r)`` to its fused score. The per-source scores on
        the result are the raw searcher scores, or ``None`` when the
        document was absent from that list.
        """

        merged: dict[str, dict] = {}

        sources = (
            ("keyword_score", keyword_results),
            ("semantic_score", semantic_results),
        )
        for score_key, results in sources:
            for rank, result in enumerate(results, start=1):
                entry = merged.get(result.doc_id)
                if entry is None:
                    entry = merged[result.doc_id] = {
                        "content": result.content,
                        "rrf_score": 0,
                        "keyword_score": None,
                        "semantic_score": None,
                        "metadata": result.metadata
                    }

                # Keep the score of the best-ranked occurrence if a list
                # contains the same id more than once.
                if entry[score_key] is None:
                    entry[score_key] = result.score

                entry["rrf_score"] += 1 / (self.rrf_k + rank)

        ranked = sorted(
            merged.items(),
            key=lambda item: item[1]["rrf_score"],
            reverse=True
        )[:k]

        return [
            FusedResult(
                doc_id=doc_id,
                content=data["content"],
                fused_score=data["rrf_score"],
                keyword_score=data["keyword_score"],
                semantic_score=data["semantic_score"],
                metadata=data["metadata"]
            )
            for doc_id, data in ranked
        ]

    def _linear_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Weighted sum of min-max normalized scores, using the configured weights."""

        return self._weighted_fusion(
            keyword_results,
            semantic_results,
            k,
            self.keyword_weight,
            self.semantic_weight
        )

    def _convex_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int
    ) -> list[FusedResult]:
        """Linear fusion with the weights rescaled to sum to 1.

        The rescaled weights are passed down as arguments, so the fuser's
        own ``keyword_weight`` / ``semantic_weight`` are never modified,
        not even temporarily.

        Raises:
            ValueError: If the configured weights sum to zero.
        """

        total_weight = self.keyword_weight + self.semantic_weight
        if total_weight == 0:
            raise ValueError(
                "Convex fusion requires keyword_weight + semantic_weight != 0"
            )

        return self._weighted_fusion(
            keyword_results,
            semantic_results,
            k,
            self.keyword_weight / total_weight,
            self.semantic_weight / total_weight
        )

    def _weighted_fusion(
        self,
        keyword_results: list[SearchResult],
        semantic_results: list[SearchResult],
        k: int,
        keyword_weight: float,
        semantic_weight: float
    ) -> list[FusedResult]:
        """Rank documents by a weighted sum of their normalized scores.

        A document missing from one list gets a normalized score of 0 for
        that source. The per-source scores on the result are the
        normalized values, not the raw searcher scores.
        """

        keyword_scores = self._normalize_scores(keyword_results)
        semantic_scores = self._normalize_scores(semantic_results)

        combined: dict[str, dict] = {}

        for result in keyword_results:
            combined[result.doc_id] = {
                "content": result.content,
                "keyword_score": keyword_scores.get(result.doc_id, 0),
                "semantic_score": 0,
                "metadata": result.metadata
            }

        for result in semantic_results:
            normalized = semantic_scores.get(result.doc_id, 0)
            entry = combined.get(result.doc_id)
            if entry is None:
                combined[result.doc_id] = {
                    "content": result.content,
                    "keyword_score": 0,
                    "semantic_score": normalized,
                    "metadata": result.metadata
                }
            else:
                entry["semantic_score"] = normalized

        for data in combined.values():
            data["fused_score"] = (
                keyword_weight * data["keyword_score"] +
                semantic_weight * data["semantic_score"]
            )

        ranked = sorted(
            combined.items(),
            key=lambda item: item[1]["fused_score"],
            reverse=True
        )[:k]

        return [
            FusedResult(
                doc_id=doc_id,
                content=data["content"],
                fused_score=data["fused_score"],
                keyword_score=data["keyword_score"],
                semantic_score=data["semantic_score"],
                metadata=data["metadata"]
            )
            for doc_id, data in ranked
        ]

    def _normalize_scores(
        self,
        results: list[SearchResult]
    ) -> dict[str, float]:
        """Min-max normalize scores to the [0, 1] range, keyed by doc id.

        Returns ``{}`` for no results. When all scores are equal, every
        document maps to 1.0.
        """

        if not results:
            return {}

        scores = [r.score for r in results]
        min_score = min(scores)
        max_score = max(scores)

        if max_score == min_score:
            return {r.doc_id: 1.0 for r in results}

        score_range = max_score - min_score
        return {
            r.doc_id: (r.score - min_score) / score_range
            for r in results
        }

Hybrid Search Implementation

from dataclasses import dataclass
from typing import Any, Optional
import asyncio

@dataclass
class HybridSearchConfig:
    """Tunable settings read by ``HybridSearcher``."""
    
    # Weights handed to ResultFuser. Its RRF path does not read them, so
    # with the default fusion_method below they have no effect.
    keyword_weight: float = 0.3
    semantic_weight: float = 0.7
    # Strategy used to merge the two result lists.
    fusion_method: FusionMethod = FusionMethod.RRF
    # Candidates fetched from each searcher before fusion.
    keyword_k: int = 20  # Retrieve more for fusion
    semantic_k: int = 20
    # Number of fused results returned when the caller gives no k.
    final_k: int = 10

class HybridSearcher:
    """Run keyword and semantic search for a query and fuse the results.

    Args:
        keyword_searcher: Synchronous keyword searcher exposing
            ``search(query, k=...)``.
        semantic_searcher: Asynchronous searcher exposing
            ``search(query, k=..., filters=...)``.
        config: Settings; defaults to ``HybridSearchConfig()``.
    """

    def __init__(
        self,
        keyword_searcher: BM25Searcher,
        semantic_searcher: SemanticSearcher,
        config: Optional[HybridSearchConfig] = None
    ):
        self.keyword_searcher = keyword_searcher
        self.semantic_searcher = semantic_searcher
        self.config = config or HybridSearchConfig()
        self.fuser = ResultFuser(
            method=self.config.fusion_method,
            keyword_weight=self.config.keyword_weight,
            semantic_weight=self.config.semantic_weight
        )

    async def search(
        self,
        query: str,
        k: Optional[int] = None,
        filters: Optional[dict] = None
    ) -> list[FusedResult]:
        """Return the top ``k`` fused results for ``query``.

        Args:
            query: Free-text query.
            k: Number of results; ``None`` means ``config.final_k``. An
                explicit ``0`` is honoured rather than replaced by the
                default.
            filters: Optional metadata equality filters. Forwarded to the
                semantic searcher and applied to keyword hits after
                retrieval.
        """

        if k is None:
            k = self.config.final_k

        # The keyword search is synchronous and runs to completion once its
        # task starts. The semantic task is created first so that, with the
        # event loop starting tasks in creation order, its request is
        # already awaiting I/O while the keyword search computes.
        semantic_task = asyncio.create_task(
            self.semantic_searcher.search(
                query,
                k=self.config.semantic_k,
                filters=filters
            )
        )
        keyword_task = asyncio.create_task(
            self._keyword_search(query, filters)
        )

        keyword_results, semantic_results = await asyncio.gather(
            keyword_task,
            semantic_task
        )

        return self.fuser.fuse(
            keyword_results,
            semantic_results,
            k=k
        )

    async def _keyword_search(
        self,
        query: str,
        filters: Optional[dict] = None
    ) -> list[SearchResult]:
        """Run the synchronous keyword searcher, then apply ``filters``.

        Filtering happens after the top ``config.keyword_k`` hits are
        retrieved, so fewer than ``keyword_k`` results may remain. A hit is
        kept only if its metadata equals the filter value for every key.
        """

        results = self.keyword_searcher.search(query, k=self.config.keyword_k)

        if not filters:
            return results

        return [
            result for result in results
            if all(
                result.metadata.get(key) == expected
                for key, expected in filters.items()
            )
        ]

class AdaptiveHybridSearcher:
    """Hybrid searcher that picks fusion weights from the query text.

    Args:
        keyword_searcher: Synchronous keyword searcher exposing
            ``search(query, k=...)``.
        semantic_searcher: Asynchronous searcher exposing
            ``search(query, k=...)``.
    """

    # Words that suggest the user wants exact technical matches.
    _TECHNICAL_TERMS = frozenset(
        ['error', 'exception', 'api', 'function', 'class', 'method']
    )
    # Leading words that suggest a natural-language question.
    _QUESTION_WORDS = frozenset(
        ['what', 'why', 'how', 'when', 'where', 'which']
    )

    def __init__(
        self,
        keyword_searcher: BM25Searcher,
        semantic_searcher: SemanticSearcher
    ):
        self.keyword_searcher = keyword_searcher
        self.semantic_searcher = semantic_searcher

    async def search(
        self,
        query: str,
        k: int = 10
    ) -> list[FusedResult]:
        """Return the top ``k`` results using query-dependent weights.

        Each searcher is asked for ``2 * k`` candidates, which are merged
        by linear fusion.
        """

        weights = self._analyze_query(query)

        fuser = ResultFuser(
            method=FusionMethod.LINEAR,
            keyword_weight=weights["keyword"],
            semantic_weight=weights["semantic"]
        )

        keyword_results = self.keyword_searcher.search(query, k=k * 2)
        semantic_results = await self.semantic_searcher.search(query, k=k * 2)

        return fuser.fuse(keyword_results, semantic_results, k)

    def _analyze_query(self, query: str) -> dict[str, float]:
        """Choose keyword and semantic weights for ``query``.

        Rules, in priority order:

        1. Contains a double quote (exact phrase): 0.8 / 0.2.
        2. Contains a technical term as a whole word, singular or simple
           plural: 0.6 / 0.4.
        3. First word is a question word: 0.3 / 0.7.
        4. Otherwise: 0.4 / 0.6.

        Matching is on whole words, so "rapid" or "capital" do not count
        as "api" and "however" does not count as "how".
        """

        if '"' in query:
            return {"keyword": 0.8, "semantic": 0.2}

        # Split on anything that is not a letter or digit.
        words = "".join(
            ch if ch.isalnum() else " " for ch in query.lower()
        ).split()

        if any(self._is_technical(word) for word in words):
            return {"keyword": 0.6, "semantic": 0.4}

        if words and words[0] in self._QUESTION_WORDS:
            return {"keyword": 0.3, "semantic": 0.7}

        return {"keyword": 0.4, "semantic": 0.6}

    def _is_technical(self, word: str) -> bool:
        """Return True if ``word`` is a technical term or its simple plural."""

        terms = self._TECHNICAL_TERMS
        if word in terms:
            return True
        if word.endswith("es") and word[:-2] in terms:
            return True
        return word.endswith("s") and word[:-1] in terms

Production Hybrid Search Service

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional

# Application object that the route decorators below attach to.
app = FastAPI()

# Initialize components
# Module-level placeholders, left as None here. The endpoints below use
# keyword_searcher and semantic_searcher, so real instances have to be
# assigned before requests are served. hybrid_searcher is rebound by the
# /v1/config endpoint.
keyword_searcher = None  # Initialize BM25
semantic_searcher = None  # Initialize with embedding client
hybrid_searcher = None

class IndexRequest(BaseModel):
    # Request body of POST /v1/index.
    # The documents are passed unchanged to both searchers' index() methods,
    # which read each document's "id" and "content" keys.
    documents: list[dict]

class SearchRequest(BaseModel):
    # Request body of POST /v1/search.
    # Free-text query.
    query: str
    # Number of results to return.
    k: int = 10
    # Which retrieval path to use.
    method: str = "hybrid"  # keyword, semantic, hybrid
    # Fusion weights for the hybrid path.
    keyword_weight: Optional[float] = 0.3
    semantic_weight: Optional[float] = 0.7
    # Optional metadata filters; the keyword path does not read them.
    filters: Optional[dict] = None

class ConfigRequest(BaseModel):
    # Request body of POST /v1/config.
    keyword_weight: float = 0.3
    semantic_weight: float = 0.7
    # Converted with FusionMethod(...), so it must be one of that enum's
    # string values ("rrf", "linear", "convex", "learned").
    fusion_method: str = "rrf"

@app.post("/v1/index")
async def index_documents(request: IndexRequest):
    """Index the posted documents in both the keyword and semantic searchers.

    Returns:
        A status payload with the number of documents received.

    Raises:
        HTTPException: 503 if the module-level searchers have not been
            initialized.
    """

    # The searchers start out as None placeholders. Report that explicitly
    # instead of failing with an AttributeError (HTTP 500).
    if keyword_searcher is None or semantic_searcher is None:
        raise HTTPException(
            status_code=503,
            detail="Search components are not initialized"
        )

    # Index for keyword search
    keyword_searcher.index(request.documents)

    # Index for semantic search
    await semantic_searcher.index(request.documents)

    return {
        "status": "indexed",
        "document_count": len(request.documents)
    }

def _preview_content(content: str, limit: int = 200) -> str:
    """Return ``content`` cut to ``limit`` characters, with "..." if it was cut."""

    return content[:limit] + "..." if len(content) > limit else content


def _serialize_results(results, score_fields: tuple) -> list[dict]:
    """Turn result objects into response dicts with the given score attributes."""

    return [
        {
            "doc_id": r.doc_id,
            "content": _preview_content(r.content),
            **{name: getattr(r, name) for name in score_fields}
        }
        for r in results
    ]


@app.post("/v1/search")
async def search(request: SearchRequest):
    """Search documents by keyword, semantic or hybrid retrieval.

    For hybrid search the fusion method and candidate counts come from the
    searcher installed by ``/v1/config`` when there is one, otherwise from
    the ``HybridSearchConfig`` defaults. Weights given in the request take
    precedence; a weight sent as ``null`` falls back to the configured one.

    Raises:
        HTTPException: 400 if ``request.method`` is not one of
            ``keyword``, ``semantic`` or ``hybrid``; 503 if a searcher the
            method needs has not been initialized.
    """

    if request.method not in ("keyword", "semantic", "hybrid"):
        # Previously any unknown value silently ran a hybrid search.
        raise HTTPException(
            status_code=400,
            detail=(
                f"Unknown search method {request.method!r}; "
                "expected 'keyword', 'semantic' or 'hybrid'"
            )
        )

    needs_keyword = request.method in ("keyword", "hybrid")
    needs_semantic = request.method in ("semantic", "hybrid")
    if (needs_keyword and keyword_searcher is None) or (
        needs_semantic and semantic_searcher is None
    ):
        raise HTTPException(
            status_code=503,
            detail="Search components are not initialized"
        )

    if request.method == "keyword":
        results = keyword_searcher.search(request.query, k=request.k)
        return {
            "query": request.query,
            "method": "keyword",
            "results": _serialize_results(results, ("score",))
        }

    if request.method == "semantic":
        results = await semantic_searcher.search(
            request.query,
            k=request.k,
            filters=request.filters
        )
        return {
            "query": request.query,
            "method": "semantic",
            "results": _serialize_results(results, ("score",))
        }

    # Hybrid: start from the configuration installed via /v1/config, if any.
    base = hybrid_searcher.config if hybrid_searcher is not None else HybridSearchConfig()
    config = HybridSearchConfig(
        keyword_weight=(
            request.keyword_weight
            if request.keyword_weight is not None
            else base.keyword_weight
        ),
        semantic_weight=(
            request.semantic_weight
            if request.semantic_weight is not None
            else base.semantic_weight
        ),
        fusion_method=base.fusion_method,
        keyword_k=base.keyword_k,
        semantic_k=base.semantic_k,
        final_k=base.final_k
    )

    searcher = HybridSearcher(
        keyword_searcher,
        semantic_searcher,
        config
    )

    results = await searcher.search(
        request.query,
        k=request.k,
        filters=request.filters
    )

    return {
        "query": request.query,
        "method": "hybrid",
        "weights": {
            "keyword": config.keyword_weight,
            "semantic": config.semantic_weight
        },
        "results": _serialize_results(
            results,
            ("fused_score", "keyword_score", "semantic_score")
        )
    }

@app.post("/v1/config")
async def update_config(request: ConfigRequest):
    """Replace the module-level hybrid searcher with a reconfigured one.

    Returns:
        A status payload echoing the applied settings.

    Raises:
        HTTPException: 400 if ``request.fusion_method`` is not a valid
            ``FusionMethod`` value.
    """

    global hybrid_searcher

    try:
        fusion_method = FusionMethod(request.fusion_method)
    except ValueError:
        # A bad value is a client error, not a server failure.
        valid = ", ".join(method.value for method in FusionMethod)
        raise HTTPException(
            status_code=400,
            detail=(
                f"Unknown fusion_method {request.fusion_method!r}; "
                f"expected one of: {valid}"
            )
        ) from None

    config = HybridSearchConfig(
        keyword_weight=request.keyword_weight,
        semantic_weight=request.semantic_weight,
        fusion_method=fusion_method
    )

    hybrid_searcher = HybridSearcher(
        keyword_searcher,
        semantic_searcher,
        config
    )

    return {
        "status": "updated",
        "config": {
            "keyword_weight": request.keyword_weight,
            "semantic_weight": request.semantic_weight,
            "fusion_method": request.fusion_method
        }
    }

@app.get("/health")
async def health():
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "healthy"}
    return payload

References

Conclusion

Hybrid search delivers the best of both worlds—keyword precision and semantic understanding. Start with a solid BM25 implementation for keyword search; it’s battle-tested and handles exact matches well. Add semantic search using modern embedding models for conceptual similarity. The magic happens in fusion—Reciprocal Rank Fusion (RRF) is a great default because it doesn’t require score normalization and handles different score distributions gracefully. Note that plain RRF, as implemented above, ranks purely by position and ignores the keyword and semantic weights; the weights only take effect with linear or convex fusion. Tune your weights based on your use case: technical documentation benefits from higher keyword weights, while conversational queries favor semantic search. Consider adaptive weighting that adjusts based on query characteristics. Run both searches in parallel to minimize latency. The key insight is that users search in different ways—sometimes they know exact terms, sometimes they describe concepts. Hybrid search handles both gracefully, consistently outperforming single-method approaches across diverse query types.


Discover more from Code, Cloud & Context

Subscribe to get the latest posts sent to your email.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.