Introduction: Embedding models convert text into dense vectors that capture semantic meaning. Choosing the right embedding model significantly impacts search quality, retrieval accuracy, and application performance. This guide compares leading embedding models—OpenAI’s text-embedding-3, Cohere’s embed-v3, Voyage AI, and open-source alternatives like BGE and E5. We cover benchmarks, pricing, dimension trade-offs, and practical guidance on selecting the right model for your use case. Whether you’re building semantic search, RAG systems, or recommendation engines, understanding embedding model characteristics is essential.

OpenAI Embeddings
from openai import OpenAI
import numpy as np
from typing import Optional, Union

client = OpenAI()

def get_openai_embedding(
    text: Union[str, list[str]],
    model: str = "text-embedding-3-small",
    dimensions: Optional[int] = None
) -> Union[list[float], list[list[float]]]:
    """Get embeddings from OpenAI."""
    # Handle a single string or a list of strings
    input_text = [text] if isinstance(text, str) else text
    kwargs = {"model": model, "input": input_text}
    if dimensions:
        kwargs["dimensions"] = dimensions
    response = client.embeddings.create(**kwargs)
    embeddings = [item.embedding for item in response.data]
    return embeddings[0] if isinstance(text, str) else embeddings
# OpenAI model comparison
models = {
    "text-embedding-3-small": {
        "dimensions": 1536,
        "max_tokens": 8191,
        "price_per_1m": 0.02
    },
    "text-embedding-3-large": {
        "dimensions": 3072,
        "max_tokens": 8191,
        "price_per_1m": 0.13
    },
    "text-embedding-ada-002": {
        "dimensions": 1536,
        "max_tokens": 8191,
        "price_per_1m": 0.10
    }
}
# Dimension reduction with text-embedding-3
# Smaller dimensions = faster search, less storage
small_embed = get_openai_embedding(
    "What is machine learning?",
    model="text-embedding-3-small",
    dimensions=512  # Reduced from 1536
)
print(f"Reduced dimensions: {len(small_embed)}")

# Batch embedding for efficiency
texts = [
    "Machine learning is a subset of AI",
    "Deep learning uses neural networks",
    "Natural language processing handles text"
]
batch_embeddings = get_openai_embedding(texts, model="text-embedding-3-small")
print(f"Batch size: {len(batch_embeddings)}")
Cohere Embeddings
# pip install cohere
import cohere

co = cohere.Client("your-api-key")

def get_cohere_embedding(
    texts: list[str],
    model: str = "embed-english-v3.0",
    input_type: str = "search_document"
) -> list[list[float]]:
    """Get embeddings from Cohere.

    input_type options:
    - search_document: For documents to be searched
    - search_query: For search queries
    - classification: For classification tasks
    - clustering: For clustering tasks
    """
    response = co.embed(
        texts=texts,
        model=model,
        input_type=input_type
    )
    return response.embeddings
# Cohere model comparison
cohere_models = {
    "embed-english-v3.0": {
        "dimensions": 1024,
        "max_tokens": 512,
        "languages": "English",
        "price_per_1m": 0.10
    },
    "embed-multilingual-v3.0": {
        "dimensions": 1024,
        "max_tokens": 512,
        "languages": "100+",
        "price_per_1m": 0.10
    },
    "embed-english-light-v3.0": {
        "dimensions": 384,
        "max_tokens": 512,
        "languages": "English",
        "price_per_1m": 0.10
    }
}
# Document vs query embeddings (asymmetric search)
documents = [
    "Python is a programming language",
    "JavaScript runs in browsers"
]
query = "What language is used for web development?"

# Embed documents with the document input type
doc_embeddings = get_cohere_embedding(
    documents,
    input_type="search_document"
)

# Embed the query with the query input type
query_embedding = get_cohere_embedding(
    [query],
    input_type="search_query"
)[0]

# Calculate similarities
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

for i, doc in enumerate(documents):
    sim = cosine_similarity(query_embedding, doc_embeddings[i])
    print(f"{doc[:40]}: {sim:.3f}")
Voyage AI Embeddings
# pip install voyageai
import voyageai

vo = voyageai.Client(api_key="your-api-key")

def get_voyage_embedding(
    texts: list[str],
    model: str = "voyage-2",
    input_type: Optional[str] = None
) -> list[list[float]]:
    """Get embeddings from Voyage AI."""
    result = vo.embed(
        texts,
        model=model,
        input_type=input_type  # "query" or "document"
    )
    return result.embeddings
# Voyage model comparison
voyage_models = {
    "voyage-2": {
        "dimensions": 1024,
        "max_tokens": 4000,
        "specialty": "General purpose",
        "price_per_1m": 0.10
    },
    "voyage-large-2": {
        "dimensions": 1536,
        "max_tokens": 16000,
        "specialty": "Higher quality",
        "price_per_1m": 0.12
    },
    "voyage-code-2": {
        "dimensions": 1536,
        "max_tokens": 16000,
        "specialty": "Code understanding",
        "price_per_1m": 0.12
    },
    "voyage-law-2": {
        "dimensions": 1024,
        "max_tokens": 4000,
        "specialty": "Legal documents",
        "price_per_1m": 0.12
    }
}
# Code embedding example
code_snippets = [
    "def fibonacci(n): return n if n <= 1 else fibonacci(n-1) + fibonacci(n-2)",
    "function factorial(n) { return n <= 1 ? 1 : n * factorial(n-1); }",
    "SELECT * FROM users WHERE age > 18"
]
code_embeddings = get_voyage_embedding(
    code_snippets,
    model="voyage-code-2",
    input_type="document"
)

# Search for similar code
query = "recursive function to calculate fibonacci numbers"
query_embed = get_voyage_embedding([query], model="voyage-code-2", input_type="query")[0]

for i, code in enumerate(code_snippets):
    sim = cosine_similarity(query_embed, code_embeddings[i])
    print(f"{code[:50]}: {sim:.3f}")
Open Source: BGE and E5
# pip install sentence-transformers
from sentence_transformers import SentenceTransformer

# BGE (BAAI General Embedding)
bge_model = SentenceTransformer('BAAI/bge-large-en-v1.5')

def get_bge_embedding(texts: list[str], is_query: bool = False) -> list[list[float]]:
    """Get BGE embeddings (free, local)."""
    # BGE recommends an instruction prefix for queries
    if is_query:
        texts = [f"Represent this sentence for searching relevant passages: {t}" for t in texts]
    embeddings = bge_model.encode(texts, normalize_embeddings=True)
    return embeddings.tolist()

# E5 (Embeddings from bidirectional Encoder representations)
e5_model = SentenceTransformer('intfloat/e5-large-v2')

def get_e5_embedding(texts: list[str], is_query: bool = False) -> list[list[float]]:
    """Get E5 embeddings (free, local)."""
    # E5 requires "query: " / "passage: " prefixes
    prefix = "query: " if is_query else "passage: "
    texts = [prefix + t for t in texts]
    embeddings = e5_model.encode(texts, normalize_embeddings=True)
    return embeddings.tolist()
# Open source model comparison
open_source_models = {
    "bge-large-en-v1.5": {
        "dimensions": 1024,
        "max_tokens": 512,
        "size_mb": 1340,
        "mteb_score": 64.23
    },
    "bge-base-en-v1.5": {
        "dimensions": 768,
        "max_tokens": 512,
        "size_mb": 438,
        "mteb_score": 63.55
    },
    "e5-large-v2": {
        "dimensions": 1024,
        "max_tokens": 512,
        "size_mb": 1340,
        "mteb_score": 62.25
    },
    "all-MiniLM-L6-v2": {
        "dimensions": 384,
        "max_tokens": 256,
        "size_mb": 91,
        "mteb_score": 56.26
    }
}
# Local embedding - no API costs
documents = ["Machine learning basics", "Deep learning fundamentals"]
doc_embeds = get_bge_embedding(documents, is_query=False)
query = "What is ML?"
query_embed = get_bge_embedding([query], is_query=True)[0]
print(f"BGE embedding dimension: {len(doc_embeds[0])}")
Benchmark Comparison
import time
from dataclasses import dataclass

@dataclass
class BenchmarkResult:
    model: str
    avg_latency_ms: float
    throughput_docs_per_sec: float
    dimension: int
    mteb_retrieval_score: float

def benchmark_embedding_model(
    embed_func,
    texts: list[str],
    num_runs: int = 5
) -> dict:
    """Benchmark an embedding model."""
    latencies = []
    for _ in range(num_runs):
        start = time.time()
        embeddings = embed_func(texts)
        latency = (time.time() - start) * 1000
        latencies.append(latency)
    avg_latency = sum(latencies) / len(latencies)
    throughput = len(texts) / (avg_latency / 1000)
    return {
        "avg_latency_ms": avg_latency,
        "throughput_docs_per_sec": throughput,
        "dimension": len(embeddings[0])
    }
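A minimal sketch of how to run it, here against the local BGE helper so no API key is needed (latency numbers will vary with your hardware and batch size):

# Example run against the local BGE helper (numbers vary by hardware)
sample_texts = ["Machine learning basics"] * 16
result = benchmark_embedding_model(
    lambda batch: get_bge_embedding(batch, is_query=False),
    sample_texts
)
print(f"Avg latency: {result['avg_latency_ms']:.1f} ms, "
      f"throughput: {result['throughput_docs_per_sec']:.1f} docs/sec")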
# MTEB Retrieval Benchmark Scores (as of 2024)
mteb_scores = {
    "text-embedding-3-large": 64.59,
    "text-embedding-3-small": 62.26,
    "voyage-2": 64.83,
    "voyage-large-2": 65.89,
    "embed-english-v3.0": 64.47,
    "bge-large-en-v1.5": 64.23,
    "e5-large-v2": 62.25,
    "all-MiniLM-L6-v2": 56.26
}

# Cost comparison per 1M tokens
cost_per_million = {
    "text-embedding-3-small": 0.02,
    "text-embedding-3-large": 0.13,
    "text-embedding-ada-002": 0.10,
    "voyage-2": 0.10,
    "voyage-large-2": 0.12,
    "embed-english-v3.0": 0.10,
    "bge-large-en-v1.5": 0.00,  # Free (local)
    "e5-large-v2": 0.00,  # Free (local)
    "all-MiniLM-L6-v2": 0.00,  # Free (local)
}

# Print comparison table
print("Model Comparison:")
print("-" * 70)
print(f"{'Model':<25} {'MTEB Score':<12} {'Cost/1M':<10} {'Dims':<8}")
print("-" * 70)
for model, score in sorted(mteb_scores.items(), key=lambda x: -x[1]):
    cost = cost_per_million.get(model, "N/A")
    dims = {
        "text-embedding-3-large": 3072,
        "text-embedding-3-small": 1536,
        "voyage-2": 1024,
        "voyage-large-2": 1536,
        "embed-english-v3.0": 1024,
        "bge-large-en-v1.5": 1024,
        "e5-large-v2": 1024,
        "all-MiniLM-L6-v2": 384
    }.get(model, "?")
    print(f"{model:<25} {score:<12.2f} ${cost:<9} {dims:<8}")
Unified Embedding Interface
from abc import ABC, abstractmethod
from enum import Enum

class EmbeddingProvider(str, Enum):
    OPENAI = "openai"
    COHERE = "cohere"
    VOYAGE = "voyage"
    BGE = "bge"
    E5 = "e5"

class EmbeddingModel(ABC):
    """Abstract base for embedding models."""

    @abstractmethod
    def embed(self, texts: list[str], is_query: bool = False) -> list[list[float]]:
        pass

    @property
    @abstractmethod
    def dimension(self) -> int:
        pass

class OpenAIEmbedding(EmbeddingModel):
    def __init__(self, model: str = "text-embedding-3-small", dimensions: Optional[int] = None):
        self.model = model
        self._dimensions = dimensions or {
            "text-embedding-3-small": 1536,
            "text-embedding-3-large": 3072
        }.get(model, 1536)

    def embed(self, texts: list[str], is_query: bool = False) -> list[list[float]]:
        return get_openai_embedding(texts, self.model, self._dimensions)

    @property
    def dimension(self) -> int:
        return self._dimensions

class BGEEmbedding(EmbeddingModel):
    def __init__(self, model_name: str = "BAAI/bge-large-en-v1.5"):
        self.model = SentenceTransformer(model_name)
        self._dimension = self.model.get_sentence_embedding_dimension()

    def embed(self, texts: list[str], is_query: bool = False) -> list[list[float]]:
        if is_query:
            texts = [f"Represent this sentence for searching relevant passages: {t}" for t in texts]
        return self.model.encode(texts, normalize_embeddings=True).tolist()

    @property
    def dimension(self) -> int:
        return self._dimension

class EmbeddingFactory:
    """Factory for creating embedding models."""

    @staticmethod
    def create(provider: EmbeddingProvider, **kwargs) -> EmbeddingModel:
        if provider == EmbeddingProvider.OPENAI:
            return OpenAIEmbedding(**kwargs)
        elif provider == EmbeddingProvider.BGE:
            return BGEEmbedding(**kwargs)
        # Add other providers...
        else:
            raise ValueError(f"Unknown provider: {provider}")

# Usage
embedder = EmbeddingFactory.create(EmbeddingProvider.OPENAI, model="text-embedding-3-small")
docs = ["Document 1", "Document 2"]
doc_embeds = embedder.embed(docs, is_query=False)
query_embed = embedder.embed(["Search query"], is_query=True)[0]
print(f"Using {embedder.dimension}-dimensional embeddings")
Choosing the Right Model
The best embedding model depends on your specific requirements. For general-purpose semantic search with good quality and low cost, OpenAI’s text-embedding-3-small offers excellent value at $0.02 per million tokens. If you need the highest retrieval quality and can afford higher costs, Voyage’s voyage-large-2 leads benchmarks. For multilingual applications, Cohere’s embed-multilingual-v3.0 supports 100+ languages. When running locally without API costs is essential, BGE-large-en-v1.5 provides near-commercial quality. For code search specifically, Voyage’s voyage-code-2 is purpose-built for programming languages.
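These rules of thumb translate directly into code. A minimal sketch of a selection helper; the flags and the mapping below are illustrative assumptions drawn from the guidance above, not hard rules:

# Illustrative decision helper - flags and mapping mirror the guidance above
def recommend_embedding_model(
    multilingual: bool = False,
    local_only: bool = False,
    domain: str = "general",  # "general", "code", or "legal" (illustrative set)
    quality_first: bool = False
) -> str:
    if local_only:
        return "bge-large-en-v1.5"
    if multilingual:
        return "embed-multilingual-v3.0"
    if domain == "code":
        return "voyage-code-2"
    if domain == "legal":
        return "voyage-law-2"
    if quality_first:
        return "voyage-large-2"
    return "text-embedding-3-small"  # best value for general use

print(recommend_embedding_model(domain="code"))  # voyage-code-2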
References
- OpenAI Embeddings: https://platform.openai.com/docs/guides/embeddings
- Cohere Embed: https://docs.cohere.com/docs/embeddings
- Voyage AI: https://docs.voyageai.com/
- MTEB Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
- BGE Models: https://huggingface.co/BAAI/bge-large-en-v1.5
Conclusion
Embedding model selection significantly impacts your application’s search quality, latency, and costs. Start with OpenAI’s text-embedding-3-small for most use cases—it’s affordable, high-quality, and easy to integrate. Consider Voyage for specialized domains like code or legal documents. Use open-source models like BGE when you need local inference or want to eliminate API costs. Always benchmark on your specific data, as performance varies by domain. The embedding landscape evolves rapidly, so revisit your choice periodically as new models emerge. A well-chosen embedding model is the foundation of effective semantic search and RAG systems.