Overview
voyage-code-3 is optimized for code retrieval. See the Voyage blog post for details. Visit the Voyage documentation for an overview of all Voyage embedding models and rerankers.
The models are accessed through the Voyage Python client. You must register for a Voyage API key to use them.
Using the model
Installation
!pip install -qU voyageai pinecone
Define Embedding Parameters
# Length of each embedding vector; the same value is passed as the Pinecone
# index dimension. Choices: 1024 (default), 256, 512, and 2048.
EMBEDDING_DIMENSION: int = 1024

# Element type of the returned embeddings.
# Choices: "float" (default), "int8", "uint8", "binary", "ubinary".
# NOTE(review): the index is created with dimension=EMBEDDING_DIMENSION;
# confirm that a non-float dtype still yields vectors the index accepts
# before changing this value.
EMBEDDING_DTYPE: str = "float"
Create Index
from pinecone import Pinecone, ServerlessSpec
# Connect to Pinecone; replace "API_KEY" with your own key.
pc = Pinecone(api_key="API_KEY")

# Create the index only when it does not already exist. Its dimension is
# taken from EMBEDDING_DIMENSION so it matches the requested embeddings.
index_name = "voyage-code-3"
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle used by the upsert and query steps.
index = pc.Index(index_name)
Embed & Upsert
# Sample corpus to embed: a mix of code snippets and natural-language
# descriptions of algorithms. Each record has an "id" and a "text" field.
# NOTE(review): the code snippets carry only a single space after each
# newline; confirm this matches the intended sample text.
data = [
    {
        "id": "code1",
        "text": (
            "def factorial(n):\n"
            " if n == 0:\n"
            " return 1\n"
            " return n * factorial(n-1)"
        ),
    },
    {
        "id": "code2",
        "text": (
            "Sort a list using the quicksort algorithm.\n"
            "Steps:\n"
            "1. Select a pivot.\n"
            "2. Partition the list into elements smaller and larger than the pivot.\n"
            "3. Recursively apply the process to partitions.\n"
            "4. Combine results."
        ),
    },
    {
        "id": "code3",
        "text": (
            "def reverse_string(s):\n"
            " return s[::-1]"
        ),
    },
    {
        "id": "code4",
        "text": "Determine if a number is prime - iterate from 2 to sqrt(num) to check divisibility.",
    },
    {
        "id": "code5",
        "text": (
            "def fibonacci(n):\n"
            " if n <= 0:\n"
            " return 0\n"
            " elif n == 1:\n"
            " return 1\n"
            " return fibonacci(n-1) + fibonacci(n-2)"
        ),
    },
]
import voyageai

# Placeholder credential, in the same spirit as the "API_KEY" placeholder
# passed to Pinecone; replace it with your own Voyage API key before running.
# Without this assignment the name below is undefined in this walkthrough.
VOYAGE_API_KEY = "VOYAGE_API_KEY"

# Client used for every embedding request in this walkthrough.
vo = voyageai.Client(api_key=VOYAGE_API_KEY)

# Embedding model requested from Voyage.
model_id = "voyage-code-3"
def embed(docs: list[str], input_type: str) -> list[list[float]]:
    """Embed a batch of texts with the configured Voyage model.

    Args:
        docs: Texts to embed, sent in a single request.
        input_type: Forwarded to the Voyage client; this walkthrough uses
            "document" for stored texts and "query" for search queries.

    Returns:
        The ``embeddings`` attribute of the client response, one vector per
        input text. NOTE(review): the annotation says floats; with a
        non-float EMBEDDING_DTYPE the element type presumably differs --
        confirm against the Voyage documentation.
    """
    response = vo.embed(
        docs,
        model=model_id,
        input_type=input_type,
        output_dimension=EMBEDDING_DIMENSION,
        output_dtype=EMBEDDING_DTYPE,
    )
    return response.embeddings
# Stored texts are embedded with the "document" input type.
embeddings = embed([record["text"] for record in data], input_type="document")

# Pair every record with its embedding; the original text is kept as
# metadata so it can be returned alongside query matches.
vectors = [
    {
        "id": record["id"],
        "values": embedding,
        "metadata": {"text": record["text"]},
    }
    for record, embedding in zip(data, embeddings)
]

# Write all vectors into the "ns1" namespace of the index.
index.upsert(vectors=vectors, namespace="ns1")
Query
query = "Sort an array"

# Search queries are embedded with the "query" input type. embed() takes a
# batch, so the single query is wrapped in a list.
x = embed([query], input_type="query")

# Fetch the three closest matches from the namespace that was upserted to,
# returning the stored metadata but not the raw vector values.
results = index.query(
    vector=x[0],
    namespace="ns1",
    top_k=3,
    include_metadata=True,
    include_values=False,
)

print(results)