Overview
Rich multimodal embedding model that can vectorize interleaved text and content-rich images, such as screenshots of PDFs, slides, tables, figures, and more. See blog post for details. Visit the Voyage documentation for an overview of all Voyage embedding models and rerankers.
Access to the models is through the Voyage Python client. You must register for a Voyage API key to use them.
Using the model
Installation
!pip install -qU voyageai pinecone
Create Index
from pinecone import Pinecone, ServerlessSpec

# Replace "API_KEY" with your Pinecone API key.
pc = Pinecone(api_key="API_KEY")

# Create the index only if it does not exist yet, so re-running is safe.
# NOTE: `dimension` must match the length of the vectors upserted into this
# index, i.e. the output dimension of the embedding model.
index_name = "voyage-multimodal-3"
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Handle used for the upsert and query calls against this index.
index = pc.Index(index_name)
Embed & Upsert
from typing import Union
# Example records to embed. Each record has an "id" and a "data" payload whose
# "content" list interleaves image and text parts.
data = [
    {
        "id": "vec1",
        "data": {
            "content": [
                {"type": "image_url", "image_url": "https://example.com/image.jpg"},
                {"type": "text", "text": "Frontier intelligence at 2x the speed"},
            ]
        },
    },
    {
        "id": "vec2",
        "data": {
            "content": [
                {"type": "image_url", "image_url": "https://example.com/page1.jpg"},
                {"type": "image_url", "image_url": "https://example.com/page2.jpg"},
            ]
        },
    },
    {
        "id": "vec3",
        "data": {
            "content": [
                {"type": "image_base64", "image_base64": "data:image/jpeg;base64,..."},
            ]
        },
    },
]
# A record's "data" can also be a list of texts and PIL Images, e.g.:
# "data": ["This is a banana", PIL.Image.open("banana.jpg")]
import os

import voyageai

# Read the API key from the environment instead of relying on an undefined
# name. Set VOYAGE_API_KEY before running; a missing variable fails here with
# a KeyError rather than later with a less obvious error.
VOYAGE_API_KEY = os.environ["VOYAGE_API_KEY"]
vo = voyageai.Client(api_key=VOYAGE_API_KEY)

# Model used for every embedding request below.
model_id = "voyage-multimodal-3"
def embed(
    docs: Union[
        list[dict[str, list[dict[str, str]]]],
        list[list[Union[str, "PIL.Image.Image"]]],
    ],
    input_type: str,
) -> list[list[float]]:
    """Embed multimodal inputs with the Voyage client.

    Args:
        docs: Inputs to embed. Either content dicts of the form
            ``{"content": [{"type": ..., ...}, ...]}`` or lists that
            interleave text strings and PIL images.
        input_type: Passed through to the API; ``"document"`` for data being
            indexed and ``"query"`` for search queries.

    Returns:
        The ``embeddings`` attribute of the API response.
    """
    # The PIL image type is written as a string forward reference so that this
    # definition does not require PIL to be imported at runtime.
    response = vo.multimodal_embed(
        docs,
        model=model_id,
        input_type=input_type,
    )
    return response.embeddings
import json

# Use "document" input type for documents
embeddings = embed([d["data"] for d in data], input_type="document")

# Pair each record with its embedding. The payload lives under the "data" key
# of each record (there is no "inputs" key). It is serialized to a JSON string
# because Pinecone metadata values must be flat (no nested objects).
# NOTE(review): large base64 image payloads may exceed Pinecone's per-record
# metadata size limit; consider storing a reference instead. json.dumps also
# only handles the dict form of "data", not PIL images.
vectors = [
    {
        "id": d["id"],
        "values": e,
        "metadata": {"inputs": json.dumps(d["data"])},
    }
    for d, e in zip(data, embeddings)
]

index.upsert(
    vectors=vectors,
    namespace="ns1",
)
Query
# A single query, expressed as a list of content parts (text only here).
query = ["Strong LLMs in 2024"]

# Use "query" input type for queries
x = embed([query], input_type="query")

# Only one query was embedded, so its vector is the first element of `x`.
search_kwargs = {
    "namespace": "ns1",
    "vector": x[0],
    "top_k": 3,
    "include_values": False,
    "include_metadata": True,
}
results = index.query(**search_kwargs)

print(results)