Jina Embeddings v3 is the latest iteration in Jina AI’s text embedding model series, building upon Jina Embeddings v2. Key features include multilingual support for over 30 languages, a task-oriented design using Low-Rank Adaptation (LoRA) instruction adapters, and Matryoshka Representation Learning (MRL) for flexible embedding generation. The model is built on a custom backbone based on XLM-RoBERTa with extended training and Rotary Position Embedding (RoPE) encoding, allowing it to support up to 8,192 input tokens.
pip install pinecone requests
from pinecone import Pinecone, ServerlessSpec
# Pinecone client handle; replace the placeholder with a real API key.
pc = Pinecone(api_key="API_KEY")

# Sent as the bearer token on requests to the Jina embeddings endpoint;
# fill this in before running.
JINA_API_KEY = ""

# Vector size of the index. The same value is passed as `dimensions` when
# requesting embeddings, so stored and query vectors line up.
dimension = 1024
index_name = "jina-embeddings-v3"

# Only create the index when it does not exist yet, so the script can be
# re-run against an existing index.
if not pc.has_index(index_name):
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=serverless_spec,
    )

# Handle used below for upserts and queries.
index = pc.Index(index_name)
from typing import List, Literal

import requests
def get_embeddings(
    texts: List[str],
    dimensions: int,
    task: Literal['text-matching', 'separation', 'classification', 'retrieval.query', 'retrieval.passage'],
    timeout: float = 60.0,
) -> dict:
    """Request embeddings for ``texts`` from the Jina embeddings endpoint.

    Args:
        texts: Strings to embed; sent as the request's ``input`` field.
        dimensions: Requested embedding size; sent as ``dimensions``.
        task: Task identifier forwarded unchanged as the request's ``task``
            field. Must be one of the literal values in the annotation.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the response. Callers in this file read
        each vector from ``result["data"][i]["embedding"]``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {JINA_API_KEY}',
    }
    payload = {
        'input': texts,
        'model': 'jina-embeddings-v3',
        'dimensions': dimensions,
        'task': task,
    }
    response = requests.post(
        'https://api.jina.ai/v1/embeddings',
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail here with the HTTP status rather than returning an error payload
    # that would only surface later as a KeyError on ["data"].
    response.raise_for_status()
    return response.json()
# Small sample corpus mixing sentences about the fruit and about the company.
data = [
    {"id": "vec1", "text": "Apple is a popular fruit known for its sweetness and crisp texture."},
    {"id": "vec2", "text": "The tech company Apple is known for its innovative products like the iPhone."},
    {"id": "vec3", "text": "Many people enjoy eating apples as a healthy snack."},
    {"id": "vec4", "text": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces."},
    {"id": "vec5", "text": "An apple a day keeps the doctor away, as the saying goes."},
]

# Embed every document text in one request, using the passage-side task.
passage_texts = [doc["text"] for doc in data]
embedding_response = get_embeddings(
    passage_texts,
    dimensions=dimension,
    task='retrieval.passage',
)
# Keep only the raw vectors from the response entries.
embeddings = [item["embedding"] for item in embedding_response["data"]]

# Pair each document with its vector (relies on the response preserving input
# order) and keep the source text as metadata so query results are readable.
vectors = [
    {
        "id": doc['id'],
        "values": vector,
        "metadata": {'text': doc['text']},
    }
    for doc, vector in zip(data, embeddings)
]

index.upsert(vectors=vectors, namespace="ns1")
# Natural-language question to search the index with.
query = "Tell me about the tech company known as Apple"

# Embed the question with the query-side task; a single text was sent, so the
# first entry of the response holds its vector.
query_response = get_embeddings([query], dimensions=dimension, task='retrieval.query')
x = query_response["data"][0]["embedding"]

# Ask for the top 3 matches in the namespace the documents were upserted to,
# returning stored metadata but not the vectors themselves.
results = index.query(
    vector=x,
    top_k=3,
    namespace="ns1",
    include_metadata=True,
    include_values=False,
)

print(results)