Jina Embeddings v3 is the latest iteration in Jina AI’s text embedding model series, building upon Jina Embeddings v2. Key features include multilingual support for over 30 languages, task-oriented design using Low-Rank Adaptation (LoRA) instruction adapters, and Matryoshka Representation Learning (MRL) for flexible embedding generation. The model is built on a custom backbone based on XLM-RoBERTa with extended training and Rotary Position Embeddings (RoPE), allowing it to support up to 8,192 input tokens.
from typing import List, Literal

import requests


def get_embeddings(
    texts: List[str],
    dimensions: int,
    task: Literal[
        'text-matching',
        'separation',
        'classification',
        'retrieval.query',
        'retrieval.passage',
    ],
    *,
    timeout: float = 30.0,
) -> dict:
    """Request embeddings for ``texts`` from the Jina embeddings endpoint.

    Sends one POST request to ``https://api.jina.ai/v1/embeddings`` using the
    ``jina-embeddings-v3`` model and returns the decoded JSON body.

    Args:
        texts: Strings to embed; sent as the ``input`` field.
        dimensions: Requested embedding size; sent as the ``dimensions`` field.
        task: Task name forwarded as the ``task`` field of the request.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    # NOTE(review): JINA_API_KEY is not defined in this snippet; it is
    # expected to be set earlier in the file/notebook -- confirm.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {JINA_API_KEY}',
    }
    payload = {
        'input': texts,
        'model': 'jina-embeddings-v3',
        'dimensions': dimensions,
        'task': task,
    }
    response = requests.post(
        'https://api.jina.ai/v1/embeddings',
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail here on an HTTP error instead of letting callers hit a KeyError
    # when they index into an error payload.
    response.raise_for_status()
    return response.json()


# Data to index
data = [
    {"id": "vec1", "text": "Apple is a popular fruit known for its sweetness and crisp texture."},
    {"id": "vec2", "text": "The tech company Apple is known for its innovative products like the iPhone."},
    {"id": "vec3", "text": "Many people enjoy eating apples as a healthy snack."},
    {"id": "vec4", "text": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces."},
    {"id": "vec5", "text": "An apple a day keeps the doctor away, as the saying goes."},
]

# NOTE(review): `dimension` and `index` are not defined in this snippet; they
# are expected to come from an earlier setup step -- confirm.
response = get_embeddings(
    [d["text"] for d in data],
    dimensions=dimension,
    task='retrieval.passage',
)
embeddings = [e["embedding"] for e in response["data"]]

# Pair each record with its embedding by position and keep the source text
# as metadata so query results can display it.
vectors = [
    {
        "id": d['id'],
        "values": e,
        "metadata": {'text': d['text']},
    }
    for d, e in zip(data, embeddings)
]

index.upsert(
    vectors=vectors,
    namespace="ns1",
)
query = "Tell me about the tech company known as Apple"

# Remember to keep the query and document embeddings at the same dimensions.
# NOTE(review): `get_embeddings`, `dimension` and `index` are expected to be
# defined earlier in the file -- confirm.
x = get_embeddings(
    [query],
    dimensions=dimension,
    task='retrieval.query',
)["data"][0]["embedding"]

# Search the same namespace the documents were upserted into; return the
# stored metadata but not the raw vector values.
results = index.query(
    namespace="ns1",
    vector=x,
    top_k=3,
    include_values=False,
    include_metadata=True,
)

print(results)
Lorem Ipsum
Was this page helpful?
Assistant
Responses are generated using AI and may contain mistakes.