from typing import List, Optional

import requests
def get_embeddings(
    inputs: List[str],  # List of text or image URLs
    dimensions: int = 1024,
    task: Optional[str] = None,  # Set to 'retrieval.query' for text retrieval
    *,
    timeout: float = 60.0,
) -> dict:
    """Request embeddings for ``inputs`` from the Jina embeddings endpoint.

    Sends one POST to ``https://api.jina.ai/v1/embeddings`` using the
    ``jina-clip-v2`` model and returns the decoded JSON response.

    Args:
        inputs: Strings to embed (text, or image URLs).
        dimensions: Value sent as the ``dimensions`` field of the request.
        task: Optional value sent as the ``task`` field; the field is
            omitted from the request when this is ``None``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds ``timeout`` seconds.

    Note:
        ``JINA_API_KEY`` is read as a module-level name that is not defined
        in this function; it must exist before the first call.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {JINA_API_KEY}',
    }
    payload = {
        'input': inputs,
        'model': 'jina-clip-v2',
        'dimensions': dimensions,
    }
    # Only include `task` when the caller asked for one, so the default
    # request is unchanged from the original payload.
    if task is not None:
        payload['task'] = task

    response = requests.post(
        'https://api.jina.ai/v1/embeddings',
        headers=headers,
        json=payload,
        timeout=timeout,  # requests waits forever without an explicit timeout
    )
    # Fail loudly on an error status instead of handing an error body back
    # to callers that expect a "data" key.
    response.raise_for_status()
    return response.json()
# Example data with image and text.
# NOTE(review): `dimension` and `index` are used below but are not defined in
# this part of the file; they must be set up beforehand (presumably the
# vector index and its dimensionality -- confirm against the setup code).
data = [
    {"id": "img1", "modality": "image", "content": "https://example.com/image1.jpg"},
    {"id": "txt1", "modality": "text", "content": "A red apple on a table."},
    {"id": "img2", "modality": "image", "content": "https://example.com/image2.png"},
    {"id": "txt2", "modality": "text", "content": "A basket of green apples."},
]

# Embed every item with a single request instead of one HTTP call per item.
embeddings = get_embeddings(
    [item["content"] for item in data],
    dimensions=dimension,
)
results = embeddings["data"]
if len(results) != len(data):
    raise ValueError(
        f"expected {len(data)} embeddings, got {len(results)}"
    )

# Pair each input with its embedding; assumes the response lists embeddings
# in the same order as the inputs -- TODO confirm against the API reference.
vectors = []
for item, result in zip(data, results):
    embedding = result["embedding"]
    vectors.append({
        "id": item["id"],
        "values": embedding,
        "metadata": {"content": item["content"], "modality": item["modality"]},
    })

index.upsert(
    vectors=vectors,
    namespace="ns1",  # optionally specify a namespace
)