Documentation Index
Fetch the complete documentation index at: https://docs.pinecone.io/llms.txt
Use this file to discover all available pages before exploring further.
Overview
Ideal for text embeddings where short queries are expected to return large passages of text. Works well with messy data. Can be used via the Jina Embeddings API; users can get an API key at https://jina.ai/embeddings/.
Using the model
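Installation: install the Pinecone Python SDK and the `requests` library used by the snippets below (for example, `pip install pinecone requests`). Set `JINA_API_KEY` to your Jina API key before running the embedding code.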
Create Index
from pinecone import Pinecone, ServerlessSpec
# NOTE: "API_KEY" looks like a placeholder; supply a real Pinecone API key.
pc = Pinecone(api_key="API_KEY")

# Create the serverless index only when it is not already present.
index_name = "jina-embeddings-v2-base-en"
index_exists = pc.has_index(index_name)
if not index_exists:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=serverless_spec,
    )

# Handle used by the upsert and query steps further down.
index = pc.Index(index_name)
Embed & Upsert
# Embed data
# Sample documents: each record pairs an "id" with the "text" to embed.
data = [
    {
        "id": "vec1",
        "text": "Apple is a popular fruit known for its sweetness and crisp texture.",
    },
    {
        "id": "vec2",
        "text": "The tech company Apple is known for its innovative products like the iPhone.",
    },
    {
        "id": "vec3",
        "text": "Many people enjoy eating apples as a healthy snack.",
    },
    {
        "id": "vec4",
        "text": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
    },
    {
        "id": "vec5",
        "text": "An apple a day keeps the doctor away, as the saying goes.",
    },
]
import requests
# Endpoint of the Jina embeddings REST API.
url = 'https://api.jina.ai/v1/embeddings'


def get_embeddings(texts):
    """Request embeddings for a list of texts from the Jina embeddings API.

    Args:
        texts: List of strings to embed.

    Returns:
        The decoded JSON body of the response. Callers in this file read
        ``result["data"][i]["embedding"]`` to get the i-th vector.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the
            configured timeout.
    """
    # NOTE: JINA_API_KEY is not defined in the visible snippet; it must be
    # set to your Jina API key before this function is called.
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {JINA_API_KEY}'
    }
    # Named ``payload`` so it does not shadow the module-level ``data`` list.
    payload = {
        'input': texts,
        'model': 'jina-embeddings-v2-base-en'
    }
    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    # Fail here on an HTTP error instead of letting callers hit a confusing
    # KeyError when they index into an error body.
    response.raise_for_status()
    return response.json()
# Embed every document text in a single API call.
response_body = get_embeddings([d["text"] for d in data])
embeddings = [item["embedding"] for item in response_body["data"]]

# zip() would silently drop documents if the API returned fewer vectors
# than were requested, so check the counts before pairing them up.
if len(embeddings) != len(data):
    raise ValueError(
        f"expected {len(data)} embeddings, got {len(embeddings)}"
    )

# Pair each document with its embedding, keeping the source text as metadata.
vectors = [
    {
        "id": doc["id"],
        "values": embedding,
        "metadata": {"text": doc["text"]},
    }
    for doc, embedding in zip(data, embeddings)
]

# Write all vectors into namespace "ns1" of the index.
index.upsert(
    vectors=vectors,
    namespace="ns1",
)
Query
# Embed the query text with the same helper used for the documents.
query = "Tell me about the tech company known as Apple"
query_response = get_embeddings([query])
x = query_response["data"][0]["embedding"]

# Ask for the top 3 matches in namespace "ns1", returning stored metadata
# but not the raw vector values.
results = index.query(
    vector=x,
    namespace="ns1",
    top_k=3,
    include_metadata=True,
    include_values=False,
)
print(results)
Lorem Ipsum