List available models
List the embedding and reranking models hosted by Pinecone.
You can use hosted models as an integrated part of Pinecone operations or for standalone embedding and reranking. For more details, see Vector embedding and Rerank results.
GET /models
from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")

# List every embedding and reranking model hosted by Pinecone.
models = pc.inference.list_models()
print(models)
[{
"model": "llama-text-embed-v2",
"short_description": "A high performance dense embedding model optimized for multilingual and cross-lingual text question-answering retrieval with support for long documents (up to 2048 tokens) and dynamic embedding size (Matryoshka Embeddings).",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE",
"START"
]
},
{
"parameter": "dimension",
"type": "one_of",
"value_type": "integer",
"required": false,
"default": 1024,
"allowed_values": [
384,
512,
768,
1024,
2048
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 2048,
"max_batch_size": 96,
"provider_name": "NVIDIA",
"supported_metrics": [
"cosine",
"dotproduct"
],
"supported_dimensions": [
384,
512,
768,
1024,
2048
]
}, {
"model": "multilingual-e5-large",
"short_description": "A high-performance dense embedding model trained on a mixture of multilingual datasets. It works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 507,
"max_batch_size": 96,
"provider_name": "Microsoft",
"supported_metrics": [
"cosine",
"euclidean"
],
"supported_dimensions": [
1024
]
}, {
"model": "pinecone-sparse-english-v0",
"short_description": "A sparse embedding model for converting text to sparse vectors for keyword or hybrid semantic/keyword search. Built on the innovations of the DeepImpact architecture.",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
},
{
"parameter": "return_tokens",
"type": "any",
"value_type": "boolean",
"required": false,
"default": false
}
],
"vector_type": "sparse",
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 96,
"provider_name": "Pinecone",
"supported_metrics": [
"dotproduct"
]
}, {
"model": "bge-reranker-v2-m3",
"short_description": "A high-performance, multilingual reranking model that works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "NONE",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 1024,
"max_batch_size": 100,
"provider_name": "BAAI",
"supported_metrics": []
}, {
"model": "cohere-rerank-3.5",
"short_description": "Cohere's leading reranking model, balancing performance and latency for a wide range of enterprise search applications.",
"type": "rerank",
"supported_parameters": [
{
"parameter": "max_chunks_per_doc",
"type": "numeric_range",
"value_type": "integer",
"required": false,
"default": 3072,
"min": 1.0,
"max": 3072.0
}
],
"modality": "text",
"max_sequence_length": 40000,
"max_batch_size": 200,
"provider_name": "Cohere",
"supported_metrics": []
}, {
"model": "pinecone-rerank-v0",
"short_description": "A state of the art reranking model that out-performs competitors on widely accepted benchmarks. It can handle chunks up to 512 tokens (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 100,
"provider_name": "Pinecone",
"supported_metrics": []
}]
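You can also narrow the listing at call time. A minimal sketch, assuming the Python SDK's list_models accepts type and vector_type keyword arguments that map to the query parameters documented below:

from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")

# Assumption: these keyword arguments map to the `type` and `vector_type`
# query parameters of GET /models.
dense_models = pc.inference.list_models(type="embed", vector_type="dense")

# Assumption: the result is iterable and each entry exposes the fields
# shown in the sample response above as attributes.
for m in dense_models:
    print(m.model, m.default_dimension)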
Authorizations
Api-Key (string, header, required): An API key is required to call Pinecone APIs.
Query Parameters
type (string): Filter models by type ('embed' or 'rerank').
vector_type (string): Filter embedding models by vector type ('dense' or 'sparse'). Only relevant when type=embed.
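When calling the REST endpoint directly, these filters are passed as URL query parameters. A hedged sketch using Python's requests library; the api.pinecone.io base URL and the X-Pinecone-API-Version header value are assumptions to verify against the current API reference:

import os
import requests

# Assumptions: global control-plane base URL and versioning header;
# the raw JSON response is expected to wrap results in a "models" array.
resp = requests.get(
    "https://api.pinecone.io/models",
    headers={
        "Api-Key": os.environ["PINECONE_API_KEY"],
        "X-Pinecone-API-Version": "2025-01",
    },
    params={"type": "embed", "vector_type": "sparse"},  # omit params to list all models
)
resp.raise_for_status()
for model in resp.json().get("models", []):
    print(model["model"], model["provider_name"])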
Response
200 (application/json): The list of available models.
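As an illustration of how the response fields can drive model selection, the sketch below picks the first dense embedding model that supports a target dimension. It operates on entries abridged from the sample response above; the helper name and selection criteria are illustrative, not part of the API:

# Two entries abridged from the sample response above.
models = [
    {"model": "llama-text-embed-v2", "type": "embed",
     "vector_type": "dense", "supported_dimensions": [384, 512, 768, 1024, 2048]},
    {"model": "multilingual-e5-large", "type": "embed",
     "vector_type": "dense", "supported_dimensions": [1024]},
]

def pick_dense_model(models, target_dim):
    """Return the first dense embedding model supporting target_dim."""
    for m in models:
        if (
            m["type"] == "embed"
            and m.get("vector_type") == "dense"
            and target_dim in m.get("supported_dimensions", [])
        ):
            return m["model"]
    return None

# Only llama-text-embed-v2 lists 768 among its supported_dimensions.
print(pick_dense_model(models, 768))   # llama-text-embed-v2
print(pick_dense_model(models, 1024))  # llama-text-embed-v2 (first match)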