List the embedding and reranking models hosted by Pinecone.
You can use hosted models as an integrated part of Pinecone operations or for standalone embedding and reranking. For more details, see Vector embedding and Rerank results.
from pinecone import Pinecone

# Initialize the client with your API key.
pc = Pinecone(api_key="YOUR_API_KEY")

# Retrieve metadata for every model hosted by Pinecone.
models = pc.inference.list_models()
print(models)
[{
"model": "llama-text-embed-v2",
"short_description": "A high performance dense embedding model optimized for multilingual and cross-lingual text question-answering retrieval with support for long documents (up to 2048 tokens) and dynamic embedding size (Matryoshka Embeddings).",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE",
"START"
]
},
{
"parameter": "dimension",
"type": "one_of",
"value_type": "integer",
"required": false,
"default": 1024,
"allowed_values": [
384,
512,
768,
1024,
2048
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 2048,
"max_batch_size": 96,
"provider_name": "NVIDIA",
"supported_metrics": [
"cosine",
"dotproduct"
],
"supported_dimensions": [
384,
512,
768,
1024,
2048
]
}, {
"model": "multilingual-e5-large",
"short_description": "A high-performance dense embedding model trained on a mixture of multilingual datasets. It works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 507,
"max_batch_size": 96,
"provider_name": "Microsoft",
"supported_metrics": [
"cosine",
"euclidean"
],
"supported_dimensions": [
1024
]
}, {
"model": "pinecone-sparse-english-v0",
"short_description": "A sparse embedding model for converting text to sparse vectors for keyword or hybrid semantic/keyword search. Built on the innovations of the DeepImpact architecture.",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
},
{
"parameter": "return_tokens",
"type": "any",
"value_type": "boolean",
"required": false,
"default": false
}
],
"vector_type": "sparse",
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 96,
"provider_name": "Pinecone",
"supported_metrics": [
"dotproduct"
]
}, {
"model": "bge-reranker-v2-m3",
"short_description": "A high-performance, multilingual reranking model that works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "NONE",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 1024,
"max_batch_size": 100,
"provider_name": "BAAI",
"supported_metrics": []
}, {
"model": "cohere-rerank-3.5",
"short_description": "Cohere's leading reranking model, balancing performance and latency for a wide range of enterprise search applications.",
"type": "rerank",
"supported_parameters": [
{
"parameter": "max_chunks_per_doc",
"type": "numeric_range",
"value_type": "integer",
"required": false,
"default": 3072,
"min": 1.0,
"max": 3072.0
}
],
"modality": "text",
"max_sequence_length": 40000,
"max_batch_size": 200,
"provider_name": "Cohere",
"supported_metrics": []
}, {
"model": "pinecone-rerank-v0",
"short_description": "A state of the art reranking model that out-performs competitors on widely accepted benchmarks. It can handle chunks up to 512 tokens (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 100,
"provider_name": "Pinecone",
"supported_metrics": []
}]
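The metadata above maps directly onto standalone inference calls. As a minimal sketch, the following embeds two illustrative passages with llama-text-embed-v2, passing the parameters listed in its supported_parameters entry (input_type is required; truncate and dimension are optional and shown here with example values; the input text is a placeholder):

from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")

# Embed a batch of passages. "input_type" is required for this model;
# "truncate" and "dimension" fall back to "END" and 1024 if omitted.
embeddings = pc.inference.embed(
    model="llama-text-embed-v2",
    inputs=["Disease prevention", "Diet and exercise"],
    parameters={
        "input_type": "passage",
        "truncate": "END",
        "dimension": 512,  # any value from supported_dimensions
    },
)
print(embeddings)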
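Reranking models are called the same way. A minimal sketch with bge-reranker-v2-m3; the query and documents are placeholders:

from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")

# Score candidate documents against the query; "truncate" defaults to
# "NONE" for this model, per its supported_parameters entry above.
ranked = pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query="Tell me about the tech company known as Apple",
    documents=[
        "Apple is a popular fruit known for its sweetness.",
        "Apple Inc. is known for products like the iPhone and the Mac.",
    ],
    top_n=2,
    return_documents=True,
)
print(ranked)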
The operation supports two optional query parameters, with a filtering example shown below:

type: Filter models by type ('embed' or 'rerank').
vector_type: Filter embedding models by vector type ('dense' or 'sparse'). Only relevant when type='embed'.

The response body contains models: the list of available models.
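As a sketch of the filters in combination, assuming the Python SDK forwards the type and vector_type query parameters as keyword arguments (recent SDK versions accept them):

from pinecone import Pinecone

pc = Pinecone(api_key="YOUR_API_KEY")

# List only reranking models.
rerank_models = pc.inference.list_models(type="rerank")

# Narrow to dense embedding models; vector_type applies only when type="embed".
dense_models = pc.inference.list_models(type="embed", vector_type="dense")
print(dense_models)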