from pinecone import Pinecone

# Create a client authenticated with your Pinecone API key.
client = Pinecone(api_key="YOUR_API_KEY")

# Fetch metadata for every embedding and reranking model hosted by
# Pinecone Inference, then display the full listing.
print(client.inference.list_models())
[{
"model": "llama-text-embed-v2",
"short_description": "A high performance dense embedding model optimized for multilingual and cross-lingual text question-answering retrieval with support for long documents (up to 2048 tokens) and dynamic embedding size (Matryoshka Embeddings).",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE",
"START"
]
},
{
"parameter": "dimension",
"type": "one_of",
"value_type": "integer",
"required": false,
"default": 1024,
"allowed_values": [
384,
512,
768,
1024,
2048
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 2048,
"max_batch_size": 96,
"provider_name": "NVIDIA",
"supported_metrics": [
"cosine",
"dotproduct"
],
"supported_dimensions": [
384,
512,
768,
1024,
2048
]
}, {
"model": "multilingual-e5-large",
"short_description": "A high-performance dense embedding model trained on a mixture of multilingual datasets. It works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 507,
"max_batch_size": 96,
"provider_name": "Microsoft",
"supported_metrics": [
"cosine",
"euclidean"
],
"supported_dimensions": [
1024
]
}, {
"model": "pinecone-sparse-english-v0",
"short_description": "A sparse embedding model for converting text to sparse vectors for keyword or hybrid semantic/keyword search. Built on the innovations of the DeepImpact architecture.",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
},
{
"parameter": "return_tokens",
"type": "any",
"value_type": "boolean",
"required": false,
"default": false
}
],
"vector_type": "sparse",
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 96,
"provider_name": "Pinecone",
"supported_metrics": [
"dotproduct"
]
}, {
"model": "bge-reranker-v2-m3",
"short_description": "A high-performance, multilingual reranking model that works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "NONE",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 1024,
"max_batch_size": 100,
"provider_name": "BAAI",
"supported_metrics": []
}, {
"model": "cohere-rerank-3.5",
"short_description": "Cohere's leading reranking model, balancing performance and latency for a wide range of enterprise search applications.",
"type": "rerank",
"supported_parameters": [
{
"parameter": "max_chunks_per_doc",
"type": "numeric_range",
"value_type": "integer",
"required": false,
"default": 3072,
"min": 1.0,
"max": 3072.0
}
],
"modality": "text",
"max_sequence_length": 40000,
"max_batch_size": 200,
"provider_name": "Cohere",
"supported_metrics": []
}, {
"model": "pinecone-rerank-v0",
"short_description": "A state of the art reranking model that out-performs competitors on widely accepted benchmarks. It can handle chunks up to 512 tokens (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 100,
"provider_name": "Pinecone",
"supported_metrics": []
}]
List the embedding and reranking models hosted by Pinecone.
You can use hosted models as an integrated part of Pinecone operations or for standalone embedding and reranking. For more details, see Vector embedding and Rerank results.
from pinecone import Pinecone

# Create a client authenticated with your Pinecone API key.
client = Pinecone(api_key="YOUR_API_KEY")

# Fetch metadata for every embedding and reranking model hosted by
# Pinecone Inference, then display the full listing.
print(client.inference.list_models())
[{
"model": "llama-text-embed-v2",
"short_description": "A high performance dense embedding model optimized for multilingual and cross-lingual text question-answering retrieval with support for long documents (up to 2048 tokens) and dynamic embedding size (Matryoshka Embeddings).",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE",
"START"
]
},
{
"parameter": "dimension",
"type": "one_of",
"value_type": "integer",
"required": false,
"default": 1024,
"allowed_values": [
384,
512,
768,
1024,
2048
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 2048,
"max_batch_size": 96,
"provider_name": "NVIDIA",
"supported_metrics": [
"cosine",
"dotproduct"
],
"supported_dimensions": [
384,
512,
768,
1024,
2048
]
}, {
"model": "multilingual-e5-large",
"short_description": "A high-performance dense embedding model trained on a mixture of multilingual datasets. It works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 507,
"max_batch_size": 96,
"provider_name": "Microsoft",
"supported_metrics": [
"cosine",
"euclidean"
],
"supported_dimensions": [
1024
]
}, {
"model": "pinecone-sparse-english-v0",
"short_description": "A sparse embedding model for converting text to sparse vectors for keyword or hybrid semantic/keyword search. Built on the innovations of the DeepImpact architecture.",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
},
{
"parameter": "return_tokens",
"type": "any",
"value_type": "boolean",
"required": false,
"default": false
}
],
"vector_type": "sparse",
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 96,
"provider_name": "Pinecone",
"supported_metrics": [
"dotproduct"
]
}, {
"model": "bge-reranker-v2-m3",
"short_description": "A high-performance, multilingual reranking model that works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "NONE",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 1024,
"max_batch_size": 100,
"provider_name": "BAAI",
"supported_metrics": []
}, {
"model": "cohere-rerank-3.5",
"short_description": "Cohere's leading reranking model, balancing performance and latency for a wide range of enterprise search applications.",
"type": "rerank",
"supported_parameters": [
{
"parameter": "max_chunks_per_doc",
"type": "numeric_range",
"value_type": "integer",
"required": false,
"default": 3072,
"min": 1.0,
"max": 3072.0
}
],
"modality": "text",
"max_sequence_length": 40000,
"max_batch_size": 200,
"provider_name": "Cohere",
"supported_metrics": []
}, {
"model": "pinecone-rerank-v0",
"short_description": "A state of the art reranking model that out-performs competitors on widely accepted benchmarks. It can handle chunks up to 512 tokens (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 100,
"provider_name": "Pinecone",
"supported_metrics": []
}]
from pinecone import Pinecone

# Create a client authenticated with your Pinecone API key.
client = Pinecone(api_key="YOUR_API_KEY")

# Fetch metadata for every embedding and reranking model hosted by
# Pinecone Inference, then display the full listing.
print(client.inference.list_models())
[{
"model": "llama-text-embed-v2",
"short_description": "A high performance dense embedding model optimized for multilingual and cross-lingual text question-answering retrieval with support for long documents (up to 2048 tokens) and dynamic embedding size (Matryoshka Embeddings).",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE",
"START"
]
},
{
"parameter": "dimension",
"type": "one_of",
"value_type": "integer",
"required": false,
"default": 1024,
"allowed_values": [
384,
512,
768,
1024,
2048
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 2048,
"max_batch_size": 96,
"provider_name": "NVIDIA",
"supported_metrics": [
"cosine",
"dotproduct"
],
"supported_dimensions": [
384,
512,
768,
1024,
2048
]
}, {
"model": "multilingual-e5-large",
"short_description": "A high-performance dense embedding model trained on a mixture of multilingual datasets. It works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"vector_type": "dense",
"default_dimension": 1024,
"modality": "text",
"max_sequence_length": 507,
"max_batch_size": 96,
"provider_name": "Microsoft",
"supported_metrics": [
"cosine",
"euclidean"
],
"supported_dimensions": [
1024
]
}, {
"model": "pinecone-sparse-english-v0",
"short_description": "A sparse embedding model for converting text to sparse vectors for keyword or hybrid semantic/keyword search. Built on the innovations of the DeepImpact architecture.",
"type": "embed",
"supported_parameters": [
{
"parameter": "input_type",
"type": "one_of",
"value_type": "string",
"required": true,
"allowed_values": [
"query",
"passage"
]
},
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
},
{
"parameter": "return_tokens",
"type": "any",
"value_type": "boolean",
"required": false,
"default": false
}
],
"vector_type": "sparse",
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 96,
"provider_name": "Pinecone",
"supported_metrics": [
"dotproduct"
]
}, {
"model": "bge-reranker-v2-m3",
"short_description": "A high-performance, multilingual reranking model that works well on messy data and short queries expected to return medium-length passages of text (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "NONE",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 1024,
"max_batch_size": 100,
"provider_name": "BAAI",
"supported_metrics": []
}, {
"model": "cohere-rerank-3.5",
"short_description": "Cohere's leading reranking model, balancing performance and latency for a wide range of enterprise search applications.",
"type": "rerank",
"supported_parameters": [
{
"parameter": "max_chunks_per_doc",
"type": "numeric_range",
"value_type": "integer",
"required": false,
"default": 3072,
"min": 1.0,
"max": 3072.0
}
],
"modality": "text",
"max_sequence_length": 40000,
"max_batch_size": 200,
"provider_name": "Cohere",
"supported_metrics": []
}, {
"model": "pinecone-rerank-v0",
"short_description": "A state of the art reranking model that out-performs competitors on widely accepted benchmarks. It can handle chunks up to 512 tokens (1-2 paragraphs)",
"type": "rerank",
"supported_parameters": [
{
"parameter": "truncate",
"type": "one_of",
"value_type": "string",
"required": false,
"default": "END",
"allowed_values": [
"END",
"NONE"
]
}
],
"modality": "text",
"max_sequence_length": 512,
"max_batch_size": 100,
"provider_name": "Pinecone",
"supported_metrics": []
}]
Filter models by type ('embed' or 'rerank').
Filter embedding models by vector type ('dense' or 'sparse'). Only relevant when type=embed.
The list of available models.
The list of available models.
Show child attributes
The name of the model.
"multilingual-e5-large"
A summary of the model.
"multilingual-e5-large"
The type of model (e.g. 'embed' or 'rerank').
"embed"
Show child attributes
The name of the parameter.
"input_type"
The parameter type e.g. 'one_of', 'numeric_range', or 'any'.
If the type is 'one_of', then 'allowed_values' will be set, and the value specified must be one of the allowed values. 'one_of' is only compatible with value_type 'string' or 'integer'.
If 'numeric_range', then 'min' and 'max' will be set, then the value specified must adhere to the value_type and must fall within the [min, max] range (inclusive).
If 'any' then any value is allowed, as long as it adheres to the value_type.
"one_of"
The type of value the parameter accepts, e.g. 'string', 'integer', 'float', or 'boolean'.
"string"
Whether the parameter is required (true) or optional (false).
true
The allowed parameter values when the type is 'one_of'.
The minimum allowed value (inclusive) when the type is 'numeric_range'.
1
The maximum allowed value (inclusive) when the type is 'numeric_range'.
1
The default value for the parameter when a parameter is optional.
"END"
Whether the embedding model produces 'dense' or 'sparse' embeddings.
The default embedding model dimension (applies to dense embedding models only).
1 <= x <= 20000

1024
The modality of the model (e.g. 'text').
"text"
The maximum tokens per sequence supported by the model.
x >= 1

512
The maximum batch size (number of sequences) supported by the model.
x >= 1

96
The name of the provider of the model.
"NVIDIA"
The list of supported dimensions for the model (applies to dense embedding models only).
1 <= x <= 20000

The distance metrics supported by the model for similarity search.
A distance metric that the embedding model supports for similarity searches.
cosine, euclidean, dotproduct