123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- ---
- title: Pinecone
- ---
- ## Overview
- Install the Pinecone-related dependencies using the following command:
- ```bash
- pip install --upgrade 'embedchain[pinecone]'
- ```
- To use Pinecone as the vector database, set the environment variable `PINECONE_API_KEY`, which you can find on the [Pinecone dashboard](https://app.pinecone.io/).
- <CodeGroup>
- ```python main.py
- from embedchain import App
- # Load pinecone configuration from yaml file
- app = App.from_config(config_path="pod_config.yaml")
- # Or
- app = App.from_config(config_path="serverless_config.yaml")
- ```
- ```yaml pod_config.yaml
- vectordb:
-   provider: pinecone
-   config:
-     metric: cosine
-     vector_dimension: 1536
-     index_name: my-pinecone-index
-     pod_config:
-       environment: gcp-starter
-       metadata_config:
-         indexed:
-           - "url"
-           - "hash"
- ```
- ```yaml serverless_config.yaml
- vectordb:
-   provider: pinecone
-   config:
-     metric: cosine
-     vector_dimension: 1536
-     index_name: my-pinecone-index
-     serverless_config:
-       cloud: aws
-       region: us-west-2
- ```
- </CodeGroup>
- <br />
- <Note>
- You can find more information about Pinecone configuration [here](https://docs.pinecone.io/docs/manage-indexes#create-a-pod-based-index).
- You can optionally set `index_name` in the YAML config to specify the index name. If it is not provided, the index name defaults to `{collection_name}-{vector_dimension}`.
- </Note>
- ## Usage
- ### Hybrid search
- Here is an example of how you can do hybrid search using Pinecone as a vector database through Embedchain.
- ```python
- import os
- from embedchain import App
- config = {
- 'app': {
- "config": {
- "id": "ec-docs-hybrid-search"
- }
- },
- 'vectordb': {
- 'provider': 'pinecone',
- 'config': {
- 'metric': 'dotproduct',
- 'vector_dimension': 1536,
- 'index_name': 'my-index',
- 'serverless_config': {
- 'cloud': 'aws',
- 'region': 'us-west-2'
- },
- 'hybrid_search': True, # Remember to set this for hybrid search
- }
- }
- }
- # Initialize app
- app = App.from_config(config=config)
- # Add documents
- app.add("/path/to/file.pdf", data_type="pdf_file", namespace="my-namespace")
- # Query
- app.query("<YOUR QUESTION HERE>", namespace="my-namespace")
- ```
- Under the hood, Embedchain fetches the relevant chunks from the documents you added by performing a hybrid search on the Pinecone index.
- If you have questions about how Pinecone hybrid search works, please refer to their [official documentation here](https://docs.pinecone.io/docs/hybrid-search).
- <Snippet file="missing-vector-db-tip.mdx" />
|