pinecone.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
import os
from typing import Any, Optional

from embedchain.config.vectordb.base import BaseVectorDbConfig
from embedchain.helpers.json_serializable import register_deserializable
  5. @register_deserializable
  6. class PineconeDBConfig(BaseVectorDbConfig):
  7. def __init__(
  8. self,
  9. index_name: Optional[str] = None,
  10. api_key: Optional[str] = None,
  11. vector_dimension: int = 1536,
  12. metric: Optional[str] = "cosine",
  13. pod_config: Optional[dict[str, any]] = None,
  14. serverless_config: Optional[dict[str, any]] = None,
  15. hybrid_search: bool = False,
  16. bm25_encoder: any = None,
  17. batch_size: Optional[int] = 100,
  18. **extra_params: dict[str, any],
  19. ):
  20. self.metric = metric
  21. self.api_key = api_key
  22. self.index_name = index_name
  23. self.vector_dimension = vector_dimension
  24. self.extra_params = extra_params
  25. self.hybrid_search = hybrid_search
  26. self.bm25_encoder = bm25_encoder
  27. self.batch_size = batch_size
  28. if pod_config is None and serverless_config is None:
  29. # If no config is provided, use the default pod spec config
  30. pod_environment = os.environ.get("PINECONE_ENV", "gcp-starter")
  31. self.pod_config = {"environment": pod_environment, "metadata_config": {"indexed": ["*"]}}
  32. else:
  33. self.pod_config = pod_config
  34. self.serverless_config = serverless_config
  35. if self.pod_config and self.serverless_config:
  36. raise ValueError("Only one of pod_config or serverless_config can be provided.")
  37. if self.hybrid_search and self.metric != "dotproduct":
  38. raise ValueError(
  39. "Hybrid search is only supported with dotproduct metric in Pinecone. See full docs here: https://docs.pinecone.io/docs/hybrid-search#limitations"
  40. ) # noqa:E501
  41. super().__init__(collection_name=self.index_name, dir=None)