# chroma_db.py (1.2 KB)

  1. import logging
  2. import chromadb
  3. from chromadb.config import Settings
  4. from embedchain.vectordb.base_vector_db import BaseVectorDB
  5. class ChromaDB(BaseVectorDB):
  6. """Vector database using ChromaDB."""
  7. def __init__(self, db_dir=None, embedding_fn=None, host=None, port=None):
  8. self.embedding_fn = embedding_fn
  9. if not hasattr(embedding_fn, "__call__"):
  10. raise ValueError("Embedding function is not a function")
  11. if host and port:
  12. logging.info(f"Connecting to ChromaDB server: {host}:{port}")
  13. self.settings = Settings(chroma_server_host=host, chroma_server_http_port=port)
  14. else:
  15. if db_dir is None:
  16. db_dir = "db"
  17. self.settings = Settings(persist_directory=db_dir, anonymized_telemetry=False, allow_reset=True)
  18. super().__init__()
  19. def _get_or_create_db(self):
  20. """Get or create the database."""
  21. return chromadb.Client(self.settings)
  22. def _get_or_create_collection(self):
  23. """Get or create the collection."""
  24. return self.client.get_or_create_collection(
  25. "embedchain_store",
  26. embedding_function=self.embedding_fn,
  27. )