chroma_db.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import logging
  2. try:
  3. import chromadb
  4. except RuntimeError:
  5. from embedchain.utils import use_pysqlite3
  6. use_pysqlite3()
  7. import chromadb
  8. from chromadb.config import Settings
  9. from embedchain.vectordb.base_vector_db import BaseVectorDB
  class ChromaDB(BaseVectorDB):
      """Vector database using ChromaDB.

      Talks to a remote ChromaDB server when both ``host`` and ``port`` are
      supplied; otherwise opens a persistent client stored on local disk.
      """

      def __init__(self, db_dir=None, embedding_fn=None, host=None, port=None):
          """Build the ChromaDB client, then initialise the base class.

          Args:
              db_dir: Directory for the local persistent store. Defaults to
                  ``"db"``. Only used when no server is configured.
              embedding_fn: Callable handed to the collection as its
                  embedding function.
              host: Hostname of a ChromaDB server.
              port: HTTP port of a ChromaDB server.

          Raises:
              ValueError: If ``embedding_fn`` is not callable.
          """
          # Validate before touching any state so a bad argument leaves
          # nothing half-initialised.
          if not callable(embedding_fn):
              raise ValueError("Embedding function is not a function")
          self.embedding_fn = embedding_fn

          # Remote mode needs BOTH host and port; if either is missing we fall
          # back to the local persistent store.
          if host and port:
              logging.info("Connecting to ChromaDB server: %s:%s", host, port)
              self.settings = Settings(
                  chroma_server_host=host,
                  chroma_server_http_port=port,
              )
              # HttpClient's first positional parameter is ``host``, so the
              # settings object must be passed by keyword; passing it
              # positionally would use it as the hostname.
              self.client = chromadb.HttpClient(
                  host=host,
                  port=port,
                  settings=self.settings,
              )
          else:
              if db_dir is None:
                  db_dir = "db"
              self.settings = Settings(anonymized_telemetry=False, allow_reset=True)
              self.client = chromadb.PersistentClient(
                  path=db_dir,
                  settings=self.settings,
              )

          # Called last, once ``self.client`` and ``self.embedding_fn`` exist.
          # NOTE(review): presumably the base initialiser invokes the
          # ``_get_or_create_*`` hooks below -- confirm against BaseVectorDB.
          super().__init__()

      def _get_or_create_db(self):
          """Get or create the database (the ChromaDB client itself)."""
          return self.client

      def _get_or_create_collection(self):
          """Get or create the ``embedchain_store`` collection."""
          return self.client.get_or_create_collection(
              "embedchain_store",
              embedding_function=self.embedding_fn,
          )