Forráskód Böngészése

[Feat]: Add support for running chromadb in server mode with embedchain (#220)

Deshraj Yadav 2 éve
szülő
commit
73dd7151cb

+ 1 - 1
Makefile

@@ -8,7 +8,7 @@ PROJECT_NAME := embedchain
 
 install:
 	$(PIP) install --upgrade pip
-	$(PIP) install .[dev]
+	$(PIP) install -e .[dev]
 
 format:
 	$(PYTHON) -m black .

+ 2 - 2
embedchain/config/InitConfig.py

@@ -9,7 +9,7 @@ class InitConfig(BaseConfig):
     Config to initialize an embedchain `App` instance.
     """
 
-    def __init__(self, log_level=None, ef=None, db=None):
+    def __init__(self, log_level=None, ef=None, db=None, host=None, port=None):
         """
         :param log_level: Optional. (String) Debug level
         ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'].
@@ -33,7 +33,7 @@ class InitConfig(BaseConfig):
         if db is None:
             from embedchain.vectordb.chroma_db import ChromaDB
 
-            self.db = ChromaDB(ef=self.ef)
+            self.db = ChromaDB(ef=self.ef, host=host, port=port)
         else:
             self.db = db
 

+ 16 - 8
embedchain/vectordb/chroma_db.py

@@ -9,7 +9,7 @@ from embedchain.vectordb.base_vector_db import BaseVectorDB
 class ChromaDB(BaseVectorDB):
     """Vector database using ChromaDB."""
 
-    def __init__(self, db_dir=None, ef=None):
+    def __init__(self, db_dir=None, ef=None, host=None, port=None):
         if ef:
             self.ef = ef
         else:
@@ -18,13 +18,21 @@ class ChromaDB(BaseVectorDB):
                 organization_id=os.getenv("OPENAI_ORGANIZATION"),
                 model_name="text-embedding-ada-002",
             )
-        if db_dir is None:
-            db_dir = "db"
-        self.client_settings = chromadb.config.Settings(
-            chroma_db_impl="duckdb+parquet",
-            persist_directory=db_dir,
-            anonymized_telemetry=False,
-        )
+
+        if host and port:
+            self.client_settings = chromadb.config.Settings(
+                chroma_api_impl="rest",
+                chroma_server_host=host,
+                chroma_server_http_port=port,
+            )
+        else:
+            if db_dir is None:
+                db_dir = "db"
+            self.client_settings = chromadb.config.Settings(
+                chroma_db_impl="duckdb+parquet",
+                persist_directory=db_dir,
+                anonymized_telemetry=False,
+            )
         super().__init__()
 
     def _get_or_create_db(self):

+ 111 - 0
notebooks/embedchain-chromadb-server.ipynb

@@ -0,0 +1,111 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "553f2e71",
+   "metadata": {},
+   "source": [
+    "## Embedchain chromadb server example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "513e12e6",
+   "metadata": {},
+   "source": [
+    "This notebook shows an example of how you can use embedchain with chromadb (server). \n",
+    "\n",
+    "\n",
+    "First, run chroma inside docker using the following command:\n",
+    "\n",
+    "\n",
+    "```bash\n",
+    "git clone https://github.com/chroma-core/chroma\n",
+    "cd chroma && docker-compose up -d --build\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "92e7ad71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from embedchain import App\n",
+    "from embedchain.config import InitConfig\n",
+    "\n",
+    "\n",
+    "chromadb_host = \"localhost\"\n",
+    "chromadb_port = 8000\n",
+    "\n",
+    "config = InitConfig(host=chromadb_host, port=chromadb_port)\n",
+    "elon_bot = App(config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1a6d6841",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "All data from https://en.wikipedia.org/wiki/Elon_Musk already exists in the database.\n",
+      "All data from https://www.tesla.com/elon-musk already exists in the database.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Embed Online Resources\n",
+    "elon_bot.add(\"web_page\", \"https://en.wikipedia.org/wiki/Elon_Musk\")\n",
+    "elon_bot.add(\"web_page\", \"https://www.tesla.com/elon-musk\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "34cda99c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Elon Musk runs four companies: Tesla, SpaceX, Neuralink, and The Boring Company.'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "elon_bot.query(\"How many companies does Elon Musk run?\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}