Преглед на файлове

Update notebooks to use dict instead of yaml and remove dataloaders (#1075)

Sidharth Mohanty преди 1 година
родител
ревизия
6df63d9ca7

+ 1 - 1
embedchain/pipeline.py

@@ -6,4 +6,4 @@ class Pipeline(App):
     This is deprecated. Use `App` instead.
     """
 
-    pass
+    pass

+ 13 - 37
notebooks/anthropic.ipynb

@@ -31,7 +31,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders]"
+        "!pip install embedchain"
       ]
     },
     {
@@ -60,45 +60,13 @@
         "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: anthropic\n",
-        "  config:\n",
-        "    model: 'claude-instant-1'\n",
-        "    temperature: 0.5\n",
-        "    top_p: 1\n",
-        "    stream: false\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('anthropic.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -109,7 +77,15 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"anthropic.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"anthropic\",\n",
+        "    \"config\": {\n",
+        "        \"model\": \"claude-instant-1\",\n",
+        "        \"temperature\": 0.5,\n",
+        "        \"top_p\": 1,\n",
+        "        \"stream\": False\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -118,7 +94,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -143,7 +119,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 1 - 1
notebooks/azure-openai.ipynb

@@ -23,7 +23,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install embedchain[dataloaders]"
+    "!pip install embedchain"
    ]
   },
   {

+ 13 - 39
notebooks/chromadb.ipynb

@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders]"
+        "!pip install embedchain"
       ]
     },
     {
@@ -54,47 +54,13 @@
         "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your Vector Database config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "vectordb:\n",
-        "  provider: chroma\n",
-        "  config:\n",
-        "    collection_name: 'my-collection'\n",
-        "    # CHANGE THE BELOW TWO LINES!\n",
-        "    # pass remote database variables - host and port\n",
-        "    host: your-chromadb-url.com\n",
-        "    port: 5200\n",
-        "    allow_reset: true\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('chromadb.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -105,7 +71,15 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"chromadb.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"chroma\",\n",
+        "    \"config\": {\n",
+        "        \"collection_name\": \"my-collection\",\n",
+        "        \"host\": \"your-chromadb-url.com\",\n",
+        "        \"port\": 5200,\n",
+        "        \"allow_reset\": True\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -114,7 +88,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -134,7 +108,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 14 - 38
notebooks/cohere.ipynb

@@ -30,7 +30,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,cohere]"
+        "!pip install embedchain[cohere]"
       ]
     },
     {
@@ -59,46 +59,13 @@
         "os.environ[\"COHERE_API_KEY\"] = \"xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: cohere\n",
-        "  config:\n",
-        "    model: gptd-instruct-tft\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 1\n",
-        "    stream: false\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('cohere.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -114,7 +81,16 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"cohere.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"cohere\",\n",
+        "    \"config\": {\n",
+        "        \"model\": \"gptd-instruct-tft\",\n",
+        "        \"temperature\": 0.5,\n",
+        "        \"max_tokens\": 1000,\n",
+        "        \"top_p\": 1,\n",
+        "        \"stream\": False\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -123,7 +99,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -148,7 +124,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 13 - 37
notebooks/elasticsearch.ipynb

@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,elasticsearch]"
+        "!pip install embedchain[elasticsearch]"
       ]
     },
     {
@@ -54,45 +54,13 @@
         "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your Vector Database config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "vectordb:\n",
-        "  provider: elasticsearch\n",
-        "  config:\n",
-        "    collection_name: 'es-index'\n",
-        "    es_url: your-elasticsearch-url.com\n",
-        "    allow_reset: true\n",
-        "    api_key: xxx\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('elasticsearch.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -103,7 +71,15 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"elasticsearch.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"elasticsearch\",\n",
+        "    \"config\": {\n",
+        "        \"collection_name\": \"es-index\",\n",
+        "        \"es_url\": \"your-elasticsearch-url.com\",\n",
+        "        \"allow_reset\": True,\n",
+        "        \"api_key\": \"xxx\"\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -112,7 +88,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -132,7 +108,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 22 - 43
notebooks/gpt4all.ipynb

@@ -30,7 +30,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,opensource]"
+        "!pip install embedchain[opensource]"
       ]
     },
     {
@@ -55,51 +55,13 @@
         "from embedchain import App"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: gpt4all\n",
-        "  config:\n",
-        "    model: 'orca-mini-3b-gguf2-q4_0.gguf'\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 1\n",
-        "    stream: false\n",
-        "\n",
-        "embedder:\n",
-        "  provider: gpt4all\n",
-        "  config:\n",
-        "    model: 'all-MiniLM-L6-v2'\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('gpt4all.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -114,7 +76,24 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"gpt4all.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"llm\": {\n",
+        "        \"provider\": \"gpt4all\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"orca-mini-3b-gguf2-q4_0.gguf\",\n",
+        "            \"temperature\": 0.5,\n",
+        "            \"max_tokens\": 1000,\n",
+        "            \"top_p\": 1,\n",
+        "            \"stream\": False\n",
+        "        }\n",
+        "    },\n",
+        "    \"embedder\": {\n",
+        "        \"provider\": \"gpt4all\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"all-MiniLM-L6-v2\"\n",
+        "        }\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -123,7 +102,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -148,7 +127,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 22 - 43
notebooks/hugging_face_hub.ipynb

@@ -31,7 +31,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,huggingface_hub,opensource]"
+        "!pip install embedchain[huggingface_hub,opensource]"
       ]
     },
     {
@@ -59,51 +59,13 @@
         "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: huggingface\n",
-        "  config:\n",
-        "    model: 'google/flan-t5-xxl'\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 0.8\n",
-        "    stream: false\n",
-        "\n",
-        "embedder:\n",
-        "  provider: huggingface\n",
-        "  config:\n",
-        "    model: 'sentence-transformers/all-mpnet-base-v2'\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('huggingface.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -114,7 +76,24 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"huggingface.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"llm\": {\n",
+        "        \"provider\": \"huggingface\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"google/flan-t5-xxl\",\n",
+        "            \"temperature\": 0.5,\n",
+        "            \"max_tokens\": 1000,\n",
+        "            \"top_p\": 0.8,\n",
+        "            \"stream\": False\n",
+        "        }\n",
+        "    },\n",
+        "    \"embedder\": {\n",
+        "        \"provider\": \"huggingface\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"sentence-transformers/all-mpnet-base-v2\"\n",
+        "        }\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -123,7 +102,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -148,7 +127,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 13 - 37
notebooks/jina.ipynb

@@ -31,7 +31,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders]"
+        "!pip install embedchain"
       ]
     },
     {
@@ -60,45 +60,13 @@
         "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: jina\n",
-        "  config:\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 1\n",
-        "    stream: false\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('jina.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -114,7 +82,15 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"jina.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"jina\",\n",
+        "    \"config\": {\n",
+        "        \"temperature\": 0.5,\n",
+        "        \"max_tokens\": 1000,\n",
+        "        \"top_p\": 1,\n",
+        "        \"stream\": False\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -123,7 +99,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -148,7 +124,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 14 - 38
notebooks/llama2.ipynb

@@ -30,7 +30,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,llama2]"
+        "!pip install embedchain[llama2]"
       ]
     },
     {
@@ -59,46 +59,13 @@
         "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: llama2\n",
-        "  config:\n",
-        "    model: 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 0.5\n",
-        "    stream: false\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('llama2.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -109,7 +76,16 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"llama2.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"llama2\",\n",
+        "    \"config\": {\n",
+        "        \"model\": \"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n",
+        "        \"temperature\": 0.5,\n",
+        "        \"max_tokens\": 1000,\n",
+        "        \"top_p\": 0.5,\n",
+        "        \"stream\": False\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -118,7 +94,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -143,7 +119,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 20 - 41
notebooks/ollama.ipynb

@@ -24,50 +24,13 @@
     "- ollama serve"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Ns6RhPfbiitr"
-   },
-   "source": [
-    "### Step-2: Define your llm and embedding model config (Going all out local inference, no need for OpenAI API Key)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "id": "S9CkxVjriotB"
-   },
-   "outputs": [],
-   "source": [
-    "config = \"\"\"\n",
-    "llm:\n",
-    "  provider: ollama\n",
-    "  config:\n",
-    "    model: 'llama2'\n",
-    "    temperature: 0.5\n",
-    "    top_p: 1\n",
-    "    stream: true\n",
-    "\n",
-    "embedder:\n",
-    "  provider: huggingface\n",
-    "  config:\n",
-    "    model: 'BAAI/bge-small-en-v1.5'\n",
-    "\"\"\"\n",
-    "\n",
-    "# Write the multi-line string to a YAML file\n",
-    "with open('ollama.yaml', 'w') as file:\n",
-    "    file.write(config)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
     "id": "PGt6uPLIi1CS"
    },
    "source": [
-    "### Step-3 Create embedchain app based on the config"
+    "### Step-2: Create embedchain app and define your config (all local inference)"
    ]
   },
   {
@@ -93,7 +56,23 @@
    ],
    "source": [
     "from embedchain import App\n",
-    "app = App.from_config(config_path=\"ollama.yaml\")"
+    "app = App.from_config(config={\n",
+    "    \"llm\": {\n",
+    "        \"provider\": \"ollama\",\n",
+    "        \"config\": {\n",
+    "            \"model\": \"llama2\",\n",
+    "            \"temperature\": 0.5,\n",
+    "            \"top_p\": 1,\n",
+    "            \"stream\": True\n",
+    "        }\n",
+    "    },\n",
+    "    \"embedder\": {\n",
+    "        \"provider\": \"huggingface\",\n",
+    "        \"config\": {\n",
+    "            \"model\": \"BAAI/bge-small-en-v1.5\"\n",
+    "        }\n",
+    "    }\n",
+    "})"
    ]
   },
   {
@@ -102,7 +81,7 @@
     "id": "XNXv4yZwi7ef"
    },
    "source": [
-    "### Step-4: Add data sources to your app"
+    "### Step-3: Add data sources to your app"
    ]
   },
   {
@@ -159,7 +138,7 @@
     "id": "_7W6fDeAjMAP"
    },
    "source": [
-    "### Step-5: All set. Now start asking questions related to your data"
+    "### Step-4: All set. Now start asking questions related to your data"
    ]
   },
   {

+ 24 - 44
notebooks/openai.ipynb

@@ -31,7 +31,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders]"
+        "!pip install embedchain"
       ]
     },
     {
@@ -59,51 +59,13 @@
         "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: openai\n",
-        "  config:\n",
-        "    model: gpt-3.5-turbo\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    top_p: 1\n",
-        "    stream: false\n",
-        "\n",
-        "embedder:\n",
-        "  provider: openai\n",
-        "  config:\n",
-        "    model: text-embedding-ada-002\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('openai.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -114,7 +76,24 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"openai.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"llm\": {\n",
+        "        \"provider\": \"openai\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"gpt-3.5-turbo\",\n",
+        "            \"temperature\": 0.5,\n",
+        "            \"max_tokens\": 1000,\n",
+        "            \"top_p\": 1,\n",
+        "            \"stream\": False\n",
+        "        }\n",
+        "    },\n",
+        "    \"embedder\": {\n",
+        "        \"provider\": \"openai\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"text-embedding-ada-002\"\n",
+        "        }\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -123,7 +102,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -143,7 +122,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {
@@ -172,7 +151,8 @@
       "name": "python3"
     },
     "language_info": {
-      "name": "python"
+      "name": "python",
+      "version": "3.11.6"
     }
   },
   "nbformat": 4,

+ 15 - 41
notebooks/opensearch.ipynb

@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,opensearch]"
+        "!pip install embedchain[opensearch]"
       ]
     },
     {
@@ -54,49 +54,13 @@
         "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your Vector Database config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "vectordb:\n",
-        "  provider: opensearch\n",
-        "  config:\n",
-        "    opensearch_url: 'your-opensearch-url.com'\n",
-        "    http_auth:\n",
-        "      - admin\n",
-        "      - admin\n",
-        "    vector_dimension: 1536\n",
-        "    collection_name: 'my-app'\n",
-        "    use_ssl: false\n",
-        "    verify_certs: false\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('opensearch.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -107,7 +71,17 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"opensearch.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"opensearch\",\n",
+        "    \"config\": {\n",
+        "        \"opensearch_url\": \"your-opensearch-url.com\",\n",
+        "        \"http_auth\": [\"admin\", \"admin\"],\n",
+        "        \"vector_dimension\": 1536,\n",
+        "        \"collection_name\": \"my-app\",\n",
+        "        \"use_ssl\": False,\n",
+        "        \"verify_certs\": False\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -116,7 +90,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -136,7 +110,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 12 - 36
notebooks/pinecone.ipynb

@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,pinecone]"
+        "!pip install embedchain[pinecone]"
       ]
     },
     {
@@ -56,44 +56,13 @@
         "os.environ[\"PINECONE_ENV\"] = \"xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your Vector Database config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "vectordb:\n",
-        "  provider: pinecone\n",
-        "  config:\n",
-        "    metric: cosine\n",
-        "    vector_dimension: 768\n",
-        "    collection_name: pc-index\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('pinecone.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -104,7 +73,14 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"pinecone.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"provider\": \"pinecone\",\n",
+        "    \"config\": {\n",
+        "        \"metric\": \"cosine\",\n",
+        "        \"vector_dimension\": 768,\n",
+        "        \"collection_name\": \"pc-index\"\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -113,7 +89,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -133,7 +109,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 12 - 36
notebooks/together.ipynb

@@ -30,7 +30,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,together]"
+        "!pip install embedchain[together]"
       ]
     },
     {
@@ -59,44 +59,13 @@
         "os.environ[\"TOGETHER_API_KEY\"] = \"\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: together\n",
-        "  config:\n",
-        "    model: mistralai/Mixtral-8x7B-Instruct-v0.1\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('together.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -112,7 +81,14 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"together.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"llm\": {\n",
+        "        \"provider\": \"together\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
+        "            \"temperature\": 0.5,\n",
+        "            \"max_tokens\": 1000\n",
+        "        }\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -121,7 +97,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -178,7 +154,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {

+ 21 - 42
notebooks/vertex_ai.ipynb

@@ -30,7 +30,7 @@
       },
       "outputs": [],
       "source": [
-        "!pip install embedchain[dataloaders,vertexai]"
+        "!pip install embedchain[vertexai]"
       ]
     },
     {
@@ -58,50 +58,13 @@
         "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Ns6RhPfbiitr"
-      },
-      "source": [
-        "### Step-3: Define your llm and embedding model config"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "S9CkxVjriotB"
-      },
-      "outputs": [],
-      "source": [
-        "config = \"\"\"\n",
-        "llm:\n",
-        "  provider: vertexai\n",
-        "  config:\n",
-        "    model: 'chat-bison'\n",
-        "    temperature: 0.5\n",
-        "    max_tokens: 1000\n",
-        "    stream: false\n",
-        "\n",
-        "embedder:\n",
-        "  provider: vertexai\n",
-        "  config:\n",
-        "    model: 'textembedding-gecko'\n",
-        "\"\"\"\n",
-        "\n",
-        "# Write the multi-line string to a YAML file\n",
-        "with open('vertexai.yaml', 'w') as file:\n",
-        "    file.write(config)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "PGt6uPLIi1CS"
       },
       "source": [
-        "### Step-4 Create embedchain app based on the config"
+        "### Step-3: Create embedchain app and define your config"
       ]
     },
     {
@@ -117,7 +80,23 @@
       },
       "outputs": [],
       "source": [
-        "app = App.from_config(config_path=\"vertexai.yaml\")"
+        "app = App.from_config(config={\n",
+        "    \"llm\": {\n",
+        "        \"provider\": \"vertexai\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"chat-bison\",\n",
+        "            \"temperature\": 0.5,\n",
+        "            \"max_tokens\": 1000,\n",
+        "            \"stream\": False\n",
+        "        }\n",
+        "    },\n",
+        "    \"embedder\": {\n",
+        "        \"provider\": \"vertexai\",\n",
+        "        \"config\": {\n",
+        "            \"model\": \"textembedding-gecko\"\n",
+        "        }\n",
+        "    }\n",
+        "})"
       ]
     },
     {
@@ -126,7 +105,7 @@
         "id": "XNXv4yZwi7ef"
       },
       "source": [
-        "### Step-5: Add data sources to your app"
+        "### Step-4: Add data sources to your app"
       ]
     },
     {
@@ -146,7 +125,7 @@
         "id": "_7W6fDeAjMAP"
       },
       "source": [
-        "### Step-6: All set. Now start asking questions related to your data"
+        "### Step-5: All set. Now start asking questions related to your data"
       ]
     },
     {