Преглед на файлове

Google Colab Notebooks for LLMs, Embedders and VectorDBs (#821)

Sidharth Mohanty преди 1 година
родител
ревизия
b2286f3e34

+ 185 - 0
notebooks/anthropic.ipynb

@@ -0,0 +1,185 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using Anthropic with Embedchain\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "efdce0dc-fb30-4e01-f5a8-ef1a7f4e8c09"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set Anthropic related environment variables\n",
+        "\n",
+        "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `ANTHROPIC_API_KEY` on your [Anthropic dashboard](https://console.anthropic.com/account/keys)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
+        "os.environ[\"ANTHROPIC_API_KEY\"] = \"xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: anthropic\n",
+        "  config:\n",
+        "    model: 'claude-instant-1'\n",
+        "    temperature: 0.5\n",
+        "    top_p: 1\n",
+        "    stream: false\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('anthropic.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"anthropic.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 52
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "dc17baec-39b5-4dc8-bd42-f2aad92697eb"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 391
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "3d1cb7ce-969e-4dad-d48c-b818b7447cc0"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 171 - 0
notebooks/chromadb.ipynb

@@ -0,0 +1,171 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Cookbook for using ChromaDB with Embedchain"
+      ],
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-1: Install embedchain package"
+      ],
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install embedchain"
+      ],
+      "metadata": {
+        "id": "-NbXjAdlh0vJ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-2: Set OpenAI environment variables\n",
+        "\n",
+        "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys)."
+      ],
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
+      ],
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-3: Define your Vector Database config"
+      ],
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "config = \"\"\"\n",
+        "vectordb:\n",
+        "  provider: chroma\n",
+        "  config:\n",
+        "    collection_name: 'my-collection'\n",
+        "    # CHANGE THE BELOW TWO LINES!\n",
+        "    # pass remote database variables - host and port\n",
+        "    host: your-chromadb-url.com\n",
+        "    port: 5200\n",
+        "    allow_reset: true\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('chromadb.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ],
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ],
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app = App.from_config(yaml_path=\"chromadb.yaml\")"
+      ],
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ],
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ],
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ],
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ],
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

+ 205 - 0
notebooks/cohere.ipynb

@@ -0,0 +1,205 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using Cohere with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "fae77912-4e6a-4c78-fcb7-fbbe46f7a9c7"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set Cohere related environment variables and install the dependencies\n",
+        "\n",
+        "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `COHERE_API_KEY` key on your [Cohere dashboard](https://dashboard.cohere.com/api-keys)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "S5jTywPZNtrj",
+        "outputId": "4a23c813-c9e5-4b6c-e3d9-b41e4fdbc54d"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain[cohere]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
+        "os.environ[\"COHERE_API_KEY\"] = \"xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: cohere\n",
+        "  config:\n",
+        "    model: gptd-instruct-tft\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 1\n",
+        "    stream: false\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('cohere.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 321
+        },
+        "id": "Amzxk3m-i3tD",
+        "outputId": "afe8afde-5cb8-46bc-c541-3ad26cc3fa6e"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"cohere.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 176
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "2f2718a4-3b7e-4844-fd46-3e0857653ca0"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "79e873c8-9594-45da-f5a3-0a893511267f"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 180 - 0
notebooks/elasticsearch.ipynb

@@ -0,0 +1,180 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Cookbook for using ElasticSearchDB with Embedchain"
+      ],
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-1: Install embedchain package"
+      ],
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install embedchain"
+      ],
+      "metadata": {
+        "id": "-NbXjAdlh0vJ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-2: Set OpenAI environment variables and install the dependencies.\n",
+        "\n",
+        "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys). Now let's install the dependencies needed for Elasticsearch."
+      ],
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install --upgrade 'embedchain[elasticsearch]'"
+      ],
+      "metadata": {
+        "id": "-MUFRfxV7Jk7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
+      ],
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-3: Define your Vector Database config"
+      ],
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "config = \"\"\"\n",
+        "vectordb:\n",
+        "  provider: elasticsearch\n",
+        "  config:\n",
+        "    collection_name: 'es-index'\n",
+        "    es_url: your-elasticsearch-url.com\n",
+        "    allow_reset: true\n",
+        "    api_key: xxx\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('elasticsearch.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ],
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ],
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app = App.from_config(yaml_path=\"elasticsearch.yaml\")"
+      ],
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ],
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ],
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ],
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ],
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

+ 205 - 0
notebooks/gpt4all.ipynb

@@ -0,0 +1,205 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using GPT4All with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "077fa470-b51f-4c29-8c22-9c5f0a9cef47"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Install GPT4All related dependencies\n",
+        "\n",
+        "GPT4All is free to use and doesn't require any API key. Just install the dependencies."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "dGOE4u3dC6at",
+        "outputId": "c1c0087b-3f14-49fa-fb86-a4a3391ba14c"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install --upgrade embedchain[opensource]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "from embedchain import App"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: gpt4all\n",
+        "  config:\n",
+        "    model: 'orca-mini-3b.ggmlv3.q4_0.bin'\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 1\n",
+        "    stream: false\n",
+        "\n",
+        "embedder:\n",
+        "  provider: gpt4all\n",
+        "  config:\n",
+        "    model: 'all-MiniLM-L6-v2'\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('gpt4all.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Amzxk3m-i3tD",
+        "outputId": "775db99b-e217-47db-f87f-788495d86f26"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"gpt4all.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 52
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "c6514f17-3cb2-4fbc-c80d-79b3a311ff30"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 480
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "c74f356a-d2fb-426d-b36c-d84911397338"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 228 - 0
notebooks/hugging_face_hub.ipynb

@@ -0,0 +1,228 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using Hugging Face Hub with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "35ddc904-8067-44cf-dcc9-3c8b4cd29989"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set Hugging Face Hub related environment variables and install dependencies\n",
+        "\n",
+        "You can find your `HUGGINGFACE_ACCESS_TOKEN` key on your [Hugging Face Hub dashboard](https://huggingface.co/settings/tokens) and install the dependencies"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "VfDNZJCqNfqo",
+        "outputId": "34894d35-7142-42ee-8564-2e9f718afcbb"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain[huggingface-hub]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SCNT8khqcR3G",
+        "outputId": "b789ee77-ef50-4330-8ac6-5da645dc36d6"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain[opensource]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: huggingface\n",
+        "  config:\n",
+        "    model: 'google/flan-t5-xxl'\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 0.8\n",
+        "    stream: false\n",
+        "\n",
+        "embedder:\n",
+        "  provider: huggingface\n",
+        "  config:\n",
+        "    model: 'sentence-transformers/all-mpnet-base-v2'\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('huggingface.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"huggingface.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 70
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "3c2a803a-3a93-4b0d-a6ae-17ae3c96c3c2"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "47a89d1c-b322-495c-822a-6c2ecef894d2"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "HvZVn6gU5xB_"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 189 - 0
notebooks/jina.ipynb

@@ -0,0 +1,189 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using JinaChat with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "69cb79a6-c758-4656-ccf7-9f3105c81d16"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set JinaChat related environment variables\n",
+        "\n",
+        "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `JINACHAT_API_KEY` key on your [JinaChat dashboard](https://chat.jina.ai/api)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
+        "os.environ[\"JINACHAT_API_KEY\"] = \"xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: jina\n",
+        "  config:\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 1\n",
+        "    stream: false\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('jina.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4: Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 321
+        },
+        "id": "Amzxk3m-i3tD",
+        "outputId": "8d00da74-5f73-49bb-b868-dcf1c375ac85"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"jina.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 52
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "10eeacc7-9263-448e-876d-002af897ebe5"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "7dc7212f-a0e9-43c8-f119-f595ba79b4b7"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 196 - 0
notebooks/llama2.ipynb

@@ -0,0 +1,196 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using LLAMA2 with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "86a4a9b2-4ed6-431c-da6f-c3eacb390f42"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set LLAMA2 related environment variables and install dependencies\n",
+        "\n",
+        "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and `REPLICATE_API_TOKEN` key on your [Replicate dashboard](https://replicate.com/account/api-tokens). Now lets install the dependencies for LLAMA2."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "qoBUbocNtUUD"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain[llama2]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
+        "os.environ[\"REPLICATE_API_TOKEN\"] = \"xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: llama2\n",
+        "  config:\n",
+        "    model: 'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5'\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 0.5\n",
+        "    stream: false\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('llama2.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4 Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"llama2.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 52
+        },
+        "id": "Sn_0rx9QjIY9",
+        "outputId": "ba158e9c-0f16-4c6b-a876-7543120985a2"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 599
+        },
+        "id": "cvIK7dWRjN_f",
+        "outputId": "e2d11a25-a2ed-4034-ec6a-e8a5986c89ae"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 181 - 0
notebooks/openai.ipynb

@@ -0,0 +1,181 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using OpenAI with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "6c630676-c7fc-4054-dc94-c613de58a037"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set OpenAI environment variables\n",
+        "\n",
+        "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: openai\n",
+        "  config:\n",
+        "    model: gpt-35-turbo\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    top_p: 1\n",
+        "    stream: false\n",
+        "\n",
+        "embedder:\n",
+        "  provider: openai\n",
+        "  config:\n",
+        "    model: text-embedding-ada-002\n",
+        "    deployment_name: ec_embeddings_ada_002\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('openai.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4 Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"openai.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 184 - 0
notebooks/opensearch.ipynb

@@ -0,0 +1,184 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Cookbook for using OpenSearchDB with Embedchain"
+      ],
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-1: Install embedchain package"
+      ],
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install embedchain"
+      ],
+      "metadata": {
+        "id": "-NbXjAdlh0vJ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-2: Set OpenAI environment variables and install the dependencies.\n",
+        "\n",
+        "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys). Now lets install the dependencies needed for Opensearch."
+      ],
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install --upgrade 'embedchain[opensearch]'"
+      ],
+      "metadata": {
+        "id": "-MUFRfxV7Jk7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
+      ],
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-3: Define your Vector Database config"
+      ],
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "config = \"\"\"\n",
+        "vectordb:\n",
+        "  provider: opensearch\n",
+        "  config:\n",
+        "    opensearch_url: 'your-opensearch-url.com'\n",
+        "    http_auth:\n",
+        "      - admin\n",
+        "      - admin\n",
+        "    vector_dimension: 1536\n",
+        "    collection_name: 'my-app'\n",
+        "    use_ssl: false\n",
+        "    verify_certs: false\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('opensearch.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ],
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-4 Create embedchain app based on the config"
+      ],
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app = App.from_config(yaml_path=\"opensearch.yaml\")"
+      ],
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ],
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ],
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ],
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ],
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

+ 181 - 0
notebooks/pinecone.ipynb

@@ -0,0 +1,181 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using PineconeDB with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-NbXjAdlh0vJ"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set environment variables needed for Pinecone and install the dependencies.\n",
+        "\n",
+        "You can find this env variable on your [OpenAI dashboard](https://platform.openai.com/account/api-keys) and [Pinecone dashboard](https://app.pinecone.io/). Now lets install the dependencies needed for Pinecone."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-MUFRfxV7Jk7"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install --upgrade 'embedchain[pinecone]'"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\"\n",
+        "os.environ[\"PINECONE_API_KEY\"] = \"xxx\"\n",
+        "os.environ[\"PINECONE_ENV\"] = \"xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your Vector Database config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "vectordb:\n",
+        "  provider: pinecone\n",
+        "  config:\n",
+        "    metric: cosine\n",
+        "    vector_dimension: 768\n",
+        "    collection_name: pc-index\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('pinecone.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4 Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Amzxk3m-i3tD"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"pinecone.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

+ 194 - 0
notebooks/vertex_ai.ipynb

@@ -0,0 +1,194 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "b02n_zJ_hl3d"
+      },
+      "source": [
+        "## Cookbook for using VertexAI with Embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gyJ6ui2vhtMY"
+      },
+      "source": [
+        "### Step-1: Install embedchain package"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-NbXjAdlh0vJ",
+        "outputId": "eb9be5b6-dc81-43d2-d515-df8f0116be11"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nGnpSYAAh2bQ"
+      },
+      "source": [
+        "### Step-2: Set VertexAI related environment variables and install dependencies.\n",
+        "\n",
+        "You can find `OPENAI_API_KEY` on your [OpenAI dashboard](https://platform.openai.com/account/api-keys). Now lets install the dependencies."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "a_shbIFBtnwu"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install embedchain[vertexai]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0fBdQ9GAiRvK"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from embedchain import App\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxx\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ns6RhPfbiitr"
+      },
+      "source": [
+        "### Step-3: Define your llm and embedding model config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "S9CkxVjriotB"
+      },
+      "outputs": [],
+      "source": [
+        "config = \"\"\"\n",
+        "llm:\n",
+        "  provider: vertexai\n",
+        "  config:\n",
+        "    model: 'chat-bison'\n",
+        "    temperature: 0.5\n",
+        "    max_tokens: 1000\n",
+        "    stream: false\n",
+        "\n",
+        "embedder:\n",
+        "  provider: vertexai\n",
+        "  config:\n",
+        "    model: 'textembedding-gecko'\n",
+        "\"\"\"\n",
+        "\n",
+        "# Write the multi-line string to a YAML file\n",
+        "with open('vertexai.yaml', 'w') as file:\n",
+        "    file.write(config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PGt6uPLIi1CS"
+      },
+      "source": [
+        "### Step-4 Create embedchain app based on the config"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 582
+        },
+        "id": "Amzxk3m-i3tD",
+        "outputId": "5084b6ea-ec20-4281-9f36-e21e93c17475"
+      },
+      "outputs": [],
+      "source": [
+        "app = App.from_config(yaml_path=\"vertexai.yaml\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XNXv4yZwi7ef"
+      },
+      "source": [
+        "### Step-5: Add data sources to your app"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Sn_0rx9QjIY9"
+      },
+      "outputs": [],
+      "source": [
+        "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_7W6fDeAjMAP"
+      },
+      "source": [
+        "### Step-6: All set. Now start asking questions related to your data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cvIK7dWRjN_f"
+      },
+      "outputs": [],
+      "source": [
+        "while(True):\n",
+        "    question = input(\"Enter question: \")\n",
+        "    if question in ['q', 'exit', 'quit']:\n",
+        "        break\n",
+        "    answer = app.query(question)\n",
+        "    print(answer)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}