|
@@ -0,0 +1,228 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "b02n_zJ_hl3d"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "## Cookbook for using Hugging Face Hub with Embedchain"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "gyJ6ui2vhtMY"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-1: Install embedchain package"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/",
|
|
|
+ "height": 1000
|
|
|
+ },
|
|
|
+ "id": "-NbXjAdlh0vJ",
|
|
|
+ "outputId": "35ddc904-8067-44cf-dcc9-3c8b4cd29989"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "!pip install embedchain"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "nGnpSYAAh2bQ"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-2: Set Hugging Face Hub-related environment variables and install dependencies\n",
|
|
|
+ "\n",
|
|
|
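+ "You can find your access token (used as `HUGGINGFACE_ACCESS_TOKEN`) on the [Hugging Face Hub token settings page](https://huggingface.co/settings/tokens). The cells below install the required dependencies and then set the token as an environment variable."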
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "VfDNZJCqNfqo",
|
|
|
+ "outputId": "34894d35-7142-42ee-8564-2e9f718afcbb"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "!pip install embedchain[huggingface-hub]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "SCNT8khqcR3G",
|
|
|
+ "outputId": "b789ee77-ef50-4330-8ac6-5da645dc36d6"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "!pip install embedchain[opensource]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "id": "0fBdQ9GAiRvK"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import os\n",
|
|
|
+ "from embedchain import App\n",
|
|
|
+ "\n",
|
|
|
+ "os.environ[\"HUGGINGFACE_ACCESS_TOKEN\"] = \"hf_xxx\""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "Ns6RhPfbiitr"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-3: Define your LLM and embedding model config"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "id": "S9CkxVjriotB"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "config = \"\"\"\n",
|
|
|
+ "llm:\n",
|
|
|
+ " provider: huggingface\n",
|
|
|
+ " config:\n",
|
|
|
+ " model: 'google/flan-t5-xxl'\n",
|
|
|
+ " temperature: 0.5\n",
|
|
|
+ " max_tokens: 1000\n",
|
|
|
+ " top_p: 0.8\n",
|
|
|
+ " stream: false\n",
|
|
|
+ "\n",
|
|
|
+ "embedder:\n",
|
|
|
+ " provider: huggingface\n",
|
|
|
+ " config:\n",
|
|
|
+ " model: 'sentence-transformers/all-mpnet-base-v2'\n",
|
|
|
+ "\"\"\"\n",
|
|
|
+ "\n",
|
|
|
+ "# Write the multi-line string to a YAML file\n",
|
|
|
+ "with open('huggingface.yaml', 'w') as file:\n",
|
|
|
+ " file.write(config)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "PGt6uPLIi1CS"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-4: Create embedchain app based on the config"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "id": "Amzxk3m-i3tD"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "app = App.from_config(yaml_path=\"huggingface.yaml\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "XNXv4yZwi7ef"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-5: Add data sources to your app"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/",
|
|
|
+ "height": 70
|
|
|
+ },
|
|
|
+ "id": "Sn_0rx9QjIY9",
|
|
|
+ "outputId": "3c2a803a-3a93-4b0d-a6ae-17ae3c96c3c2"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "app.add(\"https://www.forbes.com/profile/elon-musk\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "_7W6fDeAjMAP"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+ "### Step-6: All set. Now start asking questions related to your data"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "cvIK7dWRjN_f",
|
|
|
+ "outputId": "47a89d1c-b322-495c-822a-6c2ecef894d2"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "while(True):\n",
|
|
|
+ " question = input(\"Enter question: \")\n",
|
|
|
+ " if question in ['q', 'exit', 'quit']:\n",
|
|
|
+ " break\n",
|
|
|
+ " answer = app.query(question)\n",
|
|
|
+ " print(answer)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "id": "HvZVn6gU5xB_"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "provenance": []
|
|
|
+ },
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "name": "python"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 0
|
|
|
+}
|