Added support for Ollama for local model inference. (#1045)

Co-authored-by: Deshraj Yadav <deshraj@gatech.edu>
Sukkrit Sharma 1 year ago
parent
commit
0f73ba9677
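For context, the feature added here is exercised end to end as follows. This is a minimal usage sketch based on the `configs/ollama.yaml` and docs examples added in this commit; it assumes a local Ollama server is running (`ollama serve`) with the `llama2` model pulled:

```python
from embedchain import Pipeline as App

# Load the Ollama LLM + HuggingFace embedder settings from the YAML file
# added in this commit (assumes a running local Ollama server).
app = App.from_config(config_path="configs/ollama.yaml")
app.add("https://docs.embedchain.ai/", data_type="docs_site")
print(app.query("What is Embedchain?"))
```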

+ 12 - 0
configs/ollama.yaml

@@ -0,0 +1,12 @@
+llm:
+  provider: ollama
+  config:
+    model: 'llama2'
+    temperature: 0.5
+    top_p: 1
+    stream: true
+
+embedder:
+  provider: huggingface
+  config:
+    model: 'BAAI/bge-small-en-v1.5'

+ 1 - 1
docs/components/data-sources/docs-site.mdx

@@ -10,5 +10,5 @@ from embedchain import Pipeline as App
 app = App()
 app.add("https://docs.embedchain.ai/", data_type="docs_site")
 app.query("What is Embedchain?")
-# Answer: Embedchain is a platform that utilizes various components, including paid/proprietary ones, to provide what is believed to be the best configuration available. It uses LLM (Language Model) providers such as OpenAI, Anthropic, Vertex_AI, GPT4ALL, Azure_OpenAI, LLAMA2, JINA, and COHERE. Embedchain allows users to import and utilize these LLM providers for their applications.
+# Answer: Embedchain is a platform that utilizes various components, including paid/proprietary ones, to provide what is believed to be the best configuration available. It uses LLM (Language Model) providers such as OpenAI, Anthropic, Vertex_AI, GPT4ALL, Azure_OpenAI, LLAMA2, JINA, Ollama, and COHERE. Embedchain allows users to import and utilize these LLM providers for their applications.
 ```

+ 27 - 0
docs/components/llms.mdx

@@ -12,6 +12,7 @@ Embedchain comes with built-in support for various popular large language models
   <Card title="Azure OpenAI" href="#azure-openai"></Card>
   <Card title="Anthropic" href="#anthropic"></Card>
   <Card title="Cohere" href="#cohere"></Card>
+  <Card title="Ollama" href="#Ollama"></Card>
   <Card title="GPT4All" href="#gpt4all"></Card>
   <Card title="JinaChat" href="#jinachat"></Card>
   <Card title="Hugging Face" href="#hugging-face"></Card>
@@ -329,6 +330,32 @@ llm:
 
 </CodeGroup>
 
+## Ollama
+
+Set up Ollama by following the instructions at https://github.com/jmorganca/ollama
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import Pipeline as App
+
+# load llm configuration from config.yaml file
+app = App.from_config(config_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: ollama
+  config:
+    model: 'llama2'
+    temperature: 0.5
+    top_p: 1
+    stream: true
+```
+
+</CodeGroup>
+
 ## GPT4ALL
 
 Install related dependencies using the following command:

+ 4 - 0
docs/examples/notebooks-and-replits.mdx

@@ -44,6 +44,10 @@ Get started with Embedchain by trying out the examples below. You can run the ex
       <td className="align-middle"><a target="_blank" href="https://colab.research.google.com/github/embedchain/embedchain/blob/main/notebooks/cohere.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" noZoom alt="Open In Colab"/></a></td>
       <td className="align-middle"><a target="_blank" href="https://replit.com/@taranjeetio/cohere#main.py"><img src="https://replit.com/badge?caption=Try%20with%20Replit&amp;variant=small" noZoom alt="Try with Replit Badge"/></a></td>
     </tr>
+    <tr>
+      <td className="align-middle">Ollama</td>
+      <td className="align-middle"><a target="_blank" href="https://colab.research.google.com/github/embedchain/embedchain/blob/main/notebooks/ollama.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" noZoom alt="Open In Colab"/></a></td>
+    </tr>
     <tr>
       <td className="align-middle">Hugging Face</td>
       <td className="align-middle"><a target="_blank" href="https://colab.research.google.com/github/embedchain/embedchain/blob/main/notebooks/hugging_face_hub.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" noZoom alt="Open In Colab"/></a></td>

+ 1 - 0
embedchain/factory.py

@@ -13,6 +13,7 @@ class LlmFactory:
         "azure_openai": "embedchain.llm.azure_openai.AzureOpenAILlm",
         "cohere": "embedchain.llm.cohere.CohereLlm",
         "gpt4all": "embedchain.llm.gpt4all.GPT4ALLLlm",
+        "ollama": "embedchain.llm.ollama.OllamaLlm",
         "huggingface": "embedchain.llm.huggingface.HuggingFaceLlm",
         "jina": "embedchain.llm.jina.JinaLlm",
         "llama2": "embedchain.llm.llama2.Llama2Llm",

+ 34 - 0
embedchain/llm/ollama.py

@@ -0,0 +1,34 @@
+from typing import Iterable, Optional, Union
+
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.stdout import StdOutCallbackHandler
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.llms.ollama import Ollama
+
+from embedchain.config import BaseLlmConfig
+from embedchain.helpers.json_serializable import register_deserializable
+from embedchain.llm.base import BaseLlm
+
+
+@register_deserializable
+class OllamaLlm(BaseLlm):
+    def __init__(self, config: Optional[BaseLlmConfig] = None):
+        super().__init__(config=config)
+        if self.config.model is None:
+            self.config.model = "llama2"
+
+    def get_llm_model_answer(self, prompt):
+        return self._get_answer(prompt=prompt, config=self.config)
+
+    def _get_answer(self, prompt: str, config: BaseLlmConfig) -> Union[str, Iterable]:
+        callbacks = [StreamingStdOutCallbackHandler()] if config.stream else [StdOutCallbackHandler()]
+
+        llm = Ollama(
+            model=config.model,
+            system=config.system_prompt,
+            temperature=config.temperature,
+            top_p=config.top_p,
+            callback_manager=CallbackManager(callbacks)
+        )
+
+        return llm(prompt)
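The class can also be exercised directly, mirroring the new unit tests further down. A minimal sketch, assuming `ollama serve` is running locally with `llama2` pulled:

```python
from embedchain.config import BaseLlmConfig
from embedchain.llm.ollama import OllamaLlm

# Same knobs the YAML config exposes; stream=True prints tokens as they
# arrive via the StreamingStdOutCallbackHandler wired up in _get_answer.
config = BaseLlmConfig(model="llama2", temperature=0.5, top_p=1, stream=True)
llm = OllamaLlm(config)
print(llm.get_llm_model_answer("What is Embedchain?"))
```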

+ 1 - 0
embedchain/models/providers.py

@@ -6,4 +6,5 @@ class Providers(Enum):
     ANTHROPHIC = "ANTHPROPIC"
     VERTEX_AI = "VERTEX_AI"
     GPT4ALL = "GPT4ALL"
+    OLLAMA = "OLLAMA"
     AZURE_OPENAI = "AZURE_OPENAI"

+ 1 - 0
embedchain/utils.py

@@ -385,6 +385,7 @@ def validate_config(config_data):
                     "huggingface",
                     "cohere",
                     "gpt4all",
+                    "ollama",
                     "jina",
                     "llama2",
                     "vertexai",

+ 228 - 0
notebooks/ollama.ipynb

@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "b02n_zJ_hl3d"
+   },
+   "source": [
+    "## Cookbook for using Ollama with Embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gyJ6ui2vhtMY"
+   },
+   "source": [
+    "### Step-1: Setup Ollama, follow these instructions https://github.com/jmorganca/ollama\n",
+    "\n",
+    "Once Setup is done:\n",
+    "\n",
+    "- ollama pull llama2 (All supported models can be found here: https://ollama.ai/library)\n",
+    "- ollama run llama2 (Test out the model once)\n",
+    "- ollama serve"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Ns6RhPfbiitr"
+   },
+   "source": [
+    "### Step-2: Define your llm and embedding model config (Going all out local inference, no need for OpenAI API Key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "S9CkxVjriotB"
+   },
+   "outputs": [],
+   "source": [
+    "config = \"\"\"\n",
+    "llm:\n",
+    "  provider: ollama\n",
+    "  config:\n",
+    "    model: 'llama2'\n",
+    "    temperature: 0.5\n",
+    "    top_p: 1\n",
+    "    stream: true\n",
+    "\n",
+    "embedder:\n",
+    "  provider: huggingface\n",
+    "  config:\n",
+    "    model: 'BAAI/bge-small-en-v1.5'\n",
+    "\"\"\"\n",
+    "\n",
+    "# Write the multi-line string to a YAML file\n",
+    "with open('ollama.yaml', 'w') as file:\n",
+    "    file.write(config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "PGt6uPLIi1CS"
+   },
+   "source": [
+    "### Step-3 Create embedchain app based on the config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 321
+    },
+    "id": "Amzxk3m-i3tD",
+    "outputId": "afe8afde-5cb8-46bc-c541-3ad26cc3fa6e"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/sukkritsharma/workspace/embedchain/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from embedchain import Pipeline as App\n",
+    "app = App.from_config(config_path=\"ollama.yaml\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "XNXv4yZwi7ef"
+   },
+   "source": [
+    "### Step-4: Add data sources to your app"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 176
+    },
+    "id": "Sn_0rx9QjIY9",
+    "outputId": "2f2718a4-3b7e-4844-fd46-3e0857653ca0"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Inserting batches in chromadb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.57it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully saved https://www.forbes.com/profile/elon-musk (DataType.WEB_PAGE). New chunks count: 4\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'8cf46026cabf9b05394a2658bd1fe890'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_7W6fDeAjMAP"
+   },
+   "source": [
+    "### Step-5: All set. Now start asking questions related to your data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cvIK7dWRjN_f",
+    "outputId": "79e873c8-9594-45da-f5a3-0a893511267f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Elon Musk is a business magnate, investor, and engineer. He is the CEO of SpaceX and Tesla, Inc., and has been involved in other successful ventures such as Neuralink and The Boring Company. Musk is known for his innovative ideas, entrepreneurial spirit, and vision for the future of humanity.\n",
+      "\n",
+      "As the CEO of Tesla, Musk has played a significant role in popularizing electric vehicles and making them more accessible to the masses. Under his leadership, Tesla has grown into one of the most valuable companies in the world.\n",
+      "\n",
+      "SpaceX, another company founded by Musk, is a leading player in the commercial space industry. SpaceX has developed advanced rockets and spacecraft, including the Falcon 9 and Dragon, which have successfully launched numerous satellites and other payloads into orbit.\n",
+      "\n",
+      "Musk is also known for his ambitious goals, such as establishing a human settlement on Mars and developing sustainable energy solutions to address climate change. He has been recognized for his philanthropic efforts, particularly in the area of education, and has been awarded numerous honors and awards for his contributions to society.\n",
+      "\n",
+      "Overall, Elon Musk is a highly influential and innovative entrepreneur who has made significant impacts in various industries and has inspired many people around the world with his vision and leadership."
+     ]
+    }
+   ],
+   "source": [
+    "answer = app.query(\"who is elon musk?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 38 - 0
tests/llm/test_ollama.py

@@ -0,0 +1,38 @@
+import pytest
+
+from embedchain.config import BaseLlmConfig
+from embedchain.llm.ollama import OllamaLlm
+
+
+@pytest.fixture
+def ollama_llm_config():
+    config = BaseLlmConfig(model="llama2", temperature=0.7, top_p=0.8, stream=True, system_prompt=None)
+    yield config
+
+def test_get_llm_model_answer(ollama_llm_config, mocker):
+    mocker.patch("embedchain.llm.ollama.OllamaLlm._get_answer", return_value="Test answer")
+
+    llm = OllamaLlm(ollama_llm_config)
+    answer = llm.get_llm_model_answer("Test query")
+
+    assert answer == "Test answer"
+
+
+def test_get_answer_mocked_ollama(ollama_llm_config, mocker):
+    mocked_ollama = mocker.patch("embedchain.llm.ollama.Ollama")
+    mock_instance = mocked_ollama.return_value
+    mock_instance.return_value = "Mocked answer"
+
+    llm = OllamaLlm(ollama_llm_config)
+    prompt = "Test query"
+    answer = llm.get_llm_model_answer(prompt)
+
+    assert answer == "Mocked answer"
+    mocked_ollama.assert_called_once_with(
+        model="llama2",
+        system=None,
+        temperature=0.7,
+        top_p=0.8,
+        callback_manager=mocker.ANY  # Use mocker.ANY to ignore the exact instance
+    )
+    mock_instance.assert_called_once_with(prompt)

+ 1 - 0
tests/test_utils.py

@@ -8,6 +8,7 @@ CONFIG_YAMLS = [
     "configs/chroma.yaml",
     "configs/chunker.yaml",
     "configs/cohere.yaml",
+    "configs/ollama.yaml",
     "configs/full-stack.yaml",
     "configs/gpt4.yaml",
     "configs/gpt4all.yaml",