
[Docs] Add docs for Azure OpenAI provider (#804)

Deshraj Yadav 1 year ago
parent
commit
adf50f1e81

+ 4 - 1
.gitignore

@@ -76,7 +76,7 @@ docs/_build/
 target/
 
 # Jupyter Notebook
-.ipynb_checkpoints
+*.yaml
 
 # IPython
 profile_default/
@@ -171,3 +171,6 @@ db
 .idea/
 
 .DS_Store
+
+notebooks/*.yaml
+.ipynb_checkpoints/

+ 19 - 0
configs/azure_openai.yaml

@@ -0,0 +1,19 @@
+app:
+  config:
+    id: azure-openai-app
+
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
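As a quick sanity check, the config above can be parsed and inspected before handing it to the app. A minimal sketch, assuming PyYAML is installed; the deployment names are placeholders:

```python
# Parse the YAML config above and verify the provider wiring
# before passing it to embedchain (deployment names are placeholders).
import yaml

config_text = """
app:
  config:
    id: azure-openai-app

llm:
  provider: azure_openai
  model: gpt-35-turbo
  config:
    deployment_name: your_llm_deployment_name
    temperature: 0.5
    max_tokens: 1000
    top_p: 1
    stream: false

embedder:
  provider: azure_openai
  config:
    model: text-embedding-ada-002
    deployment_name: your_embedding_model_deployment_name
"""

config = yaml.safe_load(config_text)
assert config["llm"]["provider"] == "azure_openai"
assert config["embedder"]["config"]["model"] == "text-embedding-ada-002"
```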

+ 40 - 0
docs/components/embedding-models.mdx

@@ -8,6 +8,7 @@ Embedchain supports several embedding models from the following providers:
 
 <CardGroup cols={4}>
   <Card title="OpenAI" href="#openai"></Card>
+  <Card title="Azure OpenAI" href="#azure-openai"></Card>
   <Card title="GPT4All" href="#gpt4all"></Card>
   <Card title="Hugging Face" href="#hugging-face"></Card>
   <Card title="Vertex AI" href="#vertex-ai"></Card>
@@ -43,6 +44,45 @@ embedder:
 
 </CodeGroup>
 
+## Azure OpenAI
+
+To use an Azure OpenAI embedding model, set the Azure OpenAI environment variables as shown in the code block below:
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+</CodeGroup>
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).
+
 ## GPT4ALL
 
 GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained Sentence Transformer.

+ 36 - 1
docs/components/llms.mdx

@@ -65,7 +65,42 @@ llm:
 
 ## Azure OpenAI
 
-_Coming soon_
+To use an Azure OpenAI model, set the Azure OpenAI environment variables as shown in the code block below:
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ["OPENAI_API_TYPE"] = "azure"
+os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
+os.environ["OPENAI_API_KEY"] = "xxx"
+os.environ["OPENAI_API_VERSION"] = "xxx"
+
+app = App.from_config(yaml_path="config.yaml")
+```
+
+```yaml config.yaml
+llm:
+  provider: azure_openai
+  model: gpt-35-turbo
+  config:
+    deployment_name: your_llm_deployment_name
+    temperature: 0.5
+    max_tokens: 1000
+    top_p: 1
+    stream: false
+
+embedder:
+  provider: azure_openai
+  config:
+    model: text-embedding-ada-002
+    deployment_name: your_embedding_model_deployment_name
+```
+</CodeGroup>
+
+You can find the list of models and deployment names on the [Azure OpenAI Platform](https://oai.azure.com/portal).
 
 ## Anthropic
 

+ 16 - 0
docs/components/vector-databases.mdx

@@ -119,11 +119,17 @@ Install related dependencies using the following command:
 pip install --upgrade 'embedchain[milvus]'
 ```
 
+Set the Zilliz environment variables `ZILLIZ_CLOUD_URI` and `ZILLIZ_CLOUD_TOKEN`, which you can find on their [cloud platform](https://cloud.zilliz.com/).
+
 <CodeGroup>
 
 ```python main.py
+import os
 from embedchain import App
 
+os.environ['ZILLIZ_CLOUD_URI'] = 'https://xxx.zillizcloud.com'
+os.environ['ZILLIZ_CLOUD_TOKEN'] = 'xxx'
+
 # load zilliz configuration from yaml file
 app = App.from_config(yaml_path="config.yaml")
 ```
@@ -147,8 +153,16 @@ _Coming soon_
 
 ## Pinecone
 
+Install Pinecone-related dependencies using the following command:
+
+```bash
+pip install --upgrade 'embedchain[pinecone]'
+```
+
 In order to use Pinecone as a vector database, set the environment variables `PINECONE_API_KEY` and `PINECONE_ENV`, which you can find on the [Pinecone dashboard](https://app.pinecone.io/).
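For example, in a Unix shell the two variables can be exported before running the app. The values below are placeholders; the actual key and environment name come from your Pinecone dashboard:

```bash
# Placeholder values -- substitute the API key and environment
# shown on your Pinecone dashboard.
export PINECONE_API_KEY="xxx"
export PINECONE_ENV="xxx"
```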
 
+<CodeGroup>
+
 ```python main.py
 from embedchain import App
 
@@ -165,6 +179,8 @@ vectordb:
     collection_name: my-pinecone-index
 ```
 
+</CodeGroup>
+
 ## Qdrant
 
 _Coming soon_

+ 2 - 0
embedchain/factory.py

@@ -44,10 +44,12 @@ class EmbedderFactory:
         "gpt4all": "embedchain.embedder.gpt4all.GPT4AllEmbedder",
         "huggingface": "embedchain.embedder.huggingface.HuggingFaceEmbedder",
         "vertexai": "embedchain.embedder.vertexai.VertexAIEmbedder",
+        "azure_openai": "embedchain.embedder.openai.OpenAIEmbedder",
         "openai": "embedchain.embedder.openai.OpenAIEmbedder",
     }
     provider_to_config_class = {
         "openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
+        "azure_openai": "embedchain.config.embedder.base.BaseEmbedderConfig",
     }
 
     @classmethod
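The mapping above works because both `openai` and `azure_openai` keys resolve to the same embedder class via a dotted import path. A minimal sketch of that resolution pattern (the helper name `load_class` is illustrative, not embedchain's actual API; the demo resolves a stdlib path since embedchain may not be installed):

```python
# Resolve a provider key to a class through its dotted import path,
# the same pattern the factory mapping above relies on.
import importlib

PROVIDER_TO_CLASS = {
    # Both keys point at the same embedder implementation.
    "openai": "embedchain.embedder.openai.OpenAIEmbedder",
    "azure_openai": "embedchain.embedder.openai.OpenAIEmbedder",
}

def load_class(dotted_path: str):
    """Import `pkg.module.ClassName` and return the class object."""
    module_path, class_name = dotted_path.rsplit(".", 1)
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# Demonstrate with a stdlib path instead of an embedchain one:
OrderedDict = load_class("collections.OrderedDict")
```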

+ 182 - 0
notebooks/azure-openai.ipynb

@@ -0,0 +1,182 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "63ab5e89",
+   "metadata": {},
+   "source": [
+    "## Cookbook for using Azure OpenAI with Embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e32a0265",
+   "metadata": {},
+   "source": [
+    "### Step-1: Install embedchain package"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b80ff15a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install embedchain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac982a56",
+   "metadata": {},
+   "source": [
+    "### Step-2: Set Azure OpenAI related environment variables\n",
+    "\n",
+    "You can find these env variables on your Azure OpenAI dashboard."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0a36133",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from embedchain import App\n",
+    "\n",
+    "os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
+    "os.environ[\"OPENAI_API_BASE\"] = \"https://xxx.openai.azure.com/\"\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"xxx\"\n",
+    "os.environ[\"OPENAI_API_VERSION\"] = \"xxx\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7d7b554e",
+   "metadata": {},
+   "source": [
+    "### Step-3: Define your llm and embedding model config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9f52fc5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = \"\"\"\n",
+    "llm:\n",
+    "  provider: azure_openai\n",
+    "  model: gpt-35-turbo\n",
+    "  config:\n",
+    "    deployment_name: ec_openai_azure\n",
+    "    temperature: 0.5\n",
+    "    max_tokens: 1000\n",
+    "    top_p: 1\n",
+    "    stream: false\n",
+    "\n",
+    "embedder:\n",
+    "  provider: azure_openai\n",
+    "  config:\n",
+    "    model: text-embedding-ada-002\n",
+    "    deployment_name: ec_embeddings_ada_002\n",
+    "\"\"\"\n",
+    "\n",
+    "# Write the multi-line string to a YAML file\n",
+    "with open('azure_openai.yaml', 'w') as file:\n",
+    "    file.write(config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98a11130",
+   "metadata": {},
+   "source": [
+    "### Step-4 Create embedchain app based on the config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ee9bdd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app = App.from_config(yaml_path=\"azure_openai.yaml\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "554dc97b",
+   "metadata": {},
+   "source": [
+    "### Step-5: Add data sources to your app"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686ae765",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "app.add(\"https://www.forbes.com/profile/elon-musk\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ccc7d421",
+   "metadata": {},
+   "source": [
+    "### Step-6: All set. Now start asking questions related to your data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27868a7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "while(True):\n",
+    "    question = input(\"Enter question: \")\n",
+    "    if question in ['q', 'exit', 'quit']\n",
+    "        break\n",
+    "    answer = app.query(question)\n",
+    "    print(answer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1f2ead5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}