Explorar el Código

Support for Cohere Embeddings (#1310)

Dev Khant hace 1 año
padre
commit
f77f5b996e

+ 85 - 0
docs/components/embedding-models.mdx

@@ -14,6 +14,7 @@ Embedchain supports several embedding models from the following providers:
   <Card title="Hugging Face" href="#hugging-face"></Card>
   <Card title="Vertex AI" href="#vertex-ai"></Card>
   <Card title="NVIDIA AI" href="#nvidia-ai"></Card>
+  <Card title="Cohere" href="#cohere"></Card>
 </CardGroup>
 
 ## OpenAI
@@ -273,3 +274,87 @@ answer = app.query("What is the net worth of Elon Musk today?")
 # Additionally, his net worth may include other assets such as real estate and art, which are not reflected in his stock portfolio.
 ```
 </CodeGroup>
+
+
+## Cohere
+
+To use embedding models and LLMs from Cohere, create an account on [Cohere](https://dashboard.cohere.com/welcome/login?redirect_uri=%2Fapi-keys).
+
+Generate an API key from their dashboard. Set the API key as the `COHERE_API_KEY` environment variable.
+
+Once you have obtained the key, you can use it like this:
+
+<CodeGroup>
+
+```python main.py
+import os
+from embedchain import App
+
+os.environ['COHERE_API_KEY'] = 'xxx'
+
+# load embedding model configuration from config.yaml file
+app = App.from_config(config_path="config.yaml")
+```
+
+```yaml config.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-english-light-v3.0'
+```
+
+</CodeGroup>
+
+* Cohere offers several embedding models: `embed-english-v3.0`, `embed-english-light-v3.0`, `embed-multilingual-v3.0`, `embed-multilingual-light-v3.0`, `embed-english-v2.0`, `embed-english-light-v2.0` and `embed-multilingual-v2.0`. Embedchain supports all of these models. The config for `embed-english-light-v3.0` is shown in the example above; below you can find the YAML config for each of the others:
+
+<CodeGroup>
+
+```yaml embed-english-v3.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-english-v3.0'
+    vector_dimension: 1024
+```
+
+```yaml embed-multilingual-v3.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-multilingual-v3.0'
+    vector_dimension: 1024
+```
+
+```yaml embed-multilingual-light-v3.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-multilingual-light-v3.0'
+    vector_dimension: 384
+```
+
+```yaml embed-english-v2.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-english-v2.0'
+    vector_dimension: 4096
+```
+
+```yaml embed-english-light-v2.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-english-light-v2.0'
+    vector_dimension: 1024
+```
+
+```yaml embed-multilingual-v2.0.yaml
+embedder:
+  provider: cohere
+  config:
+    model: 'embed-multilingual-v2.0'
+    vector_dimension: 768
+```
+
+</CodeGroup>

+ 19 - 0
embedchain/embedder/cohere.py

@@ -0,0 +1,19 @@
+from typing import Optional
+
+from langchain_community.embeddings import CohereEmbeddings
+
+from embedchain.config import BaseEmbedderConfig
+from embedchain.embedder.base import BaseEmbedder
+from embedchain.models import VectorDimensions
+
+
+class CohereEmbedder(BaseEmbedder):
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        super().__init__(config=config)
+
+        embeddings = CohereEmbeddings(model=self.config.model)
+        embedding_fn = BaseEmbedder._langchain_default_concept(embeddings)
+        self.set_embedding_fn(embedding_fn=embedding_fn)
+
+        vector_dimension = self.config.vector_dimension or VectorDimensions.COHERE.value
+        self.set_vector_dimension(vector_dimension=vector_dimension)

+ 1 - 0
embedchain/factory.py

@@ -56,6 +56,7 @@ class EmbedderFactory:
         "google": "embedchain.embedder.google.GoogleAIEmbedder",
         "mistralai": "embedchain.embedder.mistralai.MistralAIEmbedder",
         "nvidia": "embedchain.embedder.nvidia.NvidiaEmbedder",
+        "cohere": "embedchain.embedder.cohere.CohereEmbedder",
     }
     provider_to_config_class = {
         "azure_openai": "embedchain.config.embedder.base.BaseEmbedderConfig",

+ 1 - 0
embedchain/models/vector_dimensions.py

@@ -10,3 +10,4 @@ class VectorDimensions(Enum):
     GOOGLE_AI = 768
     MISTRAL_AI = 1024
     NVIDIA_AI = 1024
+    COHERE = 384