Browse Source

[Docs] Update docs and improve assistant api (#923)

Deshraj Yadav 1 year ago
parent
commit
7c6b88c7c5

+ 4 - 5
docs/data-sources/gmail.mdx

@@ -24,12 +24,11 @@ To use this you need to save `credentials.json` in the directory from where you
 12. Put the `.json` file in your current directory and rename it to `credentials.json`
 
 ```python
-import os
-from embedchain.apps.app import App
-from embedchain.models.data_type import DataType
+from embedchain import Pipeline as App
+
 app = App()
 
-query = "to: me label:inbox"
-app.add(query, data_type=DataType.GMAIL)
+gmail_filter = "to: me label:inbox"
+app.add(gmail_filter, data_type="gmail")
 app.query("Summarize my email conversations")
 ```

+ 16 - 25
docs/data-sources/json.mdx

@@ -2,52 +2,43 @@
 title: '📃 JSON'
 ---
 
-To add any json file, use the data_type as `json`. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:
+To add any json file, use the data_type as `json`. Headers are included for each line, so for example if you have a JSON object like `{"age": 18}`, then it will be added as `age: 18`.
 
 Here are the supported sources for loading `json`:
+
 ```
 1. URL - valid url to json file that ends with ".json" extension.
 2. Local file - valid path to a local json file that ends with ".json" extension.
 3. String - valid json string (e.g. - app.add('{"foo": "bar"}'))
 ```
 
-If you would like to add other data structures (e.x. list, dict etc.), do:
-```python
-    import json
-    a = {"foo": "bar"}
-    valid_json_string_data = json.dumps(a, indent=0)
+<Tip>
+If you would like to add other data structures (e.g. list, dict etc.), convert it to a valid json first using `json.dumps()` function.
+</Tip>
 
-    b = [{"foo": "bar"}]
-    valid_json_string_data = json.dumps(b, indent=0)
-```
-Example:
-```python
-import os
+## Example
 
-from embedchain.apps.app import App
+<CodeGroup>
 
-os.environ["OPENAI_API_KEY"] = "openai_api_key"
+```python python
+from embedchain import Pipeline as App
 
 app = App()
 
-response = app.query("What is the net worth of Elon Musk as of October 2023?")
-
-print(response)
-"I'm sorry, but I don't have access to real-time information or future predictions. Therefore, I don't know the net worth of Elon Musk as of October 2023."
+# Add json file
+app.add("temp.json")
 
-source_id = app.add("temp.json")
+app.query("What is the net worth of Elon Musk as of October 2023?")
+# As of October 2023, Elon Musk's net worth is $255.2 billion.
+```
 
-response = app.query("What is the net worth of Elon Musk as of October 2023?")
 
-print(response)
-"As of October 2023, Elon Musk's net worth is $255.2 billion."
-```
-temp.json
-```json
+```json temp.json
 {
     "question": "What is your net worth, Elon Musk?",
     "answer": "As of October 2023, Elon Musk's net worth is $255.2 billion, making him one of the wealthiest individuals in the world."
 }
 ```
+</CodeGroup>
 
 

+ 6 - 7
docs/data-sources/openapi.mdx

@@ -2,13 +2,10 @@
 title: 🙌 OpenAPI
 ---
 
-To add any OpenAPI spec yaml file (currently the json file will be detected as JSON data type), use the data_type as 'openapi'. 'openapi' allows remote urls and conventional file paths. Headers are included for each line, so if you have an `age` column, `18` will be added as `age: 18`. Eg:
+To add any OpenAPI spec yaml file (currently the json file will be detected as JSON data type), use the data_type as 'openapi'. 'openapi' allows remote urls and conventional file paths.
 
 ```python
-from embedchain.apps.app import App
-import os
-
-os.environ["OPENAI_API_KEY"] = "sk-xxx"
+from embedchain import Pipeline as App
 
 app = App()
 
@@ -16,8 +13,10 @@ app.add("https://github.com/openai/openai-openapi/blob/master/openapi.yaml", dat
 # Or add using the local file path
 # app.add("configs/openai_openapi.yaml", data_type="openapi")
 
-response = app.query("What can OpenAI API endpoint do? Can you list the things it can learn from?")
+app.query("What can OpenAI API endpoint do? Can you list the things it can learn from?")
 # Answer: The OpenAI API endpoint allows users to interact with OpenAI's models and perform various tasks such as generating text, answering questions, summarizing documents, translating languages, and more. The specific capabilities and tasks that the API can learn from may vary depending on the models and features provided by OpenAI. For more detailed information, it is recommended to refer to the OpenAI API documentation at https://platform.openai.com/docs/api-reference.
 ```
 
-NOTE: The yaml file added to the App must have the required OpenAPI fields otherwise the adding OpenAPI spec will fail. Please refer to [OpenAPI Spec Doc](https://spec.openapis.org/oas/v3.1.0)
+<Note>
+The yaml file added to the App must have the required OpenAPI fields, otherwise adding the OpenAPI spec will fail. Please refer to [OpenAPI Spec Doc](https://spec.openapis.org/oas/v3.1.0)
+</Note>

+ 25 - 7
docs/get-started/openai-assistant.mdx

@@ -8,7 +8,7 @@ Embedchain now supports [OpenAI Assistants API](https://platform.openai.com/docs
 
 At a high level, an integration of the Assistants API has the following flow:
 
-1. Create an Assistant in the API by defining it custom instructions and picking a model
+1. Create an Assistant in the API by defining custom instructions and picking a model
 2. Create a Thread when a user starts a conversation
 3. Add Messages to the Thread as the user asks questions
 4. Run the Assistant on the Thread to trigger responses. This automatically calls the relevant tools.
@@ -19,7 +19,7 @@ Creating an OpenAI Assistant using Embedchain is a very simple 3-step process.
 
 Make sure that you have `OPENAI_API_KEY` set in the environment variable.
 
-```python
+```python Initialize
 from embedchain.store.assistants import OpenAIAssistant
 
 assistant = OpenAIAssistant(
@@ -28,10 +28,28 @@ assistant = OpenAIAssistant(
 )
 ```
 
+If you want to use the existing assistant, you can do something like this:
+
+```python Initialize
+# Load an assistant and create a new thread
+assistant = OpenAIAssistant(assistant_id="asst_xxx")
+
+# Load a specific thread for an assistant
+assistant = OpenAIAssistant(assistant_id="asst_xxx", thread_id="thread_xxx")
+```
+
 ### Arguments
 
-<ResponseField name="assistant_id" type="string" required>
-  Load existing OpenAI Assistant. If you pass this, you don't have to pass other arguments
+<ResponseField name="name" type="string">
+  Name for your AI assistant
+</ResponseField>
+
+<ResponseField name="instructions" type="string">
+  How the Assistant and model should behave or respond
+</ResponseField>
+
+<ResponseField name="assistant_id" type="string">
+  Load existing OpenAI Assistant. If you pass this, you don't have to pass other arguments.
 </ResponseField>
 
 <ResponseField name="thread_id" type="string">
@@ -53,14 +71,14 @@ assistant = OpenAIAssistant(
 ## Step-2: Add data to thread
 
 You can add any custom data source that is supported by Embedchain. Alternatively, you can directly pass the file path on your local system and Embedchain propagates it to the OpenAI Assistant.
-```python
+```python Add data
 assistant.add("/path/to/file.pdf")
-assistant.add("https://www.youtube.com/watch?v=U9mJuUkhUzk", data_type="youtube_video")
+assistant.add("https://www.youtube.com/watch?v=U9mJuUkhUzk")
 assistant.add("https://openai.com/blog/new-models-and-developer-products-announced-at-devday")
 ```
 
 ## Step-3: Chat with your Assistant
-```python
+```python Chat
 assistant.chat("How much OpenAI credits were offered to attendees during OpenAI DevDay?")
 # Response: 'Every attendee of OpenAI DevDay 2023 was offered $500 in OpenAI credits.'
 ```

+ 7 - 4
embedchain/store/assistants.py

@@ -1,5 +1,6 @@
 import logging
 import os
+import re
 import tempfile
 import time
 from pathlib import Path
@@ -70,9 +71,9 @@ class OpenAIAssistant:
         if Path(source).is_file():
             return source
         data_type = data_type or detect_datatype(source)
-        formatter = DataFormatter(data_type=DataType(data_type), config=AddConfig())
+        formatter = DataFormatter(data_type=DataType(data_type), config=AddConfig(), kwargs={})
         data = formatter.loader.load_data(source)["data"]
-        return self._save_temp_data(data[0]["content"].encode())
+        return self._save_temp_data(data=data[0]["content"].encode(), source=source)
 
     def _add_file_to_assistant(self, file_path):
         file_obj = self._client.files.create(file=open(file_path, "rb"), purpose="assistants")
@@ -117,9 +118,11 @@ class OpenAIAssistant:
         content = [c.text.value for c in thread_message.content if isinstance(c, MessageContentText)]
         return " ".join(content)
 
-    def _save_temp_data(self, data):
+    def _save_temp_data(self, data, source):
+        special_chars_pattern = r'[\\/:*?"<>|&=% ]+'
+        sanitized_source = re.sub(special_chars_pattern, "_", source)[:256]
         temp_dir = tempfile.mkdtemp()
-        file_path = os.path.join(temp_dir, "temp_data")
+        file_path = os.path.join(temp_dir, sanitized_source)
         with open(file_path, "wb") as file:
             file.write(data)
         return file_path

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "embedchain"
-version = "0.1.0"
+version = "0.1.1"
 description = "Data platform for LLMs - Load, index, retrieve and sync any unstructured data"
 authors = [
     "Taranjeet Singh <taranjeet@embedchain.ai>",