Browse Source

Package improvements (#989)

Co-authored-by: Deven Patel <deven298@yahoo.com>
Deven Patel 1 year ago
parent
commit
0f4f220119
2 changed files with 14 additions and 13 deletions
  1. 13 2
      docs/get-started/quickstart.mdx
  2. 1 11
      embedchain/embedchain.py

+ 13 - 2
docs/get-started/quickstart.mdx

@@ -26,10 +26,21 @@ Creating an app involves 3 steps:
     <Accordion title="Customize your app with a simple YAML config" icon="gear-complex">
       Embedchain provides a wide range of options to customize your app. You can customize the model, data sources, and much more.
       Explore the custom configurations [here](https://docs.embedchain.ai/advanced/configuration).
-      ```python
+      <CodeGroup>
+      ```python yaml_app.py
+      from embedchain import Pipeline as App
+      app = App.from_config(config_path="config.yaml")
+      ```
+      ```python json_app.py
+      from embedchain import Pipeline as App
+      app = App.from_config(config_path="config.json")
+      ```
+      ```python app.py
       from embedchain import Pipeline as App
-      app = App(yaml_config="config.yaml")
+      config = {} # Add your config here
+      app = App.from_config(config=config)
       ```
+      </CodeGroup>
     </Accordion>
   </Step>
   <Step title="🗃️ Add data sources">

+ 1 - 11
embedchain/embedchain.py

@@ -190,16 +190,6 @@ class EmbedChain(JSONSerializable):
         hash_object = hashlib.md5(str(source).encode("utf-8"))
         source_hash = hash_object.hexdigest()
 
-        # Check if the data hash already exists, if so, skip the addition
-        self.cursor.execute(
-            "SELECT 1 FROM data_sources WHERE hash = ? AND pipeline_id = ?", (source_hash, self.config.id)
-        )
-        existing_data = self.cursor.fetchone()
-
-        if existing_data:
-            print(f"Data with hash {source_hash} already exists. Skipping addition.")
-            return source_hash
-
         self.user_asks.append([source, data_type.value, metadata])
 
         data_formatter = DataFormatter(data_type, config, kwargs)
@@ -212,7 +202,7 @@ class EmbedChain(JSONSerializable):
         # Insert the data into the 'data_sources' table
         self.cursor.execute(
             """
-            INSERT INTO data_sources (hash, pipeline_id, type, value, metadata)
+            INSERT OR REPLACE INTO data_sources (hash, pipeline_id, type, value, metadata)
             VALUES (?, ?, ?, ?, ?)
         """,
             (source_hash, self.config.id, data_type.value, str(source), json.dumps(metadata)),