|
@@ -6,6 +6,7 @@ import re
|
|
|
import requests
|
|
|
|
|
|
from embedchain.loaders.base_loader import BaseLoader
|
|
|
+from embedchain.utils import clean_string
|
|
|
|
|
|
# Regex for remote JSON sources: an https:// URL whose path ends in ".json".
# Written as a raw string so `\/` and `\.` reach the regex engine verbatim
# instead of being parsed as (invalid) string escape sequences; the resulting
# pattern text is identical to the previous non-raw literal.
# NOTE(review): `A-z` also covers the ASCII punctuation between "Z" and "a"
# ([, \, ], ^, _, `) and does not cover "-", and the bare `.` separators match
# any character rather than a literal dot - confirm whether this is intended
# before tightening, since that would change which URLs are accepted.
VALID_URL_PATTERN = r"^https:\/\/[0-9A-z.]+.[0-9A-z.]+.[a-z]+\/.*\.json$"
|
|
|
|
|
@@ -49,7 +50,7 @@ class JSONLoader(BaseLoader):
|
|
|
|
|
|
docs = loader.load_data(json_data)
|
|
|
for doc in docs:
|
|
|
- doc_content = doc.text
|
|
|
+ doc_content = clean_string(doc.text)
|
|
|
data.append({"content": doc_content, "meta_data": {"url": content}})
|
|
|
data_content.append(doc_content)
|
|
|
doc_id = hashlib.sha256((content + ", ".join(data_content)).encode()).hexdigest()
|