Przeglądaj źródła

chore: removed markdown to plaintext function

cachho 2 lata temu
rodzic
commit
e60f1680a0
2 zmienione pliki: 0 dodań i 45 usunięć
  1. +0 -4
      embedchain/loaders_local/qna_pair.py
  2. +0 -41
      embedchain/utils.py

+ 0 - 4
embedchain/loaders_local/qna_pair.py

@@ -1,11 +1,7 @@
-from embedchain.utils import markdown_to_plaintext
-
-
 class QnaPairLoader:
 
     def load_data(self, content):
         question, answer = content
-        answer = markdown_to_plaintext(answer)
         content = f"Q: {question}\nA: {answer}"
         meta_data = {
             "url": "local",

+ 0 - 41
embedchain/utils.py

@@ -8,44 +8,3 @@ def clean_string(text):
     cleaned_text = cleaned_text.replace('#', ' ')
     cleaned_text = re.sub(r'([^\w\s])\1*', r'\1', cleaned_text)
     return cleaned_text
-
-def markdown_to_plaintext(markdown_string):
-    # Lines surrounded by empty lines are considered paragraph text
-    markdown_string = markdown_string.strip().replace("\n\n", "\n")
-
-    # Headers
-    markdown_string = markdown_string.replace("# ", "")
-    markdown_string = markdown_string.replace("## ", "")
-    markdown_string = markdown_string.replace("### ", "")
-
-    # Bold text
-    markdown_string = markdown_string.replace("**", "")
-    markdown_string = markdown_string.replace("__", "")
-
-    # Italicized text
-    markdown_string = markdown_string.replace("*", "")
-    markdown_string = markdown_string.replace("_", "")
-
-    # Ordered lists
-    markdown_string = markdown_string.replace("1. ", "")
-    markdown_string = markdown_string.replace("2. ", "")
-    markdown_string = markdown_string.replace("3. ", "")
-    # And so on for other numbers
-
-    # Unordered lists
-    markdown_string = markdown_string.replace("- ", "")
-    markdown_string = markdown_string.replace("* ", "")
-    markdown_string = markdown_string.replace("+ ", "")
-
-    # Links and images
-    while ("[" in markdown_string and "]" in markdown_string and 
-           "(" in markdown_string and ")" in markdown_string):
-        start_link = markdown_string.find("[")
-        end_link = markdown_string.find("]")
-        start_paren = markdown_string.find("(")
-        end_paren = markdown_string.find(")")
-
-        if start_link < start_paren and end_link < end_paren:
-            markdown_string = markdown_string[:start_link] + markdown_string[start_paren+1:end_paren] + markdown_string[end_paren+1:]
-
-    return markdown_string