utils.py 312 B

12345678910
  1. import re
  def clean_string(text: str) -> str:
      """Return *text* with stray characters removed and whitespace normalised.

      Steps, in order:

      1. Delete every backslash and turn every ``#`` into a space.
      2. Collapse each run of whitespace (including newlines) into a single
         space and strip leading/trailing whitespace.
      3. Collapse each run of one repeated punctuation character (anything
         that is neither a word character nor whitespace) into a single
         occurrence, e.g. ``"!!!"`` becomes ``"!"``.

      Args:
          text: The string to clean.

      Returns:
          The cleaned string, with no leading or trailing whitespace and no
          consecutive whitespace characters.
      """
      # Substitute characters *before* normalising whitespace: deleting a
      # backslash can join two spaces and '#' becomes a space, so doing this
      # afterwards would reintroduce doubled or leading/trailing spaces.
      substituted = text.replace('\\', '').replace('#', ' ')

      # r'\s+' already matches newlines, so no separate '\n' pass is needed.
      normalised = re.sub(r'\s+', ' ', substituted).strip()

      # A punctuation character followed by copies of itself -> one copy.
      return re.sub(r'([^\w\s])\1*', r'\1', normalised)