// utils.ts

  /**
   * Normalises free-form text into a compact, single-line string.
   *
   * The following steps are applied, in order:
   * 1. Every backslash is removed.
   * 2. Every `#` is replaced with a space.
   * 3. Each run of the same punctuation/symbol character (anything that is
   *    neither a word character `[A-Za-z0-9_]` nor whitespace) is collapsed to a
   *    single occurrence, e.g. `"!!! hello !!!"` becomes `"! hello !"`. Runs of
   *    *different* symbols are left alone (`"!?"` stays `"!?"`), and underscores
   *    are never collapsed because they count as word characters.
   * 4. Each run of whitespace (spaces, tabs, newlines, ...) is collapsed to a
   *    single space, and leading/trailing whitespace is stripped.
   *
   * Whitespace normalisation deliberately runs last: removing backslashes and
   * turning `#` into a space can both create new adjacent, leading or trailing
   * spaces, which would otherwise survive into the result.
   *
   * @param text - The text to be cleaned.
   * @returns The cleaned text; an empty string if nothing but whitespace,
   * backslashes and `#` characters was supplied.
   */
  export function cleanString(text: string): string {
    return (
      text
        // Backslashes are dropped outright (not replaced by a space).
        .replace(/\\/g, '')
        // Hash characters act as separators, so they become a space.
        .replace(/#/g, ' ')
        // Collapse repeats of one identical non-word, non-space character.
        .replace(/([^\w\s])\1*/g, '$1')
        // `\s` already matches newlines, so no separate newline pass is needed.
        .replace(/\s+/g, ' ')
        .trim()
    );
  }