|
@@ -52,9 +52,10 @@ class WebPageLoader:
|
|
|
content = clean_string(content)
|
|
|
|
|
|
cleaned_size = len(content)
|
|
|
- logging.info(
|
|
|
- f"[{url}] Cleaned page size: {cleaned_size} characters, down from {original_size} (shrunk: {original_size-cleaned_size} chars, {round((1-(cleaned_size/original_size)) * 100, 2)}%)" # noqa:E501
|
|
|
- )
|
|
|
+ if original_size != 0:
|
|
|
+ logging.info(
|
|
|
+ f"[{url}] Cleaned page size: {cleaned_size} characters, down from {original_size} (shrunk: {original_size-cleaned_size} chars, {round((1-(cleaned_size/original_size)) * 100, 2)}%)" # noqa:E501
|
|
|
+ )
|
|
|
|
|
|
meta_data = {
|
|
|
"url": url,
|