# parse_index_table.py
from typing import List

import pandas as pd
# from celery_tasks import app
# @app.task
  5. def parse_index_table(tables_list: List[dict]):
  6. for table_dict in tables_list:
  7. if (table_dict.get('table_name') == '索引表') or ('索引表' in table_dict.get('table')[0][0]):
  8. # return True
  9. table = table_dict.get('table')
  10. df = pd.DataFrame(table[2:], columns=table[1])
  11. print(df[['条款号', '评分因素', '页码']])
  12. # return False
  13. if __name__ == '__main__':
  14. import json
  15. from glob import glob
  16. for file in glob('D:\\desktop\\三峡水利\\data\\projects\\*\\投标\\*\\*table.json'):
  17. # with open('D:\\desktop\\三峡水利\\data\\projects\\三峡左岸及地下电站地坪整治\\投标\\湖北建新建设工程有限公司_T221100130348%2F01整本文件\\投标文件-修改版9-5-1-1-table.json', 'r', encoding='utf-8') as fp:
  18. print(file)
  19. with open(file, 'r', encoding='utf-8') as fp:
  20. tables_list = json.load(fp)
  21. print(parse_index_table(tables_list=tables_list))