  1. #!/usr/local/bin/python
  2. # -*- coding: utf-8 -*-
  3. # @Last Modified time: 2022-02-24 09:43:13
  4. #
  5. # 爬虫批量定时任务
  6. import os
  7. import json
  8. import time
  9. import datetime
  10. import logging
  11. import requests
  12. from requests.adapters import HTTPAdapter
  13. from apscheduler.schedulers.blocking import BlockingScheduler
  14. from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
  15. logging.basicConfig(level=logging.INFO,
  16. filename='timertask.log',
  17. format='%(asctime)s:%(levelname)s:%(message)s'
  18. )
  19. sched = BlockingScheduler(timezone="Asia/Shanghai")
  20. spiderlist = ['bjx','cecn','ceec','ceeia','chinapower','chinapv','chng','cnen','cnnpn','cny','cpnn','csg','ctg','cweea','eptc','escn','ewindpower','gxepa','iesplaza','nengyuanjie','newenergy','piec','ppcc','powerchina','solarbe','solarenpv','sungrow','twea','xhhydropower','zzsolar']
  21. # 从后端获取爬虫列表
  22. def get_spiders():
  23. # 后端 ip
  24. ip = os.environ.get("Back_End_Ip", "192.168.1.203")
  25. # 后端 port
  26. port = os.environ.get("Back_End_Port", 11031)
  27. # 请求后端数据库
  28. url = 'http://{}:{}/resource/judge'.format(ip,port)
  29. session = requests.Session()
  30. session.mount('http://', HTTPAdapter(max_retries = 3))
  31. try:
  32. response = session.get(url, timeout=10)
  33. # 返回运行列表
  34. return json.loads(response.text)['data']['running']
  35. except requests.exceptions.RequestException as e:
  36. print(e)
  37. # 运行任务
  38. @sched.scheduled_job('cron', hour=1, next_run_time=datetime.datetime.now())
  39. def spiders_job():
  40. # 获取运行列表
  41. spiders = get_spiders()
  42. # 执行任务
  43. for spider in spiders:
  44. if spider in spiderlist:
  45. data = {'project': os.environ.get("ProjectName", ""),'spider':spider,'jobid':datetime.datetime.now().strftime("%Y-%m-%dT%H_%M_%S")}
  46. response = requests.post(url='http://localhost:6800/schedule.json', data=data)
  47. logging.info(response.text)
  48. time.sleep(2)
  49. sched.start()