mansched.py 2.4 KB

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# @Last Modified time: 2022-02-24 09:43:13
#
# Scheduled batch crawler jobs
import os
import json
import time
import datetime
import logging
import requests
import subprocess
from requests.adapters import HTTPAdapter
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR

logging.basicConfig(level=logging.INFO,
                    filename='test.log',
                    format='%(asctime)s:%(levelname)s:%(message)s')

sched = BlockingScheduler()

# Spiders known to this project; only names in this list get scheduled.
spiderlist = ['bjx', 'cecn', 'ceec', 'ceeia', 'chinapower', 'chinapv', 'chng', 'cnen',
              'cnnpn', 'cny', 'cpnn', 'csg', 'ctg', 'cweea', 'eptc', 'escn', 'ewindpower',
              'gxepa', 'iesplaza', 'nengyuanjie', 'newenergy', 'piec', 'ppcc', 'powerchina',
              'solarbe', 'solarenpv', 'sungrow', 'twea', 'xhhydropower', 'zzsolar']
# Fetch the list of spiders to run from the backend
def get_spiders():
    # Backend IP
    ip = os.environ.get("Back_End_Ip", "192.168.1.203")
    # Backend port
    port = os.environ.get("Back_End_Port", 11031)
    # Query the backend service
    url = 'http://{}:{}/resource/judge'.format(ip, port)
    session = requests.Session()
    session.mount('http://', HTTPAdapter(max_retries=3))
    try:
        response = session.get(url, timeout=10)
        # Return the "running" list from the response
        return json.loads(response.text)['data']['running']
    except requests.exceptions.RequestException as e:
        print(e)
        return []
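
# Note: based only on the parsing above (not a verified API contract), the
# /resource/judge endpoint is assumed to return JSON roughly shaped like:
#   {"data": {"running": ["bjx", "cecn", ...]}}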

# Run the crawl jobs: daily at 01:00, plus once immediately on startup
@sched.scheduled_job('cron', hour=1, next_run_time=datetime.datetime.now())
def spiders_job():
    # Get the list of spiders to run
    spiders = get_spiders()
    print(spiders)
    # Write a shell script that launches each spider in the background
    with open("/workspace/sched.sh", "w", encoding="utf-8") as ff:
        ff.write("#!/bin/bash\n")
        ff.write("cd /workspace/electric\n")
        for spider in spiders:
            if spider in spiderlist:
                ff.write('nohup scrapy crawl {spider} >> /workspace/scrapycrawl.log 2>&1 &\n'.format(spider=spider))
    # Log the generated script for troubleshooting
    with open("/workspace/sched.sh", "r", encoding="utf-8") as ff:
        logging.info(ff.read())
    os.system("chmod +x /workspace/sched.sh")
    # Execute the generated script and surface anything it writes to stderr
    p = subprocess.Popen("/workspace/sched.sh", shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if stderr:
        raise subprocess.SubprocessError(stderr.decode())
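
# For reference, the generated /workspace/sched.sh looks roughly like this
# (assuming, purely for illustration, that "bjx" and "cecn" are in the running list):
#   #!/bin/bash
#   cd /workspace/electric
#   nohup scrapy crawl bjx >> /workspace/scrapycrawl.log 2>&1 &
#   nohup scrapy crawl cecn >> /workspace/scrapycrawl.log 2>&1 &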

sched.start()