123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- #!/usr/local/bin/python
- # -*- coding: utf-8 -*-
- # @Last Modified time: 2022-02-24 09:43:13
- #
- # 爬虫批量定时任务
- import os
- import json
- import time
- import datetime
- import logging
- import requests
- import subprocess
- from requests.adapters import HTTPAdapter
- from apscheduler.schedulers.blocking import BlockingScheduler
- from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
# Send INFO-and-above log records to test.log, formatted as "time:level:message".
logging.basicConfig(
    filename='test.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s',
)

# Scheduler instance that the job below registers itself with.
sched = BlockingScheduler()

# Names of the spiders this script is allowed to launch.
spiderlist = [
    'bjx', 'cecn', 'ceec', 'ceeia', 'chinapower', 'chinapv',
    'chng', 'cnen', 'cnnpn', 'cny', 'cpnn', 'csg',
    'ctg', 'cweea', 'eptc', 'escn', 'ewindpower', 'gxepa',
    'iesplaza', 'nengyuanjie', 'newenergy', 'piec', 'ppcc', 'powerchina',
    'solarbe', 'solarenpv', 'sungrow', 'twea', 'xhhydropower', 'zzsolar',
]
# Fetch the spider list from the backend
def get_spiders():
    """Return the spiders the backend reports under ``data.running``.

    The backend address comes from the ``Back_End_Ip`` and ``Back_End_Port``
    environment variables (defaults ``192.168.1.203`` and ``11031``). The
    endpoint queried is ``GET http://<ip>:<port>/resource/judge`` with a
    10-second timeout and up to 3 connection retries.

    Returns:
        The value stored at ``['data']['running']`` in the JSON response, or
        an empty list when the request fails or the response body is not the
        expected JSON structure.
    """
    # Backend host and port; the environment value (a string) or the default
    # are both rendered into the URL via str.format.
    ip = os.environ.get("Back_End_Ip", "192.168.1.203")
    port = os.environ.get("Back_End_Port", 11031)
    url = 'http://{}:{}/resource/judge'.format(ip, port)

    # The context manager closes the session (and its pooled connections)
    # even when the request raises.
    with requests.Session() as session:
        session.mount('http://', HTTPAdapter(max_retries=3))
        try:
            response = session.get(url, timeout=10)
            return json.loads(response.text)['data']['running']
        except requests.exceptions.RequestException as e:
            logging.error("Failed to query spider list from %s: %s", url, e)
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers a non-JSON body; KeyError/TypeError cover a
            # JSON body that lacks the data -> running structure.
            logging.error("Unexpected spider list response from %s: %r", url, e)
    return []
# Scheduled job: launch the spiders
@sched.scheduled_job('cron', hour=1, next_run_time=datetime.datetime.now())
def spiders_job():
    """Generate and execute a shell script that starts the running spiders.

    Registered as a cron job with ``hour=1``; ``next_run_time`` is set to the
    time this module is loaded so the first run is scheduled right away.

    The job asks ``get_spiders()`` for the spider names, keeps only those
    that also appear in ``spiderlist``, and writes ``/workspace/sched.sh``
    containing one backgrounded ``nohup scrapy crawl <name>`` line per spider
    (output appended to ``/workspace/scrapycrawl.log``). The script content
    is logged, the file is made executable, and it is then executed.

    Raises:
        subprocess.SubprocessError: if the script writes anything to stderr.
    """
    script_path = "/workspace/sched.sh"

    spiders = get_spiders()
    print(spiders)

    # Build the whole script in memory so the file is written exactly once
    # instead of being reopened in append mode for every spider.
    script_lines = ["#!/bin/bash\n", "cd /workspace/electric\n"]
    script_lines.extend(
        'nohup scrapy crawl {spider} >> /workspace/scrapycrawl.log 2>&1 &\n'.format(spider=spider)
        for spider in spiders
        # Whitelist check: only known spider names ever reach the shell script.
        if spider in spiderlist
    )
    script = "".join(script_lines)

    with open(script_path, "w", encoding="utf-8") as ff:
        ff.write(script)
    logging.info(script)

    # Add the execute bits directly rather than spawning a shell for chmod.
    os.chmod(script_path, os.stat(script_path).st_mode | 0o111)

    p = subprocess.Popen(script_path, shell=False, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    _, stderr = p.communicate()
    if stderr:
        raise subprocess.SubprocessError(stderr)
# Start the scheduler so the registered job(s) begin running; with a
# BlockingScheduler this call does not return while the scheduler is running.
sched.start()
|