|
|
|
from db import DbAction
|
|
|
|
from req import ReqAction
|
|
|
|
from time import sleep
|
|
|
|
import datetime
|
|
|
|
import random
|
|
|
|
from data_dict import exclude_list
|
|
|
|
|
|
|
|
# config
|
|
|
|
# get_end_date = 'today'
|
|
|
|
# First positional argument to DbAction() in main().
# NOTE(review): presumably the far end of the date range to fetch — confirm against db.DbAction.
get_end_date = '2023-12-01'
|
|
|
|
# Second positional argument to DbAction() in main().
# NOTE(review): how DbAction interprets the literal 'today' is not visible in this file — confirm.
get_start_date = 'today'
|
|
|
|
|
|
|
|
|
|
|
|
# get_start_date = '2023-09-30'
|
|
|
|
|
|
|
|
|
|
|
|
# 整理链接格式
|
|
|
|
def format_link(link, code):
    """Normalize a share link and pick up an extraction code embedded in it.

    Two rewrites are applied, in this order:

    1. If the link contains ``?pwd=``, the marker and everything after it
       are removed from the link, and the (up to) four characters that
       follow the marker replace ``code``.
    2. If the resulting link contains ``init?surl=``, it is rebuilt as
       ``https://pan.baidu.com/s/1<surl>``.

    Args:
        link: Share URL as supplied by the caller.
        code: Extraction code to keep when the link does not carry one.

    Returns:
        A ``(link, code)`` tuple with the normalized values.
    """
    pwd_marker = '?pwd='
    surl_marker = 'init?surl='
    code_length = 4
    # The previous implementation kept at most 22 characters of the surl
    # value (slice 38:60); the same cap is kept for compatibility.
    surl_length = 22

    if pwd_marker in link:
        # Locate the marker instead of counting characters from the end, so
        # trailing fragments or whitespace no longer corrupt link and code.
        link, _, tail = link.partition(pwd_marker)
        code = tail[:code_length]

    if surl_marker in link:
        # Locate the marker instead of assuming a fixed 38-character prefix,
        # so other schemes/hosts in front of it are handled as well.
        surl = link.partition(surl_marker)[2]
        for separator in ('&', '#'):
            # Drop any further query parameters or fragment.
            surl = surl.partition(separator)[0]
        link = 'https://pan.baidu.com/s/1' + surl[:surl_length]

    return link, code
|
|
|
|
|
|
|
|
|
|
|
|
# 封装通用方法
|
|
|
|
def req(_data, req_obj, db_obj):
    """Process one record through ``req_obj`` and report the outcome.

    Builds a three-part save path (``/Temp/``, ``<cate>_<YYYYMMDD>/`` and
    the record id), normalizes the record's link and code with
    ``format_link`` and passes them to ``req_obj.process``.

    Args:
        _data: Record providing the ``id``, ``cate``, ``save_link`` and
            ``code`` keys.
        req_obj: Object exposing ``process(save_path, link, code)``, which
            returns a ``(result, name)`` pair.
        db_obj: Object exposing ``update_file_name(id, name)``.

    Returns:
        ``True`` when ``process`` reports a truthy result; otherwise the
        ``_data`` record itself, so the caller can queue it for a retry.
    """
    today = datetime.datetime.now().strftime('%Y%m%d')
    dated_folder = '_'.join((_data['cate'], today)) + '/'
    save_path = ['/Temp/', dated_folder, str(_data['id'])]

    rule = '-' * 30
    print(rule + str(_data['id']) + rule)
    print(save_path)

    link, code = format_link(_data['save_link'], _data['code'])
    # Show the before/after values of the link and the code.
    print(_data['save_link'], ' -> ', link, ' ', _data['code'], ' -> ', code)

    result, name = req_obj.process(save_path, link, code)
    if not result:
        print(f'{_data["id"]}保存失败,请检查!' + '\n\n')
        return _data

    db_obj.update_file_name(_data['id'], name)
    print(f'{_data["id"]}保存成功' + '\n\n')
    return True
|
|
|
|
|
|
|
|
|
|
|
|
# 过滤数据
|
|
|
|
def data_filter(db_obj, _data_list, excludes=None):
    """Drop records whose name contains an excluded substring.

    Every dropped record is printed and flagged through
    ``db_obj.set_skip``. The list is updated in place and the same list
    object is returned.

    Args:
        db_obj: Object exposing ``set_skip(id)``.
        _data_list: List of records providing the ``id`` and ``name`` keys.
        excludes: Optional iterable of substrings to exclude. Defaults to
            the module-level ``exclude_list``.

    Returns:
        ``_data_list`` itself, containing only the records that were kept.
    """
    if excludes is None:
        excludes = exclude_list

    # Collect survivors in a separate list: removing items from the list
    # being iterated shifts the remaining elements and makes the loop skip
    # the record right after every removed one.
    kept = []
    for data in _data_list:
        if any(exclude in data['name'] for exclude in excludes):
            print('跳过数据:', data['id'], data['name'])
            db_obj.set_skip(data['id'])
        else:
            kept.append(data)

    # Slice assignment keeps the caller's list object, as before.
    _data_list[:] = kept
    return _data_list
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the whole job: fetch, filter, process, retry and report.

    Records come from ``db_obj.get_db_data()`` and pass through
    ``data_filter``; each one is then handed to ``req``. Records that fail
    are retried for up to three further rounds, and whatever still fails is
    flagged with ``db_obj.mark_failed`` and printed. Any exception is
    printed, and the database connection is always closed at the end.

    NOTE(review): the source reached review with its indentation stripped;
    the nesting of the ``sleep`` calls and of the final two prints is the
    most plausible reading — confirm against the original file.
    """
    db_obj = DbAction(get_end_date, get_start_date)
    req_obj = ReqAction()

    def run_round(batch):
        # Process every record in the batch and collect the ones that failed.
        unresolved = []
        for record in batch:
            outcome = req(record, req_obj, db_obj)
            # req() hands the record back (a dict) on failure, True on success.
            if isinstance(outcome, dict):
                unresolved.append(outcome)
            # Random 0-4 second pause after each request.
            sleep(random.randint(0, 3) + random.random())
        return unresolved

    try:
        # Regular pass over the filtered records.
        data_list = db_obj.get_db_data()
        data_list = data_filter(db_obj, data_list)
        req_obj.prepare()
        failed_list = run_round(data_list)

        # Retry the failures, at most three rounds.
        for _ in range(3):
            if not failed_list:
                break
            failed_list = run_round(failed_list)

        # Flag whatever is still failing.
        if failed_list:
            failed_id = []
            for record in failed_list:
                failed_id.append(record['id'])
                db_obj.mark_failed(record['id'])
            print('重试后依然失败:')
            print(failed_id)
            print('请手动重试以下链接:')
            print(db_obj.get_retry())
    except Exception as err:
        print(err)
    finally:
        db_obj.disconnect_db()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|