优化代码

main
roger_home_pc 11 months ago
parent ab8e29402c
commit 81ba2c98b3
  1. 2
      cookie.txt
  2. 1
      data_dict.py
  3. 15
      db.py
  4. 27
      main.py
  5. 7
      req.py

@ -1 +1 @@
Hm_lvt_7a3960b6f067eb0085b7f96ff5e660b0=1690550836,1690723621,1690809059,1691281792; ndut_fmt=08603C41725B9D1E463435C4C64207479195C09D833C50A24353D22EB2E4456E; BDCLND=mZG1nNwbvm0KuvJr0nMrsqnZaPHR9lfsfkzVrAx5mig%3D; BDUSS=3dlMEhZLWtLS2tGeVh2T1ozYWtYaXdFTWV6QnRrYzByMEwzd2hWWmxkamd4dk5pRVFBQUFBJCQAAAAAAAAAAAEAAADgajfycm9nZXJzdW4wOTAxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOA5zGLgOcxid; BIDUPSID=2947C9BCCF77B5D4BFC420C602BE3DF5; PSTM=1658846657; ZFY=0Vc50oTr7EmlM6WxzWE2Eewo0ltT5VQJ2KG9s8DcdoY:C; MCITY=-131%3A; STOKEN=d598c65d443486ba06db34f988e6ac8d108c6b2e4f4eae47aeec796b8c487453; BAIDUID=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; BAIDUID_BFESS=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; PANWEB=1; newlogin=1; csrfToken=1xjPBP_dJWHgaoiwR4nQ7VBL; PANPSC=12489461165957612349%3AKkwrx6t0uHBNjb%2BA%2BPLlBZgtJeEFa7WQw1jWL8y1tqu8ztnSQmWL1wZuq6kOUHWCO5C65PtcmdJbCWSrkiaxSMZIzuhpmLh6b55KeZe4CQBn3K3RJ8ZwedL9vR6DsgcTu1tPRVPr6y7%2FwyO%2B4eG7s0I0NZhR03fF1bSRWp4nHGroivDAyr3Yne4PuTzNr1rr; ab_sr=1.0.1_NTI5MWQ4OGQ2ZTcwMTM0ZTgzZDQ5YzEzZjc5NmJlMWMxOGVjODEyOGM1ZWFlZGRkMDc1YzRiYjE3OGVjOWFlOWFkNmViNzVlMzU0YmM0YmFmM2EyNWViZWFhY2U4MWEyOTdiOWM2NGRlOTE5OWUyZWI2NGNmYTE3YmE0OTFkZTUxYjQ0YmQ1Mjk5Njk1NGZlMWI5Mzg3Y2Y0OWZhZTE0NDQ0YzBkYjE4NWNiM2Y2YTVjZGZmZjhlYjExOTAzZTY2; Hm_lpvt_7a3960b6f067eb0085b7f96ff5e660b0=1691281801; Hm_lvt_fa0277816200010a74ab7d2895df481b=1691281797; Hm_lpvt_fa0277816200010a74ab7d2895df481b=1691281797 Hm_lvt_7a3960b6f067eb0085b7f96ff5e660b0=1700350643,1700744212,1701005259,1701296112; ndut_fmt=174C6829094E7CED90BB8BA52805CB8EBE49756A26CDBB65C46D26B5BA983076; BDCLND=e1upVESanlCWk99%2F%2Bvp8B6xin8K0D2l9qgH76us%2Byn4%3D; BDUSS=3dlMEhZLWtLS2tGeVh2T1ozYWtYaXdFTWV6QnRrYzByMEwzd2hWWmxkamd4dk5pRVFBQUFBJCQAAAAAAAAAAAEAAADgajfycm9nZXJzdW4wOTAxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOA5zGLgOcxid; BIDUPSID=2947C9BCCF77B5D4BFC420C602BE3DF5; PSTM=1658846657; ZFY=AXajD5RSzh43DHl4zC7si9bfiNiYAcqipVvKokKtclc:C; MCITY=-131%3A; 
BAIDUID=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; BAIDUID_BFESS=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; PANWEB=1; newlogin=1; Hm_lvt_fa0277816200010a74ab7d2895df481b=1691281797; H_PS_PSSID=39633_39669_39663_39688_39676; STOKEN=d598c65d443486ba06db34f988e6ac8d38c1217cc49d786b4d12a331466bf1ae; csrfToken=3bnMK9CeiHP9kNN3VFPXwTOC

@ -0,0 +1 @@
# Keyword blacklist: records whose name contains any of these substrings
# are skipped (and marked 'skip' in the DB) by main.data_filter().
exclude_list = ['CP', 'CD', '女厕', '厕拍', '抄底', '尾随', '沟厕', '跟踪', 'c拍']

15
db.py

@ -11,10 +11,10 @@ db_config = {
class DbAction: class DbAction:
def __init__(self, end_date='today', start_date='today'):
    """Open the MySQL connection and precompute the date list for queries.

    end_date / start_date: either 'today' or a 'YYYY-MM-DD' string; both
    are forwarded to get_date_list(), which builds the quoted date list
    used in the SQL IN (...) clause.
    """
    self.conn = pymysql.Connection(**db_config)
    # DictCursor yields rows as dicts, which downstream code indexes by name
    self.cursor = self.conn.cursor(cursor=DictCursor)
    self.date_list = self.get_date_list(end_date, start_date)
def get_db_data(self): def get_db_data(self):
query_sql = f"SELECT h.id, h.cate, h.date, h.`name`, h.save_link, h.`code`, h.unzip_pwd FROM scrapyh h WHERE h.date IN ({self.date_list})" query_sql = f"SELECT h.id, h.cate, h.date, h.`name`, h.save_link, h.`code`, h.unzip_pwd FROM scrapyh h WHERE h.date IN ({self.date_list})"
@ -45,20 +45,25 @@ class DbAction:
# 错误调试 # 错误调试
# query_sql = query_sql + " AND id IN ('35334', '35335', '35336', '35337', '35338', '35339')" # query_sql = query_sql + " AND id IN ('35334', '35335', '35336', '35337', '35338', '35339')"
self.cursor.execute(query_sql) self.cursor.execute(query_sql)
return self.cursor.fetchall() r = self.cursor.fetchall()
return r
def set_skip(self, _id):
    """Mark row `_id` in scrapyh as skipped (file_name = 'skip') and commit.

    Fix: the SQL literal had an `f` prefix with no placeholders (F541);
    a plain string is used. The id is still bound via a parameterized
    query, which avoids SQL injection.
    """
    query_sql = "UPDATE scrapyh SET file_name = 'skip' WHERE id = %s"
    self.cursor.execute(query_sql, (_id,))
    self.conn.commit()
def disconnect_db(self): def disconnect_db(self):
self.cursor.close() self.cursor.close()
self.conn.close() self.conn.close()
@staticmethod @staticmethod
def get_date_list(_end_date): def get_date_list(_end_date, _start_date='today'):
date_list = [] date_list = []
if _end_date == 'today': if _end_date == 'today':
date_list.append(f"'{datetime.date.strftime(datetime.date.today(), '%Y-%m-%d')}'") date_list.append(f"'{datetime.date.strftime(datetime.date.today(), '%Y-%m-%d')}'")
else: else:
start_date = datetime.datetime.today() start_date = datetime.datetime.today() if _start_date == 'today' else datetime.datetime.strptime(_start_date, '%Y-%m-%d')
end_date = datetime.datetime.strptime(_end_date, '%Y-%m-%d') end_date = datetime.datetime.strptime(_end_date, '%Y-%m-%d')
delta = datetime.timedelta(days=1) delta = datetime.timedelta(days=1)
while start_date > end_date: while start_date > end_date:

@ -3,17 +3,23 @@ from req import ReqAction
from time import sleep from time import sleep
import datetime import datetime
import random import random
from data_dict import exclude_list
# config # config
# get_end_date = 'today' # get_end_date = 'today'
get_end_date = '2023-08-01' get_end_date = '2023-12-01'
get_start_date = 'today'
# get_start_date = '2023-09-30'
# 整理链接格式 # 整理链接格式
def format_link(link, code):
    """Normalize a Baidu pan share link and its extraction code.

    If the link embeds the password ('...?pwd=xxxx'), split it off and use
    it as the code. Generalized: str.partition() replaces the hard-coded
    `[:-9]` / `[-4:]` slices, so passwords of any length work (identical
    behavior for the standard 4-character case).
    'share/init?surl=' links are rewritten to the canonical short form.

    Returns (link, code).
    """
    if '?pwd=' in link:
        # partition() both strips the '?pwd=...' suffix and yields the
        # password in one pass, with no fixed slice offsets
        link, _, code = link.partition('?pwd=')
    if 'init?surl=' in link:
        # 38 == len('https://pan.baidu.com/share/init?surl='); the surl id
        # is the following 22 characters
        link = 'https://pan.baidu.com/s/1' + link[38:60]
    return link, code
@ -38,12 +44,25 @@ def req(_data, req_obj, db_obj):
return _data return _data
# Filter records
def data_filter(db_obj, _data_list, _exclude=None):
    """Drop records whose name contains a blacklisted keyword.

    Bug fix: the previous version called `_data_list.pop(index)` while
    iterating with enumerate(), which shifts the list and silently skips
    the element immediately after every removal. A fresh list is built
    instead, so every record is inspected exactly once.

    db_obj: provides set_skip(id) to persist the skip marker in the DB.
    _data_list: list of row dicts with at least 'id' and 'name' keys.
    _exclude: optional keyword list; defaults to data_dict.exclude_list
        (backward-compatible — existing callers pass two arguments).
    Returns the filtered list.
    """
    keywords = exclude_list if _exclude is None else _exclude
    kept = []
    for data in _data_list:
        matched = next((kw for kw in keywords if kw in data['name']), None)
        if matched is None:
            kept.append(data)
        else:
            print('跳过数据:', data['id'], data['name'])
            db_obj.set_skip(data['id'])
    return kept
def main(): def main():
db_obj = DbAction(get_end_date) db_obj = DbAction(get_end_date, get_start_date)
req_obj = ReqAction() req_obj = ReqAction()
try: try:
# 处理正常任务 # 处理正常任务
data_list = db_obj.get_db_data() data_list = db_obj.get_db_data()
data_list = data_filter(db_obj, data_list)
failed_list = [] failed_list = []
req_obj.prepare() req_obj.prepare()
for _data in data_list: for _data in data_list:

@ -59,10 +59,13 @@ class ReqAction:
def get_cookies(self):
    """Load the Baidu cookie from cookie.txt into the request headers.

    Bug fix: readline() keeps the trailing newline, which would end up
    inside the Cookie HTTP header value; surrounding whitespace is
    stripped before storing it.
    """
    with open('cookie.txt', 'r', encoding='utf-8') as f:
        cookie = f.readline().strip()
    print("获取Cookie: 访问网盘首页开启控制台,查找main请求,使用原始格式查看请求头,复制cookie到cookie.txt")
    self.request_header['Cookie'] = cookie
# 获取bdstoken函数 # 获取bdstoken函数
@retry(stop_max_attempt_number=3, wait_fixed=random.randint(1000, 3000)) # @retry(stop_max_attempt_number=3, wait_fixed=random.randint(1000, 3000))
def get_bdstoken(self): def get_bdstoken(self):
url = f'{BASE_URL}/api/gettemplatevariable?clienttype=0&app_id=250528&web=1&fields=[%22bdstoken%22,%22token%22,%22uk%22,%22isdocuser%22,%22servertime%22]' url = f'{BASE_URL}/api/gettemplatevariable?clienttype=0&app_id=250528&web=1&fields=[%22bdstoken%22,%22token%22,%22uk%22,%22isdocuser%22,%22servertime%22]'
response = self.session.get(url=url, headers=self.request_header, timeout=20, allow_redirects=True, response = self.session.get(url=url, headers=self.request_header, timeout=20, allow_redirects=True,

Loading…
Cancel
Save