From 81ba2c98b3cebbdb18aadef3a4f53fcdb98eed1f Mon Sep 17 00:00:00 2001 From: roger_home_pc Date: Mon, 1 Jan 2024 12:21:54 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cookie.txt | 2 +- data_dict.py | 1 + db.py | 15 ++++++++++----- main.py | 27 +++++++++++++++++++++++---- req.py | 7 +++++-- 5 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 data_dict.py diff --git a/cookie.txt b/cookie.txt index 7f803a7..153c058 100644 --- a/cookie.txt +++ b/cookie.txt @@ -1 +1 @@ -Hm_lvt_7a3960b6f067eb0085b7f96ff5e660b0=1690550836,1690723621,1690809059,1691281792; ndut_fmt=08603C41725B9D1E463435C4C64207479195C09D833C50A24353D22EB2E4456E; BDCLND=mZG1nNwbvm0KuvJr0nMrsqnZaPHR9lfsfkzVrAx5mig%3D; BDUSS=3dlMEhZLWtLS2tGeVh2T1ozYWtYaXdFTWV6QnRrYzByMEwzd2hWWmxkamd4dk5pRVFBQUFBJCQAAAAAAAAAAAEAAADgajfycm9nZXJzdW4wOTAxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOA5zGLgOcxid; BIDUPSID=2947C9BCCF77B5D4BFC420C602BE3DF5; PSTM=1658846657; ZFY=0Vc50oTr7EmlM6WxzWE2Eewo0ltT5VQJ2KG9s8DcdoY:C; MCITY=-131%3A; STOKEN=d598c65d443486ba06db34f988e6ac8d108c6b2e4f4eae47aeec796b8c487453; BAIDUID=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; BAIDUID_BFESS=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; PANWEB=1; newlogin=1; csrfToken=1xjPBP_dJWHgaoiwR4nQ7VBL; PANPSC=12489461165957612349%3AKkwrx6t0uHBNjb%2BA%2BPLlBZgtJeEFa7WQw1jWL8y1tqu8ztnSQmWL1wZuq6kOUHWCO5C65PtcmdJbCWSrkiaxSMZIzuhpmLh6b55KeZe4CQBn3K3RJ8ZwedL9vR6DsgcTu1tPRVPr6y7%2FwyO%2B4eG7s0I0NZhR03fF1bSRWp4nHGroivDAyr3Yne4PuTzNr1rr; ab_sr=1.0.1_NTI5MWQ4OGQ2ZTcwMTM0ZTgzZDQ5YzEzZjc5NmJlMWMxOGVjODEyOGM1ZWFlZGRkMDc1YzRiYjE3OGVjOWFlOWFkNmViNzVlMzU0YmM0YmFmM2EyNWViZWFhY2U4MWEyOTdiOWM2NGRlOTE5OWUyZWI2NGNmYTE3YmE0OTFkZTUxYjQ0YmQ1Mjk5Njk1NGZlMWI5Mzg3Y2Y0OWZhZTE0NDQ0YzBkYjE4NWNiM2Y2YTVjZGZmZjhlYjExOTAzZTY2; Hm_lpvt_7a3960b6f067eb0085b7f96ff5e660b0=1691281801; Hm_lvt_fa0277816200010a74ab7d2895df481b=1691281797; Hm_lpvt_fa0277816200010a74ab7d2895df481b=1691281797 \ No newline at end of file +Hm_lvt_7a3960b6f067eb0085b7f96ff5e660b0=1700350643,1700744212,1701005259,1701296112; ndut_fmt=174C6829094E7CED90BB8BA52805CB8EBE49756A26CDBB65C46D26B5BA983076; BDCLND=e1upVESanlCWk99%2F%2Bvp8B6xin8K0D2l9qgH76us%2Byn4%3D; BDUSS=3dlMEhZLWtLS2tGeVh2T1ozYWtYaXdFTWV6QnRrYzByMEwzd2hWWmxkamd4dk5pRVFBQUFBJCQAAAAAAAAAAAEAAADgajfycm9nZXJzdW4wOTAxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOA5zGLgOcxid; BIDUPSID=2947C9BCCF77B5D4BFC420C602BE3DF5; PSTM=1658846657; ZFY=AXajD5RSzh43DHl4zC7si9bfiNiYAcqipVvKokKtclc:C; MCITY=-131%3A; BAIDUID=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; BAIDUID_BFESS=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; PANWEB=1; newlogin=1; Hm_lvt_fa0277816200010a74ab7d2895df481b=1691281797; H_PS_PSSID=39633_39669_39663_39688_39676; STOKEN=d598c65d443486ba06db34f988e6ac8d38c1217cc49d786b4d12a331466bf1ae; csrfToken=3bnMK9CeiHP9kNN3VFPXwTOC \ No newline at end of file diff --git a/data_dict.py b/data_dict.py new file mode 100644 index 0000000..7f7982d --- /dev/null +++ b/data_dict.py @@ -0,0 +1 @@ +exclude_list = ['CP', 'CD', '女厕', '厕拍', '抄底', '尾随', '沟厕', '跟踪', 'c拍'] diff --git a/db.py b/db.py index 98ebff9..76b5086 100644 --- a/db.py +++ b/db.py @@ -11,10 +11,10 @@ db_config = { class DbAction: - def __init__(self, end_date='today'): + def __init__(self, end_date='today', start_date='today'): self.conn = pymysql.Connection(**db_config) self.cursor = self.conn.cursor(cursor=DictCursor) - self.date_list = self.get_date_list(end_date) + self.date_list = self.get_date_list(end_date, start_date) def get_db_data(self): query_sql = f"SELECT h.id, h.cate, h.date, h.`name`, h.save_link, h.`code`, h.unzip_pwd FROM scrapyh h WHERE h.date IN ({self.date_list})" @@ -45,20 +45,25 @@ class DbAction: # 错误调试 # query_sql = query_sql + " AND id IN ('35334', '35335', '35336', '35337', '35338', '35339')" self.cursor.execute(query_sql) - return self.cursor.fetchall() + r = self.cursor.fetchall() + return r + def set_skip(self, _id): + query_sql = f"UPDATE scrapyh SET file_name = 'skip' WHERE id = %s" + self.cursor.execute(query_sql, (_id,)) + self.conn.commit() def disconnect_db(self): self.cursor.close() self.conn.close() @staticmethod - def get_date_list(_end_date): + def get_date_list(_end_date, _start_date='today'): date_list = [] if _end_date == 'today': date_list.append(f"'{datetime.date.strftime(datetime.date.today(), '%Y-%m-%d')}'") else: - start_date = datetime.datetime.today() + start_date = datetime.datetime.today() if _start_date == 'today' else datetime.datetime.strptime(_start_date, '%Y-%m-%d') end_date = datetime.datetime.strptime(_end_date, '%Y-%m-%d') delta = datetime.timedelta(days=1) while start_date > end_date: diff --git a/main.py b/main.py index 086d8d2..fa8ec92 100644 --- a/main.py +++ b/main.py @@ -3,17 +3,23 @@ from req import ReqAction from time import sleep import datetime import random +from data_dict import exclude_list # config # get_end_date = 'today' -get_end_date = '2023-08-01' +get_end_date = '2023-12-01' +get_start_date = 'today' + + +# get_start_date = '2023-09-30' # 整理链接格式 def format_link(link, code): if '?pwd=' in link: - link = link[:-9] - # code = link[-4:] + _link = link + link = _link[:-9] + code = _link[-4:] if 'init?surl=' in link: link = 'https://pan.baidu.com/s/1' + link[38:60] return link, code @@ -38,12 +44,25 @@ def req(_data, req_obj, db_obj): return _data +# 过滤数据 +def data_filter(db_obj, _data_list): + for index, data in enumerate(_data_list): + for exclude in exclude_list: + if exclude in data['name']: + _data_list.pop(index) + print('跳过数据:', data['id'], data['name']) + db_obj.set_skip(data['id']) + break + return _data_list + + def main(): - db_obj = DbAction(get_end_date) + db_obj = DbAction(get_end_date, get_start_date) req_obj = ReqAction() try: # 处理正常任务 data_list = db_obj.get_db_data() + data_list = data_filter(db_obj, data_list) failed_list = [] req_obj.prepare() for _data in data_list: diff --git a/req.py b/req.py index ecd3d06..3579035 100644 --- a/req.py +++ b/req.py @@ -59,10 +59,13 @@ class ReqAction: def get_cookies(self): with open('cookie.txt', 'r', encoding='utf-8') as f: - self.request_header['Cookie'] = f.readline() + cookie = f.readline() + print("获取Cookie: 访问网盘首页开启控制台,查找main请求,使用原始格式查看请求头,复制cookie到cookie.txt") + self.request_header['Cookie'] = cookie + # 获取bdstoken函数 - @retry(stop_max_attempt_number=3, wait_fixed=random.randint(1000, 3000)) + # @retry(stop_max_attempt_number=3, wait_fixed=random.randint(1000, 3000)) def get_bdstoken(self): url = f'{BASE_URL}/api/gettemplatevariable?clienttype=0&app_id=250528&web=1&fields=[%22bdstoken%22,%22token%22,%22uk%22,%22isdocuser%22,%22servertime%22]' response = self.session.get(url=url, headers=self.request_header, timeout=20, allow_redirects=True,