parent
0145b29f95
commit
29dace50f4
5 changed files with 213 additions and 0 deletions
@ -0,0 +1 @@ |
|||||||
|
Hm_lvt_7a3960b6f067eb0085b7f96ff5e660b0=1690080220,1690550836,1690723621,1690809059; ndut_fmt=2F3F7209152B005542DB9E27E1FE8B25A229DCC706598E1B99139109B3AD5791; BDCLND=Kiz1b8Bpv31XBFDJNdGFjZSNEpvCREEMgiHA5oAF99k%3D; BDUSS=3dlMEhZLWtLS2tGeVh2T1ozYWtYaXdFTWV6QnRrYzByMEwzd2hWWmxkamd4dk5pRVFBQUFBJCQAAAAAAAAAAAEAAADgajfycm9nZXJzdW4wOTAxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOA5zGLgOcxid; BIDUPSID=2947C9BCCF77B5D4BFC420C602BE3DF5; PSTM=1658846657; ZFY=0Vc50oTr7EmlM6WxzWE2Eewo0ltT5VQJ2KG9s8DcdoY:C; MCITY=-131%3A; STOKEN=d598c65d443486ba06db34f988e6ac8d108c6b2e4f4eae47aeec796b8c487453; BAIDUID=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; BAIDUID_BFESS=8DC7F291FA0638DF89C98F0178FCDC7C:FG=1; PANWEB=1; newlogin=1; csrfToken=2r7JSCtVulGN3eOExz3JqsxP; PANPSC=12925419710669722059%3AKkwrx6t0uHBNjb%2BA%2BPLlBZgtJeEFa7WQw1jWL8y1tqu8ztnSQmWL1wZuq6kOUHWChFbm%2BExtunGiz1cJOClRr8ZIzuhpmLh6b55KeZe4CQBn3K3RJ8ZwedL9vR6DsgcTu1tPRVPr6y7%2FwyO%2B4eG7s0I0NZhR03fFuueAr2t%2FRhNXpcvfoZtUp%2B4PuTzNr1rr; ab_sr=1.0.1_NDE5NTY0NjYxYjNjZmVjZmQ2YTY1YjYzMjFlY2E5MGViZWI2MWY5MzA1Yzk4N2M1YzM3YmFlZmM2MmM1NmNmYjgzYTk4NWQxNzdiZWQxZTAzN2RlYjQ2NmI1NjAzNTAwNmJjNjNlZGFkN2JmNTUxNDMzNmRkM2RkZWVhZTZhMTM4ZjA3ZjJlMTAxYTc2MTk2YmM0YTA5OGUzMWIzNzE5Yjg0MTFkNzdjOThhMjA5ZDVhODczOTZiYzM2ZmEzZjhk |
@ -0,0 +1,43 @@ |
|||||||
|
import pymysql |
||||||
|
from pymysql.cursors import DictCursor |
||||||
|
import datetime |
||||||
|
|
||||||
|
import os

# MySQL connection settings passed as keyword arguments to pymysql.Connection.
# Each value can be overridden through an environment variable so credentials
# do not have to live in the source tree; the literals are kept only as
# backward-compatible defaults.
# NOTE(review): the default password is committed in plain text - rotate it and
# drop the default once every deployment sets SCRAPY_DB_PASSWORD.
db_config = {
    'host': os.environ.get('SCRAPY_DB_HOST', '192.168.66.101'),
    'user': os.environ.get('SCRAPY_DB_USER', 'root'),
    'password': os.environ.get('SCRAPY_DB_PASSWORD', 'Sxzgx1209'),
    'database': os.environ.get('SCRAPY_DB_NAME', 'scrapy'),
}
||||||
|
|
||||||
|
|
||||||
|
class DbAction:
    """Read rows of the ``scrapyh`` table for a run of consecutive dates.

    The instance opens a MySQL connection using ``db_config`` and a
    ``DictCursor``.  ``date_list`` holds the comma-separated, single-quoted
    dates produced by :meth:`get_date_list`.

    The object can be used as a context manager so that the cursor and the
    connection are closed when the work is done.
    """

    def __init__(self, end_date='today'):
        """Connect to the database and pre-compute the date filter.

        :param end_date: ``'today'`` for the current day only, otherwise the
            oldest day to include, formatted as ``YYYY-MM-DD``.
        :raises ValueError: if ``end_date`` is neither ``'today'`` nor a
            ``YYYY-MM-DD`` date (raised by ``strptime``).
        """
        self.conn = pymysql.Connection(**db_config)
        self.cursor = self.conn.cursor(cursor=DictCursor)
        self.date_list = self.get_date_list(end_date)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the cursor and the connection opened by ``__init__``."""
        self.cursor.close()
        self.conn.close()

    def get_db_data(self):
        """Return every matching row as fetched by the cursor.

        An empty ``date_list`` (``end_date`` lies in the future) yields an
        empty list instead of sending the invalid SQL ``IN ()`` to the server.
        """
        if not self.date_list:
            return []
        # date_list only ever contains strftime-formatted dates built by
        # get_date_list, so interpolating it cannot inject arbitrary SQL.
        query_sql = (
            "SELECT h.id, h.cate, h.date, h.`name`, h.save_link, h.`code`, h.unzip_pwd "
            f"FROM scrapyh h WHERE h.date IN ({self.date_list});"
        )
        self.cursor.execute(query_sql)
        return self.cursor.fetchall()

    @staticmethod
    def get_date_list(_end_date):
        """Build the quoted, comma-separated date list for the SQL ``IN`` clause.

        :param _end_date: ``'today'`` or the oldest day to include
            (``YYYY-MM-DD``).
        :return: e.g. ``"'2023-07-30','2023-07-29','2023-07-28'"``, newest day
            first and ``_end_date`` included; an empty string when
            ``_end_date`` is later than today.
        :raises ValueError: if ``_end_date`` cannot be parsed.
        """
        today = datetime.date.today()
        if _end_date == 'today':
            return f"'{today:%Y-%m-%d}'"

        oldest = datetime.datetime.strptime(_end_date, '%Y-%m-%d').date()
        one_day = datetime.timedelta(days=1)
        quoted_days = []
        current = today
        # Compare calendar dates (not timestamps) so that the inclusion of
        # _end_date does not depend on the time of day the code runs.
        while current >= oldest:
            quoted_days.append(f"'{current:%Y-%m-%d}'")
            current -= one_day
        return ','.join(quoted_days)
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual check: fetch and print every row dated from 2023-07-28 up to today.
    rows = DbAction('2023-07-28').get_db_data()
    print(rows)
@ -0,0 +1,169 @@ |
|||||||
|
import os |
||||||
|
|
||||||
|
import requests |
||||||
|
import urllib3 |
||||||
|
import random |
||||||
|
import re |
||||||
|
import time |
||||||
|
from retrying import retry |
||||||
|
|
||||||
|
# The requests in this module are sent with verify=False; silence only the
# resulting InsecureRequestWarning instead of every urllib3 warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
||||||
|
|
||||||
|
# Module-level constants.

# Root URL that every API path in this module is appended to.
BASE_URL = "https://pan.baidu.com"

# Human-readable (Chinese) message for each result code. Keys are either an
# integer code or a page title string.
ERROR_CODES = {
    # Share link could not be resolved.
    1: "链接失效,没获取到 shareid",
    2: "链接失效,没获取到 user_id",
    3: "链接失效,没获取到 fs_id",
    "百度网盘-链接不存在": "链接失效,文件已经被删除或取消分享",
    # Problems with the link or its extraction code.
    "百度网盘 请输入提取码": "链接错误,缺少提取码",
    -9: "链接错误,提取码错误或验证已过期",
    -62: "链接错误尝试次数过多,请手动转存或稍后再试",
    105: "链接错误,链接格式不正确",
    # Transfer failures.
    -4: "转存失败,无效登录。请退出账号在其他地方的登录",
    -6: "转存失败,请用浏览器无痕模式获取 Cookie",
    4: "转存失败,目录中已有同名文件或文件夹存在",
    -8: "转存失败,目录中已有同名文件或文件夹存在",
    12: "转存失败,转存文件数超过限制",
    -7: "转存失败,秒传文件名有非法字符",
    404: "转存失败,秒传无效",
    31190: "转存失败,秒传未生效",
    31039: "转存失败,秒传文件名冲突",
    -10: "转存失败,容量不足",
    20: "转存失败,容量不足",
    # Success.
    0: "转存成功",
}
||||||
|
|
||||||
|
|
||||||
|
class ReqAction:
    """Client for the pan.baidu.com web endpoints used to save shared links.

    All calls go through one ``requests.Session`` and send the browser-like
    headers in ``request_header``.  Most methods return the ``errno`` integer
    from the JSON reply on failure and the useful payload on success, so
    callers distinguish the two by type/value.

    The ``@retry`` decorators only re-run a method when it raises (network
    error, invalid JSON, ...); a non-zero ``errno`` is returned, not retried.
    """

    # Default request headers. Each instance works on its own copy (made in
    # __init__) so that setting the Cookie on one instance does not leak into
    # the class or into other instances.
    request_header = {
        'Host': 'pan.baidu.com',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'same-site',
        'Sec-Fetch-Mode': 'navigate',
        'Referer': 'https://pan.baidu.com',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7,en-GB;q=0.6,ru;q=0.5',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    }

    def __init__(self):
        """Create the HTTP session; ``bdstoken`` stays ``None`` until fetched."""
        self.session = requests.Session()
        self.bdstoken = None
        # Shadow the class-level dict with a private copy (see note above).
        self.request_header = dict(type(self).request_header)

    def get_cookies(self):
        """Load the Cookie header from the first line of ``cookie.txt``.

        Surrounding whitespace, including the trailing newline returned by
        ``readline()``, is removed so it does not end up in the header value.

        :raises OSError: if ``cookie.txt`` cannot be opened.
        """
        with open('cookie.txt', 'r', encoding='utf-8') as f:
            self.request_header['Cookie'] = f.readline().strip()

    # wait_random_min/max draws a fresh 1-3 s delay for every retry;
    # wait_fixed=random.randint(...) would be evaluated only once, when the
    # class body is executed.
    @retry(stop_max_attempt_number=3, wait_random_min=1000, wait_random_max=3000)
    def get_bdstoken(self):
        """Fetch the ``bdstoken`` of the logged-in account.

        :return: the token from ``result.bdstoken`` when ``errno`` is 0,
            otherwise the non-zero ``errno``.
        """
        url = f'{BASE_URL}/api/gettemplatevariable?clienttype=0&app_id=250528&web=1&fields=[%22bdstoken%22,%22token%22,%22uk%22,%22isdocuser%22,%22servertime%22]'
        response = self.session.get(url=url, headers=self.request_header, timeout=20, allow_redirects=True,
                                    verify=False)
        print(response.text)
        payload = response.json()
        return payload['errno'] if payload['errno'] != 0 else payload['result']['bdstoken']

    @retry(stop_max_attempt_number=5, wait_random_min=1000, wait_random_max=3000)
    def get_dir_list(self, dir_path):
        """List the entries of a remote directory.

        :param dir_path: remote directory path, inserted into the query string
            as given.
        :return: the ``list`` field of the reply when ``errno`` is 0,
            otherwise the non-zero ``errno``.
        """
        url = f'{BASE_URL}/api/list?order=time&desc=1&showempty=0&web=1&page=1&num=1000&dir={dir_path}&bdstoken={self.bdstoken}'
        response = self.session.get(url=url, headers=self.request_header, timeout=15, allow_redirects=False,
                                    verify=False)
        print(response.text)
        payload = response.json()
        return payload['errno'] if payload['errno'] != 0 else payload['list']

    @retry(stop_max_attempt_number=5, wait_random_min=1000, wait_random_max=3000)
    def create_dir(self, target_directory_name):
        """Create a remote directory.

        :param target_directory_name: value sent as the ``path`` form field.
        :return: the ``errno`` of the reply.
        """
        url = f'{BASE_URL}/api/create?a=commit&bdstoken={self.bdstoken}'
        post_data = {'path': target_directory_name, 'isdir': '1', 'block_list': '[]', }
        response = self.session.post(url=url, headers=self.request_header, data=post_data, timeout=15,
                                     allow_redirects=False, verify=False)
        print(response.text)
        return response.json()['errno']

    def update_cookie(self, bdclnd):
        """Set or replace the ``BDCLND`` cookie inside the Cookie header.

        :param bdclnd: new cookie value (the ``randsk`` returned by
            :meth:`verify_pass_code`).
        """
        cookie = self.request_header.get('Cookie', '')
        if 'BDCLND=' in cookie:
            # Stop at the next ';' or whitespace so neighbouring cookies are
            # kept even when they are not separated by a space.  A callable
            # replacement keeps backslashes in bdclnd from being read as
            # regex escapes.
            cookie = re.sub(r'BDCLND=[^;\s]*', lambda _match: f'BDCLND={bdclnd}', cookie)
        elif cookie:
            cookie += f';BDCLND={bdclnd}'
        else:
            cookie = f'BDCLND={bdclnd}'
        self.request_header['Cookie'] = cookie

    @retry(stop_max_attempt_number=6, wait_fixed=1700)
    def verify_pass_code(self, link_url, pass_code):
        """Submit the extraction code of a share link.

        :param link_url: share URL; characters 25-47 are sent as ``surl``.
            NOTE(review): this fixed slice presumably assumes the
            ``https://pan.baidu.com/s/1...`` link layout - confirm.
        :param pass_code: extraction code sent as ``pwd``.
        :return: ``randsk`` from the reply when ``errno`` is 0, otherwise the
            non-zero ``errno``.
        """
        check_url = f'{BASE_URL}/share/verify?surl={link_url[25:48]}&bdstoken={self.bdstoken}&t={str(int(round(time.time() * 1000)))}&channel=chunlei&web=1&clienttype=0'
        post_data = {'pwd': pass_code, 'vcode': '', 'vcode_str': '', }
        response = self.session.post(url=check_url, headers=self.request_header, data=post_data, timeout=10,
                                     allow_redirects=False, verify=False)
        print(response.text)
        payload = response.json()
        return payload['errno'] if payload['errno'] != 0 else payload['randsk']

    @retry(stop_max_attempt_number=12, wait_fixed=1700)
    def verify_links(self, link_url, pass_code):
        """Open a share page and extract the ids needed for a transfer.

        :param link_url: share URL to open.
        :param pass_code: extraction code; when falsy the verification step
            is skipped.
        :return: ``[shareid, share_uk, [fs_id, ...]]`` (all strings) on
            success; otherwise an error marker: the ``errno`` of the code
            verification, ``1`` / ``2`` when shareid / share_uk is missing,
            or - when no fs_id is found - the page title if present, else
            ``3``.
        """
        if pass_code:
            bdclnd = self.verify_pass_code(link_url, pass_code)
            # An int means verify_pass_code returned an errno, not a randsk.
            if isinstance(bdclnd, int):
                return bdclnd
            self.update_cookie(bdclnd)

        response = self.session.get(url=link_url, headers=self.request_header, timeout=15, allow_redirects=True,
                                    verify=False).content.decode("utf-8")
        print(response)
        shareid_list = re.findall(r'"shareid":(\d+?),"', response)
        user_id_list = re.findall(r'"share_uk":"(\d+?)","', response)
        fs_id_list = re.findall(r'"fs_id":(\d+?),"', response)
        info_title_list = re.findall(r'<title>(.+)</title>', response)

        if not shareid_list:
            return 1
        if not user_id_list:
            return 2
        if not fs_id_list:
            return info_title_list[0] if info_title_list else 3
        return [shareid_list[0], user_id_list[0], fs_id_list]

    @retry(stop_max_attempt_number=9, wait_random_min=1000, wait_random_max=3000)
    def transfer_files(self, verify_links_reason, target_directory_name):
        """Save the shared files into a directory of the logged-in account.

        :param verify_links_reason: the success result of
            :meth:`verify_links`, i.e. ``[shareid, share_uk, [fs_id, ...]]``.
        :param target_directory_name: destination; sent as ``/`` followed by
            this value.
        :return: the ``errno`` of the reply.
        """
        url = f'{BASE_URL}/share/transfer?shareid={verify_links_reason[0]}&from={verify_links_reason[1]}&bdstoken={self.bdstoken}&channel=chunlei&web=1&clienttype=0'
        post_data = {'fsidlist': f'[{",".join(verify_links_reason[2])}]',
                     'path': f'/{target_directory_name}', }
        response = self.session.post(url=url, headers=self.request_header, data=post_data, timeout=15,
                                     allow_redirects=False, verify=False)
        print(response.text)
        return response.json()['errno']

    def main(self):
        """Build and print the target directory path.

        The rest of the intended workflow is still disabled and kept below
        as a commented-out sketch.
        """
        base_dir = '/Temp/'
        cate_dir = 'leshe/'
        target_dir_name = '2'
        target_dir = os.path.join(base_dir, cate_dir, target_dir_name)
        print(target_dir)
        # link_list = []
        # self.get_cookies()
        # self.bdstoken = self.get_bdstoken()
        # dir_list = self.get_dir_list('/Temp/leshe')
        # print([_dir['path'] for _dir in dir_list])
        # if target_dir in [_dir['path'] for _dir in dir_list]:
        #     print('找到了')
        # # if dir_name and dir_name not in [_dir('path') for _dir in self.dir_list]:
        # #     self.create_dir(r'/Temp/leshe/1')
        # # Run the transfer
        # for link in link_list:
        #     verified_links = self.verify_links(link[0], link[1])
        #     self.transfer_files(verified_links, target_dir)
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run the client's main routine once.
    ReqAction().main()
Binary file not shown.
Loading…
Reference in new issue