From 9d3986d5acac8dd74e6e2978183ff14f456254d1 Mon Sep 17 00:00:00 2001 From: roger_home_pc Date: Mon, 1 Jan 2024 12:27:09 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A4=9A=E5=8E=8B=E7=BC=A9?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_dict.py | 14 ++++- db.py | 42 +++++++++----- file.py | 42 +++++++++++--- main.py | 159 ++++++++++++++++++++++++++++++++++----------------- unzip.py | 4 +- 5 files changed, 183 insertions(+), 78 deletions(-) diff --git a/data_dict.py b/data_dict.py index 1ac7048..2130cb6 100644 --- a/data_dict.py +++ b/data_dict.py @@ -1,7 +1,7 @@ know_ext_name = ['.jpg', '.mp4', '.png', '.mov', '.txt', '.jpeg', '.m4v', '.flv', '.url', '.db', '.avi', '.mkv', '.bmp', '.ini', '.doc', '.docx', '.ogg', '.wmv', '.gif', '.ts', '.mts', '.iso', '.mpg', '.webp', '.heic', '.livp', '.ppt', '.mp3', '.htm', '.jfif', '.webm', '.3gp', '.m4a', '.rmvb', '.rm', '.asf', '.f4v', - '.mpeg', '.torrent', '.tiff'] + '.mpeg', '.torrent', '.tiff', '.wav', '.vob', '.cr2', '.CR2', '.srt', '.apk'] handle_zip_ext_name = ['.zip', '.7z', '.001', '.rar', '.tar', '.wim'] @@ -19,7 +19,7 @@ re_ext_list = {'.7z': r'(\.7z.+?$)', clear_list = { 'file_name': ['ds_store'], - 'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt'], + 'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt', '.htm', '.xltd', '.url', '.srt', '.apk'], } pwd_dict = { @@ -71,4 +71,14 @@ ext_name_list = {'.7': '.7z', '.c': '.zip', '.word': '.rar', '.7删除z': '.7z', + '.7z1': '.7z', + '.7z删除': '.7z', + '.7z(1)': '.7z', + '.zip删除': '.zip', + '.rar删除': '.rar', + '.001删除': '.001', + '.7z删除汉字再解压一次': '.7z', + '.7z删除中文': '.7z', + '.7删z': '.7z', + '.downloading': '.7z', } diff --git a/db.py b/db.py index 750c416..314f410 100644 --- a/db.py +++ b/db.py @@ -27,7 +27,7 @@ class DbAction: return _pwd def get_data_by_id(self, _id): - SELECT_SQL = "SELECT * FROM scrapyh s WHERE s.id = %s;" + SELECT_SQL = "SELECT `id`, cate, `date`, name, unzip_pwd FROM scrapyh s WHERE s.id = %s;" self.cursor.execute(SELECT_SQL, (_id,)) result = self.cursor.fetchone() if result: @@ -35,20 +35,36 @@ class DbAction: return result def get_available_pwd(self): - SELECT_SQL = "SELECT * FROM scrapyh_pwd;" + SELECT_SQL = "SELECT * FROM scrapyh_pwd sp ORDER BY sp.times DESC LIMIT 20;" + # SELECT_SQL = "SELECT * FROM scrapyh_pwd;" self.cursor.execute(SELECT_SQL) result = self.cursor.fetchall() return [r['pwd'] for r in result] def insert_pwd(self, _pwd): - SELECT_SQL = "SELECT * FROM scrapyh_pwd sp WHERE sp.pwd = %s;" - self.cursor.execute(SELECT_SQL, (_pwd,)) - result = self.cursor.fetchone() - if not result: - try: - INSERT_SQL = "INSERT INTO scrapyh_pwd (pwd) VALUES (%s);" - self.cursor.execute(INSERT_SQL, (_pwd,)) - self.conn.commit() - except Exception as e: - print(e) - self.conn.rollback() + if _pwd: + SELECT_SQL = "SELECT * FROM scrapyh_pwd sp WHERE sp.pwd = %s;" + self.cursor.execute(SELECT_SQL, (_pwd,)) + result = self.cursor.fetchone() + if not result: + try: + INSERT_SQL = "INSERT INTO scrapyh_pwd (pwd) VALUES (%s);" + self.cursor.execute(INSERT_SQL, (_pwd,)) + self.conn.commit() + except Exception as e: + print(e) + self.conn.rollback() + + def update_pwd(self, _pwd): + UPDATE_SQL = "UPDATE scrapyh_pwd sp SET sp.times = sp.times+1 WHERE sp.pwd = %s;" + self.cursor.execute(UPDATE_SQL, (_pwd,)) + self.conn.commit() + + def get_failed(self, _id_list): + id_list_str = ', '.join(_id_list) + SELECT_SQL = "SELECT `id`, url, unzip_pwd FROM scrapyh s WHERE s.id IN (%s);" + self.cursor.execute(SELECT_SQL, (id_list_str,)) + result = self.cursor.fetchall() + if result: + for r in result: + print(r) \ No newline at end of file diff --git a/file.py b/file.py index c7d56ca..7c983a1 100644 --- a/file.py +++ b/file.py @@ -30,7 +30,7 @@ class FilesUnzip: # 目标根目录,返回根目录下全部文件夹的列表 def get_root_folder_list(self): - return list(os.walk(self.root_path))[0][1] + return sorted(list(os.walk(self.root_path))[0][1]) # 目标文件, 获取文件扩展名, 返回 文件路径+文件名, 扩展名 @staticmethod @@ -46,6 +46,8 @@ class FilesUnzip: new = self.change_to_know_name(_f) _, ext_new = self.get_ext_name(new) if ext_new in handle_zip_ext_name: + if self.del_small_zip_file(new): + continue new_file_list['handle_zip'].append(new) new_file_list['zip'].append(new) elif ext_new in know_zip_ext_name: @@ -64,6 +66,7 @@ class FilesUnzip: for unknown_ext, know_ext in ext_name_list.items(): if ext_file_name == unknown_ext: new_file_name = base_file_name + know_ext + log_info(f'修改未知扩展名: {_file} -> {new_file_name}') file_rename(_file, new_file_name) return new_file_name return _file @@ -73,8 +76,17 @@ class FilesUnzip: def del_all_files(path_list): for path in path_list: result = os.system(f'del "{path}"') - # logger.info(f"删除文件成功: {path}") - log_info(f"删除文件{'成功' if result else '失败'}: {path}") + log_info(f"删除文件{'成功' if result == '0' else '失败'}: {path}") + + # 删除小文件 + @staticmethod + def del_small_zip_file(_path): + if os.path.getsize(_path) < 1024000: + log_info(f"无效文件小于1MB,将被删除 {_path}") + result = os.system(f'del "{_path}"') + log_info(f"删除文件{'成功' if result == '0' else '失败'}: {_path}") + return True + return False # 获取可删除文件列表 # def get_del_files(self, _path): @@ -156,12 +168,14 @@ class FilesCollection: for move in self.get_move_files(): # print(move) if not os.path.exists(os.path.dirname(move[1])): + log_info(f'创建文件夹: {os.path.dirname(move[1])}') os.makedirs(os.path.dirname(move[1])) + log_info(f'移动文件:{move[0]} -> {move[1]}') shutil.move(move[0], move[1]) - # 获取一个空文件夹 + # 获取一个空文件夹, 如果是空文件夹则返回路径,否则返回False @staticmethod - def get_empty(_path): + def is_empty(_path): tree = list(os.walk(_path)) empty_list = [] for leaf in tree: @@ -171,18 +185,30 @@ class FilesCollection: # 清除全部空文件夹 def remove_empty(self): - while _empty := self.get_empty(self.path): + while _empty := self.is_empty(self.path): # print(_empty) + log_info(f'移除空文件夹:{_empty}') os.system(f"attrib -r {_empty}") os.removedirs(_empty) + # 如果根目录是空的话就移除根目录并返回True + def remove_empty_root_folder(self): + if _empty := self.is_empty(self.path): + log_info(f'移除空的根目录:{_empty}') + os.system(f"attrib -r {_empty}") + os.removedirs(_empty) + return True + return False + # 重命名根路径的文件夹, 如果只有一个文件就把文件重命名 def rename_root_folder(self, _root, name, _org, _target): if len(file := get_all_files(_org)) == 1: ext = os.path.splitext(file[0])[1] + log_info(f'重命名:{file[0]} -> {os.path.join(_root, name + ext)}') file_rename(file[0], os.path.join(_root, name + ext)) self.remove_empty() else: + log_info(f'重命名:{_org} -> {_target}') file_rename(_org, _target) @@ -198,4 +224,6 @@ if __name__ == '__main__': # all_file = files.get_all_files(os.path.join(root, f)) # print(files.clear_files(all_file)) # print(files.collection_files(r"F:\Temp\sjry\hj\35316")) - print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345'))) + # print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345'))) + col_obj = FilesCollection(r'F:\Temp\leshe\leshe_20230810\35480') + print(col_obj.remove_empty_root_folder()) diff --git a/main.py b/main.py index 83a55d2..deebc24 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,12 @@ import os.path +from time import sleep + from log import logger import unzip import file import db -root_path = r'F:\Temp\sjry\hj' +root_path = r'F:\Temp\leshe_20240101' # 初始化数据库 db_obj = db.DbAction() @@ -12,59 +14,102 @@ file_obj = file.FilesUnzip(root_path) unzip_obj = unzip.UnzipFile() # 初始化成功和失败任务列表 -unzip_succeed, unzip_failed = [], [] +unzip_succeed, unzip_failed, unknown, none_unzip_pwd = [], [], [], [] -# 开始任务 -def start_unzip_task(): +def unzip_task(folder): result = True - for folder in (set(file_obj.get_root_folder_list()) - set(unzip_succeed)): - logger.info(f'开始解压 {folder}') - all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 - print(all_file) + all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 + logger.info(all_file) + # print(all_file) + + if not all_file['handle_zip'] and not all_file['zip'] and not all_file['others'] and not all_file['unknown']: + file_col_obj = file.FilesCollection(os.path.join(root_path, folder)) + file_col_obj.remove_empty_root_folder() + unzip_failed.append(folder) if folder not in unzip_failed else '' + return False + + # 从数据库中获取数据 + data = db_obj.get_data_by_id(folder) + print(data) + if not data: + logger.info(f'{folder} 在数据库中不存在或以处理完成,请检查') + unzip_failed.append(folder) if folder not in unzip_failed else '' + return False + + if not data['unzip_pwd']: + logger.info(f'解压密码缺失:{folder}') + none_unzip_pwd.append(folder) if folder not in none_unzip_pwd else '' + unzip_failed.append(folder) if folder not in unzip_failed else '' + return False + + if data and all_file['handle_zip']: + # 解压, 增加适用历史密码重试的功能 + result_pass = [] + result_fail = [] + for unzip_file in all_file['handle_zip']: + logger.info(f"使用密码{data['unzip_pwd']}解压{unzip_file}") + if unzip_obj.unzip(unzip_file, data['unzip_pwd']): + result_pass.append(unzip_file) + else: + result_fail.append(unzip_file) - # 从数据库中获取数据 - data = db_obj.get_data_by_id(folder) - print(data) + if len(result_pass) == len(all_file['handle_zip']): + logger.info(f"全部解压成功,删除全部压缩文件") + file_obj.del_all_files(all_file['zip']) + return - if data and all_file['handle_zip']: - # 解压, 增加适用历史密码重试的功能 - if unzip_obj.unzip(all_file['handle_zip'][0], data['unzip_pwd']): - file_obj.del_all_files(all_file['zip']) - else: - print('password') - print(db_obj.get_available_pwd()) - for pwd in db_obj.get_available_pwd(): - result = unzip_obj.unzip(all_file['handle_zip'][0], pwd) - if result: - file_obj.del_all_files(all_file['zip']) - break - - # 重新获取文件检查解压结果 - all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 - print(all_file) - - # 检查没哟解压的文件 - if all_file['handle_zip'] or all_file['zip']: - logger.info(f'{folder} 中依然存在没有解压的文件,请检查') - unzip_failed.append(folder) if folder not in unzip_failed else '' - result = False + for pwd in db_obj.get_available_pwd(): + logger.info(f'使用历史密码{pwd}重新解压') + for index, unzip_failed_file in enumerate(result_fail): + if unzip_obj.unzip(unzip_failed_file, pwd): + db_obj.update_pwd(pwd) + result_pass.append(unzip_failed_file) + result_fail.pop(index) + + logger.info(f"部分解压成功,删除全部压缩文件") + file_obj.del_all_files(all_file['zip']) + # 重新获取文件检查解压结果 + all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 + # print(all_file) + + # 检查没有解压的文件 + if all_file['handle_zip'] or all_file['zip']: + logger.info(f'{folder} 中依然存在没有解压的文件,请检查') + unzip_failed.append(folder) if folder not in unzip_failed else '' + result = False + + # 检查打印结果 + if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']: + db_obj.insert_pwd(data['unzip_pwd']) + unzip_succeed.append(folder) if folder not in unzip_succeed else '' + unzip_failed.remove(folder) if folder in unzip_failed else '' + logger.info('全部文件已解压') # 检查未知文件 - elif all_file['unknown']: - logger.info("打印没有处理的文件扩展名:") - logger.info(', '.join(all_file['unknown'])) - unzip_failed.append(folder) if folder not in unzip_failed else '' - result = False - - # 检查打印结果 - if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']: - db_obj.insert_pwd(data['unzip_pwd']) - unzip_succeed.append(folder) if folder not in unzip_succeed else '' - logger.info('全部文件已解压') + if all_file['unknown']: + logger.info("打印没有处理的文件扩展名:") + logger.info(', '.join(all_file['unknown'])) + unzip_failed.append(folder) if folder not in unzip_failed else '' + unknown.append(folder) if folder not in unknown else '' + result = False return result +# 开始任务 +def start_unzip_task(): + for folder in sorted(list(set(file_obj.get_root_folder_list()) - set(unzip_succeed))): + logger.info(f'开始解压 {folder}') + n = 1 + # unzip_result = False + while n <= 5: + logger.info(f'第{n}轮解压任务') + unzip_result = unzip_task(folder) + if unzip_result: + break + n += 1 + + # 整理文件夹 def start_collation_task(): logger.info('开始整理文件夹') @@ -73,6 +118,8 @@ def start_collation_task(): file_col_obj = file.FilesCollection(folder_path) if db_obj.get_data_by_id(folder) and folder not in unzip_failed: logger.info(f'开始整理 {folder}') + if file_col_obj.remove_empty_root_folder(): + continue # 清除多余的文件 file_col_obj.clear_files() # 整理无效文件夹 @@ -82,23 +129,27 @@ def start_collation_task(): # 从数据库中获取数据 data = db_obj.get_data_by_id(folder) # 重命名文件夹 - name = str(data['id']) + '_' + data['name'] + name = 'id' + str(data['id']) + '_' + data['name'] file_col_obj.rename_root_folder(root_path, name, folder_path, os.path.join(root_path, name)) +def pc_sleep(delay): + sleep(delay) + os.system("rundll32.exe powrprof.dll,SetSuspendState Sleep") + + def main(): - n = 1 - # unzip_result = False - while n <= 5: - logger.info(f'第{n}轮解压任务') - unzip_result = start_unzip_task() - if unzip_result: - break - n += 1 + start_unzip_task() # if unzip_result: start_collation_task() - logger.info('失败的任务:') + logger.info('解压失败的任务:') logger.info(', '.join(unzip_failed)) + db_obj.get_failed(unzip_failed) + logger.info('包含未知文件:') + logger.info(', '.join(unknown)) + logger.info('解压密码缺失:') + logger.info(', '.join(none_unzip_pwd)) + # pc_sleep(10) if __name__ == '__main__': diff --git a/unzip.py b/unzip.py index 554f399..1bcdf4d 100644 --- a/unzip.py +++ b/unzip.py @@ -35,10 +35,10 @@ class UnzipFile: @staticmethod def unzip_7z(source, password=""): # 准备参数 - cmd = '7z.exe x ' + cmd = '7z.exe x -y ' args = '-o' if password: - pwd = '-p' + str(password) + pwd = '-p"' + str(password) + '"' else: pwd = '' target = os.path.splitext(source)[0] + '\\'