修复多压缩文件的bug

main
roger_home_pc 11 months ago
parent cd996e96d0
commit 9d3986d5ac
  1. 14
      data_dict.py
  2. 20
      db.py
  3. 42
      file.py
  4. 117
      main.py
  5. 4
      unzip.py

@ -1,7 +1,7 @@
know_ext_name = ['.jpg', '.mp4', '.png', '.mov', '.txt', '.jpeg', '.m4v', '.flv', '.url', '.db', '.avi', '.mkv', '.bmp', know_ext_name = ['.jpg', '.mp4', '.png', '.mov', '.txt', '.jpeg', '.m4v', '.flv', '.url', '.db', '.avi', '.mkv', '.bmp',
'.ini', '.doc', '.docx', '.ogg', '.wmv', '.gif', '.ts', '.mts', '.iso', '.mpg', '.webp', '.heic', '.ini', '.doc', '.docx', '.ogg', '.wmv', '.gif', '.ts', '.mts', '.iso', '.mpg', '.webp', '.heic',
'.livp', '.ppt', '.mp3', '.htm', '.jfif', '.webm', '.3gp', '.m4a', '.rmvb', '.rm', '.asf', '.f4v', '.livp', '.ppt', '.mp3', '.htm', '.jfif', '.webm', '.3gp', '.m4a', '.rmvb', '.rm', '.asf', '.f4v',
'.mpeg', '.torrent', '.tiff'] '.mpeg', '.torrent', '.tiff', '.wav', '.vob', '.cr2', '.CR2', '.srt', '.apk']
handle_zip_ext_name = ['.zip', '.7z', '.001', '.rar', '.tar', '.wim'] handle_zip_ext_name = ['.zip', '.7z', '.001', '.rar', '.tar', '.wim']
@ -19,7 +19,7 @@ re_ext_list = {'.7z': r'(\.7z.+?$)',
clear_list = { clear_list = {
'file_name': ['ds_store'], 'file_name': ['ds_store'],
'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt'], 'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt', '.htm', '.xltd', '.url', '.srt', '.apk'],
} }
pwd_dict = { pwd_dict = {
@ -71,4 +71,14 @@ ext_name_list = {'.7': '.7z',
'.c': '.zip', '.c': '.zip',
'.word': '.rar', '.word': '.rar',
'.7删除z': '.7z', '.7删除z': '.7z',
'.7z1': '.7z',
'.7z删除': '.7z',
'.7z(1)': '.7z',
'.zip删除': '.zip',
'.rar删除': '.rar',
'.001删除': '.001',
'.7z删除汉字再解压一次': '.7z',
'.7z删除中文': '.7z',
'.7删z': '.7z',
'.downloading': '.7z',
} }

20
db.py

@ -27,7 +27,7 @@ class DbAction:
return _pwd return _pwd
def get_data_by_id(self, _id): def get_data_by_id(self, _id):
SELECT_SQL = "SELECT * FROM scrapyh s WHERE s.id = %s;" SELECT_SQL = "SELECT `id`, cate, `date`, name, unzip_pwd FROM scrapyh s WHERE s.id = %s;"
self.cursor.execute(SELECT_SQL, (_id,)) self.cursor.execute(SELECT_SQL, (_id,))
result = self.cursor.fetchone() result = self.cursor.fetchone()
if result: if result:
@ -35,12 +35,14 @@ class DbAction:
return result return result
def get_available_pwd(self):
    """Return the ``pwd`` values of the 20 most used stored passwords.

    Rows are read from ``scrapyh_pwd`` ordered by ``times`` descending, so the
    passwords that worked most often come first.
    """
    query = "SELECT * FROM scrapyh_pwd sp ORDER BY sp.times DESC LIMIT 20;"
    self.cursor.execute(query)
    passwords = []
    for row in self.cursor.fetchall():
        passwords.append(row['pwd'])
    return passwords
def insert_pwd(self, _pwd): def insert_pwd(self, _pwd):
if _pwd:
SELECT_SQL = "SELECT * FROM scrapyh_pwd sp WHERE sp.pwd = %s;" SELECT_SQL = "SELECT * FROM scrapyh_pwd sp WHERE sp.pwd = %s;"
self.cursor.execute(SELECT_SQL, (_pwd,)) self.cursor.execute(SELECT_SQL, (_pwd,))
result = self.cursor.fetchone() result = self.cursor.fetchone()
@ -52,3 +54,17 @@ class DbAction:
except Exception as e: except Exception as e:
print(e) print(e)
self.conn.rollback() self.conn.rollback()
def update_pwd(self, _pwd):
    """Increment the usage counter of a stored password.

    Adds 1 to the ``times`` column of every ``scrapyh_pwd`` row whose ``pwd``
    equals ``_pwd`` and commits the change.

    :param _pwd: password to look up; bound as a query parameter.
    :raises Exception: whatever the driver raises. The transaction is rolled
        back before the error is propagated.
    """
    UPDATE_SQL = "UPDATE scrapyh_pwd sp SET sp.times = sp.times+1 WHERE sp.pwd = %s;"
    try:
        self.cursor.execute(UPDATE_SQL, (_pwd,))
        self.conn.commit()
    except Exception:
        # Do not leave a half-finished transaction on the shared connection;
        # the caller still sees the original error.
        self.conn.rollback()
        raise
def get_failed(self, _id_list):
    """Print ``id``, ``url`` and ``unzip_pwd`` of the given ``scrapyh`` rows.

    :param _id_list: iterable of record ids to look up.
    """
    ids = list(_id_list)
    if not ids:
        # Nothing to report, and an empty "IN ()" list is rejected by MySQL.
        return
    # One placeholder per id. Binding the joined string "1, 2, 3" to a single
    # %s sends ONE quoted value, so the IN list never matched the ids.
    placeholders = ', '.join(['%s'] * len(ids))
    SELECT_SQL = f"SELECT `id`, url, unzip_pwd FROM scrapyh s WHERE s.id IN ({placeholders});"
    self.cursor.execute(SELECT_SQL, tuple(ids))
    for r in self.cursor.fetchall() or ():
        print(r)

@ -30,7 +30,7 @@ class FilesUnzip:
# 目标根目录,返回根目录下全部文件夹的列表 # 目标根目录,返回根目录下全部文件夹的列表
def get_root_folder_list(self):
    """Return the sorted names of the folders directly under ``self.root_path``.

    An empty list is returned when ``self.root_path`` cannot be walked
    (for example because it does not exist).
    """
    # Only the first os.walk() entry is needed. Materialising the whole
    # generator would traverse every nested directory, and indexing [0] on an
    # empty result would raise IndexError.
    _, folders, _ = next(os.walk(self.root_path), (self.root_path, [], []))
    return sorted(folders)
# 目标文件, 获取文件扩展名, 返回 文件路径+文件名, 扩展名 # 目标文件, 获取文件扩展名, 返回 文件路径+文件名, 扩展名
@staticmethod @staticmethod
@ -46,6 +46,8 @@ class FilesUnzip:
new = self.change_to_know_name(_f) new = self.change_to_know_name(_f)
_, ext_new = self.get_ext_name(new) _, ext_new = self.get_ext_name(new)
if ext_new in handle_zip_ext_name: if ext_new in handle_zip_ext_name:
if self.del_small_zip_file(new):
continue
new_file_list['handle_zip'].append(new) new_file_list['handle_zip'].append(new)
new_file_list['zip'].append(new) new_file_list['zip'].append(new)
elif ext_new in know_zip_ext_name: elif ext_new in know_zip_ext_name:
@ -64,6 +66,7 @@ class FilesUnzip:
for unknown_ext, know_ext in ext_name_list.items(): for unknown_ext, know_ext in ext_name_list.items():
if ext_file_name == unknown_ext: if ext_file_name == unknown_ext:
new_file_name = base_file_name + know_ext new_file_name = base_file_name + know_ext
log_info(f'修改未知扩展名: {_file} -> {new_file_name}')
file_rename(_file, new_file_name) file_rename(_file, new_file_name)
return new_file_name return new_file_name
return _file return _file
@ -73,8 +76,17 @@ class FilesUnzip:
def del_all_files(path_list):
    """Delete every file in ``path_list`` and log the outcome of each deletion.

    :param path_list: iterable of file paths to delete.
    """
    for path in path_list:
        # os.system() returns an int, so comparing its result with the string
        # '0' was always False and every deletion was logged as failed.
        # os.remove() reports failure through OSError and avoids the shell.
        try:
            os.remove(path)
            deleted = True
        except OSError:
            deleted = False
        log_info(f"删除文件{'成功' if deleted else '失败'}{path}")
# 删除小文件
@staticmethod
def del_small_zip_file(_path):
    """Delete ``_path`` when it is smaller than 1,024,000 bytes.

    :param _path: path of the archive to check.
    :return: ``True`` when the file was below the threshold (a deletion was
        attempted, whether or not it succeeded), ``False`` otherwise.
    """
    if os.path.getsize(_path) >= 1024000:
        return False
    log_info(f"无效文件小于1MB,将被删除 {_path}")
    # os.system() returns an int, so the former comparison with the string '0'
    # always logged a failure. os.remove() reports the real outcome.
    try:
        os.remove(_path)
        deleted = True
    except OSError:
        deleted = False
    log_info(f"删除文件{'成功' if deleted else '失败'}{_path}")
    return True
# 获取可删除文件列表 # 获取可删除文件列表
# def get_del_files(self, _path): # def get_del_files(self, _path):
@ -156,12 +168,14 @@ class FilesCollection:
for move in self.get_move_files(): for move in self.get_move_files():
# print(move) # print(move)
if not os.path.exists(os.path.dirname(move[1])): if not os.path.exists(os.path.dirname(move[1])):
log_info(f'创建文件夹: {os.path.dirname(move[1])}')
os.makedirs(os.path.dirname(move[1])) os.makedirs(os.path.dirname(move[1]))
log_info(f'移动文件:{move[0]} -> {move[1]}')
shutil.move(move[0], move[1]) shutil.move(move[0], move[1])
# 获取一个空文件夹 # 获取一个空文件夹, 如果是空文件夹则返回路径,否则返回False
@staticmethod @staticmethod
def get_empty(_path): def is_empty(_path):
tree = list(os.walk(_path)) tree = list(os.walk(_path))
empty_list = [] empty_list = []
for leaf in tree: for leaf in tree:
@ -171,18 +185,30 @@ class FilesCollection:
# 清除全部空文件夹 # 清除全部空文件夹
def remove_empty(self):
    """Remove folders reported by ``self.is_empty`` until it reports none.

    ``self.is_empty(self.path)`` is called repeatedly; every truthy result is
    treated as a folder path whose read-only flag is cleared and which is then
    removed.
    """
    while _empty := self.is_empty(self.path):
        log_info(f'移除空文件夹:{_empty}')
        # Quote the path: unquoted, a folder name containing spaces is split
        # into several arguments and the read-only flag is not cleared.
        os.system(f'attrib -r "{_empty}"')
        # NOTE: os.removedirs() also prunes parent directories that become
        # empty after the leaf is removed.
        os.removedirs(_empty)
# 如果根目录是空的话就移除根目录并返回True
def remove_empty_root_folder(self):
    """Remove the folder reported by ``self.is_empty(self.path)``, if any.

    :return: ``True`` when ``is_empty`` returned a path and it was removed,
        ``False`` when it returned a falsy value.
    """
    # NOTE(review): this assumes is_empty() returns self.path itself when the
    # root is empty; confirm it cannot return a nested empty folder instead.
    if _empty := self.is_empty(self.path):
        log_info(f'移除空的根目录:{_empty}')
        # Quote the path: unquoted, a folder name containing spaces is split
        # into several arguments and the read-only flag is not cleared.
        os.system(f'attrib -r "{_empty}"')
        # NOTE: os.removedirs() also prunes parent directories that become
        # empty after the leaf is removed.
        os.removedirs(_empty)
        return True
    return False
# 重命名根路径的文件夹, 如果只有一个文件就把文件重命名 # 重命名根路径的文件夹, 如果只有一个文件就把文件重命名
def rename_root_folder(self, _root, name, _org, _target): def rename_root_folder(self, _root, name, _org, _target):
if len(file := get_all_files(_org)) == 1: if len(file := get_all_files(_org)) == 1:
ext = os.path.splitext(file[0])[1] ext = os.path.splitext(file[0])[1]
log_info(f'重命名:{file[0]} -> {os.path.join(_root, name + ext)}')
file_rename(file[0], os.path.join(_root, name + ext)) file_rename(file[0], os.path.join(_root, name + ext))
self.remove_empty() self.remove_empty()
else: else:
log_info(f'重命名:{_org} -> {_target}')
file_rename(_org, _target) file_rename(_org, _target)
@ -198,4 +224,6 @@ if __name__ == '__main__':
# all_file = files.get_all_files(os.path.join(root, f)) # all_file = files.get_all_files(os.path.join(root, f))
# print(files.clear_files(all_file)) # print(files.clear_files(all_file))
# print(files.collection_files(r"F:\Temp\sjry\hj\35316")) # print(files.collection_files(r"F:\Temp\sjry\hj\35316"))
print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345'))) # print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345')))
col_obj = FilesCollection(r'F:\Temp\leshe\leshe_20230810\35480')
print(col_obj.remove_empty_root_folder())

@ -1,10 +1,12 @@
import os.path import os.path
from time import sleep
from log import logger from log import logger
import unzip import unzip
import file import file
import db import db
root_path = r'F:\Temp\sjry\hj' root_path = r'F:\Temp\leshe_20240101'
# 初始化数据库 # 初始化数据库
db_obj = db.DbAction() db_obj = db.DbAction()
@ -12,59 +14,102 @@ file_obj = file.FilesUnzip(root_path)
unzip_obj = unzip.UnzipFile() unzip_obj = unzip.UnzipFile()
# 初始化成功和失败任务列表 # 初始化成功和失败任务列表
unzip_succeed, unzip_failed = [], [] unzip_succeed, unzip_failed, unknown, none_unzip_pwd = [], [], [], []
# 开始任务 def unzip_task(folder):
def start_unzip_task():
result = True result = True
for folder in (set(file_obj.get_root_folder_list()) - set(unzip_succeed)):
logger.info(f'开始解压 {folder}')
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
print(all_file) logger.info(all_file)
# print(all_file)
if not all_file['handle_zip'] and not all_file['zip'] and not all_file['others'] and not all_file['unknown']:
file_col_obj = file.FilesCollection(os.path.join(root_path, folder))
file_col_obj.remove_empty_root_folder()
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
# 从数据库中获取数据 # 从数据库中获取数据
data = db_obj.get_data_by_id(folder) data = db_obj.get_data_by_id(folder)
print(data) print(data)
if not data:
logger.info(f'{folder} 在数据库中不存在或以处理完成,请检查')
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
if not data['unzip_pwd']:
logger.info(f'解压密码缺失:{folder}')
none_unzip_pwd.append(folder) if folder not in none_unzip_pwd else ''
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
if data and all_file['handle_zip']: if data and all_file['handle_zip']:
# 解压, 增加适用历史密码重试的功能 # 解压, 增加适用历史密码重试的功能
if unzip_obj.unzip(all_file['handle_zip'][0], data['unzip_pwd']): result_pass = []
file_obj.del_all_files(all_file['zip']) result_fail = []
for unzip_file in all_file['handle_zip']:
logger.info(f"使用密码{data['unzip_pwd']}解压{unzip_file}")
if unzip_obj.unzip(unzip_file, data['unzip_pwd']):
result_pass.append(unzip_file)
else: else:
print('password') result_fail.append(unzip_file)
print(db_obj.get_available_pwd())
if len(result_pass) == len(all_file['handle_zip']):
logger.info(f"全部解压成功,删除全部压缩文件")
file_obj.del_all_files(all_file['zip'])
return
for pwd in db_obj.get_available_pwd(): for pwd in db_obj.get_available_pwd():
result = unzip_obj.unzip(all_file['handle_zip'][0], pwd) logger.info(f'使用历史密码{pwd}重新解压')
if result: for index, unzip_failed_file in enumerate(result_fail):
if unzip_obj.unzip(unzip_failed_file, pwd):
db_obj.update_pwd(pwd)
result_pass.append(unzip_failed_file)
result_fail.pop(index)
logger.info(f"部分解压成功,删除全部压缩文件")
file_obj.del_all_files(all_file['zip']) file_obj.del_all_files(all_file['zip'])
break
# 重新获取文件检查解压结果 # 重新获取文件检查解压结果
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果 all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
print(all_file) # print(all_file)
# 检查没哟解压的文件 # 检查没有解压的文件
if all_file['handle_zip'] or all_file['zip']: if all_file['handle_zip'] or all_file['zip']:
logger.info(f'{folder} 中依然存在没有解压的文件,请检查') logger.info(f'{folder} 中依然存在没有解压的文件,请检查')
unzip_failed.append(folder) if folder not in unzip_failed else '' unzip_failed.append(folder) if folder not in unzip_failed else ''
result = False result = False
# 检查未知文件
elif all_file['unknown']:
logger.info("打印没有处理的文件扩展名:")
logger.info(', '.join(all_file['unknown']))
unzip_failed.append(folder) if folder not in unzip_failed else ''
result = False
# 检查打印结果 # 检查打印结果
if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']: if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']:
db_obj.insert_pwd(data['unzip_pwd']) db_obj.insert_pwd(data['unzip_pwd'])
unzip_succeed.append(folder) if folder not in unzip_succeed else '' unzip_succeed.append(folder) if folder not in unzip_succeed else ''
unzip_failed.remove(folder) if folder in unzip_failed else ''
logger.info('全部文件已解压') logger.info('全部文件已解压')
# 检查未知文件
if all_file['unknown']:
logger.info("打印没有处理的文件扩展名:")
logger.info(', '.join(all_file['unknown']))
unzip_failed.append(folder) if folder not in unzip_failed else ''
unknown.append(folder) if folder not in unknown else ''
result = False
return result return result
# 开始任务
def start_unzip_task():
    """Run up to five unzip rounds for every root folder not yet unzipped.

    Folders already recorded in ``unzip_succeed`` are skipped; the rest are
    processed in sorted order, and a folder's rounds stop as soon as
    ``unzip_task`` returns a truthy value.
    """
    pending = set(file_obj.get_root_folder_list()).difference(unzip_succeed)
    for folder in sorted(pending):
        logger.info(f'开始解压 {folder}')
        for n in range(1, 6):
            logger.info(f'{n}轮解压任务')
            if unzip_task(folder):
                break
# 整理文件夹 # 整理文件夹
def start_collation_task(): def start_collation_task():
logger.info('开始整理文件夹') logger.info('开始整理文件夹')
@ -73,6 +118,8 @@ def start_collation_task():
file_col_obj = file.FilesCollection(folder_path) file_col_obj = file.FilesCollection(folder_path)
if db_obj.get_data_by_id(folder) and folder not in unzip_failed: if db_obj.get_data_by_id(folder) and folder not in unzip_failed:
logger.info(f'开始整理 {folder}') logger.info(f'开始整理 {folder}')
if file_col_obj.remove_empty_root_folder():
continue
# 清除多余的文件 # 清除多余的文件
file_col_obj.clear_files() file_col_obj.clear_files()
# 整理无效文件夹 # 整理无效文件夹
@ -82,23 +129,27 @@ def start_collation_task():
# 从数据库中获取数据 # 从数据库中获取数据
data = db_obj.get_data_by_id(folder) data = db_obj.get_data_by_id(folder)
# 重命名文件夹 # 重命名文件夹
name = str(data['id']) + '_' + data['name'] name = 'id' + str(data['id']) + '_' + data['name']
file_col_obj.rename_root_folder(root_path, name, folder_path, os.path.join(root_path, name)) file_col_obj.rename_root_folder(root_path, name, folder_path, os.path.join(root_path, name))
def pc_sleep(delay):
    """Wait ``delay`` seconds, then issue the Windows suspend command."""
    suspend_cmd = "rundll32.exe powrprof.dll,SetSuspendState Sleep"
    sleep(delay)
    os.system(suspend_cmd)
def main():
    """Unzip every folder, tidy the results, then log a summary of problems.

    The summary lists the folders that failed to unzip (and prints their
    database rows), the folders containing unknown files, and the folders
    whose unzip password is missing.
    """
    start_unzip_task()
    start_collation_task()

    logger.info('解压失败的任务:')
    logger.info(', '.join(unzip_failed))
    db_obj.get_failed(unzip_failed)

    remaining_sections = (
        ('包含未知文件:', unknown),
        ('解压密码缺失:', none_unzip_pwd),
    )
    for title, folders in remaining_sections:
        logger.info(title)
        logger.info(', '.join(folders))
if __name__ == '__main__': if __name__ == '__main__':

@ -35,10 +35,10 @@ class UnzipFile:
@staticmethod @staticmethod
def unzip_7z(source, password=""): def unzip_7z(source, password=""):
# 准备参数 # 准备参数
cmd = '7z.exe x ' cmd = '7z.exe x -y '
args = '-o' args = '-o'
if password: if password:
pwd = '-p' + str(password) pwd = '-p"' + str(password) + '"'
else: else:
pwd = '' pwd = ''
target = os.path.splitext(source)[0] + '\\' target = os.path.splitext(source)[0] + '\\'

Loading…
Cancel
Save