修复多压缩文件的bug

main
roger_home_pc 11 months ago
parent cd996e96d0
commit 9d3986d5ac
  1. 14
      data_dict.py
  2. 42
      db.py
  3. 42
      file.py
  4. 159
      main.py
  5. 4
      unzip.py

@ -1,7 +1,7 @@
know_ext_name = ['.jpg', '.mp4', '.png', '.mov', '.txt', '.jpeg', '.m4v', '.flv', '.url', '.db', '.avi', '.mkv', '.bmp',
'.ini', '.doc', '.docx', '.ogg', '.wmv', '.gif', '.ts', '.mts', '.iso', '.mpg', '.webp', '.heic',
'.livp', '.ppt', '.mp3', '.htm', '.jfif', '.webm', '.3gp', '.m4a', '.rmvb', '.rm', '.asf', '.f4v',
'.mpeg', '.torrent', '.tiff']
'.mpeg', '.torrent', '.tiff', '.wav', '.vob', '.cr2', '.CR2', '.srt', '.apk']
handle_zip_ext_name = ['.zip', '.7z', '.001', '.rar', '.tar', '.wim']
@ -19,7 +19,7 @@ re_ext_list = {'.7z': r'(\.7z.+?$)',
clear_list = {
'file_name': ['ds_store'],
'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt'],
'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk', '.txt', '.htm', '.xltd', '.url', '.srt', '.apk'],
}
pwd_dict = {
@ -71,4 +71,14 @@ ext_name_list = {'.7': '.7z',
'.c': '.zip',
'.word': '.rar',
'.7删除z': '.7z',
'.7z1': '.7z',
'.7z删除': '.7z',
'.7z(1)': '.7z',
'.zip删除': '.zip',
'.rar删除': '.rar',
'.001删除': '.001',
'.7z删除汉字再解压一次': '.7z',
'.7z删除中文': '.7z',
'.7删z': '.7z',
'.downloading': '.7z',
}

42
db.py

@ -27,7 +27,7 @@ class DbAction:
return _pwd
def get_data_by_id(self, _id):
SELECT_SQL = "SELECT * FROM scrapyh s WHERE s.id = %s;"
SELECT_SQL = "SELECT `id`, cate, `date`, name, unzip_pwd FROM scrapyh s WHERE s.id = %s;"
self.cursor.execute(SELECT_SQL, (_id,))
result = self.cursor.fetchone()
if result:
@ -35,20 +35,36 @@ class DbAction:
return result
def get_available_pwd(self, limit=20):
    """Return historical passwords, most frequently successful first.

    Reads the ``pwd`` column of ``scrapyh_pwd`` ordered by ``times``
    descending.

    :param limit: maximum number of passwords to return (default 20,
        matching the previously hard-coded ``LIMIT 20``).
    :return: list of password values, one per fetched row.
    """
    # Only the ``pwd`` column is consumed below, so only that column is
    # selected. Rows are accessed by key, so the cursor is expected to
    # return dict-like rows.
    SELECT_SQL = "SELECT sp.pwd FROM scrapyh_pwd sp ORDER BY sp.times DESC LIMIT %s;"
    self.cursor.execute(SELECT_SQL, (int(limit),))
    return [row['pwd'] for row in self.cursor.fetchall()]
def insert_pwd(self, _pwd):
    """Store ``_pwd`` in ``scrapyh_pwd`` unless it is empty or already present.

    :param _pwd: password to remember; falsy values (``None``, ``""``) are
        ignored so that no empty password row is ever inserted.
    :return: None. Insert errors are printed and rolled back, not raised.
    """
    if not _pwd:
        return

    SELECT_SQL = "SELECT * FROM scrapyh_pwd sp WHERE sp.pwd = %s;"
    self.cursor.execute(SELECT_SQL, (_pwd,))
    if self.cursor.fetchone():
        # Already recorded; nothing to do.
        return

    try:
        INSERT_SQL = "INSERT INTO scrapyh_pwd (pwd) VALUES (%s);"
        self.cursor.execute(INSERT_SQL, (_pwd,))
        self.conn.commit()
    except Exception as e:
        # Best effort: a failed insert must not abort the caller.
        print(e)
        self.conn.rollback()
def update_pwd(self, _pwd):
    """Increment the ``times`` counter of ``_pwd`` in ``scrapyh_pwd``.

    :param _pwd: password whose counter is incremented; falsy values are
        ignored.
    :return: None. Update errors are printed and rolled back, not raised,
        mirroring the error handling of ``insert_pwd``.
    """
    if not _pwd:
        return

    UPDATE_SQL = "UPDATE scrapyh_pwd sp SET sp.times = sp.times+1 WHERE sp.pwd = %s;"
    try:
        self.cursor.execute(UPDATE_SQL, (_pwd,))
        self.conn.commit()
    except Exception as e:
        # The counter is only a usage statistic; a failure here should not
        # abort the caller.
        print(e)
        self.conn.rollback()
def get_failed(self, _id_list):
    """Print the ``id``, ``url`` and ``unzip_pwd`` of the given ``scrapyh`` rows.

    :param _id_list: iterable of record ids (the failed folders).
    :return: None. Matching rows are printed one per line.
    """
    ids = list(_id_list)
    if not ids:
        # ``IN ()`` is not valid SQL and there is nothing to look up.
        return

    # One placeholder per id. Binding a single comma-joined string would be
    # sent as ONE quoted value (``IN ('1, 2, 3')``) and match nothing.
    placeholders = ', '.join(['%s'] * len(ids))
    SELECT_SQL = f"SELECT `id`, url, unzip_pwd FROM scrapyh s WHERE s.id IN ({placeholders});"
    self.cursor.execute(SELECT_SQL, tuple(ids))
    for row in self.cursor.fetchall() or ():
        print(row)

@ -30,7 +30,7 @@ class FilesUnzip:
# 目标根目录,返回根目录下全部文件夹的列表
def get_root_folder_list(self):
    """Return the sorted names of the directories directly under ``self.root_path``.

    :return: sorted list of sub-directory names; an empty list when
        ``self.root_path`` does not exist or has no sub-directories.
    """
    # os.walk is lazy and yields the top directory first, so only that first
    # entry is read instead of walking the whole tree. The default covers a
    # missing root, for which os.walk yields nothing.
    _, dir_names, _ = next(os.walk(self.root_path), (self.root_path, [], []))
    return sorted(dir_names)
# 目标文件, 获取文件扩展名, 返回 文件路径+文件名, 扩展名
@staticmethod
@ -46,6 +46,8 @@ class FilesUnzip:
new = self.change_to_know_name(_f)
_, ext_new = self.get_ext_name(new)
if ext_new in handle_zip_ext_name:
if self.del_small_zip_file(new):
continue
new_file_list['handle_zip'].append(new)
new_file_list['zip'].append(new)
elif ext_new in know_zip_ext_name:
@ -64,6 +66,7 @@ class FilesUnzip:
for unknown_ext, know_ext in ext_name_list.items():
if ext_file_name == unknown_ext:
new_file_name = base_file_name + know_ext
log_info(f'修改未知扩展名: {_file} -> {new_file_name}')
file_rename(_file, new_file_name)
return new_file_name
return _file
@ -73,8 +76,17 @@ class FilesUnzip:
def del_all_files(path_list):
    """Delete every file in ``path_list`` with the shell ``del`` command.

    :param path_list: iterable of file paths to delete.
    :return: None. One log line per path reports success or failure.
    """
    for path in path_list:
        # os.system returns the command's integer exit status, so success
        # must be tested against the int 0. Comparing with the string '0'
        # is never true and would log every deletion as failed.
        result = os.system(f'del "{path}"')
        log_info(f"删除文件{'成功' if result == 0 else '失败'}{path}")
# 删除小文件
@staticmethod
def del_small_zip_file(_path):
    """Delete ``_path`` when it is smaller than 1,024,000 bytes.

    :param _path: path of the archive file to check.
    :return: True when the file was below the size threshold and a delete
        was attempted (whether or not the delete succeeded); False when the
        file is large enough to keep.
    """
    if os.path.getsize(_path) < 1024000:
        log_info(f"无效文件小于1MB,将被删除 {_path}")
        # os.system returns the command's integer exit status; comparing it
        # with the string '0' is never true, so compare with the int 0.
        result = os.system(f'del "{_path}"')
        log_info(f"删除文件{'成功' if result == 0 else '失败'}{_path}")
        return True
    return False
# 获取可删除文件列表
# def get_del_files(self, _path):
@ -156,12 +168,14 @@ class FilesCollection:
for move in self.get_move_files():
# print(move)
if not os.path.exists(os.path.dirname(move[1])):
log_info(f'创建文件夹: {os.path.dirname(move[1])}')
os.makedirs(os.path.dirname(move[1]))
log_info(f'移动文件:{move[0]} -> {move[1]}')
shutil.move(move[0], move[1])
# 获取一个空文件夹
# 获取一个空文件夹, 如果是空文件夹则返回路径,否则返回False
@staticmethod
def get_empty(_path):
def is_empty(_path):
tree = list(os.walk(_path))
empty_list = []
for leaf in tree:
@ -171,18 +185,30 @@ class FilesCollection:
# 清除全部空文件夹
def remove_empty(self):
    """Remove empty folders under ``self.path`` until ``is_empty`` finds none.

    :return: None.
    """
    while _empty := self.is_empty(self.path):
        log_info(f'移除空文件夹:{_empty}')
        # Clear the read-only attribute first. The path is quoted so that
        # folders whose names contain spaces are passed as one argument.
        os.system(f'attrib -r "{_empty}"')
        # os.removedirs also prunes parent directories that become empty.
        os.removedirs(_empty)
# 如果根目录是空的话就移除根目录并返回True
def remove_empty_root_folder(self):
    """Remove the folder reported by ``is_empty(self.path)``, if any.

    :return: True when a folder was reported and removed, False otherwise.
    """
    # NOTE(review): the body of is_empty is not fully visible here. If it can
    # return a nested empty directory rather than self.path itself, this
    # removes that directory and still returns True -- confirm.
    if _empty := self.is_empty(self.path):
        log_info(f'移除空的根目录:{_empty}')
        # Clear the read-only attribute first. The path is quoted so that
        # folders whose names contain spaces are passed as one argument.
        os.system(f'attrib -r "{_empty}"')
        # os.removedirs also prunes parent directories that become empty.
        os.removedirs(_empty)
        return True
    return False
# 重命名根路径的文件夹, 如果只有一个文件就把文件重命名
def rename_root_folder(self, _root, name, _org, _target):
if len(file := get_all_files(_org)) == 1:
ext = os.path.splitext(file[0])[1]
log_info(f'重命名:{file[0]} -> {os.path.join(_root, name + ext)}')
file_rename(file[0], os.path.join(_root, name + ext))
self.remove_empty()
else:
log_info(f'重命名:{_org} -> {_target}')
file_rename(_org, _target)
@ -198,4 +224,6 @@ if __name__ == '__main__':
# all_file = files.get_all_files(os.path.join(root, f))
# print(files.clear_files(all_file))
# print(files.collection_files(r"F:\Temp\sjry\hj\35316"))
print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345')))
# print(json.dumps(files.get_folder_dict(r'F:\Temp\test\12345')))
col_obj = FilesCollection(r'F:\Temp\leshe\leshe_20230810\35480')
print(col_obj.remove_empty_root_folder())

@ -1,10 +1,12 @@
import os.path
from time import sleep
from log import logger
import unzip
import file
import db
root_path = r'F:\Temp\sjry\hj'
root_path = r'F:\Temp\leshe_20240101'
# 初始化数据库
db_obj = db.DbAction()
@ -12,59 +14,102 @@ file_obj = file.FilesUnzip(root_path)
unzip_obj = unzip.UnzipFile()
# 初始化成功和失败任务列表
unzip_succeed, unzip_failed = [], []
unzip_succeed, unzip_failed, unknown, none_unzip_pwd = [], [], [], []
# 开始任务
def start_unzip_task():
def unzip_task(folder):
result = True
for folder in (set(file_obj.get_root_folder_list()) - set(unzip_succeed)):
logger.info(f'开始解压 {folder}')
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
print(all_file)
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
logger.info(all_file)
# print(all_file)
if not all_file['handle_zip'] and not all_file['zip'] and not all_file['others'] and not all_file['unknown']:
file_col_obj = file.FilesCollection(os.path.join(root_path, folder))
file_col_obj.remove_empty_root_folder()
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
# 从数据库中获取数据
data = db_obj.get_data_by_id(folder)
print(data)
if not data:
logger.info(f'{folder} 在数据库中不存在或以处理完成,请检查')
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
if not data['unzip_pwd']:
logger.info(f'解压密码缺失:{folder}')
none_unzip_pwd.append(folder) if folder not in none_unzip_pwd else ''
unzip_failed.append(folder) if folder not in unzip_failed else ''
return False
if data and all_file['handle_zip']:
# 解压, 增加适用历史密码重试的功能
result_pass = []
result_fail = []
for unzip_file in all_file['handle_zip']:
logger.info(f"使用密码{data['unzip_pwd']}解压{unzip_file}")
if unzip_obj.unzip(unzip_file, data['unzip_pwd']):
result_pass.append(unzip_file)
else:
result_fail.append(unzip_file)
# 从数据库中获取数据
data = db_obj.get_data_by_id(folder)
print(data)
if len(result_pass) == len(all_file['handle_zip']):
logger.info(f"全部解压成功,删除全部压缩文件")
file_obj.del_all_files(all_file['zip'])
return
if data and all_file['handle_zip']:
# 解压, 增加适用历史密码重试的功能
if unzip_obj.unzip(all_file['handle_zip'][0], data['unzip_pwd']):
file_obj.del_all_files(all_file['zip'])
else:
print('password')
print(db_obj.get_available_pwd())
for pwd in db_obj.get_available_pwd():
result = unzip_obj.unzip(all_file['handle_zip'][0], pwd)
if result:
file_obj.del_all_files(all_file['zip'])
break
# 重新获取文件检查解压结果
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
print(all_file)
# 检查没有解压的文件
if all_file['handle_zip'] or all_file['zip']:
logger.info(f'{folder} 中依然存在没有解压的文件,请检查')
unzip_failed.append(folder) if folder not in unzip_failed else ''
result = False
for pwd in db_obj.get_available_pwd():
logger.info(f'使用历史密码{pwd}重新解压')
for index, unzip_failed_file in enumerate(result_fail):
if unzip_obj.unzip(unzip_failed_file, pwd):
db_obj.update_pwd(pwd)
result_pass.append(unzip_failed_file)
result_fail.pop(index)
logger.info(f"部分解压成功,删除全部压缩文件")
file_obj.del_all_files(all_file['zip'])
# 重新获取文件检查解压结果
all_file = file_obj.get_cate_files(os.path.join(root_path, folder)) # 整理文件返回整理后的结果
# print(all_file)
# 检查没有解压的文件
if all_file['handle_zip'] or all_file['zip']:
logger.info(f'{folder} 中依然存在没有解压的文件,请检查')
unzip_failed.append(folder) if folder not in unzip_failed else ''
result = False
# 检查打印结果
if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']:
db_obj.insert_pwd(data['unzip_pwd'])
unzip_succeed.append(folder) if folder not in unzip_succeed else ''
unzip_failed.remove(folder) if folder in unzip_failed else ''
logger.info('全部文件已解压')
# 检查未知文件
elif all_file['unknown']:
logger.info("打印没有处理的文件扩展名:")
logger.info(', '.join(all_file['unknown']))
unzip_failed.append(folder) if folder not in unzip_failed else ''
result = False
# 检查打印结果
if not all_file['handle_zip'] and not all_file['zip'] and not all_file['unknown']:
db_obj.insert_pwd(data['unzip_pwd'])
unzip_succeed.append(folder) if folder not in unzip_succeed else ''
logger.info('全部文件已解压')
if all_file['unknown']:
logger.info("打印没有处理的文件扩展名:")
logger.info(', '.join(all_file['unknown']))
unzip_failed.append(folder) if folder not in unzip_failed else ''
unknown.append(folder) if folder not in unknown else ''
result = False
return result
# 开始任务
def start_unzip_task():
    """Run the unzip task for every root folder not yet marked as succeeded.

    Folders are processed in sorted order. Each folder gets at most five
    attempts, and the attempts stop as soon as ``unzip_task`` returns a
    truthy value.
    """
    pending = set(file_obj.get_root_folder_list()) - set(unzip_succeed)
    for folder in sorted(pending):
        logger.info(f'开始解压 {folder}')
        for attempt in range(1, 6):
            logger.info(f'{attempt}轮解压任务')
            if unzip_task(folder):
                break
# 整理文件夹
def start_collation_task():
logger.info('开始整理文件夹')
@ -73,6 +118,8 @@ def start_collation_task():
file_col_obj = file.FilesCollection(folder_path)
if db_obj.get_data_by_id(folder) and folder not in unzip_failed:
logger.info(f'开始整理 {folder}')
if file_col_obj.remove_empty_root_folder():
continue
# 清除多余的文件
file_col_obj.clear_files()
# 整理无效文件夹
@ -82,23 +129,27 @@ def start_collation_task():
# 从数据库中获取数据
data = db_obj.get_data_by_id(folder)
# 重命名文件夹
name = str(data['id']) + '_' + data['name']
name = 'id' + str(data['id']) + '_' + data['name']
file_col_obj.rename_root_folder(root_path, name, folder_path, os.path.join(root_path, name))
def pc_sleep(delay):
    """Wait ``delay`` seconds, then run the Windows suspend command.

    :param delay: number of seconds to wait before issuing the command.
    """
    # Invokes SetSuspendState from powrprof.dll through rundll32.
    suspend_command = "rundll32.exe powrprof.dll,SetSuspendState Sleep"
    sleep(delay)
    os.system(suspend_command)
def main():
n = 1
# unzip_result = False
while n <= 5:
logger.info(f'{n}轮解压任务')
unzip_result = start_unzip_task()
if unzip_result:
break
n += 1
start_unzip_task()
# if unzip_result:
start_collation_task()
logger.info('失败的任务:')
logger.info('解压失败的任务:')
logger.info(', '.join(unzip_failed))
db_obj.get_failed(unzip_failed)
logger.info('包含未知文件:')
logger.info(', '.join(unknown))
logger.info('解压密码缺失:')
logger.info(', '.join(none_unzip_pwd))
# pc_sleep(10)
if __name__ == '__main__':

@ -35,10 +35,10 @@ class UnzipFile:
@staticmethod
def unzip_7z(source, password=""):
# 准备参数
cmd = '7z.exe x '
cmd = '7z.exe x -y '
args = '-o'
if password:
pwd = '-p' + str(password)
pwd = '-p"' + str(password) + '"'
else:
pwd = ''
target = os.path.splitext(source)[0] + '\\'

Loading…
Cancel
Save