优化重复文件处理

main
roger_home_pc 1 year ago
parent ea0d80aaec
commit 62085db022
  1. 21
      getFiles.py
  2. 39
      main.py

@ -11,6 +11,27 @@ def get_file_list(_path):
return file_list return file_list
def get_empty_folder_list(path):
    """Return every directory under *path* that is completely empty.

    The tree rooted at *path* is traversed with ``os.walk``; a directory is
    reported when it contains neither files nor subdirectories.  *path*
    itself is included when it is empty.

    Args:
        path: Root directory to scan.

    Returns:
        A list of directory paths, in ``os.walk`` traversal order.  The list
        is empty when nothing qualifies (``os.walk`` yields nothing for a
        path that is not a directory).
    """
    return [
        root
        for root, dirs, files in os.walk(path)
        if not dirs and not files
    ]
def remove_empty_folder(path):
    """Delete every empty directory under *path*, cascading upwards.

    A directory that becomes empty because all of its subdirectories were
    just removed is removed as well, so whole chains of empty folders
    disappear.  *path* itself is deleted too if it ends up empty.

    The tree is walked once, bottom-up, so children are always handled
    before their parent; this replaces rescanning the entire tree after
    every round of deletions.

    Args:
        path: Root directory to clean.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.rmdir`` if a directory
            cannot be inspected or removed.
    """
    for root, _dirs, _files in os.walk(path, topdown=False):
        # The dirs/files lists were captured before the children were
        # processed, so ask the filesystem for the current state instead.
        if not os.listdir(root):
            print(f"删除空文件夹 {root}")
            os.rmdir(root)
if __name__ == "__main__": if __name__ == "__main__":
result = get_file_list(r"E:\Backup\Tools\Normal\浏览器") result = get_file_list(r"E:\Backup\Tools\Normal\浏览器")
for r in result: for r in result:

@ -1,14 +1,20 @@
from getFiles import get_file_list import os
import win32con
import win32api
from getFiles import get_file_list, remove_empty_folder
from getMd5 import get_file_md5 from getMd5 import get_file_md5
from db import * from db import *
from time import time as t from time import time as t
# Run configuration read by main_process(); all three values must be
# non-empty strings.
disk = "Data_sjry"
cate = "sjry"
# Root directory that is scanned for files.
path = r"F:\Temp\sjry\2023-10"
def main_process():
disk = "Myxx_Backup_16T_3"
cate = "leshe"
path = r"X:\leshe"
def main_process():
_exist_file_list = [] _exist_file_list = []
if disk == "" or cate == "" or path == "": if disk == "" or cate == "" or path == "":
@ -32,10 +38,10 @@ def main_process():
file_list_len = len(unchecked_list) file_list_len = len(unchecked_list)
for (i, f) in enumerate(unchecked_list): for (i, f) in enumerate(unchecked_list):
print(f"({i+1}/{file_list_len}) - 开始处理: {f} - {t()}") print(f"({i + 1}/{file_list_len}) - 开始处理: {f} - {t()}")
t_start = t() t_start = t()
md5 = get_file_md5(f) md5 = get_file_md5(f)
print(f"完成MD5计算耗时:{t()-t_start}") print(f"完成MD5计算耗时:{t() - t_start}")
if exist := is_exist(md5): if exist := is_exist(md5):
_exist_file_list.append(f) _exist_file_list.append(f)
print(f"重复文件!!!md5为({md5})的文件已经存在,文件路径:{exist['path']} -> {f}") print(f"重复文件!!!md5为({md5})的文件已经存在,文件路径:{exist['path']} -> {f}")
@ -45,13 +51,20 @@ def main_process():
return _exist_file_list return _exist_file_list
def handle_exist(_exist_file_list):
    """Record the duplicate files in ``dup.txt`` and delete them from disk.

    Each path is first written to ``dup.txt`` (one per line, CRLF
    terminated), then its file attributes are reset to NORMAL so that
    read-only files can be removed, and finally the file is deleted.

    Args:
        _exist_file_list: Paths of the files detected as duplicates.  When
            it is empty nothing is written or deleted and a notice is
            printed instead.
    """
    if not _exist_file_list:
        print("没有找到重复文件")
        return

    # newline="" turns off newline translation; otherwise the explicit
    # "\r\n" below would be written as "\r\r\n" on Windows.
    with open('dup.txt', 'w', encoding='utf-8', newline='') as dup_file:
        for ef in _exist_file_list:
            dup_file.write(ef + "\r\n")
            # Clear read-only/hidden/system attributes before deleting.
            win32api.SetFileAttributes(ef, win32con.FILE_ATTRIBUTE_NORMAL)
            os.remove(ef)
            print(f"删除: {ef}")
    # The with-statement closes dup_file; no explicit close() is needed.
if __name__ == "__main__":
    # Scan for duplicates, log and delete them, then prune any folders
    # left empty under the configured root.
    handle_exist(main_process())
    remove_empty_folder(path)

Loading…
Cancel
Save