"""Register files under ``path`` by MD5 and delete the ones already recorded.

Workflow (see ``main_process`` / ``handle_exist``):
1. list the files under ``path``;
2. skip files whose path is already recorded for the category ``cate``;
3. hash the remaining files and either record them or flag them as
   duplicates when the MD5 is already known;
4. write the duplicate paths to ``dup.txt``, delete those files, and finally
   remove empty folders under ``path``.
"""
import os
import win32con
import win32api
from getFiles import get_file_list, remove_empty_folder
from getMd5 import get_file_md5
from db import *  # provides get_path_by_cate, is_exist, insert_data (used below)
from time import time as t

# Run configuration: exactly one preset should be active (uncommented).

# sjry
# disk = "Data_sjry"
# cate = "sjry"
# path = r"F:\Temp\sjry\2023-11"

# leshe
disk = "Data_leshe"
cate = "leshe"
path = r"F:\Temp\leshe\2023-12"

# # hj
# disk = "Data_hj"
# cate = "hj"
# path = r"F:\Temp\hj\2023-12"


def main_process():
    """Hash every not-yet-recorded file under ``path`` and record it.

    Reads the module-level ``disk``, ``cate`` and ``path`` settings.

    Returns:
        A list with the paths of files whose MD5 was already known (the
        duplicates), or ``None`` when any of the three settings is empty and
        nothing was processed.
    """
    _exist_file_list = []
    if disk == "" or cate == "" or path == "":
        print("检查参数")
        # Explicit None: callers must not go on to delete files or folders
        # based on an invalid configuration.
        return None

    print(f"开始获取文件列表:{t()}")
    file_list = get_file_list(path)
    print(f"结束获取文件列表:{t()}")

    unchecked_list = []
    cate_file_list = get_path_by_cate(cate)
    total_len = len(file_list)
    for (index, file) in enumerate(file_list):
        # file[2:] drops the first two characters before the lookup --
        # presumably the drive prefix such as "F:"; TODO confirm that the
        # paths returned by get_path_by_cate are stored without it.
        if file[2:] in cate_file_list:
            print(f"{index}/{total_len}文件已经存在: {file}")
        else:
            print(f"{index}/{total_len}未处理文件: {file}")
            unchecked_list.append(file)

    file_list_len = len(unchecked_list)
    for (i, f) in enumerate(unchecked_list):
        print(f"({i + 1}/{file_list_len}) - 开始处理: {f} - {t()}")
        t_start = t()
        md5 = get_file_md5(f)
        print(f"完成MD5计算耗时:{t() - t_start}")
        if exist := is_exist(md5):
            # Same MD5 already recorded: remember this path as a duplicate
            # instead of inserting it again.
            _exist_file_list.append(f)
            print(f"重复文件!!!md5为({md5})的文件已经存在,文件路径:{exist['path']} -> {f}")
            continue
        insert_data((cate, disk, f, md5))
        print(f"文件的md5({md5})成功插入: {f}")
    return _exist_file_list


def handle_exist(_exist_file_list):
    """Log the duplicate files to ``dup.txt`` and delete them from disk.

    Args:
        _exist_file_list: paths of the duplicate files to delete. An empty
            list (or ``None``) only prints a notice; nothing is written or
            deleted.
    """
    if not _exist_file_list:
        print("没有找到重复文件")
        return

    # newline='' turns off newline translation so the explicit "\r\n" is
    # written verbatim; with the default text mode Windows would emit
    # "\r\r\n" for every entry.
    with open('dup.txt', 'w', encoding='utf-8', newline='') as dup_file:
        for ef in _exist_file_list:
            dup_file.write(ef + "\r\n")
            # Reset the attributes to NORMAL before deleting, so that e.g. a
            # read-only flag does not make os.remove fail.
            win32api.SetFileAttributes(ef, win32con.FILE_ATTRIBUTE_NORMAL)
            os.remove(ef)
            print(f"删除: {ef}")


if __name__ == "__main__":
    exist_file_list = main_process()
    # main_process returns None when the configuration is invalid; in that
    # case skip both the duplicate deletion and the folder cleanup.
    if exist_file_list is not None:
        handle_exist(exist_file_list)
        remove_empty_folder(path)