|
|
|
import os
|
|
|
|
import win32con
|
|
|
|
import win32api
|
|
|
|
from getFiles import get_file_list, remove_empty_folder
|
|
|
|
from getMd5 import get_file_md5
|
|
|
|
from db import *
|
|
|
|
from time import time as t
|
|
|
|
|
|
|
|
# Scan jobs run by the script entry point, one dict per archive root:
#   disk - disk label passed to main_process()
#   cate - category name passed to main_process()
#   path - directory that is scanned and afterwards pruned of empty folders
handle_data = [
    {"disk": disk_label, "cate": category, "path": root_dir}
    for disk_label, category, root_dir in (
        ("Data_sjry", "sjry", r"F:\Temp\sjry\2023-11"),
        ("Data_leshe", "leshe", r"F:\Temp\leshe\2023-12"),
        ("Data_hj", "hj", r"F:\Temp\hj\2023-12"),
    )
]
|
|
|
|
|
|
|
|
|
|
|
|
def main_process(disk, cate, path):
    """Index the new files under ``path`` and collect the duplicated ones.

    Files whose path (minus its first two characters) is already returned by
    ``get_path_by_cate(cate)`` are skipped. Every remaining file is hashed
    with ``get_file_md5``. When ``is_exist`` finds a record for that MD5 the
    file is reported as a duplicate; otherwise ``(cate, disk, file, md5)`` is
    stored through ``insert_data``.

    Args:
        disk: Disk label stored with each newly inserted record.
        cate: Category used to look up known paths and stored with each
            newly inserted record.
        path: Root directory handed to ``get_file_list``.

    Returns:
        A list of file paths whose MD5 is already recorded under a different
        path. The list is empty when an argument is missing or when no
        duplicate was found.
    """
    # Treat None like "" and hand back an empty list instead of None, so the
    # result can always be passed straight on to handle_exist().
    if not (disk and cate and path):
        print("检查参数")
        return []

    print(f"开始获取文件列表:{t()}")
    file_list = get_file_list(path)
    print(f"结束获取文件列表:{t()}")

    # NOTE(review): if get_path_by_cate() returns a list of strings, wrapping
    # it in set() would make the membership test below O(1) -- confirm the
    # return type before changing it.
    cate_file_list = get_path_by_cate(cate)

    unchecked_list = []
    total_len = len(file_list)
    # Progress counters are 1-based, the same as in the hashing loop below.
    for index, file in enumerate(file_list, start=1):
        # file[2:] drops the first two characters -- presumably the drive
        # prefix such as "F:"; TODO confirm that is what the lookup expects.
        if file[2:] in cate_file_list:
            print(f"{index}/{total_len}文件已经存在: {file}")
        else:
            print(f"{index}/{total_len}未处理文件: {file}")
            unchecked_list.append(file)

    _exist_file_list = []
    file_list_len = len(unchecked_list)
    for i, f in enumerate(unchecked_list, start=1):
        print(f"({i}/{file_list_len}) - 开始处理: {f} - {t()}")
        t_start = t()
        md5 = get_file_md5(f)
        print(f"完成MD5计算耗时:{t() - t_start}")

        if exist := is_exist(md5):
            if exist['path'] == f:
                # The stored record points at this very path: the file is its
                # own match, not a duplicate. Queuing it would make
                # handle_exist() delete the recorded copy itself.
                print(f"跳过: md5为({md5})的记录就是该文件本身,不作为重复文件: {f}")
                continue
            _exist_file_list.append(f)
            print(f"重复文件!!!md5为({md5})的文件已经存在,文件路径:{exist['path']} -> {f}")
            continue

        insert_data((cate, disk, f, md5))
        print(f"文件的md5({md5})成功插入: {f}")

    return _exist_file_list
|
|
|
|
|
|
|
|
|
|
|
|
def handle_exist(_exist_file_list):
    """Log every duplicate path to ``dup.txt`` and delete the file from disk.

    Args:
        _exist_file_list: Paths of the duplicate files to remove. ``None`` or
            an empty list means there is nothing to do.

    Returns:
        None. Each path is written to ``dup.txt`` (UTF-8, one per line,
        CRLF-terminated) before the file is removed.
    """
    # Truthiness covers both an empty list and the None a caller may pass on.
    if not _exist_file_list:
        print("没有找到重复文件")
        return

    # newline='' turns newline translation off so the explicit "\r\n" is
    # written as-is; with the default, Windows would emit "\r\r\n".
    # NOTE(review): mode 'w' truncates dup.txt on every call, so when this is
    # called once per scan root only the last non-empty batch is kept --
    # confirm whether appending is wanted instead.
    with open('dup.txt', 'w', encoding='utf-8', newline='') as dup_file:
        for ef in _exist_file_list:
            # Log first, so the path is on record even if the removal fails.
            dup_file.write(ef + "\r\n")
            # Reset the attributes to NORMAL before removing -- presumably to
            # clear flags such as read-only that would make os.remove() fail.
            win32api.SetFileAttributes(ef, win32con.FILE_ATTRIBUTE_NORMAL)
            os.remove(ef)
            print(f"删除: {ef}")
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Work through the configured scan jobs one at a time: index the folder,
    # delete the duplicates that were found, then prune empty directories.
    for job in handle_data:
        root_dir = job['path']
        duplicates = main_process(job['disk'], job['cate'], root_dir)
        handle_exist(duplicates)
        remove_empty_folder(root_dir)
|