添加更多文件

3 years ago · 729a3a741d
parent 240e8e56bf
commit 729a3a741d
6 changed files with 403 additions and 0 deletions
--- a/data_dict.py
+++ b/data_dict.py
@ -0,0 +1,73 @@
+# Extensions of ordinary, non-archive files; Files.clear_files files them under 'others'.
+know_ext_name = [
+    '.jpg', '.mp4', '.png', '.mov', '.txt', '.jpeg', '.m4v', '.flv', '.url', '.db',
+    '.avi', '.mkv', '.bmp', '.ini', '.doc', '.docx', '.ogg', '.wmv', '.gif', '.ts',
+    '.mts', '.iso', '.mpg', '.webp', '.heic', '.livp', '.ppt', '.mp3', '.htm', '.jfif',
+    '.webm', '.3gp', '.m4a', '.rmvb', '.rm', '.asf', '.f4v', '.mpeg', '.torrent', '.tiff',
+]
+
+# Archive extensions that Files.clear_files puts in both the 'handle_zip' and 'zip' groups.
+handle_zip_ext_name = ['.zip', '.7z', '.001', '.rar', '.tar', '.wim']
+
+# Every recognised archive extension, including the numbered volume suffixes
+# '.z01'-'.z20' and '.001'-'.020'; Files.clear_files files these under 'zip'.
+know_zip_ext_name = (
+    ['.zip']
+    + ['.z%02d' % number for number in range(1, 21)]
+    + ['.7z']
+    + ['.%03d' % number for number in range(1, 21)]
+    + ['.rar', '.tar', '.wim']
+)
+
+# Regex for a '.7z.' suffix followed by two or three digits (e.g. '.7z.001').
+# NOTE(review): no user of this pattern is visible in this commit.
+re_ext_list_7z = r'\.7z\.\d{2,3}'
+
+# Per archive type: regex capturing the extension plus at least one trailing character
+# up to the end of the string (e.g. '.rar_bak').
+# NOTE(review): presumably used to detect junk appended after the real extension - confirm against caller.
+re_ext_list = {'.7z': r'(\.7z.+?$)',
+               '.rar': r'(\.rar.+?$)',
+               '.zip': r'(\.zip.+?$)',
+               }
+
+# Names and extensions that Files.get_del_files reports as deletable.
+# All entries are lower case.
+clear_list = {
+    'file_name': ['ds_store'],
+    'ext_name': ['.ds_store', '.ini', '.html', '.co', '.torrent', '.js', '.downloading', '.lnk'],
+}
+
+# Maps a text key to a list of candidate strings.
+# NOTE(review): judging by the name these are archive passwords tried when the key is
+# found in some description text - the consumer is not part of this commit, confirm there.
+# The last two keys differ only by a space and map to the same values.
+pwd_dict = {
+    '福利大吧': ['福利大吧@fulidaba00(1.2.3.N).com'],
+    '2048jp.com': ['https://2048jp.com/', 'https://2048jp.com'],
+    'fulibl.net': ['fulibl.com', 'fulibl.cc', 'fulibl.net'],
+    'vrzydh.com或vrpyf.com或vrkefu@outlook.com': ['vrzydh.com', 'vrpyf.com', 'vrkefu@outlook.com'],
+    'fulizifanji.club 或者fulizifanji.xyz': ['fulizifanji.club', 'fulizifanji.xyz'],
+    'fulizifanji.club或者fulizifanji.xyz': ['fulizifanji.club', 'fulizifanji.xyz']
+}
+
+# Wrong or disguised extension -> extension it is renamed to by Files.change_to_know_name.
+# Keys are compared against the lower-cased extension; the '' key covers files with no extension.
+ext_name_list = {'.7': '.7z',
+                 '.7z111': '.7z',
+                 '.7z12': '.7z',
+                 '.77': '.7z',
+                 '.pdf': '.rar',
+                 '.psd': '.rar',
+                 '.ra': '.rar',
+                 '.zi': '.zip',
+                 '.77z': '.7z',
+                 '.777': '.7z',
+                 '': '.zip',
+                 '.7tar': '.7z',
+                 '.abc1': '.7z',
+                 '.abc2': '.7z',
+                 '.abc3': '.7z',
+                 '.tex': '.7z',
+                 '.mp41': '.mp4',
+                 '.avi1': '.avi',
+                 '.tif': '.zip',
+                 '.lzh': '.zip',
+                 '.下载后改成rar': '.rar',
+                 '.c': '.zip',
+                 '.word': '.rar',
+                 }
+
+# Placeholder token -> the single character it stands for.
+# NOTE(review): presumably used to round-trip characters that are awkward in file names or
+# command lines - the consumer is not part of this commit, confirm there.
+special_symbols = {'==1==': '<',
+                   '==2==': '>',
+                   '==3==': '|',
+                   '==4==': '*',
+                   '==5==': '?',
+                   '==6==': '/',
+                   '==7==': '\\',
+                   '==8==': ':',
+                   '==9==': "'",
+                   '==10==': '"',
+                   }
+
+# (HTML entity, replacement character) pairs.
+escape_list = [('&amp;', '&'), ('&quot;', '"'), ('&lt;', '<'), ('&gt;', '>'), ('&nbsp;', ' ')]
+
+# Extension -> extension mapping.
+# NOTE(review): the name suggests a size-based check decides whether a '.mp3' is really a
+# '.zip'; no consumer is visible in this commit - confirm before relying on it.
+check_size = {'.mp3': '.zip', }
--- a/file.py
+++ b/file.py
@ -0,0 +1,177 @@
+import json
+import os
+from log import logger, log_info
+from data_dict import ext_name_list, know_zip_ext_name, know_ext_name, handle_zip_ext_name, clear_list
+
+
+class Files:
+    """File-system helpers: list, classify, rename and delete files below a root folder."""
+
+    def __init__(self, root_path):
+        """Store the root directory scanned by ``get_root_folder_list``."""
+        self.root_path = root_path
+
+    def get_root_folder_list(self):
+        """Return the names of the folders directly inside ``self.root_path``.
+
+        Only the first entry produced by ``os.walk`` is consumed, so the rest of
+        the tree is never traversed.  An empty list is returned when the walk
+        yields nothing (for example when the root does not exist).
+        """
+        return next(os.walk(self.root_path), (self.root_path, [], []))[1]
+
+    @staticmethod
+    def get_folder_dict(_path):
+        """Map every directory at or below ``_path`` to its direct sub-folders and files.
+
+        Returns:
+            dict: ``{dir_path: {'folders': [...], 'files': [...]}}`` as reported by ``os.walk``.
+        """
+        return {
+            dir_path: dict(folders=folders, files=file_names)
+            for dir_path, folders, file_names in os.walk(_path)
+        }
+
+    @staticmethod
+    def get_all_files(_path):
+        """Return the full paths of all files at or below ``_path``, in ``os.walk`` order."""
+        return [
+            os.path.join(dir_path, file_name)
+            for dir_path, _folders, file_names in os.walk(_path)
+            for file_name in file_names
+        ]
+
+    @staticmethod
+    def file_rename(_org, _target):
+        """Rename ``_org`` to ``_target``; ``OSError`` from ``os.rename`` propagates."""
+        os.rename(_org, _target)
+
+    @staticmethod
+    def get_ext_name(_file):
+        """Split ``_file`` into ``(path_without_extension, lower_cased_extension)``."""
+        base_name, ext_name = os.path.splitext(_file)
+        return base_name, ext_name.lower()
+
+    def change_to_know_name(self, _file):
+        """Rename ``_file`` when its extension is a known wrong spelling.
+
+        Files whose extension is already a recognised archive extension are left
+        alone.  Otherwise the extension is looked up in ``ext_name_list`` and, on
+        a hit, the file is renamed on disk.
+
+        Returns:
+            str: the new path when a rename happened, otherwise ``_file`` unchanged.
+        """
+        base_file_name, ext_file_name = self.get_ext_name(_file)
+        if ext_file_name in know_zip_ext_name:
+            return _file
+        know_ext = ext_name_list.get(ext_file_name)
+        if know_ext is None:
+            return _file
+        new_file_name = base_file_name + know_ext
+        self.file_rename(_file, new_file_name)
+        return new_file_name
+
+    def clear_files(self, files_list):
+        """Normalise extensions and sort the files into groups.
+
+        Returns:
+            dict: lists under the keys ``handle_zip`` (archives handed to the
+            extractor), ``zip`` (every archive file, including the ones in
+            ``handle_zip``), ``others`` (known non-archive files) and
+            ``unknown`` (everything else).
+        """
+        new_file_list = dict(handle_zip=[], zip=[], others=[], unknown=[])
+        for _f in files_list:
+            new = self.change_to_know_name(_f)
+            _, ext_new = self.get_ext_name(new)
+            if ext_new in handle_zip_ext_name:
+                new_file_list['handle_zip'].append(new)
+                new_file_list['zip'].append(new)
+            elif ext_new in know_zip_ext_name:
+                new_file_list['zip'].append(new)
+            elif ext_new in know_ext_name:
+                new_file_list['others'].append(new)
+            else:
+                new_file_list['unknown'].append(new)
+        return new_file_list
+
+    def get_del_files(self, _path):
+        """Return the files below ``_path`` whose name or extension is listed in ``clear_list``.
+
+        Matching is case-insensitive because every ``clear_list`` entry is lower
+        case.  Each file is reported at most once.
+        """
+        junk_names = clear_list['file_name']
+        junk_exts = clear_list['ext_name']
+        del_list = []
+        for _f in self.get_all_files(_path):
+            name = os.path.basename(_f).lower()
+            ext = os.path.splitext(name)[1]
+            # os.path.splitext reports no extension for names made only of a leading-dot
+            # part (e.g. '.DS_Store'), so the whole name is also checked against the
+            # extension list.
+            if name in junk_names or ext in junk_exts or name in junk_exts:
+                del_list.append(_f)
+        return del_list
+
+    def collection_files(self, _path):
+        """Group the files below ``_path`` by their containing directory.
+
+        Returns:
+            dict: ``path_files`` maps each directory to its file paths, ``path``
+            lists those directories in first-seen order and ``files`` is the
+            flat list of every file path.
+        """
+        _all_files = self.get_all_files(_path)
+        files_dict = dict(path_files={}, path=[], files=_all_files)
+        for _f in _all_files:
+            file_path = os.path.dirname(_f)
+            if file_path not in files_dict['path_files']:
+                files_dict['path'].append(file_path)
+            files_dict['path_files'].setdefault(file_path, []).append(_f)
+        return files_dict
+
+    def get_file_dict(self, _path):
+        """Placeholder: walks ``_path`` without doing anything and returns ``None``."""
+        for p in os.walk(_path):
+            pass
+
+
+    def move_files(self, _root_path):
+        """Work out how the files below ``_root_path`` would be flattened into it.
+
+        NOTE(review): unfinished.  The (source, target) pairs are collected but
+        nothing is moved or returned, and the multi-directory branch only fills
+        a scratch dict that is discarded.
+        """
+        move_file_list = []
+        _files = self.collection_files(_root_path)
+        if not _files['files']:
+            log_info('文件夹为空')
+        else:
+            if len(_files['files']) == 1:  # exactly one file
+                source = _files['files'][0]
+                target = os.path.join(_root_path, os.path.basename(source))
+                move_file_list.append((source, target))
+            elif len(_files['path']) == 1:  # several files, all in one directory
+                for _f in _files['files']:
+                    source = _f
+                    target = os.path.join(_root_path, os.path.basename(source))
+                    move_file_list.append((source, target))
+            else:  # several directories, each holding one or more files
+                temp_data = dict()
+                for _p in _files['path']:
+                    for _f in _files['path_files'][_p]:
+                        temp_data[_p] = dict(current=_p, valid='', name=os.path.basename(_f))
+
+    # Find a base path that keeps the files distinct (earlier draft, kept for reference).
+    # Expected dict shape: {file path: {current: current path, valid: part after the base path, name: file name}}
+    # def get_base_dir(self, file_list):
+    #     base_dir = dict()
+    #     temp = []
+    #     for _f in file_list.keys():
+    #         current = base_dir[_f]['current']
+    #         base_dir[_f]['current'] = os.path.dirname(current)
+    #         base_dir[_f]['valid'] = os.path.basename(current) + '/' + base_dir[_f]['valid']
+    #         temp.append(base_dir[_f]['valid'])
+    #     if len(set(temp)) == len(temp):
+    #         return base_dir
+    #     else:
+    #         self.get_base_dir(base_dir)
+
+    def get_base_dir(self, _path):
+        """Placeholder: builds the folder dictionary for ``_path`` and returns ``None``.
+
+        NOTE(review): unfinished; the draft logic below is still commented out.
+        """
+        files_dict = self.get_folder_dict(_path)
+
+        # for _p in check_list:
+        #     if len(files_dict[_p]['folders']) > 1 and len(files_dict[_p]['files']) == 0:
+        #         for _f in files_dict[_p]['folders']:
+        #             os.path.join()
+        #     elif len(files_dict[_path]['folders']) > 1 and len(files_dict[_path]['files']) > 0:
+        #         pass
+        #     elif len(files_dict[_path]['folders']) == 1 and len(files_dict[_path]['files']) == 0:
+        #         pass
+        #     elif len(files_dict[_path]['folders']) == 1 and len(files_dict[_path]['files']) > 0:
+        #         pass
+        #     elif len(files_dict[_path]['folders']) == 0 and len(files_dict[_path]['files']) > 0:
+        #         pass
+        #     else:
+        #         pass
+
+    @staticmethod
+    def del_file(path_list):
+        """Delete every file in ``path_list`` and log the outcome of each deletion.
+
+        Uses ``os.remove`` rather than a shell command, so paths need no quoting.
+        A failed deletion is logged and does not stop the remaining ones.
+        """
+        for path in path_list:
+            try:
+                os.remove(path)
+            except OSError as err:
+                log_info(f"删除文件失败： {path} ({err})")
+            else:
+                log_info(f"删除文件成功： {path}")
+
+
+if __name__ == '__main__':
+    # Manual smoke test against hard-coded local folders: dump one folder tree as JSON.
+    root = r"F:\Temp\sjry\hj"
+    files = Files(root)
+    # Earlier experiments, kept for reference:
+    # print(files.get_root_folder_list())
+    # print(files.get_all_files(r"F:\Temp\sjry\hj\35316"))
+    # files.file_rename('F:\\Temp\\sjry\\hj\\35316\\22\\新建 文本文档.123', 'F:\\Temp\\sjry\\hj\\35316\\22\\新建 文本文档.222')
+    # print(files.get_ext_name('F:\\Temp\\sjry\\hj\\35316\\22\\新建 文本文档.222'))
+    # print(files.change_to_know_name('F:\\Temp\\sjry\\hj\\35316\\22\\新建 文本文档.win'))
+    # for f in files.get_root_folder_list():
+    #     all_file = files.get_all_files(os.path.join(root, f))
+    #     print(files.clear_files(all_file))
+    # print(files.collection_files(r"F:\Temp\sjry\hj\35316"))
+    folder_tree = files.get_folder_dict(r'F:\Temp\test\12345')
+    print(json.dumps(folder_tree))
--- a/log.py
+++ b/log.py
@ -0,0 +1,26 @@
+import logging
+# import chardet
+from logging import handlers
+from logging import exception
+
+# Shared application logger: DEBUG level, same record layout for file and console output.
+logger = logging.getLogger('file_unzip')
+logger.setLevel(logging.DEBUG)
+
+format_str = '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
+f = logging.Formatter(format_str)
+
+# hf writes to log_out.log, rotating at midnight and keeping seven backups; cf writes to the console stream.
+hf = handlers.TimedRotatingFileHandler("log_out.log", when='midnight', interval=1, backupCount=7)
+cf = logging.StreamHandler()
+for _handler in (hf, cf):
+    _handler.setFormatter(f)
+    logger.addHandler(_handler)
+del _handler
+
+
+def log_info(msg):
+    """Log ``msg`` at INFO level on the shared logger, tolerating encoding problems.
+
+    ``bytes`` input is decoded as UTF-8 first.  If logging raises a
+    ``UnicodeError``, the message is logged again with every character that GBK
+    cannot represent replaced by ``?``.
+
+    The previous handler was ``except exception``, where ``exception`` is the
+    function ``logging.exception`` rather than an exception class, and its
+    fallback called ``str.decode``, which does not exist in Python 3.
+    """
+    if isinstance(msg, bytes):
+        msg = msg.decode('utf-8', errors='replace')
+    try:
+        logger.info(msg)
+    except UnicodeError:
+        # Safety net only: the stock handlers normally report emit errors themselves.
+        logger.info(str(msg).encode('gbk', errors='replace').decode('gbk'))
--- a/main.py
+++ b/main.py
@ -0,0 +1,60 @@
+import os.path
+from log import logger
+import unzip
+import file
+import db
+
+# Root folder whose first-level sub-folders are processed; hard-coded local path.
+root_path = r'F:\Temp\sjry\hj'
+
+# Module-level collaborators, created at import time:
+# database access, file helpers bound to root_path, and the archive extractor.
+db_obj = db.DbAction()
+file_obj = file.Files(root_path)
+unzip_obj = unzip.UnzipFile()
+
+
+# 开始任务
+def start_unzip_task():
+    """Run one extraction pass over every first-level folder of ``root_path``.
+
+    For each folder the files are classified, the first extractable archive is
+    unpacked with the password stored for that folder, and on success all
+    archive files of the folder are deleted.
+
+    Returns:
+        bool: ``False`` when at least one folder contains files with an
+        unrecognised extension, ``True`` otherwise.
+    """
+    result = True
+    for folder in file_obj.get_root_folder_list():
+        all_file = file_obj.get_all_files(os.path.join(root_path, folder))  # raw list of every file in the folder
+        all_file = file_obj.clear_files(all_file)  # grouped into handle_zip / zip / others / unknown
+        print(all_file)
+
+        # Look up the record for this folder; the folder name is used as the id.
+        # NOTE(review): assumes a record with an 'unzip_pwd' entry always exists - confirm against db.
+        data = db_obj.get_data_by_id(folder)
+
+        # Extract the first archive; remove the archive files only after a successful extraction.
+        if all_file['handle_zip'] and unzip_obj.unzip(all_file['handle_zip'][0], data['unzip_pwd']):
+            file_obj.del_file(all_file['zip'])
+
+        # Report files whose extension is not recognised.
+        if all_file['unknown']:
+            logger.info("打印没有处理的文件扩展名：")
+            logger.info(', '.join(all_file['unknown']))
+            result = False
+
+        # Report completion only when the folder held no archives and no unknown files.
+        # The previous test used `and`, which logged completion whenever any one group was empty.
+        if not (all_file['handle_zip'] or all_file['zip'] or all_file['unknown']):
+            logger.info('全部文件已解压')
+    return result
+
+
+def start_collation_task():
+    """Classify the files of every first-level folder of ``root_path`` and print each result.
+
+    Returns ``None``.
+    """
+    result = True  # assigned as in the unzip task, but never returned here
+    for folder in file_obj.get_root_folder_list():
+        folder_path = os.path.join(root_path, folder)
+        # Raw file list first, then the grouped view produced by clear_files.
+        classified = file_obj.clear_files(file_obj.get_all_files(folder_path))
+        print(classified)
+
+
+def main():
+    """Run the unzip task up to five times, stopping after the first pass that returns a truthy result."""
+    for _attempt in range(5):
+        if start_unzip_task():
+            break
+
+
+if __name__ == '__main__':
+    main()
--- a/requirements.txt
+++ b/requirements.txt
--- a/unzip.py
+++ b/unzip.py
@ -0,0 +1,67 @@
+import os
+from log import logger, log_info
+
+
+# 需要安装winrar和7z并且配置环境变量
+
+class UnzipFile:
+    """Extract archives by calling the WinRAR and 7-Zip command-line tools.
+
+    Both ``WinRAR.exe`` and ``7z.exe`` must be installed and reachable through PATH.
+    Archives are extracted into a folder named after the archive without its extension.
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def _target_dir(source):
+        """Return the extraction folder for ``source``: its path without extension plus a trailing backslash."""
+        return os.path.splitext(source)[0] + '\\'
+
+    @staticmethod
+    def _password_switch(flag, password):
+        """Return ``flag"password"`` with the password stripped, or ``''`` when there is no password."""
+        text = str(password).strip() if password else ''
+        return flag + '"' + text + '"' if text else ''
+
+    @staticmethod
+    def _run(cmd_str, target):
+        """Log and run ``cmd_str``; succeed only on exit code 0 with ``target`` present.
+
+        The command line is handed to the program directly instead of through
+        the shell, so characters such as ``&`` or ``%`` in file names and
+        passwords are not interpreted by ``cmd.exe``.
+        NOTE(review): a double quote inside a file name or password still breaks the quoting.
+        """
+        import subprocess  # local import: the module header only imports os
+
+        log_info(cmd_str)
+        try:
+            return_code = subprocess.run(cmd_str, shell=False).returncode
+        except OSError:
+            # The executable could not be started (not installed / not on PATH).
+            return False
+        return return_code == 0 and os.path.exists(target)
+
+    @staticmethod
+    def unzip_zip(source, password=""):
+        """Extract ``source`` with WinRAR (used for zip, rar and anything not routed to 7-Zip).
+
+        Returns:
+            bool: ``True`` when WinRAR exits with 0 and the target folder exists.
+        """
+        target = UnzipFile._target_dir(source)
+        parts = [
+            'WinRAR.exe e -y -ibck -ilog.\\winrar.log',
+            '"' + source + '"',
+            '*.*',
+            '"' + target + '"',
+            UnzipFile._password_switch('-hp', password),
+            '-or',
+        ]
+        return UnzipFile._run(' '.join(part for part in parts if part), target)
+
+    @staticmethod
+    def unzip_7z(source, password=""):
+        """Extract ``source`` with 7-Zip.
+
+        The password is stripped and quoted the same way as in ``unzip_zip``, so
+        passwords containing spaces reach 7-Zip as a single argument.
+
+        Returns:
+            bool: ``True`` when 7-Zip exits with 0 and the target folder exists.
+        """
+        target = UnzipFile._target_dir(source)
+        parts = [
+            '7z.exe x',
+            '"' + source + '"',
+            '-o"' + target + '"',
+            UnzipFile._password_switch('-p', password),
+        ]
+        return UnzipFile._run(' '.join(part for part in parts if part), target)
+
+    def unzip(self, source, password=""):
+        """Extract ``source`` with the tool matching its extension.
+
+        ``.7z``, ``.001`` and ``.wim`` (compared case-insensitively) go to
+        7-Zip; everything else goes to WinRAR.
+
+        Returns:
+            bool: the result of the chosen extractor.
+        """
+        ext = os.path.splitext(source)[1].lower()
+        if ext in ('.7z', '.001', '.wim'):
+            return self.unzip_7z(source, password)
+        return self.unzip_zip(source, password)
+
+
+# Running this module directly does nothing; it is meant to be imported.
+if __name__ == '__main__':
+    pass