使用Python查找大文件的实用脚本
C盘满了,于是写了一个 Python 脚本,2 分多钟就能找出比较大的文件,然后手动删除或者迁移到 D 盘。最后发现是微信小程序开发工具的缓存文件太多了,清理后腾出来 10 个 G,念头通达了。这里备份一下脚本。
运行工具:PyCharm 2024.1.3 (Community Edition)
【完整代码】
import os
import threading
import time
import sys
from threading import Event
def is_large(file_path, threshold_mb):
    """Return True when the file at *file_path* is larger than *threshold_mb* megabytes."""
    bytes_per_mb = 1024 * 1024
    size_in_mb = os.path.getsize(file_path) / bytes_per_mb
    return size_in_mb > threshold_mb
def show_loading_animation(stop_event, interval=0.5):
    """Show a simple text loading animation until *stop_event* is set.

    Args:
        stop_event: Event object (``is_set()`` / ``wait(timeout)``) that the
            caller sets to end the animation.
        interval: Seconds each animation frame stays on screen.
    """
    loading_chars = ['.', '..', '...', '....']
    # Pad every frame to the widest one so a shorter frame written after '\r'
    # fully overwrites the longer frame that preceded it.
    frame_width = max(len(chars) for chars in loading_chars)
    # Guard against a zero/negative interval turning the loop into a busy spin.
    frame_seconds = interval if interval > 0 else 0.1

    frame_index = 0
    while not stop_event.is_set():
        frame = loading_chars[frame_index % len(loading_chars)]
        sys.stdout.write('\r正在查找大文件... ' + frame.ljust(frame_width))
        sys.stdout.flush()
        # wait() returns as soon as the event is set, so the animation ends
        # promptly instead of sleeping through the rest of the frame.
        stop_event.wait(frame_seconds)
        frame_index += 1

    # Leave the line in its initial single-dot state once the work is done.
    sys.stdout.write('\r正在查找大文件... ' + loading_chars[0].ljust(frame_width))
    sys.stdout.flush()
def filter_files(files, skip_file_keywords, include_file_keywords, extension=None):
    """Filter file names by keyword and, optionally, by extension.

    Args:
        files: Iterable of file names (not full paths).
        skip_file_keywords: Names containing any of these substrings are
            dropped. ``None`` or an empty list disables the rule.
        include_file_keywords: When non-empty, only names containing at least
            one of these substrings are kept.
        extension: Extension without the leading dot; when given, only names
            ending in ``'.' + extension`` are kept.

    Returns:
        A new list with the names that pass every rule, in input order.
    """
    # Empty-string keywords are discarded: '' is a substring of every name, so
    # a list such as [''] (what "".split(",") yields) would otherwise reject
    # every file when used as a skip list.
    skip_keywords = [kw for kw in (skip_file_keywords or []) if kw]
    include_keywords = [kw for kw in (include_file_keywords or []) if kw]

    filtered_files = [
        name for name in files
        if not any(kw in name for kw in skip_keywords)
        and (not include_keywords or any(kw in name for kw in include_keywords))
    ]
    if extension is not None:
        suffix = '.' + extension
        filtered_files = [name for name in filtered_files if name.endswith(suffix)]
    return filtered_files
def filter_dirs(dirs, skip_dir_keywords, include_dir_keywords):
    """Filter directory names by keyword.

    Args:
        dirs: Iterable of directory names (not full paths).
        skip_dir_keywords: Names containing any of these substrings are
            dropped. ``None`` or an empty list disables the rule.
        include_dir_keywords: When non-empty, only names containing at least
            one of these substrings are kept.

    Returns:
        A new list with the directory names that pass both rules, in input order.
    """
    # Empty-string keywords are discarded: '' is a substring of every name, so
    # a skip list of [''] would otherwise remove every directory.
    skip_keywords = [kw for kw in (skip_dir_keywords or []) if kw]
    include_keywords = [kw for kw in (include_dir_keywords or []) if kw]

    return [
        dir_name for dir_name in dirs
        if not any(kw in dir_name for kw in skip_keywords)
        and (not include_keywords or any(kw in dir_name for kw in include_keywords))
    ]
def get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension=None, interval=0.5):
    """Find files under *dir_path* larger than *threshold_mb* MB and print them.

    A loading animation runs in a background thread while the tree is walked.
    Directories and files are filtered by the given keyword lists (and optional
    extension) via ``filter_dirs`` / ``filter_files``. Results are printed
    largest first, followed by the elapsed time.

    Args:
        dir_path: Root directory to scan.
        threshold_mb: Size threshold in megabytes; only larger files are reported.
        skip_dir_keywords: Keywords of directory names to skip.
        skip_file_keywords: Keywords of file names to skip.
        include_dir_keywords: Keywords a directory name must contain to be kept.
        include_file_keywords: Keywords a file name must contain to be kept.
        extension: Optional file extension (without the dot) to restrict to.
        interval: Frame interval, in seconds, passed to the loading animation.

    Returns:
        A list of ``{'path': str, 'size': float}`` dicts (size in MB), sorted
        by size in descending order.
    """
    start_time = time.time()
    stop_event = Event()
    large_files = []
    bytes_per_mb = 1024 * 1024

    loading_thread = threading.Thread(target=show_loading_animation, args=(stop_event, interval))
    loading_thread.daemon = True
    loading_thread.start()
    try:
        for root, dirs, files in os.walk(dir_path):
            # Assigning in place prunes the walk: os.walk only descends into
            # the directories that remain in this list.
            dirs[:] = filter_dirs(dirs, skip_dir_keywords, include_dir_keywords)
            for file in filter_files(files, skip_file_keywords, include_file_keywords, extension):
                full_path = os.path.join(root, file)
                try:
                    # Stat once and reuse the value; a second getsize() call
                    # could fail if the file disappears in between.
                    size_mb = os.path.getsize(full_path) / bytes_per_mb
                except OSError as e:
                    print(f"警告访问文件出错 {full_path} 出错信息: {e}")
                    continue
                if size_mb > threshold_mb:
                    large_files.append({'path': full_path, 'size': size_mb})
    finally:
        # Always stop the animation thread, even if the walk raised.
        stop_event.set()
        loading_thread.join()

    # The animation leaves the cursor at the end of its line; start the
    # results on a fresh line so the first entry is not appended to it.
    print()
    large_files.sort(key=lambda info: info['size'], reverse=True)
    for file_info in large_files:
        print(f"文件路径: {file_info['path']} | 文件大小: {file_info['size']:.2f} MB")
    end_time = time.time()
    print(f"\n查找共耗时: {end_time - start_time:.2f} 秒")
    return large_files
def _parse_keywords(raw):
    """Split a comma-separated string into stripped, non-empty keywords."""
    # Accept the full-width comma as well, since the prompts are in Chinese
    # and users may type it with a Chinese input method.
    parts = raw.replace(',', ',').split(',')
    return [part.strip() for part in parts if part.strip()]


def main():
    """Prompt for the search parameters on stdin and run the large-file search.

    Pressing Enter at a keyword prompt yields an empty keyword list, which
    disables that filter. An invalid threshold or directory prints an error
    message and returns without searching.
    """
    dir_path = input("请输入要检查的目录路径: ").strip()
    # os.walk yields nothing for a missing path, which would otherwise look
    # like a successful search with no results.
    if not os.path.isdir(dir_path):
        print(f"错误:目录不存在: {dir_path}")
        return

    try:
        threshold_mb = float(input("请输入文件大小阈值(单位: MB): "))
    except ValueError:
        print("错误:请输入有效的数字作为文件大小阈值.")
        return

    skip_dir_keywords = _parse_keywords(input("请输入要跳过的文件夹名关键词,用逗号分隔(直接回车跳过,推荐modules,~~,.gradle): "))
    skip_file_keywords = _parse_keywords(input("请输入要跳过的文件名关键词,用逗号分隔(直接回车跳过,推荐$): "))
    include_dir_keywords = _parse_keywords(input("请输入要包含的文件夹名关键词,用逗号分隔(直接回车跳过): "))
    include_file_keywords = _parse_keywords(input("请输入要包含的文件名关键词,用逗号分隔(直接回车跳过): "))
    extension = input("请输入要筛选的文件扩展名(例如:txt,可选,直接回车跳过): ").strip().strip('.') or None

    try:
        get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension)
    except OSError as e:
        print(e)
        return
    print("搜索结束.")


# Run the interactive search only when executed as a script, not on import.
if __name__ == '__main__':
    main()
原文地址:https://blog.csdn.net/zhongcongxu01/article/details/143474937
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!