Split a dataset into train, validation, and test sets
import os
import shutil
import zipfile
from sklearn.model_selection import train_test_split
# Dataset root; the images/ and labels/ folders live directly beneath it.
dataset_directory = r'E:\2024\reflect\reflect'
images_directory = os.path.join(dataset_directory, 'images')
labels_directory = os.path.join(dataset_directory, 'labels')

# Path of the backup archive written before any file is moved.
backup_zip_path = os.path.join(dataset_directory, 'dataset_backup.zip')

# Base names (without extension) of every .jpg image. Sorted because
# os.listdir returns entries in arbitrary order, which would make the split
# differ between runs even with a fixed seed.
image_filenames = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir(images_directory)
    if filename.endswith('.jpg')
)

# Stop BEFORE opening the archive in 'w' mode: re-running the script after a
# successful split would otherwise overwrite the good backup with an empty one
# and only then fail inside train_test_split.
if not image_filenames:
    raise SystemExit(
        f'No .jpg images found in {images_directory}; nothing to split '
        '(existing backup left untouched).'
    )

# Back up images/ and labels/ into one archive, storing paths relative to the
# dataset root. ZIP_DEFLATED is requested explicitly because ZipFile defaults
# to ZIP_STORED (no compression).
with zipfile.ZipFile(backup_zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as backup_zip:
    for folder in (images_directory, labels_directory):
        for root, _dirs, files in os.walk(folder):
            for file in files:
                file_path = os.path.join(root, file)
                backup_zip.write(file_path, os.path.relpath(file_path, dataset_directory))

# Split ratios (train / validation / test).
train_ratio = 0.80
validation_ratio = 0.15
test_ratio = 0.05

# Fixed seed so the same dataset always yields the same split.
split_seed = 42

# First carve off everything that is not training data, then divide that
# hold-out portion between validation and test in the ratio 0.15 : 0.05.
train_filenames, holdout_filenames = train_test_split(
    image_filenames,
    test_size=1 - train_ratio,
    random_state=split_seed,
)
validation_filenames, test_filenames = train_test_split(
    holdout_filenames,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=split_seed,
)
# Helper that creates a destination directory and moves files into it.
def create_and_move_files(file_list, source_folder, destination_folder, file_extension):
    """Move ``<name><file_extension>`` for every name in *file_list*.

    Args:
        file_list: Base file names without extension.
        source_folder: Directory the files are currently in.
        destination_folder: Directory to move them to; created if absent.
        file_extension: Extension (including the dot) appended to each name.

    Files that do not exist in *source_folder* are skipped rather than
    aborting the whole run half-way through; a single summary line is
    printed listing how many were skipped.
    """
    os.makedirs(destination_folder, exist_ok=True)
    missing = []
    for filename in file_list:
        basename = filename + file_extension
        source_path = os.path.join(source_folder, basename)
        if not os.path.exists(source_path):
            # e.g. an image that has no matching label file
            missing.append(basename)
            continue
        shutil.move(source_path, os.path.join(destination_folder, basename))
    if missing:
        print(f'Skipped {len(missing)} missing file(s) in {source_folder}: '
              f'{", ".join(missing[:10])}{" ..." if len(missing) > 10 else ""}')
# Destination root of each split, keyed by split name.
sets_directories = {
    split: os.path.join(dataset_directory, split)
    for split in ('train', 'val', 'test')
}

# Pair every split with its list of base names.
split_file_lists = {
    'train': train_filenames,
    'val': validation_filenames,
    'test': test_filenames,
}

# For each split, move the images first and then the matching label files
# into <split>/images and <split>/labels respectively.
for split, names in split_file_lists.items():
    split_root = sets_directories[split]
    for source_dir, subfolder, extension in (
        (images_directory, 'images', '.jpg'),
        (labels_directory, 'labels', '.txt'),
    ):
        create_and_move_files(
            names,
            source_dir,
            os.path.join(split_root, subfolder),
            extension,
        )

# Optional cleanup, left disabled: remove the original images/labels folders
# once they are empty.
# for folder in [images_directory, labels_directory]:
#     if os.path.exists(folder) and not os.listdir(folder):
#         os.rmdir(folder)

# Report the backup archive path and the directories that were created.
print(backup_zip_path, sets_directories)
原文地址:https://blog.csdn.net/weixin_38836909/article/details/137927317
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!