自学内容网

Split a dataset into train / val / test sets

import os

import shutil

import zipfile

from sklearn.model_selection import train_test_split

# Directory layout: the dataset root contains an 'images' and a 'labels' folder.
dataset_directory = r'E:\2024\reflect\reflect'
images_directory = os.path.join(dataset_directory, 'images')
labels_directory = os.path.join(dataset_directory, 'labels')

# Path of the backup zip archive (kept inside the dataset root).
backup_zip_path = os.path.join(dataset_directory, 'dataset_backup.zip')

# Back up images/ and labels/ before any file is moved.
# An existing archive is never overwritten: once the split has run, the source
# folders are empty, so re-creating the archive on a second run would replace
# the only good backup with an empty one.
if os.path.exists(backup_zip_path):
    print('Backup archive already exists, keeping it:', backup_zip_path)
else:
    # Write to a temporary name first so an interrupted run cannot leave a
    # truncated archive that a later run would mistake for a valid backup.
    partial_zip_path = backup_zip_path + '.part'
    # ZIP_DEFLATED actually compresses the entries; the default (ZIP_STORED)
    # only stores them uncompressed.
    with zipfile.ZipFile(partial_zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as backup_zip:
        for folder in (images_directory, labels_directory):
            for root, _dirs, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    # Archive names are relative to the dataset root
                    # (e.g. images/xxx.jpg, labels/xxx.txt).
                    backup_zip.write(file_path, os.path.relpath(file_path, dataset_directory))
    os.replace(partial_zip_path, backup_zip_path)

# Base names (without extension) of every .jpg file in the images folder.
# Sorted because os.listdir() returns entries in arbitrary order; without a
# stable input order the fixed seed below would not give a reproducible split.
image_filenames = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir(images_directory)
    if filename.endswith('.jpg')
)

# Split ratios (train / validation / test).
train_ratio = 0.80
validation_ratio = 0.15
test_ratio = 0.05

# Fixed seed so that re-running the script on the same data yields the same
# train/val/test assignment.
split_seed = 42

# First cut: train vs. everything else (validation + test).
train_filenames, test_filenames = train_test_split(
    image_filenames,
    test_size=1 - train_ratio,
    random_state=split_seed,
)

# Second cut: divide the held-out part into validation and test, using the
# test share relative to the held-out fraction.
validation_filenames, test_filenames = train_test_split(
    test_filenames,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=split_seed,
)

# Helper that creates a destination directory and moves files into it.
def create_and_move_files(file_list, source_folder, destination_folder, file_extension):
    """Move ``name + file_extension`` for each name from source to destination.

    The destination folder is created if it does not exist. A name whose
    source file is absent (for example an image that has no label file) is
    skipped and reported instead of raising, so one missing file cannot abort
    the run and leave the dataset half moved.

    Args:
        file_list: Iterable of base file names without extension.
        source_folder: Directory the files currently live in.
        destination_folder: Directory the files are moved to.
        file_extension: Extension, including the dot, appended to each name.

    Returns:
        None.
    """
    os.makedirs(destination_folder, exist_ok=True)

    missing = []
    for filename in file_list:
        basename = filename + file_extension
        source_path = os.path.join(source_folder, basename)
        if not os.path.isfile(source_path):
            missing.append(basename)
            continue
        shutil.move(source_path, os.path.join(destination_folder, basename))

    if missing:
        print('Skipped', len(missing), 'missing file(s) in', source_folder + ':',
              ', '.join(missing))

# Root directory of each split, keyed by split name.
sets_directories = {
    split_name: os.path.join(dataset_directory, split_name)
    for split_name in ('train', 'val', 'test')
}

# Base names assigned to each split.
split_members = {
    'train': train_filenames,
    'val': validation_filenames,
    'test': test_filenames,
}

# For every split, move the images first and then the matching label files
# into <split>/images and <split>/labels.
for set_name, split_root in sets_directories.items():
    file_list = split_members[set_name]
    for subfolder, origin, extension in (
        ('images', images_directory, '.jpg'),
        ('labels', labels_directory, '.txt'),
    ):
        create_and_move_files(
            file_list,
            origin,
            os.path.join(split_root, subfolder),
            extension,
        )

# Optional cleanup (left disabled): remove the now-empty images and labels folders.
# for folder in [images_directory, labels_directory]:
#     if os.path.exists(folder) and not os.listdir(folder):
#         os.rmdir(folder)

# Report the backup archive path and the directories created for each split.
print(backup_zip_path, sets_directories)


 


原文地址:https://blog.csdn.net/weixin_38836909/article/details/137927317

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!