自学内容网 自学内容网

Yolo数据集划分(训练集+验证集+测试集)

Yolo数据集划分(训练集+验证集+测试集)

一、前言

网上看了一些朋友写的代码,总感觉不太符合自己的胃口,索性自己写了一个。下面这份代码只需要提供图片所在文件夹和标签所在文件夹,就可以自动按比例划分训练集、验证集和测试集。

代码有比较详细的注释,如有疑问,请在评论区留言或提 issue,我会尽快回复。

二、代码

import os
import shutil
import random


def _copy_split(files, src_img_folder, src_label_folder, dest_img_folder, dest_label_folder):
    """Copy every image in *files* and its same-stem ``.txt`` label into one split."""
    for file in files:
        shutil.copy2(os.path.join(src_img_folder, file), dest_img_folder)
        print(f"copied {file} to {dest_img_folder}")
        # splitext strips only the final extension, so a dotted name such as
        # "frame.0001.jpg" maps to "frame.0001.txt" instead of "frame.txt".
        label = os.path.splitext(file)[0] + '.txt'
        shutil.copy2(os.path.join(src_label_folder, label), dest_label_folder)
        print(f"copied {label} to {dest_label_folder}")


def SplitDatasets(src_img_folder: str, src_label_folder: str, Dest_Folder: str,
                  train_ratio: float, val_ratio: float, test_ratio: float) -> None:
    """Randomly split an image/label dataset into train, valid and test folders.

    Creates ``<Dest_Folder>/{train,valid,test}/{images,labels}`` (including any
    missing parent directories), shuffles the files found in *src_img_folder*,
    and copies each image together with the label file that shares its stem
    and has a ``.txt`` extension.

    Args:
        src_img_folder: Folder containing the image files.
        src_label_folder: Folder containing one ``<stem>.txt`` label per image.
        Dest_Folder: Root folder under which the split folders are created.
        train_ratio: Fraction of files placed in the training set.
        val_ratio: Fraction for the validation set; ``0`` disables it.
        test_ratio: Fraction for the test set; ``0`` disables it.

    Raises:
        FileNotFoundError: If an image has no matching label file (raised by
            ``shutil.copy2``).

    Notes:
        When only one of *val_ratio* / *test_ratio* is positive, that split
        receives every file left after the training set. When both are
        positive, the test set takes ``int(n * test_ratio)`` files from the
        end and the validation set takes whatever lies between the training
        and test sets. When both are zero, the remaining files are not copied.
    """
    # Create the destination layout; makedirs also creates the intermediate
    # "train"/"valid"/"test" directories and tolerates existing ones.
    dest_folders = {}
    for split in ('train', 'valid', 'test'):
        img_folder = os.path.join(Dest_Folder, split, 'images')
        label_folder = os.path.join(Dest_Folder, split, 'labels')
        os.makedirs(img_folder, exist_ok=True)
        os.makedirs(label_folder, exist_ok=True)
        dest_folders[split] = (img_folder, label_folder)

    # Only regular files are candidates; subdirectories cannot be copied with copy2.
    all_files = [item for item in os.listdir(src_img_folder)
                 if os.path.isfile(os.path.join(src_img_folder, item))]
    random.shuffle(all_files)  # randomize the order before slicing

    total = len(all_files)
    train_end = int(total * train_ratio)
    split_files = {'train': all_files[:train_end], 'valid': [], 'test': []}

    if val_ratio > 0:
        if test_ratio > 0:  # both validation and test sets requested
            # A single boundary is shared by both slices so no file can end up
            # in two sets; clamping keeps the test set from reaching back into
            # the training set when the ratios sum to more than 1.
            test_start = max(train_end, total - int(total * test_ratio))
            split_files['valid'] = all_files[train_end:test_start]
            split_files['test'] = all_files[test_start:]
        else:  # validation only: it takes everything after the training set
            split_files['valid'] = all_files[train_end:]
    elif test_ratio > 0:  # test only: it takes everything after the training set
        split_files['test'] = all_files[train_end:]

    for split, (img_folder, label_folder) in dest_folders.items():
        _copy_split(split_files[split], src_img_folder, src_label_folder,
                    img_folder, label_folder)


# Source folders: images and their label files
src_img_folder = r'E:\Datasets\ExDark\images'
src_label_folder = r'E:\Datasets\ExDark\Annotations'

# Destination root folder for the generated splits
Dest_Folder = r'E:\Datasets\ExDark'

# Split ratios for the training, validation and test sets
train_ratio = 0.8
val_ratio = 0
test_ratio = 0.2

# Run the dataset split
SplitDatasets(
    src_img_folder=src_img_folder,
    src_label_folder=src_label_folder,
    Dest_Folder=Dest_Folder,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
    test_ratio=test_ratio,
)

三、注意事项

  • 好像无

原文地址:https://blog.csdn.net/qq_46396470/article/details/140160447

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!