Yolo数据集划分(训练集+验证集+测试集)
Yolo数据集划分(训练集+验证集+测试集)
一、前言
网上看了一些朋友写的代码,总感觉不太符合自己的胃口,索性自己写了一个。下面这份代码只需要提供图片所在文件夹和标签所在文件夹,就可以自动按比例划分训练集、验证集和测试集。
代码有比较详细的注释,如有疑问,请在评论区留言或提 issue,我会尽快回复。
二、代码
import os
import shutil
import random
def SplitDatasets(src_img_folder, src_label_folder, Dest_Folder, train_ratio, val_ratio, test_ratio):
    """Randomly split a YOLO dataset into train / valid / test sub-folders.

    Every file found directly inside ``src_img_folder`` is treated as an image.
    Its label is expected in ``src_label_folder`` under the same base name with
    a ``.txt`` extension. Files are copied (not moved) into
    ``Dest_Folder/{train,valid,test}/{images,labels}``.

    Args:
        src_img_folder: Folder containing the image files.
        src_label_folder: Folder containing the ``.txt`` label files.
        Dest_Folder: Root folder under which the split folders are created.
        train_ratio: Fraction of the files that goes to the training set.
        val_ratio: Fraction for the validation set; ``0`` disables it.
        test_ratio: Fraction for the test set; ``0`` disables it.

    When only one of ``val_ratio`` / ``test_ratio`` is positive, that subset
    receives every file not assigned to the training set.

    Raises:
        FileNotFoundError: If an image has no matching label file.
    """
    # Destination folders for each subset
    Dest_train_img_folder = os.path.join(Dest_Folder, 'train', 'images')
    Dest_train_label_folder = os.path.join(Dest_Folder, 'train', 'labels')
    Dest_valid_img_folder = os.path.join(Dest_Folder, 'valid', 'images')
    Dest_valid_label_folder = os.path.join(Dest_Folder, 'valid', 'labels')
    Dest_Test_img_folder = os.path.join(Dest_Folder, 'test', 'images')
    Dest_Test_label_folder = os.path.join(Dest_Folder, 'test', 'labels')
    Dir_List = [Dest_train_img_folder, Dest_train_label_folder,
                Dest_valid_img_folder, Dest_valid_label_folder,
                Dest_Test_img_folder, Dest_Test_label_folder]
    for folder in Dir_List:
        # makedirs (not mkdir): the intermediate 'train'/'valid'/'test'
        # folders usually do not exist yet.
        os.makedirs(folder, exist_ok=True)

    # Only regular files are candidates; sub-directories cannot be copied
    # with copy2. Sorting first makes a seeded shuffle reproducible, since
    # os.listdir returns entries in arbitrary order.
    All_Files = sorted(
        item for item in os.listdir(src_img_folder)
        if os.path.isfile(os.path.join(src_img_folder, item))
    )
    random.shuffle(All_Files)

    total = len(All_Files)
    train_end = int(total * train_ratio)
    Train_Sets = All_Files[:train_end]
    Valid_Sets, Test_Sets = [], []
    if val_ratio > 0:
        if test_ratio > 0:
            # Both subsets requested. A single shared boundary guarantees
            # that no file lands in both valid and test; clamping keeps the
            # test set from reaching back into the training slice.
            test_start = max(train_end, total - int(total * test_ratio))
            Valid_Sets = All_Files[train_end:test_start]
            Test_Sets = All_Files[test_start:]
        else:
            # No test set: everything after train becomes validation.
            Valid_Sets = All_Files[train_end:]
    elif test_ratio > 0:
        # No validation set: everything after train becomes test.
        Test_Sets = All_Files[train_end:]

    _copy_pairs(Train_Sets, src_img_folder, src_label_folder,
                Dest_train_img_folder, Dest_train_label_folder)
    _copy_pairs(Valid_Sets, src_img_folder, src_label_folder,
                Dest_valid_img_folder, Dest_valid_label_folder)
    _copy_pairs(Test_Sets, src_img_folder, src_label_folder,
                Dest_Test_img_folder, Dest_Test_label_folder)


def _copy_pairs(files, src_img_folder, src_label_folder, dest_img_folder, dest_label_folder):
    """Copy each image and its same-named ``.txt`` label to the destination folders."""
    for file in files:
        shutil.copy2(os.path.join(src_img_folder, file), dest_img_folder)
        print(f"copied {file} to {dest_img_folder}")
        # splitext strips only the final extension, so 'a.b.jpg' -> 'a.b.txt'.
        label = os.path.splitext(file)[0] + '.txt'
        shutil.copy2(os.path.join(src_label_folder, label), dest_label_folder)
        print(f"copied {label} to {dest_label_folder}")
# Source folders: images and their label files
src_img_folder = r'E:\Datasets\ExDark\images'
src_label_folder = r'E:\Datasets\ExDark\Annotations'
# Destination root folder for the split dataset
Dest_Folder = r'E:\Datasets\ExDark'
# Fractions of the dataset for the train / validation / test subsets
train_ratio, val_ratio, test_ratio = 0.8, 0, 0.2
# Run the dataset split
SplitDatasets(
    src_img_folder=src_img_folder,
    src_label_folder=src_label_folder,
    Dest_Folder=Dest_Folder,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
    test_ratio=test_ratio,
)
三、注意事项
- 图片与标签文件需同名(仅扩展名不同,标签为 .txt),并分别放在图片文件夹和标签文件夹中;除此之外好像没有其他需要注意的。
原文地址:https://blog.csdn.net/qq_46396470/article/details/140160447
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!