学习笔记——一些数据转换脚本（Python）

🕗 发布于 2024-04-13 13:54 python numpy pycharm json YOLO

学习笔记——一些数据转换脚本（Python）

注：json 文件是 X-AnyLabeling 多边形矩阵的标注。

json2YOLO（txt）

自定义name2id
自定义json_floder_path、txt_outer_path
保证存放 txt 的文件夹存在

# 处理 X-Anylabeling 多边形矩阵的标注 json 转化 txt，提取点
import json
import os

name2id = {'crack_concrete': 4}  # Edit to match your classes: label name -> class index


def decode_json(json_floder_path, txt_outer_path, json_name):
    """Convert one polygon-annotation json file into a YOLO-style txt label file.

    Every shape whose ``shape_type`` is ``'polygon'`` becomes one output line of
    the form ``<class_id> x1 y1 x2 y2 ...`` where the coordinates are divided by
    the image width / height stored in the json. Other shape types are skipped.

    Args:
        json_floder_path: Folder that contains the json file.
        txt_outer_path: Existing folder that receives the txt file.
        json_name: File name of the json file inside ``json_floder_path``.

    Raises:
        KeyError: If a polygon's label is not a key of ``name2id`` or a
            required json field is missing.
    """
    json_path = os.path.join(json_floder_path, json_name)

    # Read bytes and decode explicitly: try UTF-8 (with optional BOM) first and
    # only fall back to the lossy gb2312 decoding this script used before, so
    # non-ASCII labels in UTF-8 files are no longer garbled.
    with open(json_path, 'rb') as src:
        raw = src.read()
    try:
        text = raw.decode('utf-8-sig')
    except UnicodeDecodeError:
        text = raw.decode('gb2312', errors='ignore')
    data = json.loads(text)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    # Build every line before touching the output file so that a failure
    # (e.g. an unknown label) cannot leave a half-written txt behind.
    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'polygon':
            continue
        coords = []
        for pt in shape['points']:
            coords.append(str(float(pt[0]) / img_w))
            coords.append(str(float(pt[1]) / img_h))
        lines.append(str(name2id[shape['label']]) + " " + " ".join(coords) + '\n')

    txt_name = os.path.join(txt_outer_path, os.path.splitext(json_name)[0] + '.txt')
    # 'w' rather than 'a': re-running the conversion must replace the labels,
    # not append a second copy of them.
    with open(txt_name, 'w') as f:
        f.writelines(lines)


if __name__ == "__main__":
    json_floder_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\jsons'  # absolute path of the folder holding the json files
    txt_outer_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\labels'  # absolute path of the folder receiving the txt files
    # decode_json does not create the output folder, so make sure it exists.
    os.makedirs(txt_outer_path, exist_ok=True)
    # Skip anything that is not a .json file so stray files in the folder are
    # not parsed as annotations.
    json_names = [name for name in os.listdir(json_floder_path) if name.endswith('.json')]
    for json_name in json_names:
        decode_json(json_floder_path, txt_outer_path, json_name)

    print('-----------转化完毕------------')

VOC（xml）2YOLO（txt）

自定义class_mapping
自定义input_folder、output_folder
不必确保output_folder文件夹存在，程序会自动创建

import xml.etree.ElementTree as ET
import os

# Label mapping: class name -> YOLO class ID
class_mapping = {
    "person": 0,
    "helmet": 1,
    "life jacket": 2,
    "truck": 3,
    "excavator": 4,
    "car crane": 5,
    "crawler crane": 6,
    "rotary drill rig": 7,
    "concrete tanker": 8,
    # add more class mappings here
}

# Input and output folder paths
input_folder = "../datasets/drone/labels/convert"
output_folder = "../datasets/drone/labels/train"

xml_files = os.listdir(input_folder)

# open(..., "w") does not create missing directories, so create the output
# folder before any label file is written.
os.makedirs(output_folder, exist_ok=True)

# Convert every XML annotation file in the input folder
for xml_file in xml_files:
    if not xml_file.endswith(".xml"):
        continue

    xml_path = os.path.join(input_folder, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Image width and height, used to normalise the box coordinates
    width = int(root.find("size/width").text)
    height = int(root.find("size/height").text)

    # Output file: same base name as the XML file, with a .txt extension
    txt_file = os.path.splitext(xml_file)[0] + ".txt"
    txt_path = os.path.join(output_folder, txt_file)

    # Write one "<class_id> <x_center> <y_center> <width> <height>" line per object
    with open(txt_path, "w") as f:
        for obj in root.findall("object"):
            class_name = obj.find("name").text
            class_id = class_mapping.get(class_name)
            if class_id is None:
                # Objects whose class is not in class_mapping are left out.
                continue
            bbox = obj.find("bndbox")
            # Parse each corner once instead of once per derived value.
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)
            x_center = (xmin + xmax) / 2.0 / width
            y_center = (ymin + ymax) / 2.0 / height
            width_norm = (xmax - xmin) / width
            height_norm = (ymax - ymin) / height
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}\n")

image2h5

自定义image_folder、h5_folder
不必确保h5_folder文件夹存在，程序会自动创建

import h5py
from PIL import Image
import numpy as np
import os


def image_to_h5(image_folder, h5_folder):
    """Convert every .png / .jpg file in a folder to an HDF5 file.

    Each image is loaded into a NumPy array and stored as the dataset
    ``'image'`` in ``<h5_folder>/<image base name>.h5``. The output folder is
    created when it does not exist. Files with other extensions are ignored.

    Args:
        image_folder: Folder containing the input images.
        h5_folder: Folder that receives the .h5 files.
    """
    os.makedirs(h5_folder, exist_ok=True)

    # Select the images first so the progress counter reflects images only.
    image_files = [name for name in os.listdir(image_folder) if name.endswith(('.png', '.jpg'))]
    total_files = len(image_files)

    for i, file_name in enumerate(image_files, 1):
        input_image_path = os.path.join(image_folder, file_name)
        # Swap the real extension for .h5; a plain replace('.png', '.h5') left
        # .jpg inputs with a .jpg output name.
        output_h5_path = os.path.join(h5_folder, os.path.splitext(file_name)[0] + '.h5')

        # Load the pixels, then release the image file handle right away.
        with Image.open(input_image_path) as image:
            image_array = np.array(image)

        with h5py.File(output_h5_path, 'w') as hf:
            hf.create_dataset('image', data=image_array)

        # Report progress
        print(f"Processed {i}/{total_files} images. Current image: {file_name}")


if __name__ == "__main__":
    # Input folder with the images, and output folder passed to image_to_h5.
    image_folder, h5_folder = '../data/crack_concrete/images', '../data/crack_concrete/h5s'
    image_to_h5(image_folder=image_folder, h5_folder=h5_folder)

json2npz

自定义json_folder、npz_folder
不必确保npz_folder文件夹存在，程序会自动创建

import os
import json
import numpy as np
from PIL import Image, ImageDraw


def load_json(json_path):
    """Load a json file and return the parsed object.

    The file is read as bytes so that ``json.loads`` picks the encoding
    (UTF-8/16/32) itself instead of relying on the platform's default text
    encoding. If that decoding fails, the file is re-read in text mode with
    the platform default, which is what this function did previously.

    Args:
        json_path: Path of the json file.

    Returns:
        The decoded json content.
    """
    with open(json_path, 'rb') as f:
        raw = f.read()
    try:
        return json.loads(raw)
    except UnicodeDecodeError:
        with open(json_path, 'r') as f:
            return json.load(f)


def create_image(data):
    """Load the image named by ``data['imagePath']`` as a float32 array.

    The path is handed to ``Image.open`` unchanged, so a relative path is
    resolved against the current working directory.
    """
    image_path = data['imagePath']
    pixels = Image.open(image_path)
    return np.array(pixels, dtype=np.float32)


def create_label(data):
    """Rasterize the annotated shapes into a mask the size of the image.

    A single-channel image of ``imageWidth`` x ``imageHeight`` is created with
    all pixels 0, and every shape in ``data['shapes']`` is filled with 1.
    Previously only the first shape was drawn and a file without shapes raised
    IndexError; now all shapes are drawn and an empty list yields an all-zero
    mask.

    Args:
        data: Parsed annotation dict with ``imageWidth``, ``imageHeight`` and
            ``shapes`` (each shape carrying a ``points`` list of x/y pairs).

    Returns:
        The mask as a float32 NumPy array.
    """
    label = Image.new('L', (data['imageWidth'], data['imageHeight']), 0)
    draw = ImageDraw.Draw(label)
    for shape in data['shapes']:
        vertices = [(float(point[0]), float(point[1])) for point in shape['points']]
        # Fewer than three vertices cannot enclose any area; skip them.
        if len(vertices) < 3:
            continue
        draw.polygon(vertices, fill=1)
    return np.array(label, dtype=np.float32)


def json_to_npz(json_folder, npz_folder):
    """Convert every .json file in a folder to an .npz file.

    For each json file the image array (``create_image``) and the label array
    (``create_label``) are saved together with ``np.savez`` under the keys
    ``image`` and ``label``. The output folder is created when missing.

    Args:
        json_folder: Folder containing the json annotation files.
        npz_folder: Folder that receives the .npz files.
    """
    os.makedirs(npz_folder, exist_ok=True)
    json_files = [file for file in os.listdir(json_folder) if file.endswith('.json')]
    total_files = len(json_files)

    for i, json_file in enumerate(json_files, 1):
        # Join onto json_folder: the original joined onto json_path itself,
        # which is unbound at this point and raised UnboundLocalError.
        json_path = os.path.join(json_folder, json_file)
        json_data = load_json(json_path)
        image_array = create_image(json_data)
        label_array = create_label(json_data)
        # splitext drops only the final extension, so dotted names such as
        # "a.1.json" and "a.2.json" no longer collapse onto the same output.
        npz_file_path = os.path.join(npz_folder, os.path.splitext(json_file)[0] + '.npz')
        np.savez(npz_file_path, image=image_array, label=label_array)

        # Report progress
        print(f"Processed {i}/{total_files} JSON files. Current file: {json_file}")


if __name__ == "__main__":
    # Folder with the json annotations, and output folder passed to json_to_npz.
    json_folder, npz_folder = '../data/crack_concrete/jsons', '../data/crack_concrete/npzs'
    json_to_npz(json_folder=json_folder, npz_folder=npz_folder)

原文地址：https://blog.csdn.net/qq_63512036/article/details/137677258

免责声明：本站文章内容转载自网络资源，如本站内容侵犯了原著者的合法权益，可联系本站删除。更多内容请关注自学内容网（zxcms.com）！

上一篇：二分最大值最小化-力扣-打家劫舍4
下一篇：论软件的三层架构设计，使用三段论写一篇架构设计师的论文

数据结构编程实践20讲(Python版)—06二叉搜索树
数据结构编程实践20讲(Python版)—06二叉搜索树
阅读更多2024-10-09
【Git原理与使用】远程操作&&标签管理
【Git原理与使用】远程操作&&标签管理
阅读更多2024-10-09
粉碎玉米的机器：水滴式饲料粉碎机
适用于各种饲料原料的粉碎，如稻壳、玉米、米糠等，广泛应用于饲料加工领域。水滴式饲料粉碎机凭借其高效、环保、安全、易于维护等多重优势，在饲料加工领域具有广泛的应用前景和市场需求。‌2、水滴外形和大型机身
阅读更多2024-10-09
bcp数据导入处理
select 'bcp 问题数据库..'+name + ' out '+'d:\testdb\'+name+'.txt -c -Usa -S服务器名小写 -P' FROM SYSOBJECTS WHE
阅读更多2024-10-09
全体起立！CEEMDAN-Kmeans-VMD-CNN-Attention双重分解+卷积神经网络注意力机制多元时间序列预测
全体起立！CEEMDAN-Kmeans-VMD-CNN-Attention双重分解+卷积神经网络注意力机制多元时间序列预测
阅读更多2024-10-09
MySQL基础教程（一）：连接数据库和使用表
登录MySQL的命令，查看数据库和表，查看某个表的结构。
阅读更多2024-10-09
df命令输出的详细解释
df -h命令是一个强大的工具，用于快速查看系统中各个文件系统的磁盘空间使用情况。监控磁盘空间，防止系统因磁盘满而出现问题。识别和管理虚拟文件系统和挂载点。有效管理用户数据和系统文件，确保系统运行顺畅
阅读更多2024-10-09
无尽加班何时休——状态模式
无尽加班何时休——状态模式
阅读更多2024-10-09
Java虚拟机（JVM）介绍
也被称为JVM的内存结构，包括方法区（Method Area）、堆（Heap）、Java栈（Java Stack）、本地方法栈（Native Method Stack）和程序计数器（Program C
阅读更多2024-10-09
不同jdk版本间的替换
假设安装了 JDK 21 后，发现电脑有兼容性问题或其他原因需要切换回 JDK 8，替换过程很简单。你只需卸载 JDK 21 或者让系统使用 JDK 8。
阅读更多2024-10-09

学习笔记——一些数据转换脚本（Python）

目录

文章目录

学习笔记——一些数据转换脚本（Python）

json2YOLO（txt）

VOC（xml）2YOLO（txt）

image2h5

json2npz

学习笔记——一些数据转换脚本（Python）

目录

文章目录

学习笔记——一些数据转换脚本（Python）

json2YOLO（txt）

VOC（xml）2YOLO（txt）

image2h5

json2npz

相关文章