学习笔记——一些数据转换脚本(Python)
目录
学习笔记——一些数据转换脚本(Python)
注:json 文件是 X-Anylabeling 多边形矩阵的标注。
json2YOLO(txt)
- 自定义 `name2id`
- 自定义 `json_floder_path`、`txt_outer_path`
- 保证存放 txt 的文件夹存在
# 处理 X-Anylabeling 多边形矩阵的标注 json 转化 txt,提取点
import json
import os
name2id = {'crack_concrete': 4}  # Edit this: list your classes and the index assigned to each


def decode_json(json_floder_path, txt_outer_path, json_name):
    """Convert one X-Anylabeling polygon JSON file into a YOLO txt label file.

    Every shape whose ``shape_type`` is ``'polygon'`` becomes one output line:
    the class index looked up in ``name2id`` followed by the polygon's x/y
    coordinates, each divided by the image width/height. Shapes of any other
    type are skipped. The output file is named after the JSON file with its
    extension replaced by ``.txt`` and is overwritten on each call, so running
    the conversion twice does not duplicate labels.

    Args:
        json_floder_path: Folder containing the JSON annotation file.
        txt_outer_path: Existing folder that receives the ``.txt`` file.
        json_name: File name of the annotation inside ``json_floder_path``.

    Raises:
        KeyError: If a polygon's label is missing from ``name2id`` or the JSON
            lacks one of the keys read here.
    """
    json_path = os.path.join(json_floder_path, json_name)
    # NOTE(review): the gb2312 / errors='ignore' decoding is kept from the
    # original script; confirm it matches how the annotation files were saved.
    with open(json_path, 'r', encoding='gb2312', errors='ignore') as json_file:
        data = json.load(json_file)

    shapes = data['shapes']
    if shapes:
        # Same debug output as before, but no longer crashes on an empty list.
        print(shapes[0]['shape_type'])

    dw = 1. / data['imageWidth']
    dh = 1. / data['imageHeight']

    # Build every line before touching the output file so that a failure
    # (e.g. an unknown label) does not leave a half-written label file behind.
    lines = []
    for shape in shapes:
        if shape['shape_type'] != 'polygon':
            continue
        coords = []
        for pt in shape['points']:
            coords.append(float(pt[0]) * dw)
            coords.append(float(pt[1]) * dh)
        class_id = name2id[shape['label']]
        lines.append(str(class_id) + " " + " ".join(str(c) for c in coords) + '\n')

    txt_name = os.path.join(txt_outer_path, os.path.splitext(json_name)[0]) + '.txt'
    with open(txt_name, 'w') as f:
        f.writelines(lines)
if __name__ == "__main__":
    # Absolute path of the folder that holds the JSON annotations
    json_floder_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\jsons'
    # Absolute path of the folder that receives the txt label files
    txt_outer_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\labels'

    # The converter writes into this folder, so make sure it exists first.
    os.makedirs(txt_outer_path, exist_ok=True)

    for json_name in os.listdir(json_floder_path):
        # Only annotation files are converted; anything else is ignored.
        if not json_name.lower().endswith('.json'):
            continue
        decode_json(json_floder_path, txt_outer_path, json_name)
    print('-----------转化完毕------------')
VOC(xml)2YOLO(txt)
- 自定义 `class_mapping`
- 自定义 `input_folder`、`output_folder`
- 不必确保 `output_folder` 文件夹存在,程序会自动创建
import xml.etree.ElementTree as ET
import os
# Label map: class name -> YOLO class id
class_mapping = {
    "person": 0,
    "helmet": 1,
    "life jacket": 2,
    "truck": 3,
    "excavator": 4,
    "car crane": 5,
    "crawler crane": 6,
    "rotary drill rig": 7,
    "concrete tanker": 8,
    # Add more class mappings here
}

# Input (VOC XML) and output (YOLO txt) folder paths
input_folder = "../datasets/drone/labels/convert"
output_folder = "../datasets/drone/labels/train"

# Create the output folder up front; open(..., "w") below does not create
# missing parent directories.
os.makedirs(output_folder, exist_ok=True)

# Convert every XML annotation found in the input folder
for xml_file in os.listdir(input_folder):
    if not xml_file.endswith(".xml"):
        continue

    xml_path = os.path.join(input_folder, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Image size, used to normalise the box coordinates
    width = int(root.find("size/width").text)
    height = int(root.find("size/height").text)

    # Output file: same base name as the XML file, with a .txt extension
    txt_file = os.path.splitext(xml_file)[0] + ".txt"
    txt_path = os.path.join(output_folder, txt_file)

    # Write one "class x_center y_center width height" line per known object
    with open(txt_path, "w") as f:
        for obj in root.findall("object"):
            class_name = obj.find("name").text
            class_id = class_mapping.get(class_name)
            if class_id is None:
                # Objects whose class is not in class_mapping are skipped.
                continue

            # Parse each corner once instead of re-reading it per formula.
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)

            x_center = (xmin + xmax) / 2.0 / width
            y_center = (ymin + ymax) / 2.0 / height
            width_norm = (xmax - xmin) / width
            height_norm = (ymax - ymin) / height
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}\n")
image2h5
- 自定义 `image_folder`、`h5_folder`
- 不必确保 `h5_folder` 文件夹存在,程序会自动创建
import h5py
from PIL import Image
import numpy as np
import os
def image_to_h5(image_folder, h5_folder):
    """Convert every ``.png`` / ``.jpg`` image in a folder to an HDF5 file.

    Each image is loaded as a NumPy array and stored in a dataset named
    ``'image'`` inside ``<h5_folder>/<image base name>.h5``. Files with any
    other extension are ignored. A ``.png`` and a ``.jpg`` that share a base
    name map to the same output file; the one processed last wins.

    Args:
        image_folder: Folder containing the input images.
        h5_folder: Folder that receives the ``.h5`` files; created if missing.
    """
    # Make sure the output folder exists
    os.makedirs(h5_folder, exist_ok=True)

    # Keep only image files so the progress counter reflects real work
    image_names = [
        name for name in os.listdir(image_folder)
        if name.endswith(('.png', '.jpg'))
    ]
    total_files = len(image_names)

    for i, file_name in enumerate(image_names, 1):
        input_image_path = os.path.join(image_folder, file_name)
        # Swap the real extension for .h5; a plain replace('.png', ...) left
        # .jpg inputs with a .jpg output name.
        output_h5_path = os.path.join(h5_folder, os.path.splitext(file_name)[0] + '.h5')

        # Load the pixels, then release the image file handle right away
        with Image.open(input_image_path) as image:
            image_array = np.array(image)

        # Create the HDF5 file and store the array
        with h5py.File(output_h5_path, 'w') as hf:
            hf.create_dataset('image', data=image_array)

        # Report progress
        print(f"Processed {i}/{total_files} images. Current image: {file_name}")
if __name__ == "__main__":
    # Images are read from image_folder; the .h5 files are written to h5_folder.
    image_folder, h5_folder = '../data/crack_concrete/images', '../data/crack_concrete/h5s'
    image_to_h5(image_folder=image_folder, h5_folder=h5_folder)
json2npz
- 自定义 `json_folder`、`npz_folder`
- 不必确保 `npz_folder` 文件夹存在,程序会自动创建
import os
import json
import numpy as np
from PIL import Image, ImageDraw
def load_json(json_path):
    """Load a JSON file and return the parsed object.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content.
    """
    # Read raw bytes so the json module detects the UTF-8/16/32 encoding
    # itself instead of relying on the platform's default text encoding.
    with open(json_path, 'rb') as f:
        return json.load(f)
def create_image(data):
    """Read the image referenced by an annotation and return it as an array.

    Args:
        data: Parsed annotation dict; ``data['imagePath']`` is opened as given,
            so a relative path is resolved against the current working
            directory.

    Returns:
        The image pixels as a ``float32`` NumPy array.
    """
    # Use a context manager so the image file handle is closed after loading.
    with Image.open(data['imagePath']) as image:
        return np.array(image, dtype=np.float32)
def create_label(data):
    """Rasterise the annotated shapes into a mask with the image's size.

    The mask starts blank (all zeros) at ``imageWidth`` x ``imageHeight`` and
    every shape in ``data['shapes']`` is filled with 1. An annotation with no
    shapes yields an all-zero mask. Previously only the first shape was drawn
    and an empty shape list raised ``IndexError``.

    Args:
        data: Parsed annotation dict with ``imageWidth``, ``imageHeight`` and
            ``shapes`` (each shape carrying a ``points`` list of x/y pairs).

    Returns:
        The mask as a ``float32`` NumPy array.
    """
    label = Image.new('L', (data['imageWidth'], data['imageHeight']), 0)
    draw = ImageDraw.Draw(label)
    for shape in data['shapes']:
        points_tuple = [(float(point[0]), float(point[1])) for point in shape['points']]
        draw.polygon(points_tuple, fill=1)
    return np.array(label, dtype=np.float32)
def json_to_npz(json_folder, npz_folder):
    """Convert every ``.json`` annotation in a folder to an ``.npz`` file.

    For each annotation the referenced image and its label mask are built and
    saved together as ``<npz_folder>/<json base name>.npz`` under the keys
    ``image`` and ``label``.

    Args:
        json_folder: Folder containing the JSON annotation files.
        npz_folder: Folder that receives the ``.npz`` files; created if missing.
    """
    os.makedirs(npz_folder, exist_ok=True)

    json_files = [file for file in os.listdir(json_folder) if file.endswith('.json')]
    total_files = len(json_files)

    for i, json_file in enumerate(json_files, 1):
        # Join with the folder argument; the original joined with the
        # not-yet-assigned local json_path and raised UnboundLocalError.
        json_path = os.path.join(json_folder, json_file)
        json_data = load_json(json_path)
        image_array = create_image(json_data)
        label_array = create_label(json_data)

        # splitext keeps dots inside the base name (e.g. "a.b.json" -> "a.b")
        npz_file_path = os.path.join(npz_folder, os.path.splitext(json_file)[0] + '.npz')
        np.savez(npz_file_path, image=image_array, label=label_array)

        # Report progress
        print(f"Processed {i}/{total_files} JSON files. Current file: {json_file}")
if __name__ == "__main__":
    # Annotations are read from json_folder; the .npz files go to npz_folder.
    json_folder, npz_folder = '../data/crack_concrete/jsons', '../data/crack_concrete/npzs'
    json_to_npz(json_folder=json_folder, npz_folder=npz_folder)
原文地址:https://blog.csdn.net/qq_63512036/article/details/137677258
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!