本文用来记录如何将VOC格式的数据集转换成coco格式。(图:数据文件夹目录;annotations文件内容;pindata文件内容)首先我们先来了解一下voc数据集的目录格式(只用于目标检测)(图:voc数据集内容标准格式)。我们再来看一下coco数据集内容标准格式(图:coco数据集格式)。那我们先将我们的文件夹改成voc格式(文件夹名称要完全对应)。 ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='379'%20height='192'/svg) ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='473'%20height='193'/svg) ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='508'%20height='208'/svg) ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='370'%20height='130'/svg) ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='628'%20height='198'/svg) 除此之外,我们还需要新建一个labels.txt文件用来存放数据的类别名称(图:labels.txt存放位置及文件内容)。那么首先我们需要对标签文件进行划分,分成训练集、验证集以及测试集。这里我们按照8:1:1的比例来划分训练集:验证集:测试集。以下是划分代码,我们新建一个python文件叫做split.py,并将它放在如下图文件夹内(图:split.py存放位置):
# -*- coding: utf-8 -*-
"""
顺序:脚本A1
简介:分训练集、验证集和测试集,按照 8:1:1 的比例来分,训练集8,验证集1,测试集1
"""
import os
import random
import argparse
def split_dataset(xml_path, txt_path, train_percent=0.8, val_percent=0.1, seed=None):
    """Split the annotation files in ``xml_path`` into train/val/test name lists.

    The extension-less names of the ``.xml`` files found in ``xml_path`` are
    shuffled and written, one per line, to ``train.txt``, ``val.txt`` and
    ``test.txt`` inside ``txt_path`` (created when missing).

    Args:
        xml_path: Directory that holds the annotation ``.xml`` files.
        txt_path: Directory that receives the three ``.txt`` lists.
        train_percent: Fraction of the files assigned to the training list.
        val_percent: Fraction of the files assigned to the validation list.
            Everything that is neither train nor val goes to the test list.
        seed: Optional seed for a reproducible split; ``None`` gives a
            different split on every run.

    Returns:
        A dict mapping ``'train'``, ``'val'`` and ``'test'`` to the list of
        names written to the corresponding file.
    """
    # Only real annotation files take part in the split, and the extension is
    # removed with splitext instead of chopping a fixed four characters.
    stems = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_path)
        if entry.lower().endswith('.xml')
    )
    random.Random(seed).shuffle(stems)

    # Sizes are truncated, so any rounding remainder ends up in the test list.
    n_train = int(len(stems) * train_percent)
    n_val = int(len(stems) * val_percent)
    subsets = {
        'train': stems[:n_train],
        'val': stems[n_train:n_train + n_val],
        'test': stems[n_train + n_val:],
    }

    os.makedirs(txt_path, exist_ok=True)
    for subset_name, subset_stems in subsets.items():
        with open(os.path.join(txt_path, subset_name + '.txt'), 'w') as list_file:
            list_file.writelines(stem + '\n' for stem in subset_stems)
    return subsets


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Location of the xml files; they normally live under Annotations.
    parser.add_argument('--xml_path', default='../Annotations/', type=str, help='input xml label path')
    # Where the split lists are written; normally ImageSets/Main of the dataset.
    parser.add_argument('--txt_path', default='Main/', type=str, help='output txt label path')
    parser.add_argument('--seed', default=None, type=int, help='optional random seed for a reproducible split')
    opt = parser.parse_args()
    split_dataset(opt.xml_path, opt.txt_path, seed=opt.seed)
运行之后的结果保存在Main文件夹内,生成如下图所示的三个文件,内容是划分好的图片名称。 ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='640'%20height='467'/svg) 之后我们将Annotations中voc格式的标注文件转化成coco格式:新建一个voc2coco.py,并将它放在如下图所示位置。
import os
import argparse
import json
import xml.etree.ElementTree as ET
from typing import Dict, List
import re
def get_label2id(labels_path: str) -> Dict[str, int]:
    """Read whitespace-separated label names and number them starting at 1.

    Args:
        labels_path: Text file containing the label names.

    Returns:
        A dict mapping each label name to its 1-based position in the file.
    """
    with open(labels_path, 'r') as label_file:
        names = label_file.read().split()
    return {name: index for index, name in enumerate(names, start=1)}
def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '',
                 annpaths_list_path: str = None) -> List[str]:
    """Return the list of annotation file paths to convert.

    Args:
        ann_dir_path: Directory joined in front of every annotation id.
        ann_ids_path: Text file with whitespace-separated annotation ids.
        ext: File extension (without the dot) appended to every id; an empty
            string appends nothing.
        annpaths_list_path: Text file with whitespace-separated, ready-made
            annotation paths. When given, it is used as-is and the other
            arguments are ignored.

    Returns:
        The annotation paths, in file order.
    """
    # A ready-made path list takes precedence over the ids file.
    if annpaths_list_path is not None:
        with open(annpaths_list_path, 'r') as list_file:
            return list_file.read().split()

    # Otherwise build "<dir>/<id>.<ext>" for every id in the ids file.
    suffix = '' if ext == '' else '.' + ext
    with open(ann_ids_path, 'r') as ids_file:
        ids = ids_file.read().split()
    paths = []
    for ann_id in ids:
        paths.append(os.path.join(ann_dir_path, ann_id + suffix))
    return paths
def get_image_info(annotation_root, extract_num_from_imgid=True):
    """Build the COCO ``images`` entry for one VOC annotation.

    Args:
        annotation_root: Root element of the parsed annotation xml.
        extract_num_from_imgid: When true, the image id is the first run of
            digits in the file name (without extension) converted to ``int``;
            otherwise the id is the file name without its extension.

    Returns:
        A dict with the keys ``file_name``, ``height``, ``width`` and ``id``.

    Raises:
        ValueError: If the annotation has neither ``<path>`` nor
            ``<filename>``, has no ``<size>`` element, or a numeric id was
            requested but the file name contains no digits.
    """
    # ntpath splits on both '/' and '\', so a <path> written by an annotation
    # tool on Windows is reduced to its file name on every platform.
    import ntpath

    path = annotation_root.findtext('path')
    if path is None:
        filename = annotation_root.findtext('filename')
    else:
        filename = ntpath.basename(path)
    if not filename:
        raise ValueError('Annotation has neither a <path> nor a <filename> entry')

    img_name = ntpath.basename(filename)
    img_id = os.path.splitext(img_name)[0]
    if extract_num_from_imgid:
        digit_runs = re.findall(r'\d+', img_id)
        if not digit_runs:
            raise ValueError(
                f"Cannot extract a numeric image id from file name '{img_name}'")
        # NOTE(review): only the first digit run is used, so names such as
        # "2007_000027" and "2007_000032" map to the same id — confirm that
        # the dataset's file names make this unique.
        img_id = int(digit_runs[0])

    size = annotation_root.find('size')
    if size is None:
        raise ValueError(f"Annotation for '{img_name}' has no <size> element")
    width = int(size.findtext('width'))
    height = int(size.findtext('height'))

    image_info = {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': img_id
    }
    return image_info
def get_coco_annotation_from_obj(obj, label2id):
    """Convert one VOC ``<object>`` element into a COCO annotation dict.

    Args:
        obj: The ``<object>`` element, containing ``<name>`` and ``<bndbox>``.
        label2id: Mapping from label name to category id.

    Returns:
        A dict with ``area``, ``iscrowd``, ``bbox`` (``[x, y, width, height]``),
        ``category_id``, ``ignore`` and an empty ``segmentation``. The caller
        is expected to add ``image_id`` and ``id``.

    Raises:
        AssertionError: If the label is unknown or the box has no positive
            width and height.
    """
    label = obj.findtext('name')
    assert label in label2id, f"Error: {label} is not in label2id !"
    category_id = label2id[label]

    bndbox = obj.find('bndbox')
    # Some annotation tools write coordinates as floats ("12.0"); parse through
    # float so those files do not crash. Fractional parts are truncated.
    xmin, ymin, xmax, ymax = (
        int(float(bndbox.findtext(tag))) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    # The top-left corner is shifted by one pixel, presumably to turn 1-based
    # VOC coordinates into 0-based ones — confirm for your annotation tool.
    xmin -= 1
    ymin -= 1
    assert xmax > xmin and ymax > ymin, f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}"

    o_width = xmax - xmin
    o_height = ymax - ymin
    ann = {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        'segmentation': []  # This script is not for segmentation
    }
    return ann
def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    """Convert VOC annotation xml files into a single COCO json file.

    Args:
        annotation_paths: Paths of the VOC xml files to convert.
        label2id: Mapping from label name to category id.
        output_jsonpath: Destination json file. Its parent directory is
            created when it does not exist yet.
        extract_num_from_imgid: Forwarded to ``get_image_info``.
    """
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # annotation ids are numbered from 1 across all images
    for a_path in annotation_paths:
        # Read annotation xml
        ann_root = ET.parse(a_path).getroot()

        img_info = get_image_info(annotation_root=ann_root,
                                  extract_num_from_imgid=extract_num_from_imgid)
        img_id = img_info['id']
        output_json_dict['images'].append(img_info)

        for obj in ann_root.findall('object'):
            ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            ann.update({'image_id': img_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for label, label_id in label2id.items():
        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
        output_json_dict['categories'].append(category_info)

    # Make sure the destination directory exists, so that a custom output
    # path does not fail at the very end of the conversion.
    output_dir = os.path.dirname(output_jsonpath)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_jsonpath, 'w') as f:
        json.dump(output_json_dict, f)
    print('Convert successfully !')
def main():
    """Parse the command-line options and run the VOC-to-COCO conversion."""
    cli = argparse.ArgumentParser(
        description='This script support converting voc format xmls to coco format json')
    # (flag, default) pairs; every option is a string and they are registered
    # in this order.
    string_options = (
        ('--ann_dir', './Annotations'),
        ('--ann_ids', './ImageSets/Main/test.txt'),
        ('--ann_paths_list', None),
        ('--labels', './labels.txt'),
        ('--output', './output/annotations/test.json'),
        ('--ext', 'xml'),
    )
    for flag, default in string_options:
        cli.add_argument(flag, type=str, default=default)
    options = cli.parse_args()

    categories = get_label2id(labels_path=options.labels)
    xml_paths = get_annpaths(
        ann_dir_path=options.ann_dir,
        ann_ids_path=options.ann_ids,
        ext=options.ext,
        annpaths_list_path=options.ann_paths_list
    )
    convert_xmls_to_cocojson(
        annotation_paths=xml_paths,
        label2id=categories,
        output_jsonpath=options.output,
        extract_num_from_imgid=True
    )
if __name__ == '__main__':
    # Directory of the default --output file; create it before converting.
    default_output_dir = './output/annotations'
    if not os.path.exists(default_output_dir):
        os.makedirs(default_output_dir)
    main()
![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='1011'%20height='215'/svg) 运行之后会生成对应的json文件,但是该程序需要我们手动去改两次:将上图的test.txt和test.json分别改成train.txt、train.json和val.txt、val.json,再分别运行,可得到如下图结果 ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='606'%20height='197'/svg) 之后我们需要对原始的图像按照程序自动划分好的文件来分别存放训练集、验证集和测试集。新建一个splitimg.py放在如下位置:
import os
import shutil
images_file_path = './JPEGImages/'
split_data_file_path = './ImageSets/Main/'
new_images_file_path = './output/'


def copy_split_images(images_dir, split_dir, output_dir, image_ext='.jpg'):
    """Copy the images named in the split lists into per-subset folders.

    For each of ``train.txt``, ``val.txt`` and ``test.txt`` found in
    ``split_dir``, every listed image is copied from ``images_dir`` into
    ``output_dir/<subset>/``. The three subset folders are always created.

    Args:
        images_dir: Directory holding the source images.
        split_dir: Directory holding the split ``.txt`` lists.
        output_dir: Directory that receives the ``train``, ``val`` and
            ``test`` folders.
        image_ext: Extension (with the dot) appended to every listed name.

    Returns:
        A dict mapping ``'train'``, ``'val'`` and ``'test'`` to the number of
        images copied for that subset.
    """
    subsets = ('train', 'val', 'test')
    for subset in subsets:
        os.makedirs(os.path.join(output_dir, subset), exist_ok=True)

    copied = dict.fromkeys(subsets, 0)
    for entry in sorted(os.listdir(split_dir)):
        subset, ext = os.path.splitext(entry)
        if ext != '.txt' or subset not in subsets:
            print("Error, Please check the file name of folder")
            continue
        with open(os.path.join(split_dir, entry), 'r') as txt_file:
            for line in txt_file:
                stem = line.strip('\r\n')
                if not stem:
                    # A blank line would otherwise request a file named
                    # just "<image_ext>".
                    continue
                shutil.copyfile(
                    os.path.join(images_dir, stem + image_ext),
                    os.path.join(output_dir, subset, stem + image_ext),
                )
                copied[subset] += 1
    return copied


if __name__ == '__main__':
    copy_split_images(images_file_path, split_data_file_path, new_images_file_path)
运行之后得到如下图所示的三个文件夹 ![](data:image/svg+xml;utf8,svg%20xmlns='http://www.w3.org/2000/svg'%20width='428'%20height='225'/svg) 这样我们就完成了voc数据集到coco数据集的转换。
|