Citypersons数据集转VOC标准格式（YOLO 目标检测txt格式）

Cityscapes城市街景数据集包含在50个不同城市采集的一组立体视频序列，并提供高质量的像素级标注。数据集下载地址（需要注册申请，申请通过后才能下载）：https://www.cityscapes-dataset.com/

该数据集常用于语义分割，包含以下类别：

提供百度云链接地址：链接: https://pan.baidu.com/s/108_NgFheDIpnQRrwz5uhmw 提取码: dhr8

CityPersons数据集是Cityscapes数据集的子集，CityPersons的标注文件只标注了其中Human的类别，如上图。该标注文件下载地址为：

下载地址：https://bitbucket.org/shanshanzhang/citypersons/get/ae6814faa761.zip

将Cityscapes中标注好的类别提取为VOC标准格式（JPEGImages和Annotations；转txt的代码就不写了，参考博主其他博客）

#! /usr/bin/python
# -*- coding:UTF-8 -*-
"""Convert CityPersons annotations (.mat) into a VOC-style dataset.

For every image listed in the annotation file, the Cityscapes source image
is copied into the JPEGImages folder and a VOC XML file describing its
person boxes is written into the Annotations folder.
"""
import os, sys
import glob
import shutil

# Location of the CityPersons annotation file (.mat).
src_anno_path = r'c:\Users\rockhuang\Desktop\anno_train.mat'
# Location of the Cityscapes images (one sub-folder per city).
src_img_dir = r"g:\dataset\cityscapes\leftImg8bit\train\\"
# Output folders of the VOC dataset: copied images and XML annotations.
new_img = r"g:\dataset\cityscapes\JPEGImages"
new_xml = r"g:\dataset\cityscapes\Annotations"

# class_label values used by the annotation rows:
#   0: ignore regions, 1: pedestrians, 2: riders, 3: sitting persons,
#   4: other persons, 5: group of people
IGNORE_LABEL = 0


def build_voc_xml(img_name, width, height, boxes):
    """Return the VOC annotation XML text for one image.

    Args:
        img_name: image file name written into ``<filename>``.
        width: image width in pixels.
        height: image height in pixels.
        boxes: iterable of annotation rows; each row starts with
            ``class_label, x, y, w, h`` (any further columns are ignored),
            e.g. ``[1, 947, 406, 17, 40, 24000, 950, 407, 14, 39]``.

    Rows whose class_label is 0 (ignore region) are skipped; every other
    row is emitted as a ``person`` object.
    """
    parts = [
        '<annotation>\n',
        ' <folder>citysperson</folder>\n',
        ' <filename>' + str(img_name) + '</filename>\n',
        ' <size>\n',
        ' <width>' + str(width) + '</width>\n',
        ' <height>' + str(height) + '</height>\n',
        ' <depth>3</depth>\n',
        ' </size>\n',
    ]
    for row in boxes:
        if int(row[0]) == IGNORE_LABEL:
            continue
        # Convert to plain ints before adding: the rows come from a .mat
        # file and may be fixed-width numpy scalars, whose sum could wrap.
        x, y, w, h = (int(value) for value in row[1:5])
        parts.extend([
            ' <object>\n',
            ' <name>' + 'person' + '</name>\n',
            ' <pose>Unspecified</pose>\n',
            ' <truncated>0</truncated>\n',
            ' <difficult>0</difficult>\n',
            ' <bndbox>\n',
            ' <xmin>' + str(x) + '</xmin>\n',
            ' <ymin>' + str(y) + '</ymin>\n',
            # The annotation stores (x, y, w, h); VOC wants corner points.
            ' <xmax>' + str(x + w) + '</xmax>\n',
            ' <ymax>' + str(y + h) + '</ymax>\n',
            ' </bndbox>\n',
            ' </object>\n',
        ])
    parts.append('</annotation>\n')
    return ''.join(parts)


def main():
    """Copy every annotated image and write its VOC XML annotation."""
    # Third-party imports are kept local so that build_voc_xml can be
    # imported and reused without scipy / Pillow being installed.
    from PIL import Image
    from scipy.io import loadmat

    annotations = loadmat(src_anno_path)['anno_train_aligned'][0]

    for out_dir in (new_img, new_xml):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    for entry in annotations:
        record = entry[0][0]
        # e.g. frankfurt_000000_000294_leftImg8bit.png
        img_name = record[1][0]
        boxes = record[2]

        # Images are stored in a sub-folder named after the city, which is
        # the first underscore-separated token of the file name.
        city = img_name.split('_')[0]
        src_path = os.path.join(src_img_dir, city, img_name)
        shutil.copy(src_path, os.path.join(new_img, img_name))

        # The context manager releases the image file handle right away.
        with Image.open(src_path) as image:
            width, height = image.size
        print(boxes)

        xml_name = img_name.split('.')[0]
        xml_path = os.path.join(new_xml, xml_name + '.xml')
        with open(xml_path, 'w') as xml_file:
            xml_file.write(build_voc_xml(img_name, width, height, boxes))


if __name__ == "__main__":
    main()

用YOLO训练VOC格式数据集时，源代码中附有voc_label.py（如下），用它把标注转成归一化的txt就OK了

"""Convert VOC XML annotations into normalized YOLO label txt files."""
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# (year, image_set) pairs; the year is empty here, so paths resolve to
# VOCdevkit/VOC/... and the list files are named _train.txt, _val.txt, ...
sets = [('', 'train'), ('', 'val'), ('', 'test')]

# Class names to export; the index in this list is the YOLO class id.
classes = ["car", "person", "rider"]


def convert(size, box):
    """Turn a corner-style box into a normalized center-style box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension. The center is shifted by one pixel
        (``- 1``) before normalization.
    """
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x_center = (box[0] + box[1]) / 2.0 - 1
    y_center = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)


def convert_annotation(year, image_id):
    """Write the YOLO label file for one VOC annotation.

    Reads ``VOCdevkit/VOC<year>/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC<year>/labels/<image_id>.txt`` with one line per object:
    ``class_id x_center y_center width height``. Objects whose name is not
    in ``classes`` or that are marked difficult are skipped.
    """
    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    txt_path = 'VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id)
    # Context managers make sure both files are closed (and the label file
    # flushed) even if parsing fails half-way.
    with open(xml_path) as in_file, open(txt_path, 'w') as out_file:
        # Parenthesized form works as a statement on Python 2 and as a
        # function call on Python 3.
        print(in_file)
        root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text),
                 float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(
                str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


def main():
    """Convert every image set and write the per-set image list files."""
    wd = getcwd()
    for year, image_set in sets:
        labels_dir = 'VOCdevkit/VOC%s/labels/' % (year)
        if not os.path.exists(labels_dir):
            os.makedirs(labels_dir)

        ids_path = 'VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)
        with open(ids_path) as ids_file:
            image_ids = ids_file.read().strip().split()

        with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
            for image_id in image_ids:
                list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'
                                % (wd, year, image_id))
                convert_annotation(year, image_id)

    # NOTE: the per-set lists can afterwards be merged by hand, e.g.
    #   cat _train.txt _val.txt > train.txt
    #   cat _train.txt _val.txt _test.txt _train.txt _val.txt > train.all.txt


if __name__ == "__main__":
    main()