1 前言
最近做的是一个比较偏的数据集,从网上找了700张图片,最多的类有500张,其余类都只有20-40张,极不平衡。用标注工具手动标注后做了下实验,发现精度特别低,只有数量最多的类别还可以。所以决定在大型开源数据集上面找,根据要研究的对象,最终确定了Open Image数据集。查了下相应的文章,发现都是千篇一律的 Ctrl C + Ctrl V,按指定类别下载的文章倒有,但是转为VOC格式的一个都没有。由于做的是目标检测这一块的,一个自己熟悉的标记格式很有帮助,而且VOC格式很多框架都支持,所以自己研究了一下谷歌数据集的标记特点并写了一个Python转化脚本。
2 Open Image v4简单介绍
谷歌OID官网主页:https://storage.googleapis.com/openimages/web/index.html
如果是做目标检测,要重点关注 class-descriptions-boxable.csv、validation-annotations-bbox.csv、train-annotations-bbox.csv、test-annotations-bbox.csv 这四个文件,然后按照下面的教程下载就行了。
3 按指定类别下载数据集
3.1 OIDv4_ToolKit
所要使用的一个开源工具是 OIDv4_ToolKit,目前该项目仅支持v4版本,但v4对于大多数任务足够了。
用Gitbash把项目下载到本地:
git clone https://github.com/EscVM/OIDv4_ToolKit.git
3.2 项目结构
3.3 下载命令
运行下面的命令查看参数说明:
python main.py -h
参数说明:
--type_csv:'train' 或 'validation' 或 'test' 或 'all',表示从哪个csv下载图像
--sub:人工验证图像或机器生成的子集(h或m)
--image_IsOccluded:1或0,表示对象是否被图像中的另一个对象遮挡。
--image_IsTruncated:1或0,表示对象是否超出图像的边界。
--image_IsGroupOf:1或0,表示标记框是否跨越一组物体(例如超过5个相互遮挡、紧挨在一起的实例)。
--image_IsDepiction:1或0,表示对象是否是一个草稿或漫画。
--image_IsInside:1或0,表示是否是从对象内部拍摄的照片。
--multiclasses:0(默认值)或1,分别(0)或一起(1)下载不同的类
--n_threads:[默认20] 要使用的线程数
--noLabels:不创建标签文件
--limit:要下载的图像数量的可选限制
按照下面的命令就行了:
python main.py downloader --classes classes.txt --type_csv train
python main.py downloader --classes classes.txt --type_csv validation
python main.py downloader --classes classes.txt --type_csv test
4 批量转化为VOC格式
4.1 标签特点
具体类别的内部文件,一个Label文件夹,包含该类所有图像对应的标记文本,一个文本对应一个图片,一行一个标记框。
4.2 分析
由于标签文本的命名和图像一致,那么只需要提取txt文件就行了,然后改后缀把图像复制到VOC的JPEGImages文件夹(也可以手动,哈哈,但是写都写了,就偷个懒一次完成吧)。但是VOC文件夹的标签名不能有空格,Open Image 有的类别是两个单词,中间有空格,所以要把前两个单词合并。
4.3 批量转化
说了那么多,下面正式开始干货!
一键转换box2voc.py:
import math
import shutil
import cv2
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import os
def make_xml(txt_root_path, txt_name_path, save_xml_path):
    """Convert one OIDv4_ToolKit label file into a PASCAL VOC XML annotation.

    Reads ``<txt_root_path>/Label/<txt_name_path>``, where every non-blank
    line is ``<class name> <xmin> <ymin> <xmax> <ymax>``, copies the matching
    ``.jpg`` from ``txt_root_path`` into ``VOC2007/JPEGImages`` and writes
    ``<save_xml_path>/<image stem>.xml``.

    Args:
        txt_root_path: Class folder holding the images and the ``Label``
            sub-folder, e.g.
            ``../OID/UnRestraint_Of_MarineCreature_In_OpenImagev4/Dataset/train/Seahorse/``.
        txt_name_path: Name of the label file, e.g. ``imglabel.txt``.
        save_xml_path: Directory that receives the generated XML file.

    Raises:
        ValueError: If the image cannot be decoded, or a label line does not
            end with four numeric coordinates.
    """
    stem = os.path.splitext(txt_name_path)[0]
    pic_name = stem + ".jpg"
    txt_path = os.path.join(txt_root_path, "Label", txt_name_path)
    pic_path = os.path.join(txt_root_path, pic_name)

    pic = cv2.imread(pic_path)
    if pic is None:
        # cv2.imread reports failure by returning None instead of raising;
        # stop here, before anything is copied, with a readable message.
        raise ValueError("cannot read image: %s" % pic_path)
    shutil.copy(pic_path, "VOC2007/JPEGImages")
    shape = pic.shape

    with open(txt_path, 'r') as f:
        data = [line.strip() for line in f]
    print(data)

    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = 'JPEGImages'
    # Image file name
    SubElement(node_root, 'filename').text = pic_name
    # Image size
    node_size = SubElement(node_root, 'size')
    SubElement(node_size, 'width').text = str(shape[1])
    SubElement(node_size, 'height').text = str(shape[0])
    SubElement(node_size, 'depth').text = str(shape[2] if len(shape) > 2 else 1)

    # One <object> element per bounding box
    for dat in data:
        if not dat:
            continue
        tokens = dat.split()
        print(len(tokens))
        print(tokens)
        if len(tokens) < 5:
            raise ValueError(
                "malformed label line in %s: %r" % (txt_path, dat))
        # The last four tokens are the coordinates; everything before them is
        # the class name. VOC names must not contain spaces, so the words of
        # a multi-word class are concatenated.
        label = "".join(tokens[:-4])
        # Round outwards (floor the minima, ceil the maxima) so that the
        # integer box still fully contains the fractional Open Images box.
        xmin = math.floor(float(tokens[-4]))
        ymin = math.floor(float(tokens[-3]))
        xmax = math.ceil(float(tokens[-2]))
        ymax = math.ceil(float(tokens[-1]))
        print(label, xmin, ymin, xmax, ymax)

        node_object = SubElement(node_root, 'object')
        # Class name
        SubElement(node_object, 'name').text = label
        SubElement(node_object, 'difficult').text = '0'
        node_bndbox = SubElement(node_object, 'bndbox')
        for tag, value in (('xmin', xmin), ('ymin', ymin),
                           ('xmax', xmax), ('ymax', ymax)):
            SubElement(node_bndbox, tag).text = str(value)

    xml = tostring(node_root, pretty_print=True)
    xml_name = os.path.join(save_xml_path, stem + '.xml')
    with open(xml_name, 'wb') as f:
        f.write(xml)
def make_voc_dir():
    """Create the VOC2007 folder layout in the current working directory.

    Folders that already exist are left untouched, so the function can be
    called again on a partially or fully prepared output tree.
    """
    sub_dirs = (
        'Annotations',
        'ImageSets',
        'ImageSets/Main',
        'ImageSets/Layout',
        'ImageSets/Segmentation',
        'JPEGImages',
        'SegmentationClass',
        'SegmentationObject',
    )
    for sub_dir in sub_dirs:
        os.makedirs('VOC2007/' + sub_dir, exist_ok=True)
if __name__ == '__main__':
    dataset_folder = ["train", "validation", "test"]
    # Class folders to convert; add the other downloaded class names here.
    class_folder = ["Seahorse",
                    ]
    root_path = "../OID/Restraint_Of_MarineCreature_In_OpenImagev4/Dataset/"
    save_xml_path = 'VOC2007/Annotations'

    # make_xml writes into VOC2007/..., so the layout has to exist first.
    if not os.path.isdir('VOC2007'):
        make_voc_dir()

    # Walk the train / validation / test splits
    for folder1 in dataset_folder:
        # <root_path>train/
        dataset_root_path = root_path + folder1 + "/"
        # Walk every class inside the split
        for folder2 in class_folder:
            # <root_path>train/Seahorse/
            label_root_path = dataset_root_path + folder2 + "/"
            # <root_path>train/Seahorse/Label/
            label_dir = label_root_path + "Label/"
            if not os.path.isdir(label_dir):
                # A class may be absent from one of the splits.
                print("skip missing folder: " + label_dir)
                continue
            files = os.listdir(label_dir)
            print(files)
            # One label file per image
            for label_file in files:
                if not label_file.endswith(".txt"):
                    continue
                make_xml(label_root_path, label_file, save_xml_path)
匹配测试isPatch.py:
import os
import xml.etree.ElementTree as ET
import cv2
def patch_rate(xmlDirPath, pictureDirPath):
    """Check how well annotation files and pictures match by name and count.

    An annotation counts as matched when the picture folder contains a file
    with the same name stem (file name without its extension). Matching is
    done by name rather than by list position, because ``os.listdir`` returns
    entries in arbitrary order and the two folders may differ in size.

    Args:
        xmlDirPath: Folder containing the XML annotation files.
        pictureDirPath: Folder containing the pictures.

    Returns:
        A tuple ``(matched, xml_count, picture_count, rate)`` where ``rate``
        is ``matched / xml_count``, or ``0.0`` when there are no annotations.
    """
    xml_stems = [os.path.splitext(name)[0] for name in os.listdir(xmlDirPath)]
    picture_names = os.listdir(pictureDirPath)
    picture_stems = {os.path.splitext(name)[0] for name in picture_names}

    count = sum(1 for stem in xml_stems if stem in picture_stems)
    rate = count / len(xml_stems) if xml_stems else 0.0
    return count, len(xml_stems), len(picture_names), rate
def visualization_image(image_name, xml_file_name):
    """Display an image with the boxes from its VOC annotation drawn on it.

    Prints the ``folder``, ``filename`` and ``size`` fields of the XML file,
    collects every ``object`` that carries a complete ``bndbox``, draws the
    boxes with ``bboxes_draw_on_img`` and shows the result in a window named
    ``capture`` until a key is pressed.

    Args:
        image_name: Path of the picture to display.
        xml_file_name: Path of the VOC XML annotation for that picture.

    Raises:
        ValueError: If the picture cannot be decoded.
    """
    coord_tags = ("xmin", "ymin", "xmax", "ymax")
    tree = ET.parse(xml_file_name)
    root = tree.getroot()
    object_lists = []
    for child in root:
        if child.tag in ("folder", "filename"):
            print(child.tag, child.text)
        elif child.tag == "size":  # parse size
            for size_child in child:
                if size_child.tag in ("width", "height", "depth"):
                    print(size_child.tag, size_child.text)
        elif child.tag == "object":  # parse object
            single_object = {}
            for object_child in child:
                if object_child.tag == "name":
                    single_object["name"] = object_child.text
                elif object_child.tag == "bndbox":
                    for bndbox_child in object_child:
                        if bndbox_child.tag in coord_tags:
                            single_object[bndbox_child.tag] = bndbox_child.text
            # Only objects with all four coordinates can be drawn.
            if all(tag in single_object for tag in coord_tags):
                object_lists.append(single_object)

    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread returns None for a missing or undecodable file.
        raise ValueError("cannot read image: %s" % image_name)
    for object_coordinate in object_lists:
        bboxes_draw_on_img(img, object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def bboxes_draw_on_img(img, bbox, color=(255, 0, 0), thickness=1):
    """Draw one bounding box onto ``img`` with ``cv2.rectangle``.

    Args:
        img: Image handed to ``cv2.rectangle``; the box is drawn on it.
        bbox: Mapping with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` entries
            whose values can be converted with ``float`` (strings are fine).
        color: Colour triple passed straight to ``cv2.rectangle``. An
            immutable tuple is used as the default so it cannot be shared
            and modified between calls.
        thickness: Line thickness passed to ``cv2.rectangle``.
    """
    print(bbox)
    top_left = (int(float(bbox["xmin"])), int(float(bbox["ymin"])))
    bottom_right = (int(float(bbox["xmax"])), int(float(bbox["ymax"])))
    cv2.rectangle(img, top_left, bottom_right, color, thickness)
if __name__ == '__main__':
    xmlDirPath = 'VOC2007/Annotations'
    pictureDirPath = 'VOC2007/JPEGImages'
    print(patch_rate(xmlDirPath, pictureDirPath))

    # Sort so the sample is reproducible; os.listdir order is arbitrary.
    xmlList = sorted(os.listdir(xmlDirPath))
    # Look pictures up by name stem instead of relying on list positions.
    pictures_by_stem = {}
    for item in os.listdir(pictureDirPath):
        pictures_by_stem[os.path.splitext(item)[0]] = item

    # There are too many files to view them all: starting at index 1, show
    # every 700th annotation, however many files the folder contains.
    for xml_name in xmlList[1::700]:
        picture_name = pictures_by_stem.get(os.path.splitext(xml_name)[0])
        if picture_name is None:
            print("no picture found for " + xml_name)
            continue
        visualization_image(
            os.path.join(pictureDirPath, picture_name),
            os.path.join(xmlDirPath, xml_name))
5 最终效果
批量生成的annotations:
xml和图片的匹配效果:
2021/3/12更新:Open Image标注框的坐标带有小数(小数部分从0.0到0.9不等),直接截断会使有的框在边缘细微处没有完全包含整个物体,所以对最小坐标采用向下取整函数、对最大坐标采用向上取整函数。
One comment
帮助很大!找了好久,确定谷歌Open Image后一直不清楚怎么下载,以及下载后怎么换成VOC,感谢博主!