首先下载数据集train-textloc.zip
其groundtruth文件如下所示:
158,128,412,182,"Footpath"
442,128,501,170,"To"
393,198,488,240,"and"
63,200,363,242,"Colchester"
71,271,383,313,"Greenstead"
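ground truth 文件格式为:xmin, ymin, xmax, ymax, "文本内容",即每行描述一个文本框,依次给出左上角坐标 (xmin, ymin)、右下角坐标 (xmax, ymax) 以及框内的文字。同时,要注意,这里的坐标系是如下摆放(原点在图像左上角,x 轴向右,y 轴向下):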
将此txt文件转换成voc xml文件的代码:
icdar2voc.py
#! /usr/bin/python
# -*- coding: utf8 -*-
"""Convert ICDAR text-localisation ground truth to VOC-style XML annotations.

For every ``<name>.jpg`` found in ``src_img_dir`` the script reads
``gt_<name>.txt`` from ``src_txt_dir`` and writes ``<name>.xml`` into
``src_txt_dir``.  Each non-blank ground-truth line is split on commas and
its first four fields are emitted as xmin, ymin, xmax and ymax of one
``text`` object; any remaining fields (the transcription) are ignored.
"""

import os
import sys
import glob
from PIL import Image

# Directory that holds the ICDAR images.
src_img_dir = "train-textloc"
# Directory that holds the ICDAR ground-truth txt files; the generated
# xml files are written into this directory as well.
src_txt_dir = "train-textloc"

img_Lists = glob.glob(src_img_dir + '/*.jpg')

# File names with extension, e.g. 100.jpg
img_basenames = [os.path.basename(item) for item in img_Lists]

# File names without extension, e.g. 100
img_names = [os.path.splitext(item)[0] for item in img_basenames]


def _object_xml(fields):
    """Return the lines of one <object> element.

    ``fields`` is a ground-truth line already split on ','; only the first
    four entries (xmin, ymin, xmax, ymax) are used.  Surrounding whitespace
    is stripped so that "x, y, ..." style lines yield clean coordinates.
    """
    xmin, ymin, xmax, ymax = [value.strip() for value in fields[:4]]
    return [
        ' <object>\n',
        ' <name>text</name>\n',
        ' <pose>Unspecified</pose>\n',
        ' <truncated>0</truncated>\n',
        ' <difficult>0</difficult>\n',
        ' <bndbox>\n',
        ' <xmin>' + xmin + '</xmin>\n',
        ' <ymin>' + ymin + '</ymin>\n',
        ' <xmax>' + xmax + '</xmax>\n',
        ' <ymax>' + ymax + '</ymax>\n',
        ' </bndbox>\n',
        ' </object>\n',
    ]


def _convert_one(img):
    """Write ``<img>.xml`` from ``<img>.jpg`` and ``gt_<img>.txt``.

    Raises ValueError when a non-blank ground-truth line has fewer than
    four comma-separated fields.
    """
    # Only the image size is needed; the context manager releases the
    # underlying file handle right away instead of leaking one per image.
    with Image.open(src_img_dir + '/' + img + '.jpg') as im:
        width, height = im.size

    # Read the corresponding ground-truth txt file.
    gt_path = src_txt_dir + '/gt_' + img + '.txt'
    with open(gt_path) as gt_file:
        gt = gt_file.read().splitlines()

    # Build the whole document in memory first, so that a malformed
    # ground-truth file cannot leave a half-written xml file behind.
    xml_lines = [
        '<annotation>\n',
        ' <folder>VOC2007</folder>\n',
        ' <filename>' + str(img) + '.jpg' + '</filename>\n',
        ' <size>\n',
        ' <width>' + str(width) + '</width>\n',
        ' <height>' + str(height) + '</height>\n',
        # Depth is hard-coded to 3, i.e. the images are treated as 3-channel.
        ' <depth>3</depth>\n',
        ' </size>\n',
    ]

    # One <object> element per text region.
    for line_no, img_each_label in enumerate(gt, 1):
        if not img_each_label.strip():
            # Blank (often trailing) lines carry no box; skip them.
            continue
        spt = img_each_label.split(',')
        if len(spt) < 4:
            raise ValueError(
                '%s:%d: expected at least 4 comma-separated fields, got %r'
                % (gt_path, line_no, img_each_label))
        xml_lines.extend(_object_xml(spt))

    xml_lines.append('</annotation>')

    with open(src_txt_dir + '/' + img + '.xml', 'w') as xml_file:
        xml_file.writelines(xml_lines)


for img in img_names:
    _convert_one(img)