VIA(VGG Image Annotator)のアノテーションデータをCOCOフォーマットに変換するコード


VGG Image Annotator(VIA)は、Visual Geometry Groupが開発したオープンソースの画像アノテーションツールです。長方形、円、楕円、多角形、点、線でアノテーションを付けることができ、オンラインでもオフラインでも使用できます。アノテーションが完了すると、CSVまたはJSON形式のファイルにエクスポートできます。
mmdetectionのように、VIA形式のアノテーションデータをサポートしていないフレームワークを使う場合は、変換コードを書く必要があります。
実際には、これは2つのJSONファイル間の変換です。2つのアノテーションフォーマットは、どちらも読み込むとdict形式のデータになります。やるべきことは、VIA側のdictを1階層ずつたどって中身を取り出し、COCOフォーマットのdictの中に入れていくことです。
まず、2つのJSONファイルを整形して出力し、違いを比較するとよいでしょう。そのためには、パッケージを1つインストールする必要があります。
yum install -y jq

そして
cat a.json | jq

これで変換コードを書くことができます。ここでは私が書いたバージョンを載せておきます。自分で実際に試して、問題なく使えることを確認済みです。中には無視してよい細かいロジックも含まれています(自分用に書いたものなので、そのままにしてあります)。
import json
import os
import cv2
import numpy as np

def _get_bbox(all_points_x, all_points_y):

	min_x , max_x = min(all_points_x), max(all_points_x)
	min_y , max_y = min(all_points_y), max(all_points_y)
	bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
	return bbox

def _get_mask(all_points_x, all_points_y):
	min_x , max_x = min(all_points_x), max(all_points_x)
	min_y , max_y = min(all_points_y), max(all_points_y)
	for zz in range(len(all_points_x)): #find the biggest in list
		if(all_points_x[zz] == max_x):
			break
	left_lines = []
	right_lines = []
	mask = []
	for zzz in range(len(all_points_x)): # draw lines
		if zzz != zz :
			if all_points_y[zzz] < all_points_y[zz]:
				angle = abs(all_points_x[zzz] - all_points_x[zz])/abs(all_points_y[zzz] - all_points_y[zz] + 1e-7)
				#print('left',angle)
				left_lines.append([angle,all_points_x[zzz],all_points_y[zzz]])
			else:
				angle = abs(all_points_x[zzz] - all_points_x[zz])/abs(all_points_y[zzz] - all_points_y[zz] + 1e-7)
				#print('right',angle)
				right_lines.append([angle,all_points_x[zzz], all_points_y[zzz]])

	left_lines.sort()
	right_lines.sort(reverse=True)


	#print(left_lines)
	#print(right_lines)
	for line_num in range(len(left_lines)):
		mask.append(left_lines[line_num][1])
		mask.append(left_lines[line_num][2])
	for line_num in range(len(right_lines)):
		mask.append(right_lines[line_num][1])
		mask.append(right_lines[line_num][2])
	mask.append(all_points_x[zz])
	mask.append(all_points_y[zz])
	return [mask]

def _produce_category_id(data):   #       ,  ,index     id

	class_set = set()
	for i in data: 
		if(len(data[i]["regions"])!=0):
			if('name' in data[i]["regions"][0]["region_attributes"] and data[i]["regions"][0]["region_attributes"]["name"]!='USER'and data[i]["regions"][0]["region_attributes"]["name"]!='' and data[i]["regions"][0]["region_attributes"]["name"]!='ETTODAY' and data[i]["regions"][0]["region_attributes"]["name"]!='SICHUAN'and data[i]["regions"][0]["region_attributes"]["name"]!='RED' and data[i]["regions"][0]["region_attributes"]["name"]!='WEIXIN'):  #clean data
				class_set.add(data[i]["regions"][0]["region_attributes"]["name"].replace('
','')) class_set = list(class_set) class_set.sort() #print(class_set) #np.save('category_id_list.npy',class_set) return class_set old_data = json.load(open('/Users/hank/Desktop/ad_12/annotations/via_region_data_train1.json')) # new_data = json.load(open('/Users/hank/Desktop/data/coco2017/annotations/instances_val2017.json')) #coco image_sample = new_data["images"][0] annotations_sample = new_data["annotations"][0] category_sample = new_data["categories"][0] #print(annotations_sample) idx = 0 ann_idx = 0 errorcount = 0 img_info = [] #image ann_info = [] #annotations cat_info = [] #category category_id_list = _produce_category_id(old_data) image_path_list = [] #print(category_id_list) user_count = 0 for _, image_i in enumerate(old_data) : image_sample = {} image_name = old_data[image_i]["filename"] idx += 1 image_path = os.path.join("/Users/hank/Desktop/ad_12/train/" , image_name) if(not os.path.exists(image_path)): # , errorcount += 1 idx -= 1 continue all_annotations_in_per = old_data[image_i]["regions"] flag = 0 # label if(all_annotations_in_per ==[]): #not tag image_path_list.append(image_path) print(image_path) os.remove(image_path) for iii in range(len(all_annotations_in_per)): if(not 'name' in old_data[image_i]["regions"][iii]["region_attributes"]): print(image_name) if ('name' in old_data[image_i]["regions"][iii]["region_attributes"]): #print(old_data[image_i]["regions"][iii]["region_attributes"]["name"]) if(old_data[image_i]["regions"][iii]["region_attributes"]["name"] == 'USER' or old_data[image_i]["regions"][iii]["region_attributes"]["name"] == 'SICHUAN' or old_data[image_i]["regions"][iii]["region_attributes"]["name"] == 'ETTODAY' or old_data[image_i]["regions"][iii]["region_attributes"]["name"] == 'RED' or old_data[image_i]["regions"][iii]["region_attributes"]["name"] == 'WEIXIN'): flag = 1 #print("happen") if flag == 2: #print(image_name) user_count += 1 continue if flag == 1: #user_count += 1 os.remove(image_path) #print(image_path) continue img = 
cv2.imread(image_path) # image if img is not None: h , w, _ = np.shape(img) image_sample["license"] = 2 image_sample["coco_url"] = "http://images.cocodataset.org/val2017/000000500663.jpg" image_sample["file_name"] = image_name image_sample["id"] = idx image_sample["height"] = h image_sample["width"] = w image_sample["data_captured"] = "2013-11-17 21:48:19" image_sample["flickr_url"] = "http://farm1.staticflickr.com/198/488201322_ef2ebfeccb_z.jpg" img_info.append(image_sample) all_annotations_in_per = old_data[image_i]["regions"] # annotation #########change annotation to new type for j in range(len(all_annotations_in_per)): annotations_sample = {} #print(all_annotations_in_per) ann_idx += 1 ann = all_annotations_in_per[j] #dict[shape_attributes, region_attributes] ann_shape = ann["shape_attributes"] if ann_shape['name'] == 'polygon': all_points_x = ann_shape["all_points_x"] all_points_y = ann_shape["all_points_y"] bbox = _get_bbox(all_points_x, all_points_y) mask = _get_mask(all_points_x, all_points_y) annotations_sample['area'] = bbox[2] * bbox[3] annotations_sample['segmentation'] = mask annotations_sample['iscrowd'] = 0 annotations_sample['image_id'] = idx annotations_sample['bbox'] = bbox if('name' in ann["region_attributes"]): #print(image_path) annotations_sample['category_id'] = category_id_list.index(ann["region_attributes"]["name"].replace('
','')) + 1 else: continue annotations_sample['id'] = ann_idx #print(annotations_sample) ann_info.append(annotations_sample) for k in range(len(category_id_list)): # category category_sample = {} category_sample['supercategory'] = category_id_list[k] category_sample['id'] = k + 1 category_sample['name'] = category_id_list[k] print (category_sample) cat_info.append(category_sample) #print(errorcount) print('image——num',len(img_info)) new_data["annotations"] = ann_info new_data["images"] = img_info new_data["categories"] = cat_info print(len(category_id_list)) #print(new_data["categories"]) new_data = json.dumps(new_data) # json #print(category_id_list) with open('/Users/hank/Desktop/ad_12/annotations/coco_train1.json','w') as json_file: json_file.write(new_data) #print(type(new_data))