CenterNet Reading Notes
This one trains a person (human body) detector:
https://github.com/ZongweiZhou1/CenterNetPerson
Person detection + tracking:
https://github.com/kimyoon-young/centerNet-deep-sort
Face: test code and model only:
https://github.com/smartwell/centernet-train-wider-face
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
class CTDetDataset(data.Dataset):
  def _coco_box_to_bbox(self, box):
    # COCO boxes are [x, y, w, h]; convert to [x1, y1, x2, y2]
    bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                    dtype=np.float32)
    return bbox
  def _get_border(self, border, pic_len):
    # border: margin in pixels (128 here); pic_len: image width or height.
    # Halve the margin (border // i) until twice the margin fits inside the image,
    # so the random crop center drawn later always stays at least that far from the edge.
    i = 1
    while pic_len <= border // i * 2:
      i *= 2
    return border // i
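  # Worked examples for _get_border (added for this note, not in the original source):
  #   _get_border(128, 512): 512 <= 256 is False                       -> returns 128
  #   _get_border(128, 200): 200 <= 256, so i = 2; 200 <= 128 is False -> returns 64
  #   _get_border(128, 100): i grows to 4 (100 <= 256, 100 <= 128)     -> returns 32
  # The random crop center below is then drawn from [w_border, width - w_border).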
  def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)  # cap the number of objects per image at max_objs
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)  # center (x, y) of the original image
    if self.opt.keep_res:
      # keep the original resolution, padded up so the input size works with the network stride
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      # resize so the longer side maps to a fixed input size
      s = max(img.shape[0], img.shape[1]) * 1.0  # scale = longer side of the image
      input_h, input_w = self.opt.input_h, self.opt.input_w  # e.g. 512, 512
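    # Worked example for the keep_res branch (added for this note, assuming opt.pad = 31):
    # (height | 31) + 1 sets the low 5 bits and adds 1, e.g.
    #   height = 480 -> 480 | 31 = 511 -> 512;  height = 500 -> 500 | 31 = 511 -> 512
    # i.e. the input is snapped up to a multiple of 32, which the downsampling path requires.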
    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
        # random-crop style augmentation: jitter the scale and pick a random crop center
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))  # scale factor in steps of 0.1 between 0.6 and 1.4
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        # alternative augmentation: Gaussian scale/shift around the image center
        sf = self.opt.scale
        cf = self.opt.shift
        c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
      if np.random.random() < self.opt.flip:
        # horizontal flip with probability opt.flip
        flipped = True
        img = img[:, ::-1, :]
        c[0] = width - c[0] - 1
    # map the (center, scale) crop of the original image onto the network input
    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    in_pic = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    in_pic = (in_pic.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, in_pic, self._eig_val, self._eig_vec)
    # normalize and convert HWC -> CHW
    in_pic = (in_pic - self.mean) / self.std
    in_pic = in_pic.transpose(2, 0, 1)
    output_h = input_h // self.opt.down_ratio  # e.g. 512 // 4 = 128
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
    # ground-truth targets on the output grid; the shapes in the comments assume
    # num_classes = 80, output 128 x 128, max_objs = 32
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)  # 80 x 128 x 128 class heatmaps
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)                 # 32 x 2 box width/height per object
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)      # 2 x 128 x 128 dense wh map
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)                # 32 x 2 sub-pixel center offsets
    ind = np.zeros((self.max_objs), dtype=np.int64)                     # 32 flattened center indices on the output grid
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)                # 32 mask: 1 for real objects, 0 for padded slots
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)  # 32 x (80 * 2)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)  # 32 x (80 * 2)
    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
      draw_umich_gaussian
    gt_det = []
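    # Note (added for this note, not in the original source): gaussian_radius follows the
    # CornerNet heuristic -- it returns the largest radius such that a box jittered by up to
    # that radius still overlaps the ground truth with IoU of roughly 0.7 or more.
    # draw_umich_gaussian then splats a 2D Gaussian of that radius (sigma roughly diameter / 6)
    # onto hm[cls_id] with an element-wise max, so overlapping objects keep the larger peak.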
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])  # [x1, y1, x2, y2]
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      # transform the box to the output grid and clip to its bounds
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      if h > 0 and w > 0:
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        center = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        center_int = center.astype(np.int32)
        draw_gaussian(hm[cls_id], center_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = center_int[1] * output_w + center_int[0]  # row-major index of the center cell
        reg[k] = center - center_int                       # sub-pixel offset lost by the int cast
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), center_int, wh[k], radius)
        gt_det.append([center[0] - w / 2, center[1] - h / 2,
                       center[0] + w / 2, center[1] + h / 2, 1, cls_id])
    ret = {'input': in_pic, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta
    return ret
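To see how 'ind' and 'reg_mask' are consumed on the loss side, here is a minimal sketch written for this note (not taken from the CenterNet repo) of gathering the predicted wh values at the ground-truth center indices with torch.gather; the masked L1 losses for wh and the center offset both need exactly this kind of lookup.

import torch
import torch.nn.functional as F

def gather_at_centers(pred, ind, mask):
  # pred: B x C x H x W network output (e.g. the 2-channel wh head)
  # ind:  B x K flattened center indices (y * W + x), as built in __getitem__
  # mask: B x K mask marking real objects among the K padded slots
  B, C, H, W = pred.shape
  K = ind.shape[1]
  feat = pred.view(B, C, H * W).permute(0, 2, 1)  # B x (H*W) x C
  idx = ind.unsqueeze(2).expand(B, K, C)          # B x K x C
  gathered = feat.gather(1, idx)                  # B x K x C, one vector per object slot
  return gathered, mask.unsqueeze(2).float()      # mask later zeroes out the padded slots

# usage sketch: masked L1 between gathered predictions and the 'wh' target from the dataset
# pred_wh, m = gather_at_centers(out['wh'], batch['ind'], batch['reg_mask'])
# loss_wh = F.l1_loss(pred_wh * m, batch['wh'] * m, reduction='sum') / (m.sum() + 1e-4)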