史上最も詳細なPytorch版yolov 3コード中国語注釈詳細(一)

17123 ワード

ソースコードを真剣に理解してこそ、アルゴリズムを本当に理解することができます.yolov 3のpytorch版の公式ソースコードはgithubを参照してください.https://github.com/ayooshkathuria/YOLO_v3_tutorial_from_scratch
著者は公式原版のチュートリアルを書いた.このチュートリアルでは、著者はPyTorchを使用してYOLO v 3ベースのターゲット検出器を実現した.このチュートリアルには訓練部分は含まれていないが、5つの部分がある.リンク:https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/
このチュートリアルにはすでに完全な翻訳バージョンがあり、上下の2つの部分に分かれています.上部のリンク:https://www.jiqizhixin.com/articles/2018-04-23-3
下部のリンク:https://www.jiqizhixin.com/articles/042602?from=synced&keyword=%E4%BB%8E%E9%9B%B6%E5%BC%80%E5%A7%8BPyTorch%E9%A1%B9%E7%9B%AE%EF%BC%9AYOLO%20v3%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B%E5%AE%9E%E7%8E%B0
以上のチュートリアルがあるので、本チュートリアルは既存の内容を繰り返すのではなく、各プログラムの各行のコードに、初心者向けの最も詳細で網羅的な注釈を付けます。基礎がどれほど不十分でも理解できるように、各文・各変数が何を意味するのかを注釈します。ここまで細かく書いてこそ、私たち初心者にとって本当に役立ちます(上級者の方は読み飛ばしてください。これは私たち初心者のためのものです)。
本編はシリーズチュートリアルの第1編で、プログラム darknet.py を詳しく解説します。続編のアドレスは次のとおりです。
史上最も詳細なPytorch版yolov 3コード中国語注釈詳細(二):https://blog.csdn.net/qq_34199326/article/details/84206079
史上最も詳細なPytorch版yolov 3コード中国語注釈詳細(三):https://blog.csdn.net/qq_34199326/article/details/84349977
史上最も詳細なPytorch版yolov 3コード中国語注釈詳細(四):https://blog.csdn.net/qq_34199326/article/details/84529661
前置きはこのくらいにして、まず darknet.py コードの超詳細コメントを見てください。
from __future__ import division

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
from util import * 


def get_test_input():
    """Load the sample image and return it as an input tensor for the network.

    Reads "dog-cycle-car.png" from the working directory, resizes it to
    416x416, flips the channel axis (BGR -> RGB per the original notes),
    moves channels first, prepends a batch axis, divides by 255.0 and wraps
    the resulting float tensor in a ``Variable``.
    """
    resized = cv2.resize(cv2.imread("dog-cycle-car.png"), (416, 416))
    # [height, width, channel] with reversed channel order -> [channel, height, width]
    channels_first = resized[:, :, ::-1].transpose((2, 0, 1))
    # Add the batch axis at position 0 and scale the pixel values.
    batched = channels_first[np.newaxis, :, :, :] / 255.0
    return Variable(torch.from_numpy(batched).float())

def parse_cfg(cfgfile):
    """Parse a Darknet ``.cfg`` file into a list of block dictionaries.

    Every ``[section]`` header starts a new dictionary whose ``"type"`` entry
    is the section name; each following ``key=value`` line is stored in that
    dictionary with both key and value kept as strings.

    Args:
        cfgfile: Path of the configuration file to read.

    Returns:
        A list of dicts, one per section, in file order. The block being
        built is always appended at the end, so a file without any section
        yields ``[{}]``.
    """
    with open(cfgfile, 'r') as cfg:
        raw_lines = cfg.read().split('\n')

    # Strip before filtering so whitespace-only lines and indented comments
    # are dropped instead of reaching the parser below.
    stripped = [line.strip() for line in raw_lines]
    lines = [line for line in stripped if line and not line.startswith('#')]

    block = {}
    blocks = []
    for line in lines:
        if line[0] == "[":
            # A new section begins: flush the previous one, if any.
            if len(block) != 0:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()
        else:
            # Split on the first '=' only, so values may contain '='.
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)

    return blocks


class EmptyLayer(nn.Module):
    """Placeholder module used for ``route`` and ``shortcut`` blocks.

    It holds no parameters and defines no ``forward``; the actual work for
    these block types is done in ``Darknet.forward``.
    """

    def __init__(self):
        super(EmptyLayer, self).__init__()


class DetectionLayer(nn.Module):
    """Module for a ``yolo`` block that only stores the block's anchors.

    ``Darknet.forward`` reads ``anchors`` from it and passes them to
    ``predict_transform``.
    """

    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors


def create_modules(blocks):
    """Build one ``nn.Sequential`` per layer block returned by ``parse_cfg``.

    Args:
        blocks: List of block dicts; ``blocks[0]`` is treated as the network
            info block and every later entry as a layer description.

    Returns:
        A ``(net_info, module_list)`` tuple where ``net_info`` is
        ``blocks[0]`` and ``module_list`` is an ``nn.ModuleList`` with one
        entry per element of ``blocks[1:]``.

    Side effects:
        For ``route`` blocks, ``block["layers"]`` is replaced in place by the
        list obtained from splitting the original string on ``","``.
    """
    net_info = blocks[0]
    module_list = nn.ModuleList()
    prev_filters = 3          # input channel count of the first convolution
    output_filters = []       # output channel count of every layer so far

    for index, x in enumerate(blocks[1:]):
        module = nn.Sequential()
        # Layers that do not change the channel count keep the previous
        # value; this also keeps `filters` defined when the first layer is
        # not convolutional.
        filters = prev_filters
        block_type = x["type"]

        if block_type == "convolutional":
            activation = x["activation"]
            batch_normalize = int(x.get("batch_normalize", 0))
            # The convolution carries its own bias only when no batch norm
            # follows it; load_weights relies on exactly this pairing.
            bias = not batch_normalize

            filters = int(x["filters"])
            padding = int(x["pad"])
            kernel_size = int(x["size"])
            stride = int(x["stride"])
            pad = (kernel_size - 1) // 2 if padding else 0

            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)

            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)

            # Only "leaky" adds an activation module; any other value
            # (e.g. linear) leaves the output untouched.
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace=True)
                module.add_module("leaky_{0}".format(index), activn)

        elif block_type == "upsample":
            # Use the configured stride as the scale factor rather than a
            # hard-coded 2.
            stride = int(x["stride"])
            upsample = nn.Upsample(scale_factor=stride, mode="nearest")
            module.add_module("upsample_{}".format(index), upsample)

        elif block_type == "route":
            # Stored back on the block as a list (see "Side effects").
            x["layers"] = x["layers"].split(',')
            layers = x["layers"]

            start = int(layers[0])
            # 0 means "no second layer".
            end = int(layers[1]) if len(layers) > 1 else 0

            # Positive values are absolute layer indices; convert them to
            # offsets relative to the current layer.
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index

            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)

            if end < 0:
                # Two feature maps get concatenated along the channel axis.
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        elif block_type == "shortcut":
            # Skip connection: element-wise sum in Darknet.forward, so the
            # channel count stays the same.
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)

        elif block_type == "yolo":
            mask = [int(m) for m in x["mask"].split(",")]

            values = [int(a) for a in x["anchors"].split(",")]
            anchors = [(values[i], values[i + 1]) for i in range(0, len(values), 2)]
            # Keep only the anchor pairs selected by this layer's mask.
            anchors = [anchors[i] for i in mask]

            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return (net_info, module_list)


class Darknet(nn.Module):
    """Network assembled from a Darknet ``.cfg`` file.

    Attributes set in ``__init__``:
        blocks: result of ``parse_cfg(cfgfile)``.
        net_info, module_list: result of ``create_modules(self.blocks)``.
    """

    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)

    def forward(self, x, CUDA):
        """Run the network and return the concatenated yolo detections.

        Args:
            x: Input tensor fed to the first layer.
            CUDA: Flag passed through unchanged to ``predict_transform``.

        Returns:
            The outputs of ``predict_transform`` for every ``yolo`` block,
            concatenated along dimension 1.
            NOTE(review): per the original notes each output is shaped
            (batch, grid*grid*num_anchors, 5 + num_classes) -- confirm
            against util.py.

        Raises:
            ValueError: If the configuration contains no ``yolo`` block.
        """
        modules = self.blocks[1:]   # skip the network info block
        outputs = {}                # per-layer outputs, needed by route/shortcut
        detections = None

        for i, module in enumerate(modules):
            module_type = module["type"]

            if module_type == "convolutional" or module_type == "upsample":
                x = self.module_list[i](x)

            elif module_type == "route":
                layers = module["layers"]
                # create_modules stores a list here; also accept the raw
                # comma-separated string.
                if isinstance(layers, str):
                    layers = layers.split(",")
                layers = [int(a) for a in layers]

                # Positive entries are absolute indices -> relative offsets.
                if layers[0] > 0:
                    layers[0] = layers[0] - i

                if len(layers) == 1:
                    x = outputs[i + layers[0]]
                else:
                    if layers[1] > 0:
                        layers[1] = layers[1] - i

                    map1 = outputs[i + layers[0]]
                    map2 = outputs[i + layers[1]]
                    x = torch.cat((map1, map2), 1)

            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i - 1] + outputs[i + from_]

            elif module_type == 'yolo':
                anchors = self.module_list[i][0].anchors
                # Input dimension comes from the network info block.
                inp_dim = int(self.net_info["height"])
                num_classes = int(module["classes"])

                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)

                if detections is None:
                    # First yolo output starts the collector.
                    detections = x
                else:
                    detections = torch.cat((detections, x), 1)

            outputs[i] = x

        if detections is None:
            raise ValueError("network configuration contains no 'yolo' layer")
        return detections

    def load_weights(self, weightfile):
        """Load convolution / batch-norm parameters from a weights file.

        The file is read as 5 ``int32`` header values followed by ``float32``
        values. ``self.header`` keeps the header as a tensor and ``self.seen``
        is ``self.header[3]``. For each ``convolutional`` block the values
        are consumed in this order: batch-norm bias, weight, running mean and
        running variance (when the block uses batch norm) or the convolution
        bias (when it does not), followed by the convolution weights.

        Args:
            weightfile: Path of the binary weights file.
        """
        with open(weightfile, "rb") as fp:
            # The first 5 values are header information
            # 1. Major version number
            # 2. Minor version number
            # 3. Subversion number
            # 4,5. Images seen by the network (during training)
            header = np.fromfile(fp, dtype=np.int32, count=5)
            weights = np.fromfile(fp, dtype=np.float32)

        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        ptr = 0
        for i, model in enumerate(self.module_list):
            # blocks[0] is the network info block, hence the +1 offset.
            block = self.blocks[i + 1]
            if block["type"] != "convolutional":
                continue

            batch_normalize = int(block.get("batch_normalize", 0))
            conv = model[0]

            if batch_normalize:
                bn = model[1]
                targets = [bn.bias.data, bn.weight.data,
                           bn.running_mean, bn.running_var]
            else:
                targets = [conv.bias.data]
            targets.append(conv.weight.data)

            for target in targets:
                count = target.numel()
                chunk = torch.from_numpy(weights[ptr:ptr + count])
                ptr += count
                # Reshape the flat slice to the parameter's shape and copy.
                target.copy_(chunk.view_as(target))

総じて、darknet.py には次のものが含まれています。関数 parse_cfg は設定ファイルのパスを入力とし、リストを返します。リストの各要素は、作成するニューラルネットワークモジュール(レイヤ)に対応する辞書です。関数 create_modules はネットワークの各層を作成するために使用されます。Darknet クラスの forward 関数はネットワークの順伝播を実現し、load_weights は事前学習済みのネットワーク重みパラメータを読み込むために使用されます。また、forward 関数では必要な予測出力形式を生成する必要があり、その出力変換を行う関数 predict_transform は util.py にあります。Darknet クラスの forward でこの関数を使用するため、util.py からインポートしています。次回は util.py について詳しく説明します。