PyTorch test: ShuffleNet YOLOv3

11088 words
# coding='utf-8'
import os
import sys
import time
import datetime
import logging
import torch
import torch.nn as nn
import torch.optim as optim

torch.backends.cudnn.benchmark = True

MY_DIRNAME = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(MY_DIRNAME, '..'))
from nets.model_main import ModelMain
from nets.yolo_loss import YOLOLayer
from common.coco_dataset import COCODataset


# Training / benchmarking configuration for the ShuffleNet-YOLOv3 model.
# Consumed by train() and main(); train() rewrites "yolo"->"anchors" in place
# from the flat string below into three per-scale anchor groups.
TRAINING_PARAMS = \
{
    "model_params": {
        "backbone_name": "darknet_53",
        "backbone_pretrained":"", #  set empty to disable
        # "backbone_pretrained":"../weights/mobilenetv2_weights.pth", #  set empty to disable
    },
    "yolo": {
        # Nine anchors as comma-separated "w,h" pairs (pixels); split into
        # three groups of three by train().
        "anchors": "16,24, 23,39, 25,84, 31,66, 42,54, 46,38, 56,81, 59,121, 74,236",
        "classes": 1,
    },
    "lr": {
        "backbone_lr": 0.01,
        "other_lr": 0.01,
        "freeze_backbone": False,   #  freeze backbone weights to finetune
        "decay_gamma": 0.2,
        "decay_step": 15,           #  decay lr in every ? epochs
    },
    "optimizer": {
        "type": "sgd",
        "weight_decay": 4e-05,
    },
    "batch_size": 1,
    # "train_path": "../data/coco/trainvalno5k.txt",
    "train_path": r"\\192.168.0.1\_2train",   # UNC path to the training list
    "epochs": 2001,
    "img_h": 352,
    "img_w": 352,
    # "parallels": [0,1,2,3],                         #  config GPU device
    "parallels": [0],                         #  config GPU device
    "working_dir": "YOUR_WORKING_DIR",              #  replace with your working dir
    "pretrain_snapshot": "",                        #  load checkpoint
    # "pretrain_snapshot": r"F:\Team-CV\checkpoints\torch_yolov0802/0.6185_0070.weights",
    "evaluate_type": r"F:\Team-CV\checkpoints\torch_yolov0824/0.9654_1667.weights",
    "try": 0,
    "export_onnx": False,
}
# Destination for checkpoint weights saved by the training loop.
checkpoint_dir=r"F:\Team-CV\checkpoints\torch_yolov0824_"
os.makedirs(checkpoint_dir, exist_ok=True)
def train(config):
    """Build the model and YOLO heads from *config* and time 10 dummy forward passes.

    Despite the name, the actual gradient-descent loop is disabled in this
    revision; what runs is a CPU inference benchmark: construct ModelMain,
    the three YOLOLayer heads and the optimizer, then repeatedly push a
    random image through the network and print the elapsed time per pass.

    Side effects on *config*:
      - adds "global_step" (from "start_step", default 0)
      - replaces config["yolo"]["anchors"] (a flat "w,h,..." string) with a
        list of three anchor groups of three [w, h] pairs each.
    """
    config["global_step"] = config.get("start_step", 0)
    # Training mode unless we are only exporting to ONNX.
    is_training = not config.get("export_onnx")

    # Parse the flat "w,h, w,h, ..." anchor string into groups of three
    # [w, h] pairs (one group per detection scale).
    flat = [int(v) for v in config["yolo"]["anchors"].split(",")]
    anchor_groups = [
        [[flat[j], flat[j + 1]], [flat[j + 2], flat[j + 3]], [flat[j + 4], flat[j + 5]]]
        for j in range(0, len(flat), 6)
    ]
    # Reverse so the group order matches the network's output-scale order.
    anchor_groups.reverse()
    config["yolo"]["anchors"] = anchor_groups[:3]

    # Load and initialize the network; eval() freezes batchnorm/dropout for
    # the timing run (CPU only — the .cuda() path is disabled here).
    net = ModelMain(config, is_training=is_training)
    net.eval()

    # Optimizer and LR schedule are built for parity with the full training
    # setup, even though no backward pass happens in this benchmark.
    optimizer = _get_optimizer(config, net)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

    # One YOLO post-processing/loss head per scale.
    yolo_losses = [
        YOLOLayer(config["batch_size"], scale, config["yolo"]["anchors"][scale],
                  config["yolo"]["classes"], (config["img_w"], config["img_h"]))
        for scale in range(3)
    ]

    # Benchmark: 10 forward passes on a random image, printing seconds/pass.
    for _ in range(10):
        t1 = time.time()
        x = torch.rand(config["batch_size"], 3, config["img_h"], config["img_w"])
        outputs = net(x)
        output_list = [yolo_losses[scale](outputs[scale]) for scale in range(3)]
        print(time.time() - t1)

    logging.info("Bye bye")

def _get_optimizer(config, net):
    optimizer = None

    # Assign different lr for each layer
    params = None
    base_params = list(
        map(id, net.backbone.parameters())
    )
    logits_params = filter(lambda p: id(p) not in base_params, net.parameters())

    if not config["lr"]["freeze_backbone"]:
        params = [
            {"params": logits_params, "lr": config["lr"]["other_lr"]},
            {"params": net.backbone.parameters(), "lr": config["lr"]["backbone_lr"]},
        ]
    else:
        logging.info("freeze backbone's parameters.")
        for p in net.backbone.parameters():
            p.requires_grad = False
        params = [
            {"params": logits_params, "lr": config["lr"]["other_lr"]},
        ]
    logging.info("Using " + config["optimizer"]["type"] + " optimizer.")
    # Initialize optimizer class
    if config["optimizer"]["type"] == "adam":
        optimizer = optim.Adam(params, weight_decay=config["optimizer"]["weight_decay"])
    elif config["optimizer"]["type"] == "amsgrad":
        optimizer = optim.Adam(params, weight_decay=config["optimizer"]["weight_decay"],
                               amsgrad=True)
    elif config["optimizer"]["type"] == "rmsprop":
        optimizer = optim.RMSprop(params, weight_decay=config["optimizer"]["weight_decay"])
    else:
        # Default to sgd
        optimizer = optim.SGD(params, momentum=0.9,
                              weight_decay=config["optimizer"]["weight_decay"],
                              nesterov=(config["optimizer"]["type"] == "nesterov"))

    return optimizer

def main():
    """Configure logging and GPU visibility, create the run directory, and train.

    Builds a unique per-run working directory of the form
    {working_dir}/{backbone}/size{W}x{H}_try{N}/{timestamp}, records it in
    the config, restricts CUDA to the configured devices, then calls train().
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")

    config = TRAINING_PARAMS
    # Scale the per-step batch size with the number of configured GPUs.
    config["batch_size"] *= len(config["parallels"])

    # Create sub_working_dir (unique per backbone/size/try/timestamp).
    sub_working_dir = '{}/{}/size{}x{}_try{}/{}'.format(
        config['working_dir'], config['model_params']['backbone_name'],
        config['img_w'], config['img_h'], config['try'],
        time.strftime("%Y%m%d%H%M%S", time.localtime()))
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(sub_working_dir, exist_ok=True)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    # Restrict CUDA to the configured devices before any CUDA context exists.
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, config["parallels"]))
    train(config)

if __name__ == "__main__":
    main()