# Python: computing the mean and variance of a dataset


# Computing the mean and variance of a dataset
import numpy as np
import cv2
import random
import os
# Calculate the per-channel mean and standard deviation of a training set.
train_txt_path = './dataset/train_val_list.txt'

CNum = 2171     # maximum number of images sampled for the statistics
img_h, img_w = 128, 416


def compute_channel_stats(images):
    """Return per-channel mean and standard deviation of pixel values.

    Pixel values are scaled by 1/255 before the statistics are taken, so
    8-bit images produce results in the range [0, 1].  The images are
    consumed one at a time and only running sums are kept, so memory use
    does not grow with the number of images and no placeholder frame is
    mixed into the result.

    Args:
        images: iterable of array-likes shaped (height, width, channels).
            Images may differ in height and width but must share the same
            number of channels.

    Returns:
        A ``(means, stdevs)`` tuple of two lists of floats, one entry per
        channel, in the same channel order as the input.  The standard
        deviation is the population one (as ``np.std`` with ``ddof=0``).

    Raises:
        ValueError: if ``images`` yields no pixels at all.
    """
    channel_sum = 0.0
    channel_sq_sum = 0.0
    pixel_count = 0
    for img in images:
        pixels = np.asarray(img, dtype=np.float64)
        # Flatten the spatial dimensions: one row per pixel, one column per channel.
        pixels = pixels.reshape(-1, pixels.shape[-1]) / 255.
        channel_sum = channel_sum + pixels.sum(axis=0)
        channel_sq_sum = channel_sq_sum + np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

    if pixel_count == 0:
        raise ValueError('no images to compute statistics from')

    mean = channel_sum / pixel_count
    # Var[x] = E[x^2] - E[x]^2; clamp at zero because rounding can leave a
    # tiny negative value for (nearly) constant channels.
    variance = np.maximum(channel_sq_sum / pixel_count - np.square(mean), 0.0)
    return mean.tolist(), np.sqrt(variance).tolist()


def _read_image_names(list_path):
    """Return the first whitespace-separated token of each non-blank line."""
    with open(list_path, 'r') as f:
        return [line.split()[0] for line in f if line.strip()]


def _iter_resized_images(names, image_root, height, width):
    """Yield each named image under ``image_root`` resized to height x width."""
    for name in names:
        img_path = os.path.join(image_root, name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('could not read image: {}'.format(img_path))
        # cv2.resize takes the target size as (width, height).
        yield cv2.resize(img, (width, height))


if __name__ == '__main__':
    names = _read_image_names(train_txt_path)
    random.shuffle(names)   # shuffle so the first CNum entries are a random sample
    # Slicing tolerates a list that holds fewer than CNum entries.
    sample = _iter_resized_images(names[:CNum], './train', img_h, img_w)
    means, stdevs = compute_channel_stats(sample)

    # cv2 loads images as BGR, while PIL/skimage based pipelines use RGB.
    means.reverse()  # BGR --> RGB
    stdevs.reverse()
    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    print('transforms.Normalize(normMean = {}, normStd = {})'.format(means, stdevs))