初期python生成lmdbの問題と解決

6653 ワード

一、分割のためのlmdbデータ生成dataとlabelの2つの部分
import glob
import os
import shutil

import cv2 as cv
import lmdb
import numpy as np
from scipy.io import loadmat

import caffe
#import h5py
#import struct
#Please do not change the parameters without permision!!!  PICTURE and MAT

def del_and_create(dname):
    """Create an empty directory at ``dname``, deleting any existing one.

    Ensures each LMDB is built from scratch: if a previous database
    directory exists it is removed entirely before the fresh directory
    is created.

    Note: the original version had ``os.makedirs`` mis-indented inside
    the ``if`` (the "unindent does not match any outer indentation level"
    error), so the directory was only (re)created when it already existed.
    """
    if os.path.exists(dname):
        shutil.rmtree(dname)
    os.makedirs(dname)


def get_img_datum(image_fn):
    """Read an image file and wrap it as a Caffe Datum with label 0.

    OpenCV loads the image in BGR channel order; the channel axis is
    reversed (BGR <-> RGB, to match the fcn finetune model) and the
    array is transposed from HWC to CHW as Caffe expects.
    """
    bgr = cv.imread(image_fn, cv.IMREAD_COLOR)
    rgb = bgr[:, :, ::-1]          # reverse channel order
    chw = rgb.transpose((2, 0, 1)) # HWC -> CHW
    return caffe.io.array_to_datum(chw, 0)


def get_gt_datum(gt_fn):
    """Load a ground-truth label map from a .mat file and wrap it as a Datum.

    The .mat file is expected to store the label array under the key
    ``'lb'``; a leading singleton axis is added so the array is rank 3,
    then it is packed into a Caffe Datum with label 0.
    """
    mat = loadmat(gt_fn)
    lb = mat['lb'][np.newaxis, ...]  # prepend channel axis
    return caffe.io.array_to_datum(lb, 0)

def create_dataset():
    """Build two parallel LMDB databases (images and label maps) for training.

    Image (.bmp) and ground-truth (.mat) files are paired by sorted
    filename order and written under the SAME shuffled numeric key in
    both databases, so the two LMDBs stay aligned while samples are
    stored in random order.

    NOTE(review): relies on ``lmdb``, ``glob`` and ``shutil`` being
    imported at module level.
    """
    # Image LMDB; LMDB requires the maximum map size up front (6000 MB here).
    img_db_fn_train = r'D:\Maxee\LPB40\LMDB_data\LPBA_data_aug\img_train.lmdb'
    del_and_create(img_db_fn_train)
    img_env_train = lmdb.Environment(img_db_fn_train, map_size=6000*1024*1024)
    img_txn_train = img_env_train.begin(write=True, buffers=True)

    # Label LMDB (2400 MB map size).
    gt_db_fn_train = r'D:\Maxee\LPB40\LMDB_data\LPBA_data_aug\lb_train.lmdb'
    del_and_create(gt_db_fn_train)
    gt_env_train = lmdb.Environment(gt_db_fn_train, map_size=2400*1024*1024)
    gt_txn_train = gt_env_train.begin(write=True, buffers=True)

    # Shuffled key pool; 15*3700 is presumably an upper bound on the
    # number of sample pairs -- TODO confirm against the dataset size.
    keys = np.arange(15*3700)
    np.random.shuffle(keys)


    img_fns = glob.glob(r'D:\Maxee\LPB40\CaffeTrainer\LPBA_data_aug\train_data\*.bmp')
    gt_fns = glob.glob(r'D:\Maxee\LPB40\CaffeTrainer\LPBA_data_aug\train_label\*.mat')


    # Pair image and label files by sorted name; assumes the two listings
    # correspond one-to-one -- verify basenames match before running.
    for i, (img_fn, gt_fn) in enumerate(
            zip(sorted(img_fns), sorted(gt_fns))):
        img_datum = get_img_datum(img_fn)
        gt_datum = get_gt_datum(gt_fn)
        key =  keys[i]

        key_put='%010d' % key  # zero-padded 10-digit key string
        # Same key in both DBs keeps image and label records aligned.
        img_txn_train.put(key_put, img_datum.SerializeToString())
        gt_txn_train.put(key_put, gt_datum.SerializeToString())
        print keys[i], i,os.path.basename(img_fn), os.path.basename(gt_fn)

    # Single commit at the end, then release both environments.
    img_txn_train.commit()
    gt_txn_train.commit()
    img_env_train.close()
    gt_env_train.close()

if __name__ == '__main__':
    create_dataset()


caffe》windows pycaffe, release pycaffe caffe Lib\site-package , ipython debug

① import caffe error: no module named google.protobuf

: , ,

:pip install protobuf anaconda conda install -----

:  http://www.tuicool.com/articles/AnMRJf7

② import cv2 as cv :no module named cv2

: opencv, opencv build\python\x64( x86 )\cv2.pyd anaconda Lib

③ import lmdb :no module named lmdb

:pip install lmdb 

④ unindent does not match any outer indentation level

: tab , notepad view》show symbol》show white space and TAB TAB

⑤python lmdb bug , map_size, lmdb

, , ~~

、 data label , label lmdb

train test, , ,

"""
Created on Fri Jul 08 13:55:12 2016

@author: fujiko
"""

import numpy as np
import cv2 as cv
import caffe
import lmdb
from caffe.proto import caffe_pb2 
import os
#import glob

# this def generate all the images in classification folders
# this def generate all the images in classification folders
def GetAllImages(folder):
    """Return the full paths of all regular files in ``folder``.

    Windows thumbnail caches (``Thumbs.db``) are excluded.  The original
    version removed entries from ``imageList`` while iterating over it,
    which is undefined-skipping behavior; filtering in the comprehension
    avoids that.
    """
    assert os.path.exists(folder)
    assert os.path.isdir(folder)
    names = os.listdir(folder)
    return [os.path.join(folder, item) for item in names
            if item != 'Thumbs.db'
            and os.path.isfile(os.path.join(folder, item))]

lmdb_file = 'G:\\data\\origin\\lmdb_train'
lmdb_file2 = 'G:\\data\\origin\\lmdb_test'
batch_size = 30
n = 0
m = 0

lmdb_env = lmdb.open(lmdb_file, map_size=int(1024*1024*3000)) # size:3000mb
lmdb_env2 = lmdb.open(lmdb_file2, map_size=int(1024*1024*1000)) 
lmdb_txn = lmdb_env.begin(write=True)  
lmdb_txn2 = lmdb_env2.begin(write=True)  
datum = caffe_pb2.Datum() 
trns = 0
tsts = 0
#imgs = glob.glob(r'G:\data\origin\*\*.png')#all the image
#shuffle   
for p in range(0,8):
    imageList = GetAllImages(r'G:\data\train\\' + str(p))
    trns=trns+4*len(imageList)/5#train
    tsts=tsts+len(imageList)-4*len(imageList)/5# test 
    
key_trns = np.arange(trns)# 80% is train
key_tsts = np.arange(tsts)
np.random.shuffle(key_trns)#       ,   
np.random.shuffle(key_tsts)

for p in range(0,8):
    imageList = GetAllImages(r'G:\data\train\\' + str(p))
    
    for i in range(0, 4 * len(imageList)/5):
        n = n+1
        label = p
        tmp = imageList[i]
        img = cv.imread(tmp,cv.IMREAD_COLOR)
        data = cv.resize(img, (227, 227), interpolation=cv.INTER_LINEAR)
        data = data[:,:,::-1]
        data = data.transpose((2,0,1)) 
        datum = caffe.io.array_to_datum(data, label)
        keystr = '{:0>8d}'.format(key_trns[n-1]) #8  ,  0  , n   
        lmdb_txn.put(keystr, datum.SerializeToString())
        print key_trns[n-1], i,n-1,os.path.basename(tmp),label

        if i % batch_size == 0:
            lmdb_txn.commit()
            lmdb_txn = lmdb_env.begin(write=True)  
            #print 'batchtrain {} writen'.format(n)
        
    for i in range(4 * len(imageList)/5, len(imageList)):
        m = m+1
        label = p
        tmp = imageList[i]
        img = cv.imread(tmp,cv.IMREAD_COLOR)
        data = cv.resize(img, (227, 227), interpolation=cv.INTER_LINEAR)
        data = data[:,:,::-1]
        data = data.transpose((2,0,1)) 
        datum = caffe.io.array_to_datum(data, label)
        keystr = '{:0>8d}'.format(key_tsts[m-1]) 
        lmdb_txn2.put(keystr, datum.SerializeToString())
        print key_tsts[m-1], i,m-1,os.path.basename(tmp),label

        if i % batch_size == 0:
            
            lmdb_txn2.commit()
            lmdb_txn2 = lmdb_env2.begin(write=True)  
            #print 'batchtest {} writen'.format(m)
        
lmdb_env.close()
lmdb_env2.close()