初期python生成lmdbの問題と解決
一、分割のためのlmdbデータ生成dataとlabelの2つの部分
import glob
import os
import shutil

import cv2 as cv
import lmdb
import numpy as np

import caffe
from scipy.io import loadmat
#import h5py
#import struct
#Please do not change the parameters without permission!!! PICTURE and MAT
def del_and_create(dname):
    """Remove directory *dname* (and all its contents) if it exists, then
    recreate it empty.

    Used to guarantee a fresh LMDB output directory before writing; lmdb
    refuses stale/partial databases otherwise.

    Note: requires `import shutil` at module level — the original file
    called shutil.rmtree without ever importing shutil (NameError).
    """
    if os.path.exists(dname):
        shutil.rmtree(dname)
    os.makedirs(dname)
def get_img_datum(image_fn):
    """Read an image file and wrap it as a Caffe Datum with label 0.

    OpenCV loads the image as BGR; the channel axis is reversed to RGB
    (to match the FCN finetune model) and the array is rearranged from
    HWC to the CHW layout Caffe expects.
    """
    bgr = cv.imread(image_fn, cv.IMREAD_COLOR)
    rgb = bgr[:, :, ::-1]            # BGR -> RGB
    chw = rgb.transpose((2, 0, 1))   # (H, W, C) -> (C, H, W)
    #chw = np.expand_dims(chw, axis=0)
    return caffe.io.array_to_datum(chw, 0)
def get_gt_datum(gt_fn):
    """Load a ground-truth label map from a .mat file and wrap it as a Datum.

    The .mat file is expected to store the label matrix under the variable
    name 'lb'. A leading singleton channel axis is added so the array is
    (1, H, W) as caffe.io.array_to_datum requires.
    """
    label = loadmat(gt_fn)['lb']
    label = label[np.newaxis, ...]  # equivalent to np.expand_dims(label, 0)
    return caffe.io.array_to_datum(label, 0)
def create_dataset():
    """Build shuffled training LMDBs for segmentation images and label maps.

    Pairs .bmp images with .mat label files (both sorted, so pairing relies
    on matching sort order of the two directories), assigns each pair a
    random zero-padded 10-digit key so Caffe reads the data in shuffled
    order, and writes the pairs into two parallel LMDBs under the same key.

    Requires `import lmdb` and `import glob` at module level — the original
    file used both without importing them.
    """
    img_db_fn_train = r'D:\Maxee\LPB40\LMDB_data\LPBA_data_aug\img_train.lmdb'
    del_and_create(img_db_fn_train)
    img_env_train = lmdb.Environment(img_db_fn_train, map_size=6000 * 1024 * 1024)
    img_txn_train = img_env_train.begin(write=True, buffers=True)

    gt_db_fn_train = r'D:\Maxee\LPB40\LMDB_data\LPBA_data_aug\lb_train.lmdb'
    del_and_create(gt_db_fn_train)
    gt_env_train = lmdb.Environment(gt_db_fn_train, map_size=2400 * 1024 * 1024)
    gt_txn_train = gt_env_train.begin(write=True, buffers=True)

    img_fns = sorted(glob.glob(r'D:\Maxee\LPB40\CaffeTrainer\LPBA_data_aug\train_data\*.bmp'))
    gt_fns = sorted(glob.glob(r'D:\Maxee\LPB40\CaffeTrainer\LPBA_data_aug\train_label\*.mat'))
    if len(img_fns) != len(gt_fns):
        raise ValueError('image/label count mismatch: %d images vs %d labels'
                         % (len(img_fns), len(gt_fns)))

    # One random key per pair. Sized from the actual file count instead of
    # the original hard-coded 15*3700, which raised IndexError when fewer
    # keys existed than files (and silently left keys unused otherwise).
    keys = np.arange(len(img_fns))
    np.random.shuffle(keys)

    for i, (img_fn, gt_fn) in enumerate(zip(img_fns, gt_fns)):
        img_datum = get_img_datum(img_fn)
        gt_datum = get_gt_datum(gt_fn)
        key_put = '%010d' % keys[i]  # zero-padded to 10 digits
        img_txn_train.put(key_put, img_datum.SerializeToString())
        gt_txn_train.put(key_put, gt_datum.SerializeToString())
        # print as a single pre-formatted string: works on Python 2 and 3
        print('%d %d %s %s' % (keys[i], i, os.path.basename(img_fn),
                               os.path.basename(gt_fn)))

    img_txn_train.commit()
    gt_txn_train.commit()
    img_env_train.close()
    gt_env_train.close()
# Script entry point: rebuild the image/label training LMDBs from scratch.
if __name__ == '__main__':
    create_dataset()
After building pycaffe on Windows (Release build), copy the generated `caffe` package into Anaconda's Lib\site-packages so it can be imported and debugged from IPython.
Problems encountered and how they were solved:
① `import caffe` error: "no module named google.protobuf"
Cause: the protobuf Python package is missing.
Fix: `pip install protobuf`, or with Anaconda: `conda install protobuf`.
Reference: http://www.tuicool.com/articles/AnMRJf7
② `import cv2 as cv` error: "no module named cv2"
Fix: from the OpenCV distribution, copy build\python\x64 (or x86)\cv2.pyd into Anaconda's Lib\site-packages.
③ `import lmdb` error: "no module named lmdb"
Fix: `pip install lmdb`
④ "unindent does not match any outer indentation level"
Cause: mixed tabs and spaces. In Notepad++ use View » Show Symbol » Show White Space and TAB to find and replace the stray TAB characters.
⑤ The Python lmdb binding has a known quirk on Windows: the file is pre-allocated to the full `map_size`, so choose `map_size` carefully or the lmdb output becomes enormous.
Once these were solved, generation worked fine. ~~
Part two: data and label stored together — each image's class label is written into the same LMDB record as its data, so only one LMDB per split is needed.
The images are divided into train and test sets (80% / 20%), with keys pre-shuffled so samples are written in random order.
"""
Created on Fri Jul 08 13:55:12 2016
@author: fujiko
"""
import numpy as np
import cv2 as cv
import caffe
import lmdb
from caffe.proto import caffe_pb2
import os
#import glob
# this def generate all the images in classification folders
# this def generates the full paths of all images in one classification folder
def GetAllImages(folder):
    """Return full paths of all regular files in *folder*, excluding Thumbs.db.

    Raises AssertionError if *folder* does not exist or is not a directory.

    Fixes two issues in the original:
    - it called `imageList.remove('Thumbs.db')` while iterating the same
      list, which skips the element that follows the removed one;
    - it joined paths with a hard-coded '\\' — os.path.join is portable.
    """
    assert os.path.exists(folder)
    assert os.path.isdir(folder)
    return [os.path.join(folder, item)
            for item in os.listdir(folder)
            if item != 'Thumbs.db'                       # Windows thumbnail cache
            and os.path.isfile(os.path.join(folder, item))]
# ---------------------------------------------------------------------------
# Build classification LMDBs. For each class folder G:\data\train\<p>
# (p = 0..7) the first 80% of its images go into the train LMDB and the
# remaining 20% into the test LMDB, with the class index p as the label.
# Keys are pre-shuffled so Caffe reads the samples in random order.
#
# BUG FIX vs. the original: puts made after the last `i % batch_size == 0`
# commit were never committed before env.close(), silently dropping the
# tail of both databases. Final commits are now issued before closing.
# ---------------------------------------------------------------------------
lmdb_file = 'G:\\data\\origin\\lmdb_train'
lmdb_file2 = 'G:\\data\\origin\\lmdb_test'
batch_size = 30   # commit every `batch_size` puts to bound transaction size
n = 0             # running count of train samples written
m = 0             # running count of test samples written

lmdb_env = lmdb.open(lmdb_file, map_size=int(1024*1024*3000))    # 3000 MB
lmdb_env2 = lmdb.open(lmdb_file2, map_size=int(1024*1024*1000))  # 1000 MB
lmdb_txn = lmdb_env.begin(write=True)
lmdb_txn2 = lmdb_env2.begin(write=True)

#imgs = glob.glob(r'G:\data\origin\*\*.png')  # all the images

# First pass: count total train/test samples so shuffled key arrays of
# exactly the right size can be generated up front.
trns = 0
tsts = 0
for p in range(0, 8):
    imageList = GetAllImages(r'G:\data\train\\' + str(p))
    # `//` keeps integer division on both Python 2 and 3
    trns = trns + 4 * len(imageList) // 5                    # 80% -> train
    tsts = tsts + len(imageList) - 4 * len(imageList) // 5   # rest -> test

key_trns = np.arange(trns)   # one shuffled key per train sample
key_tsts = np.arange(tsts)
np.random.shuffle(key_trns)
np.random.shuffle(key_tsts)

# Second pass: convert each image and write it under its shuffled key.
for p in range(0, 8):
    imageList = GetAllImages(r'G:\data\train\\' + str(p))
    split = 4 * len(imageList) // 5  # index separating train from test
    for i in range(0, split):
        n = n + 1
        label = p
        tmp = imageList[i]
        img = cv.imread(tmp, cv.IMREAD_COLOR)
        data = cv.resize(img, (227, 227), interpolation=cv.INTER_LINEAR)
        data = data[:, :, ::-1]           # BGR -> RGB
        data = data.transpose((2, 0, 1))  # HWC -> CHW
        datum = caffe.io.array_to_datum(data, label)
        keystr = '{:0>8d}'.format(key_trns[n - 1])  # zero-padded, 8 digits
        lmdb_txn.put(keystr, datum.SerializeToString())
        print('{} {} {} {} {}'.format(key_trns[n - 1], i, n - 1,
                                      os.path.basename(tmp), label))
        if i % batch_size == 0:
            lmdb_txn.commit()
            lmdb_txn = lmdb_env.begin(write=True)
            #print 'batchtrain {} writen'.format(n)
    for i in range(split, len(imageList)):
        m = m + 1
        label = p
        tmp = imageList[i]
        img = cv.imread(tmp, cv.IMREAD_COLOR)
        data = cv.resize(img, (227, 227), interpolation=cv.INTER_LINEAR)
        data = data[:, :, ::-1]           # BGR -> RGB
        data = data.transpose((2, 0, 1))  # HWC -> CHW
        datum = caffe.io.array_to_datum(data, label)
        keystr = '{:0>8d}'.format(key_tsts[m - 1])
        lmdb_txn2.put(keystr, datum.SerializeToString())
        print('{} {} {} {} {}'.format(key_tsts[m - 1], i, m - 1,
                                      os.path.basename(tmp), label))
        if i % batch_size == 0:
            lmdb_txn2.commit()
            lmdb_txn2 = lmdb_env2.begin(write=True)
            #print 'batchtest {} writen'.format(m)

# Flush whatever accumulated since the last batch commit, then close.
lmdb_txn.commit()
lmdb_txn2.commit()
lmdb_env.close()
lmdb_env2.close()