Caffe学習1-画像認識とデータ可視化
17534 ワード
本文では、深層学習ライブラリ Caffe を用いて画像の認識と分類を行います。使用するモデルは学習済みの CaffeNet（bvlc_reference_caffenet.caffemodel）です。詳細は Caffe 公式のサンプルノートブックを参照してください: http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
CaffeNet found
mean-subtracted values: [(‘B’, 104.0069879317889), (‘G’, 116.66876761696767), (‘R’, 122.6789143406786)]
predicted class is: 253
output label: n02110806 basenji
probailities and labels:
[(0.81211644, ‘n02110806 basenji’), (0.098898478, ‘n02087046 toy terrier’), (0.030348787, ‘n02113023 Pembroke, Pembroke Welsh corgi’), (0.022625968, ‘n02091032 Italian greyhound’), (0.015911266, ‘n02113186 Cardigan, Cardigan Welsh corgi’)]
1 loop, best of 3: 521 ms per loop
10 loops, best of 3: 61.6 ms per loop
data (50, 3, 227, 227) conv1 (50, 96, 55, 55) pool1 (50, 96, 27, 27) norm1 (50, 96, 27, 27) conv2 (50, 256, 27, 27) pool2 (50, 256, 13, 13) norm2 (50, 256, 13, 13) conv3 (50, 384, 13, 13) conv4 (50, 384, 13, 13) conv5 (50, 256, 13, 13) pool5 (50, 256, 6, 6) fc6 (50, 4096) fc7 (50, 4096) fc8 (50, 1000) prob (50, 1000)
conv1 (96, 3, 11, 11) (96,) conv2 (256, 48, 5, 5) (256,) conv3 (384, 256, 3, 3) (384,) conv4 (384, 192, 3, 3) (384,) conv5 (256, 192, 3, 3) (256,) fc6 (4096, 9216) (4096,) fc7 (4096, 4096) (4096,) fc8 (1000, 4096) (1000,)
--2016-05-04 20:57:11-- https://upload.wikimedia.org/wikipedia/commons/b/be/Orang_Utan%2C_Semenggok_Forest_Reserve%2C_Sarawak%2C_Borneo%2C_Malaysia.JPG
upload.wikimedia.org (upload.wikimedia.org) のホスト名を解決中... 198.35.26.112, 2620:0:863:ed1a::2:b
upload.wikimedia.org (upload.wikimedia.org)|198.35.26.112|:443 に接続しています... 接続しました。
HTTP リクエストを送信し、応答を待っています... 200 OK
長さ: 1443340 (1.4M) [image/jpeg]
保存先: 「image.jpg」
100%[====================================================================================
2016-05-04 20:58:35(17.6 KB/s)-保存済み「image.jpg」[1443340/1443340])
probabilites and labels:
[(0.96807837, ‘n02480495 orangutan, orang, orangutang, Pongo pygmaeus’), (0.030588904, ‘n02492660 howler monkey, howler’), (0.00085891597, ‘n02493509 titi, titi monkey’), (0.00015429019, ‘n02493793 spider monkey, Ateles geoffroyi’), (7.2596624e-05, ‘n02488291 langur’)]
# Set up the Python environment for Caffe:
# numpy for numerical routines and matplotlib for plotting.
import numpy as np
import matplotlib.pyplot as plt
# notebook
%matplotlib inline
# set display defaults
plt.rcParams['figure.figsize'] = (10, 10) #
plt.rcParams['image.interpolation'] = 'nearest' #
plt.rcParams['image.cmap'] = 'gray' #
# , caffe
import sys
caffe_root='../../' # caffe/example/test , caffe_root
sys.path.insert(0,caffe_root+'python')
import caffe
# caffemodel, ,
import os
if os.path.isfile(caffe_root+'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
print 'CaffeNet found.'
else:
print 'Downloading pre-trained CaffeNet model...'
!../scripts/download_model_binary.py ../models/bvlc_reference_caffenet
CaffeNet found
# , CPU Caffe
caffe.set_mode_cpu()
model_def=caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt' #
model_weights=caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel' # caffe
#
net = caffe.Net(model_def,model_weights,caffe.TEST) # caffe , ,
#
#caffe BGR , matplotlib RGB ; caffe [0,255] matplotlib [0,1]。
# imagenet , ,
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) #
print 'mean-subtracted values:', zip('BGR', mu) # B、G、R
# data
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1)) # RGB
transformer.set_mean('data', mu) #
transformer.set_raw_scale('data', 255) # 0-1 0-255
transformer.set_channel_swap('data', (2,1,0)) # RGB BGR
mean-subtracted values: [(‘B’, 104.0069879317889), (‘G’, 116.66876761696767), (‘R’, 122.6789143406786)]
# , CPU
# , resize
net.blobs['data'].reshape(50,3,227,227) #batchsize=50, , 227*227
# ,
image = caffe.io.load_image(caffe_root + 'examples/00-classification-test/dog.jpg')
traformed_image = transformer.preprocess('data',image)
plt.imshow(image)
# net
net.blobs['data'].data[...] = traformed_image
# , caffemodel, imagenet , 1000
output = net.forward()
output_prob = output['prob'][0] # softmax
print 'predicted class is:', output_prob.argmax() #
predicted class is: 253
# , imagenet 1000
labels_file=caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
!../data/ilsvrc12/get_ilsvrc_aux.sh
labels = np.loadtxt(labels_file,str,delimiter='\t')
print 'output label:', labels[output_prob.argmax()] # , !
output label: n02110806 basenji
#
top_inds = output_prob.argsort()[::-1][:5]
print 'probailities and labels:'
zip(output_prob[top_inds],labels[top_inds]) # ,
probailities and labels:
[(0.81211644, ‘n02110806 basenji’), (0.098898478, ‘n02087046 toy terrier’), (0.030348787, ‘n02113023 Pembroke, Pembroke Welsh corgi’), (0.022625968, ‘n02091032 Italian greyhound’), (0.015911266, ‘n02113186 Cardigan, Cardigan Welsh corgi’)]
# Time a forward pass of the net in CPU mode (CPU mode was selected earlier).
%timeit net.forward()
1 loop, best of 3: 521 ms per loop
# Switch to GPU mode and time the forward pass again.
caffe.set_device(0) # select GPU 0; matters when more than one GPU is present
caffe.set_mode_gpu()
# One untimed forward pass before the measurement (presumably a warm-up so
# one-time GPU setup is not included in the timing -- TODO confirm).
net.forward()
%timeit net.forward()
10 loops, best of 3: 61.6 ms per loop
# , (batchsize, feature map , image , image )
for layer_name,blob in net.blobs.iteritems():
print layer_name +'\t' + str(blob.data.shape)
data	(50, 3, 227, 227)
conv1	(50, 96, 55, 55)
pool1	(50, 96, 27, 27)
norm1	(50, 96, 27, 27)
conv2	(50, 256, 27, 27)
pool2	(50, 256, 13, 13)
norm2	(50, 256, 13, 13)
conv3	(50, 384, 13, 13)
conv4	(50, 384, 13, 13)
conv5	(50, 256, 13, 13)
pool5	(50, 256, 6, 6)
fc6	(50, 4096)
fc7	(50, 4096)
fc8	(50, 1000)
prob	(50, 1000)
# , feature-map , feature-map ,
# conv3 conv4 , 192, 192*2=384
# b
for layer_name,param in net.params.iteritems():
print layer_name + '\t' + str(param[0].data.shape),str(param[1].data.shape)
conv1	(96, 3, 11, 11) (96,)
conv2	(256, 48, 5, 5) (256,)
conv3	(384, 256, 3, 3) (384,)
conv4	(384, 192, 3, 3) (384,)
conv5	(256, 192, 3, 3) (256,)
fc6	(4096, 9216) (4096,)
fc7	(4096, 4096) (4096,)
fc8	(1000, 4096) (1000,)
# Helper for displaying a stack of small images (filters, feature maps) at once.
def vis_square(data):
    """Tile a stack of images into a roughly square grid and display it.

    Args:
        data: numpy array whose first axis indexes the images, followed by
            two image axes and optionally one trailing axis that is left
            unpadded, i.e. shape (n, height, width) or
            (n, height, width, channels).

    The values are rescaled to [0, 1], the stack is padded with ones up to
    the next square number of images (plus a one-pixel gap after each image),
    and the resulting ceil(sqrt(n)) x ceil(sqrt(n)) grid is drawn with
    ``plt.imshow``.  Nothing is returned.
    """
    # Normalise for display.  A constant input has zero value range; map it
    # to zeros instead of dividing by zero, which would fill the image with
    # NaNs.
    lowest = data.min()
    value_range = data.max() - lowest
    if value_range > 0:
        data = (data - lowest) / value_range
    else:
        data = np.zeros(data.shape)

    # Force the number of tiles to be a square number.
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
                (0, 1), (0, 1))                 # add some space between filters
               + ((0, 0),) * (data.ndim - 3))   # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # Arrange the tiles as an n x n grid, then merge the grid axes with the
    # image axes to obtain one large image.
    data = data.reshape((n, n) + data.shape[1:]).transpose(
        (0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data)
    # plt.axis('off')  # left disabled as in the original: the axes stay visible
# conv1 parameters: take the first parameter blob and move its second axis
# to the end so that vis_square receives one small image per filter.
filters = net.params['conv1'][0].data
vis_square(np.transpose(filters, (0, 2, 3, 1)))

# First batch entry of the input blob, first three channels, one tile each.
feat = net.blobs['data'].data[0, :3]
vis_square(feat)
# fc7 output of the first batch entry: the raw values in the upper panel and
# a 100-bin histogram of the positive values in the lower panel.
feat = net.blobs['fc7'].data[0]
plt.subplot(211)
plt.plot(feat.flat)
plt.subplot(212)
a = plt.hist(feat.flat[feat.flat > 0], bins=100)

# Final 'prob' output of the first batch entry, drawn in a wide figure
# (the original note refers to the 1000 ImageNet classes).
feat = net.blobs['prob'].data[0]
plt.figure(figsize=(15, 3))
plt.plot(feat.flat)
# , ,
# URL
my_image_url ="https://upload.wikimedia.org/wikipedia/commons/b/be/Orang_Utan%2C_Semenggok_Forest_Reserve%2C_Sarawak%2C_Borneo%2C_Malaysia.JPG"
!wget -O image.jpg $my_image_url
#
image = caffe.io.load_image("image.jpg")
net.blobs['data'].data[...] = transformer.preprocess('data',image)
net.forward()
out_prob = net.blobs['prob'].data[0]
top_inds = output_prob.argsort()[::-1][:5]
plt.imshow(image)
print 'probabilites and labels:'
zip(output_prob[top_inds],labels[top_inds])
--2016-05-04 20:57:11-- https://upload.wikimedia.org/wikipedia/commons/b/be/Orang_Utan%2C_Semenggok_Forest_Reserve%2C_Sarawak%2C_Borneo%2C_Malaysia.JPG
upload.wikimedia.org (upload.wikimedia.org) のホスト名を解決中... 198.35.26.112, 2620:0:863:ed1a::2:b
upload.wikimedia.org (upload.wikimedia.org)|198.35.26.112|:443 に接続しています... 接続しました。
HTTP リクエストを送信し、応答を待っています... 200 OK
長さ: 1443340 (1.4M) [image/jpeg]
保存先: 「image.jpg」
100%[====================================================================================
2016-05-04 20:58:35(17.6 KB/s)-保存済み「image.jpg」[1443340/1443340])
probabilites and labels:
[(0.96807837, ‘n02480495 orangutan, orang, orangutang, Pongo pygmaeus’), (0.030588904, ‘n02492660 howler monkey, howler’), (0.00085891597, ‘n02493509 titi, titi monkey’), (0.00015429019, ‘n02493793 spider monkey, Ateles geoffroyi’), (7.2596624e-05, ‘n02488291 langur’)]