Caffe Learning 1: Image Recognition and Data Visualization


This article uses the deep learning library Caffe to recognize and classify an image with a pre-trained caffemodel. For the full walkthrough, see the official Caffe example notebook: http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
# Caffe setup
# First, import Python packages: numpy and matplotlib
import numpy as np
import matplotlib.pyplot as plt
# display plots inline in the notebook
%matplotlib inline

# set display defaults
plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use a grayscale colormap rather than a color heatmap
# Then, import caffe
import sys
caffe_root = '../../'    # this script sits under caffe/examples/test, so caffe_root points two levels up; change it if your path differs
sys.path.insert(0,caffe_root+'python')

import caffe
# Check whether the pre-trained caffemodel already exists locally; if not, download it with the helper script
import os
if os.path.isfile(caffe_root+'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print 'CaffeNet found.'
else:
    print 'Downloading pre-trained CaffeNet model...'
    !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet

CaffeNet found.
# Next, set Caffe to CPU mode and load the net
caffe.set_mode_cpu()

model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'   # the network architecture definition
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'   # the pre-trained weights

# load the net
net = caffe.Net(model_def, model_weights, caffe.TEST)       # use TEST mode (e.g. no dropout) since we only do inference
# Set up input preprocessing
# Caffe expects images in BGR channel order with pixel values in [0, 255], while matplotlib loads RGB images in [0, 1], so the input must be transformed accordingly
# Load the ImageNet mean image, which will be subtracted from every input
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to get the per-channel (BGR) mean values
print 'mean-subtracted values:', zip('BGR', mu)     # print the B, G, R means

# create a transformer for the input blob called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move the channel dimension to the front: (H, W, C) -> (C, H, W)
transformer.set_mean('data', mu)            # subtract the per-channel dataset mean
transformer.set_raw_scale('data', 255)      # rescale pixel values from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
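
For reference, the arithmetic the transformer applies can be written out by hand. The sketch below (with the hypothetical helper name manual_preprocess) assumes an H x W x 3 RGB image in [0, 1] that has already been resized to the network input size; the real caffe.io.Transformer also handles resizing and applies the steps in its own internal order.

def manual_preprocess(rgb_image, mu):
    bgr = rgb_image[:, :, ::-1]        # swap channels from RGB to BGR
    bgr = bgr * 255.0                  # rescale pixel values from [0, 1] to [0, 255]
    bgr = bgr - mu.reshape(1, 1, 3)    # subtract the per-channel BGR mean
    return bgr.transpose(2, 0, 1)      # (H, W, C) -> (C, H, W), as Caffe expects
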
# Now classify an image on the CPU
# First, reshape the data blob (the input image will be resized to match)
net.blobs['data'].reshape(50,3,227,227)   # batch size 50, 3 color channels, image size 227x227
# Next, load an image
image = caffe.io.load_image(caffe_root + 'examples/00-classification-test/dog.jpg')
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)

[Figure 1: the input image]
# copy the preprocessed image into the memory allocated for the net
net.blobs['data'].data[...] = transformed_image

# Perform classification; the pre-trained caffemodel outputs probabilities over ImageNet's 1000 classes
output = net.forward()

output_prob = output['prob'][0]                                      # the softmax probability vector for the first image in the batch
print 'predicted class is:', output_prob.argmax()           # the index of the class with the highest probability

predicted class is: 253
# Next, load the labels of ImageNet's 1000 classes
labels_file=caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    !../data/ilsvrc12/get_ilsvrc_aux.sh

labels =  np.loadtxt(labels_file,str,delimiter='\t')
print 'output label:', labels[output_prob.argmax()]    # the predicted label is indeed a dog!

output label: n02110806 basenji
# Look at the top five predictions rather than just the top one
top_inds = output_prob.argsort()[::-1][:5]

print 'probabilities and labels:'
zip(output_prob[top_inds],labels[top_inds])             # pair each probability with its label

probabilities and labels:
[(0.81211644, 'n02110806 basenji'), (0.098898478, 'n02087046 toy terrier'), (0.030348787, 'n02113023 Pembroke, Pembroke Welsh corgi'), (0.022625968, 'n02091032 Italian greyhound'), (0.015911266, 'n02113186 Cardigan, Cardigan Welsh corgi')]
# Time a full forward pass on the CPU
%timeit net.forward()

1 loop, best of 3: 521 ms per loop
# Switch to the GPU and time it again
caffe.set_device(0)  # if there are multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward()  # run once before timing to set up memory on the GPU
%timeit net.forward()

10 loops, best of 3: 61.6 ms per loop
# Look at the output shape of every layer; the format is (batch_size, number of channels/feature maps, image height, image width)
for layer_name,blob in net.blobs.iteritems():
    print layer_name +'\t' + str(blob.data.shape)

data	(50, 3, 227, 227)
conv1	(50, 96, 55, 55)
pool1	(50, 96, 27, 27)
norm1	(50, 96, 27, 27)
conv2	(50, 256, 27, 27)
pool2	(50, 256, 13, 13)
norm2	(50, 256, 13, 13)
conv3	(50, 384, 13, 13)
conv4	(50, 384, 13, 13)
conv5	(50, 256, 13, 13)
pool5	(50, 256, 6, 6)
fc6	(50, 4096)
fc7	(50, 4096)
fc8	(50, 1000)
prob	(50, 1000)
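
The spatial sizes above follow standard convolution arithmetic. As a quick sanity check (a sketch, using CaffeNet's published hyperparameters: conv1 has 11x11 kernels with stride 4 and no padding, pool1 has 3x3 windows with stride 2):

def conv_out_size(in_size, kernel, stride, pad=0):
    # floor((input + 2*padding - kernel) / stride) + 1
    return (in_size + 2 * pad - kernel) // stride + 1

print conv_out_size(227, 11, 4)   # conv1: (227 - 11) / 4 + 1 = 55
print conv_out_size(55, 3, 2)     # pool1: (55 - 3) / 2 + 1 = 27
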
# Note that the second dimension of each output blob is that layer's number of feature maps.
# Between conv3 and conv4 the connections are split into two groups, so conv4's weights below see only 192 of conv3's 384 feature maps (192 * 2 = 384).
# Now look at the parameter shapes: param[0] holds the weights W and param[1] holds the biases b
for layer_name,param in net.params.iteritems():
    print layer_name + '\t' + str(param[0].data.shape),str(param[1].data.shape)

conv1	(96, 3, 11, 11) (96,)
conv2	(256, 48, 5, 5) (256,)
conv3	(384, 256, 3, 3) (384,)
conv4	(384, 192, 3, 3) (384,)
conv5	(256, 192, 3, 3) (256,)
fc6	(4096, 9216) (4096,)
fc7	(4096, 4096) (4096,)
fc8	(1000, 4096) (1000,)
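
Because param[0] and param[1] are ordinary arrays, the parameter count per layer, and for the whole network, can be tallied directly from the shapes above (a minimal sketch reusing the net object already loaded):

total = 0
for layer_name, param in net.params.iteritems():
    count = param[0].data.size + param[1].data.size   # weights plus biases
    print layer_name, count
    total += count
print 'total parameters:', total   # roughly 61 million for CaffeNet
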
# Finally, define a helper function to visualize sets of features as tiled images
def vis_square(data):
    """
       (   ,  ,  )    (  ,  ),       。
    """
    #     normalize
    data = (data-data.min())/(data.max()-data.min())
    #     /        
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
               (0, 1), (0, 1))                 # add some space between filters
               + ((0, 0),) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # tile the filters into a single image
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data);
    # plt.axis('off')         # optionally hide the axes
# Visualize the conv1 filters (the params); transpose so the channel dimension comes last for RGB display
filters = net.params['conv1'][0].data
vis_square(filters.transpose(0,2,3,1))

[Figure 2: the conv1 filters]
# Visualize the first three channels of the data blob (the preprocessed input image)
feat = net.blobs['data'].data[0,:3]
vis_square(feat)

[Figure 3: the first three channels of the data blob]
# Look at fc7, the fully connected layer just before the final prob layer
feat = net.blobs['fc7'].data[0]
plt.subplot(2,1,1)         # top: the raw activation values
plt.plot(feat.flat)
plt.subplot(2,1,2)        # bottom: a histogram of the activations
a = plt.hist(feat.flat[feat.flat > 0], bins=100)     # keep only the positive values

[Figure 4: fc7 activations and their histogram]
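
Most fc7 activations are exactly zero because the ReLU that follows fc7 runs in place, which is why the histogram above keeps only the positive values. A quick check of how sparse the vector is (a sketch reusing the feat array from the cell above):

print 'fraction of zero activations in fc7:', (feat == 0).mean()
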
# Finally, the probability output of the last layer over ImageNet's 1000 classes
feat = net.blobs['prob'].data[0]
plt.figure(figsize=(15,3))
plt.plot(feat.flat)

[Figure 5: the predicted class probability vector]
# Now try classifying your own image downloaded from the web
# set the image URL
my_image_url ="https://upload.wikimedia.org/wikipedia/commons/b/be/Orang_Utan%2C_Semenggok_Forest_Reserve%2C_Sarawak%2C_Borneo%2C_Malaysia.JPG"

!wget -O image.jpg $my_image_url
# load the downloaded image
image = caffe.io.load_image("image.jpg")
net.blobs['data'].data[...] = transformer.preprocess('data',image)

net.forward()

output_prob = net.blobs['prob'].data[0]
top_inds = output_prob.argsort()[::-1][:5]

plt.imshow(image)

print 'probabilities and labels:'

zip(output_prob[top_inds],labels[top_inds])

--2016-05-04 20:57:11-- https://upload.wikimedia.org/wikipedia/commons/b/be/Orang_Utan%2C_Semenggok_Forest_Reserve%2C_Sarawak%2C_Borneo%2C_Malaysia.JPG
Resolving upload.wikimedia.org (upload.wikimedia.org)... 198.35.26.112, 2620:0:863:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|198.35.26.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1443340 (1.4M) [image/jpeg]
Saving to: 'image.jpg'
100%[====================================================================================>]
2016-05-04 20:58:35 (17.6 KB/s) - 'image.jpg' saved [1443340/1443340]
probabilities and labels:
[(0.96807837, 'n02480495 orangutan, orang, orangutang, Pongo pygmaeus'), (0.030588904, 'n02492660 howler monkey, howler'), (0.00085891597, 'n02493509 titi, titi monkey'), (0.00015429019, 'n02493793 spider monkey, Ateles geoffroyi'), (7.2596624e-05, 'n02488291 langur')]
[Figure 6: the downloaded orangutan image]