Bag of Visual Words (BoW) Model


BoW (Bag of Words) can be understood as a kind of histogram statistic; it originated as a simple document-representation method in natural language processing and information retrieval. Like a histogram, BoW only records frequency information and discards all sequence (order) information. The difference is that a histogram usually counts frequencies over numeric intervals, whereas BoW chooses a dictionary of words and counts how many times each word in the dictionary occurs.
The rough pipeline of the Bag of Visual Words is: first extract a set of features from the image collection, then cluster them into a number of groups; these clusters act as the dictionary, i.e. the visual "words". Finally, for each image, count how often each dictionary word appears; the resulting frequency vector can be used for subsequent classification, retrieval, and other tasks.
Taking SIFT features as an example, suppose the image set contains faces, bicycles, guitars, and so on. First extract SIFT features from every image, then cluster them with a method such as k-means to obtain the codebook.
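The numbered steps below implement this pipeline with OpenCV and scikit-learn. As a quick orientation, here is a minimal standalone sketch of the codebook idea; the file names, the use of cv2.SIFT_create(), and the choice of 100 clusters are illustrative assumptions, not taken from the project code below.

import cv2
import numpy as np
from sklearn.cluster import KMeans

sift = cv2.SIFT_create()    # on older OpenCV builds: cv2.xfeatures2d.SIFT_create()

def sift_descriptors(path):
    img = cv2.imread(path, 0)                  # read as grayscale
    _, des = sift.detectAndCompute(img, None)
    return des                                 # shape: (num_keypoints, 128)

train_paths = ["face1.jpg", "bike1.jpg", "guitar1.jpg"]        # placeholder file names
all_des = np.vstack([sift_descriptors(p) for p in train_paths])

kmeans = KMeans(n_clusters=100, n_init=10).fit(all_des)        # cluster centers = visual words

def bow_histogram(path):
    words = kmeans.predict(sift_descriptors(path))             # nearest word per descriptor
    hist, _ = np.histogram(words, bins=np.arange(kmeans.n_clusters + 1))
    return hist                                                # the image's word-frequency vector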
1. Read the images
from glob import glob
import cv2

def gettestfiles():
    imlist = {}
    count = 0
    for each in glob(r'./images/test/' + "*"):
        word = each.split("/")[-1]              # directory name serves as the category label
        print(" #### Reading image category ", word, " ##### ")
        imlist[word] = []
        for imagefile in glob(each + '/*'):
            im = cv2.imread(imagefile, 0)       # load as grayscale
            imlist[word].append(im)
            count += 1
    return [imlist, count]
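A quick usage check might look like this (it assumes the directory layout ./images/test/<category>/ holds the test images):

images, image_count = gettestfiles()
print("Loaded", image_count, "images across", len(images), "categories:", list(images.keys()))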

2. SIFT features
def trainModel(self):
    """
    This method contains the entire module
    required for training the bag of visual words model.

    Use of helper functions will be extensive.
    """
    # read files, prepare file lists
    # self.images, self.trainImageCount = self.file_helper.getFiles()
    # extract SIFT features from each image
    label_count = 0
    for word in self.images.keys():
        self.name_dict[str(label_count)] = word    # map numeric label -> category name
        print("Computing Features for ", word)
        for im in self.images[word]:
            # cv2.imshow("im", im)
            # cv2.waitKey()
            self.train_labels = np.append(self.train_labels, label_count)
            kp, des = self.im_helper.features(im)  # SIFT keypoints and 128-d descriptors
            self.descriptor_list.append(des)

        label_count += 1

class ImageHelpers:
    def __init__(self):
        # on OpenCV >= 4.4 this is available as cv2.SIFT_create() in the main module
        self.sift_object = cv2.xfeatures2d.SIFT_create()

    def gray(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return gray

    def features(self, image):
        keypoints, descriptors = self.sift_object.detectAndCompute(image, None)
        return [keypoints, descriptors]

At this point we have SIFT descriptors for the entire image set; each descriptor (des) is 128-dimensional. PCA can optionally be applied here to reduce the dimensionality before clustering, and KMeans is then used for the clustering itself.
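A hedged sketch of that optional PCA step, assuming the stacked descriptor array produced by formatND() below; the 64 components and 100 clusters are arbitrary illustrative choices, not values used by the project code.

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

def cluster_with_pca(descriptor_stack, n_components=64, n_clusters=100):
    # descriptor_stack: (M, 128) array of stacked SIFT descriptors
    pca = PCA(n_components=n_components)
    reduced = pca.fit_transform(descriptor_stack)        # (M, n_components)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    assignments = kmeans.fit_predict(reduced)            # visual-word index per descriptor
    return pca, kmeans, assignments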
# perform clustering
bov_descriptor_stack = self.bov_helper.formatND(self.descriptor_list)
self.bov_helper.cluster()
self.bov_helper.developVocabulary(n_images=self.trainImageCount, descriptor_list=self.descriptor_list)
def formatND(self, l):
    """
    restructures list into vstack array of shape
    M samples x N features for sklearn
    """
    vStack = np.array(l[0])
    for remaining in l[1:]:
        vStack = np.vstack((vStack, remaining))
    self.descriptor_vstack = vStack.copy()
    return vStack

def cluster(self):
    """
    cluster using KMeans algorithm
    """
    self.kmeans_ret = self.kmeans_obj.fit_predict(self.descriptor_vstack)

def developVocabulary(self, n_images, descriptor_list, kmeans_ret=None):
    """
    Each cluster denotes a particular visual word.
    Every image can be represented as a combination of multiple
    visual words. The best method is to generate a sparse histogram
    that contains the frequency of occurrence of each visual word.

    Thus the vocabulary comprises a set of histograms encompassing
    all descriptors for all images.
    """
    self.mega_histogram = np.array([np.zeros(self.n_clusters) for i in range(n_images)])
    old_count = 0
    for i in range(n_images):
        l = len(descriptor_list[i])
        for j in range(l):
            if kmeans_ret is None:
                idx = self.kmeans_ret[old_count + j]    # cluster index of this descriptor
            else:
                idx = kmeans_ret[old_count + j]
            self.mega_histogram[i][idx] += 1            # count the visual word for image i
        old_count += l
    print("Vocabulary Histogram Generated")

3. Classification: SVM
self.bov_helper.standardize()
self.bov_helper.train(self.train_labels)
def standardize(self, std=None):
    """
    standardize is required to normalize the distribution
    w.r.t. sample size and features. If not normalized, the classifier may become
    biased because some features have a much larger variance than others.
    """
    # requires: from sklearn.preprocessing import StandardScaler
    if std is None:
        self.scale = StandardScaler().fit(self.mega_histogram)
        self.mega_histogram = self.scale.transform(self.mega_histogram)
    else:
        print("STD not none. External STD supplied")
        self.mega_histogram = std.transform(self.mega_histogram)

def train(self, train_labels):
    """
    uses sklearn.svm.SVC classifier (SVM)
    """
    print("Training SVM")
    print(self.clf)
    print("Train labels", train_labels)
    self.clf.fit(self.mega_histogram, train_labels)
    print("Training completed")


Reference: code