貯水池サンプリングアルゴリズム

559 ワード

python実装
import time
import random
import copy

def reservoirSampling(seq, k):
	localSeq = copy.deepcopy(seq)
	N = len(localSeq)	
	for i in xrange(k, N, 1):
		M = int(random.uniform(0, i))
		if M < k :
			temp = copy.deepcopy(localSeq[M])
			localSeq[M] = copy.deepcopy(localSeq[i])
			localSeq[i] = temp
	return localSeq[0:k]
	
a = [4,5,6,3,4,7,7,4,3,3,2,4,5,5,6,9,5,4,3,45,3,23,44,55,33,5,8]
k = 5

print reservoirSampling(a, k)