python mongodbのmap/reduceの作成


python mongodbのmap/reduceの作成
目的:次のuser_infoコレクションについて、クラス2の各科目の点数の合計と平均点を求める.
具体的な内容は以下の通りです.
{"uid":"a123","type":"man","class":2,"score":{"math":80,"english":60,"chinese":90}}{"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}}{"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}}{"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}}{"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}}
1、全件を順に読み取る方式を使用
find()を使用してclassが2のドキュメントを順に読み取り、集計を行います.具体的なpythonコードは次のとおりです.
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import time

from bson.code import Code
from bson.son import SON
from pymongo import Connection
# Script 1: compute per-subject totals and averages for class 2 by reading
# every matching document on the client with find().
#
# Uses the legacy pymongo API (Connection, insert, Cursor.count) that the
# rest of this file is written against.
mongoconn = Connection('192.168.0.203', 27017)
# NOTE: despite its name, ``db`` is the "user_info" collection of the
# "things" database, not a database object.
db = mongoconn['things']["user_info"]

# Recreate the sample data from scratch so repeated runs give the same totals.
db.drop()
sample_users = [
    {"uid": "a123", "type": "man", "class": 2,
     "score": {"math": 80, "english": 60, "chinese": 90}},
    {"uid": "b123", "type": "female", "class": 2,
     "score": {"math": 100, "english": 90, "chinese": 80}},
    {"uid": "c123", "type": "man", "class": 2,
     "score": {"math": 60, "english": 50, "chinese": 88}},
    {"uid": "d123", "type": "female", "class": 2,
     "score": {"math": 79, "english": 87, "chinese": 78}},
    {"uid": "e123", "type": "female", "class": 1,
     "score": {"math": 79, "english": 87, "chinese": 78}},
]
for user in sample_users:
    db.insert(user)

# Timestamp printed before the aggregation starts (requires ``import time``
# at the top of the script; the original omitted it and raised NameError).
print(time.time())

# Running totals per subject.
score = {"math": 0,
         "english": 0,
         "chinese": 0,
         }

class_two = db.find({"class": 2})
student_count = class_two.count()
print(student_count)

for doc in class_two:
    score["math"] += doc["score"]["math"]
    score["english"] += doc["score"]["english"]
    score["chinese"] += doc["score"]["chinese"]

# Without any class-2 document the averages are undefined; stop with a clear
# message instead of a ZeroDivisionError.
if student_count == 0:
    raise SystemExit("no class 2 documents found; cannot compute averages")

# float() keeps the division from truncating on Python 2.
math_avg = float(score["math"]) / student_count
english_avg = float(score["english"]) / student_count
chinese_avg = float(score["chinese"]) / student_count

# Single-argument print(...) calls produce the same output as the original
# Python 2 print statements and also parse on Python 3.
print("-------------------------------------------------")
print(score)
print("--------------------------------------------------")
print("math average score is : %s" % math_avg)
print("english average score is : %s" % english_avg)
print("chinese avarege score is : %s" % chinese_avg)

2、mongodbに組み込まれているmap/reduceを用いて集計する
a,map/reduce概要
map/reduceは並列分散処理のモデルであり、大規模なデータの計算に使用します.処理は主にmapとreduceの2つのフェーズに分かれており、各フェーズはkey-value、すなわちキーと値のペアを入力および出力として使用します.
具体的なkey-valueのフォーマットは多種多様で、具体的なプログラムによって定義されています.
mapフェーズでデータを読み込みkey-valueを生成します.
reduceフェーズでは、map関数が生成したkey-valueを読み込んで計算し、結果を返します.
mongodbのmap/reduceプロセス:
collectionを読み込む
map関数を実行し、emit関数でkey-valueを生成します.
reduce関数を実行し、mapの出力を順に処理して集計を行う
結果をcollectionに出力して返す
b,pythonによる具体的な実装
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
from pymongo import Connection
from bson.code import Code
from bson.son import SON
# Script 2: compute per-subject totals for each class on the server with
# MongoDB map/reduce, then derive the class-2 averages on the client.
#
# Uses the legacy pymongo API (Connection, insert, map_reduce, Cursor.count)
# that the rest of this file is written against.
mongoconn = Connection('192.168.0.203', 27017)
# NOTE: despite its name, ``db`` is the "user_info" collection of the
# "things" database, not a database object.
db = mongoconn['things']["user_info"]

# Recreate the sample data from scratch so repeated runs give the same totals.
db.drop()
sample_users = [
    {"uid": "a123", "type": "man", "class": 2,
     "score": {"math": 80, "english": 60, "chinese": 90}},
    {"uid": "b123", "type": "female", "class": 2,
     "score": {"math": 100, "english": 90, "chinese": 80}},
    {"uid": "c123", "type": "man", "class": 2,
     "score": {"math": 60, "english": 50, "chinese": 88}},
    {"uid": "d123", "type": "female", "class": 2,
     "score": {"math": 79, "english": 87, "chinese": 78}},
    {"uid": "e123", "type": "female", "class": 1,
     "score": {"math": 79, "english": 87, "chinese": 78}},
]
for user in sample_users:
    db.insert(user)

# Map step: emit one (class, score-subdocument) pair per user document.
# Named map_func / reduce_func so the Python builtins map() and reduce()
# are not shadowed.
map_func = Code("function() {"
                "   emit(this.class,this.score);"
                "}"
                )

# Reduce step: sum the three subject scores of all values emitted for a key.
# The result has the same {math, english, chinese} shape as the emitted
# values. The original omitted the closing brace of the for loop, which left
# the function with unbalanced braces; the loop is now closed before the
# return.
reduce_func = Code("function(key,values){"
                   "    var result={math:0,english:0,chinese:0};"
                   "   for (var i = 0; i < values.length; i++) {"
                   "       result.math += values[i].math;"
                   "       result.english += values[i].english;"
                   "       result.chinese += values[i].chinese;"
                   "   }"
                   "   return result;"
                   "}"
                   )

# Run the job; the per-class totals are written to the "class_user"
# collection, which map_reduce returns.
results = db.map_reduce(map_func, reduce_func, "class_user")

# Number of class-2 users, used as the divisor for the averages.
student_count = db.find({"class": 2}).count()

# The loop body only runs when a class-2 result exists, which implies at
# least one class-2 document, so student_count is non-zero here.
for row in results.find({"_id": 2}):
    totals = row["value"]
    print("*********************************************************")
    print(row)
    # float() mirrors the find()-based script and guards against integer
    # truncation on Python 2 should the totals come back as ints.
    print("math average score is : %s" % (float(totals["math"]) / student_count))
    print("english average score is : %s" % (float(totals["english"]) / student_count))
    print("chinese avarege score is : %s" % (float(totals["chinese"]) / student_count))