Storm [Storm-MongoDB Interface] - 1: Overview
Prerequisites for reading:
First, a basic understanding of MongoDB is required.
Second, an understanding of Storm itself is required.
Reading advice:
The Storm interface series covers a set of interfaces built around Storm's real-time processing framework. While reading this series of interface documents, compare them with the Storm-HBase interface posts.
The whole Storm interface is divided into the following classes:
1 : MongoBolt.java
2 : MongoSpout.java
3 : MongoTailableCursorTopology.java
4 : SimpleMongoBolt.java
Looking at the code:
1 : MongoBolt.java
package storm.mongo;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import com.mongodb.DB;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoException;
import com.mongodb.WriteConcern;
/**
*
* A Bolt which writes Storm tuples into MongoDB. The class is abstract:
* subclasses decide which tuples to write, which Mongo collection to use,
* and how to convert a tuple into a DBObject.
*
* @author Adrian Petrescu <[email protected]>
*
*/
public abstract class MongoBolt extends BaseRichBolt {
private OutputCollector collector;
// Handle to the MongoDB database this bolt writes to
private DB mongoDB;
// Connection parameters; prepare() uses them to open the MongoDB database handle
private final String mongoHost;
private final int mongoPort;
private final String mongoDbName;
/**
* @param mongoHost The host on which Mongo is running.
* @param mongoPort The port on which Mongo is running.
* @param mongoDbName The Mongo database containing all collections being
* written to.
*/
protected MongoBolt(String mongoHost, int mongoPort, String mongoDbName) {
this.mongoHost = mongoHost;
this.mongoPort = mongoPort;
this.mongoDbName = mongoDbName;
}
@Override
public void prepare(
@SuppressWarnings("rawtypes") Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
try {
//prepare Mongo
this.mongoDB = new MongoClient(mongoHost, mongoPort).getDB(mongoDbName);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public void execute(Tuple input) {
// Only act on tuples the subclass says should trigger a Mongo write
if (shouldActOnInput(input)) {
String collectionName = getMongoCollectionForInput(input);
DBObject dbObject = getDBObjectForInput(input);
if (dbObject != null) {
try {
mongoDB.getCollection(collectionName).save(dbObject, new WriteConcern(1));
collector.ack(input);
} catch (MongoException me) {
collector.fail(input);
}
}
} else {
collector.ack(input);
}
}
/**
* Decide whether or not this input tuple should trigger a Mongo write.
*
* @param input the input tuple under consideration
* @return {@code true} iff this input tuple should trigger a Mongo write
*/
public abstract boolean shouldActOnInput(Tuple input);
/**
* Returns the Mongo collection which the input tuple should be written to.
*
* @param input the input tuple under consideration
* @return the Mongo collection which the input tuple should be written to
*/
public abstract String getMongoCollectionForInput(Tuple input);
/**
* Returns the DBObject to store in Mongo for the specified input tuple.
*
*
* @param input the input tuple under consideration
* @return the DBObject to be written to Mongo
*/
public abstract DBObject getDBObjectForInput(Tuple input);
// Close the underlying Mongo connection when the bolt is shut down.
@Override
public void cleanup() {
this.mongoDB.getMongo().close();
}
}
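To make the abstract methods above concrete, here is a minimal sketch of a subclass. The class name WordCountMongoBolt, the collection name "word_counts", and the tuple fields "word" and "count" are assumptions for illustration only; they are not part of the storm-mongo code.
package storm.mongo.example;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import storm.mongo.MongoBolt;
// Hypothetical example: persists ("word", "count") tuples into MongoDB.
public class WordCountMongoBolt extends MongoBolt {
    public WordCountMongoBolt(String mongoHost, int mongoPort, String mongoDbName) {
        super(mongoHost, mongoPort, mongoDbName);
    }
    @Override
    public boolean shouldActOnInput(Tuple input) {
        // Only tuples that actually carry a "word" field trigger a write.
        return input.contains("word");
    }
    @Override
    public String getMongoCollectionForInput(Tuple input) {
        // Every document goes to a single collection in this sketch.
        return "word_counts";
    }
    @Override
    public DBObject getDBObjectForInput(Tuple input) {
        // Convert the tuple into the Mongo document that save() will store.
        return new BasicDBObject("word", input.getStringByField("word"))
                .append("count", input.getLongByField("count"));
    }
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // This bolt is a sink and emits nothing downstream.
    }
}
Because execute() acks only after save() succeeds and fails the tuple on a MongoException, a subclass like this gets replay-based delivery into Mongo as long as the upstream spout replays failed tuples. Note that a tuple for which getDBObjectForInput() returns null is neither acked nor failed, so it is only replayed after the tuple timeout expires.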
2 : MongoSpout.java
package storm.mongo;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.utils.Utils;
import com.mongodb.BasicDBObject;
import com.mongodb.Bytes;
import com.mongodb.DB;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoException;
/**
* A Spout which consumes documents from a Mongodb tailable cursor.
*
* Subclasses should simply override two methods:
* <ul>
* <li>{@link #declareOutputFields(OutputFieldsDeclarer) declareOutputFields}
* <li>{@link #dbObjectToStormTuple(DBObject) dbObjectToStormTuple}, which turns
* a Mongo document into a Storm tuple matching the declared output fields.
* </ul>
*
* <p>
* <b>WARNING:</b> You can only use tailable cursors on capped collections.
*
* @author Dan Beaulieu <[email protected]>
*
*/
// Like the Bolt above, MongoSpout is abstract: subclasses decide how a Mongo document
// is turned into a Storm tuple. The spout feeds itself from a tailable cursor, which
// only works on a capped collection.
public abstract class MongoSpout extends BaseRichSpout {
private SpoutOutputCollector collector;
private LinkedBlockingQueue<DBObject> queue;
private final AtomicBoolean opened = new AtomicBoolean(false);
private DB mongoDB;
private final DBObject query;
private final String mongoHost;
private final int mongoPort;
private final String mongoDbName;
private final String mongoCollectionName;
public MongoSpout(String mongoHost, int mongoPort, String mongoDbName, String mongoCollectionName, DBObject query) {
this.mongoHost = mongoHost;
this.mongoPort = mongoPort;
this.mongoDbName = mongoDbName;
this.mongoCollectionName = mongoCollectionName;
this.query = query;
}
class TailableCursorThread extends Thread {
// The TailableCursorThread tails the capped collection in the background and
// pushes every document returned by the cursor into the LinkedBlockingQueue
// (a thread-safe blocking queue from java.util.concurrent) shared with nextTuple().
LinkedBlockingQueue<DBObject> queue;
String mongoCollectionName;
DB mongoDB;
DBObject query;
public TailableCursorThread(LinkedBlockingQueue<DBObject> queue, DB mongoDB, String mongoCollectionName, DBObject query) {
this.queue = queue;
this.mongoDB = mongoDB;
this.mongoCollectionName = mongoCollectionName;
this.query = query;
}
public void run() {
while(opened.get()) {
try {
// create the cursor
mongoDB.requestStart();
final DBCursor cursor = mongoDB.getCollection(mongoCollectionName)
.find(query)
.sort(new BasicDBObject("$natural", 1))
.addOption(Bytes.QUERYOPTION_TAILABLE)
.addOption(Bytes.QUERYOPTION_AWAITDATA);
try {
while (opened.get() && cursor.hasNext()) {
final DBObject doc = cursor.next();
if (doc == null) break;
queue.put(doc);
}
} finally {
try {
if (cursor != null) cursor.close();
} catch (final Throwable t) { }
try {
mongoDB.requestDone();
} catch (final Throwable t) { }
}
Utils.sleep(500);
} catch (final MongoException.CursorNotFound cnf) {
// rethrow only if something went wrong while we expect the cursor to be open.
if (opened.get()) {
throw cnf;
}
} catch (InterruptedException e) { break; }
}
}
}
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
this.queue = new LinkedBlockingQueue<DBObject>(1000);
try {
this.mongoDB = new MongoClient(this.mongoHost, this.mongoPort).getDB(this.mongoDbName);
} catch (Exception e) {
throw new RuntimeException(e);
}
TailableCursorThread listener = new TailableCursorThread(this.queue, this.mongoDB, this.mongoCollectionName, this.query);
this.opened.set(true);
listener.start();
}
@Override
public void close() {
this.opened.set(false);
}
@Override
public void nextTuple() {
DBObject dbo = this.queue.poll();
if(dbo == null) {
Utils.sleep(50);
} else {
this.collector.emit(dbObjectToStormTuple(dbo));
}
}
@Override
public void ack(Object msgId) {
// Tuples are emitted without message IDs, so there is nothing to do on ack.
}
@Override
public void fail(Object msgId) {
// Tuples are emitted without message IDs, so failed tuples are not replayed.
}
public abstract List<Object> dbObjectToStormTuple(DBObject message);
}
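A concrete spout only has to declare its output fields and map each Mongo document to a tuple. The sketch below, together with a topology that wires it to the WordCountMongoBolt from the previous example, is purely illustrative: the names StatusMongoSpout, the "test" database, the "status" collection, and the "_id"/"text" fields are assumptions, and the collection must be capped for the tailable cursor to work.
package storm.mongo.example;
import java.util.List;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import storm.mongo.MongoSpout;
// Hypothetical example: tails a capped "status" collection and emits (id, text) tuples.
public class StatusMongoSpout extends MongoSpout {
    public StatusMongoSpout(String host, int port, String db, String collection) {
        // An empty query matches every document appended to the capped collection.
        super(host, port, db, collection, new BasicDBObject());
    }
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("id", "text"));
    }
    @Override
    public List<Object> dbObjectToStormTuple(DBObject message) {
        // Map the Mongo document to the declared output fields.
        return new Values(message.get("_id").toString(), message.get("text"));
    }
    public static void main(String[] args) {
        // Wire the spout to the MongoBolt sketch from the previous listing.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("mongo-spout",
                new StatusMongoSpout("localhost", 27017, "test", "status"));
        builder.setBolt("mongo-bolt",
                new WordCountMongoBolt("localhost", 27017, "test"))
               .shuffleGrouping("mongo-spout");
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("mongo-demo", new Config(), builder.createTopology());
    }
}
Because nextTuple() emits without a message ID and ack()/fail() are empty, this spout offers no replay guarantee; a document taken off the queue and lost downstream is simply gone, which is usually acceptable for the capped-collection tailing use case.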