Oozie, Flume, and Mahout: Configuration and Applications
-------------------------Oozie--------------------
[1. Install Oozie]
1) Install the Oozie server
[root@cMaster ~]# sudo yum install oozie    # as root on cMaster, install the Oozie server
2) Install the Oozie client
[root@iClient ~]# sudo yum install oozie-client
[2. Configure Oozie]
3) Edit /etc/oozie/conf/oozie-env.sh as follows:
#export CATALINA_BASE=/var/lib/oozie/tomcat-deployment
export CATALINA_BASE=/usr/lib/oozie/oozie-server
# export OOZIE_CONFIG_FILE=oozie-site.xml
export OOZIE_CONFIG=/etc/oozie/conf
# export OOZIE_LOG=${OOZIE_HOME}/logs
export OOZIE_LOG=/var/log/oozie
4) Add the following proxy-user properties to /etc/hadoop/conf/core-site.xml:
<property>
  <name>hadoop.proxyuser.oozie.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.oozie.hosts</name>
  <value>*</value>
</property>
[3. Create the Oozie database and shared jar library]
Restart the Hadoop services so the new configuration takes effect:
$ for x in `cd /etc/init.d; ls hadoop-*`; do service $x restart; done    # the backquote is the key below Esc; run on every node except iClient
Then create the Oozie database and unpack the shared library on cMaster:
[root@cMaster ~]# sudo -u oozie /usr/lib/oozie/bin/ooziedb.sh create -run    # run on cMaster
[root@cMaster ~]# mkdir /tmp/ooziesharelib
[root@cMaster ~]# cd /tmp/ooziesharelib
[root@cMaster ~]# tar xzf /usr/lib/oozie/oozie-sharelib-yarn.tar.gz    # extract oozie-sharelib-yarn.tar.gz shipped under /usr/lib/oozie/ into /tmp/ooziesharelib
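The extracted share directory is then uploaded to HDFS under the oozie user's home directory (the same step appears in the detailed walkthrough below):
[root@cMaster ~]# sudo -u oozie hdfs dfs -put share /user/oozie/share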
[4. Start Oozie and submit a job]
[root@cMaster ~]# sudo service oozie start
Submit a job: oozie job -oozie http://cMaster:11000/oozie -config /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties -run
or, from the directory that contains job.properties:
oozie job -oozie http://cMaster:11000/oozie -config job.properties -run
On HDFS, the workflow application must already exist under /user/<current user, e.g. root>/examples/apps/map-reduce/; job.properties points Oozie at that /user/<current user>/examples/apps/map-reduce/ directory.
Note: the job.properties passed to -config is a local file on the client, for example:
/usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties
——————————————————————————————————————————
1. Deploying Oozie [21]. Oozie runs on top of an existing Hadoop cluster: the node that schedules and manages workflows runs the Oozie server, while nodes that only submit workflows need just the Oozie client. Here the Oozie server is deployed on cMaster and the Oozie client on iClient.
1) Install the Oozie server
[root@cMaster ~]# sudo yum install oozie    # as root on cMaster, install the Oozie server
2) Install the Oozie client
[root@iClient ~]# sudo yum install oozie-client
3) Configure Oozie. In /etc/oozie/conf/oozie-env.sh, set CATALINA_BASE according to the MapReduce version in use: oozie-server-0.20 runs Oozie on MRv1, while oozie-server runs it on YARN. Make this change on both cMaster and iClient.
#export CATALINA_BASE=/usr/lib/oozie/oozie-server-0.20
export CATALINA_BASE=/usr/lib/oozie/oozie-server
Next, add the following properties inside the configuration element of /etc/hadoop/conf/core-site.xml; the change must be made on the cluster's Hadoop nodes, and Hadoop has to be restarted for it to take effect.
<property>
  <name>hadoop.proxyuser.oozie.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.oozie.hosts</name>
  <value>*</value>
</property>
Restart Hadoop:
$ for x in `cd /etc/init.d; ls hadoop-*`; do service $x restart; done    # run on every node except iClient
4) Create the Oozie database
[root@cMaster ~]# sudo -u oozie /usr/lib/oozie/bin/ooziedb.sh create -run    # run on cMaster
5) Install the Oozie Web console (the ExtJS library)
[root@cMaster ~]#cd /var/lib/oozie/
[root@cMaster oozie]# sudo -u oozie wget http://archive.cloudera.com/gplextras/misc/ext-2.2.zip
[root@cMaster oozie]# sudo -u oozie unzip ext-2.2.zip
6) Upload Oozie's shared jar library to HDFS
[root@cMaster~]# sudo -u hdfs hdfs dfs -mkdir /user/oozie
[root@cMaster~]#sudo -u hdfs hdfs dfs -chown oozie:oozie /user/oozie
[root@cMaster ~]#mkdir /tmp/ooziesharelib
[root@cMaster~]#cd /tmp/ooziesharelib
[root@cMaster~]# tar xzf /usr/lib/oozie/oozie-sharelib-yarn.tar.gz
[root@cMaster~]# sudo -u oozie hdfs dfs -put share /user/oozie/share
7) Start the Oozie service
[root@cMaster~]#sudo service oozie start
8) Verify the deployment
Once the Oozie service on cMaster is running and ext-2.2 is installed, open "cmaster:11000" in a browser on iClient to reach the Oozie Web console, or query the Oozie server status from the command line:
[root@iClient~]#oozie admin -oozie http://cMaster:11000/oozie -status
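If the deployment is healthy, the command reports the Oozie system mode; the expected output is a single line of the form:
System mode: NORMAL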
------------------------------
2. Using Oozie
Oozie jobs can be submitted from the command line or through the Oozie Web console; the example below works through command-line usage.
[Example 6-6] Complete the following tasks:
① Display Oozie's help information and become familiar with its usage.
② Submit and run Oozie's bundled MapReduce example workflow.
③ Submit and run Oozie's Pig and Hive example workflows.
④ Modify workflow.xml so that the example workflow runs WordCount.
⑤ Write your own workflow.xml that runs WordCount and feeds the output of one WC job into a second WC job.
Answer: For ①, run the following on iClient; the commands are executed as user joe rather than root.
[root@iClient ~]# sudo -u joe oozie help    # display Oozie's help
For ②, Oozie ships its examples as an archive; extract it, adjust the job configuration, upload it to HDFS, and then submit the job.
[root@iClient ~]# cd /usr/share/doc/oozie-4.0.0+cdh5.0.0+54
[root@iClient oozie-4.0.0+cdh5.0.0+54]# tar -zxvf oozie-examples.tar.gz
Edit examples/apps/map-reduce/job.properties and change:
nameNode=hdfs://localhost:8020
jobTracker=localhost:8021
to:
nameNode=hdfs://cMaster:8020
jobTracker=cMaster:8032
Upload the examples directory to HDFS and submit the job to Oozie:
[root@iClient oozie-4.0.0+cdh5.0.0+54]# sudo -u joe hdfs dfs -put examples examples
[root@iClient oozie-4.0.0+cdh5.0.0+54]# cd
[root@iClient ~]# sudo -u joe oozie job -oozie http://cMaster:11000/oozie -config /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties -run
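On success the client prints the ID of the newly created workflow job; its progress can then be followed from the command line (the job ID below is a placeholder, replace it with the one actually returned):
[root@iClient ~]# sudo -u joe oozie job -oozie http://cMaster:11000/oozie -info <job-id>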
For ③, following the same procedure as in ②, submit Oozie's Pig and Hive examples: edit the corresponding job.properties in the same way (e.g. examples/apps/pig/job.properties), then submit it (e.g. sudo -u joe oozie ....../apps/pig/job.properties -run).
For ④, edit "examples/apps/map-reduce/workflow.xml". The jar under "examples/apps/map-reduce/lib" already contains DemoMapper.class and DemoReducer.class, which implement WordCount (their source code is under "examples/src"), so only the workflow configuration needs to change.
(1) Edit "examples/apps/map-reduce/workflow.xml" and change the mapper and reducer properties:
<property>
  <name>mapred.mapper.class</name>
  <value>org.apache.oozie.example.SampleMapper</value>
</property>
<property>
  <name>mapred.reducer.class</name>
  <value>org.apache.oozie.example.SampleReducer</value>
</property>
to:
<property>
  <name>mapred.mapper.class</name>
  <value>org.apache.oozie.example.DemoMapper</value>
</property>
<property>
  <name>mapred.reducer.class</name>
  <value>org.apache.oozie.example.DemoReducer</value>
</property>
<property>
  <name>mapred.output.key.class</name>
  <value>org.apache.hadoop.io.Text</value>
</property>
<property>
  <name>mapred.output.value.class</name>
  <value>org.apache.hadoop.io.IntWritable</value>
</property>
(2) The examples directory uploaded to HDFS in ② already exists there, so delete it first; the modified local examples directory is then uploaded and submitted exactly as in ②.
[root@iClient ~]# sudo -u joe hdfs dfs -rm -r -f examples    # delete the old examples directory on HDFS
(3) Upload the modified examples directory to HDFS again and resubmit the job to Oozie:
[root@iClient oozie-4.0.0+cdh5.0.0+54]# sudo -u joe hdfs dfs -put examples examples
[root@iClient oozie-4.0.0+cdh5.0.0+54]# cd
[root@iClient ~]# sudo -u joe oozie job -oozie http://cMaster:11000/oozie -config /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties -run
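Once the job finishes, the WordCount result can be inspected on HDFS. Assuming the example keeps its default output location (examples/output-data/map-reduce, set via the outputDir property in job.properties), something like the following shows the word counts:
[root@iClient ~]# sudo -u joe hdfs dfs -cat examples/output-data/map-reduce/part-* | head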
For ⑤, suppose a business pipeline consists of the stages:
"M1"→"R1"→"Java1"→"Pig1"→"Hive1"→"M2"→"R2"→"Java2"
Rather than running these steps by hand one after another, the whole chain can be declared in a single workflow.xml, with each stage defined as an action node whose ok transition points to the next stage. Write such a workflow.xml and submit it in the same way as in ④.
Of the customized workflow.xml, only the workflow/action name "wordcount" and the kill-node message survive in the original post:
Map/Reduce failed error message[${wf:errorMessage(wf:lastErrorNode())}]
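For reference, a minimal sketch of such a workflow.xml, modeled on the structure of the bundled map-reduce example (the xmlns version and the inputDir/outputDir parameters are assumptions, and the prepare/cleanup step of the real example is omitted):
-----------------------------------------------------------------------------------------------
<workflow-app xmlns="uri:oozie:workflow:0.2" name="wordcount">
    <start to="wordcount"/>
    <action name="wordcount">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.mapper.class</name>
                    <value>org.apache.oozie.example.DemoMapper</value>
                </property>
                <property>
                    <name>mapred.reducer.class</name>
                    <value>org.apache.oozie.example.DemoReducer</value>
                </property>
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>${inputDir}</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>${outputDir}</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Map/Reduce failed error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
-----------------------------------------------------------------------------------------------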
——————————————————————————Flume——————————————————————
1. Deploying Flume [21]. Flume agents collect event and log data and forward it, typically into a Hadoop cluster for storage; here Flume is installed on both cMaster and iClient.
(1) Install Flume on cMaster
[root@cMaster ~]# sudo yum install flume-ng-agent    # install the Flume agent on cMaster
(2) Install Flume on iClient
[root@iClient ~]# sudo yum install flume-ng-agent    # install the Flume agent on iClient
---------------
2. Using Flume. Working with Flume mostly means writing agent configuration files that wire sources, channels and sinks together; the examples below build from a single agent up to a two-agent pipeline and illustrate Flume's central abstraction, the data flow.
[Example 6-7] Complete the following tasks:
① Display Flume's help information and become familiar with its usage.
② From iClient, telnet to port 44444 on cMaster; a Flume agent on cMaster listening on that port should print whatever is typed.
③ Send the local file "/home/joe/source.txt" on iClient to cMaster via Flume, and have the Flume agent on cMaster store the received data in HDFS.
④ Building on ③, keep the Flume agent on cMaster unchanged and suppose that, besides iClient, a malicious host iHacker also sends it data; consider what happens and how it could be prevented.
Answer: For ①, run the following on iClient:
[root@iClient ~]# flume-ng    # running flume-ng without arguments prints its usage/help
For ②, first define a Flume agent on cMaster, then telnet to cMaster from iClient; whatever is typed in the telnet session is printed by the agent. As root on cMaster, create "/etc/flume-ng/conf/flume.conf" with the following content (the Flume agent configuration for cMaster):
-----------------------------------------------------------------------------------------------
# define agent a1 with source r1, channel c1 and sink k1
a1.sources=r1
a1.channels=c1
a1.sinks=k1
# source: a netcat source listening on port 44444 of cMaster
a1.sources.r1.type=netcat
a1.sources.r1.bind=cMaster
a1.sources.r1.port=44444
# channel: buffer events in memory
a1.channels.c1.type=memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
# sink: a logger sink that prints received events to the agent's console/log
a1.sinks.k1.type=logger
# bind the source and the sink to channel c1
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
---------------------------------------------------------------------------------------------
[root@cMaster~]#flume-ng agent -c /etc/flume-ng/ -f /etc/flume-ng/conf/flume.conf -n a1
The agent on cMaster now runs in the foreground, waiting for connections. Open a terminal on iClient and run:
[root@iClient ~]# telnet cMaster 44444
Once telnet has connected to cMaster, anything typed on iClient is echoed in the Flume agent's console on cMaster. To leave telnet on iClient, press Ctrl+] (hold Ctrl and press ]) and then type "quit" at the telnet prompt; to stop the Flume agent on cMaster, press Ctrl+C.
For ③, proceed as follows. First, as root on cMaster, create "/etc/flume-ng/conf/flume.conf.hdfs" with the following content:
-----------------------------------------------------------------------------------------------
# define agent a1 with source r1, channel c1 and sink k1
a1.sources=r1
a1.sinks=k1
a1.channels=c1
# source: an avro source listening on port 4141 of cMaster
a1.sources.r1.type=avro
a1.sources.r1.bind=cMaster
a1.sources.r1.port=4141
# channel: buffer events in memory
a1.channels.c1.type=memory
# sink: an HDFS sink that writes incoming events into HDFS
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=/user/joe/flume/cstorArchive
a1.sinks.k1.hdfs.fileType=DataStream
# bind the source and the sink to channel c1
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
-----------------------------------------------------------------------------------------------
Next, on iClient create the file "/root/businessLog" with the following content:
--------------------------------------
ccccccccccccccccccccc
ssssssssssssssssssssssss
tttttttttttttttttttttttttttttttttt
oooooooooooooooooo
rrrrrrrrrrrrrrrrrrrrrrrrrrrrr
--------------------------------------
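One way to create this file (a simple sketch; any editor works just as well):
[root@iClient ~]# cat > /root/businessLog <<'EOF'
ccccccccccccccccccccc
ssssssssssssssssssssssss
tttttttttttttttttttttttttttttttttt
oooooooooooooooooo
rrrrrrrrrrrrrrrrrrrrrrrrrrrrr
EOF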
Also on iClient, create "/etc/flume-ng/conf/flume.conf.exce" with the following content:
-----------------------------------------------------------------------------------------------
# define agent a1 with source r1, channel c1 and sink k1
a1.sources=r1
a1.channels=c1
a1.sinks=k1
# source: an exec source that runs the Linux command 'cat /root/businessLog'
# and feeds its output into the channel
a1.sources.r1.type=exec
a1.sources.r1.command=cat /root/businessLog
# channel: buffer events in memory
a1.channels.c1.type=memory
# sink: an avro sink that forwards events from the channel to port 4141 on cMaster
a1.sinks.k1.type=avro
a1.sinks.k1.hostname=cMaster
a1.sinks.k1.port=4141
# bind the source and the sink to channel c1
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
-----------------------------------------------------------------------------------------------
Now start the Flume agents on iClient and cMaster. Because the agent on cMaster receives the data and writes it into HDFS, it must be started before the sender: on cMaster, create the target directory on HDFS and launch the agent defined in flume.conf.hdfs with the "flume-ng … a1" command below. Both agents happen to be named a1, which causes no conflict since they run on different machines.
[root@cMaster ~]# sudo -u joe hdfs dfs -mkdir flume    # create /user/joe/flume on HDFS
[root@cMaster ~]#sudo -u joe flume-ng agent -c /etc/flume-ng/ -f /etc/flume-ng/conf/flume.conf.hdfs -n a1
Then, on iClient, start the sending agent a1 defined in flume.conf.exce:
[root@iClient ~]# flume-ng agent -c /etc/flume-ng/ -f /etc/flume-ng/conf/flume.conf.exce -n a1
Finally, open "cMaster:50070" in a browser on iClient and browse to "/user/joe/flume/cstorArchive"; the contents of the file sent from iClient will have been archived there by Flume.
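The archived data can also be listed from the command line (a quick check; the file names are generated automatically by the HDFS sink):
[root@cMaster ~]# sudo -u joe hdfs dfs -ls /user/joe/flume/cstorArchive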
—————————————————————————— Mahout——————————————————————
1. Deploying Mahout [21]. Mahout runs as a client-side library on top of an existing Hadoop cluster, so it only needs to be installed on iClient:
[root@iClient ~]# sudo yum install mahout
2. Using Mahout. Mahout provides ready-to-run implementations of common machine learning algorithms that are executed as MapReduce jobs on the cluster; the example below drives one of them from the command line.
[Example 6-8] As user joe, use Mahout's naivebayes algorithm: download the 20 Newsgroups corpus, convert it into the required format, train a classifier, and evaluate the classification results.
-------------------------------------------------------------------------
#!/bin/sh
# create local working directories and a mahout directory on HDFS
mkdir -p /tmp/mahout/20news-bydate /tmp/mahout/20news-all && hdfs dfs -mkdir mahout
# download the 20 Newsgroups data set
curl http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz -o /tmp/mahout/20news-bydate.tar.gz
# extract the archive, merge the training and test parts, and upload the result to HDFS
cd /tmp/mahout/20news-bydate && tar xzf /tmp/mahout/20news-bydate.tar.gz && cd
cp -R /tmp/mahout/20news-bydate/*/* /tmp/mahout/20news-all
hdfs dfs -put /tmp/mahout/20news-all mahout/20news-all
# convert the raw text files into SequenceFiles with seqdirectory
mahout seqdirectory -i mahout/20news-all -o mahout/20news-seq -ow
-------------------------------------------------------------------------
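At this point the SequenceFile produced by seqdirectory can be spot-checked with Mahout's seqdumper utility (an optional sanity check, run as joe like the script itself):
[root@iClient ~]# sudo -u joe mahout seqdumper -i mahout/20news-seq | head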
Note: commands such as seqdirectory are themselves submitted to the cluster as MapReduce jobs, and Mahout stores each document as the Value of a SequenceFile record. Continue with the steps below and save everything together as naivebayes.sh:
-------------------------------------------------------------------------
# turn the SequenceFiles into tf-idf feature vectors with seq2sparse
mahout seq2sparse -i mahout/20news-seq -o mahout/20news-vectors -lnorm -nv -wt tfidf
# split the vectors into a training set and a test set
# (--randomSelectionPct controls the share held out for testing)
mahout split -i mahout/20news-vectors/tfidf-vectors --trainingOutput mahout/20news-train-vectors \
  --testOutput mahout/20news-test-vectors \
  --randomSelectionPct 40 --overwrite --sequenceFiles -xm sequential
# train the Naive Bayes model
mahout trainnb -i mahout/20news-train-vectors -el -o mahout/model -li mahout/labelindex -ow
# evaluate the model on the training set (self-check)
mahout testnb -i mahout/20news-train-vectors -m mahout/model -l mahout/labelindex \
  -ow -o mahout/20news-testing
# evaluate the model on the held-out test set
mahout testnb -i mahout/20news-test-vectors -m mahout/model -l mahout/labelindex \
  -ow -o mahout/20news-testing
-------------------------------------------------------------------------
Save the two parts above as a single script naivebayes.sh, copy it to iClient, hand it over to user joe, and run it as joe, for example:
[root@iClient ~]# cp naivebayes.sh /home/joe
[root@iClient ~]# chown joe:joe /home/joe/naivebayes.sh
[root@iClient ~]# sudo -u joe chmod +x /home/joe/naivebayes.sh
[root@iClient ~]# sudo -u joe sh /home/joe/naivebayes.sh
While the script runs, open "cMaster:8088" in a browser to watch the MapReduce jobs that Mahout submits; when it finishes, open "cMaster:50070" and browse to "/user/joe/mahout/" to inspect the generated data.
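The same results can be listed from the command line on iClient (a quick check of the directories created by the script):
[root@iClient ~]# sudo -u joe hdfs dfs -ls mahout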
Reposted from: https://www.cnblogs.com/Raodi/p/11053256.html