Oozie, Flume, and Mahout: Configuration and Usage


-------------------------Oozie--------------------
[1. Installation]
1) Install the Oozie server
[root@cMaster~]# sudo yum install oozie    # run as root on cMaster to install the Oozie server
2) Install the Oozie client
[root@iClient~]# sudo yum install oozie-client

[2. Configuration]
3) Edit /etc/oozie/conf/oozie-env.sh:
#export CATALINA_BASE=/var/lib/oozie/tomcat-deployment
export CATALINA_BASE=/usr/lib/oozie/oozie-server
# export OOZIE_CONFIG_FILE=oozie-site.xml
export OOZIE_CONFIG=/etc/oozie/conf
# export OOZIE_LOG=${OOZIE_HOME}/logs
export OOZIE_LOG=/var/log/oozie

4) Edit /etc/hadoop/conf/core-site.xml and add the following properties inside the configuration element:
<property>
  <name>hadoop.proxyuser.oozie.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.oozie.hosts</name>
  <value>*</value>
</property>
[3. Initialization and shared jar files]
First restart the Hadoop cluster:
$ for x in `cd /etc/init.d; ls hadoop-*`; do service $x restart; done    # the backtick ` is the key below Esc; run this on iClient as well, then initialize the Oozie database
[root@cMaster~]# sudo -u oozie /usr/lib/oozie/bin/ooziedb.sh create -run    # run on cMaster

[root@cMaster ~]#mkdir /tmp/ooziesharelib
[root@cMaster~]#cd /tmp/ooziesharelib
[root@cMaster~]# tar xzf /usr/lib/oozie/oozie-sharelib-yarn.tar.gz    # [Note] extract oozie-sharelib-yarn.tar.gz (shipped under /usr/lib/oozie/) into /tmp/ooziesharelib
[4. Start the service]
[root@cMaster~]#sudo service oozie start


To submit a job: oozie job -oozie http://cMaster:11000/oozie -config 【full path to /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties】 -run
Or, from the directory containing the file: oozie job -oozie http://cMaster:11000/oozie -config job.properties -run
On HDFS there must already be a /user/【user name, here】root/examples/apps/map-reduce/ directory containing the workflow application that job.properties refers to.
Note (a common source of errors): the job.properties you submit must be edited first, i.e. the job.properties under
        /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/ must be modified before use (see the sketch below).
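A minimal sketch of the required edit, assuming the stock example file (the detailed steps further down show the same target values):
-------------------------------------------------------------------------
# point the example at the cluster instead of localhost
[root@iClient~]# sed -i 's|^nameNode=.*|nameNode=hdfs://cMaster:8020|' /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties
[root@iClient~]# sed -i 's|^jobTracker=.*|jobTracker=cMaster:8032|' /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties
-------------------------------------------------------------------------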
—————————————————————— Detailed steps ————————————————————

1. Oozie deployment [21]: Oozie is the workflow scheduler service for Hadoop. The machine that receives and runs submitted workflows needs the Oozie server, while machines used only to submit workflows need just the Oozie client.
In this example the Oozie server is deployed on cMaster and the Oozie client on iClient.
1) Install the Oozie server
[root@cMaster~]# sudo yum install oozie    # run as root on cMaster to install the Oozie server
2) Install the Oozie client
[root@iClient~]# sudo yum install oozie-client
3) Configure Oozie. In /etc/oozie/conf/oozie-env.sh, set the CATALINA_BASE property to choose which Oozie build the server uses:
oozie-server-0.20 runs Oozie on MRv1, while oozie-server runs it on Yarn.
The steps below are carried out on cMaster and iClient as indicated.
#export CATALINA_BASE=/usr/lib/oozie/oozie-server-0.20
export CATALINA_BASE=/usr/lib/oozie/oozie-server
Add the following properties inside the configuration element of /etc/hadoop/conf/core-site.xml.
Note that this change must be made on every machine in the cluster (all six nodes in this example),
and Hadoop must be restarted afterwards for it to take effect; the restart is done in the next step.
<property>
  <name>hadoop.proxyuser.oozie.groups</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.oozie.hosts</name>
  <value>*</value>
</property>

After the configuration, restart Hadoop on every node:
$ for x in `cd /etc/init.d; ls hadoop-*`; do service $x restart; done    # run this on iClient as well
4) Initialize the Oozie database
[root@cMaster~]# sudo -u oozie /usr/lib/oozie/bin/ooziedb.sh create -run    # run on cMaster

5) Configure the Oozie Web console
[root@cMaster ~]#cd /var/lib/oozie/
[root@cMaster oozie]# sudo -u oozie wget http://archive.cloudera.com/gplextras/misc/ext-2.2.zip
[root@cMaster oozie]# sudo -u oozie unzip ext-2.2.zip

6) Upload the Oozie shared jar files to HDFS
[root@cMaster~]# sudo -u hdfs hdfs dfs -mkdir /user/oozie
[root@cMaster~]#sudo -u hdfs hdfs dfs -chown oozie:oozie /user/oozie
[root@cMaster ~]#mkdir /tmp/ooziesharelib
[root@cMaster~]#cd /tmp/ooziesharelib
[root@cMaster~]# tar xzf /usr/lib/oozie/oozie-sharelib-yarn.tar.gz
[root@cMaster~]# sudo -u oozie hdfs dfs -put share /user/oozie/share
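To confirm the upload, the shared library can be listed on HDFS (optional; a sketch, the exact contents depend on the CDH build):
[root@cMaster~]# sudo -u oozie hdfs dfs -ls /user/oozie/share/lib    # should list the per-component shared library directories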

7) Start the Oozie service
[root@cMaster~]#sudo service oozie start

8) Verify the Oozie service
Besides checking that the Oozie process is running on cMaster, since ext-2.2 has been installed
you can open "cmaster:11000" in a browser on iClient to reach the Oozie Web console;
the following command also reports the Oozie service status.
[root@iClient~]#oozie admin -oozie http://cMaster:11000/oozie -status
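A healthy server reports that it is in NORMAL mode; a quick sketch of both checks (assuming the standard init script shipped with the package):
[root@cMaster~]# sudo service oozie status    # the init script reports whether the Oozie server process is running
[root@iClient~]# oozie admin -oozie http://cMaster:11000/oozie -status    # a healthy server replies: System mode: NORMAL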




------------------------ Usage ------
2. Using Oozie
Although Oozie provides a Web console, by default the Web console can only be used to view Oozie jobs, not to submit them; jobs are submitted from the command line.
[Example 6-6] Complete the following tasks:
    ① View the Oozie help and get familiar with its interface.
    ② Run the Oozie MapReduce example.
    ③ Run the Oozie Pig and Hive examples.
    ④ Modify workflow.xml so that it runs WordCount.
    ⑤ Customize workflow.xml so that it first runs WordCount and then uses the WordCount output as the input of a further job.

Answer: for task ①, run the following command on iClient; the user can be root or joe.
[root@iClient~]# sudo -u joe oozie help        # view the Oozie help

For task ②, first locate the Oozie example jar archive and extract it; the extraction location does not matter, here it is extracted into the current directory.
[root@iClient~]#cd /usr/share/doc/oozie-4.0.0+cdh5.0.0+54
[root@iClient oozie-4.0.0+cdh5.0.0+54]# tar -zxvf oozie-examples.tar.gz

Edit examples/apps/map-reduce/job.properties, changing:
    nameNode=hdfs://localhost:8020
    jobTracker=localhost:8021
to:
nameNode=hdfs://cMaster:8020
jobTracker=cMaster:8032

Then upload examples to HDFS and run the job with oozie:
[root@iClient oozie-4.0.0+cdh5.0.0+54]#sudo -u joe hdfs dfs -put examples examples
[root@iClient oozie-4.0.0+cdh5.0.0+54]#cd
[root@iClient ~]#sudo -u joe oozie job -oozie http://cMaster:11000/oozie  -config  /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties  -run

Task ③ is similar to task ②, except that the Oozie Pig and Hive example programs are run instead.
Edit the corresponding configuration file as in task ② (e.g. examples/apps/pig/job.properties), upload it to HDFS,
and then submit it with oozie (e.g. sudo -u joe oozie ... /apps/pig/job.properties -run), as sketched below.
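A sketch of the Pig-example submission, assuming job.properties has already been edited and the examples directory re-uploaded as in task ②:
[root@iClient ~]#sudo -u joe oozie job -oozie http://cMaster:11000/oozie  -config  /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/pig/job.properties  -run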
For task ④, edit "examples/apps/map-reduce/workflow.xml".
The jar it uses is located under "examples/apps/map-reduce/lib";
the DemoMapper.class and DemoReducer.class in that jar implement WordCount,
and their source code is under "examples/src", where it can be inspected and rebuilt if needed.

(1) Edit "examples/apps/map-reduce/workflow.xml", replacing:
    <property>
        <name>mapred.mapper.class</name>
        <value>org.apache.oozie.example.SampleMapper</value>
    </property>
    <property>
        <name>mapred.reducer.class</name>
        <value>org.apache.oozie.example.SampleReducer</value>
    </property>
with:
    <property>
        <name>mapred.mapper.class</name>
        <value>org.apache.oozie.example.DemoMapper</value>
    </property>
    <property>
        <name>mapred.reducer.class</name>
        <value>org.apache.oozie.example.DemoReducer</value>
    </property>
    <property>
        <name>mapred.output.key.class</name>
        <value>org.apache.hadoop.io.Text</value>
    </property>
    <property>
        <name>mapred.output.value.class</name>
        <value>org.apache.hadoop.io.IntWritable</value>
    </property>
(2) Delete the examples directory already on HDFS. Because it was uploaded in task ② and HDFS will not overwrite it,
the old examples directory must be removed first; if task ② was skipped, skip this step and go straight to the upload.
[root@iClient~]#sudo -u joe hdfs dfs -rm -r -f  examples    # remove the examples directory from HDFS

(3) Upload examples to HDFS again and run the job with oozie:
[root@iClient oozie-4.0.0+cdh5.0.0+54]#sudo -u joe hdfs dfs -put examples examples
[root@iClient oozie-4.0.0+cdh5.0.0+54]#cd
[root@iClient ~]#sudo -u joe oozie job -oozie http://cMaster:11000/oozie  -config  /usr/share/doc/oozie-4.0.0+cdh5.0.0+54/examples/apps/map-reduce/job.properties  -run

**** For task ⑤, the workflow must chain several actions with dependencies between them, for example running them in the order:
    "M1"→"R1"→"Java1"→"Pig1"→"Hive1"→"M2"→"R2"→"Java2"
To build such a chain you only need to express the dependencies between the action nodes in workflow.xml.
Below is a minimal sketch of such a workflow.xml with two chained WordCount map-reduce actions (the job properties inside each action are filled in as in task ④); it is packaged and run the same way as in task ④.

<workflow-app name="wordcount-wf" xmlns="uri:oozie:workflow:0.2">
    <start to="wordcount1"/>
    <action name="wordcount1">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <!-- properties of the first WordCount job: mapper/reducer classes, input and output dirs -->
            </configuration>
        </map-reduce>
        <ok to="wordcount2"/>
        <error to="fail"/>
    </action>
    <action name="wordcount2">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <!-- properties of the second WordCount job: its input dir is the output dir of wordcount1 -->
            </configuration>
        </map-reduce>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>



——————————————————————————Flume——————————————————————
1. Flume deployment [21]: Flume is installed on whichever machines need to collect or receive data.
Because the collected data will eventually be written into the Hadoop cluster,
this example installs Flume on both cMaster and iClient.
(1) Install Flume on cMaster
[root@cMaster~]#sudo yum install flume-ng-agent    # install Flume on cMaster

(2) Install Flume on iClient
[root@iClient~]#sudo yum install flume-ng-agent    # install Flume on iClient



--------------- Usage
2. Using Flume: a typical use of Flume is to gather logs that are scattered across many machines, merge them at a collection point,
and write them into a storage system. To do this you only need to write Flume configuration files;
defining custom data flows in this way is Flume's most important capability.
[Example 6-7] Complete the following tasks:
    ① View the Flume help and get familiar with its interface.
    ② Define a data flow in which data typed in a telnet session on iClient is sent to cMaster;
      cMaster listens on port 44444, receives the data, and displays it on its terminal.
    ③ Define a data flow in which the contents of the file "/home/joe/source.txt" on iClient are sent to cMaster,
      and cMaster stores the received data in HDFS.
    ④ Consider task ③: with a receiving Flume process running on cMaster and other machines such as iClient sending it data,
      what happens if a (malicious) machine iHacker also sends it data, and how could that be prevented? Think through the problem and a possible solution.
Answer: for task ①, run the following command on iClient:
[root@iClient~]#flume-ng        # show the Flume usage/help


For task ②, first define and start the Flume agent that receives the data on cMaster (the downstream end of the flow),
then use telnet on iClient to send data to cMaster. The steps are as follows.
Log in to cMaster as root and create the file "/etc/flume-ng/conf/flume.conf" with the following contents;
it defines the Flume data flow on cMaster that receives data and prints it to the terminal.
-----------------------------------------------------------------------------------------------
# for agent a1, define a source r1, a channel c1 and a sink k1
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# configure the source: a netcat-type source that listens on port 44444 of cMaster and receives the data sent to it
a1.sources.r1.type=netcat 
a1.sources.r1.bind=cMaster 
a1.sources.r1.port=44444

# configure the channel: buffer events in memory (a memory channel)
a1.channels.c1.type=memory 
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100

# define the sink as a logger sink: the received data is printed to the log/terminal
a1.sinks.k1.type=logger

# bind the source to the channel, and the channel to the sink
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
---------------------------------------------------------------------------------------------
[root@cMaster~]#flume-ng agent -c  /etc/flume-ng/  -f  /etc/flume-ng/conf/flume.conf -n a1

Then, with the Flume process on cMaster left running in the foreground, execute on iClient:
[root@iClient~]# telnet cMaster 44444







At this point anything typed in the telnet session is sent to cMaster and displayed in the terminal of the Flume process running there,
so data entered on iClient is gathered on cMaster in real time. To leave telnet on iClient,
press Ctrl+] (hold Ctrl and press ]), then type "quit" at the telnet prompt and press Enter.
To stop the Flume process on cMaster, press Ctrl+C.
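The netcat source can also be exercised non-interactively; a small sketch, assuming the nc utility is installed on iClient:
[root@iClient~]# echo "hello flume" | nc cMaster 44444    # the line should appear in the Flume logger output on cMaster (press Ctrl+C if nc does not exit on its own)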
Task ③ involves a slightly more complex data flow. First, create the file "/etc/flume-ng/conf/flume.conf.hdfs" on cMaster with the following contents:
-----------------------------------------------------------------------------------------------
# for agent a1, define a source r1, a channel c1 and a sink k1
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# configure the source: an avro-type source
# that listens on port 4141 of cMaster and accepts events sent in avro format
a1.sources.r1.type=avro 
a1.sources.r1.bind=cMaster 
a1.sources.r1.port=4141

# configure the channel: buffer events in memory (a memory channel)
a1.channels.c1.type=memory

# define the sink as an HDFS sink: events taken from the channel are written into HDFS
a1.sinks.k1.type=hdfs 
a1.sinks.k1.hdfs.path=/user/joe/flume/cstorArchive 
a1.sinks.k1.hdfs.fileType=DataStream

# bind the source to the channel, and the channel to the sink
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
-----------------------------------------------------------------------------------------------

Next, create the file "/root/businessLog" on iClient with the following contents:
--------------------------------------
ccccccccccccccccccccc
ssssssssssssssssssssssss
tttttttttttttttttttttttttttttttttt
oooooooooooooooooo
rrrrrrrrrrrrrrrrrrrrrrrrrrrrr
--------------------------------------
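One way to create this file (a sketch; any editor works just as well):
--------------------------------------
[root@iClient~]# cat > /root/businessLog <<'EOF'
ccccccccccccccccccccc
ssssssssssssssssssssssss
tttttttttttttttttttttttttttttttttt
oooooooooooooooooo
rrrrrrrrrrrrrrrrrrrrrrrrrrrrr
EOF
--------------------------------------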






Then create the file "/etc/flume-ng/conf/flume.conf.exce" on iClient with the following contents:
-----------------------------------------------------------------------------------------------
# for agent a1, define a source r1, a channel c1 and a sink k1
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# configure the source as an exec-type source:
# it runs the Linux cat command on /root/businessLog and feeds each output line into the channel as an event
a1.sources.r1.type=exec
a1.sources.r1.command=cat /root/businessLog

# configure the channel: buffer events in memory (a memory channel)
a1.channels.c1.type=memory

# define the sink as an avro sink: events from the channel are sent in avro format to port 4141 on cMaster
a1.sinks.k1.type=avro 
a1.sinks.k1.hostname=cMaster 
a1.sinks.k1.port=4141

# bind the source to the channel, and the channel to the sink
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1
-----------------------------------------------------------------------------------------------


Next, start the Flume processes on iClient and cMaster. Because the data ultimately flows into HDFS,
the downstream Flume process (on cMaster) must be started before the upstream one (on iClient). Start Flume on cMaster
with the "flume-ng ... a1" command below, which uses the flume.conf.hdfs configuration file;
the agent name a1 given on the command line must match the agent name a1 defined in that file.
[root@cMaster ~]#sudo -u joe hdfs dfs -mkdir flume    # create the directory /user/joe/flume on HDFS
[root@cMaster ~]#sudo -u joe flume-ng agent -c  /etc/flume-ng/  -f  /etc/flume-ng/conf/flume.conf.hdfs -n a1

Next, run the following command on iClient; note that the agent name given here, a1, corresponds to the a1 defined in flume.conf.exce:
[root@iClient~]#flume-ng agent -c /etc/flume-ng/ -f /etc/flume-ng/conf/flume.conf.exce -n a1

Once both agents are running, open "cMaster:50070" in a browser on iClient and browse to the directory "/user/joe/flume/cstorArchive";
the data sent from iClient can be seen there.
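The same check can be done from the command line; a sketch (the file names inside the directory are generated by the HDFS sink, so they will vary):
[root@cMaster ~]#sudo -u joe hdfs dfs -ls /user/joe/flume/cstorArchive      # list the files written by the HDFS sink
[root@cMaster ~]#sudo -u joe hdfs dfs -cat /user/joe/flume/cstorArchive/*   # print their contents; they should match /root/businessLog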










—————————————————————————— Mahout——————————————————————
1. Mahout deployment [21]: Mahout runs as a client-side program of Hadoop, so it only needs to be installed on one machine of the cluster;
here Mahout is installed on iClient.
[root@iClient ~]# sudo yum install mahout

2. Using Mahout: Mahout provides ready-made implementations of many common machine-learning algorithms.
To run them, you normally only need to prepare input data in the format the algorithm expects and invoke the corresponding Mahout command.
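Running the mahout driver without arguments is a quick way to see which algorithms the installed version supports:
[root@iClient ~]# mahout    # with no arguments the driver prints the list of valid program names (seqdirectory, seq2sparse, trainnb, testnb, ...)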
[Example 6-8] As user joe, use Mahout's naivebayes algorithm to classify newsgroup data:
download the data set, upload it to HDFS, train a classifier, and then evaluate the classifier on held-out test data.
-------------------------------------------------------------------------
#!/bin/sh
# create the local working directories and the HDFS directory
mkdir -p /tmp/mahout/20news-bydate /tmp/mahout/20news-all && hdfs dfs -mkdir mahout

# download the 20 Newsgroups data set
curl http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz -o /tmp/mahout/20news-bydate.tar.gz

# extract the data, merge the train/test folders, and upload to HDFS
cd /tmp/mahout/20news-bydate && tar xzf /tmp/mahout/20news-bydate.tar.gz && cd
cp -R /tmp/mahout/20news-bydate/*/* /tmp/mahout/20news-all
hdfs dfs -put /tmp/mahout/20news-all mahout/20news-all

# use Mahout's seqdirectory command to convert the raw text files into SequenceFile format
mahout seqdirectory -i mahout/20news-all -o mahout/20news-seq -ow
-------------------------------------------------------------------------


Note: if these commands are run one by one, you will see them execute as one MapReduce job after another.
Mahout also requires its input to be organized as vectors in a specific value format, so the sequence files produced above
still have to be converted into vectors; append the following to the naivebayes.sh script:
-------------------------------------------------------------------------
# use the seq2sparse command to convert the sequence files into TF-IDF weighted sparse vectors
mahout seq2sparse -i mahout/20news-seq -o mahout/20news-vectors -lnorm -nv -wt tfidf

# randomly split the vectors into a training set and a test set;
# --randomSelectionPct 40 holds out 40% of the data for testing and keeps the rest for training
mahout split -i mahout/20news-vectors/tfidf-vectors --trainingOutput mahout/20news-train-vectors\
--testOutput mahout/20news-test-vectors\
--randomSelectionPct 40 --overwrite --sequenceFiles -xm sequential

# train the Naive Bayes model
mahout trainnb -i mahout/20news-train-vectors -el -o mahout/model -li mahout/labelindex -ow
# check the model against the training data itself (self-test; the result is optimistic)
mahout testnb -i mahout/20news-train-vectors -m mahout/model -l mahout/labelindex\
-ow -o mahout/20news-testing
# evaluate the model on the held-out test data
mahout testnb -i mahout/20news-test-vectors -m mahout/model -l mahout/labelindex\
-ow -o mahout/20news-testing
-------------------------------------------------------------------------




Save the script above as naivebayes.sh, copy it to iClient, give joe ownership and execute permission, and run it as joe.
The commands in the script can equally well be executed one at a time by hand; the effect is the same.
[root@iClient~]# cp naivebayes.sh /home/joe
[root@iClient~]# chown joe:joe /home/joe/naivebayes.sh
[root@iClient~]# sudo -u joe chmod +x /home/joe/naivebayes.sh
[root@iClient~]# sudo -u joe sh /home/joe/naivebayes.sh


While the script is running, you can open the Web page "cMaster:8088"
to watch the Mahout jobs executing on Yarn; you can also open the Web page "cMaster:50070"
and browse into "/user/joe/mahout/" to examine the results.
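The same results can be inspected from the shell; a sketch (the accuracy summary and confusion matrix that testnb prints to the terminal are usually the easiest place to read off the classification quality):
[root@iClient ~]# sudo -u joe hdfs dfs -ls mahout                    # intermediate and final data sets produced by the script
[root@iClient ~]# sudo -u joe hdfs dfs -ls mahout/20news-testing     # classification output of the last testnb run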






Reposted from: https://www.cnblogs.com/Raodi/p/11053256.html