HadoopのHive

8547 ワード

/*
Hive   sql   map-reduce    
Hive         ,    hadoop         ,        DML  。
Hive       ,   ,     ,          。

*/

NoSQL,NOT Only SQL。           ,            。    sql   。
NewSQL,SQL   
  "    "   
Hive            

      ,   Hadoop           Hive   。
     SQL         HiveQL。       ,     ,  SQL   
      。
     SQL Map-Reduce    。
  shell,JDBC/ODBC,Thrift,web   。


Hive   :
Hive    :shell,thrift,web .
Thrift   (Java   )
     "Derby,Mysql" 
   
Hadoop


UDF = USER DEFINITION FUNCTION

Hive  

    :          Derby  ,         。
      :     Mysql,      Mysql 。
    :         Mysql   。

Hive Sql  
--   sql
create table Loc(
CDRID STRING,
IMSI INT,
....
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;

--      

DESCRIBE EXTENDED mydb.employees;

--    sql
LOAD DATA LOCAL INPATH '/home/james/location_20120316.txt'
OVERWRITE INTO TABLE LOC;

INSERT OVERWRITE TABLE RESULT
SELECT 
IMSI,IMEI,SUBSTR(CGI,8),STARTTIME,NULL,UPDATETYPE,3......
FROM LOC
WHERE IMSI IS NOT NULL;

--  SQL
SELECT IMSI,CGI,TIME FROM RESULT;
SELECT BAR FROM POKES LIMIT 5;

--   
SELECT RESULT.IMSI,LOC.INSTIME
FROM RESULT JOIN LOC ON (RESULT.IMSI = LOC.IMSI);


--       
show tables;

--   
drop table abc;



JDBC/ODBC  

                JDBC ODBC  Hive
      
  jdbc     Hive,            hive Thrift Server,    hive     connection refused   。
    :
hive -- service hiveserver

Hive       ?
   HDFS warehouse   ,          。
  reduce
   /tmp           

      
struct('John','Doe')
map('first','john','last','Doe')
array('john','Doe')

    
create table employees(
name STRING,
salary FLOAT,
subordinates ARRAY(<TRING>,
deductions MAP<STRING,FLOAT>,
address STRUCT<street:STRING,city:STRING,state:STRING,zip:INT>);
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
LINES TERMINATED BY '
' STORED AS TEXTFILE; DDL -- CREATE DATABASE financials; CREATE DATABASE IF NOT EXISTS financials; SHOW DATABASES; SHOW DATABASES LIKE 'h.*' -- CREATE DATABASE financials LOCATION '/my/preferred/directory' -- USE financials; -- set hive.cli.print.current.db=true; -- DROP DATABASE IF EXISTS financials; DROP DATABASE IF EXISTS financials CASCADE; -- ALTER DATABASE financials SET DBPROPERTIES('edited-by','Joe Dba'); -- -- , oracle Hive , ; , , 。 , , , 。 , , 。 CREATE EXTERNAL TABLE IF NOT EXISTS stocks ( exchange STIRNG, ... ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/data/stocks' -- -- , sql 。 create table employees( name STRING, salary FLOAT, ... ) PARTITIONED BY (country STRING,state STRING); 。 。 : 。 --Strict set hive.mapred.mode=strict; strict , , where 。 nostrict 。 -- SHOW PARTITIONS employees PARTITION(country='US'); SHOW PARTITIONS employees PARTITION(country='US','state=AK'); -- alter table xx rename to xx; -- alter table xx add if not exists partition(year=2011,month=1,day=1) location '/logs/2011/01/01' -- -- alter table xx change column xx int; -- alter table xx add columns (xx string comment ''); --replace alter table log_messages replace columns ( xx int comment '' ); DML DML 。 。 -- Load data local input 'path' overwrite into table employees partition(country='US',state='CA') --insert overwrite insert overwrite table employees partition(country='US',state='OR') select * from staged_employees se where se.cnty='US' and se.st='OR'; -- 。Hadoop , 。 -- FROM staged_employees se INSERT OVERWRITE table employees partition(country='US',state='OR') select * where se.cnty='US' and se.st='OR' INSERT OVERWRITE table employees partition(country='US',state='CA') select * where se.cnty='US' and se.st='CA' INSERT OVERWRITE table employees partition(country='US',state='IL') select * where se.cnty='US' and se.st='IL'; -- , 。 set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nostrict; set hive.exec.max.dynamic.partitions.pernode=1000; insert overwrite table employees partition(country,state) select ..,se.cty,se.st from staged_employees se; -- create table ... as select ... -- -- , hdfs -- INSERT OVERWRITE LOCAL DIRECTORY 'tmp/ca_employees' select name,salary,address from employees where se.state = 'CA'; -- -- prick SELECT sysmbol,price.* FROM stocks; --% 。_ SELECT NAME,ADDRESS.STREET FROM EMPLOYEES WHERE ADDRESS.STREET LIKE '%Ave.'; --RLIKE select name,address.street from employees where address.street RLIKE '.*(Chicago|Ontario).*'; --explode select array(1,2,3) from dual; [1,2,3] select explode(array(1,2,3)) as elements from src; 1, 2, 3 -- FROM ( select upper(name),salary,deductions["Federal Taxes"] as fed_taxes, round(salary*(1-deductions["Federal Taxes"])) as salary_minus_fed_taxes from employees )e select e.name,e.salary_minus_fed_taxes where e.salary_minus_fed_taxes >70000; -- -- , map-reduce combine。 -- , reduce set hive.map.aggr=true; select count(*),avg(salary) from employees; -- , ( , , ) -- ! -- reduce , "map-side joins", , -- reduce 。 -- , map-side joins . select /*+ MAPJOIN(d) */ s.ymd,s.symbol,s.price_close,d.dividend FROM stocks s JOIN dividends d on s.ymd = d.ymd and s.symbol=d.symbol where s.symbol='AAPL'; --hive.auto.convert.join=true --hive mapjoin. -- 。 --hive.mapjoin.smalltable.filesize=25000000 -- -- order by -- sort by -- reduce , . reducer distribute by-- reduce , sort by select s.ymd,s.symbol,s.price_close from stocks s distribute by s.symbol sort by s.symbol asc,s.ymd asc; cluster by -- sort by + distribute by --bucket select * from number tablesample(BUCKET 3 OUT OF 10 ON rand())s; -- select * from numbersflat TABLESAMPLE(0.1 PERCENT) S; -- --Hive Hive , B+ ,Hive 。