ビッグデータ分析ケース:毎日各時間帯のPVとUV分析.使用フレームワーク:hadoop,Hvie,Sqoop

9260 ワード

分析の第一歩:hiveでデータベースとデータテーブルを確立する
ここでは、日と時間のパーティション化テーブルを作成します.
注:パーティションに同じサブパーティションがある場合は、必ず親パーティションを作成します.毎日24時間、毎月1~31日、毎年12ヶ月
create database if not exists db_yhd;
use db_yhd;

CREATE TABLE db_yhd.track_log(
id              STRING,
url             STRING,       //url,pv  
referer         STRING,
keyword         STRING,
type            STRING,
guid            STRING,       //      ,uv  
pageId          STRING,
moduleId        STRING,
linkId          STRING,
attachedInfo    STRING,
sessionId       STRING,
trackerU        STRING,
trackerType     STRING,
ip              STRING,
trackerSrc      STRING,
cookie          STRING,
orderCode       STRING,
trackTime       STRING,
endUserId       STRING,
firstLink       STRING,
sessionViewNo   STRING,
productId       STRING,
curMerchantId   STRING,
provinceId      STRING,
cityId          STRING,
fee             STRING,
edmActivity     STRING,
edmEmail        STRING,
edmJobId        STRING,
ieVersion       STRING,
platform        STRING,
internalKeyword STRING,
resultSum       STRING,
currentPage     STRING,
linkPosition    STRING,
buttonPosition  STRING
)
PARTITIONED BY (date_str STRING ,hour_str STRING )     //    
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' 
LINES TERMINATED BY '
';

分析第2ステップ:hadoopのhdfsのデータをhiveテーブルにインポートする
LOAD DATA LOCAL INPATH '/opt/datas/2015082818'  INTO TABLE db_yhd.track_log PARTITION (date_str="20150828", hour_str="18"); 
LOAD DATA LOCAL INPATH '/opt/datas/2015082819'  INTO TABLE db_yhd.track_log PARTITION (date_str="20150828", hour_str="19"); 

分析の第3歩:hvieの中でsql文を書いて、毎日毎時pvとuvを分析します
注:パーティションフィールドはsql文で使用できます.
select
   date_str,hour_str,
   count(url) as pv ,
   count(distinct guid) as uv 
from
    db_yhd.track_log 
where 
    date_str="20150828"  and length(trim(url)) >0
group by
    date_str,hour_str;

分析ステップ4:hiveにクエリー結果を保存する
drop table if exists db_yhd.tb_pvuv_result;
create table db_yhd.tb_pvuv_result
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
AS 
SELECT 
   date_str,hour_str,
   count(url) as pv ,
   count(distinct guid) as uv 
from
    db_yhd.track_log 
where 
    date_str="20150828"  and length(trim(url)) ==0
group by
    date_str,hour_str

分析ステップ5:mysqlで対応結果テーブルを作成する
create table tb_result_pv(
    id int not null AUTO_INCREMENT,
    date_str varchar(255) not null,
    hour_str varchar(255) not null,
    pv int not null,
    uv int not null ,
    PRIMARY KEY(id)
)

分析ステップ6:hiveのデータをmysqlにインポートする
bin/sqoop export \
--connect jdbc:mysql://bigdata-hpsk01.huadian.com/test \
--username root \
--password 123456 \
--table tb_result_pv \
--columns date_str,hour_str,pv,uv \
--export-dir /user/hive/warehouse/db_yhd.db/tb_pvuv_result \
--input-fields-terminated-by ','

一部のケースデータ
121508281810000000	http://www.yhd.com/?union_ref=7&cp=0			3	PR4E9HWE38DMN4Z6HUG667SCJNZXMHSPJRER					VFA5QRQ1N4UJNS9P6MH6HPA76SXZ737P	10977119545		124.65.159.122		unionKey:10977119545		2015-08-28 18:10:00	50116447	http://image.yihaodianimg.com/virtual-web_static/virtual_yhd_iframe_index_widthscreen.html?randid=2015828	6				1000					Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0	Win32					lunbo_tab_3		   	2			   						1		1	1		1											1440*900																						1440756285639
121508281810000001	http://my.yhd.com/order/finishOrder.do?orderCode=5435446505152	http://buy.yhd.com/checkoutV3/index.do		3	YJ25S3QAVPAS31PHSB3HFGZ1E5AYMKX9XUTX					6W26QM41DM6HHND3R4FP42YYXXE1NKGA			222.73.202.251				2015-08-28 18:10:00	85133152	http://www.haosou.com/s?src=new_isearch&q=1%E5%8F%B7%E5%BA%97	25	0			1					Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36	Win32					MY_ORDERCOMPLETION_EDITADDRESS		   	1			   						2058		0	2058		0											1366*768																						1440756699916
121508281810000002	http://list.yhd.com/p/c5072-b-a-s1-v0-p1-price-d0-pid-pt1086211-pl1171565-m0-k?tp=44.1086211.0.0.0.Kxnn54p-11-FFJKr	http://list.yhd.com/p/pt1086211-pl1171565?tp=44.1086211.1508.0.1.Kxnmyye-11-FFJKr		3	JRBWWU6ECXN15Q2Z5QT4TETNHKY7QHE3Y8B3		44.1086211.0.0.0.Kxnn54p-11-FFJKr			5Z5JZMYUGK9TP3QWHDDTU6G5T6PHEQRZ	4734		111.193.165.158		msessionid:DW6SB2FGG84ZZ2WD77DAZHFBXNV8D5776RQ4,uname:gaochentongxue,unionKey:4734,websiteId:A100215249		2015-08-28 18:10:00	116262550	http://www.yhd.com/?tracker_u=1624169&t=1440753050503	107			2	1000					Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36	Win32		107	1	search_navi_cat_4			   	2			   	47					44	KxnnKjs-11-FFJKr	1086211	44	Kxnn54p-11-FFJKr	1086211											1366*768			0	0	0						1											1440756878359
121508281810000003	http://list.yhd.com/p/c5996-b-a-s1-v0-p1-price-d0-pid21496-pt1074467-pl1157690-m0-k?tp=44.1074467.0.0.0.KxnlcrD-11-EnNUs	http://list.yhd.com/p/c0-b-a-s1-v0-p1-price-d0-pid21496-pt1074467-pl1157690-m0?ref=1_1_51_search&tc=3.1.5.994560.48&tp=51.%E5%84%BF%E7%AB%A5%E6%B2%90%E6%B5%B4%E9%9C%B2.124.48.4.KxnlGug-11-EnNUs		2	37G1MDD68UF8K9XYGVCUA9WFNNR7C1133W9S		44.1074467.0.0.0.KxnlcrD-11-EnNUs			5TMZXMUKJWK76FNZMVE2TCM4UQW7ZNJH	8363	1	180.162.8.13		msessionid:D3DNC2F91D4VNF49RQG3RDG5J2SQ2JD9,uname:  ,unionKey:8363,unionType:1,uid:B000ph454itr		2015-08-28 18:10:00	117902468	http://fun.fanli.com/goshop/go/?id=633&lc=shopdetail_goumai&v=1440755590146	58			20	1					Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36	Win32		493	1	search_navi_cat_0			   	1			   	78					44	KxnmEa2-11-EnNUs	1074467	44	KxnlcrD-11-EnNUs	1074467											1280*1024			0	0	0						1											1440756591009
121508281810000004				3	2EC97A32-7C27-4F53-A122-B60AA9A987F7					PAU63A8H6A21F81NHTG2X4O9M08Y6148	10680917		49.65.71.158				2015-08-28 18:10:00	156854179					5	37						iPhone							   	5			   						1200002	MTQ0MDc1NjU1OTA1NQ==		300000		50568587							yhdapp	4.1.1	8.1.1	iPhone	750*1334	cmcc	4g									1	8366231	118.777052	32.005723							   -   -    -   -203 	
121508281810000005	http://t.yhd.com/detailBrand/21782?tp=4.174850.m3022912.0.2.KxnmxAS-11-8nDA8	http://cms.yhd.com/sale/174850?tc=ad.0.0.15114-19945723.1&tp=1.1.708.0.1.Kxnms^7-11-F4YGp		3	X491CWDNMC1YTEK7WRVUTQMZMXF4X63U54SC		4.174850.m3022912.0.2.KxnmxAS-11-8nDA8			25UYKQWJ13GB2E23SC1HH64HXV12TX3E			118.112.161.79		msessionid:ZNTMUVQ5VTTVHJ3763DFQVZM955ZWTNE,uname:   		2015-08-28 18:10:00	163477423	http://www.yhd.com/	4			12	111					Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36	Win32							   	12			   	9					2019	KxnmyW3-11-8nDA8	21782_1_-100_1	4	KxnmxAS-11-8nDA8	174850											1920*1080			m3022912	0	2						1											1440756787216
121508281810000006				3	7878E64C-B06B-4188-9F77-C4C77C38FDBD					823JXQ1J6BH4L0C68I78R83TK9ZQ6VND	10680917		117.94.64.207		unionKey:10680917		2015-08-28 18:10:00	137779971					5	48						iPhone							   	5			   						PhoneMallMoreProductsVC	MTQ0MDc1NjU0NzExMA==		5028		36400							yhdapp	4.1.1	8.0.2	iPhone	750*1334	cmcc	2g									1	8366231	119.865009	32.992094								
121508281810000007				3	E5FC9449-3777-4103-BF0E-F402F74E2C00					EW935DJP6RWJUCDR8REOQ1YA808F6LBF			112.82.93.34				2015-08-28 18:10:00	140741829					5	45						iPhone		1914					   	5			   						200000	MTQ0MDc1NjQyNzY0OQ==	     	200000		     							yhdapp	4.1.1	8.1.1	iPhone	750*1334	cmcc	2g	3		3						2	8366231	0.000000	0.000000								
121508281810000008				3	00000000-4049-1cca-e842-1f5049b8efdf					W2T8GEGK1JGHNGVCQRKP4A9S681VNBBF			221.220.248.11				2015-08-28 18:10:00	63573725				162371	2	1000					4.1.1	android							   	2			   		51283808				46000	dvb`WiU	2040913	5005	dvb`C8O	175319				43943800			yhdapp	4.1.1	4.1.2	ardphone	720*1280	cmcc	wifi	m3032384		2						1	1019323363	116.481765	40.00163							   -   -   -     -17 	
121508281810000009	http://item.m.yhd.com/item/73725?tp=5006.0.1756.0.11.Kxnlg48-11-4lXQc	http://m.yhd.com/1?tracker_u=10525888234		3	3CUMUY385J8ZH1Y72EA8C1W3WT9PT3C2PN4C	undefined.undefined.0.0.0.undefined	5006.0.1756.0.11.Kxnlg48-11-4lXQc			HEZ1RNMMBV1D4RQ38CB27J9KN95ZFNF4	10525888234		180.154.150.29		msessionid:2EKNRYZB1K5YWGCHXK6W2RCPKW1Q88SREKBP,uname:13862976878@phone,unionKey:10525888234		2015-08-28 18:10:00	202561027	http://m.yhd.com/1?tracker_u=10525888234	6	9928	1	1	1					Mozilla/5.0 (Linux; U; Android 4.4.2; zh-cn; Che2-TL00 Build/HonorChe2-TL00) AppleWebKit/537.36 (KHTML, like Gecko)Version/4.0 MQQBrowser/6.0 Mobile Safari/537.36	AndroidSystem				pms_apphome_umaylike_intent_profile_yhd_6_0_9928			   	1			   		73725				5048	KxnmGkk-11-4lXQc	73725	5006	Kxnlg48-11-4lXQc	0							br_qq	6.0	4.4.2		720*1280		wifi	1756	0	11																	1440756599518
121508281810000010				3	98363275-13EB-412C-AF69-FC20D7ADF622					4M9F757E3AN7PB7UMK75P40DLCWEJ55V			114.242.250.11				2015-08-28 18:10:00	129607077					2	1000						iosSystem							   	2			   						5026	MTQ0MDc1NjUxODYwNQ==	63211	300000		50244590							yhdapp	4.0.3	7.1.1	iPhone	640*1136											1	8366231										

データが欲しいなら下にメッセージを