[29]Presto window function
45595 ワード
一、テスト
presto:default> select id, name, sum(age) as age_num, sum(sum(age) ) over (partition by name) from mysql.dbtest_1.student group by name,id;
id | name | age_num | _col3
----+------------+---------+-------
5 | studentB | 13 | 13
20 | resulttest | 18 | 38
22 | resulttest | 20 | 38
4 | studentA | 12 | 12
2 | | 18 | 18
6 | studentAA | 12 | 12
8 | cut-test | 20 | 112
14 | cut-test | 20 | 112
18 | cut-test | 18 | 112
19 | cut-test | 18 | 112
17 | cut-test | 18 | 112
16 | cut-test | 18 | 112
1 | | 18 | 18
(13 rows)
Query 20190429_120601_00006_5f22y, FINISHED, 1 node
Splits: 81 total, 81 done (100.00%)
0:00 [13 rows, 0B] [37 rows/s, 0B/s]
presto:default> select id, name, sum(age) as age_num from mysql.dbtest_1.student group by name,id order by name;
id | name | age_num
----+------------+---------
18 | cut-test | 18
14 | cut-test | 20
17 | cut-test | 18
19 | cut-test | 18
8 | cut-test | 20
16 | cut-test | 18
22 | resulttest | 20
20 | resulttest | 18
4 | studentA | 12
6 | studentAA | 12
5 | studentB | 13
2 | | 18
1 | | 18
(13 rows)
Query 20190429_120702_00007_5f22y, FINISHED, 1 node
Splits: 51 total, 51 done (100.00%)
0:00 [13 rows, 0B] [39 rows/s, 0B/s]
presto:default> select id, name, sum(age) as age_num, sum(age_num ) over (partition by name) from mysql.dbtest_1.student group by name,id;
Query 20190429_120812_00008_5f22y failed: line 1:43: Column 'age_num' cannot be resolved
select id, name, sum(age) as age_num, sum(age_num ) over (partition by name) from mysql.dbtest_1.student group by name,id
presto:default> select id, name, sum(sum(age)) over (partition by name) from mysql.dbtest_1.student group by name,id;
id | name | _col2
----+------------+-------
20 | resulttest | 38
22 | resulttest | 38
4 | studentA | 12
14 | cut-test | 112
8 | cut-test | 112
18 | cut-test | 112
19 | cut-test | 112
17 | cut-test | 112
16 | cut-test | 112
1 | | 18
2 | | 18
6 | studentAA | 12
5 | studentB | 13
(13 rows)
presto:default> select id, name, sum(age) as age_num, sum(sum(age) ) over (partition by substr(name,1,1)) from mysql.dbtest_1.student group by name,id;
id | name | age_num | _col3
----+------------+---------+-------
22 | resulttest | 20 | 38
20 | resulttest | 18 | 38
18 | cut-test | 18 | 112
14 | cut-test | 20 | 112
8 | cut-test | 20 | 112
16 | cut-test | 18 | 112
17 | cut-test | 18 | 112
19 | cut-test | 18 | 112
4 | studentA | 12 | 37
6 | studentAA | 12 | 37
5 | studentB | 13 | 37
2 | | 18 | 18
1 | | 18 | 18
(13 rows)
二、窓関数はMTDと環比計算を実現する
------ partition mtd-----
presto:default> select dt, month(dt,'yyyyMMdd'), sum(age) as age_num, sum(sum(age) ) over (partition by month(dt,'yyyyMMdd')) from mysql.dbtest_1.mtd_test group by dt,month(dt,'yyyyMMdd');
dt | _col1 | age_num | _col3
----------+---------+---------+-------
20190103 | 2019-01 | 3 | 91
20190105 | 2019-01 | 5 | 91
20190113 | 2019-01 | 13 | 91
20190106 | 2019-01 | 6 | 91
20190108 | 2019-01 | 8 | 91
20190101 | 2019-01 | 1 | 91
20190110 | 2019-01 | 10 | 91
20190109 | 2019-01 | 9 | 91
20190111 | 2019-01 | 11 | 91
20190102 | 2019-01 | 2 | 91
20190107 | 2019-01 | 7 | 91
20190112 | 2019-01 | 12 | 91
20190104 | 2019-01 | 4 | 91
(13 rows)
Query 20190508_064751_00006_tcbxh, FINISHED, 1 node
Splits: 81 total, 81 done (100.00%)
0:00 [13 rows, 0B] [38 rows/s, 0B/s]
------ ------
presto:default> select
-> dt,sum(age) as sum_age,
-> his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt) as his_rate from mysql.dbtest_1.mtd_test group by dt;
dt | sum_age | his_rate
----------+---------+---------------------
20190101 | 1 | 0.0
20190102 | 2 | 1.0
20190103 | 3 | 0.5
20190104 | 4 | 0.3333333333333333
20190105 | 5 | 0.25
20190106 | 6 | 0.2
20190107 | 7 | 0.16666666666666666
20190108 | 8 | 0.14285714285714285
20190109 | 9 | 0.125
20190110 | 10 | 0.1111111111111111
20190111 | 11 | 0.1
20190112 | 12 | 0.09090909090909091
20190113 | 13 | 0.08333333333333333
presto:default> select dt,sum(age), his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt) from mysql.dbtest_1.mtd_test group by dt;
dt | _col1 | _col2
----------+-------+---------------------
20190101 | 1 | 0.0
20190102 | 2 | 1.0
20190103 | 3 | 0.5
20190104 | 4 | 0.3333333333333333
20190105 | 5 | 0.25
20190106 | 6 | 0.2
20190107 | 7 | 0.16666666666666666
20190108 | 8 | 0.14285714285714285
20190109 | 9 | 0.125
20190110 | 10 | 0.1111111111111111
20190111 | 11 | 0.1
20190112 | 12 | 0.09090909090909091
20190113 | 13 | 0.08333333333333333
(13 rows)
Query 20190508_054544_00000_tcbxh, FINISHED, 1 node
Splits: 66 total, 66 done (100.00%)
0:03 [13 rows, 0B] [5 rows/s, 0B/s]
presto:default> select
-> a.*
-> from
-> (
-> select
-> dt,
-> sum(age),
-> his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt)
-> from mysql.dbtest_1.mtd_test
-> where dt between 20190101 and 20190109
-> group by dt
-> )a
-> where dt between 20190105 and 20190109 ;
dt | _col1 | _col2
----------+-------+---------------------
20190105 | 5 | 0.25
20190106 | 6 | 0.2
20190107 | 7 | 0.16666666666666666
20190108 | 8 | 0.14285714285714285
20190109 | 9 | 0.125
(5 rows)
Query 20190508_054722_00001_tcbxh, FINISHED, 1 node
Splits: 66 total, 66 done (100.00%)
0:01 [13 rows, 0B] [23 rows/s, 0B/s]