[29]Presto window function

45595 ワード

一、テスト

presto:default> select id, name, sum(age) as age_num, sum(sum(age) ) over (partition by name) from mysql.dbtest_1.student group by name,id;
 id |    name    | age_num | _col3
----+------------+---------+-------
  5 | studentB   |      13 |    13
 20 | resulttest |      18 |    38
 22 | resulttest |      20 |    38
  4 | studentA   |      12 |    12
  2 |         |      18 |    18
  6 | studentAA  |      12 |    12
  8 | cut-test   |      20 |   112
 14 | cut-test   |      20 |   112
 18 | cut-test   |      18 |   112
 19 | cut-test   |      18 |   112
 17 | cut-test   |      18 |   112
 16 | cut-test   |      18 |   112
  1 |         |      18 |    18
(13 rows)

Query 20190429_120601_00006_5f22y, FINISHED, 1 node
Splits: 81 total, 81 done (100.00%)
0:00 [13 rows, 0B] [37 rows/s, 0B/s]

presto:default> select id, name, sum(age) as age_num from mysql.dbtest_1.student group by name,id order by name;
 id |    name    | age_num
----+------------+---------
 18 | cut-test   |      18
 14 | cut-test   |      20
 17 | cut-test   |      18
 19 | cut-test   |      18
  8 | cut-test   |      20
 16 | cut-test   |      18
 22 | resulttest |      20
 20 | resulttest |      18
  4 | studentA   |      12
  6 | studentAA  |      12
  5 | studentB   |      13
  2 |         |      18
  1 |         |      18
(13 rows)

Query 20190429_120702_00007_5f22y, FINISHED, 1 node
Splits: 51 total, 51 done (100.00%)
0:00 [13 rows, 0B] [39 rows/s, 0B/s]

presto:default> select id, name, sum(age) as age_num, sum(age_num ) over (partition by name) from mysql.dbtest_1.student group by name,id;
Query 20190429_120812_00008_5f22y failed: line 1:43: Column 'age_num' cannot be resolved
select id, name, sum(age) as age_num, sum(age_num ) over (partition by name) from mysql.dbtest_1.student group by name,id

presto:default> select id, name, sum(sum(age)) over (partition by name) from mysql.dbtest_1.student group by name,id;
 id |    name    | _col2
----+------------+-------
 20 | resulttest |    38
 22 | resulttest |    38
  4 | studentA   |    12
 14 | cut-test   |   112
  8 | cut-test   |   112
 18 | cut-test   |   112
 19 | cut-test   |   112
 17 | cut-test   |   112
 16 | cut-test   |   112
  1 |         |    18
  2 |         |    18
  6 | studentAA  |    12
  5 | studentB   |    13
(13 rows)

presto:default> select id, name, sum(age) as age_num, sum(sum(age) ) over (partition by substr(name,1,1)) from mysql.dbtest_1.student group by name,id;
 id |    name    | age_num | _col3
----+------------+---------+-------
 22 | resulttest |      20 |    38
 20 | resulttest |      18 |    38
 18 | cut-test   |      18 |   112
 14 | cut-test   |      20 |   112
  8 | cut-test   |      20 |   112
 16 | cut-test   |      18 |   112
 17 | cut-test   |      18 |   112
 19 | cut-test   |      18 |   112
  4 | studentA   |      12 |    37
  6 | studentAA  |      12 |    37
  5 | studentB   |      13 |    37
  2 |         |      18 |    18
  1 |         |      18 |    18
(13 rows)

二、窓関数はMTDと環比計算を実現する

------ partition  mtd-----
presto:default> select dt, month(dt,'yyyyMMdd'), sum(age) as age_num, sum(sum(age) ) over (partition by month(dt,'yyyyMMdd')) from mysql.dbtest_1.mtd_test group by dt,month(dt,'yyyyMMdd');
    dt    |  _col1  | age_num | _col3
----------+---------+---------+-------
 20190103 | 2019-01 |       3 |    91
 20190105 | 2019-01 |       5 |    91
 20190113 | 2019-01 |      13 |    91
 20190106 | 2019-01 |       6 |    91
 20190108 | 2019-01 |       8 |    91
 20190101 | 2019-01 |       1 |    91
 20190110 | 2019-01 |      10 |    91
 20190109 | 2019-01 |       9 |    91
 20190111 | 2019-01 |      11 |    91
 20190102 | 2019-01 |       2 |    91
 20190107 | 2019-01 |       7 |    91
 20190112 | 2019-01 |      12 |    91
 20190104 | 2019-01 |       4 |    91
(13 rows)
Query 20190508_064751_00006_tcbxh, FINISHED, 1 node
Splits: 81 total, 81 done (100.00%)
0:00 [13 rows, 0B] [38 rows/s, 0B/s]
 
 
 
------ ------
presto:default> select
             -> dt,sum(age) as sum_age,
             -> his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt) as his_rate from mysql.dbtest_1.mtd_test group by dt;
    dt    | sum_age |      his_rate
----------+---------+---------------------
 20190101 |       1 |                 0.0
 20190102 |       2 |                 1.0
 20190103 |       3 |                 0.5
 20190104 |       4 |  0.3333333333333333
 20190105 |       5 |                0.25
 20190106 |       6 |                 0.2
 20190107 |       7 | 0.16666666666666666
 20190108 |       8 | 0.14285714285714285
 20190109 |       9 |               0.125
 20190110 |      10 |  0.1111111111111111
 20190111 |      11 |                 0.1
 20190112 |      12 | 0.09090909090909091
 20190113 |      13 | 0.08333333333333333
 
presto:default> select dt,sum(age), his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt) from mysql.dbtest_1.mtd_test group by dt;
    dt    | _col1 |        _col2
----------+-------+---------------------
 20190101 |     1 |                 0.0
 20190102 |     2 |                 1.0
 20190103 |     3 |                 0.5
 20190104 |     4 |  0.3333333333333333
 20190105 |     5 |                0.25
 20190106 |     6 |                 0.2
 20190107 |     7 | 0.16666666666666666
 20190108 |     8 | 0.14285714285714285
 20190109 |     9 |               0.125
 20190110 |    10 |  0.1111111111111111
 20190111 |    11 |                 0.1
 20190112 |    12 | 0.09090909090909091
 20190113 |    13 | 0.08333333333333333
(13 rows)
Query 20190508_054544_00000_tcbxh, FINISHED, 1 node
Splits: 66 total, 66 done (100.00%)
0:03 [13 rows, 0B] [5 rows/s, 0B/s]
presto:default> select
             -> a.*
             -> from
             -> (
             -> select
             -> dt,
             -> sum(age),
             -> his_data_rate(sum(age), 1, 'day',dt,'yyyyMMdd' ) over (order by dt)
             -> from mysql.dbtest_1.mtd_test
             -> where dt between 20190101 and 20190109
             -> group by dt
             -> )a
             -> where  dt between 20190105 and 20190109 ;
    dt    | _col1 |        _col2
----------+-------+---------------------
 20190105 |     5 |                0.25
 20190106 |     6 |                 0.2
 20190107 |     7 | 0.16666666666666666
 20190108 |     8 | 0.14285714285714285
 20190109 |     9 |               0.125
(5 rows)
Query 20190508_054722_00001_tcbxh, FINISHED, 1 node
Splits: 66 total, 66 done (100.00%)
0:01 [13 rows, 0B] [23 rows/s, 0B/s]