select user_id, month, month_visit, sum(month_visit)over(partition by user_id order by month asc) as total_countfrom (select user_id, substr(visit_date, 0, 7) as month, sum(visit_cnt) as month_visit from t_visit_stat_044 group by user_id, substr(visit_date, 0, 7) ) t
查询结果
2)在上面结果上,我们增加一列数据,对每个用户所有月份的访问次数和: total_month_count 。这里需要注意聚合函数sum()后面over()内的写法差别,计算累积求和时有order by 语句,计算全部访问次数时没有。
select user_id, month, month_visit, sum(month_visit)over(partition by user_id order by month asc) as total_count, sum(month_visit)over(partition by user_id) as total_all_countfrom (select user_id, substr(visit_date, 0, 7) as month, sum(visit_cnt) as month_visit from t_visit_stat_044 group by user_id, substr(visit_date, 0, 7) ) t
select user_id, month, month_visit/sum(month_visit)over(partition by user_id) as month_visit_per, sum(month_visit)over(partition by user_id order by month asc)/sum(month_visit)over(partition by user_id) as total_visit_perfrom (select user_id, substr(visit_date, 0, 7) as month, sum(visit_cnt) as month_visit from t_visit_stat_044 group by user_id, substr(visit_date, 0, 7) ) t
查询结果
说明: 聚合函数包括 sum()、count()、avg() 等,均可以使用over() 对其开窗。其中在over()函数内,如果存在order by 语句,则是计算从分组内的第一个数据累积到当前行数据的聚合结果,如果没有order by语句,则计算整个分组内的聚合结果。
四、建表语句和数据插入
--建表语句create table if not exists t_visit_stat_044 ( user_id string, visit_date string, visit_cnt bigint )ROW FORMAT DELIMITEDFIELDS TERMINATED BY ','STORED AS orc;--数据插入语句insert into table t_visit_stat_044 values('u001','2024-01-01',3),('u001','2024-01-02',4),('u002','2024-01-03',6),('u003','2024-01-07',2),('u001','2024-02-04',5),('u002','2024-02-10',3),('u002','2024-02-13',1),('u002','2024-02-14',7),('u003','2024-02-11',2);