|
/*
Requirement:
Count the number of consecutive days on which each user logged in.
The data looks like this:
uid,login_time
1,'2016-11-25 13:30:45'
1,'2016-11-24 13:30:45'
1,'2016-11-24 10:30:45'
1,'2016-11-24 09:30:45'
1,'2016-11-23 09:30:45'
1,'2016-11-10 09:30:45'
1,'2016-11-09 09:30:45'
1,'2016-11-01 09:30:45'
1,'2016-10-31 09:30:45'
2,'2016-11-25 13:30:45'
2,'2016-11-24 13:30:45'
2,'2016-11-23 10:30:45'
2,'2016-11-22 09:30:45'
2,'2016-11-21 09:30:45'
2,'2016-11-20 09:30:45'
2,'2016-11-19 09:30:45'
2,'2016-11-02 09:30:45'
2,'2016-11-01 09:30:45'
2,'2016-10-31 09:30:45'
2,'2016-10-30 09:30:45'
2,'2016-10-29 09:30:45'
*/
| 28 | + |
-- 1. Create the table and load the sample data.

-- Let Hive automatically run small jobs in local mode.
set hive.exec.mode.local.auto=true;

drop table if exists user_login;

-- One row per login event; the sample data contains several logins by the
-- same user on the same day.
create table user_login (
    uid        int,
    login_time string   -- sample values are formatted as 'yyyy-MM-dd HH:mm:ss'
);

-- Name the target columns explicitly so the load does not depend on the
-- physical column order of the table.
insert into user_login (uid, login_time) values
(1,'2016-11-25 13:30:45'),
(1,'2016-11-24 13:30:45'),
(1,'2016-11-24 10:30:45'),
(1,'2016-11-24 09:30:45'),
(1,'2016-11-23 09:30:45'),
(1,'2016-11-10 09:30:45'),
(1,'2016-11-09 09:30:45'),
(1,'2016-11-01 09:30:45'),
(1,'2016-10-31 09:30:45'),
(2,'2016-11-25 13:30:45'),
(2,'2016-11-24 13:30:45'),
(2,'2016-11-23 10:30:45'),
(2,'2016-11-22 09:30:45'),
(2,'2016-11-21 09:30:45'),
(2,'2016-11-20 09:30:45'),
(2,'2016-11-19 09:30:45'),
(2,'2016-11-02 09:30:45'),
(2,'2016-11-01 09:30:45'),
(2,'2016-10-31 09:30:45'),
(2,'2016-10-30 09:30:45'),
(2,'2016-10-29 09:30:45');
| 59 | + |
| 60 | + |
-- 2. Compute every streak of consecutive login days per user.
--
-- Approach: within one user, number the DISTINCT login days 1, 2, 3, ... in
-- ascending order and subtract that number (in days) from the day itself.
-- Days that follow each other without a gap all map to the same anchor date
-- (mark_day), so grouping by (uid, mark_day) yields one row per streak.
--
-- The login events must be collapsed to one row per calendar day first:
-- ranking the raw timestamps gives several logins on the same day different
-- ranks, which splits a single streak into pieces and counts login events
-- instead of days.

with daily_login as (
    -- One row per user and calendar day, keeping the first and last login
    -- timestamps of that day so the streak boundaries stay full timestamps.
    select
        uid,
        to_date(login_time) as login_date,
        min(login_time)     as first_login,
        max(login_time)     as last_login
    from user_login
    group by uid, to_date(login_time)
),
numbered as (
    -- Sequence number of each login day within its user.
    select
        uid,
        login_date,
        first_login,
        last_login,
        row_number() over (partition by uid order by login_date asc) as rn
    from daily_login
),
marked as (
    -- Anchor date shared by all days that belong to the same streak.
    select
        uid,
        first_login,
        last_login,
        date_sub(login_date, rn) as mark_day
    from numbered
)
select
    uid,
    min(first_login) as login_min,   -- first login of the streak
    max(last_login)  as login_max,   -- last login of the streak
    count(*)         as login_days   -- number of consecutive login days
from marked
group by uid, mark_day
order by uid, login_min;


/*
Result:
1	2016-10-31 09:30:45	2016-11-01 09:30:45	2
1	2016-11-09 09:30:45	2016-11-10 09:30:45	2
1	2016-11-23 09:30:45	2016-11-25 13:30:45	3
2	2016-10-29 09:30:45	2016-11-02 09:30:45	5
2	2016-11-19 09:30:45	2016-11-25 13:30:45	7
*/
0 commit comments