Skip to content

Commit 61152c9

Browse files
committed
提交用户连续登录天数case
1 parent ddf9674 commit 61152c9

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

cases/用户连续登录天数.sql

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
需求:
3+
要求统计用户连续登录的天数
4+
数据如下:
5+
uid,login_time
6+
1,'2016-11-25 13:30:45'
7+
1,'2016-11-24 13:30:45'
8+
1,'2016-11-24 10:30:45'
9+
1,'2016-11-24 09:30:45'
10+
1,'2016-11-23 09:30:45'
11+
1,'2016-11-10 09:30:45'
12+
1,'2016-11-09 09:30:45'
13+
1,'2016-11-01 09:30:45'
14+
1,'2016-10-31 09:30:45'
15+
2,'2016-11-25 13:30:45'
16+
2,'2016-11-24 13:30:45'
17+
2,'2016-11-23 10:30:45'
18+
2,'2016-11-22 09:30:45'
19+
2,'2016-11-21 09:30:45'
20+
2,'2016-11-20 09:30:45'
21+
2,'2016-11-19 09:30:45'
22+
2,'2016-11-02 09:30:45'
23+
2,'2016-11-01 09:30:45'
24+
2,'2016-10-31 09:30:45'
25+
2,'2016-10-30 09:30:45'
26+
2,'2016-10-29 09:30:45'
27+
*/
28+
29+
-- 1. Create and populate the demo table.

-- Turn on Hive's local mode.
set hive.exec.mode.local.auto=true;

-- Rebuild the table from scratch so the script can be re-run.
drop table if exists user_login;

create table user_login (
    uid        int,
    login_time string
);

-- Sample data: one row per login event (a user may log in several times a day).
insert into user_login values
    (1, '2016-11-25 13:30:45'),
    (1, '2016-11-24 13:30:45'),
    (1, '2016-11-24 10:30:45'),
    (1, '2016-11-24 09:30:45'),
    (1, '2016-11-23 09:30:45'),
    (1, '2016-11-10 09:30:45'),
    (1, '2016-11-09 09:30:45'),
    (1, '2016-11-01 09:30:45'),
    (1, '2016-10-31 09:30:45'),
    (2, '2016-11-25 13:30:45'),
    (2, '2016-11-24 13:30:45'),
    (2, '2016-11-23 10:30:45'),
    (2, '2016-11-22 09:30:45'),
    (2, '2016-11-21 09:30:45'),
    (2, '2016-11-20 09:30:45'),
    (2, '2016-11-19 09:30:45'),
    (2, '2016-11-02 09:30:45'),
    (2, '2016-11-01 09:30:45'),
    (2, '2016-10-31 09:30:45'),
    (2, '2016-10-30 09:30:45'),
    (2, '2016-10-29 09:30:45');
59+
60+
61+
-- 2. Compute every consecutive-login streak per user.
--
-- A streak is measured in calendar days, so the login events are first
-- collapsed to one row per user per day. Numbering those days with
-- row_number() and subtracting that number from the date gives the same
-- mark_day for every day of an unbroken run. Ranking the raw login_time
-- values instead would give several logins on the same day different ranks
-- and split a single streak into multiple groups.

with daily_login as (
    -- One row per user per calendar day, keeping that day's first and last login.
    select
        uid,
        to_date(login_time) as login_date,
        min(login_time)     as first_login_time,
        max(login_time)     as last_login_time
    from user_login
    group by
        uid,
        to_date(login_time)
),

numbered_login as (
    -- Number each user's login days in chronological order.
    select
        uid,
        login_date,
        first_login_time,
        last_login_time,
        row_number() over (partition by uid order by login_date asc) as day_seq
    from daily_login
),

marked_login as (
    -- Consecutive days share the same (login_date - day_seq) anchor date.
    select
        uid,
        first_login_time,
        last_login_time,
        date_sub(login_date, day_seq) as mark_day
    from numbered_login
)

select
    uid,
    -- First login of the streak.
    min(first_login_time) as login_min,
    -- Last login of the streak.
    max(last_login_time)  as login_max,
    -- Streak length in calendar days.
    count(mark_day)       as login_days
from marked_login
group by
    uid,
    mark_day
order by
    uid,
    login_min;


/*
Result:
1	2016-10-31 09:30:45	2016-11-01 09:30:45	2
1	2016-11-09 09:30:45	2016-11-10 09:30:45	2
1	2016-11-23 09:30:45	2016-11-25 13:30:45	3
2	2016-10-29 09:30:45	2016-11-02 09:30:45	5
2	2016-11-19 09:30:45	2016-11-25 13:30:45	7
*/
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
需求:
3+
要求统计某个时间段内连续登录天数>=5天的用户
4+
数据如下:
5+
uid,login_time
6+
1,'2016-11-25 13:30:45'
7+
1,'2016-11-24 13:30:45'
8+
1,'2016-11-24 10:30:45'
9+
1,'2016-11-24 09:30:45'
10+
1,'2016-11-23 09:30:45'
11+
1,'2016-11-10 09:30:45'
12+
1,'2016-11-09 09:30:45'
13+
1,'2016-11-01 09:30:45'
14+
1,'2016-10-31 09:30:45'
15+
2,'2016-11-25 13:30:45'
16+
2,'2016-11-24 13:30:45'
17+
2,'2016-11-23 10:30:45'
18+
2,'2016-11-22 09:30:45'
19+
2,'2016-11-21 09:30:45'
20+
2,'2016-11-20 09:30:45'
21+
2,'2016-11-19 09:30:45'
22+
2,'2016-11-02 09:30:45'
23+
2,'2016-11-01 09:30:45'
24+
2,'2016-10-31 09:30:45'
25+
2,'2016-10-30 09:30:45'
26+
2,'2016-10-29 09:30:45'
27+
*/
28+
29+
-- 1. Create and populate the demo table.

-- Turn on Hive's local mode.
set hive.exec.mode.local.auto=true;

-- Rebuild the table from scratch so the script can be re-run.
drop table if exists user_login;

create table user_login (
    uid        int,
    login_time string
);

-- Sample data: one row per login event (a user may log in several times a day).
insert into user_login values
    (1, '2016-11-25 13:30:45'),
    (1, '2016-11-24 13:30:45'),
    (1, '2016-11-24 10:30:45'),
    (1, '2016-11-24 09:30:45'),
    (1, '2016-11-23 09:30:45'),
    (1, '2016-11-10 09:30:45'),
    (1, '2016-11-09 09:30:45'),
    (1, '2016-11-01 09:30:45'),
    (1, '2016-10-31 09:30:45'),
    (2, '2016-11-25 13:30:45'),
    (2, '2016-11-24 13:30:45'),
    (2, '2016-11-23 10:30:45'),
    (2, '2016-11-22 09:30:45'),
    (2, '2016-11-21 09:30:45'),
    (2, '2016-11-20 09:30:45'),
    (2, '2016-11-19 09:30:45'),
    (2, '2016-11-02 09:30:45'),
    (2, '2016-11-01 09:30:45'),
    (2, '2016-10-31 09:30:45'),
    (2, '2016-10-30 09:30:45'),
    (2, '2016-10-29 09:30:45');
59+
60+
61+
-- 2. Find users who logged in on at least 5 consecutive calendar days.
--
-- lead(col, n) over (...):
--   1st argument: the column to read (here, the login date).
--   2nd argument: how many rows ahead of the current row to look; 4 means
--                 the 5th login day counted from the current one.
--   3rd argument: replacement for a missing row; omitted here, so NULL is
--                 returned when fewer than 4 rows follow.
--
-- The logins are first reduced to distinct (uid, login_date) pairs. With one
-- row per day, "the 5th row is exactly 4 days later" can only hold when the
-- five days are consecutive. Without the de-duplication, repeated logins on
-- one day are counted as separate rows, which can both hide a real streak
-- and report a streak that has gaps in it.

with daily_login as (
    -- One row per user per calendar day.
    select
        uid,
        to_date(login_time) as login_date
    from user_login
    group by
        uid,
        to_date(login_time)
),

login_with_fifth_day as (
    -- Attach the date of the 5th login day counted from each row.
    select
        uid,
        login_date,
        lead(login_date, 4) over (partition by uid order by login_date asc) as login_5_day
    from daily_login
)

select distinct
    uid
from login_with_fifth_day
-- Five distinct days spanning exactly five calendar days are consecutive;
-- rows with a NULL login_5_day are filtered out by the comparison.
where datediff(login_5_day, login_date) + 1 = 5;

/*
Result:
2
*/
-- Only uid=2 has a run of 5 or more consecutive login days.
81+
82+

0 commit comments

Comments
 (0)