with cte as ( select user_id, sales_date, row_number() over(partitionby user_id orderby sales_date) rn from sales_tb groupby user_id,sales_date ) select count(*) from cte groupby user_id, date_sub(sales_date,interval rn day)
变体:连续签到领取金币
思路:
与上题一样,将uid和登录日期combine-distinct后,加上一列行号
1 2 3 4 5 6 7 8 9 10
SELECT uid, date(in_time) as login_date, row_number() over(partitionby uid orderbydate(in_time)) as rn from tb_user_log wheredate(in_time) >='2021-07-07'anddate(in_time) <'2021-11-01' and sign_in =1 and artical_id =0 GROUPBY uid, date(in_time)
将上表作为cte,同样通过减去行号的方式,将连续的签到天数聚合到一天(past day)
1 2 3 4 5 6 7 8
select uid, date_sub(login_date, interval rn DAY) ad, count(*) as days, FLOOR(count(*)/7) as fac, count(*)%7as res from cte GROUPby uid, date_sub(login_date, interval rn DAY)
将上表作为cte2, 增加两列
将past day + lag(day counts)+ 1, 这样相当于找到了同uid下上一次连续登录了几天,由于当前行的past day 是由其所代表的连续签到天数的一天减掉其行号所得到的,而这个行号比上一行的最大连续签到天数再大1,所以是past + lag + 1
根据天数计算及金币数量
1 2 3 4 5 6 7 8
select uid, date_add(ad, interval (ifnull(lag(days) over(partitionby uid orderby ad),0)+1) DAY) cd, case when res <3then fac*15+ res when res >=3then fac*15+ res+2 end coin from cte2
以上表为cte3,根据uid,月份聚合,得到结果,注意时间格式抽取函数的运用
1 2 3 4 5 6
select uid, date_format(cd,"%Y%m"), sum(coin) from cte3 groupby uid, date_format(cd,"%Y%m")
技巧:MySQL 时间格式化函数
1 2 3 4
select date_format(target_datetime, '%Y-%m-...') from tb_time
格式具体代码如下
2. 同一时间最大观看人数
思路:
将进入和登出union起来,并把进入记为1,退出记为-1
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
select artical_id, uid, in_time as tn, 1 person from tb_user_log where artical_id !=0 UNION select artical_id, uid, out_time as tn, -1 person from tb_user_log where artical_id !=0
以上表为cte,以文章为组,根据文章,时间点,人数排序,用sum记录当前实时人数 running total
1 2 3 4 5
select *, sum(person) over(partitionby artical_id orderby artical_id, tn asc, person DESCrowsBETWEEN UNBOUNDED PRECEDING andCURRENTROW ) as p_count from cte1
以上表为cte,选取各文章组别组别中实时人数最大的时间节点
1 2 3 4
select artical_id, max(p_count) as max_read from cte2 groupby artical_id orderby max_read desc
3. 新用户的次日留存率
由于跨天登入登出算都活跃,所以将in和out union起来,然后用group by uid,date(time) 来去除一天内重复的活跃记录(相当于只记录每天有没有活跃)
1 2 3 4 5 6 7 8
select uid,date(in_time) as tn from tb_user_log union select uid, date(out_time) as tn from tb_user_log groupby uid,date(tn)
left join自身 on (1.day+1 = 2.day),增加一列判断该用户在后一天是否活跃(left join自身,如果后一天未活跃,则会得到null)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
select a1.*, case whenrow_number() over(partitionby a1.uid orderby a1.tn) =1then'NEW' else'OLD' end user_status, case when a2.uid isnotnullthen1 else0 end next_day from active a1 leftjoin active a2 ON date_add(a1.tn,interval1day) = a2.tn and a1.uid = a2.uid