0%

PostgreSQL 使用窗口函数 求连续K天保持排名前TopN 的数据

问题描述

  • 查询学生成绩排名, 找出持续超过 3 天 排名前 2 的同学

思路分析

  • 获取排名: dense_rank相同排名, rank间隔排名
  • lag(col, 1) 获取上一条 , lead 获取下(偏移量)条记录
  • 根据时间间隔 打标签, 配合 sum 实现中断过的时间分组
  • 过滤拿到结果

实现

数据准备

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
create table user_top(
id serial PRIMARY key,
username varchar(20),
date timestamptz,
grade int
);

-- 插入数据
INSERT INTO user_top(username,date,grade) values
('赵','2022-08-01', 90),
('钱','2022-08-01', 90),
('孙','2022-08-01', 90),
('李','2022-08-01', 95),
('赵','2022-08-02', 80),
('钱','2022-08-02', 60),
('孙','2022-08-02', 70),
('李','2022-08-02', 65),
('赵','2022-08-03', 75),
('钱','2022-08-03', 90),
('孙','2022-08-03', 85),
('赵','2022-08-04', 80),
('李','2022-08-04', 75),
('周','2022-08-04', 90),
('王','2022-08-04', 85),
('赵','2022-08-05', 80),
('孙','2022-08-05', 75),
('李','2022-08-05', 90),
('钱','2022-08-05', 85)

思路

  1. 获取排名
1
SELECT  username,date, grade, dense_rank() over(PARTITION BY date ORDER BY grade DESC ) AS top FROM user_top
  1. 时间间隔标记
1
2
3
4
WITH tmp AS (
SELECT username,date, grade, dense_rank() over(PARTITION BY date ORDER BY grade DESC ) AS top FROM user_top
) SELECT * ,lag("date",1) OVER (partition by username order by date) , CASE WHEN EXTRACT (DAY FROM (date - lag("date",1) OVER (partition by username order by date) ) )=1 THEN 0 ELSE 1 end AS flag
FROM tmp
  1. 分组
1
2
3
4
5
6
7
WITH tmp AS (
WITH tmp AS (
SELECT username,date, grade, dense_rank() over(PARTITION BY date ORDER BY grade DESC ) AS top FROM user_top
) SELECT * ,lag("date",1) OVER (partition by username order by date) ,
CASE WHEN EXTRACT (DAY FROM (date - lag("date",1) OVER (partition by username order by date) ) )=1 THEN 0 ELSE 1 end AS flag
FROM tmp
) SELECT *,sum(flag) OVER( PARTITION BY username ORDER BY date ) AS flag_group FROM tmp
  1. 最终结果 SQL
1
2
3
4
5
6
7
8
9
10
11
12
13
14
WITH tmp AS (
WITH tmp AS (
WITH tmp AS (
SELECT username,date, grade, dense_rank() over(PARTITION BY date ORDER BY grade DESC ) AS top FROM user_top
) SELECT * ,lag("date",1) OVER (partition by username order by date) ,
CASE WHEN EXTRACT (DAY FROM (date - lag("date",1) OVER (partition by username order by date) ) )=1 THEN 0 ELSE 1 end AS flag
FROM tmp WHERE top <=2 --排名前2
) SELECT *,sum(flag) OVER( PARTITION BY username ORDER BY date ) AS flag_group FROM tmp
) SELECT username , count(*) AS top_cnt,
array_agg(top) AS top_desc,
array_agg(grade ) AS grade,
array_agg(to_char(date AT time ZONE 'PRC', 'YYYY-MM-DD')) AS time_desc
FROM tmp GROUP BY username ,flag_group HAVING count(*) >=3 -- 保持3天

欢迎关注我的其它发布渠道