Noe*_*oel 11 sql oracle oracle11gr2
我有以下格式的数据.
match_id team_id won_ind
----------------------------
37 Team1 N
67 Team1 Y
98 Team1 N
109 Team1 N
158 Team1 Y
162 Team1 Y
177 Team1 Y
188 Team1 Y
198 Team1 N
207 Team1 Y
217 Team1 Y
10 Team2 N
13 Team2 N
24 Team2 N
39 Team2 Y
40 Team2 Y
51 Team2 Y
64 Team2 N
79 Team2 N
86 Team2 N
91 Team2 Y
101 Team2 N
Run Code Online (Sandbox Code Playgroud)
这里match_id按时间顺序排列,37是第一个,217是team1最后一场比赛.won_ind表明球队是否赢了比赛.
所以,从上面的数据来看,team1已经输掉了第一场比赛,然后赢了一场比赛,然后输掉了2场比赛,然后赢了4场比赛,依此类推.现在我有兴趣为每支球队找到最长的连胜纪录.
Team_id longest_streak
------------------------
Team1 4
Team2 3
Run Code Online (Sandbox Code Playgroud)
我知道如何用PL/SQL实现,但我想知道能否用纯SQL计算出来.我尝试过LEAD,LAG和其他几个函数,但没有任何进展.
我在这里创建了一个示例SQL Fiddle.
这应该可行,SQL Fiddle示例在这里:http://sqlfiddle.com/#!4/31f95/27
-- Longest winning streak per team (Oracle).
--
-- Steps, innermost first:
--   1. new_group = 1 whenever won_ind differs from the same team's previous
--      match (or there is no previous match), else 0.
--   2. group_no  = running sum of new_group, i.e. a number identifying each
--      run of consecutive identical won_ind values within a team.
--   3. Count the rows of every 'Y' run, then take the maximum per team.
--
-- Both window functions are partitioned by team_id. Without the partition the
-- previous-row comparison and the running sum would be taken over all teams'
-- matches ordered by match_id, so another team's match falling between two
-- wins would wrongly split a streak.
-- Teams without any 'Y' row do not appear in the result.
SELECT team_id, MAX(seq_length) AS longest_sequence
FROM (SELECT team_id, COUNT(*) AS seq_length
FROM (SELECT team_id, won_ind, match_id,
SUM(new_group) OVER(PARTITION BY team_id ORDER BY match_id) AS group_no
FROM (SELECT team_id, won_ind, match_id,
DECODE(LAG(won_ind) OVER(PARTITION BY team_id ORDER BY match_id), won_ind, 0, 1) AS new_group
FROM matches))
WHERE won_ind = 'Y'
GROUP BY team_id, group_no)
GROUP BY team_id
ORDER BY longest_sequence DESC, team_id;
Run Code Online (Sandbox Code Playgroud)
-- Longest winning streak per team, computed step by step with CTEs.
WITH sample_matches AS (
    -- Inline copy of the sample data; column names come from the first branch.
    SELECT 37 AS match_id, 'Team1' AS team_id, 'N' AS won_ind FROM dual UNION ALL
    SELECT 67,  'Team1', 'Y' FROM dual UNION ALL
    SELECT 98,  'Team1', 'N' FROM dual UNION ALL
    SELECT 109, 'Team1', 'N' FROM dual UNION ALL
    SELECT 158, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 162, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 177, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 188, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 198, 'Team1', 'N' FROM dual UNION ALL
    SELECT 207, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 217, 'Team1', 'Y' FROM dual UNION ALL
    SELECT 10,  'Team2', 'N' FROM dual UNION ALL
    SELECT 13,  'Team2', 'N' FROM dual UNION ALL
    SELECT 24,  'Team2', 'N' FROM dual UNION ALL
    SELECT 39,  'Team2', 'Y' FROM dual UNION ALL
    SELECT 40,  'Team2', 'Y' FROM dual UNION ALL
    SELECT 51,  'Team2', 'Y' FROM dual UNION ALL
    SELECT 64,  'Team2', 'N' FROM dual UNION ALL
    SELECT 79,  'Team2', 'N' FROM dual UNION ALL
    SELECT 86,  'Team2', 'N' FROM dual UNION ALL
    SELECT 91,  'Team2', 'Y' FROM dual UNION ALL
    SELECT 101, 'Team2', 'N' FROM dual
),
streak_starts AS (
    -- Flag the first win of a streak: a 'Y' row whose previous match for the
    -- same team was an 'N'. A win with no previous match gets 0 (the LAG is
    -- NULL), so a team's opening streak simply keeps streak number 0.
    SELECT
        match_id,
        team_id,
        won_ind,
        CASE
            WHEN won_ind = 'Y'
             AND LAG(won_ind) OVER (PARTITION BY team_id ORDER BY match_id) = 'N'
            THEN 1
            ELSE 0
        END AS is_streak_start
    FROM sample_matches
),
numbered_wins AS (
    -- Keep only the wins and number the streaks per team: the running sum of
    -- the start flags is constant within one streak and increases by one at
    -- the start of the next.
    SELECT
        match_id,
        team_id,
        SUM(is_streak_start) OVER (PARTITION BY team_id ORDER BY match_id) AS streak_no
    FROM streak_starts
    WHERE won_ind = 'Y'
),
streak_lengths AS (
    -- Number of wins in each streak.
    SELECT
        COUNT(*) AS streak_length,
        team_id,
        streak_no
    FROM numbered_wins
    GROUP BY
        team_id,
        streak_no
)
-- Longest streak per team.
SELECT
    MAX(streak_length) AS longest_streak,
    team_id
FROM streak_lengths
GROUP BY team_id
;
Run Code Online (Sandbox Code Playgroud)
我在 Teradata 上有一个类似的任务,将其修改为在 Oracle 上运行:
-- Longest winning streak per team, using a running count of non-wins as the
-- streak identifier.
WITH tagged AS (
    -- For every match, count the rows so far (per team, in match_id order)
    -- whose won_ind is not 'Y'. Wins lying between the same two non-wins share
    -- the same count; wins before the first non-win all get NULL and therefore
    -- also fall into one group.
    SELECT
        team_id,
        won_ind,
        SUM(CASE WHEN won_ind <> 'Y' THEN 1 END)
            OVER (PARTITION BY team_id
                  ORDER BY match_id
                  ROWS UNBOUNDED PRECEDING) AS losses_so_far
    FROM matches
),
win_runs AS (
    -- Length of each run of wins.
    SELECT
        team_id,
        COUNT(*) AS cnt
    FROM tagged
    WHERE won_ind = 'Y'
    GROUP BY team_id, losses_so_far
)
SELECT
    team_id,
    MAX(cnt)
FROM win_runs
GROUP BY team_id;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
4483 次 |
| 最近记录: |