SQL*_*Guy 5 sql-server t-sql sql-server-2014
我想寻求帮助提出一个查询,它可以识别非重叠记录上的组。这是一个示例场景(无可否认是人为的)。假设我有一些员工被分配从事各种项目。虽然一名员工可以被分配到多个项目,但他/她在任何给定时间只能从事一个项目(你不希望我们都拥有这种奢侈吗:)。我需要找出哪些项目可以安排并行工作,因为它们不共享任何员工。这是设置示例表和数据的一些代码。
--1. Create #Projects table
IF OBJECT_ID('tempdb..#Projects') IS NOT NULL
DROP TABLE #Projects
CREATE TABLE #Projects (
ProjectId INT,
ProjectName VARCHAR(16)
)
INSERT INTO #Projects ( ProjectId, ProjectName )
VALUES ( 1, 'Project 1'), ( 2, 'Project 2'), ( 3, 'Project 3'), ( 4, 'Project 4'), ( 5, 'Project 5'),
( 6, 'Project 6'), ( 7, 'Project 7')
--2. Create #Employees table
IF OBJECT_ID('tempdb..#Employees') IS NOT NULL
DROP TABLE #Employees
CREATE TABLE #Employees (
EmployeeId INT,
EmployeeName VARCHAR(16)
)
INSERT INTO #Employees (EmployeeId, EmployeeName)
VALUES (101, 'Employee 101'), (102, 'Employee 102'), (103, 'Employee 103'), (104, 'Employee 104'),
(105, 'Employee 105'), (106, 'Employee 106'), (107, 'Employee 107')
--3. Create #Employee_Projects table
IF OBJECT_ID('tempdb..#Employee_Projects') IS NOT NULL
DROP TABLE #Employee_Projects
CREATE TABLE #Employee_Projects (
ProjectId INT,
EmployeeId INT
)
INSERT INTO #Employee_Projects (ProjectId, EmployeeId)
VALUES (1, 101), (1, 105), (1, 107), (2, 102), (2, 103), (2, 107), (3, 104), (3, 105), (3, 106), (4, 100), (4, 101), (4, 102), (5, 103), (5, 104), (6, 105), (6, 106), (7, 106), (7, 107), (8, 102), (8, 104), (8, 106)
Run Code Online (Sandbox Code Playgroud)
这是查询,它将向您显示我们创建的员工和项目:
SELECT p.ProjectId, p.ProjectName, e.EmployeeId, e.EmployeeName
FROM #Projects p
JOIN #Employee_Projects ep ON ep.ProjectId = p.ProjectId
JOIN #Employees e ON e.EmployeeId = ep.EmployeeId
ORDER BY ep.ProjectId, e.EmployeeId
Run Code Online (Sandbox Code Playgroud)
我们的数据如下所示:
ProjectId ProjectName EmployeeId EmployeeName
----------- ---------------- ----------- ----------------
1 Project 1 101 Employee 101
1 Project 1 105 Employee 105
1 Project 1 107 Employee 107
2 Project 2 102 Employee 102
2 Project 2 103 Employee 103
2 Project 2 107 Employee 107
3 Project 3 104 Employee 104
3 Project 3 105 Employee 105
3 Project 3 106 Employee 106
4 Project 4 101 Employee 101
4 Project 4 102 Employee 102
5 Project 5 103 Employee 103
5 Project 5 104 Employee 104
6 Project 6 105 Employee 105
6 Project 6 106 Employee 106
7 Project 7 106 Employee 106
7 Project 7 107 Employee 107
8 Project 8 102 Employee 102
8 Project 8 104 Employee 104
8 Project 8 106 Employee 106
Run Code Online (Sandbox Code Playgroud)
我们可以直观地看出,例如,我们可以安排项目 1、2、3 同时运行,因为它们不共享任何员工。让我们称这组项目为“第 1 组”。之后,我们可以安排项目 4、5、6。我们称之为“第 2 组”。最后,在我们的示例中,我们还剩下项目 7,这将是我们的“第 3 组”。我的问题是,如何编写 T-SQL 查询来执行此类项目分组?
谢谢!
这是一种装箱问题,因此您很可能需要从可用的近似解决方案中进行选择,而不是尝试进行详尽的搜索。
一个非常简单的想法是将项目分组,每次选择员工人数最多的项目。一旦当前项目不再适合当前组,我们就开始一个新组。
这相对容易迭代表达,尽管它可能表现不佳,这取决于真实数据集的特征。在单个 T-SQL 语句中表达相同的逻辑可能非常具有挑战性。无论如何,作为一个例子,这是一个使用上面概述的简单贪婪算法的迭代 T-SQL 解决方案:
SET NOCOUNT ON;
CREATE TABLE #Groups
(
GroupID integer NOT NULL,
ProjectID integer NOT NULL,
CONSTRAINT PK_Groups
PRIMARY KEY (GroupID, ProjectID)
);
-- First group
DECLARE @GroupID integer = 1;
-- Choose the largest project to start with
-- and assign to group 1
INSERT #Groups
SELECT TOP (1)
@GroupID AS GroupID,
P.ProjectId
FROM #Projects AS P
GROUP BY
P.ProjectId
ORDER BY
COUNT_BIG(*) DESC,
NEWID(); -- Random order tie-breaker
WHILE 1 = 1
BEGIN
-- Add the next largest project to the current group
INSERT #Groups
SELECT TOP (1)
@GroupID AS GroupID,
P.ProjectId
FROM #Projects AS P
WHERE NOT EXISTS
(
-- Project not already allocated to a group
SELECT *
FROM #Groups AS G
WHERE G.ProjectID = P.ProjectId
)
AND NOT EXISTS
(
-- Employees for this project
SELECT EP2.EmployeeId
FROM #Employee_Projects AS EP2
WHERE EP2.ProjectId = P.ProjectId
INTERSECT
-- Employees already in the current group of projects
SELECT EP.EmployeeId
FROM #Groups AS G2
JOIN #Employee_Projects AS EP
ON EP.ProjectId = G2.ProjectID
WHERE G2.GroupID = @GroupID
)
GROUP BY P.ProjectId
ORDER BY
COUNT_BIG(*) DESC,
NEWID(); -- Random order tie-breaker
-- No project found for this group
IF @@ROWCOUNT = 0
BEGIN
-- Finished when no more projects left to process
IF NOT EXISTS
(
SELECT P.ProjectId FROM #Projects AS P
EXCEPT
SELECT G.ProjectID FROM #Groups AS G
)
BEGIN
BREAK;
END;
-- Next group number
SET @GroupID += 1;
END;
END;
SELECT
G.GroupID,
G.ProjectID
FROM #Groups AS G;
Run Code Online (Sandbox Code Playgroud)
此代码是不确定的,因为遇到的任何联系(按每个项目的员工人数降序排序时)都会使用NEWID
. 然而,典型的输出是:
???????????????????????
? GroupID ? ProjectID ?
???????????????????????
? 1 ? 3 ?
? 1 ? 4 ?
? 2 ? 2 ?
? 2 ? 6 ?
? 3 ? 5 ?
? 3 ? 7 ?
? 4 ? 1 ?
???????????????????????
Run Code Online (Sandbox Code Playgroud)
归档时间: |
|
查看次数: |
897 次 |
最近记录: |