DECLARE @Table1 TABLE (ID_T1 int, Col1 varchar(10), C2 varchar(10), C3 varchar(10), C4 varchar(10), Col5 varchar(10), Col6 varchar(10))
DECLARE @Table2 TABLE (ID_T2 int, Col1 varchar(10), C2 varchar(10), C3 varchar(10), C4 varchar(10), Col5 varchar(10), Col6 varchar(10))
INSERT INTO @Table1 (ID_T1, Col1, C2, C3, C4, Col5, Col6) SELECT 1, 'One', 'Test1', 'Line1', 'Record1', 'OTLR1', 'RLTO1' UNION ALL SELECT 2, 'Two', 'Test2', 'Line2', 'Record2', 'OTLR2', 'RLTO2' UNION ALL SELECT 3, 'Three', 'Test3', 'Line3', 'Record3', 'OTLR3', 'RLTO3' UNION ALL SELECT 4, 'Four', 'Test4', 'Line4', 'Record4', 'OTLR4', 'RLTO4' UNION ALL SELECT 5, 'Five', 'Test5', 'Line5', 'Record5', 'OTLR5', 'RLTO5' UNION ALL SELECT 6, 'Six', 'Test6', 'Line6', 'Record6', 'OTLR6', 'RLTO6' UNION ALL SELECT 7, 'Seven', 'Test6', 'Line6', 'Record6', 'OTLR7', 'RLTO7' UNION ALL SELECT 8, 'Eight', 'Test8', 'Line8', 'Record8', 'OTLR8', 'RLTO8' INSERT INTO @Table2 (ID_T2, Col1, C2, C3, C4, Col5, Col6) SELECT 10, 'Ten', 'Test1', 'Line1', 'Record1', 'OTLR10', 'RLTO10' UNION ALL SELECT 20, 'Twenty', 'Test2', 'Line2', 'Record2', 'OTLR20', 'RLTO20' UNION ALL SELECT 30, 'Thirty', 'Test3', 'Line3', 'Record3', 'OTLR30', 'RLTO30' UNION ALL SELECT 40, 'Forty', 'Test4', 'Line4', 'Record4', 'OTLR40', 'RLTO40' UNION ALL SELECT 50, 'Fifty', 'Test5', 'Line5', 'Record5', 'OTLR50', 'RLTO50' UNION ALL SELECT 80, 'Eighty', 'Test80', 'Line80', 'Record80', 'OTLR80', 'RLTO80' UNION ALL SELECT 90, 'Ninety', 'Test90', 'Line90', 'Record90', 'OTLR90', 'RLTO90' SELECT * FROM @Table1 SELECT * FROM @Table2
Now, C2, C3, and C4 can have either unique or duplicate values in Table1 and Table2.
I am trying to get three outputs.
Output 1 will have ONLY the records from Table 1 which has same C2, C3, and C4 columns' values in table 2 with Duplicate marked as 1/0 in Duplicate_SameTable
Output 2 will have ONLY the records from Table 1 which has same C2, C3, and C4 columns' values in table 2 with Duplicate marked as 1/0 in Duplicate_PrimaryTable
Output 3 will have the records from Table 1 and Tabl2 2 which has same C2, C3, and C4 columns' values with Duplicate marked as 1/0 in Duplicate_BothTables.
I can get the Output 1 from the following query.
SELECT *, CASE WHEN COUNT(*) OVER (PARTITION BY C2, C3, C4) > 1 THEN 1 ELSE 0 END AS Duplicate_SameTable FROM @Table1 ORDER BY ID_T1 ASC
Output 2
SELECT B.ID, B.Col1, B.C2, B.C3, B.C4, B.Col5, B.Col6, CASE WHEN C.Duplicate_SameTable = 1 THEN 0 ELSE B.Duplicate_BothTables END AS Duplicate_PrimaryTable FROM ( SELECT ID, Col1, C2, C3, C4, Col5, Col6, CASE WHEN COUNT(*) OVER (PARTITION BY C2, C3, C4) > 1 THEN 1 ELSE 0 END AS Duplicate_BothTables FROM ( SELECT ID_T1 AS ID, Col1, C2, C3, C4, Col5, Col6 FROM @Table1 UNION SELECT ID_T2 AS ID, Col1, C2, C3, C4, Col5, Col6 FROM @Table2) A ) B INNER JOIN (SELECT *, CASE WHEN COUNT(*) OVER (PARTITION BY C2, C3, C4) > 1 THEN 1 ELSE 0 END AS Duplicate_SameTable FROM @Table1) C ON B.ID = C.ID_T1
Output 3
SELECT B.ID, B.Col1, B.C2, B.C3, B.C4, B.Col5, B.Col6, CASE WHEN C.Duplicate_SameTable = 1 THEN 0 ELSE B.Duplicate_BothTables END AS Duplicate_PrimaryTable FROM ( SELECT ID, Col1, C2, C3, C4, Col5, Col6, CASE WHEN COUNT(*) OVER (PARTITION BY C2, C3, C4) > 1 THEN 1 ELSE 0 END AS Duplicate_BothTables FROM ( SELECT ID_T1 AS ID, Col1, C2, C3, C4, Col5, Col6 FROM @Table1 UNION SELECT ID_T2 AS ID, Col1, C2, C3, C4, Col5, Col6 FROM @Table2) A ) B LEFT JOIN (SELECT *, CASE WHEN COUNT(*) OVER (PARTITION BY C2, C3, C4) > 1 THEN 1 ELSE 0 END AS Duplicate_SameTable FROM @Table1) C ON B.ID = C.ID_T1 ORDER BY B.ID
I am wondering how to go about getting Output 2 and Output 3 more efficiently.
One way I can think of doing is Union Table 1 and Table 2 and then run above query. Or is there a better way of doing this as real tables will have millions of records and doing UNION and then applying above query might take much longer.
Thank you