I am attempting to run an update that involves the following two tables. The indexes are listed below the table names, followed by the current number of records in the tables. The query contains come comments about results returned from the estimated execution
plan:
CREATE TABLE [PMR_ETL].[Quarterly_HospitalAffil_Work](
[ID] [int] IDENTITY(1,1) NOT NULL,
[group_key] [varchar](38) NULL,
[hospital_affiliation] [varchar](10) NULL,
[hospital_name] [varchar](50) NULL,
[prac1_key] [varchar](20) NULL,
[prac1_primary_address] [varchar](50) NULL,
[prac1_secondary_address] [varchar](50) NULL,
[prac1_city] [varchar](28) NULL,
[prac1_state] [varchar](2) NULL,
[prac1_zip] [varchar](5) NULL,
[prac1_zip4] [varchar](4) NULL,
[prac1_secondary_range] [varchar](50) NULL,
[prac_phone1] [varchar](10) NULL,
[IsNew] [bit] NULL,
[HasChanged] [bit] NULL,
[InsurerKey] [int] NULL,
[RowHashValue] [varbinary](20) NULL,
[KeyHashValue] [varbinary](20) NULL,
[SourceObjectKey] [int] NOT NULL,
[SourceDataKey] [int] NULL,
[SourceObjectInstanceKey] [int] NOT NULL,
[LoadInstanceID] [varchar](15) NOT NULL,
CONSTRAINT [PK_PMR_ETL_Quarterly_HospitalAffil_Work] PRIMARY KEY CLUSTERED
(
[ID] ASC
)ON [PRIMARY],
CONSTRAINT [AK_PMR_ETL_Quarterly_HospitalAffil_Work] UNIQUE NONCLUSTERED
(
[RowHashValue] ASC
)ON [PRIMARY]
) ON [PRIMARY]
647,955 records
CREATE TABLE [Physician].[SourceData](
[SourceDataKey] [int] IDENTITY(1,1) NOT NULL,
[InsurerKey] [int] NOT NULL,
[InitialSourceObjectInstanceKey] [int] NOT NULL,
[SourcePrimaryKeyHashValue] [varbinary](20) NOT NULL,
[SourcePrimaryKeyXml] [xml] NOT NULL,
[SourceRowCDCHashValue] [varbinary](20) NOT NULL,
[SourceRowXml] [xml] NULL,
[RowEffectiveDate] [smalldatetime] NOT NULL,
[RowEndDate] [smalldatetime] NOT NULL,
[AuditRowDate] [smalldatetime] NOT NULL,
[AuditRowProcessName] [varchar](100) NOT NULL,
[AuditRowUserName] [varchar](100) NOT NULL,
CONSTRAINT [PK_Physician_SourceData] PRIMARY KEY CLUSTERED
(
[SourceDataKey] ASC
)ON [PRIMARY],
CONSTRAINT [AK2_Physician_SourceData] UNIQUE NONCLUSTERED
(
[InsurerKey] ASC,
[SourcePrimaryKeyHashValue] ASC,
[SourceRowCDCHashValue] ASC
)ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
169,488,065 records
the query:
DECLARE @MinHashValue as varbinary(20),
@MaxHashValue as varbinary(20);
SELECT @MinHashValue = Min(RowHashValue), @MaxHashValue = MAX(RowHashValue)
FROM(
SELECT TOP (1000000) RowHashValue
FROM [PMR_ETL].[Quarterly_HospitalAffil_Work] w WITH(NOLOCK)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd WITH(NOLOCK)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL) w;
WHILE @MinHashValue IS NOT NULL
BEGIN
UPDATEw
SETSourceDataKey = sd.SourceDataKey
FROM[PMR_ETL].[Quarterly_HospitalAffil_Work] w --(Clustered Index Update 21%) (Clustered index scan 2%)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd --(Index Seek on AK2_Physician_SourceData 76%)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL AND w.RowHashValue BETWEEN @MinHashValue AND @MaxHashValue;
SELECT @MinHashValue = Min(RowHashValue), @MaxHashValue = MAX(RowHashValue)
FROM(
SELECT TOP (1000000) RowHashValue
FROM [PMR_ETL].[Quarterly_HospitalAffil_Work] w WITH(NOLOCK) --(Clustered Index Scan 99%)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd WITH(NOLOCK)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL) w;
END
This update query has taken days to execute. What I am trying to do is to figure out ways to have this query run faster. The query is actually generated via dynamic sql from inside a stored procedure, and then executed.
Thank you for your help.
CREATE TABLE [PMR_ETL].[Quarterly_HospitalAffil_Work](
[ID] [int] IDENTITY(1,1) NOT NULL,
[group_key] [varchar](38) NULL,
[hospital_affiliation] [varchar](10) NULL,
[hospital_name] [varchar](50) NULL,
[prac1_key] [varchar](20) NULL,
[prac1_primary_address] [varchar](50) NULL,
[prac1_secondary_address] [varchar](50) NULL,
[prac1_city] [varchar](28) NULL,
[prac1_state] [varchar](2) NULL,
[prac1_zip] [varchar](5) NULL,
[prac1_zip4] [varchar](4) NULL,
[prac1_secondary_range] [varchar](50) NULL,
[prac_phone1] [varchar](10) NULL,
[IsNew] [bit] NULL,
[HasChanged] [bit] NULL,
[InsurerKey] [int] NULL,
[RowHashValue] [varbinary](20) NULL,
[KeyHashValue] [varbinary](20) NULL,
[SourceObjectKey] [int] NOT NULL,
[SourceDataKey] [int] NULL,
[SourceObjectInstanceKey] [int] NOT NULL,
[LoadInstanceID] [varchar](15) NOT NULL,
CONSTRAINT [PK_PMR_ETL_Quarterly_HospitalAffil_Work] PRIMARY KEY CLUSTERED
(
[ID] ASC
)ON [PRIMARY],
CONSTRAINT [AK_PMR_ETL_Quarterly_HospitalAffil_Work] UNIQUE NONCLUSTERED
(
[RowHashValue] ASC
)ON [PRIMARY]
) ON [PRIMARY]
647,955 records
CREATE TABLE [Physician].[SourceData](
[SourceDataKey] [int] IDENTITY(1,1) NOT NULL,
[InsurerKey] [int] NOT NULL,
[InitialSourceObjectInstanceKey] [int] NOT NULL,
[SourcePrimaryKeyHashValue] [varbinary](20) NOT NULL,
[SourcePrimaryKeyXml] [xml] NOT NULL,
[SourceRowCDCHashValue] [varbinary](20) NOT NULL,
[SourceRowXml] [xml] NULL,
[RowEffectiveDate] [smalldatetime] NOT NULL,
[RowEndDate] [smalldatetime] NOT NULL,
[AuditRowDate] [smalldatetime] NOT NULL,
[AuditRowProcessName] [varchar](100) NOT NULL,
[AuditRowUserName] [varchar](100) NOT NULL,
CONSTRAINT [PK_Physician_SourceData] PRIMARY KEY CLUSTERED
(
[SourceDataKey] ASC
)ON [PRIMARY],
CONSTRAINT [AK2_Physician_SourceData] UNIQUE NONCLUSTERED
(
[InsurerKey] ASC,
[SourcePrimaryKeyHashValue] ASC,
[SourceRowCDCHashValue] ASC
)ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
169,488,065 records
the query:
DECLARE @MinHashValue as varbinary(20),
@MaxHashValue as varbinary(20);
SELECT @MinHashValue = Min(RowHashValue), @MaxHashValue = MAX(RowHashValue)
FROM(
SELECT TOP (1000000) RowHashValue
FROM [PMR_ETL].[Quarterly_HospitalAffil_Work] w WITH(NOLOCK)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd WITH(NOLOCK)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL) w;
WHILE @MinHashValue IS NOT NULL
BEGIN
UPDATEw
SETSourceDataKey = sd.SourceDataKey
FROM[PMR_ETL].[Quarterly_HospitalAffil_Work] w --(Clustered Index Update 21%) (Clustered index scan 2%)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd --(Index Seek on AK2_Physician_SourceData 76%)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL AND w.RowHashValue BETWEEN @MinHashValue AND @MaxHashValue;
SELECT @MinHashValue = Min(RowHashValue), @MaxHashValue = MAX(RowHashValue)
FROM(
SELECT TOP (1000000) RowHashValue
FROM [PMR_ETL].[Quarterly_HospitalAffil_Work] w WITH(NOLOCK) --(Clustered Index Scan 99%)
INNER JOIN [ProviderODS].[Physician].[SourceData] sd WITH(NOLOCK)
ON w.RowHashValue = sd.SourceRowCDCHashValue AND ISNULL(w.InsurerKey, 0) = sd.InsurerKey AND w.KeyHashValue = sd.SourcePrimaryKeyHashValue
WHEREw.SourceDataKey IS NULL) w;
END
This update query has taken days to execute. What I am trying to do is to figure out ways to have this query run faster. The query is actually generated via dynamic sql from inside a stored procedure, and then executed.
Thank you for your help.