Wednesday, June 17, 2020

Monitor SQL Always On replication Issues


To monitor and get email alert when there is issue with SQL Always On Replication
This will check every 5 minutes for delay and if it is longer than 30, it will trigger email alert.

Steps:
1. Create  Store procedure as below.
2. Create Schedule job as below


1. Procedure:

USE [MyDbaDB]
GO
/****** Object:  StoredProcedure [dbo].[uspDBMon_rptGetAOAGLatency]    Script Date: 6/17/2017 8:49:48 AM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

ALTER PROCEDURE [dbo].[uspDBMon_rptGetAOAGLatency]
@Database_Name SYSNAME = 'My_Database',
@Mail BIT = 0,
@Mail_Subject VARCHAR(2000) = 'AOAG latency between replicas',
@Mail_Recipients VARCHAR(MAX) = 'youremail@mydomain.net',
@Delay_Minutes TINYINT = 10
AS
/*

Purpose : This Stored Procedure is used to report latency in timestamps for AlwaysOn Synchronization
EXEC [dbo].[uspDBMon_rptGetAOAGLatency]
@Database_Name = 'My_Database',
@Mail = 1,
@Mail_Subject = 'MY_AOAG latency between replicas',
@Mail_Recipients = 'youremail@yourdomain.net',
@Delay_Minutes = 0

*/

SET NOCOUNT ON
SET CONCAT_NULL_YIELDS_NULL OFF

--Variable declarations
DECLARE @tableHTML VARCHAR(MAX)
DECLARE @Output TINYINT

SELECT TOP 1 @Output = 1
FROM [sys].[dm_hadr_database_replica_states] AS drs
INNER JOIN [sys].[availability_databases_cluster] AS adc
ON drs.group_id = adc.group_id
AND drs.group_database_id = adc.group_database_id
WHERE adc.[database_name] = @Database_Name
AND drs.[is_local] = 0
AND (drs.[synchronization_health] <> 2 OR
DATEDIFF(mi, drs.[last_sent_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_received_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_hardened_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_redone_time] , GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_commit_time] , GETDATE()) >= @Delay_Minutes)

--Generate the output only on the Primary Replica
IF ([sys].[fn_hadr_is_primary_replica] (@Database_Name)=1)
BEGIN
INSERT INTO [dbo].[tblDBMon_DM_HADR_Database_Replica_States]
( [Date_Captured],
[Server_Name],
[Database_Name],
[AG_Name],
[Synchronization_State],
[Synchronization_Health],
[Last_Sent_Time],
[Last_Sent_Time_Delay(Mins)],
[Last_Received_Time],
[Last_Received_Time_Delay(Mins)],
[Last_Hardened_Time],
[Last_Hardened_Time_Delay(Mins)],
[Last_Redone_Time],
[Last_Redone_Time_Delay(Mins)],
[Last_Commit_Time],
[Last_Commit_Time_Delay(Mins)],
[Log_Send_Queue_Size(KB)],
[Redo_Queue_Size(KB)])
SELECT GETDATE(),
ar.[replica_server_name] AS [Server_Name],
adc.[database_name] AS [Database_Name],
ag.[name] AS [AG_Name],
drs.[synchronization_state_desc] AS [Synchronization_State],
drs.[synchronization_health_desc] AS [Synchronization_Health],
drs.[last_sent_time] AS [Last_Sent_Time],
DATEDIFF(mi, drs.[last_sent_time], GETDATE()) AS [Last_Sent_Time_Delay(Mins)],
drs.[last_received_time] AS [Last_Received_Time],
DATEDIFF(mi, drs.[last_received_time], GETDATE()) AS [Last_Received_Time_Delay(Mins)],
drs.[last_hardened_time] AS [Last_Hardened_Time],
DATEDIFF(mi, drs.[last_hardened_time], GETDATE()) AS [Last_Hardened_Time_Delay(Mins)],
drs.[last_redone_time] AS [Last_Redone_Time],
DATEDIFF(mi, drs.[last_redone_time] , GETDATE()) AS [Last_Redone_Time_Delay(Mins)],
drs.[last_commit_time] AS [Last_Commit_Time],
DATEDIFF(mi, drs.[last_commit_time] , GETDATE()) AS [Last_Commit_Time_Delay(Mins)],
drs.[log_send_queue_size] AS [Log_Send_Queue_Size(KB)],
drs.[redo_queue_size] AS [Redo_Queue_Size(KB)]
FROM [sys].[dm_hadr_database_replica_states] AS drs
INNER JOIN [sys].[availability_databases_cluster] AS adc
ON drs.group_id = adc.group_id
AND drs.group_database_id = adc.group_database_id
INNER JOIN [sys].[availability_groups] AS ag
ON ag.group_id = drs.group_id
INNER JOIN [sys].[availability_replicas] AS ar
ON drs.group_id = ar.group_id
AND drs.replica_id = ar.replica_id
WHERE adc.[database_name] = @Database_Name
AND drs.[is_local] = 0
ORDER BY
ar.[replica_server_name],
adc.[database_name]

IF (@Mail = 1 AND @Mail_Recipients IS NOT NULL AND @Output = 1)
BEGIN
SET @tableHTML =
N'<H3>AlwaysOn Availability Group Latency Report</H3>' +
N'<table border="1";padding-left:50px>' +
N'<div style="margin-left:500px"></div>' +
N'<tr><th>Server_Name</th><th>Database_Name</th>' +
N'<th>Synchronization_State</th><th>Synchronization_Health</th>' +
N'<th>Last_Sent_Delay(Mins)</th>' +
N'<th>Last_Received_Delay(Mins)</th>' +
N'<th>Last_Hardened_Delay(Mins)</th>' +
N'<th>Last_Redone_Delay(Mins)</th>' +
N'<th>Last_Commit_Delay(Mins)</th></tr>' +
CAST ( ( SELECT
td = ar.[replica_server_name], '',
td = adc.[database_name], '',
td = drs.[synchronization_state_desc], '',
td = drs.[synchronization_health_desc], '',
td = DATEDIFF(mi, drs.[last_sent_time], GETDATE()), '',
td = DATEDIFF(mi, drs.[last_received_time], GETDATE()), '',
td = DATEDIFF(mi, drs.[last_hardened_time], GETDATE()), '',
td = DATEDIFF(mi, drs.[last_redone_time], GETDATE()), '',
td = DATEDIFF(mi, drs.[last_commit_time], GETDATE())
FROM [sys].[dm_hadr_database_replica_states] AS drs
INNER JOIN [sys].[availability_databases_cluster] AS adc
ON drs.group_id = adc.group_id
AND drs.group_database_id = adc.group_database_id
INNER JOIN [sys].[availability_replicas] AS ar
ON drs.group_id = ar.group_id
AND drs.replica_id = ar.replica_id
WHERE adc.[database_name] = @Database_Name
AND drs.[is_local] = 0
AND (drs.[synchronization_health] <> 2 OR
DATEDIFF(mi, drs.[last_sent_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_received_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_hardened_time], GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_redone_time] , GETDATE()) >= @Delay_Minutes OR
DATEDIFF(mi, drs.[last_commit_time] , GETDATE()) >= @Delay_Minutes)
ORDER BY
ar.[replica_server_name],
adc.[database_name]
FOR XML PATH('tr'), TYPE) AS NVARCHAR(MAX)) + N'</table>'

EXEC msdb.dbo.sp_send_dbmail @recipients = @Mail_Recipients,
@subject = @Mail_Subject,
@profile_name = N'MyDB Mail',
@body = @tableHTML,
@body_format = 'HTML'
END
ELSE
BEGIN
PRINT 'No latency identified beyond the threshold specified.'
END

DELETE TOP (10000)
FROM [dbo].[tblDBMon_DM_HADR_Database_Replica_States]
WHERE [Date_Captured] < GETDATE() - 90
END




2. Schedule Task:
Goto-Steps-Create New-PasteBelow

EXEC [dbo].[uspDBMon_rptGetAOAGLatency] 
@Database_Name = 'My_Database',
@Mail = 1,
@Mail_Subject = ': AOAG latency between replicas',
@Mail_Recipients = 'youremail@yourdomain.net
@Delay_Minutes = 30

No comments:

Post a Comment