If you have ever used replication in SQL Server, you know that sometimes network and/or database problems can cause your replication agents to go into a failed state. You can configure SQL Server to send an email when this happens, but that is about the extent of the alert you will get. Unfortunately, if you miss the alert and your agent stays in a failed state for too long, you have to reinitialize all the subscribers. Not Anymore! The following lua script will go into alarm when any replication agent goes into a failed state. You can then set up an action list to send reminder alerts so that you can correct the problem before a replication reinitialization is required!

CODE
-----------------------------------------------------------------
-- Name: SQLReplicationError.lua
-- Author: Kevin Prestage
-- Required INM version: 3.3
-- Version: 1.0
-- Date: 2007-12-13
-- Description: This script will monitor Distribution,
-- Merge, Snapshot and LogReader agents on a
-- server and alert you of any problems
--
-- Arguments:
-- 1) Server Name
-----------------------------------------------------------------


function getSQL(s)
sql =

"select distinct " ..
"    case when a.runstatus = 1 then 'Start' " ..
"         when a.runstatus = 2 then 'Succeed' " ..
"         when a.runstatus = 3 then 'In progress' " ..
"         when a.runstatus = 4 then 'Idle' " ..
"         when a.runstatus = 5 then 'Retry' " ..
"         when a.runstatus = 6 then 'Fail' " ..
"         else 'Unknown' end as current_status, " ..
"    a.runstatus, " ..
"    a.start_time, " ..
"    a.time, " ..
"    a.comments, " ..
"    a.error_id, " ..
"    x.name, " ..
"    '" .. s .. "' " ..
" from dbo.MS" .. s .. "_history a  " ..
" join dbo.MS" .. s .. "_history b on b.agent_id = a.agent_id " ..
" join  dbo.MS" .. s .. "_agents x on x.id = a.agent_id " ..
" where a.time = (select max(time) from MS" .. s .. "_history where agent_id = a.agent_id) ";

return sql;

end










servername = GetArgument(0);

db = TLuaDB();
con = servername .. "@distribution";
print(con);

sql = getSQL("Distribution");
sql = sql .. "union all ";
sql = sql .. getSQL("LogReader");
sql = sql .. "union all ";
sql = sql .. getSQL("Snapshot");
sql = sql .. "union all ";
sql = sql .. getSQL("Merge");



x = db:Connect(con,TLuaDB.CLIENT_SQLSERVER);
result = "";
time = "";
status = "";
statuscode = "";
comments = "";
errorid = 0;
type = "";
name = "";

if (x == true) then
    x = db:Execute(sql);
    if (db:ResultAvilable() == true) then
        while (db:NextRow() == true) do
            time = db:GetCol(4);
            status = db:GetCol(1);
            statuscode  = db:GetCol(2);
            comments = db:GetCol(5);
            errorid = db:GetCol(6);
            name = db:GetCol(7);
            type = db:GetCol(8);
        
            result = result .. name .. "\t" .. type .. "\t" .. status.."\t" .. time .. "\t" .. comments .. "\t" .. errorid .. "\n";
            if (status == "Fail") then
                x = false;
            end
            
        end


    else
        x = false;
        result = "No Agents Found.";
    end
    


else
    result = db:GetErrorDescription();
end





SetExitStatus(result, x);