Files
zoneminder/scripts/zmstats.pl.in
Isaac Connor 673d5a9336 fix: chunk zmstats bucket-prune DELETE IN-list to avoid packet-size limits
The DELETE WHERE EventId IN (?,?,...) is intentional: it locks each row
via the primary key, keeping the lock range minimal and preserving the
canonical lock order that this PR's deadlock fix relies on. But a single
IN-list with tens of thousands of placeholders (Events_Month after weeks
of accumulation) can hit max_allowed_packet and max_prepared_stmt_count.

Split the EventId list into 1000-row batches and loop. PK-based locking
is preserved; SQL/packet size stays bounded. Switching to a predicate-
based DELETE would re-introduce range locks on the bucket index and
undo the deadlock work.
2026-05-20 08:50:31 -04:00

357 lines
15 KiB
Perl

#!@PERL_EXECUTABLE@ -wT
use strict;
use warnings;
use bytes;
# ==========================================================================
#
# These are the elements you can edit to suit your installation
#
# ==========================================================================
use constant START_DELAY => 30; # To give everything else time to start
# ==========================================================================
#
# Don't change anything below here
#
# ==========================================================================
@EXTRA_PERL_LIB@
use ZoneMinder;
use DBI;
use Sys::MemInfo qw(totalmem freemem totalswap freeswap);
use ZoneMinder::Server;
# Unbuffered output on the currently selected handle.
$| = 1;

# Taint-mode hygiene: pin PATH to a fixed value, force a known shell when one
# is set at all, and drop the environment variables that can alter how a
# shell interprets commands.
$ENV{PATH} = '/bin:/usr/bin:/usr/local/bin';
if ( exists $ENV{SHELL} ) {
  $ENV{SHELL} = '/bin/sh';
}
delete $ENV{$_} for qw(IFS CDPATH ENV BASH_ENV);

# Bring up ZoneMinder logging before anything else tries to log.
logInit();
logSetSignal();
# Set to true by the signal handler; the main loop and the batched prune
# loops poll it to know when to stop.
my $zm_terminate = 0;

# Signal handler shared by TERM and INT.
#
# Perl passes the name of the delivered signal as the first argument, so the
# log line reports which signal actually arrived instead of always claiming
# TERM. Falls back to 'TERM' if called without an argument.
sub TermHandler {
  my ($signal) = @_;
  $signal //= 'TERM';
  Info("Received $signal, exiting");
  $zm_terminate = 1;
}
$SIG{TERM} = \&TermHandler;
$SIG{INT} = \&TermHandler;
# Startup: wait START_DELAY seconds, then open the database connection and
# load this server's record.
Info('Stats Daemon starting in '.START_DELAY.' seconds');
sleep(START_DELAY);
my $dbh = zmDbConnect();
# If the first connect attempt failed, $dbh is false here; calling a method
# on it would die before the main loop's reconnect logic gets a chance to
# run, so only issue the session setting on a live handle.
$dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED') if $dbh;
# Direct method call rather than indirect-object syntax ("new Class(...)").
my $server = ZoneMinder::Server->new($Config{ZM_SERVER_ID});
while (!$zm_terminate) {
# Make sure we have a live database handle before doing any work this cycle.
# Stop retrying as soon as a termination signal has been received, otherwise
# the daemon could never shut down while the database is unreachable.
while ( !$zm_terminate and !( $dbh and $dbh->ping() ) ) {
  Info('Reconnecting to db');
  if ( $dbh = zmDbConnect() ) {
    # Session-level settings belong to the connection, so a fresh connection
    # needs the same isolation level that was applied at startup.
    $dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED');
  } else {
    #What we do here is not that important, so just skip this interval
    sleep($Config{ZM_STATS_UPDATE_INTERVAL});
  }
}
# Terminated while waiting for the database: leave the main loop rather than
# carrying on with a handle that may be undefined.
last if $zm_terminate;
# Sample load, CPU and memory figures once per cycle so that the Server
# record and the Server_Stats history row are written from the same snapshot.
my @cpuload = $server->CpuLoad();
Debug("Cpuload: @cpuload");
my ($user_percent, $nice_percent, $sys_percent, $idle_percent, $usage_percent) = $server->CpuUsage();
my ($total_mem, $free_mem, $total_swap, $free_swap) = (totalmem(), freemem(), totalswap(), freeswap());

# Update the Server row, but only when this process has a server Id.
if ($server->Id()) {
  my $in_transaction = ZoneMinder::Database::start_transaction($dbh);
  $server->lock_and_load(); # get fresh other values
  # A true return value from save() is treated as failure, as before. It is
  # kept in a lexical instead of being assigned to the global $_.
  my $save_error = $server->save({
      CpuLoad=>$cpuload[0],
      TotalMem=>$total_mem, FreeMem=>$free_mem, TotalSwap=>$total_swap, FreeSwap=>$free_swap,
      CpuUserPercent=>$user_percent, CpuNicePercent=>$nice_percent, CpuSystemPercent=>$sys_percent, CpuIdlePercent=>$idle_percent, CpuUsagePercent=>$usage_percent,
    });
  if ($save_error) {
    # errstr can be undefined when the failure did not come from the database
    # layer; fall back to whatever save() returned (presumably an error
    # description - TODO confirm) rather than concatenating undef.
    Error('Failed Updating status of Server record for Id='.$server->Id().': '.($dbh->errstr() // $save_error));
  }
  ZoneMinder::Database::end_transaction($dbh, $in_transaction);
}

# Append the same snapshot to the Server_Stats history (ServerId 0 when no
# server id is configured).
zmDbDo('INSERT INTO Server_Stats (ServerId, TimeStamp, CpuLoad, CpuUserPercent, CpuNicePercent, CpuSystemPercent, CpuIdlePercent, CpuUsagePercent, TotalMem, FreeMem, TotalSwap, FreeSwap) VALUES (?,NOW(),?,?,?,?,?,?,?,?,?,?)',
  ($Config{ZM_SERVER_ID} ? $Config{ZM_SERVER_ID} : 0),
  $cpuload[0], $user_percent, $nice_percent, $sys_percent, $idle_percent, $usage_percent,
  $total_mem, $free_mem, $total_swap, $free_swap);
{
  # Trim Server_Stats rows older than one day, at most 100 per cycle.
  my $rows = zmDbDo('DELETE FROM `Server_Stats` WHERE `TimeStamp` < now() - interval 1 DAY LIMIT 100');
  # Only report when the statement produced a result; this matches the
  # "if defined $rows" guard used by the log-pruning code further down and
  # avoids interpolating an undefined value.
  Debug("Deleted $rows Server Stats table entries by time") if defined $rows;
}
# Drop Monitor_Status rows that have not been refreshed within the last
# minute. The stale MonitorIds are collected first, then removed with a
# single DELETE keyed on those ids; nothing is issued when none are stale.
my $monitor_ids = $dbh->selectcol_arrayref('SELECT MonitorId FROM Monitor_Status WHERE UpdatedOn < timestamp(DATE_SUB(NOW(), INTERVAL 1 MINUTE))');
if ( $monitor_ids and @$monitor_ids ) {
  my $id_placeholders = join(',', ('?') x @$monitor_ids);
  zmDbDo('DELETE FROM Monitor_Status WHERE MonitorId IN ('.$id_placeholders.')', @$monitor_ids);
}
# Prune aged rows from Events_Hour/Day/Week/Month and resync Event_Summaries
# in one transaction.
#
# The resync MUST NOT use a multi-table UPDATE that joins Event_Summaries to
# the bucket tables: a multi-table UPDATE takes S-locks on the joined rows
# and holds them to TX commit *regardless of isolation level*, which
# deadlocks against event_update_trigger / event_delete_trigger holding
# X-locks on those same bucket rows. Snapshot the bucket aggregates first
# via plain SELECT (consistent read at RC -> no locks), then UPDATE
# Event_Summaries one row at a time using the snapshotted values.
#
# READ COMMITTED is still set for the bucket DELETE range scans, so they
# don't take next-key/gap locks against concurrent filter deletes / zma
# trigger updates on adjacent EventIds.
#
# Atomicity tradeoff: between the per-bucket aggregate SELECT and the
# per-monitor UPDATE, a concurrent trigger writer (zma/zmc/Event::delete)
# can adjust Event_Summaries via the canonical lock chain. Our subsequent
# UPDATE will overwrite that adjustment with our older snapshot. This is
# intentional and safe: the bucket triggers keep ES drift bounded between
# zmstats passes, and any drift introduced by this race is corrected on
# the next pass. Locking ES before the snapshot would invert the canonical
# order and re-introduce the deadlock cycle this rewrite eliminated.
{
  # Prune aged rows from the Events_Hour/Day/Week/Month bucket tables and
  # resync Event_Summaries for the affected monitors, all in one transaction.
  # The whole transaction is retried (up to $max_attempts times, with a
  # growing randomised pause) when the database reports a deadlock.
  #
  # Every statement inside the transaction goes through $dbh->do / select*
  # directly and its error code is captured immediately afterwards. DBI resets
  # err/errstr at the start of most method calls, and zmDbDo logs through this
  # same handle, so reading $dbh->err() after zmDbDo has returned is not
  # guaranteed to see the statement's own error - which could hide a deadlock
  # and let a half-done transaction be committed.
  my $attempt = 0;
  my $max_attempts = 5;
  while (1) {
    $attempt++;
    # SET TRANSACTION ... applies only to the next transaction, so it must
    # be issued before begin_work and re-issued on each retry. Use
    # $dbh->do directly, NOT zmDbDo: zmDbDo's success Debug would write to
    # the Logs table on this same $dbh, and that INSERT would become the
    # "next transaction" that consumes the isolation directive — silently
    # dropping our prune+resync TX back to the default.
    $dbh->do('SET TRANSACTION ISOLATION LEVEL READ COMMITTED');
    $dbh->begin_work();
    my $err = 0;
    my $errstr; # captured before rollback() — rollback can clear errstr
    my %touched_monitors; # MonitorIds whose buckets we just modified
    # Chunk size for DELETE WHERE EventId IN (...) — keeps each DELETE
    # well under max_allowed_packet / max_prepared_stmt_count on installs
    # where Events_Month has accumulated tens of thousands of aged rows,
    # while preserving PK-based per-row locking (DELETE by predicate would
    # range-lock the bucket index and re-introduce the lock-ordering
    # inversions this rewrite was meant to eliminate).
    my $delete_chunk = 1000;
    foreach my $bucket (
      ['Events_Hour', '1 hour'],
      ['Events_Day', '1 day'],
      ['Events_Week', '1 week'],
      ['Events_Month', '1 month'],
    ) {
      my ($table, $interval) = @$bucket;
      # Collect the aged rows first so the DELETE can address them by EventId.
      my $rows = $dbh->selectall_arrayref(
        "SELECT EventId, MonitorId FROM $table WHERE StartDateTime < DATE_SUB(NOW(), INTERVAL $interval)"
      );
      $err = $dbh->err() // 0;
      if ($err) { $errstr = $dbh->errstr() // ''; last; }
      next if !$rows or !@$rows;
      my @event_ids = map { $_->[0] } @$rows;
      $touched_monitors{$_->[1]} = 1 for @$rows;
      for (my $i = 0; $i < @event_ids; $i += $delete_chunk) {
        my $end = $i + $delete_chunk - 1;
        $end = $#event_ids if $end > $#event_ids;
        my @batch = @event_ids[$i .. $end];
        $dbh->do(
          "DELETE FROM $table WHERE EventId IN (".join(',', ('?') x @batch).')',
          undef, @batch
        );
        # Capture the outcome before any other call touches the handle.
        $err = $dbh->err() // 0;
        if ($err) { $errstr = $dbh->errstr() // ''; last; }
      }
      last if $err;
    }
    # Only resync ES for monitors we actually touched in this cycle. If
    # nothing was pruned, the bucket triggers maintain ES correctly between
    # zmstats passes; zmaudit is the periodic deep-resync safety net.
    # Restricting to touched monitors also avoids X-locking every ES row
    # on every zmstats cycle (which would contend with the trigger writers
    # this rewrite is meant to protect).
    if (!$err and %touched_monitors) {
      my @mids = sort { $a <=> $b } keys %touched_monitors;
      my $placeholders = join(',', ('?') x @mids);
      # Snapshot the per-monitor bucket aggregates for the touched monitors
      # only. Plain SELECT under RC is a consistent read and takes no row
      # locks, so this can't deadlock with the trigger writers.
      my %agg;
      $agg{$_} ||= {} for @mids; # seed so monitors with zero rows still get zeroed
      foreach my $bucket (
        ['Events_Hour', 'h'],
        ['Events_Day', 'd'],
        ['Events_Week', 'w'],
        ['Events_Month', 'm'],
      ) {
        my ($table, $key) = @$bucket;
        my $rows = $dbh->selectall_arrayref(
          "SELECT MonitorId, COUNT(*), COALESCE(SUM(DiskSpace), 0) FROM $table".
          " WHERE MonitorId IN ($placeholders) GROUP BY MonitorId",
          undef, @mids
        );
        $err = $dbh->err() // 0;
        if ($err) { $errstr = $dbh->errstr() // ''; last; }
        for my $r (@$rows) {
          $agg{$r->[0]}{$key.'_c'} = $r->[1];
          $agg{$r->[0]}{$key.'_s'} = $r->[2];
        }
      }
      # One UPDATE per touched monitor. The transaction at this point is
      # still holding the bucket-row X-locks acquired by the earlier
      # DELETEs and any ES X-locks the bucket DELETE triggers acquired as
      # a cascade. Those were all acquired in the canonical order
      # (buckets -> ES) so they don't conflict with the trigger writers.
      # The new statement itself only X-locks the one ES row it targets
      # and reads no other table, so it doesn't add any cross-table
      # dependency that could form a new cycle — its lock acquisition
      # continues in the same direction.
      if (!$err) {
        for my $mid (@mids) {
          # Named $totals rather than $a so the sort variable is not shadowed.
          my $totals = $agg{$mid};
          $dbh->do(
            'UPDATE Event_Summaries SET '.
            'HourEvents=?, HourEventDiskSpace=?, '.
            'DayEvents=?, DayEventDiskSpace=?, '.
            'WeekEvents=?, WeekEventDiskSpace=?, '.
            'MonthEvents=?, MonthEventDiskSpace=? '.
            'WHERE MonitorId=?',
            undef,
            $totals->{h_c} // 0, $totals->{h_s} // 0,
            $totals->{d_c} // 0, $totals->{d_s} // 0,
            $totals->{w_c} // 0, $totals->{w_s} // 0,
            $totals->{m_c} // 0, $totals->{m_s} // 0,
            $mid
          );
          $err = $dbh->err() // 0;
          if ($err) { $errstr = $dbh->errstr() // ''; last; }
        }
      }
    }
    if (!$err) {
      $dbh->commit();
      last;
    }
    $dbh->rollback();
    if ($err != 1213 or $attempt >= $max_attempts) { # 1213 = ER_LOCK_DEADLOCK
      Error("Event_Summaries prune+resync gave up after $attempt attempt(s): ".($errstr // ''));
      last;
    }
    Debug("Deadlock during Event_Summaries prune+resync, attempt $attempt/$max_attempts");
    # Sub-second pause that doubles with each attempt, plus a little jitter.
    select(undef, undef, undef, 0.05 * (1 << $attempt) + rand(0.05));
  }
}
# Prune the Logs table if required (excluding AUDIT entries).
# ZM_LOG_DATABASE_LIMIT is either a plain integer (maximum number of rows to
# keep) or an interval such as "7 days" (maximum age).
if ( $Config{ZM_LOG_DATABASE_LIMIT} ) {
  my $audit_level = ZoneMinder::Logger::AUDIT;
  # Work on a copy so the shared %Config entry is never rewritten.
  my $log_limit = $Config{ZM_LOG_DATABASE_LIMIT};
  if ( $log_limit =~ /^\d+$/ ) {
    # Number of rows
    my $selectLogRowCountSql = 'SELECT count(*) AS `Rows` FROM `Logs` WHERE `Level` != ?';
    my $selectLogRowCountSth = $dbh->prepare_cached( $selectLogRowCountSql )
      or Fatal("Can't prepare '$selectLogRowCountSql': ".$dbh->errstr());
    $selectLogRowCountSth->execute($audit_level)
      or Fatal("Can't execute: ".$selectLogRowCountSth->errstr());
    my $row = $selectLogRowCountSth->fetchrow_hashref();
    # Only one row is read, so release the cached handle explicitly;
    # otherwise it is still active when prepare_cached hands it out again on
    # the next cycle, which DBI warns about.
    $selectLogRowCountSth->finish();
    my $logRows = $row ? ($row->{Rows} // 0) : 0;
    if ( $logRows > $log_limit ) {
      # Remove the oldest non-audit rows in excess of the limit.
      my $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` != ? ORDER BY `TimeKey` ASC LIMIT ?', $audit_level, $logRows - $log_limit);
      Debug('Deleted '.$rows.' log table entries by count') if defined $rows;
    }
  } else {
    # Time of record
    # The SQL interval unit must be singular ("7 day", not "7 days"), so drop
    # a trailing "s".
    $log_limit =~ s/s$//;
    my $rows;
    # Delete in batches of 100 until a batch comes back short or we are
    # asked to terminate.
    do {
      $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` != ? AND `TimeKey` < unix_timestamp(now() - interval '.$log_limit.') LIMIT 100', $audit_level);
      Debug("Deleted $rows log table entries by time") if $rows;
    } while ($rows and ($rows == 100) and !$zm_terminate);
  }
} # end if ZM_LOG_DATABASE_LIMIT
# Prune AUDIT log entries separately with their own retention period.
# ZM_LOG_AUDIT_DATABASE_LIMIT is either a plain integer (maximum number of
# rows to keep) or an interval such as "30 days" (maximum age).
if ( $Config{ZM_LOG_AUDIT_DATABASE_LIMIT} ) {
  my $audit_level = ZoneMinder::Logger::AUDIT;
  my $audit_limit = $Config{ZM_LOG_AUDIT_DATABASE_LIMIT};
  if ( $audit_limit =~ /^\d+$/ ) {
    # Number of rows
    my $sth = $dbh->prepare_cached('SELECT count(*) AS `Rows` FROM `Logs` WHERE `Level` = ?')
      or Fatal("Can't prepare audit log count: ".$dbh->errstr());
    $sth->execute($audit_level)
      or Fatal("Can't execute audit log count: ".$sth->errstr());
    my $row = $sth->fetchrow_hashref();
    # Only one row is read, so release the cached handle explicitly;
    # otherwise it is still active when prepare_cached hands it out again on
    # the next cycle, which DBI warns about.
    $sth->finish();
    my $logRows = $row ? ($row->{Rows} // 0) : 0;
    if ( $logRows > $audit_limit ) {
      # Remove the oldest audit rows in excess of the limit.
      my $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` = ? ORDER BY `TimeKey` ASC LIMIT ?', $audit_level, $logRows - $audit_limit);
      Debug('Deleted '.$rows.' audit log entries by count') if defined $rows;
    }
  } else {
    # Time of record
    # The SQL interval unit must be singular, so drop a trailing "s".
    $audit_limit =~ s/s$//;
    my $rows;
    # Delete in batches of 100 until a batch comes back short or we are
    # asked to terminate.
    do {
      $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` = ? AND `TimeKey` < unix_timestamp(now() - interval '.$audit_limit.') LIMIT 100', $audit_level);
      Debug("Deleted $rows audit log entries by time") if $rows;
    } while ($rows and ($rows == 100) and !$zm_terminate);
  }
} # end if ZM_LOG_AUDIT_DATABASE_LIMIT
{
  # Expire rows in Sessions whose access value is below the current time
  # minus ZM_COOKIE_LIFETIME. Rows go in batches of 100; another batch is
  # attempted only while the previous one was full and no termination has
  # been requested.
  while (1) {
    my $cutoff = time - $Config{ZM_COOKIE_LIFETIME};
    my $deleted = zmDbDo('DELETE FROM Sessions WHERE access < ? LIMIT 100', $cutoff);
    Debug("Deleted $deleted sessions") if $deleted;
    last unless $deleted and ($deleted == 100) and !$zm_terminate;
  }
}
sleep($Config{ZM_STATS_UPDATE_INTERVAL});
} # end while (!$zm_terminate)
# Reached once the main loop has seen $zm_terminate set: log and exit.
Info('Stats Daemon exiting');
exit();
# Not reached at run time because of the exit() above.
1;
__END__
#
# ==========================================================================
#
# ZoneMinder Stats Updating Script, $Date$, $Revision$
# Copyright (C) 2001-2008 Philip Coombes
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# ==========================================================================

=head1 NAME

zmstats.pl - ZoneMinder Stats Updating Script

=head1 SYNOPSIS

zmstats.pl

=head1 DESCRIPTION

This script runs in the background and periodically updates various stats in the db, such as event counts, disk space, etc.

=cut