mirror of
https://github.com/ZoneMinder/zoneminder.git
synced 2026-06-22 20:50:06 -04:00
The DELETE WHERE EventId IN (?,?,...) is intentional: it locks each row via the primary key, keeping the lock range minimal and preserving the canonical lock order that this PR's deadlock fix relies on. But a single IN-list with tens of thousands of placeholders (Events_Month after weeks of accumulation) can hit max_allowed_packet and max_prepared_stmt_count. Split the EventId list into 1000-row batches and loop. PK-based locking is preserved; SQL/packet size stays bounded. Switching to a predicate-based DELETE would re-introduce range locks on the bucket index and undo the deadlock work.
357 lines
15 KiB
Perl
357 lines
15 KiB
Perl
#!@PERL_EXECUTABLE@ -wT
|
|
use strict;
|
|
use warnings;
|
|
use bytes;
|
|
|
|
# ==========================================================================
|
|
#
|
|
# These are the elements you can edit to suit your installation
|
|
#
|
|
# ==========================================================================
|
|
|
|
use constant START_DELAY => 30; # To give everything else time to start
|
|
|
|
# ==========================================================================
|
|
#
|
|
# Don't change anything below here
|
|
#
|
|
# ==========================================================================
|
|
|
|
@EXTRA_PERL_LIB@
|
|
use ZoneMinder;
|
|
use DBI;
|
|
use Sys::MemInfo qw(totalmem freemem totalswap freeswap);
|
|
use ZoneMinder::Server;
|
|
|
|
# Turn on autoflush for the currently selected output handle.
$| = 1;

# The script runs in taint mode (-T on the shebang line), so pin PATH and
# SHELL to known values and drop the environment variables that can change
# how a shell behaves.
$ENV{PATH} = '/bin:/usr/bin:/usr/local/bin';
if (exists $ENV{SHELL}) {
  $ENV{SHELL} = '/bin/sh';
}
delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};

logInit();
logSetSignal();

# Set to 1 by TermHandler; the main loop and the batch-delete loops poll it.
my $zm_terminate = 0;
|
|
# Signal handler shared by TERM and INT. It logs the request and raises the
# $zm_terminate flag; the main loop notices the flag and exits on its own.
sub TermHandler {
  Info('Received TERM, exiting');
  $zm_terminate = 1;
}

# Install the handler for both termination signals.
$SIG{$_} = \&TermHandler for qw(TERM INT);
|
|
|
|
# Give the rest of the system START_DELAY seconds to come up before the
# first pass.
Info('Stats Daemon starting in '.START_DELAY.' seconds');
sleep(START_DELAY);

# Connect and set the session-wide default isolation level. The handle is
# checked before use: calling ->do on an undefined handle would kill the
# daemon outright, whereas the main loop already knows how to reconnect.
my $dbh = zmDbConnect();
if ($dbh) {
  $dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED');
} else {
  Error('Unable to connect to the database at startup, will retry in the main loop');
}

# Direct method call instead of indirect object syntax ("new Class(...)"),
# which perlobj discourages because of its parsing ambiguities.
my $server = ZoneMinder::Server->new($Config{ZM_SERVER_ID});
|
|
|
|
while (!$zm_terminate) {
|
|
# Make sure we have a live database handle before doing any work this pass.
# The loop also watches $zm_terminate so that a TERM/INT received while the
# database is unreachable still stops the daemon instead of retrying forever.
while ( !$zm_terminate and !( $dbh and $dbh->ping() ) ) {
  Info('Reconnecting to db');
  if ( $dbh = zmDbConnect() ) {
    # Session settings belong to the connection, so a fresh connection needs
    # the same isolation level the startup code applies.
    $dbh->do('SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED');
  } else {
    # What we do here is not that important, so just skip this interval
    sleep($Config{ZM_STATS_UPDATE_INTERVAL});
  }
}
# Leave the main loop if we were asked to stop while reconnecting.
last if $zm_terminate;
|
|
|
|
# Gather this pass's CPU figures from the server object.
my @cpuload = $server->CpuLoad();
Debug("Cpuload: @cpuload");
my ($user_percent, $nice_percent, $sys_percent, $idle_percent, $usage_percent) = $server->CpuUsage();

# Sample memory and swap once, each in scalar context, so the Servers row
# and the Server_Stats row written below carry the same values and a call
# can never inject an unexpected number of elements into an argument list.
my $total_mem  = totalmem();
my $free_mem   = freemem();
my $total_swap = totalswap();
my $free_swap  = freeswap();

# Refresh the Servers row, but only when this install has a server record.
if ($server->Id()) {
  my $in_transaction = ZoneMinder::Database::start_transaction($dbh);
  $server->lock_and_load(); # get fresh other values
  # A true return from save() is treated as failure. It is kept in a lexical
  # rather than assigned to the global $_.
  my $save_error = $server->save({
      CpuLoad          => $cpuload[0],
      TotalMem         => $total_mem,
      FreeMem          => $free_mem,
      TotalSwap        => $total_swap,
      FreeSwap         => $free_swap,
      CpuUserPercent   => $user_percent,
      CpuNicePercent   => $nice_percent,
      CpuSystemPercent => $sys_percent,
      CpuIdlePercent   => $idle_percent,
      CpuUsagePercent  => $usage_percent,
    });
  if ($save_error) {
    Error('Failed Updating status of Server record for Id='.$server->Id().': '.$dbh->errstr());
  }
  ZoneMinder::Database::end_transaction($dbh, $in_transaction);
}

# Append a history row; ServerId falls back to 0 when ZM_SERVER_ID is unset.
zmDbDo('INSERT INTO Server_Stats (ServerId, TimeStamp, CpuLoad, CpuUserPercent, CpuNicePercent, CpuSystemPercent, CpuIdlePercent, CpuUsagePercent, TotalMem, FreeMem, TotalSwap, FreeSwap) VALUES (?,NOW(),?,?,?,?,?,?,?,?,?,?)',
  ($Config{ZM_SERVER_ID} || 0),
  $cpuload[0], $user_percent, $nice_percent, $sys_percent, $idle_percent, $usage_percent,
  $total_mem, $free_mem, $total_swap, $free_swap);
|
|
|
|
# Trim Server_Stats rows older than one day, at most 100 per pass.
{
  my $rows = zmDbDo('DELETE FROM `Server_Stats` WHERE `TimeStamp` < now() - interval 1 DAY LIMIT 100');
  # Only report when a row count came back, as the other prune steps in this
  # script do; interpolating an undefined count would raise an
  # "uninitialized value" warning under -w.
  Debug("Deleted $rows Server Stats table entries by time") if defined $rows;
}
|
|
|
|
# Remove Monitor_Status rows whose UpdatedOn is more than a minute old, i.e.
# monitors that have stopped refreshing their own status.
my $monitor_ids = $dbh->selectcol_arrayref('SELECT MonitorId FROM Monitor_Status WHERE UpdatedOn < timestamp(DATE_SUB(NOW(), INTERVAL 1 MINUTE))');
if ($monitor_ids and @$monitor_ids) {
  # One placeholder per stale monitor id.
  my $placeholders = join(',', ('?') x @$monitor_ids);
  zmDbDo('DELETE FROM Monitor_Status WHERE MonitorId IN ('.$placeholders.')', @$monitor_ids);
}
|
|
|
|
# Prune aged rows from Events_Hour/Day/Week/Month and resync Event_Summaries
# in one transaction.
#
# The resync MUST NOT use a multi-table UPDATE that joins Event_Summaries to
# the bucket tables: a multi-table UPDATE takes S-locks on the joined rows
# and holds them to TX commit *regardless of isolation level*, which
# deadlocks against event_update_trigger / event_delete_trigger holding
# X-locks on those same bucket rows. Snapshot the bucket aggregates first
# via plain SELECT (consistent read at RC -> no locks), then UPDATE
# Event_Summaries one row at a time using the snapshotted values.
#
# READ COMMITTED is still set for the bucket DELETE range scans, so they
# don't take next-key/gap locks against concurrent filter deletes / zma
# trigger updates on adjacent EventIds.
#
# Atomicity tradeoff: between the per-bucket aggregate SELECT and the
# per-monitor UPDATE, a concurrent trigger writer (zma/zmc/Event::delete)
# can adjust Event_Summaries via the canonical lock chain. Our subsequent
# UPDATE will overwrite that adjustment with our older snapshot. This is
# intentional and safe: the bucket triggers keep ES drift bounded between
# zmstats passes, and any drift introduced by this race is corrected on
# the next pass. Locking ES before the snapshot would invert the canonical
# order and re-introduce the deadlock cycle this rewrite eliminated.
#
# Error handling: every statement inside the transaction is issued directly
# on $dbh and its result is checked immediately. zmDbDo is deliberately not
# used here. As noted below, zmDbDo logs through this same $dbh, and DBI
# resets err() on most handle calls, so reading $dbh->err() after zmDbDo
# could see the outcome of the logging statement rather than of the DELETE
# or UPDATE. That would hide a failure (including a 1213 deadlock) and let
# the transaction commit instead of rolling back and retrying.
{
  my $attempt = 0;
  my $max_attempts = 5;
  while (1) {
    $attempt++;
    # SET TRANSACTION ... applies only to the next transaction, so it must
    # be issued before begin_work and re-issued on each retry. Use
    # $dbh->do directly, NOT zmDbDo: zmDbDo's success Debug would write to
    # the Logs table on this same $dbh, and that INSERT would become the
    # "next transaction" that consumes the isolation directive - silently
    # dropping our prune+resync TX back to the default.
    $dbh->do('SET TRANSACTION ISOLATION LEVEL READ COMMITTED');
    $dbh->begin_work();

    my $err = 0;
    my $errstr; # captured before rollback() - rollback can clear errstr
    my %touched_monitors; # MonitorIds whose buckets we just modified
    my $pruned = 0; # bucket rows deleted in this attempt, for the summary Debug
    # Chunk size for DELETE WHERE EventId IN (...) - keeps each DELETE
    # well under max_allowed_packet / max_prepared_stmt_count on installs
    # where Events_Month has accumulated tens of thousands of aged rows,
    # while preserving PK-based per-row locking (DELETE by predicate would
    # range-lock the bucket index and re-introduce the lock-ordering
    # inversions this rewrite was meant to eliminate).
    my $delete_chunk = 1000;

    BUCKET: foreach my $bucket (
      ['Events_Hour', '1 hour'],
      ['Events_Day', '1 day'],
      ['Events_Week', '1 week'],
      ['Events_Month', '1 month'],
    ) {
      my ($table, $interval) = @$bucket;
      # Find the aged rows first so the DELETE can target them by EventId.
      my $rows = $dbh->selectall_arrayref(
        "SELECT EventId, MonitorId FROM $table WHERE StartDateTime < DATE_SUB(NOW(), INTERVAL $interval)"
      );
      $err = $dbh->err() // 0;
      if ($err) { $errstr = $dbh->errstr() // ''; last BUCKET; }
      next BUCKET if !$rows or !@$rows;

      my @event_ids = map { $_->[0] } @$rows;
      $touched_monitors{$_->[1]} = 1 for @$rows;

      for (my $i = 0; $i < @event_ids; $i += $delete_chunk) {
        my $end = $i + $delete_chunk - 1;
        $end = $#event_ids if $end > $#event_ids;
        my @batch = @event_ids[$i .. $end];
        my $deleted = $dbh->do(
          "DELETE FROM $table WHERE EventId IN (".join(',', ('?') x @batch).')',
          undef, @batch
        );
        if (!defined $deleted) {
          # Grab the code and text right away, before any other call on $dbh.
          # The -1 fallback guarantees a failed statement is never mistaken
          # for success even if the driver reported no error code.
          $err = $dbh->err() || -1;
          $errstr = $dbh->errstr() // '';
          last BUCKET;
        }
        $pruned += $deleted if $deleted > 0;
      }
    }

    # Only resync ES for monitors we actually touched in this cycle. If
    # nothing was pruned, the bucket triggers maintain ES correctly between
    # zmstats passes; zmaudit is the periodic deep-resync safety net.
    # Restricting to touched monitors also avoids X-locking every ES row
    # on every zmstats cycle (which would contend with the trigger writers
    # this rewrite is meant to protect).
    if (!$err and %touched_monitors) {
      my @mids = sort { $a <=> $b } keys %touched_monitors;
      my $placeholders = join(',', ('?') x @mids);

      # Snapshot the per-monitor bucket aggregates for the touched monitors
      # only. Plain SELECT under RC is a consistent read and takes no row
      # locks, so this can't deadlock with the trigger writers.
      my %agg;
      $agg{$_} ||= {} for @mids; # seed so monitors with zero rows still get zeroed
      foreach my $bucket (
        ['Events_Hour', 'h'],
        ['Events_Day', 'd'],
        ['Events_Week', 'w'],
        ['Events_Month', 'm'],
      ) {
        my ($table, $key) = @$bucket;
        my $rows = $dbh->selectall_arrayref(
          "SELECT MonitorId, COUNT(*), COALESCE(SUM(DiskSpace), 0) FROM $table".
          " WHERE MonitorId IN ($placeholders) GROUP BY MonitorId",
          undef, @mids
        );
        $err = $dbh->err() // 0;
        if ($err) { $errstr = $dbh->errstr() // ''; last; }
        for my $r (@$rows) {
          $agg{$r->[0]}{$key.'_c'} = $r->[1];
          $agg{$r->[0]}{$key.'_s'} = $r->[2];
        }
      }

      # One UPDATE per touched monitor. The transaction at this point is
      # still holding the bucket-row X-locks acquired by the earlier
      # DELETEs and any ES X-locks the bucket DELETE triggers acquired as
      # a cascade. Those were all acquired in the canonical order
      # (buckets -> ES) so they don't conflict with the trigger writers.
      # The new statement itself only X-locks the one ES row it targets
      # and reads no other table, so it doesn't add any cross-table
      # dependency that could form a new cycle - its lock acquisition
      # continues in the same direction.
      if (!$err) {
        for my $mid (@mids) {
          # Named $totals rather than $a so the sort variable is not shadowed.
          my $totals = $agg{$mid};
          my $updated = $dbh->do(
            'UPDATE Event_Summaries SET '.
            'HourEvents=?, HourEventDiskSpace=?, '.
            'DayEvents=?, DayEventDiskSpace=?, '.
            'WeekEvents=?, WeekEventDiskSpace=?, '.
            'MonthEvents=?, MonthEventDiskSpace=? '.
            'WHERE MonitorId=?',
            undef,
            $totals->{h_c} // 0, $totals->{h_s} // 0,
            $totals->{d_c} // 0, $totals->{d_s} // 0,
            $totals->{w_c} // 0, $totals->{w_s} // 0,
            $totals->{m_c} // 0, $totals->{m_s} // 0,
            $mid
          );
          if (!defined $updated) {
            $err = $dbh->err() || -1;
            $errstr = $dbh->errstr() // '';
            last;
          }
        }
      }
    }

    if (!$err) {
      $dbh->commit();
      # Logged only after the commit, so the log write is not part of the
      # prune+resync transaction.
      Debug("Pruned $pruned aged event bucket rows and resynced Event_Summaries for ".scalar(keys %touched_monitors).' monitor(s)') if $pruned;
      last;
    }

    $dbh->rollback();
    if ($err != 1213 or $attempt >= $max_attempts) { # 1213 = ER_LOCK_DEADLOCK
      Error("Event_Summaries prune+resync gave up after $attempt attempt(s): ".($errstr // ''));
      last;
    }
    Debug("Deadlock during Event_Summaries prune+resync, attempt $attempt/$max_attempts");
    # Exponential backoff with a little jitter before the next attempt.
    select(undef, undef, undef, 0.05 * (1 << $attempt) + rand(0.05));
  }
}
|
|
|
|
# Prune the Logs table if required (excluding AUDIT entries).
# ZM_LOG_DATABASE_LIMIT is either a plain integer (maximum number of rows to
# keep) or an interval expression that is handed to SQL "interval ...".
if ( $Config{ZM_LOG_DATABASE_LIMIT} ) {
  my $audit_level = ZoneMinder::Logger::AUDIT;
  if ( $Config{ZM_LOG_DATABASE_LIMIT} =~ /^\d+$/ ) {
    # Number of rows
    my $selectLogRowCountSql = 'SELECT count(*) AS `Rows` FROM `Logs` WHERE `Level` != ?';
    my $selectLogRowCountSth = $dbh->prepare_cached( $selectLogRowCountSql )
      or Fatal("Can't prepare '$selectLogRowCountSql': ".$dbh->errstr());
    $selectLogRowCountSth->execute($audit_level)
      or Fatal("Can't execute: ".$selectLogRowCountSth->errstr());
    my $row = $selectLogRowCountSth->fetchrow_hashref();
    # Only one row is read, so release the cached handle explicitly;
    # otherwise prepare_cached can find it still active on the next pass.
    $selectLogRowCountSth->finish();
    my $logRows = ($row and defined $row->{Rows}) ? $row->{Rows} : 0;
    if ( $logRows > $Config{ZM_LOG_DATABASE_LIMIT} ) {
      # Delete the oldest rows beyond the configured maximum.
      my $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` != ? ORDER BY `TimeKey` ASC LIMIT ?', $audit_level, $logRows - $Config{ZM_LOG_DATABASE_LIMIT});
      Debug('Deleted '.$rows.' log table entries by count') if defined $rows;
    }
  } else {
    # Time of record

    # A plural unit such as "7 days" is not a valid SQL interval, so strip a
    # trailing "s" (the stripped value is stored back into %Config).
    if ( $Config{ZM_LOG_DATABASE_LIMIT} =~ /^(.*)s$/ ) {
      $Config{ZM_LOG_DATABASE_LIMIT} = $1;
    }
    # Delete in batches of 100 until a short batch shows we are done, the
    # delete fails, or termination has been requested.
    my $rows;
    do {
      $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` != ? AND `TimeKey` < unix_timestamp(now() - interval '.$Config{ZM_LOG_DATABASE_LIMIT}.') LIMIT 100', $audit_level);
      Debug("Deleted $rows log table entries by time") if $rows;
    } while ($rows and ($rows == 100) and !$zm_terminate);
  }
} # end if ZM_LOG_DATABASE_LIMIT
|
|
|
|
# Prune AUDIT log entries separately with their own retention period.
# ZM_LOG_AUDIT_DATABASE_LIMIT is either a plain integer (maximum number of
# rows to keep) or an interval expression handed to SQL "interval ...".
if ( $Config{ZM_LOG_AUDIT_DATABASE_LIMIT} ) {
  my $audit_level = ZoneMinder::Logger::AUDIT;
  my $audit_limit = $Config{ZM_LOG_AUDIT_DATABASE_LIMIT};
  if ( $audit_limit =~ /^\d+$/ ) {
    # Number of rows
    my $sth = $dbh->prepare_cached('SELECT count(*) AS `Rows` FROM `Logs` WHERE `Level` = ?')
      or Fatal("Can't prepare audit log count: ".$dbh->errstr());
    $sth->execute($audit_level)
      or Fatal("Can't execute audit log count: ".$sth->errstr());
    my $row = $sth->fetchrow_hashref();
    # Only one row is read, so release the cached handle explicitly;
    # otherwise prepare_cached can find it still active on the next pass.
    $sth->finish();
    my $logRows = ($row and defined $row->{Rows}) ? $row->{Rows} : 0;
    if ( $logRows > $audit_limit ) {
      # Delete the oldest AUDIT rows beyond the configured maximum.
      my $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` = ? ORDER BY `TimeKey` ASC LIMIT ?', $audit_level, $logRows - $audit_limit);
      Debug('Deleted '.$rows.' audit log entries by count') if defined $rows;
    }
  } else {
    # Time of record: strip a trailing "s" from a plural unit on the local
    # copy, leaving %Config untouched.
    $audit_limit =~ s/s$//;
    # Delete in batches of 100 until a short batch shows we are done, the
    # delete fails, or termination has been requested.
    my $rows;
    do {
      $rows = zmDbDo('DELETE low_priority FROM `Logs` WHERE `Level` = ? AND `TimeKey` < unix_timestamp(now() - interval '.$audit_limit.') LIMIT 100', $audit_level);
      Debug("Deleted $rows audit log entries by time") if $rows;
    } while ($rows and ($rows == 100) and !$zm_terminate);
  }
} # end if ZM_LOG_AUDIT_DATABASE_LIMIT
|
|
|
|
# Expire Sessions rows whose last access is older than ZM_COOKIE_LIFETIME
# (subtracted from the current epoch time). Rows go in batches of 100; keep
# going while every batch comes back full and no termination was requested.
SESSION_BATCH: while (1) {
  my $cutoff = time - $Config{ZM_COOKIE_LIFETIME};
  my $deleted = zmDbDo('DELETE FROM Sessions WHERE access < ? LIMIT 100', $cutoff);
  last SESSION_BATCH unless $deleted;
  Debug("Deleted $deleted sessions");
  last SESSION_BATCH if $deleted != 100 or $zm_terminate;
}

# Wait out the configured interval before the next pass.
sleep($Config{ZM_STATS_UPDATE_INTERVAL});
|
|
} # end while (!$zm_terminate)
|
|
|
|
# The main loop has ended: log the shutdown and leave with exit status 0.
Info('Stats Daemon exiting');
exit(0);

1;
|
|
__END__
|
|
|
|
#
|
|
# ==========================================================================
|
|
#
|
|
# ZoneMinder Stats Updating Script, $Date$, $Revision$
|
|
# Copyright (C) 2001-2008 Philip Coombes
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
#
|
|
# ==========================================================================
|
|
|
|
=head1 NAME
|
|
|
|
zmstats.pl - ZoneMinder Stats Updating Script
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
zmstats.pl
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This script runs in the background and periodically updates various statistics in the database, such as event counts and disk space usage, and prunes old log, session and stats rows.
|
|
|
|
=cut
|