Add retry logic with exponential backoff

Co-authored-by: connortechnology <925519+connortechnology@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-12-18 17:23:39 +00:00
parent f79bc2b895
commit 74414ddecc
2 changed files with 59 additions and 2 deletions

View File

@@ -347,7 +347,9 @@ class Monitor : public std::enable_shared_from_this<Monitor> {
void set_credentials(struct soap *soap); void set_credentials(struct soap *soap);
bool try_usernametoken_auth; // Track if we should try plain auth bool try_usernametoken_auth; // Track if we should try plain auth
int retry_count; // Track retry attempts int retry_count; // Track retry attempts
int max_retries; // Maximum retry attempts before giving up
std::string discovered_event_endpoint; // Store discovered endpoint std::string discovered_event_endpoint; // Store discovered endpoint
SystemTimePoint last_retry_time; // Time of last retry attempt
// Configurable timeout values (can be set via onvif_options) // Configurable timeout values (can be set via onvif_options)
std::string pull_timeout; // Default "PT20S" std::string pull_timeout; // Default "PT20S"
@@ -358,6 +360,7 @@ class Monitor : public std::enable_shared_from_this<Monitor> {
bool matches_topic_filter(const std::string &topic, const std::string &filter); bool matches_topic_filter(const std::string &topic, const std::string &filter);
void log_soap_request_response(const char *operation); void log_soap_request_response(const char *operation);
void parse_onvif_options(); // Parse options from parent->onvif_options void parse_onvif_options(); // Parse options from parent->onvif_options
int get_retry_delay(); // Calculate exponential backoff delay
#endif #endif
std::unordered_map<std::string, std::string> alarms; std::unordered_map<std::string, std::string> alarms;
std::mutex alarms_mutex; std::mutex alarms_mutex;

View File

@@ -47,12 +47,14 @@ Monitor::ONVIF::ONVIF(Monitor *parent_) :
,soap(nullptr) ,soap(nullptr)
,try_usernametoken_auth(false) ,try_usernametoken_auth(false)
,retry_count(0) ,retry_count(0)
,max_retries(5)
,pull_timeout("PT20S") ,pull_timeout("PT20S")
,subscription_timeout("PT60S") ,subscription_timeout("PT60S")
#endif #endif
{ {
#ifdef WITH_GSOAP #ifdef WITH_GSOAP
parse_onvif_options(); parse_onvif_options();
last_retry_time = std::chrono::system_clock::now();
#endif #endif
} }
@@ -188,7 +190,9 @@ void Monitor::ONVIF::start() {
rc = proxyEvent.CreatePullPointSubscription(&request, response); rc = proxyEvent.CreatePullPointSubscription(&request, response);
if (rc != SOAP_OK) { if (rc != SOAP_OK) {
Error("ONVIF: Plain authentication also failed. Error %d: %s", rc, soap_fault_string(soap)); retry_count++;
Error("ONVIF: Plain authentication also failed (retry %d/%d). Error %d: %s",
retry_count, max_retries, rc, soap_fault_string(soap));
if (config.log_level >= 3) { if (config.log_level >= 3) {
std::stringstream ss; std::stringstream ss;
std::ostream *old_stream = soap->os; std::ostream *old_stream = soap->os;
@@ -199,16 +203,27 @@ void Monitor::ONVIF::start() {
Debug(3, "ONVIF: Response was %s", ss.str().c_str()); Debug(3, "ONVIF: Response was %s", ss.str().c_str());
} }
if (retry_count >= max_retries) {
Error("ONVIF: Max retries (%d) reached, giving up on subscription", max_retries);
} else {
int delay = get_retry_delay();
Info("ONVIF: Will retry subscription in %d seconds (attempt %d/%d)",
delay, retry_count + 1, max_retries);
}
soap_destroy(soap); soap_destroy(soap);
soap_end(soap); soap_end(soap);
soap_free(soap); soap_free(soap);
soap = nullptr; soap = nullptr;
healthy = false;
return; return;
} }
Info("ONVIF: Plain authentication succeeded"); Info("ONVIF: Plain authentication succeeded");
retry_count = 0; // Reset retry count on success
} else { } else {
// Not an auth error or already tried plain auth // Not an auth error or already tried plain auth
retry_count++;
if (config.log_level >= 3) { if (config.log_level >= 3) {
std::stringstream ss; std::stringstream ss;
std::ostream *old_stream = soap->os; std::ostream *old_stream = soap->os;
@@ -219,13 +234,24 @@ void Monitor::ONVIF::start() {
Debug(3, "ONVIF: Response was %s", ss.str().c_str()); Debug(3, "ONVIF: Response was %s", ss.str().c_str());
} }
if (retry_count >= max_retries) {
Error("ONVIF: Max retries (%d) reached, giving up on subscription", max_retries);
} else {
int delay = get_retry_delay();
Info("ONVIF: Will retry subscription in %d seconds (attempt %d/%d)",
delay, retry_count + 1, max_retries);
}
soap_destroy(soap); soap_destroy(soap);
soap_end(soap); soap_end(soap);
soap_free(soap); soap_free(soap);
soap = nullptr; soap = nullptr;
healthy = false;
return; return;
} }
} } else {
// Success - reset retry count
retry_count = 0;
Debug(1, "ONVIF: Successfully created PullPoint subscription"); Debug(1, "ONVIF: Successfully created PullPoint subscription");
@@ -324,6 +350,13 @@ void Monitor::ONVIF::WaitForMessage() {
Debug(3, "ONVIF: Response was %s", ss.str().c_str()); Debug(3, "ONVIF: Response was %s", ss.str().c_str());
} }
retry_count++;
if (retry_count >= max_retries) {
Error("ONVIF: Max retries (%d) reached for PullMessages, subscription may be lost", max_retries);
} else {
Info("ONVIF: PullMessages failed (attempt %d/%d), will continue trying",
retry_count, max_retries);
}
healthy = false; healthy = false;
} else { } else {
// SOAP_EOF - this is just a timeout, not an error // SOAP_EOF - this is just a timeout, not an error
@@ -336,8 +369,15 @@ void Monitor::ONVIF::WaitForMessage() {
// For now, just leave alarms as-is on timeout // For now, just leave alarms as-is on timeout
Debug(3, "ONVIF: Timeout - keeping existing alarms. Current alarm count: %zu, alarmed: %s", Debug(3, "ONVIF: Timeout - keeping existing alarms. Current alarm count: %zu, alarmed: %s",
alarms.size(), alarmed ? "true" : "false"); alarms.size(), alarmed ? "true" : "false");
// Timeout is not an error, don't increment retry_count
} }
} else { } else {
// Success - reset retry count
if (retry_count > 0) {
Info("ONVIF: PullMessages succeeded after %d failed attempts", retry_count);
retry_count = 0;
}
Debug(1, "ONVIF polling : Got Good Response! %i, # of messages %zu", result, tev__PullMessagesResponse.wsnt__NotificationMessage.size()); Debug(1, "ONVIF polling : Got Good Response! %i, # of messages %zu", result, tev__PullMessagesResponse.wsnt__NotificationMessage.size());
{ // Scope for lock { // Scope for lock
std::unique_lock<std::mutex> lck(alarms_mutex); std::unique_lock<std::mutex> lck(alarms_mutex);
@@ -480,6 +520,9 @@ void Monitor::ONVIF::parse_onvif_options() {
} else if (key == "subscription_timeout") { } else if (key == "subscription_timeout") {
subscription_timeout = value; subscription_timeout = value;
Debug(2, "ONVIF: Set subscription_timeout to %s", subscription_timeout.c_str()); Debug(2, "ONVIF: Set subscription_timeout to %s", subscription_timeout.c_str());
} else if (key == "max_retries") {
max_retries = std::stoi(value);
Debug(2, "ONVIF: Set max_retries to %d", max_retries);
} }
} }
start = pos + 1; start = pos + 1;
@@ -498,10 +541,21 @@ void Monitor::ONVIF::parse_onvif_options() {
} else if (key == "subscription_timeout") { } else if (key == "subscription_timeout") {
subscription_timeout = value; subscription_timeout = value;
Debug(2, "ONVIF: Set subscription_timeout to %s", subscription_timeout.c_str()); Debug(2, "ONVIF: Set subscription_timeout to %s", subscription_timeout.c_str());
} else if (key == "max_retries") {
max_retries = std::stoi(value);
Debug(2, "ONVIF: Set max_retries to %d", max_retries);
} }
} }
} }
// Calculate exponential backoff delay for retries
// Returns delay in seconds: 2^retry_count (capped at 300 seconds = 5 minutes)
int Monitor::ONVIF::get_retry_delay() {
int delay = 1 << retry_count; // 2^retry_count
if (delay > 300) delay = 300; // Cap at 5 minutes
return delay;
}
//ONVIF Set Credentials //ONVIF Set Credentials
void Monitor::ONVIF::set_credentials(struct soap *soap) { void Monitor::ONVIF::set_credentials(struct soap *soap) {
soap_wsse_delete_Security(soap); soap_wsse_delete_Security(soap);