bb10979: clamd multi-threaded database reload #2

Add clamd config option to force blocking clamd database reload to
conserve RAM. Users may set `ConcurrentDatabaseReload no` in their
clamd.conf config file to force a blocking reload.

The blocking mode will still perform the reload in a new thread, but
will first free the current database, wait for scans targeting that
database to complete, and then load the new database in the new thread
and wait (`pthread_join()`) on that thread. Once loaded, any pending
scans will continue. This is effectively the same behavior as how
clamd reloads worked before the multi-threaded database reload feature
was added.
This commit is contained in:
Micah Snyder
2020-06-09 13:54:29 -04:00
committed by Micah Snyder (micasnyd)
parent 2444a01f19
commit dac084fb91
8 changed files with 167 additions and 24 deletions

18
NEWS.md
View File

@@ -9,6 +9,24 @@ ClamAV 0.103.0 includes the following improvements and changes.
### Major changes
- Clamd can now reload the signature database without blocking scanning.
This multi-threaded database reload improvement was made possible thanks to
a community effort.
Non-blocking database reloads are now the default behavior. Some systems that
are more constrained on RAM may need to disable non-blocking reloads as it will
temporarily consume 2x as much memory. For this purpose we have added a new
clamd config option `ConcurrentDatabaseReload` which may be set to `no`.
Special thanks to the following for making this feature a reality:
- Alberto Wu
- Alexander Sulfrian
- Arjen de Korte
- David Heidelberg
- Ged Haywood
Thank you all for your patience waiting for this feature.
### Notable changes
- The DLP module has been enhanced with additional credit card ranges and a new

View File

@@ -86,9 +86,9 @@ time_t reloaded_time = 0;
pthread_mutex_t reload_mutex = PTHREAD_MUTEX_INITIALIZER;
int sighup = 0;
static pthread_mutex_t rldstage_mutex = PTHREAD_MUTEX_INITIALIZER;
static reload_stage_t reload_stage = RELOAD_STAGE__IDLE; /* protected by rldstage_mutex */
struct cl_engine *g_newengine = NULL; /* protected by rldstage_mutex */
static pthread_mutex_t reload_stage_mutex = PTHREAD_MUTEX_INITIALIZER;
static reload_stage_t reload_stage = RELOAD_STAGE__IDLE; /* protected by reload_stage_mutex */
struct cl_engine *g_newengine = NULL; /* protected by reload_stage_mutex */
extern pthread_mutex_t logg_mutex;
static struct cl_stat dbstat;
@@ -265,10 +265,10 @@ done:
}
}
pthread_mutex_lock(&rldstage_mutex);
pthread_mutex_lock(&reload_stage_mutex);
reload_stage = RELOAD_STAGE__NEW_DB_AVAILABLE; /* New DB available */
g_newengine = engine;
pthread_mutex_unlock(&rldstage_mutex);
pthread_mutex_unlock(&reload_stage_mutex);
#ifdef _WIN32
SetEvent(event_wake_recv);
@@ -284,12 +284,12 @@ done:
/**
* @brief Reload the database.
*
* @param engine The current scan engine, used to copy the settings.
* @param dboptions The current database options, used to copy the options.
* @param opts The command line options, used to get the database directory.
* @return cl_error_t CL_SUCCESS if the reload thread was successfully started. This does not mean that the database has reloaded successfully.
* @param[in,out] engine The current scan engine, used to copy the settings.
* @param dboptions The current database options, used to copy the options.
* @param opts The command line options, used to get the database directory.
* @return cl_error_t CL_SUCCESS if the reload thread was successfully started. This does not mean that the database has reloaded successfully.
*/
static cl_error_t reload_db(struct cl_engine *engine, unsigned int dboptions, const struct optstruct *opts)
static cl_error_t reload_db(struct cl_engine **engine, unsigned int dboptions, const struct optstruct *opts, threadpool_t *thr_pool)
{
cl_error_t status = CL_EMALFDB;
cl_error_t retval;
@@ -297,7 +297,7 @@ static cl_error_t reload_db(struct cl_engine *engine, unsigned int dboptions, co
pthread_t th;
pthread_attr_t th_attr;
if (NULL == opts) {
if (NULL == opts || NULL == engine) {
logg("!reload_db: Invalid arguments, unable to load signature databases.\n");
status = CL_EARG;
goto done;
@@ -313,9 +313,9 @@ static cl_error_t reload_db(struct cl_engine *engine, unsigned int dboptions, co
rldata->dboptions = dboptions;
if (engine) {
if (*engine) {
/* copy current settings */
rldata->settings = cl_engine_settings_copy(engine);
rldata->settings = cl_engine_settings_copy(*engine);
if (!rldata->settings) {
logg("!Can't make a copy of the current engine settings\n");
goto done;
@@ -339,12 +339,31 @@ static cl_error_t reload_db(struct cl_engine *engine, unsigned int dboptions, co
goto done;
}
if (*engine) {
if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
/*
* If concurrent reload disabled, we'll NULL out the current engine and deref it.
* It will only actually be free'd once the last scan finishes.
*/
thrmgr_setactiveengine(NULL);
cl_engine_free(*engine);
*engine = NULL;
/* Wait for all scans to finish */
thrmgr_wait_for_threads(thr_pool);
}
}
if (pthread_attr_init(&th_attr)) {
logg("!Failed to init reload thread attributes\n");
goto done;
}
pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_DETACHED);
if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
/* For concurrent reloads: set detached, so we don't leak thread resources */
pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_DETACHED);
}
retval = pthread_create(&th, &th_attr, reload_th, rldata);
if (pthread_attr_destroy(&th_attr))
logg("^Failed to release reload thread attributes\n");
@@ -353,6 +372,32 @@ static cl_error_t reload_db(struct cl_engine *engine, unsigned int dboptions, co
goto done;
}
if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
/* For non-concurrent reloads: join the thread */
int join_ret = pthread_join(th, NULL);
switch (join_ret) {
case 0:
logg("Database reload completed.\n");
break;
case EDEADLK:
logg("!A deadlock was detected when waiting for the database reload thread.\n");
goto done;
case ESRCH:
logg("!Failed to find database reload thread.\n");
goto done;
case EINVAL:
logg("!The database reload thread is not a joinable thread.\n");
goto done;
default:
logg("!An unknown error occured when waiting for the database reload thread: %d\n", join_ret);
goto done;
}
}
status = CL_SUCCESS;
done:
@@ -1269,7 +1314,7 @@ int recvloop(int *socketds, unsigned nsockets, struct cl_engine *engine, unsigne
val = cl_engine_get_num(engine, CL_ENGINE_MIN_CC_COUNT, NULL);
logg("Structured: Minimum Credit Card Number Count set to %u\n", (unsigned int)val);
if(optget(opts, "StructuredCCOnly")->enabled)
if (optget(opts, "StructuredCCOnly")->enabled)
options.heuristic |= CL_SCAN_HEURISTIC_STRUCTURED_CC;
if ((opt = optget(opts, "StructuredMinSSNCount"))->enabled) {
@@ -1675,35 +1720,51 @@ int recvloop(int *socketds, unsigned nsockets, struct cl_engine *engine, unsigne
/* DB reload */
pthread_mutex_lock(&reload_mutex);
if (reload) {
pthread_mutex_unlock(&reload_mutex);
/* Reload was requested */
pthread_mutex_lock(&rldstage_mutex);
pthread_mutex_lock(&reload_stage_mutex);
if (reload_stage == RELOAD_STAGE__IDLE) {
/* Reloading not already taking place */
reload_stage = RELOAD_STAGE__RELOADING;
if (CL_SUCCESS != reload_db(engine, dboptions, opts)) {
pthread_mutex_unlock(&reload_stage_mutex);
if (CL_SUCCESS != reload_db(&engine, dboptions, opts, thr_pool)) {
logg("^Database reload setup failed, keeping the previous instance\n");
reload = 0;
pthread_mutex_lock(&reload_mutex);
reload = 0;
pthread_mutex_unlock(&reload_mutex);
pthread_mutex_lock(&reload_stage_mutex);
reload_stage = RELOAD_STAGE__IDLE;
pthread_mutex_unlock(&reload_stage_mutex);
}
} else if (reload_stage == RELOAD_STAGE__NEW_DB_AVAILABLE) {
pthread_mutex_lock(&reload_stage_mutex);
}
if (reload_stage == RELOAD_STAGE__NEW_DB_AVAILABLE) {
/* New database available */
if (g_newengine) {
/* Reload succeeded */
logg("Activating the newly loaded database...\n");
thrmgr_setactiveengine(g_newengine);
cl_engine_free(engine);
if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
/* If concurrent database reload, we now need to free the old engine. */
cl_engine_free(engine);
}
engine = g_newengine;
g_newengine = NULL;
} else {
logg("^Database reload failed, keeping the previous instance\n");
}
reload_stage = RELOAD_STAGE__IDLE;
reload = 0;
pthread_mutex_unlock(&reload_stage_mutex);
pthread_mutex_lock(&reload_mutex);
reload = 0;
pthread_mutex_unlock(&reload_mutex);
time(&reloaded_time);
} else {
pthread_mutex_unlock(&reload_stage_mutex);
}
pthread_mutex_unlock(&rldstage_mutex);
} else {
pthread_mutex_unlock(&reload_mutex);
}
pthread_mutex_unlock(&reload_mutex);
}
pthread_mutex_lock(&exit_mutex);

View File

@@ -359,6 +359,44 @@ void thrmgr_destroy(threadpool_t *threadpool)
return;
}
/**
 * @brief Block the caller until the pool's thr_alive counter is no longer positive.
 *
 * Does nothing when given a NULL pool. When the pool is not in the POOL_VALID
 * state, the pool mutex is released again and the function returns without waiting.
 *
 * A failure to lock or unlock the pool mutex on the main path is fatal: it is
 * logged and the process exits with -1. A failure of the condition variable
 * broadcast or wait is not reported; the mutex is released and the function
 * returns early, so callers cannot distinguish that case from a completed wait.
 *
 * @param threadpool The thread pool to wait on. May be NULL.
 */
void thrmgr_wait_for_threads(threadpool_t *threadpool)
{
    if (NULL == threadpool) {
        return;
    }

    if (0 != pthread_mutex_lock(&threadpool->pool_mutex)) {
        logg("!Mutex lock failed\n");
        exit(-1);
    }

    /* Only a valid pool is waited on; any other state falls through to the unlock. */
    if (POOL_VALID == threadpool->state) {
        /* Signal pool_cond once up front, but only if there is something to wait for.
         * NOTE(review): presumably this wakes workers blocked on pool_cond - confirm against the worker loop. */
        if ((threadpool->thr_alive > 0) &&
            (0 != pthread_cond_broadcast(&(threadpool->pool_cond)))) {
            pthread_mutex_unlock(&threadpool->pool_mutex);
            return;
        }

        /* pthread_cond_wait() releases pool_mutex while blocked and re-acquires it
         * before returning, so thr_alive is always re-checked under the lock. */
        while (threadpool->thr_alive > 0) {
            if (0 != pthread_cond_wait(&threadpool->pool_cond, &threadpool->pool_mutex)) {
                pthread_mutex_unlock(&threadpool->pool_mutex);
                return;
            }
        }
    }

    if (0 != pthread_mutex_unlock(&threadpool->pool_mutex)) {
        logg("!Mutex unlock failed\n");
        exit(-1);
    }
}
threadpool_t *thrmgr_new(int max_threads, int idle_timeout, int max_queue, void (*handler)(void *))
{
threadpool_t *threadpool;

View File

@@ -99,6 +99,7 @@ enum thrmgr_exit {
threadpool_t *thrmgr_new(int max_threads, int idle_timeout, int max_queue, void (*handler)(void *));
void thrmgr_destroy(threadpool_t *threadpool);
/**
 * @brief Block until the pool's thr_alive counter is no longer positive.
 *
 * Returns immediately for a NULL pool or a pool that is not in the POOL_VALID state.
 * Returns no status: a failed condition-variable broadcast/wait causes a silent early return.
 *
 * @param threadpool The thread pool to wait on. May be NULL.
 */
void thrmgr_wait_for_threads(threadpool_t *threadpool);
int thrmgr_dispatch(threadpool_t *threadpool, void *user_data);
int thrmgr_group_dispatch(threadpool_t *threadpool, jobgroup_t *group, void *user_data, int bulk);
void thrmgr_group_waitforall(jobgroup_t *group, unsigned *ok, unsigned *error, unsigned *total);

View File

@@ -234,6 +234,11 @@ should perform a database check.
.br
Default: 600
.TP
\fBConcurrentDatabaseReload BOOL\fR
Enable non-blocking (multi-threaded/concurrent) database reloads. This feature will temporarily load a second scanning engine while scanning continues using the first engine. Once loaded, the new engine takes over. The old engine is removed as soon as all scans using the old engine have completed. This feature requires more RAM, so this option is provided in case users are willing to block scans during reload in exchange for lower RAM requirements.
.br
Default: yes
.TP
\fBVirusEvent COMMAND\fR
Execute a command when a virus is found. In the command string %v will be
replaced with the virus name. Additionally, two environment variables will
@@ -398,7 +403,7 @@ Default: 3
.TP
\fBStructuredCCOnly BOOL\fR
With this option enabled the DLP module will search for valid Credit Card numbers only. Debit and Private Label cards will not be searched.
.br
.br
Default: No
.TP
\fBStructuredMinSSNCount NUMBER\fR

View File

@@ -196,6 +196,15 @@ Example
# Default: 600 (10 min)
#SelfCheck 600
# Enable non-blocking (multi-threaded/concurrent) database reloads. This feature
# will temporarily load a second scanning engine while scanning continues using
# the first engine. Once loaded, the new engine takes over. The old engine is
# removed as soon as all scans using the old engine have completed. This feature
# requires more RAM, so this option is provided in case users are willing to
# block scans during reload in exchange for lower RAM requirements.
# Default: yes
#ConcurrentDatabaseReload no
# Execute a command when virus is found. In the command string %v will
# be replaced with the virus name.
# Default: no

View File

@@ -271,6 +271,8 @@ const struct clam_option __clam_options[] = {
{"SelfCheck", NULL, 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, 600, NULL, 0, OPT_CLAMD, "This option specifies the time intervals (in seconds) in which clamd\nshould perform a database check.", "600"},
{"ConcurrentDatabaseReload", NULL, 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 1, NULL, 0, OPT_CLAMD, "Enable non-blocking (multi-threaded/concurrent) database reloads. This feature \nwill temporarily load a second scanning engine while scanning continues using \nthe first engine. Once loaded, the new engine takes over. The old engine is \nremoved as soon as all scans using the old engine have completed. This feature \nrequires more RAM, so this option is provided in case users are willing to \nblock scans during reload in exchange for lower RAM requirements.", "yes"},
{"DisableCache", "disable-cache", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "This option allows you to disable clamd's caching feature.", "no"},
{"VirusEvent", NULL, 0, CLOPT_TYPE_STRING, NULL, -1, NULL, 0, OPT_CLAMD, "Execute a command when a virus is found. In the command string %v will be\nreplaced with the virus name. Additionally, two environment variables will\nbe defined: $CLAM_VIRUSEVENT_FILENAME and $CLAM_VIRUSEVENT_VIRUSNAME.", "/usr/bin/mailx -s \"ClamAV VIRUS ALERT: %v\" alert < /dev/null"},

View File

@@ -172,6 +172,15 @@ TCPAddr 127.0.0.1
# Default: 600 (10 min)
#SelfCheck 600
# Enable non-blocking (multi-threaded/concurrent) database reloads. This feature
# will temporarily load a second scanning engine while scanning continues using
# the first engine. Once loaded, the new engine takes over. The old engine is
# removed as soon as all scans using the old engine have completed. This feature
# requires more RAM, so this option is provided in case users are willing to
# block scans during reload in exchange for lower RAM requirements.
# Default: yes
#ConcurrentDatabaseReload no
# Execute a command when virus is found. In the command string %v will
# be replaced with the virus name.
# Default: no