mirror of
https://github.com/weewx/weewx.git
synced 2026-04-18 16:46:56 -04:00
wee_database previously did daily summary backfills in one datbase transaction. This resulted in errors for large archives being processed on machines with relatively small amounts of memory. The TRANS-DAYS command line parameter allows the backfill to be completed in a series of database transactions using no more than TRANS-DAYS data at a time. To provide a consistent approach to logging some minor changes were made to the logging in DaySummaryManager.backfill_day_summary(), DaySummaryManager.drop_daily() and StdArchive.setup_database().
448 lines
22 KiB
Python
Executable File
448 lines
22 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Copyright (c) 2009-2015 Tom Keffer <tkeffer@gmail.com>
|
|
#
|
|
# See the file LICENSE.txt for your full rights.
|
|
#
|
|
"""Configure the databases used by weewx"""
|
|
from __future__ import with_statement
|
|
|
|
import optparse
|
|
import syslog
|
|
import sys
|
|
import time
|
|
|
|
import user.extensions #@UnusedImport
|
|
import weedb
|
|
import weeutil.weeutil
|
|
import weewx.manager
|
|
import weewx.units
|
|
import weecfg
|
|
import weewx
|
|
|
|
from weeutil.weeutil import TimeSpan
|
|
from weewx.manager import get_database_dict_from_config
|
|
|
|
description = """Configure the weewx databases. Most of these functions are
|
|
handled automatically by weewx, but they may be useful as a utility in special
|
|
cases. In particular, the 'reconfigure' option can be useful if you decide to
|
|
add or drop data types from the database schema or change unit systems and the
|
|
'transfer' option is useful if converting from one database type to another."""
|
|
|
|
usage = """wee_database --help
|
|
wee_database --create-archive
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME]
|
|
wee_database --drop-daily
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME]
|
|
wee_database --backfill-daily
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME]
|
|
[--trans-days=DAYS]
|
|
wee_database --reconfigure
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME]
|
|
wee_database --string-check
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME] [--fix]
|
|
wee_database --transfer
|
|
[CONFIG_FILE|--config=CONFIG_FILE]
|
|
[--binding=BINDING_NAME]
|
|
--dest-binding=BINDING_NAME
|
|
[--dry-run]
|
|
"""
|
|
|
|
epilog = """If you are using a MySQL database it is assumed that you have the
|
|
appropriate permissions for the requested operation."""
|
|
|
|
# List of 'dest' settings used by our 'verbs', note 'dest' may be explicit or
|
|
# implicit. If adding more 'verbs' need to add corresponding 'dest' here.
|
|
dest_list = ['create_archive', 'drop_daily', 'backfill_daily',
|
|
'reconfigure', 'string_check', 'transfer']
|
|
|
|
def main():
|
|
|
|
# Set defaults for the system logger:
|
|
syslog.openlog('wee_database', syslog.LOG_PID|syslog.LOG_CONS)
|
|
|
|
# Create a command line parser:
|
|
parser = optparse.OptionParser(description=description, usage=usage, epilog=epilog)
|
|
|
|
# Add the various options:
|
|
parser.add_option("--config", dest="config_path", type=str,
|
|
metavar="CONFIG_FILE",
|
|
help="Use configuration file CONFIG_FILE.")
|
|
parser.add_option("--create-archive", dest="create_archive",
|
|
action='store_true',
|
|
help="Create the archive database.")
|
|
parser.add_option("--drop-daily", dest="drop_daily", action='store_true',
|
|
help="Drop the daily summary tables from a database.")
|
|
parser.add_option("--backfill-daily", dest="backfill_daily",
|
|
action='store_true',
|
|
help="Backfill a database with daily summaries.")
|
|
parser.add_option("--reconfigure", action='store_true',
|
|
help="Create a new archive database using configuration"
|
|
" information found in the configuration file. In"
|
|
" particular, the new database will use the"
|
|
" unit system found in option [StdConvert][target_unit]."
|
|
" The new database will have the same name as the old"
|
|
" database, with a '_new' on the end.")
|
|
parser.add_option("--string-check", dest="string_check",
|
|
action="store_true",
|
|
help="Check a sqlite version of the archive database to"
|
|
" see whether it contains embedded strings.")
|
|
parser.add_option("--fix", dest="fix", action="store_true",
|
|
help="Fix any embedded strings in a sqlite database.")
|
|
parser.add_option("--transfer", dest="transfer", action='store_true',
|
|
help="Transfer the weewx archive from source database to"
|
|
" destination database.")
|
|
parser.add_option("--binding", dest="binding", metavar="BINDING_NAME",
|
|
default='wx_binding',
|
|
help="The data binding. Default is 'wx_binding'.")
|
|
parser.add_option("--dest-binding", dest="dest_binding",
|
|
metavar="BINDING_NAME",
|
|
help="The destination data binding.")
|
|
parser.add_option('--dry-run', action='store_true',
|
|
help='Print what would happen but do not do it.')
|
|
parser.add_option('--trans-days', dest='trans_days', default=5,
|
|
metavar="DAYS",
|
|
help="Limit backfill transactions to no more than"
|
|
" DAYS days of archive data at a time. Default value is 5."
|
|
" May be increased for a slight speed increase or reduced"
|
|
" to reduce memory usage.")
|
|
|
|
# Now we are ready to parse the command line:
|
|
(options, args) = parser.parse_args()
|
|
|
|
# Do a check to see if the user used more than 1 'verb' . More than 1 verb
|
|
# should be avoided - what did the user really want to do?
|
|
# first get a list of options actually used
|
|
options_list = [opt for opt, val in options.__dict__.items() if val is not None]
|
|
# now a list of the 'dest' settings used
|
|
dest_used = [opt for opt in options_list if opt in dest_list]
|
|
# If we have more than 1 entry in dest_used then we have more than 1 'verb'
|
|
# used. Then inform the user and exit. If only 1 'verb' then carry on.
|
|
if len(dest_used) > 1:
|
|
# generate the message but we need to speak 'options' (aka verbs)
|
|
# not 'dest'
|
|
# get a dict of {option.dest: option.str} from our universe of options
|
|
opt_dict = dict((x.dest, x.get_opt_string()) for x in parser.option_list[1:])
|
|
# now get a list of 'verbs' used
|
|
verbs_used = [opt_dict[x] for x in dest_used]
|
|
# get rid of the [ and ]
|
|
verbs_str = ', '.join(verbs_used)
|
|
# our exit message
|
|
exit_str = ("Multiple verbs found in command line %s. Only one verb permitted.\nNothing done. Aborting." %
|
|
(verbs_str, ))
|
|
# now exit with our message
|
|
sys.exit(exit_str)
|
|
|
|
# get config_dict to use
|
|
config_path, config_dict = weecfg.read_config(options.config_path, args)
|
|
print "Using configuration file %s" % config_path
|
|
|
|
db_binding = options.binding
|
|
database = config_dict['DataBindings'][db_binding]['database']
|
|
print "Using database binding '%s', which is bound to database '%s'" % (db_binding, database)
|
|
|
|
if options.create_archive:
|
|
createMainDatabase(config_dict, db_binding)
|
|
|
|
if options.drop_daily:
|
|
dropDaily(config_dict, db_binding)
|
|
|
|
if options.backfill_daily:
|
|
backfillDaily(config_dict, db_binding, int(options.trans_days))
|
|
|
|
if options.reconfigure:
|
|
reconfigMainDatabase(config_dict, db_binding)
|
|
|
|
if options.string_check:
|
|
string_check(config_dict, db_binding, options.fix)
|
|
|
|
if options.transfer:
|
|
transferDatabase(config_dict, db_binding, options)
|
|
|
|
def createMainDatabase(config_dict, db_binding):
|
|
"""Create a weewx archive database"""
|
|
|
|
# Try a simple open. If it succeeds, that means the database
|
|
# exists and is initialized. Otherwise, an exception will be thrown.
|
|
try:
|
|
with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbmanager:
|
|
print "Database '%s' already exists. Nothing done." % (dbmanager.database_name,)
|
|
except weedb.OperationalError:
|
|
# Database does not exist. Try again, but allow initialization:
|
|
with weewx.manager.open_manager_with_config(config_dict, db_binding, initialize=True) as dbmanager:
|
|
print "Created database '%s'" % (dbmanager.database_name,)
|
|
|
|
def dropDaily(config_dict, db_binding):
|
|
"""Drop the daily summaries from a weewx database"""
|
|
|
|
manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
|
|
db_binding)
|
|
database_name = manager_dict['database_dict']['database_name']
|
|
|
|
ans = None
|
|
while ans not in ['y', 'n']:
|
|
print "Proceeding will delete all your daily summaries from database '%s'" % database_name
|
|
ans = raw_input("Are you sure you want to proceed (y/n)? ")
|
|
if ans == 'y' :
|
|
|
|
print "Dropping daily summary tables from '%s' ... " % database_name
|
|
try:
|
|
with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbmanager:
|
|
try:
|
|
dbmanager.drop_daily()
|
|
except weedb.OperationalError, e:
|
|
print "Got error '%s'\nPerhaps there was no daily summary?" % e
|
|
else:
|
|
print "Dropped daily summary tables from database '%s'" % (database_name,)
|
|
except weedb.OperationalError:
|
|
# No daily summaries. Nothing to be done.
|
|
print "No daily summaries found in database '%s'. Nothing done." % (database_name,)
|
|
|
|
def backfillDaily(config_dict, db_binding, trans_days):
|
|
"""Backfill the daily summaries"""
|
|
|
|
manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
|
|
db_binding)
|
|
database_name = manager_dict['database_dict']['database_name']
|
|
|
|
syslog.syslog(syslog.LOG_INFO, "Backfilling daily summaries in database '%s' ..." % database_name)
|
|
print "Backfilling daily summaries in database '%s' ..." % database_name
|
|
|
|
t1 = time.time()
|
|
# Open up the archive. This will create the tables necessary for the daily summaries if they
|
|
# don't already exist:
|
|
with weewx.manager.open_manager_with_config(config_dict, db_binding, initialize=True) as dbmanager:
|
|
nrecs, ndays = dbmanager.backfill_day_summary(trans_days=trans_days)
|
|
tdiff = time.time() - t1
|
|
|
|
syslog.syslog(syslog.LOG_INFO, "Backfill of daily summaries in database '%s' complete" % database_name)
|
|
if nrecs:
|
|
sys.stdout.flush()
|
|
print "Processed %d records to backfill %d day summaries in %.2f seconds " % (nrecs, ndays, tdiff)
|
|
print "Backfill of daily summaries in database '%s' complete" % database_name
|
|
else:
|
|
print "Daily summaries up to date in '%s'" % database_name
|
|
|
|
def reconfigMainDatabase(config_dict, db_binding):
|
|
"""Create a new database, then populate it with the contents of an old database"""
|
|
|
|
manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
|
|
db_binding)
|
|
# Make a copy for the new database (we will be modifying it)
|
|
new_database_dict = dict(manager_dict['database_dict'])
|
|
|
|
# Now modify the database name
|
|
new_database_dict['database_name'] = manager_dict['database_dict']['database_name']+'_new'
|
|
|
|
# First check and see if the new database already exists. If it does, check
|
|
# with the user whether it's ok to delete it.
|
|
try:
|
|
weedb.create(new_database_dict)
|
|
except weedb.DatabaseExists:
|
|
ans = None
|
|
while ans not in ['y', 'n']:
|
|
ans = raw_input("New database '%s' already exists. Delete it first (y/n)? " % (new_database_dict['database_name'],))
|
|
if ans == 'y':
|
|
weedb.drop(new_database_dict)
|
|
elif ans == 'n':
|
|
print "Nothing done."
|
|
return
|
|
|
|
# Get the unit system of the old archive:
|
|
with weewx.manager.Manager.open(manager_dict['database_dict']) as old_dbmanager:
|
|
old_unit_system = old_dbmanager.std_unit_system
|
|
|
|
if old_unit_system is None:
|
|
print "Old database has not been initialized. Nothing to be done."
|
|
return
|
|
|
|
# Get the unit system of the new archive:
|
|
try:
|
|
target_unit_nickname = config_dict['StdConvert']['target_unit']
|
|
except KeyError:
|
|
target_unit_system = None
|
|
else:
|
|
target_unit_system = weewx.units.unit_constants[target_unit_nickname.upper()]
|
|
|
|
|
|
ans = None
|
|
while ans not in ['y', 'n']:
|
|
print "Copying archive database '%s' to '%s'" % (manager_dict['database_dict']['database_name'], new_database_dict['database_name'])
|
|
if target_unit_system is None or old_unit_system==target_unit_system:
|
|
print "The new archive will use the same unit system as the old ('%s')." % (weewx.units.unit_nicknames[old_unit_system],)
|
|
else:
|
|
print "Units will be converted from the '%s' system to the '%s' system." % (weewx.units.unit_nicknames[old_unit_system],
|
|
weewx.units.unit_nicknames[target_unit_system])
|
|
ans = raw_input("Are you sure you wish to proceed (y/n)? ")
|
|
if ans == 'y':
|
|
weewx.manager.reconfig(manager_dict['database_dict'],
|
|
new_database_dict,
|
|
new_unit_system=target_unit_system,
|
|
new_schema=manager_dict['schema'])
|
|
print "Done."
|
|
elif ans == 'n':
|
|
print "Nothing done."
|
|
|
|
def string_check(config_dict, db_binding, fix=False):
|
|
|
|
print "Checking archive database for strings..."
|
|
|
|
# Open up the main database archive
|
|
with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbmanager:
|
|
|
|
obs_pytype_list = []
|
|
obs_list = []
|
|
|
|
# Get the schema and extract the Python type each observation type should be
|
|
for column in dbmanager.connection.genSchemaOf('archive'):
|
|
schema_type = column[2]
|
|
if schema_type == 'INTEGER':
|
|
schema_type = int
|
|
elif schema_type == 'REAL':
|
|
schema_type = float
|
|
elif schema_type == 'STR':
|
|
schema_type = str
|
|
# Save the observation type for this column (eg, 'outTemp'):
|
|
obs_list.append(column[1])
|
|
# Save the Python type for this column (eg, 'int'):
|
|
obs_pytype_list.append(schema_type)
|
|
|
|
# Cycle through each row in the database
|
|
for record in dbmanager.genBatchRows():
|
|
# Now examine each column
|
|
for icol in range(len(record)):
|
|
# Check to see if this column is an instance of the correct Python type
|
|
if record[icol] is not None and not isinstance(record[icol], obs_pytype_list[icol]):
|
|
# Oops. Found a bad one. Print it out
|
|
sys.stdout.write("Timestamp = %s; record['%s']= %r; ... " % (record[0], obs_list[icol], record[icol]))
|
|
|
|
if fix:
|
|
# Cooerce to the correct type. If it can't be done, then set it to None
|
|
try:
|
|
corrected_value = obs_pytype_list[icol](record[icol])
|
|
except ValueError:
|
|
corrected_value = None
|
|
# Update the database with the new value
|
|
dbmanager.updateValue(record[0], obs_list[icol], corrected_value)
|
|
# Inform the user
|
|
sys.stdout.write("changed to %r\n" % corrected_value)
|
|
else:
|
|
sys.stdout.write("ignored.\n")
|
|
|
|
def transferDatabase(config_dict, db_binding, options):
|
|
"""Transfer 'archive' data from one database to another"""
|
|
|
|
# do we have enough to go on, must have a dest binding
|
|
if options.dest_binding is None:
|
|
print "Destination binding not specified. Nothing Done. Aborting."
|
|
return
|
|
# get manager dict for our source binding
|
|
src_manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
|
|
db_binding)
|
|
# get manager dict for our dest binding
|
|
try:
|
|
dest_manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
|
|
options.dest_binding)
|
|
except weewx.UnknownBinding:
|
|
# if we can't find the binding display a message then return
|
|
print ("Unknown destination binding '%s', confirm destination binding." %
|
|
(options.dest_binding, ))
|
|
print "Nothing Done. Aborting."
|
|
return
|
|
except weewx.UnknownDatabase:
|
|
# if we can't find the database display a message then return
|
|
print "Error accessing destination database, confirm destination binding and/or database."
|
|
print "Nothing Done. Aborting."
|
|
return
|
|
except (ValueError, AttributeError):
|
|
# maybe a schema issue
|
|
print "Error accessing destination database."
|
|
print ("Maybe the destination schema is incorrectly specified in binding '%s' in weewx.conf?" %
|
|
(options.dest_binding, ))
|
|
print "Nothing Done. Aborting."
|
|
return
|
|
except (weewx.UnknownDatabaseType):
|
|
# maybe a [Databases] issue
|
|
print "Error accessing destination database."
|
|
print "Maybe the destination database is incorrectly defined in weewx.conf?"
|
|
print "Nothing Done. Aborting."
|
|
return
|
|
# get a manager for our source
|
|
with weewx.manager.Manager.open(src_manager_dict['database_dict']) as src_manager:
|
|
# get first and last timestamps from the source so we can count the
|
|
# records to transfer and display an appropriate message
|
|
first_ts = src_manager.firstGoodStamp()
|
|
last_ts = src_manager.lastGoodStamp()
|
|
if first_ts is not None and last_ts is not None:
|
|
# we have source records
|
|
num_recs = src_manager.getAggregate(TimeSpan(first_ts, last_ts),
|
|
'dateTime', 'count')[0]
|
|
else:
|
|
# we have no source records to transfer so abort with a message
|
|
print ("No records found in source database '%s' for transfer." %
|
|
(src_manager.database_name, ))
|
|
print "Nothing done. Aborting."
|
|
exit()
|
|
ans = None
|
|
if not options.dry_run: # is it a dry run ?
|
|
# not a dry run, actually do the transfer
|
|
while ans not in ['y', 'n']:
|
|
ans = raw_input("Transfer %s records from source database '%s' to destination database '%s' (y/n)? " %
|
|
(num_recs, src_manager.database_name, dest_manager_dict['database_dict']['database_name']))
|
|
if ans == 'y':
|
|
# wrap in a try..except in case we have an error
|
|
try:
|
|
with weewx.manager.Manager.open_with_create(dest_manager_dict['database_dict'],
|
|
table_name=dest_manager_dict['table_name'],
|
|
schema=dest_manager_dict['schema']) as dest_manager:
|
|
sys.stdout.write("transferring, this may take a while.... ")
|
|
sys.stdout.flush()
|
|
# do the transfer, should be quick as it's done as a
|
|
# single transaction
|
|
dest_manager.addRecord(src_manager.genBatchRecords())
|
|
print "complete"
|
|
# get first and last timestamps from the dest so we can
|
|
# count the records transferred and display a message
|
|
first_ts = dest_manager.firstGoodStamp()
|
|
last_ts = dest_manager.lastGoodStamp()
|
|
if first_ts is not None and last_ts is not None:
|
|
num_recs = dest_manager.getAggregate(TimeSpan(first_ts, last_ts),
|
|
'dateTime', 'count')[0]
|
|
print ("%s records transferred from source database '%s' to destination database '%s'." %
|
|
(num_recs, src_manager.database_name, dest_manager.database_name))
|
|
else:
|
|
print ("Error. No records were transferred from source database '%s' to destination database '%s'." %
|
|
(src_manager.database_name, dest_manager.database_name))
|
|
except ImportError:
|
|
# probaly when trying to load db driver
|
|
print ("Error accessing destination database '%s'." %
|
|
(dest_manager_dict['database_dict']['database_name'], ))
|
|
print "Maybe the database driver is incorrectly specified in weewx.conf?"
|
|
print "Nothing done. Aborting."
|
|
return
|
|
except (OSError, weedb.OperationalError):
|
|
# probably a weewx.conf typo or MySQL db not created
|
|
print ("Error accessing destination database '%s'." %
|
|
(dest_manager_dict['database_dict']['database_name'], ))
|
|
print "Maybe it does not exist (MySQL) or is incorrectly defined in weewx.conf?"
|
|
print "Nothing done. Aborting."
|
|
return
|
|
|
|
elif ans == 'n':
|
|
# we decided not to do the transfer
|
|
print "Nothing done."
|
|
return
|
|
else:
|
|
# it's a dry run so say what we would have done then return
|
|
print ("Transfer %s records from source database '%s' to destination database '%s'." %
|
|
(num_recs, src_manager.database_name, dest_manager_dict['database_dict']['database_name']))
|
|
print "Dry run, nothing done."
|
|
|
|
if __name__=="__main__" :
|
|
main()
|