Files
weewx/bin/wee_database

967 lines
42 KiB
Python
Executable File

#!/usr/bin/env python
#
# Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com>
#
# See the file LICENSE.txt for your full rights.
#
"""Configure databases used by WeeWX"""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import with_statement
# python imports
import datetime
import logging
import optparse
import sys
import time
import six
from six.moves import input
# weewx imports
import user.extensions # @UnusedImport
import weecfg.database
import weedb
import weeutil.logger
import weewx.manager
import weewx.units
from weeutil.weeutil import TimeSpan, timestamp_to_string, y_or_n, to_int
# Module-level logger used by every function in this file; logging itself is
# set up in main() through weeutil.logger.setup().
log = logging.getLogger(__name__)
# Usage text handed to optparse.OptionParser in main(). It lists every verb
# this utility accepts together with the options that apply to that verb.
usage = """wee_database --help
wee_database --create
wee_database --reconfigure
wee_database --transfer --dest-binding=BINDING_NAME [--dry-run]
wee_database --check
wee_database --update [--dry-run]
wee_database --drop-daily
wee_database --rebuild-daily [--date=YYYY-mm-dd |
[--from=YYYY-mm-dd] [--to=YYYY-mm-dd]]
wee_database --calc-missing [--date=YYYY-mm-dd |
[--from=YYYY-mm-dd[THH:MM]] [--to=YYYY-mm-dd[THH:MM]]]
wee_database --check-strings
wee_database --fix-strings [--dry-run]
Description:
Manipulate the WeeWX database. Most of these operations are handled
automatically by WeeWX, but they may be useful in special cases."""
def main():
    """Parse the command line and run the single verb that was requested.

    Exactly one verb option (--create, --reconfigure, --transfer, --check,
    --update, --calc-missing, --check-strings, --fix-strings, --drop-daily or
    --rebuild-daily) must be given, otherwise the program exits with a
    message. The configuration file is then read, logging is set up from it
    and the function implementing the chosen verb is called.
    """
    # Start with default logging; it is set up again below once the
    # configuration file has been read.
    weeutil.logger.setup('wee_database', {})

    parser = optparse.OptionParser(usage=usage)

    # --- the verbs ---
    parser.add_option("--create", action='store_true',
                      help="Create the WeeWX database and initialize it with the"
                           " schema.")
    parser.add_option("--reconfigure", action='store_true',
                      help="Create a new database using configuration"
                           " information found in the configuration file. In"
                           " particular, the new database will use the unit system"
                           " found in option [StdConvert][target_unit]."
                           " The new database will have the same name as the old"
                           " database, with a '_new' on the end.")
    parser.add_option("--transfer", action='store_true',
                      help="Transfer the WeeWX archive from source database "
                           "to destination database.")
    parser.add_option("--check", action="store_true",
                      help="Check the calculations in the daily summary tables.")
    parser.add_option("--update", action="store_true",
                      help="Update the daily summary tables if required and"
                           " recalculate the daily summary maximum windSpeed values.")
    parser.add_option("--calc-missing", dest="calc_missing", action="store_true",
                      help="Calculate and store any missing derived observations.")
    parser.add_option("--check-strings", action="store_true",
                      help="Check the archive table for null strings that may"
                           " have been introduced by a SQL editing program.")
    parser.add_option("--fix-strings", action='store_true',
                      help="Fix any null strings in a SQLite database.")
    parser.add_option("--drop-daily", action='store_true',
                      help="Drop the daily summary tables from a database.")
    parser.add_option("--rebuild-daily", action='store_true',
                      help="Rebuild the daily summaries from data in the archive table.")

    # --- the options that modify the verbs ---
    parser.add_option("--config", dest="config_path", type=str,
                      metavar="CONFIG_FILE",
                      help="Use configuration file CONFIG_FILE.")
    parser.add_option("--date", type=str, metavar="YYYY-mm-dd",
                      help="This date only (options --calc-missing and --rebuild-daily only).")
    parser.add_option("--from", dest="from_date", type=str, metavar="YYYY-mm-dd[THH:MM]",
                      help="Start with this date or date-time"
                           " (options --calc-missing and --rebuild-daily only).")
    parser.add_option("--to", dest="to_date", type=str, metavar="YYYY-mm-dd[THH:MM]",
                      help="End with this date or date-time"
                           " (options --calc-missing and --rebuild-daily only).")
    parser.add_option("--binding", metavar="BINDING_NAME", default='wx_binding',
                      help="The data binding to use. Default is 'wx_binding'.")
    parser.add_option("--dest-binding", metavar="BINDING_NAME",
                      help="The destination data binding (option --transfer only).")
    parser.add_option('--dry-run', action='store_true',
                      default=False,
                      help="Print what would happen but do not do it. Default is False.")

    (options, args) = parser.parse_args()

    # Collect the verbs that were actually given; there must be exactly one.
    requested_verbs = [flag for flag in (options.create,
                                         options.reconfigure,
                                         options.transfer,
                                         options.check,
                                         options.update,
                                         options.calc_missing,
                                         options.check_strings,
                                         options.fix_strings,
                                         options.drop_daily,
                                         options.rebuild_daily) if flag]
    if len(requested_verbs) != 1:
        sys.exit("Must specify one and only one verb.")

    # Locate and read the configuration file
    config_path, config_dict = weecfg.read_config(options.config_path, args)
    print("Using configuration file %s" % config_path)

    # The debug level comes from the configuration file
    weewx.debug = to_int(config_dict.get('debug', 0))

    # Now logging can be set up the way the user configured it
    weeutil.logger.setup('wee_database', config_dict)

    db_binding = options.binding
    database = config_dict['DataBindings'][db_binding]['database']
    print("Using database binding '%s', which is bound to database '%s'" % (db_binding, database))

    # Pair each verb flag with the call that implements it. Only one flag can
    # be set at this point, so at most one entry is ever run.
    handlers = (
        (options.create, lambda: createMainDatabase(config_dict, db_binding)),
        (options.reconfigure, lambda: reconfigMainDatabase(config_dict, db_binding)),
        (options.transfer, lambda: transferDatabase(config_dict, db_binding, options)),
        (options.check, lambda: check(config_dict, db_binding, options)),
        (options.update, lambda: update(config_dict, db_binding, options)),
        (options.calc_missing, lambda: calc_missing(config_dict, db_binding, options)),
        (options.check_strings, lambda: check_strings(config_dict, db_binding, options, fix=False)),
        (options.fix_strings, lambda: check_strings(config_dict, db_binding, options, fix=True)),
        (options.drop_daily, lambda: dropDaily(config_dict, db_binding)),
        (options.rebuild_daily, lambda: rebuildDaily(config_dict, db_binding, options)),
    )
    for chosen, run_verb in handlers:
        if chosen:
            run_verb()
            break
def createMainDatabase(config_dict, db_binding):
    """Create and initialize the WeeWX database unless it already exists.

    config_dict: the configuration dictionary
    db_binding: name of the data binding whose database is to be created
    """
    open_manager = weewx.manager.open_manager_with_config
    try:
        # A plain open only succeeds if the database exists and has been
        # initialized; otherwise it raises.
        with open_manager(config_dict, db_binding) as existing:
            print("Database '%s' already exists. Nothing done." % existing.database_name)
    except weedb.OperationalError:
        # The plain open failed, so open again and allow initialization
        with open_manager(config_dict, db_binding, initialize=True) as created:
            print("Created database '%s'" % created.database_name)
def dropDaily(config_dict, db_binding):
    """Drop the daily summary tables from a WeeWX database.

    The user is asked for confirmation first; any answer other than 'y'
    leaves the database untouched.

    config_dict: the configuration dictionary
    db_binding: name of the data binding whose daily summaries are dropped
    """
    database_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                               db_binding)['database_dict']
    database_name = database_dict['database_name']

    print("Proceeding will delete all your daily summaries from database '%s'" % database_name)
    if y_or_n("Are you sure you want to proceed (y/n)? ") != 'y':
        print("Nothing done.")
        return

    started = time.time()
    print("Dropping daily summary tables from '%s' ... " % database_name)
    try:
        with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbmanager:
            try:
                dbmanager.drop_daily()
            except weedb.OperationalError as e:
                print("Error '%s'" % e)
                print("Drop daily summary tables failed for database '%s'" % database_name)
            else:
                elapsed = time.time() - started
                print("Daily summary tables dropped from "
                      "database '%s' in %.2f seconds" % (database_name, elapsed))
    except weedb.OperationalError:
        # Opening the manager failed: there are no daily summaries to drop
        print("No daily summaries found in database '%s'. Nothing done." % database_name)
def rebuildDaily(config_dict, db_binding, options):
    """Rebuild the daily summaries from the archive table.

    The period to rebuild comes from the --date, --from and --to command line
    options as parsed by _parse_dates(); if none was given, all daily
    summaries are rebuilt. The user is asked to confirm before anything is
    done.

    config_dict: the configuration dictionary
    db_binding: name of the data binding whose daily summaries are rebuilt
    options: the parsed optparse options; dry_run and the date options read
             by _parse_dates() are used
    """
    manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                              db_binding)
    database_name = manager_dict['database_dict']['database_name']

    # The first and last good timestamps in the archive are only used in the
    # messages shown to the user.
    with weewx.manager.Manager.open(manager_dict['database_dict']) as dbmanager:
        first_ts = dbmanager.firstGoodStamp()
        last_ts = dbmanager.lastGoodStamp()
    first_d = datetime.date.fromtimestamp(first_ts) if first_ts is not None else None
    # Guard on last_ts itself (the original tested first_ts here)
    last_d = datetime.date.fromtimestamp(last_ts) if last_ts is not None else None

    # Work out the period from any date options on the command line
    from_dt, to_dt = _parse_dates(options)
    # Daily summaries work in whole days, so reduce the datetimes to dates
    from_d = from_dt.date() if from_dt is not None else None
    to_d = to_dt.date() if to_dt is not None else None

    # Tell the user, and the log, what is about to happen
    if from_d is None and to_d is None:
        _msg = "All daily summaries will be rebuilt."
    elif from_d and not to_d:
        _msg = "Daily summaries from %s through the end (%s) will be rebuilt." % (from_d,
                                                                                  last_d)
    elif not from_d and to_d:
        _msg = "Daily summaries from the beginning (%s) through %s will be rebuilt." % (first_d,
                                                                                        to_d)
    elif from_d == to_d:
        _msg = "Daily summary for %s will be rebuilt." % from_d
    else:
        _msg = "Daily summaries from %s through %s inclusive will be rebuilt." % (from_d,
                                                                                  to_d)
    log.info(_msg)
    print(_msg)

    ans = y_or_n("Proceed (y/n)? ")
    if ans == 'n':
        _msg = "Nothing done."
        log.info(_msg)
        print(_msg)
        return

    t1 = time.time()

    # Open the database. initialize=True creates the daily summary tables if
    # they do not exist yet. NOTE: this happens before the dry-run check.
    with weewx.manager.open_manager_with_config(config_dict, db_binding, initialize=True) as dbmanager:
        log.info("Rebuilding daily summaries in database '%s' ..." % database_name)
        print("Rebuilding daily summaries in database '%s' ..." % database_name)
        if options.dry_run:
            print("Dry run. Nothing done.")
            return
        # Do the actual rebuild
        nrecs, ndays = dbmanager.backfill_day_summary(start_d=from_d,
                                                      stop_d=to_d,
                                                      trans_days=20)
    tdiff = time.time() - t1

    # Report what was done
    log.info("Rebuild of daily summaries in database '%s' complete" % database_name)
    if nrecs:
        sys.stdout.flush()
        # Smooth out a formatting inconsistency that depends on whether 1000
        # or more records were processed
        if nrecs >= 1000:
            print()
        if ndays == 1:
            _msg = "Processed %d records to rebuild 1 daily summary in %.2f seconds" % (nrecs,
                                                                                        tdiff)
        else:
            _msg = ("Processed %d records to rebuild %d daily summaries in %.2f seconds" % (nrecs,
                                                                                            ndays,
                                                                                            tdiff))
        print(_msg)
        print("Rebuild of daily summaries in database '%s' complete" % database_name)
    else:
        print("Daily summaries up to date in '%s'" % database_name)
def reconfigMainDatabase(config_dict, db_binding):
    """Copy the existing database into a new database named '<old name>_new'.

    The new database uses the schema of the binding and, if option
    [StdConvert][target_unit] is present in the configuration, that unit
    system. The user is asked to confirm before the copy is made.

    config_dict: the configuration dictionary
    db_binding: name of the data binding whose database is to be copied
    """
    manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                              db_binding)
    old_database_dict = manager_dict['database_dict']
    old_name = old_database_dict['database_name']

    # Work on a copy of the database dictionary; only the name differs
    new_database_dict = dict(old_database_dict)
    new_database_dict['database_name'] = old_name + '_new'
    new_name = new_database_dict['database_name']

    # Try to create the new database. If it is already there, ask the user
    # whether it may be deleted.
    try:
        weedb.create(new_database_dict)
    except weedb.DatabaseExists:
        ans = y_or_n("New database '%s' already exists. "
                     "Delete it first (y/n)? " % new_name)
        if ans != 'y':
            print("Nothing done.")
            return
        weedb.drop(new_database_dict)

    # Unit system used by the old archive
    with weewx.manager.Manager.open(old_database_dict) as old_dbmanager:
        old_unit_system = old_dbmanager.std_unit_system

    if old_unit_system is None:
        print("Old database has not been initialized. Nothing to be done.")
        return

    # Unit system wanted for the new archive, if one was configured
    try:
        target_unit_nickname = config_dict['StdConvert']['target_unit']
    except KeyError:
        target_unit_system = None
    else:
        target_unit_system = weewx.units.unit_constants[target_unit_nickname.upper()]

    print("Copying database '%s' to '%s'" % (old_name, new_name))
    units_unchanged = target_unit_system is None or old_unit_system == target_unit_system
    if units_unchanged:
        print("The new database will use the same unit system as the old ('%s')." %
              weewx.units.unit_nicknames[old_unit_system])
    else:
        print("Units will be converted from the '%s' system to the '%s' system." %
              (weewx.units.unit_nicknames[old_unit_system],
               weewx.units.unit_nicknames[target_unit_system]))

    if y_or_n("Are you sure you wish to proceed (y/n)? ") != 'y':
        print("Nothing done.")
        return

    started = time.time()
    weewx.manager.reconfig(old_database_dict,
                           new_database_dict,
                           new_unit_system=target_unit_system,
                           new_schema=manager_dict['schema'])
    elapsed = time.time() - started
    print("Database '%s' copied to '%s' in %.2f seconds." % (old_name,
                                                             new_name,
                                                             elapsed))
def transferDatabase(config_dict, db_binding, options):
    """Transfer 'archive' data from one database to another.

    The source is the database bound to db_binding, the destination is the
    database bound to options.dest_binding. With --dry-run only the number of
    records that would be transferred is reported.

    config_dict: the configuration dictionary
    db_binding: name of the source data binding
    options: the parsed optparse options; dest_binding and dry_run are used

    Exits via sys.exit() if the source database holds no records. An
    ImportError raised while opening the destination is reported and
    re-raised.
    """
    # A destination binding is mandatory
    if not options.dest_binding:
        print("Destination binding not specified. Nothing Done. Aborting.")
        return
    # Manager dictionary for the source binding
    src_manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                                  db_binding)
    # Manager dictionary for the destination binding
    try:
        dest_manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                                       options.dest_binding)
    except weewx.UnknownBinding:
        # The binding could not be found
        print("Unknown destination binding '%s', confirm destination binding."
              % options.dest_binding)
        print("Nothing Done. Aborting.")
        return
    except weewx.UnknownDatabase:
        # The database could not be found
        print("Error accessing destination database, confirm destination binding and/or database.")
        print("Nothing Done. Aborting.")
        return
    except (ValueError, AttributeError):
        # Maybe a schema issue
        print("Error accessing destination database.")
        print("Maybe the destination schema is incorrectly specified in binding '%s' in weewx.conf?"
              % options.dest_binding)
        print("Nothing Done. Aborting.")
        return
    except weewx.UnknownDatabaseType:
        # Maybe a [Databases] issue
        print("Error accessing destination database.")
        print("Maybe the destination database is incorrectly defined in weewx.conf?")
        print("Nothing Done. Aborting.")
        return

    dest_database_name = dest_manager_dict['database_dict']['database_name']

    # Open the source database
    with weewx.manager.Manager.open(src_manager_dict['database_dict']) as src_manager:
        # The first and last timestamps let us count the records to transfer
        first_ts = src_manager.firstGoodStamp()
        last_ts = src_manager.lastGoodStamp()
        if first_ts is not None and last_ts is not None:
            # There are source records
            num_recs = src_manager.getAggregate(TimeSpan(first_ts, last_ts),
                                                'dateTime', 'count')[0]
        else:
            # Nothing to transfer, so abort with a message
            print("No records found in source database '%s' for transfer." %
                  src_manager.database_name)
            print("Nothing done. Aborting.")
            # Use sys.exit() rather than the exit() helper injected by the
            # site module, which is not guaranteed to be available.
            sys.exit()

        if options.dry_run:
            # Say what would have been done, then finish
            print("Transfer %s records from source database '%s' "
                  "to destination database '%s'."
                  % (num_recs, src_manager.database_name,
                     dest_database_name))
            print("Dry run, nothing done.")
            return

        # Not a dry run: confirm, then do the transfer
        ans = y_or_n("Transfer %s records from source database '%s' "
                     "to destination database '%s' (y/n)? "
                     % (num_recs, src_manager.database_name,
                        dest_database_name))
        if ans != 'y':
            # The user decided not to do the transfer
            print("Nothing done.")
            return

        t1 = time.time()
        try:
            with weewx.manager.Manager.open_with_create(dest_manager_dict['database_dict'],
                                                        table_name=dest_manager_dict['table_name'],
                                                        schema=dest_manager_dict['schema']) as dest_manager:
                sys.stdout.write("transferring, this may take a while.... ")
                sys.stdout.flush()
                # All source records are handed over in a single addRecord() call
                dest_manager.addRecord(src_manager.genBatchRecords())
                print("complete")
                # Count what is now in the destination so it can be reported
                first_ts = dest_manager.firstGoodStamp()
                last_ts = dest_manager.lastGoodStamp()
                tdiff = time.time() - t1
                if first_ts is not None and last_ts is not None:
                    num_recs = dest_manager.getAggregate(TimeSpan(first_ts, last_ts),
                                                         'dateTime', 'count')[0]
                    print("%s records transferred from source database '%s' to" %
                          (num_recs, src_manager.database_name))
                    print("destination database '%s' in %.2f seconds." %
                          (dest_manager.database_name, tdiff))
                else:
                    print("Error. No records were transferred from source database '%s' to "
                          "destination database '%s'." % (src_manager.database_name,
                                                          dest_manager.database_name))
        except ImportError:
            # Probably raised while trying to load the database driver
            print("Error accessing destination database '%s'." %
                  (dest_database_name,))
            print("Nothing done. Aborting.")
            raise
        except (OSError, weedb.OperationalError):
            # Probably a weewx.conf typo, or a MySQL database that was not created
            print("Error accessing destination database '%s'."
                  % dest_database_name)
            print("Maybe it does not exist (MySQL) or is incorrectly defined in weewx.conf?")
            print("Nothing done. Aborting.")
            return
def check(config_dict, db_binding, options):
    """Check the database and report outstanding fixes/issues.

    Performs the following checks:
    - checks the daily summary tables version
    - checks for null strings in the archive table

    config_dict: the configuration dictionary
    db_binding: name of the data binding to check
    options: the parsed optparse options, passed on to check_strings()
    """
    t1 = time.time()

    # Check interval weighting
    print("Checking daily summary tables version...")

    # Read the daily summary version. The manager is opened in a 'with' block
    # so it is closed again as soon as the version has been read.
    with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbm:
        _daily_summary_version = dbm._read_metadata('Version')

    msg = "Daily summary tables are at version %s" % _daily_summary_version
    log.info(msg)
    print(msg)

    # NOTE: the version is compared as a string
    if _daily_summary_version is not None and _daily_summary_version >= '2.0':
        # The interval weighting fix has already been applied
        msg = "Interval Weighting Fix is not required."
        log.info(msg)
        print(msg)
    else:
        print("Recommend running --update to recalculate interval weightings.")
    print("Daily summary tables version check completed in %0.2f seconds." % (time.time() - t1))

    # Now check for null strings
    check_strings(config_dict, db_binding, options, fix=False)
def update(config_dict, db_binding, options):
    """Apply any required database fixes.

    Applies the following fixes:
    - if the daily summary tables are not at version 2.0 or later, the
      interval weighting fix is applied
    - recalculates the windSpeed daily summary max and maxtime fields from
      the archive

    Unless this is a dry run the user is asked to confirm first. During a dry
    run, logging at INFO level and below is disabled and re-enabled again when
    the function finishes, including when it finishes with an exception.

    config_dict: the configuration dictionary
    db_binding: name of the data binding to update
    options: the parsed optparse options; dry_run is used
    """
    # Prompt for confirmation if it is not a dry run
    ans = 'y' if options.dry_run else None
    while ans not in ['y', 'n']:
        ans = input("The update process does not affect archive data, but does alter the database.\nContinue (y/n)? ")
    if ans == 'n':
        # The user decided not to update the summary tables
        msg = "Update cancelled"
        log.info(msg)
        print(msg)
        return

    if options.dry_run:
        logging.disable(logging.INFO)
    try:
        msg = "Preparing Interval Weighting Fix..."
        log.info(msg)
        print(msg)

        # Notify if this is a dry run
        if options.dry_run:
            print("This is a dry run: weighted intervals will be calculated but not saved.")

        # The manager is opened in a 'with' block so that it is always closed
        with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbm:
            # Interval weighting: first construct its config dict ...
            weighting_config_dict = {'name': 'Interval Weighting Fix',
                                     'binding': db_binding,
                                     'trans_days': 100,
                                     'dry_run': options.dry_run}
            # ... then create the interval weighting fix object
            weight_obj = weecfg.database.IntervalWeighting(config_dict,
                                                           weighting_config_dict)

            # Check the daily summary version
            _daily_summary_version = dbm._read_metadata('Version')
            msg = "Daily summary tables are at version %s" % _daily_summary_version
            log.info(msg)
            print(msg)

            # NOTE: the version is compared as a string
            if _daily_summary_version is not None and _daily_summary_version >= '2.0':
                # The interval weighting fix has already been applied
                msg = "Interval Weighting Fix is not required."
                log.info(msg)
                print(msg)
            else:
                # Apply the interval weighting
                msg = "Calculating interval weights..."
                log.info(msg)
                print(msg)
                t1 = time.time()
                weight_obj.run()
                msg = "Interval Weighting Fix completed in %0.2f seconds." % (time.time() - t1)
                log.info(msg)
                print(msg)

            # Recalculate the max/maxtime windSpeed values
            _fix_wind(config_dict, db_binding, options)
    finally:
        # Re-enable logging even if one of the fixes raised
        if options.dry_run:
            logging.disable(logging.NOTSET)
def calc_missing(config_dict, db_binding, options):
    """Calculate any missing derived observations and save them to the database.

    The period processed comes from the --date, --from and --to command line
    options as parsed by _parse_dates(). A missing start or end falls back to
    the first or last good timestamp in the archive. If the archive holds no
    records, a message is shown and nothing is done.

    config_dict: the configuration dictionary
    db_binding: name of the data binding to process
    options: the parsed optparse options; dry_run and the date options read
             by _parse_dates() are used
    """
    msg = "Preparing to calculate missing derived observations..."
    log.info(msg)

    manager_dict = weewx.manager.get_manager_dict_from_config(config_dict,
                                                              db_binding)
    database_name = manager_dict['database_dict']['database_name']

    # The first and last good timestamps in the archive are the overall bounds
    # for calculating missing derived obs
    with weewx.manager.Manager.open(manager_dict['database_dict']) as dbmanager:
        first_ts = dbmanager.firstGoodStamp()
        last_ts = dbmanager.lastGoodStamp()

    # Command line options may limit the period that is processed
    start_dt, stop_dt = _parse_dates(options)

    # Without a first/last timestamp there are no archive records to work on,
    # and the fallback arithmetic below would fail on None.
    if first_ts is None or last_ts is None:
        _msg = "No archive records found in database '%s'. Nothing done." % database_name
        log.info(_msg)
        print(_msg)
        return

    # Convert the start and stop date-times to epoch timestamps. Where one is
    # missing, use the first or last good timestamp instead.
    start_ts = time.mktime(start_dt.timetuple()) if start_dt is not None else first_ts - 1
    stop_ts = time.mktime(stop_dt.timetuple()) if stop_dt is not None else last_ts

    # Notify if this is a dry run
    if options.dry_run:
        msg = "This is a dry run, missing derived observations will be calculated but not saved"
        log.info(msg)
        print(msg)

    # Tell the user, and the log, what is about to happen
    _head = "Missing derived observations will be calculated "
    if start_dt is None and stop_dt is None:
        _tail = "for all records."
    elif start_dt and not stop_dt:
        _tail = "from %s through to the end (%s)." % (timestamp_to_string(start_ts),
                                                      timestamp_to_string(stop_ts))
    elif not start_dt and stop_dt:
        _tail = "from the beginning (%s) through to %s." % (timestamp_to_string(start_ts),
                                                            timestamp_to_string(stop_ts))
    else:
        _tail = "from %s through to %s inclusive." % (timestamp_to_string(start_ts),
                                                      timestamp_to_string(stop_ts))
    _msg = "%s%s" % (_head, _tail)
    log.info(_msg)
    print(_msg)

    ans = y_or_n("Proceed (y/n)? ")
    if ans == 'n':
        _msg = "Nothing done."
        log.info(_msg)
        print(_msg)
        return

    t1 = time.time()

    # Construct a CalcMissing config dict
    calc_missing_config_dict = {'name': 'Calculate Missing Derived Observations',
                                'binding': db_binding,
                                'start_ts': start_ts,
                                'stop_ts': stop_ts,
                                'trans_days': 20,
                                'dry_run': options.dry_run}
    # Obtain a CalcMissing object
    calc_missing_obj = weecfg.database.CalcMissing(config_dict,
                                                   calc_missing_config_dict)

    msg = "Calculating missing derived observations..."
    log.info(msg)
    print(msg)

    # Calculate and store any missing observations
    calc_missing_obj.run()

    msg = "Missing derived observations calculated in %0.2f seconds" % (time.time() - t1)
    log.info(msg)
    print(msg)
def _fix_wind(config_dict, db_binding, options):
    """Recalculate the windSpeed daily summary max and maxtime fields.

    Builds a weecfg.database.WindSpeedRecalculation object and calls its
    run() method. If run() raises weedb.NoTableError, a message saying that
    no windSpeed was found is logged and printed instead of the completion
    message.

    config_dict: the configuration dictionary
    db_binding: name of the data binding to process
    options: the parsed optparse options; dry_run is used
    """
    started = time.time()

    msg = "Preparing Maximum windSpeed Fix..."
    log.info(msg)
    print(msg)

    dry_run = options.dry_run
    if dry_run:
        # Make it clear that nothing will be written
        print("This is a dry run: maximum windSpeed will be calculated but not saved.")

    # Settings for the recalculation, followed by the object that performs it
    wind_config_dict = {'name': 'Maximum windSpeed Fix',
                        'binding': db_binding,
                        'trans_days': 100,
                        'dry_run': dry_run}
    wind_obj = weecfg.database.WindSpeedRecalculation(config_dict,
                                                      wind_config_dict)

    try:
        wind_obj.run()
    except weedb.NoTableError:
        msg = "Maximum windSpeed Fix applied: no windSpeed found"
    else:
        msg = "Maximum windSpeed Fix completed in %0.2f seconds" % (time.time() - started)
    log.info(msg)
    print(msg)
# check_type() and set_type() work from a type name ('INTEGER', 'REAL' or
# 'STR') because Python 3 does not allow you to parameterize types. So, each
# of them is written as a chain of comparisons on the name.
def check_type(val, expected):
    """Return True if val is an instance of the Python type named by expected.

    val: the value to test
    expected: one of 'INTEGER', 'REAL' or 'STR'

    Raises ValueError if expected is none of those names.
    """
    if expected == 'REAL':
        return isinstance(val, float)
    if expected == 'INTEGER':
        return isinstance(val, six.integer_types)
    if expected == 'STR':
        return isinstance(val, six.string_types)
    raise ValueError("Unknown type %s" % expected)
def set_type(val, target):
    """Convert val to the Python type named by target and return the result.

    val: the value to convert
    target: one of 'INTEGER', 'REAL' or 'STR'

    Raises ValueError if target is none of those names. Errors raised by the
    conversion itself are not caught here.
    """
    if target == 'REAL':
        return float(val)
    if target == 'INTEGER':
        return int(val)
    if target == 'STR':
        return six.ensure_str(val)
    raise ValueError("Unknown type %s" % target)
def _scan_archive_strings(dbmanager, options, fix):
    """Scan every archive row for values of the wrong Python type.

    dbmanager: an open database manager for the archive
    options: the parsed optparse options; dry_run is used
    fix: if True, each offending value is coerced to the type in the schema
         (or set to None if that fails) and, unless it is a dry run, written
         back to the database

    Returns a tuple (records, last_timestamp, found) where records is the
    number of rows examined, last_timestamp is the first column of the last
    row examined (None if there were no rows) and found is a list of tuples
    (timestamp, obs name, bad value) with the corrected value appended as a
    fourth element when fix is True.
    """
    obs_list = []
    obs_type_list = []
    # Get the schema and extract the type each observation type should be
    for column in dbmanager.connection.genSchemaOf('archive'):
        # The observation name for this column (eg, 'outTemp') ...
        obs_list.append(column[1])
        # ... and its type
        obs_type_list.append(column[2])

    records = 0
    last_timestamp = None
    found = []
    # Cycle through each row in the database
    for record in dbmanager.genBatchRows():
        records += 1
        last_timestamp = record[0]
        # Now examine each column
        for icol, value in enumerate(record):
            # Nothing to do for None or for a value of the correct type
            if value is None or check_type(value, obs_type_list[icol]):
                continue
            # Oops. Found a bad one.
            if fix:
                log.info("Timestamp = %s; record['%s']= %r; ... "
                         % (record[0], obs_list[icol], value))
                # Coerce to the correct type. If it can't be done, then set
                # it to None.
                try:
                    corrected_value = set_type(value, obs_type_list[icol])
                except ValueError:
                    corrected_value = None
                # Update the database, but only if it's not a dry run
                if not options.dry_run:
                    dbmanager.updateValue(record[0], obs_list[icol], corrected_value)
                found.append((record[0], obs_list[icol], value, corrected_value))
                log.info(" changed to %r\n" % corrected_value)
            else:
                found.append((record[0], obs_list[icol], value))
        # Notify the user of progress
        if records % 1000 == 0:
            print("Checking record: %d; Timestamp: %s\r"
                  % (records, timestamp_to_string(record[0])), end=' ')
            sys.stdout.flush()
    return records, last_timestamp, found


def check_strings(config_dict, db_binding, options, fix=False):
    """Scan the archive table for null strings.

    Identifies and lists any null string occurrences in the archive table. If
    fix is True then any null strings that are found are fixed, unless this
    is a dry run.

    While checking only, or during a dry run, logging at INFO level and below
    is disabled; it is re-enabled when the function finishes, including when
    it finishes with an exception.

    config_dict: the configuration dictionary
    db_binding: name of the data binding to scan
    options: the parsed optparse options; dry_run is used
    fix: True to fix what is found, False to only report it
    """
    t1 = time.time()
    quiet_log = options.dry_run or not fix
    if quiet_log:
        logging.disable(logging.INFO)
    try:
        print("Preparing Null String Fix, this may take a while...")

        if fix:
            log.info("Preparing Null String Fix")
            # Notify if this is a dry run
            if options.dry_run:
                print("This is a dry run: null strings will be detected but not fixed")

        # Open up the main database archive table and scan it
        with weewx.manager.open_manager_with_config(config_dict, db_binding) as dbmanager:
            records, last_timestamp, _found = _scan_archive_strings(dbmanager, options, fix)

        # Update the final count now that the scan has finished. With an
        # empty archive there is no last record to report.
        if records:
            print("Checking record: %d; Timestamp: %s\r"
                  % (records, timestamp_to_string(last_timestamp)), end=' ')
            print()
        tdiff = time.time() - t1

        # Display details of what was found, if anything
        if _found:
            print("The following null strings were found:")
            for item in _found:
                if len(item) == 4:
                    print("Timestamp = %s; record['%s'] = %r; ... changed to %r" % item)
                else:
                    print("Timestamp = %s; record['%s'] = %r; ... ignored" % item)

        # How many were fixed? Fixed entries carry a fourth element.
        fixed = len([a for a in _found if len(a) == 4])

        # Summarise the results
        if len(_found) == 0:
            # Found no null strings, log it and display on screen
            log.info("No null strings found.")
            print("No null strings found.")
        elif fixed == len(_found):
            # Fixed all that were found
            if options.dry_run:
                # It's a dry run so display to screen but not to log
                print("%d of %d null strings found would have been fixed." % (fixed, len(_found)))
            else:
                # Really did fix so log and display to screen
                log.info("%d of %d null strings found were fixed." % (fixed, len(_found)))
                print("%d of %d null strings found were fixed." % (fixed, len(_found)))
        elif fix:
            # This should never occur - found some but didn't fix them all
            # when we should have
            if options.dry_run:
                # It's a dry run so say what would have happened
                print("Could not fix all null strings. "
                      "%d of %d null strings found would have been fixed." % (fixed,
                                                                              len(_found)))
            else:
                # Really did fix so log and display to screen
                log.info("Could not fix all null strings. "
                         "%d of %d null strings found were fixed." % (fixed,
                                                                      len(_found)))
                print("Could not fix all null strings. "
                      "%d of %d null strings found were fixed." % (fixed,
                                                                   len(_found)))
        else:
            # Found some null strings but it was only a check not a fix, just
            # display to screen
            print("%d null strings were found.\r\n"
                  "Recommend running --fix-strings to fix these strings." % len(_found))

        # And finally details on time taken
        if fix:
            log.info("Applied Null String Fix in %0.2f seconds." % tdiff)
            print("Applied Null String Fix in %0.2f seconds." % tdiff)
        else:
            # It was a check not a fix so just display to screen
            print("Completed Null String Check in %0.2f seconds." % tdiff)
    finally:
        # Re-enable logging even if the scan raised
        if quiet_log:
            logging.disable(logging.NOTSET)
def _parse_dates(options):
    """Parse the --date, --from and --to command line options.

    Parses --date or --from and --to to determine a date-time span to be
    used. --to and --from in the format y-m-dTHH:MM precisely define a
    date-time but anything in the format y-m-d does not. When rebuilding
    the daily summaries this imprecision is not important as we merely need
    a date-time somewhere within the day being rebuilt. When calculating
    missing fields we need date-times for the span over which the
    calculations are to be performed.

    Inputs:
        options: the optparse options

    Returns: A two-way tuple (from_dt, to_dt) representing the from and to
             date-times derived from the --date or --to and --from command
             line options, where
        from_dt: A datetime.datetime object holding the from date-time. May
                 be None
        to_dt:   A datetime.datetime object holding the to date-time. May be
                 None

    Raises ValueError if an option cannot be parsed or if --date is combined
    with --from or --to, and weewx.ViolatedPrecondition if the --from value
    is later than the --to value.
    """
    date_format = "%Y-%m-%d"
    datetime_format = "%Y-%m-%dT%H:%M"
    one_day = datetime.timedelta(days=1)
    parse = datetime.datetime.strptime

    # --date is handled entirely on its own
    if options.date:
        # It may not be combined with --from or --to
        if options.from_date or options.to_date:
            raise ValueError("Specify either --date or a --from and --to combination; not both")
        try:
            # Midnight at the start of the day given
            day_start = parse(options.date, date_format)
        except ValueError:
            raise ValueError("Invalid --date option specified.")
        if options.rebuild_daily:
            # The daily summaries are stamped with the midnight timestamp of
            # each day, so from and to must fall within the same calendar
            # day or more than one day would be rebuilt. For simplicity they
            # are made the same.
            return day_start, day_start
        if options.calc_missing:
            # Archive records are epoch timestamped and the midnight stamped
            # record belongs to the previous day. So start one second after
            # midnight and stop at midnight at the end of the day.
            return day_start + datetime.timedelta(seconds=1), day_start + one_day
        # Nothing else uses from and to (yet), but just in case return
        # midnight to midnight
        return day_start, day_start + one_day

    # No --date, so look at --from ...
    from_dt = None
    if options.from_date:
        # With a 'T' there is a time as well; a bare date means midnight at
        # the start of that day
        from_format = datetime_format if 'T' in options.from_date else date_format
        try:
            from_dt = parse(options.from_date, from_format)
        except ValueError:
            raise ValueError("Invalid --from option specified.")

    # ... and at --to
    to_dt = None
    if options.to_date:
        to_has_time = 'T' in options.to_date
        try:
            to_dt = parse(options.to_date, datetime_format if to_has_time else date_format)
        except ValueError:
            raise ValueError("Invalid --to option specified.")
        # A bare date is midnight at the start of the day. For a rebuild
        # that is already within the wanted day, so it is left alone; for
        # everything else midnight at the end of the day is wanted.
        if not to_has_time and not options.rebuild_daily:
            to_dt = to_dt + one_day

    # If both ends were given, from may not be later than to
    if from_dt and to_dt and to_dt < from_dt:
        raise weewx.ViolatedPrecondition("--from value is later than --to value.")

    return from_dt, to_dt
# Script entry point: call main() only when this file is run directly.
if __name__ == "__main__":
main()