Added the options --string-check and --fix to the utility wee_config_database to find and fix embedded strings in the sqlite archive database.

Documented it.
This commit is contained in:
Tom Keffer
2014-01-30 18:37:02 +00:00
parent 8293bd2f59
commit d96c6a2b53
5 changed files with 132 additions and 13 deletions

View File

@@ -13,6 +13,7 @@ from __future__ import with_statement
import optparse
import syslog
import sys
import user.extensions #@UnusedImport
import weedb
@@ -30,7 +31,7 @@ add or drop data types from the database schema or change unit systems."""
usage="""%prog: [config_path] [--help]
[--create-database] [--create-stats]
[--reconfigure] [--backfill-stats]
[--string-check]"""
[--string-check] [--fix]"""
epilog="""If you are using the MySQL database it is assumed that you have the
appropriate permissions for the requested operation."""
@@ -59,7 +60,9 @@ def main():
parser.add_option("--backfill-stats", dest="backfill_stats", action='store_true',
help="Backfill the statistical database using the archive database")
parser.add_option("--string-check", dest="string_check", action="store_true",
help="Check the archive database for strings in it.")
help="Check a sqlite version of the archive database for embedded strings in it.")
parser.add_option("--fix", dest="fix", action="store_true",
help="If a string is found, fix it.")
# Now we are ready to parse the command line:
(options, args) = parser.parse_args()
@@ -79,7 +82,7 @@ def main():
backfillStatsDatabase(config_dict)
if options.string_check:
string_check(config_dict)
string_check(config_dict, options.fix)
def createMainDatabase(config_dict):
"""Create the main weewx archive database"""
@@ -187,7 +190,7 @@ def backfillStatsDatabase(config_dict):
print "Backfilled %d records from the archive database '%s' into the statistical database '%s'" % (nrecs, archive.database, statsDb.database)
def string_check(config_dict):
def string_check(config_dict, fix=False):
print "Checking archive database for strings..."
archive_db = config_dict['StdArchive']['archive_database']
archive_db_dict = config_dict['Databases'][archive_db]
@@ -195,23 +198,44 @@ def string_check(config_dict):
# Open up the main database archive
with weewx.archive.Archive.open(archive_db_dict) as archive:
obs_type_list = []
obs_pytype_list = []
obs_list = []
# Get the schema and extract the Python type each observation type should be
for column in archive.connection.genSchemaOf('archive'):
schema_type = column[2]
if column[2] == 'INTEGER':
if schema_type == 'INTEGER':
schema_type = int
elif column[2] == 'REAL':
elif schema_type == 'REAL':
schema_type = float
elif column[2] == 'STR':
elif schema_type == 'STR':
schema_type = str
# Save the observation type for this column (eg, 'outTemp'):
obs_list.append(column[1])
obs_type_list.append(schema_type)
# Save the Python type for this column (eg, 'int'):
obs_pytype_list.append(schema_type)
# Cycle through each row in the database
for record in archive.genBatchRows():
# Now examine each column
for icol in range(len(record)):
if record[icol] is not None and not isinstance(record[icol], obs_type_list[icol]):
print weeutil.weeutil.timestamp_to_string(record['dateTime']), obs_list[icol], "; value=", record[icol]
# Check to see if this column is an instance of the correct Python type
if record[icol] is not None and not isinstance(record[icol], obs_pytype_list[icol]):
# Oops. Found a bad one. Print it out
sys.stdout.write("Timestamp = %s; record['%s']= %r; ... " % (record[0], obs_list[icol], record[icol]))
if fix:
# Coerce to the correct type. If it can't be done, then set it to None
try:
corrected_value = obs_pytype_list[icol](record[icol])
except ValueError:
corrected_value = None
# Update the database with the new value
archive.updateValue(record[0], obs_list[icol], corrected_value)
# Inform the user
sys.stdout.write("changed to %r\n" % corrected_value)
else:
sys.stdout.write("ignored.\n")
if __name__=="__main__" :
main()

View File

@@ -9,6 +9,7 @@
#
"""Driver for sqlite"""
from __future__ import with_statement
import os.path
# Import sqlite3. If it does not support the 'with' statement, then
@@ -78,6 +79,13 @@ class Connection(weedb.Connection):
"""Return a cursor object."""
return Cursor(self.connection)
def execute(self, sql_string, sql_tuple=() ):
"""Execute a sql statement. This specialized version takes advantage
of sqlite's ability to do an execute without a cursor.

sql_string: The SQL statement to be executed.

sql_tuple: The values passed along with the statement to the underlying
connection's execute() call (default is an empty tuple).

Nothing is returned, so this is not suitable for fetching query results."""
# Use the underlying connection as a context manager around the call.
# NOTE(review): presumably this is a sqlite3 connection, in which case the
# 'with' block commits on success and rolls back on an exception -- confirm.
with self.connection:
self.connection.execute(sql_string, sql_tuple)
def tables(self):
"""Returns a list of tables in the database."""

View File

@@ -254,6 +254,11 @@ class Archive(object):
finally:
_cursor.close()
def updateValue(self, timestamp, obs_type, new_value):
"""Update (replace) a single value in the database.

timestamp: Selects the row(s) to change: those whose dateTime column
equals this value.

obs_type: The name of the column to be set (eg, 'outTemp'). It is
inserted directly into the SQL text, not bound as a parameter, so it
must be a trusted column name.

new_value: The replacement value. It is bound as a SQL parameter."""
# The table and column names are formatted into the statement; only the
# new value and the timestamp are passed as '?' parameters.
self.connection.execute("UPDATE %s SET %s=? WHERE dateTime=?" % (self.table, obs_type), (new_value, timestamp))
def getSql(self, sql, sqlargs=()):
"""Executes an arbitrary SQL statement on the database.

View File

@@ -36,6 +36,9 @@ When QC rejects values it now logs the rejection.
Introduced a new unit system, METRICWX. Similar to METRIC, it uses
mm for rain, mm/hr for rain rate, and m/s for speed.
Added the options --string-check and --fix to the utility wee_config_database
to find and fix embedded strings in the sqlite archive database.
Font handles are now cached in order to work around a memory leak in PIL.
Image margins now scale with image and font sizes.

View File

@@ -2439,6 +2439,7 @@ class MyAlarm(StdService):
low-battery alarm (<span class="code">lowBattery.py</span>), which is
similar, except that it intercepts LOOP events (instead of
archiving events). </p>
<h1 id ="archive_database">Customizing the archive database</h1>
<p>For most users the default database will work just fine. It has the
added advantage of being compatible with the wview database.
@@ -2446,9 +2447,44 @@ class MyAlarm(StdService):
type to your database, or change its unit system. This section shows you
how to do this, using the utility <span class="symcode">$BIN_ROOT</span><span
class="code">/wee_config_database</span>.</p>
<p>This utility also has the ability to check a sqlite version of the archive
database for embedded strings (where a float is expected).</p>
<p>Before starting, it's worth running the utility with the <span class="code">--help</span>
flag to see how it is used:</p>
<pre class="tty"><span class="symcode">$BIN_ROOT</span>/wee_config_database --help</pre>
<p>This will result in an output that looks something like this:</p>
<pre class="tty">
Usage: wee_config_database: [config_path] [--help]
[--create-database] [--create-stats]
[--reconfigure] [--backfill-stats]
[--string-check] [--fix]
Configure the weewx databases. Most of these functions are handled
automatically by weewx, but they may be useful as a utility in special cases.
In particular, the 'reconfigure' option can be useful if you decide to add or
drop data types from the database schema or change unit systems.
Options:
-h, --help show this help message and exit
--config=FILE use configuration file FILE
--create-archive Create the archive database.
--create-stats Create the statistical database.
--reconfigure Create a new archive database using configuration
information found in the configuration file. In
particular, the new database will use the unit system
found in option [StdConvert][target_unit]. It will use the
schema found in './bin/user/schemas.py'. The new database
will have the same name as the old database, with a '_new'
on the end.
--backfill-stats Backfill the statistical database using the archive
database
--string-check Check a sqlite version of the archive database for
embedded strings in it.
--fix If a string is found, fix it.
If you are using the MySQL database it is assumed that you have the
appropriate permissions for the requested operation.</pre>
<h2>Adding a new observation type</h2>
<p>Suppose you have installed an electric meter at your house and you wish
to correlate electrical usage with the weather. The meter has some sort
@@ -2622,6 +2658,7 @@ mv weewx.sdb_new weewx.sdb</pre>
</tr>
</tbody>
</table>
<h2 id="Changing_the_unit_system">Changing the unit system</h2>
<p>Normally, data is stored in the databases using US Customary units and,
normally, you don't care --- data can always be displayed using any
@@ -2679,6 +2716,48 @@ class="symcode">$CONFIG_ROOT</span>/weewx.conf</pre>
<p>5. <strong>Recreate the stats database.</strong> Delete the stats
database, then let <span class="code">weewx</span> regenerate it. It
will use the new unit system.</p>
<h2>Checking for embedded strings</h2>
<p>If you edit your sqlite archive database using an editing tool, occasionally
strings will get embedded in it, causing weewx to raise an exception. <strong>
This is only a problem with sqlite</strong>. There is no analogous problem
with MySQL databases. The symptom will look something like this:</p>
<pre class='tty'>
Dec 31 16:55:09 arm weewx[18141]: wxengine: Record generation will be attempted in 'hardware'
Dec 31 16:55:09 arm weewx[18141]: wxengine: Using archive database: archive_sqlite
Dec 31 16:55:10 arm weewx[18141]: stats: Created schema for statistical database
Dec 31 17:01:06 arm weewx[18141]: wxengine: Caught unrecoverable exception in wxengine:
Dec 31 17:01:06 arm weewx[18141]: **** unsupported operand type(s) for +=: 'float' and 'unicode'
Dec 31 17:01:06 arm weewx[18141]: **** Traceback (most recent call last):
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/wxengine.py", line 886, in main
Dec 31 17:01:06 arm weewx[18141]: **** engine = EngineClass(config_dict)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/wxengine.py", line 70, in __init__
Dec 31 17:01:06 arm weewx[18141]: **** self.loadServices(config_dict)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/wxengine.py", line 124, in loadServices
Dec 31 17:01:06 arm weewx[18141]: **** self.service_obj.append(weeutil.weeutil._get_object(svc)(self, config_dict))
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/wxengine.py", line 432, in __init__
Dec 31 17:01:06 arm weewx[18141]: **** self.setupStatsDatabase(config_dict)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/wxengine.py", line 543, in setupStatsDatabase
Dec 31 17:01:06 arm weewx[18141]: **** self.statsDb.backfillFrom(self.archive)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/stats.py", line 461, in backfillFrom
Dec 31 17:01:06 arm weewx[18141]: **** _statsDict.addRecord(_rec)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/accum.py", line 305, in addRecord
Dec 31 17:01:06 arm weewx[18141]: **** self._add_value(record[obs_type], obs_type, record['dateTime'], add_hilo)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/accum.py", line 264, in _add_value
Dec 31 17:01:06 arm weewx[18141]: **** self[obs_type].addSum(val)
Dec 31 17:01:06 arm weewx[18141]: **** File "/usr/share/weewx/weewx/accum.py", line 81, in addSum
Dec 31 17:01:06 arm weewx[18141]: **** self.sum += val
Dec 31 17:01:06 arm weewx[18141]: **** TypeError: unsupported operand type(s) for +=: 'float' and 'unicode'
Dec 31 17:01:06 arm weewx[18141]: **** Exiting.</pre>
<p>The problem is that an empty unicode string <span class='code'>u''</span>
was entered where a <span class='code'>NULL</span> should be. The
utility <span class='code'>wee_config_database</span> can fix this.
Run it with the option <span class='code'>--string-check</span> to
search for these embedded strings. Add the option <span class='code'>--fix</span>
to have the utility fix them:</p>
<pre class="tty"><span class="symcode">$BIN_ROOT</span>/wee_config_database <span
class="symcode">$CONFIG_ROOT</span>/weewx.conf --string-check --fix</pre>
<h1 id="porting">Porting to new weather station hardware</h1>
<p>Naturally, this is an advanced topic but, nevertheless, I'd really
like to encourage any Python wizards out there to give it a try. Of