Added a check for stray strings in the archive database

2026-04-26 04:27:16 -04:00 · 2014-01-30 16:23:50 +00:00
parent c2e15e16f9
commit ff26165b45
3 changed files with 56 additions and 8 deletions
--- a/TODO.txt
+++ b/TODO.txt
@@ -7,3 +7,5 @@ trailing comma in the services lists after upgrade
 add documentation for each extension and the extension installer.

 add upgrading notes for extensions from 2.5 to 2.6
+
+Add --fix option to wee_config_database
--- a/bin/wee_config_database
+++ b/bin/wee_config_database
@@ -29,7 +29,8 @@ add or drop data types from the database schema or change unit systems."""
 
 usage="""%prog: [config_path] [--help]
                              [--create-database] [--create-stats]
-                              [--reconfigure] [--backfill-stats] """
+                              [--reconfigure] [--backfill-stats] 
+                              [--string-check]"""

 epilog="""If you are using the MySQL database it is assumed that you have the
 appropriate permissions for the requested operation."""
@@ -57,6 +58,8 @@ def main():
                          """The new database will have the same name as the old database, with a '_new' on the end.""")
    parser.add_option("--backfill-stats", dest="backfill_stats", action='store_true',
                      help="Backfill the statistical database using the archive database")
+    parser.add_option("--string-check", dest="string_check", action="store_true",
+                      help="Check the archive database for strings in it.")

    # Now we are ready to parse the command line:
    (options, args) = parser.parse_args()
@@ -74,6 +77,9 @@ def main():

    if options.backfill_stats:
        backfillStatsDatabase(config_dict)
+        
+    if options.string_check:
+        string_check(config_dict)

 def createMainDatabase(config_dict):
    """Create the main weewx archive database"""
@@ -181,5 +187,31 @@ def backfillStatsDatabase(config_dict):

    print "Backfilled %d records from the archive database '%s' into the statistical database '%s'" % (nrecs, archive.database, statsDb.database)
    
+def string_check(config_dict):
+    """Scan the archive database for values whose type disagrees with the
+    declared type of their column (typically stray strings).
+
+    config_dict: The configuration dictionary. The name of the archive
+    database is read from config_dict['StdArchive']['archive_database'] and
+    its connection dictionary from config_dict['Databases'].
+
+    Prints one line (timestamp, column name, offending value) for every
+    non-null value whose Python type does not match its column. Returns
+    nothing."""
+    print "Checking archive database for strings..."
+    archive_db = config_dict['StdArchive']['archive_database']
+    archive_db_dict = config_dict['Databases'][archive_db]
+
+    # Acceptable Python type(s) for each declared column type. 'long' and
+    # 'basestring' are included so that large integers and unicode text are
+    # not reported as mismatches under Python 2.
+    expected_types = {'INTEGER': (int, long),
+                      'REAL': float,
+                      'STR': basestring}
+
+    # Open up the main database archive
+    with weewx.archive.Archive.open(archive_db_dict) as archive:
+
+        obs_list = []
+        obs_type_list = []
+        for column in archive.connection.genSchemaOf('archive'):
+            # column[1] is used as the column name, column[2] as its declared type
+            obs_list.append(column[1])
+            # Unrecognized declared types map to None and are not checked:
+            # handing the raw type string to isinstance() would raise TypeError.
+            obs_type_list.append(expected_types.get(column[2]))
+
+        # genBatchRows() yields positional rows, so the timestamp has to be
+        # found by column position rather than by the key 'dateTime'.
+        time_index = obs_list.index('dateTime')
+
+        for record in archive.genBatchRows():
+            for icol, value in enumerate(record):
+                expected = obs_type_list[icol]
+                if value is None or expected is None:
+                    continue
+                if not isinstance(value, expected):
+                    print weeutil.weeutil.timestamp_to_string(record[time_index]), obs_list[icol], "; value=", value
+    
 if __name__=="__main__" :
    main()
--- a/bin/weewx/archive.py
+++ b/bin/weewx/archive.py
@@ -184,9 +184,9 @@ class Archive(object):
                except Exception, e:
                    syslog.syslog(syslog.LOG_ERR, "archive: unable to add archive record %s: %s" % (weeutil.weeutil.timestamp_to_string(record['dateTime']), e))

-    def genBatchRecords(self, startstamp=None, stopstamp=None):
-        """Generator function that yields records with timestamps within an
-        interval.
+    def genBatchRows(self, startstamp=None, stopstamp=None):
+        """Generator function that yields raw rows from the archive database
+        with timestamps within an interval.
        
        startstamp: Exclusive start of the interval in epoch time. If 'None',
        then start at earliest archive record.
@@ -194,9 +194,7 @@ class Archive(object):
        stopstamp: Inclusive end of the interval in epoch time. If 'None', then
        end at last archive record.
        
-        yields: A dictionary record for each database record within the time
-        interval """
-        
+        yields: A list with the data records"""
        _cursor = self.connection.cursor()
        try:
            if startstamp is None:
@@ -211,10 +209,26 @@ class Archive(object):
                    _gen = _cursor.execute("SELECT * FROM %s WHERE dateTime > ? AND dateTime <= ?" % (self.table,), (startstamp, stopstamp))
            
            for _row in _gen :
-                yield dict(zip(self.sqlkeys, _row)) if _row else None
+                yield _row
        finally:
            _cursor.close()
        
+    def genBatchRecords(self, startstamp=None, stopstamp=None):
+        """Generator function that yields archive records, as dictionaries,
+        for the rows produced by genBatchRows() over the same interval.
+        
+        startstamp: Exclusive start of the interval in epoch time. If 'None',
+        then start at earliest archive record.
+        
+        stopstamp: Inclusive end of the interval in epoch time. If 'None', then
+        end at last archive record.
+        
+        yields: A dictionary keyed by observation type (eg, 'outTemp') whose
+        values are the observation values, or None if the row is empty."""
+        
+        for _raw in self.genBatchRows(startstamp, stopstamp):
+            if _raw:
+                # Pair each column name with the value in the same position
+                yield dict(zip(self.sqlkeys, _raw))
+            else:
+                yield None
+        
    def getRecord(self, timestamp, max_delta=None):
        """Get a single archive record with a given epoch time stamp.