#!/usr/bin/env python
#===============================================================================
# Copyright (c) 2009, 2010, 2011, 2012, 2016 Tom Keffer <tkeffer@gmail.com>
#
# This software may be used and redistributed under the
# terms of the GNU General Public License version 3.0
# or, at your option, any higher version.
#
# See the file LICENSE.txt for your full rights.
#
#===============================================================================
"""This utility fills in missing data on the Weather Underground. It
goes through all the records in a weewx archive file for a given
day, comparing to see whether a corresponding record exists on the
Weather Underground. If not, it will publish a new record on the
Weather Underground with the missing data.

CHANGE HISTORY
--------------------------------
1.1.0 10/11/16
  Now uses restx API to publish the requests.
  Standardised option syntax.

1.0.0 8/16/15
  Published version.

1.0.0a1 2/28/15
  Now uses weewx API allowing use with any database supported by weewx.
  Now supports weewx databases using any weewx supported unit system (eg US,
  METRIC and METRICWX).
  Database is no longer specified by file name; rather, the path to weewx.conf
  and a binding are specified.
  Now posts wind speeds with 1 decimal place and barometer with 3 decimal places.
  Now has option to log to syslog.

0.5.2 11/17/12
  Adds radiation and UV to the types posted on WU.

0.5.1 11/05/12
  Now assumes sqlite3 will be present. If not, it falls back to pysqlite2.

0.5.0 10/31/11
  Fixed bug in fuzzy compares, which were introduced in V0.3.
  Timestamps within an epsilon (default 120 seconds) of each other are
  considered the same. Epsilon can be specified on the command line.

0.4.0 04/10/10
  Now tries up to max_tries times to publish to the WU before giving up.
"""
|
|
|
|
import csv
|
|
import datetime
|
|
import optparse
|
|
import re
|
|
import socket
|
|
import sys
|
|
import syslog
|
|
import time
|
|
import urllib2
|
|
|
|
import weecfg
|
|
from weeutil.weeutil import timestamp_to_string
|
|
import weewx.manager
|
|
import weewx.restx
|
|
|
|
# Usage text handed to optparse.OptionParser in main(). optparse expands
# "%prog" to the name of the running program.
usagestr = """%prog CONFIG_FILE|--config=CONFIG_FILE
[--binding=BINDING]
[--station=STATION] [--password=PASSWORD]
[--date=YYYY-mm-dd] [--epsilon=SECONDS]
[--verbose] [--log LOG_FACILITY] [--test] [--query]
[--help]

This utility fills in missing data on the Weather Underground. It goes through
all the records in a weewx archive for a given day, comparing to see whether a
corresponding record exists on the Weather Underground. If not, it will publish
a new record on the Weather Underground with the missing data.

Be sure to use the --test switch first to see whether you like what it
proposes!"""

__version__ = "1.1.0"

# The number of seconds difference in the timestamp between two records
# and still have them considered to be the same. Set by main() from the
# --epsilon option and read by TimeStamp when comparing.
epsilon = None

# Instance of our logger (a WunderLog). Set by main().
wlog = None

# Apply a 10 second default timeout to every socket created from here on.
socket.setdefaulttimeout(10.0)
|
|
|
|
def main() :
|
|
global epsilon, wlog
|
|
|
|
"""main program body for wunderfixer"""
|
|
|
|
parser = optparse.OptionParser(usage=usagestr)
|
|
parser.add_option("-c", "--config", type="string", dest="config", metavar="CONFIG_PATH",
|
|
help="Use configuration file CONFIG_PATH. "
|
|
"Default is /etc/weewx/weewx.conf or /home/weewx/weewx.conf.")
|
|
parser.add_option("-b", "--binding", type="string", dest="binding",
|
|
metavar="BINDING", default='wx_binding',
|
|
help="The database binding to be used. Default is 'wx_binding'.")
|
|
parser.add_option("-s", "--station", type="string", dest="station",
|
|
help="Weather Underground station to check. Optional. "
|
|
"Default is to take from configuration file.")
|
|
parser.add_option("-p", "--password", type="string", dest="password",
|
|
help="Weather Underground station password. Optional. "
|
|
"Default is to take from configuration file.")
|
|
parser.add_option("-d", "--date", type="string", dest="date", metavar="YYYY-mm-dd",
|
|
help="Date to check as a string of form YYYY-mm-dd. Default is today.")
|
|
parser.add_option("-e", "--epsilon", type="int", dest="epsilon", metavar="SECONDS",
|
|
default=120,
|
|
help="Timestamps within this value in seconds compare true. Default "
|
|
"is 120.")
|
|
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
|
|
help="Print useful extra output.")
|
|
parser.add_option("-l", "--log", type="string", dest="logging", metavar="LOG_FACILITY",
|
|
help="Log selected output to syslog. If omitted no syslog logging occurs. "
|
|
"If LOG_FACILITY is 'weewx' then logs are written to the same log used by "
|
|
"weewx. Any other parameter will log to syslog.")
|
|
parser.add_option("-t", "--test", action="store_true", dest="simulate",
|
|
help="Test what would happen, but don't do anything.")
|
|
parser.add_option("-q", "--query", action="store_true", dest="query",
|
|
help="For each record, query the user before making a change.")
|
|
|
|
(options, args) = parser.parse_args()
|
|
|
|
# Set up our syslog
|
|
wlog = WunderLog(options.logging, options.verbose)
|
|
|
|
# get our config file
|
|
config_fn, config_dict = weecfg.read_config(options.config, args)
|
|
print "Using configuration file %s." % config_fn
|
|
wlog.slog(syslog.LOG_INFO, "Using weewx configuration file %s." % config_fn)
|
|
|
|
# Retrieve the station ID and password from the config file
|
|
try:
|
|
if not options.station:
|
|
options.station = config_dict['StdRESTful']['Wunderground']['station']
|
|
if not options.password and not options.simulate:
|
|
options.password = config_dict['StdRESTful']['Wunderground']['password']
|
|
except KeyError:
|
|
wlog.slog(syslog.LOG_ERR, "Missing Wunderground station and/or password")
|
|
exit("Missing Wunderground station and/or password")
|
|
|
|
# exit if any essential arguments are not present
|
|
if not options.station or (not options.password and not options.simulate):
|
|
print "Missing argument(s).\n"
|
|
print parser.parse_args(["--help"])
|
|
wlog.slog(syslog.LOG_ERR, "Missing argument(s). Wunderfixer exiting.")
|
|
exit()
|
|
|
|
# get our binding and database and say what we are using
|
|
db_binding = options.binding
|
|
database = config_dict['DataBindings'][db_binding]['database']
|
|
print "Using database binding '%s', which is bound to database '%s'" % (db_binding, database)
|
|
wlog.slog(syslog.LOG_INFO, "Using database binding '%s', which is bound to database '%s'" % (db_binding, database))
|
|
|
|
# get the manager object for our db_binding
|
|
dbmanager_t = weewx.manager.open_manager_with_config(config_dict, db_binding)
|
|
|
|
_ans = 'y'
|
|
if options.simulate:
|
|
options.query = False
|
|
_ans = 'n'
|
|
|
|
if options.query:
|
|
options.verbose = True;
|
|
|
|
if options.date:
|
|
date_tt = time.strptime(options.date, "%Y-%m-%d")
|
|
date_date = datetime.date(date_tt[0], date_tt[1], date_tt[2])
|
|
else:
|
|
# If no date option was specified on the command line, use today's date:
|
|
date_date = datetime.date.today()
|
|
|
|
epsilon = options.epsilon
|
|
|
|
if options.verbose:
|
|
print "Weather Underground Station: ", options.station
|
|
print "Date to check: ", date_date
|
|
wlog.slog(syslog.LOG_INFO, "Checking Weather Underground station '%s' data for date %s" % (options.station, date_date))
|
|
|
|
|
|
# Get all the time stamps in the archive for the given day:
|
|
archive_results = getArchiveDayTimeStamps(dbmanager_t, date_date)
|
|
|
|
if options.verbose :
|
|
print "Number of archive records: ", len(archive_results)
|
|
|
|
# Get a WunderStation object so we can interact with Weather Underground
|
|
wunder = WunderStation(queue=None, # Bogus queue. We will not be using it.
|
|
manager_dict=dbmanager_t,
|
|
station=options.station,
|
|
password=options.password,
|
|
server_url=weewx.restx.StdWunderground.pws_url,
|
|
protocol_name = "wunderfixer",
|
|
softwaretype = "wunderfixer-%s" % __version__)
|
|
|
|
try:
|
|
# Get all the time stamps on the Weather Underground for the given day:
|
|
wunder_results = wunder.getDayTimeStamps(date_date)
|
|
except Exception:
|
|
wlog.slog(syslog.LOG_ERR, "Could not get Weather Underground data. Exiting.")
|
|
exit("Could not get Weather Underground data. Exiting.")
|
|
|
|
if options.verbose :
|
|
print "Number of WU records: ", len(wunder_results)
|
|
wlog.slog(syslog.LOG_DEBUG, "Found %d archive records and %d WU records" % (len(archive_results), len(wunder_results)))
|
|
|
|
#===========================================================================
|
|
# Unfortunately, the WU does not signal an error if you ask for a CSV file
|
|
# on a non-existent station. So, there's no way to tell the difference
|
|
# between asking for results from a non-existent station, versus a
|
|
# legitimate station that has no data for the given day. Warn the user, then
|
|
# proceed.
|
|
#===========================================================================
|
|
if len(wunder_results) == 0 :
|
|
sys.stdout.flush()
|
|
print >>sys.stderr, "\nNo results returned from Weather Underground (perhaps a bad station name??)."
|
|
print >>sys.stderr, "Publishing anyway."
|
|
wlog.slog(syslog.LOG_ERR, "No results returned from Weather Underground for station '%s'"
|
|
"(perhaps a bad station name??). Publishing anyway." % options.station)
|
|
|
|
# Look for any records missing in the WU list, then sort the results:
|
|
missing_records = sorted([x for x in archive_results if not x in wunder_results])
|
|
|
|
if options.verbose :
|
|
print "Number of missing records: ", len(missing_records)
|
|
if missing_records:
|
|
print "\nMissing records:"
|
|
wlog.slog(syslog.LOG_INFO, "%d Weather Underground records missing." % len(missing_records))
|
|
|
|
no_published = 0
|
|
# Loop through the missing time stamps:
|
|
for time_TS in missing_records:
|
|
ts = time_TS.ts
|
|
# Get the archive record for this timestamp:
|
|
record = dbmanager_t.getRecord(ts)
|
|
# Print it out:
|
|
print >>sys.stdout, print_record(record),
|
|
sys.stdout.flush()
|
|
|
|
# If this is an interactive session (option "-q") see if the
|
|
# user wants to change it:
|
|
if options.query :
|
|
_ans=raw_input("...fix? (y/n/a/q):")
|
|
if _ans == "q" :
|
|
print "Quitting."
|
|
wlog.slog(syslog.LOG_DEBUG, "... exiting")
|
|
exit()
|
|
if _ans == "a" :
|
|
_ans = "y"
|
|
options.query=False
|
|
|
|
if _ans=='y' :
|
|
try:
|
|
# Post the data to the WU:
|
|
wunder.process_record(record, dbmanager_t)
|
|
no_published += 1
|
|
print >>sys.stdout, " ...published."
|
|
wlog.slog(syslog.LOG_DEBUG, "%s ...published" % timestamp_to_string(record['dateTime']))
|
|
except weewx.restx.BadLogin, e:
|
|
print >>sys.stderr, "Bad login"
|
|
print >>sys.stderr, e
|
|
exit("Bad login")
|
|
except weewx.restx.FailedPost, e:
|
|
print >>sys.stderr, e
|
|
print >>sys.stderr, "Aborted."
|
|
wlog.slog(syslog.LOG_ERR, "%s ...error %s. Aborting." % (timestamp_to_string(record['dateTime']), e))
|
|
exit("Failed post")
|
|
except IOError, e:
|
|
print >>sys.stderr, " ... not published."
|
|
print "Reason: ", e
|
|
wlog.slog(syslog.LOG_ERR, "%s ...not published. Reason '%s'" % (timestamp_to_string(record['dateTime']), e))
|
|
if hasattr(e, 'reason'):
|
|
print >>sys.stderr, "Failed to reach server. Reason: %s" % e.reason
|
|
wlog.slog(syslog.LOG_ERR, "%s ...not published. Failed to reach server. Reason '%s'" %
|
|
(timestamp_to_string(record['dateTime']), e.reason))
|
|
if hasattr(e, 'code'):
|
|
print >>sys.stderr, "Failed to reach server. Error code: %s" % e.code
|
|
wlog.slog(syslog.LOG_ERR, "%s ...not published. Failed to reach server. Error code '%s'" %
|
|
(timestamp_to_string(record['dateTime']), e.code))
|
|
|
|
else :
|
|
print " ... skipped."
|
|
wlog.slog(syslog.LOG_DEBUG, "%s ...skipped" % timestamp_to_string(record['dateTime']))
|
|
wlog.slog(syslog.LOG_INFO, "%s out of %s missing records published to '%s' for date %s."
|
|
" Wunderfixer exiting." % (no_published, len(missing_records), options.station, date_date))
|
|
|
|
#===============================================================================
|
|
# class WunderStation
|
|
#===============================================================================
|
|
|
|
class WunderStation(weewx.restx.AmbientThread):
|
|
"""Class to interact with the Weather Underground."""
|
|
|
|
# match any HTML tag of the form <...>
|
|
_tags = re.compile(r'\<.*\>')
|
|
|
|
def getDayTimeStamps(self, dayRequested) :
|
|
"""Returns all time stamps for a given weather underground station for a given day
|
|
|
|
dayRequested: An instance of datetime.date with the requested date
|
|
|
|
return: a set containing the timestamps in epoch time
|
|
"""
|
|
dayRequested_tt = dayRequested.timetuple()
|
|
|
|
_url = "http://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=%s&"\
|
|
"month=%d&day=%d&year=%d&format=1" % (self.station, dayRequested_tt[1], dayRequested_tt[2], dayRequested_tt[0])
|
|
|
|
try :
|
|
# Hit the weather underground site:
|
|
_wudata = urllib2.urlopen(_url)
|
|
except urllib2.URLError, e :
|
|
print >>sys.stderr, "Unable to open Weather Underground station " + self.station, " or ", e
|
|
wlog.slog(syslog.LOG_ERR, "Unable to open Weather Underground station %s or %s" % (self.station, e))
|
|
raise
|
|
except socket.timeout, e:
|
|
print >>sys.stderr, "Socket timeout for Weather Underground station " + self.station
|
|
wlog.slog(syslog.LOG_ERR, "Socket timeout for Weather Underground station %s" % self.station)
|
|
raise
|
|
|
|
# Because the data comes back with lots of HTML tags and blank lines in it,
|
|
# we need a bit of logic to clean it up.
|
|
_cleanWUdata = []
|
|
for _row in _wudata :
|
|
_line = ''.join(WunderStation._tags.split(_row)) # Get rid of any HTML tags
|
|
if _line != "\n" : # Get rid of any blank lines
|
|
_cleanWUdata.append(_line) # Save what's left
|
|
|
|
# Now form a dictionary CSV reader, using the first line as the set of keys
|
|
_csvreader = csv.DictReader(_cleanWUdata)
|
|
|
|
# We are only interested in the time stamps. Decode them
|
|
# and return as a list
|
|
_timeStamps = []
|
|
for _row in _csvreader :
|
|
_datetm = time.strptime(_row["Time"], "%Y-%m-%d %H:%M:%S")
|
|
_time_t = int(time.mktime(_datetm))
|
|
# Add to timeStamps
|
|
_timeStamps.append(TimeStamp(_time_t))
|
|
|
|
return _timeStamps
|
|
|
|
#===============================================================================
|
|
# class TimeStamp
|
|
#===============================================================================
|
|
|
|
class TimeStamp(object):
    """This class represents a timestamp. It uses a 'fuzzy' compare.
    That is, if the times are within epsilon seconds of each other, they compare true.

    The tolerance is read from the module-level 'epsilon' each time a
    comparison is made, so it must be set before any comparison.

    NOTE: hashing uses the exact timestamp, so two instances that compare
    equal may hash differently. Look instances up in lists, not in sets or
    as dictionary keys, when fuzzy matching is wanted.
    """

    def __init__(self, ts):
        # ts: the timestamp in unix epoch time
        self.ts = ts

    def __cmp__(self, other_ts):
        """Three-way compare: 0 if within epsilon, otherwise ordered by ts."""
        if self.__eq__(other_ts):
            return 0
        return 1 if self.ts > other_ts.ts else -1

    def __hash__(self):
        return hash(self.ts)

    def __eq__(self, other_ts):
        """True if the two timestamps are within epsilon seconds of each other."""
        return abs(self.ts - other_ts.ts) <= epsilon

    def __ne__(self, other_ts):
        """Logical inverse of the fuzzy __eq__."""
        return not self.__eq__(other_ts)

    # Rich ordering, all expressed through __cmp__ so that sorting and the
    # ordering operators agree with the fuzzy equality and do not depend on
    # the interpreter consulting __cmp__ itself.
    def __lt__(self, other_ts):
        return self.__cmp__(other_ts) < 0

    def __le__(self, other_ts):
        return self.__cmp__(other_ts) <= 0

    def __gt__(self, other_ts):
        return self.__cmp__(other_ts) > 0

    def __ge__(self, other_ts):
        return self.__cmp__(other_ts) >= 0

    def __str__(self):
        return timestamp_to_string(self.ts)
|
|
|
|
#===============================================================================
|
|
# class WunderLog
|
|
#===============================================================================
|
|
|
|
class WunderLog(object):
    """This class provides a wrapper around the python syslog module to handle
    wunderfixer logging requirements.
    """

    def __init__(self, log_facy, verbose):
        """Initialise our syslog environment.

        log_facy: value of the --log option, or None to disable logging. It is
        passed to syslog.openlog() as its first argument.

        verbose: if true, messages up to syslog.LOG_DEBUG are logged,
        otherwise only up to syslog.LOG_INFO.
        """

        # flag to indicate whether we are logging or not:
        # log only if a --log value was given
        self.log = log_facy is not None
        # the --log value, also checked by slog() to decide on a preamble
        self.log_facy = log_facy
        # if we are logging then setup our syslog environment
        if self.log:
            syslog.openlog(log_facy, syslog.LOG_PID | syslog.LOG_CONS)
            if verbose:
                syslog.setlogmask(syslog.LOG_UPTO(syslog.LOG_DEBUG))
            else:
                syslog.setlogmask(syslog.LOG_UPTO(syslog.LOG_INFO))

    def slog(self, level, message):
        """Method to log to syslog if required.

        level: a syslog priority such as syslog.LOG_INFO

        message: the text to log. Does nothing if logging is disabled.
        """

        # are we logging ?
        if self.log:
            # if logging to 'weewx' then add a little preamble to say this is wunderfixer
            _message = message if self.log_facy != 'weewx' else 'wunderfixer: ' + message
            syslog.syslog(level, _message)
|
|
|
|
#===============================================================================
|
|
# Utility functions
|
|
#===============================================================================
|
|
|
|
# The formats to be used to print the record. For each type, there are two
# formats, the first to be used for a valid value, the second for value
# 'None'. The order here is the order in which print_record() emits them.
_formats = (('barometer',   ('%7.3f"', ' N/A ')),
            ('outTemp',     ('%6.1fF', ' N/A ')),
            ('outHumidity', ('%4.0f%%', ' N/A ')),
            ('windSpeed',   ('%4.1f mph', ' N/A mph')),
            ('windDir',     ('%4.0f deg', ' N/A deg')),
            ('windGust',    ('%4.1f mph gust', ' N/A mph gust')),
            ('dewpoint',    ('%6.1fF', ' N/A ')),
            ('rain',        ('%5.2f" rain', ' N/A rain')))
|
|
|
|
def print_record(record):
    """Return a one-line summary of an archive record.

    record: a dictionary-like archive record. It must contain 'dateTime';
    any of the types listed in _formats that are absent or None are shown
    using that type's "N/A" text.

    returns: the formatted time followed by each type in _formats order,
    joined with ';'. Nothing is printed, despite the name.
    """
    # Start with a formatted version of the time:
    _strlist = [timestamp_to_string(record['dateTime'])]

    # Now add the other types, in the order given by _formats:
    for (_type, _format) in _formats:
        _val = record.get(_type)
        # _format is (format for a valid value, text for None)
        _strlist.append(_format[0] % _val if _val is not None else _format[1])

    # _strlist is a list of strings. Convert it into one long string:
    return ';'.join(_strlist)
|
|
|
|
def getArchiveDayTimeStamps(dbmanager, dayRequested):
    """Returns all time stamps in a weewx archive for a given day

    dbmanager: the database manager to query. Its genSql() method is called
    with a SELECT on the 'archive' table.

    dayRequested: An instance of datetime.date

    returns: A list containing instances of TimeStamps, one per archive
    record whose dateTime lies in [midnight of dayRequested, next midnight).
    """

    # Get the ordinal number for the requested day and the day after it
    start_ord = dayRequested.toordinal()
    end_ord = start_ord + 1

    # Convert them to instances of datetime.date
    start_date = datetime.date.fromordinal(start_ord)
    end_date = datetime.date.fromordinal(end_ord)

    # Finally, convert those to epoch time stamps.
    # The result will be two timestamps for the two (local time) midnights
    # E.G., 2009-10-25 00:00:00 and 2009-10-26 00:00:00
    start_ts = time.mktime(start_date.timetuple())
    end_ts = time.mktime(end_date.timetuple())

    # Half-open interval: the second midnight belongs to the following day
    _gen_rows = dbmanager.genSql("""SELECT dateTime FROM archive WHERE dateTime>=? AND dateTime<?""",
                                 (start_ts, end_ts))
    timeStamps = [TimeStamp(_row[0]) for _row in _gen_rows]

    return timeStamps
|
|
|
|
#===============================================================================
|
|
# Call main program body
|
|
#===============================================================================
|
|
|
|
# Run the utility only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|