Files
weewx/bin/wunderfixer
2017-03-05 11:12:16 -05:00

471 lines
20 KiB
Python
Executable File

#!/usr/bin/env python
#===============================================================================
# Copyright (c) 2009, 2010, 2011, 2012, 2016 Tom Keffer <tkeffer@gmail.com>
#
# This software may be used and redistributed under the
# terms of the GNU General Public License version 3.0
# or, at your option, any higher version.
#
# See the file LICENSE.txt for your full rights.
#
#===============================================================================
"""This utility fills in missing data on the Weather Underground. It
goes through all the records in a weewx archive file for a given
day, comparing to see whether a corresponding record exists on the
Weather Underground. If not, it will publish a new record on the
Weather Underground with the missing data.
CHANGE HISTORY
--------------------------------
1.1.0 10/11/16
Now uses restx API to publish the requests.
Standardised option syntax.
1.0.0 8/16/15
Published version.
1.0.0a1 2/28/15
Now uses weewx API allowing use with any database supported by weewx.
Now supports weewx databases using any weewx supported unit system (e.g. US,
METRIC and METRICWX).
The database is no longer specified by file name; instead, the path to
weewx.conf and a binding are specified.
Now posts wind speeds with 1 decimal place and barometer with 3 decimal places.
Now has option to log to syslog.
0.5.2 11/17/12
Adds radiation and UV to the types posted on WU.
0.5.1 11/05/12
Now assumes sqlite3 will be present. If not, it falls back to pysqlite2.
0.5.0 10/31/11
Fixed bug in fuzzy compares, which were introduced in V0.3.
Timestamps within an epsilon (default 120 seconds) of each other are
considered the same. Epsilon can be specified on the command line.
0.4.0 04/10/10
Now tries up to max_tries times to publish to the WU before giving up.
"""
import csv
import datetime
import optparse
import re
import socket
import sys
import syslog
import time
import urllib2
import weecfg
from weeutil.weeutil import timestamp_to_string
import weewx.manager
import weewx.restx
# Usage text handed to optparse.OptionParser in main(). optparse expands
# "%prog" to the name of the running program when it prints the usage.
usagestr = """%prog CONFIG_FILE|--config=CONFIG_FILE
[--binding=BINDING]
[--station=STATION] [--password=PASSWORD]
[--date=YYYY-mm-dd] [--epsilon=SECONDS]
[--verbose] [--log LOG_FACILITY] [--test] [--query]
[--help]
This utility fills in missing data on the Weather Underground. It goes through
all the records in a weewx archive for a given day, comparing to see whether a
corresponding record exists on the Weather Underground. If not, it will publish
a new record on the Weather Underground with the missing data.
Be sure to use the --test switch first to see whether you like what it
proposes!"""
# Version of this utility. main() embeds it in the "softwaretype" string it
# passes to WunderStation.
__version__ = "1.1.0"
# The number of seconds difference in the timestamp between two records
# and still have them considered to be the same:
# (None until main() assigns it from the --epsilon option; read by
# TimeStamp.__eq__ on every comparison.)
epsilon = None
# Instance of our logger
# (None until main() creates a WunderLog after parsing the command line.)
wlog = None
# Module-level side effect: sockets created from here on default to a
# 10 second timeout.
socket.setdefaulttimeout(10.0)
def main() :
global epsilon, wlog
"""main program body for wunderfixer"""
parser = optparse.OptionParser(usage=usagestr)
parser.add_option("-c", "--config", type="string", dest="config", metavar="CONFIG_PATH",
help="Use configuration file CONFIG_PATH. "
"Default is /etc/weewx/weewx.conf or /home/weewx/weewx.conf.")
parser.add_option("-b", "--binding", type="string", dest="binding",
metavar="BINDING", default='wx_binding',
help="The database binding to be used. Default is 'wx_binding'.")
parser.add_option("-s", "--station", type="string", dest="station",
help="Weather Underground station to check. Optional. "
"Default is to take from configuration file.")
parser.add_option("-p", "--password", type="string", dest="password",
help="Weather Underground station password. Optional. "
"Default is to take from configuration file.")
parser.add_option("-d", "--date", type="string", dest="date", metavar="YYYY-mm-dd",
help="Date to check as a string of form YYYY-mm-dd. Default is today.")
parser.add_option("-e", "--epsilon", type="int", dest="epsilon", metavar="SECONDS",
default=120,
help="Timestamps within this value in seconds compare true. Default "
"is 120.")
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
help="Print useful extra output.")
parser.add_option("-l", "--log", type="string", dest="logging", metavar="LOG_FACILITY",
help="Log selected output to syslog. If omitted no syslog logging occurs. "
"If LOG_FACILITY is 'weewx' then logs are written to the same log used by "
"weewx. Any other parameter will log to syslog.")
parser.add_option("-t", "--test", action="store_true", dest="simulate",
help="Test what would happen, but don't do anything.")
parser.add_option("-q", "--query", action="store_true", dest="query",
help="For each record, query the user before making a change.")
(options, args) = parser.parse_args()
# Set up our syslog
wlog = WunderLog(options.logging, options.verbose)
# get our config file
config_fn, config_dict = weecfg.read_config(options.config, args)
print "Using configuration file %s." % config_fn
wlog.slog(syslog.LOG_INFO, "Using weewx configuration file %s." % config_fn)
# Retrieve the station ID and password from the config file
try:
if not options.station:
options.station = config_dict['StdRESTful']['Wunderground']['station']
if not options.password and not options.simulate:
options.password = config_dict['StdRESTful']['Wunderground']['password']
except KeyError:
wlog.slog(syslog.LOG_ERR, "Missing Wunderground station and/or password")
exit("Missing Wunderground station and/or password")
# exit if any essential arguments are not present
if not options.station or (not options.password and not options.simulate):
print "Missing argument(s).\n"
print parser.parse_args(["--help"])
wlog.slog(syslog.LOG_ERR, "Missing argument(s). Wunderfixer exiting.")
exit()
# get our binding and database and say what we are using
db_binding = options.binding
database = config_dict['DataBindings'][db_binding]['database']
print "Using database binding '%s', which is bound to database '%s'" % (db_binding, database)
wlog.slog(syslog.LOG_INFO, "Using database binding '%s', which is bound to database '%s'" % (db_binding, database))
# get the manager object for our db_binding
dbmanager_t = weewx.manager.open_manager_with_config(config_dict, db_binding)
_ans = 'y'
if options.simulate:
options.query = False
_ans = 'n'
if options.query:
options.verbose = True;
if options.date:
date_tt = time.strptime(options.date, "%Y-%m-%d")
date_date = datetime.date(date_tt[0], date_tt[1], date_tt[2])
else:
# If no date option was specified on the command line, use today's date:
date_date = datetime.date.today()
epsilon = options.epsilon
if options.verbose:
print "Weather Underground Station: ", options.station
print "Date to check: ", date_date
wlog.slog(syslog.LOG_INFO, "Checking Weather Underground station '%s' data for date %s" % (options.station, date_date))
# Get all the time stamps in the archive for the given day:
archive_results = getArchiveDayTimeStamps(dbmanager_t, date_date)
if options.verbose :
print "Number of archive records: ", len(archive_results)
# Get a WunderStation object so we can interact with Weather Underground
wunder = WunderStation(queue=None, # Bogus queue. We will not be using it.
manager_dict=dbmanager_t,
station=options.station,
password=options.password,
server_url=weewx.restx.StdWunderground.pws_url,
protocol_name = "wunderfixer",
softwaretype = "wunderfixer-%s" % __version__)
try:
# Get all the time stamps on the Weather Underground for the given day:
wunder_results = wunder.getDayTimeStamps(date_date)
except Exception:
wlog.slog(syslog.LOG_ERR, "Could not get Weather Underground data. Exiting.")
exit("Could not get Weather Underground data. Exiting.")
if options.verbose :
print "Number of WU records: ", len(wunder_results)
wlog.slog(syslog.LOG_DEBUG, "Found %d archive records and %d WU records" % (len(archive_results), len(wunder_results)))
#===========================================================================
# Unfortunately, the WU does not signal an error if you ask for a CSV file
# on a non-existent station. So, there's no way to tell the difference
# between asking for results from a non-existent station, versus a
# legitimate station that has no data for the given day. Warn the user, then
# proceed.
#===========================================================================
if len(wunder_results) == 0 :
sys.stdout.flush()
print >>sys.stderr, "\nNo results returned from Weather Underground (perhaps a bad station name??)."
print >>sys.stderr, "Publishing anyway."
wlog.slog(syslog.LOG_ERR, "No results returned from Weather Underground for station '%s'"
"(perhaps a bad station name??). Publishing anyway." % options.station)
# Look for any records missing in the WU list, then sort the results:
missing_records = sorted([x for x in archive_results if not x in wunder_results])
if options.verbose :
print "Number of missing records: ", len(missing_records)
if missing_records:
print "\nMissing records:"
wlog.slog(syslog.LOG_INFO, "%d Weather Underground records missing." % len(missing_records))
no_published = 0
# Loop through the missing time stamps:
for time_TS in missing_records:
ts = time_TS.ts
# Get the archive record for this timestamp:
record = dbmanager_t.getRecord(ts)
# Print it out:
print >>sys.stdout, print_record(record),
sys.stdout.flush()
# If this is an interactive session (option "-q") see if the
# user wants to change it:
if options.query :
_ans=raw_input("...fix? (y/n/a/q):")
if _ans == "q" :
print "Quitting."
wlog.slog(syslog.LOG_DEBUG, "... exiting")
exit()
if _ans == "a" :
_ans = "y"
options.query=False
if _ans=='y' :
try:
# Post the data to the WU:
wunder.process_record(record, dbmanager_t)
no_published += 1
print >>sys.stdout, " ...published."
wlog.slog(syslog.LOG_DEBUG, "%s ...published" % timestamp_to_string(record['dateTime']))
except weewx.restx.BadLogin, e:
print >>sys.stderr, "Bad login"
print >>sys.stderr, e
exit("Bad login")
except weewx.restx.FailedPost, e:
print >>sys.stderr, e
print >>sys.stderr, "Aborted."
wlog.slog(syslog.LOG_ERR, "%s ...error %s. Aborting." % (timestamp_to_string(record['dateTime']), e))
exit("Failed post")
except IOError, e:
print >>sys.stderr, " ... not published."
print "Reason: ", e
wlog.slog(syslog.LOG_ERR, "%s ...not published. Reason '%s'" % (timestamp_to_string(record['dateTime']), e))
if hasattr(e, 'reason'):
print >>sys.stderr, "Failed to reach server. Reason: %s" % e.reason
wlog.slog(syslog.LOG_ERR, "%s ...not published. Failed to reach server. Reason '%s'" %
(timestamp_to_string(record['dateTime']), e.reason))
if hasattr(e, 'code'):
print >>sys.stderr, "Failed to reach server. Error code: %s" % e.code
wlog.slog(syslog.LOG_ERR, "%s ...not published. Failed to reach server. Error code '%s'" %
(timestamp_to_string(record['dateTime']), e.code))
else :
print " ... skipped."
wlog.slog(syslog.LOG_DEBUG, "%s ...skipped" % timestamp_to_string(record['dateTime']))
wlog.slog(syslog.LOG_INFO, "%s out of %s missing records published to '%s' for date %s."
" Wunderfixer exiting." % (no_published, len(missing_records), options.station, date_date))
#===============================================================================
# class WunderStation
#===============================================================================
class WunderStation(weewx.restx.AmbientThread):
"""Class to interact with the Weather Underground."""
# match any HTML tag of the form <...>
_tags = re.compile(r'\<.*\>')
def getDayTimeStamps(self, dayRequested) :
"""Returns all time stamps for a given weather underground station for a given day
dayRequested: An instance of datetime.date with the requested date
return: a set containing the timestamps in epoch time
"""
dayRequested_tt = dayRequested.timetuple()
_url = "http://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=%s&"\
"month=%d&day=%d&year=%d&format=1" % (self.station, dayRequested_tt[1], dayRequested_tt[2], dayRequested_tt[0])
try :
# Hit the weather underground site:
_wudata = urllib2.urlopen(_url)
except urllib2.URLError, e :
print >>sys.stderr, "Unable to open Weather Underground station " + self.station, " or ", e
wlog.slog(syslog.LOG_ERR, "Unable to open Weather Underground station %s or %s" % (self.station, e))
raise
except socket.timeout, e:
print >>sys.stderr, "Socket timeout for Weather Underground station " + self.station
wlog.slog(syslog.LOG_ERR, "Socket timeout for Weather Underground station %s" % self.station)
raise
# Because the data comes back with lots of HTML tags and blank lines in it,
# we need a bit of logic to clean it up.
_cleanWUdata = []
for _row in _wudata :
_line = ''.join(WunderStation._tags.split(_row)) # Get rid of any HTML tags
if _line != "\n" : # Get rid of any blank lines
_cleanWUdata.append(_line) # Save what's left
# Now form a dictionary CSV reader, using the first line as the set of keys
_csvreader = csv.DictReader(_cleanWUdata)
# We are only interested in the time stamps. Decode them
# and return as a list
_timeStamps = []
for _row in _csvreader :
_datetm = time.strptime(_row["Time"], "%Y-%m-%d %H:%M:%S")
_time_t = int(time.mktime(_datetm))
# Add to timeStamps
_timeStamps.append(TimeStamp(_time_t))
return _timeStamps
#===============================================================================
# class TimeStamp
#===============================================================================
class TimeStamp(object):
    """An epoch time that is compared 'fuzzily'.

    Two instances are considered equal when their times are no more than the
    module-level ``epsilon`` seconds apart. Ordering (via __cmp__) treats such
    a pair as equal and otherwise orders by time. The hash is computed from
    the exact time, so two instances that compare equal may hash differently.
    """

    def __init__(self, ts):
        # ts: the time, in seconds since the epoch
        self.ts = ts

    def __str__(self):
        return timestamp_to_string(self.ts)

    def __hash__(self):
        return hash(self.ts)

    def __eq__(self, other_ts):
        # Fuzzy equality: the gap between the two times, in either
        # direction, must not exceed epsilon.
        gap = self.ts - other_ts.ts
        return abs(gap) <= epsilon

    def __cmp__(self, other_ts):
        # Fuzzy-equal instances rank the same ...
        if self.__eq__(other_ts):
            return 0
        # ... otherwise the later time is the greater one.
        if self.ts > other_ts.ts:
            return 1
        return -1
#===============================================================================
# class WunderLog
#===============================================================================
class WunderLog(object):
    """Thin wrapper around the python syslog module for wunderfixer.

    Logging is enabled only when the instance is created with something
    other than None; otherwise slog() does nothing.
    """

    def __init__(self, log_facy, verbose):
        """Initialise our syslog environment.

        log_facy: value of the --log option, or None for no logging. It is
        passed as the first argument to syslog.openlog().

        verbose: if true, messages up to syslog.LOG_DEBUG are logged,
        otherwise only up to syslog.LOG_INFO.
        """
        # Remember what we were given; slog() checks it for 'weewx'
        self.log_facy = log_facy
        # We log if and only if something was specified
        self.log = log_facy is not None

        if not self.log:
            return

        syslog.openlog(log_facy, syslog.LOG_PID | syslog.LOG_CONS)
        # Highest priority number (least important) that still gets logged
        upper_level = syslog.LOG_DEBUG if verbose else syslog.LOG_INFO
        syslog.setlogmask(syslog.LOG_UPTO(upper_level))

    def slog(self, level, message):
        """Send message to syslog at the given level, if logging is enabled."""
        if not self.log:
            return
        # When sharing the 'weewx' log, mark the entry as coming from wunderfixer
        if self.log_facy == 'weewx':
            message = 'wunderfixer: ' + message
        syslog.syslog(level, message)
#===============================================================================
# Utility functions
#===============================================================================
# The formats to be used to print the record. For each type, there are two
# formats, the first to be used for a valid value, the second for value
# 'None'
#
# Each entry is (observation type, (format for a value, text for None)).
# print_record() walks the entries in the order given here, so this order is
# the column order of its output.
#
# NOTE(review): the unit suffixes (", F, mph, deg) are fixed text and
# print_record() applies the formats to the record values as they are;
# confirm the labels are right for archives that are not in US units.
# NOTE(review): the placeholder texts are narrower than the formatted values
# (e.g. '%7.3f"' yields at least 8 characters); confirm whether padding was
# lost and columns are meant to line up.
_formats = ( ('barometer' , ('%7.3f"', ' N/A ')),
('outTemp' , ('%6.1fF', ' N/A ')),
('outHumidity' , ('%4.0f%%', ' N/A ')),
('windSpeed' , ('%4.1f mph', ' N/A mph')),
('windDir' , ('%4.0f deg', ' N/A deg')),
('windGust' , ('%4.1f mph gust', ' N/A mph gust')),
('dewpoint' , ('%6.1fF', ' N/A ')),
('rain' , ('%5.2f" rain', ' N/A rain')))
def print_record(record):
    """Return a one-line, semicolon-separated summary of an archive record.

    record: a dictionary-like archive record. It must hold 'dateTime'; the
    observation types listed in _formats are looked up with get(), and a type
    that is absent or None is shown using its placeholder text.

    returns: the formatted time followed by one field per entry of _formats,
    joined with ';'. Despite the name, nothing is printed here.
    """
    # The formatted time always leads the line
    fields = [timestamp_to_string(record['dateTime'])]

    # Then one field per observation type, in the order given by _formats
    for obs_type, (value_format, missing_text) in _formats:
        value = record.get(obs_type)
        if value is None:
            fields.append(missing_text)
        else:
            fields.append(value_format % value)

    return ';'.join(fields)
def getArchiveDayTimeStamps(dbmanager, dayRequested):
    """Returns all time stamps in a weewx archive file for a given day

    dbmanager: object whose genSql(sql, parameters) yields the result rows

    dayRequested: An instance of datetime.date

    returns: A list containing instances of TimeStamps, one for each archive
    row with midnight <= dateTime < following midnight (local time)
    """
    # Ordinal number of the requested day; the day after is one more
    day_ordinal = dayRequested.toordinal()

    # Turn both ordinals back into dates and then into epoch time stamps,
    # giving the two midnights that bound the day,
    # E.G., 2009-10-25 00:00:00 and 2009-10-26 00:00:00
    start_ts, end_ts = [time.mktime(datetime.date.fromordinal(ordinal).timetuple())
                        for ordinal in (day_ordinal, day_ordinal + 1)]

    timeStamps = []
    for _row in dbmanager.genSql("""SELECT dateTime FROM archive WHERE dateTime>=? AND dateTime<?""",
                                 (start_ts, end_ts)):
        timeStamps.append(TimeStamp(_row[0]))
    return timeStamps
#===============================================================================
# Call main program body
#===============================================================================
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()