Files
weewx/bin/weeimport/cumulusimport.py

414 lines
19 KiB
Python

#
# Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com> and
# Gary Roderick
#
# See the file LICENSE.txt for your full rights.
#
"""Module to interact with Cumulus monthly log files and import raw
observational data for use with weeimport.
"""
from __future__ import with_statement
from __future__ import absolute_import
from __future__ import print_function
# Python imports
import csv
import glob
import io
import logging
import os
import time
# WeeWX imports
from . import weeimport
import weewx
from weeutil.weeutil import timestamp_to_string
from weewx.units import unit_nicknames
log = logging.getLogger(__name__)
# Dict to lookup rainRate units given rain units
rain_units_dict = {'inch': 'inch_per_hour', 'mm': 'mm_per_hour'}
# ============================================================================
# class CumulusSource
# ============================================================================
class CumulusSource(weeimport.Source):
"""Class to interact with a Cumulus generated monthly log files.
Handles the import of data from Cumulus monthly log files.Cumulus stores
observation data in monthly log files. Each log file contains a month of
data in CSV format. The format of the CSV data (eg date separator, field
delimiter, decimal point character) depends upon the settings used in
Cumulus.
Data is imported from all month log files found in the source directory one
log file at a time. Units of measure are not specified in the monthly log
files so the units of measure must be specified in the wee_import config
file. Whilst the Cumulus monthly log file format is well defined, some
pre-processing of the data is required to provide data in a format the
suitable for use in the wee_import mapping methods.
"""
# List of field names used during import of Cumulus log files. These field
# names are for internal wee_import use only as Cumulus monthly log files
# do not have a header line with defined field names. Cumulus monthly log
# field 0 and field 1 are date and time fields respectively. getRawData()
# combines these fields to return a formatted date-time string that is later
# converted into a unix epoch timestamp.
_field_list = ['datetime', 'cur_out_temp', 'cur_out_hum',
'cur_dewpoint', 'avg_wind_speed', 'gust_wind_speed',
'avg_wind_bearing', 'cur_rain_rate', 'day_rain', 'cur_slp',
'rain_counter', 'curr_in_temp', 'cur_in_hum',
'lastest_wind_gust', 'cur_windchill', 'cur_heatindex',
'cur_uv', 'cur_solar', 'cur_et', 'annual_et',
'cur_app_temp', 'cur_tmax_solar', 'day_sunshine_hours',
'cur_wind_bearing', 'day_rain_rg11', 'midnight_rain']
# Dict to map all possible Cumulus field names (refer _field_list) to WeeWX
# archive field names and units.
_header_map = {'datetime': {'units': 'unix_epoch', 'map_to': 'dateTime'},
'cur_out_temp': {'map_to': 'outTemp'},
'curr_in_temp': {'map_to': 'inTemp'},
'cur_dewpoint': {'map_to': 'dewpoint'},
'cur_slp': {'map_to': 'barometer'},
'avg_wind_bearing': {'units': 'degree_compass',
'map_to': 'windDir'},
'avg_wind_speed': {'map_to': 'windSpeed'},
'cur_heatindex': {'map_to': 'heatindex'},
'gust_wind_speed': {'map_to': 'windGust'},
'cur_windchill': {'map_to': 'windchill'},
'cur_out_hum': {'units': 'percent', 'map_to': 'outHumidity'},
'cur_in_hum': {'units': 'percent', 'map_to': 'inHumidity'},
'midnight_rain': {'map_to': 'rain'},
'cur_rain_rate': {'map_to': 'rainRate'},
'cur_solar': {'units': 'watt_per_meter_squared',
'map_to': 'radiation'},
'cur_uv': {'units': 'uv_index', 'map_to': 'UV'},
'cur_app_temp': {'map_to': 'appTemp'}
}
def __init__(self, config_dict, config_path, cumulus_config_dict, import_config_path, options):
# call our parents __init__
super(CumulusSource, self).__init__(config_dict,
cumulus_config_dict,
options)
# save our import config path
self.import_config_path = import_config_path
# save our import config dict
self.cumulus_config_dict = cumulus_config_dict
# wind dir bounds
self.wind_dir = [0, 360]
# field delimiter used in monthly log files, default to comma
self.delimiter = str(cumulus_config_dict.get('delimiter', ','))
# decimal separator used in monthly log files, default to decimal point
self.decimal = cumulus_config_dict.get('decimal', '.')
# date separator used in monthly log files, default to solidus
separator = cumulus_config_dict.get('separator', '/')
# we combine Cumulus date and time fields to give a fixed format
# date-time string
self.raw_datetime_format = separator.join(('%d', '%m', '%y %H:%M'))
# Cumulus log files provide a number of cumulative rainfall fields. We
# cannot use the daily rainfall as this may reset at some time of day
# other than midnight (as required by WeeWX). So we use field 26, total
# rainfall since midnight and treat it as a cumulative value.
self.rain = 'cumulative'
# initialise our import field-to-WeeWX archive field map
self.map = None
# Cumulus log files have a number of 'rain' fields that can be used to
# derive the WeeWX rain field. Which one is available depends on the
# Cumulus version that created the logs. The preferred field is field
# 26(AA) - total rainfall since midnight but it is only available in
# Cumulus v1.9.4 or later. If that field is not available then the
# preferred field in field 09(J) - total rainfall today then field
# 11(L) - total rainfall counter. Initialise the rain_source_confirmed
# property now and we will deal with it later when we have some source
# data.
self.rain_source_confirmed = None
# Units of measure for some obs (eg temperatures) cannot be derived from
# the Cumulus monthly log files. These units must be specified by the
# user in the import config file. Read these units and fill in the
# missing unit data in the header map. Do some basic error checking and
# validation, if one of the fields is missing or invalid then we need
# to catch the error and raise it as we can't go on.
# Temperature
try:
temp_u = cumulus_config_dict['Units'].get('temperature')
except KeyError:
_msg = "No units specified for Cumulus temperature fields in %s." % (self.import_config_path, )
raise weewx.UnitError(_msg)
else:
if temp_u in weewx.units.default_unit_format_dict:
self._header_map['cur_out_temp']['units'] = temp_u
self._header_map['curr_in_temp']['units'] = temp_u
self._header_map['cur_dewpoint']['units'] = temp_u
self._header_map['cur_heatindex']['units'] = temp_u
self._header_map['cur_windchill']['units'] = temp_u
self._header_map['cur_app_temp']['units'] = temp_u
else:
_msg = "Unknown units '%s' specified for Cumulus temperature fields in %s." % (temp_u,
self.import_config_path)
raise weewx.UnitError(_msg)
# Pressure
try:
press_u = cumulus_config_dict['Units'].get('pressure')
except KeyError:
_msg = "No units specified for Cumulus pressure fields in %s." % (self.import_config_path, )
raise weewx.UnitError(_msg)
else:
if press_u in ['inHg', 'mbar', 'hPa']:
self._header_map['cur_slp']['units'] = press_u
else:
_msg = "Unknown units '%s' specified for Cumulus pressure fields in %s." % (press_u,
self.import_config_path)
raise weewx.UnitError(_msg)
# Rain
try:
rain_u = cumulus_config_dict['Units'].get('rain')
except KeyError:
_msg = "No units specified for Cumulus rain fields in %s." % (self.import_config_path, )
raise weewx.UnitError(_msg)
else:
if rain_u in rain_units_dict:
self._header_map['midnight_rain']['units'] = rain_u
self._header_map['cur_rain_rate']['units'] = rain_units_dict[rain_u]
else:
_msg = "Unknown units '%s' specified for Cumulus rain fields in %s." % (rain_u,
self.import_config_path)
raise weewx.UnitError(_msg)
# Speed
try:
speed_u = cumulus_config_dict['Units'].get('speed')
except KeyError:
_msg = "No units specified for Cumulus speed fields in %s." % (self.import_config_path, )
raise weewx.UnitError(_msg)
else:
if speed_u in weewx.units.default_unit_format_dict:
self._header_map['avg_wind_speed']['units'] = speed_u
self._header_map['gust_wind_speed']['units'] = speed_u
else:
_msg = "Unknown units '%s' specified for Cumulus speed fields in %s." % (speed_u,
self.import_config_path)
raise weewx.UnitError(_msg)
# get our source file path
try:
self.source = cumulus_config_dict['directory']
except KeyError:
_msg = "Cumulus monthly logs directory not specified in '%s'." % import_config_path
raise weewx.ViolatedPrecondition(_msg)
# get the source file encoding, default to utf-8-sig
self.source_encoding = self.cumulus_config_dict.get('source_encoding',
'utf-8-sig')
# property holding the current log file name being processed
self.file_name = None
# Now get a list on monthly log files sorted from oldest to newest
month_log_list = glob.glob(self.source + '/?????log.txt')
_temp = [(fn, fn[-9:-7], time.strptime(fn[-12:-9], '%b').tm_mon) for fn in month_log_list]
self.log_list = [a[0] for a in sorted(_temp,
key=lambda el: (el[1], el[2]))]
if len(self.log_list) == 0:
raise weeimport.WeeImportIOError(
"No Cumulus monthly logs found in directory '%s'." % self.source)
# tell the user/log what we intend to do
_msg = "Cumulus monthly log files in the '%s' directory will be imported" % self.source
print(_msg)
log.info(_msg)
_msg = "The following options will be used:"
if self.verbose:
print(_msg)
log.debug(_msg)
_msg = " config=%s, import-config=%s" % (config_path,
self.import_config_path)
if self.verbose:
print(_msg)
log.debug(_msg)
if options.date:
_msg = " date=%s" % options.date
else:
# we must have --from and --to
_msg = " from=%s, to=%s" % (options.date_from, options.date_to)
if self.verbose:
print(_msg)
log.debug(_msg)
_msg = " dry-run=%s, calc_missing=%s, ignore_invalid_data=%s" % (self.dry_run,
self.calc_missing,
self.ignore_invalid_data)
if self.verbose:
print(_msg)
log.debug(_msg)
_msg = " tranche=%s, interval=%s" % (self.tranche,
self.interval)
if self.verbose:
print(_msg)
log.debug(_msg)
_msg = " UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
if self.verbose:
print(_msg)
log.debug(_msg)
_msg = "Using database binding '%s', which is bound to database '%s'" % (self.db_binding_wx,
self.dbm.database_name)
print(_msg)
log.info(_msg)
_msg = "Destination table '%s' unit system is '%#04x' (%s)." % (self.dbm.table_name,
self.archive_unit_sys,
unit_nicknames[self.archive_unit_sys])
print(_msg)
log.info(_msg)
if self.calc_missing:
print("Missing derived observations will be calculated.")
if not self.UV_sensor:
print("All WeeWX UV fields will be set to None.")
if not self.solar_sensor:
print("All WeeWX radiation fields will be set to None.")
if options.date or options.date_from:
print("Observations timestamped after %s and up to and" % timestamp_to_string(self.first_ts))
print("including %s will be imported." % timestamp_to_string(self.last_ts))
if self.dry_run:
print("This is a dry run, imported data will not be saved to archive.")
def getRawData(self, period):
"""Get raw observation data and construct a map from Cumulus monthly
log fields to WeeWX archive fields.
Obtain raw observational data from Cumulus monthly logs. This raw data
needs to be cleaned of unnecessary characters/codes, a date-time field
generated for each row and an iterable returned.
Input parameters:
period: the file name, including path, of the Cumulus monthly log
file from which raw obs data will be read.
"""
# period holds the filename of the monthly log file that contains our
# data. Does our source exist?
if os.path.isfile(period):
# It exists. The source file may use some encoding, if we can't
# decode it raise a WeeImportDecodeError.
try:
with io.open(period, mode='r', encoding=self.source_encoding) as f:
_raw_data = f.readlines()
except UnicodeDecodeError as e:
# not a utf-8 based encoding, so raise a WeeImportDecodeError
raise weeimport.WeeImportDecodeError(e)
else:
# If it doesn't we can't go on so raise it
raise weeimport.WeeImportIOError(
"Cumulus monthly log file '%s' could not be found." % period)
# Our raw data needs a bit of cleaning up before we can parse/map it.
_clean_data = []
for _row in _raw_data:
# Make sure we have full stops as decimal points
_line = _row.replace(self.decimal, '.')
# Ignore any blank lines
if _line != "\n":
# Cumulus has separate date and time fields as the first 2
# fields of a row. It is easier to combine them now into a
# single date-time field that we can parse later when we map the
# raw data.
_datetime_line = _line.replace(self.delimiter, ' ', 1)
# Save what's left
_clean_data.append(_datetime_line)
# if we haven't confirmed our source for the WeeWX rain field we need
# to do so now
if self.rain_source_confirmed is None:
# The Cumulus source field depends on the Cumulus version that
# created the log files. Unfortunately, we can only determine
# which field to use by looking at the mapped Cumulus data. If we
# look at our DictReader we have no way to reset it, so we create
# a one off DictReader to use instead.
_rain_reader = csv.DictReader(_clean_data, fieldnames=self._field_list,
delimiter=self.delimiter)
# now that we know what Cumulus fields are available we can set our
# rain source appropriately
self.set_rain_source(_rain_reader)
# Now create a dictionary CSV reader
_reader = csv.DictReader(_clean_data, fieldnames=self._field_list,
delimiter=self.delimiter)
# Finally, get our database-source mapping
self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
# Return our dict reader
return _reader
def period_generator(self):
"""Generator function yielding a sequence of monthly log file names.
This generator controls the FOR statement in the parents run() method
that loops over the monthly log files to be imported. The generator
yields a monthly log file name from the list of monthly log files to
be imported until the list is exhausted.
"""
# step through each of our file names
for self.file_name in self.log_list:
# yield the file name
yield self.file_name
@property
def first_period(self):
"""True if current period is the first period otherwise False.
Return True if the current file name being processed is the first in
the list or it is None (the initialisation value).
"""
return self.file_name == self.log_list[0] if self.file_name is not None else True
@property
def last_period(self):
"""True if current period is the last period otherwise False.
Return True if the current file name being processed is the last in
the list.
"""
return self.file_name == self.log_list[-1]
def set_rain_source(self, _data):
"""Set the Cumulus field to be used as the WeeWX rain field source.
"""
_row = next(_data)
if _row['midnight_rain'] is not None:
# we have data in midnight_rain, our default source, so leave
# things as they are and return
pass
elif _row['day_rain'] is not None:
# we have data in day_rain so use that as our rain source
self._header_map['day_rain'] = self._header_map['midnight_rain']
del self._header_map['midnight_rain']
elif _row['rain_counter'] is not None:
# we have data in rain_counter so use that as our rain source
self._header_map['rain_counter'] = self._header_map['midnight_rain']
del self._header_map['midnight_rain']
else:
# We should never end up in this state but....
# We have no suitable rain source so we can't import so remove the
# rain field entry from the header map.
del self._header_map['midnight_rain']
# we only need to do this once so set our flag to True
self.rain_source_confirmed = True
return