mirror of
https://github.com/weewx/weewx.git
synced 2026-04-20 09:37:02 -04:00
414 lines
19 KiB
Python
414 lines
19 KiB
Python
#
|
|
# Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com> and
|
|
# Gary Roderick
|
|
#
|
|
# See the file LICENSE.txt for your full rights.
|
|
#
|
|
"""Module to interact with Cumulus monthly log files and import raw
|
|
observational data for use with weeimport.
|
|
"""
|
|
|
|
from __future__ import with_statement
|
|
from __future__ import absolute_import
|
|
from __future__ import print_function
|
|
|
|
# Python imports
|
|
import csv
|
|
import glob
|
|
import io
|
|
import logging
|
|
import os
|
|
import time
|
|
|
|
# WeeWX imports
|
|
from . import weeimport
|
|
import weewx
|
|
|
|
from weeutil.weeutil import timestamp_to_string
|
|
from weewx.units import unit_nicknames
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# Dict to lookup rainRate units given rain units
|
|
rain_units_dict = {'inch': 'inch_per_hour', 'mm': 'mm_per_hour'}
|
|
|
|
|
|
# ============================================================================
|
|
# class CumulusSource
|
|
# ============================================================================
|
|
|
|
|
|
class CumulusSource(weeimport.Source):
|
|
"""Class to interact with a Cumulus generated monthly log files.
|
|
|
|
Handles the import of data from Cumulus monthly log files.Cumulus stores
|
|
observation data in monthly log files. Each log file contains a month of
|
|
data in CSV format. The format of the CSV data (eg date separator, field
|
|
delimiter, decimal point character) depends upon the settings used in
|
|
Cumulus.
|
|
|
|
Data is imported from all month log files found in the source directory one
|
|
log file at a time. Units of measure are not specified in the monthly log
|
|
files so the units of measure must be specified in the wee_import config
|
|
file. Whilst the Cumulus monthly log file format is well defined, some
|
|
pre-processing of the data is required to provide data in a format the
|
|
suitable for use in the wee_import mapping methods.
|
|
"""
|
|
|
|
# List of field names used during import of Cumulus log files. These field
|
|
# names are for internal wee_import use only as Cumulus monthly log files
|
|
# do not have a header line with defined field names. Cumulus monthly log
|
|
# field 0 and field 1 are date and time fields respectively. getRawData()
|
|
# combines these fields to return a formatted date-time string that is later
|
|
# converted into a unix epoch timestamp.
|
|
_field_list = ['datetime', 'cur_out_temp', 'cur_out_hum',
|
|
'cur_dewpoint', 'avg_wind_speed', 'gust_wind_speed',
|
|
'avg_wind_bearing', 'cur_rain_rate', 'day_rain', 'cur_slp',
|
|
'rain_counter', 'curr_in_temp', 'cur_in_hum',
|
|
'lastest_wind_gust', 'cur_windchill', 'cur_heatindex',
|
|
'cur_uv', 'cur_solar', 'cur_et', 'annual_et',
|
|
'cur_app_temp', 'cur_tmax_solar', 'day_sunshine_hours',
|
|
'cur_wind_bearing', 'day_rain_rg11', 'midnight_rain']
|
|
# Dict to map all possible Cumulus field names (refer _field_list) to WeeWX
|
|
# archive field names and units.
|
|
_header_map = {'datetime': {'units': 'unix_epoch', 'map_to': 'dateTime'},
|
|
'cur_out_temp': {'map_to': 'outTemp'},
|
|
'curr_in_temp': {'map_to': 'inTemp'},
|
|
'cur_dewpoint': {'map_to': 'dewpoint'},
|
|
'cur_slp': {'map_to': 'barometer'},
|
|
'avg_wind_bearing': {'units': 'degree_compass',
|
|
'map_to': 'windDir'},
|
|
'avg_wind_speed': {'map_to': 'windSpeed'},
|
|
'cur_heatindex': {'map_to': 'heatindex'},
|
|
'gust_wind_speed': {'map_to': 'windGust'},
|
|
'cur_windchill': {'map_to': 'windchill'},
|
|
'cur_out_hum': {'units': 'percent', 'map_to': 'outHumidity'},
|
|
'cur_in_hum': {'units': 'percent', 'map_to': 'inHumidity'},
|
|
'midnight_rain': {'map_to': 'rain'},
|
|
'cur_rain_rate': {'map_to': 'rainRate'},
|
|
'cur_solar': {'units': 'watt_per_meter_squared',
|
|
'map_to': 'radiation'},
|
|
'cur_uv': {'units': 'uv_index', 'map_to': 'UV'},
|
|
'cur_app_temp': {'map_to': 'appTemp'}
|
|
}
|
|
|
|
def __init__(self, config_dict, config_path, cumulus_config_dict, import_config_path, options):
|
|
|
|
# call our parents __init__
|
|
super(CumulusSource, self).__init__(config_dict,
|
|
cumulus_config_dict,
|
|
options)
|
|
|
|
# save our import config path
|
|
self.import_config_path = import_config_path
|
|
# save our import config dict
|
|
self.cumulus_config_dict = cumulus_config_dict
|
|
|
|
# wind dir bounds
|
|
self.wind_dir = [0, 360]
|
|
|
|
# field delimiter used in monthly log files, default to comma
|
|
self.delimiter = str(cumulus_config_dict.get('delimiter', ','))
|
|
# decimal separator used in monthly log files, default to decimal point
|
|
self.decimal = cumulus_config_dict.get('decimal', '.')
|
|
|
|
# date separator used in monthly log files, default to solidus
|
|
separator = cumulus_config_dict.get('separator', '/')
|
|
# we combine Cumulus date and time fields to give a fixed format
|
|
# date-time string
|
|
self.raw_datetime_format = separator.join(('%d', '%m', '%y %H:%M'))
|
|
|
|
# Cumulus log files provide a number of cumulative rainfall fields. We
|
|
# cannot use the daily rainfall as this may reset at some time of day
|
|
# other than midnight (as required by WeeWX). So we use field 26, total
|
|
# rainfall since midnight and treat it as a cumulative value.
|
|
self.rain = 'cumulative'
|
|
|
|
# initialise our import field-to-WeeWX archive field map
|
|
self.map = None
|
|
|
|
# Cumulus log files have a number of 'rain' fields that can be used to
|
|
# derive the WeeWX rain field. Which one is available depends on the
|
|
# Cumulus version that created the logs. The preferred field is field
|
|
# 26(AA) - total rainfall since midnight but it is only available in
|
|
# Cumulus v1.9.4 or later. If that field is not available then the
|
|
# preferred field in field 09(J) - total rainfall today then field
|
|
# 11(L) - total rainfall counter. Initialise the rain_source_confirmed
|
|
# property now and we will deal with it later when we have some source
|
|
# data.
|
|
self.rain_source_confirmed = None
|
|
|
|
# Units of measure for some obs (eg temperatures) cannot be derived from
|
|
# the Cumulus monthly log files. These units must be specified by the
|
|
# user in the import config file. Read these units and fill in the
|
|
# missing unit data in the header map. Do some basic error checking and
|
|
# validation, if one of the fields is missing or invalid then we need
|
|
# to catch the error and raise it as we can't go on.
|
|
# Temperature
|
|
try:
|
|
temp_u = cumulus_config_dict['Units'].get('temperature')
|
|
except KeyError:
|
|
_msg = "No units specified for Cumulus temperature fields in %s." % (self.import_config_path, )
|
|
raise weewx.UnitError(_msg)
|
|
else:
|
|
if temp_u in weewx.units.default_unit_format_dict:
|
|
self._header_map['cur_out_temp']['units'] = temp_u
|
|
self._header_map['curr_in_temp']['units'] = temp_u
|
|
self._header_map['cur_dewpoint']['units'] = temp_u
|
|
self._header_map['cur_heatindex']['units'] = temp_u
|
|
self._header_map['cur_windchill']['units'] = temp_u
|
|
self._header_map['cur_app_temp']['units'] = temp_u
|
|
else:
|
|
_msg = "Unknown units '%s' specified for Cumulus temperature fields in %s." % (temp_u,
|
|
self.import_config_path)
|
|
raise weewx.UnitError(_msg)
|
|
# Pressure
|
|
try:
|
|
press_u = cumulus_config_dict['Units'].get('pressure')
|
|
except KeyError:
|
|
_msg = "No units specified for Cumulus pressure fields in %s." % (self.import_config_path, )
|
|
raise weewx.UnitError(_msg)
|
|
else:
|
|
if press_u in ['inHg', 'mbar', 'hPa']:
|
|
self._header_map['cur_slp']['units'] = press_u
|
|
else:
|
|
_msg = "Unknown units '%s' specified for Cumulus pressure fields in %s." % (press_u,
|
|
self.import_config_path)
|
|
raise weewx.UnitError(_msg)
|
|
# Rain
|
|
try:
|
|
rain_u = cumulus_config_dict['Units'].get('rain')
|
|
except KeyError:
|
|
_msg = "No units specified for Cumulus rain fields in %s." % (self.import_config_path, )
|
|
raise weewx.UnitError(_msg)
|
|
else:
|
|
if rain_u in rain_units_dict:
|
|
self._header_map['midnight_rain']['units'] = rain_u
|
|
self._header_map['cur_rain_rate']['units'] = rain_units_dict[rain_u]
|
|
|
|
else:
|
|
_msg = "Unknown units '%s' specified for Cumulus rain fields in %s." % (rain_u,
|
|
self.import_config_path)
|
|
raise weewx.UnitError(_msg)
|
|
# Speed
|
|
try:
|
|
speed_u = cumulus_config_dict['Units'].get('speed')
|
|
except KeyError:
|
|
_msg = "No units specified for Cumulus speed fields in %s." % (self.import_config_path, )
|
|
raise weewx.UnitError(_msg)
|
|
else:
|
|
if speed_u in weewx.units.default_unit_format_dict:
|
|
self._header_map['avg_wind_speed']['units'] = speed_u
|
|
self._header_map['gust_wind_speed']['units'] = speed_u
|
|
else:
|
|
_msg = "Unknown units '%s' specified for Cumulus speed fields in %s." % (speed_u,
|
|
self.import_config_path)
|
|
raise weewx.UnitError(_msg)
|
|
|
|
# get our source file path
|
|
try:
|
|
self.source = cumulus_config_dict['directory']
|
|
except KeyError:
|
|
_msg = "Cumulus monthly logs directory not specified in '%s'." % import_config_path
|
|
raise weewx.ViolatedPrecondition(_msg)
|
|
|
|
# get the source file encoding, default to utf-8-sig
|
|
self.source_encoding = self.cumulus_config_dict.get('source_encoding',
|
|
'utf-8-sig')
|
|
|
|
# property holding the current log file name being processed
|
|
self.file_name = None
|
|
|
|
# Now get a list on monthly log files sorted from oldest to newest
|
|
month_log_list = glob.glob(self.source + '/?????log.txt')
|
|
_temp = [(fn, fn[-9:-7], time.strptime(fn[-12:-9], '%b').tm_mon) for fn in month_log_list]
|
|
self.log_list = [a[0] for a in sorted(_temp,
|
|
key=lambda el: (el[1], el[2]))]
|
|
if len(self.log_list) == 0:
|
|
raise weeimport.WeeImportIOError(
|
|
"No Cumulus monthly logs found in directory '%s'." % self.source)
|
|
|
|
# tell the user/log what we intend to do
|
|
_msg = "Cumulus monthly log files in the '%s' directory will be imported" % self.source
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = "The following options will be used:"
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
_msg = " config=%s, import-config=%s" % (config_path,
|
|
self.import_config_path)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
if options.date:
|
|
_msg = " date=%s" % options.date
|
|
else:
|
|
# we must have --from and --to
|
|
_msg = " from=%s, to=%s" % (options.date_from, options.date_to)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
_msg = " dry-run=%s, calc_missing=%s, ignore_invalid_data=%s" % (self.dry_run,
|
|
self.calc_missing,
|
|
self.ignore_invalid_data)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
_msg = " tranche=%s, interval=%s" % (self.tranche,
|
|
self.interval)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
_msg = " UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.debug(_msg)
|
|
_msg = "Using database binding '%s', which is bound to database '%s'" % (self.db_binding_wx,
|
|
self.dbm.database_name)
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = "Destination table '%s' unit system is '%#04x' (%s)." % (self.dbm.table_name,
|
|
self.archive_unit_sys,
|
|
unit_nicknames[self.archive_unit_sys])
|
|
print(_msg)
|
|
log.info(_msg)
|
|
if self.calc_missing:
|
|
print("Missing derived observations will be calculated.")
|
|
if not self.UV_sensor:
|
|
print("All WeeWX UV fields will be set to None.")
|
|
if not self.solar_sensor:
|
|
print("All WeeWX radiation fields will be set to None.")
|
|
if options.date or options.date_from:
|
|
print("Observations timestamped after %s and up to and" % timestamp_to_string(self.first_ts))
|
|
print("including %s will be imported." % timestamp_to_string(self.last_ts))
|
|
if self.dry_run:
|
|
print("This is a dry run, imported data will not be saved to archive.")
|
|
|
|
def getRawData(self, period):
|
|
"""Get raw observation data and construct a map from Cumulus monthly
|
|
log fields to WeeWX archive fields.
|
|
|
|
Obtain raw observational data from Cumulus monthly logs. This raw data
|
|
needs to be cleaned of unnecessary characters/codes, a date-time field
|
|
generated for each row and an iterable returned.
|
|
|
|
Input parameters:
|
|
|
|
period: the file name, including path, of the Cumulus monthly log
|
|
file from which raw obs data will be read.
|
|
"""
|
|
|
|
# period holds the filename of the monthly log file that contains our
|
|
# data. Does our source exist?
|
|
if os.path.isfile(period):
|
|
# It exists. The source file may use some encoding, if we can't
|
|
# decode it raise a WeeImportDecodeError.
|
|
try:
|
|
with io.open(period, mode='r', encoding=self.source_encoding) as f:
|
|
_raw_data = f.readlines()
|
|
except UnicodeDecodeError as e:
|
|
# not a utf-8 based encoding, so raise a WeeImportDecodeError
|
|
raise weeimport.WeeImportDecodeError(e)
|
|
else:
|
|
# If it doesn't we can't go on so raise it
|
|
raise weeimport.WeeImportIOError(
|
|
"Cumulus monthly log file '%s' could not be found." % period)
|
|
|
|
# Our raw data needs a bit of cleaning up before we can parse/map it.
|
|
_clean_data = []
|
|
for _row in _raw_data:
|
|
# Make sure we have full stops as decimal points
|
|
_line = _row.replace(self.decimal, '.')
|
|
# Ignore any blank lines
|
|
if _line != "\n":
|
|
# Cumulus has separate date and time fields as the first 2
|
|
# fields of a row. It is easier to combine them now into a
|
|
# single date-time field that we can parse later when we map the
|
|
# raw data.
|
|
_datetime_line = _line.replace(self.delimiter, ' ', 1)
|
|
# Save what's left
|
|
_clean_data.append(_datetime_line)
|
|
|
|
# if we haven't confirmed our source for the WeeWX rain field we need
|
|
# to do so now
|
|
if self.rain_source_confirmed is None:
|
|
# The Cumulus source field depends on the Cumulus version that
|
|
# created the log files. Unfortunately, we can only determine
|
|
# which field to use by looking at the mapped Cumulus data. If we
|
|
# look at our DictReader we have no way to reset it, so we create
|
|
# a one off DictReader to use instead.
|
|
_rain_reader = csv.DictReader(_clean_data, fieldnames=self._field_list,
|
|
delimiter=self.delimiter)
|
|
# now that we know what Cumulus fields are available we can set our
|
|
# rain source appropriately
|
|
self.set_rain_source(_rain_reader)
|
|
|
|
# Now create a dictionary CSV reader
|
|
_reader = csv.DictReader(_clean_data, fieldnames=self._field_list,
|
|
delimiter=self.delimiter)
|
|
# Finally, get our database-source mapping
|
|
self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
|
|
# Return our dict reader
|
|
return _reader
|
|
|
|
def period_generator(self):
|
|
"""Generator function yielding a sequence of monthly log file names.
|
|
|
|
This generator controls the FOR statement in the parents run() method
|
|
that loops over the monthly log files to be imported. The generator
|
|
yields a monthly log file name from the list of monthly log files to
|
|
be imported until the list is exhausted.
|
|
"""
|
|
|
|
# step through each of our file names
|
|
for self.file_name in self.log_list:
|
|
# yield the file name
|
|
yield self.file_name
|
|
|
|
@property
|
|
def first_period(self):
|
|
"""True if current period is the first period otherwise False.
|
|
|
|
Return True if the current file name being processed is the first in
|
|
the list or it is None (the initialisation value).
|
|
"""
|
|
|
|
return self.file_name == self.log_list[0] if self.file_name is not None else True
|
|
|
|
@property
|
|
def last_period(self):
|
|
"""True if current period is the last period otherwise False.
|
|
|
|
Return True if the current file name being processed is the last in
|
|
the list.
|
|
"""
|
|
|
|
return self.file_name == self.log_list[-1]
|
|
|
|
def set_rain_source(self, _data):
|
|
"""Set the Cumulus field to be used as the WeeWX rain field source.
|
|
"""
|
|
|
|
_row = next(_data)
|
|
if _row['midnight_rain'] is not None:
|
|
# we have data in midnight_rain, our default source, so leave
|
|
# things as they are and return
|
|
pass
|
|
elif _row['day_rain'] is not None:
|
|
# we have data in day_rain so use that as our rain source
|
|
self._header_map['day_rain'] = self._header_map['midnight_rain']
|
|
del self._header_map['midnight_rain']
|
|
elif _row['rain_counter'] is not None:
|
|
# we have data in rain_counter so use that as our rain source
|
|
self._header_map['rain_counter'] = self._header_map['midnight_rain']
|
|
del self._header_map['midnight_rain']
|
|
else:
|
|
# We should never end up in this state but....
|
|
# We have no suitable rain source so we can't import so remove the
|
|
# rain field entry from the header map.
|
|
del self._header_map['midnight_rain']
|
|
# we only need to do this once so set our flag to True
|
|
self.rain_source_confirmed = True
|
|
return
|