mirror of
https://github.com/weewx/weewx.git
synced 2026-04-19 00:56:54 -04:00
1363 lines
68 KiB
Python
1363 lines
68 KiB
Python
#
|
|
# Copyright (c) 2009-2023 Tom Keffer <tkeffer@gmail.com> and Gary Roderick
|
|
#
|
|
# See the file LICENSE.txt for your full rights.
|
|
#
|
|
|
|
"""Module providing the base classes and API for importing observational data
|
|
into WeeWX.
|
|
"""
|
|
|
|
# Python imports
|
|
import datetime
|
|
import logging
|
|
import numbers
|
|
import re
|
|
import sys
|
|
import time
|
|
from datetime import datetime as dt
|
|
|
|
# WeeWX imports
|
|
import weecfg
|
|
import weecfg.database
|
|
import weewx
|
|
import weewx.accum
|
|
import weewx.qc
|
|
import weewx.wxservices
|
|
from weeutil.weeutil import timestamp_to_string, option_as_list, to_int, tobool, get_object, \
|
|
max_with_none
|
|
from weewx.manager import open_manager_with_config
|
|
from weewx.units import unit_constants, unit_nicknames, convertStd, to_std_system, ValueTuple
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# List of sources we support
|
|
SUPPORTED_SOURCES = ['CSV', 'WU', 'Cumulus', 'WD', 'WeatherCat']
|
|
|
|
# Minimum requirements in any explicit or implicit WeeWX field-to-import field
|
|
# map
|
|
MINIMUM_MAP = {'dateTime': {'units': 'unix_epoch'},
|
|
'usUnits': {'units': None},
|
|
'interval': {'units': 'minute'}}
|
|
|
|
|
|
# ============================================================================
|
|
# Error Classes
|
|
# ============================================================================
|
|
|
|
|
|
class WeeImportOptionError(Exception):
|
|
"""Base class of exceptions thrown when encountering an error with a
|
|
command line option.
|
|
"""
|
|
|
|
|
|
class WeeImportMapError(Exception):
|
|
"""Base class of exceptions thrown when encountering an error with an
|
|
external source-to-WeeWX field map.
|
|
"""
|
|
|
|
|
|
class WeeImportIOError(Exception):
|
|
"""Base class of exceptions thrown when encountering an I/O error with an
|
|
external source.
|
|
"""
|
|
|
|
|
|
class WeeImportFieldError(Exception):
|
|
"""Base class of exceptions thrown when encountering an error with a field
|
|
from an external source.
|
|
"""
|
|
|
|
|
|
class WeeImportDecodeError(Exception):
|
|
"""Base class of exceptions thrown when encountering a decode error with an
|
|
external source.
|
|
"""
|
|
|
|
# ============================================================================
|
|
# class Source
|
|
# ============================================================================
|
|
|
|
|
|
class Source(object):
|
|
""" Abstract base class used for interacting with an external data source
|
|
to import records into the WeeWX archive.
|
|
|
|
__init__() must define the following properties:
|
|
dry_run - Is this a dry run (ie do not save imported records
|
|
to archive). [True|False].
|
|
calc_missing - Calculate any missing derived observations.
|
|
[True|False].
|
|
ignore_invalid_data - Ignore any invalid data found in a source field.
|
|
[True|False].
|
|
tranche - Number of records to be written to archive in a
|
|
single transaction. Integer.
|
|
interval - Method of determining interval value if interval
|
|
field not included in data source.
|
|
['config'|'derive'|x] where x is an integer.
|
|
|
|
Child classes are used to interact with a specific source (eg CSV file,
|
|
WU). Any such child classes must define a getRawData() method which:
|
|
- gets the raw observation data and returns an iterable yielding data
|
|
dicts whose fields can be mapped to a WeeWX archive field
|
|
- defines an import data field-to-WeeWX archive field map (self.map)
|
|
|
|
self.raw_datetime_format - Format of date time data field from which
|
|
observation timestamp is to be derived. A
|
|
string in Python datetime string format such
|
|
as '%Y-%m-%d %H:%M:%S'. If the date time
|
|
data field cannot be interpreted as a string
|
|
wee_import attempts to interpret the field
|
|
as a unix timestamp. If the field is not a
|
|
valid unix timestamp an error is raised.
|
|
"""
|
|
|
|
# reg expression to match any HTML tag of the form <...>
|
|
_tags = re.compile(r'\<.*\>')
|
|
|
|
def __init__(self, config_dict, import_config_dict, options):
|
|
"""A generic initialisation.
|
|
|
|
Set some realistic default values for options read from the import
|
|
config file. Obtain objects to handle missing derived obs (if required)
|
|
and QC on imported data. Parse any --date command line option, so we
|
|
know what records to import.
|
|
"""
|
|
|
|
# save our WeeWX config dict
|
|
self.config_dict = config_dict
|
|
|
|
# get our import config dict settings
|
|
# interval, default to 'derive'
|
|
self.interval = import_config_dict.get('interval', 'derive')
|
|
# do we ignore invalid data, default to True
|
|
self.ignore_invalid_data = tobool(import_config_dict.get('ignore_invalid_data',
|
|
True))
|
|
# tranche, default to 250
|
|
self.tranche = to_int(import_config_dict.get('tranche', 250))
|
|
# apply QC, default to True
|
|
self.apply_qc = tobool(import_config_dict.get('qc', True))
|
|
# calc-missing, default to True
|
|
self.calc_missing = tobool(import_config_dict.get('calc_missing', True))
|
|
# decimal separator, default to period '.'
|
|
self.decimal_sep = import_config_dict.get('decimal', '.')
|
|
|
|
# Some sources include UV index and solar radiation values even if no
|
|
# sensor was present. The WeeWX convention is to store the None value
|
|
# when a sensor or observation does not exist. Record whether UV and/or
|
|
# solar radiation sensor was present.
|
|
# UV, default to True
|
|
self.UV_sensor = tobool(import_config_dict.get('UV_sensor', True))
|
|
# solar, default to True
|
|
self.solar_sensor = tobool(import_config_dict.get('solar_sensor', True))
|
|
|
|
# initialise ignore extreme > 255.0 values for temperature and
|
|
# humidity fields for WD imports
|
|
self.ignore_extr_th = False
|
|
|
|
self.db_binding_wx = get_binding(config_dict)
|
|
self.dbm = open_manager_with_config(config_dict, self.db_binding_wx,
|
|
initialize=True,
|
|
default_binding_dict={'table_name': 'archive',
|
|
'manager': 'weewx.wxmanager.DaySummaryManager',
|
|
'schema': 'schemas.wview_extended.schema'})
|
|
# get the unit system used in our db
|
|
if self.dbm.std_unit_system is None:
|
|
# we have a fresh archive (ie no records) so cannot deduce
|
|
# the unit system in use, so go to our config_dict
|
|
self.archive_unit_sys = unit_constants[self.config_dict['StdConvert'].get('target_unit',
|
|
'US')]
|
|
else:
|
|
# get our unit system from the archive db
|
|
self.archive_unit_sys = self.dbm.std_unit_system
|
|
|
|
# initialise the accum dict with any Accumulator config in the config
|
|
# dict
|
|
weewx.accum.initialize(self.config_dict)
|
|
|
|
# get ourselves a QC object to do QC on imported records
|
|
try:
|
|
mm_dict = config_dict['StdQC']['MinMax']
|
|
except KeyError:
|
|
mm_dict = {}
|
|
self.import_QC = weewx.qc.QC(mm_dict)
|
|
|
|
# Process our command line options
|
|
self.dry_run = options.dry_run
|
|
self.verbose = options.verbose
|
|
self.no_prompt = options.no_prompt
|
|
self.suppress = options.suppress
|
|
|
|
# By processing any --date, --from and --to options we need to derive
|
|
# self.first_ts and self.last_ts; the earliest (exclusive) and latest
|
|
# (inclusive) timestamps of data to be imported. If we have no --date,
|
|
# --from or --to then set both to None (we then get the default action
|
|
# for each import type).
|
|
# First we see if we have a valid --date, if not then we look for
|
|
# --from and --to.
|
|
if options.date or options.date == "":
|
|
# there is a --date but is it valid
|
|
try:
|
|
_first_dt = dt.strptime(options.date, "%Y-%m-%d")
|
|
except ValueError:
|
|
# Could not convert --date. If we have a --date it must be
|
|
# valid otherwise we can't continue so raise it.
|
|
_msg = "Invalid --date option specified."
|
|
raise WeeImportOptionError(_msg)
|
|
else:
|
|
# we have a valid date so do some date arithmetic
|
|
_last_dt = _first_dt + datetime.timedelta(days=1)
|
|
self.first_ts = time.mktime(_first_dt.timetuple())
|
|
self.last_ts = time.mktime(_last_dt.timetuple())
|
|
elif options.date_from or options.date_to or options.date_from == '' or options.date_to == '':
|
|
# There is a --from and/or a --to, but do we have both and are
|
|
# they valid.
|
|
# try --from first
|
|
try:
|
|
if 'T' in options.date_from:
|
|
_from_dt = dt.strptime(options.date_from, "%Y-%m-%dT%H:%M")
|
|
else:
|
|
_from_dt = dt.strptime(options.date_from, "%Y-%m-%d")
|
|
_from_ts = time.mktime(_from_dt.timetuple())
|
|
except TypeError:
|
|
# --from not specified we can't continue so raise it
|
|
_msg = "Missing --from option. Both --from and --to must be specified."
|
|
raise WeeImportOptionError(_msg)
|
|
except ValueError:
|
|
# could not convert --from, we can't continue so raise it
|
|
_msg = "Invalid --from option."
|
|
raise WeeImportOptionError(_msg)
|
|
# try --to
|
|
try:
|
|
if 'T' in options.date_to:
|
|
_to_dt = dt.strptime(options.date_to, "%Y-%m-%dT%H:%M")
|
|
else:
|
|
_to_dt = dt.strptime(options.date_to, "%Y-%m-%d")
|
|
# since it is just a date we want the end of the day
|
|
_to_dt += datetime.timedelta(days=1)
|
|
_to_ts = time.mktime(_to_dt.timetuple())
|
|
except TypeError:
|
|
# --to not specified , we can't continue so raise it
|
|
_msg = "Missing --to option. Both --from and --to must be specified."
|
|
raise WeeImportOptionError(_msg)
|
|
except ValueError:
|
|
# could not convert --to, we can't continue so raise it
|
|
_msg = "Invalid --to option."
|
|
raise WeeImportOptionError(_msg)
|
|
# If we made it here we have a _from_ts and _to_ts. Do a simple
|
|
# error check first.
|
|
if _from_ts > _to_ts:
|
|
# from is later than to, raise it
|
|
_msg = "--from value is later than --to value."
|
|
raise WeeImportOptionError(_msg)
|
|
self.first_ts = _from_ts
|
|
self.last_ts = _to_ts
|
|
else:
|
|
# no --date or --from/--to so we take the default, set all to None
|
|
self.first_ts = None
|
|
self.last_ts = None
|
|
|
|
# initialise a few properties we will need during the import
|
|
# answer flags
|
|
self.ans = None
|
|
self.interval_ans = None
|
|
# properties to help with processing multi-period imports
|
|
self.period_no = None
|
|
# total records processed
|
|
self.total_rec_proc = 0
|
|
# total unique records identified
|
|
self.total_unique_rec = 0
|
|
# total duplicate records identified
|
|
self.total_duplicate_rec = 0
|
|
# time we started to first save
|
|
self.t1 = None
|
|
# time taken to process
|
|
self.tdiff = None
|
|
# earliest timestamp imported
|
|
self.earliest_ts = None
|
|
# latest timestamp imported
|
|
self.latest_ts = None
|
|
|
|
# initialise two sets to hold timestamps of records for which we
|
|
# encountered duplicates
|
|
|
|
# duplicates seen over all periods
|
|
self.duplicates = set()
|
|
# duplicates seen over the current period
|
|
self.period_duplicates = set()
|
|
|
|
@staticmethod
|
|
def sourceFactory(options, args):
|
|
"""Factory to produce a Source object.
|
|
|
|
Returns an appropriate object depending on the source type. Raises a
|
|
weewx.UnsupportedFeature error if an object could not be created.
|
|
"""
|
|
|
|
# get some key WeeWX parameters
|
|
# first the config dict to use
|
|
config_path, config_dict = weecfg.read_config(options.config_path, args)
|
|
# get wee_import config dict if it exists
|
|
import_config_path, import_config_dict = weecfg.read_config(None,
|
|
args,
|
|
file_name=options.import_config_path)
|
|
# we should have a source parameter at the root of out import config
|
|
# file, try to get it but be prepared to catch the error.
|
|
try:
|
|
source = import_config_dict['source']
|
|
except KeyError:
|
|
# we have no source parameter so check if we have a single source
|
|
# config stanza, if we do then proceed using that
|
|
_source_keys = [s for s in SUPPORTED_SOURCES if s in import_config_dict.keys]
|
|
if len(_source_keys) == 1:
|
|
# we have a single source config stanza so use that
|
|
source = _source_keys[0]
|
|
else:
|
|
# there is no source parameter and we do not have a single
|
|
# source config stanza so raise an error
|
|
_msg = "Invalid 'source' parameter or no 'source' parameter specified in %s" % import_config_path
|
|
raise weewx.UnsupportedFeature(_msg)
|
|
# if we made it this far we have all we need to create an object
|
|
module_class = '.'.join(['weeimport',
|
|
source.lower() + 'import',
|
|
source + 'Source'])
|
|
return get_object(module_class)(config_dict,
|
|
config_path,
|
|
import_config_dict.get(source, {}),
|
|
import_config_path,
|
|
options)
|
|
|
|
def run(self):
|
|
"""Main entry point for importing from an external source.
|
|
|
|
Source data may be provided as a group of records over a single period
|
|
(eg a single CSV file) or as a number of groups of records covering
|
|
multiple periods(eg a WU multi-day import). Step through each group of
|
|
records, getting the raw data, mapping the data and saving the data for
|
|
each period.
|
|
"""
|
|
|
|
# setup a counter to count the periods of records
|
|
self.period_no = 1
|
|
# obtain the lastUpdate metadata value before we import anything
|
|
last_update = to_int(self.dbm._read_metadata('lastUpdate'))
|
|
with self.dbm as archive:
|
|
if self.first_period:
|
|
# collect the time for some stats reporting later
|
|
self.t1 = time.time()
|
|
# it's convenient to give this message now
|
|
if self.dry_run:
|
|
print('Starting dry run import ...')
|
|
else:
|
|
print('Starting import ...')
|
|
|
|
if self.first_period and not self.last_period:
|
|
# there are more periods so say so
|
|
print("Records covering multiple periods have been identified for import.")
|
|
|
|
# step through our periods of records until we reach the end. A
|
|
# 'period' of records may comprise the contents of a file, a day
|
|
# of WU obs or a month of Cumulus obs
|
|
for period in self.period_generator():
|
|
|
|
# if we are importing multiple periods of data then tell the
|
|
# user what period we are up to
|
|
if not (self.first_period and self.last_period):
|
|
print("Period %d ..." % self.period_no)
|
|
|
|
# get the raw data
|
|
_msg = 'Obtaining raw import data for period %d ...' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
try:
|
|
_raw_data = self.getRawData(period)
|
|
except WeeImportIOError as e:
|
|
print("**** Unable to load source data for period %d." % self.period_no)
|
|
log.info("**** Unable to load source data for period %d." % self.period_no)
|
|
print("**** %s" % e)
|
|
log.info("**** %s" % e)
|
|
print("**** Period %d will be skipped. "
|
|
"Proceeding to next period." % self.period_no)
|
|
log.info("**** Period %d will be skipped. "
|
|
"Proceeding to next period." % self.period_no)
|
|
# increment our period counter
|
|
self.period_no += 1
|
|
continue
|
|
except WeeImportDecodeError as e:
|
|
print("**** Unable to decode source data for period %d." % self.period_no)
|
|
log.info("**** Unable to decode source data for period %d." % self.period_no)
|
|
print("**** %s" % e)
|
|
log.info("**** %s" % e)
|
|
print("**** Period %d will be skipped. "
|
|
"Proceeding to next period." % self.period_no)
|
|
log.info("**** Period %d will be skipped. "
|
|
"Proceeding to next period." % self.period_no)
|
|
print("**** Consider specifying the source file encoding "
|
|
"using the 'source_encoding' config option.")
|
|
log.info("**** Consider specifying the source file encoding "
|
|
"using the 'source_encoding' config option.")
|
|
# increment our period counter
|
|
self.period_no += 1
|
|
continue
|
|
_msg = 'Raw import data read successfully for period %d.' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
|
|
# map the raw data to a WeeWX archive compatible dictionary
|
|
_msg = 'Mapping raw import data for period %d ...' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_mapped_data = self.mapRawData(_raw_data, self.archive_unit_sys)
|
|
_msg = 'Raw import data mapped successfully for period %d.' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
|
|
# save the mapped data to archive
|
|
# first advise the user and log, but only if it's not a dry run
|
|
if not self.dry_run:
|
|
_msg = 'Saving mapped data to archive for period %d ...' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
self.saveToArchive(archive, _mapped_data)
|
|
# advise the user and log, but only if it's not a dry run
|
|
if not self.dry_run:
|
|
_msg = 'Mapped data saved to archive successfully "' \
|
|
'"for period %d.' % self.period_no
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# increment our period counter
|
|
self.period_no += 1
|
|
# The source data has been processed and any records saved to
|
|
# archive (except if it was a dry run).
|
|
|
|
# now update the lastUpdate metadata field, set it to the max of
|
|
# the timestamp of the youngest record imported and the value of
|
|
# lastUpdate from before we started
|
|
new_last_update = max_with_none((last_update, self.latest_ts))
|
|
if new_last_update is not None:
|
|
self.dbm._write_metadata('lastUpdate', str(int(new_last_update)))
|
|
# If necessary, calculate any missing derived fields and provide
|
|
# the user with suitable summary output.
|
|
if self.total_rec_proc == 0:
|
|
# nothing was imported so no need to calculate any missing
|
|
# fields just inform the user what was done
|
|
_msg = 'No records were identified for import. Exiting. Nothing done.'
|
|
print(_msg)
|
|
log.info(_msg)
|
|
else:
|
|
# We imported something, but was it a dry run or not?
|
|
total_rec = self.total_rec_proc + self.total_duplicate_rec
|
|
if self.dry_run:
|
|
# It was a dry run. Skip calculation of missing derived
|
|
# fields (since there are no archive records to process),
|
|
# just provide the user with a summary of what we did.
|
|
_msg = "Finished dry run import"
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = "%d records were processed and %d unique records would "\
|
|
"have been imported." % (total_rec,
|
|
self.total_rec_proc)
|
|
print(_msg)
|
|
log.info(_msg)
|
|
if self.total_duplicate_rec > 1:
|
|
_msg = "%d duplicate records were ignored." % self.total_duplicate_rec
|
|
print(_msg)
|
|
log.info(_msg)
|
|
elif self.total_duplicate_rec == 1:
|
|
_msg = "1 duplicate record was ignored."
|
|
print(_msg)
|
|
log.info(_msg)
|
|
else:
|
|
# It was not a dry run so calculate any missing derived
|
|
# fields and provide the user with a summary of what we did.
|
|
if self.calc_missing:
|
|
# We were asked to calculate missing derived fields, so
|
|
# get a CalcMissing object.
|
|
# First construct a CalcMissing config dict
|
|
# (self.dry_run will never be true). Subtract 0.5
|
|
# seconds from the earliest timestamp as calc_missing
|
|
# only calculates missing derived obs for records
|
|
# timestamped after start_ts.
|
|
calc_missing_config_dict = {'name': 'Calculate Missing Derived Observations',
|
|
'binding': self.db_binding_wx,
|
|
'start_ts': self.earliest_ts-0.5,
|
|
'stop_ts': self.latest_ts,
|
|
'trans_days': 1,
|
|
'dry_run': self.dry_run is True}
|
|
# now obtain a CalcMissing object
|
|
self.calc_missing_obj = weecfg.database.CalcMissing(self.config_dict,
|
|
calc_missing_config_dict)
|
|
_msg = "Calculating missing derived observations ..."
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# do the calculations
|
|
self.calc_missing_obj.run()
|
|
_msg = "Finished calculating missing derived observations"
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# now provide the summary report
|
|
_msg = "Finished import"
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = "%d records were processed and %d unique records " \
|
|
"imported in %.2f seconds." % (total_rec,
|
|
self.total_rec_proc,
|
|
self.tdiff)
|
|
print(_msg)
|
|
log.info(_msg)
|
|
if self.total_duplicate_rec > 1:
|
|
_msg = "%d duplicate records were ignored." % self.total_duplicate_rec
|
|
print(_msg)
|
|
log.info(_msg)
|
|
elif self.total_duplicate_rec == 1:
|
|
_msg = "1 duplicate record was ignored."
|
|
print(_msg)
|
|
log.info(_msg)
|
|
print("Those records with a timestamp already "
|
|
"in the archive will not have been")
|
|
print("imported. Confirm successful import in the WeeWX log file.")
|
|
|
|
def parseMap(self, source_type, source, import_config_dict):
|
|
"""Produce a source field-to-WeeWX archive field map.
|
|
|
|
Data from an external source can be mapped to the WeeWX archive using:
|
|
- a fixed field map (WU),
|
|
- a fixed field map with user specified source units (Cumulus), or
|
|
- a user defined field/units map.
|
|
|
|
All user defined mapping is specified in the import config file.
|
|
|
|
To generate the field map first look to see if we have a fixed map, if
|
|
we do validate it and return the resulting map. Otherwise, look for
|
|
user specified mapping in the import config file, construct the field
|
|
map and return it. If there is neither a fixed map nor a user specified
|
|
mapping then raise an error.
|
|
|
|
Input parameters:
|
|
|
|
source_type: String holding name of the section in
|
|
import_config_dict that holds config details for the
|
|
source being used.
|
|
|
|
source: Iterable holding the source data. Used if import field
|
|
names are included in the source data (eg CSV).
|
|
|
|
import_config_dict: config dict from import config file.
|
|
|
|
Returns a map as a dictionary of elements with each element structured
|
|
as follows:
|
|
|
|
'archive_field_name': {'field_name': 'source_field_name',
|
|
'units': 'unit_name'}
|
|
|
|
where:
|
|
|
|
- archive_field_name is an observation name in the WeeWX
|
|
database schema
|
|
- source_field_name is the name of a field from the external
|
|
source
|
|
- unit_name is the WeeWX unit name of the units used by
|
|
source_field_name
|
|
"""
|
|
|
|
# start with the minimum map
|
|
_map = dict(MINIMUM_MAP)
|
|
|
|
# Do the easy one first, do we have a fixed mapping, if so validate it
|
|
if self._header_map:
|
|
# We have a static map that maps header fields to WeeWX (eg WU).
|
|
# Our static map may have entries for fields that don't exist in our
|
|
# source data so step through each field name in our source data and
|
|
# only add those that exist to our resulting map.
|
|
|
|
# first get a list of fields, source could be a DictReader object
|
|
# or a list of dicts, a DictReader will have a fieldnames property
|
|
try:
|
|
_field_names = source.fieldnames
|
|
except AttributeError:
|
|
# Not a DictReader so need to obtain the dict keys, could just
|
|
# pick a record and extract its keys but some records may have
|
|
# different keys to others. Use sets and a generator
|
|
# comprehension.
|
|
_field_names = set().union(*(list(d.keys()) for d in source))
|
|
# now iterate over the field names
|
|
for _key in _field_names:
|
|
# if we know about the field name add it to our map
|
|
if _key in self._header_map:
|
|
_map[self._header_map[_key]['map_to']] = {'field_name': _key,
|
|
'units': self._header_map[_key]['units']}
|
|
# Do we have a user specified map, if so construct our field map
|
|
elif 'FieldMap' in import_config_dict:
|
|
# we have a user specified map so construct our map dict
|
|
for _key, _item in import_config_dict['FieldMap'].items():
|
|
_entry = option_as_list(_item)
|
|
# expect 2 parameters for each option: source field, units
|
|
if len(_entry) == 2:
|
|
# we have 2 parameter so that's field and units
|
|
_map[_key] = {'field_name': _entry[0],
|
|
'units': _entry[1]}
|
|
# if the entry is not empty then it might be valid ie just a
|
|
# field name (eg if usUnits is specified)
|
|
elif _entry != [''] and len(_entry) == 1:
|
|
# we have 1 parameter so it must be just name
|
|
_map[_key] = {'field_name': _entry[0]}
|
|
else:
|
|
# otherwise it's invalid so ignore it
|
|
pass
|
|
|
|
# now do some crude error checking
|
|
|
|
# dateTime. We must have a dateTime mapping. Check for a
|
|
# 'field_name' field under 'dateTime' and be prepared to catch the
|
|
# error if it does not exist.
|
|
try:
|
|
if _map['dateTime']['field_name']:
|
|
# we have a 'field_name' entry so continue
|
|
pass
|
|
else:
|
|
# something is wrong; we have a 'field_name' entry, but it
|
|
# is not valid so raise an error
|
|
_msg = "Invalid mapping specified in '%s' for " \
|
|
"field 'dateTime'." % self.import_config_path
|
|
raise WeeImportMapError(_msg)
|
|
except KeyError:
|
|
_msg = "No mapping specified in '%s' for field " \
|
|
"'dateTime'." % self.import_config_path
|
|
raise WeeImportMapError(_msg)
|
|
|
|
# usUnits. We don't have to have a mapping for usUnits but if we
|
|
# don't then we must have 'units' specified for each field mapping.
|
|
if 'usUnits' not in _map or _map['usUnits'].get('field_name') is None:
|
|
# no unit system mapping do we have units specified for
|
|
# each individual field
|
|
for _key, _val in _map.items():
|
|
# we don't need to check dateTime and usUnits
|
|
if _key not in ['dateTime', 'usUnits']:
|
|
if 'units' in _val:
|
|
# we have a units field, do we know about it
|
|
if _val['units'] not in weewx.units.conversionDict \
|
|
and _val['units'] not in weewx.units.USUnits.values() \
|
|
and _val['units'] != 'text':
|
|
# we have an invalid unit string so tell the
|
|
# user and exit
|
|
_msg = "Unknown units '%s' specified for " \
|
|
"field '%s' in %s." % (_val['units'],
|
|
_key,
|
|
self.import_config_path)
|
|
raise weewx.UnitError(_msg)
|
|
else:
|
|
# we don't have a units field, that's not allowed
|
|
# so raise an error
|
|
_msg = "No units specified for source field " \
|
|
"'%s' in %s." % (_key,
|
|
self.import_config_path)
|
|
raise WeeImportMapError(_msg)
|
|
|
|
# if we got this far we have a usable map, advise the user what we
|
|
# will use
|
|
_msg = "The following imported field-to-WeeWX field map will be used:"
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
for _key, _val in _map.items():
|
|
if 'field_name' in _val:
|
|
_units_msg = ""
|
|
if 'units' in _val:
|
|
if _val['units'] == 'text':
|
|
_units_msg = " as text"
|
|
else:
|
|
_units_msg = " in units '%s'" % _val['units']
|
|
_msg = " source field '%s'%s --> WeeWX field '%s'" % (_val['field_name'],
|
|
_units_msg,
|
|
_key)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
else:
|
|
# no [[FieldMap]] stanza and no _header_map so raise an error as we
|
|
# don't know what to map
|
|
_msg = "No '%s' field map found in %s." % (source_type,
|
|
self.import_config_path)
|
|
raise WeeImportMapError(_msg)
|
|
return _map
|
|
|
|
def mapRawData(self, data, unit_sys=weewx.US):
|
|
"""Maps raw data to WeeWX archive record compatible dictionaries.
|
|
|
|
Takes an iterable source of raw data observations, maps the fields of
|
|
each row to a list of WeeWX compatible archive records and performs any
|
|
necessary unit conversion.
|
|
|
|
Input parameters:
|
|
|
|
data: iterable that yields the data records to be processed.
|
|
|
|
unit_sys: WeeWX unit system in which the generated records will be
|
|
provided. Omission will result in US customary (weewx.US)
|
|
being used.
|
|
|
|
Returns a list of dicts of WeeWX compatible archive records.
|
|
"""
|
|
|
|
# initialise our list of mapped records
|
|
_records = []
|
|
# initialise some rain variables
|
|
_last_ts = None
|
|
_last_rain = None
|
|
# list of fields we have given the user a warning over, prevents us
|
|
# giving multiple warnings for the same field.
|
|
_warned = []
|
|
# step through each row in our data
|
|
for _row in data:
|
|
_rec = {}
|
|
# first off process the fields that require special processing
|
|
# dateTime
|
|
if 'field_name' in self.map['dateTime']:
|
|
# we have a map for dateTime
|
|
try:
|
|
_raw_dateTime = _row[self.map['dateTime']['field_name']]
|
|
except KeyError:
|
|
_msg = "Field '%s' not found in source "\
|
|
"data." % self.map['dateTime']['field_name']
|
|
raise WeeImportFieldError(_msg)
|
|
# now process the raw date time data
|
|
if isinstance(_raw_dateTime, numbers.Number) or _raw_dateTime.isdigit():
|
|
# Our dateTime is a number, is it a timestamp already?
|
|
# Try to use it and catch the error if there is one and
|
|
# raise it higher.
|
|
try:
|
|
_rec_dateTime = int(_raw_dateTime)
|
|
except ValueError:
|
|
_msg = "Invalid '%s' field. Cannot convert '%s' to " \
|
|
"timestamp." % (self.map['dateTime']['field_name'],
|
|
_raw_dateTime)
|
|
raise ValueError(_msg)
|
|
else:
|
|
# it's a non-numeric string so try to parse it and catch
|
|
# the error if there is one and raise it higher
|
|
try:
|
|
_datetm = time.strptime(_raw_dateTime,
|
|
self.raw_datetime_format)
|
|
_rec_dateTime = int(time.mktime(_datetm))
|
|
except ValueError:
|
|
_msg = "Invalid '%s' field. Cannot convert '%s' to " \
|
|
"timestamp." % (self.map['dateTime']['field_name'],
|
|
_raw_dateTime)
|
|
raise ValueError(_msg)
|
|
# if we have a timeframe of concern does our record fall within
|
|
# it
|
|
if (self.first_ts is None and self.last_ts is None) or \
|
|
self.first_ts < _rec_dateTime <= self.last_ts:
|
|
# we have no timeframe or if we do it falls within it so
|
|
# save the dateTime
|
|
_rec['dateTime'] = _rec_dateTime
|
|
# update earliest and latest record timestamps
|
|
if self.earliest_ts is None or _rec_dateTime < self.earliest_ts:
|
|
self.earliest_ts = _rec_dateTime
|
|
if self.latest_ts is None or _rec_dateTime > self.earliest_ts:
|
|
self.latest_ts = _rec_dateTime
|
|
else:
|
|
# it is not so skip to the next record
|
|
continue
|
|
else:
|
|
# there is no mapped field for dateTime so raise an error
|
|
raise ValueError("No mapping for WeeWX field 'dateTime'.")
|
|
# usUnits
|
|
_units = None
|
|
if 'field_name' in self.map['usUnits']:
|
|
# we have a field map for a unit system
|
|
try:
|
|
# The mapped field is in _row so try to get the raw data.
|
|
# If it's not there then raise an error.
|
|
_raw_units = int(_row[self.map['usUnits']['field_name']])
|
|
except KeyError:
|
|
_msg = "Field '%s' not found in "\
|
|
"source data." % self.map['usUnits']['field_name']
|
|
raise WeeImportFieldError(_msg)
|
|
# we have a value but is it valid
|
|
if _raw_units in unit_nicknames:
|
|
# it is valid so use it
|
|
_units = _raw_units
|
|
else:
|
|
# the units value is not valid so raise an error
|
|
_msg = "Invalid unit system '%s'(0x%02x) mapped from data source. " \
|
|
"Check data source or field mapping." % (_raw_units,
|
|
_raw_units)
|
|
raise weewx.UnitError(_msg)
|
|
# interval
|
|
if 'field_name' in self.map['interval']:
|
|
# We have a map for interval so try to get the raw data. If
|
|
# it's not there raise an error.
|
|
try:
|
|
_tfield = _row[self.map['interval']['field_name']]
|
|
except KeyError:
|
|
_msg = "Field '%s' not found in "\
|
|
"source data." % self.map['interval']['field_name']
|
|
raise WeeImportFieldError(_msg)
|
|
# now process the raw interval data
|
|
if _tfield is not None and _tfield != '':
|
|
try:
|
|
_rec['interval'] = int(_tfield)
|
|
except ValueError:
|
|
_msg = "Invalid '%s' field. Cannot convert '%s' to " \
|
|
"an integer." % (self.map['interval']['field_name'],
|
|
_tfield)
|
|
raise ValueError(_msg)
|
|
else:
|
|
# if it happens to be None then raise an error
|
|
_msg = "Invalid value '%s' for mapped field '%s' at " \
|
|
"timestamp '%s'." % (_tfield,
|
|
self.map['interval']['field_name'],
|
|
timestamp_to_string(_rec['dateTime']))
|
|
raise ValueError(_msg)
|
|
else:
|
|
# we have no mapping so calculate it, wrap in a try..except in
|
|
# case it cannot be calculated
|
|
try:
|
|
_rec['interval'] = self.getInterval(_last_ts, _rec['dateTime'])
|
|
except WeeImportFieldError as e:
|
|
# We encountered a WeeImportFieldError, which means we
|
|
# cannot calculate the interval value, possibly because
|
|
# this record is out of date-time order. We cannot use this
|
|
# record so skip it, advise the user (via console and log)
|
|
# and move to the next record.
|
|
_msg = "Record discarded: %s" % e
|
|
print(_msg)
|
|
log.info(_msg)
|
|
continue
|
|
# now step through the rest of the fields in our map and process
|
|
# the fields that don't require special processing
|
|
for _field in self.map:
|
|
# skip those that have had special processing
|
|
if _field in MINIMUM_MAP:
|
|
continue
|
|
# process everything else
|
|
else:
|
|
# is our mapped field in the record
|
|
if self.map[_field]['field_name'] in _row:
|
|
# yes it is
|
|
# first check to see if this is a text field
|
|
if 'units' in self.map[_field] and self.map[_field]['units'] == 'text':
|
|
# we have a text field, so accept the field
|
|
# contents as is
|
|
_rec[_field] = _row[self.map[_field]['field_name']]
|
|
else:
|
|
# we have a non-text field so try to get a value
|
|
# for the obs but if we can't, catch the error
|
|
try:
|
|
_value = float(_row[self.map[_field]['field_name']].strip())
|
|
except AttributeError:
|
|
# the data has no strip() attribute so chances
|
|
# are it's a number already, or it could
|
|
# (somehow ?) be None
|
|
if _row[self.map[_field]['field_name']] is None:
|
|
_value = None
|
|
else:
|
|
try:
|
|
_value = float(_row[self.map[_field]['field_name']])
|
|
except TypeError:
|
|
# somehow we have data that is not a
|
|
# number or a string
|
|
_msg = "%s: cannot convert '%s' to float at " \
|
|
"timestamp '%s'." % (_field,
|
|
_row[self.map[_field]['field_name']],
|
|
timestamp_to_string(_rec['dateTime']))
|
|
raise TypeError(_msg)
|
|
except ValueError:
|
|
# A ValueError means that float() could not
|
|
# convert the string or number to a float, most
|
|
# likely because we have non-numeric, non-None
|
|
# data. We have some other possibilities to
|
|
# work through before we give up.
|
|
|
|
# start by setting our result to None.
|
|
_value = None
|
|
|
|
# perhaps it is numeric data but with something
|
|
# other that a period as decimal separator, try
|
|
# using float() again after replacing the
|
|
# decimal seperator
|
|
if self.decimal_sep is not None:
|
|
_data = _row[self.map[_field]['field_name']].replace(self.decimal_sep,
|
|
'.')
|
|
try:
|
|
_value = float(_data)
|
|
except ValueError:
|
|
# still could not convert it so pass
|
|
pass
|
|
|
|
# If this is a csv import and we are mapping to
|
|
# a direction field, perhaps we have a string
|
|
# representation of a cardinal, inter-cardinal
|
|
# or secondary inter-cardinal direction that we
|
|
# can convert to degrees
|
|
|
|
if _value is None and hasattr(self, 'wind_dir_map') and \
|
|
self.map[_field]['units'] == 'degree_compass':
|
|
# we have a csv import and we are mapping
|
|
# to a direction field, so try a cardinal
|
|
# conversion
|
|
|
|
# first strip any whitespace and hyphens
|
|
# from the data
|
|
_stripped = re.sub(r'[\s-]+', '',
|
|
_row[self.map[_field]['field_name']])
|
|
# try to use the data as the key in a dict
|
|
# mapping directions to degrees, if there
|
|
# is no match we will have None returned
|
|
try:
|
|
_value = self.wind_dir_map[_stripped.upper()]
|
|
except KeyError:
|
|
# we did not find a match so pass
|
|
pass
|
|
# we have exhausted all possibilities, so if we
|
|
# have a non-None result use it, otherwise we
|
|
# either ignore it or raise an error
|
|
if _value is None and not self.ignore_invalid_data:
|
|
_msg = "%s: cannot convert '%s' to float at " \
|
|
"timestamp '%s'." % (_field,
|
|
_row[self.map[_field]['field_name']],
|
|
timestamp_to_string(_rec['dateTime']))
|
|
raise ValueError(_msg)
|
|
|
|
# some fields need some special processing
|
|
|
|
# rain - if our imported 'rain' field is cumulative
|
|
# (self.rain == 'cumulative') then we need to calculate
|
|
# the discrete rainfall for this archive period
|
|
if _field == "rain":
|
|
if self.rain == "cumulative":
|
|
_rain = self.getRain(_last_rain, _value)
|
|
_last_rain = _value
|
|
_value = _rain
|
|
# wind - check any wind direction fields are within our
|
|
# bounds and convert to 0 to 360 range
|
|
elif _field == "windDir" or _field == "windGustDir":
|
|
if _value is not None and (self.wind_dir[0] <= _value <= self.wind_dir[1]):
|
|
# normalise to 0 to 360
|
|
_value %= 360
|
|
else:
|
|
# outside our bounds so set to None
|
|
_value = None
|
|
# UV - if there was no UV sensor used to create the
|
|
# imported data then we need to set the imported value
|
|
# to None
|
|
elif _field == 'UV':
|
|
if not self.UV_sensor:
|
|
_value = None
|
|
# solar radiation - if there was no solar radiation
|
|
# sensor used to create the imported data then we need
|
|
# to set the imported value to None
|
|
elif _field == 'radiation':
|
|
if not self.solar_sensor:
|
|
_value = None
|
|
|
|
# check and ignore if required temperature and humidity
|
|
# values of 255.0 and greater
|
|
if self.ignore_extr_th \
|
|
and self.map[_field]['units'] in ['degree_C', 'degree_F', 'percent'] \
|
|
and _value >= 255.0:
|
|
_value = None
|
|
|
|
# if there is no mapped field for a unit system we
|
|
# have to do field by field unit conversions
|
|
if _units is None:
|
|
_vt = ValueTuple(_value,
|
|
self.map[_field]['units'],
|
|
weewx.units.obs_group_dict[_field])
|
|
_conv_vt = convertStd(_vt, unit_sys)
|
|
_rec[_field] = _conv_vt.value
|
|
else:
|
|
# we do have a mapped field for a unit system so
|
|
# save the field in our record and continue, any
|
|
# unit conversion will be done in bulk later
|
|
_rec[_field] = _value
|
|
else:
|
|
# no it's not in our record, so set the field in our
|
|
# output to None
|
|
_rec[_field] = None
|
|
# now warn the user about this field if we have not
|
|
# already done so
|
|
if self.map[_field]['field_name'] not in _warned:
|
|
_msg = "Warning: Import field '%s' is mapped to WeeWX " \
|
|
"field '%s' but the" % (self.map[_field]['field_name'],
|
|
_field)
|
|
if not self.suppress:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = " import field '%s' could not be found " \
|
|
"in one or more records." % self.map[_field]['field_name']
|
|
if not self.suppress:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
_msg = " WeeWX field '%s' will be "\
|
|
"set to 'None' in these records." % _field
|
|
if not self.suppress:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# make sure we do this warning once only
|
|
_warned.append(self.map[_field]['field_name'])
|
|
# if we have a mapped field for a unit system with a valid value,
|
|
# then all we need do is set 'usUnits', bulk conversion is taken
|
|
# care of by saveToArchive()
|
|
if _units is not None:
|
|
# we have a mapped field for a unit system with a valid value
|
|
_rec['usUnits'] = _units
|
|
else:
|
|
# no mapped field for unit system, but we have already
|
|
# converted any necessary fields on a field by field basis so
|
|
# all we need do is set 'usUnits', any bulk conversion will be
|
|
# taken care of by saveToArchive()
|
|
_rec['usUnits'] = unit_sys
|
|
# If interval is being derived from record timestamps our first
|
|
# record will have an interval of None. In this case we wait until
|
|
# we have the second record, and then we use the interval between
|
|
# records 1 and 2 as the interval for record 1.
|
|
if len(_records) == 1 and _records[0]['interval'] is None:
|
|
_records[0]['interval'] = _rec['interval']
|
|
_last_ts = _rec['dateTime']
|
|
# this record is done, add it to our list of records to return
|
|
_records.append(_rec)
|
|
# If we have more than 1 unique value for interval in our records it
|
|
# could be a sign of missing data and impact the integrity of our data,
|
|
# so do the check and see if the user wants to continue
|
|
if len(_records) > 0:
|
|
# if we have any records to return do the unique interval check
|
|
# before we return the records
|
|
_start_interval = _records[0]['interval']
|
|
_diff_interval = False
|
|
for _rec in _records:
|
|
if _rec['interval'] != _start_interval:
|
|
_diff_interval = True
|
|
break
|
|
if _diff_interval and self.interval_ans != 'y':
|
|
# we had more than one unique value for interval, warn the user
|
|
_msg = "Warning: Records to be imported contain multiple " \
|
|
"different 'interval' values."
|
|
print(_msg)
|
|
log.info(_msg)
|
|
print(" This may mean the imported data is missing "
|
|
"some records and it may lead")
|
|
print(" to data integrity issues. If the raw data has "
|
|
"a known, fixed interval")
|
|
print(" value setting the relevant 'interval' setting "
|
|
"in wee_import config to")
|
|
print(" this value may give a better result.")
|
|
while self.interval_ans not in ['y', 'n']:
|
|
if self.no_prompt:
|
|
self.interval_ans = 'y'
|
|
else:
|
|
self.interval_ans = input('Are you sure you want to proceed (y/n)? ')
|
|
if self.interval_ans == 'n':
|
|
# the user chose to abort, but we may have already
|
|
# processed some records. So log it then raise a SystemExit()
|
|
if self.dry_run:
|
|
print("Dry run import aborted by user. %d records were processed." % self.total_rec_proc)
|
|
else:
|
|
if self.total_rec_proc > 0:
|
|
print("Those records with a timestamp already in the "
|
|
"archive will not have been")
|
|
print("imported. As the import was aborted before completion "
|
|
"refer to the WeeWX log")
|
|
print("file to confirm which records were imported.")
|
|
raise SystemExit('Exiting.')
|
|
else:
|
|
print("Import aborted by user. No records saved to archive.")
|
|
_msg = "User chose to abort import. %d records were processed. " \
|
|
"Exiting." % self.total_rec_proc
|
|
log.info(_msg)
|
|
raise SystemExit('Exiting. Nothing done.')
|
|
_msg = "Mapped %d records." % len(_records)
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# the user wants to continue, or we have only one unique value for
|
|
# interval so return the records
|
|
return _records
|
|
else:
|
|
_msg = "Mapped 0 records."
|
|
if self.verbose:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# we have no records to return so return None
|
|
return None
|
|
|
|
def getInterval(self, last_ts, current_ts):
|
|
"""Determine an interval value for a record.
|
|
|
|
The interval field can be determined in one of the following ways:
|
|
|
|
- Derived from the raw data. The interval is calculated as the
|
|
difference between the timestamps of consecutive records rounded to
|
|
the nearest minute. In this case interval can change between
|
|
records if the records are not evenly spaced in time or if there
|
|
are missing records. This method is the default and is used when
|
|
the interval parameter in wee_import.conf is 'derive'.
|
|
|
|
- Read from weewx.conf. The interval value is read from the
|
|
archive_interval parameter in [StdArchive] in weewx.conf. In this
|
|
case interval may or may not be the same as the difference in time
|
|
between consecutive records. This method may be of use when the
|
|
import source has a known interval but may be missing a number of
|
|
records which makes deriving the interval from the imported data
|
|
problematic. This method is used when the interval parameter in
|
|
wee_import.conf is 'conf'.
|
|
|
|
Input parameters:
|
|
|
|
last_ts. timestamp of the previous record.
|
|
current_rain. timestamp of the current record.
|
|
|
|
Returns the interval (in minutes) for the current record.
|
|
"""
|
|
|
|
# did we have a number specified in wee_import.conf, if so use that
|
|
try:
|
|
return float(self.interval)
|
|
except ValueError:
|
|
pass
|
|
# how are we getting interval
|
|
if self.interval.lower() == 'conf':
|
|
# get interval from weewx.conf
|
|
return to_int(float(self.config_dict['StdArchive'].get('archive_interval')) / 60.0)
|
|
elif self.interval.lower() == 'derive':
|
|
# get interval from the timestamps of consecutive records
|
|
try:
|
|
_interval = int((current_ts - last_ts) / 60.0)
|
|
# but if _interval < 0 our records are not in date-time order
|
|
if _interval < 0:
|
|
# so raise a WeeImportFieldError exception
|
|
_msg = "Cannot derive 'interval' for record "\
|
|
"timestamp: %s. " % timestamp_to_string(current_ts)
|
|
raise WeeImportFieldError(_msg)
|
|
except TypeError:
|
|
_interval = None
|
|
return _interval
|
|
else:
|
|
# we don't know what to do so raise an error
|
|
_msg = "Cannot derive 'interval'. Unknown 'interval' "\
|
|
"setting in %s." % self.import_config_path
|
|
raise ValueError(_msg)
|
|
|
|
@staticmethod
|
|
def getRain(last_rain, current_rain):
|
|
"""Determine period rainfall from two cumulative rainfall values.
|
|
|
|
If the data source provides rainfall as a cumulative value then the
|
|
rainfall in a period is the simple difference between the two values.
|
|
But we need to take into account some special cases:
|
|
|
|
No last_rain value. Will occur for very first record or maybe in an
|
|
error condition. Need to return 0.0.
|
|
last_rain > current_rain. Occurs when rain counter was reset (maybe
|
|
daily or some other period). Need to return
|
|
current_rain.
|
|
current_rain is None. Could occur if imported rainfall value could not
|
|
be converted to a numeric and config option
|
|
ignore_invalid_data is set.
|
|
|
|
Input parameters:
|
|
|
|
last_rain. Previous rainfall total.
|
|
current_rain. Current rainfall total.
|
|
|
|
Returns the rainfall in the period.
|
|
"""
|
|
|
|
if last_rain is not None:
|
|
# we have a value for the previous period
|
|
if current_rain is not None and current_rain >= last_rain:
|
|
# just return the difference
|
|
return current_rain - last_rain
|
|
else:
|
|
# we are at a cumulative reset point or we current_rain is None,
|
|
# either way we just want current_rain
|
|
return current_rain
|
|
else:
|
|
# we have no previous rain value so return zero
|
|
return 0.0
|
|
|
|
def qc(self, data_dict, data_type):
|
|
""" Apply weewx.conf QC to a record.
|
|
|
|
If qc option is set in the import config file then apply any StdQC
|
|
min/max checks specified in weewx.conf.
|
|
|
|
Input parameters:
|
|
|
|
data_dict: A WeeWX compatible archive record.
|
|
|
|
Returns nothing. data_dict is modified directly with obs outside of QC
|
|
limits set to None.
|
|
"""
|
|
|
|
if self.apply_qc:
|
|
self.import_QC.apply_qc(data_dict, data_type=data_type)
|
|
|
|
def saveToArchive(self, archive, records):
|
|
""" Save records to the WeeWX archive.
|
|
|
|
Supports saving one or more records to archive. Each collection of
|
|
records is processed and saved to archive in transactions of
|
|
self.tranche records at a time.
|
|
|
|
if the import config file qc option was set quality checks on the
|
|
imported record are performed using the WeeWX StdQC configuration from
|
|
weewx.conf. Any missing derived observations are then added to the
|
|
archive record using the WeeWX WXCalculate class if the import config
|
|
file calc_missing option was set. WeeWX API addRecord() method is used
|
|
to add archive records.
|
|
|
|
If --dry-run was set then every aspect of the import is carried out but
|
|
nothing is saved to archive. If --dry-run was not set then the user is
|
|
requested to confirm the import before any records are saved to archive.
|
|
|
|
Input parameters:
|
|
|
|
archive: database manager object for the WeeWX archive.
|
|
|
|
records: iterable that provides WeeWX compatible archive records
|
|
(in dict form) to be written to archive
|
|
"""
|
|
|
|
# do we have any records?
|
|
if records and len(records) > 0:
|
|
# if this is the first period then give a little summary about what
|
|
# records we have
|
|
# TODO. Check that a single period shows correct and consistent console output
|
|
if self.first_period and self.last_period:
|
|
# there is only 1 period, so we can count them
|
|
print("%s records identified for import." % len(records))
|
|
# we do, confirm the user actually wants to save them
|
|
while self.ans not in ['y', 'n'] and not self.dry_run:
|
|
if self.no_prompt:
|
|
self.ans = 'y'
|
|
else:
|
|
print("Proceeding will save all imported records in the WeeWX archive.")
|
|
self.ans = input("Are you sure you want to proceed (y/n)? ")
|
|
if self.ans == 'y' or self.dry_run:
|
|
# we are going to save them
|
|
# reset record counter
|
|
nrecs = 0
|
|
# initialise our list of records for this tranche
|
|
_tranche = []
|
|
# initialise a set for use in our dry run, this lets us
|
|
# give some better stats on records imported
|
|
unique_set = set()
|
|
# step through each record in this period
|
|
for _rec in records:
|
|
# convert our record
|
|
_conv_rec = to_std_system(_rec, self.archive_unit_sys)
|
|
# perform any required QC checks
|
|
self.qc(_conv_rec, 'Archive')
|
|
# add the record to our tranche and increment our count
|
|
_tranche.append(_conv_rec)
|
|
nrecs += 1
|
|
# if we have a full tranche then save to archive and reset
|
|
# the tranche
|
|
if len(_tranche) >= self.tranche:
|
|
# add the record only if it is not a dry run
|
|
if not self.dry_run:
|
|
# add the record only if it is not a dry run
|
|
archive.addRecord(_tranche)
|
|
# add our the dateTime for each record in our tranche
|
|
# to the dry run set
|
|
for _trec in _tranche:
|
|
unique_set.add(_trec['dateTime'])
|
|
# tell the user what we have done
|
|
_msg = "Unique records processed: %d; "\
|
|
"Last timestamp: %s\r" % (nrecs,
|
|
timestamp_to_string(_rec['dateTime']))
|
|
print(_msg, end='', file=sys.stdout)
|
|
sys.stdout.flush()
|
|
_tranche = []
|
|
# we have processed all records but do we have any records left
|
|
# in the tranche?
|
|
if len(_tranche) > 0:
|
|
# we do so process them
|
|
if not self.dry_run:
|
|
# add the record only if it is not a dry run
|
|
archive.addRecord(_tranche)
|
|
# add our the dateTime for each record in our tranche to
|
|
# the dry run set
|
|
for _trec in _tranche:
|
|
unique_set.add(_trec['dateTime'])
|
|
# tell the user what we have done
|
|
_msg = "Unique records processed: %d; "\
|
|
"Last timestamp: %s\r" % (nrecs,
|
|
timestamp_to_string(_rec['dateTime']))
|
|
print(_msg, end='', file=sys.stdout)
|
|
print()
|
|
sys.stdout.flush()
|
|
# update our counts
|
|
self.total_rec_proc += nrecs
|
|
self.total_unique_rec += len(unique_set)
|
|
# mention any duplicates we encountered
|
|
num_duplicates = len(self.period_duplicates)
|
|
self.total_duplicate_rec += num_duplicates
|
|
if num_duplicates > 0:
|
|
if num_duplicates == 1:
|
|
_msg = " 1 duplicate record was identified "\
|
|
"in period %d:" % self.period_no
|
|
else:
|
|
_msg = " %d duplicate records were identified "\
|
|
"in period %d:" % (num_duplicates,
|
|
self.period_no)
|
|
if not self.suppress:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
for ts in sorted(self.period_duplicates):
|
|
_msg = " %s" % timestamp_to_string(ts)
|
|
if not self.suppress:
|
|
print(_msg)
|
|
log.info(_msg)
|
|
# add the period duplicates to the overall duplicates
|
|
self.duplicates |= self.period_duplicates
|
|
# reset the period duplicates
|
|
self.period_duplicates = set()
|
|
elif self.ans == 'n':
|
|
# user does not want to import so display a message and then
|
|
# ask to exit
|
|
_msg = "User chose not to import records. Exiting. Nothing done."
|
|
print(_msg)
|
|
log.info(_msg)
|
|
raise SystemExit('Exiting. Nothing done.')
|
|
else:
|
|
# we have no records to import, advise the user but what we say
|
|
# will depend on if there are any more periods to import
|
|
if self.first_period and self.last_period:
|
|
# there was only 1 period
|
|
_msg = 'No records identified for import.'
|
|
else:
|
|
# multiple periods
|
|
_msg = 'Period %d - no records identified for import.' % self.period_no
|
|
print(_msg)
|
|
# if we have finished record the time taken for our summary
|
|
if self.last_period:
|
|
self.tdiff = time.time() - self.t1
|
|
|
|
|
|
# ============================================================================
|
|
# Utility functions
|
|
# ============================================================================
|
|
|
|
|
|
def get_binding(config_dict):
|
|
"""Get the binding for the WeeWX database."""
|
|
|
|
# Extract our binding from the StdArchive section of the config file. If
|
|
# it's missing, return None.
|
|
if 'StdArchive' in config_dict:
|
|
db_binding_wx = config_dict['StdArchive'].get('data_binding',
|
|
'wx_binding')
|
|
else:
|
|
db_binding_wx = None
|
|
return db_binding_wx
|