mirror of
https://github.com/weewx/weewx.git
synced 2026-04-19 00:56:54 -04:00
* First cut of python logging in wee_import Utilities Guide updates to be done separately * Refactored logging. Removed class WeeImportLog. All logging now done directly to module level logger. Console output now done directly from the module concerned using print function. * Replaced log method calls with info/debug/error method calls
234 lines
8.8 KiB
Python
234 lines
8.8 KiB
Python
#
|
|
# Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com> and
|
|
# Gary Roderick
|
|
#
|
|
# See the file LICENSE.txt for your full rights.
|
|
#
|
|
|
|
"""Module to interact with a CSV file and import raw observational data for
|
|
use with wee_import.
|
|
"""
|
|
|
|
from __future__ import with_statement
|
|
from __future__ import absolute_import
|
|
from __future__ import print_function
|
|
|
|
# Python imports
|
|
import csv
|
|
import logging
|
|
import os
|
|
|
|
# WeeWX imports
|
|
from . import weeimport
|
|
import weewx
|
|
|
|
from weeutil.weeutil import timestamp_to_string, option_as_list
|
|
from weewx.units import unit_nicknames
|
|
|
|
# Module-level logger named after this module; the CSVSource class below
# sends its info and debug messages through it.
log = logging.getLogger(__name__)
|
|
|
|
|
|
# ============================================================================
|
|
# class CSVSource
|
|
# ============================================================================
|
|
|
|
|
|
class CSVSource(weeimport.Source):
    """Class to interact with a CSV format text file.

    Handles the import of data from a CSV format data file with known field
    names.
    """

    # Define a dict to map CSV fields to WeeWX archive fields. For a CSV import
    # these details are specified by the user in the wee_import config file.
    _header_map = None

    def __init__(self, config_dict, config_path, csv_config_dict, import_config_path, options):
        """Initialise a CSVSource object.

        Input parameters:

            config_dict:        the WeeWX config dict, passed through to the
                                parent class.
            config_path:        path to the WeeWX config file, used here for
                                reporting only.
            csv_config_dict:    the CSV import config dict. Must contain
                                'file'; 'raw_datetime_format', 'rain' and
                                'wind_direction' are optional.
            import_config_path: path to the import config file, used for
                                reporting and in error messages.
            options:            parsed command line options; the date,
                                date_from and date_to attributes are read
                                here.

        Raises weewx.ViolatedPrecondition if csv_config_dict has no 'file'
        entry.
        """

        # call our parents __init__
        # NOTE(review): attributes such as verbose, dry_run, dbm, first_ts and
        # last_ts used below are not set in this class; presumably they are
        # set by the parent __init__ - confirm against weeimport.Source.
        super(CSVSource, self).__init__(config_dict,
                                        csv_config_dict,
                                        options)

        # save our import config path
        self.import_config_path = import_config_path
        # save our import config dict
        self.csv_config_dict = csv_config_dict

        # get a few config settings from our CSV config dict
        # string format used to decode the imported field holding our dateTime
        self.raw_datetime_format = self.csv_config_dict.get('raw_datetime_format',
                                                            '%Y-%m-%d %H:%M:%S')
        # is our rain discrete or cumulative
        self.rain = self.csv_config_dict.get('rain', 'cumulative')
        # determine valid range for imported wind direction
        _wind_direction = option_as_list(self.csv_config_dict.get('wind_direction',
                                                                  '0,360'))
        try:
            if float(_wind_direction[0]) <= float(_wind_direction[1]):
                self.wind_dir = [float(_wind_direction[0]),
                                 float(_wind_direction[1])]
            else:
                self.wind_dir = [-360, 360]
        except (IndexError, KeyError, TypeError, ValueError):
            # Any malformed setting falls back to the widest range rather than
            # aborting the import. IndexError covers a setting with fewer than
            # two values, TypeError a setting that is not a sequence of
            # numbers/strings (eg None), ValueError a non-numeric value.
            self.wind_dir = [-360, 360]
        # get our source file path
        try:
            self.source = csv_config_dict['file']
        except KeyError:
            raise weewx.ViolatedPrecondition("CSV source file not specified in '%s'." % import_config_path)
        # initialise our import field-to-WeeWX archive field map
        self.map = None
        # initialise some other properties we will need
        self.start = 1
        self.end = 1
        self.increment = 1

        # tell the user/log what we intend to do; detail lines are printed
        # only when verbose but are always sent to the log at debug level
        _msg = "A CSV import from source file '%s' has been requested." % self.source
        print(_msg)
        log.info(_msg)
        _msg = "The following options will be used:"
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = " config=%s, import-config=%s" % (config_path,
                                                 self.import_config_path)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        if options.date:
            _msg = " source=%s, date=%s" % (self.source, options.date)
        else:
            # we must have --from and --to
            _msg = " source=%s, from=%s, to=%s" % (self.source,
                                                   options.date_from,
                                                   options.date_to)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = " dry-run=%s, calc_missing=%s, ignore_invalid_data=%s" % (self.dry_run,
                                                                         self.calc_missing,
                                                                         self.ignore_invalid_data)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = " tranche=%s, interval=%s, date/time_string_format=%s" % (self.tranche,
                                                                         self.interval,
                                                                         self.raw_datetime_format)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = " rain=%s, wind_direction=%s" % (self.rain, self.wind_dir)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = " UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "Using database binding '%s', which is bound to database '%s'" % (self.db_binding_wx,
                                                                                 self.dbm.database_name)
        print(_msg)
        log.info(_msg)
        _msg = "Destination table '%s' unit system is '%#04x' (%s)." % (self.dbm.table_name,
                                                                        self.archive_unit_sys,
                                                                        unit_nicknames[self.archive_unit_sys])
        print(_msg)
        log.info(_msg)
        if self.calc_missing:
            _msg = "Missing derived observations will be calculated."
            print(_msg)
            log.info(_msg)

        if not self.UV_sensor:
            _msg = "All WeeWX UV fields will be set to None."
            print(_msg)
            log.info(_msg)
        if not self.solar_sensor:
            _msg = "All WeeWX radiation fields will be set to None."
            print(_msg)
            log.info(_msg)
        if options.date or options.date_from:
            _msg = "Observations timestamped after %s and up to and" % timestamp_to_string(self.first_ts)
            print(_msg)
            log.info(_msg)
            _msg = "including %s will be imported." % timestamp_to_string(self.last_ts)
            print(_msg)
            log.info(_msg)
        if self.dry_run:
            _msg = "This is a dry run, imported data will not be saved to archive."
            print(_msg)
            log.info(_msg)

    def getRawData(self, period):
        """Obtain an iterable containing the raw data to be imported.

        Raw data is read and any clean-up/pre-processing carried out before the
        iterable is returned. In this case we will use csv.DictReader. The
        iterable should be of a form where the field names in the field map can
        be used to map the data to the WeeWX archive record format.

        As a side effect self.map is set to the source-to-database field map
        returned by parseMap().

        Input parameters:

            period: a simple counter that is unused but retained to keep the
                    getRawData() signature the same across all classes.

        Returns a csv.DictReader over the cleaned lines of the source file.

        Raises weeimport.WeeImportIOError if the source file does not exist.
        """

        # does our source exist?
        if os.path.isfile(self.source):
            with open(self.source, 'r') as f:
                _raw_data = f.readlines()
        else:
            # if it doesn't we can't go on so raise it
            raise weeimport.WeeImportIOError(
                "CSV source file '%s' could not be found." % self.source)

        # just in case the data has been sourced from the web we will remove
        # any HTML tags and blank lines that may exist
        _clean_data = []
        for _row in _raw_data:
            # get rid of any HTML tags
            # NOTE(review): _tags is not defined in this class; presumably it
            # is a compiled regex inherited from weeimport.Source - confirm.
            _line = ''.join(CSVSource._tags.split(_row))
            if _line != "\n":
                # save anything that is not a blank line
                _clean_data.append(_line)

        # create a dictionary CSV reader, using the first line as the set of keys
        _csv_reader = csv.DictReader(_clean_data)

        # finally, get our source-to-database mapping
        self.map = self.parseMap('CSV', _csv_reader, self.csv_config_dict)

        # return our CSV dict reader
        return _csv_reader

    @staticmethod
    def period_generator():
        """Generator function to control CSV import processing loop.

        Since CSV imports import from a single file this generator need only
        return a single value before it is exhausted.
        """

        yield 1

    @property
    def first_period(self):
        """True if current period is the first period otherwise False.

        For CSV imports there is only one period so it is always the first.
        """

        return True

    @property
    def last_period(self):
        """True if current period is the last period otherwise False.

        For CSV imports there is only one period so it is always the last.
        """

        return True