Files
weewx/bin/weeimport/csvimport.py
gjr80 c566d7abe5 First cut of python logging in wee_import (#441)
* First cut of python logging in wee_import
Utilities Guide updates to be done separately

* Refactored logging.
Removed class WeeImportLog.
All logging now done directly to module level logger.
Console output now done directly from the module concerned using print function.

* Replaced log method calls with info/debug/error method calls
2019-09-07 16:22:40 -07:00

234 lines
8.8 KiB
Python

#
# Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com> and
# Gary Roderick
#
# See the file LICENSE.txt for your full rights.
#
"""Module to interact with a CSV file and import raw observational data for
use with wee_import.
"""
from __future__ import with_statement
from __future__ import absolute_import
from __future__ import print_function
# Python imports
import csv
import logging
import os
# WeeWX imports
from . import weeimport
import weewx
from weeutil.weeutil import timestamp_to_string, option_as_list
from weewx.units import unit_nicknames
# Module-level logger; the log.info()/log.debug() calls in this module go
# through it.
log = logging.getLogger(__name__)
# ============================================================================
# class CSVSource
# ============================================================================
class CSVSource(weeimport.Source):
    """Class to interact with a CSV format text file.

    Handles the import of data from a CSV format data file with known field
    names.
    """

    # Define a dict to map CSV fields to WeeWX archive fields. For a CSV import
    # these details are specified by the user in the wee_import config file.
    _header_map = None

    def __init__(self, config_dict, config_path, csv_config_dict, import_config_path, options):
        """Initialise the CSV source and report the import settings.

        Input parameters:
            config_dict:        passed through to the parent class __init__.
            config_path:        path to the config file; only used here when
                                reporting the options in use.
            csv_config_dict:    dict of CSV import settings. 'file' is
                                mandatory; 'raw_datetime_format', 'rain' and
                                'wind_direction' are optional.
            import_config_path: path to the import config file; saved and
                                used in messages.
            options:            parsed command line options; the date,
                                date_from and date_to attributes are read
                                here.

        Raises weewx.ViolatedPrecondition if csv_config_dict has no 'file'
        entry.
        """

        # call our parent's __init__
        # NOTE(review): attributes read below but not set in this method
        # (verbose, dry_run, calc_missing, dbm, first_ts, ...) are presumably
        # set by the parent - confirm against weeimport.Source.
        super(CSVSource, self).__init__(config_dict,
                                        csv_config_dict,
                                        options)

        # save our import config path
        self.import_config_path = import_config_path
        # save our import config dict
        self.csv_config_dict = csv_config_dict

        # get a few config settings from our CSV config dict
        # string format used to decode the imported field holding our dateTime
        self.raw_datetime_format = self.csv_config_dict.get('raw_datetime_format',
                                                            '%Y-%m-%d %H:%M:%S')
        # is our rain discrete or cumulative
        self.rain = self.csv_config_dict.get('rain', 'cumulative')
        # determine valid range for imported wind direction
        _wind_direction = option_as_list(self.csv_config_dict.get('wind_direction',
                                                                  '0,360'))
        try:
            _lower = float(_wind_direction[0])
            _upper = float(_wind_direction[1])
        except (IndexError, KeyError, TypeError, ValueError):
            # Fewer than two values (IndexError), a value that is not a
            # number (ValueError) or a setting that cannot be indexed at all
            # (TypeError); fall back to the widest range rather than abort.
            self.wind_dir = [-360, 360]
        else:
            # a lower bound above the upper bound is also treated as invalid
            if _lower <= _upper:
                self.wind_dir = [_lower, _upper]
            else:
                self.wind_dir = [-360, 360]

        # get our source file path
        try:
            self.source = csv_config_dict['file']
        except KeyError:
            raise weewx.ViolatedPrecondition("CSV source file not specified in '%s'." % import_config_path)

        # initialise our import field-to-WeeWX archive field map
        self.map = None
        # initialise some other properties we will need
        self.start = 1
        self.end = 1
        self.increment = 1

        def _report(msg):
            # always show the message on the console and log it at info level
            print(msg)
            log.info(msg)

        def _report_detail(msg):
            # show the message on the console only when verbose; always log
            # it at debug level
            if self.verbose:
                print(msg)
            log.debug(msg)

        # tell the user/log what we intend to do
        _report("A CSV import from source file '%s' has been requested." % self.source)
        _report_detail("The following options will be used:")
        _report_detail(" config=%s, import-config=%s" % (config_path,
                                                         self.import_config_path))
        if options.date:
            _report_detail(" source=%s, date=%s" % (self.source, options.date))
        else:
            # we must have --from and --to
            _report_detail(" source=%s, from=%s, to=%s" % (self.source,
                                                           options.date_from,
                                                           options.date_to))
        _report_detail(" dry-run=%s, calc_missing=%s, ignore_invalid_data=%s" % (self.dry_run,
                                                                                 self.calc_missing,
                                                                                 self.ignore_invalid_data))
        _report_detail(" tranche=%s, interval=%s, date/time_string_format=%s" % (self.tranche,
                                                                                 self.interval,
                                                                                 self.raw_datetime_format))
        _report_detail(" rain=%s, wind_direction=%s" % (self.rain, self.wind_dir))
        _report_detail(" UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor))
        _report("Using database binding '%s', which is bound to database '%s'" % (self.db_binding_wx,
                                                                                  self.dbm.database_name))
        _report("Destination table '%s' unit system is '%#04x' (%s)." % (self.dbm.table_name,
                                                                         self.archive_unit_sys,
                                                                         unit_nicknames[self.archive_unit_sys]))
        if self.calc_missing:
            _report("Missing derived observations will be calculated.")
        if not self.UV_sensor:
            _report("All WeeWX UV fields will be set to None.")
        if not self.solar_sensor:
            _report("All WeeWX radiation fields will be set to None.")
        if options.date or options.date_from:
            _report("Observations timestamped after %s and up to and" % timestamp_to_string(self.first_ts))
            _report("including %s will be imported." % timestamp_to_string(self.last_ts))
        if self.dry_run:
            _report("This is a dry run, imported data will not be saved to archive.")

    def getRawData(self, period):
        """Obtain an iterable containing the raw data to be imported.

        Raw data is read and any clean-up/pre-processing carried out before the
        iterable is returned. In this case we will use csv.DictReader. The
        iterable should be of a form where the field names in the field map can
        be used to map the data to the WeeWX archive record format.

        Input parameters:
            period: a simple counter that is unused but retained to keep the
                    getRawData() signature the same across all classes.

        Returns the csv.DictReader built from the cleaned source lines. As a
        side effect self.map is set to the result of self.parseMap().

        Raises weeimport.WeeImportIOError if the source file does not exist.
        """

        # does our source exist?
        if os.path.isfile(self.source):
            with open(self.source, 'r') as f:
                _raw_data = f.readlines()
        else:
            # if it doesn't we can't go on so raise it
            raise weeimport.WeeImportIOError(
                "CSV source file '%s' could not be found." % self.source)

        # just in case the data has been sourced from the web we will remove
        # any HTML tags and blank lines that may exist
        # NOTE(review): _tags is not defined in this class; presumably it is a
        # compiled regex inherited from weeimport.Source - confirm.
        _stripped = (''.join(CSVSource._tags.split(_row)) for _row in _raw_data)
        # keep anything that is not a blank line
        _clean_data = [_line for _line in _stripped if _line != "\n"]

        # create a dictionary CSV reader, using the first line as the set of keys
        _csv_reader = csv.DictReader(_clean_data)

        # finally, get our source-to-database mapping
        self.map = self.parseMap('CSV', _csv_reader, self.csv_config_dict)

        # return our CSV dict reader
        return _csv_reader

    @staticmethod
    def period_generator():
        """Generator function to control CSV import processing loop.

        Since CSV imports import from a single file this generator need only
        return a single value before it is exhausted.
        """

        yield 1

    @property
    def first_period(self):
        """True if current period is the first period otherwise False.

        For CSV imports there is only one period so it is always the first.
        """

        return True

    @property
    def last_period(self):
        """True if current period is the last period otherwise False.

        For CSV imports there is only one period so it is always the last.
        """

        return True