weewx/bin/weeimport/csvimport.py

#
#    Copyright (c) 2009-2019 Tom Keffer <tkeffer@gmail.com> and
#                            Gary Roderick
#
#    See the file LICENSE.txt for your full rights.
#

"""Module to interact with a CSV file and import raw observational data for
use with wee_import.
"""

from __future__ import with_statement
from __future__ import absolute_import
from __future__ import print_function

# Python imports
import csv
import logging
import os

# WeeWX imports
from . import weeimport
import weewx

from weeutil.weeutil import timestamp_to_string, option_as_list
from weewx.units import unit_nicknames

log = logging.getLogger(__name__)


# ============================================================================
#                             class CSVSource
# ============================================================================


class CSVSource(weeimport.Source):
    """Class to interact with a CSV format text file.

    Handles the import of data from a CSV format data file with known field
    names.
    """

    # Define a dict to map CSV fields to WeeWX archive fields. For a CSV import
    # these details are specified by the user in the wee_import config file.
    _header_map = None

    def __init__(self, config_dict, config_path, csv_config_dict, import_config_path, options):

        # call our parents __init__
        super(CSVSource, self).__init__(config_dict,
                                        csv_config_dict,
                                        options)

        # save our import config path
        self.import_config_path = import_config_path
        # save our import config dict
        self.csv_config_dict = csv_config_dict

        # get a few config settings from our CSV config dict
        # string format used to decode the imported field holding our dateTime
        self.raw_datetime_format = self.csv_config_dict.get('raw_datetime_format',
                                                            '%Y-%m-%d %H:%M:%S')
        # is our rain discrete or cumulative
        self.rain = self.csv_config_dict.get('rain', 'cumulative')
        # determine valid range for imported wind direction
        _wind_direction = option_as_list(self.csv_config_dict.get('wind_direction',
                                                                  '0,360'))
        try:
            if float(_wind_direction[0]) <= float(_wind_direction[1]):
                self.wind_dir = [float(_wind_direction[0]),
                                 float(_wind_direction[1])]
            else:
                self.wind_dir = [-360, 360]
        except (KeyError, ValueError):
            self.wind_dir = [-360, 360]
        # get our source file path
        try:
            self.source = csv_config_dict['file']
        except KeyError:
            raise weewx.ViolatedPrecondition("CSV source file not specified in '%s'." % import_config_path)
        # initialise our import field-to-WeeWX archive field map
        self.map = None
        # initialise some other properties we will need
        self.start = 1
        self.end = 1
        self.increment = 1

        # tell the user/log what we intend to do
        _msg = "A CSV import from source file '%s' has been requested." % self.source
        print(_msg)
        log.info(_msg)
        _msg = "The following options will be used:"
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "     config=%s, import-config=%s" % (config_path,
                                                     self.import_config_path)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        if options.date:
            _msg = "     source=%s, date=%s" % (self.source, options.date)
        else:
            # we must have --from and --to
            _msg = "     source=%s, from=%s, to=%s" % (self.source,
                                                       options.date_from,
                                                       options.date_to)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "     dry-run=%s, calc_missing=%s, ignore_invalid_data=%s" % (self.dry_run,
                                                                             self.calc_missing,
                                                                             self.ignore_invalid_data)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "     tranche=%s, interval=%s, date/time_string_format=%s" % (self.tranche,
                                                                             self.interval,
                                                                             self.raw_datetime_format)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "     rain=%s, wind_direction=%s" % (self.rain, self.wind_dir)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "     UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
        if self.verbose:
            print(_msg)
        log.debug(_msg)
        _msg = "Using database binding '%s', which is bound to database '%s'" % (self.db_binding_wx,
                                                                                 self.dbm.database_name)
        print(_msg)
        log.info(_msg)
        _msg = "Destination table '%s' unit system is '%#04x' (%s)." % (self.dbm.table_name,
                                                                        self.archive_unit_sys,
                                                                        unit_nicknames[self.archive_unit_sys])
        print(_msg)
        log.info(_msg)
        if self.calc_missing:
            _msg = "Missing derived observations will be calculated."
            print(_msg)
            log.info(_msg)

        if not self.UV_sensor:
            _msg = "All WeeWX UV fields will be set to None."
            print(_msg)
            log.info(_msg)
        if not self.solar_sensor:
            _msg = "All WeeWX radiation fields will be set to None."
            print(_msg)
            log.info(_msg)
        if options.date or options.date_from:
            _msg = "Observations timestamped after %s and up to and" % timestamp_to_string(self.first_ts)
            print(_msg)
            log.info(_msg)
            _msg = "including %s will be imported." % timestamp_to_string(self.last_ts)
            print(_msg)
            log.info(_msg)
        if self.dry_run:
            _msg = "This is a dry run, imported data will not be saved to archive."
            print(_msg)
            log.info(_msg)

    def getRawData(self, period):
        """Obtain an iterable containing the raw data to be imported.

        Raw data is read and any clean-up/pre-processing carried out before the
        iterable is returned. In this case we will use csv.Dictreader(). The
        iterable should be of a form where the field names in the field map can
        be used to map the data to the WeeWX archive record format.

        Input parameters:

            period: a simple counter that is unused but retained to keep the
                    getRawData() signature the same across all classes.
        """

        # does our source exist?
        if os.path.isfile(self.source):
            with open(self.source, 'r') as f:
                _raw_data = f.readlines()
        else:
            # if it doesn't we can't go on so raise it
            raise weeimport.WeeImportIOError(
                "CSV source file '%s' could not be found." % self.source)

        # just in case the data has been sourced from the web we will remove
        # any HTML tags and blank lines that may exist
        _clean_data = []
        for _row in _raw_data:
            # get rid of any HTML tags
            _line = ''.join(CSVSource._tags.split(_row))
            if _line != "\n":
                # save anything that is not a blank line
                _clean_data.append(_line)

        # create a dictionary CSV reader, using the first line as the set of keys
        _csv_reader = csv.DictReader(_clean_data)

        # finally, get our source-to-database mapping
        self.map = self.parseMap('CSV', _csv_reader, self.csv_config_dict)

        # return our CSV dict reader
        return _csv_reader

    @staticmethod
    def period_generator():
        """Generator function to control CSV import processing loop.

        Since CSV imports import from a single file this generator need only
        return a single value before it is exhausted.
        """

        yield 1

    @property
    def first_period(self):
        """True if current period is the first period otherwise False.

        For CSV imports there is only one period so it is always the first.
        """

        return True

    @property
    def last_period(self):
        """True if current period is the last period otherwise False.

        For CSV imports there is only one period so it is always the last.
        """

        return True