Source code for prms_python.data

# -*- coding: utf-8 -*-
'''
data.py -- holds ``Data`` class for standard PRMS climate input data.
'''

import pandas as pd
from shutil import copyfile

[docs]class Data(object): """ Object to access or create a PRMS data file with ability to load/assign it to a date-time indexed pandas.DataFrame for data management, analysis and visualization. It can be used to build a new PRMS data file from user defined metadata and a ``pandas.DataFrame`` of PRMS datetime-indexed climatic forcing and observation variables. The class properties ``metadata`` and ``data_frame`` can be later assigned if no ``base_file`` is given on initialization, allowing for the creation of PRMS climatic forcing file in a Python environment. Keyword Arguments: base_file (str, optional): path to standard PRMS data file na_rep (int, optional): how to represent missing values default = -999 Attributes: date_header (list): date and time header for PRMS data file valid_input_variables (tuple): valid hydro-climate variables for PRMS data file Note: If using the ``Data`` class to create a new data file, it is up to the user to ensure that the metadata and :class:`pandas.DataFrame` assigned are correct and compatible. """ ## data file constant attributes date_header = ['year', 'month', 'day', 'hh', 'mm', 'sec'] valid_input_variables = ('gate_ht', 'humidity', 'lake_elev', 'pan_evap', 'precip', 'rain_day', 'runoff', 'snowdepth', 'solrad', 'tmax', 'tmin', 'wind_speed') def __init__(self, base_file=None, na_rep=-999): self.base_file = base_file self.na_rep = na_rep self._metadata = None self._data_frame = None @property def metadata(self): """ :obj:`dict`:A property that gets and sets the header information from a standard PRMS climate input data file held in a Python dictionary. As a property it can be assigned directly to overwrite or create a new PRMS data file. As such the user is in control and must supply the correct syntax for PRMS standard data files, e.g. text lines before header should begin with "//". Here is an example of the information gathered and held in this attribute: Example: >>> data.metadata { 'data_startline' : 6, 'data_variables' : ['runoff 1', 'runoff 2', 'tmin', 'tmax', 'ppt'] 'text_before_header' : "Title of data file \\n //some comments\\nrunoff 2 \\ntmin 1\\ntmax 1\\nppt 1\\nrunoff 2\\ntmin 1 \\ntmax 1\\nppt 1\\n ########################################\\n" } Note: When assigning or creating a new data file, the ``Data.write`` method will assign the appropriate date header that follows the line of number signs "#". Raises: ValueError: if data in metadata is accessed before data is assigned, e.g. if accessed to write a PRMS data file from a ``Data`` instance that was initialized without a valid PRMS data file. TypeError: if an object that is not a Python dictionary is assigned. """ # to avoid overwriting pre-assigned data, check if already exists if isinstance(self._metadata, dict): return self._metadata elif not self.base_file: raise ValueError('No data file was given on initialization') ## starting list for variable names in data file input_data_names = [] text_before_header = str() ## open data file and read header information with open(self.base_file, 'r') as inf: for idx,l in enumerate(inf): text_before_header+=l if idx == 0: ## first line always string identifier of the file- may use later data_head = l.rstrip() elif l.startswith('/'): ## comment lines continue if l.startswith(Data.valid_input_variables): h = l.split() ## split, first name and second number of columns if int(h[1]) > 1: ## more than one input time series of a particular variable for el in range(int(h[1])): tmp = '{var_name} {var_ind}'.format(var_name = h[0], var_ind = el+1) input_data_names.append(tmp) elif int(h[1]) == 1: input_data_names.append(h[0]) if l.startswith('#'): ## end of header info and begin time series input data data_startline = idx+1 ## 0 indexed line of first data entry break self._metadata = dict([('data_startline',data_startline), ('data_variables',input_data_names), ('text_before_header',text_before_header)]) return self._metadata @metadata.setter def metadata(self, dic): if not isinstance(dic, dict): raise TypeError('Must assign a Python dictionary for new Data object/file metadata') self._metadata = dic @property def data_frame(self): """ A property that gets and sets the climatic forcing data for a standard PRMS climate input data file as a :class:`pandas.DataFrame`. Example: d is a Data instance, calling >>> d.data_frame input variables runoff 1 runoff 2 runoff 3 precip tmax tmin date 1996-12-27 0.54 1.6 NaN 0.0 46 32.0 1996-12-28 0.65 1.6 NaN 0.0 45 24.0 1996-12-29 0.80 1.6 NaN 0.0 44 28.0 1996-12-30 0.90 1.6 NaN 0.0 51 33.0 1996-12-31 1.00 1.7 NaN 0.0 47 32.0 shows the date-indexed ``pd.DataFrame`` of the input data that is created when a ``Data`` object is initiated if given a valid ``base_file``, i.e. file path to a PRMS climate data file. Raises: ValueError: if attribute is accessed before either assigning a PRMS data file on ``Data`` initialization or not assigning a compatabile date-indexed ``pandas.DataFrame`` of hydro-climate variables. TypeError: if a data type other than ``pandas.DataFrame`` is assigned. """ if not self._metadata: self.metadata elif not isinstance(self._data_frame, pd.DataFrame) and self.base_file == None: raise ValueError('No data base_file given on initialization, '\ 'therefore you must assign a DataFrame'\ +' before accessing the .data_frame attribute!') # to avoid overwriting pre-assigned data elif isinstance(self._data_frame, pd.DataFrame): return self._data_frame df = pd.read_csv(self.base_file, header = -1, skiprows = self.metadata['data_startline'], delim_whitespace = True, na_values = [self.na_rep]) ## read data file df.columns = Data.date_header + self.metadata['data_variables'] date = pd.Series(pd.to_datetime(df.year * 10000 + df.month * 100 +\ df.day, format = '%Y%m%d'), index = df.index) df.index = pd.to_datetime(date) ## assign datetime index df.drop(Data.date_header, axis = 1, inplace = True) ## unneeded columns df.columns.name = 'input variables' ; df.index.name = 'date' self._data_frame = df return self._data_frame @data_frame.setter def data_frame(self, df): if not isinstance(df, pd.DataFrame): raise TypeError("Must assign a Pandas.DataFrame object for PRMS data input") self._data_frame = df
[docs] def modify(self, func, vars_to_adjust): """ Apply a user defined function to one or more variable(s) in the data file. The ``modify`` method allows for inplace modification of one or more time series inputs in the data file based on a user defined function. Arguments: func (function): function to apply to each variable in vars_to_adjust vars_to_adjust (list or tuple): collection of variable names to apply func to. Returns: None Example: Here is an example of loading a data file, modifying the temperature inputs (*tmin* and *tmax*) by adding two degrees to each element, and rewritting the modified data to disk, >>> d = Data('path_to_data_file') >>> def f(x): return x + 2 >>> d.modify(f,['tmax','tmin']) >>> d.write('data_temp_plus_2') """ if not isinstance(self._data_frame, pd.DataFrame): self.data_frame # will raise ValueError from data_frame property for v in vars_to_adjust: self._data_frame[v] = self._data_frame[v].apply(func)
[docs] def write(self, out_path): """ Writes the current state of the ``Data`` to PRMS text format utilizing the ``Data.metadata`` and ``Data.data_frame`` instance properties. If ``Data.data_frame`` was never accessed or assigned new values then this method simply copies the original PRMS data file to ``out_path``. Arguments: out_path (str): full path to save or copy the current PRMS data in PRMS text format. Returns: None Raises: ValueError: if the ``write`` method is called without assigning either an initial data (``base_file``) path or assigning correct ``metadata`` and ``data_frame`` properties. """ # if file data was never accessed- unchanged if not isinstance(self._data_frame, pd.DataFrame): if self.base_file: copyfile(self.base_file, out_path) else: # if data not from original file and dataframe never assigned raise ValueError('No data base_file was given and'\ +' no data was assigned!') ## reconstruct PRMS data file format, don't overwrite date-indexed else: df = self._data_frame[self.metadata['data_variables']] df['year'] = self._data_frame.index.year df['month'] = self._data_frame.index.month df['day'] = self._data_frame.index.day df['hh'] = df['mm'] = df['sec'] = 0 df = df[Data.date_header + self._metadata['data_variables']] with open(out_path,'w') as outf: # write comment header then data outf.write(self._metadata['text_before_header']) df.to_csv(outf, sep=' ', header=None,\ index=False, na_rep=self.na_rep)