# Source code for prms_python.parameters

# -*- coding: utf-8 -*-
'''
parameters.py -- holds the ``Parameters`` class, which provides reading,
modification, plotting, and writing of the standard PRMS parameters input file.
'''

import datetime, calendar
import io, os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.backends.backend_pdf import PdfPages
from mpl_toolkits.axes_grid1 import make_axes_locatable
from collections import OrderedDict

OPJ = os.path.join

class Parameters(object):
    '''
    Disk-based representation of a PRMS parameter file.

    For the sake of memory efficiency, only parameters from ``base_file``
    that are accessed directly or modified through item assignment are loaded.
    Internally, a reference is kept only to previously accessed parameter
    data, so when ``write`` is called most copying is from ``base_file``
    directly.

    When parameters are accessed or modified using the dictionary-like
    syntax, a ``np.ndarray`` representation of the parameter is returned.
    As a result ``numpy`` mathematical rules, including efficient
    vectorization of math applied to arrays, can be used to modify
    parameters directly. The user methods allow for visualization of most
    PRMS parameters, function based modification of parameters, and a write
    function that writes the data back to PRMS text format.

    Arguments:
        base_file (str): path to PRMS parameters file

    Attributes:
        base_file (str): path to PRMS parameters file
        base_file_reader (file): file handle used to parse the parameters
            file on initialization; it is closed once parsing has finished
        dimensions (:obj:`collections.OrderedDict`): dictionary with
            parameter dimensions as defined in the parameters file, loaded
            on initialization
        base_params (list of dicts): list of dictionaries of parameter
            metadata loaded on initialization: name, number of dimensions,
            dimension name(s), length of data array, data type code, and
            the line where the data starts in the file
        param_arrays (dict): dictionary with parameter names as keys and
            ``numpy.ndarray`` representations of parameter values as
            values. Initially empty, filled by item access and assignment.

    Raises:
        ValueError: if ``base_file`` has no ``** Parameters **`` section

    Example:
        >>> p = Parameters('path/to/a/parameter/file')
        >>> p['jh_coef'] = p['jh_coef']*1.1
        >>> p.write('example_modified_params')

        will check that *jh_coef* is present in the parameter file, read the
        lines corresponding to the *jh_coef* data, and assign the new value
        as requested. Calling the ``write`` method next will copy all
        parameters except *jh_coef* to the new parameter file and append the
        newly modified *jh_coef* to the end of the new file from the
        modified values stored in the parameter instance ``p``.
    '''

    def __init__(self, base_file):
        self.base_file = base_file
        self.base_file_reader = open(base_file)
        try:
            self.dimensions, self.base_params = self.__read_base(base_file)
        finally:
            # the handle is exhausted after parsing; close it so that
            # instances do not leak an open file descriptor
            self.base_file_reader.close()
        self.param_arrays = dict()

    def write(self, out_name):
        """
        Writes current state of ``Parameters`` to disk in PRMS text format.

        To reduce memory usage the ``write`` method copies parameters from
        the initial ``base_file`` parameter file for all parameters that
        were never accessed or modified. Accessed and/or modified
        parameters are appended to the end of the output from
        ``param_arrays``.

        Arguments:
            out_name (str): path to write ``Parameters`` data to PRMS text
                format.

        Returns:
            None

        Raises:
            ValueError: if ``out_name`` refers to ``base_file`` itself
            KeyError: if ``param_arrays`` holds a name that is not in
                ``base_params`` or a parameter has an unknown type code
        """
        # PRMS type codes: 1 integer, 2 real, 3 double, 4 character string
        data_type_dic = {'1': 'int', '2': 'float', '3': 'float', '4': 'str'}

        # opening the output truncates it, which would destroy the file
        # we are about to copy from
        if os.path.realpath(out_name) == os.path.realpath(self.base_file):
            raise ValueError(
                'out_name must differ from the base parameter file'
            )

        # resolve everything that can fail before the output is created
        cached = []
        for param, new_arr in self.param_arrays.items():
            param_info = self._metadata_for(param)
            dtype = data_type_dic[param_info['vartype']]
            cached.append((param_info, np.asarray(new_arr), dtype))

        with open(self.base_file, 'r') as base_file:
            with open(out_name, 'w') as out_file:
                # write metadata
                out_file.write('File Auto-generated by PRMS-Python\n')
                out_file.write(datetime.datetime.now().isoformat() + '\n')
                out_file.write('** Dimensions **\n')

                # dimension blocks and untouched parameter blocks share the
                # same "separator, name, payload" layout and are copied by
                # the same state machine
                name_is_next = False
                params_start = False
                copy_lines = False
                for line in base_file:
                    text = line.strip()
                    if not params_start and text == '** Parameters **':
                        out_file.write('** Parameters **\n')
                        params_start = True
                    elif '#' in line:
                        # same separator test the reader uses
                        name_is_next = True
                    elif name_is_next:
                        name = text.split()[0]
                        name_is_next = False
                        # only parameter blocks may be superseded by a
                        # cached array; dimension blocks are always copied
                        copy_lines = (not params_start
                                      or name not in self.param_arrays)
                        if copy_lines:
                            out_file.write('####\n')
                            out_file.write(name + '\n')
                    elif copy_lines:
                        out_file.write(text + '\n')

                # write all parameters that had been accessed and/or modified
                for param_info, new_arr, dtype in cached:
                    out_file.write('####\n')
                    out_file.write(str(param_info['name']) + '\n')
                    out_file.write(str(param_info['ndims']) + '\n')
                    for dimname in param_info['dimnames']:
                        out_file.write(dimname + '\n')
                    out_file.write(str(param_info['length']) + '\n')
                    out_file.write(str(param_info['vartype']) + '\n')
                    out_file.writelines(
                        str(a) + '\n' for a in new_arr.flatten().astype(dtype)
                    )

    def plot(self, nrows, which='all', out_dir=None, xlabel=None,
             ylabel=None, cbar_label=None, title=None, mpl_style=None):
        """
        Versatile method that plots most parameters in a standard PRMS
        parameter file assuming the PRMS model was built on a uniform
        spatial grid.

        Plots parameters as line plots for series or as a 2D spatial grid
        depending on parameter dimension. The PRMS parameter file is assumed
        to hold parameters for a model that was set up on a uniform
        rectangular grid with the spatial index of HRUs starting in the
        upper left corner and moving left to right across columns and down
        rows. Default function is to write four files, each holding
        parameters of a different dimensionality as explained under Kwargs
        ``which`` and in more detail in the example `Jupyter notebook
        <https://github.com/PRMS-Python/PRMS-Python/blob/master/notebooks/param_examples.ipynb>`_.

        Arguments:
            nrows (int): The number of rows in the PRMS model grid for
                plotting spatial parameters. Will only work correctly for
                rectangular gridded models with HRU indices starting in the
                upper left cell moving left to right across columns and
                down across rows.

        Keyword Arguments:
            which (str): name of PRMS parameter to plot or 'all'. If 'all'
                then three multipage pdfs are written, one for nhru
                dimensional parameters, one for nhru by monthly parameters,
                and one for other parameters of length > 1, plus one html
                file containing single valued parameters. If a single
                parameter is named, a line plot or spatial map is drawn on
                a matplotlib figure that is left open for the caller; only
                nhru by nmonths parameters are saved, as ``<name>.pdf``.
            out_dir (str): path to an output dir, default current directory
            xlabel (str): x label for plot(s)
            ylabel (str): y label for plot(s)
            cbar_label (str): label for colorbar on the single spatial plot
            title (str): plot title
            mpl_style (str, list): name or list of names of matplotlib
                style sheets to use for plot(s), default 'classic'.

        Returns:
            None

        Examples:
            If the plot method is called with the keyword argument
            ``which`` set to a parameter that has length one, i.e. single
            valued, it will simply print out the value e.g.:

            >>> p = Parameters('path/to/parameters')
            >>> p.plot(nrows=10, which='radj_sppt')
            radj_sppt is single valued with value: 0.4924942352224324

            The default action makes three multi-page pdfs and one html
            table of most PRMS parameters, where each file contains
            parameters of different dimensions e.g.:

            >>> p.plot(nrows=10, which='all', mpl_style='ggplot')

            will produce the following four files:

            >>> import os
            >>> os.listdir(os.getcwd()) # list files in current directory
            nhru_param_maps.pdf
            nhru_by_nmonths_param_maps.pdf
            non_spatial_param_plots.pdf
            single_valued_params.html
        """
        if not out_dir:
            out_dir = os.getcwd()
        if not os.path.isdir(out_dir):
            # makedirs so that nested output paths also work
            os.makedirs(out_dir)

        nhru = self.dimensions['nhru']
        ncols = nhru // nrows

        if not mpl_style:
            mpl_style = 'classic'
        plt.style.use(mpl_style)

        if which == 'all':
            self._plot_all(nrows, ncols, out_dir)
        else:
            self._plot_one(which, nrows, ncols, out_dir,
                           xlabel, ylabel, cbar_label, title)

    @staticmethod
    def _draw_grid(fig, grid, title, cbar_label=None, xlabel=None,
                   ylabel=None):
        "Draw ``grid`` as an image with a colorbar on figure ``fig``"
        ax = fig.gca()
        # origin upper: parameter indices are assumed to start upper left
        im = ax.imshow(grid, origin='upper')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        if cbar_label is None:
            fig.colorbar(im, cax=cax)
        else:
            fig.colorbar(im, cax=cax, label=cbar_label)
        ax.set_title(title)
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        if ylabel is not None:
            ax.set_ylabel(ylabel)
        # booleans, not 'off' strings: a non-empty string is truthy and
        # would leave the ticks visible
        ax.tick_params(left=False, bottom=False, labelleft=False,
                       labelbottom=False)

    def _plot_all(self, nrows, ncols, out_dir):
        "Write the three multipage pdfs and the html table for 'all'"
        nhru = self.dimensions['nhru']

        ## spatial parameters with dimension of length nhru
        p_names = [param['name'] for param in self.base_params
                   if param['length'] == nhru
                   and len(param['dimnames']) == 1]
        with PdfPages(OPJ(out_dir, 'nhru_param_maps.pdf')) as pdf:
            for p in p_names:
                fig = plt.figure()
                try:
                    self._draw_grid(fig, self[p].reshape(nrows, ncols), p)
                    pdf.savefig(fig)
                except Exception:
                    print('{param} parameter failed to plot'.format(param=p))
                finally:
                    # close on success and failure so figures do not pile up
                    plt.close(fig)

        ## monthly spatial parameters (one plot per month)
        p_names = [param['name'] for param in self.base_params
                   if param['dimnames'][0] == 'nhru'
                   and len(param['dimnames']) == 2]
        with PdfPages(OPJ(out_dir, 'nhru_by_nmonths_param_maps.pdf')) as pdf:
            for p in p_names:
                for i in range(12):  # month
                    month = calendar.month_name[i + 1]
                    fig = plt.figure()
                    try:
                        grid = self[p][i].reshape(nrows, ncols)
                        self._draw_grid(fig, grid, '{} {}'.format(p, month))
                        pdf.savefig(fig)
                    except Exception:
                        print('{param} for {month} failed to plot'.
                              format(param=p, month=month))
                        # give up on the remaining months of this parameter
                        break
                    finally:
                        plt.close(fig)

        ## non spatial parameters with dimension length > 1 to be plotted
        ## as series
        metas = [param for param in self.base_params
                 if (1 < param['length'] <= 366)
                 and param['dimnames'][0] != 'nhru']
        with PdfPages(OPJ(out_dir, 'non_spatial_param_plots.pdf')) as pdf:
            for param_dict in metas:
                p = param_dict['name']
                fig = plt.figure()
                try:
                    ax = fig.gca()
                    ax.plot(np.arange(1, param_dict['length'] + 1, 1),
                            self[p])
                    ax.set_xlabel(param_dict['dimnames'][0])
                    ax.set_ylabel(p)
                    ax.set_xlim(0.5, param_dict['length'] + 0.5)
                    pdf.savefig(fig)
                except Exception:
                    print('{param} parameter failed to plot'.format(param=p))
                finally:
                    plt.close(fig)

        ## html table of parameters with dimension length = 1
        p_names = [param['name'] for param in self.base_params
                   if param['length'] == 1]
        # built in one step; DataFrame.set_value no longer exists in pandas
        df = pd.DataFrame(
            {'value': [np.asarray(self[p]).item() for p in p_names]},
            index=pd.Index(p_names, name='parameter')
        )
        df.to_html(OPJ(out_dir, 'single_valued_params.html'))

    def _plot_one(self, param_name, nrows, ncols, out_dir, xlabel, ylabel,
                  cbar_label, title):
        "Plot the single parameter ``param_name``, or print it if scalar"
        try:
            values = self[param_name]
        except KeyError:
            print('{param} is not a valid PRMS parameter'.
                  format(param=param_name))
            return

        param_dict = self._metadata_for(param_name)
        length = param_dict['length']
        dimnames = param_dict['dimnames']

        # labels for single plots
        if not cbar_label:
            cbar_label = param_name
        if not title:
            title = ''

        # not spatial, one dimensional, length greater than one: line plot
        if param_dict['ndims'] == 1 and (1 < length <= 366) \
                and dimnames[0] != 'nhru':
            if not xlabel:
                xlabel = dimnames[0]
            if not ylabel:
                ylabel = param_name
            plt.plot(np.arange(1, length + 1, 1), values)
            plt.xlim(0.5, length + 0.5)
            plt.xlabel(xlabel)
            plt.ylabel(ylabel)
            plt.title(title)
        # spatial and one dimensional: single map
        elif param_dict['ndims'] == 1 and length == self.dimensions['nhru']:
            fig = plt.figure()
            self._draw_grid(fig, values.reshape(nrows, ncols), title,
                            cbar_label=cbar_label,
                            xlabel=xlabel if xlabel else '',
                            ylabel=ylabel if ylabel else '')
        # spatial monthly parameter: multi-page pdf, one map per month;
        # ndims is tested first so dimnames[1] is only read when it exists
        elif param_dict['ndims'] == 2 and dimnames[0] == 'nhru' \
                and dimnames[1] == 'nmonths':
            if not xlabel:
                xlabel = ''
            if not ylabel:
                ylabel = ''
            file_name = '{}.pdf'.format(param_name)
            with PdfPages(OPJ(out_dir, file_name)) as pdf:
                for i in range(12):  # month
                    fig = plt.figure()
                    try:
                        self._draw_grid(
                            fig, values[i].reshape(nrows, ncols),
                            '{} {}'.format(param_name,
                                           calendar.month_name[i + 1]),
                            xlabel=xlabel, ylabel=ylabel
                        )
                        pdf.savefig(fig)
                    finally:
                        plt.close(fig)
        else:
            print('{param} is single valued with value: {v}'.
                  format(param=param_name, v=values))

    def __read_base(self, base_file):
        "Read base file returning 2-tuple of dimension and params dict"
        params_startline, dimensions = self.__make_dimensions_dict(base_file)
        base_params = self.__make_parameter_dict(base_file, params_startline)

        return (dimensions, base_params)

    def __make_dimensions_dict(self, base_file):
        """
        Extract dimensions and each dimension length. Runs before
        __make_parameter_dict.

        Reads from ``self.base_file_reader`` and stops right after the
        ``** Parameters **`` line, so that ``__make_parameter_dict`` can
        continue from the same handle.

        Returns:
            2-tuple of the zero-based line index of ``** Parameters **``
            and an ``OrderedDict`` mapping dimension name to length

        Raises:
            ValueError: if the file has no ``** Parameters **`` line
        """
        ret = OrderedDict()
        dim_name = ''
        found_dim_start = False
        dimlines = None

        for idx, l in enumerate(self.base_file_reader):
            text = l.strip()
            if text == '** Dimensions **':  # start of dimensions
                found_dim_start = True
            elif '#' in l:  # comments
                continue
            elif text == '** Parameters **':  # start of parameters
                dimlines = idx
                break
            elif found_dim_start:
                # lines alternate between dimension name and its length
                if dim_name == '':
                    dim_name = text
                else:
                    ret[dim_name] = int(l)
                    dim_name = ''

        if dimlines is None:
            raise ValueError(
                "no '** Parameters **' section found in " + str(base_file)
            )

        return (dimlines, ret)

    def __make_parameter_dict(self, base_file, params_startline=0):
        """
        Collect the metadata of every parameter block that follows the
        ``** Parameters **`` line.

        Continues reading ``self.base_file_reader`` from where
        ``__make_dimensions_dict`` stopped.

        Returns:
            list of dicts with keys ``name``, ``ndims``, ``dimnames``,
            ``length``, ``vartype`` and ``data_startline``
        """
        ret = []

        name = ''
        ndims = 0
        dimnames = []
        length = 0
        vartype = ''
        dimnames_read = 0
        data_startline = 0

        def metadata_blob():
            return dict(
                name=name,
                ndims=ndims,
                dimnames=dimnames,
                length=length,
                vartype=vartype,
                data_startline=data_startline
            )

        for idx, l in enumerate(self.base_file_reader):
            text = l.strip()
            if '#' in l:
                # we have a comment; the next lines will be new
                # parameter metadata. No data for the first time through,
                # so we don't want to append a metadata blob with empty
                # values
                if name:
                    ret.append(metadata_blob())

                name = ''
                ndims = 0
                dimnames = []
                length = 0
                vartype = ''
                dimnames_read = 0

            elif not name:
                # in case old format with integer after name
                name = text.split()[0]
            elif not ndims:
                ndims = int(text)
            elif not (dimnames_read == ndims):
                dimnames.append(text)
                dimnames_read += 1
            elif not length:
                length = int(text)
            elif not vartype:
                vartype = text
                # advance one from current position and account for
                # starting to count from zero
                data_startline = params_startline + idx + 2
            # any other line is parameter data and is skipped here

        # the loop stops after the last line, so the final parameter is
        # still pending; skip it when the file held no parameters at all
        if name:
            ret.append(metadata_blob())

        return ret

    def _metadata_for(self, key):
        "Return the metadata dict of parameter ``key`` or raise KeyError"
        matches = [el for el in self.base_params if el['name'] == key]
        if not matches:
            raise KeyError(key)
        return matches[-1]

    def _load_parameter_array(self, param_metadata):
        "Read the data lines of one parameter from ``base_file``"
        startline = param_metadata['data_startline']
        # exactly ``length`` data lines; the separator that follows them
        # is not part of the data
        endline = startline + param_metadata['length']

        with open(self.base_file) as base_file:
            lines = list(itertools.islice(base_file, startline, endline))

        if param_metadata['vartype'] == '4':
            # character strings cannot go through the numeric parser;
            # squeeze so a single value is 0-d like the numeric case
            arr = np.squeeze(np.array([ln.strip() for ln in lines]))
        else:
            arr = np.genfromtxt(lines)

        if param_metadata['ndims'] > 1:
            dimsizes = [
                self.dimensions[d] for d in param_metadata['dimnames']
            ]
            dimsizes.reverse()
            arr = arr.reshape(dimsizes)

        return arr

    def __getitem__(self, key):
        """
        Look up a parameter by its name.

        The array is read from ``base_file`` on first access and cached in
        ``param_arrays`` afterwards.

        Raises:
            KeyError if parameter name is not valid
        """
        if key in self.param_arrays:
            return self.param_arrays[key]

        arr = self._load_parameter_array(self._metadata_for(key))

        # cache the value for future access (but maybe shouldn't?)
        self.param_arrays[key] = arr

        return arr

    def __setitem__(self, key, value):
        """
        Replace the values of parameter ``key``.

        Raises:
            KeyError: if ``key`` is not a parameter of ``base_file``
            ValueError: if ``value`` does not match the shape of the
                already loaded array or, when the parameter has not been
                loaded, the length declared in the file
        """
        param_metadata = self._metadata_for(key)
        # accept scalars and sequences too; ``write`` needs an array
        new_arr = np.asarray(value)

        if key in self.param_arrays:
            cur_arr = self.param_arrays[key]
            if not new_arr.shape == cur_arr.shape:
                raise ValueError('New array does not match existing')
        elif new_arr.size != param_metadata['length']:
            raise ValueError('New array does not match existing')

        self.param_arrays[key] = new_arr
def modify_params(params_in, params_out, param_mods=None):
    '''
    Given a parameter file in and a dictionary of param_mods, write modified
    parameters to params_out.

    Arguments:
        params_in (str): location on disk of the base parameter file
        params_out (str): location on disk where the modified parameters
            will be written

    Keyword Arguments:
        param_mods (dict): param name-keyed, param modification
            function-valued. ``None`` (the default) or an empty dictionary
            writes the parameters without modification.

    Returns:
        None

    Example:
        Below we modify the monthly *jh_coef* parameter by increasing it 10%
        for every month,

        >>> params_in = 'models/lbcd/parameters'
        >>> params_out = 'scenarios/jh_coef_1.1/params'
        >>> scale_10pct = lambda x: x * 1.1
        >>> modify_params(params_in, params_out, {'jh_coef': scale_10pct})

        So param_mods is a dictionary with keys being parameter names and
        values being functions. Each function is called once with the
        array of the parameter it is keyed by and its return value replaces
        that parameter, so it cannot refer to any other parameter.
    '''
    p_in = Parameters(params_in)

    # the default of None must not be iterated
    if param_mods:
        for name, modify in param_mods.items():
            p_in[name] = modify(p_in[name])

    p_in.write(params_out)