Source code for prms_python.parameters

# -*- coding: utf-8 -*-
'''
parameters.py -- holds ``Parameter`` class with multiple functionality for 
the standard PRMS parameters input file.
'''

import datetime, calendar
import io, os
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.backends.backend_pdf import PdfPages
from mpl_toolkits.axes_grid1 import make_axes_locatable
from collections import OrderedDict

OPJ = os.path.join

[docs]class Parameters(object):
    '''
    Disk-based representation of a PRMS parameter file. 
    
    For the sake of memory efficiency, we only load parameters from 
    ``base_file`` that get modified through item assignment or accessed directly. 
    Internally, a reference is kept to only previously accessed parameter data, 
    so when ``write`` is called most copying is from ``base_file`` directly. 
    When parameters are accessed or modified using the dictionary-like syntax, 
    a ``np.ndarray`` representation of the parameter is returned. As a result 
    ``numpy`` mathematical rules including efficient vectorization of math applied 
    to arrays can be applied to modify parameters directly. The ``Parameter`` 
    objects user methods allow for visualization of most PRMS parameters, function 
    based modification of parameters, and a write function that writes the data 
    back to PRMS text format.  

    Arguments:
        base_file (str): path to PRMS parameters file 

    Attributes:
        base_file (str): path to PRMS parameters file 
        base_file_reader (file): file handle of PRMS parameters file
        dimensions (:obj:`collections.OrderedDict`): dictionary with 
            parameter dimensions as defined in parameters file loaded on
            initialization
        base_params (list of dicts): list of dictionaries of parameter
            metadata loaded on initialization e.g. name, dimension(s), data 
            type, length of data array, and lines where data starts and ends 
            in file
        param_arrays (dict): dictionary with parameteter names as keys and
            ``numpy.array`` and ``numpy.ndarray`` representations of parameter
            values as keys. Initially empty, uses getter and setter functions.

    Example:
        >>> p = Parameters('path/to/a/parameter/file')
        >>> p['jh_coef'] = p['jh_coef']*1.1
        >>> p.write('example_modified_params')
    
        will read parameter information from the params file to check that
        *jh_coef* is present in the parameter file, read the lines corresponding
        to *jh_coef* data and assign the new value as requested. Calling
        the ``write`` method next will copy all parameters except *jh_coef*
        to the new parameter file and append the newly modified *jh_coef*
        to the end of the new file from the modified values stored in the
        parameter instance ``p``. 
    '''

    def __init__(self, base_file):
        self.base_file = base_file
        self.base_file_reader = open(base_file)
        self.dimensions, self.base_params = self.__read_base(base_file)
        self.param_arrays = dict()

[docs]    def write(self, out_name):
        """
        Writes current state of ``Parameters`` to disk in PRMS text format

        To reduce memory usage the ``write`` method copies parameters
        from the initial ``base_file`` parameter file for all parameters
        that were never modified. 

        Arguments:
            out_name (str): path to write ``Parameters`` data to PRMS text
                format.

        Returns:
            None
        """
        data_type_dic = {'1': 'int', 
                         '2': 'float'} # retain PRMS data types

        with open(self.base_file, 'r') as base_file:
            with open(out_name, 'w') as out_file:
                # write metadata
                out_file.write('File Auto-generated by PRMS-Python\n')
                out_file.write(datetime.datetime.now().isoformat() + '\n')

                # # write dimensions
                out_file.write('** Dimensions **\n')

                # write parameters; pre-sorted by data start line on read
                name_is_next = False
                params_start = False
                write_params_lines = False
                for l in base_file:

                    if not params_start and l.strip() == '** Parameters **':
                        out_file.write('** Parameters **\n')
                        params_start = True

                    elif l.strip() == '####':
                        name_is_next = True

                    elif name_is_next:
                        name = l.strip().split()[0] 
                        if name not in self.param_arrays:
                            out_file.write('####\n')
                            out_file.write(name + '\n')
                            name_is_next = False
                            write_params_lines = True
                        else:
                            write_params_lines = False
                            name_is_next = False

                    elif write_params_lines:
                        out_file.write(l.strip() + '\n')

                # write all parameters that had been accessed and/or modified
                for param, new_arr in self.param_arrays.items():

                    out_file.write('####\n')

                    param_info = [el for el in self.base_params
                                  if el['name'] == param].pop()

                    out_file.write(str(param_info['name']) + '\n')
                    out_file.write(str(param_info['ndims']) + '\n')
                    for dimname in param_info['dimnames']:
                        out_file.write(dimname + '\n')
                    out_file.write(str(param_info['length']) + '\n')
                    out_file.write(str(param_info['vartype']) + '\n')
                    out_file.writelines([str(a) + '\n'
                                         for a in new_arr.flatten().\
                                        astype(data_type_dic[param_info\
                                        ['vartype']])])

[docs]    def plot(self, nrows, which='all', out_dir=None, xlabel=None,\
                    ylabel=None, cbar_label=None, title=None, mpl_style=None):
        """
        Versatile method that plots most parameters in a standard PRMS parameter
        file assuming the PRMS model was built on a uniform spatial grid. 
        
        Plots parameters as line plots for series or 2D spatial grid depending on 
        parameter dimension. The PRMS parameter file is assumed to hold parameters 
        for a model that was set up on a uniform rectangular grid with the spatial 
        index of HRUs starting in the upper left corner and moving left to 
        right across columns and down rows. Default function is to print four
        files, each with plots of varying parameter dimensions as explained 
        under Kwargs ``which`` and more detailed explanation in the example
        `Jupyter notebook <https://github.com/PRMS-Python/PRMS-Python/blob/master/notebooks/param_examples.ipynb>`_.
        
        Arguments:
            nrows (int): The number of rows in the PRMS model grid for plotting 
                spatial parameters. Will only work correctly for rectangular gridded models 
                with HRU indices starting in the upper left cell moving left to right 
                across columns and down across rows.
        
        Keyword Arguments:
            which (str): name of PRMS parameter to plot or 'all'. If 'all' then
                the function will print 3 multipage pdfs, one for nhru 
                dimensional parameters, one for nhru by monthly parameters, one 
                for other parameters of length > 1, and one html file containing
                single valued parameters.
            out_dir (str): path to an output dir, default current directory
            xlabel (str): x label for plot(s)
            ylabel (str): y label for plot(s)
            cbar_label (str): label for colorbar on spatial plot(s)
            title (str): plot title
            mpl_style (str, list): name or list of names of matplotlib style sheets to 
                use for plot(s).
    
        Returns: 
            None

        Examples:
            If the plot method is called with the keyword argument ``which`` set
            to a parameter that has length one, i.e. single valued it will simply
            print out the value e.g.:

            >>> p = Parameters('path/to/parameters')
            >>> p.plot(nrows=10, which='radj_sppt')
                radj_sppt is single valued with value: 0.4924942352224324

            The default action is particularly useful which makes four multi-page
            pdfs of most PRMS parameters where each file contains parameters
            of different dimensions e.g.:

            >>> p.plot(nrows=10, which='all', mpl_style='ggplot')

            will produce the following four files named by parameters of certain
            dimensions:

            >>> import os
            >>> os.listdir(os.getcwd()) # list files in current directory
                nhru_param_maps.pdf    
                nhru_by_nmonths_param_maps.pdf
                non_spatial_param_plots.pdf  
                single_valued_params.html
    
        """
        params = self
    
        if not isinstance(params, Parameters):
            raise TypeError('params must be instance of Parameters, not '\
                           + str(type(params)))
        if not out_dir:
            out_dir = os.getcwd()
        
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
            
        nhru = params.dimensions['nhru']
        ncols = nhru // nrows
        
        if not mpl_style:
            mpl_style = 'classic'
        plt.style.use(mpl_style)
    
        # make pdfs and html of all parameters seperated in 4 files based on dimension
        if which == 'all':
            ## spatial parameters with dimension of length nhru
            p_names = [param['name'] for param in params.base_params if\
                       param['length'] == nhru and len(param['dimnames'])==1]
            with PdfPages(OPJ(out_dir,'nhru_param_maps.pdf')) as pdf:
                for p in p_names:
                    try:
                        plt.figure()
                        ax = plt.gca()
                        im = ax.imshow(params['{}'.format(p)].reshape(nrows,ncols), origin='upper')
                        # origin upper- assumes indices of parameters starts in upper left
                        divider = make_axes_locatable(ax)
                        cax = divider.append_axes("right", size="5%", pad=0.05)
                        plt.colorbar(im, cax=cax)
                        ax.set_title('{}'.format(p))
                        ax.tick_params(left='off', bottom='off', labelleft='off',labelbottom='off')
                        pdf.savefig()
                        plt.close()
                    except:
                        print('{param} parameter failed to plot'.format(param=p))
                        
            ## monthly spatial parameters (on plot per month)
            p_names = [param['name'] for param in params.base_params if\
                       param['dimnames'][0] == 'nhru' and len(param['dimnames'])==2 ]
            with PdfPages(OPJ(out_dir,'nhru_by_nmonths_param_maps.pdf')) as pdf:
                for p in p_names:
                    try:
                        for i in range(12): #month
                            plt.figure()
                            ax = plt.gca()
                            im = ax.imshow(params['{}'.format(p)][i].reshape(nrows, ncols), origin='upper')
                            divider = make_axes_locatable(ax)
                            cax = divider.append_axes("right", size="5%", pad=0.05)
                            plt.colorbar(im, cax=cax)
                            ax.set_title('{} {}'.format(p, calendar.month_name[i+1]))
                            ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
                            pdf.savefig()
                            plt.close()
                    except:
                        print('{param} for {month} failed to plot'.\
                              format(param=p, month=calendar.month_name[i+1]))
                        
            ## non spatial parameters with dimension length > 1 to be plotted as time series
            p_names = [param['name'] for param in params.base_params if\
                       ( 1 < param['length'] <= 366 )\
                       and param['dimnames'][0] != 'nhru' ]
            with PdfPages(OPJ(out_dir,'non_spatial_param_plots.pdf')) as pdf:
                for p in p_names:
                    try:
                        param_dict = [param for param in params.base_params if param['name'] == p][0]
                        plt.plot(np.arange(1, param_dict['length']+1, 1), params[p])
                        plt.xlabel(param_dict['dimnames'][0])
                        plt.ylabel(p)
                        plt.xlim(0.5, param_dict['length']+0.5)
                        pdf.savefig()
                        plt.close()
                    except:
                        print('{param} parameter failed to plot'.format(param=p))
    
            ## html table of parameters with dimension length = 1 
            p_names = [param['name'] for param in params.base_params if param['length'] == 1]
            df = pd.DataFrame()
            df.index.name = 'parameter'
            for p in p_names:
                df.set_value(p, 'value', params[p])
            df.to_html(OPJ(out_dir,'single_valued_params.html'))
        ################################################################                
        # plot single parameter, in case of nhru by monthly param, 
        # save multi-page pdf 
        else:
            param_name = which
            try:
                params[which]
            except:
                print('{param} is not a valid PRMS parameter'.format(param=param_name))
                return
            
            param_dict = [param for param in params.base_params if param['name'] == param_name][0]
            
            # labels for single plots
            if not cbar_label:
                cbar_label = param_name
            if not title:
                title = ''
                       
            # if parameter is not spatial, one dimensional, with length greater than one, plot as line
            if param_dict['ndims'] == 1 and ( 1 < param_dict['length'] <= 366 )\
                                        and param_dict['dimnames'][0] != 'nhru':
                if not xlabel:
                    xlabel = param_dict['dimnames'][0]
                if not ylabel:
                    ylabel = param_name
                plt.plot(np.arange(1, param_dict['length']+1,1), params[param_name])
                plt.xlim(0.5, param_dict['length']+0.5)
                plt.xlabel(xlabel)
                plt.ylabel(ylabel)
                plt.title(title)
            # if spatial and one dimensional, plot
            elif param_dict['ndims'] == 1 and param_dict['length'] == params.dimensions['nhru']:
                if not xlabel:
                    xlabel = ''
                if not ylabel:
                    ylabel = ''
                plt.figure()
                ax = plt.gca()
                im = ax.imshow(params[param_name].reshape(nrows,ncols), origin='upper')
                divider = make_axes_locatable(ax)
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(im, cax=cax,label=cbar_label)
                ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
                ax.set_title(title)
                ax.set_ylabel(ylabel)
                ax.set_xlabel(xlabel)
            # spatial monthly parameter
            elif param_dict['dimnames'][0] == 'nhru' and param_dict['dimnames'][1] == 'nmonths'\
                 and param_dict['ndims'] == 2: 
                if not xlabel:
                    xlabel = ''
                if not ylabel:
                    ylabel = ''
                file_name = '{}.pdf'.format(param_name)
                with PdfPages(OPJ(out_dir, file_name)) as pdf:
                    for i in range(12): #month
                        plt.figure()
                        ax = plt.gca()
                        im = ax.imshow(params['{}'.format(param_name)][i].reshape(nrows,ncols), origin='upper')
                        divider = make_axes_locatable(ax)
                        cax = divider.append_axes("right", size="5%", pad=0.05)
                        plt.colorbar(im, cax=cax)
                        ax.set_title('{} {}'.format(param_name, calendar.month_name[i+1]))
                        ax.set_ylabel(ylabel)
                        ax.set_xlabel(xlabel)
                        ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
                        pdf.savefig()
                        plt.close()
            else:
                val = params[param_name]
                print('{param} is single valued with value: {v}'.format(param=param_name, v=val))          

    def __read_base(self, base_file):
        "Read base file returning 2-tuple of dimension and params dict"

        params_startline, dimensions = self.__make_dimensions_dict(base_file)
        base_params = self.__make_parameter_dict(base_file, params_startline)

        return (dimensions, base_params)

    def __make_dimensions_dict(self, base_file):
        """
        Extract dimensions and each dimension length. Runs before
        __make_parameter_dict.
        """
        ret = OrderedDict()

        dim_name = ''
        dim_len = 0
        # finished = False
        found_dim_start = False
        # while not finished:
        for idx, l in enumerate(self.base_file_reader):

            if l.strip() == '** Dimensions **':  # start of dimensions
                found_dim_start = True

            elif '#' in l:  # comments
                pass

            elif l.strip() == '** Parameters **':  # start of parameters
                dimlines = idx
                # finished = True
                break

            elif found_dim_start:

                if dim_name == '':
                    dim_name = l.strip()
                else:
                    dim_len = int(l)
                    ret.update({dim_name: dim_len})
                    dim_name = ''

        return (dimlines, ret)

    def __make_parameter_dict(self, base_file, params_startline=0):
        ret = []

        name = ''
        ndims = 0
        dimnames = []
        length = 0
        vartype = ''

        dimnames_read = 0
        data_startline = 0

        for idx, l in enumerate(self.base_file_reader):

            if '#' in l:
                # we have a comment; the next lines will be new
                # parameter metadata. No data for the first time through, so
                # we don't want to append an metadata blob with empty values
                if name:
                    ret.append(
                        dict(
                            name=name,
                            ndims=ndims,
                            dimnames=dimnames,
                            length=length,
                            vartype=vartype,
                            data_startline=data_startline
                        )
                    )

                    name = ''
                    ndims = 0
                    dimnames = []
                    length = 0
                    vartype = ''
                    dimnames_read = 0

            elif not name:
                name = l.strip().split()[0] # in case old format with integer after name

            elif not ndims:
                ndims = int(l.strip())

            elif not (dimnames_read == ndims):
                dimnames.append(l.strip())
                dimnames_read += 1

            elif not length:
                length = int(l.strip())

            elif not vartype:
                vartype = l.strip()
                # advance one from current position and account for starting
                # to count from zero
                data_startline = params_startline + idx + 2

        # need to append one more time since iteration will have stopped after
        # last line
        ret.append(
            dict(
                name=name,
                ndims=ndims,
                dimnames=dimnames,
                length=length,
                vartype=vartype,
                data_startline=data_startline
            )
        )

        return ret

    def __getitem__(self, key):
        """
        Look up a parameter by its name.

        Raises:
            KeyError if parameter name is not valid
        """
        def load_parameter_array(param_metadata):

            startline = param_metadata['data_startline']
            endline = startline + param_metadata['length'] + 1

            param_slice = itertools.islice(
                    io.open(self.base_file, 'rb'), startline, endline
                )

            arr = np.genfromtxt(param_slice)

            if param_metadata['ndims'] > 1:
                dimsizes = [
                    self.dimensions[d] for d in param_metadata['dimnames']
                ]
                dimsizes.reverse()
                arr = arr.reshape(dimsizes)

            return arr

        if key in self.param_arrays:
            return self.param_arrays[key]

        else:
            try:
                param_metadata = [
                    el for el in self.base_params if el['name'] == key
                ].pop()

            except IndexError:
                raise KeyError(key)

            arr = load_parameter_array(param_metadata)

            # cache the value for future access (but maybe shouldn't?)
            self.param_arrays.update({key: arr})

            return arr

    def __setitem__(self, key, value):

        if key in self.param_arrays:
            cur_arr = self.param_arrays[key]
            if not value.shape == cur_arr.shape:
                raise ValueError('New array does not match existing')

            self.param_arrays[key] = value

[docs]def modify_params(params_in, params_out, param_mods=None):
    '''
    Given a parameter file in and a dictionary of param_mods, write modified
    parameters to params_out.


    Arguments:
        params_in (str): location on disk of the base parameter file
        params_out (str): location on disk where the modified parameters will 
            be written

    Keyword Arguments:
        param_mods (dict): param name-keyed, param modification function-valued

    Returns:
        None

    Example:
        Below we modify the monthly *jh_coef* parameter by increasing it 10% 
        for every month,
    
            >>> params_in = 'models/lbcd/parameters'
            >>> params_out = 'scenarios/jh_coef_1.1/params'
            >>> scale_10pct = lambda x: x * 1.1
            >>> modify_params(params_in, params_out, {'jh_coef': scale_10pct})
    
        So param_mods is a dictionary of with keys being parameter names and
        values a function that operates on a single value. Currently we only
        accept functions that operate without reference to any other 
        parameters. The function will be applied to every cell, month, or 
        cascade routing rule for which the parameter is defined.
    '''
    p_in = Parameters(params_in)

    for k in param_mods:
        p_in[k] = param_mods[k](p_in[k])

    p_in.write(params_out)
Navigation

Related Topics

Donate/support

Source code for prms_python.parameters