Source code for pygpc.sobol_saltelli

import os
import math
import numpy as np
import h5py
from scipy.stats import norm
from scipy.special import binom

##############################################################################
# The following code is based on the Sobol sequence generator by Frances
# Y. Kuo and Stephen Joe. The license terms are provided below.
#
# Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# Neither the names of the copyright holders nor the names of the
# University of New South Wales and the University of Waikato
# and its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
##############################################################################



[docs]
def sobol_sampling(n, dim):
    """Generate (N x D) numpy array of Sobol sequence samples"""
    scale = 31
    result = np.zeros([n, dim])

    # load directions
    fn = os.path.join(os.path.dirname(__file__), "sobol_saltelli_directions.hdf5")

    with h5py.File(fn, "r") as f:
        directions_raw = []
        directions = []

        for key in f.keys():
            directions_raw.append(f[key][:].tolist())

        for i in range(len(directions_raw)):
            for d in directions_raw[i]:
                directions.append(d)

    if dim > len(directions) + 1:
        raise ValueError("Error in Sobol sequence: not enough dimensions")

    ll = int(math.ceil(math.log(n) / math.log(2)))

    if ll > scale:
        raise ValueError("Error in Sobol sequence: not enough bits")

    for i in range(dim):
        v = np.zeros(ll + 1, dtype=int)

        if i == 0:
            for j in range(1, ll + 1):
                v[j] = 1 << (scale - j)  # all m's = 1
        else:
            m = np.array(directions[i - 1], dtype=int)
            a = m[0]
            s = len(m) - 1

            # The following code discards the first row of the ``m`` array
            # Because it has floating point errors, e.g. values of 2.24e-314
            if ll <= s:
                for j in range(1, ll + 1):
                    v[j] = m[j] << (scale - j)
            else:
                for j in range(1, s + 1):
                    v[j] = m[j] << (scale - j)
                for j in range(s + 1, ll + 1):
                    v[j] = v[j - s] ^ (v[j - s] >> s)
                    for k in range(1, s):
                        v[j] ^= ((a >> (s - 1 - k)) & 1) * v[j - k]

        x = int(0)
        for j in range(1, n):
            x ^= v[index_of_least_significant_zero_bit(j - 1)]
            result[j][i] = float(x / math.pow(2, scale))

    return result




[docs]
def index_of_least_significant_zero_bit(value):
    index = 1
    while (value & 1) != 0:
        value >>= 1
        index += 1

    return index




[docs]
def saltelli_sampling(n_samples, dim, calc_second_order=True):
    """Generates model inputs using Saltelli's extension of the Sobol sequence.

    Returns a NumPy matrix containing the model inputs using Saltelli's sampling
    scheme.  Saltelli's scheme extends the Sobol sequence in a way to reduce
    the error rates in the resulting sensitivity index calculations.  If
    calc_second_order is False, the resulting matrix has N * (D + 2)
    rows, where D is the number of parameters.  If calc_second_order is True,
    the resulting matrix has N * (2D + 2) rows.  These model inputs are
    intended to be used with :func:`SALib.analyze.sobol.analyze`.

    Parameters
    ----------
    n_samples : int
        The number of samples to generate
    dim : int
        The number of dimensions
    calc_second_order : bool
        Calculate second-order sensitivities (default True)
    """
    dim = int(dim)
    n_samples = int(n_samples)
    dg = dim

    # How many values of the Sobol sequence to skip
    skip_values = 1000

    # Create base sequence - could be any type of sampling
    base_sequence = sobol_sampling(n_samples + skip_values, 2 * dim)

    if calc_second_order:
        saltelli_sequence = np.zeros([(2 * dg + 2) * n_samples, dim])
    else:
        saltelli_sequence = np.zeros([(dg + 2) * n_samples, dim])
    index = 0

    for i in range(skip_values, n_samples + skip_values):

        # Copy matrix "A"
        for j in range(dim):
            saltelli_sequence[index, j] = base_sequence[i, j]

        index += 1

        # Cross-sample elements of "B" into "A"
        for k in range(dg):
            for j in range(dim):
                if j == k:
                    saltelli_sequence[index, j] = base_sequence[i, j + dim]
                else:
                    saltelli_sequence[index, j] = base_sequence[i, j]

            index += 1

        # Cross-sample elements of "A" into "B"
        # Only needed if you're doing second-order indices (true by default)
        if calc_second_order:
            for k in range(dg):
                for j in range(dim):
                    if j == k:
                        saltelli_sequence[index, j] = base_sequence[i, j]
                    else:
                        saltelli_sequence[index, j] = base_sequence[i, j + dim]

                index += 1

        # Copy matrix "B"
        for j in range(dim):
            saltelli_sequence[index, j] = base_sequence[i, j + dim]

        index += 1
    return saltelli_sequence




[docs]
def get_sobol_indices_saltelli(y, dim, calc_second_order=True, num_resamples=100,
                               conf_level=0.95):
    """Perform Sobol Analysis on model outputs.

    Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where
    each entry is a list of size D (the number of parameters) containing the
    indices in the same order as the parameter file.  If calc_second_order is
    True, the dictionary also contains keys 'S2' and 'S2_conf'.

    Parameters
    ----------
    y : numpy.array
        A NumPy array containing the model outputs
    dim : int
        Number of dimensions
    calc_second_order : bool
        Calculate second-order sensitivities (default True)
    num_resamples : int
        The number of resamples (default 100)
    conf_level : float
        The confidence interval level (default 0.95)

    References
    ----------
    .. [1] Sobol, I. M. (2001).  "Global sensitivity indices for nonlinear
           mathematical models and their Monte Carlo estimates."  Mathematics
           and Computers in Simulation, 55(1-3):271-280,
           doi:10.1016/S0378-4754(00)00270-6.
    .. [2] Saltelli, A. (2002).  "Making best use of model evaluations to
           compute sensitivity indices."  Computer Physics Communications,
           145(2):280-297, doi:10.1016/S0010-4655(02)00280-1.
    .. [3] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and
           S. Tarantola (2010).  "Variance based sensitivity analysis of model
           output.  Design and estimator for the total sensitivity index."
           Computer Physics Communications, 181(2):259-270,
           doi:10.1016/j.cpc.2009.09.018.
    """

    if calc_second_order and y.shape[0] % (2 * dim + 2) == 0:
        n = int(y.shape[0] / (2 * dim + 2))
    elif not calc_second_order and y.shape[0] % (dim + 2) == 0:
        n = int(y.shape[0] / (dim + 2))
    else:
        raise RuntimeError("""
        Incorrect number of samples in model output file.
        Confirm that calc_second_order matches option used during sampling.""")

    if conf_level < 0 or conf_level > 1:
        raise RuntimeError("Confidence level must be between 0-1.")

    # normalize the model output
    y = (y - y.mean(axis=0)) / y.std(axis=0)

    a, b, ab, ba = separate_output_values(y, dim, n, calc_second_order)
    r = np.random.randint(n, size=(n, num_resamples))
    z = norm.ppf(0.5 + conf_level / 2)

    n_sobol = int(dim + binom(dim, 2))
    sobol = np.zeros((n_sobol, y.shape[1]))
    sobol_total = np.zeros((dim, y.shape[1]))
    sobol_total_conf = np.zeros((dim, y.shape[1]))
    sobol_conf = np.zeros((n_sobol, y.shape[1]))
    sobol_idx = [np.nan for _ in range(n_sobol)]
    sobol_idx_bool = np.zeros((n_sobol, dim)).astype(bool)

    # first and total order (+ confidence interval)
    i_sobol = 0
    for j in range(dim):
        sobol[j, :] = first_order(a, ab[:, j, :], b)
        sobol_conf[j, :] = z * first_order(a[r], ab[r, j], b[r]).std(ddof=1)
        sobol_total[j, :] = z * total_order(a, ab[:, j], b)
        sobol_total_conf[j, :] = z * total_order(a[r], ab[r, j], b[r]).std(ddof=1)
        sobol_idx[j] = np.array([j])
        sobol_idx_bool[j, j] = True
        i_sobol += 1

    # Second order (+ confidence interval)
    if calc_second_order:
        for j in range(dim):
            for k in range(j + 1, dim):
                sobol[i_sobol, :] = second_order(a, ab[:, j, :], ab[:, k, :], ba[:, j, :], b)
                sobol_conf[i_sobol, :] = z * second_order(a[r], ab[r, j], ab[r, k], ba[r, j], b[r]).std(ddof=1)
                sobol_idx[i_sobol] = np.array([j, k])
                sobol_idx_bool[i_sobol, [j, k]] = True
                i_sobol += 1

    return sobol, sobol_idx, sobol_idx_bool




[docs]
def first_order(a, ab, b):
    # First order estimator following Saltelli et al. 2010 CPC, normalized by
    # sample variance
    return np.mean(b * (ab - a), axis=0) / np.var(np.r_[a, b], axis=0)




[docs]
def total_order(a, ab, b):
    # Total order estimator following Saltelli et al. 2010 CPC, normalized by
    # sample variance
    return 0.5 * np.mean((a - ab) ** 2, axis=0) / np.var(np.r_[a, b], axis=0)




[docs]
def second_order(a, abj, abk, baj, b):
    # Second order estimator following Saltelli 2002
    vjk = np.mean(baj * abk - a * b, axis=0) / np.var(np.r_[a, b], axis=0)
    sj = first_order(a, abj, b)
    sk = first_order(a, abk, b)

    return vjk - sj - sk




[docs]
def create_si_dict(dim, calc_second_order):
    # initialize empty dict to store sensitivity indices
    s = dict((k, np.zeros(dim)) for k in ('S1', 'S1_conf', 'ST', 'ST_conf'))

    if calc_second_order:
        s['S2'] = np.zeros((dim, dim))
        s['S2'][:] = np.nan
        s['S2_conf'] = np.zeros((dim, dim))
        s['S2_conf'][:] = np.nan

    return s




[docs]
def separate_output_values(y, dim, n, calc_second_order):
    ab = np.zeros((n, dim, y.shape[1]))
    ba = np.zeros((n, dim, y.shape[1])) if calc_second_order else None
    step = 2 * dim + 2 if calc_second_order else dim + 2

    a = y[0:y.shape[0]:step, :]
    b = y[(step - 1):y.shape[0]:step, :]
    for j in range(dim):
        ab[:, j, :] = y[(j + 1):y.shape[0]:step, :]
        if calc_second_order:
            ba[:, j, :] = y[(j + 1 + dim):y.shape[0]:step, :]

    return a, b, ab, ba