Source code for pygpc.sobol_saltelli

import os
import math
import numpy as np
import h5py
from scipy.stats import norm
from scipy.special import binom

##############################################################################
# The following code is based on the Sobol sequence generator by Frances
# Y. Kuo and Stephen Joe. The license terms are provided below.
#
# Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# Neither the names of the copyright holders nor the names of the
# University of New South Wales and the University of Waikato
# and its contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
##############################################################################


[docs] def sobol_sampling(n, dim): """Generate (N x D) numpy array of Sobol sequence samples""" scale = 31 result = np.zeros([n, dim]) # load directions fn = os.path.join(os.path.dirname(__file__), "sobol_saltelli_directions.hdf5") with h5py.File(fn, "r") as f: directions_raw = [] directions = [] for key in f.keys(): directions_raw.append(f[key][:].tolist()) for i in range(len(directions_raw)): for d in directions_raw[i]: directions.append(d) if dim > len(directions) + 1: raise ValueError("Error in Sobol sequence: not enough dimensions") ll = int(math.ceil(math.log(n) / math.log(2))) if ll > scale: raise ValueError("Error in Sobol sequence: not enough bits") for i in range(dim): v = np.zeros(ll + 1, dtype=int) if i == 0: for j in range(1, ll + 1): v[j] = 1 << (scale - j) # all m's = 1 else: m = np.array(directions[i - 1], dtype=int) a = m[0] s = len(m) - 1 # The following code discards the first row of the ``m`` array # Because it has floating point errors, e.g. values of 2.24e-314 if ll <= s: for j in range(1, ll + 1): v[j] = m[j] << (scale - j) else: for j in range(1, s + 1): v[j] = m[j] << (scale - j) for j in range(s + 1, ll + 1): v[j] = v[j - s] ^ (v[j - s] >> s) for k in range(1, s): v[j] ^= ((a >> (s - 1 - k)) & 1) * v[j - k] x = int(0) for j in range(1, n): x ^= v[index_of_least_significant_zero_bit(j - 1)] result[j][i] = float(x / math.pow(2, scale)) return result
[docs] def index_of_least_significant_zero_bit(value): index = 1 while (value & 1) != 0: value >>= 1 index += 1 return index
[docs] def saltelli_sampling(n_samples, dim, calc_second_order=True): """Generates model inputs using Saltelli's extension of the Sobol sequence. Returns a NumPy matrix containing the model inputs using Saltelli's sampling scheme. Saltelli's scheme extends the Sobol sequence in a way to reduce the error rates in the resulting sensitivity index calculations. If calc_second_order is False, the resulting matrix has N * (D + 2) rows, where D is the number of parameters. If calc_second_order is True, the resulting matrix has N * (2D + 2) rows. These model inputs are intended to be used with :func:`SALib.analyze.sobol.analyze`. Parameters ---------- n_samples : int The number of samples to generate dim : int The number of dimensions calc_second_order : bool Calculate second-order sensitivities (default True) """ dim = int(dim) n_samples = int(n_samples) dg = dim # How many values of the Sobol sequence to skip skip_values = 1000 # Create base sequence - could be any type of sampling base_sequence = sobol_sampling(n_samples + skip_values, 2 * dim) if calc_second_order: saltelli_sequence = np.zeros([(2 * dg + 2) * n_samples, dim]) else: saltelli_sequence = np.zeros([(dg + 2) * n_samples, dim]) index = 0 for i in range(skip_values, n_samples + skip_values): # Copy matrix "A" for j in range(dim): saltelli_sequence[index, j] = base_sequence[i, j] index += 1 # Cross-sample elements of "B" into "A" for k in range(dg): for j in range(dim): if j == k: saltelli_sequence[index, j] = base_sequence[i, j + dim] else: saltelli_sequence[index, j] = base_sequence[i, j] index += 1 # Cross-sample elements of "A" into "B" # Only needed if you're doing second-order indices (true by default) if calc_second_order: for k in range(dg): for j in range(dim): if j == k: saltelli_sequence[index, j] = base_sequence[i, j] else: saltelli_sequence[index, j] = base_sequence[i, j + dim] index += 1 # Copy matrix "B" for j in range(dim): saltelli_sequence[index, j] = base_sequence[i, j + dim] index += 1 return saltelli_sequence
[docs] def get_sobol_indices_saltelli(y, dim, calc_second_order=True, num_resamples=100, conf_level=0.95): """Perform Sobol Analysis on model outputs. Returns a dictionary with keys 'S1', 'S1_conf', 'ST', and 'ST_conf', where each entry is a list of size D (the number of parameters) containing the indices in the same order as the parameter file. If calc_second_order is True, the dictionary also contains keys 'S2' and 'S2_conf'. Parameters ---------- y : numpy.array A NumPy array containing the model outputs dim : int Number of dimensions calc_second_order : bool Calculate second-order sensitivities (default True) num_resamples : int The number of resamples (default 100) conf_level : float The confidence interval level (default 0.95) References ---------- .. [1] Sobol, I. M. (2001). "Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates." Mathematics and Computers in Simulation, 55(1-3):271-280, doi:10.1016/S0378-4754(00)00270-6. .. [2] Saltelli, A. (2002). "Making best use of model evaluations to compute sensitivity indices." Computer Physics Communications, 145(2):280-297, doi:10.1016/S0010-4655(02)00280-1. .. [3] Saltelli, A., P. Annoni, I. Azzini, F. Campolongo, M. Ratto, and S. Tarantola (2010). "Variance based sensitivity analysis of model output. Design and estimator for the total sensitivity index." Computer Physics Communications, 181(2):259-270, doi:10.1016/j.cpc.2009.09.018. """ if calc_second_order and y.shape[0] % (2 * dim + 2) == 0: n = int(y.shape[0] / (2 * dim + 2)) elif not calc_second_order and y.shape[0] % (dim + 2) == 0: n = int(y.shape[0] / (dim + 2)) else: raise RuntimeError(""" Incorrect number of samples in model output file. Confirm that calc_second_order matches option used during sampling.""") if conf_level < 0 or conf_level > 1: raise RuntimeError("Confidence level must be between 0-1.") # normalize the model output y = (y - y.mean(axis=0)) / y.std(axis=0) a, b, ab, ba = separate_output_values(y, dim, n, calc_second_order) r = np.random.randint(n, size=(n, num_resamples)) z = norm.ppf(0.5 + conf_level / 2) n_sobol = int(dim + binom(dim, 2)) sobol = np.zeros((n_sobol, y.shape[1])) sobol_total = np.zeros((dim, y.shape[1])) sobol_total_conf = np.zeros((dim, y.shape[1])) sobol_conf = np.zeros((n_sobol, y.shape[1])) sobol_idx = [np.nan for _ in range(n_sobol)] sobol_idx_bool = np.zeros((n_sobol, dim)).astype(bool) # first and total order (+ confidence interval) i_sobol = 0 for j in range(dim): sobol[j, :] = first_order(a, ab[:, j, :], b) sobol_conf[j, :] = z * first_order(a[r], ab[r, j], b[r]).std(ddof=1) sobol_total[j, :] = z * total_order(a, ab[:, j], b) sobol_total_conf[j, :] = z * total_order(a[r], ab[r, j], b[r]).std(ddof=1) sobol_idx[j] = np.array([j]) sobol_idx_bool[j, j] = True i_sobol += 1 # Second order (+ confidence interval) if calc_second_order: for j in range(dim): for k in range(j + 1, dim): sobol[i_sobol, :] = second_order(a, ab[:, j, :], ab[:, k, :], ba[:, j, :], b) sobol_conf[i_sobol, :] = z * second_order(a[r], ab[r, j], ab[r, k], ba[r, j], b[r]).std(ddof=1) sobol_idx[i_sobol] = np.array([j, k]) sobol_idx_bool[i_sobol, [j, k]] = True i_sobol += 1 return sobol, sobol_idx, sobol_idx_bool
[docs] def first_order(a, ab, b): # First order estimator following Saltelli et al. 2010 CPC, normalized by # sample variance return np.mean(b * (ab - a), axis=0) / np.var(np.r_[a, b], axis=0)
[docs] def total_order(a, ab, b): # Total order estimator following Saltelli et al. 2010 CPC, normalized by # sample variance return 0.5 * np.mean((a - ab) ** 2, axis=0) / np.var(np.r_[a, b], axis=0)
[docs] def second_order(a, abj, abk, baj, b): # Second order estimator following Saltelli 2002 vjk = np.mean(baj * abk - a * b, axis=0) / np.var(np.r_[a, b], axis=0) sj = first_order(a, abj, b) sk = first_order(a, abk, b) return vjk - sj - sk
[docs] def create_si_dict(dim, calc_second_order): # initialize empty dict to store sensitivity indices s = dict((k, np.zeros(dim)) for k in ('S1', 'S1_conf', 'ST', 'ST_conf')) if calc_second_order: s['S2'] = np.zeros((dim, dim)) s['S2'][:] = np.nan s['S2_conf'] = np.zeros((dim, dim)) s['S2_conf'][:] = np.nan return s
[docs] def separate_output_values(y, dim, n, calc_second_order): ab = np.zeros((n, dim, y.shape[1])) ba = np.zeros((n, dim, y.shape[1])) if calc_second_order else None step = 2 * dim + 2 if calc_second_order else dim + 2 a = y[0:y.shape[0]:step, :] b = y[(step - 1):y.shape[0]:step, :] for j in range(dim): ab[:, j, :] = y[(j + 1):y.shape[0]:step, :] if calc_second_order: ba[:, j, :] = y[(j + 1 + dim):y.shape[0]:step, :] return a, b, ab, ba