Source code for pygpc.validation

import os
import h5py
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
from scipy.signal import savgol_filter
from .misc import nrmsd
from .misc import get_cartesian_product
from pygpc.Computation import *
from .MEGPC import *
from .Grid import *
from .Visualization import *


def validate_gpc_mc(session, coeffs, coords=None, data_original=None, n_samples=1e4,
                    output_idx=0, n_cpu=1, smooth_pdf=(51, 5), bins=100, hist=False,
                    fn_out=None, folder="gpc_vs_original_mc", plot=True):
    """
    Compares gPC approximation with original model function.

    Evaluates both at "n_samples" sampling points and evaluates the root mean square
    deviation. It also computes the pdf of the output quantity with output_idx and
    saves the plot as fn_out_qoi_<idx>.png and .pdf.

    Parameters
    ----------
    session : GPC Session object instance
        GPC session object containing all information i.e., gPC, Problem, Model,
        Grid, Basis, RandomParameter instances
    coeffs : ndarray of float [n_coeffs x n_out]
        GPC coefficients
    coords : ndarray of float [n_coords x n_dim], optional, default: None
        Parameter combinations for the random_vars the comparison is conducted with
    data_original : ndarray of float [n_coords x n_out], optional, default: None
        If available, data of original model function at grid, containing all QOIs
    n_samples : int, optional, default: 1e4
        Number of samples to validate the gPC approximation (ignored if coords and
        data_original are provided)
    output_idx : int or ndarray of int or list of int, optional, default: 0
        Index of output quantities to consider (if output_idx=None, all output
        quantities are considered)
    n_cpu : int, optional, default: 1
        Number of CPU cores to use (parallel function evaluations) to evaluate the
        original model function
    smooth_pdf : tuple/list of int [polylength, order], bool or None, optional, default: (51, 5)
        Smooth the probability density functions using a Savitzky-Golay filter.
        True selects the default window (51, 5); False or None disables smoothing.
    bins : int, optional, default: 100
        Number of bins to estimate the pdf
    hist : bool, optional, default: False
        Plot histogram instead of pdf
    fn_out : str or None, optional, default: None
        Filename of validation results and plot comparing original vs gPC model
        (w/o file extension)
    folder : str, optional, default: "gpc_vs_original_mc"
        Folder in .hdf5 file to save data in
    plot : bool, optional, default: True
        Plots the pdfs of the original model function and the gPC approximation

    Returns
    -------
    nrmsd : ndarray of float [n_out]
        Normalized root mean square deviation for all output quantities between
        gPC and original model
    <file> : .hdf5 file
        Data file containing the sampling points, the results and the pdfs of the
        original and the gpc approximation
    <file> : .pdf file
        Plot showing the pdfs of the original and the gpc approximation
    """
    # Normalize smooth_pdf and copy it to a fresh list so the Savgol retry loop
    # below can bump the polyorder without mutating the caller's argument.
    # (The previous default was a shared mutable list argument — a Python pitfall.)
    if smooth_pdf is True:
        smooth_pdf = [51, 5]
    elif smooth_pdf is not None and smooth_pdf is not False:
        smooth_pdf = list(smooth_pdf)

    if type(output_idx) is int:
        output_idx = [output_idx]

    if data_original is None or coords is None:
        # Create random sampling points and evaluate the original model on them
        grid_mc = Random(parameters_random=session.parameters_random,
                         n_grid=n_samples,
                         options=None)
        coords_norm = grid_mc.coords_norm
        coords = grid_mc.coords

        com = Computation(n_cpu=n_cpu, matlab_model=session.matlab_model)
        y_orig = com.run(model=session.model,
                         problem=session.problem,
                         coords=coords,
                         coords_norm=coords_norm,
                         i_iter=None,
                         i_subiter=None,
                         fn_results=None,
                         print_func_time=False)

        if output_idx is None:
            output_idx = np.arange(y_orig.shape[1])

        y_orig = y_orig[:, output_idx]
    else:
        # Reuse the provided model data; only normalized coordinates are computed
        if output_idx is None:
            output_idx = np.arange(data_original.shape[1])

        grid_mc = Random(parameters_random=session.parameters_random,
                         n_grid=0,
                         options=None)
        grid_mc.coords = coords
        coords_norm = grid_mc.get_normalized_coordinates(coords)

        y_orig = data_original if data_original.ndim > 1 else data_original[:, np.newaxis]
        # Select the requested QOIs: data_original holds all QOIs (see docstring).
        # The previous code skipped this, mismatching the computed branch above
        # and validate_gpc_plot.
        y_orig = y_orig[:, output_idx]

    if y_orig.ndim == 1:
        y_orig = y_orig[:, np.newaxis]

    # Evaluate gPC expansion on grid
    if session.qoi_specific:
        y_gpc = np.zeros((coords_norm.shape[0], len(output_idx)))

        for i, o_idx in enumerate(output_idx):
            y_gpc[:, i] = session.gpc[o_idx].get_approximation(
                coeffs=coeffs[o_idx], x=coords_norm, output_idx=0).flatten()
    else:
        y_gpc = session.gpc[0].get_approximation(coeffs=coeffs,
                                                 x=coords_norm,
                                                 output_idx=output_idx)

    if y_gpc.ndim == 1:
        y_gpc = y_gpc[:, np.newaxis]

    # Calculate normalized root mean square deviation
    relative_error_nrmsd = nrmsd(y_gpc, y_orig)

    if fn_out:
        # Save the sampling points and model evaluations ONCE — they are shared
        # between all QOIs. (Previously these datasets were created inside the
        # per-QOI loop, so a second QOI raised "name already exists" in h5py.)
        with h5py.File(os.path.splitext(fn_out)[0] + '.hdf5', 'a') as f:
            f.create_dataset(folder + '/grid/coords', data=coords)
            f.create_dataset(folder + '/grid/coords_norm', data=coords_norm)
            f.create_dataset(folder + '/model_evaluations/results/gpc', data=y_gpc)
            f.create_dataset(folder + '/model_evaluations/results/orig', data=y_orig)

    for i, o_idx in enumerate(output_idx):
        # Estimate pdfs of gPC and original model from histograms (bin centers as x)
        pdf_y_gpc, tmp = np.histogram(y_gpc[:, i].flatten(), bins=bins, density=True)
        pdf_x_gpc = (tmp[1:] + tmp[0:-1]) / 2.
        pdf_y_orig, tmp = np.histogram(y_orig[:, i].flatten(), bins=bins, density=True)
        pdf_x_orig = (tmp[1:] + tmp[0:-1]) / 2.

        if smooth_pdf is not None and smooth_pdf is not False:
            # smooth gpc pdf; on LinAlgError the polyorder is randomly perturbed
            # and the filter is retried
            y_gpc_smoothed = False
            while not y_gpc_smoothed:
                try:
                    pdf_y_gpc = savgol_filter(pdf_y_gpc, smooth_pdf[0], smooth_pdf[1])
                    y_gpc_smoothed = True
                except np.linalg.LinAlgError:
                    smooth_pdf[1] += int(3 * np.random.random(1))

            # smooth original pdf
            y_orig_smoothed = False
            while not y_orig_smoothed:
                try:
                    pdf_y_orig = savgol_filter(pdf_y_orig, smooth_pdf[0], smooth_pdf[1])
                    y_orig_smoothed = True
                except np.linalg.LinAlgError:
                    smooth_pdf[1] += int(3 * np.random.random(1))

        # plot pdfs
        if plot:
            matplotlib.rc('text', usetex=False)
            matplotlib.rc('xtick', labelsize=12)
            matplotlib.rc('ytick', labelsize=12)

            fig1, ax1 = plt.subplots(nrows=1, ncols=1, squeeze=True, figsize=(6, 5))

            if not hist:
                # pdf line plot; was "if hist is None", which made the documented
                # default hist=False fall into the histogram branch
                ax1.plot(pdf_x_gpc, pdf_y_gpc, pdf_x_orig, pdf_y_orig)
            else:
                sns.distplot(y_gpc[:, i].flatten(), bins=bins, ax=ax1)
                sns.distplot(y_orig[:, i].flatten(), bins=bins, label=r'original', ax=ax1)

            ax1.legend([r'gpc', r'original'], fontsize=14, loc="upper right")
            ax1.grid()
            ax1.set_xlabel(r'$y$', fontsize=16)
            ax1.set_ylabel(r'$p(y)$', fontsize=16)
            # annotate with the error of THIS qoi (was relative_error_nrmsd[0],
            # which always displayed the first QOI's error)
            ax1.text(0.05, 0.95, r'$error=%.2f$' % (100 * relative_error_nrmsd[i],) + "%",
                     transform=ax1.transAxes, fontsize=12, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

            plt.tight_layout()

            if fn_out:
                plt.savefig(os.path.splitext(fn_out)[0] + "_qoi_" + str(o_idx) + '.pdf')
                plt.savefig(os.path.splitext(fn_out)[0] + "_qoi_" + str(o_idx) + '.png',
                            dpi=1200)

        if fn_out:
            # save per-QOI error and pdf estimates in the .hdf5 file
            with h5py.File(os.path.splitext(fn_out)[0] + '.hdf5', 'a') as f:
                f.create_dataset(folder + '/error/qoi_{}/nrmsd'.format(o_idx),
                                 data=relative_error_nrmsd[i])
                f.create_dataset(folder + '/pdf/qoi_{}/original'.format(o_idx),
                                 data=np.vstack((pdf_x_orig, pdf_y_orig)).transpose())
                f.create_dataset(folder + '/pdf/qoi_{}/gpc'.format(o_idx),
                                 data=np.vstack((pdf_x_gpc, pdf_y_gpc)).transpose())

    return relative_error_nrmsd
def validate_gpc_plot(session, coeffs, random_vars, n_grid=None, coords=None,
                      output_idx=0, data_original=None, fn_out=None,
                      folder="gpc_vs_original_plot", n_cpu=1):
    """
    Compares gPC approximation with original model function.

    Evaluates both at n_grid (x n_grid) sampling points and calculates the difference
    between the two solutions at the output quantities with output_idx and saves the
    plot as *_qoi_<output_idx>.png/pdf. Also generates one .hdf5 results file with
    the evaluation results.

    Parameters
    ----------
    session : GPC Session object instance
        GPC session object containing all information i.e., gPC, Problem, Model,
        Grid, Basis, RandomParameter instances
    coeffs : ndarray of float [n_coeffs x n_out] or list of ndarray of float [n_qoi][n_coeffs x n_out]
        GPC coefficients
    random_vars : str or list of str [2]
        Names of the random variables, the analysis is performed for one or max.
        two random variables
    n_grid : int or list of int [2], optional, default: None
        Number of samples in each dimension to compare the gPC approximation with
        the original model function. A cartesian grid is generated based on the
        limits of the specified random_vars
    coords : ndarray of float [n_coords x n_dim], optional, default: None
        Parameter combinations for the random_vars the comparison is conducted with
    output_idx : int or list of int, optional, default: 0
        List of indices of output quantities to consider
    data_original : ndarray of float [n_coords x n_out], optional, default: None
        If available, data of original model function at grid, containing all QOIs
    fn_out : str, optional, default: None
        Filename of plot comparing original vs gPC model (*_qoi_<output_idx>.png
        is added). Also filename of the .hdf5 file containing the grid and the
        data from the original model and the gPC
    folder : str, optional, default: "gpc_vs_original_plot"
        Folder in .hdf5 file to save data in
    n_cpu : int, optional, default: 1
        Number of CPU cores to use to calculate results of original model on grid

    Returns
    -------
    <file> : .hdf5 file
        Data file containing the grid points and the results of the original and
        the gpc approximation
    <file> : .png and .pdf file
        Plot comparing original vs gPC model
    """
    if type(output_idx) is int:
        output_idx = [output_idx]

    # Accept a single variable name given as str (the docstring allows it;
    # calling .tolist() on a str raised AttributeError before)
    if isinstance(random_vars, str):
        random_vars = [random_vars]
    elif type(random_vars) is not list:
        random_vars = list(random_vars)

    # Accept a plain int for n_grid (replicated per random variable);
    # int has no .tolist(), which previously raised AttributeError
    if n_grid is not None and type(n_grid) is not list:
        if isinstance(n_grid, int):
            n_grid = [n_grid] * len(random_vars)
        else:
            n_grid = list(n_grid)

    # Create grid such that it includes the mean values of the other random variables
    if coords is None:
        grid = np.zeros((np.prod(n_grid), len(session.parameters_random)))
    else:
        grid = np.zeros((coords.shape[0], len(session.parameters_random)))

    idx = []
    idx_global = []

    # sort random_vars according to gpc.parameters; parameters not analyzed are
    # fixed at their mean value
    for i_p, p in enumerate(session.parameters_random.keys()):
        if p not in random_vars:
            grid[:, i_p] = session.parameters_random[p].mean
        else:
            idx.append(random_vars.index(p))
            idx_global.append(i_p)

    random_vars = [random_vars[i] for i in idx]
    x = []

    if coords is None:
        n_grid = [n_grid[i] for i in idx]

        # build a cartesian grid over the pdf limits of the analyzed variables
        for i_p, p in enumerate(random_vars):
            x.append(np.linspace(session.parameters_random[p].pdf_limits[0],
                                 session.parameters_random[p].pdf_limits[1],
                                 n_grid[i_p]))

        coords = get_cartesian_product(x)
    else:
        for i_p, p in enumerate(random_vars):
            x.append(np.unique(coords[:, i_p]))

    grid[:, idx_global] = coords

    # Normalize grid
    grid_norm = Grid(parameters_random=session.parameters_random).get_normalized_coordinates(grid)

    # Evaluate gPC expansion on grid
    if session.qoi_specific:
        y_gpc = np.zeros((grid_norm.shape[0], len(output_idx)))

        for i, o_idx in enumerate(output_idx):
            y_gpc[:, i] = session.gpc[o_idx].get_approximation(
                coeffs=coeffs[o_idx], x=grid_norm, output_idx=0).flatten()
    else:
        y_gpc = session.gpc[0].get_approximation(coeffs=coeffs,
                                                 x=grid_norm,
                                                 output_idx=output_idx)

    # Evaluate original model function on grid
    if data_original is None:
        com = Computation(n_cpu=n_cpu, matlab_model=session.matlab_model)
        y_orig_all = com.run(model=session.model,
                             problem=session.problem,
                             coords=grid,
                             coords_norm=grid_norm,
                             i_iter=None,
                             i_subiter=None,
                             fn_results=None,
                             print_func_time=False)
        y_orig = y_orig_all[:, output_idx]
    else:
        y_orig_all = data_original
        y_orig = data_original[:, output_idx]

    # add axes if necessary
    if y_gpc.ndim == 1:
        y_gpc = y_gpc[:, np.newaxis]

    if y_orig.ndim == 1:
        y_orig = y_orig[:, np.newaxis]

    # Evaluate difference between original and gPC approximation
    y_dif = y_orig - y_gpc

    # save results in .hdf5 file
    if fn_out is not None:
        with h5py.File(os.path.splitext(fn_out)[0] + '.hdf5', 'a') as f:
            f.create_dataset(folder + '/model_evaluations/original', data=y_orig)
            f.create_dataset(folder + '/model_evaluations/original_all_qoi', data=y_orig_all)
            f.create_dataset(folder + '/model_evaluations/gpc', data=y_gpc)
            f.create_dataset(folder + '/model_evaluations/difference', data=y_dif)
            f.create_dataset(folder + '/grid/coords', data=grid)
            f.create_dataset(folder + '/grid/coords_norm', data=grid_norm)

    # Plot results
    matplotlib.rc('text', usetex=False)
    matplotlib.rc('xtick', labelsize=13)
    matplotlib.rc('ytick', labelsize=13)
    fs = 14

    for i in range(len(output_idx)):
        # One random variable: line plots of original/gPC and their difference
        if len(random_vars) == 1:
            fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, squeeze=True, figsize=(12, 5))

            ax1.plot(coords, y_orig[:, i])
            ax1.plot(coords, y_gpc[:, i])
            ax1.legend([r"original", r"gPC"], fontsize=fs)
            ax1.set_xlabel(r"%s" % random_vars[0], fontsize=fs)
            ax1.set_ylabel(r"y(%s)" % random_vars[0], fontsize=fs)
            ax1.grid()

            ax2.plot(coords, y_dif[:, i], '--k')
            ax2.legend([r"difference"], fontsize=fs)
            # was ax1.set_xlabel — the difference subplot was never labeled
            ax2.set_xlabel(r"%s" % random_vars[0], fontsize=fs)
            ax2.grid()

        # Two random variables: pcolor maps of original, gPC and difference
        elif len(random_vars) == 2:
            fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3,
                                                sharex='all', sharey='all',
                                                squeeze=True, figsize=(16, 5))

            x1_2d, x2_2d = np.meshgrid(x[0], x[1])

            # common color scale for original and gPC plots;
            # was np.min(np.array(a, b)) which passed b as a dtype -> TypeError
            min_all = min(np.min(y_orig[:, i]), np.min(y_gpc[:, i]))
            max_all = max(np.max(y_orig[:, i]), np.max(y_gpc[:, i]))

            # Original model function
            im1 = ax1.pcolor(x1_2d, x2_2d,
                             np.reshape(y_orig[:, i], (x[1].size, x[0].size), order='f'),
                             cmap="jet", vmin=min_all, vmax=max_all)
            ax1.set_title(r'Original model', fontsize=fs)
            ax1.set_xlabel(r"%s" % random_vars[0], fontsize=fs)
            ax1.set_ylabel(r"%s" % random_vars[1], fontsize=fs)

            # gPC approximation
            im2 = ax2.pcolor(x1_2d, x2_2d,
                             np.reshape(y_gpc[:, i], (x[1].size, x[0].size), order='f'),
                             cmap="jet", vmin=min_all, vmax=max_all)
            ax2.set_title(r'gPC approximation', fontsize=fs)
            # was ax1.set_xlabel/set_ylabel — this subplot was never labeled
            ax2.set_xlabel(r"%s" % random_vars[0], fontsize=fs)
            ax2.set_ylabel(r"%s" % random_vars[1], fontsize=fs)

            # Difference (own symmetric blue-to-red color scale)
            min_dif = np.min(y_dif[:, i])
            max_dif = np.max(y_dif[:, i])
            b2rcw_cmap = make_cmap(b2rcw(min_dif, max_dif))

            im3 = ax3.pcolor(x1_2d, x2_2d,
                             np.reshape(y_dif[:, i], (x[1].size, x[0].size), order='f'),
                             cmap=b2rcw_cmap, vmin=min_dif, vmax=max_dif)
            ax3.set_title(r'Difference (Original vs gPC)', fontsize=fs)
            # was ax1.set_xlabel/set_ylabel — this subplot was never labeled
            ax3.set_xlabel(r"%s" % random_vars[0], fontsize=fs)
            ax3.set_ylabel(r"%s" % random_vars[1], fontsize=fs)

            fig.colorbar(im1, ax=ax1, orientation='vertical')
            fig.colorbar(im2, ax=ax2, orientation='vertical')
            fig.colorbar(im3, ax=ax3, orientation='vertical')

        plt.tight_layout()

        if fn_out is not None:
            plt.savefig(os.path.splitext(fn_out)[0] + "_qoi_" + str(output_idx[i]) + '.png',
                        dpi=1200)
            plt.savefig(os.path.splitext(fn_out)[0] + "_qoi_" + str(output_idx[i]) + '.pdf')