Source code for pygpc.Classifier

import numpy as np
import copy
from sklearn.cluster import KMeans
from sklearn.cluster import spectral_clustering
from sklearn.neural_network import MLPClassifier



[docs]
def Classifier(coords, results, algorithm="learning", options=None):
    """
    Factory that builds the classifier object matching the requested algorithm.

    Parameters
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Set of n_grid parameter combinations
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    algorithm: str, optional, default: "learning"
        Algorithm to classify grid points
        - "learning" ... 2-step procedure with unsupervised and supervised learning
          (the only algorithm handled here)
    options: dict, optional, default=None
        Classifier options, forwarded unchanged to the selected classifier class

    Returns
    -------
    obj : object instance of Classifier class
        For algorithm="learning" an instance of ClassifierLearning

    Raises
    ------
    AttributeError
        If algorithm is anything other than "learning"
    """
    # guard clause: bail out early on every unsupported algorithm name
    if algorithm != "learning":
        raise AttributeError("Please specify correct classification algorithm: {""learning"", ...}")

    return ClassifierLearning(coords=coords, results=results, options=options)




[docs]
class ClassifierLearning(object):
    """
    ClassifierLearning class

    Two-step classifier: an unsupervised clusterer groups the model results into
    domains, then a supervised classifier is trained to predict the domain of a
    grid point from its coordinates.

    Parameters
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Grid points to train the classifier
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    options: dict, optional, default=None
        Classifier options; keys that are not given fall back to their defaults
        - options["clusterer"] ... Cluster algorithm (default: "KMeans")
        - options["n_clusters"] ... Number of clusters in case of "KMeans" (default: 2)
        - options["classifier"] ... Classification algorithm (default: "MLPClassifier")
        - options["classifier_solver"] ... Solver of the classifier, e.g. "adam" or "lbfgs"
          (default: "lbfgs")

    Attributes
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Grid points to train the classifier
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    options: dict
        Effective classifier options (defaults merged with the user supplied options)
    clusterer: Clusterer object
        Clusterer used to determine the domains from the results
    clf: Classifier object
        Classifier object
    domains: ndarray of int [n_grid]
        Domain ID of every grid point
    swap_idx: ndarray of int
        swap_idx[d] is the label the most recent clustering run assigned to the
        points of domain d (identity if the labels did not change)
    """
    def __init__(self, coords, results, options=None):
        """
        Constructor; Initializes ClassifierLearning class

        Raises
        ------
        NotImplementedError
            If options["clusterer"] is "spectral_clustering"
        AttributeError
            If options["clusterer"] or options["classifier"] is unknown
        """
        self.results = results
        self.coords = coords

        # merge user options into the defaults (on a fresh dict, the caller's dict is
        # not modified) so that partially specified options do not raise a KeyError
        effective_options = {"clusterer": "KMeans",
                             "n_clusters": 2,
                             "classifier": "MLPClassifier",
                             "classifier_solver": "lbfgs"}
        if options is not None:
            effective_options.update(options)
        options = effective_options
        self.options = options

        # setup clusterer to determine domains (unsupervised learning)
        if options["clusterer"] == "KMeans":
            # fixed random_state keeps the clustering reproducible between runs
            self.clusterer = KMeans(n_clusters=options["n_clusters"],
                                    random_state=42,
                                    n_init=100)

        elif options["clusterer"] == "spectral_clustering":
            raise NotImplementedError("spectral projection not implemented yet")

        else:
            raise AttributeError("Please specify correct clusterer: {""KMeans"", ""spectral_clustering""...}")

        self.clusterer.fit(results)
        self.domains = self.clusterer.labels_
        self.swap_idx = np.arange(len(np.unique(self.domains)))

        # setup classifier for prediction (supervised learning)
        if options["classifier"] == "MLPClassifier":
            self.clf = MLPClassifier(alpha=0.01,
                                     max_iter=1000,
                                     activation="relu",
                                     solver=options["classifier_solver"])
        else:
            raise AttributeError("Please specify correct classifier: {""MLPClassifier"", ...}")

        self.clf.fit(coords, self.domains)

    def update(self, coords, results):
        """
        Updates classifier using the previous results

        The clusterer is rerun on the new results. Because a clustering run may
        hand out the same domains under permuted labels, the new labels are mapped
        back to the labels of the previous run before the classifier is retrained.
        The first grid points are assumed to be the grid points of the previous run
        (in the same order).

        Parameters
        ----------
        coords: ndarray of float [n_grid, n_dim]
            Grid points to train the classifier
        results: ndarray [n_grid x n_out]
            Results of the model evaluation
        """
        self.coords = coords
        self.results = results

        # copy of the previous labels, independent of the clusterer's internal array
        domains_old = np.array(self.domains, dtype=int)

        # rerun clusterer
        self.clusterer.fit(self.results)
        labels_new = np.asarray(self.clusterer.labels_).astype(int)

        # only the grid points present in both runs can be compared
        n_overlap = min(len(domains_old), len(labels_new))
        old_overlap = domains_old[:n_overlap]
        new_overlap = labels_new[:n_overlap]
        old_labels = [int(d) for d in np.unique(old_overlap)]

        # labels are used as array indices, so size the lookup tables by the largest label
        n_labels = int(max(domains_old.max(), labels_new.max())) + 1

        # forward map: previous label -> label the new clustering gave to most of its points
        self.swap_idx = np.arange(n_labels)
        for d in old_labels:
            members = new_overlap[old_overlap == d]
            if np.mean(members == d) < 0.5:
                self.swap_idx[d] = np.argmax(np.bincount(members, minlength=n_labels))

        # inverse map: new label -> previous label. The forward map must not be applied to
        # the new labels directly; that only works if the permutation is its own inverse
        # (e.g. two swapped labels) and fails for cyclic permutations of three or more labels.
        restore = np.arange(n_labels)
        claimed = set(d for d in old_labels if self.swap_idx[d] == d)
        for d in old_labels:
            target = int(self.swap_idx[d])
            # ambiguous matches (new label already taken) are left untouched instead of
            # collapsing two domains into one
            if target != d and target not in claimed:
                restore[target] = d
                claimed.add(target)

        # change labels back to initial order; labels without a match keep their value
        self.domains = restore[labels_new]

        # rerun classifier
        self.clf.fit(self.coords, self.domains)

    def predict(self, coords):
        """
        Predict domains from new coordinates

        Parameters
        ----------
        coords: ndarray of float [n_grid, n_dim]
            Grid points to classify (has to be a 2D array)

        Returns
        -------
        domains: ndarray of int [n_grid]
            Domain IDs of grid-points
        """
        return self.clf.predict(coords)