Source code for pygpc.Classifier
import numpy as np
import copy
from sklearn.cluster import KMeans
from sklearn.cluster import spectral_clustering
from sklearn.neural_network import MLPClassifier
[docs]
def Classifier(coords, results, algorithm="learning", options=None):
    """
    Helper function to initialize Classifier class.

    Parameters
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Set of n_grid parameter combinations
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    algorithm: str, optional, default: "learning"
        Algorithm to classify grid points
        - "learning" ... 2-step procedure with unsupervised and supervised learning
        - ...
    options: dict, optional, default=None
        Classifier options (forwarded unchanged to the selected classifier class)

    Returns
    -------
    obj : object instance of Classifier class
        Object instance of Classifier class

    Raises
    ------
    AttributeError
        If ``algorithm`` does not name a supported classification algorithm.
    """
    if algorithm == "learning":
        return ClassifierLearning(coords=coords, results=results, options=options)

    # Single-quoted literal so the option name is actually shown in quotes;
    # the former '""learning""' spelling was implicit string concatenation
    # and silently dropped them.
    raise AttributeError('Please specify correct classification algorithm: {"learning", ...}')
[docs]
class ClassifierLearning(object):
    """
    ClassifierLearning class

    Two-step classifier: the model results are first clustered into domains
    (unsupervised learning); afterwards a classifier is trained to predict the
    domain of a grid point from its coordinates (supervised learning).

    Parameters
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Grid points to train the classifier
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    options: dict, optional, default=None
        Classifier options; missing keys are filled with the defaults below
        - options["clusterer"] ... Cluster algorithm (default: "KMeans")
        - options["n_clusters"] ... Number of clusters in case of "KMeans" (default: 2)
        - options["classifier"] ... Classification algorithm (default: "MLPClassifier")
        - options["classifier_solver"] ... Solver passed to the classifier,
          e.g. "adam" or "lbfgs" (default: "lbfgs")

    Attributes
    ----------
    coords: ndarray of float [n_grid, n_dim]
        Grid points to train the classifier
    results: ndarray [n_grid x n_out]
        Results of the model evaluation
    options: dict
        Classifier options actually in use (user options merged over the defaults)
    clusterer: Clusterer object
        Clusterer used to determine the domains from the results
    clf: Classifier object
        Classifier object
    domains: ndarray of int [n_grid]
        Domain ID of every grid point
    swap_idx: ndarray of int
        swap_idx[d] is the label the most recent clusterer run assigned to the
        majority of the points that previously belonged to domain d
        (identity directly after construction)
    """

    def __init__(self, coords, results, options=None):
        """
        Constructor; Initializes ClassifierLearning class
        """
        # Merge the user options over the defaults into a fresh dict: a
        # partially specified dict no longer raises KeyError, the caller's
        # dict is not modified, and self.options always reflects the
        # settings really used (it used to stay None when options was None).
        merged = {"clusterer": "KMeans",
                  "n_clusters": 2,
                  "classifier": "MLPClassifier",
                  "classifier_solver": "lbfgs"}
        if options is not None:
            merged.update(options)
        options = merged

        self.results = results
        self.coords = coords
        self.options = options

        # setup clusterer to determine domains (unsupervised learning)
        if options["clusterer"] == "KMeans":
            self.clusterer = KMeans(n_clusters=options["n_clusters"],
                                    random_state=42,
                                    n_init=100)
        elif options["clusterer"] == "spectral_clustering":
            raise NotImplementedError("spectral projection not implemented yet")
        else:
            raise AttributeError('Please specify correct clusterer: {"KMeans", "spectral_clustering"...}')

        self.clusterer.fit(results)
        self.domains = self.clusterer.labels_
        self.swap_idx = np.arange(len(np.unique(self.domains)))

        # setup classifier for prediction (supervised learning)
        if options["classifier"] == "MLPClassifier":
            self.clf = MLPClassifier(alpha=0.01,
                                     max_iter=1000,
                                     activation="relu",
                                     solver=options["classifier_solver"])
        else:
            raise AttributeError('Please specify correct classifier: {"MLPClassifier", ...}')

        self.clf.fit(coords, self.domains)

    def update(self, coords, results):
        """
        Updates classifier using the previous results

        The clusterer is re-run on the new results. Because cluster labels are
        arbitrary, the new labels are mapped back to the labels the overlapping
        (leading) grid points had before, then the classifier is re-trained.

        Parameters
        ----------
        coords: ndarray of float [n_grid, n_dim]
            Grid points to train the classifier
        results: ndarray [n_grid x n_out]
            Results of the model evaluation
        """
        self.coords = coords
        self.results = results
        domains_old = np.array(self.domains, dtype=int)

        # rerun clusterer
        self.clusterer.fit(self.results)
        domains_new = np.array(self.clusterer.labels_, dtype=int)

        # check if domain labels are swapped and change it back to initial order
        self.swap_idx, self.domains = self._restore_label_order(domains_old, domains_new)

        # rerun classifier
        self.clf.fit(self.coords, self.domains)

    @staticmethod
    def _restore_label_order(domains_old, domains_new):
        """
        Relabel freshly clustered domains so they agree with the previous labels.

        Parameters
        ----------
        domains_old: ndarray of int [n_grid_old]
            Domain IDs before re-clustering
        domains_new: ndarray of int [n_grid_new]
            Domain IDs returned by the re-run clusterer; the leading entries
            are compared against domains_old

        Returns
        -------
        swap_idx: ndarray of int [n_labels]
            swap_idx[d] is the new label carried by the majority of the points
            formerly in domain d (d itself if at least half kept their label)
        domains: ndarray of int [n_grid_new]
            domains_new expressed in the old labelling
        """
        # only the points present in both runs can be compared
        n_common = min(len(domains_old), len(domains_new))
        old = domains_old[:n_common]
        new = domains_new[:n_common]

        # size the lookup tables by label value, not by number of distinct labels
        n_labels = int(max(domains_old.max(initial=-1), domains_new.max(initial=-1))) + 1
        swap_idx = np.arange(n_labels)

        for d in np.unique(old):
            new_of_d = new[old == d]

            if np.mean(new_of_d == d) < 0.5:
                swap_idx[d] = np.argmax(np.bincount(new_of_d, minlength=n_labels))

        # swap_idx maps old -> new, but relabelling needs new -> old. Applying
        # swap_idx directly is only right for 2-label swaps and fails for
        # cyclic permutations of three or more labels, so invert it.
        if np.array_equal(np.sort(swap_idx), np.arange(n_labels)):
            new_to_old = np.argsort(swap_idx)
            domains = new_to_old[domains_new]
        else:
            # ambiguous correspondence (several old domains share a new label):
            # keep the new labels instead of collapsing distinct domains
            domains = domains_new

        return swap_idx, domains.astype(int)

    def predict(self, coords):
        """
        Predict domains from new coordinates

        Parameters
        ----------
        coords: ndarray of float [n_grid, n_dim]
            Grid points to classify (has to be a 2D array)

        Returns
        -------
        domains: ndarray of int [n_grid]
            Domain IDs of grid-points as predicted by the trained classifier
        """
        return self.clf.predict(coords)