ELM (Extreme Learning Machine) is a special kind of feedforward perceptron. It has a single hidden layer, but the hidden-layer weights are set at random, and only the output-layer weights are computed, using the (Moore-Penrose) pseudo-inverse. Intuitively, the hidden layer builds a large bank of random feature extractors, and the output layer learns how to weight those features.
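To make the pseudo-inverse step concrete, here is a minimal sketch of the two-step idea on toy data (the variable names are mine, purely illustrative): with random hidden weights W0 and hidden features H = g(X W0), the output weights are just W1 = pinv(H) Y.

```python
import numpy as np

# Minimal sketch of the ELM idea on random toy data (illustrative only).
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))   # toy inputs
Y = rng.normal(size=(100, 3))   # toy targets
W0 = rng.normal(size=(4, 20))   # random hidden weights: fixed, never trained
H = np.tanh(X @ W0)             # random nonlinear features
W1 = np.linalg.pinv(H) @ Y      # output weights via the Moore-Penrose pseudo-inverse
Y_hat = H @ W1                  # predictions = least-squares fit of Y onto H
```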
ELM's main appeal is speed: since the hidden weights are never trained, learning reduces to a single least-squares solve, with no iterative backpropagation. A straightforward NumPy implementation looks like this.
import numpy as np


class ExtremeLearningMachine(object):
    def __init__(self, n_unit, activation=None):
        # Sigmoid by default; any elementwise nonlinearity can be passed in.
        self._activation = self._sig if activation is None else activation
        self._n_unit = n_unit

    @staticmethod
    def _sig(x):
        return 1. / (1. + np.exp(-x))

    @staticmethod
    def _add_bias(x):
        # Append a constant-1 column so the output layer gets a bias term.
        return np.hstack((x, np.ones((x.shape[0], 1))))

    def fit(self, X, y):
        # Hidden-layer weights are drawn at random and never updated.
        self.W0 = np.random.random((X.shape[1], self._n_unit))
        z = self._add_bias(self._activation(X.dot(self.W0)))
        # Output-layer weights: least-squares solution of z @ W1 = y.
        self.W1 = np.linalg.lstsq(z, y, rcond=None)[0]

    def transform(self, X):
        if not hasattr(self, 'W0'):
            raise UnboundLocalError('must fit before transform')
        z = self._add_bias(self._activation(X.dot(self.W0)))
        return z.dot(self.W1)

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X)
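A side note on the design: fit solves for W1 with np.linalg.lstsq rather than explicitly forming np.linalg.pinv(z). The result is the same minimum-norm least-squares solution, but lstsq avoids materializing the pseudo-inverse and copes gracefully with a rank-deficient z.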
For a quick test, let's try it on the iris dataset.
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
# Shuffle once, then use the first 100 samples for training and the last 50 for testing.
ind = np.random.permutation(len(iris.data))
# One-hot encode the three class labels.
y = np.zeros((len(iris.target), 3))
y[np.arange(len(y)), iris.target] = 1

acc_train = []
acc_test = []
N = [5, 10, 15, 20, 30, 40, 80, 160]
for n in N:
    elm = ExtremeLearningMachine(n)
    elm.fit(iris.data[ind[:100]], y[ind[:100]])
    # Predicted class = index of the largest of the three output units.
    acc_train.append(np.average(np.argmax(elm.transform(iris.data[ind[:100]]), axis=1) == iris.target[ind[:100]]))
    acc_test.append(np.average(np.argmax(elm.transform(iris.data[ind[100:]]), axis=1) == iris.target[ind[100:]]))

plt.plot(N, acc_train, c='red', label='train')
plt.plot(N, acc_test, c='blue', label='test')
plt.legend(loc=1)
plt.savefig("result.png")