A quick look at what kind of classification hyperplane three well-known linear discriminators draw: Fisher's linear discriminant, the simple perceptron, and IRLS (iteratively reweighted least squares).
Two classes are generated from Gaussian distributions as described below, and the decision boundary between them is computed.
*Class 1: 80 points drawn from $N(\mu_1, \Sigma_1)$, plus 20 outliers drawn from $N(\mu_1', \Sigma_1')$
*Class 2: 100 points drawn from $N(\mu_2, \Sigma_2)$

$$
\mu_1 = (0, 0)^T, \quad \mu_1' = (-2, -2)^T, \quad \mu_2 = (1, 1)^T
$$
$$
\Sigma_1 = \Sigma_1' = \Sigma_2 = \begin{pmatrix} 30 & 10 \\ 10 & 15 \end{pmatrix}^{-1}
$$
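For orientation, all three methods below fit a linear decision boundary of the form

$$
w^T x + b = 0
$$

and differ only in how the weight vector $w$ and the bias $b$ are estimated from the training data.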
import numpy as np
import matplotlib.pyplot as plt
def generate_data(mu, cov, num_data):
    # Class 1 is the union of the main cluster and the outlier cluster,
    # class 2 is a single cluster; all three share the same covariance.
    cls1 = np.random.multivariate_normal(mu[0], cov, num_data[0])
    cls1_ = np.random.multivariate_normal(mu[1], cov, num_data[1])
    cls2 = np.random.multivariate_normal(mu[2], cov, num_data[2])
    return np.r_[cls1, cls1_], cls2
def plot(filename, cls1, cls2, spr=None):
    # Scatter the two classes and, if given, draw the separating line spr = (x, y).
    x1, x2 = cls1.T
    plt.plot(x1, x2, "bo")
    x1, x2 = cls2.T
    plt.plot(x1, x2, "ro")
    if spr is not None:
        plt.plot(spr[0], spr[1], "g-")
    plt.xlim(-3, 3)
    plt.ylim(-3, 3)
    plt.savefig(filename)
    plt.clf()
def step(out):
    # Threshold activation: +1 for non-negative input, -1 otherwise.
    return np.where(out >= 0., 1., -1.)

def sigmoid(x):
    return 1. / (1. + np.exp(-x))
def fisher(cls1, cls2):
    # Fisher's linear discriminant: w = Sw^-1 (m2 - m1), with the bias chosen
    # so the boundary passes through the midpoint of the class means.
    m1 = np.mean(cls1, axis=0)
    m2 = np.mean(cls2, axis=0)
    dim = len(m1)
    m1 = m1.reshape(dim, 1)
    m2 = m2.reshape(dim, 1)
    # Within-class scatter matrix.
    Sw = np.zeros((dim, dim))
    for xi in cls1:
        xi = xi.reshape(dim, 1)
        Sw += np.dot((xi - m1), (xi - m1).T)
    for xi in cls2:
        xi = xi.reshape(dim, 1)
        Sw += np.dot((xi - m2), (xi - m2).T)
    w = np.dot(np.linalg.inv(Sw), (m2 - m1))
    m = (m1 + m2) / 2.
    b = -np.sum(w * m)
    # Decision boundary: w0*x1 + w1*x2 + b = 0.
    x = np.linspace(-3, 3, 1000)
    y = [(w[0][0] * xs + b) / (-w[1][0]) for xs in x]
    plot("fisher.png", cls1, cls2, (x, y))
def perceptron(cls1, cls2, lr=0.5, loop=1000):
    # Augment each point with a constant 1 for the bias and append the target (+1 / -1).
    cls1_ = np.c_[cls1, np.ones(len(cls1)), np.ones(len(cls1))]
    cls2_ = np.c_[cls2, np.ones(len(cls2)), -1. * np.ones(len(cls2))]
    data = np.r_[cls1_, cls2_]
    np.random.shuffle(data)
    data, label = np.hsplit(data, [len(data[0]) - 1])
    w = np.random.uniform(-1., 1., size=(1, len(data[0])))
    for i in range(loop):
        # Error-correction update averaged over the whole batch;
        # correctly classified points contribute nothing.
        out = step(np.dot(w, data.T))
        dw = lr * (label - out.T) * data
        w += np.mean(dw, axis=0)
    # Decision boundary: w0*x1 + w1*x2 + w2 = 0.
    x = np.linspace(-3, 3, 1000)
    y = [(w[0][0] * xs + w[0][2]) / (-w[0][1]) for xs in x]
    plot("perceptron.png", cls1, cls2, (x, y))
def IRLS(cls1, cls2, tol=1e-5, maxits=100):
    # Logistic regression fitted by Newton-Raphson (IRLS); targets are 1 / 0.
    cls1_ = np.c_[cls1, np.ones(len(cls1)), np.ones(len(cls1))]
    cls2_ = np.c_[cls2, np.ones(len(cls2)), np.zeros(len(cls2))]
    data = np.r_[cls1_, cls2_]
    np.random.shuffle(data)
    data, label = np.hsplit(data, [len(data[0]) - 1])
    w = np.zeros((1, len(data[0])))
    itr = 0
    while itr < maxits:
        y = sigmoid(np.dot(w, data.T)).T
        g = np.dot(data.T, (y - label))          # gradient of the cross-entropy
        r = np.diag((y * (1 - y)).ravel())       # weighting matrix R
        hesse = np.dot(np.dot(data.T, r), data)  # Hessian X^T R X
        w -= np.dot(np.linalg.inv(hesse), g).T   # Newton step
        if np.sum(g**2) <= tol:
            print(itr)
            break
        itr += 1
    # Decision boundary: w0*x1 + w1*x2 + w2 = 0.
    x = np.linspace(-3, 3, 1000)
    y = [(w[0][0] * xs + w[0][2]) / (-w[0][1]) for xs in x]
    plot("IRLS.png", cls1, cls2, (x, y))
if __name__ == "__main__":
    mu = [[0., 0.], [-2., -2.], [1., 1.]]
    cov = np.linalg.inv([[30., 10.], [10., 15.]])
    num_data = [80, 20, 100]
    cls1, cls2 = generate_data(mu, cov, num_data)
    plot("data.png", cls1, cls2)
    fisher(cls1, cls2)
    perceptron(cls1, cls2)
    IRLS(cls1, cls2)
First, Fisher's linear discriminant.
You can see that the boundary is pulled strongly toward the outliers.
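For reference, the discriminant computed in `fisher()` above is

$$
w \propto S_W^{-1}(m_2 - m_1), \qquad
S_W = \sum_{n \in C_1} (x_n - m_1)(x_n - m_1)^T + \sum_{n \in C_2} (x_n - m_2)(x_n - m_2)^T,
$$

with the bias chosen so the boundary passes through the midpoint of the two means. Both the class mean $m_1$ and the within-class scatter $S_W$ are mean/least-squares statistics, so the 20 outliers drag $m_1$ toward $(-2, -2)$ and inflate $S_W$, which is why the boundary gets pulled here.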
Next, the simple perceptron.
Whether or not the generalization performance is good, it finds a hyperplane that classifies the training data, and it is not pulled toward the outliers.
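The update implemented in `perceptron()` is the batch error-correction rule with targets $t_n \in \{-1, +1\}$:

$$
w \leftarrow w + \eta \, \frac{1}{N} \sum_{n} \big(t_n - \mathrm{step}(w^T x_n)\big)\, x_n
$$

Points that are already on the correct side contribute nothing to the sum, so once the training data are separated the weights stop changing. The far-away but correctly classified outliers therefore have no pull on the final hyperplane, though nothing pushes the boundary toward a large margin either.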
Finally, IRLS.
It is unaffected by the outliers and appears to generalize well.
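`IRLS()` above is Newton-Raphson applied to the logistic-regression cross-entropy, with targets $t_n \in \{0, 1\}$ and $y_n = \sigma(w^T x_n)$:

$$
w \leftarrow w - (X^T R X)^{-1} X^T (y - t), \qquad R = \mathrm{diag}\big(y_n(1 - y_n)\big)
$$

For points far from the boundary on the correct side the sigmoid saturates, so both $y_n - t_n$ and the weight $y_n(1 - y_n)$ go to zero; the outliers contribute almost nothing to the update, which matches the boundary seen here.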