The book I read
"[2nd Edition] Python Machine Learning Programming: Theory and Practice by the Expert Data Scientist (Impress Top Gear)"
↑ I play around with different data, but essentially this post just works through Chapter 2 of this book.
The perceptron learning rule
1. Initialize the weights $\mathbf{w}$ to 0 or small random numbers.
2. For each training sample $\mathbf{x}^{(i)}$, do the following:
    1. Compute the output value $\hat{y}^{(i)}$.
    2. Update the weights.

The weight update is

$$w_j := w_j + \Delta w_j, \qquad \Delta w_j = \eta \left( y^{(i)} - \hat{y}^{(i)} \right) x_j^{(i)}$$

where $\eta$ is the learning rate (usually a constant greater than 0.0 and less than 1.0), $y^{(i)}$ is the true class label of the $i$-th training sample, and $\hat{y}^{(i)}$ is the predicted class label.

The predicted value $\hat{y}^{(i)}$ is determined by the unit step function

$$\hat{y}^{(i)} = \phi\left(z^{(i)}\right) = \begin{cases} 1 & (z^{(i)} \geq 0) \\ -1 & (z^{(i)} < 0) \end{cases}$$

where the net input $z$ is

$$z = w_0 + w_1 x_1 + \dots + w_m x_m = \mathbf{w}^T \mathbf{x}.$$

Convergence of the perceptron is guaranteed only if the two classes are linearly separable and the learning rate is sufficiently small.
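As a concrete illustration, here is one weight update step with made-up numbers (a sketch of my own, not from the book):

```python
import numpy as np

eta = 0.1                        # learning rate η
w = np.array([0.0, 0.0, 0.0])    # [w_0 (bias), w_1, w_2], initialized to 0
xi = np.array([1.4, 0.2])        # one made-up training sample x^(i)
target = -1                      # true class label y^(i)

z = np.dot(xi, w[1:]) + w[0]     # net input: z = w^T x = 0.0
y_hat = 1 if z >= 0.0 else -1    # unit step: ŷ^(i) = 1, so the sample is misclassified
update = eta * (target - y_hat)  # η (y^(i) - ŷ^(i)) = 0.1 * (-1 - 1) = -0.2
w[1:] += update * xi             # Δw_j = -0.2 * x_j  ->  w_1 = -0.28, w_2 = -0.04
w[0] += update                   # Δw_0 = -0.2
```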
```python
import numpy as np

class Perceptron(object):
    """Perceptron classifier

    Parameters
    -----------
    eta : float
        Learning rate (greater than 0.0 and less than or equal to 1.0)
    n_iter : int
        Number of passes (epochs) over the training data
    random_state : int
        Random seed for weight initialization

    Attributes
    -----------
    w_ : 1d-array
        Weights after fitting
    errors_ : list
        Number of misclassifications (updates) in each epoch
    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit to the training data

        Parameters
        ------------
        X : {array-like}, shape = [n_samples, n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features
        y : array-like, shape = [n_samples]
            Target values

        Returns
        ------------
        self : object
        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []
        for _ in range(self.n_iter):  # iterate over the training data n_iter times
            errors = 0
            for xi, target in zip(X, y):  # update the weights for each sample
                # Update of weights w_1, ..., w_m:
                # Δw_j = η (y^(i) - ŷ^(i)) x_j  (j = 1, ..., m)
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                # Update of weight w_0: Δw_0 = η (y^(i) - ŷ^(i))
                self.w_[0] += update
                # If the weight update is nonzero, count it as a misclassification
                errors += int(update != 0.0)
            # Store the error count for each epoch
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate the net input"""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return the class label after the unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, -1)
```
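As a quick sanity check (my own addition, not from the book), the class can be smoke-tested on a tiny linearly separable toy problem, logical AND with labels encoded as -1/1:

```python
import numpy as np

X_toy = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_toy = np.array([-1, -1, -1, 1])  # logical AND with -1/1 labels

ppn_toy = Perceptron(eta=0.1, n_iter=10).fit(X_toy, y_toy)
print(ppn_toy.errors_)         # update counts per epoch; should drop to 0
print(ppn_toy.predict(X_toy))  # expected: [-1 -1 -1  1]
```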
```python
from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
# df.loc[df['target'] == 0, 'target'] = "setosa"
# df.loc[df['target'] == 1, 'target'] = "versicolor"
# df.loc[df['target'] == 2, 'target'] = "virginica"
df.head()
```
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | target |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
```python
import seaborn as sns
sns.pairplot(df, hue='target')
```
Since this is binary classification, we narrow the data down to two classes that are linearly separable. We also keep the features two-dimensional so the result is easy to inspect visually.
Labels 0 and 2 look suitable, since they can be separated cleanly. (I wanted to use a different pair than the book does.)
```python
import numpy as np
import matplotlib.pyplot as plt

df2 = df.query("target != 1").copy()  # exclude label 1
df2["target"] -= 1  # map labels 0 and 2 to -1 and 1
plt.scatter(df2.iloc[:50, 3], df2.iloc[:50, 1], color='blue', marker='o', label='setosa')
plt.scatter(df2.iloc[50:, 3], df2.iloc[50:, 1], color='green', marker='o', label='virginica')
plt.xlabel('petal width [cm]')
plt.ylabel('sepal width [cm]')
plt.legend(loc='upper left')
plt.show()
```
This extracts and re-plots the same panel as in the pair plot above, first column from the right and second row from the top.
We use this data to train the perceptron.
```python
X = df2[['petal width (cm)', 'sepal width (cm)']].values
Y = df2['target'].values
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, Y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()
```
It can be seen that the perceptron has converged in the sixth epoch.
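Since the model is linear, we can also read the learned boundary directly off the fitted weights: the decision boundary is the line where the net input is zero, $w_0 + w_1 x_1 + w_2 x_2 = 0$. (A consistency check of my own, not from the book.)

```python
w0, w1, w2 = ppn.w_
# Solve w0 + w1*x1 + w2*x2 = 0 for x2 to get the boundary line
print(f"x2 = {-w0 / w2:.3f} + {-w1 / w2:.3f} * x1")
```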
Next, implement a small convenience function to visualize the decision regions.
```python
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, resolution=0.02):
    # Prepare the markers and the color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # Plot the decision regions
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # Generate the grid points
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    # Flatten each feature into a one-dimensional array and predict
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    # Reshape the predictions back to the original grid shape
    Z = Z.reshape(xx1.shape)
    # Plot the contour of the grid predictions
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    # Set the axis ranges
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Plot the samples class by class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')
```
```python
# Plot the decision regions
plot_decision_regions(X, Y, classifier=ppn)
plt.xlabel('petal width [cm]')
plt.ylabel('sepal width [cm]')
plt.legend(loc='upper left')
plt.show()
```
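As a numeric confirmation that every training sample ends up on the correct side of the boundary (my own check, not in the book):

```python
print((ppn.predict(X) == Y).mean())  # expected: 1.0, since the data is separable
```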
Next, let's try a case that cannot be linearly separated.
My prediction: it will not converge and will stop at the epoch limit, although the number of errors will probably still trend downward.
```python
import numpy as np
import matplotlib.pyplot as plt

df3 = df.query("target != 0").copy()  # exclude label 0
y = df3.iloc[:, 4].values
y = np.where(y == 1, -1, 1)  # set label 1 to -1 and the other (label 2) to 1
plt.scatter(df3.iloc[:50, 1], df3.iloc[:50, 0], color='orange', marker='o', label='versicolor')
plt.scatter(df3.iloc[50:, 1], df3.iloc[50:, 0], color='green', marker='o', label='virginica')
plt.xlabel('sepal width [cm]')
plt.ylabel('sepal length [cm]')
plt.legend(loc='upper left')
plt.show()
```
As before, this extracts and re-plots the panel of the pair plot above that is third from the right and first from the top.
```python
X2 = df3[['sepal width (cm)', 'sepal length (cm)']].values
ppn = Perceptron(eta=0.1, n_iter=100)
ppn.fit(X2, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()
```
```python
# Plot the decision regions
plot_decision_regions(X2, y, classifier=ppn)
plt.xlabel('sepal width [cm]')
plt.ylabel('sepal length [cm]')
plt.legend(loc='upper left')
plt.show()
```
It is not classified well at all. Visually, the larger y is, the more samples belong to label 1 (virginica), so I expected a rough boundary around y = 6, but apparently that is not the case.
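To put a number on how badly it fails, we can check the best epoch and the final training accuracy (my own check, not from the book):

```python
print(min(ppn.errors_))               # smallest number of updates in any epoch; never reaches 0 here
print((ppn.predict(X2) == y).mean())  # training accuracy of the final weights
```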