
**It can also be used for regression, but here we will work through a classification example.**
import numpy as np
import pandas as pd
from sklearn import datasets
# sklearn.neighbors module k-NN method
from sklearn.neighbors import KNeighborsClassifier
#sklearn data split utility
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
#Method to generate color map
from matplotlib.colors import ListedColormap
#Japanese display module of matplotlib
!pip install japanize-matplotlib
import japanize_matplotlib
| | Variable name | Meaning | Note | Data type |
|---|---|---|---|---|
| 0 | species | type | Setosa=0, Versicolour=1, Virginica=2 | int64 | 
| 1 | sepal length | Sepal length | Continuous amount(cm) | float64 | 
| 2 | sepal width | Sepal width | Continuous amount(cm) | float64 | 
| 3 | petal length | Petal length | Continuous amount(cm) | float64 | 
| 4 | petal width | Petal width | Continuous amount(cm) | float64 | 
# Load the iris dataset that ships with scikit-learn
iris = datasets.load_iris()

# Explanatory variables (features): column names, array shape, first 10 rows
print("label:\n", iris.feature_names)
print("shape:\n", np.shape(iris.data))
print("First 10 lines:\n", iris.data[:10])

# Objective variable (species): class names, array shape, every label
print("label:\n", iris.target_names)
print("shape:\n", np.shape(iris.target))
print("Full display:\n", iris.target)

# Split features and labels into training and test parts. No split size is
# passed, so scikit-learn's default proportion applies. Stratifying on the
# labels keeps the class mix the same in both parts, and the fixed seed
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0, stratify=iris.target
)
`stratify = iris.target` specifies **stratified sampling** by species (`iris.target`). The default is random sampling, so here we split the data so that both the training set and the test set retain the composition ratio of the three species.
print("shape:", y_train.shape)
# Count the training labels per class: with return_counts=True, np.unique
# returns the distinct label values together with how often each one occurs.
# This is a bare expression, so the result is only shown when it is
# evaluated interactively (e.g. as the output of a notebook cell).
np.unique(y_train, return_counts=True)

# Accuracy on the training and test splits, collected once per value of k
training_accuracy, test_accuracy = [], []

# Fit a fresh k-NN classifier for every k from 3 through 20
for k in range(3, 21):
    # fit() returns the fitted estimator, so construction and fitting chain
    kNN = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    # score() reports the mean accuracy on the data it is handed
    training_accuracy.append(kNN.score(X_train, y_train))
    test_accuracy.append(kNN.score(X_test, y_test))

# Turn both lists into numpy arrays for element-wise arithmetic later on
training_accuracy = np.asarray(training_accuracy)
test_accuracy = np.asarray(test_accuracy)
# Training vs. test accuracy as a function of k
plt.figure(figsize=(6, 4))
ax = plt.gca()  # axes of the figure that was just created
ax.plot(range(3, 21), training_accuracy, label='Training')
ax.plot(range(3, 21), test_accuracy, label='test')
# Ticks are set after plotting, one per integer from 2 through 20
ax.set_xticks(np.arange(2, 21))
ax.set_xlabel('k number')
ax.set_ylabel('Correct answer rate')
ax.set_title('Transition of correct answer rate')
ax.legend()
ax.grid()
# Gap between training and test accuracy as a function of k
difference = np.absolute(training_accuracy - test_accuracy)  # magnitude only

plt.figure(figsize=(6, 4))
ax = plt.gca()  # axes of the figure that was just created
ax.plot(range(3, 21), difference, label='Difference')
# Ticks are set after plotting, one per integer from 2 through 20
ax.set_xticks(np.arange(2, 21))
ax.set_xlabel('k number')
ax.set_ylabel('Difference(train - test)')
ax.set_title('Transition of difference in correct answer rate')
ax.legend()
ax.grid()
# Render every figure that is still open
plt.show()

# Number of neighbors used for the final model
k = 15
# Keep only the first two feature columns so the result can be drawn in 2-D;
# the labels are used unchanged
X, y = iris.data[:, :2], iris.target
# Build the classifier, then fit it on the two-feature data
model = KNeighborsClassifier(n_neighbors=k)
model.fit(X, y)
Next, we compute `Z` in order to draw the boundaries of each group on a two-dimensional plane.
#Specify mesh spacing
# Step between neighboring mesh points along both axes
h = 0.02

# Color maps: soft tones for the predicted regions, dark tones for the samples
cmap_surface = ListedColormap(['darkseagreen', 'mediumpurple', 'gold'])
cmap_dot = ListedColormap(['darkgreen', 'darkslateblue', 'olive'])

# Extent of the first two feature columns, padded by 1 on every side
x_min, y_min = X[:, :2].min(axis=0) - 1
x_max, y_max = X[:, :2].max(axis=0) + 1

# Coordinate matrices covering that extent at the chosen spacing
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# Flatten the mesh into (x, y) rows, predict a class for every point,
# then fold the predictions back into the shape of the mesh
Z = model.predict(
    np.column_stack((xx.ravel(), yy.ravel()))
).reshape(xx.shape)
Flatten `xx` and `yy` to one dimension with the `ravel()` function, combine them column-wise with numpy's `c_`, and pass the result to the model to predict the group of every mesh point.
# Decision regions of the fitted model with the samples drawn on top
plt.figure(figsize=(6, 5))
# Background: every mesh cell colored by its predicted class
plt.pcolormesh(xx, yy, Z, cmap=cmap_surface)
# Foreground: the samples, colored by their true class
plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=cmap_dot)
# Label the axes, then clip the view to the extent of the mesh
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.xlim(np.min(xx), np.max(xx))
plt.ylim(np.min(yy), np.max(yy))
plt.show()

The `pcolormesh()` function produces a pseudocolor plot on a non-regular rectangular grid. Its arguments are, from the left, `(x, y, Z, cmap)`: `x` and `y` are the coordinates of the mesh, and the data `Z`, which holds the group information for each cell, is assigned colors according to `cmap`.

Recommended Posts