To get a feel for the characteristics of convolutional neural networks (CNN) compared with other machine learning methods (gradient boosting, multilayer perceptron), I examined how classification performance changes when the images are rotated, enlarged, or recolored.
The images were generated with the method described in "Automatically generate images of koala and bear". Can koalas and bears be distinguished by silhouette alone?
from PIL import Image

num_data = 200  # images per class; assumed here to match the generated dataset

koalas = []
for i in range(num_data):
    koala = Image.open("koala_or_bear/koala_{}.jpg".format(i))
    koalas.append(koala)

bears = []
for i in range(num_data):
    bear = Image.open("koala_or_bear/bear_{}.jpg".format(i))
    bears.append(bear)
%matplotlib inline
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10, 10))
for i in range(16):
    ax = fig.add_subplot(4, 4, i + 1)
    ax.axis('off')
    if i < 8:
        ax.set_title('koala_{}'.format(i))
        ax.imshow(koalas[i], cmap=plt.cm.gray, interpolation='none')
    else:
        ax.set_title('bear_{}'.format(i - 8))
        ax.imshow(bears[i - 8], cmap=plt.cm.gray, interpolation='none')
plt.show()
import numpy as np

X = []  # Explanatory variable
Y = []  # Objective variable

index = 0
for koala in koalas:
    resize_img = koala.resize((128, 128))
    r, g, b = resize_img.split()
    r_resize_img = np.asarray(np.float32(r) / 255.0)
    g_resize_img = np.asarray(np.float32(g) / 255.0)
    b_resize_img = np.asarray(np.float32(b) / 255.0)
    rgb_resize_img = np.asarray([r_resize_img, g_resize_img, b_resize_img])
    X.append(rgb_resize_img)
    Y.append(0)
    index += 1
    if index >= num_data:
        break

index = 0
for bear in bears:
    resize_img = bear.resize((128, 128))
    r, g, b = resize_img.split()
    r_resize_img = np.asarray(np.float32(r) / 255.0)
    g_resize_img = np.asarray(np.float32(g) / 255.0)
    b_resize_img = np.asarray(np.float32(b) / 255.0)
    rgb_resize_img = np.asarray([r_resize_img, g_resize_img, b_resize_img])
    X.append(rgb_resize_img)
    Y.append(1)
    index += 1
    if index >= num_data:
        break

X = np.array(X, dtype='float32')
Y = np.array(Y, dtype='int64')
from sklearn import model_selection

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.1
)

d1, d2, d3, d4 = X_train.shape
X_train_a = X_train.reshape((d1, d2 * d3 * d4))
Y_train_onehot = np.identity(2)[Y_train]

d1, d2, d3, d4 = X_test.shape
X_test_a = X_test.reshape((d1, d2 * d3 * d4))
Y_test_onehot = np.identity(2)[Y_test]
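The indexing trick np.identity(2)[Y] builds one-hot vectors by picking rows of the 2x2 identity matrix (note that the scikit-learn classifiers and the PyTorch loss below actually use the integer labels directly). A minimal illustration:

labels = np.array([0, 1, 1, 0])
print(np.identity(2)[labels])
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]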
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
X_train_t = torch.from_numpy(X_train).float()
Y_train_t = torch.from_numpy(Y_train).long()
X_train_v = torch.autograd.Variable(X_train_t)
Y_train_v = torch.autograd.Variable(Y_train_t)
X_test_t = torch.from_numpy(X_test).float()
Y_test_t = torch.from_numpy(Y_test).long()
X_test_v = torch.autograd.Variable(X_test_t)
Y_test_v = torch.autograd.Variable(Y_test_t)
train = TensorDataset(X_train_t, Y_train_t)
train_loader = DataLoader(train, batch_size=32, shuffle=True)
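As a quick sanity check of the DataLoader, one can peek at a single batch; with batch_size=32 and 128x128 RGB images the shapes should look like this:

x_batch, y_batch = next(iter(train_loader))
print(x_batch.shape)  # torch.Size([32, 3, 128, 128])
print(y_batch.shape)  # torch.Size([32])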
%%time
from sklearn.ensemble import GradientBoostingClassifier
classifier = GradientBoostingClassifier()
classifier.fit(X_train_a, Y_train)
print("Accuracy score (train): ", classifier.score(X_train_a, Y_train))
print("Accuracy score (test): ", classifier.score(X_test_a, Y_test))
%%time
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier(max_iter=10000, early_stopping=True)
classifier.fit(X_train_a, Y_train)
print("Accuracy score (train): ", classifier.score(X_train_a, Y_train))
print("Accuracy score (test): ", classifier.score(X_test_a, Y_test))
%%time
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier(max_iter=10000, early_stopping=True,
                           hidden_layer_sizes=(100, 100))
classifier.fit(X_train_a, Y_train)
print("Accuracy score (train): ", classifier.score(X_train_a, Y_train))
print("Accuracy score (test): ", classifier.score(X_test_a, Y_test))
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 10, 5)
        self.conv2 = torch.nn.Conv2d(10, 20, 5)
        self.fc1 = torch.nn.Linear(20 * 29 * 29, 50)
        self.fc2 = torch.nn.Linear(50, 2)

    def forward(self, x):
        x = torch.nn.functional.relu(self.conv1(x))   # 128x128 -> 124x124
        x = torch.nn.functional.max_pool2d(x, 2)      # 124x124 -> 62x62
        x = torch.nn.functional.relu(self.conv2(x))   # 62x62 -> 58x58
        x = torch.nn.functional.max_pool2d(x, 2)      # 58x58 -> 29x29
        x = x.view(-1, 20 * 29 * 29)                  # flatten
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.log_softmax(self.fc2(x), 1)
        return x
from torchsummary import summary
model = CNN()
summary(model, X[0].shape)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 10, 124, 124] 760
Conv2d-2 [-1, 20, 58, 58] 5,020
Linear-3 [-1, 50] 841,050
Linear-4 [-1, 2] 102
================================================================
Total params: 846,932
Trainable params: 846,932
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 1.69
Params size (MB): 3.23
Estimated Total Size (MB): 5.11
----------------------------------------------------------------
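Rather than working out 20 * 29 * 29 by hand, the flattened feature size can also be inferred by pushing a dummy tensor through the convolution and pooling stack. A minimal sketch:

with torch.no_grad():
    dummy = torch.zeros(1, 3, 128, 128)
    out = torch.nn.functional.max_pool2d(torch.relu(torch.nn.Conv2d(3, 10, 5)(dummy)), 2)
    out = torch.nn.functional.max_pool2d(torch.relu(torch.nn.Conv2d(10, 20, 5)(out)), 2)
print(out.shape)    # torch.Size([1, 20, 29, 29])
print(out.numel())  # 16820 == 20 * 29 * 29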
def learn(model, criterion, optimizer, n_iteration):
    for epoch in range(n_iteration):
        total_loss = np.array(0, dtype='float64')
        for x, y in train_loader:
            x = torch.autograd.Variable(x)
            y = torch.autograd.Variable(y)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.data.numpy()
        if (epoch + 1) % 10 == 0:
            print(epoch + 1, total_loss)
        if total_loss == np.array(0, dtype='float64'):
            break
        loss_history.append(total_loss)
    return model
#%%time
model = CNN()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_history = []
model = learn(model, criterion, optimizer, 300)
ax = plt.subplot(2, 1, 1)
ax.plot(loss_history)
ax.grid()
ax = plt.subplot(2, 1, 2)
ax.plot(loss_history)
ax.set_yscale('log')
ax.grid()
Accuracy (training set):
Y_pred = torch.max(model(X_train_v).data, 1)[1]
accuracy = sum(Y_train == Y_pred.numpy()) / len(Y_train)
print(accuracy)
Accuracy (test set):
Y_pred = torch.max(model(X_test_v).data, 1)[1]
accuracy = sum(Y_test == Y_pred.numpy()) / len(Y_test)
print(accuracy)
The first experiment is the simplest task: telling plain koala silhouettes from bear silhouettes.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 1min 2s | 1.0 | 1.0 |
| Multilayer perceptron (1 hidden layer) | 50.9 s | 1.0 | 1.0 |
| Multilayer perceptron (2 hidden layers) | 35.1 s | 1.0 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
All prediction methods were able to answer perfectly.
Next, let's scale the koalas and bears by random magnifications, shifting them slightly up, down, left, and right at the same time.
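For illustration only (the actual images come from the generation article linked above), random scaling with a small random shift could be done with PIL along these lines; the function name and the scale/shift ranges are assumptions:

import random

def scale_and_shift(img, scale_range=(0.6, 1.4), max_shift=10):
    # Hedged sketch: scale by a random factor, then paste onto a white
    # canvas of the original size with a small random offset.
    w, h = img.size
    scale = random.uniform(*scale_range)
    resized = img.resize((int(w * scale), int(h * scale)))
    canvas = Image.new("RGB", (w, h), (255, 255, 255))
    dx = (w - resized.width) // 2 + random.randint(-max_shift, max_shift)
    dy = (h - resized.height) // 2 + random.randint(-max_shift, max_shift)
    canvas.paste(resized, (dx, dy))
    return canvas

augmented = scale_and_shift(koalas[0])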
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 5min 11s | 1.0 | 0.975 |
| Multilayer perceptron (1 hidden layer) | 1min 4s | 1.0 | 1.0 |
| Multilayer perceptron (2 hidden layers) | 1min 4s | 0.977 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
The accuracy of gradient boosting dropped slightly, while the convolutional neural network (CNN) stayed perfect.
Next, let's draw potentially distracting objects in the background.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 1min 23s | 1.0 | 1.0 |
| Multilayer perceptron (1 hidden layer) | 38.3 s | 1.0 | 1.0 |
| Multilayer perceptron (2 hidden layers) | 1min 3s | 0.983 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
It seems to have almost no effect.
Let's make the background colorful.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 1min 30s | 1.0 | 1.0 |
| Multilayer perceptron (1 hidden layer) | 43.9 s | 0.9916 | 1.0 |
| Multilayer perceptron (2 hidden layers) | 41.6 s | 1.0 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
This also seems to have little effect.
Let's make koalas and bears colorful.
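One simple way to produce such recolored silhouettes (a hedged sketch, not the method actually used to generate the dataset; the threshold and function name are assumptions) is to replace the dark silhouette pixels with a random color:

import random

def recolor_silhouette(img, threshold=100):
    # Hedged sketch: treat dark pixels as the silhouette and repaint them
    # with a single random color.
    arr = np.array(img.convert("RGB"))
    mask = arr.mean(axis=2) < threshold
    arr[mask] = [random.randint(0, 255) for _ in range(3)]
    return Image.fromarray(arr)

colorful_koala = recolor_silhouette(koalas[0])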
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 4min 26s | 1.0 | 0.975 |
| Multilayer perceptron (1 hidden layer) | 27.8 s | 0.494 | 0.55 |
| Multilayer perceptron (2 hidden layers) | 1min 9s | 0.816 | 0.775 |
| Convolutional neural network (CNN) | - | 0.505 | 0.45 |
The prediction accuracy dropped considerably, and the convolutional neural network (CNN) degraded badly. Surprisingly, the multilayer perceptron (2 hidden layers) held up reasonably well, and gradient boosting barely deteriorated at all. Impressive.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 7min 24s | 1.0 | 0.9 |
| Multilayer perceptron (1 hidden layer) | 42.4 s | 0.6861 | 0.6 |
| Multilayer perceptron (2 hidden layers) | 1min 50s | 0.925 | 0.75 |
| Convolutional neural network (CNN) | - | 0.5 | 0.5 |
The difficulty has increased, but gradient boosting still does fairly well. The convolutional neural network (CNN), on the other hand, is no better than chance.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 6min 12s | 1.0 | 0.975 |
| Multilayer perceptron (1 hidden layer) | 1min 1s | 0.9916 | 0.975 |
| Multilayer perceptron (2 hidden layers) | 1min 12s | 1.0 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
Simply unifying the colors of the koalas and bears back to black restored every method's performance. Apparently color was what the models were keying on.
Koalas spin, bears spin: next, let's rotate the images.
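As before, this is only an illustration of the kind of transformation involved (the angle range and fill color are assumptions), using PIL's rotate:

import random

def rotate_randomly(img, max_angle=180):
    # Hedged sketch: rotate by a random angle, filling the exposed corners
    # with white so the canvas size stays the same.
    angle = random.uniform(-max_angle, max_angle)
    return img.rotate(angle, fillcolor=(255, 255, 255))

rotated = rotate_randomly(bears[0])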
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 3min 10s | 1.0 | 0.925 |
| Multilayer perceptron (1 hidden layer) | 27.4 s | 0.5 | 0.5 |
| Multilayer perceptron (2 hidden layers) | 1min 20s | 0.994 | 1.0 |
| Convolutional neural network (CNN) | - | 1.0 | 1.0 |
The multilayer perceptron (1 hidden layer) failed completely, but the other methods handled rotation well. Gradient boosting seems to be a little weak against rotation.
Now let's scale the images while rotating them.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 5min 28s | 1.0 | 0.775 |
| Multilayer perceptron (1 hidden layer) | 1min 33s | 0.825 | 0.7 |
| Multilayer perceptron (2 hidden layers) | 30.9 s | 0.65 | 0.675 |
| Convolutional neural network (CNN) | - | 0.505 | 0.45 |
Rotation alone is fine, and scaling alone is fine, but every method gets confused when the images are scaled and rotated at the same time. Even so, gradient boosting holds up best.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 7min 6s | 1.0 | 0.6 |
| Multilayer perceptron (1 hidden layer) | 29.5 s | 0.5194 | 0.325 |
| Multilayer perceptron (2 hidden layers) | 33 s | 0.572 | 0.65 |
| Convolutional neural network (CNN) | - | 0.5194 | 0.325 |
Even if each individual factor (rotation, scaling, background obstacles, color changes) is not much of a problem on its own, mixing those factors together makes the task much harder.
| Method | Training time | Accuracy (training set) | Accuracy (test set) |
|---|---|---|---|
| Gradient boosting | 7min 55s | 1.0 | 0.45 |
| Multilayer perceptron (1 hidden layer) | 31.8 s | 0.505 | 0.45 |
| Multilayer perceptron (2 hidden layers) | 55.7 s | 0.6027 | 0.45 |
| Convolutional neural network (CNN) | - | 0.505 | 0.45 |
With all of the factors piled on at once, every method seems to fall apart.
Since the point was to get a feel for convolutional neural networks (CNN), I was hoping for a "CNN wins hands down!!!" result, but the conclusion turned out to be "gradient boosting is amazing!" instead. (ヽ´ω`)