In this article, we classify CIFAR-10 images using PyTorch, following the official tutorial and adding comments along the way. I am a complete beginner at both Python and machine learning.
CIFAR-10 is an image dataset with 10 labels that is widely used in machine learning. The 10 labels are airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.
The official site generates an installation command for each environment. Since I'm on macOS, I ran the following to install.
pip install torch torchvision
#Import NumPy, Matplotlib, PyTorch
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Preprocessing applied to every image:
#   ToTensor  - converts the RGB values from 0-255 to floats in the range 0-1
#   Normalize - standardizes each RGB channel with mean 0.5 and standard
#               deviation 0.5, i.e. (x - 0.5) / 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Training data: downloaded into ./data, then served in shuffled batches of 4.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# Test data: same location, preprocessing and loader settings as above.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
# Training dataset: 50,000 RGB images, 32 pixels high and 32 pixels wide
print(trainset.data.shape)
# Output recorded for the print above:
(50000, 32, 32, 3)
# Test dataset: 10,000 RGB images, 32 pixels high and 32 pixels wide
print(testset.data.shape)
# Output recorded for the print above:
(10000, 32, 32, 3)
# Check the list of class names
print(trainset.classes)
# Output recorded for the print above:
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# The class list is used often below, so keep it in its own variable
classes = trainset.classes
In the official documentation, **airplane is redefined as plane** and **automobile is redefined as car**. Why?
#Try to display the downloaded image
def imshow(img):
    """Display an image tensor with Matplotlib, printing each conversion step.

    The tensor is denormalized with ``img / 2 + 0.5``, converted to a NumPy
    array, and its axes are reordered from (RGB, vertical, horizontal) to
    (vertical, horizontal, RGB) before it is shown. The types and shapes seen
    along the way are printed for inspection.

    Args:
        img: image tensor laid out as (RGB, vertical, horizontal).
    """
    # Denormalize.
    restored = img / 2 + 0.5
    print(type(restored))  # <class 'torch.Tensor'>

    # Convert from torch.Tensor to numpy.ndarray.
    channels_first = restored.numpy()
    print(type(channels_first))
    print(channels_first.shape)

    # Move the RGB axis to the end.
    channels_last = channels_first.transpose(1, 2, 0)
    print(channels_last.shape)

    # Display the image.
    plt.imshow(channels_last)
    plt.show()
# Fetch one mini-batch from the training loader.
# The built-in next() is used instead of dataiter.next(): next() is the
# standard iterator protocol, whereas the .next() method is not available on
# DataLoader iterators in recent PyTorch releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Tile the batch into a single grid image and display it.
imshow(torchvision.utils.make_grid(images))

# Print the class name of every image in the batch (whatever its size).
print(' '.join('%5s' % classes[label] for label in labels))
#Implement CNN
class Net(nn.Module):
    """Small convolutional network with 3 input channels and 10 outputs.

    Two convolution + max-pooling stages are followed by three fully
    connected layers. ``fc1`` expects ``16 * 5 * 5`` features per sample,
    which is what a 32x32 input yields after the two stages
    (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        """Create the layers; attribute names define the state_dict keys."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in the batch ``x``.

        Raises:
            RuntimeError: if the input size does not produce 16 * 5 * 5
                features per sample (raised by ``fc1``).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 400),
        # this never moves data between samples, so a wrongly sized input
        # fails in fc1 instead of silently changing the batch size.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


net = Net()
Define each layer in `__init__` and connect them in `forward`.
# Loss function: cross entropy.
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent with momentum.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train for two passes over the training data.
for epoch in range(2):
    running_loss = 0.0
    # step counts mini-batches from 1 within the current epoch.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Clear the gradients left over from the previous mini-batch.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backpropagate the error, then update the weights.
        loss.backward()
        optimizer.step()

        train_loss = loss.item()
        running_loss += train_loss

        # Report the average loss once every 2000 mini-batches.
        if step % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, step, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
[1, 2000] loss: 2.164
[1, 4000] loss: 1.863
[1, 6000] loss: 1.683
[1, 8000] loss: 1.603
[1, 10000] loss: 1.525
[1, 12000] loss: 1.470
[2, 2000] loss: 1.415
[2, 4000] loss: 1.369
[2, 6000] loss: 1.363
[2, 8000] loss: 1.333
[2, 10000] loss: 1.314
[2, 12000] loss: 1.317
Finished Training
The average loss over every 2000 mini-batches is printed to the log.
# Save the model: write the network's state_dict to PATH
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)
The model is saved in the current directory with the extension .pth (PyTorch).
# Load one mini-batch of test data and display the images with their correct
# labels. The built-in next() is used instead of dataiter.next(): next() is
# the standard iterator protocol, whereas the .next() method is not available
# on DataLoader iterators in recent PyTorch releases.
dataiter = iter(testloader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))

# Build a fresh network, restore the saved parameters and predict.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
net = Net()
net.load_state_dict(torch.load(PATH))
outputs = net(images)
# Along dim 1, torch.max returns (max values, their indices); the index of
# the highest score is the predicted class.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[index] for index in predicted))
GroundTruth: truck cat airplane ship Predicted: truck horse airplane ship
You can see that the predictions are correct except for cat.
# Print the raw scores, then the largest score in each row and its index.
print(outputs)
value, predicted = outputs.max(dim=1)
print(value)
print(predicted)
tensor([[ 0.7114, -2.2724, 0.1225, 0.9470, 2.1940, 1.8655, -2.6655, 4.1646,
-1.1001, -1.6991],
[-2.2453, -4.1017, 1.8291, 3.2079, 1.1242, 3.6712, 1.0010, 1.0489,
-3.2010, -1.9476],
[-3.0669, -3.8900, 0.9312, 3.5649, 2.7791, 1.5095, 2.1216, 1.5274,
-4.3077, -2.2234],
[-2.0948, -3.4640, 2.4833, 2.6210, 4.0590, 1.8350, 0.4924, 0.7212,
-3.5043, -2.4212]], grad_fn=<AddmmBackward>)
tensor([4.1646, 3.6712, 3.5649, 4.0590], grad_fn=<MaxBackward0>)
tensor([7, 5, 3, 4])
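**torch.max** called with `dim=1` returns two tensors: the maximum value in each row of outputs, and the index of that maximum. The index is what we use as the predicted class.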
# Count correct predictions over the whole test set.
correct = 0
total = 0
# Calculate without recording gradients (no learning happens here).
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        # No gradients are tracked inside no_grad(), so the legacy
        # outputs.data access is unnecessary.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # Number of positions where the prediction equals the label.
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
Accuracy of the network on the 10000 test images: 60 %
You can see that the correct answer rate for 10000 test data is 60%.
Below is a personal note for Python beginners. I didn't understand the expression **(predicted == labels).sum().item()**, so I'll print the intermediate values and check them.
# Break (predicted == labels).sum().item() into stages and print each one.
matches = predicted == labels
n_matches = matches.sum()
print(type(matches))
print(matches.dtype)
print(type(n_matches))
print(n_matches)
print(n_matches.item())
# Output recorded in the original post:
# <class 'torch.Tensor'>
# torch.bool
# <class 'torch.Tensor'>
# tensor(2)
# 2
I see. `==` compares the two tensors element by element, and sum(), which is implemented on torch.Tensor, counts the True values. After that, item(), also implemented on torch.Tensor, converts that total into a plain int. It was a little easier to understand when I checked the same thing with NumPy.
# Repeat the same experiment with NumPy arrays.
a = np.array([1, 2, 3, 4, 5])
b = np.array([1, 2, 0, 4, 5])

equal_mask = a == b               # element-wise comparison
n_equal = equal_mask.sum()        # number of True entries
n_equal_int = n_equal.item()      # converted with item()

print(type(equal_mask))
print(equal_mask)
print(n_equal)
print(type(n_equal))
print(n_equal_int)
print(type(n_equal_int))
# Output recorded in the original post:
# <class 'numpy.ndarray'>
# [ True True False True True]
# 4
# <class 'numpy.int64'>
# 4
# <class 'int'>
According to the official documentation, torch.Tensor offers almost the same API as ndarray, so **sum()** and **item()** can be used in the same way. Convinced.
# Per-class accuracy on the test set.
# One float counter per class, indexed by the integer label.
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # Pair each label with whether its prediction was right. Zipping
        # over the batch avoids assuming it holds exactly 4 samples, so a
        # shorter final batch or a different batch_size is handled too.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1
for i in range(len(classes)):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
Accuracy of airplane : 72 %
Accuracy of automobile : 66 %
Accuracy of bird : 38 %
Accuracy of cat : 58 %
Accuracy of deer : 60 %
Accuracy of dog : 29 %
Accuracy of frog : 73 %
Accuracy of horse : 60 %
Accuracy of ship : 69 %
Accuracy of truck : 73 %
Is it like this in a tutorial?
Recommended Posts