Use PyTorch to learn the basic ideas of deep learning.
Suppose you have data represented by an array of numpy, like this:
import numpy as np
# Three sample records, each a separate NumPy array of two measurements.
x1, x2, x3 = (
    np.array(pair)
    for pair in ([0.12, 0.21], [0.34, 0.43], [0.56, 0.65])
)
You can make a linear combination as follows.
z = 0.5 * x1 + 0.3 * x2 + 0.2 * x3
z
array([0.274, 0.364])
PyTorch works with tensors, so convert the NumPy arrays to the tensor type.
import torch
x1 = torch.from_numpy(x1).float()
x2 = torch.from_numpy(x2).float()
x3 = torch.from_numpy(x3).float()
You can make linear combinations in the same way.
z = 0.5 * x1 + 0.3 * x2 + 0.2 * x3
z
tensor([0.2740, 0.3640])
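It may seem tedious, but by further wrapping the tensor in the Variable type, automatic differentiation (gradient calculation) becomes possible. (Note: since PyTorch 0.4, tensors support automatic differentiation directly and Variable is a deprecated wrapper that simply returns a tensor, as the output below shows.)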
from torch.autograd import Variable
# Wrap each tensor in Variable, the step the tutorial text describes as enabling
# automatic differentiation.
# NOTE(review): recent PyTorch releases deprecate Variable because plain tensors
# carry autograd support themselves — confirm against the installed version.
x1 = Variable(x1)
x2 = Variable(x2)
x3 = Variable(x3)
You can make linear combinations in the same way.
z = 0.5 * x1 + 0.3 * x2 + 0.2 * x3
z
tensor([0.2740, 0.3640])
PyTorch provides a function for linear combinations. Before using it, let's express the linear combination with NumPy.
import numpy as np
# Weight matrix: two rows (outputs) by three columns (input variables).
W = np.asarray(
    [
        [5, 1, -2],
        [3, -5, -1],
    ],
    dtype=np.float32,
)
# Bias term: one entry per output.
b = np.asarray([2, -3], dtype=np.float32)
You can make a linear combination as follows.
x = np.array([0, 1, 2]) #1 record of data consisting of 3 variables
y = x.dot(W.T) + b
y
array([ -1., -10.])
If the data consisting of 3 variables is 5 records, it will be as follows.
# Five records of three variables each, holding 0..14 in row-major order.
x = np.arange(15, dtype=np.float32).reshape(5, 3)
x
array([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.],
[ 9., 10., 11.],
[12., 13., 14.]], dtype=float32)
y = x.dot(W.T) + b
y
array([[ -1., -10.],
[ 11., -19.],
[ 23., -28.],
[ 35., -37.],
[ 47., -46.]], dtype=float32)
The above is a linear combination using numpy. In Pytorch, the function that represents a linear combination is defined as follows:
h = torch.nn.Linear(3,2) # Linear layer y = Wx + b: takes a 3-element input vector and returns a 2-element output vector
By default, the weight matrix W and the bias term b contain random numbers. The purpose of deep learning is to optimize these coefficients.
h.weight #Random numbers are entered by default
Parameter containing:
tensor([[-0.2569, -0.5403, 0.4155],
[-0.5554, 0.5284, 0.3978]], requires_grad=True)
h.bias #Random numbers are entered by default
Parameter containing:
tensor([-0.5164, -0.2875], requires_grad=True)
By the way, to convert tensor type data to numpy array type, do as follows.
h.weight.detach().numpy()
array([[-0.256898 , -0.54026437, 0.41552007],
[-0.55537015, 0.5283861 , 0.39781755]], dtype=float32)
h.bias.detach().numpy()
array([-0.5163672 , -0.28754294], dtype=float32)
Now let's do a linear transformation using Pytorch.
# Convert the NumPy records to a float32 tensor. A tensor can be passed to the
# layer directly, so the deprecated Variable wrapper is not needed.
x = torch.from_numpy(x).float()
y = h(x)  # linear transformation
y.detach().numpy()  # inspect the result as a NumPy array (detach() alone suffices)
array([[-0.22559142, 1.0364783 ],
[-1.3705183 , 2.1489787 ],
[-2.515445 , 3.2614794 ],
[-3.660372 , 4.37398 ],
[-4.805299 , 5.48648 ]], dtype=float32)
# Convert x back to a NumPy array and recompute the layer output by hand.
x = x.detach().numpy()
x.dot(h.weight.detach().numpy().T) + h.bias.detach().numpy() # cross-check against the values returned by h(x)
array([[-0.22559142, 1.0364783 ],
[-1.3705183 , 2.1489787 ],
[-2.515445 , 3.2614794 ],
[-3.660372 , 4.37398 ],
[-4.805299 , 5.48648 ]], dtype=float32)
Now, as a practice of Pytorch, let's do linear multiple regression. As the data to be handled, we will handle the data of Iris (iris), which is often used in the field of machine learning.
import numpy as np
from sklearn import datasets
# Load the iris measurements as float32.
iris = datasets.load_iris()
data = iris.data.astype(np.float32)
# Explanatory variables: the first three measurements.
X = data[:, :3]
# Objective variable: the fourth measurement, kept as a single column.
Y = data[:, 3].reshape(len(data), 1)
# Rows at odd positions form the teacher (training) data and rows at even
# positions form the test data.
index = np.arange(Y.size)
odd_rows = index[index % 2 != 0]
even_rows = index[index % 2 == 0]
X_train, Y_train = X[odd_rows, :], Y[odd_rows, :]
X_test, Y_test = X[even_rows, :], Y[even_rows, :]
The data is represented by tensor type.
import torch
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
Y_train = torch.from_numpy(Y_train).float()
Y_test = torch.from_numpy(Y_test).float()
X_train.shape
torch.Size([75, 3])
Combine the explanatory variables and objective variables into TensorDataset type data.
from torch.utils.data import TensorDataset
train = TensorDataset(X_train, Y_train)
train[0]
(tensor([4.9000, 3.0000, 1.4000]), tensor([0.2000]))
In deep learning, the teacher data is split into small "batches" for training.
from torch.utils.data import DataLoader
train_loader = DataLoader(train, batch_size=10, shuffle=True)
Defines a class that performs linear multiple regression.
class MLR(torch.nn.Module):
    """Linear multiple regression built from a single ``torch.nn.Linear`` layer."""

    def __init__(self, n_input: int, n_output: int) -> None:
        """Create the linear layer.

        Args:
            n_input: Number of input values per record.
            n_output: Number of output values per record.
        """
        super().__init__()
        self.l1 = torch.nn.Linear(n_input, n_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the linear layer for ``x``."""
        return self.l1(x)
Create an object of that class.
model = MLR(3, 1) #Linear multiple regression model with 3 variables input and 1 variable output
The goal is to minimize the error (called "loss" in deep learning), so we first define how that error is measured.
criterion = torch.nn.MSELoss() # mean square error
Select an algorithm that minimizes the error.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01) #Stochastic gradient descent
"Predict" with a forward calculation, calculate the error, and then repeat the operation of propagating the error in the opposite direction.
from torch.autograd import Variable
# Train for 100 epochs of mini-batch updates, recording the summed batch loss
# of every epoch in loss_history.
loss_history = []
for epoch in range(100):
    total_loss = 0
    for x_train, y_train in train_loader:
        # The batches are already tensors, so no Variable wrapper is needed.
        optimizer.zero_grad()  # clear the gradients of the previous batch
        y_pred = model(x_train)  # forward calculation ("predict")
        loss = criterion(y_pred, y_train)
        loss.backward()  # propagate the error backwards
        optimizer.step()  # update the model parameters
        total_loss += loss.item()
    loss_history.append(total_loss)
    if (epoch + 1) % 10 == 0:
        print(epoch + 1, total_loss)
10 0.5098993554711342
20 0.49431246891617775
30 0.37891835160553455
40 0.38362359534949064
50 0.602457270026207
60 0.4444280909374356
70 0.41419393196702003
80 0.4345690496265888
90 0.38460623472929
100 0.3826814219355583
Did you see the error (loss) decrease? Let's illustrate the history.
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_history)
[<matplotlib.lines.Line2D at 0x7fc33444d128>]
Now let's look at the y-y plot comparing the predicted and measured values. The closer it is to the diagonal, the better the prediction.
%matplotlib inline
import matplotlib.pyplot as plt
# y-y plot: observed versus predicted values for the teacher (training) data.
with torch.no_grad():  # plotting only, so gradients are not needed
    predicted = model(X_train).numpy().ravel()
observed = Y_train.numpy().ravel()
# Scalar end points for the diagonal "perfect prediction" line.
lo, hi = float(Y.min()), float(Y.max())
plt.figure(figsize=(6, 6))
plt.scatter(observed, predicted, alpha=0.5, label='Training data')
plt.plot([lo, hi], [lo, hi], label='Observed = Predicted')
plt.grid()
plt.legend()  # the labels above give the legend something to display
plt.xlabel('Observed')
plt.ylabel('Predicted')
plt.show()
Next, let's perform logistic regression, which fits the data with the sigmoid (logistic) function and is also used as a classification method.
This time, the four measurement data of iris are used as explanatory variables, and the iris varieties (three types) are used as objective variables.
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()  # load the iris data
X = iris.data.astype(np.float32)  # all 4 measurements are explanatory variables
Y = iris.target  # the iris variety (3 types) is the objective variable
# Convert the variety to a one-hot vector: row i gets 1.0 in column Y[i].
Y_ohv = np.zeros((Y.size, 3), dtype=np.float32)
Y_ohv[np.arange(Y.size), Y] = 1.0
# Rows at odd positions form the teacher (training) data and rows at even
# positions form the test data.
index = np.arange(Y.size)
X_train = X[index[index % 2 != 0], :]  # explanatory variables (teacher data)
X_test = X[index[index % 2 == 0], :]  # explanatory variables (test data)
Y_train = Y_ohv[index[index % 2 != 0], :]  # one-hot objective (teacher data)
Y_test = Y_ohv[index[index % 2 == 0], :]  # one-hot objective (test data)
Y_ans_train = Y[index[index % 2 != 0]]  # variety labels (teacher data)
Y_ans_test = Y[index[index % 2 == 0]]  # variety labels (test data)
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
Y_train = torch.from_numpy(Y_train).float()
Y_test = torch.from_numpy(Y_test).float()
train = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train, batch_size=10, shuffle=True)
train[0]
(tensor([4.9000, 3.0000, 1.4000, 0.2000]), tensor([1., 0., 0.]))
#import torch.nn.functional as F
class LR(torch.nn.Module):
    """Logistic regression: one linear layer followed by an element-wise sigmoid."""

    def __init__(self, n_input: int, n_output: int) -> None:
        """Create the linear layer.

        Args:
            n_input: Number of input values per record.
            n_output: Number of output values per record.
        """
        super().__init__()
        self.l1 = torch.nn.Linear(n_input, n_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid of the linear layer's output for ``x``."""
        return torch.sigmoid(self.l1(x))
model = LR(4, 3)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
from torch.autograd import Variable
# Train for 1000 epochs of mini-batch updates, recording the summed batch loss
# of every epoch in loss_history.
loss_history = []
for epoch in range(1000):
    total_loss = 0
    for x_train, y_train in train_loader:
        # The batches are already tensors, so no Variable wrapper is needed.
        optimizer.zero_grad()  # clear the gradients of the previous batch
        y_pred = model(x_train)  # forward calculation ("predict")
        loss = criterion(y_pred, y_train)
        loss.backward()  # propagate the error backwards
        optimizer.step()  # update the model parameters
        total_loss += loss.item()
    loss_history.append(total_loss)
    if (epoch + 1) % 100 == 0:
        print(epoch + 1, total_loss)
100 1.63955856859684
200 1.2039469927549362
300 0.9843573123216629
400 0.9481473788619041
500 0.847799651324749
600 0.857478104531765
700 0.8010830879211426
800 0.8148728087544441
900 0.8013908714056015
1000 0.7699911445379257
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_history)
[<matplotlib.lines.Line2D at 0x7fc3317298d0>]
Let's find the correct answer rate when the variety with the maximum output value is the "predicted variety".
# Accuracy on the teacher (training) data: the variety whose output is largest
# is taken as the predicted variety.
with torch.no_grad():  # evaluation only, so gradients are not needed
    Y_pred = model(X_train)
nrow, ncol = Y_pred.shape
predicted_cls = Y_pred.argmax(dim=1).numpy()  # column of the maximum in each row
count = int((predicted_cls == Y_ans_train).sum())
print(count, " / ", nrow, " = ", count / nrow)
65 / 75 = 0.8666666666666667
So far, I have created linear multiple regression and logistic regression models using PyTorch. In the same way, if you stack more layers, it becomes "deep learning". The simplest deep learning model is the multi-layer perceptron.
import numpy as np
from sklearn import datasets
# Load the iris measurements as float32.
iris = datasets.load_iris()
data = iris.data.astype(np.float32)
# Explanatory variables: the first three measurements.
X = data[:, :3]
# Objective variable: the fourth measurement, kept as a single column.
Y = data[:, 3].reshape(len(data), 1)
# Rows at odd positions form the teacher (training) data and rows at even
# positions form the test data.
index = np.arange(Y.size)
odd_rows = index[index % 2 != 0]
even_rows = index[index % 2 == 0]
X_train, Y_train = X[odd_rows, :], Y[odd_rows, :]
X_test, Y_test = X[even_rows, :], Y[even_rows, :]
import torch
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
Y_train = torch.from_numpy(Y_train).float()
Y_test = torch.from_numpy(Y_test).float()
from torch.utils.data import TensorDataset
train = TensorDataset(X_train, Y_train)
from torch.utils.data import DataLoader
train_loader = DataLoader(train, batch_size=10, shuffle=True)
#import torch.nn.functional as F
class MLPR(torch.nn.Module):
    """Multi-layer perceptron for regression: linear, sigmoid, then linear."""

    def __init__(self, n_input: int, n_hidden: int, n_output: int) -> None:
        """Create the two linear layers.

        Args:
            n_input: Number of input values per record.
            n_hidden: Number of units in the hidden layer.
            n_output: Number of output values per record.
        """
        super().__init__()
        self.l1 = torch.nn.Linear(n_input, n_hidden)
        self.l2 = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``l2(sigmoid(l1(x)))``; no activation is applied to the output."""
        hidden = torch.sigmoid(self.l1(x))
        return self.l2(hidden)
model = MLPR(3, 3, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
from torch.autograd import Variable
# Train for 1000 epochs of mini-batch updates, recording the summed batch loss
# of every epoch in loss_history.
loss_history = []
for epoch in range(1000):
    total_loss = 0
    for x_train, y_train in train_loader:
        # The batches are already tensors, so no Variable wrapper is needed.
        optimizer.zero_grad()  # clear the gradients of the previous batch
        y_pred = model(x_train)  # forward calculation ("predict")
        loss = criterion(y_pred, y_train)
        loss.backward()  # propagate the error backwards
        optimizer.step()  # update the model parameters
        total_loss += loss.item()
    loss_history.append(total_loss)
    if (epoch + 1) % 100 == 0:
        print(epoch + 1, total_loss)
100 0.40582243632525206
200 0.3966686334460974
300 0.4202105160802603
400 0.3585368797648698
500 0.3776881340891123
600 0.3534861374646425
700 0.40271759033203125
800 0.37439848855137825
900 0.40052078012377024
1000 0.35703002475202084
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_history)
[<matplotlib.lines.Line2D at 0x7fc3316929e8>]
%matplotlib inline
import matplotlib.pyplot as plt
# y-y plot: observed versus predicted values for the teacher (training) data.
with torch.no_grad():  # plotting only, so gradients are not needed
    predicted = model(X_train).numpy().ravel()
observed = Y_train.numpy().ravel()
# Scalar end points for the diagonal "perfect prediction" line.
lo, hi = float(Y.min()), float(Y.max())
plt.figure(figsize=(6, 6))
plt.scatter(observed, predicted, alpha=0.5, label='Training data')
plt.plot([lo, hi], [lo, hi], label='Observed = Predicted')
plt.grid()
plt.legend()  # the labels above give the legend something to display
plt.xlabel('Observed')
plt.ylabel('Predicted')
plt.show()
No handles with labels found to put in legend.
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()  # load the iris data
X = iris.data.astype(np.float32)  # all 4 measurements are explanatory variables
Y = iris.target  # the iris variety (3 types) is the objective variable
# Convert the variety to a one-hot vector: row i gets 1.0 in column Y[i].
Y_ohv = np.zeros((Y.size, 3), dtype=np.float32)
Y_ohv[np.arange(Y.size), Y] = 1.0
# Rows at odd positions form the teacher (training) data and rows at even
# positions form the test data.
index = np.arange(Y.size)
X_train = X[index[index % 2 != 0], :]  # explanatory variables (teacher data)
X_test = X[index[index % 2 == 0], :]  # explanatory variables (test data)
Y_train = Y_ohv[index[index % 2 != 0], :]  # one-hot objective (teacher data)
Y_test = Y_ohv[index[index % 2 == 0], :]  # one-hot objective (test data)
Y_ans_train = Y[index[index % 2 != 0]]  # variety labels (teacher data)
Y_ans_test = Y[index[index % 2 == 0]]  # variety labels (test data)
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
Y_train = torch.from_numpy(Y_train).float()
Y_test = torch.from_numpy(Y_test).float()
train = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train, batch_size=10, shuffle=True)
class MLPC(torch.nn.Module):
    """Multi-layer perceptron for classification: two linear layers, each followed by a sigmoid."""

    def __init__(self, n_input: int, n_hidden: int, n_output: int) -> None:
        """Create the two linear layers.

        Args:
            n_input: Number of input values per record.
            n_hidden: Number of units in the hidden layer.
            n_output: Number of output values per record.
        """
        super().__init__()
        self.l1 = torch.nn.Linear(n_input, n_hidden)
        self.l2 = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``sigmoid(l2(sigmoid(l1(x))))``."""
        hidden = torch.sigmoid(self.l1(x))
        return torch.sigmoid(self.l2(hidden))
model = MLPC(4, 3, 3)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
from torch.autograd import Variable
# Train for 5000 epochs of mini-batch updates, recording the summed batch loss
# of every epoch in loss_history.
loss_history = []
for epoch in range(5000):
    total_loss = 0
    for x_train, y_train in train_loader:
        # The batches are already tensors, so no Variable wrapper is needed.
        optimizer.zero_grad()  # clear the gradients of the previous batch
        y_pred = model(x_train)  # forward calculation ("predict")
        loss = criterion(y_pred, y_train)
        loss.backward()  # propagate the error backwards
        optimizer.step()  # update the model parameters
        total_loss += loss.item()
    loss_history.append(total_loss)
    if (epoch + 1) % 500 == 0:
        print(epoch + 1, total_loss)
500 1.645580604672432
1000 1.3838596642017365
1500 1.1801470965147018
2000 1.0481771975755692
2500 1.004256546497345
3000 0.9521581381559372
3500 0.9244466200470924
4000 0.8533390164375305
4500 0.845703762024641
5000 0.7936465740203857
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_history)
[<matplotlib.lines.Line2D at 0x7fc331581c18>]
# Accuracy on the teacher (training) data: the variety whose output is largest
# is taken as the predicted variety.
with torch.no_grad():  # evaluation only, so gradients are not needed
    Y_pred = model(X_train)
nrow, ncol = Y_pred.shape
predicted_cls = Y_pred.argmax(dim=1).numpy()  # column of the maximum in each row
count = int((predicted_cls == Y_ans_train).sum())
print(count, " / ", nrow, " = ", count / nrow)
67 / 75 = 0.8933333333333333
An autoencoder is a neural network trained to reproduce its own input. The converter from the input layer to the intermediate layer is called an encoder, and the converter from the intermediate layer to the output layer is called a decoder. Dimensionality reduction can be achieved by giving the intermediate layer fewer neurons than the input data has variables.
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()  # load the iris data
data = iris.data.astype(np.float32)
X = data  # all four measurements are used
# Build the row index from X itself; this section never defines Y, so
# Y.size would rely on a value left over from an earlier section.
index = np.arange(len(X))
X_train = X[index[index % 2 != 0], :]  # rows at odd positions (teacher data)
X_test = X[index[index % 2 == 0], :]  # rows at even positions (test data)
import torch
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
from torch.utils.data import TensorDataset
train = TensorDataset(X_train, X_train)
from torch.utils.data import DataLoader
train_loader = DataLoader(train, batch_size=10, shuffle=True)
class MLPR(torch.nn.Module):
    """Autoencoder-style perceptron: a sigmoid encoder layer followed by a linear decoder layer."""

    def __init__(self, n_input: int, n_hidden: int, n_output: int) -> None:
        """Create the encoder and decoder layers.

        Args:
            n_input: Number of input values per record.
            n_hidden: Number of units in the intermediate layer.
            n_output: Number of output values per record.
        """
        super().__init__()
        self.l1 = torch.nn.Linear(n_input, n_hidden)
        self.l2 = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``l2(project(x))``, the decoded output for ``x``."""
        # Reuse project() so the encoder is defined in exactly one place.
        return self.l2(self.project(x))

    def project(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``sigmoid(l1(x))``, the intermediate-layer representation of ``x``."""
        return torch.sigmoid(self.l1(x))
model = MLPR(4, 2, 4)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
from torch.autograd import Variable
# Train for 500 epochs of mini-batch updates, recording the summed batch loss
# of every epoch in loss_history.
loss_history = []
for epoch in range(500):
    total_loss = 0
    for x_train, y_train in train_loader:
        # The batches are already tensors, so no Variable wrapper is needed.
        optimizer.zero_grad()  # clear the gradients of the previous batch
        y_pred = model(x_train)  # forward calculation ("predict")
        loss = criterion(y_pred, y_train)
        loss.backward()  # propagate the error backwards
        optimizer.step()  # update the model parameters
        total_loss += loss.item()
    loss_history.append(total_loss)
    if (epoch + 1) % 50 == 0:
        print(epoch + 1, total_loss)
50 7.260494828224182
100 3.8141910433769226
150 2.6670321971178055
200 1.9922174364328384
250 1.538402482867241
300 1.2299609556794167
350 1.1305854469537735
400 1.0665423274040222
450 1.0088475532829762
500 0.9823619686067104
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_history)
[<matplotlib.lines.Line2D at 0x7fc33154e2e8>]
latent = model.project(X_train)
%matplotlib inline
import matplotlib.pyplot as plt
# latent was computed from X_train, i.e. only the rows at odd positions of the
# iris data, so slicing 50 rows per variety would mix varieties and leave the
# last scatter empty. Select the rows of each variety by its label instead.
points = latent.detach().numpy()
labels = iris.target[1::2]  # labels of the rows at odd positions, matching X_train
for variety, name in enumerate(iris.target_names):
    rows = labels == variety
    plt.scatter(points[rows, 0], points[rows, 1], alpha=0.5, label=name)
plt.legend()
plt.grid()
Evaluate the predictive performance of the test set for the above model.
Compare linear multiple regression, logistic regression, multi-layer perceptrons, and autoencoders and explain their differences and similarities.
Recommended Posts