Recently there has been a sudden buzz around PyTorch among neural-network libraries, so I decided to try it. Just poking at it is not very interesting, though, so I implemented DeepPose: Human Pose Estimation via Deep Neural Networks while comparing it against Chainer. For an overview of PyTorch itself, see Introduction to PyTorch.
Below I implement DeepPose with Chainer (1.19.0) and with PyTorch (0.1.10).
Model
With Chainer and PyTorch you can write almost identical code. The one difference worth pointing out is that in PyTorch you have to explicitly reshape the output of the convolutional layers with "view" before feeding it into the fully connected layers.
Chainer
# -*- coding: utf-8 -*-
""" AlexNet implementation. """
import chainer
import chainer.functions as F
import chainer.links as L
from modules.functions.chainer import mean_squared_error
class AlexNet(chainer.Chain):
""" The AlexNet :
'A. Krizhevsky, I. Sutskever, and G. Hinton.
ImageNet classification with deep convolutional neural networks. In NIPS, 2012'
Args:
Nj (int): Number of joints.
use_visibility (bool): When it is ``True``,
the function uses visibility to compute mean squared error.
"""
def __init__(self, Nj, use_visibility=False):
super(AlexNet, self).__init__(
conv1=L.Convolution2D(None, 96, 11, stride=4),
conv2=L.Convolution2D(None, 256, 5, pad=2),
conv3=L.Convolution2D(None, 384, 3, pad=1),
conv4=L.Convolution2D(None, 384, 3, pad=1),
conv5=L.Convolution2D(None, 256, 3, pad=1),
fc6=L.Linear(None, 4096),
fc7=L.Linear(None, 4096),
fc8=L.Linear(None, Nj*2),
)
self.Nj = Nj
self.use_visibility = use_visibility
self.train = True
def predict(self, x):
""" Predict 2D pose from image. """
# layer1
h = F.relu(self.conv1(x))
h = F.max_pooling_2d(h, 3, stride=2)
# layer2
h = F.relu(self.conv2(h))
h = F.max_pooling_2d(h, 3, stride=2)
# layer3-5
h = F.relu(self.conv3(h))
h = F.relu(self.conv4(h))
h = F.relu(self.conv5(h))
h = F.max_pooling_2d(h, 3, stride=2)
# layer6-8
h = F.dropout(F.relu(self.fc6(h)), train=self.train)
h = F.dropout(F.relu(self.fc7(h)), train=self.train)
h = self.fc8(h)
return F.reshape(h, (-1, self.Nj, 2))
def __call__(self, image, x, v):
y = self.predict(image)
loss = mean_squared_error(y, x, v, use_visibility=self.use_visibility)
chainer.report({'loss': loss}, self)
return loss
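As a quick sanity check (this snippet is mine, not part of the original post), the Chainer model can be run on a dummy batch as follows; the joint count of 14 and the 227x227 input size are illustrative choices, 227 being the resolution that also matches the hard-coded 256*6*6 fc6 input of the PyTorch version below.
import numpy as np
import chainer
model = AlexNet(Nj=14, use_visibility=True)
x = chainer.Variable(np.zeros((1, 3, 227, 227), dtype=np.float32))  # dummy image batch
y = model.predict(x)
print(y.data.shape)  # (1, 14, 2): one (x, y) coordinate pair per joint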
PyTorch
# -*- coding: utf-8 -*-
""" AlexNet implementation. """
import torch.nn as nn
import torch.nn.functional as F
class AlexNet(nn.Module):
""" The AlexNet :
'A. Krizhevsky, I. Sutskever, and G. Hinton.
ImageNet classification with deep convolutional neural networks. In NIPS, 2012'
Args:
Nj (int): Number of joints.
"""
def __init__(self, Nj):
super(AlexNet, self).__init__()
self.conv1 = nn.Conv2d(3, 96, 11, stride=4)
self.conv2 = nn.Conv2d(96, 256, 5, padding=2)
self.conv3 = nn.Conv2d(256, 384, 3, padding=1)
self.conv4 = nn.Conv2d(384, 384, 3, padding=1)
self.conv5 = nn.Conv2d(384, 256, 3, padding=1)
self.fc6 = nn.Linear(256*6*6, 4096)
self.fc7 = nn.Linear(4096, 4096)
self.fc8 = nn.Linear(4096, Nj*2)
self.Nj = Nj
def forward(self, x):
# layer1
h = F.relu(self.conv1(x))
h = F.max_pool2d(h, 3, stride=2)
# layer2
h = F.relu(self.conv2(h))
h = F.max_pool2d(h, 3, stride=2)
# layer3-5
h = F.relu(self.conv3(h))
h = F.relu(self.conv4(h))
h = F.relu(self.conv5(h))
h = F.max_pool2d(h, 3, stride=2)
h = h.view(-1, 256*6*6)
# layer6-8
h = F.dropout(F.relu(self.fc6(h)), training=self.training)
h = F.dropout(F.relu(self.fc7(h)), training=self.training)
h = self.fc8(h)
return h.view(-1, self.Nj, 2)
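The same kind of dummy forward pass works for the PyTorch version (again my own illustration, not from the original post); with a 227x227 input the flattened feature map has exactly the 256*6*6 elements that fc6 expects.
import torch
from torch.autograd import Variable
model = AlexNet(Nj=14)
x = Variable(torch.zeros(1, 3, 227, 227))  # dummy image batch
y = model(x)
print(y.size())  # (1, 14, 2): one (x, y) coordinate pair per joint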
Loss Function
The loss function can also be written in much the same way. The difference is that PyTorch takes a bit of study, since the computation has to be written with Torch tensor operations. (This was helpful for learning Torch.) On the other hand, it is nice that you apparently do not need to implement the backward computation explicitly. (In some cases Chainer can do without "backward" as well.)
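For reference, both implementations below compute the following quantity (this formula is my reading of the code, not something stated in the original post):
$$ L = \frac{1}{N} \sum_{i} v_i \lVert x_i - t_i \rVert^2, \qquad N = \sum_{i} v_i $$
where x_i and t_i are the predicted and ground-truth 2D coordinates of joint i across the minibatch and v_i is its visibility flag; with use_visibility=False every v_i is treated as 1 and N is simply the total number of joints.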
Chainer
# -*- coding: utf-8 -*-
""" Mean squared error function. """
import numpy as np
from chainer import function
from chainer.utils import type_check
class MeanSquaredError(function.Function):
""" Mean squared error (a.k.a. Euclidean loss) function. """
def __init__(self, use_visibility=False):
self.use_visibility = use_visibility
self.diff = None
self.N = None
def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 3)
type_check.expect(
in_types[0].dtype == np.float32,
in_types[1].dtype == np.float32,
in_types[2].dtype == np.int32,
in_types[0].shape == in_types[1].shape,
in_types[0].shape[:-1] == in_types[2].shape[:-1]
)
def forward_cpu(self, inputs):
x, t, v = inputs
self.diff = x - t
if self.use_visibility:
self.N = v.sum()/2
self.diff *= v
else:
self.N = self.diff.size/2
diff = self.diff.ravel()
return np.array(diff.dot(diff)/self.N, dtype=diff.dtype),
def forward_gpu(self, inputs):
x, t, v = inputs
self.diff = x - t
if self.use_visibility:
self.N = int(v.sum())/2
self.diff *= v
else:
self.N = self.diff.size/2
diff = self.diff.ravel()
return diff.dot(diff)/diff.dtype.type(self.N),
def backward(self, inputs, gy):
coeff = gy[0]*gy[0].dtype.type(2./self.N)
gx0 = coeff*self.diff
return gx0, -gx0, None
def mean_squared_error(x, t, v, use_visibility=False):
""" Computes mean squared error over the minibatch.
Args:
x (Variable): Variable holding an float32 vector of estimated pose.
t (Variable): Variable holding an float32 vector of ground truth pose.
v (Variable): Variable holding an int32 vector of ground truth pose's visibility.
(0: invisible, 1: visible)
use_visibility (bool): When it is ``True``,
the function uses visibility to compute mean squared error.
Returns:
Variable: A variable holding a scalar of the mean squared error loss.
"""
return MeanSquaredError(use_visibility)(x, t, v)
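A small, hypothetical usage example of the Chainer loss (shapes and values are made up):
import numpy as np
from chainer import Variable
x = Variable(np.zeros((1, 14, 2), dtype=np.float32))  # predicted pose
t = Variable(np.ones((1, 14, 2), dtype=np.float32))   # ground-truth pose
v = Variable(np.ones((1, 14, 2), dtype=np.int32))     # every joint visible
loss = mean_squared_error(x, t, v, use_visibility=True)
print(loss.data)  # 2.0: each joint contributes (-1)^2 + (-1)^2, averaged over 14 joints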
PyTorch
# -*- coding: utf-8 -*-
""" Mean squared error function. """
import torch.nn as nn
class MeanSquaredError(nn.Module):
""" Mean squared error (a.k.a. Euclidean loss) function. """
def __init__(self, use_visibility=False):
super(MeanSquaredError, self).__init__()
self.use_visibility = use_visibility
def forward(self, *inputs):
x, t, v = inputs
diff = x - t
if self.use_visibility:
N = (v.sum()/2).data[0]
diff = diff*v
else:
N = diff.numel()/2
diff = diff.view(-1)
return diff.dot(diff)/N
def mean_squared_error(x, t, v, use_visibility=False):
""" Computes mean squared error over the minibatch.
Args:
x (Variable): Variable holding an float32 vector of estimated pose.
t (Variable): Variable holding an float32 vector of ground truth pose.
v (Variable): Variable holding an int32 vector of ground truth pose's visibility.
(0: invisible, 1: visible)
use_visibility (bool): When it is ``True``,
the function uses visibility to compute mean squared error.
Returns:
Variable: A variable holding a scalar of the mean squared error loss.
"""
return MeanSquaredError(use_visibility)(x, t, v)
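The equivalent check against the PyTorch loss, using the old Variable API from the 0.1.10 era that the rest of this post uses (again my own illustration):
import torch
from torch.autograd import Variable
x = Variable(torch.zeros(1, 14, 2))  # predicted pose
t = Variable(torch.ones(1, 14, 2))   # ground-truth pose
v = Variable(torch.ones(1, 14, 2))   # every joint visible
loss = mean_squared_error(x, t, v, use_visibility=True)
print(loss.data[0])  # 2.0, matching the Chainer version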
Train
The last piece is the training code. This, too, can be implemented with almost the same code. Around training, Chainer feels a little more convoluted, but considering that PyTorch is only at v0.1.10 (as of March 28, 2017), there is plenty to look forward to in PyTorch's future.
Chainer
# -*- coding: utf-8 -*-
""" Train pose net. """
import os
import chainer
from chainer import optimizers
from chainer import training
from chainer.training import extensions
from chainer import serializers
from modules.errors import FileNotFoundError, UnknownOptimizationMethodError
from modules.models.chainer import AlexNet
from modules.dataset_indexing.chainer import PoseDataset
class TestModeEvaluator(extensions.Evaluator):
def evaluate(self):
model = self.get_target('main')
model.train = False
ret = super(TestModeEvaluator, self).evaluate()
model.train = True
return ret
class TrainPoseNet(object):
""" Train pose net of estimating 2D pose from image.
Args:
Nj (int): Number of joints.
use_visibility (bool): Use visibility to compute loss.
epoch (int): Number of epochs to train.
opt (str): Optimization method.
gpu (int): GPU ID (negative value indicates CPU).
train (str): Path to training image-pose list file.
val (str): Path to validation image-pose list file.
batchsize (int): Learning minibatch size.
out (str): Output directory.
resume (str): Initialize the trainer from given file.
The file name is 'epoch-{epoch number}.iter'.
resume_model (str): Load model definition file to use for resuming training
(it\'s necessary when you resume a training).
The file name is 'epoch-{epoch number}.model'.
resume_opt (str): Load optimization states from this file
(it\'s necessary when you resume a training).
The file name is 'epoch-{epoch number}.state'.
"""
def __init__(self, **kwargs):
self.Nj = kwargs['Nj']
self.use_visibility = kwargs['use_visibility']
self.epoch = kwargs['epoch']
self.gpu = kwargs['gpu']
self.opt = kwargs['opt']
self.train = kwargs['train']
self.val = kwargs['val']
self.batchsize = kwargs['batchsize']
self.out = kwargs['out']
self.resume = kwargs['resume']
self.resume_model = kwargs['resume_model']
self.resume_opt = kwargs['resume_opt']
# validate arguments.
self._validate_arguments()
def _validate_arguments(self):
for path in (self.train, self.val):
if not os.path.isfile(path):
raise FileNotFoundError('{0} is not found.'.format(path))
if self.opt not in ('MomentumSGD', 'Adam'):
raise UnknownOptimizationMethodError(
'{0} is unknown optimization method.'.format(self.opt))
if self.resume is not None:
for path in (self.resume, self.resume_model, self.resume_opt):
if not os.path.isfile(path):
raise FileNotFoundError('{0} is not found.'.format(path))
def _get_optimizer(self):
if self.opt == 'MomentumSGD':
optimizer = optimizers.MomentumSGD()
elif self.opt == "Adam":
optimizer = optimizers.Adam()
return optimizer
def start(self):
""" Train pose net. """
# initialize model to train.
model = AlexNet(self.Nj, self.use_visibility)
if self.resume_model:
serializers.load_npz(self.resume_model, model)
# prepare gpu.
if self.gpu >= 0:
chainer.cuda.get_device(self.gpu).use()
model.to_gpu()
# load the datasets.
train = PoseDataset(self.train)
val = PoseDataset(self.val, data_augmentation=False)
# training/validation iterators.
train_iter = chainer.iterators.MultiprocessIterator(
train, self.batchsize)
val_iter = chainer.iterators.MultiprocessIterator(
val, self.batchsize, repeat=False, shuffle=False)
# set up an optimizer.
optimizer = self._get_optimizer()
optimizer.setup(model)
if self.resume_opt:
chainer.serializers.load_npz(self.resume_opt, optimizer)
# set up a trainer.
updater = training.StandardUpdater(train_iter, optimizer, device=self.gpu)
trainer = training.Trainer(
updater, (self.epoch, 'epoch'), os.path.join(self.out, 'chainer'))
# standard trainer settings
trainer.extend(extensions.dump_graph('main/loss'))
val_interval = (10, 'epoch')
trainer.extend(TestModeEvaluator(val_iter, model, device=self.gpu), trigger=val_interval)
# save parameters and optimization state per validation step
resume_interval = (self.epoch/10, 'epoch')
trainer.extend(extensions.snapshot_object(
model, "epoch-{.updater.epoch}.model"), trigger=resume_interval)
trainer.extend(extensions.snapshot_object(
optimizer, "epoch-{.updater.epoch}.state"), trigger=resume_interval)
trainer.extend(extensions.snapshot(
filename="epoch-{.updater.epoch}.iter"), trigger=resume_interval)
# show log
log_interval = (10, "iteration")
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(
['epoch', 'main/loss', 'validation/main/loss', 'lr']), trigger=log_interval)
trainer.extend(extensions.ProgressBar(update_interval=10))
# start training
if self.resume:
chainer.serializers.load_npz(self.resume, trainer)
trainer.run()
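For reference, a hypothetical invocation of this class could look like the following; the list-file paths and hyperparameters are made-up placeholders, and gpu=-1 selects the CPU as described in the docstring.
train_pose_net = TrainPoseNet(
    Nj=14, use_visibility=True, epoch=100, opt='Adam', gpu=-1,
    train='data/train_list.txt', val='data/val_list.txt',
    batchsize=32, out='result',
    resume=None, resume_model=None, resume_opt=None)
train_pose_net.start()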
PyTorch
# -*- coding: utf-8 -*-
""" Train pose net. """
import os
import time
from tqdm import tqdm, trange
import torch
import torch.optim as optim
from torch.autograd import Variable
from torchvision import transforms
from modules.errors import FileNotFoundError, GPUNotFoundError, UnknownOptimizationMethodError
from modules.models.pytorch import AlexNet
from modules.dataset_indexing.pytorch import PoseDataset, Crop, RandomNoise, Scale
from modules.functions.pytorch import mean_squared_error
class TrainLogger(object):
""" Logger of training pose net.
Args:
out (str): Output directory.
"""
def __init__(self, out):
try:
os.makedirs(out)
except OSError:
pass
self.file = open(os.path.join(out, 'log'), 'w')
self.logs = []
def write(self, log):
""" Write log. """
tqdm.write(log)
tqdm.write(log, file=self.file)
self.logs.append(log)
def state_dict(self):
""" Returns the state of the logger. """
return {'logs': self.logs}
def load_state_dict(self, state_dict):
""" Loads the logger state. """
self.logs = state_dict['logs']
# write logs.
tqdm.write(self.logs[-1])
for log in self.logs:
tqdm.write(log, file=self.file)
class TrainPoseNet(object):
""" Train pose net of estimating 2D pose from image.
Args:
Nj (int): Number of joints.
use_visibility (bool): Use visibility to compute loss.
epoch (int): Number of epochs to train.
opt (str): Optimization method.
gpu (bool): Use GPU.
train (str): Path to training image-pose list file.
val (str): Path to validation image-pose list file.
batchsize (int): Learning minibatch size.
out (str): Output directory.
resume (str): Initialize the trainer from given file.
The file name is 'epoch-{epoch number}.iter'.
resume_model (str): Load model definition file to use for resuming training
(it\'s necessary when you resume a training).
The file name is 'epoch-{epoch number}.model'.
resume_opt (str): Load optimization states from this file
(it\'s necessary when you resume a training).
The file name is 'epoch-{epoch number}.state'.
"""
def __init__(self, **kwargs):
self.Nj = kwargs['Nj']
self.use_visibility = kwargs['use_visibility']
self.epoch = kwargs['epoch']
self.gpu = (kwargs['gpu'] >= 0)
self.opt = kwargs['opt']
self.train = kwargs['train']
self.val = kwargs['val']
self.batchsize = kwargs['batchsize']
self.out = kwargs['out']
self.resume = kwargs['resume']
self.resume_model = kwargs['resume_model']
self.resume_opt = kwargs['resume_opt']
# validate arguments.
self._validate_arguments()
def _validate_arguments(self):
if self.gpu and not torch.cuda.is_available():
raise GPUNotFoundError('GPU is not found.')
for path in (self.train, self.val):
if not os.path.isfile(path):
raise FileNotFoundError('{0} is not found.'.format(path))
if self.opt not in ('MomentumSGD', 'Adam'):
raise UnknownOptimizationMethodError(
'{0} is unknown optimization method.'.format(self.opt))
if self.resume is not None:
for path in (self.resume, self.resume_model, self.resume_opt):
if not os.path.isfile(path):
raise FileNotFoundError('{0} is not found.'.format(path))
def _get_optimizer(self, model):
if self.opt == 'MomentumSGD':
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
elif self.opt == "Adam":
optimizer = optim.Adam(model.parameters())
return optimizer
def _train(self, model, optimizer, train_iter, log_interval, logger, start_time):
model.train()
for iteration, batch in enumerate(tqdm(train_iter, desc='this epoch')):
image, pose, visibility = Variable(batch[0]), Variable(batch[1]), Variable(batch[2])
if self.gpu:
image, pose, visibility = image.cuda(), pose.cuda(), visibility.cuda()
optimizer.zero_grad()
output = model(image)
loss = mean_squared_error(output, pose, visibility, self.use_visibility)
loss.backward()
optimizer.step()
if iteration % log_interval == 0:
log = 'elapsed_time: {0}, loss: {1}'.format(time.time() - start_time, loss.data[0])
logger.write(log)
def _test(self, model, test_iter, logger, start_time):
model.eval()
test_loss = 0
for batch in test_iter:
image, pose, visibility = Variable(batch[0]), Variable(batch[1]), Variable(batch[2])
if self.gpu:
image, pose, visibility = image.cuda(), pose.cuda(), visibility.cuda()
output = model(image)
test_loss += mean_squared_error(output, pose, visibility, self.use_visibility).data[0]
test_loss /= len(test_iter)
log = 'elapsed_time: {0}, validation/loss: {1}'.format(time.time() - start_time, test_loss)
logger.write(log)
def _checkpoint(self, epoch, model, optimizer, logger):
filename = os.path.join(self.out, 'pytorch', 'epoch-{0}'.format(epoch))
torch.save({'epoch': epoch + 1, 'logger': logger.state_dict()}, filename + '.iter')
torch.save(model.state_dict(), filename + '.model')
torch.save(optimizer.state_dict(), filename + '.state')
def start(self):
""" Train pose net. """
# initialize model to train.
model = AlexNet(self.Nj)
if self.resume_model:
model.load_state_dict(torch.load(self.resume_model))
# prepare gpu.
if self.gpu:
model.cuda()
# load the datasets.
train = PoseDataset(
self.train,
input_transform=transforms.Compose([
transforms.ToTensor(),
RandomNoise()]),
output_transform=Scale(),
transform=Crop(data_augmentation=True))
val = PoseDataset(
self.val,
input_transform=transforms.Compose([
transforms.ToTensor()]),
output_transform=Scale(),
transform=Crop(data_augmentation=False))
# training/validation iterators.
train_iter = torch.utils.data.DataLoader(train, batch_size=self.batchsize, shuffle=True)
val_iter = torch.utils.data.DataLoader(val, batch_size=self.batchsize, shuffle=False)
# set up an optimizer.
optimizer = self._get_optimizer(model)
if self.resume_opt:
optimizer.load_state_dict(torch.load(self.resume_opt))
# set intervals.
val_interval = 10
resume_interval = self.epoch/10
log_interval = 10
# set logger and start epoch.
logger = TrainLogger(os.path.join(self.out, 'pytorch'))
start_epoch = 1
if self.resume:
resume = torch.load(self.resume)
start_epoch = resume['epoch']
logger.load_state_dict(resume['logger'])
# start training.
start_time = time.time()
for epoch in trange(start_epoch, self.epoch + 1, desc=' total'):
self._train(model, optimizer, train_iter, log_interval, logger, start_time)
if epoch % val_interval == 0:
self._test(model, val_iter, logger, start_time)
if epoch % resume_interval == 0:
self._checkpoint(epoch, model, optimizer, logger)
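The PyTorch trainer takes the same keyword arguments as the Chainer version, so it can be launched the same way. To reuse a checkpoint written by _checkpoint() for inference, something like the following works (the path is a made-up example following the 'epoch-{n}.model' naming used above):
import torch
model = AlexNet(Nj=14)
model.load_state_dict(torch.load('result/pytorch/epoch-100.model'))
model.eval()  # disable dropout for prediction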
PyTorch has a reputation for being fast, so I compared its speed against Chainer. For verification I used the same dataset as the paper.
I measured the time required to train for 100 epochs in a CPU environment and in a GPU environment, using AWS m4.large and g2.2xlarge instances respectively.
In the CPU environment, PyTorch finished training about 1.6 times faster.
Library | Time required [h]
---|---
Chainer | 77.5
PyTorch | 48.1
This time I trained with the Momentum SGD implementations that ship with Chainer and PyTorch respectively, but the two implementations appear to differ and they converge differently. Note also that random data augmentation is applied to the training set, so the results could differ even if the random seeds were aligned.
In the GPU environment, PyTorch finished training about 1.3 times faster.
Library | Time required [h]
---|---
Chainer | 4.25
PyTorch | 3.23
The learning curves were also broadly similar to those in the CPU environment.
I also measured the time required for a single prediction, averaged over 10,000 runs, in both the CPU and the GPU environment. The environments are the same as for training.
In the CPU environment, Chainer's average prediction time was slightly faster than PyTorch's.
In the GPU environment, the average prediction times of Chainer and PyTorch were nearly identical.
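The original post does not include the measurement code; a minimal sketch of such a forward-pass benchmark for the PyTorch model (CPU, batch size 1, my own illustration) might look like this:
import time
import torch
from torch.autograd import Variable
model = AlexNet(Nj=14)
model.eval()  # disable dropout for prediction
x = Variable(torch.zeros(1, 3, 227, 227))
n_trials = 10000
start = time.time()
for _ in range(n_trials):
    model(x)
print('average forward time: {0:.6f} s'.format((time.time() - start) / n_trials))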
In terms of implementation, PyTorch was just as easy to write as Chainer. It still feels a little rough around the edges, but that is something to look forward to as it matures. Not having to implement "backward" explicitly also seems like an advantage. In terms of performance, PyTorch was comparable to Chainer in forward computation, but it was surprising that it trained faster than Chainer. It is particularly interesting that in the CPU environment PyTorch was slightly slower in forward computation yet about 1.6 times faster in training. Since PyTorch does not implement "backward" explicitly, I had expected the automatic differentiation to put it at a disadvantage, and yet it trains faster. Next time I would like to see how the performance changes when "backward" is implemented explicitly. The code is currently available here. If you are interested, please give PyTorch a try.
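As a starting point for that experiment, here is a minimal sketch of an MSE loss with a hand-written backward pass. Note that it is written against the current torch.autograd.Function API (static methods and ctx) rather than the 0.1.10 API used in this post, and it omits the visibility handling; it is an illustration, not code from the original post.
import torch
from torch.autograd import Function
class ExplicitMSE(Function):
    """ Mean squared error with an explicit backward pass (sketch only). """
    @staticmethod
    def forward(ctx, x, t):
        diff = x - t
        ctx.save_for_backward(diff)
        ctx.N = diff.numel() / 2  # same normalisation as above: batch size * number of joints
        return (diff * diff).sum() / ctx.N
    @staticmethod
    def backward(ctx, grad_output):
        diff, = ctx.saved_tensors
        gx = grad_output * (2.0 / ctx.N) * diff
        return gx, -gx
Swapping mean_squared_error(output, pose, visibility, ...) for ExplicitMSE.apply(output, pose) in _train() would then allow a direct timing comparison.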