[PYTHON] Classify anime faces with deep learning using Chainer

**Click here for the Keras sequel (http://qiita.com/hogefugabar/items/afb4f6c9a93a4bbda51a).**

Introduction

This is my first post. Despite the continuing hot weather, I'd like to try classifying anime face images with deep learning. I'm not very familiar with anime; I can recognize Hatsune Miku, but beyond that I have no idea who is who.

Dataset

The dataset can be obtained from animeface-character-dataset. Reference: "I tried to extract features of anime faces with a Denoising AutoEncoder"

Code

This time, we will implement a convolutional neural network using Chainer. First, we define the model. The architecture is Convolution → Max Pooling → Convolution → Max Pooling → Fully Connected → Softmax. Please forgive the messy code.

Reference: https://github.com/mitmul/chainer-cifar10/blob/master/models/Cifar10.py

CNN.py


import time
import six.moves.cPickle as pickle
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F

class ImageNet(FunctionSet):
	def __init__(self, n_outputs):
		super(ImageNet, self).__init__(
			conv1=	F.Convolution2D(3, 32, 5),
			conv2=	F.Convolution2D(32, 32, 5),
			l3=		F.Linear(512, 512),
			l4=		F.Linear(512, n_outputs)
		)

	def forward(self, x_data, y_data, train=True, gpu=-1):

		if gpu >= 0:
			x_data = cuda.to_gpu(x_data)
			y_data = cuda.to_gpu(y_data)

		x, t = Variable(x_data), Variable(y_data)
		h = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2)
		h = F.max_pooling_2d(F.relu(self.conv2(h)), ksize=3, stride=3)
		h = F.dropout(F.relu(self.l3(h)), train=train)
		y = self.l4(h)
		return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

	def predict(self, x_data, gpu=-1):
		# added here as an assumption: CNN.predict below calls this method,
		# so this is the same forward pass without dropout, returning probabilities
		if gpu >= 0:
			x_data = cuda.to_gpu(x_data)
		x = Variable(x_data)
		h = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2)
		h = F.max_pooling_2d(F.relu(self.conv2(h)), ksize=3, stride=3)
		h = F.relu(self.l3(h))
		return F.softmax(self.l4(h))
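As an aside, the 512 input units of l3 come straight from the feature map shapes after the two convolution/pooling stages. Here is a small sanity-check sketch of mine (assuming Chainer's default cover_all=True pooling, which rounds the output size up):

def conv_out(size, ksize):
	# convolution with no padding, stride 1
	return size - ksize + 1

def pool_out(size, ksize, stride):
	# max pooling with cover_all=True: ceil((size - ksize) / stride) + 1
	return -(-(size - ksize) // stride) + 1

s = conv_out(32, 5)      # conv1: 32 -> 28
s = pool_out(s, 2, 2)    # pool1: 28 -> 14
s = conv_out(s, 5)       # conv2: 14 -> 10
s = pool_out(s, 3, 3)    # pool2: 10 -> 4
print 32 * s * s         # 32 channels * 4 * 4 = 512 inputs to l3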

Next, we add a class that trains and evaluates the model defined above.

CNN.py


class CNN:
	def __init__(self, data, target, n_outputs, gpu=-1):

		self.model = ImageNet(n_outputs)
		self.model_name = 'cnn_model'

		if gpu >= 0:
			self.model.to_gpu()

		self.gpu = gpu

		self.x_train,\
		self.x_test,\
		self.y_train,\
		self.y_test = train_test_split(data, target, test_size=0.1)

		self.n_train = len(self.y_train)
		self.n_test = len(self.y_test)

		self.optimizer = optimizers.Adam()
		self.optimizer.setup(self.model.collect_parameters())

	def predict(self, x_data, gpu=-1):
		return self.model.predict(x_data, gpu)


	def train_and_test(self, n_epoch=100, batchsize=100):

		epoch = 1
		best_accuracy = 0
		while epoch <= n_epoch:
			print 'epoch', epoch

			perm = np.random.permutation(self.n_train)
			sum_train_accuracy = 0
			sum_train_loss = 0
			for i in xrange(0, self.n_train, batchsize):
				x_batch = self.x_train[perm[i:i+batchsize]]
				y_batch = self.y_train[perm[i:i+batchsize]]

				real_batchsize = len(x_batch)

				self.optimizer.zero_grads()
				loss, acc = self.model.forward(x_batch, y_batch, train=True, gpu=self.gpu)
				loss.backward()
				self.optimizer.update()

				sum_train_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
				sum_train_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

			print 'train mean loss={}, accuracy={}'.format(sum_train_loss/self.n_train, sum_train_accuracy/self.n_train)

			# evaluation
			sum_test_accuracy = 0
			sum_test_loss = 0
			for i in xrange(0, self.n_test, batchsize):
				x_batch = self.x_test[i:i+batchsize]
				y_batch = self.y_test[i:i+batchsize]

				real_batchsize = len(x_batch)

				loss, acc = self.model.forward(x_batch, y_batch, train=False, gpu=self.gpu)

				sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
				sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

			print 'test mean loss={}, accuracy={}'.format(sum_test_loss/self.n_test, sum_test_accuracy/self.n_test)			

			epoch += 1

	def dump_model(self):
		self.model.to_cpu()
		pickle.dump(self.model, open(self.model_name, 'wb'), -1)

	def load_model(self):
		self.model = pickle.load(open(self.model_name,'rb'))
		if self.gpu >= 0:
			self.model.to_gpu()
		self.optimizer.setup(self.model.collect_parameters())

Dataset preprocessing

To keep processing light, every image is resized to 32 × 32. Also, **delete any directories that contain no images** beforehand.
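For that deletion step, here is a minimal sketch of my own (not part of the original scripts; the path is the same one hard-coded in animeface.py below):

import os
import shutil

data_dir = u'./animeface-character-dataset/thumb/'
for name in os.listdir(data_dir):
	path = os.path.join(data_dir, name)
	if not os.path.isdir(path):
		continue
	# drop class directories that contain no .png images
	if not any(f.endswith('.png') for f in os.listdir(path)):
		print 'removing', path
		shutil.rmtree(path)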

animeface.py


#! -*- coding: utf-8 -*-

import os
import six.moves.cPickle as pickle
import numpy as np
import cv2 as cv

class AnimeFaceDataset:
	def __init__(self):
		self.data_dir_path = u"./animeface-character-dataset/thumb/"
		self.data = None
		self.target = None
		self.n_types_target = -1
		self.dump_name = u'dataset'
		self.image_size = 32

	def get_dir_list(self):
		tmp = os.listdir(self.data_dir_path)
		if tmp is None:
			return None
		return sorted([x for x in tmp if os.path.isdir(self.data_dir_path+x)])

	def get_class_id(self, fname):
		dir_list = self.get_dir_list()
		dir_name = filter(lambda x: x in fname, dir_list)
		return dir_list.index(dir_name[0])

	def load_data_target(self):
		if os.path.exists(self.dump_name):
			self.load_dataset()
		if self.target is None:
			dir_list = self.get_dir_list()
			ret = {}
			self.target = []
			target_name = []
			self.data = []
			for dir_name in dir_list:
				file_list = os.listdir(self.data_dir_path+dir_name)
				for file_name in file_list:
					root, ext = os.path.splitext(file_name)
					if ext == u'.png':
						abs_name = self.data_dir_path+dir_name+'/'+file_name
						# read class id i.e., target
						class_id = self.get_class_id(abs_name)
						self.target.append(class_id)
						target_name.append(str(dir_name))
						# read image i.e., data
						image = cv.imread(abs_name)
						image = cv.resize(image, (self.image_size, self.image_size))
						image = image.transpose(2,0,1)
						image = image/255.
						self.data.append(image)

			self.index2name = {}
			for i in xrange(len(self.target)):
				self.index2name[self.target[i]] = target_name[i]
				
		self.data = np.array(self.data, np.float32)
		self.target = np.array(self.target, np.int32)

		self.dump_dataset()

	def get_n_types_target(self):
		if self.target is None:
			self.load_data_target()

		if self.n_types_target != -1:
			return self.n_types_target

		tmp = {}
		for target in self.target:
			tmp[target] = 0
		return len(tmp)

	def dump_dataset(self):
		pickle.dump((self.data,self.target,self.index2name), open(self.dump_name, 'wb'), -1)

	def load_dataset(self):
		self.data, self.target, self.index2name = pickle.load(open(self.dump_name, 'rb'))

Let's actually read the data.

In [1]: from animeface import AnimeFaceDataset

In [2]: dataset = AnimeFaceDataset()

In [3]: dataset.load_data_target()       

In [5]: dataset.get_n_types_target()
Out[5]: 176

In [6]: len(dataset.target)
Out[6]: 14490
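As an extra sanity check (my own addition, not in the original post), we can also look at how the 14,490 images are spread over the classes:

import numpy as np

counts = np.bincount(dataset.target)     # images per class id
print 'smallest class:', counts.min()
print 'largest class:', counts.max()
print 'most represented character:', dataset.index2name[int(counts.argmax())]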

So we have a classification problem with 14,490 samples and 176 classes (characters). Are there really that many of them? Let's train the model with the following code.

from CNN import CNN
from animeface import AnimeFaceDataset
from chainer import cuda

# use the GPU
cuda.init(0)

print 'load AnimeFace dataset'
dataset = AnimeFaceDataset()
dataset.load_data_target()
data = dataset.data
target = dataset.target
n_outputs = dataset.get_n_types_target()

cnn = CNN(data=data,
		  target=target,
		  gpu=0,
		  n_outputs=n_outputs)
		  
cnn.train_and_test(n_epoch=100)
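Once training finishes, the model can be pickled and reused. The following is only a sketch of mine (it relies on the predict method added to ImageNet above, and on the cnn and dataset objects created in this script):

# save and reload the trained model
cnn.dump_model()
cnn.load_model()

# class probabilities for the first 10 images, then map ids back to character names
probs = cnn.predict(dataset.data[:10], gpu=0)
pred = cuda.to_cpu(probs.data).argmax(axis=1)
for class_id in pred:
	print dataset.index2name[int(class_id)]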

The execution result is as follows.

C:\Python27\lib\site-packages\skcuda\cublas.py:273: UserWarning: creating CUBLAS
 context to get version number
  warnings.warn('creating CUBLAS context to get version number')
load AnimeFace dataset
epoch 1
train mean loss=4.77383880182, accuracy=0.0361935423276
test mean loss=3.88453409868, accuracy=0.116632157313
epoch 2
train mean loss=3.52874370272, accuracy=0.158193386024
test mean loss=3.00467933286, accuracy=0.247066933423
epoch 3
train mean loss=2.95961939461, accuracy=0.254735058687
test mean loss=2.6362867278, accuracy=0.327122144303
epoch 4
train mean loss=2.634737659, accuracy=0.319607384265
test mean loss=2.38959699009, accuracy=0.395445127233

----
Omission
----

epoch 96
train mean loss=0.227027994983, accuracy=0.925159092696
test mean loss=2.70711887911, accuracy=0.589371965415
epoch 97
train mean loss=0.216873285405, accuracy=0.927382851637
test mean loss=2.6218228118, accuracy=0.594893018034
epoch 98
train mean loss=0.209225204521, accuracy=0.930220058136
test mean loss=2.68379376295, accuracy=0.5935127585
epoch 99
train mean loss=0.209071503231, accuracy=0.928072985573
test mean loss=2.62009712151, accuracy=0.593512752658
epoch 100
train mean loss=0.210750763214, accuracy=0.92999001446
test mean loss=2.75891605618, accuracy=0.589371977427
Early Stopping was not executed.

In conclusion

The final accuracy was about 60%. It could probably be improved by changing the CNN architecture, and shrinking the images to 32 × 32 was probably too aggressive. I plan to clean up the code, variable names, and function names and put everything on GitHub next time. I would appreciate it if you could point out anything that looks odd.
