[PYTHON] Since there was a doppelganger, I tried to distinguish it with artificial intelligence (laugh) (Part 2)

Hello! I'm @eve_yk, an engineer at Liaro. Please see Part 1 for how this whole thing started (crying). Last time, we collected and preprocessed the data used to train the classifier. This time, we will define the classifier model, and then actually train and evaluate it (face recognition)!

3. Build a model of the classifier

Next, we define in Chainer the model of the face image classifier to be trained. I referred to the following code (also for the dataset creation in the previous part). https://github.com/mitmul/chainer-cifar10


class YTNet(chainer.Chain):
    """Small CNN that classifies a face image as one of two people.

    Three 5x5 convolutions (the first two each followed by ReLU and max
    pooling) feed two fully connected layers that emit two class scores.
    """

    def __init__(self):
        """Register the layers and start in training mode."""
        super(YTNet, self).__init__(
            conv1=L.Convolution2D(3, 32, 5, stride=1, pad=2),
            # NOTE: bn1/bn2 are registered (so they are saved with the
            # model) but are not applied in __call__.
            bn1=L.BatchNormalization(32),
            conv2=L.Convolution2D(32, 32, 5, stride=1, pad=2),
            bn2=L.BatchNormalization(32),
            conv3=L.Convolution2D(32, 64, 5, stride=1, pad=2),
            # 16384 inputs: presumably 64 channels x 16 x 16, i.e. 64x64
            # input images -- TODO confirm against the dataset.
            fc4=L.Linear(16384, 4096),
            fc5=L.Linear(4096, 2),
        )
        # Set by callers: True enables dropout and makes __call__ return
        # the loss; False makes it return class probabilities.
        self.train = True

    def __call__(self, x, t):
        """Run the forward pass.

        Args:
            x: Batch of input images.
            t: Ground-truth labels for ``x``.

        Returns:
            The softmax cross entropy loss when ``self.train`` is true,
            otherwise the softmax class probabilities.

        As a side effect ``self.loss`` and ``self.accuracy`` are always
        stored, and ``self.pred`` is stored in evaluation mode.
        """
        h = F.max_pooling_2d(F.relu(self.conv1(x)), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.dropout(F.relu(self.fc4(h)), ratio=0.5, train=self.train)
        h = self.fc5(h)

        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)

        if self.train:
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred

4. Train the model

Based on the code in section 3, we write the code that trains the model. This time the training finishes relatively quickly, but when working with a larger dataset it is good for your mental health to display the training progress.


# -*- coding: utf-8 -*-

import argparse
import os
import six
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np
from chainer import optimizers
from chainer import cuda
from chainer import serializers
from chainer import Variable
from progressbar import ProgressBar

class YTNet(chainer.Chain):
    """Small CNN that classifies a face image as one of two people.

    Three 5x5 convolutions (the first two each followed by ReLU and max
    pooling) feed two fully connected layers that emit two class scores.
    """

    def __init__(self):
        """Register the layers and start in training mode."""
        super(YTNet, self).__init__(
            conv1=L.Convolution2D(3, 32, 5, stride=1, pad=2),
            # NOTE: bn1/bn2 are registered (so they are saved with the
            # model) but are not applied in __call__.
            bn1=L.BatchNormalization(32),
            conv2=L.Convolution2D(32, 32, 5, stride=1, pad=2),
            bn2=L.BatchNormalization(32),
            conv3=L.Convolution2D(32, 64, 5, stride=1, pad=2),
            # 16384 inputs: presumably 64 channels x 16 x 16, i.e. 64x64
            # input images -- TODO confirm against the dataset.
            fc4=L.Linear(16384, 4096),
            fc5=L.Linear(4096, 2),
        )
        # Set by callers: True enables dropout and makes __call__ return
        # the loss; False makes it return class probabilities.
        self.train = True

    def __call__(self, x, t):
        """Run the forward pass.

        Args:
            x: Batch of input images.
            t: Ground-truth labels for ``x``.

        Returns:
            The softmax cross entropy loss when ``self.train`` is true,
            otherwise the softmax class probabilities.

        As a side effect ``self.loss`` and ``self.accuracy`` are always
        stored, and ``self.pred`` is stored in evaluation mode.
        """
        h = F.max_pooling_2d(F.relu(self.conv1(x)), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.dropout(F.relu(self.fc4(h)), ratio=0.5, train=self.train)
        h = self.fc5(h)

        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)

        if self.train:
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred

def one_epoch(args, model, optimizer, data, label, epoch, train):
    """Run one epoch of training or evaluation and print mean loss/accuracy.

    Args:
        args: Parsed command-line options; ``gpu`` and ``batchsize`` are read.
        model: Callable as ``model(x, t)``; its ``train`` flag is set here and
            its ``loss`` / ``accuracy`` attributes are read after each batch.
        optimizer: Optimizer used to update ``model`` (only when ``train``).
        data: Array of samples, indexed along the first axis.
        label: Array of labels aligned with ``data``.
        epoch: Epoch number, used only in the printed report.
        train: True to update the parameters, False to only evaluate.
    """
    model.train = train
    xp = cuda.cupy if args.gpu >= 0 else np
    n_samples = data.shape[0]

    sum_accuracy = 0
    sum_loss = 0

    # Progress display, advanced once per mini batch.
    progress = ProgressBar(min_value=0, max_value=n_samples)
    perm = np.random.permutation(n_samples)
    for i in range(0, n_samples, args.batchsize):
        # Build the mini batch from a random permutation of the samples.
        target = perm[i:i + args.batchsize]
        x = xp.array(data[target], dtype=xp.float32)
        t = xp.array(label[target], dtype=xp.int32)

        # No computational graph is needed when only evaluating.
        volatile = 'off' if train else 'on'
        x = Variable(x, volatile=volatile)
        t = Variable(t, volatile=volatile)

        # Either update the parameters or just run the forward pass; both
        # leave the batch loss/accuracy on the model.
        if train:
            optimizer.update(model, x, t)
        else:
            model(x, t)

        # Weight by batch length so a short final batch is averaged fairly.
        batch_len = t.data.shape[0]
        sum_loss += float(model.loss.data) * batch_len
        sum_accuracy += float(model.accuracy.data) * batch_len

        progress.update(min(i + args.batchsize, n_samples))
        del x, t

    print("")  # Line break after the progress bar
    phase = "train" if train else "test"
    print(phase + " epoch " + str(epoch))
    print("   " + phase + " loss : " + str(sum_loss / n_samples))
    print("   " + phase + " acc  : " + str(sum_accuracy / n_samples))

def load_dataset(datadir):
    """Load the training and test arrays stored as .npy files in ``datadir``.

    Args:
        datadir: Directory containing ``train_data.npy``, ``train_label.npy``,
            ``test_data.npy`` and ``test_label.npy``.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)``.
    """
    train_data = np.load(os.path.join(datadir, 'train_data.npy'))
    train_labels = np.load(os.path.join(datadir, 'train_label.npy'))
    test_data = np.load(os.path.join(datadir, 'test_data.npy'))
    test_labels = np.load(os.path.join(datadir, 'test_label.npy'))

    return train_data, train_labels, test_data, test_labels

if __name__ == '__main__':
    # Train YTNet on the dataset in --data_dir and save the result.
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu", type=int, default=-1)
    parser.add_argument("--batchsize", type=int, default=10)
    parser.add_argument('--data_dir', type=str, default='dataset')
    parser.add_argument('--output_dir', type=str, default='result')
    args = parser.parse_args()

    # Create the model; move it to the GPU when one is requested, because
    # one_epoch builds GPU arrays whenever args.gpu >= 0.
    model = YTNet()
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # The optimizer must be bound to the model before it can update it.
    optimizer = optimizers.Adam(alpha=0.00005)
    optimizer.setup(model)

    # Load the dataset
    dataset = load_dataset(args.data_dir)
    tr_data, tr_labels, te_data, te_labels = dataset

    # Main loop: range(1, 20) runs epochs 1 through 19.
    for epoch in range(1, 20):
        one_epoch(args, model, optimizer, tr_data, tr_labels, epoch, True)
        one_epoch(args, model, optimizer, te_data, te_labels, epoch, False)

    # Save the model and the optimizer state inside the output directory.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    serializers.save_npz(
        os.path.join(args.output_dir, "YTNet.chainermodel"), model)
    serializers.save_npz(
        os.path.join(args.output_dir, "YTNet.state"), optimizer)

After training for about 20 epochs, the accuracy on the training data exceeded 99%.

5. Evaluate performance

Finally, we will evaluate the performance. We test with the following 10 images. Five of them are @eve_yk and five are Super Maradona's Tanaka. Sample faces can be found in the previous article, so please try to guess who is who.


It takes a little thought, but you can tell them apart by looking; the color of the glasses is different, for one. The correct answer is eve_yk for images 1, 2, 4, 7 and 10, and Mr. Tanaka for images 3, 5, 6, 8 and 9.

Now, how does the CNN we trained this time do? We test it with the following code.


# coding:utf-8

import os
import sys
import argparse
import glob
import cv2
import numpy as np
from chainer import Variable
from chainer import serializers
from train import YTNet

def transpose_opencv2chainer(x):
    """Convert an image array from OpenCV layout to Chainer layout.

    OpenCV  => (height, width, channel)
    Chainer => (channel, height, width)

    Args:
        x: Array with three axes in OpenCV order.

    Returns:
        ``x`` with its axes reordered to (channel, height, width).
    """
    return x.transpose(2, 0, 1)

# Ground-truth person for each test image, keyed by file name.
file2labels = {
    "01.jpg": "eve_yk", "02.jpg": "eve_yk", "03.jpg": "tanaka",
    "04.jpg": "eve_yk", "05.jpg": "tanaka", "06.jpg": "tanaka",
    "07.jpg": "eve_yk", "08.jpg": "tanaka", "09.jpg": "tanaka",
    "10.jpg": "eve_yk",
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Evaluate the trained classifier on test images')
    parser.add_argument('--input_path', required=True, type=str)
    parser.add_argument('--model_path', required=True, type=str)
    args = parser.parse_args()

    # Get the list of jpg files
    test_files = glob.glob(os.path.join(args.input_path, "*.jpg"))

    # Load the model. load_npz fills ``model`` in place and returns None,
    # so its result must not be assigned back to ``model``.
    model = YTNet()
    serializers.load_npz(args.model_path, model)
    # Evaluation mode: disables dropout and makes the model return class
    # probabilities instead of the loss.
    model.train = False

    # Evaluate the images one by one
    correct_count = 0.0
    test_count = 0.0
    for file_path in test_files:
        image = cv2.imread(file_path)
        if image is None:
            # Read failure: skip the file without counting it
            continue

        # Get the file name from the path
        file_name = os.path.basename(file_path)
        expected = file2labels.get(file_name)
        if expected is None:
            # No ground truth for this file, so it cannot be scored
            sys.stderr.write("skip (no label): " + file_name + "\n")
            continue
        test_count += 1.0
        sys.stdout.write(file_name + "(" + expected + ") :")

        # Convert to chainer format.
        # NOTE(review): assumes the image already has the size and value
        # range used for the training data -- confirm against Part 1.
        image = transpose_opencv2chainer(image)
        x = Variable(np.asarray([image], dtype=np.float32), volatile="on")
        # Dummy label of shape (1,); the model signature requires one.
        t = Variable(np.asarray([0], dtype=np.int32), volatile="on")

        # Class 0 is tanaka, class 1 is eve_yk.
        probabilities = model(x, t).data
        if int(np.argmax(probabilities[0])) == 0:
            predicted = u"tanaka"
        else:
            predicted = u"eve_yk"
        print(u'Identification result "' + predicted + u'"')
        if expected == predicted:
            correct_count += 1.0

    if test_count > 0:
        print(u"total:{}%".format(correct_count / test_count * 100))
    else:
        print(u"total: no test image could be evaluated")

The results are as follows.

python test.py --input_path test/ --model_path result/YTNet.chainermodel
08.jpg(tanaka) :Identification result "tanaka"
09.jpg(tanaka) :Identification result "eve_yk"
07.jpg(eve_yk) :Identification result "eve_yk"
01.jpg(eve_yk) :Identification result "eve_yk"
03.jpg(tanaka) :Identification result "tanaka"
06.jpg(tanaka) :Identification result "eve_yk"
02.jpg(eve_yk) :Identification result "eve_yk"
05.jpg(tanaka) :Identification result "tanaka"
04.jpg(eve_yk) :Identification result "eve_yk"
10.jpg(eve_yk) :Identification result "eve_yk"

The classifier mistook Mr. Tanaka in images 6 and 9 for eve_yk. Is this a biased prediction caused by the difference in the amount of training data between the two people? Still, I get the impression that it gets wrong the very images that a human also hesitates over. After all, it seems difficult to tell doppelgangers apart with a half-finished system.

In conclusion

I have created a face classifier that distinguishes me from my look-alike. The accuracy is so-so. Considering how many compromises I made, such as the amount of data and the preprocessing, I think it turned out well. It would be interesting to put more effort into collecting data and classify more people, or into preprocessing to improve the accuracy!

Liaro and eve_yk are rooting for Super Maradona!



