[PYTHON] First Deep Learning ~ Solution ~

Hello everyone, this is @best_not_best. This article is a continuation of "First Deep Learning ~ Struggle ~". If you haven't read that article yet, please read it first. I'm sorry that almost a year has passed since it was posted ...


This article is a product of my personal desires and does not represent the official view of the organization to which I belong.

From the conclusion


Procedure again

  1. Collect images of employees
  2. Cut out the face part of the image in 1.
  3. Collect learning images (favorite entertainers)
  4. Cut out the face part of 3.
  5. Collect learning images (appropriately other than your favorite entertainer)
  6. Cut out the face part of 5.
  7. Create a discriminator by learning 4 and 6 with Tensorflow
  8. Discriminate the image in 2. with a discriminator

1. Collect images of employees

Please refer to Preparation for the details of the process.

1.1. Obtaining an employee ID

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import lxml.html
from selenium import webdriver
import os

# URL of the employee-list page to scrape.
target_url = 'http://hogehoge.co.jp/list.html'
# Headless browser; its service log is discarded.
driver = webdriver.PhantomJS(service_log_path=os.path.devnull)
try:
    # The page must be loaded before page_source holds its markup.
    driver.get(target_url)
    root = lxml.html.fromstring(driver.page_source)
finally:
    # Always shut the browser process down, even if loading fails.
    driver.quit()

# NOTE(review): presumably the IDs live in <td class="text12m"> cells;
# the selector is specific to the target site.
for link in root.cssselect('td.text12m'):
    text = link.text
    # Cells without text cannot hold an ID; only all-digit cells are IDs.
    if text is not None and text.isdigit():
        # One ID per line on standard output, to be redirected to a file.
        print(text)


The employee IDs are written to standard output, so redirect them to a file. From here on, this file is referred to as member_id.txt.

1.2. Generate and get the image URL from the employee ID

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import urllib.request
import urllib.parse
import time

# The employee ID file created in 1.1 (one ID per line)
ID_LIST = '/path/to/member_id.txt'
# Employee image URL format; %s is replaced by the employee ID
URL_FMT = 'http://hogehoge.co.jp/%s.jpg'
# File save destination path format; %s is replaced by the employee ID
OUTPUT_FMT = '/path/to/photo/%s.jpg'

# NOTE(review): this opener is not referenced by the rest of the script.
opener = urllib.request.build_opener()

# Download the photo of every employee listed in ID_LIST.
with open(ID_LIST, 'r') as id_file:
    for line in id_file:
        member_id = line.strip()
        if not member_id:
            # Skip blank lines instead of requesting a malformed URL.
            continue
        url = URL_FMT % member_id

        try:
            with urllib.request.urlopen(url, timeout=5) as response:
                img = response.read()
        except urllib.request.URLError:
            print(url, 'URLError')
            continue
        except OSError:
            # IOError is an alias of OSError in Python 3, so one handler
            # covers both; the original message is kept.
            print(url, 'IOError')
            continue
        except UnicodeEncodeError:
            print(url, 'EncodeError')
            continue

        if len(img) == 0:
            # Nothing was returned for this ID; do not create an empty file.
            continue

        with open(OUTPUT_FMT % member_id, 'wb') as out:
            out.write(img)


It will be saved with the following file name.


2. Cut out the face part of the image in 1.

Please refer to Preparation for the details of the process.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import numpy
import os
import sys
import cv2

# Haar cascade definition file shipped inside the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory the images were saved to in step 1.
# (step 1.2 writes to '/path/to/photo/', so the same directory is read here)
INPUT_DIR_PATH = '/path/to/photo/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/cutout/'
# Image file name format: output directory, source file stem, serial number, extension.
# Since multiple faces may be cut out from one image, a serial number is added.
OUTPUT_FILE_FMT = '%s%s_%d%s'
# NOTE(review): COLOR is not referenced by the rest of this script.
COLOR = (255, 255, 255)

# Load the cascade classifier once; it is identical for every image.
cascade = cv2.CascadeClassifier(CASCADE_PATH)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = os.path.join(INPUT_DIR_PATH, file)

    # File reading; cv2.imread returns None for anything it cannot decode.
    image = cv2.imread(input_image_path)
    if image is None:
        continue

    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue

    # Execution of object recognition (face recognition)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))

    # Save every detected face as a 96x96 image, numbered from 1 per source file.
    stem, ext = os.path.splitext(os.path.basename(file))
    for i, (x, y, w, h) in enumerate(facerect, start=1):
        output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, stem, i, ext)
        try:
            im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
            cv2.imwrite(output_image_path, im)
        except cv2.error:
            # A face that cannot be resized or written is skipped.
            continue

It will be saved with the following file name.


3. Collect learning images (favorite entertainers)

Please refer to Struggle for the details of the process.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys
import os
import json
import urllib.request
import urllib.parse
import requests
import mimetypes
import re

# Base URL of the Bing image search API; query parameters are appended to it.
BING_URL = 'https://api.datamarket.azure.com/Bing/Search/Image?'
# Account key, sent as both user name and password of the HTTP basic auth.
# NOTE(review): 'hogehoge' is presumably a placeholder to be replaced.
MS_ACCTKEY = 'hogehoge'
# Search phrase sent to the API.
QUERY = 'The name of your favorite celebrity'
# Directory for saving acquired images
OUTPUT_DIR_PATH = '/path/to/talent/'

# NOTE(review): this opener is not referenced by the rest of the script.
opener = urllib.request.build_opener()

def download_urllist(urllist, skip):
    """Download every image in *urllist* and save it under OUTPUT_DIR_PATH.

    Files are named "<number>.<extension>". The number starts at *skip* and
    advances only after an image has been saved, so failed or empty
    downloads leave no gaps. The extension is the subtype of the MIME type
    guessed from the URL (query string removed), falling back to 'jpeg'.

    Args:
        urllist: iterable of image URLs.
        skip: number used for the first saved file.
    """
    for url in urllist:
        try:
            with urllib.request.urlopen(url, timeout=5) as response:
                img = response.read()
            if len(img) == 0:
                continue

            # Drop the query string so the extension can be recognised.
            mime_type = mimetypes.guess_type(re.sub(r'\?.*', '', url))[0]
            if mime_type is None:
                extension = 'jpeg'
            else:
                extension = mime_type.split('/')[1]

            file_name = '%s.%s' % (skip, extension)
            with open(OUTPUT_DIR_PATH + file_name, 'wb') as f:
                f.write(img)

        except urllib.request.URLError:
            print(url, 'URLError')
            continue

        except OSError:
            # IOError is an alias of OSError in Python 3.
            print(url, 'IOError')
            continue

        except UnicodeEncodeError:
            print(url, 'EncodeError')
            continue

        skip += 1

if __name__ == "__main__":
    # NOTE(review): the Azure DataMarket Bing Search endpoint has reportedly
    # been retired; confirm the API is still reachable before running this.
    step = 20  # number of result pages to request
    num = 50   # number of results per page

    url_param_dict = {
        'Query': "'" + QUERY + "'",
        'Market': "'ja-JP'",
    }
    url_param_base = urllib.parse.urlencode(url_param_dict)
    # The value of $skip is appended per request in the loop below.
    url_param_base = url_param_base + '&$format=json&$top=%d&$skip=' % (num)

    for skip in range(0, num * step, num):
        url = BING_URL + url_param_base + str(skip)

        response = requests.get(url,
                                auth=(MS_ACCTKEY, MS_ACCTKEY),
                                headers={'User-Agent': 'My API Robot'})
        # Fail with the HTTP error instead of a JSON decoding error.
        response.raise_for_status()
        payload = response.json()

        urllist = [item['MediaUrl'] for item in payload['d']['results']]
        # Saved file numbers continue from this page's offset.
        download_urllist(urllist, skip)

4. Cut out the face part of 3.

Please refer to Struggle for the details of the process.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import numpy
import os
import sys
import cv2

# Haar cascade definition file shipped inside the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory the images were saved to in step 3.
INPUT_DIR_PATH = '/path/to/talent/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/talent_cutout/'
# Image file name format: output directory, source file stem, serial number, extension.
# Since multiple faces may be cut out from one image, a serial number is added.
OUTPUT_FILE_FMT = '%s%s_%d%s'
# NOTE(review): COLOR is not referenced by the rest of this script.
COLOR = (255, 255, 255)

# Load the cascade classifier once; it is identical for every image.
cascade = cv2.CascadeClassifier(CASCADE_PATH)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = os.path.join(INPUT_DIR_PATH, file)

    # File reading; cv2.imread returns None for anything it cannot decode.
    image = cv2.imread(input_image_path)
    if image is None:
        continue

    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue

    # Execution of object recognition (face recognition)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))

    # Save every detected face as a 96x96 image, numbered from 1 per source file.
    # The source file stem is used in the name; the previously referenced
    # `count` was never defined.
    stem, ext = os.path.splitext(os.path.basename(file))
    for i, (x, y, w, h) in enumerate(facerect, start=1):
        output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, stem, i, ext)
        try:
            im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
            cv2.imwrite(output_image_path, im)
        except cv2.error:
            # A face that cannot be resized or written is skipped.
            continue

It will be saved with the following file name.


5. Collect learning images (appropriately other than your favorite entertainer)

Change `QUERY` and `OUTPUT_DIR_PATH` in the program from step 3 and run it. This time, I ran it with `QUERY` set to "Ordinary people" (i.e. the general public).

# Search phrase for images of people other than the favorite entertainer.
QUERY = 'Ordinary people'
# Directory for saving the images acquired with this query.
OUTPUT_DIR_PATH = '/path/to/other_talent/'

6. Cut out the face part of 5.

I will omit it because it is the same process as 4.

7. Create a discriminator by learning 4 and 6 with Tensorflow

Create a dataset. Label the image file of your favorite celebrity with "1" and sort it randomly.

$ ls -la /path/to/talent_cutout/*.* | awk '{print $9" 1"}' | gsort -R > talent.txt

Divide 80% into learning data and 20% into test data. (The following is divided into 752 and 189 because the total number of files was 941.)

$ head -752 talent.txt > talent_train.txt
$ tail -189 talent.txt > talent_test.txt

Similarly, images other than favorite entertainers are also labeled as "2" and divided into learning data (commons_train.txt) and test data (commons_test.txt). Each training data and test data are combined and randomly sorted.

$ cat commons_train.txt talent_train.txt | gsort -R > train.txt
$ cat commons_test.txt talent_test.txt | gsort -R > test.txt

The contents of the file are as follows.

$ head -5 train.txt
/path/to/other_talent_cutout/152_16.jpeg 2
/path/to/talent_cutout/371_1.jpg 1
/path/to/talent_cutout/349_1.jpg 1
/path/to/talent_cutout/523_2.jpg 1
/path/to/other_talent_cutout/348_2.jpeg 2

Train the model with TensorFlow. I used the Qiita article "TensorFlow To learn from a large number of images ... ~ (almost) solution ~" as a reference.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform


flags = tf.app.flags
# Parsed flag values; the rest of the script reads them as FLAGS.<name>.
FLAGS = flags.FLAGS
# The path of the file to save the learning result
flags.DEFINE_string('save_model', '/path/to/model.ckpt', 'File name of model data')
# Training data path
flags.DEFINE_string('train', '/path/to/train.txt', 'File name of train data.')
# Test data path
flags.DEFINE_string('test', '/path/to/test.txt', 'File name of test data.')
flags.DEFINE_string('train_dir', './log_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')
# NOTE(review): the default of 10 is an assumption -- the original value was
# lost from the listing; confirm before relying on it.
flags.DEFINE_integer('batch_size', 10, 'Batch size. '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')

# NOTE(review): the three constants below were missing from the listing and
# are reconstructed from how the script uses them -- confirm.
# Labels are used directly as one-hot indices and the data uses labels 1 and 2,
# so at least 3 slots are needed.
NUM_CLASSES = 3
# inference() flattens to 7 * 7 * 64 after two 2x2 poolings, i.e. 28 / 2 / 2.
IMAGE_SIZE = 28
# Length of one flattened 3-channel image.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3

def inference(images_placeholder, keep_prob):
    """Build the CNN and return its softmax output.

    Layers: conv 5x5 (32) -> 2x2 max-pool -> conv 5x5 (64) -> 2x2 max-pool
    -> fully connected (1024) with dropout -> fully connected (NUM_CLASSES)
    -> softmax.

    Args:
        images_placeholder: flattened images; reshaped here to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: dropout keep probability for the first fully connected layer.

    Returns:
        The softmax tensor with NUM_CLASSES columns.
    """
    def weight_variable(shape):
        # Weights start from a truncated normal distribution (stddev 0.1).
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Biases start at the constant 0.1.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        # Stride 1 with SAME padding keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 window with stride 2 halves the spatial size.
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME')

    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)

    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1') as scope:
        # 7 * 7 is the feature-map size after two halvings, which assumes
        # IMAGE_SIZE is 28 -- TODO confirm against the constant.
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv

def loss(logits, labels):
    """Return the summed cross entropy between *logits* and *labels*.

    *logits* is clipped to [1e-10, 1.0] before the logarithm so that a zero
    probability does not produce log(0). The value is also registered as the
    'cross_entropy' scalar summary.
    """
    clipped = tf.clip_by_value(logits, 1e-10, 1.0)
    cross_entropy = -tf.reduce_sum(labels * tf.log(clipped))
    tf.scalar_summary('cross_entropy', cross_entropy)
    return cross_entropy

def training(loss, learning_rate):
    """Return the op that minimizes *loss* with Adam at *learning_rate*."""
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)

def accuracy(logits, labels):
    """Return the fraction of rows whose argmax in *logits* matches *labels*.

    The value is also registered as the 'accuracy' scalar summary.
    """
    predicted = tf.argmax(logits, 1)
    expected = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted, expected), 'float')
    accuracy = tf.reduce_mean(hits)
    tf.scalar_summary('accuracy', accuracy)
    return accuracy

def _load_dataset(list_path):
    """Read an image-list file and return (images, one-hot labels) as arrays.

    Each non-empty line is "<image path> <integer label>". Images are resized
    to IMAGE_SIZE x IMAGE_SIZE, flattened and scaled to [0, 1]; the label is
    used as the index that is set in a NUM_CLASSES-long one-hot vector.

    Raises:
        IOError: if an image listed in the file cannot be read.
    """
    images = []
    labels = []
    with open(list_path, 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                # Blank or malformed line: nothing to load.
                continue
            img = cv2.imread(fields[0])
            if img is None:
                # cv2.imread returns None instead of raising.
                raise IOError('cannot read image: %s' % fields[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            images.append(img.flatten().astype(np.float32) / 255.0)
            one_hot = np.zeros(NUM_CLASSES)
            one_hot[int(fields[1])] = 1
            labels.append(one_hot)
    return np.asarray(images), np.asarray(labels)


def _batch_bounds(total, batch_size):
    """Yield (start, end) slices covering range(total) in batch_size steps."""
    for start in range(0, total, batch_size):
        yield start, min(start + batch_size, total)


def _evaluate(sess, acc_op, placeholders, images, labels, batch_size):
    """Return the accuracy over the whole set, evaluated batch by batch.

    Each batch accuracy is weighted by the batch length, so a shorter final
    batch does not distort the mean.
    """
    images_ph, labels_ph, keep_prob_ph = placeholders
    total = len(images)
    if total == 0:
        return 0.0
    correct = 0.0
    for start, end in _batch_bounds(total, batch_size):
        batch_accuracy = sess.run(acc_op, feed_dict={
            images_ph: images[start:end],
            labels_ph: labels[start:end],
            keep_prob_ph: 1.0})  # no dropout while evaluating
        correct += batch_accuracy * (end - start)
    return correct / total


if __name__ == '__main__':
    train_image, train_label = _load_dataset(FLAGS.train)  # train.txt
    train_len = len(train_image)
    test_image, test_label = _load_dataset(FLAGS.test)
    test_len = len(test_image)

    with tf.Graph().as_default():
        images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder('float')
        placeholders = (images_placeholder, labels_placeholder, keep_prob)

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        # Variables must be initialised before the first training step.
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        # Ceiling division: a partial last batch still counts as a batch.
        train_batch = -(-train_len // FLAGS.batch_size)
        print('train_batch = ' + str(train_batch))

        for step in range(FLAGS.max_steps):
            for batch, batch_plus in _batch_bounds(train_len, FLAGS.batch_size):
                sess.run(train_op, feed_dict={
                    images_placeholder: train_image[batch: batch_plus],
                    labels_placeholder: train_label[batch: batch_plus],
                    keep_prob: 0.5})

            if step % 10 == 0:
                train_accuracy = _evaluate(
                    sess, acc, placeholders,
                    train_image, train_label, FLAGS.batch_size)
                print('step %d, training accuracy %g' % (step, train_accuracy))

        test_batch = -(-test_len // FLAGS.batch_size)
        print('test_batch = ' + str(test_batch))

        # Batches are bounded by the test set length, not the training set's.
        test_accuracy = _evaluate(
            sess, acc, placeholders,
            test_image, test_label, FLAGS.batch_size)

        print('test accuracy %g' % (test_accuracy))
        save_path = saver.save(sess, FLAGS.save_model)

The learning result is saved in /path/to/model.ckpt.

8. Discriminate the image in 2. with a discriminator

Again, I referred to the Qiita article "TensorFlow To learn from a large number of images ... ~ (almost) solution ~".

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *

# Directory holding the face images cropped in step 2.
DIR_PATH = '/path/to/cutout/'

flags = tf.app.flags
# Parsed flag values; the rest of the script reads them as FLAGS.<name>.
FLAGS = flags.FLAGS
flags.DEFINE_string('readmodels', '/path/to/model.ckpt', 'File name of model data')

# NOTE(review): the three constants below were missing from the listing and
# are reconstructed from how the script uses them; they must match the values
# the checkpoint was trained with -- confirm.
# The training data uses labels 1 and 2 as one-hot indices, so 3 slots.
NUM_CLASSES = 3
# inference() flattens to 7 * 7 * 64 after two 2x2 poolings, i.e. 28 / 2 / 2.
IMAGE_SIZE = 28
# Length of one flattened 3-channel image.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3

def inference(images_placeholder, keep_prob):
    """Build the CNN and return its softmax output.

    Layers: conv 5x5 (32) -> 2x2 max-pool -> conv 5x5 (64) -> 2x2 max-pool
    -> fully connected (1024) with dropout -> fully connected (NUM_CLASSES)
    -> softmax.

    Args:
        images_placeholder: flattened images; reshaped here to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: dropout keep probability for the first fully connected layer.

    Returns:
        The softmax tensor with NUM_CLASSES columns.
    """
    def weight_variable(shape):
        # Weights start from a truncated normal distribution (stddev 0.1).
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        # Biases start at the constant 0.1.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        # Stride 1 with SAME padding keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 window with stride 2 halves the spatial size.
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME')

    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1') as scope:
        # 7 * 7 is the feature-map size after two halvings, which assumes
        # IMAGE_SIZE is 28 -- TODO confirm against the constant.
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    return y_conv

if __name__ == '__main__':
    # Load every cropped face; names are kept in step with the image list so
    # that each prediction can be reported by file name.
    test_image = []
    test_image_name = []
    files = os.listdir(DIR_PATH)
    for file in files:
        if file == '.DS_Store':
            # macOS folder metadata, not an image.
            continue

        img = cv2.imread(DIR_PATH + file)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32) / 255.0)
        test_image_name.append(file)

    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder('float')

    logits = inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    # Load the trained weights; without this the network holds no learned values.
    saver = tf.train.Saver()
    saver.restore(sess, FLAGS.readmodels)

    for name, image in zip(test_image_name, test_image):
        pr = logits.eval(feed_dict={
            images_placeholder: [image],
            keep_prob: 1.0})  # no dropout while predicting
        # pr holds one row (a single image was fed), so the flat argmax is
        # the class index.
        pred = np.argmax(pr)

        if pred == 1:
            # When judged to be a favorite entertainer: "<file name>,<score in %>"
            print('%s,%f' % (name, pr[0][pred] * 100.0))

The results are written to standard output, so redirect them to a file as appropriate. Sort the results in descending order of score and print them, and you're done!

$ sort -t, -k2,2 -nr result.csv | head -5


I'm not sure how useful it is, but we presented it at our subcommittee as an example of our efforts.

