Hello everyone, this is @best_not_best. This article is a continuation of First Deep Learning ~ Struggle ~. If you haven't read that article yet, please read it first. I'm sorry that it has taken almost a year to post this follow-up ...
This article is the product of my own personal desires and is not the official view of the organization to which I belong.
Machine / OS
MacBook Pro (Retina, 15-inch, Mid 2014)
OS X Yosemite 10.10.5
Python
Python package
lxml 3.6.0
selenium 3.0.2
numpy 1.11.2
opencv3 3.1.0 (Install with conda.)
tensorflow 0.10.0
I am running in Anaconda to use OpenCV with Python 3.x.
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import lxml.html
from selenium import webdriver
import os
# URL of the page that lists the employees (placeholder host).
target_url = 'http://hogehoge.co.jp/list.html'

# Headless browser; its service log is discarded via os.path.devnull.
driver = webdriver.PhantomJS(service_log_path=os.path.devnull)
try:
    driver.get(target_url)
    # Parse the page source as rendered by the browser.
    root = lxml.html.fromstring(driver.page_source)
    links = root.cssselect('td.text12m')
    # Print the text of every `td.text12m` cell that consists only of digits;
    # cells without text are skipped.
    for link in links:
        text = link.text
        if text is not None and text.isdigit():
            print(text)
finally:
    # Always shut the browser down, even when loading or parsing failed,
    # so that no PhantomJS process is left behind.
    try:
        driver.close()
    finally:
        driver.quit()
The employee IDs are written to standard output, so redirect them to a file. From here on, this file is referred to as member_id.txt.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import urllib.request
import urllib.parse
import time
# File with one employee ID per line (the redirected output of the ID script)
ID_LIST = '/path/to/member_id.txt'
# Employee image URL format; %s is replaced by the employee ID
URL_FMT = 'http://hogehoge.co.jp/%s.jpg'
# File save destination path format; %s is replaced by the employee ID
OUTPUT_FMT = '/path/to/photo/%s.jpg'

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

with open(ID_LIST, 'r') as id_file:
    for line in id_file:
        member_id = line.strip()
        if not member_id:
            # A blank line would otherwise request the bogus URL ".../.jpg".
            continue
        url = URL_FMT % member_id
        try:
            # The context manager closes the HTTP response once it is read.
            with urllib.request.urlopen(url, timeout=5) as response:
                img = response.read()
        except urllib.request.URLError:
            print(url, 'URLError')
        except IOError:
            # IOError is an alias of OSError in Python 3, so this handler
            # also covers every other OSError (timeouts, resets, ...).
            print(url, 'IOError')
        except UnicodeEncodeError:
            print(url, 'EncodeError')
        else:
            # Empty responses are not saved.
            if img:
                with open(OUTPUT_FMT % member_id, 'wb') as photo:
                    photo.write(img)
        # Throttle after every request, including empty responses.
        time.sleep(0.1)
It will be saved with the following file name.
000001.jpg
000002.jpg
000003.jpg
000004.jpg
000005.jpg
...
Please refer to Preparation for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2
# Haar cascade definition file shipped inside the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory the employee photos were saved in (step 1). Must be the same
# directory the download script writes to ('/path/to/photo/').
INPUT_DIR_PATH = '/path/to/photo/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/cutout/'
# Image file name format: <output dir><source name>_<serial number><ext>.
# Since multiple faces may be cut out from one image, a serial number is added.
OUTPUT_FILE_FMT = '%s%s_%d%s'
# Not referenced anywhere in this script.
COLOR = (255, 255, 255)

# Load the cascade classifier once instead of once per image.
cascade = cv2.CascadeClassifier(CASCADE_PATH)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = os.path.join(INPUT_DIR_PATH, file)
    # cv2.imread returns None when the file cannot be read as an image.
    image = cv2.imread(input_image_path)
    if image is None:
        continue
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Execution of object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    # The name parts depend only on the source file, not on the face.
    path, ext = os.path.splitext(os.path.basename(file))
    # Serial number of the next face to save; advanced only after a
    # successful write.
    i = 1
    for rect in facerect:
        x, y, w, h = rect
        output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
        try:
            # Crop the detected rectangle and normalise it to 96x96 pixels.
            im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
            cv2.imwrite(output_image_path, im)
        except cv2.error:
            print(file)
            continue
        i += 1
It will be saved with the following file name.
000001_1.jpg
000002_1.jpg
000003_1.jpg
000003_2.jpg
000004_1.jpg
...
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import os
import json
import urllib.request
import urllib.parse
import requests
import mimetypes
import re
# Base URL of the image search API; the query parameters are appended to it.
# NOTE(review): confirm that this endpoint is still available before running.
BING_URL = 'https://api.datamarket.azure.com/Bing/Search/Image?'
# API access key (placeholder). It is sent as both the user name and the
# password of the HTTP basic-auth pair when the API is called.
MS_ACCTKEY = 'hogehoge'
# Search phrase (placeholder): replace with the name to search images for.
QUERY = 'The name of your favorite celebrity'
# Directory for saving acquired images. The trailing slash is required
# because the file name is appended by plain string concatenation.
OUTPUT_DIR_PATH = '/path/to/talent/'
# Install a default opener for the urllib.request.urlopen calls in this script.
opener = urllib.request.build_opener()
urllib.request.install_opener(opener)
def download_urllist(urllist, skip):
    """Download every image in *urllist* into ``OUTPUT_DIR_PATH``.

    Each image is saved as ``<number>.<ext>``. ``number`` is *skip* plus the
    position of the URL inside *urllist*, so the file number always matches
    the result's position in the search listing, even when an earlier URL
    failed or returned an empty body. ``ext`` is the subtype of the MIME
    type guessed from the URL (query string ignored), or ``jpeg`` when
    nothing can be guessed.

    Args:
        urllist: Iterable of image URLs.
        skip: Number given to the first URL of *urllist*.

    Download and write failures are reported on standard output together
    with the offending URL, and the remaining URLs are still processed.
    """
    for number, url in enumerate(urllist, start=skip):
        try:
            # The context manager closes the response once it is read.
            with urllib.request.urlopen(url, timeout=5) as response:
                img = response.read()
            if not img:
                continue
            # Drop the query string so the extension is guessed from the path.
            mime_type = mimetypes.guess_type(re.sub(r'\?.*', '', url))[0]
            if mime_type is None:
                extension = 'jpeg'
            else:
                extension = mime_type.split('/')[1]
            file_name = '%s.%s' % (number, extension)
            with open(os.path.join(OUTPUT_DIR_PATH, file_name), 'wb') as f:
                f.write(img)
        except urllib.request.URLError:
            print(url, 'URLError')
        except IOError:
            # IOError is an alias of OSError in Python 3, so this handler
            # also covers every other OSError (timeouts, write errors, ...).
            print(url, 'IOError')
        except UnicodeEncodeError:
            print(url, 'EncodeError')
if __name__ == "__main__":
    # Page through the search results: up to `step` requests of `num`
    # results each.
    step = 20
    num = 50

    url_param_dict = {
        # The API expects string parameters wrapped in single quotes.
        'Query': "'"+QUERY+"'",
        'Market': "'ja-JP'",
    }
    url_param_base = urllib.parse.urlencode(url_param_dict)
    # The `$`-prefixed options are appended by hand so that they are not
    # percent-encoded by urlencode.
    url_param_base = url_param_base + '&$format=json&$top=%d&$skip=' % num

    for skip in range(0, num*step, num):
        url = BING_URL + url_param_base + str(skip)
        response = requests.get(url,
                                auth=(MS_ACCTKEY, MS_ACCTKEY),
                                headers={'User-Agent': 'My API Robot'},
                                timeout=10)
        # Fail with the HTTP status instead of an obscure JSON/KeyError when
        # the API rejects the request (bad key, quota exceeded, ...).
        response.raise_for_status()
        results = response.json()['d']['results']
        if not results:
            # No more results: further pages would be empty as well.
            break
        urllist = [item['MediaUrl'] for item in results]
        # `skip` doubles as the number of the first file of this page.
        download_urllist(urllist, skip)
Please refer to Struggle for the details of the process.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy
import os
import sys
import cv2
# Haar cascade definition file shipped inside the OpenCV package
CASCADE_PATH = '/path/to/versions/anaconda3-4.1.1/pkgs/opencv3-3.1.0-py35_0/share/OpenCV/haarcascades/haarcascade_frontalface_alt.xml'
# Directory the searched images were saved in (step 3)
INPUT_DIR_PATH = '/path/to/talent/'
# Directory for storing cropped images
OUTPUT_DIR_PATH = '/path/to/talent_cutout/'
# Image file name format: <output dir><source name>_<serial number><ext>.
# Since multiple faces may be cut out from one image, a serial number is added.
OUTPUT_FILE_FMT = '%s%s_%d%s'
# Not referenced anywhere in this script.
COLOR = (255, 255, 255)

# Load the cascade classifier once instead of once per image.
cascade = cv2.CascadeClassifier(CASCADE_PATH)

files = os.listdir(INPUT_DIR_PATH)
for file in files:
    input_image_path = os.path.join(INPUT_DIR_PATH, file)
    # cv2.imread returns None when the file cannot be read as an image.
    image = cv2.imread(input_image_path)
    if image is None:
        continue
    # Grayscale conversion
    try:
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except cv2.error:
        continue
    # Execution of object recognition (face detection)
    facerect = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
    # The name parts depend only on the source file, not on the face.
    path, ext = os.path.splitext(os.path.basename(file))
    # Serial number of the next face to save; advanced only after a
    # successful write.
    i = 1
    for rect in facerect:
        x, y, w, h = rect
        # Name the crop after its source file (e.g. "7.jpeg" -> "7_1.jpeg").
        # The original referenced an undefined name `count` here, which
        # raised NameError on the first detected face.
        output_image_path = OUTPUT_FILE_FMT % (OUTPUT_DIR_PATH, path, i, ext)
        try:
            # Crop the detected rectangle and normalise it to 96x96 pixels.
            im = cv2.resize(image[y:y+h, x:x+w], (96, 96))
            cv2.imwrite(output_image_path, im)
        except cv2.error:
            print(file)
            continue
        i += 1
It will be saved with the following file name.
7_3.jpeg
6_1.jpeg
4_1.jpeg
3_1.jpeg
2_1.jpeg
...
Change `QUERY` and `OUTPUT_DIR_PATH` in the program from step 3 and run it again. This time, I ran it with `QUERY` set to "ordinary people" (the general public).
# Search phrase used to collect images for the second class.
QUERY = 'Ordinary people'
# Separate output directory for the images found with this query.
OUTPUT_DIR_PATH = '/path/to/other_talent/'
The procedure is the same as in step 4, so I will omit it.
Create a dataset. Label the image file of your favorite celebrity with "1" and sort it randomly.
$ ls -la /path/to/talent_cutout/*.* | awk '{print $9" 1"}' | gsort -R > talent.txt
Divide 80% into learning data and 20% into test data. (The following is divided into 752 and 189 because the total number of files was 941.)
$ head -752 talent.txt > talent_train.txt
$ tail -189 talent.txt > talent_test.txt
Similarly, images other than favorite entertainers are also labeled as "2" and divided into learning data (commons_train.txt) and test data (commons_test.txt). Each training data and test data are combined and randomly sorted.
$ cat commons_train.txt talent_train.txt | gsort -R > train.txt
$ cat commons_test.txt talent_test.txt | gsort -R > test.txt
The contents of the file are as follows.
$ head -5 train.txt
/path/to/other_talent_cutout/152_16.jpeg 2
/path/to/talent_cutout/371_1.jpg 1
/path/to/talent_cutout/349_1.jpg 1
/path/to/talent_cutout/523_2.jpg 1
/path/to/other_talent_cutout/348_2.jpeg 2
Train the model with TensorFlow. I used the Qiita article "TensorFlow: To learn from a large number of images ... ~ (almost) solution ~" as a reference.
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
# Length of the one-hot label vector. The label read from the data file is
# used directly as the index into that vector.
NUM_CLASSES = 3
# Images are resized to IMAGE_SIZE x IMAGE_SIZE before being fed to the network.
IMAGE_SIZE = 28
# Length of one flattened 3-channel image.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3

flags = tf.app.flags
FLAGS = flags.FLAGS
# The path of the file to save the learning result
flags.DEFINE_string('save_model', '/path/to/model.ckpt', 'File name of model data')
# Training data path
flags.DEFINE_string('train', '/path/to/train.txt', 'File name of train data.')
# Test data path
flags.DEFINE_string('test', '/path/to/test.txt', 'File name of test data.')
# Directory handed to the summary writer
flags.DEFINE_string('train_dir', './log_data', 'Directory to put the training data.')
# Number of passes over the whole training set
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')
flags.DEFINE_integer(
    'batch_size',
    10,
    # Adjacent literals are concatenated, so the first one needs its own
    # separator; without it the help text read "Batch sizeMust divide ...".
    'Batch size. '
    'Must divide evenly into the dataset sizes.'
)
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
    """Build the classification network and return its softmax output.

    The graph is: reshape -> conv1 (5x5, 3->32) -> 2x2 max-pool ->
    conv2 (5x5, 32->64) -> 2x2 max-pool -> fully connected (1024 units)
    with dropout -> fully connected (NUM_CLASSES units) -> softmax.

    Args:
        images_placeholder: Tensor of flattened images; it is reshaped to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: Keep probability passed to the dropout layer.

    Returns:
        The softmax output tensor with NUM_CLASSES values per image. Note
        that these are probabilities, although callers name them "logits".
    """
    # The variables are created without explicit names, so their checkpoint
    # names follow from the name scopes and the creation order below; keep
    # both stable if a saved model has to stay loadable.
    def weight_variable(shape):
        # Weights start as truncated-normal noise (stddev 0.1).
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    def bias_variable(shape):
        # Biases start at the constant 0.1.
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
    def conv2d(x, W):
        # Stride-1 convolution; 'SAME' padding keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    def max_pool_2x2(x):
        # 2x2 window with stride 2: halves the width and the height.
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )
    # Restore the flattened input to a batch of 3-channel images.
    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        # 7 * 7 * 64: the two poolings reduce a 28x28 input to 7x7 with 64
        # channels. The hard-coded 7 is only valid while IMAGE_SIZE is 28.
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv
def loss(logits, labels):
    """Return the summed cross entropy between *logits* and *labels*.

    Args:
        logits: Class probabilities produced by the network.
        labels: One-hot label tensor.

    Returns:
        Scalar cross-entropy tensor; it is also registered as the
        'cross_entropy' scalar summary.
    """
    # Clip the probabilities away from zero so that log() stays finite.
    clipped = tf.clip_by_value(logits, 1e-10, 1.0)
    log_likelihood = labels * tf.log(clipped)
    cross_entropy = -tf.reduce_sum(log_likelihood)
    tf.scalar_summary('cross_entropy', cross_entropy)
    return cross_entropy
def training(loss, learning_rate):
    """Return the op that minimises *loss* with Adam at *learning_rate*."""
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)
def accuracy(logits, labels):
    """Return the fraction of rows whose predicted class matches the label.

    Args:
        logits: Class scores produced by the network, one row per image.
        labels: One-hot label tensor, one row per image.

    Returns:
        Scalar accuracy tensor; it is also registered as the 'accuracy'
        scalar summary.
    """
    predicted_class = tf.argmax(logits, 1)
    expected_class = tf.argmax(labels, 1)
    # 1.0 where the prediction is right, 0.0 where it is wrong.
    hits = tf.cast(tf.equal(predicted_class, expected_class), 'float')
    hit_rate = tf.reduce_mean(hits)
    tf.scalar_summary('accuracy', hit_rate)
    return hit_rate
def batch_bounds(total, batch_size):
    """Yield ``(start, end)`` slice bounds splitting *total* items into batches.

    Every batch holds *batch_size* items except possibly the last one, which
    holds the remainder. Nothing is yielded when *total* is 0.
    """
    for start in range(0, total, batch_size):
        yield start, min(start + batch_size, total)


def load_dataset(list_path):
    """Load the images and one-hot labels listed in the file *list_path*.

    Each non-blank line has the form ``<image path> <label>``. Every image
    is resized to IMAGE_SIZE x IMAGE_SIZE, flattened and scaled to [0, 1].

    Returns:
        A tuple ``(images, labels)`` of numpy arrays with one row per line.

    Raises:
        IOError: If a listed image cannot be read.
    """
    images = []
    labels = []
    with open(list_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            img = cv2.imread(fields[0])
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # report which file it was instead of failing in resize.
                raise IOError('Cannot read image: %s' % fields[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            images.append(img.flatten().astype(np.float32) / 255.0)
            # The label is used directly as the index of the hot element.
            one_hot = np.zeros(NUM_CLASSES)
            one_hot[int(fields[1])] = 1
            labels.append(one_hot)
    return np.asarray(images), np.asarray(labels)


def mean_accuracy(sess, acc_op, images_placeholder, labels_placeholder,
                  keep_prob, images, labels, batch_size):
    """Return the accuracy of the model over all of *images* / *labels*.

    The data is evaluated batch by batch with dropout disabled. Each batch
    accuracy is weighted by the batch size, so the result is the true mean
    over all samples even when the last batch is smaller.
    """
    total = len(images)
    if total == 0:
        return 0.0
    weighted_sum = 0.0
    for start, end in batch_bounds(total, batch_size):
        batch_accuracy = sess.run(acc_op, feed_dict={
            images_placeholder: images[start:end],
            labels_placeholder: labels[start:end],
            keep_prob: 1.0
        })
        weighted_sum += batch_accuracy * (end - start)
    return weighted_sum / total


if __name__ == '__main__':
    train_image, train_label = load_dataset(FLAGS.train)  # train.txt
    train_len = len(train_image)
    test_image, test_label = load_dataset(FLAGS.test)
    test_len = len(test_image)

    with tf.Graph().as_default():
        images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder('float', shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder('float')

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        # Number of batches per pass, rounded up (integer arithmetic).
        train_batch = (train_len + FLAGS.batch_size - 1) // FLAGS.batch_size
        print('train_batch = ' + str(train_batch))

        for step in range(FLAGS.max_steps):
            # One full pass over the training data, with dropout enabled.
            for batch, batch_plus in batch_bounds(train_len, FLAGS.batch_size):
                sess.run(train_op, feed_dict={
                    images_placeholder: train_image[batch: batch_plus],
                    labels_placeholder: train_label[batch: batch_plus],
                    keep_prob: 0.5
                })

            if step % 10 == 0:
                train_accuracy = mean_accuracy(
                    sess, acc, images_placeholder, labels_placeholder,
                    keep_prob, train_image, train_label, FLAGS.batch_size)
                print('step %d, training accuracy %g' % (step, train_accuracy))

    test_batch = (test_len + FLAGS.batch_size - 1) // FLAGS.batch_size
    print('test_batch = ' + str(test_batch))

    # The batches are bounded by the size of the test set itself; the
    # original clamped the slice end with train_len here.
    test_accuracy = mean_accuracy(
        sess, acc, images_placeholder, labels_placeholder,
        keep_prob, test_image, test_label, FLAGS.batch_size)
    print('test accuracy %g' % (test_accuracy))

    save_path = saver.save(sess, FLAGS.save_model)
    sess.close()
The learning result is saved in /path/to/model.ckpt.
Again, I referred to the Qiita article "TensorFlow: To learn from a large number of images ... ~ (almost) solution ~".
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform
from types import *
# Number of outputs of the final layer.
# NOTE(review): must match the value the checkpoint was trained with — confirm.
NUM_CLASSES = 3
# Images are resized to IMAGE_SIZE x IMAGE_SIZE before being fed to the network.
IMAGE_SIZE = 28
# Length of one flattened 3-channel image.
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * 3
# Directory holding the face images cropped in step 2. The trailing slash is
# required because file names are appended by plain string concatenation.
DIR_PATH = '/path/to/cutout/'
flags = tf.app.flags
FLAGS = flags.FLAGS
# Path of the checkpoint file that is restored before predicting.
flags.DEFINE_string('readmodels', '/path/to/model.ckpt', 'File name of model data')
def inference(images_placeholder, keep_prob):
    """Build the classification network and return its softmax output.

    The graph is: reshape -> conv1 (5x5, 3->32) -> 2x2 max-pool ->
    conv2 (5x5, 32->64) -> 2x2 max-pool -> fully connected (1024 units)
    with dropout -> fully connected (NUM_CLASSES units) -> softmax.

    Args:
        images_placeholder: Tensor of flattened images; it is reshaped to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: Keep probability passed to the dropout layer.

    Returns:
        The softmax output tensor with NUM_CLASSES values per image. Note
        that these are probabilities, although callers name them "logits".
    """
    # The variables are created without explicit names, so their checkpoint
    # names follow from the name scopes and the creation order below; keep
    # both stable so that the saved model can still be restored.
    def weight_variable(shape):
        # Initial value only; the script restores a checkpoint into these
        # variables before predicting.
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
    def conv2d(x, W):
        # Stride-1 convolution; 'SAME' padding keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    def max_pool_2x2(x):
        # 2x2 window with stride 2: halves the width and the height.
        return tf.nn.max_pool(
            x,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME'
        )
    # Restore the flattened input to a batch of 3-channel images.
    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1') as scope:
        # 7 * 7 * 64: the two poolings reduce a 28x28 input to 7x7 with 64
        # channels. The hard-coded 7 is only valid while IMAGE_SIZE is 28.
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv
if __name__ == '__main__':
    # Load every readable image of DIR_PATH, remembering its file name.
    test_image = []
    test_image_name = []
    for file in os.listdir(DIR_PATH):
        if file == '.DS_Store':
            continue
        img = cv2.imread(os.path.join(DIR_PATH, file))
        if img is None:
            # cv2.imread returns None for anything it cannot decode (sub
            # directories, non-image files, ...); skip those instead of
            # crashing inside cv2.resize.
            continue
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32) / 255.0)
        test_image_name.append(file)
    test_image = np.asarray(test_image)

    # Rebuild the network and load the trained weights into it.
    images_placeholder = tf.placeholder('float', shape=(None, IMAGE_PIXELS))
    keep_prob = tf.placeholder('float')
    logits = inference(images_placeholder, keep_prob)

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess, FLAGS.readmodels)

    # Classify the images one at a time with dropout disabled.
    for name, pixels in zip(test_image_name, test_image):
        pr = logits.eval(feed_dict={
            images_placeholder: [pixels],
            keep_prob: 1.0
        })[0]
        pred = np.argmax(pr)
        if pred == 1:
            # When judged to be the favorite entertainer (label "1"):
            # print "<file name>,<score of that class in percent>".
            print('%s,%f' % (name, pr[pred] * 100.0))

    sess.close()
The results are written to standard output, so redirect them to a file as appropriate. Sort the results in descending order of score, and you're done!
$ cat result.csv | sort -r -t, -k 2 | head -5
    1xxxx1_1.jpg,0.5406388165003011
    1xxxx1_2.jpg,0.5350551152698707
    1xxxx6_1.jpg,0.5310078821076752
    1xxxx2_1.jpg,0.5183026050695199
    1xxxx0_1.jpg,0.5130400958800978
I'm not sure, but we introduced it as an example of our efforts in our subcommittee.
Recommended Posts