[PYTHON] TensorFlow: learning from a large number of images ... ~ An (almost) solution ~

Introduction

Hi, this is ikki. I've finally solved the problem of feeding a large number of images to TensorFlow. The cause, once I found it, was really embarrassing... Copy-and-pasting code you only half understand is a bad idea!

Previous problems

I previously posted a program here for training on images I had prepared myself, but as written it could not learn from tens of thousands of images.

So, first of all, I will post the modified program.

train.py


#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform

NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('save_model', 'models/model.ckpt', 'File name of model data')
flags.DEFINE_string('train', 'training/train.txt', 'File name of train data')
flags.DEFINE_string('test', 'training/test.txt', 'File name of test data')
flags.DEFINE_string('train_dir', '/tmp/pict_data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 201, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 256, 'Batch size. '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')

def inference(images_placeholder, keep_prob):
    ###############################################################
    # Builds the model for deep learning
    # Arguments:
    #   images_placeholder: placeholder for the input images
    #   keep_prob: placeholder for the dropout keep rate
    # Returns:
    #   y_conv: softmax output of the model
    ###############################################################
    # Weights initialized from a truncated normal distribution, stddev 0.1
    def weight_variable(shape):
      initial = tf.truncated_normal(shape, stddev=0.1)
      return tf.Variable(initial)
    # Biases initialized to the constant 0.1
    def bias_variable(shape):
      initial = tf.constant(0.1, shape=shape)
      return tf.Variable(initial)
    #Creating a convolution layer
    def conv2d(x, W):
      return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    #Creating a pooling layer
    def max_pool_2x2(x):
      return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    #Transform input to 28x28x3
    x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
    #Creation of convolution layer 1
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
    #Creation of pooling layer 1
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    #Creation of convolution layer 2
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    #Creation of pooling layer 2
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)
    #Creation of fully connected layer 1
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7*7*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        #dropout settings
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    #Creation of fully connected layer 2
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    #Normalization with softmax function
    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv

def loss(logits, labels):
    ###############################################################
    # Computes the loss
    # Arguments:
    #   logits: logits tensor, float - [batch_size, NUM_CLASSES]
    #   labels: labels tensor, int32 - [batch_size, NUM_CLASSES]
    # Returns:
    #   cross_entropy: cross-entropy loss tensor
    ###############################################################
    cross_entropy = -tf.reduce_sum(labels*tf.log(tf.clip_by_value(logits,1e-10,1.0)))
    tf.scalar_summary("cross_entropy", cross_entropy)
    return cross_entropy

def training(loss, learning_rate):
    ###############################################################
    # Defines the training op
    # Arguments:
    #   loss: loss tensor, result of loss()
    #   learning_rate: learning rate
    # Returns:
    #   train_step: training op
    ###############################################################
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step

def accuracy(logits, labels):
    ###############################################################
    # Computes the accuracy
    # Arguments:
    #   logits: result of inference()
    #   labels: labels tensor, int32 - [batch_size, NUM_CLASSES]
    # Returns:
    #   accuracy: accuracy (float)
    ###############################################################
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)
    return accuracy

if __name__ == '__main__':
    # Load the training images and one-hot labels listed in train.txt
    with open(FLAGS.train, 'r') as f: # train.txt
        train_image = []
        train_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            train_image.append(img.flatten().astype(np.float32)/255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            train_label.append(tmp)
        train_image = np.asarray(train_image)
        train_label = np.asarray(train_label)
        train_len = len(train_image)

    with open(FLAGS.test, 'r') as f: # test.txt
        test_image = []
        test_label = []
        for line in f:
            line = line.rstrip()
            l = line.split()
            img = cv2.imread(l[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            test_image.append(img.flatten().astype(np.float32)/255.0)
            tmp = np.zeros(NUM_CLASSES)
            tmp[int(l[1])] = 1
            test_label.append(tmp)
        test_image = np.asarray(test_image)
        test_label = np.asarray(test_label)
        test_len = len(test_image)
    
    with tf.Graph().as_default():
        images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
        labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
        keep_prob = tf.placeholder("float")

        logits = inference(images_placeholder, keep_prob)
        loss_value = loss(logits, labels_placeholder)
        train_op = training(loss_value, FLAGS.learning_rate)
        acc = accuracy(logits, labels_placeholder)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        #Execution of training
        # Number of batches; the last batch may be smaller than batch_size
        if train_len % FLAGS.batch_size == 0:
            train_batch = train_len/FLAGS.batch_size
        else:
            train_batch = (train_len/FLAGS.batch_size)+1
        print "train_batch = "+str(train_batch)
        for step in range(FLAGS.max_steps):
            for i in range(train_batch):
                batch = FLAGS.batch_size*i
                batch_plus = FLAGS.batch_size*(i+1)
                if batch_plus > train_len: batch_plus = train_len
                # Specify the data to feed the placeholders via feed_dict
                sess.run(train_op, feed_dict={
                  images_placeholder: train_image[batch:batch_plus],
                  labels_placeholder: train_label[batch:batch_plus],
                  keep_prob: 0.5})
                  
            if step % 10 == 0:
                # Average the training accuracy over batches, weighting each
                # batch by its size so a smaller final batch is handled correctly
                train_accuracy = 0.0
                for i in range(train_batch):
                    batch = FLAGS.batch_size*i
                    batch_plus = FLAGS.batch_size*(i+1)
                    if batch_plus > train_len: batch_plus = train_len
                    train_accuracy += sess.run(acc, feed_dict={
                        images_placeholder: train_image[batch:batch_plus],
                        labels_placeholder: train_label[batch:batch_plus],
                        keep_prob: 1.0}) * (batch_plus - batch)
                train_accuracy /= float(train_len)
                # Write summaries for TensorBoard every 10 steps
                # (disabled for now; see "Problems that have not been solved yet")
                #summary_str = sess.run(summary_op, feed_dict={
                #    images_placeholder: train_image,
                #    labels_placeholder: train_label,
                #    keep_prob: 1.0})
                #summary_writer.add_summary(summary_str, step)
                print "step %d, training accuracy %g"%(step, train_accuracy)

    if test_len % FLAGS.batch_size == 0:
        test_batch = test_len/FLAGS.batch_size
    else:
        test_batch = (test_len/FLAGS.batch_size)+1
    print "test_batch = "+str(test_batch)
    test_accuracy = 0.0
    for i in range(test_batch):
        batch = FLAGS.batch_size*i
        batch_plus = FLAGS.batch_size*(i+1)
        if batch_plus > test_len: batch_plus = test_len
        test_accuracy += sess.run(acc, feed_dict={
            images_placeholder: test_image[batch:batch_plus],
            labels_placeholder: test_label[batch:batch_plus],
            keep_prob: 1.0}) * (batch_plus - batch)
    test_accuracy /= float(test_len)
    print "test accuracy %g"%(test_accuracy)
    save_path = saver.save(sess, FLAGS.save_model)

For the time being, I think it works if you copy and paste it. (But please copy and paste only after you understand it!) There are minor corrections throughout, but the main change that led to the solution is in how train_accuracy and test_accuracy are calculated: I switched the accuracy calculations to batch processing as well. Why on earth did I batch the training but not the accuracy calculation... ~~Some experts will probably laugh when they see the program: "Why are you doing this? Of course you'll get a memory error lol".~~ In fact, I laughed the moment I noticed it myself. Self-deprecation aside, it will still take time no matter how many images you feed it, but I believe it now trains properly.
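Incidentally, the batching pattern itself is simple. Here is a minimal helper, a sketch of my own and not part of the program above, that yields the same index ranges the loops compute inline; the last batch may be smaller than batch_size:


# Sketch (not in the original program): yields (start, end) index pairs
# that cover a dataset of n items in chunks of batch_size.
def batch_ranges(n, batch_size):
    for start in range(0, n, batch_size):
        yield start, min(start + batch_size, n)

# Usage: for start, end in batch_ranges(train_len, FLAGS.batch_size): ...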

Below, I'll write a little more about how to use this program.

Folder structure

Place train.txt and test.txt in the same folder as this train.py. The contents of each file are as follows.

train.txt & test.txt


/home/picture/images/image1.jpg 0
/home/picture/images/image2.jpg 0
/home/picture/images/image3.jpg 1
/home/picture/images/image4.jpg 2
/home/picture/images/image5.jpg 1
/home/picture/images/image6.jpg 1
            :
            :

Something like that. This example uses 3 classes, so each line is an image file name followed by a class number (0-2). I always use absolute paths in cases like this. A file like this is tedious to write by hand, so it's worth writing a program to generate it automatically. I'll post mine if requested, but a rough sketch follows below. Also, this time I created a folder called training in the same folder as train.py and put both text files in it.
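Here is a minimal sketch of such a generator. It assumes, and this layout is my assumption rather than anything fixed, that the images are sorted into one subfolder per class, such as dataset/0/, dataset/1/, dataset/2/:


# Sketch of a list-file generator. Assumes one subfolder per class under
# root_dir (e.g. dataset/0/, dataset/1/, dataset/2/) -- this layout is an
# assumption, not something required by train.py.
import os

def write_list(root_dir, out_path):
    classes = sorted(d for d in os.listdir(root_dir)
                     if os.path.isdir(os.path.join(root_dir, d)))
    with open(out_path, 'w') as out:
        for label, class_dir in enumerate(classes):
            class_path = os.path.join(root_dir, class_dir)
            for name in sorted(os.listdir(class_path)):
                if name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    # Absolute image path, a space, then the class number
                    out.write('%s %d\n' % (
                        os.path.abspath(os.path.join(class_path, name)), label))

write_list('dataset', 'training/train.txt')


Sorting the directory listings keeps the class numbering and line order stable between runs.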

Also, if you create a folder called models, a .ckpt file with the learning results will be saved there.

How to use learning results

This is just an example.

test.py


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import numpy as np
import tensorflow as tf
import cv2
import tensorflow.python.platform

NUM_CLASSES = 3
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('readmodels', 'models/model.ckpt', 'File name of model data')

def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
      initial = tf.truncated_normal(shape, stddev=0.1)
      return tf.Variable(initial)

    def bias_variable(shape):
      initial = tf.constant(0.1, shape=shape)
      return tf.Variable(initial)

    def conv2d(x, W):
      return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
      return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    
    x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([5, 5, 3, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_2x2(h_conv1)
    
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([7*7*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    with tf.name_scope('softmax') as scope:
        y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    return y_conv

if __name__ == '__main__':
    test_image = []
    for i in range(1, len(sys.argv)):
        img = cv2.imread(sys.argv[i])
        if img is None:
            print "could not read image: " + sys.argv[i]
            continue
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
        test_image.append(img.flatten().astype(np.float32)/255.0)
    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
    keep_prob = tf.placeholder("float")

    logits = inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    saver.restore(sess,FLAGS.readmodels)

    for i in range(len(test_image)):
        # Run the network on a single image; [0] takes the only row of output
        pr = logits.eval(feed_dict={
            images_placeholder: [test_image[i]],
            keep_prob: 1.0 })[0]
        pred = np.argmax(pr)
        print pr    # class probabilities
        print pred  # predicted class number
    print "finish"

If you run python test.py followed by one or more image file paths, you should get the recognition results. It shouldn't need much extra explanation.

Problems that have not been solved yet

Actually, train.py also supports TensorBoard, but I'm stuck because the summary part that draws the graphs cannot be batch processed. The problem is that the value returned by sess.run(summary_op, ...) is a str (a serialized protobuf whose contents I can't easily work with), **so I can't "add up the results calculated for each batch and draw the (averaged) graph"**.

Please let me know if you have a solution or if you notice anything.
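One possible workaround, offered only as an untested sketch against the TF 0.x API used above: skip summary_op for the batched metrics and instead average the per-batch values in Python, then write the result with a hand-built tf.Summary protocol buffer.


# Untested sketch: write the batch-averaged accuracy to TensorBoard without
# summary_op. Assumes train_accuracy holds the Python float averaged over
# batches, as computed in the training loop above.
summary = tf.Summary(value=[
    tf.Summary.Value(tag="accuracy", simple_value=float(train_accuracy))])
summary_writer.add_summary(summary, step)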
