――There are several deep learning libraries, but TensorFlow's notation for building a model tends to be more complicated than that of other libraries. ――This time I refactored a model I had created previously, using a library called TF-Slim (contrib/slim). ――The refactoring only replaces the notation; Python style conventions are ignored and line breaks are inserted freely. ――Please note that the code is a little hard to read because the model is not separated out into its own class.
slim_network.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
import tensorflow.contrib.slim as slim
# Number of classes to identify (Zuckerberg: 0, Elon Musk: 1, Bill Gates: 2).
NUM_CLASSES = 3
# Edge length, in pixels, that each image is resized to before training.
IMAGE_SIZE = 28
# Length of one flattened image vector: 28 * 28 pixels * 3 color channels.
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

# Command-line flags holding the data paths and the scale of training.
# tf.app.flags registers each setting with its default value and help text.
flags = tf.app.flags
FLAGS = flags.FLAGS
# List file describing the training data.
flags.DEFINE_string('train', '/Users/neriai/Develops/workspace/dir/train/data.txt', 'File name of train data')
# List file describing the test (verification) data.
# The help text previously said "train data", copied from the flag above.
flags.DEFINE_string('test', '/Users/neriai/Develops/workspace/dir/test/data.txt', 'File name of test data')
# Directory that receives the TensorBoard logs.
flags.DEFINE_string('train_dir', '/Users/neriai/Develops/workspace/dir/data', 'Directory to put the training data.')
# Number of passes of the training loop.
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')
# Number of images used in a single training batch.
flags.DEFINE_integer('batch_size', 20, 'Batch size Must divide evenly into the dataset sizes.')
# Learning rate: too small and training does not progress; too large and the
# error fails to converge or diverges.
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
def model(x_image, keep_prob):
    """Build the convolutional network and return per-class probabilities.

    The network is conv -> pool -> conv -> pool -> fully connected ->
    dropout -> fully connected -> softmax, built with TF-Slim layers.

    Args:
        x_image: image tensor; the caller in this file reshapes it to
            [-1, IMAGE_SIZE, IMAGE_SIZE, 3].
        keep_prob: dropout keep probability passed to slim.dropout.

    Returns:
        The softmax output of the last fully connected layer, which has
        NUM_CLASSES outputs.
    """
    weight_init = tf.truncated_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.1)

    # Shared defaults: ReLU and the initializers above for conv / fully
    # connected layers, SAME padding for max pooling.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=weight_init,
            biases_initializer=bias_init), \
            slim.arg_scope([slim.max_pool2d], padding='SAME'):
        # First convolution (32 filters, 5x5) followed by 2x2 pooling.
        net = slim.conv2d(x_image, 32, [5, 5])
        net = slim.max_pool2d(net, [2, 2])
        # Second convolution (64 filters, 5x5) followed by 2x2 pooling.
        net = slim.conv2d(net, 64, [5, 5])
        net = slim.max_pool2d(net, [2, 2])
        # Flatten and apply the first fully connected layer (1024 units).
        net = slim.flatten(net)
        net = slim.fully_connected(net, 1024)
        # Dropout between the two fully connected layers.
        net = slim.dropout(net, keep_prob)
        # Output layer without activation, then softmax normalization.
        net = slim.fully_connected(net, NUM_CLASSES, activation_fn=None)
        probabilities = tf.nn.softmax(net)
    return probabilities
def loss(labels_placeholder, model):
    """Compute the summed cross-entropy between predictions and labels.

    Args:
        labels_placeholder: one-hot labels; the caller in this file feeds a
            float32 tensor of shape [batch_size, NUM_CLASSES].
        model: predicted class probabilities (the output of model()).

    Returns:
        A scalar tensor holding the cross-entropy summed over the batch.
        The value is also registered as the "cross_entropy" TensorBoard
        summary.
    """
    # A probability of exactly 0 makes tf.log return -inf, and 0 * -inf is
    # NaN, which poisons the whole sum. Clip to keep the log finite.
    clipped = tf.clip_by_value(model, 1e-10, 1.0)
    cross_entropy = -tf.reduce_sum(labels_placeholder*tf.log(clipped))
    # Expose the value in TensorBoard.
    tf.summary.scalar("cross_entropy", cross_entropy)
    return cross_entropy
def training(learning_rate, loss):
    """Create the op that performs one optimization step on the loss.

    Args:
        learning_rate: learning rate handed to the Adam optimizer.
        loss: loss tensor to minimize.

    Returns:
        The op returned by AdamOptimizer.minimize().
    """
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)
def accuracy(model, labels_placeholder):
    """Compute the fraction of predictions that match the labels.

    Args:
        model: predicted per-class scores, one row per example.
        labels_placeholder: one-hot labels, one row per example.

    Returns:
        A scalar float32 tensor with the mean of the per-example matches.
        The value is also registered as the "accuracy" TensorBoard summary.
    """
    predicted_class = tf.argmax(model, 1)
    expected_class = tf.argmax(labels_placeholder, 1)
    # Booleans become 0.0 / 1.0 so that their mean is the hit rate.
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    hit_rate = tf.reduce_mean(hits)
    # Expose the value in TensorBoard.
    tf.summary.scalar("accuracy", hit_rate)
    return hit_rate
def _load_dataset(list_path):
    """Load the images and one-hot labels listed in a text file.

    Each non-empty line of the file holds an image path and an integer class
    index separated by whitespace.

    Args:
        list_path: path of the list file.

    Returns:
        A tuple (images, labels) of numpy arrays. Each image row is the
        resized image flattened to float32 values scaled by 1/255; each
        label row is a one-hot vector of length NUM_CLASSES.

    Raises:
        IOError: if the list file cannot be opened or an image cannot be read.
    """
    images = []
    labels = []
    with open(list_path, 'r') as list_file:
        for line in list_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            img = cv2.imread(fields[0])
            if img is None:
                # cv2.imread signals failure by returning None; fail here
                # with the path instead of deep inside cv2.resize.
                raise IOError('Could not read image: %s' % fields[0])
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            # Flatten and scale the pixel values to floats in [0, 1].
            images.append(img.flatten().astype(np.float32)/255.0)
            # 1-of-K (one-hot) label encoding.
            one_hot = np.zeros(NUM_CLASSES)
            one_hot[int(fields[1])] = 1
            labels.append(one_hot)
    return np.asarray(images), np.asarray(labels)


if __name__ == '__main__':
    train_image, train_label = _load_dataset(FLAGS.train)
    test_image, test_label = _load_dataset(FLAGS.test)

    # Build everything in a dedicated graph so TensorBoard shows only it.
    with tf.Graph().as_default() as graph:
        # Flattened input images: any number of rows of IMAGE_PIXELS values.
        images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
        # One-hot labels: any number of rows of NUM_CLASSES values.
        labels_placeholder = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))
        # Restore the flat vectors to IMAGE_SIZE x IMAGE_SIZE 3-channel images.
        x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])
        # Dropout keep probability, fed per run.
        keep_prob = tf.placeholder(tf.float32)

        # Distinct names keep the model()/loss()/accuracy() functions intact.
        probabilities = model(x_image, keep_prob)
        loss_op = loss(labels_placeholder, probabilities)
        train_step = training(FLAGS.learning_rate, loss_op)
        accuracy_op = accuracy(probabilities, labels_placeholder)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Merge every registered summary and log them under train_dir.
            summary_step = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

            # Floor division keeps the batch count an int on Python 2 and 3.
            num_batches = len(train_image) // FLAGS.batch_size
            # Evaluation uses the whole training set with dropout disabled.
            train_eval_feed = {
                images_placeholder: train_image,
                labels_placeholder: train_label,
                keep_prob: 1.0
            }

            for step in range(FLAGS.max_steps):
                for i in range(num_batches):
                    # Train on one batch_size slice of the training data.
                    start = FLAGS.batch_size*i
                    end = start + FLAGS.batch_size
                    sess.run(
                        train_step,
                        feed_dict={
                            images_placeholder: train_image[start:end],
                            labels_placeholder: train_label[start:end],
                            keep_prob: 0.5
                        }
                    )

                # One forward pass yields both the accuracy and the summaries.
                train_accuracy, summary_str = sess.run(
                    [accuracy_op, summary_step],
                    feed_dict=train_eval_feed
                )
                print("step %d, training accuracy %g" % (step, train_accuracy))
                summary_writer.add_summary(summary_str, step)

            # Report the accuracy on the held-out test data after training.
            test_accuracy = sess.run(
                accuracy_op,
                feed_dict={
                    images_placeholder: test_image,
                    labels_placeholder: test_label,
                    keep_prob: 1.0
                }
            )
            print("test accuracy %g" % test_accuracy)

            # Save the final trained model; "model.ckpt" is the output name.
            save_path = saver.save(sess, "model.ckpt")
            # Flush pending TensorBoard events to disk.
            summary_writer.close()
Before
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with kernel `W` using stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` with a [1, 2, 2, 1] window and matching strides, SAME padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
After
with slim.arg_scope(
[slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
biases_initializer=tf.constant_initializer(0.1)
):
with slim.arg_scope([slim.max_pool2d], padding='SAME'):
Before
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
After
conv1 = slim.conv2d(x_image, 32, [5, 5])
Before
pool1 = max_pool_2x2(conv1)
After
pool1 = slim.max_pool2d(conv1, [2, 2])
Before
# Second convolution layer: 5x5 kernels, 32 input channels, 64 output channels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# Takes the first pooling layer's output, which the snippet above names
# pool1 (h_pool1 was never defined).
conv2 = tf.nn.relu(conv2d(pool1, W_conv2) + b_conv2)
After
conv2 = slim.conv2d(pool1, 64, [5, 5])
Before
pool2 = max_pool_2x2(conv2)
After
pool2 = slim.max_pool2d(conv2, [2, 2])
Before
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
pool2_flat = tf.reshape(pool2, [-1, 7*7*64])
fc1 = tf.nn.relu(tf.matmul(pool2_flat, W_fc1) + b_fc1)
After
pool2_flat = slim.flatten(pool2)
fc1 = slim.fully_connected(pool2_flat, 1024)
Before
dropout = tf.nn.dropout(fc1, keep_prob)
After
dropout = slim.dropout(fc1, keep_prob)
Before
W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
y_conv = tf.nn.softmax(tf.matmul(dropout, W_fc2) + b_fc2)
After
y_conv = slim.fully_connected(dropout, NUM_CLASSES, activation_fn=None)
y_conv = tf.nn.softmax(y_conv)
――The code is only a little slimmer here, but TF-Slim would probably pay off for a model on the scale of SSD. ――The replacement felt more like solving a puzzle than something I truly understood. ――Other parts looked replaceable as well, but doing so caused various errors and broke the computed values, so I limited the changes to the inside of the model. ――I feel that the code I had copied has become a little more my own.
Recommended Posts