After finishing the MNIST tutorial on TensorFlow, I thought, "This isn't much fun when all it shows you is an accuracy number."
What I really wanted was something like this:
Me: "What number do you think this is?"
PC: "I think it's a 2!!" Me: "Correct! Good job, partner!!"
So I wrote a program that takes a handwritten digit image and returns its prediction.
I only changed the MNIST tutorial code a little, so if you have worked through Deep MNIST for Experts you should be able to follow it.
I'm a beginner who started TensorFlow only five days ago, so there are probably mistakes; if you spot any, I would appreciate advice in the comments. For the time being, though, the program works fine.
That's all it takes. See? Easy, right?
train_mnist.py
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import cv2
import numpy as np

# Download the MNIST data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

NUM_CLASSES = 10  # number of classes in the model

sess = tf.InteractiveSession()

def inference(images_placeholder, keep_prob):
    """Builds the prediction model.
    Arguments:
        images_placeholder: placeholder for the images
        keep_prob: placeholder for the dropout keep rate
    Returns:
        y_conv: (something like) the probability of each class
    Wrapping ops in `with tf.name_scope("xxx") as scope:` groups them
    into a single node on TensorBoard.
    """
    # Weights are initialized from a truncated normal distribution with standard deviation 0.1
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    # Biases are initialized to the constant 0.1
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    # Convolution layer
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    # Pooling layer
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Reshape the input to 28*28*1
    x_image = tf.reshape(images_placeholder, [-1, 28, 28, 1])

    # Convolution layer 1
    with tf.name_scope("conv1") as scope:
        W_conv1 = weight_variable([3, 3, 1, 16])
        b_conv1 = bias_variable([16])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    # Convolution layer 2
    with tf.name_scope("conv2") as scope:
        W_conv2 = weight_variable([3, 3, 16, 16])
        b_conv2 = bias_variable([16])
        h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)

    # Pooling layer 1
    with tf.name_scope("pool1") as scope:
        h_pool1 = max_pool_2x2(h_conv2)

    # Convolution layer 3
    with tf.name_scope("conv3") as scope:
        W_conv3 = weight_variable([3, 3, 16, 32])
        b_conv3 = bias_variable([32])
        h_conv3 = tf.nn.relu(conv2d(h_pool1, W_conv3) + b_conv3)

    # Convolution layer 4
    with tf.name_scope("conv4") as scope:
        W_conv4 = weight_variable([3, 3, 32, 32])
        b_conv4 = bias_variable([32])
        h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)

    # Pooling layer 2
    with tf.name_scope("pool2") as scope:
        h_pool2 = max_pool_2x2(h_conv4)

    # Convolution layer 5
    with tf.name_scope("conv5") as scope:
        W_conv5 = weight_variable([3, 3, 32, 64])
        b_conv5 = bias_variable([64])
        h_conv5 = tf.nn.relu(conv2d(h_pool2, W_conv5) + b_conv5)

    # Convolution layer 6
    with tf.name_scope("conv6") as scope:
        W_conv6 = weight_variable([3, 3, 64, 64])
        b_conv6 = bias_variable([64])
        h_conv6 = tf.nn.relu(conv2d(h_conv5, W_conv6) + b_conv6)

    # Pooling layer 3
    with tf.name_scope("pool3") as scope:
        h_pool3 = max_pool_2x2(h_conv6)

    # Fully connected layer 1
    with tf.name_scope("fc1") as scope:
        W_fc1 = weight_variable([4*4*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool3_flat = tf.reshape(h_pool3, [-1, 4*4*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
        # Dropout 1
        h_fc_1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Fully connected layer 2
    with tf.name_scope("fc2") as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    # Normalization with the softmax function
    with tf.name_scope("softmax") as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc_1_drop, W_fc2) + b_fc2)

    # Return something like the probability of each label
    return y_conv

def loss(logits, labels):
    """Computes the loss.
    Arguments:
        logits: logit tensor, float - [batch_size, NUM_CLASSES]
        labels: label tensor, int32 - [batch_size, NUM_CLASSES]
    Returns:
        cross_entropy: cross-entropy tensor, float
    """
    # Cross-entropy calculation
    cross_entropy = -tf.reduce_sum(labels * tf.log(logits))
    # Register it for display on TensorBoard
    tf.summary.scalar("cross_entropy", cross_entropy)  # tf.summary.scalar() since v0.12; reference: https://teratail.com/questions/68531
    return cross_entropy

def training(loss, learning_rate):
    """Defines the training op.
    Arguments:
        loss: loss tensor, result of loss()
        learning_rate: learning rate
    Returns:
        train_step: training op
    """
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step

def accuracy(logits, labels):
    """Computes the accuracy.
    Arguments:
        logits: result of inference()
        labels: label tensor, int32 - [batch_size, NUM_CLASSES]
    Returns:
        accuracy: accuracy (float)
    """
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.summary.scalar("accuracy", accuracy)
    return accuracy

if __name__ == "__main__":
    with tf.Graph().as_default():
        # Variables used in the expressions
        x_image = tf.placeholder("float", shape=[None, 784])  # input
        y_label = tf.placeholder("float", shape=[None, 10])
        W = tf.Variable(tf.zeros([784, 10]))
        b = tf.Variable(tf.zeros([10]))
        # y_label = tf.nn.softmax(tf.matmul(x_image, W) + b)  # y = softmax(Wx + b); the differentiation is handled automatically
        keep_prob = tf.placeholder("float")

        logits = inference(x_image, keep_prob)  # inference() builds the model
        loss_value = loss(logits, y_label)      # loss() computes the loss
        train_op = training(loss_value, 1e-4)   # training() defines the training op (1e-4 is the learning rate)
        accur = accuracy(logits, y_label)       # accuracy() computes the accuracy

        sess = tf.Session()

        # Values to be displayed on TensorBoard
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter("./tmp/data", graph=sess.graph)

        saver = tf.train.Saver()  # add an op that saves and restores all variables

        init = tf.global_variables_initializer()  # variable initialization (always required when variables are used)
        sess.run(init)

        # Run the training
        for step in range(20000):
            batch = mnist.train.next_batch(50)
            if step % 100 == 0:
                train_accuracy = sess.run(accur, feed_dict={x_image: batch[0], y_label: batch[1], keep_prob: 1.0})
                print("step%d, train_accuracy : %g" % (step, train_accuracy))
            sess.run(train_op, feed_dict={x_image: batch[0], y_label: batch[1], keep_prob: 0.5})
            # Add the values to display on TensorBoard after every step
            summary_str = sess.run(summary_op, feed_dict={x_image: batch[0], y_label: batch[1], keep_prob: 1.0})
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

        # Show the result
        print("test accuracy : %g" % sess.run(accur, feed_dict={x_image: mnist.test.images, y_label: mnist.test.labels, keep_prob: 1.0}))

        saver.save(sess, "./ckpt/model.ckpt")  # save the variable data
```
The important parts here are:
```python
saver = tf.train.Saver()  # add an op that saves and restores all variables
...
saver.save(sess, "./ckpt/model.ckpt")  # save the variable data
```
These two lines save the parameters once training is finished. Without them, tens of minutes of training would start all over again every time you just want a prediction for a single image. I referred to this site → How to save and load Tensorflow learning parameters
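To make the idea concrete, here is a rough sketch (not from the original post) of how saving and restoring fit together: if a checkpoint already exists, load it instead of retraining. The stand-in variable `W_example` and the `./ckpt` paths are assumptions for illustration; in train_mnist.py the variables come from `inference()`.
```python
import os
import tensorflow as tf

# Stand-in for the real model so the sketch runs on its own
W_example = tf.Variable(tf.zeros([784, 10]), name="W_example")

saver = tf.train.Saver()  # saves/restores every variable created above

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state("./ckpt")  # same checkpoint directory as in this article
    if ckpt and ckpt.model_checkpoint_path:
        # A checkpoint already exists: load the trained parameters instead of retraining
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        # No checkpoint yet: run the training loop here, then save the parameters
        os.makedirs("./ckpt", exist_ok=True)  # Saver.save() does not create the directory itself
        saver.save(sess, "./ckpt/model.ckpt")
```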
input_num.py
```python
import tensorflow as tf
import cv2
import numpy as np

NUM_CLASSES = 10

def inference(images_placeholder, keep_prob):
    """Builds the prediction model.
    Arguments:
        images_placeholder: placeholder for the images
        keep_prob: placeholder for the dropout keep rate
    Returns:
        y_conv: (something like) the probability of each class
    Wrapping ops in `with tf.name_scope("xxx") as scope:` groups them
    into a single node on TensorBoard.
    """
    # Weights are initialized from a truncated normal distribution with standard deviation 0.1
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    # Biases are initialized to the constant 0.1
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    # Convolution layer
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    # Pooling layer
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Reshape the input to 28*28*1
    x_image = tf.reshape(images_placeholder, [-1, 28, 28, 1])

    # Convolution layer 1
    with tf.name_scope("conv1") as scope:
        W_conv1 = weight_variable([3, 3, 1, 16])
        b_conv1 = bias_variable([16])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

    # Convolution layer 2
    with tf.name_scope("conv2") as scope:
        W_conv2 = weight_variable([3, 3, 16, 16])
        b_conv2 = bias_variable([16])
        h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)

    # Pooling layer 1
    with tf.name_scope("pool1") as scope:
        h_pool1 = max_pool_2x2(h_conv2)

    # Convolution layer 3
    with tf.name_scope("conv3") as scope:
        W_conv3 = weight_variable([3, 3, 16, 32])
        b_conv3 = bias_variable([32])
        h_conv3 = tf.nn.relu(conv2d(h_pool1, W_conv3) + b_conv3)

    # Convolution layer 4
    with tf.name_scope("conv4") as scope:
        W_conv4 = weight_variable([3, 3, 32, 32])
        b_conv4 = bias_variable([32])
        h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)

    # Pooling layer 2
    with tf.name_scope("pool2") as scope:
        h_pool2 = max_pool_2x2(h_conv4)

    # Convolution layer 5
    with tf.name_scope("conv5") as scope:
        W_conv5 = weight_variable([3, 3, 32, 64])
        b_conv5 = bias_variable([64])
        h_conv5 = tf.nn.relu(conv2d(h_pool2, W_conv5) + b_conv5)

    # Convolution layer 6
    with tf.name_scope("conv6") as scope:
        W_conv6 = weight_variable([3, 3, 64, 64])
        b_conv6 = bias_variable([64])
        h_conv6 = tf.nn.relu(conv2d(h_conv5, W_conv6) + b_conv6)

    # Pooling layer 3
    with tf.name_scope("pool3") as scope:
        h_pool3 = max_pool_2x2(h_conv6)

    # Fully connected layer 1
    with tf.name_scope("fc1") as scope:
        W_fc1 = weight_variable([4*4*64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool3_flat = tf.reshape(h_pool3, [-1, 4*4*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
        # Dropout 1
        h_fc_1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Fully connected layer 2
    with tf.name_scope("fc2") as scope:
        W_fc2 = weight_variable([1024, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])

    # Normalization with the softmax function
    with tf.name_scope("softmax") as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc_1_drop, W_fc2) + b_fc2)

    # Return something like the probability of each label
    return y_conv

if __name__ == "__main__":
    # Load the image
    img = input("Please enter the image path>")
    img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (28, 28))
    ximage = img.flatten().astype(np.float32) / 255.0  # convert the format

    # Variables used in the expressions
    x_image = tf.placeholder("float", shape=[None, 784])  # input
    y_label = tf.placeholder("float", shape=[None, 10])
    keep_prob = tf.placeholder("float")

    logits = inference(x_image, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state('./ckpt')
    saver.restore(sess, ckpt.model_checkpoint_path)  # load the variable data

    pred = np.argmax(logits.eval(feed_dict={x_image: [ximage], keep_prob: 1.0})[0])
    print(pred)
```
As you may have noticed, the first half is almost identical to the training script. That is because the same graph (the same variables) has to be defined first, before the learned parameters can be loaded into it.
**Image processing**
```python
# Load the image
img = input("Please enter the image path>")
img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (28, 28))
ximage = img.flatten().astype(np.float32) / 255.0  # convert the format
```
Here the image is read with cv2, resized to 28*28, flattened into one dimension, and then each grayscale value from 0 to 255 is divided by 255. White [255] becomes 1, black [0] becomes 0, and in-between shades (gray, etc.) become floating-point values between them, so everything ends up in the range 0 to 1. This lets the image be fed to the network trained earlier.
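For reference, here is a minimal helper version of the same preprocessing (the function name `preprocess` and the commented file name are my own, not from the article):
```python
import cv2
import numpy as np

def preprocess(path):
    """Read an image as grayscale, resize to 28x28, flatten, and scale 0-255 down to 0.0-1.0."""
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError("could not read image: %s" % path)
    img = cv2.resize(img, (28, 28))
    return img.flatten().astype(np.float32) / 255.0  # white 255 -> 1.0, black 0 -> 0.0

# ximage = preprocess("three.png")                 # "three.png" is a hypothetical file
# print(ximage.shape, ximage.min(), ximage.max())  # (784,) with values in [0.0, 1.0]
```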
```python
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state('./ckpt')
saver.restore(sess, ckpt.model_checkpoint_path)  # load the variable data
```
The variables are initialized first, and then the trained parameters saved by the training program are loaded into them.
```python
pred = np.argmax(logits.eval(feed_dict={x_image: [ximage], keep_prob: 1.0})[0])
print(pred)
```
Finally, the processed image and the dropout keep rate are fed to the network, and the index of the largest value it returns is printed as the prediction.
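If you also want to see how confident the network is, a small extension like the following could print the whole probability vector instead of only the argmax. This is my own addition, and it assumes the names from input_num.py (`logits`, `x_image`, `keep_prob`, `ximage`, `np`) are already in scope with the session restored:
```python
# Assumes the graph and restored session from input_num.py are already set up
probs = logits.eval(feed_dict={x_image: [ximage], keep_prob: 1.0})[0]
for digit, p in enumerate(probs):
    print("%d: %.3f" % (digit, p))   # probability-like score for each digit 0-9
print("prediction:", np.argmax(probs))
```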
If you run it and pass in an image like the one shown (yours may look a little different), the output should look like this:
```
C:\User\...\MNIST > python input_num.py
Please enter the image path> (image path)
3
```
That's it for the program that returns a prediction when you enter an image path. Please give it a try!
Regarding the "image" when passing image data, if the thickness of the characters is thin, it will disappear when resized to 28 * 28, so please write it fairly thick.
I have also put the code on GitHub for now: Code for this article
References:
Identify the anime Yuruyuri production company with TensorFlow
How to save and load learning parameters of Tensorflow
Loading basic images of Python OpenCV (still images)