- A few years ago, I worked through the TensorFlow MNIST CNN tutorial and then left it alone. A common story.
- This time, I modified the tutorial to build a model that learns face images.
- The number of classes (10) and the image size (28x28) are the same as MNIST. The accuracy is around 80%. Well, it's just for fun, so I'm satisfied.
- The complete source is here.
- For the model itself, I referred to the following.
- Reference: TensorFlow mnist_deep.py
- `num_classes`, `img_rows`, and `img_cols` take their values from the configuration file, adding support for changing the number of classes and the image size.
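- The configuration handling itself isn't shown in this post. As an assumption for the snippets below, the module-level constants could be loaded with configparser like this (only the constant names come from the actual code; the config.ini layout is hypothetical):

# Hypothetical sketch: load the module-level constants from a config file.
import configparser

config = configparser.ConfigParser()
config.read('config.ini')

CLASSES = config['model']['classes'].split(',')  # e.g. one label per person
IMG_ROWS = config.getint('model', 'img_rows')    # 28 in this post
IMG_COLS = config.getint('model', 'img_cols')    # 28 in this post
MODEL_FILE = config['model']['model_file']       # checkpoint save path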
def model():
    """MNIST reference model."""
    num_classes = len(CLASSES)
    img_rows, img_cols = IMG_ROWS, IMG_COLS
    x = tf.compat.v1.placeholder(tf.float32, [None, img_rows*img_cols])
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, img_rows, img_cols, 1])
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    with tf.name_scope('dropout'):
        keep_prob = tf.compat.v1.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, rate=1 - keep_prob)
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, num_classes])
        b_fc2 = bias_variable([num_classes])
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return x, y_conv, keep_prob
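- A quick sanity check (my addition, not part of the original source) confirms the placeholders pick up the configured sizes:

# Illustrative only: build the graph once and inspect the tensor shapes.
tf.compat.v1.reset_default_graph()
x, y_conv, keep_prob = model()
print(x.shape)       # (?, 784) for 28x28 inputs
print(y_conv.shape)  # (?, 10) for 10 classes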
- The helper functions below are also taken from the tutorial.
def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool2d(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.random.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
- The training function reads the previously created dataset.
- The batch size and the number of epochs can also be changed. (A minimal sketch of the assumed dataset interface follows the training code below.)
def train(datasets, batch_size=128, epochs=12):
    """Learning."""
    x, y_conv, keep_prob = model()
    y_ = tf.compat.v1.placeholder(tf.float32, [None, len(CLASSES)])  # one-hot labels, width = number of classes
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)
    with tf.name_scope('adam_optimizer'):
        train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)
- Almost the same as the tutorial, but with model saving added.
- The save location comes from the configuration file.
    saver = tf.compat.v1.train.Saver()
    os.makedirs(os.path.dirname(os.path.abspath(MODEL_FILE)), exist_ok=True)
- Modified from the tutorial so that the accuracy is printed and the model is saved at the end of each epoch.
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        next_epoch = 1
        print('epoch, train accuracy, test accuracy')
        while datasets.train.epochs_completed < epochs:
            train_images, train_labels = datasets.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: train_images, y_: train_labels, keep_prob: 0.5})
            if datasets.train.epochs_completed == next_epoch:
                train_accuracy = accuracy.eval(feed_dict={x: datasets.train.images, y_: datasets.train.labels, keep_prob: 1.0})
                test_accuracy = accuracy.eval(feed_dict={x: datasets.test.images, y_: datasets.test.labels, keep_prob: 1.0})
                print('{:d}, {:.4f}, {:.4f}'.format(datasets.train.epochs_completed, train_accuracy, test_accuracy))
                saver.save(sess, MODEL_FILE)
                next_epoch = datasets.train.epochs_completed + 1
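- The dataset loading code is omitted in this post; the training loop only assumes an object shaped like the old TensorFlow MNIST `DataSet` (`images`, `labels`, `epochs_completed`, `next_batch`). A minimal sketch of that assumed interface:

import numpy as np

class DataSet:
    """Minimal stand-in for the old TF MNIST DataSet interface (assumption)."""
    def __init__(self, images, labels):
        self.images = images   # shape (N, img_rows*img_cols), float32 in [0, 1]
        self.labels = labels   # shape (N, num_classes), one-hot
        self.epochs_completed = 0
        self._index = 0

    def next_batch(self, batch_size):
        """Return the next batch, reshuffling and counting epochs at the end."""
        if self._index + batch_size > len(self.images):
            self.epochs_completed += 1
            perm = np.random.permutation(len(self.images))
            self.images, self.labels = self.images[perm], self.labels[perm]
            self._index = 0
        start, self._index = self._index, self._index + batch_size
        return self.images[start:self._index], self.labels[start:self._index]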
- Training is performed by specifying the `--train` option.
- The batch size is 128 and the number of epochs is 120.
$ python face_deep.py --train
epoch, train accuracy, test accuracy
1, 0.4580, 0.4090
2, 0.5593, 0.4880
(omitted)
119, 1.0000, 0.8110
120, 1.0000, 0.792
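- The command-line handling isn't shown in the post. A minimal sketch of how the `--train` option might be wired up (`load_datasets` is a hypothetical helper, not from the source):

import argparse

def main():
    # Hypothetical entry point: --train trains, no options runs inference.
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true', help='train instead of predict')
    args = parser.parse_args()
    datasets = load_datasets()  # hypothetical helper that loads the face dataset
    if args.train:
        train(datasets, batch_size=128, epochs=120)
    else:
        print(predict(datasets.test.images[:10]))

if __name__ == '__main__':
    main()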
- The input images must be a numpy array.
- The result type can be switched with the `dtype` argument.
def predict(images, dtype=None):
    """Switch the inference result between numpy, int, and argmax with dtype."""
    tf.compat.v1.reset_default_graph()
    x, y_conv, keep_prob = model()
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, MODEL_FILE)
        results = sess.run(tf.nn.softmax(y_conv), feed_dict={x: images, keep_prob: 1.0})
        results = np.array(results * 100, dtype=np.uint8)  # softmax probabilities as integer percentages
        if dtype == 'int':
            results = [[int(y) for y in result] for result in results]
        if dtype == 'argmax':
            results = [np.argmax(y) for y in results]
        return results
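- For illustration (my example, not from the post), the three result types look like this:

# images: numpy array shaped (N, img_rows*img_cols)
probs = predict(images)                   # numpy uint8 array of percentages
ints = predict(images, dtype='int')       # nested Python ints, handy for JSON
labels = predict(images, dtype='argmax')  # one predicted class index per image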
- Running with no options performs inference.
- This is only a sanity check for now; I'm thinking of calling it from a web application separately.
- The following is the inference result for the first 10 test images in the dataset. The first several hundred images are labeled 0, so the results seem to match.
$ python face_deep.py
(omitted)
[[100 0 0 0 0 0 0 0 0 0]
[ 99 0 0 0 0 0 0 0 0 0]
[ 99 0 0 0 0 0 0 0 0 0]
[ 0 99 0 0 0 0 0 0 0 0]
[ 99 0 0 0 0 0 0 0 0 0]
[ 97 0 0 0 0 0 0 0 0 1]
[ 99 0 0 0 0 0 0 0 0 0]
[ 0 99 0 0 0 0 0 0 0 0]
[ 99 0 0 0 0 0 0 0 0 0]
[ 36 63 0 0 0 0 0 0 0 0]]
- I modified the TensorFlow MNIST CNN tutorial to train on and infer from face images.
- Since this is study-level work, being able to run training and inference was enough.
- Next time, I'd like to try inference from a Flask web application.