- `num_classes`, `img_rows`, and `img_cols` now take their values from the settings file, adding support for changing the number of classes and the image size.

```python
def model():
    """MNIST reference model."""
    num_classes = len(CLASSES)
    img_rows, img_cols = IMG_ROWS, IMG_COLS
    x = tf.compat.v1.placeholder(tf.float32, [None, img_rows * img_cols])
    with tf.name_scope('reshape'):
        x_image = tf.reshape(x, [-1, img_rows, img_cols, 1])
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1'):
        # The flattened size is computed from the pooled shape, so it adapts
        # to whatever IMG_ROWS/IMG_COLS the settings file specifies.
        W_fc1 = weight_variable([int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    with tf.name_scope('dropout'):
        keep_prob = tf.compat.v1.placeholder(tf.float32)
        # Newer tf.nn.dropout takes a drop rate, so convert from keep_prob.
        h_fc1_drop = tf.nn.dropout(h_fc1, rate=1 - keep_prob)
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, num_classes])
        b_fc2 = bias_variable([num_classes])
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return x, y_conv, keep_prob
```
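For context, here is a minimal sketch of the settings module these names could come from; the names `CLASSES`, `IMG_ROWS`, `IMG_COLS`, and `MODEL_FILE` are inferred from how the code uses them, and the values are placeholders, not the article's actual settings. The excerpt also assumes `import os`, `import numpy as np`, and `import tensorflow as tf` at the top of face_deep.py.

```python
# config.py (hypothetical) - values face_deep.py is assumed to import.
CLASSES = ['person_00', 'person_01', 'person_02', 'person_03', 'person_04',
           'person_05', 'person_06', 'person_07', 'person_08', 'person_09']  # 10 classes, as in the output below
IMG_ROWS, IMG_COLS = 28, 28             # input image size fed to the network
MODEL_FILE = './model/face_deep.ckpt'   # checkpoint path used by the Saver
```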
- The following helpers are also taken from the tutorial.
```python
def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool2d(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.random.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
```
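As a sanity check on the fc1 input size computed in `model()`: with SAME padding, each 2x2 max-pool halves the spatial dimensions, so a 28x28 input becomes 14x14 after pool1 and 7x7 after pool2, giving fc1 an input of 7 * 7 * 64 = 3136 features. A quick sketch, assuming the default 28x28 size:

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # only needed when running under TF2

x_image = tf.compat.v1.placeholder(tf.float32, [None, 28, 28, 1])
h_pool1 = tf.nn.max_pool2d(x_image, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
h_pool2 = tf.nn.max_pool2d(h_pool1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
print(h_pool2.shape)  # spatial size is now 7x7, so fc1 takes 7 * 7 * 64 = 3136 inputs
```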
- We make it possible to read the dataset created earlier.
```python
def train(datasets, batch_size=128, epochs=12):
    """Training."""
    x, y_conv, keep_prob = model()
    # One-hot labels; sized from the settings file so it matches num_classes.
    y_ = tf.compat.v1.placeholder(tf.float32, [None, len(CLASSES)])
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)
    with tf.name_scope('adam_optimizer'):
        train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)
    saver = tf.compat.v1.train.Saver()
    os.makedirs(os.path.dirname(os.path.abspath(MODEL_FILE)), exist_ok=True)
```
- Changed from the tutorial so that the accuracy is printed and the model is saved at every epoch (continuing inside `train()`):
```python
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        next_epoch = 1
        print('epoch, train accuracy, test accuracy')
        while datasets.train.epochs_completed < epochs:
            train_images, train_labels = datasets.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: train_images, y_: train_labels, keep_prob: 0.5})
            if datasets.train.epochs_completed == next_epoch:
                train_accuracy = accuracy.eval(feed_dict={x: datasets.train.images, y_: datasets.train.labels, keep_prob: 1.0})
                test_accuracy = accuracy.eval(feed_dict={x: datasets.test.images, y_: datasets.test.labels, keep_prob: 1.0})
                print('{:d}, {:.4f}, {:.4f}'.format(datasets.train.epochs_completed, train_accuracy, test_accuracy))
                saver.save(sess, MODEL_FILE)
                next_epoch = datasets.train.epochs_completed + 1
```
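The `datasets` argument is assumed to follow the interface of the old TensorFlow MNIST `Datasets` helper: `train`/`test` splits exposing `images`, `labels`, `next_batch()`, and an `epochs_completed` counter. A minimal sketch of one such split (names and details are assumptions, not the article's actual loader):

```python
import numpy as np

class Split:
    """Minimal stand-in for the tutorial's DataSet: shuffled mini-batches plus an epoch counter."""

    def __init__(self, images, labels):
        self.images, self.labels = images, labels
        self.epochs_completed = 0
        self._order = np.random.permutation(len(images))
        self._pos = 0

    def next_batch(self, batch_size):
        if self._pos + batch_size > len(self.images):
            # Epoch boundary: count it and reshuffle.
            self.epochs_completed += 1
            self._order = np.random.permutation(len(self.images))
            self._pos = 0
        idx = self._order[self._pos:self._pos + batch_size]
        self._pos += batch_size
        return self.images[idx], self.labels[idx]
```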
- Training is run by passing the `--train` option.

```
$ python face_deep.py --train
epoch, train accuracy, test accuracy
1, 0.4580, 0.4090
2, 0.5593, 0.4880
(snip)
119, 1.0000, 0.8110
120, 1.0000, 0.7920
```
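The article does not show the script's entry point, but from the two invocations a minimal `main` could look like the following sketch; the argument handling and the `load_datasets`/`load_test_images` helper names are assumptions (`predict` is defined below):

```python
import argparse

def main():
    parser = argparse.ArgumentParser(description='Train or run inference for face_deep.')
    parser.add_argument('--train', action='store_true', help='train and save the model')
    args = parser.parse_args()
    if args.train:
        train(load_datasets())       # load_datasets(): the article's dataset loader (assumed name)
    else:
        images = load_test_images()  # assumed helper returning flattened test images
        print(predict(images))

if __name__ == '__main__':
    main()
```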
- Changed so that `dtype` switches the inference result between raw numpy, `int`, and `argmax`.

```python
def predict(images, dtype=None):
    """Switch the inference result between numpy, int, and argmax via dtype."""
    tf.compat.v1.reset_default_graph()
    x, y_conv, keep_prob = model()
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, MODEL_FILE)
        results = sess.run(tf.nn.softmax(y_conv), feed_dict={x: images, keep_prob: 1.0})
    # Scale softmax probabilities to integer percentages (0-100).
    results = np.array(results * 100, dtype=np.uint8)
    if dtype == 'int':
        results = [[int(y) for y in result] for result in results]
    if dtype == 'argmax':
        results = [np.argmax(y) for y in results]
    return results
```
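A usage sketch of the three modes; `images` is assumed to be a batch of flattened images, as fed to the `x` placeholder:

```python
probs  = predict(images)                  # numpy uint8 array of percentages
ints   = predict(images, dtype='int')     # plain Python ints, e.g. for JSON serialization
labels = predict(images, dtype='argmax')  # index of the most likely class per image
print([CLASSES[i] for i in labels])       # map indices back to class names
```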
- Inference runs when no option is given.
- Everything so far is still at the operation-check level; I'm thinking of calling this separately from a web application.

In the output below, each row is one input image and each column one class; the values are the softmax probabilities scaled to percentages.
```
$ python face_deep.py
(snip)
[[100   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 97   0   0   0   0   0   0   0   0   1]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 36  63   0   0   0   0   0   0   0   0]]
```