In this post, I take a look at the feature maps and filters of a CNN built with the Subclassing model.
**Software**
- Windows 10 Home
- Anaconda3 64-bit (Python 3.7)
- VSCode

**Library**
- TensorFlow 2.1.0
- opencv-python 4.1.2.30

**Hardware**
- CPU: Intel Core i9-9900K
- GPU: NVIDIA GeForce RTX 2080 Ti
- RAM: 16GB 3200MHz
**Reference site**
- Visualize CNN using Keras: Fashion-MNIST (visualizing the weights of the first layer of a convolutional neural network)
The code is posted on GitHub: https://github.com/himazin331/CNN-Visualization
The repository contains a demo program (cnn_visual.py), a feature map visualization module (feature_visual.py), and a filter visualization module (filter_visual.py).
The less relevant parts are omitted below. **Please note that the code is a bit messy...**
cnn_visual.py
```python
import argparse as arg
import os
import sys

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Hide TF messages

import tensorflow as tf
import tensorflow.keras.layers as kl
import numpy as np
import matplotlib.pyplot as plt

import feature_visual
import filter_visual


# CNN
class CNN(tf.keras.Model):

    def __init__(self, n_out, input_shape):
        super().__init__()

        self.conv1 = kl.Conv2D(16, 4, activation='relu', input_shape=input_shape)
        self.conv2 = kl.Conv2D(32, 4, activation='relu')
        self.conv3 = kl.Conv2D(64, 4, activation='relu')

        self.mp1 = kl.MaxPool2D((2, 2), padding='same')
        self.mp2 = kl.MaxPool2D((2, 2), padding='same')
        self.mp3 = kl.MaxPool2D((2, 2), padding='same')

        self.flt = kl.Flatten()

        self.link = kl.Dense(1024, activation='relu')
        self.link_class = kl.Dense(n_out, activation='softmax')

    def call(self, x):
        h1 = self.mp1(self.conv1(x))
        h2 = self.mp2(self.conv2(h1))
        h3 = self.mp3(self.conv3(h2))
        h4 = self.link(self.flt(h3))
        return self.link_class(h4)


# Training
class trainer(object):

    def __init__(self, n_out, input_shape):
        self.model = CNN(n_out, input_shape)
        self.model.compile(optimizer=tf.keras.optimizers.Adam(),
                           loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                           metrics=['accuracy'])

    def train(self, train_img, train_lab, batch_size, epochs, input_shape, test_img):
        # Training
        self.model.fit(train_img, train_lab, batch_size=batch_size, epochs=epochs)
        print("___Training finished\n\n")

        # Feature map visualization
        feature_visual.feature_vi(self.model, input_shape, test_img)
        # Filter visualization
        filter_visual.filter_vi(self.model)


def main():
    """
    Command line options
    (the argparse setup that defines args.epoch and args.batch_size is omitted)
    """

    # Dataset acquisition and preprocessing
    (train_img, train_lab), (test_img, _) = tf.keras.datasets.mnist.load_data()

    train_img = tf.convert_to_tensor(train_img, np.float32)
    train_img /= 255
    train_img = train_img[:, :, :, np.newaxis]

    test_img = tf.convert_to_tensor(test_img, np.float32)
    test_img /= 255
    test_img = test_img[:, :, :, np.newaxis]

    # Start training
    print("___Start training...")
    input_shape = (28, 28, 1)

    Trainer = trainer(10, input_shape)
    Trainer.train(train_img, train_lab, batch_size=args.batch_size,
                  epochs=args.epoch, input_shape=input_shape, test_img=test_img)


if __name__ == '__main__':
    main()
```
This time I used the MNIST handwritten digits as input. The results below were obtained with 10 epochs and a mini-batch size of 256.
**Feature maps**
(Images: Convolution layer 1, Pooling layer 1, Convolution layer 2, Pooling layer 2)

**Filters**
(Images: Convolution layer 1, Convolution layer 2, Convolution layer 3. Since the display is small and hard to see, the images were enlarged and cropped during editing.)
I will explain the related code.
The network model is a CNN with the following structure.
Network model
```python
# CNN
class CNN(tf.keras.Model):

    def __init__(self, n_out, input_shape):
        super().__init__()

        self.conv1 = kl.Conv2D(16, 4, activation='relu', input_shape=input_shape)
        self.conv2 = kl.Conv2D(32, 4, activation='relu')
        self.conv3 = kl.Conv2D(64, 4, activation='relu')

        self.mp1 = kl.MaxPool2D((2, 2), padding='same')
        self.mp2 = kl.MaxPool2D((2, 2), padding='same')
        self.mp3 = kl.MaxPool2D((2, 2), padding='same')

        self.flt = kl.Flatten()

        self.link = kl.Dense(1024, activation='relu')
        self.link_class = kl.Dense(n_out, activation='softmax')

    def call(self, x):
        h1 = self.mp1(self.conv1(x))
        h2 = self.mp2(self.conv2(h1))
        h3 = self.mp3(self.conv3(h2))
        h4 = self.link(self.flt(h3))
        return self.link_class(h4)
```
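As a quick sanity check on this structure, here is a minimal sketch (it assumes the same 28x28x1 MNIST input used in the demo) that prints the per-layer output shapes. Each 4x4 valid convolution shrinks the spatial size by 3, and each same-padded 2x2 max pooling halves it, rounding up:

```python
import tensorflow as tf

# Wrap the subclassed model with an explicit input so summary() can be printed
model = CNN(10, (28, 28, 1))
x = tf.keras.Input(shape=(28, 28, 1))
tf.keras.Model(inputs=x, outputs=model.call(x)).summary()
# Expected shapes:
# conv1: 25x25x16 -> mp1: 13x13x16 -> conv2: 10x10x32 -> mp2: 5x5x32
# -> conv3: 2x2x64 -> mp3: 1x1x64 -> flatten: 64 -> dense: 1024 -> dense: 10
```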
Feature map visualization is done in feature_visual.py.
feature_visual.py
```python
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Hide TF messages

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


# Feature map visualization
def feature_vi(model, input_shape, test_img):

    # Rebuild the model with an explicit input layer
    x = tf.keras.Input(shape=input_shape)
    model_vi = tf.keras.Model(inputs=x, outputs=model.call(x))

    # Print the network configuration
    model_vi.summary()
    print("")

    # Collect layer information
    feature_vi = []
    feature_vi.append(model_vi.get_layer('input_1'))
    feature_vi.append(model_vi.get_layer('conv2d'))
    feature_vi.append(model_vi.get_layer('max_pooling2d'))
    feature_vi.append(model_vi.get_layer('conv2d_1'))
    feature_vi.append(model_vi.get_layer('max_pooling2d_1'))

    # Random data extraction
    idx = int(np.random.randint(0, len(test_img), 1))
    img = test_img[idx]
    img = img[None, :, :, :]  # prepend the batch dimension

    for i in range(len(feature_vi) - 1):

        # Feature map acquisition
        feature_model = tf.keras.Model(inputs=feature_vi[0].input,
                                       outputs=feature_vi[i + 1].output)
        feature_map = feature_model.predict(img)
        feature_map = feature_map[0]
        feature = feature_map.shape[2]

        # Window name definition
        fig = plt.gcf()
        fig.canvas.set_window_title(feature_vi[i + 1].name + " feature-map visualization")

        # Output
        for j in range(feature):
            plt.subplots_adjust(wspace=0.4, hspace=0.8)
            plt.subplot(feature // 6 + 1, 6, j + 1)  # integer division for the grid rows
            plt.xticks([])
            plt.yticks([])
            plt.xlabel(f'filter {j}')
            plt.imshow(feature_map[:, :, j])
        plt.show()
```
You cannot use the CNN class model as-is, because it has no input layer defined. With the Subclassing model, you therefore **add an input layer to the model** as shown below.
```python
# Rebuild the model with an explicit input layer
x = tf.keras.Input(shape=input_shape)
model_vi = tf.keras.Model(inputs=x, outputs=model.call(x))
```
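To see why this rebuild is needed, here is a minimal sketch: a freshly constructed subclassed model has no defined input, so `summary()` fails until the model is wrapped with an explicit `tf.keras.Input`. The rebuild also gives each layer the symbolic `output` tensor that the intermediate models below rely on.

```python
model = CNN(10, (28, 28, 1))
# model.summary()  # would raise an error: the subclassed model is not built yet

x = tf.keras.Input(shape=(28, 28, 1))
model_vi = tf.keras.Model(inputs=x, outputs=model.call(x))
model_vi.summary()  # works: every layer now has concrete input/output tensors
```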
Next, prepare a list and **append the input layer and the layers you are interested in**. This time I want to see the output of the first convolution layer, the first max pooling layer, the second convolution layer, and the second max pooling layer, so I write the following.
```python
# Collect layer information
feature_vi = []
feature_vi.append(model_vi.get_layer('input_1'))
feature_vi.append(model_vi.get_layer('conv2d'))
feature_vi.append(model_vi.get_layer('max_pooling2d'))
feature_vi.append(model_vi.get_layer('conv2d_1'))
feature_vi.append(model_vi.get_layer('max_pooling2d_1'))
```
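Note that names like 'conv2d' and 'max_pooling2d' are auto-generated by Keras in instantiation order, so they can shift (for example to 'conv2d_3') if you build more than one model in the same session. A quick sketch to check the actual names before hard-coding them:

```python
# Print the auto-generated layer names of the rebuilt model
for layer in model_vi.layers:
    print(layer.name)
# e.g. input_1, conv2d, max_pooling2d, conv2d_1, max_pooling2d_1, ...
```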
Next, prepare the input data. A random index is drawn and the corresponding test image is selected. Since the shape of the selected image is (28, 28, 1), we prepend a dimension for the number of samples.
```python
# Random data extraction
idx = int(np.random.randint(0, len(test_img), 1))
img = test_img[idx]
img = img[None, :, :, :]
```
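The `img[None, :, :, :]` idiom simply prepends a batch axis; an equivalent, perhaps more explicit way to write it:

```python
import tensorflow as tf

img = test_img[idx]                # shape: (28, 28, 1)
img = tf.expand_dims(img, axis=0)  # shape: (1, 28, 28, 1), same as img[None, ...]
```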
Next, build a model `feature_model` whose input is the input layer and whose output is the output of the target layer. Then pass the input data to `predict` to get that layer's output.
```python
# Feature map acquisition
feature_model = tf.keras.Model(inputs=feature_vi[0].input, outputs=feature_vi[i+1].output)
feature_map = feature_model.predict(img)
feature_map = feature_map[0]
feature = feature_map.shape[2]
```
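Rebuilding `feature_model` on every loop iteration works, but `tf.keras.Model` also accepts a list of outputs, so a single multi-output model could fetch all the feature maps in one `predict` call. A sketch of that alternative:

```python
# One model whose outputs are all the intermediate layer outputs
multi_model = tf.keras.Model(inputs=feature_vi[0].input,
                             outputs=[l.output for l in feature_vi[1:]])
feature_maps = multi_model.predict(img)  # list of 4 arrays, one per layer
```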
After that, plot the layer output, then repeat the same procedure for the next layer.
Filter visualization is done in filter_visual.py.
filter_visual.py
```python
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Hide TF messages

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


# Filter visualization
def filter_vi(model):

    vi_layer = []

    # Layers to be visualized
    vi_layer.append(model.get_layer('conv2d'))
    vi_layer.append(model.get_layer('conv2d_1'))
    vi_layer.append(model.get_layer('conv2d_2'))

    for i in range(len(vi_layer)):

        # Get the layer's filters
        target_layer = vi_layer[i].get_weights()[0]
        filter_num = target_layer.shape[3]

        # Window name definition
        fig = plt.gcf()
        fig.canvas.set_window_title(vi_layer[i].name + " filter visualization")

        # Output
        for j in range(filter_num):
            plt.subplots_adjust(wspace=0.4, hspace=0.8)
            plt.subplot(filter_num // 6 + 1, 6, j + 1)  # integer division for the grid rows
            plt.xticks([])
            plt.yticks([])
            plt.xlabel(f'filter {j}')
            plt.imshow(target_layer[:, :, 0, j], cmap="gray")
        plt.show()
```
As with the feature map visualization, append the convolution layers whose filters you want to see to a list.
```python
vi_layer = []

# Layers to be visualized
vi_layer.append(model.get_layer('conv2d'))
vi_layer.append(model.get_layer('conv2d_1'))
vi_layer.append(model.get_layer('conv2d_2'))
```
Get the **filter of the target layer with `get_weights()[0]`**. Incidentally, you can get the bias with `get_weights()[1]`.
The shape of the obtained filter is (H, W, I_C, O_C). I_C is the number of input channels and O_C is the number of output channels.
```python
# Get the layer's filters
target_layer = vi_layer[i].get_weights()[0]
filter_num = target_layer.shape[3]
```
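For the model above, the shapes confirm this layout. A small sketch that prints the kernel and bias shapes of each target layer:

```python
# Print kernel and bias shapes for each convolution layer
for layer in vi_layer:
    w, b = layer.get_weights()  # kernel (H, W, I_C, O_C), bias (O_C,)
    print(layer.name, w.shape, b.shape)
# conv2d   (4, 4, 1, 16)  (16,)
# conv2d_1 (4, 4, 16, 32) (32,)
# conv2d_2 (4, 4, 32, 64) (64,)
```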
After that, output the filters, then repeat the same procedure for the next layer.
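One display caveat: the demo plots only the first input channel (`target_layer[:, :, 0, j]`), and since the weights can be negative, matplotlib rescales the gray levels per image. A sketch that min-max normalizes each filter to [0, 1] instead, which makes the filters comparable across the grid:

```python
# Inside the plotting loop: normalize one filter's first input channel
f = target_layer[:, :, 0, j]
f = (f - f.min()) / (f.max() - f.min() + 1e-8)  # guard against a constant filter
plt.imshow(f, cmap="gray", vmin=0.0, vmax=1.0)
```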
I wanted to see the feature maps and filters, so I looked up how to do it and implemented it with various changes. The feature maps are interesting to look at, but I can't tell what the filters represent, so they are less interesting to me. In recent years, explainable AI (XAI) has been attracting attention, and I'm looking forward to the time when humans can understand why such filters make recognition possible.