I wanted to build something with the deep learning I'm currently studying, and since I was keen on making an AI application that works with images, that's what I tackled.
While I was wondering what subject to pick, I remembered that my father is a big fan of female professional golfers, so I decided to build an image-diagnosis app for female pros: one that tells apart Hinako Shibuno, Sakura Koiwai, and Erika Hara.
It was pretty hard... I'll write up what I tried.
I think there are many ways to collect images, but I used icrawler.
It is a mini framework for web crawlers. It supports media such as images and videos, and can also be applied to text and other kinds of files. Scrapy is powerful but heavyweight, whereas icrawler is lightweight. Please see the Official Reference for installation instructions as well.
search.py
from icrawler.builtin import BingImageCrawler
import os
import shutil

# Golfers to collect: the key is the search keyword, the value is the folder name
golfer_lists = {'Hinako Shibuno': 'shibuno', 'Sakura Koiwai': 'koiwai', 'Erika Hara': 'hara'}
# Create the destination folder
os.makedirs('./origin_image', exist_ok=True)

for key, value in golfer_lists.items():
    # Specify where the downloaded images are saved
    crawler = BingImageCrawler(storage={'root_dir': value})
    # Specify the search keyword and the maximum number of images
    crawler.crawl(keyword=key, max_num=1000)
    # Move the folder under ./origin_image
    path = os.path.join('./', value)
    shutil.move(path, './origin_image/')
Now you can download the images for each player. I set max_num to 1000, but only about 700 were actually collected.
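To see how many images were actually collected, a quick count like this is enough (a minimal sketch, assuming the ./origin_image/<player> layout created by search.py above):

import os, glob

# Count the downloaded images in each player's folder
for folder in sorted(glob.glob('./origin_image/*')):
    print(os.path.basename(folder), len(glob.glob(folder + '/*')))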
I used face_recognition to extract the face regions. (Note that the script must not be named face_recognition.py, or it would shadow the library it imports.)
face_detect.py
import os, glob
from PIL import Image
import face_recognition

# Folder containing the original (crawled) images
in_dir = './origin_image/*'
# Output folder for the cropped faces
out_dir = './face'
# Folder for each player
in_file = glob.glob(in_dir)
# Folder names of each player
fileName_lists = os.listdir('./origin_image')

# Process each player's folder
for golfer, fileName in zip(in_file, fileName_lists):
    # List of images for this player
    in_jpg = glob.glob(golfer + '/*')
    # Output folder path for this player
    folder_path = out_dir + '/' + fileName
    # Create the output folder for this player
    os.makedirs(folder_path, exist_ok=True)
    # Process each image
    for i in range(len(in_jpg)):
        # Load the image as an ndarray of shape (height, width, 3)
        image = face_recognition.load_image_file(str(in_jpg[i]))
        # Detect faces; if one exists, a list like [(911, 2452, 1466, 1897)] is returned as (top, right, bottom, left)
        faces = face_recognition.face_locations(image)
        if len(faces) > 0:
            # Select the largest detected face: abs(top - bottom) * abs(right - left)
            face_max = [abs(face[0] - face[2]) * abs(face[1] - face[3]) for face in faces]
            top, right, bottom, left = faces[face_max.index(max(face_max))]
            # Crop the face region
            faceImage = image[top:bottom, left:right]
            # Image.fromarray() turns the ndarray into a PIL.Image, which can then be saved with save()
            final = Image.fromarray(faceImage)
            final = final.resize((64, 64))
            file_path = folder_path + '/' + str(i) + '.jpg'
            final.save(file_path)
        else:
            print('No Face')
It's a little long, but this is what it looks like.
Check the current folder structure.
Now you can extract just the face regions. __After that comes the work of checking the images one by one...__ It's very tedious, but it's important.
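To make that manual check a little faster, something like the following displays the cropped faces in a grid (a minimal sketch using matplotlib; the 'shibuno' folder and the limit of 50 images are just examples, assuming the ./face/<player> layout created above):

import glob
import math
import matplotlib.pyplot as plt
from PIL import Image

# Show the first 50 cropped faces for one player so obvious misdetections can be spotted quickly
files = sorted(glob.glob('./face/shibuno/*'))[:50]
cols = 10
rows = math.ceil(len(files) / cols)
fig, axes = plt.subplots(rows, cols, figsize=(cols, rows))
for ax, path in zip(axes.ravel(), files):
    ax.imshow(Image.open(path))
    ax.set_title(path.split('/')[-1], fontsize=6)
    ax.axis('off')
# Hide any unused subplots
for ax in axes.ravel()[len(files):]:
    ax.axis('off')
plt.tight_layout()
plt.show()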
Next, split the images into 80% training data and 20% validation data.
split.py
# Separate training data and validation data
import os, glob
import shutil

# Folder containing the cropped faces
in_dir = './face/*'
# Folder for each player: ['./face/shibuno', './face/koiwai', './face/hara']
in_file = glob.glob(in_dir)
# Folder names of each player: ['shibuno', 'koiwai', 'hara']
fileName_lists = os.listdir('./face')
# Where the validation files are saved
test_image_path = './valid/'

# Process each player's folder
for golfer, fileName in zip(in_file, fileName_lists):
    # List of images for this player
    in_jpg = glob.glob(golfer + '/*')
    # Destination folder for this player's validation data
    test_path = test_image_path + fileName
    os.makedirs(test_path, exist_ok=True)
    # Move 20% of the images (1 in 5) to the validation folder
    for i in range(len(in_jpg) // 5):
        shutil.move(str(in_jpg[i]), test_path + '/')
If it looks like this, you're good. Next, the training images are augmented.
Collecting and screening image data is very time-consuming and tedious (it really was hard), so new data is generated by flipping and shifting the existing images.
pic_add.py
from keras.preprocessing.image import ImageDataGenerator
import os, glob
import cv2

# Folder containing the cropped faces (the augmented images are saved back into it)
in_dir = './face/*'
# Folder path for each player
in_files = glob.glob(in_dir)
# Folder names of each player
folder_names = os.listdir('./face')

# Generator that applies random transformations
datagen = ImageDataGenerator(
    rotation_range=40,      # random rotation range (degrees)
    width_shift_range=0.2,  # random horizontal shift, as a fraction of the image width
    height_shift_range=0.2, # random vertical shift, as a fraction of the image height
    shear_range=0.2,        # shear intensity; larger values distort the image more diagonally (degrees)
    zoom_range=0.2,         # random zoom: compressed down to 1 - zoom_range, enlarged up to 1 + zoom_range
    horizontal_flip=True,   # randomly flip the image horizontally
    fill_mode='nearest')

# Process each player's folder
for file, name in zip(in_files, folder_names):
    # Images for this player
    image_files = glob.glob(file + '/*')
    SAVE_DIR = './face/' + name
    # Create the save directory if it does not exist
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    # Augment each image individually
    for num in range(len(image_files)):
        img_array = cv2.imread(image_files[num])                # load the image (BGR)
        img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)  # convert to RGB so the saved images keep the correct colors
        img_array = img_array.reshape((1,) + img_array.shape)   # make it 4D so it can be passed to flow()
        # flow() yields batches of randomly transformed images and saves them to the specified directory
        i = 0
        for batch in datagen.flow(img_array, batch_size=1,
                                  save_to_dir=SAVE_DIR, save_prefix='add', save_format='jpg'):
            i += 1
            if i == 5:
                break  # flow() loops forever unless stopped
With this, the number of training images has exceeded 1,000.
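As a quick sanity check before moving on to the model, something like this confirms the per-class counts and that every image is 64x64 (a minimal sketch, assuming the ./face and ./valid folders created above):

import os, glob
from PIL import Image

# Count images per class and flag anything that is not 64x64
for split in ('./face', './valid'):
    for folder in sorted(glob.glob(os.path.join(split, '*'))):
        files = glob.glob(os.path.join(folder, '*'))
        print(folder, len(files), 'images')
        for path in files:
            if Image.open(path).size != (64, 64):
                print('  unexpected size:', path)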
__This post has gotten long, so I'll cover building the model next time.__
As a bonus, I'll also include the code for the cascade classifier.
At first I tried to extract the faces using a cascade classifier, but it failed to detect many of them and the amount of data dropped to about 1/6. I'll post the cascade-classifier version anyway. Download "haarcascade_frontalface_alt.xml" (the file the code below expects) from the link below.
(https://github.com/opencv/opencv/tree/master/data/haarcascades)
cascade.py
import cv2
import os, glob

# Folder containing the original (crawled) images
in_dir = './origin_image/*'
# Output folder for the cropped faces
out_dir = './face_image'
# Folder for each player
in_file = glob.glob(in_dir)
# Folder names of each player
fileName_lists = os.listdir('./origin_image')

# Load the cascade classifier
cascade_path = './haarcascade_frontalface_alt.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# Process each player's folder
for golfer, fileName in zip(in_file, fileName_lists):
    # List of images for this player
    in_jpg = glob.glob(golfer + '/*')
    # Output folder path for this player
    folder_path = out_dir + '/' + fileName
    # Create the output folder for this player
    os.makedirs(folder_path, exist_ok=True)
    # Process each image
    for i in range(len(in_jpg)):
        # Load the image
        image = cv2.imread(str(in_jpg[i]))
        # Convert to grayscale for detection
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=2, minSize=(64, 64))
        if len(faces) > 0:
            # Save every detected face
            for j, face in enumerate(faces, 1):
                x, y, w, h = face
                save_img_path = folder_path + '/' + str(i) + '_' + str(j) + '.jpg'
                cv2.imwrite(save_img_path, image[y:y+h, x:x+w])
        else:
            print('image' + str(i) + ': NoFace')