--There are ways to augment the data when training images are scarce.
--Various techniques exist, such as contrast, gamma, blur, and noise.
--This time, we performed horizontal flipping and random cropping.
--After trying various combinations, this was the one that gave the best training accuracy, at least for the original images used this time.
--The complete source is here.
--I used NumPy and Pillow.
$ pip install numpy==1.16.5 pillow
--The processing below is repeated sequentially for each entry in `CLASSES`.
--The face images are stored in `FACE_PATH`.
--According to `TEST_NUM`, images are copied from `FACE_PATH` to `TEST_PATH`.
--The images that were not copied to `TEST_PATH` are copied to `TRAIN_PATH`.
--According to `AUGMENT_NUM`, augmented images are created from the images in `TRAIN_PATH` and saved to `AUGMENT_PATH`.
config.py

import os

CLASSES = [
    'Abe Oto',
    'Satomi Ishihara',
    'Yuno Ohara',
    'Fuka Koshiba',
    'Haruna Kawaguchi',
    'Nana Mori',
    'Minami Hamabe',
    'Kaya Kiyohara',
    'Haruka Fukuhara',
    'Kuroshima Yuina'
]

BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(BASE_PATH, 'data')
FACE_PATH = os.path.join(DATA_PATH, 'face')
TRAIN_PATH = os.path.join(DATA_PATH, 'train')
TEST_PATH = os.path.join(DATA_PATH, 'test')
AUGMENT_PATH = os.path.join(DATA_PATH, 'augment')

TRAIN_NUM = 0
TEST_NUM = 100
AUGMENT_NUM = 6000
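Note that `BASE_PATH` resolves two levels up from `config.py`. For example, if `config.py` lived at `/path/to/project/src/config.py` (a hypothetical layout, not stated in the article), the constants would resolve as follows:

# Hypothetical example, assuming config.py is at /path/to/project/src/config.py
# BASE_PATH    -> /path/to/project
# DATA_PATH    -> /path/to/project/data
# FACE_PATH    -> /path/to/project/data/face
# TRAIN_PATH   -> /path/to/project/data/train
# TEST_PATH    -> /path/to/project/data/test
# AUGMENT_PATH -> /path/to/project/data/augment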
--Build the paths for the face images, training images, and test images.
--Create a list of the face images.
--`query` is passed each entry of `CLASSES` in turn.
save_train_test_from_face.py

import glob
import os
import random
import shutil

from config import CLASSES, FACE_PATH, TRAIN_PATH, TEST_PATH, TEST_NUM

def split(query):
    """Get the list of face images and split-copy them into training and test sets."""
    face_path = os.path.join(FACE_PATH, query)
    train_path = os.path.join(TRAIN_PATH, query)
    test_path = os.path.join(TEST_PATH, query)
    face_file_list = glob.glob(os.path.join(face_path, '*.jpeg'))
    face_file_list.sort()
--Shuffle the list of face images.
--According to `TEST_NUM`, split the list of face images into training images and test images.
save_train_test_from_face.py

    random.shuffle(face_file_list)
    train_file_list = face_file_list[:-TEST_NUM]
    test_file_list = face_file_list[len(train_file_list):]
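For example, with a shuffled list of five files and `TEST_NUM = 2`, the slicing works out like this (an illustrative toy run, not from the article):

files = ['a.jpeg', 'b.jpeg', 'c.jpeg', 'd.jpeg', 'e.jpeg']  # already shuffled
train = files[:-2]         # ['a.jpeg', 'b.jpeg', 'c.jpeg']
test = files[len(train):]  # ['d.jpeg', 'e.jpeg']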
--Copy the training images and the test images to their destinations.
--Keeping the original face images intact saves the trouble of redoing the work later.
save_train_test_from_face.py

    for face_file in train_file_list:
        train_file = os.path.join(train_path, os.path.basename(face_file))
        shutil.copy(face_file, train_file)
    for face_file in test_file_list:
        test_file = os.path.join(test_path, os.path.basename(face_file))
        shutil.copy(face_file, test_file)
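The main block is not shown in the excerpt; a minimal driver that prepares the destination directories and calls `split` for every class might look like this sketch (the directory creation is our assumption):

if __name__ == '__main__':
    for query in CLASSES:
        # Assumed here: create the per-class destination directories if missing.
        os.makedirs(os.path.join(TRAIN_PATH, query), exist_ok=True)
        os.makedirs(os.path.join(TEST_PATH, query), exist_ok=True)
        split(query)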
--The face images were divided into training images and test images as shown below.
--The maximum number of training images is 392 and the minimum is 269, which may be too few.
$ python save_train_test_from_face.py
query:Abe Oto, face: 415, train: 315, test: 100
query:Satomi Ishihara, face: 492, train: 392, test: 100
query:Yuno Ohara, face: 372, train: 272, test: 100
query:Fuka Koshiba, face: 400, train: 300, test: 100
query:Haruna Kawaguchi, face: 369, train: 269, test: 100
query:Nana Mori, face: 389, train: 289, test: 100
query:Minami Hamabe, face: 481, train: 381, test: 100
query:Kaya Kiyohara, face: 428, train: 328, test: 100
query:Haruka Fukuhara, face: 420, train: 320, test: 100
query:Kuroshima Yuina, face: 448, train: 348, test: 100
--I referred to the following: Summary of Data Augmentation of images in NumPy.
--First, convert the image from Pillow to NumPy.
--`rate` gives the probability of flipping; we set it to `0.5` for a 50-50 chance.
--After converting to NumPy, flip the image horizontally with `fliplr`.
--Finally, convert from NumPy back to Pillow.
import glob, math, os
import numpy as np
from PIL import Image
from config import CLASSES, TRAIN_PATH, AUGMENT_PATH, AUGMENT_NUM

def horizontal_flip(image, rate=0.5):
    """Flip horizontally at random."""
    image = np.array(image, dtype=np.float32)
    if np.random.rand() < rate:
        image = np.fliplr(image)
    return Image.fromarray(np.uint8(image))
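Incidentally, the same flip can be done without the NumPy round trip by using Pillow's own `transpose`; a minimal equivalent sketch (our variant, not the article's code):

def horizontal_flip_pil(image, rate=0.5):
    """Flip horizontally at random, staying entirely in Pillow."""
    if np.random.rand() < rate:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
    return image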
--Get the height and width of the image with `image.shape`.
--Determine the crop size based on `size`; `0.8` means cropping at 80% of the shorter side.
--Position the upper-left and lower-right corners.
--`top` is a random value in the range `0` to `height - crop_size`.
--Similarly, determine `left`.
--`bottom` is obtained by adding `top` and `crop_size`.
--Similarly, determine `right`.
--Finally, crop the region out of `image`.
def random_crop(image, size=0.8):
    """Crop a fixed-size square at a random position."""
    image = np.array(image, dtype=np.float32)
    height, width, _ = image.shape
    crop_size = int(min(height, width) * size)
    top = np.random.randint(0, height - crop_size)
    left = np.random.randint(0, width - crop_size)
    bottom = top + crop_size
    right = left + crop_size
    image = image[top:bottom, left:right, :]
    return Image.fromarray(np.uint8(image))
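As a quick check with made-up numbers: for a 300x200 image, `crop_size` is `int(200 * 0.8) = 160`, so `top` is drawn from `[0, 40)` and `left` from `[0, 140)`, and the result is always a 160x160 square:

image = Image.new('RGB', (300, 200))  # blank 300x200 test image (width x height)
cropped = random_crop(image)
print(cropped.size)  # -> (160, 160)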
--Set the paths for the training images and the augmented images.
--`query` is passed each entry of `CLASSES` in turn.
def augment(query):
    """Load, augment, and save the training images."""
    train_path = os.path.join(TRAIN_PATH, query)
    augment_path = os.path.join(AUGMENT_PATH, query)
--Create a list of the training images.
    train_list = glob.glob(os.path.join(train_path, '*.jpeg'))
    train_list.sort()
--From the target number of augmented images, work out how many passes over the training images are needed and use that as the loop count. For example, producing 6000 augmented images from 315 training images takes ceil(6000 / 315) = 20 passes.
    loop_num = math.ceil(AUGMENT_NUM / len(train_list))
--In a double loop over the pass count and the list of training images, do the following:
--Read the training image.
--Flip it horizontally with a 50% probability.
--Random-crop it at 80% of the image size.
--Append a suffix such as `-0001.jpeg` to the file name and save the augmented image.
    augment_num = 0
    for num in range(1, loop_num + 1):
        for train_file in train_list:
            # Stop once the target number of augmented images is reached
            # (the break only leaves the inner loop, so later passes are no-ops).
            if augment_num == AUGMENT_NUM:
                break
            image = Image.open(train_file)
            image = horizontal_flip(image)
            image = random_crop(image)
            augment_file = os.path.join(
                AUGMENT_PATH, query,
                os.path.basename(train_file).split('.')[0] + '-{:04d}.jpeg'.format(num))
            image.save(augment_file, optimize=True, quality=95)
            print('query: {}, train_file: {}, augment_file: {}'.format(
                query, os.path.basename(train_file), os.path.basename(augment_file)))
            augment_num += 1
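Here too the main block is not shown; a minimal driver for this script could look like the following sketch (the directory creation is our assumption):

if __name__ == '__main__':
    for query in CLASSES:
        # Assumed here: make sure the per-class output directory exists.
        os.makedirs(os.path.join(AUGMENT_PATH, query), exist_ok=True)
        augment(query)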
--The training images were augmented with Pillow and NumPy.
--In the course of the work, we also tried scale crop, cutout, random erase, and random rotate in addition to random crop. For the face images used this time, they did not help improve accuracy, so we did not adopt them.
--Next time, we plan to create a dataset to make the training images and test images easier to handle.