PC: MacBook Air / CPU: 1.4 GHz Intel Core i5 / Memory: 4 GB
I did everything on an ordinary MacBook Air, which was painful: training was slow, and the process kept crashing from lack of memory.
I can't write in detail here, so please refer to the following article.
[Python: Scraping websites with BeautifulSoup4](http://momijiame.tumblr.com/post/114227737756/python-beautifulsoup4-%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6-web-%E3%82%B5%E3%82%A4%E3%83%88%E3%82%92%E3%82%B9%E3%82%AF%E3%83%AC%E3%82%A4%E3%83%94%E3%83%B3%E3%82%B0%E3%81%99%E3%82%8B) / Scraping with Python and Beautiful Soup
The acquired images are saved in separate directories for each actress.
./folder
|--- /actress1
| |--- image1.jpg
| |--- image2.jpg
| |--- image3.jpg
|
|--- /actress2
| .
| .
|--- /actress3
.
.
.
OpenCV is probably the most famous library for image recognition, but according to the comparison "dlib vs OpenCV face detection", dlib seems better suited to face extraction, with fewer false detections, so I used dlib.
```python
import os
import glob

import cv2
import dlib
from PIL import Image

"""
INPUT_DIR is the directory of images collected in step 1 (image collection).
OUTPUT_DIR is the output directory (same folder structure as INPUT_DIR).
"""

detector = dlib.get_frontal_face_detector()

# Get the list of per-actress directories
dir_list = os.listdir(INPUT_DIR)

for i, dir_name in enumerate(dir_list):
    if not os.path.exists(os.path.join(OUTPUT_DIR, dir_name)):
        os.mkdir(os.path.join(OUTPUT_DIR, dir_name))
    image_files = glob.glob(os.path.join(INPUT_DIR, dir_name, "*.jpg"))
    for j, image_file in enumerate(image_files):
        img = cv2.imread(image_file)
        dets = detector(img, 1)
        open_img = Image.open(image_file)
        for k, d in enumerate(dets):
            # Skip faces smaller than 80x80 pixels
            if d.right() - d.left() < 80 or d.bottom() - d.top() < 80:
                continue
            image_file = image_file.replace(INPUT_DIR, OUTPUT_DIR)
            # If one image contains several faces the output file names would
            # collide, so append the face index
            output_file = image_file.replace('.jpg', '_' + str(k) + '.jpg')
            cropped_img = open_img.crop((d.left(), d.top(), d.right(), d.bottom()))
            cropped_img.resize((96, 96)).save(output_file, 'JPEG', quality=100, optimize=True)
```
In addition to face detection, dlib can also detect facial landmarks such as the eyes, the nose, and the contour of the face.
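For example, here is a minimal sketch of landmark detection with dlib's shape predictor. It assumes the pre-trained 68-point model `shape_predictor_68_face_landmarks.dat` (distributed separately on dlib.net) and a placeholder `sample.jpg`; it is not part of the extraction pipeline above, which only uses the face detector.

```python
import dlib
from scipy import misc

# Assumption: the pre-trained 68-point landmark model has been downloaded
# from dlib.net and placed next to this script.
PREDICTOR_PATH = 'shape_predictor_68_face_landmarks.dat'

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PREDICTOR_PATH)

image = misc.imread('sample.jpg')  # placeholder input image
for d in detector(image, 1):
    shape = predictor(image, d)
    # shape.part(i) returns the i-th landmark point (0-67):
    # jaw line / face contour, eyebrows, nose, eyes and mouth.
    print(shape.part(30))  # tip of the nose
    print(shape.part(36))  # outer corner of the left eye
```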
```python
import os
import random
import glob

import numpy as np
from scipy import misc
from PIL import Image
import cv2

# INPUT_DIR, OUTPUT_DIR and IMAGE_SIZE (96 in this article) are assumed to be
# defined elsewhere, as in the previous snippet.


# Horizontal flip
def flip_left_right(image):
    return image[:, -1::-1]


# Brightness change
def random_brightness(image, max_delta=63, seed=None):
    img = np.array(image)
    delta = np.random.uniform(-max_delta, max_delta)
    image = Image.fromarray(np.uint8(np.clip(img + delta, 0, 255)))
    return image


# Contrast change: scale the deviation from the mean by a random factor
def random_contrast(image, lower=0.2, upper=1.8, seed=None):
    factor = np.random.uniform(lower, upper)
    img = np.asarray(image, dtype=np.float32)
    mean = img.mean()
    return (img - mean) * factor + mean


# Image cropping: slide a crop_size window over the image in padding_size steps
def crop(image, crop_size, padding_size):
    image = np.asarray(image)
    (height, width) = image.shape[:2]
    cropped_images = []
    for i in xrange(0, width, padding_size):
        for j in xrange(0, height, padding_size):
            cropped_image = image[j:j+crop_size, i:i+crop_size]
            resized_image = cv2.resize(cropped_image, (IMAGE_SIZE, IMAGE_SIZE))
            cropped_images.append(resized_image)
    return cropped_images


# Data augmentation
# Apply "horizontal flip", "brightness change", "contrast change" and "cropping"
# until the number of images specified by data_num is reached.
def data_augmentation(image_files, data_num):
    image_list = []
    file_num = len(image_files)
    for image_file in image_files:
        image_list.append(misc.imread(image_file))
    if file_num >= data_num:
        return image_list

    # flip left right (iterate over a snapshot so newly added images
    # are not flipped again)
    random.shuffle(image_list)
    for image in image_list[:]:
        flipped_image = flip_left_right(image)
        image_list.append(flipped_image)
        if len(image_list) == data_num:
            return image_list

    # random brightness
    random.shuffle(image_list)
    for image in image_list[:]:
        brightness_image = random_brightness(image)
        image_list.append(brightness_image)
        if len(image_list) == data_num:
            return image_list

    # random contrast
    random.shuffle(image_list)
    for image in image_list[:]:
        contrast_image = random_contrast(image)
        image_list.append(contrast_image)
        if len(image_list) == data_num:
            return image_list

    # cropping
    random.shuffle(image_list)
    cropped_size = int(IMAGE_SIZE * 0.75)
    padding_size = IMAGE_SIZE - cropped_size
    for image in image_list[:]:
        cropped_image_list = crop(image, cropped_size, padding_size)
        for cropped_image in cropped_image_list:
            image_list.append(cropped_image)
            if len(image_list) == data_num:
                return image_list

    return image_list


dir_list = os.listdir(INPUT_DIR)
for dir_name in dir_list:
    image_files = glob.glob(os.path.join(INPUT_DIR, dir_name, "*.jpg"))
    if len(image_files) == 0:
        continue
    image_list = data_augmentation(image_files, 1000)
    for i, image in enumerate(image_list):
        image = whitening(image)  # defined separately (see the sketch below)
        misc.imsave(os.path.join(OUTPUT_DIR, dir_name, str(i) + '.jpg'), image)
```
Reference: data augmentation with Chainer's imagenet sample / whitening
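The `whitening` call in the augmentation script above is not defined in that snippet. A minimal sketch, assuming per-image whitening (subtract the image mean and divide by an adjusted standard deviation), is:

```python
import numpy as np

def whitening(image):
    # Per-image whitening: zero mean and roughly unit variance per image.
    # The floor of 1/sqrt(N) on the standard deviation avoids dividing
    # by zero on flat images.
    image = np.asarray(image, dtype=np.float32)
    mean = image.mean()
    stddev = image.std()
    adjusted_stddev = max(stddev, 1.0 / np.sqrt(image.size))
    return (image - mean) / adjusted_stddev
```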
```python
import os
import glob

import numpy as np
from scipy import misc

# DIR_LIST, INPUT_DIR, OUTPUT_DIR and args (parsed command-line arguments)
# are assumed to be defined elsewhere.


""" Load files from the specified directories """
def load_data_from_dir(input_dir_name, input_dir_list, start_index, test_freq):
    train_list = []
    test_list = []
    for dir_index, dir_name in enumerate(input_dir_list):
        image_files = glob.glob(os.path.join(input_dir_name, dir_name, "*.jpg"))
        train_count = 0
        test_count = 0
        print('directory:{} index:{}'.format(dir_name, dir_index + start_index))
        for file_index, file_name in enumerate(image_files):
            image = misc.imread(file_name)
            label = np.int32(dir_index + start_index)
            if not file_index % test_freq == 0:  # train data
                train_list.append((dir_name, image, label))
                train_count += 1
            else:  # every test_freq-th image goes into the test set
                test_list.append((dir_name, image, label))
                test_count += 1
        print("directory:{} total:{} train:{} test:{}".format(
            dir_name, train_count + test_count, train_count, test_count))
    return train_list, test_list


""" Save the data in numpy (.npy) format """
def save_dataset_numpy(data_list, image_path, label_path):
    image_list = []
    label_list = []
    for _, image, label in data_list:
        image_list.append(image)
        label_list.append(label)
    image_data = np.array(image_list, dtype=np.float32)
    label_data = np.array(label_list, dtype=np.int32)
    np.save(image_path, image_data)
    np.save(label_path, label_data)


# Create one file for every 10 classes (args.interval == 10)
for i in xrange(0, len(DIR_LIST), args.interval):
    train_list, test_list = load_data_from_dir(INPUT_DIR, DIR_LIST[i:i+args.interval], i, 10)
    train_data_path = os.path.join(OUTPUT_DIR, 'train', 'data-{}.npy'.format(i+args.interval))
    train_label_path = os.path.join(OUTPUT_DIR, 'train', 'label-{}.npy'.format(i+args.interval))
    test_data_path = os.path.join(OUTPUT_DIR, 'test', 'data-{}.npy'.format(i+args.interval))
    test_label_path = os.path.join(OUTPUT_DIR, 'test', 'label-{}.npy'.format(i+args.interval))
    save_dataset_numpy(train_list, train_data_path, train_label_path)
    save_dataset_numpy(test_list, test_data_path, test_label_path)
```
At first I tried to build it with TensorFlow, but I personally felt it had a lot of machinery I did not need, so I switched to Chainer.
For training, I first made sure the pipeline could learn properly on CIFAR-10 (a 10-class dataset of general objects), and only then trained on the actual data.
The network is AlexNet with Batch Normalization, with slight modifications.
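The article does not show the network definition, so here is a minimal sketch of what an AlexNet-style Chainer model with Batch Normalization might look like. This is an assumption on my part, not the exact network used; the layer sizes, the 66-class output and the `predict` method are guesses chosen to stay consistent with the prediction code later in the article.

```python
import chainer
import chainer.functions as F
import chainer.links as L


class AlexBN(chainer.Chain):
    """AlexNet-like CNN with Batch Normalization for 96x96 RGB inputs (sketch)."""

    def __init__(self, n_classes=66):  # 66 classes as in the memory estimate below
        super(AlexBN, self).__init__(
            conv1=L.Convolution2D(3, 96, 11, stride=4),
            bn1=L.BatchNormalization(96),
            conv2=L.Convolution2D(96, 256, 5, pad=2),
            bn2=L.BatchNormalization(256),
            conv3=L.Convolution2D(256, 384, 3, pad=1),
            conv4=L.Convolution2D(384, 384, 3, pad=1),
            conv5=L.Convolution2D(384, 256, 3, pad=1),
            fc6=L.Linear(None, 4096),  # input size inferred on first forward pass
            fc7=L.Linear(4096, 4096),
            fc8=L.Linear(4096, n_classes),
        )

    def forward(self, x):
        h = F.max_pooling_2d(F.relu(self.bn1(self.conv1(x))), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.bn2(self.conv2(h))), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.max_pooling_2d(F.relu(self.conv5(h)), 3, stride=2)
        h = F.dropout(F.relu(self.fc6(h)))
        h = F.dropout(F.relu(self.fc7(h)))
        return self.fc8(h)

    def __call__(self, x, t):
        # Used by optimizer.update(model, x, t) in the BatchIter usage example
        h = self.forward(x)
        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)
        return self.loss

    def predict(self, x):
        # Used by the prediction script at the end of the article
        return F.softmax(self.forward(x))
```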
I wrote it with multiprocessing from the start, but that made debugging quite difficult. I think it is better to start with a really simple program.
If you load all of the images at once, memory runs out, since the input images alone take about 1.7 GB. It makes the source a bit messy, but I wrote a BatchIter class that loads the data for each batch and then deletes it to free the memory.
```
// Size per image
96 × 96 × 3 = 27,648 (bytes)
// Per class (1,000 images)
27,648 × 1,000 = 27,648,000 (bytes) ≈ 26.4 (MB)
// Whole dataset (66 classes) ... I hope the math is right
26.4 × 66 ≈ 1,742.4 (MB) ≈ 1.7 (GB)
```
"""
Batch iterator class
Usage:
batch_iter = BatchIter(DATA_DIR, 100)
for batch_data, batch_label in batch_iter:
batch_start_time = time.time()
x = np.asarray(batch_data, dtype=np.float32).transpose((0, 3, 1, 2))
t = np.asarray(train_batch_label, dtype=np.int32)
x = Variable(xp.asarray(x))
t = Variable(xp.asarray(t))
optimizer.update(model, x, t)
"""
class BatchIter(object):
def __init__(self, data_dir, batch_size):
self.index = 0
self.batch_size = batch_size
self.data_files = glob.glob(os.path.join(data_dir, 'data-*.npy'))
self.label_files = glob.glob(os.path.join(data_dir, 'label-*.npy'))
data_size = 0
for data in self.data_files:
loaded_data = np.load(data)
data_size += loaded_data.shape[0]
del loaded_data
self.data_size = data_size
assert len(self.data_files) == len(self.label_files), "Invalid data size."
def __iter__(self):
return self
def next(self):
if self.index >= self.data_size:
raise StopIteration()
data = np.zeros((self.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3))
label = np.zeros((self.batch_size))
incremental_value = int(self.batch_size / len(self.data_files))
count = 0
for i in range(len(self.data_files)):
loaded_data = np.load(self.data_files[i])
loaded_label = np.load(self.label_files[i])
assert loaded_data.shape[0] == loaded_label.shape[0], "Loaded data size is invalid."
perm = np.random.permutation(loaded_data.shape[0])
if i + 1 == len(self.data_files): # last item
incremental_value = self.batch_size - count
idx = perm[0:incremental_value]
else:
idx = perm[0:incremental_value]
data[count:count+incremental_value] = loaded_data[idx]
label[count:count+incremental_value] = loaded_label[idx]
count += incremental_value
del loaded_data
del loaded_label
self.index += self.batch_size
return data, label
References:
- CNN: "Trends in research on deep learning in the field of image recognition", "Image scaling using a deep convolutional neural network", "CNN Tutorial"
- TensorFlow: "Identify the anime Yuruyuri production company with TensorFlow", "Identify an idol's face by deep learning with TensorFlow", "TensorFlow: How To"
- Chainer: "GitHub - chainer/examples/imagenet/", "GitHub - mitmul/chainer-cifar10", "First anime face recognition with Chainer"
```python
import imp
import os

import dlib
import numpy as np
from scipy import misc
from chainer import Variable, optimizers, serializers

# model_name, model_path, opt_name, opt_path, input_image and label_path
# are assumed to be set elsewhere (e.g. from command-line arguments).


def set_model(model_name, model_path):
    model_fn = os.path.basename('models/' + model_name + '.py')
    model = imp.load_source(model_fn.split('.')[0],
                            'models/' + model_name + '.py').model
    print('Load model from ', model_path)
    serializers.load_hdf5(model_path, model)
    return model


def set_optimizer(opt_name, opt_path, model):
    if opt_name == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(momentum=0.9)
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam()
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    else:
        raise ValueError('Invalid optimizer name')
    optimizer.setup(model)
    print('Load optimizer state from ', opt_path)
    serializers.load_hdf5(opt_path, optimizer)
    return optimizer


def detect_face(image_file):
    detector = dlib.get_frontal_face_detector()
    image = misc.imread(image_file)
    dets = detector(image, 1)
    d = dets[0]
    cropped_image = image[d.top():d.bottom(), d.left():d.right()]
    resized_image = misc.imresize(cropped_image, (96, 96))
    return resized_image


# Load the model and optimizer
model = set_model(model_name, model_path)
optimizer = set_optimizer(opt_name, opt_path, model)
detected_face_image = detect_face(input_image)

# Predict with the loaded model (add a batch dimension, then NHWC -> NCHW)
x = np.asarray([detected_face_image], dtype=np.float32).transpose((0, 3, 1, 2))
x = Variable(np.asarray(x), volatile='on')
pred = model.predict(x).data

# Load the labels (the label file is created together with the numpy data files)
categories = np.loadtxt(label_path, str, delimiter="\n")

# Sort by score in descending order and keep the top 5
score = pred.reshape((pred.size,))
result = sorted(zip(score, categories), reverse=True)

results = []
for i, (score, label) in enumerate(result[:5]):
    print('num:{} score:{:.5f} label:{}'.format(i + 1, score * 100, label))
    results.append({
        'label': label,
        'score': str(round(score * 100, 2))
    })
```
This has nothing to do with deep learning, but when I built the website I first tried Heroku and eventually settled on ConoHa. Installing dlib and Chainer on Heroku was quite difficult. ConoHa apparently had availability problems a while ago, but after its renewal it seems to be fine. I also hesitated over Sakura VPS, but the deciding factor was that Sakura VPS has an initial setup fee while ConoHa does not.
I also wrote an article about abstracting the training code with Chainer's Trainer, so please have a look if you are interested: "I tried training on my own dataset using Chainer Trainer".
Last but not least, I am building a site that searches for similar-looking AV actresses using this CNN, so please take a look if you like: Babelink - a similar AV actress search service.