I tried out Kaokore, a recently released dataset of faces from classical Japanese artwork.
https://github.com/rois-codh/kaokore
The dataset consists of classical face images and comes with two classification tasks, gender and status. This time I tried two-class gender classification with EfficientNet in PyTorch.
First, download it locally:
$ git clone https://github.com/rois-codh/kaokore.git
Then run the following command to download the image data:
$ python download.py
The directory structure then looks as follows: the images are in images_256, and the image labels are in labels.csv.
kaokore --- kaokore --- images_256 --- XXXXXX.jpg
                     |- labels.csv
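Before training, it can help to take a quick look at the label file with pandas (a minimal sketch, assuming the clone layout shown above; the column names match the ones the loading code further down relies on):

import pandas as pd

df = pd.read_csv('kaokore/kaokore/labels.csv')
print(df.head())                 # columns include image, gender, status, set
print(df['set'].value_counts())  # size of each split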
EfficientNet
Now let's try using EfficientNet.
There are various implementations of EfficientNet; they are all collected here:
https://github.com/yoyoyo-yo/DeepLearningMugenKnock
This time I use EfficientNet B0.
import torch
import torch.nn.functional as F
import argparse
import cv2
import numpy as np
from glob import glob
import copy
from collections import OrderedDict
from tqdm import tqdm
import pandas as pd
Define the image size and other settings.
class_label = ['male', 'female']  # class names
class_N = len(class_label)        # number of classes
img_height, img_width = 128, 128  # input image size
channel = 3                       # number of input channels

# GPU (falls back to CPU if CUDA is unavailable)
GPU = True  # if necessary
device = torch.device("cuda" if GPU and torch.cuda.is_available() else "cpu")

torch.manual_seed(0)
EfficientNet
Define the model.
The original paper is https://arxiv.org/abs/1905.11946
EfficientNet attracted a lot of attention as the model that was the classification SoTA at the time. This time I reassembled it in PyTorch, referring to https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
class EfficientNetB0(torch.nn.Module):
    def __init__(self):
        super(EfficientNetB0, self).__init__()

        # Net config
        width_coefficient = 1
        depth_coefficient = 1
        dropout_ratio = 0.2
        depth_divisor = 8
        drop_connect_rate = 0.2

        DEFAULT_BLOCKS_ARGS = [
            # block 1
            {'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16,
             'expand_ratio': 1, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25},
            # block 2
            {'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 3
            {'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 4
            {'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 5
            {'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112,
             'expand_ratio': 6, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25},
            # block 6
            {'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 7
            {'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320,
             'expand_ratio': 6, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25}
        ]

        def round_filters(filters, divisor=depth_divisor):
            """Round number of filters based on width multiplier."""
            filters *= width_coefficient
            new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
            # Make sure that rounding down does not go down by more than 10%.
            if new_filters < 0.9 * filters:
                new_filters += divisor
            return int(new_filters)

        def round_repeats(repeats):
            """Round number of repeats based on depth multiplier."""
            return int(np.ceil(depth_coefficient * repeats))

        class Reshape(torch.nn.Module):
            def __init__(self, c, h, w):
                super(Reshape, self).__init__()
                self.c = c
                self.h = h
                self.w = w

            def forward(self, x):
                x = x.view(x.size()[0], self.c, self.h, self.w)
                return x

        class Flatten(torch.nn.Module):
            def __init__(self):
                super(Flatten, self).__init__()

            def forward(self, x):
                x = x.view(x.size()[0], -1)
                return x

        # activation
        class Swish(torch.nn.Module):
            def __init__(self):
                super(Swish, self).__init__()

            def forward(self, x):
                return x * torch.sigmoid(x)

        # EfficientNet block (MBConv)
        class Block(torch.nn.Module):
            def __init__(self, activation_fn=Swish(), drop_rate=0., name='',
                         filters_in=32, filters_out=16, kernel_size=3, stride=1,
                         expand_ratio=1, se_ratio=0., id_skip=True):
                super(Block, self).__init__()

                # Expansion phase
                filters = filters_in * expand_ratio
                if expand_ratio != 1:
                    _modules = OrderedDict()
                    _modules[name + 'expand_conv'] = torch.nn.Conv2d(filters_in, filters, kernel_size=1, padding=0, bias=False)
                    _modules[name + 'expand_bn'] = torch.nn.BatchNorm2d(filters)
                    _modules[name + 'expand_activation'] = activation_fn
                    self.expansion = torch.nn.Sequential(_modules)

                # Depthwise convolution (groups=filters gives each channel its own kernel)
                _modules = OrderedDict()
                conv_pad = kernel_size // 2
                _modules[name + 'dw_conv'] = torch.nn.Conv2d(filters, filters, kernel_size, stride=stride, padding=conv_pad, bias=False, groups=filters)
                _modules[name + 'dw_bn'] = torch.nn.BatchNorm2d(filters)
                _modules[name + 'dw_activation'] = activation_fn
                self.DW_conv = torch.nn.Sequential(_modules)

                # Squeeze-and-Excitation phase
                if 0 < se_ratio <= 1:
                    filters_se = max(1, int(filters_in * se_ratio))
                    _modules = OrderedDict()
                    # the paper squeezes with global *average* pooling
                    _modules[name + 'se_squeeze'] = torch.nn.AdaptiveAvgPool2d((1, 1))
                    _modules[name + 'se_reshape'] = Reshape(c=filters, h=1, w=1)
                    _modules[name + 'se_reduce_conv'] = torch.nn.Conv2d(filters, filters_se, kernel_size=1, padding=0)
                    _modules[name + 'se_reduce_activation'] = activation_fn
                    _modules[name + 'se_expand_conv'] = torch.nn.Conv2d(filters_se, filters, kernel_size=1, padding=0)
                    _modules[name + 'se_expand_activation'] = torch.nn.Sigmoid()
                    self.SE_phase = torch.nn.Sequential(_modules)

                # Output phase
                _modules = OrderedDict()
                _modules[name + 'project_conv'] = torch.nn.Conv2d(filters, filters_out, kernel_size=1, padding=0, bias=False)
                _modules[name + 'project_bn'] = torch.nn.BatchNorm2d(filters_out)
                self.output_phase = torch.nn.Sequential(_modules)

                # identity skip connection, only when input and output shapes match
                self.last_add = False
                if (id_skip is True and stride == 1 and filters_in == filters_out):
                    if drop_rate > 0:
                        self.output_phase_Dropout = torch.nn.Dropout2d(p=drop_rate)
                    self.last_add = True

            def forward(self, input_x):
                # Expansion phase
                if hasattr(self, 'expansion'):
                    x = self.expansion(input_x)
                else:
                    x = input_x

                x = self.DW_conv(x)

                # Squeeze-and-Excitation phase: rescale channels by learned weights
                if hasattr(self, 'SE_phase'):
                    x_SE_phase = self.SE_phase(x)
                    x = x * x_SE_phase

                # Output phase
                x = self.output_phase(x)
                if hasattr(self, 'output_phase_Dropout'):
                    x = self.output_phase_Dropout(x)
                if self.last_add:
                    x = x + input_x
                return x

        # stem
        _modules = OrderedDict()
        _modules['stem_conv'] = torch.nn.Conv2d(channel, 32, kernel_size=3, padding=1, stride=2, bias=False)
        _modules['stem_bn'] = torch.nn.BatchNorm2d(32)
        _modules['stem_activation'] = Swish()
        self.stem = torch.nn.Sequential(_modules)

        # blocks
        _modules = []
        b = 0
        block_Num = float(sum(args['repeats'] for args in DEFAULT_BLOCKS_ARGS))
        for (i, args) in enumerate(DEFAULT_BLOCKS_ARGS):
            # Update block input and output filters based on width multiplier.
            args['filters_in'] = round_filters(args['filters_in'])
            args['filters_out'] = round_filters(args['filters_out'])

            for j in range(round_repeats(args.pop('repeats'))):
                # The first block needs to take care of stride and filter size increase.
                if j > 0:
                    args['stride'] = 1
                    args['filters_in'] = args['filters_out']
                _modules.append(
                    Block(activation_fn=Swish(), drop_rate=drop_connect_rate * b / block_Num,
                          name='block{}{}_'.format(i + 1, chr(j + 97)), **args))
                b += 1
        self.block = torch.nn.Sequential(*_modules)

        # top
        _modules = OrderedDict()
        _modules['top_conv'] = torch.nn.Conv2d(DEFAULT_BLOCKS_ARGS[-1]['filters_out'], round_filters(1280), kernel_size=1, padding=0, bias=False)
        _modules['top_bn'] = torch.nn.BatchNorm2d(round_filters(1280))
        _modules['top_activation'] = Swish()
        self.top = torch.nn.Sequential(_modules)

        _modules = OrderedDict()
        _modules['top_class_GAP'] = torch.nn.AdaptiveAvgPool2d((1, 1))  # global average pooling
        if dropout_ratio > 0:
            _modules['top_class_dropout'] = torch.nn.Dropout2d(p=dropout_ratio)
        _modules['top_class_flatten'] = Flatten()
        _modules['top_class_linear'] = torch.nn.Linear(round_filters(1280), class_N)
        self.top_class = torch.nn.Sequential(_modules)

    def forward(self, x):
        # stem
        x = self.stem(x)
        # blocks
        x = self.block(x)
        # top
        x = self.top(x)
        x = self.top_class(x)
        # return raw logits; torch.nn.CrossEntropyLoss applies log-softmax internally
        return x
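Before wiring up the data, it is worth a quick sanity check that the network builds and produces one output per class. A minimal sketch with a random dummy batch:

model = EfficientNetB0()
dummy = torch.randn(2, channel, img_height, img_width)
print(model(dummy).shape)  # expected: torch.Size([2, 2]), one logit per class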
Define a function for reading the data. This function returns the image file paths and the augmentation flags as arrays; the train argument specifies whether the training or the test split should be read.
It loads labels.csv, reads the image path and the gender label of each row, and appends them to the lists.
# get train data
def data_load(path, hf=False, vf=False, rot=False, train=True):
    paths = []
    ts = []

    df = pd.read_csv(path + 'labels.csv')
    if train:
        _df = df.query('set == "train"')
    else:
        _df = df.query('set == "test"')

    data_num = len(_df)
    pbar = tqdm(total=data_num)

    for i, row in _df.iterrows():
        name = row['image']
        gender = row['gender']

        # [image path, horizontal flip?, vertical flip?]
        paths.append([path + 'images_256/' + name, False, False])
        ts.append(gender)

        # data augmentation by flipping
        if hf:
            paths.append([path + 'images_256/' + name, True, False])
            ts.append(gender)
        if vf:
            paths.append([path + 'images_256/' + name, False, True])
            ts.append(gender)
        if hf and vf:
            paths.append([path + 'images_256/' + name, True, True])
            ts.append(gender)

        pbar.update(1)

    pbar.close()
    print()

    # dtype=object keeps the flip flags as real booleans
    # (a plain np.array would cast them to the truthy strings 'True'/'False')
    return np.array(paths, dtype=object), np.array(ts)
Next, define a function that reads the image data itself.
def get_image(paths):
    xs = []

    for info in paths:
        path, hf, vf = info

        x = cv2.imread(path)

        if channel == 1:
            x = cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)

        x = cv2.resize(x, (img_width, img_height)).astype(np.float32)
        x = x / 127.5 - 1  # scale pixels to [-1, 1]

        if channel == 3:
            x = x[..., ::-1]  # BGR -> RGB

        # data augmentation by flipping
        if hf:
            x = x[:, ::-1]
        if vf:
            x = x[::-1]

        xs.append(x)

    xs = np.array(xs, dtype=np.float32)
    if channel == 1:
        xs = np.expand_dims(xs, axis=-1)
    xs = np.transpose(xs, (0, 3, 1, 2))  # NHWC -> NCHW

    return xs
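As a quick check that the two functions fit together (a sketch, assuming the same Colab paths as the training code below), you can load the test split and convert the first entry into a network-ready batch:

paths, ts = data_load('drive/My Drive/Colab Notebooks/', train=False)
x = get_image(paths[:1])
print(x.shape, ts[0])  # expected: (1, 3, 128, 128) and the first gender label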
# train
def train():
    # model
    model = EfficientNetB0().to(device)
    opt = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    model.train()

    paths, ts = data_load('drive/My Drive/Colab Notebooks/', hf=True, vf=True, rot=False, train=True)

    # training
    mb = 32
    mbi = 0
    data_N = len(paths)
    train_ind = np.arange(len(paths))
    np.random.seed(0)
    np.random.shuffle(train_ind)

    # CrossEntropyLoss expects raw logits, as returned by the model's forward()
    loss_fn = torch.nn.CrossEntropyLoss()

    # start training
    for i in range(5000):
        # get minibatch indices, reshuffling once an epoch is exhausted
        if mbi + mb > data_N:
            mb_ind = copy.copy(train_ind)[mbi:]
            np.random.shuffle(train_ind)
            mb_ind = np.hstack((mb_ind, train_ind[:(mb - (data_N - mbi))]))
            mbi = mb - (data_N - mbi)
        else:
            mb_ind = train_ind[mbi: mbi + mb]
            mbi += mb

        # get x and t
        x = torch.tensor(get_image(paths[mb_ind]), dtype=torch.float).to(device)
        t = torch.tensor(ts[mb_ind], dtype=torch.long).to(device)

        opt.zero_grad()
        y = model(x)
        loss = loss_fn(y, t)
        loss.backward()
        opt.step()

        pred = y.argmax(dim=1, keepdim=True)
        acc = pred.eq(t.view_as(pred)).sum().item() / mb

        if (i + 1) % 10 == 0:
            print("iter >>", i + 1, ', loss >>', loss.item(), ', accuracy >>', acc)

    torch.save(model.state_dict(), 'drive/My Drive/Colab Notebooks/kaokore_gender_efficientnetB0.pt')
# test
def test():
    model = EfficientNetB0().to(device)
    model.eval()
    model.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/kaokore_gender_efficientnetB0.pt'))

    paths, ts = data_load('drive/My Drive/Colab Notebooks/', hf=False, vf=False, rot=False, train=False)

    accuracy = 0.

    with torch.no_grad():  # no gradients needed for evaluation
        for i in range(len(paths)):
            x = torch.tensor(get_image([paths[i]]), dtype=torch.float).to(device)

            pred = model(x)
            pred = pred.detach().cpu().numpy()[0]  # logits; argmax gives the predicted class

            if pred.argmax() == ts[i]:
                accuracy += 1

    accuracy_rate = accuracy / len(paths)
    print('Accuracy = {:.2f} ({} / {})'.format(accuracy_rate, accuracy, len(paths)))
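The script imports argparse, but the entry point is not shown in the post; a minimal sketch of how train() and test() might be wired up:

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Kaokore gender classification with EfficientNetB0')
    parser.add_argument('--train', dest='train', action='store_true', help='run training')
    parser.add_argument('--test', dest='test', action='store_true', help='run evaluation')
    args = parser.parse_args()

    if args.train:
        train()
    if args.test:
        test()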
With the code above, I trained on a GPU in Google Colaboratory.
The learning rate was chosen rather arbitrarily, though, so the result is not the best possible.

- Gender accuracy: 94% (493 of 527 images correct)
- Status accuracy: 72% (381 of 527 images correct)

Aiming for 100%!