While reading "Deep Learning from Scratch" (written by Koki Saitoh, published by O'Reilly Japan), I am making notes of the sites I referred to. Part 19 ← → Part 20
Last time, I used Pillow to process images for data augmentation; this time I would like to do the same processing without using that library.
import numpy as np
import math
import matplotlib.pyplot as plt
def flip_img(img):
    """Return a horizontally mirrored view of an image array.

    Args:
        img: NumPy array whose axis 1 is the x-axis, e.g. shape (h, w, c)
            or (h, w).

    Returns:
        ``img`` with axis 1 reversed. This is a basic slice, so the result
        is a view on the input rather than a copy.
    """
    # Indexing only the first two axes leaves any trailing axes untouched,
    # so grayscale (h, w) arrays work as well as (h, w, c) ones.
    return img[:, ::-1]
Just treat the image as an np.array and reverse the x-axis with the slice `::-1`.
def trans_horiz_img(img, r=0.1):
    """Shift an image horizontally, filling the vacated columns with zeros.

    Args:
        img: NumPy image array whose axis 1 is the x-axis, e.g. shape
            (h, w, c) or (h, w).
        r: Shift as a fraction of the image width. Positive values move the
            content to the right, negative values to the left. The shift in
            pixels is ``int(w * r)`` (truncated toward zero).

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    w = img.shape[1]
    imgr = np.zeros_like(img)
    # Clamp to [-w, w]: a shift of a full width or more leaves nothing
    # visible. Without the clamp the two slices below would have different
    # widths and the assignment would raise a broadcast error.
    wr = max(-w, min(w, int(w * r)))
    if wr > 0:
        imgr[:, wr:] = img[:, :w - wr]
    else:
        # Also covers wr == 0, where both slices span the whole width.
        imgr[:, :w + wr] = img[:, -wr:]
    return imgr
def trans_vert_img(img, r=0.1):
    """Shift an image vertically, filling the vacated rows with zeros.

    Args:
        img: NumPy image array whose axis 0 is the y-axis, e.g. shape
            (h, w, c) or (h, w).
        r: Shift as a fraction of the image height. Positive values move the
            content toward higher row indices, negative values toward lower
            ones. The shift in pixels is ``int(h * r)`` (truncated toward
            zero).

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    h = img.shape[0]
    imgr = np.zeros_like(img)
    # Clamp to [-h, h]: a shift of a full height or more leaves nothing
    # visible. Without the clamp the two slices below would have different
    # heights and the assignment would raise a broadcast error.
    hr = max(-h, min(h, int(h * r)))
    if hr > 0:
        imgr[hr:] = img[:h - hr]
    else:
        # Also covers hr == 0, where both slices span the whole height.
        imgr[:h + hr] = img[-hr:]
    return imgr
def trans_img(img, rw=0.1, rh=0.1):
    """Shift an image along both axes.

    The vertical shift (fraction ``rh`` of the height) is applied first and
    the horizontal shift (fraction ``rw`` of the width) is applied to its
    result.

    Args:
        img: Image array accepted by ``trans_vert_img`` and
            ``trans_horiz_img``.
        rw: Horizontal shift ratio passed to ``trans_horiz_img``.
        rh: Vertical shift ratio passed to ``trans_vert_img``.

    Returns:
        The shifted image returned by ``trans_horiz_img``.
    """
    return trans_horiz_img(trans_vert_img(img, rh), rw)
This, too, simply moves the elements of the array.
I tried to rotate it using trigonometric functions.
import math
def rotate_img(img, deg=30):
    """Rotate an image by pushing every source pixel to its rotated position.

    Each source pixel is rotated about the point (w/2, h/2) and written to
    the integer-truncated target coordinate. Target pixels that no source
    pixel lands on keep the value zero, which is what produces the pattern
    of holes in the output.

    Args:
        img: Image array of shape (h, w, c).
        deg: Rotation angle in degrees.

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    height, width, _ = img.shape
    rotated = np.zeros_like(img)
    # The leading minus sign compensates for the direction of the y-axis
    # of the image coordinate system.
    angle = -math.radians(deg)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    center_y = height / 2
    center_x = width / 2
    for row in range(height):
        dy = row - center_y
        for col in range(width):
            dx = col - center_x
            dst_x = int(dx * cos_a - dy * sin_a + center_x)
            dst_y = int(dy * cos_a + dx * sin_a + center_y)
            # Drop pixels that are rotated out of the frame.
            if 0 <= dst_x < width and 0 <= dst_y < height:
                rotated[dst_y, dst_x, :] = img[row, col, :]
    return rotated
# Rotate the sample image by -30 degrees with rotate_img() and display it.
# NOTE(review): `img` is not assigned before this point in the visible file
# (it is loaded further down) — confirm it exists when this snippet runs.
imgR=rotate_img(img, -30)
fig = plt.figure(figsize=(40, 40))
# Use the first of eight slots in a single row.
im = fig.add_subplot(1, 8, 1)
im.imshow(imgR, cmap=plt.cm.binary)
Rounding during the coordinate conversion leaves a strange pattern of holes. If an approximate result is good enough, even the sloppy workaround below gives a reasonable-looking image as long as the rotation is less than 90 degrees.
def rotate_img(img, deg=30):
    """Rotate an image by forward mapping, with a crude hole-filling step.

    Works like the plain forward-mapping version, but for every source pixel
    that lands inside the frame its right-hand neighbour is also copied to
    the pixel to the right of the target, regardless of the rotation angle.
    The last source column is therefore only ever written as a neighbour.

    Args:
        img: Image array of shape (h, w, c).
        deg: Rotation angle in degrees.

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    height, width, _ = img.shape
    rotated = np.zeros_like(img)
    # The leading minus sign compensates for the direction of the y-axis
    # of the image coordinate system.
    angle = -math.radians(deg)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    center_y = height / 2
    center_x = width / 2
    last_col = width - 1
    for row in range(height):
        dy = row - center_y
        # Stop one column early so that col + 1 is always a valid source.
        for col in range(last_col):
            dx = col - center_x
            dst_x = int(dx * cos_a - dy * sin_a + center_x)
            dst_y = int(dy * cos_a + dx * sin_a + center_y)
            # dst_x must leave room for the neighbour written at dst_x + 1.
            if 0 <= dst_x < last_col and 0 <= dst_y < height:
                rotated[dst_y, dst_x, :] = img[row, col, :]
                # Ignoring the angle, copy the pixel on the right to the
                # position on the right of the target.
                rotated[dst_y, dst_x + 1, :] = img[row, col + 1, :]
    return rotated
As expected, Qiita already has a proper explanation and method: "What do you use trigonometric functions for? Image Rotation and Matrix".
Since there was a proper explanation, I rewrote the function in a way that does not leave holes.
def rotate_img(img, deg=30):
    """Rotate an image about its centre without leaving holes.

    Uses inverse mapping: for every pixel of the output, the coordinate it
    came from in the source image is computed and that source pixel is
    copied. Output pixels whose source coordinate lies outside the image
    stay zero. With row 0 displayed at the top, a positive ``deg`` turns the
    content counter-clockwise.

    Args:
        img: Image array of shape (h, w, c) or (h, w).
        deg: Rotation angle in degrees.

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    imgr = np.zeros_like(img)
    rad = math.radians(deg)
    mc = math.cos(rad)
    ms = math.sin(rad)
    h, w = img.shape[:2]
    h0 = h / 2
    w0 = w / 2
    # y, x are the coordinates of the rotated image;
    # y0, x0 are the same coordinates with the image centre as the origin.
    y, x = np.indices((h, w))
    y0 = y - h0
    x0 = x - w0
    # xx, yy are the coordinates before rotation. floor() is used instead of
    # int(): truncation toward zero would map source coordinates in (-1, 0)
    # to 0 and smear the first row/column into the border.
    xx = np.floor(x0 * mc - y0 * ms + w0).astype(int)
    yy = np.floor(y0 * mc + x0 * ms + h0).astype(int)
    inside = (xx >= 0) & (xx < w) & (yy >= 0) & (yy < h)
    imgr[inside] = img[yy[inside], xx[inside]]
    return imgr
First, I created a function to resize the array.
def resize_img(img, wr, hr):
    """Resize an image to ``wr`` x ``hr`` pixels with bilinear interpolation.

    Every output pixel is mapped back to a (generally fractional) position
    in the source image and its value is the weighted mean of the four
    surrounding source pixels. At the right and bottom edges the missing
    neighbour is replaced by the edge pixel itself.

    Args:
        img: Image array of shape (h, w, c).
        wr: Width of the resized image in pixels.
        hr: Height of the resized image in pixels.

    Returns:
        Array of shape (hr, wr, c). Floating-point images keep their dtype;
        every other input is returned as ``int`` with the interpolated
        values truncated toward zero.
    """
    h, w, c = img.shape
    rw = wr / w
    rh = hr / h
    # Keep float images as floats: casting to int would collapse
    # normalised 0..1 data to all zeros and ones.
    out_dtype = img.dtype if np.issubdtype(img.dtype, np.floating) else int

    # Coordinates before resizing for every output row and column.
    src_y = np.arange(hr) / rh
    src_x = np.arange(wr) / rw
    y0 = src_y.astype(int)
    x0 = src_x.astype(int)
    # Neighbour one step down/right, held at the last row/column.
    y1 = np.minimum(y0 + 1, h - 1)
    x1 = np.minimum(x0 + 1, w - 1)
    # Fractional parts, shaped to broadcast over (hr, wr, c).
    dy = (src_y - y0)[:, None, None]
    dx = (src_x - x0)[None, :, None]

    rows0, rows1 = y0[:, None], y1[:, None]
    cols0, cols1 = x0[None, :], x1[None, :]
    imgr = (img[rows0, cols0] * (1 - dx) * (1 - dy)
            + img[rows1, cols0] * (1 - dx) * dy
            + img[rows0, cols1] * dx * (1 - dy)
            + img[rows1, cols1] * dx * dy)
    # Sized from the input's channel count instead of a hard-coded 3.
    return imgr.astype(out_dtype).reshape(hr, wr, c)
Each pixel of the resized array is mapped to a position between the pixels of the original array, and its value is set by linearly interpolating the original pixels around that position. I referred to this site: "Enlargement / reduction of images in C language (linear interpolation)".
Next, I created a function that cuts out the same size as the original image from the center of the enlarged image.
def cropp_img(img, rw=1.2, rh=1.4):
    """Scale an image and fit the result into a frame of the original size.

    The image is resized by the factors ``rw`` (width) and ``rh`` (height).
    Along an axis whose factor is greater than 1 the central part of the
    resized image is cut out; otherwise the whole resized axis is placed in
    the centre of the frame and the rest stays zero.

    Args:
        img: Image array of shape (h, w, c).
        rw: Horizontal magnification.
        rh: Vertical magnification.

    Returns:
        A new array with the same shape and dtype as ``img``.
    """
    def _axis_spans(size, scaled, ratio):
        """Return (src_start, src_end, dst_start, dst_end) for one axis."""
        if ratio > 1:
            # Enlarged: take ``size`` pixels starting half the excess in.
            start = int((scaled - size) / 2)
            return start, start + size, 0, size
        # Not enlarged: centre the whole scaled axis inside the frame.
        offset = int((size - scaled) / 2)
        return 0, scaled, offset, offset + scaled

    height, width, _ = img.shape
    canvas = np.zeros_like(img)
    scaled_w = int(width * rw)
    scaled_h = int(height * rh)
    scaled = resize_img(img, scaled_w, scaled_h)

    src_x0, src_x1, dst_x0, dst_x1 = _axis_spans(width, scaled_w, rw)
    src_y0, src_y1, dst_y0, dst_y1 = _axis_spans(height, scaled_h, rh)

    canvas[dst_y0:dst_y1, dst_x0:dst_x1, :] = scaled[src_y0:src_y1, src_x0:src_x1, :]
    return canvas
Execution result
import numpy as np
import math
import matplotlib.pyplot as plt
# Take one training image and build every augmented variant of it.
img = dataset['train_img'][1]  # np.array
imgC = cropp_img(img, 1.3, 1.5)
imgF = flip_img(img)
imgR = rotate_img(img, -30)
imgH = trans_horiz_img(img, 0.2)
imgV = trans_vert_img(img, -0.1)
imgT = trans_img(img, 0.1, 0.1)

# Show the original followed by the variants, side by side in one row of
# eight slots (the last slot stays empty).
fig = plt.figure(figsize=(20, 20))
for slot, picture in enumerate((img, imgC, imgF, imgR, imgH, imgV, imgT), start=1):
    im = fig.add_subplot(1, 8, slot)
    im.imshow(picture, cmap=plt.cm.binary)
I did not need to program this myself, since Pillow can already do all of it, but it reminded me of trigonometric functions and helped me learn the algorithms. For Kaggle's cat and dog dataset, Part 6-2
Click here for the table of contents of the memo. Unreadable Glossary
Recommended Posts