[PYTHON] "Deep Learning from scratch" self-study memo (No. 19-2) Data Augmentation continued

While reading "Deep Learning from Scratch" (written by Koki Saitoh, published by O'Reilly Japan), I am keeping notes on the sites I referred to. Part 19 ← → Part 20

Last time, I used Pillow to process images for data augmentation; this time I would like to do the same processing without relying on that library.

Flip horizontal

import numpy as np
import math
import matplotlib.pyplot as plt

def flip_img(img):
  """Return `img` mirrored left-to-right.

  The second axis (the x direction) is read in reverse order while the first
  and third axes are taken unchanged. The input therefore needs at least
  three axes, e.g. (height, width, channels).
  """
  everything = slice(None)
  backwards = slice(None, None, -1)
  return img[everything, backwards, everything]

Just treat the image as an np.array and reverse the x-axis (the second index) with `::-1`.

Move

def trans_horiz_img(img, r=0.1):
  """Shift an image horizontally, filling the vacated columns with zeros.

  Args:
    img: array of shape (height, width, channels).
    r: shift as a fraction of the image width. Positive values move the
      content to the right, negative values to the left. The pixel shift is
      int(width * r), i.e. truncated toward zero.

  Returns:
    A new array with the same shape and dtype as `img`. If the shift is at
    least one full width in either direction the result is all zeros.
  """
  _, w, _ = img.shape
  imgr = np.zeros_like(img)
  # Clamp to [-w, w]: beyond one full width the source and destination slices
  # would have different sizes and the assignment would raise ValueError.
  wr = max(-w, min(w, int(w * r)))
  if wr > 0:
    imgr[:, wr:, :] = img[:, :w - wr, :]
  else:
    # wr <= 0: drop the first -wr columns and left-align the rest.
    imgr[:, :w + wr, :] = img[:, -wr:, :]
  return imgr

def trans_vert_img(img, r=0.1):
  """Shift an image vertically, filling the vacated rows with zeros.

  Args:
    img: array of shape (height, width, channels).
    r: shift as a fraction of the image height. Positive values move the
      content toward higher row indices, negative values toward lower ones.
      The pixel shift is int(height * r), i.e. truncated toward zero.

  Returns:
    A new array with the same shape and dtype as `img`. If the shift is at
    least one full height in either direction the result is all zeros.
  """
  h, _, _ = img.shape
  imgr = np.zeros_like(img)
  # Clamp to [-h, h]: beyond one full height the source and destination
  # slices would have different sizes and the assignment would raise
  # ValueError.
  hr = max(-h, min(h, int(h * r)))
  if hr > 0:
    imgr[hr:, :, :] = img[:h - hr, :, :]
  else:
    # hr <= 0: drop the first -hr rows and top-align the rest.
    imgr[:h + hr, :, :] = img[-hr:, :, :]
  return imgr

def trans_img(img, rw=0.1, rh=0.1):
  """Shift an image vertically and then horizontally.

  Args:
    img: image array accepted by trans_vert_img / trans_horiz_img.
    rw: horizontal shift, passed to trans_horiz_img as its ratio `r`.
    rh: vertical shift, passed to trans_vert_img as its ratio `r`.

  Returns:
    The array returned by trans_horiz_img after the vertical shift has
    already been applied.
  """
  return trans_horiz_img(trans_vert_img(img, rh), rw)

This, too, simply moves elements of the array.

rotation

I tried to rotate it using trigonometric functions.

import math

def rotate_img(img, deg=30):
  """Rotate an image about its centre by forward mapping (leaves holes).

  Every source pixel is pushed to its rotated position and the position is
  truncated to integer indices with int(). Some destination pixels are never
  written that way and keep the value zero; positions that fall outside the
  image are discarded.

  Args:
    img: array of shape (height, width, channels).
    deg: rotation angle in degrees.

  Returns:
    A new array with the same shape and dtype as `img`.
  """
  out = np.zeros_like(img)
  # The leading minus adjusts for the y-axis of the image coordinate system.
  theta = -math.radians(deg)
  cos_t = math.cos(theta)
  sin_t = math.sin(theta)
  height, width, _ = img.shape
  cy = height / 2
  cx = width / 2

  # Visit the source pixels row by row; later writes overwrite earlier ones.
  pixels = ((row, col) for row in range(height) for col in range(width))
  for row, col in pixels:
    dy = row - cy
    dx = col - cx
    tx = int(dx*cos_t - dy*sin_t + cx)
    ty = int(dy*cos_t + dx*sin_t + cy)
    if 0 <= tx < width and 0 <= ty < height:
      out[ty, tx, :] = img[row, col, :]

  return out

# Rotate the sample image by -30 degrees and draw the result in the first
# cell of a 1x8 subplot grid. `img` must already hold the image array; it is
# not assigned in this snippet.
imgR=rotate_img(img, -30)
fig = plt.figure(figsize=(40, 40))
im = fig.add_subplot(1, 8, 1)
im.imshow(imgR, cmap=plt.cm.binary)

Rounding the converted coordinates to integers leaves a strange pattern of unfilled pixels. If a result that merely looks about right is good enough, even the following sloppy approach gives a passable image as long as the rotation is less than 90 degrees.

def rotate_img(img, deg=30):
  """Forward-mapping rotation with a crude workaround for the holes.

  Works like plain forward mapping (each source pixel is pushed to its
  rotated, int()-truncated position), but additionally copies the source
  pixel's right-hand neighbour into the slot to the right of the destination.
  The angle is ignored for that second copy, so this is only a visual patch.
  The last source column is never used as a starting point because it has no
  right-hand neighbour.

  Args:
    img: array of shape (height, width, channels).
    deg: rotation angle in degrees.

  Returns:
    A new array with the same shape and dtype as `img`.
  """
  out = np.zeros_like(img)
  # The leading minus adjusts for the y-axis of the image coordinate system.
  theta = -math.radians(deg)
  cos_t = math.cos(theta)
  sin_t = math.sin(theta)
  height, width, _ = img.shape
  cy = height / 2
  cx = width / 2
  last_col = width - 1

  for row in range(height):
    dy = row - cy
    for col in range(last_col):
      dx = col - cx
      tx = int(dx*cos_t - dy*sin_t + cx)
      ty = int(dy*cos_t + dx*sin_t + cy)
      if not (0 <= tx < last_col and 0 <= ty < height):
        continue
      out[ty, tx, :] = img[row, col, :]
      # Patch the likely hole: copy the right neighbour one slot to the right.
      out[ty, tx + 1, :] = img[row, col + 1, :]

  return out

As expected, Qiita already has a proper explanation of the method: "What do you use trigonometric functions for? Image Rotation and Matrices".

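Following that explanation, I rewrote the function so that it does not leave holes: instead of pushing each source pixel to its rotated position, it loops over the pixels of the rotated image and looks up the source pixel each one comes from.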

def rotate_img(img, deg=30):
  """Rotate an image about its centre by inverse mapping (no holes).

  Each pixel of the rotated image looks up the source pixel it comes from, so
  every destination pixel is visited exactly once. Destination pixels whose
  source lies outside the image stay zero.

  Args:
    img: array of shape (height, width, channels).
    deg: rotation angle in degrees. Positive values turn the picture
      counter-clockwise when row 0 is drawn at the top.

  Returns:
    A new array with the same shape and dtype as `img`.
  """
  imgr = np.zeros_like(img)
  rad = math.radians(deg)
  mc = math.cos(rad)
  ms = math.sin(rad)
  h, w, _ = img.shape
  h0 = h/2
  w0 = w/2

  # x, y   : coordinates in the rotated image
  # x0, y0 : the same point with the image centre as origin
  for y in range(h):
    y0 = y - h0
    for x in range(w):
      x0 = x - w0
      # xx, yy: coordinates before rotation. floor() rather than int():
      # int() truncates toward zero, which turns source positions in (-1, 0)
      # into index 0 and makes pixels just outside the image copy the border.
      xx = math.floor(x0*mc - y0*ms + w0)
      yy = math.floor(y0*mc + x0*ms + h0)
      if 0 <= xx < w and 0 <= yy < h:
        imgr[y, x, :] = img[yy, xx, :]

  return imgr

Scale

First, I created a function to resize the array.

def resize_img(img, wr, hr):
  """Resize an image to `wr` x `hr` pixels with bilinear interpolation.

  Each output pixel is mapped back to a (generally fractional) position in
  the source image and its value is the weighted mean of the four surrounding
  source pixels.

  Args:
    img: array of shape (height, width, channels).
    wr: width of the resized image in pixels.
    hr: height of the resized image in pixels.

  Returns:
    A new array of shape (hr, wr, channels). Floating-point images keep their
    dtype; any other input yields an int array, with interpolated values
    truncated on assignment.
  """
  h, w, c = img.shape
  rw = wr / w
  rh = hr / h
  # Use the input's channel count instead of a fixed 3, and keep float dtypes
  # so that e.g. images scaled to 0..1 are not truncated to all zeros.
  out_dtype = img.dtype if np.issubdtype(img.dtype, np.floating) else int
  imgr = np.zeros((hr, wr, c), dtype=out_dtype)

  # x, y are coordinates in the resized image
  for y in range(hr):
    y0 = int(y / rh)  # y0, y1: neighbouring source rows
    dy = y/rh - y0    # fractional distance from y0 toward y1
    y1 = y0 + 1
    if y1 >= h:
      y1 = y0         # no row below the last one: reuse it
    for x in range(wr):
      x0 = int(x / rw)  # x0, x1: neighbouring source columns
      dx = x/rw - x0    # fractional distance from x0 toward x1
      x1 = x0 + 1
      if x1 >= w:
        x1 = x0         # no column right of the last one: reuse it

      imgr[y, x, :] = img[y0, x0, :]*(1-dx)*(1-dy) \
                    + img[y1, x0, :]*(1-dx)*dy \
                    + img[y0, x1, :]*dx*(1-dy) \
                    + img[y1, x1, :]*dx*dy

  return imgr

Each pixel of the resized array is mapped to a position that falls between pixels of the original array, and its value is set by linearly interpolating the values of the original pixels surrounding that position. I referred to this site: "Enlargement / reduction of images in C language (linear interpolation)".

Next, I created a function that cuts out the same size as the original image from the center of the enlarged image.

def cropp_img(img, rw=1.2, rh=1.4):
  """Scale an image about its centre while keeping the original array size.

  The image is resized with resize_img to int(width * rw) x int(height * rh).
  Along each axis, a factor above 1 means the central part of the resized
  image is cut out; otherwise the whole resized image is placed in the middle
  of a zero-filled canvas.

  Args:
    img: array of shape (height, width, channels).
    rw: horizontal magnification.
    rh: vertical magnification.

  Returns:
    A new array with the same shape and dtype as `img`.
  """
  def centred_spans(size, scaled, enlarged):
    # Returns (src_start, src_stop, dst_start, dst_stop) for one axis.
    if enlarged:
      start = int((scaled - size)/2)
      return start, start + size, 0, size
    start = int((size - scaled)/2)
    return 0, scaled, start, start + scaled

  canvas = np.zeros_like(img)
  height, width, _ = img.shape
  new_w = int(width * rw)
  new_h = int(height * rh)
  resized = resize_img(img, new_w, new_h)

  src_x0, src_x1, dst_x0, dst_x1 = centred_spans(width, new_w, rw > 1)
  src_y0, src_y1, dst_y0, dst_y1 = centred_spans(height, new_h, rh > 1)

  canvas[dst_y0:dst_y1, dst_x0:dst_x1, :] = resized[src_y0:src_y1, src_x0:src_x1, :]
  return canvas

Execution result

import numpy as np
import math 
import matplotlib.pyplot as plt

# Take one training image and build every augmented variant from it.
img = dataset['train_img'][1] # np.array
imgC = cropp_img(img, 1.3, 1.5)
imgF = flip_img(img)
imgR = rotate_img(img, -30)
imgH = trans_horiz_img(img,0.2)
imgV = trans_vert_img(img, -0.1)
imgT = trans_img(img, 0.1, 0.1)

# Draw the original followed by each variant, left to right, in a 1x8 grid.
fig = plt.figure(figsize=(20, 20))
for slot, picture in enumerate((img, imgC, imgF, imgR, imgH, imgV, imgT), start=1):
  im = fig.add_subplot(1, 8, slot)
  im.imshow(picture, cmap=plt.cm.binary)

There was no real need to program by hand what Pillow can already do, but it was a good refresher on trigonometric functions and helped me learn the algorithms. For Kaggle's cat and dog dataset, see Part 6-2.

Part 19 ← → Part 20

Click here for the table of contents of the memo Unreadable Glossary