[PYTHON] G test prep: an OCR convolution calculation tool

Overview

This is my first post. Before taking the G test, I wanted to see whether I could somehow automate the convolution calculations that are rumored to appear on it. I don't think this is the kind of calculation humans should be grinding through by hand.

Internally it uses:

  - PySimpleGUI (I really wanted to use Kivy)
  - Region selection by mouse drag, using OpenCV
  - Character recognition with Tesseract OCR

so I hope each of these parts can serve as a reference on its own. Since it uses win32api, it does not run on UNIX as-is. I'm more used to Kivy and think it's cooler, so I wanted to use it, but I tried PySimpleGUI as a learning exercise. Kivy is modern and cool though, so I do recommend it.

The code is also below. https://github.com/poohsanforest/ConvCalcOCR

Required modules
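
Judging from the imports in the code below, an environment along these lines should work. This is only a sketch inferred from the imports (the original does not list packages or versions, so the package names are my assumption):

#Assumed environment, inferred from the imports (not stated explicitly in the original)
# pip install PySimpleGUI numpy opencv-python pywin32 pyocr Pillow scipy
#Tesseract OCR itself must also be installed separately so that pyocr can find it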

How to use

  1. Enter the name of the window you want to read in the upper field and press the button on the right; the captured screen is displayed below. If you enter a Chrome tab name, only a black screen is captured, so it is better to enter a Firefox tab name. (Perhaps Chrome can't be captured without ChromeDriver?)
  2. When you press the filter button, a pop-up will appear. Select the convolution filter part so that the grid fits.
  3. Use the target button to select the target you want to convolve.
  4. If any of the numbers read into the text boxes are wrong, correct them by hand. (A certain percentage are misrecognized.)
  5. Press the calculate button to display the convolution calculation result.

Test image (open in Firefox)

Appearance part

The GUI part is built with PySimpleGUI. The redefine_rectangle and draw_shape functions may be useful as a reference for getting coordinates by dragging the mouse.

import sys
import PySimpleGUI as sg
import numpy as np
import cv2
import win32gui
import win32ui
import win32con
import pyocr
import re
from PIL import Image
from scipy import signal


#Since I am using win32api, it does not work on unix as it is
class ConvCalc():
    def __init__(self, **kwargs):
        self.drawing = False
        self.flag_drawing = False
        self.flag_filter = True
        self.roi_filter = [[0, 0], [50, 50]]
        self.roi_target = [[0, 0], [50, 50]]
        self.roi_temp = [[0, 0], [50, 50]]
        self.image_filter = np.zeros((300, 300, 3), np.uint8)
        self.image_target = np.zeros((300, 300, 3), np.uint8)
        self.image_capture = np.zeros((300, 300, 3), np.uint8)
        self.image_copy = np.zeros((300, 300, 3), np.uint8)
        self.tool = None

    #Start selecting rectangle
    def redefine_rectangle(self, cols, rows, color):
        cv2.namedWindow("set ROI")
        cv2.setMouseCallback("set ROI", self.draw_shape)
        self.flag_drawing = True

        while 1:
            image_rect = rectangle_grid(self.image_copy.copy(),
                                    tuple(self.roi_filter[0] if self.flag_filter else self.roi_target[0]),
                                    tuple(self.roi_filter[1] if self.flag_filter else self.roi_target[1]),
                                    color,
                                    cols, rows,
                                    1)

            cv2.imshow("set ROI", image_rect)
            cv2.waitKey(1)

            #× Button detection
            ret = cv2.getWindowProperty('set ROI', cv2.WND_PROP_ASPECT_RATIO)
            
            #Finish when drawing is done; I wonder whether the × button should just break without processing
            if not self.flag_drawing or ret == -1.0:
                #Split image creation
                divided_image = divide_image(self.image_copy, self.roi_filter if self.flag_filter else self.roi_target, cols, rows, 0.08)
                score_list = []
                for c, divided in enumerate(divided_image):
                    txt = self.tool.image_to_string(
                        Image.fromarray(cv2.cvtColor(divided, cv2.COLOR_BGR2RGB)),
                        lang="eng",
                        # builder=pyocr.builders.TextBuilder(tesseract_layout=6),
                        builder=pyocr.tesseract.DigitBuilder(tesseract_layout=6),
                    )
                    score_list.append(float(fix_num(txt)))

                #Reshape to dimension of rowcol
                score_list_reshaped = fix_textbox(score_list, rows, cols)

                #Reflect the recognized numerical value
                if self.flag_filter:
                    self.image_filter = image_rect
                    self.window['filterarray'].update(score_list_reshaped)
                else:
                    self.image_target = image_rect
                    self.window['targetarray'].update(score_list_reshaped)

                break
        
        #Draw the final range
        self.image_capture = rectangle_grid(self.image_target.copy() if self.flag_filter else self.image_filter.copy(),
                                                tuple(self.roi_filter[0] if self.flag_filter else self.roi_target[0]),
                                                tuple(self.roi_filter[1] if self.flag_filter else self.roi_target[1]),
                                                color,
                                                cols, rows,
                                                1)
        cv2.destroyWindow("set ROI")

    #Draw while being careful not to cause an error when the size is 0
    def draw_shape(self, event, x, y, flag, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drawing = True
            self.roi_temp[0][1] = y
            self.roi_temp[0][0] = x

        elif event == cv2.EVENT_MOUSEMOVE:
            if self.flag_filter: #Is there a smarter way?
                if not (self.roi_filter[0][0] == x or self.roi_filter[0][1] == y) and self.drawing == True:
                        self.roi_filter = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)
            else:
                if not (self.roi_target[0][0] == x or self.roi_target[0][1] == y) and self.drawing == True:
                        self.roi_target = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)

        elif event == cv2.EVENT_LBUTTONUP:
            self.drawing = False
            if self.flag_filter:
                if not (self.roi_filter[0][0] == x or self.roi_filter[0][1] == y):
                    self.roi_filter = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)
            else:
                if not (self.roi_target[0][0] == x or self.roi_target[0][1] == y):
                    self.roi_target = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)

            self.flag_drawing = False

    # https://qiita.com/dario_okazaki/items/656de21cab5c81cabe59
    def main(self):
        #Section 1-Option settings and standard layout
        sg.theme('Dark Blue 3')

        # filter frame
        filter_box = sg.Frame('FILTER', font='Any 15', layout=[
                    [sg.Text('kernelsize_col'), sg.Input("3", size=(3,None), key='kernelsize_0')],
                    [sg.Text('kernelsize_row'), sg.Input("3", size=(3,None), key='kernelsize_1')],
                    [sg.Button('filter', key='filter')],
                    [sg.Image(filename='', key='image_filter')],
                    [sg.Multiline('[0.0, 0.1, 0.2],\n[0.1, 0.2, 0.4],\n[0.0, 0.2, 0.4]', size=(24, 12), key='filterarray')],
                    ])

        target_box = sg.Frame('TARGET', font='Any 15', layout=[
                    [sg.Text('targetsize_col'), sg.Input("5", size=(3,None), key='targetsize_0')],
                    [sg.Text('targetsize_row'), sg.Input("5", size=(3,None), key='targetsize_1')],
                    [sg.Button('target', key='target')],
                    [sg.Image(filename='', key='image_target')],
                    [sg.Multiline('[0.0, 2.0, 8.0, 3.0, 5.0],\n[5.0, 1.0, 7.0, 6.0, 3.0],\n[2.0, 9.0, 3.0, 2.0, 1.0],\n[5.0, 4.0, 1.0, 4.0, 8.0],\n[0.0, 5.0, 2.0, 8.0, 5.0]',size=(24, 12), key='targetarray')],
                    ])

        calculated_box = sg.Frame('CALCULATED', font='Any 15', layout=[
                    [sg.Text('stride'), sg.Input("2", size=(3,None), key='stride')],
                    [sg.Button('calculate', key='calculate')],
                    [sg.Multiline('calculated',size=(24, 12),key='text_calculated')],
                    ])

        layout = [
                # [sg.Text('Type the window name and press the button to capture it.\nIf it couldnt find the one, it will capture a whole area of a primary display.')],
                [sg.Text('Enter the window name and click the button\n If not found, capture the main display as it is')],
                [sg.Input("FireFox", size=(40,None), key='windowname'), sg.Button('windowcapture', key='windowcapture')],
                [sg.Image(filename='', key='image_capture', size=(600,300), )],
                [filter_box, target_box, calculated_box],
                ]

        #Section 2-Window generation
        self.window = sg.Window('ConvCalcOCR', layout)

        # init OCR
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("Will use tool '%s'" % (self.tool.get_name()))

        #Section 3-Event loop
        while True:
            event, values = self.window.read()

            if event is None:
                print('exit')
                break

            elif event == 'windowcapture':
                self.image_capture = cv2.cvtColor(WindowCapture(values['windowname']), cv2.COLOR_BGRA2BGR)
                self.image_copy = self.image_target = self.image_filter = self.image_capture.copy()
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'filter':
                self.flag_filter = True #Is there a smarter way?
                self.redefine_rectangle(int(values['kernelsize_0']), int(values['kernelsize_1']), (128, 128, 0))
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'target':
                self.flag_filter = False
                self.redefine_rectangle(int(values['targetsize_0']), int(values['targetsize_1']), (0, 0, 255))
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'calculate':
                calculated = strideConv(values['targetarray'], values['filterarray'], int(values['stride']))
                calculated = np.round(calculated, decimals=2) #Because there may be many decimal places due to calculation error
                calculated_shape = np.array(calculated).shape
                
                self.window['text_calculated'].update(fix_textbox(calculated.tolist(), calculated_shape[0], calculated_shape[1]))

        #Section 4-Destroy and close windows
        self.window.close()

Execution result

Grid type rectangle drawing

The selection rectangle is drawn with a grid so that it is easier to line up the range dragged with the mouse against the cells of the table.

#Rectangle with grid
def rectangle_grid(img, pt1, pt2, color, cols, rows, thickness=1, lineType=cv2.LINE_8, shift=None):
    space_x = abs(pt2[0] - pt1[0]) / cols
    space_y = abs(pt2[1] - pt1[1]) / rows

    for col in range(cols+1):
        img = cv2.line(img, (int(pt1[0]+col*space_x), pt1[1]), (int(pt1[0]+col*space_x), pt2[1]), color, thickness, lineType)
    for row in range(rows+1):
        img = cv2.line(img, (pt1[0], int(pt1[1]+row*space_y)), (pt2[0], int(pt1[1]+row*space_y)), color, thickness, lineType)

    return img
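
As a quick check, here is a minimal usage sketch; the blank canvas and the coordinates are illustrative assumptions, not from the original.

#Hypothetical example: draw a 3x3 grid rectangle on a blank canvas
canvas = np.zeros((200, 200, 3), np.uint8)
gridded = rectangle_grid(canvas, (20, 20), (170, 140), (0, 255, 0), cols=3, rows=3)
# cv2.imshow("grid preview", gridded); cv2.waitKey(0)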

Correction of range specification by dragging the mouse

When the range is specified by dragging the mouse, this guards against two problems: the error that occurs when the selection size is 0 at the very start of drawing, and coordinates going negative during the calculation. (I forgot to note the page I referred to. Sorry.)

#Negative coordinate support
def fix_coordinate(iy, ix, y, x):
    x_sorted = sorted([ix, x])
    y_sorted = sorted([iy, y])
    # relu
    return [[np.maximum(0, x_sorted[0]), np.maximum(0, y_sorted[0])],
            [np.maximum(0, x_sorted[1]), np.maximum(0, y_sorted[1])]]
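
A minimal sketch of what it returns; the coordinates are made up for illustration. Dragging from (x=30, y=40) up and to the left, past the window edge to (x=-5, y=-10), yields a sorted, clamped ROI:

#Hypothetical example: start point (ix=30, iy=40), current point (x=-5, y=-10)
roi = fix_coordinate(40, 30, -10, -5)
# roi == [[0, 0], [30, 40]] : corners sorted, negative values clamped to 0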

Image split

Split the image into the requested number of cells, then trim the perimeter of each cell by the eroderatio ratio so that the borders of the table in the problem statement are removed from the image.

#Image split
def divide_image(image, roi, cols, rows, eroderatio):
    #Crop
    cropped = image[roi[0][1]:roi[1][1], roi[0][0]:roi[1][0]]

    # https://pystyle.info/opencv-split-and-concat-images/
    chunks = []
    for row_cropped in np.array_split(cropped, rows, axis=0):
        for chunk in np.array_split(row_cropped, cols, axis=1):
            #Remove borders that tend to be judged as characters
            erode = int(eroderatio*min(chunk.shape[:2]))
            chunk = chunk[erode:chunk.shape[0]-erode, erode:chunk.shape[1]-erode]
            chunks.append(chunk)

    return chunks
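
A minimal sketch with a dummy image; the 100x100 size and the 2x2 split are illustrative assumptions.

#Hypothetical example: split a 100x100 dummy image into 2x2 cells
dummy = np.zeros((100, 100, 3), np.uint8)
cells = divide_image(dummy, [[0, 0], [100, 100]], cols=2, rows=2, eroderatio=0.08)
print(len(cells), cells[0].shape)  # 4 (42, 42, 3): each 50x50 cell loses a 4-pixel border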

Correction of read numbers

In many cases "." and "-" are not read correctly, so they are ignored for now and the number is reformatted on the assumption that, in the G test, the integer part is always a single digit.

#Formatting assuming that the integer part is one digit
def fix_num(num):
    fixed_num = re.sub("\\D", "", num) #Delete other than numbers
    if fixed_num == '' or fixed_num == '-': # "-"Was treated as a number
        return 0.0
    else:
        fixed_num = fixed_num[0] + '.' + fixed_num[1:]
        return fixed_num
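
For example (the inputs are made up; OCR typically drops the decimal point):

print(fix_num("04"))  #returns '0.4': the decimal point is reinserted after the first digit
print(fix_num("-"))   #returns 0.0: nothing recoverable, treated as zero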

Screen display of read numerical values

The list of values read by OCR contains no line breaks, so displaying it as-is breaks the layout; this function reshapes the list and inserts line feeds.

#Shaping for the display
def fix_textbox(score_list, rows, cols):
    score_array = np.reshape(np.array(score_list), (rows, cols))
    score_list_reshaped= ["{}".format(l) for l in score_array.tolist()]
    return ',\n'.join(score_list_reshaped)
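
A minimal sketch (the six values are illustrative):

print(fix_textbox([0.0, 0.1, 0.2, 0.1, 0.2, 0.4], rows=2, cols=3))
# [0.0, 0.1, 0.2],
# [0.1, 0.2, 0.4]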

Convolution calculation

Because the outer brackets of the list are stripped for screen display, the text in the text box is turned back into a nested list before the convolution is computed with scipy. Since the kernel is passed pre-flipped (arr2[::-1, ::-1]), convolve2d's internal flip cancels out and the result is the cross-correlation style "convolution" used in deep learning.

#Convolution calculation
# https://stackoverflow.com/questions/48097941/strided-convolution-of-2d-in-numpy/48098534
def strideConv(v1, v2, s):
    arr = np.array(eval('[' + v1 + ']'))
    arr2 = np.array(eval('[' + v2 + ']'))

    return signal.convolve2d(arr, arr2[::-1, ::-1], mode='valid')[::s, ::s]
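
A small worked example with made-up values: a 3x3 target and a 2x2 filter with stride 1.

target = "[1, 2, 3],[4, 5, 6],[7, 8, 9]"
kernel = "[1, 0],[0, 1]"
print(strideConv(target, kernel, 1))
# [[ 6  8]
#  [12 14]]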

Whole code

import sys
import PySimpleGUI as sg
import numpy as np
import cv2
import win32gui
import win32ui
import win32con
import pyocr
import re
from PIL import Image
from scipy import signal


#Since I am using win32api, it does not work on unix as it is
class ConvCalc():
    def __init__(self, **kwargs):
        self.drawing = False
        self.flag_drawing = False
        self.flag_filter = True
        self.roi_filter = [[0, 0], [50, 50]]
        self.roi_target = [[0, 0], [50, 50]]
        self.roi_temp = [[0, 0], [50, 50]]
        self.image_filter = np.zeros((300, 300, 3), np.uint8)
        self.image_target = np.zeros((300, 300, 3), np.uint8)
        self.image_capture = np.zeros((300, 300, 3), np.uint8)
        self.image_copy = np.zeros((300, 300, 3), np.uint8)
        self.tool = None

    #Start selecting rectangle
    def redefine_rectangle(self, cols, rows, color):
        cv2.namedWindow("set ROI")
        cv2.setMouseCallback("set ROI", self.draw_shape)
        self.flag_drawing = True

        while 1:
            image_rect = rectangle_grid(self.image_copy.copy(),
                                    tuple(self.roi_filter[0] if self.flag_filter else self.roi_target[0]),
                                    tuple(self.roi_filter[1] if self.flag_filter else self.roi_target[1]),
                                    color,
                                    cols, rows,
                                    1)

            cv2.imshow("set ROI", image_rect)
            cv2.waitKey(1)

            #× Button detection
            ret = cv2.getWindowProperty('set ROI', cv2.WND_PROP_ASPECT_RATIO)
            
            #Finish when drawing is done; I wonder whether the × button should just break without processing
            if not self.flag_drawing or ret == -1.0:
                #Split image creation
                divided_image = divide_image(self.image_copy, self.roi_filter if self.flag_filter else self.roi_target, cols, rows, 0.08)
                score_list = []
                for c, divided in enumerate(divided_image):
                    txt = self.tool.image_to_string(
                        Image.fromarray(cv2.cvtColor(divided, cv2.COLOR_BGR2RGB)),
                        lang="eng",
                        # builder=pyocr.builders.TextBuilder(tesseract_layout=6),
                        builder=pyocr.tesseract.DigitBuilder(tesseract_layout=6),
                    )
                    score_list.append(float(fix_num(txt)))

                #Reshape to dimension of rowcol
                score_list_reshaped = fix_textbox(score_list, rows, cols)

                #Reflect the recognized numerical value
                if self.flag_filter:
                    self.image_filter = image_rect
                    self.window['filterarray'].update(score_list_reshaped)
                else:
                    self.image_target = image_rect
                    self.window['targetarray'].update(score_list_reshaped)

                break
        
        #Draw the final range
        self.image_capture = rectangle_grid(self.image_target.copy() if self.flag_filter else self.image_filter.copy(),
                                                tuple(self.roi_filter[0] if self.flag_filter else self.roi_target[0]),
                                                tuple(self.roi_filter[1] if self.flag_filter else self.roi_target[1]),
                                                color,
                                                cols, rows,
                                                1)
        cv2.destroyWindow("set ROI")

    #Draw while being careful not to cause an error when the size is 0
    def draw_shape(self, event, x, y, flag, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drawing = True
            self.roi_temp[0][1] = y
            self.roi_temp[0][0] = x

        elif event == cv2.EVENT_MOUSEMOVE:
            if self.flag_filter: #Is there a smarter way?
                if not (self.roi_filter[0][0] == x or self.roi_filter[0][1] == y) and self.drawing == True:
                        self.roi_filter = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)
            else:
                if not (self.roi_target[0][0] == x or self.roi_target[0][1] == y) and self.drawing == True:
                        self.roi_target = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)

        elif event == cv2.EVENT_LBUTTONUP:
            self.drawing = False
            if self.flag_filter:
                if not (self.roi_filter[0][0] == x or self.roi_filter[0][1] == y):
                    self.roi_filter = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)
            else:
                if not (self.roi_target[0][0] == x or self.roi_target[0][1] == y):
                    self.roi_target = fix_coordinate(self.roi_temp[0][1], self.roi_temp[0][0], y, x)

            self.flag_drawing = False

    # https://qiita.com/dario_okazaki/items/656de21cab5c81cabe59
    def main(self):
        #Section 1-Option settings and standard layout
        sg.theme('Dark Blue 3')

        # filter frame
        filter_box = sg.Frame('FILTER', font='Any 15', layout=[
                    [sg.Text('kernelsize_col'), sg.Input("3", size=(3,None), key='kernelsize_0')],
                    [sg.Text('kernelsize_row'), sg.Input("3", size=(3,None), key='kernelsize_1')],
                    [sg.Button('filter', key='filter')],
                    [sg.Image(filename='', key='image_filter')],
                    [sg.Multiline('[0.0, 0.1, 0.2],\n[0.1, 0.2, 0.4],\n[0.0, 0.2, 0.4]', size=(24, 12), key='filterarray')],
                    ])

        target_box = sg.Frame('TARGET', font='Any 15', layout=[
                    [sg.Text('targetsize_col'), sg.Input("5", size=(3,None), key='targetsize_0')],
                    [sg.Text('targetsize_row'), sg.Input("5", size=(3,None), key='targetsize_1')],
                    [sg.Button('target', key='target')],
                    [sg.Image(filename='', key='image_target')],
                    [sg.Multiline('[0.0, 2.0, 8.0, 3.0, 5.0],\n[5.0, 1.0, 7.0, 6.0, 3.0],\n[2.0, 9.0, 3.0, 2.0, 1.0],\n[5.0, 4.0, 1.0, 4.0, 8.0],\n[0.0, 5.0, 2.0, 8.0, 5.0]',size=(24, 12), key='targetarray')],
                    ])

        calculated_box = sg.Frame('CALCULATED', font='Any 15', layout=[
                    [sg.Text('stride'), sg.Input("2", size=(3,None), key='stride')],
                    [sg.Button('calculate', key='calculate')],
                    [sg.Multiline('calculated',size=(24, 12),key='text_calculated')],
                    ])

        layout = [
                # [sg.Text('Type the window name and press the button to capture it.\nIf it couldnt find the one, it will capture a whole area of a primary display.')],
                [sg.Text('Enter the window name and click the button\n If not found, capture the main display as it is')],
                [sg.Input("FireFox", size=(40,None), key='windowname'), sg.Button('windowcapture', key='windowcapture')],
                [sg.Image(filename='', key='image_capture', size=(600,300), )],
                [filter_box, target_box, calculated_box],
                ]

        #Section 2-Window generation
        self.window = sg.Window('ConvCalcOCR', layout)

        # init OCR
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("Will use tool '%s'" % (self.tool.get_name()))

        #Section 3-Event loop
        while True:
            event, values = self.window.read()

            if event is None:
                print('exit')
                break

            elif event == 'windowcapture':
                self.image_capture = cv2.cvtColor(WindowCapture(values['windowname']), cv2.COLOR_BGRA2BGR)
                self.image_copy = self.image_target = self.image_filter = self.image_capture.copy()
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'filter':
                self.flag_filter = True #Is there a smarter way?
                self.redefine_rectangle(int(values['kernelsize_0']), int(values['kernelsize_1']), (128, 128, 0))
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'target':
                self.flag_filter = False
                self.redefine_rectangle(int(values['targetsize_0']), int(values['targetsize_1']), (0, 0, 255))
                img = scale_box(self.image_capture, 600, 300)
                imgbytes = cv2.imencode('.png', img)[1].tobytes()
                self.window['image_capture'].update(data=imgbytes)

            elif event == 'calculate':
                calculated = strideConv(values['targetarray'], values['filterarray'], int(values['stride']))
                calculated = np.round(calculated, decimals=2) #Because there may be many decimal places due to calculation error
                calculated_shape = np.array(calculated).shape
                
                self.window['text_calculated'].update(fix_textbox(calculated.tolist(), calculated_shape[0], calculated_shape[1]))

        #Section 4-Destroy and close windows
        self.window.close()


#Rectangle with grid
def rectangle_grid(img, pt1, pt2, color, cols, rows, thickness=1, lineType=cv2.LINE_8, shift=None):
    space_x = abs(pt2[0] - pt1[0]) / cols
    space_y = abs(pt2[1] - pt1[1]) / rows

    for col in range(cols+1):
        img = cv2.line(img, (int(pt1[0]+col*space_x), pt1[1]), (int(pt1[0]+col*space_x), pt2[1]), color, thickness, lineType)
    for row in range(rows+1):
        img = cv2.line(img, (pt1[0], int(pt1[1]+row*space_y)), (pt2[0], int(pt1[1]+row*space_y)), color, thickness, lineType)

    return img


#Image split
def divide_image(image, roi, cols, rows, eroderatio):
    #Crop
    cropped = image[roi[0][1]:roi[1][1], roi[0][0]:roi[1][0]]

    # https://pystyle.info/opencv-split-and-concat-images/
    chunks = []
    for row_cropped in np.array_split(cropped, rows, axis=0):
        for chunk in np.array_split(row_cropped, cols, axis=1):
            #Remove borders that tend to be judged as characters
            erode = int(eroderatio*min(chunk.shape[:2]))
            chunk = chunk[erode:chunk.shape[0]-erode, erode:chunk.shape[1]-erode]
            chunks.append(chunk)

    return chunks


#Negative coordinate support
def fix_coordinate(iy, ix, y, x):
    x_sorted = sorted([ix, x])
    y_sorted = sorted([iy, y])
    # relu
    return [[np.maximum(0, x_sorted[0]), np.maximum(0, y_sorted[0])],
            [np.maximum(0, x_sorted[1]), np.maximum(0, y_sorted[1])]]


#Formatting assuming that the integer part is one digit
def fix_num(num):
    fixed_num = re.sub("\\D", "", num) #Delete other than numbers
    if fixed_num == '' or fixed_num == '-': # "-"Was treated as a number
        return 0.0
    else:
        fixed_num = fixed_num[0] + '.' + fixed_num[1:]
        return fixed_num


#Shaping for the display
def fix_textbox(score_list, rows, cols):
    score_array = np.reshape(np.array(score_list), (rows, cols))
    score_list_reshaped= ["{}".format(l) for l in score_array.tolist()]
    return ',\n'.join(score_list_reshaped)


#Image resizing
# https://pystyle.info/opencv-resize/#outline__3_5
def scale_box(img, width, height):
    """Fix the aspect ratio and resize it so that it fits in the specified size.
    """
    h, w = img.shape[:2]
    aspect = w / h
    if width / height >= aspect:
        nh = height
        nw = round(nh * aspect)
    else:
        nw = width
        nh = round(nw / aspect)

    return cv2.resize(img, dsize=(nw, nh))


#Convolution calculation
# https://stackoverflow.com/questions/48097941/strided-convolution-of-2d-in-numpy/48098534
def strideConv(v1, v2, s):
    arr = np.array(eval('[' + v1 + ']'))
    arr2 = np.array(eval('[' + v2 + ']'))

    return signal.convolve2d(arr, arr2[::-1, ::-1], mode='valid')[::s, ::s]


#Window capture
# https://qiita.com/danupo/items/e196e0e07e704796cd42
def WindowCapture(window_name: str, bgr2rgb: bool = False):
    #Find the name of the currently active window
    process_list = []

    def callback(handle, _):
        process_list.append(win32gui.GetWindowText(handle))

    win32gui.EnumWindows(callback, None)

    #Find the target window name
    for process_name in process_list:
        if window_name in process_name:
            hnd = win32gui.FindWindow(None, process_name)
            break
    else:
        #Get the entire screen if not found
        hnd = win32gui.GetDesktopWindow()

    #Get window size
    x0, y0, x1, y1 = win32gui.GetWindowRect(hnd)
    width = x1 - x0
    height = y1 - y0
    #Get device context for a window
    windc = win32gui.GetWindowDC(hnd)
    srcdc = win32ui.CreateDCFromHandle(windc)
    memdc = srcdc.CreateCompatibleDC()
    #Copy pixel information from device context,bmp
    bmp = win32ui.CreateBitmap()
    bmp.CreateCompatibleBitmap(srcdc, width, height)
    memdc.SelectObject(bmp)
    memdc.BitBlt((0, 0), (width, height), srcdc, (0, 0), win32con.SRCCOPY)

    #Export bmp
    if bgr2rgb is True: 
        img = np.frombuffer(bmp.GetBitmapBits(True), np.uint8).reshape(height, width, 4)
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)  #the captured bitmap is BGRA, so convert BGRA -> RGBA
    else:
        img = np.frombuffer(bmp.GetBitmapBits(True), np.uint8).reshape(height, width, 4)

    #Clean up
    # srcdc.DeleteDC()
    memdc.DeleteDC()
    # win32gui.ReleaseDC(hnd, windc)
    win32gui.DeleteObject(bmp.GetHandle())

    return img


if __name__ == '__main__':
    ConvCalc().main()

Finally

  - There are probably many shortcomings, so please feel free to point them out.
  - How many convolution calculation questions actually appear on the G test, I wonder?
  - I think I should have spent the time I used making this on studying instead.
  - Next, I would like to write an article about Kivy.

Postscript

In the exam on 2020/11/7, not a single convolution calculation question appeared :angry:
