Continued Try to make a capture software with as high accuracy as possible with python (2) https://qiita.com/akaiteto/items/56bfd8d764d42b9ff508 Try to make a capture software with as high accuracy as possible with python (1) https://qiita.com/akaiteto/items/b2119260d732bb189c87
I'm tired of working on screen / audio capture, so I'll build another part this time. The following is a rough outline of the overall process I currently have in mind.
This time I will build the second part. Because I want to flexibly support websites of any structure, the goal is to operate the browser through OpenCV image detection, without analyzing the HTML.
That rationale is an after-the-fact justification, though ... honestly, I just want to do image processing, since I'm taking screenshots of the screen anyway.
1. Transition to URL destination
2. Press the play button
3. Detect area of weather map
4. Record the detected area
Imagine a site that streams a video of some weather map. The overall process is assumed to be as listed above. This article will probably cover steps 1-3.
OS : windows10 ver: python3.7 web: chrome
Selenium
Let's use Selenium, a library that allows you to operate the browser from commands. https://qiita.com/hanzawak/items/2ab4d2a333d6be6ac760 https://rabbitfoot.xyz/selenium-chrome-profile/
As an aside, I'm using Chrome + PyCharm, so I install the web driver into the Python virtual environment.
#Installation in a virtual environment
cd D:~ Omitted ~\venv\Scripts
activate.bat
pip install chromedriver-binary==(The version of Chrome you are using)
Display the weather map site. Also save the captured image of the opened chrome for later steps. If you use the source below, please fill in the (your user name) section. For details on the profile path, see https://rabbitfoot.xyz/selenium-chrome-profile/
from selenium import webdriver
import chromedriver_binary #Important
from selenium.webdriver.chrome.options import Options
import win32gui
import win32ui
import win32con
import numpy as np
import cv2
# Get the full screen size from the desktop window rectangle.
hnd = win32gui.GetDesktopWindow()
x0, y0, x1, y1 = win32gui.GetWindowRect(hnd)
fullscreen_width = x1 - x0
fullscreen_height = y1 - y0

# Browser size: a narrow window spanning the full screen height.
browse_width = 300
browse_height = fullscreen_height

# Launch the browser with an existing Chrome profile so that sites which
# require login can reuse a session created beforehand in Chrome.
options = Options()
options.add_argument(r"--user-data-dir=C:\Users\(Your username)\AppData\Local\Google\Chrome\User Data")
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# is rejected by newer Selenium releases.
driver = webdriver.Chrome(options=options)
driver.get("url")
driver.set_window_size(browse_width, browse_height)
driver.set_window_position(0, 0)

# Browser screenshot: copy the top-left (browse_width x browse_height)
# region of the desktop, where the browser window was just placed, into an
# in-memory bitmap and save it for the later detection steps.
windc = win32gui.GetWindowDC(hnd)
srcdc = win32ui.CreateDCFromHandle(windc)
memdc = srcdc.CreateCompatibleDC()
bmp = win32ui.CreateBitmap()
bmp.CreateCompatibleBitmap(srcdc, browse_width, browse_height)
memdc.SelectObject(bmp)
memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
bmp.SaveBitmapFile(memdc, 'PointDetect.bmp')

# Release the GDI resources acquired above so the handles are not leaked.
srcdc.DeleteDC()
memdc.DeleteDC()
win32gui.ReleaseDC(hnd, windc)
win32gui.DeleteObject(bmp.GetHandle())
# driver.close()
A note on opening a site that requires login: I don't want to write my password in plain text in the source, as I find that viscerally unpleasant.
** If the site requires login, log in from chrome in advance **
user data directory is already in use, please specify a unique value for --user-data-dir argument, or don't use --user-data-dir
By the way, if you run it with chrome running, the above error will occur. You can take measures, but this time I will not do it because there is no need to start multiple chrome.
Let's try to detect the button by image processing and click it.
In outline, the process is: get the coordinates of the play button, then click those coordinates with the pyautogui library. The hard part is getting the coordinates of the play button. I considered the following two approaches.
1. Detect shapes and get coordinates
2. Find the same part as the image of the play button
1. Detect shapes and get coordinates
Let's do it easily with the opencv function.
DetectTriangle.py
import cv2
import numpy as np
def DetectTriangle(img, inputNm, outputNm, circle_fill_ratio=0.8):
    """Classify contours of *img* by vertex count and paint them on the source image.

    The single-channel image *img* is binarized with an inverted Gaussian
    adaptive threshold, its contours are extracted, and each contour is
    simplified to a polygon. Depending on the number of polygon vertices the
    contour is filled on a colour copy of the file *inputNm*:
    red for 3 vertices (triangle), green for 4 (square), blue for 8 vertices
    when the contour also fills most of its minimum enclosing circle.

    Args:
        img: 8-bit single-channel image to analyse (as required by
            ``cv2.adaptiveThreshold``).
        inputNm: Path of the image file the results are painted onto.
        outputNm: Path of the annotated result image. The thresholded image
            is additionally written to ``"PointDetect_threshold" + outputNm``.
        circle_fill_ratio: Minimum ratio of contour area to the area of the
            minimum enclosing circle for an 8-vertex contour to count as a
            circle.

    Raises:
        OSError: If *inputNm* cannot be read as an image.
    """
    image_obj = cv2.imread(inputNm)
    if image_obj is None:
        raise OSError("could not read image: " + str(inputNm))

    # Named constants instead of the bare values 1, 1 (same numeric values).
    binary = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    cv2.imwrite("PointDetect_threshold" + outputNm, binary)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # the contour list is always the second-to-last element.
    contours = cv2.findContours(binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]

    for cnt in contours:
        # The epsilon factor controls how aggressively the contour is
        # simplified and strongly affects accuracy; smaller factors such as
        # 0.07, 0.03 or 0.009 keep more vertices.
        approx = cv2.approxPolyDP(cnt, 0.1 * cv2.arcLength(cnt, True), True)
        vertex_count = len(approx)

        if vertex_count == 3:
            print("triangle")
            cv2.drawContours(image_obj, [cnt], 0, (0, 0, 255), -1)
        elif vertex_count == 4:
            print("square")
            cv2.drawContours(image_obj, [cnt], 0, (0, 255, 0), -1)
        elif vertex_count == 8:
            area = cv2.contourArea(cnt)
            _, radius = cv2.minEnclosingCircle(cnt)
            circleArea = radius * radius * np.pi
            # Exact float equality between the two areas practically never
            # holds, so compare with a tolerance instead.
            if circleArea > 0 and area / circleArea >= circle_fill_ratio:
                print("circle")
                cv2.drawContours(image_obj, [cnt], 0, (255, 0, 0), -1)

    cv2.imwrite(outputNm, image_obj)
# Driver script: build three preprocessed variants of the screenshot and run
# the shape detection on each of them for comparison.
inputNm = 'PointDetect2.bmp'
srcImage = cv2.imread(inputNm)
if srcImage is None:
    raise OSError("could not read image: " + inputNm)

# Grayscale conversion (file names no longer carry a stray trailing space).
gray = cv2.cvtColor(srcImage, cv2.COLOR_BGR2GRAY)
cv2.imwrite("PointDetect_gray.png", gray)

# Dilate, because the play button is small.
kernel = np.ones((4, 4), np.uint8)
dilation = cv2.dilate(gray, kernel, iterations=1)
cv2.imwrite("PointDetect_dilation.png", dilation)

# Blur the dilated image; save the blurred image itself (the original
# wrote the dilated image under the blur file name).
blur = cv2.GaussianBlur(dilation, (5, 5), 0)
cv2.imwrite("PointDetect_blur.png", blur)

DetectTriangle(blur, inputNm, "result_blur.png")          # Blur
DetectTriangle(gray, inputNm, "result_gray.png")          # Grayscale
DetectTriangle(dilation, inputNm, "result_dilation.png")  # Dilated
The processing you want to do is roughly as follows.
1. Preprocessing
2. Threshold processing
3. Contour extraction
I will try it with a screenshot of google image search.
Grayscale as pre-processing ↓ (I do various other things in the source)
Threshold processing ↓
Contour extraction ↓
Red is a triangle, green is a quadrangle, and blue is a circle. The idea is to determine the position of the play button, which is triangular, from the coordinates of the contours of the shapes detected here.
・ ・ ・ ・ ・ ・
So, I went to the weather site. The result was ... no good. I tried some blurring, expansion, parameter adjustment, etc., but it didn't work.
The play button was detected as well, but there are too many false positives. For one thing, this approach is not suitable for sites with many triangular shapes; another drawback is that the parameters may need to be tuned for each site.
It would be possible to tune the detection specifically for this weather site, but I want an approach that works on a variety of sites, so plan 1 is rejected.
2. Find the same part as the image of the play button
Let's detect it by template matching. The process of detecting if the same small image is contained in another image. In order to perform template matching, it is necessary to tell the image of the play button that is the correct answer.
So I add a phase called "Get image of play button (manual)" to the flow above. I wanted to do everything automatically, but it can't be helped.
Reference: https://shizenkarasuzon.hatenablog.com/entry/2020/03/23/005440
Conceptually, a small window like the one above appears, and the user selects the button they want to click with the rectangular selection frame (light blue). The selected region is saved as the image used for template matching. Manual operation is assumed only for the first time.
So I will try template matching.
http://labs.eecs.tottori-u.ac.jp/sd/Member/oyamada/OpenCV/html/py_tutorials/py_imgproc/py_template_matching/py_template_matching.html
When I ran it according to the openCV sample source, it worked very well. Let's go with the policy of 2
Next, I use OpenCV's absdiff (rather crudely): it performs motion detection on the images captured before and after pressing the play button and detects the area that changed.
I will summarize the sources so far.
As a premise, suppose you already have an image of the part you want to click (here, the play button), cropped out like the image below.
Before running, install the following libraries.
pip install PyAutoGUI
In my environment, the following error occurred and the installation failed.
SyntaxError: (unicode error) 'utf-8' codec can't decode byte 0x93 in position 0: invalid start byte (sitecustomize.py, line 7)
I worked around it by installing an older version of PyMsgBox, referring to https://qiita.com/hisakichi95/items/41002333efa8f6371d40.
So, here is the source. I haven't tidied it up.
Detect.py
from selenium import webdriver
import chromedriver_binary #Important
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import win32gui
import win32ui
import win32con
import numpy as np
import cv2
def DetectMotion(ImgNm1,ImgNm2):
    """Return the bounding box enclosing all significant changes between two images.

    Both files are read as grayscale. The first image is blended towards the
    second one, the absolute difference to the second image is thresholded,
    and the bounding rectangles of all resulting contours with an area of at
    least 500 are merged into one box.

    Args:
        ImgNm1: Path of the image taken before the change.
        ImgNm2: Path of the image taken after the change.

    Returns:
        ``((left, top), (right, bottom))`` of the merged box. When no contour
        is large enough the initial sentinels are returned unchanged, i.e.
        ``((9999999, 9999999), (0, 0))``, so callers should check that
        ``left < right`` and ``top < bottom`` before cropping.

    Raises:
        OSError: If either image cannot be read.
    """
    before = cv2.imread(ImgNm1, 0)
    after = cv2.imread(ImgNm2, 0)
    if before is None or after is None:
        raise OSError("could not read image(s): " + str(ImgNm1) + ", " + str(ImgNm2))

    # accumulateWeighted needs a floating-point accumulator; astype() already
    # returns a new array, so the source image is not modified.
    blended = before.astype("float")
    # Applied twice with weight 0.6 as in the original, which leaves 0.4 * 0.4
    # of the "before" image in the accumulator and thereby scales the
    # difference measured below.
    cv2.accumulateWeighted(after, blended, 0.6)
    cv2.accumulateWeighted(after, blended, 0.6)
    frameDelta = cv2.absdiff(after, cv2.convertScaleAbs(blended))
    thresh = cv2.threshold(frameDelta, 3, 255, cv2.THRESH_BINARY)[1]

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # the contour list is always the second-to-last element.
    contours = cv2.findContours(
        thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    top_left_X = top_left_Y = 9999999
    bum_right_X = bum_right_Y = 0
    for contour in contours:
        # Skip empty contours and remove small objects.
        if len(contour) == 0 or cv2.contourArea(contour) < 500:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        print(x, y, x + w, y + h)
        top_left_X = min(top_left_X, x)
        top_left_Y = min(top_left_Y, y)
        bum_right_X = max(bum_right_X, x + w)
        bum_right_Y = max(bum_right_Y, y + h)
    return (top_left_X, top_left_Y), (bum_right_X, bum_right_Y)
def DiffImage(img1,img2):
    """Return the largest absolute per-element difference between two images.

    Args:
        img1: First image array.
        img2: Second image array, broadcast-compatible with *img1*.

    Returns:
        The maximum of ``|img1 - img2|`` over all elements.
    """
    # Widen to a signed integer type before subtracting so that unsigned
    # 8-bit pixel values cannot wrap around.
    signed_first = img1.astype(int)
    signed_second = img2.astype(int)
    return np.max(np.absolute(signed_first - signed_second))
def DetectBtn(bmp,memdc,CapFIleNm,PatchNm,scrollY):
    """Capture the screen and locate the button template in it.

    The current screen content is copied into *bmp*, saved to *CapFIleNm*,
    and searched for the template image *PatchNm* with normalized
    cross-correlation template matching. The best match is then verified by
    comparing it pixel-wise with the template; the matched region is written
    to a ``Detect_Success<scrollY>.jpg`` or ``Detect_Fail<scrollY>.jpg`` file.

    Relies on the module-level names ``srcdc``, ``browse_width`` and
    ``browse_height`` set up by the calling script.

    Args:
        bmp: Bitmap object selected into *memdc* that receives the capture.
        memdc: Memory device context used as the copy destination.
        CapFIleNm: Path the captured screenshot is saved to.
        PatchNm: Path of the template image of the button.
        scrollY: Current scroll offset; only used in the debug file names.

    Returns:
        ``(True, (center_x, center_y))`` when the best match is accepted,
        otherwise ``(False, (0, 0))``.

    Raises:
        OSError: If the screenshot or the template cannot be read.
    """
    memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
    bmp.SaveBitmapFile(memdc, CapFIleNm)

    # Get button coordinates
    screen = cv2.imread(CapFIleNm, 0)
    template = cv2.imread(PatchNm, 0)
    if screen is None or template is None:
        raise OSError("could not read image(s): " + str(CapFIleNm) + ", " + str(PatchNm))
    w, h = template.shape[::-1]

    # TM_CCOEFF_NORMED: the best match is the location of the maximum score.
    res = cv2.matchTemplate(screen, template, cv2.TM_CCOEFF_NORMED)
    _, _, _, top_left = cv2.minMaxLoc(res)
    bottom_right = (top_left[0] + w, top_left[1] + h)
    btn_center = (top_left[0] + w // 2, top_left[1] + h // 2)
    print("Button upper left coordinates", top_left)
    print("Button lower right coordinates", bottom_right)
    print("Button center coordinates", btn_center)

    # Cut out the detected part from the untouched capture. The original
    # drew a white rectangle on the image before cropping, so the frame
    # pixels leaked into the comparison with the template.
    matched = screen[top_left[1]: bottom_right[1], top_left[0]: bottom_right[0]]

    if DiffImage(template, matched) > 180:
        # Too different from the button image -> False
        cv2.imwrite("Detect_Fail" + str(scrollY) + ".jpg", matched)
        print("btn not exist")
        return False, (0, 0)

    # Success -> True
    cv2.imwrite("Detect_Success" + str(scrollY) + ".jpg", matched)
    print("btn exist")
    return True, btn_center
# Main script: open the page, scroll until the play button is found by
# template matching, click it, then detect the screen area that changed.
TEMP_BEFORE_SCREENSHOT = 'PointDetect_before.bmp'
TEMP_AFTER_SCREENSHOT = 'PointDetect_after.bmp'
TEMP_PATCH = 'PointDetect_patch.jpg'

# Get full screen size
hnd = win32gui.GetDesktopWindow()
x0, y0, x1, y1 = win32gui.GetWindowRect(hnd)
fullscreen_width = x1 - x0
fullscreen_height = y1 - y0

# Browser size (alternatively use fullscreen_width / fullscreen_height)
browse_width = 1920
browse_height = 1080

# Launch browser
options = Options()
# options.add_argument(r"--user-data-dir=C:\Users\Your username\AppData\Local\Google\Chrome\User Data")
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# is rejected by newer Selenium releases.
driver = webdriver.Chrome(options=options)
driver.get("https://")
driver.set_window_size(browse_width, browse_height)
driver.set_window_position(0, 0)

# Waiting for browser behavior
import time
time.sleep(3)

# Screenshot preparation
windc = win32gui.GetWindowDC(hnd)
srcdc = win32ui.CreateDCFromHandle(windc)
memdc = srcdc.CreateCompatibleDC()
bmp = win32ui.CreateBitmap()
bmp.CreateCompatibleBitmap(srcdc, browse_width, browse_height)
memdc.SelectObject(bmp)

# Scroll until you find the button
Detectflg = False
isScrolButton = False
btn_pos = (0, 0)
scrollY = 0
while not Detectflg:
    scrollY += int(fullscreen_height / 4)

    # Capture the screen before and after scrolling
    memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
    bmp.SaveBitmapFile(memdc, TEMP_BEFORE_SCREENSHOT)
    driver.execute_script("window.scrollTo(0, " + str(scrollY) + ")")
    time.sleep(5)
    memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
    bmp.SaveBitmapFile(memdc, TEMP_AFTER_SCREENSHOT)

    img1 = cv2.imread(TEMP_BEFORE_SCREENSHOT, 0)
    img2 = cv2.imread(TEMP_AFTER_SCREENSHOT, 0)
    # The screen does not change even after scrolling -> bottom of the page.
    isScrolButton = DiffImage(img1, img2) < 100
    if isScrolButton:
        print("scrollbutton")

    Detectflg, btn_pos = DetectBtn(bmp, memdc, TEMP_AFTER_SCREENSHOT, TEMP_PATCH, scrollY)
    if isScrolButton and not Detectflg:
        # Bottom reached without a match: stop instead of looping forever.
        break

# Give up only when the button was really not found; a match on the last
# screen of the page is still a valid result.
if not Detectflg:
    print("Button not found")
    raise SystemExit(1)

# Click the button coordinates to start playback.
# pyautogui is imported here, at the point of first use, as in the original.
import pyautogui
pyautogui.click(btn_pos[0], btn_pos[1])

# Save before change
memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
bmp.SaveBitmapFile(memdc, TEMP_BEFORE_SCREENSHOT)

# Wait until the screen changes
time.sleep(5)

# Save after change
memdc.BitBlt((0, 0), (browse_width, browse_height), srcdc, (0, 0), win32con.SRCCOPY)
bmp.SaveBitmapFile(memdc, TEMP_AFTER_SCREENSHOT)

# Motion detection
top_left, bottom_right = DetectMotion(TEMP_BEFORE_SCREENSHOT, TEMP_AFTER_SCREENSHOT)
if top_left[0] < bottom_right[0] and top_left[1] < bottom_right[1]:
    img = cv2.imread(TEMP_AFTER_SCREENSHOT)
    img1 = img[top_left[1]: bottom_right[1], top_left[0]: bottom_right[0]]
    cv2.imwrite("MotionArea.jpg", img1)
else:
    # DetectMotion found no sufficiently large change; cropping would give
    # an empty image that cannot be written.
    print("No motion area detected")

# Release the GDI resources acquired for the screenshots.
srcdc.DeleteDC()
memdc.DeleteDC()
win32gui.ReleaseDC(hnd, windc)
win32gui.DeleteObject(bmp.GetHandle())
# driver.close()
I added a scroll function to search for the button, and a step that takes the image difference against the template to reject false detections when a button is matched.
Next time, I would like to combine it with the recording function I made last time. Well then
Recommended Posts