[PYTHON] Tool for creating training data for object detection in OpenCV

In order to make your own object detection with OpenCV, it is necessary to cut out a large amount of images as training data. So I created two tools.

screenshot.png

Cut out the image with a simple GUI and generate a file to be passed to opencv_createsamples.exe. The code is below. Since it was made by rush work, there may be many bugs.

how to use

Preparation

The directory structure is as follows.

Appropriate directory/
  ├── images/
  |├── Image file 1.png
  |├── Image file 2.jpg
  |      |     ︙
  |└── Image file n.bmp
  ├── clipper.py
  ├── make_negative.py
  ├── opencv_createsamples.exe
  ├── opencv_traincascade.exe
└── (OpenCV DLLs)

How to use the image cropping support tool

--Left-click and drag to select the range of objects you want to detect --Right click to delete the last selected range --Switch images with the Maul wheel --Exit with ESC key or "Q" key

When finished, a file pos.dat that can be entered in opencv_createsamples.exe will be generated. The progress is saved in a file, so you can resume work when you are finished.

How to use the negative example automatic creation tool

Just use the image cropping support tool and then run make_negative.py. This will generate a negative example image in the directory negatives and a list of negative examples in bg.dat.

How to use opencv_createsamples.exe

For the time being, just execute the command below.

opencv_createsamples.exe -info pos.dat -vec pos.vec

How to use opencv_traincascade.exe

opencv_traincascade.exe -data output directory-vec pos.vec -bg bg.dat

code

Image cropping support tool

clipper.py


import cv2
import glob
import lzma
import os
import pickle


file_dir = './images'
state_file = './data'
output_file = './pos.dat'
display_size = 768
window_name = 'image_clip'

state = {}

mouse_position = [0, 0]
mouse_wheel    = 0
crop_origin    = None
crop_end       = None
selecting      = False
remove         = False

def load_state():
    global state

    if os.path.exists(state_file):
        with lzma.open(state_file, 'rb') as f:
            state = pickle.load(f)

def save_state():
    with lzma.open(state_file, 'wb') as f:
        pickle.dump(state, f)

def output():
    with open(output_file, 'w') as f:
        for file_name, rects in state.items():
            if len(rects) == 0:
                continue
            values  = [os.path.abspath(file_name), str(len(rects))]
            values += sum([[str(int(r)) for r in rect] for rect in rects], [])
            f.write(' '.join(values) + '\n')

def mouse_callback(event, x, y, flags, param):
    global mouse_position
    global mouse_wheel
    global crop_origin
    global selecting
    global crop_end
    global remove
    
    mouse_position = (x, y)

    if event == cv2.EVENT_LBUTTONDOWN:
        crop_origin = mouse_position
        selecting = True
    if event == cv2.EVENT_LBUTTONUP:
        crop_end = mouse_position
        selecting = False
    if event == cv2.EVENT_RBUTTONDOWN:
        remove = True
    if event == cv2.EVENT_MOUSEWHEEL:
        mouse_wheel = flags


def main():
    global state
    global mouse_wheel
    global crop_origin
    global crop_end
    global remove

    os.makedirs(file_dir, exist_ok=True)
    image_files = glob.glob(os.path.join(file_dir, '*'))

    if len(image_files) == 0:
        print('Please put the image in images')
        exit()

    load_state()

    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
    cv2.setMouseCallback(window_name, mouse_callback)

    image_counter = 0
    for i in range(len(image_files)):
        image_counter = i
        if image_files[image_counter] not in state.keys():
            break

    while True:
        if image_counter < 0:
            image_counter = image_counter + len(image_files)
        if image_counter >= len(image_files):
            image_counter = image_counter - len(image_files)

        save_state()

        image_file = image_files[image_counter]
        image = cv2.imread(image_file)
        scale = display_size / max(image.shape[0], image.shape[1])

        resized_image = cv2.resize(image, 
                                   dsize=None,
                                   fx=scale,
                                   fy=scale,
                                   interpolation=cv2.INTER_AREA)

        if image_file not in state:
            state[image_file] = []

        while True:
            display_image = resized_image.copy()

            for rect in state[image_file]:
                left_top = (int(rect[0] * scale), int(rect[1] * scale))
                right_bottom = (int((rect[0] + rect[2]) * scale), int((rect[1] + rect[3]) * scale))
                display_image = cv2.rectangle(display_image,
                                              left_top,
                                              right_bottom,
                                              (0, 0, 255),
                                              2)

            display_image = cv2.line(display_image,
                                     (mouse_position[0], 0),
                                     (mouse_position[0], display_image.shape[0]),
                                     (255, 0, 0),
                                     2)

            display_image = cv2.line(display_image,
                                     (0, mouse_position[1]),
                                     (display_image.shape[1], mouse_position[1]),
                                     (255, 0, 0),
                                     2)

            if selecting:
                display_image = cv2.rectangle(display_image,
                                              crop_origin,
                                              mouse_position,
                                              (0, 128, 255),
                                              2)

            cv2.imshow(window_name, display_image)

            key = cv2.waitKey(10) & 0xFF

            if crop_origin is not None and crop_end is not None:
                rect_x = min(mouse_position[0], crop_origin[0])
                rect_w = max(mouse_position[0], crop_origin[0]) - rect_x
                rect_y = min(mouse_position[1], crop_origin[1])
                rect_h = max(mouse_position[1], crop_origin[1]) - rect_y
                new_rect = [rect_x / scale, rect_y / scale, rect_w / scale, rect_h / scale]
                state[image_file].append(new_rect)

                crop_origin = None
                crop_end = None

            if remove:
                if len(state[image_file]) > 0:
                    state[image_file].pop(-1)
                remove = False

            if mouse_wheel != 0:
                image_counter += 1 if mouse_wheel > 0 else -1
                mouse_wheel = 0
                break

            if key == ord('q') or key == 27:
                return
                

if __name__ == '__main__':
    main()
    cv2.destroyAllWindows()
    save_state()
    output()

Negative example automatic creation tool

make_negative.py


import cv2
import glob
import lzma
import os
import pickle
import random


file_dir = './images'
output_dir = './negatives/'
output_list_file = './bg.dat'
state_file = './data'

def sample_start_point(width, height, positive_rects):
    for i in range(100):
        start_point = [random.randrange(width), random.randrange(height)]

        for rect in positive_rects:
            rect_left   = rect[0]
            rect_right  = rect[0] + rect[2]
            rect_top    = rect[1]
            rect_bottom = rect[1] + rect[3]

            if ((rect_left <= start_point[0] and rect_right  >= start_point[0]) and
                (rect_top  <= start_point[1] and rect_bottom >= start_point[1])):
                break
        else:
            return start_point
    
    return None

if __name__ == '__main__':
    if not os.path.exists(state_file):
        exit()

    with lzma.open(state_file, 'rb') as f:
        state = pickle.load(f)

    image_counter = 0
    
    for file_name, positive_rects in state.items():
        if len(positive_rects) == 0:
            continue

        image = cv2.imread(file_name)
        width = image.shape[1]
        height = image.shape[0]

        negative_rects = []

        for i in range(1000):
            start_point = sample_start_point(width, height, positive_rects)
            if start_point is None:
                continue
            
            negative_rect = [start_point[0], start_point[1], start_point[0], start_point[1]]

            min_x = 0
            max_x = width
            min_y = 0
            max_y = height

            directions = random.sample(['left', 'right', 'up', 'down'], 4)

            for direction in directions:
                for positive_rect in positive_rects:
                    positive_rect_left   = positive_rect[0]
                    positive_rect_right  = positive_rect[0] + positive_rect[2]
                    positive_rect_top    = positive_rect[1]
                    positive_rect_bottom = positive_rect[1] + positive_rect[3]

                    if not (negative_rect[1] > positive_rect_bottom or
                            negative_rect[3] < positive_rect_top):
                        if direction == 'left':
                            if negative_rect[0] > positive_rect_right:
                                min_x = max(min_x, positive_rect_right)
                        if direction == 'right':
                            if negative_rect[2] < positive_rect_left:
                                max_x = min(max_x, positive_rect_left)

                    if not (negative_rect[0] > positive_rect_right or
                            negative_rect[2] < positive_rect_left):
                        if direction == 'up':
                            if negative_rect[1] > positive_rect_bottom:
                                min_y = max(min_y, positive_rect_bottom)
                        if direction == 'down':
                            if negative_rect[3] < positive_rect_top:
                                max_y = min(max_y, positive_rect_top)
                        
                if direction == 'left':
                    negative_rect[0] = min_x
                if direction == 'right':
                    negative_rect[2] = max_x
                if direction == 'up':
                    negative_rect[1] = min_y
                if direction == 'down':
                    negative_rect[3] = max_y

            if negative_rect[0] == negative_rect[2] or negative_rect[1] == negative_rect[3]:
                continue

            negative_rects.append(tuple([int(x) for x in negative_rect]))
        
        negative_rects = set(negative_rects)
        
        for negative_rect in negative_rects:
            trimed_image = image[negative_rect[1]:negative_rect[3], negative_rect[0]:negative_rect[2], :]
            os.makedirs(output_dir, exist_ok=True)

            extention = os.path.splitext(file_name)[1]
            output_file_path = os.path.join(output_dir, '{}{}'.format(image_counter, extention))
            cv2.imwrite(output_file_path, trimed_image)
            image_counter += 1

    image_files = glob.glob(os.path.join(output_dir, '*'))
    with open(output_list_file, 'w') as f:
        for image_file in image_files:
            f.write('{}\n'.format(os.path.abspath(image_file)))

Recommended Posts

Tool for creating training data for object detection in OpenCV
Creating training data
Make OpenCV object detection rotation invariant
Cloud Pak for Data object operation example in Python (WML client, project_lib)
Calculation of mean IoU in object detection
Python visualization tool for data analysis work
Windows → linux Tips for bringing in data
TFRecord file creation memorandum for object detection
Time series data anomaly detection for beginners
Memo for creating a text formatting tool
Object detection (Single Shot MultiBox Detector) XML file creation GUI for image data
Template for creating command line applications in Python
A tool for creating symbolic links on Windows
Creating learning data for face image dataset sorting (# 1)
I tried object detection using Python and OpenCV
Display candlesticks for FX (forex) data in Python
Let's reproduce the math teaching tool "Jamaica" ❗️ vol.02 "Notes for creating functions in Python"