[PYTHON] I want to automatically find high-quality parts from the videos I shot

Thing you want to do

approach

Relation

Source

input

Output (to actually use the result, combine the following two files with ffmpeg or video-editing software)

cut_movie.py


import datetime
import os

import cv2
import librosa
import numpy as np
import scipy


# Helper that formats seconds nicely for console output. Purely cosmetic; not required.
def pretty_print_sec(sec):
    """Format a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    The fractional part of ``sec`` is truncated. Hours are padded to at
    least two digits but are never cut off, so a duration of 100 hours or
    more renders in full (e.g. ``"100:00:00"``).

    Args:
        sec: Duration in seconds (int or float).

    Returns:
        The formatted ``HH:MM:SS`` string.
    """
    hours, remainder = divmod(int(sec), 3600)
    minutes, seconds = divmod(remainder, 60)

    return "{:02d}:{:02d}:{:02d}".format(hours, minutes, seconds)


# Checks whether a given time (in seconds) falls inside any of the ranges selected for clipping
def is_in(tuple_list, val):
    """Return True when ``val`` lies inside any range of ``tuple_list``.

    Each element of ``tuple_list`` is read as ``(lower, upper)`` through its
    first two items, and both bounds are inclusive.
    """
    return any(bounds[0] <= val <= bounds[1] for bounds in tuple_list)


# Use this when cropping by loudness threshold (frames at or above a percentile of the volume)
def cut_by_max_rms(rms, percentile, frame_sec=30):
    """Select the time ranges whose RMS is at or above a percentile threshold.

    Consecutive frames that meet the threshold are merged into a single
    ``(start, end)`` range.

    Args:
        rms: 2-D array-like whose first row (``rms[0]``) holds one RMS value
            per analysis frame.
        percentile: Percentile (0-100) of ``rms[0]`` used as the threshold;
            frames with a value >= that threshold are selected.
        frame_sec: Duration in seconds represented by one frame. Defaults to
            30, matching the 30-second window used by ``decide_cut_frames``.

    Returns:
        A list of ``(start_sec, end_sec)`` float tuples in ascending order.
        Empty when ``rms[0]`` has no frames.
    """
    levels = rms[0]
    # np.percentile is not meaningful on an empty array; nothing to select.
    if len(levels) == 0:
        return []

    threshold = np.percentile(levels, percentile)
    cut_list = []
    run_start = None  # start time of the loud run currently open, if any

    for i, val in enumerate(levels):
        if val >= threshold:
            # Open a new run only on the first loud frame; later loud frames
            # simply extend the run that is already open.
            if run_start is None:
                run_start = float(i) * frame_sec
        elif run_start is not None:
            # First quiet frame after a loud run closes that run.
            cut_list.append((run_start, float(i) * frame_sec))
            run_start = None

    # A run still open at the end extends to the end of the last frame.
    if run_start is not None:
        cut_list.append((run_start, float(len(levels)) * frame_sec))

    return cut_list


# Use this when cropping by local maxima (peaks) of the volume
def cut_by_local_max_rms(rms, max_frame_num, frame_sec=30):
    """Select the frames that are local maxima (peaks) of the RMS curve.

    The neighbourhood size (``order``) passed to ``argrelmax`` is widened
    step by step until at most ``max_frame_num`` peaks remain, so only the
    most prominent peaks survive.

    Args:
        rms: 2-D NumPy array whose first row (``rms[0]``) holds one RMS value
            per analysis frame.
        max_frame_num: Maximum number of peak frames to return. A value of
            zero or less yields an empty list.
        frame_sec: Duration in seconds represented by one frame. Defaults to
            30, matching the 30-second window used by ``decide_cut_frames``.

    Returns:
        A list of ``(start_sec, end_sec)`` tuples, one per selected frame,
        in ascending order.
    """
    # Imported explicitly: a bare ``import scipy`` does not guarantee that the
    # ``scipy.signal`` submodule has been loaded.
    from scipy.signal import argrelmax

    # Without this guard the search below could never satisfy the limit.
    if max_frame_num <= 0:
        return []

    order = 1
    while True:
        peaks = argrelmax(rms[0], order=order)[0]

        # "<=" so that a result with exactly max_frame_num peaks is accepted.
        if len(peaks) <= max_frame_num:
            break

        order += 1

    return [
        (int(point) * frame_sec, (int(point) + 1) * frame_sec)
        for point in peaks
    ]


# Decide which parts of the recording to cut out.
# The selection is based on the audio volume.
def decide_cut_frames(cut_type, voice_file):
    """Choose the time ranges to cut out, based on the loudness of the voice.

    Args:
        cut_type: ``"local_max"`` to pick up to 20 frames where the volume
            peaks, or ``"max"`` to pick the loudest 5% of frames.
        voice_file: Path of the audio file to analyse.

    Returns:
        A list of ``(start_sec, end_sec)`` tuples.

    Raises:
        ValueError: If ``cut_type`` is not one of the supported values.
    """
    # Validate before the (slow) audio load so a typo fails immediately.
    if cut_type not in ("local_max", "max"):
        raise ValueError(
            "Unknown cut_type {!r}; expected 'local_max' or 'max'".format(cut_type)
        )

    # Load the audio used to decide the cuts.
    # A low sample rate (8000 Hz) keeps the analysis as light as possible.
    y_voice, sr_voice = librosa.load(voice_file, sr=8000, mono=True)

    # Measure the volume in 30-second frames.
    # NOTE(review): with center=True librosa centres frame t on sample
    # t * hop_length, while the cut functions treat frame t as covering
    # [t * 30, (t + 1) * 30) seconds - confirm whether a half-frame offset
    # is intended.
    rms = librosa.feature.rms(
        y=y_voice,
        frame_length=sr_voice * 30,
        hop_length=sr_voice * 30,
        center=True,
        pad_mode="reflect",
    )

    if cut_type == "local_max":
        # Select up to 20 frames where the volume peaks.
        return cut_by_local_max_rms(rms, 20)

    # cut_type == "max": keep the loudest 5% of frames, i.e. those at or
    # above the 95th percentile.
    return cut_by_max_rms(rms, 95)


# Cut the selected ranges out of the video
def cut_movie(cut_list, movie_file, output_movie_file):
    """Write the frames of ``movie_file`` inside ``cut_list`` to a new video.

    Each written frame is stamped with its source timestamp. The output has
    no audio track; ``cut_audio`` produces the matching audio.

    Args:
        cut_list: Iterable of ``(start_sec, end_sec)`` ranges to keep.
        movie_file: Path of the source video.
        output_movie_file: Path of the video to write. An existing file at
            this path is replaced.

    Raises:
        IOError: If ``movie_file`` cannot be opened.
        ValueError: If the source video reports a non-positive frame rate.
    """
    movie = cv2.VideoCapture(movie_file)
    out = None
    try:
        # Fail before touching any existing output file.
        if not movie.isOpened():
            raise IOError("Could not open movie file: {}".format(movie_file))

        fps = movie.get(cv2.CAP_PROP_FPS)
        height = movie.get(cv2.CAP_PROP_FRAME_HEIGHT)
        width = movie.get(cv2.CAP_PROP_FRAME_WIDTH)
        print(fps, int(width), int(height))

        # fps is used as a divisor below.
        if fps <= 0:
            raise ValueError(
                "Invalid frame rate {} reported for {}".format(fps, movie_file)
            )

        # Output format.
        # Note that the available codecs may differ depending on the OS.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")

        # Writing over an existing file causes an error, so delete it first.
        if os.path.exists(output_movie_file):
            os.remove(output_movie_file)

        # Pass the real frame rate: truncating e.g. 29.97 to 29 would make
        # the output drift out of sync with the separately cut audio.
        out = cv2.VideoWriter(
            output_movie_file, fourcc, fps, (int(width), int(height))
        )

        font = cv2.FONT_HERSHEY_SIMPLEX

        for start, end in cut_list:
            i = start * fps
            # Seek to the start of the range (position in milliseconds).
            movie.set(cv2.CAP_PROP_POS_MSEC, start * 1000)

            # Read frame by frame from start and stop once end is exceeded.
            while movie.isOpened():
                sec = float(i / fps)
                if sec % 60 == 0:
                    print(pretty_print_sec(sec), datetime.datetime.now(), flush=True)

                ret, frame = movie.read()
                if not ret:
                    break

                # Stamp the current source time onto the frame.
                cv2.putText(
                    frame,
                    pretty_print_sec(sec),
                    (10, int(height * 0.9)),
                    font,
                    1,
                    (0, 255, 0),
                    2,
                    cv2.LINE_AA,
                )

                if is_in(cut_list, sec):
                    out.write(frame)

                i += 1
                if sec > end:
                    break
    finally:
        # Always release the native handles, even if an error occurred.
        movie.release()
        if out is not None:
            out.release()


# Cut the selected ranges out of the audio
def cut_audio(cut_list, voice_file, output_audio_file):
    """Write the parts of ``voice_file`` inside ``cut_list`` to a WAV file.

    Args:
        cut_list: Iterable of ``(start_sec, end_sec)`` ranges to keep. Ranges
            that run past the end of the audio are clipped to its length.
        voice_file: Path of the source audio (mono or multi-channel).
        output_audio_file: Path of the WAV file to write.
    """
    # Imported explicitly: a bare ``import scipy`` does not guarantee that the
    # ``scipy.io.wavfile`` submodule has been loaded.
    from scipy.io import wavfile

    # sr=None keeps the file's native sample rate; without it librosa
    # resamples to 22050 Hz.
    y_full, sr_full = librosa.load(voice_file, sr=None, mono=False)

    # A mono file loads as a 1-D array; promote it to (channels, samples) so
    # the slicing below works for any channel count.
    y_full = np.atleast_2d(y_full)

    segments = []
    for start, end in cut_list:
        print(pretty_print_sec(start), datetime.datetime.now(), flush=True)

        # Keep samples from start through end inclusive. Slicing clips
        # automatically when the range runs past the end of the audio.
        first = int(start * sr_full)
        last = int(end * sr_full) + 1
        segments.append(y_full[:, first:last])

    if segments:
        output = np.concatenate(segments, axis=1)
    else:
        # Nothing selected: write an empty file with the same channel count.
        output = y_full[:, :0]

    # librosa.output.write_wav is gone from librosa 0.8 onwards; write through
    # scipy instead, which expects data shaped (samples, channels).
    wavfile.write(output_audio_file, int(sr_full), np.ascontiguousarray(output.T))


def main():
    """Run the pipeline: pick the loud ranges, then cut the video and audio."""
    # Inputs
    movie_file = "full.mp4"
    audio_file = "full.mp3"  # full audio track extracted from the video
    voice_file = "voice.wav"  # voice-only track extracted from the video

    # Outputs
    output_movie_file = "cut.mp4"
    output_audio_file = "cut.wav"

    # "local_max" selects by local maxima; "max" selects by maximum value.
    cut_type = "local_max"

    cut_list = decide_cut_frames(cut_type, voice_file)

    # Video first, then audio, both driven by the same cut list.
    jobs = (
        (cut_movie, movie_file, output_movie_file),
        (cut_audio, audio_file, output_audio_file),
    )
    for cutter, source, destination in jobs:
        cutter(cut_list, source, destination)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Remarks

Recommended Posts

I want to automatically find high-quality parts from the videos I shot
I want to calculate the allowable downtime from the operating rate
I want to find the shortest route to travel through all points
I want to pin Spyder to the taskbar
I want to output to the console coolly
I want to handle the rhyme part1
I want to handle the rhyme part3
I want to use jar from python
I want to display the progress bar
I want to handle the rhyme part2
I want to handle the rhyme part5
I want to handle the rhyme part4
I want to send a signal only from the sub thread to the main thread
I want to connect to PostgreSQL from various languages
I want to email from Gmail using Python.
[Python] I want to manage 7DaysToDie from Discord! 1/3
I want to perform SageMaker inference from PHP
I want to handle the rhyme part7 (BOW)
I want to make fits from my head
I want to use ceres solver from python
[Python] I want to manage 7DaysToDie from Discord! 2/3
I want to easily find a delicious restaurant
I want to make C ++ code from Python code!
I want to customize the appearance of zabbix
I want to use the activation function Mish
I want to display the progress in Python!
[LINE Messaging API] I want to send a message from the program to everyone's LINE
[Ansible] I want to call my own function from the template module (macro)
I want to detect images of cats from Instagram
I tried to detect the iris from the camera image
I want to grep the execution result of strace
I want to scroll the Django shift table, but ...
[Python] I made a system to introduce "recipes I really want" from the recipe site!
I want to find a popular package on PyPi
I wanted to use the Python library from MATLAB
I want to inherit to the back with python dataclass
I want to fully understand the basics of Bokeh
I just want to find the 95% confidence interval for the difference in population ratios in Python
[Python3] I want to generate harassment names from Japanese!
I want to write in Python! (3) Utilize the mock
I want to send a business start email automatically
I want to handle the rhyme part6 (organize once)
I want to automate ssh using the expect command!
I want to publish the product at the lowest cost
I read the Chainer reference (updated from time to time)
I want to use the R dataset in python
I want to handle the rhyme part8 (finished once)
I want to do Wake On LAN fully automatically
I want to increase the security of ssh connections
I want to change the symbolic link destination of / lib64 from / usr / lib64 to / my-lib64 on CentOS
I want to find a stock that will rise 5 minutes after the Nikkei Stock Average rises
I want to find the intersection of a Bezier curve and a straight line (Bezier Clipping method)
I made a tool to automatically generate a simple ER diagram from the CREATE TABLE statement
[Selenium] I want to display the browser by hitting the driver on the host OS from WSL
Implementation of recommendation system ~ I tried to find the similarity from the outline of the movie using TF-IDF ~
I tried to find the trend of the number of ships in Tokyo Bay from satellite images.
I tried to find out the outline about Big Gorilla
[TensorFlow] I want to master the indexing for Ragged Tensor
I want to use the latest gcc without sudo privileges! !!
I want to save the photos sent by LINE to S3
I tried to find the average of the sequence with TensorFlow