This article uses Dynamic Mode Decomposition (DMD) to separate the background from a video and create a video that contains only the dynamic modes of the moving object. Dynamic mode decomposition is like a combination of principal component analysis and the Fourier transform. For a detailed explanation of dynamic mode decomposition, please refer to the article I wrote earlier: https://qiita.com/matsxxx/items/5e4b272de821fb1c11e0
I will briefly introduce the procedure of dynamic mode decomposition. Dynamic mode decomposition builds dynamic modes by finding the eigenvalues and eigenvectors of a transition matrix estimated from time series data. The data are thereby decomposed into features along the data dimensions and features along time: the dimensional (spatial) features appear in the eigenvectors, and the temporal features appear in the complex eigenvalues.
Dynamic mode decomposition algorithms are designed so that the eigenvalues and eigenvectors of the transition matrix can be obtained with a realistic amount of computation. This article implements Exact DMD.
import numpy as np
import scipy.linalg as la


# DMD definition
def dmd(X, Y, truncate=None):
    """Compute Exact DMD eigenvalues and modes for the one-step map X -> Y.

    Args:
        X: Snapshot matrix; column k is the state at step k (X_{1:n-1}).
        Y: Snapshot matrix; column k is the state at step k + 1 (X_{2:n}).
        truncate: Number of leading singular values/vectors of ``X`` to keep.
            ``None`` keeps all of them.

    Returns:
        Tuple ``(mu, phi)``. ``mu`` holds the eigenvalues of the reduced
        operator ``U^H Y V Sigma^-1`` and the columns of ``phi`` are the
        corresponding DMD modes ``Y V Sigma^-1 W``.

    Raises:
        scipy.linalg.LinAlgError: If a retained singular value is exactly
            zero, so that Sigma cannot be inverted.
    """
    u_full, s_full, vh_full = la.svd(X, full_matrices=False)
    rank = len(s_full) if truncate is None else truncate
    u = u_full[:, :rank]
    sig = s_full[:rank]
    v = vh_full.conj().T[:, :rank]
    if np.any(sig == 0):
        raise la.LinAlgError("singular matrix")
    # Sigma is diagonal, so multiplying by Sigma^-1 on the right is just a
    # column-wise division. Y V Sigma^-1 is needed for both the reduced
    # operator and the modes, so compute it once.
    yv_sinv = np.dot(Y, v) / sig
    atil = np.dot(u.conj().T, yv_sinv)
    mu, w = la.eig(atil)
    phi = np.dot(yv_sinv, w)  # DMD modes
    return mu, phi
I used the "Atrium" clip from the Video page here. It is a video of a person walking. I use frames 120 to 170 (50 frames) and separate the walking person from the background.
import cv2
# Path to the input video
video_path = "./atrium_video.avi"
# Open the video
cap = cv2.VideoCapture(video_path)
# Get the frame size, frame rate and number of frames
wid = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # width
hei = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # height
fps = cap.get(cv2.CAP_PROP_FPS)  # frame rate
count = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # number of frames
dt = 1/fps  # seconds between frames
print(f"wid:{wid}", f" hei:{hei}", f" fps:{fps}", f" count:{count}", f"dt:{dt}")
# Frames to use
start_frame = 120
end_frame = 170
# Shrink each side to 1/4 of the original resolution (to reduce computation)
r = 4
# Target size is the same for every frame, so build it once
small_size = (int(wid/r), int(hei/r))
# Frame extraction: each frame is resized, converted to grayscale and flattened
cat_frames = []
try:
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    for _ in range(end_frame - start_frame):
        ret, img = cap.read()
        if not ret:
            # Stop early when a frame could not be read
            print("no image")
            break
        buf = cv2.cvtColor(cv2.resize(img, small_size), cv2.COLOR_BGR2GRAY).flatten()
        cat_frames.append(buf)
finally:
    # Release the capture even if processing a frame raised
    cap.release()
cat_frames = np.array(cat_frames).T  # data matrix for DMD: one flattened frame per column
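Next, decompose the video into dynamic modes. The background corresponds to the modes whose continuous-time eigenvalue $\omega$ is (almost) zero, i.e. modes that neither grow or decay in amplitude nor oscillate. Everything other than the background is treated as the moving object.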
# DMD on consecutive frame pairs: X1 drops the last frame, X2 drops the first
X1, X2 = cat_frames[:, :-1], cat_frames[:, 1:]
mu, phi = dmd(X1, X2)
# Continuous-time exponent of each mode, defined by mu = exp(omega * dt)
omega = np.log(mu) / dt
# Modes whose exponent is close to zero are treated as background,
# all remaining modes as the moving object
bg = np.abs(omega) < 1e-2
fg = np.logical_not(bg)
# Dynamic modes of the background / of the moving object
phi_bg, phi_fg = phi[:, bg], phi[:, fg]
# Matching exponents of the background / of the moving object
omega_bg, omega_fg = omega[bg], omega[fg]
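The following formula reconstructs the video from the dynamic modes of the moving object:

$$ \mathbf{x}^{fg}(t) = \Phi^{fg} \, \mathrm{diag}\left(\exp(\omega^{fg} t)\right) \mathbf{b}^{fg}, \qquad \mathbf{b}^{fg} = \left(\Phi^{fg}\right)^{\dagger} \mathbf{x}_0 $$

$\Phi^{fg}$ is the dynamic mode matrix of the moving object, $\omega^{fg}$ is the vector of continuous-time eigenvalues (growth rate and frequency) of the moving-object modes, and $\mathbf{b}^{fg}$ is the matrix product of the pseudo-inverse of the moving-object dynamic mode matrix with the initial frame of the video $\mathbf{x}_0$ multiplied from the right.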
# Reconstruction
# Pseudo-inverse of the moving-object modes. It takes a lot of time.
# If you don't have enough memory, increase r.
phi_fg_pinv = la.pinv(phi_fg)
n_frames = end_frame - start_frame
b = phi_fg_pinv @ cat_frames[:, 0]  # mode amplitudes fitted to the first frame
# Time evolution of every mode at once:
# entry (k, tt) is b[k] * exp(omega_fg[k] * dt * tt)
X_fg = b[:, None] * np.exp(np.outer(omega_fg * dt, np.arange(n_frames)))
X_fg = phi_fg @ X_fg
# Brightness adjustment: map [min, max] of the real part onto [0, 255].
# Using the signed minimum keeps the result in range even when the
# minimum is positive or the maximum is negative.
lum_max = X_fg.real.max()
lum_min = X_fg.real.min()
lum_diff = lum_max - lum_min
if lum_diff == 0:
    # Constant image: avoid dividing by zero (every pixel maps to 0)
    lum_diff = 1
# Video creation
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter("out_dmd_fg.mp4", fourcc, fps, (int(wid/r), int(hei/r)))
try:
    for tt in range(n_frames):
        a = X_fg[:, tt].real.reshape(int(hei/r), -1)
        a = (a - lum_min)/lum_diff * 255
        a = a.astype("uint8")
        # Repeat the grayscale plane into a [rows, cols, 3] array
        out_img = np.tile(a, (3,1,1)).transpose((1,2,0))
        writer.write(out_img)
finally:
    # Release the writer even if a frame failed
    writer.release()
You can see that the background has disappeared and only the moving people remain. However, afterimages are visible in the people's movement. This may be because the video I used has only 50 frames, but it may also be that dynamic mode decomposition is not good at decomposing linear movements.
import numpy as np
import scipy.linalg as la
import cv2
# Path to the input video
video_path = "./atrium_video.avi"
# DMD definition
def dmd(X, Y, truncate=None):
    """Compute Exact DMD eigenvalues and modes for the one-step map X -> Y.

    Args:
        X: Snapshot matrix; column k is the state at step k (X_{1:n-1}).
        Y: Snapshot matrix; column k is the state at step k + 1 (X_{2:n}).
        truncate: Number of leading singular values/vectors of ``X`` to keep.
            ``None`` keeps all of them.

    Returns:
        Tuple ``(mu, phi)``. ``mu`` holds the eigenvalues of the reduced
        operator ``U^H Y V Sigma^-1`` and the columns of ``phi`` are the
        corresponding DMD modes ``Y V Sigma^-1 W``.

    Raises:
        scipy.linalg.LinAlgError: If a retained singular value is exactly
            zero, so that Sigma cannot be inverted.
    """
    u_full, s_full, vh_full = la.svd(X, full_matrices=False)
    rank = len(s_full) if truncate is None else truncate
    u = u_full[:, :rank]
    sig = s_full[:rank]
    v = vh_full.conj().T[:, :rank]
    if np.any(sig == 0):
        raise la.LinAlgError("singular matrix")
    # Sigma is diagonal, so multiplying by Sigma^-1 on the right is just a
    # column-wise division. Y V Sigma^-1 is needed for both the reduced
    # operator and the modes, so compute it once.
    yv_sinv = np.dot(Y, v) / sig
    atil = np.dot(u.conj().T, yv_sinv)
    mu, w = la.eig(atil)
    phi = np.dot(yv_sinv, w)  # DMD modes
    return mu, phi
# Open the video
cap = cv2.VideoCapture(video_path)
# Get the frame size, frame rate and number of frames
wid = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # width
hei = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # height
fps = cap.get(cv2.CAP_PROP_FPS)  # frame rate
count = cap.get(cv2.CAP_PROP_FRAME_COUNT)  # number of frames
dt = 1/fps  # seconds between frames
print(f"wid:{wid}", f" hei:{hei}", f" fps:{fps}", f" count:{count}", f"dt:{dt}")
# Frames to use
start_frame = 120
end_frame = 170
# Shrink each side to 1/4 of the original resolution (to reduce computation)
r = 4
# Target size is the same for every frame, so build it once
small_size = (int(wid/r), int(hei/r))
# Frame extraction: each frame is resized, converted to grayscale and flattened
cat_frames = []
try:
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    for _ in range(end_frame - start_frame):
        ret, img = cap.read()
        if not ret:
            # Stop early when a frame could not be read
            print("no image")
            break
        buf = cv2.cvtColor(cv2.resize(img, small_size), cv2.COLOR_BGR2GRAY).flatten()
        cat_frames.append(buf)
finally:
    # Release the capture even if processing a frame raised
    cap.release()
cat_frames = np.array(cat_frames).T  # data matrix for DMD: one flattened frame per column
# DMD on consecutive frame pairs: X1 drops the last frame, X2 drops the first
X1, X2 = cat_frames[:, :-1], cat_frames[:, 1:]
mu, phi = dmd(X1, X2)
# Continuous-time exponent of each mode, defined by mu = exp(omega * dt)
omega = np.log(mu) / dt
# Modes whose exponent is close to zero are treated as background,
# all remaining modes as the moving object
bg = np.abs(omega) < 1e-2
fg = np.logical_not(bg)
# Dynamic modes of the background / of the moving object
phi_bg, phi_fg = phi[:, bg], phi[:, fg]
# Matching exponents of the background / of the moving object
omega_bg, omega_fg = omega[bg], omega[fg]
# Dynamic mode reconstruction
# Pseudo-inverse of the moving-object modes. If you get a memory error, increase r.
phi_fg_pinv = la.pinv(phi_fg)
n_frames = end_frame - start_frame
b = phi_fg_pinv @ cat_frames[:, 0]  # mode amplitudes fitted to the first frame
# Time evolution of every mode at once:
# entry (k, tt) is b[k] * exp(omega_fg[k] * dt * tt)
X_fg = b[:, None] * np.exp(np.outer(omega_fg * dt, np.arange(n_frames)))
X_fg = phi_fg @ X_fg
# Brightness adjustment: map [min, max] of the real part onto [0, 255].
# Using the signed minimum keeps the result in range even when the
# minimum is positive or the maximum is negative.
lum_max = X_fg.real.max()
lum_min = X_fg.real.min()
lum_diff = lum_max - lum_min
if lum_diff == 0:
    # Constant image: avoid dividing by zero (every pixel maps to 0)
    lum_diff = 1
# Video creation
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter("out_dmd_fg.mp4", fourcc, fps, (int(wid/r), int(hei/r)))
try:
    for tt in range(n_frames):
        a = X_fg[:, tt].real.reshape(int(hei/r), -1)
        a = (a - lum_min)/lum_diff * 255
        a = a.astype("uint8")
        # Repeat the grayscale plane into a [rows, cols, 3] array
        out_img = np.tile(a, (3,1,1)).transpose((1,2,0))
        writer.write(out_img)
finally:
    # Release the writer even if a frame failed
    writer.release()