[PYTHON] Acquire camera position and orientation with ArUco marker

Introduction

- I felt that ArUco markers are useful for estimating a camera's own position, but I couldn't find a good sample, so I wrote this note.
- (Please tell me if there is a better way to write it...)
- The code, materials, camera parameters, etc. are here ⇨ https://github.com/naruya/aruco

Environment

Material

(Image: shisa.gif — the source video)

Output

(Images: sample1.gif, sample2.gif)

Theory

- This article was easy to understand: https://mem-archive.com/2018/10/28/post-962/

- The camera extrinsic parameters ($R$ and $T$) satisfy the following:
  - c: camera (**image**) coordinate system
  - w: world coordinate system

\begin{bmatrix}
X_c \\
Y_c \\
Z_c
\end{bmatrix}
= R
\begin{bmatrix}
X_w \\
Y_w \\
Z_w
\end{bmatrix}
+ T

- To find the coordinates of the camera in the world coordinate system, use $R^{-1} = R^\top$:

\begin{bmatrix}
X_w \\
Y_w \\
Z_w
\end{bmatrix}
=
R^\top
\begin{pmatrix}
\begin{bmatrix}
X_c \\
Y_c \\
Z_c
\end{bmatrix}
- T
\end{pmatrix}

- Therefore, the camera position $O_w$ (the origin of the camera coordinate system, i.e. $\begin{bmatrix} X_c & Y_c & Z_c \end{bmatrix}^\top = 0$) is

O_w = 
\begin{bmatrix}
O^x_w \\
O^y_w \\
O^z_w
\end{bmatrix}
=
R^\top
\begin{pmatrix}
- T
\end{pmatrix}

- Also, the camera's forward direction $vz_w$ (from $vz_c = \begin{bmatrix} 0 & 0 & 1 \end{bmatrix}^\top$) is obtained by mapping the camera-coordinate point $\begin{bmatrix} X_c & Y_c & Z_c \end{bmatrix}^\top = \begin{bmatrix} 0 & 0 & 1 \end{bmatrix}^\top$ into world coordinates and subtracting the camera position $O_w$:

vz_w = 
R^\top
\begin{pmatrix}
\begin{bmatrix}
0 \\
0 \\
1
\end{bmatrix}
- T
\end{pmatrix}
- O_w
=
R^\top
\begin{bmatrix}
0 \\
0 \\
1
\end{bmatrix}

- The other camera direction vectors are obtained the same way: $vx_w$ (from $vx_c = \begin{bmatrix} 1 & 0 & 0 \end{bmatrix}^\top$: camera right) and $vy_w$ (from $vy_c = \begin{bmatrix} 0 & 1 & 0 \end{bmatrix}^\top$: camera down); a small numpy sketch of these formulas follows
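
A minimal numpy sketch of the formulas above (the extrinsics here are dummy values just for illustration; in the actual code below, R and T come from OpenCV):

import numpy as np

# Dummy extrinsics for illustration: a 90-degree rotation about x and a 2 m shift
R = np.array([[1., 0., 0.],
              [0., 0., -1.],
              [0., 1., 0.]])
T = np.array([0., 0., 2.])

O_w = np.dot(R.T, -T)                    # camera position: O_w = R^T (-T)
vx_w = np.dot(R.T, np.array([1, 0, 0]))  # camera right:   vx_w = R^T vx_c
vy_w = np.dot(R.T, np.array([0, 1, 0]))  # camera down:    vy_w = R^T vy_c
vz_w = np.dot(R.T, np.array([0, 0, 1]))  # camera forward: vz_w = R^T vz_c
print(O_w, vz_w)  # -> [ 0. -2.  0.] [0. 1. 0.] (camera at y=-2, looking toward +y)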

Points to note

- Be careful about subtle differences in how the coordinate systems are taken (both are right-handed; at least that is my impression, so let's go with it for now)
  - World coordinate system (the ArUco marker's coordinate system):
    - The marker itself lies on the xy-plane: right is positive x, **up** is positive y, and out of the marker (above it) is positive z.
  - Camera coordinate system (the coordinate system of the OpenCV camera image):
    - Right in the image is positive x, **down** in the image is positive y, and into the image (away from the viewer) is positive z.

Code

1. Preparation

import cv2
import numpy as np
import matplotlib.pyplot as plt
from skvideo.io import vread
import moviepy.editor as mpy
from tqdm import tqdm
from mpl_toolkits.mplot3d import axes3d, Axes3D

def npy_to_gif(npy, filename):
    clip = mpy.ImageSequenceClip(list(npy), fps=10)
    clip.write_gif(filename)

vid = vread("src/shisa.mp4")
print(vid.shape)

(524, 1920, 1080, 3)
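
By the way, if scikit-video is not available, the frames can also be read with plain OpenCV; a minimal sketch (note that `cv2.VideoCapture` returns BGR frames while `vread` returns RGB, hence the channel flip):

def vread_cv2(path):
    cap = cv2.VideoCapture(path)
    frames = []
    while True:
        ret, frame = cap.read()  # BGR frame, or ret=False at end of video
        if not ret:
            break
        frames.append(frame[..., ::-1])  # BGR -> RGB to match vread
    cap.release()
    return np.array(frames)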

2. ArUco marker

aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
aruco = cv2.aruco.drawMarker(aruco_dict, 0, 256)  # marker id 0, 256x256 px
cv2.imwrite("aruco.png", aruco)

(Image: aruco.png — the generated marker)

- I printed this and pasted it on a stepladder
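
Note: in newer OpenCV (4.7 and later, as far as I know), `cv2.aruco.drawMarker` is gone; the equivalent there should be `generateImageMarker`:

# A sketch for OpenCV >= 4.7, where drawMarker was renamed
aruco = cv2.aruco.generateImageMarker(aruco_dict, 0, 256)  # marker id 0, 256x256 px
cv2.imwrite("aruco.png", aruco)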

3. Camera settings

- Shot with a Google Pixel 3 in video mode at 1x magnification

marker_length = 0.07  # side length of the printed marker [m]  ### Caution: match your printout!
mtx = np.load("camera/mtx.npy")    # camera matrix (intrinsics)
dist = np.load("camera/dist.npy")  # distortion coefficients
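
mtx and dist are the ones from the repository linked above. If you need to calibrate your own camera, one common way to produce them is chessboard calibration; a minimal sketch (the 9x6 board size and the calib/*.jpg paths are placeholders):

import glob

objp = np.zeros((9 * 6, 3), np.float32)            # chessboard with 9x6 inner corners
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)  # corner grid in board units

obj_points, img_points = [], []
for path in glob.glob("calib/*.jpg"):              # placeholder calibration photos
    gray = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
    found, corners = cv2.findChessboardCorners(gray, (9, 6))
    if found:
        obj_points.append(objp)
        img_points.append(corners)

ret, mtx, dist, _, _ = cv2.calibrateCamera(
    obj_points, img_points, gray.shape[::-1], None, None)
np.save("camera/mtx.npy", mtx)
np.save("camera/dist.npy", dist)

Note that the camera matrix depends on the image resolution, so it has to match the size at which the frames are actually processed.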

4. Main

- rpy (roll, pitch, yaw) is not used for the plot, but is computed anyway

XYZ = []
RPY = []
V_x = []
V_y = []
V_z = []

for frame in vid[:500:25]:  # processing every frame is heavy...
    frame = frame[...,::-1]  # RGB -> BGR (vread returns RGB, OpenCV expects BGR)
    frame = cv2.resize(frame, (360, 640))
    corners, ids, _ = cv2.aruco.detectMarkers(frame, aruco_dict)
    if ids is None:  # skip frames where no marker was detected
        continue
    rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corners, marker_length, mtx, dist)

    R = cv2.Rodrigues(rvec)[0]  # rotation vector -> rotation matrix
    R_T = R.T
    T = tvec[0].T

    xyz = np.dot(R_T, - T).squeeze()  # camera position O_w = R^T (-T)
    XYZ.append(xyz)

    rpy = np.deg2rad(cv2.RQDecomp3x3(R_T)[0])
    RPY.append(rpy)

    V_x.append(np.dot(R_T, np.array([1,0,0])))  # camera right in world coords
    V_y.append(np.dot(R_T, np.array([0,1,0])))  # camera down in world coords
    V_z.append(np.dot(R_T, np.array([0,0,1])))  # camera forward in world coords

    # ---- drawing
    cv2.aruco.drawDetectedMarkers(frame, corners, ids, (0,255,255))
    cv2.aruco.drawAxis(frame, mtx, dist, rvec, tvec, marker_length/2)
    cv2.imshow('frame', frame)
    cv2.waitKey(1)
    # ----

cv2.destroyAllWindows()

(Image: shisa.png — a frame with the detected marker and pose axes drawn)

- By the way, there seem to be several ways to get rpy (described later):
  - using `cv2.RQDecomp3x3()`
  - using `cv2.decomposeProjectionMatrix()`
  - using `arctan()` and `arcsin()`
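
Also, in newer OpenCV (4.7 and later, as far as I know), `cv2.aruco.estimatePoseSingleMarkers` and `cv2.aruco.drawAxis` were removed; a hedged sketch of the equivalent there, using `ArucoDetector` and `cv2.solvePnP`:

# A sketch for OpenCV >= 4.7 (the old aruco pose API was removed there)
detector = cv2.aruco.ArucoDetector(aruco_dict, cv2.aruco.DetectorParameters())
corners, ids, _ = detector.detectMarkers(frame)

# Marker corners in marker (world) coordinates, in ArUco's detection order:
# top-left, top-right, bottom-right, bottom-left
m = marker_length / 2
obj_points = np.array([[-m, m, 0], [m, m, 0],
                       [m, -m, 0], [-m, -m, 0]], dtype=np.float32)
found, rvec, tvec = cv2.solvePnP(obj_points, corners[0].reshape(4, 2), mtx, dist)

cv2.drawFrameAxes(frame, mtx, dist, rvec, tvec, marker_length / 2)

Here rvec and tvec come back as 3x1 vectors, so the reshaping in the loop above would change slightly.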

5. Drawing the camera position

def plot_all_frames(elev=90, azim=270):
    frames = []

    for t in tqdm(range(len(XYZ))):
        fig = plt.figure(figsize=(4,3))
        ax = Axes3D(fig)
        ax.view_init(elev=elev, azim=azim)
        ax.set_xlim(-2, 2); ax.set_ylim(-2, 2); ax.set_zlim(-2, 2)
        ax.set_xlabel("x"); ax.set_ylabel("y"); ax.set_zlabel("z")

        x, y, z = XYZ[t]
        ux, vx, wx = V_x[t]
        uy, vy, wy = V_y[t]
        uz, vz, wz = V_z[t]

        # draw marker
        ax.scatter(0, 0, 0, color="k")
        ax.quiver(0, 0, 0, 1, 0, 0, length=1, color="r")
        ax.quiver(0, 0, 0, 0, 1, 0, length=1, color="g")
        ax.quiver(0, 0, 0, 0, 0, 1, length=1, color="b")
        ax.plot([-1,1,1,-1,-1], [-1,-1,1,1,-1], [0,0,0,0,0], color="k", linestyle=":")

        # draw camera
        ax.quiver(x, y, z, ux, vx, wx, length=0.5, color="r")
        ax.quiver(x, y, z, uy, vy, wy, length=0.5, color="g")
        ax.quiver(x, y, z, uz, vz, wz, length=0.5, color="b")

        # save for animation
        fig.canvas.draw()
        frames.append(np.array(fig.canvas.renderer.buffer_rgba()))
        plt.close()

    return frames

frames = plot_all_frames(elev=105, azim=270)
npy_to_gif(frames, "src/sample1.gif")

frames = plot_all_frames(elev=165, azim=270)
npy_to_gif(frames, "src/sample2.gif")

(Images: sample1.gif, sample2.gif)
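
One caveat: `Axes3D(fig)` stopped attaching the axes to the figure automatically in newer matplotlib (3.4 and later, as far as I know); there, the usual idiom is the following, with everything else unchanged:

# Equivalent axes creation on newer matplotlib
fig = plt.figure(figsize=(4, 3))
ax = fig.add_subplot(projection="3d")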

Bonus

Other ways to find roll, pitch, yaw

rpy = np.deg2rad(cv2.RQDecomp3x3(R_T)[0])
print(rpy)

# T can be anything here & I don't think this is a very good method for this purpose
rpy = cv2.decomposeProjectionMatrix(np.hstack([R_T, -T]))[6]  # return values [0]..[5] are not used
rpy = np.deg2rad(rpy.squeeze())
print(rpy)

r = np.arctan2(-R_T[2][1], R_T[2][2])
p = np.arcsin(R_T[2][0])
y = np.arctan2(-R_T[1][0], R_T[0][0])
rpy = - np.array([r, p, y])
print(rpy)

- All three give the same result
- The above is the rotation matrix ⇨ roll/pitch/yaw direction (a scipy one-liner for the same thing follows)
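
For what it's worth, scipy can also do the rotation matrix ⇨ roll/pitch/yaw conversion directly; this should agree with the three methods above up to the usual angle-range ambiguities (same extrinsic x-y-z convention as the check in the next section):

from scipy.spatial.transform import Rotation

# rotation matrix -> roll/pitch/yaw; lowercase 'xyz' = extrinsic x, y, z rotations
rpy = Rotation.from_matrix(R_T).as_euler('xyz')
print(rpy)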

Relationship between roll pitch yaw and rotation matrix

def eulerAnglesToRotationMatrix(euler):
    R_x = np.array([[                1,                 0,                 0],
                    [                0,  np.cos(euler[0]), -np.sin(euler[0])],
                    [                0,  np.sin(euler[0]),  np.cos(euler[0])]])
    R_y = np.array([[ np.cos(euler[1]),                 0,  np.sin(euler[1])],
                    [ 0,                                1,                 0],
                    [-np.sin(euler[1]),                 0,  np.cos(euler[1])]])
    R_z = np.array([[ np.cos(euler[2]), -np.sin(euler[2]),                 0],
                    [ np.sin(euler[2]),  np.cos(euler[2]),                 0],
                    [                0,                 0,                 1]])
    R = np.dot(R_z, np.dot(R_y, R_x))  # R = Rz @ Ry @ Rx (extrinsic x-y-z order)
    return R

diff = eulerAnglesToRotationMatrix(rpy) - R_T
print(diff.astype(np.float16))

from scipy.spatial.transform import Rotation
diff = Rotation.from_euler('xyz', rpy).as_matrix() - R_T
print(diff.astype(np.float16))

- (diff is 0, so they match)
- `eulerAnglesToRotationMatrix()` builds (and returns) a rotation matrix as the product of the per-axis rotation matrices
- the second version does the same thing as `eulerAnglesToRotationMatrix()` with scipy
- the above is the roll/pitch/yaw ⇨ rotation matrix direction

That's all!

LGTM if you like
