Ambisonics is a method of three-dimensional sound reproduction that reconstructs the direction of arrival of the sound waves incident on a receiving point, i.e., the incident directivity of the sound field. The directivity of the sound incident on that point is expanded in spherical harmonics, and the output of each loudspeaker in the reproduction array is chosen so that the expanded directivity is recreated during playback. Systems that use only the 0th- and 1st-order terms are called (first-order) ambisonics; systems that also use spherical harmonics of 2nd order and above are called higher-order ambisonics (HOA).
In this article, I walk through the code for the whole pipeline: picking up the sound field with a microphone array, virtually arranging loudspeakers, and reproducing the field.
The simulation described in this article deals only with plane waves, but three source models are implemented: plane waves, spherical waves, and directional spherical waves.
Each model can be written in Cartesian coordinates as follows (matching the code below):

$$p_{\mathrm{plane}}(\mathbf{r}, \omega) = e^{i \mathbf{k} \cdot (\mathbf{r} - \mathbf{r}_s)},$$

$$G(\mathbf{r} \mid \mathbf{r}_s, \omega) = \frac{e^{i k \lVert \mathbf{r} - \mathbf{r}_s \rVert}}{4 \pi \lVert \mathbf{r} - \mathbf{r}_s \rVert},$$

$$G_d(\mathbf{r} \mid \mathbf{r}_s, \omega) = G(\mathbf{r} \mid \mathbf{r}_s, \omega) \left( \alpha - (1 - \alpha) \left( 1 + \frac{i}{k \lVert \mathbf{r}_s - \mathbf{r} \rVert} \right) \cos \gamma \right),$$

where $\mathbf{k}$ is the wave vector, $\alpha$ is the directivity coefficient, and $\gamma$ is the angle between the source axis and the source-receiver direction. In the spherical-harmonic expansions of these waves, $j_n(kr)$ denotes the spherical Bessel function of the first kind, $h_n(kr)$ the spherical Hankel function of the first kind, and $^*$ the complex conjugate (conjugate transpose for a matrix).
green_function.py
import numpy as np
import function as fu

def Plane_wave(freq, cv, pos_s, vec_dn, pos_r):
    # freq:   frequency [Hz]
    # cv:     speed of sound [m/s]
    # pos_s:  source reference position (the phase is referenced to this point)
    # vec_dn: propagation direction of the plane wave
    # pos_r:  receiver position
    omega = 2 * np.pi * freq
    k = omega / cv
    vec_dn_unit = np.array(vec_dn, dtype='float') / np.linalg.norm(vec_dn, ord=2)
    # The arrival direction is opposite to the propagation direction
    azi, ele, _ = fu.cart2sph(-vec_dn_unit[0], -vec_dn_unit[1], -vec_dn_unit[2])
    ele_1 = np.pi / 2 - ele  # zenith angle
    kx = k * np.cos(azi) * np.sin(ele_1)
    ky = k * np.sin(azi) * np.sin(ele_1)
    kz = k * np.cos(ele_1)
    x = pos_r[0] - pos_s[0]
    y = pos_r[1] - pos_s[1]
    z = pos_r[2] - pos_s[2]
    ret = np.exp(1j * (kx * x + ky * y + kz * z))
    return ret

def green(freq, cv, vec1, vec2):
    # Free-field Green's function (spherical wave) between vec1 and vec2
    vec = np.array(vec1, dtype='float') - np.array(vec2, dtype='float')
    rr = np.linalg.norm(vec, ord=2)
    omega = 2 * np.pi * freq
    k = omega / cv
    ret = np.exp(1j * k * rr) / (4 * np.pi * rr)
    return ret

def green2(alpha, freq, cv, vecrs, vecr):
    # alpha: directivity coefficient, 0 to 1
    #   0   -> dipole
    #   0.5 -> cardioid
    #   1   -> monopole
    # freq:  frequency [Hz] (may be a vector)
    # cv:    speed of sound [m/s]
    # vecrs: source position
    # vecr:  receiver position
    vecrd = np.array(vecrs, dtype='float') - np.array(vecr, dtype='float')
    rd = np.linalg.norm(vecrd, ord=2)
    omega = 2 * np.pi * freq
    k = omega / cv
    # Angle between the source axis (pointing at the origin) and the source-receiver vector
    cosgamma = np.dot(vecrd, -vecrs) / (np.linalg.norm(vecrd, ord=2) * np.linalg.norm(vecrs, ord=2))
    ret = green(freq, cv, vecrs, vecr) * (alpha - (1 - alpha) * (1 + 1j / (k * rd)) * cosgamma)
    return ret
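As a quick sanity check (my addition, not part of the original article), the three source models can be evaluated at a sample receiver position; the positions and frequency below are arbitrary:

import numpy as np
import green_function as gr

freq = 550                          # frequency [Hz]
cv = 344                            # speed of sound [m/s]
pos_s = np.array([2.0, 0.0, 0.0])   # source (reference) position
vec_dn = np.array([1.0, 0.0, 0.0])  # plane-wave propagation direction
pos_r = np.array([0.3, 0.2, 0.0])   # receiver position

p_plane = gr.Plane_wave(freq, cv, pos_s, vec_dn, pos_r)
p_point = gr.green(freq, cv, pos_s, pos_r)
p_card = gr.green2(0.5, freq, cv, pos_s, pos_r)  # alpha = 0.5 -> cardioid

print(abs(p_plane))  # plane wave: unit magnitude everywhere
print(abs(p_point))  # spherical wave: decays as 1/(4*pi*r)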
The main functions used throughout the ambisonics code are summarized below.
function.py
import numpy as np
import numexpr as ne
from scipy.special import sph_harm, spherical_jn, spherical_yn

def cart2sph(x, y, z):
    """x, y, z: scalars or ndarrays of Cartesian coordinates.
    Returns (azimuth, elevation, radius); numexpr is used as the backend."""
    azimuth = ne.evaluate('arctan2(y, x)')
    xy2 = ne.evaluate('x**2 + y**2')
    elevation = ne.evaluate('arctan2(z, sqrt(xy2))')
    r = ne.evaluate('sqrt(xy2 + z**2)')
    return azimuth, elevation, r

def EquiDistSphere(n):
    # Quasi-uniform placement of n points on the unit sphere (golden-spiral method)
    golden_angle = np.pi * (3 - np.sqrt(5))
    theta = golden_angle * (np.arange(n) + 1)
    z = np.linspace(1 - 1.0 / n, 1.0 / n - 1, n)
    radius = np.sqrt(1 - z * z)
    points = np.zeros((n, 3))
    points[:, 0] = radius * np.cos(theta)
    points[:, 1] = radius * np.sin(theta)
    points[:, 2] = z
    return points

def BnMat2(N, k, r):
    # Vector of W_nkr for all (n, m) up to order N
    B = np.zeros((N + 1) ** 2, dtype=complex)
    dx = 0
    for i in range(N + 1):
        A = Bn2(i, k, r)
        for j in np.arange(-i, i + 1, 1):
            B[dx] = A
            dx += 1
    return B

def Bn2(n, k, r):
    # Radial function of an open (hollow) array: 4*pi*i^n * j_n(kr)
    kr = k * r
    y = 4 * np.pi * (1j ** n) * spherical_jn(n, kr)
    return y

def calcSH(N, azi, ele):
    # Spherical harmonics Y_nm for all (n, m) up to order N
    # (ele is the zenith angle, as expected by scipy's sph_harm)
    y_n = np.array([], dtype=complex)
    for n in range(N + 1):
        for m in np.arange(-n, n + 1, 1):
            y_n = np.append(y_n, sph_harm(m, n, azi, ele))
    return y_n

def encode(ambi_order, azi, ele, k, r, mic_sig):
    # Expansion (ambisonics) coefficients from the microphone signals
    Y_N = np.zeros((len(azi), (ambi_order + 1) ** 2), dtype=complex)
    for i in range(azi.size):
        Y_N[i, :] = calcSH(ambi_order, azi[i], ele[i])
    invY = np.linalg.pinv(Y_N)
    Wnkr = BnMat2(ambi_order, k, r)
    ambi_signal = np.diag(1 / Wnkr) @ invY @ mic_sig
    return ambi_signal

def decode(ambi_order, azi, ele, r, lambda_, ambi_signal):
    # Loudspeaker drive signals from the ambisonics coefficients
    Y_N = np.zeros((len(azi), (ambi_order + 1) ** 2), dtype=complex)
    for i in range(azi.size):
        Y_N[i, :] = calcSH(ambi_order, azi[i], ele[i])
    invY = np.linalg.pinv(np.conj(Y_N.T))
    r_res = r % lambda_  # residual propagation phase from the array radius
    spkr_output = np.exp(1j * (2 * np.pi * r_res / lambda_)) * (invY @ ambi_signal)
    return spkr_output
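One thing worth checking (my addition) is that the $(N+1)^2$ golden-spiral points make the spherical-harmonic matrix well conditioned, so that the pseudoinverses used in encode/decode behave almost like true inverses:

import numpy as np
import function as fu

N = 3                                  # example truncation order (assumption for the check)
num = (N + 1) ** 2
pts = fu.EquiDistSphere(num)           # (N+1)^2 quasi-uniform directions
azi, ele, _ = fu.cart2sph(pts[:, 0], pts[:, 1], pts[:, 2])
zen = np.pi / 2 - ele                  # zenith angle expected by calcSH

Y = np.zeros((num, num), dtype=complex)
for i in range(num):
    Y[i, :] = fu.calcSH(N, azi[i], zen[i])

print(np.linalg.cond(Y))  # should stay small; a huge value would mean an ill-posed encode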
The sound pressure at an arbitrary position on the sphere can be expressed as

$$p(\theta, \phi, r, \omega) = \sum_{n=0}^{N} \sum_{m=-n}^{n} A_{nm} W_{nkr} Y_{nm}(\theta, \phi).$$

$A_{nm}$ are the expansion coefficients (ambisonics coefficients), and $W_{nkr}$ is the radial function determined by the microphone array type (an open, i.e., hollow, array is used in this simulation, for which $W_{nkr} = 4\pi i^n j_n(kr)$). Ambisonics does two things: encoding and decoding. Encoding first obtains the expansion coefficients; decoding then obtains the drive signals output from the loudspeakers.
Solving for the expansion coefficients $A_{nm}$ gives

$$A_{nm} = \frac{1}{W_{nkr}} \left[ \mathbf{Y}^{+} \mathbf{p} \right]_{nm},$$

where $\mathbf{p}$ is the vector of microphone signals, $\mathbf{Y}$ is the matrix of spherical harmonics evaluated at the microphone directions, and $^+$ denotes the (Moore-Penrose) pseudoinverse. The problem here is the division by $W_{nkr}$: at frequencies where $W_{nkr}$ becomes 0, the wave cannot be reproduced. Microphone arrays such as rigid-sphere arrays and open cardioid arrays have been studied to avoid this forbidden-frequency problem.
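For the open array used here, $W_{nkr} = 4\pi i^n j_n(kr)$, so the forbidden frequencies are simply the zeros of the spherical Bessel function $j_n(kr)$. A small sketch (my addition) locating them numerically for this article's microphone radius:

import numpy as np
from scipy.special import spherical_jn

cv = 344      # speed of sound [m/s]
r_mic = 0.05  # microphone array radius [m], as in this article

freqs = np.linspace(10, 20000, 200000)
kr = 2 * np.pi * freqs / cv * r_mic
for n in range(3):
    jn = spherical_jn(n, kr)
    sign_change = np.where(np.sign(jn[:-1]) * np.sign(jn[1:]) < 0)[0]
    print(f"order {n}: first forbidden frequencies near {freqs[sign_change][:2]} Hz")

With $r_{mic} = 0.05$ m, the first zero of $j_0$ appears near $c / (2 r_{mic}) \approx 3.4$ kHz, safely above the 550 Hz used in this simulation.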
The following code covers the first half of the pipeline for the primary sound source (the sound field to be reproduced): placing the microphones, picking up the sound, and encoding. Following the article below, the microphones are placed nearly uniformly on a spherical surface.
blog.marmakoide.org/?p=1
encode.py (setup and sound pickup)
import green_function as gr
import function as fu
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.cm as cm
# Initial conditions
ambi_order = 7                 # truncation order
freq = 550                     # frequency [Hz]
pos_s = np.array([2, 0, 0])    # primary source position x, y, z
vec_dn = np.array([1, 0, 0])   # plane-wave propagation direction
cv = 344                       # speed of sound [m/s]
r_sp = 1.5                     # radius of the loudspeaker array
r_mic = 0.05                   # radius of the microphone array
# Graph conditions
Plot_range = 2.0
space = 0.05
# Basic quantities
lambda_ = cv / freq
omega = 2 * np.pi * freq
k = omega / cv
# Loudspeaker placement
num_sp = (ambi_order + 1) ** 2
xyz = fu.EquiDistSphere((ambi_order + 1) ** 2)
xyz = np.array([xyz]).reshape(num_sp, 3)
pos_sp = xyz * r_sp
azi_sp, ele_sp, rr_sp = fu.cart2sph(pos_sp[:, 0], pos_sp[:, 1], pos_sp[:, 2])
ele_sp1 = np.pi / 2 - ele_sp   # zenith angle
# Microphone placement
num_mic = (ambi_order + 1) ** 2
pos_mic = xyz * r_mic
azi_mic, ele_mic, rr_mic = fu.cart2sph(pos_mic[:, 0], pos_mic[:, 1], pos_mic[:, 2])
ele_mic1 = np.pi / 2 - ele_mic
# Sound pickup
mic_sig = np.array([])
for i in range(num_mic):
    mic_sig_ = gr.Plane_wave(freq, cv, pos_s, vec_dn, pos_mic[i])
    mic_sig = np.append(mic_sig, mic_sig_)
encode.py
#Encode
ambi_signal = fu.encode(ambi_order,azi_mic,ele_mic1,k,r_mic,mic_sig)
"""
def encode(ambi_order, azi, ele,k,r, mic_sig):
Y_N = np.zeros((len(azi),(ambi_order+1)**2),dtype = np.complex)
for i in range(azi.size):
Y_N[i,:] = calcSH(ambi_order, azi[i],ele[i])
invY = np.linalg.pinv(Y_N)
Wnkr = BnMat2(ambi_order,k,r)
ambi_signal = np.diag(1/Wnkr) @ invY @ mic_sig
return ambi_signal
"""
Decoding matches the expansion coefficients produced by the loudspeakers to the coefficients obtained by encoding. Concretely, treating each loudspeaker as a plane-wave source, the condition is

$$\sum_{l=1}^{L} w_l Y_{nm}^{*}(\theta_l, \phi_l) = A_{nm},$$

where $w_l$ is the drive signal of the $l$-th loudspeaker. In matrix form,

$$\mathbf{w} = \left( \mathbf{Y}^{H} \right)^{+} \mathbf{A},$$

which yields the drive signals. With this decoding, the primary and secondary sources match only when both are plane waves; once the propagation phase over the loudspeaker radius is considered, the equation above no longer holds exactly. A decoding formula that compensates for the phase shift is

$$\mathbf{w} = e^{\, i 2 \pi (r \bmod \lambda) / \lambda} \left( \mathbf{Y}^{H} \right)^{+} \mathbf{A}.$$
To reproduce other wave types, divide out the radial term of the secondary sources:

$$\mathbf{w} = \left( \mathbf{Y}^{H} \right)^{+} \operatorname{diag}\!\left( \frac{1}{F_{nkr}} \right) \mathbf{A},$$

where $F_{nkr}$ is a value determined by the type of wave the secondary sources emit.
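As one concrete example (my assumption, since the article does not specify it): if the secondary sources are modeled as point sources, a common choice of radial term is $F_{nkr} = i k\, h_n(kr)$, with $h_n$ the spherical Hankel function of the first kind:

import numpy as np
from scipy.special import spherical_jn, spherical_yn

def Fn_spherical(n, k, r):
    # F_nkr for spherical-wave (point-source) secondary sources at radius r:
    # F_n(kr) = i * k * h_n(kr), where h_n = j_n + i*y_n (first kind).
    # This is an assumed example; the plane-wave case in this article
    # needs no radial division beyond the encode step.
    kr = k * r
    hn = spherical_jn(n, kr) + 1j * spherical_yn(n, kr)
    return 1j * k * hn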
The following code obtains the drive signals from the expansion coefficients computed during encoding.
decode.py
#Decode
spkr_output = fu.decode(ambi_order,azi_sp,ele_sp1,r_sp,lambda_,ambi_signal)
"""
def decode(ambi_order, azi, ele,r,lambda_,ambi_signal):
Y_N = np.zeros((len(azi),(ambi_order+1)**2),dtype = np.complex)
for i in range(azi.size):
Y_N[i,:] = calcSH(ambi_order, azi[i],ele[i])
invY = np.linalg.pinv(np.conj(Y_N.T))
r_res = r % lambda_
spkr_output = np.exp(1j*(2*np.pi*r_res/lambda_)) * (invY @ ambi_signal)
return spkr_output
"""
The third graph shows the normalization error, defined as

$$NE(\mathbf{r}, \omega) = 10 \log_{10} \frac{\left| p_{\mathrm{rep}}(\mathbf{r}, \omega) - p_{\mathrm{des}}(\mathbf{r}, \omega) \right|^2}{\left| p_{\mathrm{des}}(\mathbf{r}, \omega) \right|^2},$$

where $p_{\mathrm{rep}}(\mathbf{r}, \omega)$ is the sound pressure of the reproduced field and $p_{\mathrm{des}}(\mathbf{r}, \omega)$ is the sound pressure of the primary, i.e., desired, field. The radius of the sweet spot, the region where the reproduction error is about 4% or less, can be estimated from

$$r = \frac{N c}{2 \pi f}.$$
$c$ is the speed of sound, $f$ is the frequency, and $N$ is the truncation order. The truncation order is generally taken as $\lfloor \sqrt{L} \rfloor - 1$, where $L$ is the number of loudspeakers or microphones; $\lfloor \sqrt{L} \rfloor - 2$ is also used. Different orders may be used for pickup and reproduction, but in this simulation they are the same.
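As a worked example (a sketch, assuming this article's parameter values):

import numpy as np

num_mic = 64                              # (7+1)^2 microphones in this simulation
N = int(np.floor(np.sqrt(num_mic))) - 1   # truncation order -> 7
cv, freq = 344, 550                       # speed of sound [m/s], frequency [Hz]
rad = (N * cv) / (2 * np.pi * freq)       # sweet-spot radius
print(N, rad)                             # 7, about 0.70 m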
plot.py
X = np.arange(-Plot_range, Plot_range + space, space)
Y = np.arange(-Plot_range, Plot_range + space, space)
XX, YY = np.meshgrid(X, Y)
P_org = np.zeros((X.size, Y.size), dtype=complex)
P_HOA = np.zeros((X.size, Y.size), dtype=complex)
for j in range(X.size):
    for i in range(Y.size):
        pos_r = np.array([X[j], Y[i], 0])
        P_org[i, j] = gr.Plane_wave(freq, cv, pos_s, vec_dn, pos_r)
        for l in range(num_sp):
            # Each loudspeaker is modeled as a plane wave arriving from its direction
            G_transfer = gr.Plane_wave(freq, cv, pos_sp[l, :], -pos_sp[l, :], pos_r)
            P_HOA[i, j] += G_transfer * spkr_output[l]
NE = np.zeros((X.size, Y.size), dtype=complex)
for i in range(X.size):
    for j in range(Y.size):
        NE[i, j] = 10 * np.log10(((np.abs(P_HOA[i, j] - P_org[i, j])) ** 2) / ((np.abs(P_org[i, j])) ** 2))
# Sweet spot
rad = (ambi_order * cv) / (2 * np.pi * freq)
# Graphs
# Desired sound field
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax = plt.axes()
im = ax.imshow(np.real(P_org), interpolation='gaussian', cmap=cm.hot, origin='lower',
               extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
               vmax=1, vmin=-1)
ax.grid(False)
ax.add_patch(sweet_spot)
ax.add_patch(r_sp_)
plt.axis('scaled')
ax.set_aspect('equal')
plt.colorbar(im)
plt.savefig('original_sound.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
# Reproduced sound field
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax2 = plt.axes()
im2 = ax2.imshow(np.real(P_HOA), interpolation='gaussian', cmap=cm.jet, origin='lower',
                 extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
                 vmax=1, vmin=-1)
ax2.grid(False)
ax2.add_patch(sweet_spot)
ax2.add_patch(r_sp_)
plt.axis('scaled')
ax2.set_aspect('equal')
plt.colorbar(im2)
plt.savefig('reproduced_sound.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
# Normalization error
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax3 = plt.axes()
im3 = ax3.imshow(np.real(NE), interpolation='gaussian', cmap=cm.pink_r, origin='lower',
                 extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
                 vmax=0, vmin=-60)
ax3.grid(False)
ax3.add_patch(sweet_spot)
ax3.add_patch(r_sp_)
plt.axis('scaled')
ax3.set_aspect('equal')
plt.colorbar(im3)
plt.savefig('normalization_error.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
Here are the resulting fields.
[Figure: desired sound field]
[Figure: reproduced sound field]
[Figure: normalization error]
Finally, the full script:
ambisonics.py
import green_function as gr
import function as fu
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.cm as cm
# Initial conditions
ambi_order = 7                 # truncation order
freq = 550                     # frequency [Hz]
pos_s = np.array([2, 0, 0])    # primary source position x, y, z
vec_dn = np.array([1, 0, 0])   # plane-wave propagation direction
cv = 344                       # speed of sound [m/s]
r_sp = 1.5                     # radius of the loudspeaker array
r_mic = 0.05                   # radius of the microphone array
# Graph conditions
Plot_range = 2.0
space = 0.05
# Basic quantities
lambda_ = cv / freq
omega = 2 * np.pi * freq
k = omega / cv
# Loudspeaker placement
num_sp = (ambi_order + 1) ** 2
xyz = fu.EquiDistSphere((ambi_order + 1) ** 2)
xyz = np.array([xyz]).reshape(num_sp, 3)
pos_sp = xyz * r_sp
azi_sp, ele_sp, rr_sp = fu.cart2sph(pos_sp[:, 0], pos_sp[:, 1], pos_sp[:, 2])
ele_sp1 = np.pi / 2 - ele_sp
# Microphone placement
num_mic = (ambi_order + 1) ** 2
pos_mic = xyz * r_mic
azi_mic, ele_mic, rr_mic = fu.cart2sph(pos_mic[:, 0], pos_mic[:, 1], pos_mic[:, 2])
ele_mic1 = np.pi / 2 - ele_mic
# Sound pickup
mic_sig = np.array([])
for i in range(num_mic):
    mic_sig_ = gr.Plane_wave(freq, cv, pos_s, vec_dn, pos_mic[i])
    mic_sig = np.append(mic_sig, mic_sig_)
# Encode
ambi_signal = fu.encode(ambi_order, azi_mic, ele_mic1, k, r_mic, mic_sig)
# Decode
spkr_output = fu.decode(ambi_order, azi_sp, ele_sp1, r_sp, lambda_, ambi_signal)
# Compute the desired field (primary source) and the reproduced field
X = np.arange(-Plot_range, Plot_range + space, space)
Y = np.arange(-Plot_range, Plot_range + space, space)
XX, YY = np.meshgrid(X, Y)
P_org = np.zeros((X.size, Y.size), dtype=complex)
P_HOA = np.zeros((X.size, Y.size), dtype=complex)
for j in range(X.size):
    for i in range(Y.size):
        pos_r = np.array([X[j], Y[i], 0])
        P_org[i, j] = gr.Plane_wave(freq, cv, pos_s, vec_dn, pos_r)
        for l in range(num_sp):
            G_transfer = gr.Plane_wave(freq, cv, pos_sp[l, :], -pos_sp[l, :], pos_r)
            P_HOA[i, j] += G_transfer * spkr_output[l]
# Normalization error
NE = np.zeros((X.size, Y.size), dtype=complex)
for i in range(X.size):
    for j in range(Y.size):
        NE[i, j] = 10 * np.log10(((np.abs(P_HOA[i, j] - P_org[i, j])) ** 2) / ((np.abs(P_org[i, j])) ** 2))
# Sweet spot
rad = (ambi_order * cv) / (2 * np.pi * freq)
# Graphs
# Desired sound field
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax = plt.axes()
im = ax.imshow(np.real(P_org), interpolation='gaussian', cmap=cm.hot, origin='lower',
               extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
               vmax=1, vmin=-1)
ax.grid(False)
ax.add_patch(sweet_spot)
ax.add_patch(r_sp_)
plt.axis('scaled')
ax.set_aspect('equal')
plt.colorbar(im)
plt.savefig('original_sound.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
# Reproduced sound field
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax2 = plt.axes()
im2 = ax2.imshow(np.real(P_HOA), interpolation='gaussian', cmap=cm.jet, origin='lower',
                 extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
                 vmax=1, vmin=-1)
ax2.grid(False)
ax2.add_patch(sweet_spot)
ax2.add_patch(r_sp_)
plt.axis('scaled')
ax2.set_aspect('equal')
plt.colorbar(im2)
plt.savefig('reproduced_sound.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
# Normalization error
sweet_spot = patches.Circle(xy=(0, 0), radius=rad, ec='r', fill=False)
r_sp_ = patches.Circle(xy=(0, 0), radius=r_sp, ec='b', fill=False)
plt.figure()
ax3 = plt.axes()
im3 = ax3.imshow(np.real(NE), interpolation='gaussian', cmap=cm.pink_r, origin='lower',
                 extent=[-Plot_range, Plot_range, -Plot_range, Plot_range],
                 vmax=0, vmin=-60)
ax3.grid(False)
ax3.add_patch(sweet_spot)
ax3.add_patch(r_sp_)
plt.axis('scaled')
ax3.set_aspect('equal')
plt.colorbar(im3)
plt.savefig('normalization_error.pdf', bbox_inches="tight", dpi=64, facecolor="lightgray")
plt.show()
Since I rewrote code that was originally written in MATLAB into Python, some parts may look odd; if you see something that could be done better, please leave a comment.
The code is available on GitHub: https://github.com/koseki2580/ambisonics