Assignment 2: VBAP and HRTF#
Sebastian J. Schlecht (1), Nils Meyer-Kahlen (2)
Notebook: Cristóbal Andrade (1)
(1) Friedrich-Alexander-Universität Erlangen-Nürnberg (2) Aalto University
Contact: sebastian.schlecht@fau.de, cristobal.andrade@fau.de
The rendered scene in this assignment is rotating music band. You will learn how to implement VBAP and apply a set of HRTFs to binauralize the directional sound experience.
Duration: 12 Hours
matplotlib numpy pooch pyfar scipy sofar soundfile ipywidgets ipymp[ ]:
import soundfile as sfile
import numpy as np
import pooch
import matplotlib.pyplot as plt
import os
from scipy.spatial import ConvexHull
from IPython.display import Audio, display
%load_ext autoreload
%autoreload 2
from Config import Config
from Binaural_DSP import BinauralDSP, sph2cart
from VBAP_DSP import VBAP_DSP, sph2cart_vec
from Scene import rotating_band_scene
from cart2sph_vec import cart2sph_vec
%matplotlib inline
config = Config()
[ ]:
# Leave this as it is: This is the URL from which the data will be downloaded
url_py = 'https://github.com/pyfar/open-educational-resources/raw/main/courses/Virtual_Acoustics_Lab_FAU/Assignment2'
url_large_files = 'https://github.com/pyfar/files/raw/main/education/VAL_FAU'
url_hrtfs = 'https://github.com/pyfar/files/raw/main/education/VAR_TUB'
# Get current working directory (where the notebook was started)
notebook_dir = os.getcwd()
# Create a Pooch object using that directory
my_pooch_py = pooch.create(
path=notebook_dir,
base_url=url_py,
registry={
"Scene.py" : None,
"Binaural_DSP.py" : None,
"DSP.py" : None,
"cart2sph_vec.py" : None,
"VBAP_DSP.py" : None,
"hrirsDiffuseFieldEQ.py" : None,
"BlockConvolver_DSP.py" : None,
"Config.py" : None,
}
)
my_pooch_large_files = pooch.create(
path=notebook_dir,
base_url=url_large_files,
registry={
"rotatingBand.wav" : None,
"band_combined_snip.wav" : None,
}
)
my_pooch_hrtfs = pooch.create(
path=notebook_dir,
base_url=url_hrtfs,
registry={
"FABIAN_HRIR_measured_HATO_0.sofa" : None
}
)
# Download all files
for fname in my_pooch_py.registry:
fpath = my_pooch_py.fetch(fname)
print(f"Downloaded: {fpath}")
for fname in my_pooch_large_files.registry:
fpath = my_pooch_large_files.fetch(fname)
print(f"Downloaded: {fpath}")
for fname in my_pooch_hrtfs.registry:
fpath = my_pooch_hrtfs.fetch(fname)
print(f"Downloaded: {fpath}")
Motivation#
Upon completing this assignment, you will be able to binauralize a multichannel recording, resulting in the following output:
[ ]:
renderedSignal, fs = sfile.read('rotatingBand.wav')
display(Audio(renderedSignal.T, rate=fs))
BINAURALIZE with HRTFs#
For that, we first need a set of HRTFs. We provide you with a function that loads a SOFA (Spatially Oriented Format for Acoustics) file from the internet. You can see this set was measured on a very dense grid.
[ ]:
# Initialize binauralizer with empty source
binauralizer = BinauralDSP(config, None)
# Create 3D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(binauralizer.hrir_positions[:, 0],
binauralizer.hrir_positions[:, 1],
binauralizer.hrir_positions[:, 2])
ax.view_init(elev=20, azim=30)
ax.set_title("HRIR Measurement Grid")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
ax.set_box_aspect([1, 1, 1])
plt.grid(True)
plt.show()
1 TASK: Plot HRIR#
As an example, lets plot the HRIRs of a source at [90 0] deg (left). For that we first have to choose the appropriate set of HRIRs. For such a dense grid as measured here, a simple technique is to choose the closest measurement point.
Your task is to implement the nearestPoint function in Binaural_DSP
[ ]:
def nearestPoint(self, azi, ele):
# YOUR CODE HERE
raise NotImplementedError()
return idx_hrir
# Add the nearestPoint function to the BinauralDSP class
BinauralDSP.nearestPoint = nearestPoint
# Get the nearest HRIR index for azimuth 90° (pi/2) and elevation 0°
left_hrir_idx = binauralizer.nearestPoint(np.array([np.pi / 2]), np.array([0]))
# Plot HRIRs
plt.figure(figsize=(10, 6))
# Subplot 1: HRIRs
plt.subplot(2, 1, 1)
plt.plot(binauralizer.hrirs[left_hrir_idx,:,:].squeeze().transpose())
plt.title(r'HRIRs for $\Omega = [90^\circ, 0^\circ]$')
plt.legend(['Left', 'Right'])
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.grid(True)
# Subplot 2: HRTFs
fft_size = 2**13
fs = config.fs # Sampling frequency
freqs = np.linspace(0, fs, fft_size)
hrtf = np.fft.fft(binauralizer.hrirs[left_hrir_idx,:, :], fft_size, axis=2)
hrtf_db = 20 * np.log10(np.abs(hrtf))
plt.subplot(2, 1, 2)
plt.semilogx(freqs, hrtf_db.squeeze().transpose())
plt.title(r'HRTFs for $\Omega = [90^\circ, 0^\circ]$')
plt.xlim([20, fs / 2])
plt.ylim([-30, 30])
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
plt.grid(True)
# Show the figure
plt.tight_layout()
plt.show()
3 TASK:#
Describe and explain what you see, try to use the terms ITD and ILD!
[ ]:
# Write your answer here
# YOUR CODE HERE
raise NotImplementedError()
4 TASK: Render a signal with HRTFs#
Implement process for Binaural_DSP
The band instruments are placed in equal spacing around you: Write a render loop to apply the HRIRs using binauralizer.process()
[ ]:
def process(self, in_sig):
assert in_sig.shape[1] == self.numberOfInputs, 'Number of Inputs incorrect.'
out_sig = np.zeros((self.blockSize, 2))
# YOUR CODE HERE
raise NotImplementedError()
return out_sig
BinauralDSP.process = process
# Read audio file
inSignals, _ = sfile.read('band_combined_snip.wav') # [numSamples, numSources]
numSources = inSignals.shape[1] if len(inSignals.shape) > 1 else 1
# Create source directions
sourceDoa = np.column_stack([
np.linspace(0, 2*np.pi - 2*np.pi/numSources, numSources),
np.zeros(numSources)
])
# Initialize binauralizer
binauralizer = BinauralDSP(config, numSources)
binauralizer.set_doa(sourceDoa[:, 0], sourceDoa[:, 1])
# Block processing
blockSize = config.blockSize
numBlocks = inSignals.shape[0] // blockSize
binauralOut = np.zeros((numBlocks * blockSize, 2))
# Implement block processing Hint: Take a look at BlockConvolver_DSP.py
# YOUR CODE HERE
raise NotImplementedError()
# Plot first 2 seconds
plt.figure()
time_axis = np.linspace(0, 2, 2 * fs)
plt.plot(time_axis, binauralOut[:2*fs, 0],
time_axis, binauralOut[:2*fs, 1])
plt.grid(True)
plt.legend(['left', 'right'])
plt.xlabel('Time in s')
plt.ylabel('Amplitude')
plt.show()
binauralOut = binauralOut / np.max(np.abs(binauralOut))
# Write to file
sfile.write('binauralOut.wav', (0.9 * binauralOut).astype(np.float32), fs)
5 TASK: Convex Hull and triangulation of the loudspeaker setup#
Next, we introduce a new VBAP DSP object to perform directional panning. Before we can actually implement VBAP we need to triangulize the loudspeaker array. Recap from the lecture what a convex hull is and how it is used in VBAP. We provide you a plotting function show_hull() that visualizes the result.
Your task is to implement get_conv_hull for VBAP_DSP
[ ]:
def get_conv_hull(self):
# YOUR CODE HERE
raise NotImplementedError()
return hull.simplices
VBAP_DSP.get_conv_hull = get_conv_hull
vbap = VBAP_DSP(config.lsPositions)
vbap.get_conv_hull()
vbap.show_hull()
6 TASK: Modify loudspeaker setup in config and show hull#
Experiment with different changes to the loudspeaker positions and see how the convex hull changes.
[ ]:
%matplotlib widget
# DUMMY SOLUTION
ls_positions_modified = config.lsPositions.copy()
# YOUR CODE HERE
raise NotImplementedError()
vbap = VBAP_DSP(ls_positions_modified)
vbap.get_conv_hull()
vbap.show_hull()
7 TASK: Describe and discuss original and alternative loudspeaker setup and its convex hull.#
[ ]:
# Write your answer here
# YOUR CODE HERE
raise NotImplementedError()
Add virtual sources#
We revert to original loudspeaker layout, define virtual source positions and show them on top of the loudspeaker layout
[ ]:
%matplotlib widget
# Create VBAP object with original loudspeaker layout
vbap = VBAP_DSP(config.lsPositions)
vbap.get_conv_hull()
vbap.show_hull()
# Define virtual sources
src_azimuth = np.array([0, np.pi / 8])
src_elevation = np.array([0, np.pi / 8])
src_radius = np.array([1, 1])
# Convert spherical to Cartesian
src_position = sph2cart_vec(np.stack((src_azimuth, src_elevation, src_radius), axis=-1))
# Plot on top of the loudspeaker layout
ax.scatter(src_position[:,0],
src_position[:,1],
src_position[:,2], s=100, color='black', label='Virtual Source', alpha=1, zorder=5)
# Add legend and title
ax.legend(['Face', 'Loudspeaker', 'Virtual Source'])
ax.set_title('Loudspeaker Layout and Virtual Sources')
plt.show()
# move figure around to see the black dots
8 TASK: Which loudspeakers should be active?#
Look at the loudspeaker hull and explain for both virtual sources the expected outcome.
[ ]:
# Write your answer here
# YOUR CODE HERE
raise NotImplementedError()
9 TASK: Implement VBAP#
Pass the direction of arrival (DOA) angle and calculate the loudspeaker gains for each virtual source. In VBAP_DSP
Implement invert_bases()
Implement calculate_gains()
[ ]:
%matplotlib inline
def invert_bases(self):
num_faces = self.hull.shape[0]
inv_bases = np.zeros((3, 3, num_faces))
# YOUR CODE HERE
raise NotImplementedError()
return inv_bases
def calculate_gains(self, src_pos):
assert src_pos.shape[1] == 3
num_src = src_pos.shape[0]
gains = np.zeros((num_src, self.num_ls))
# YOUR CODE HERE
raise NotImplementedError()
return gains
# Add Methods to VBA_DSP
VBAP_DSP.invert_bases = invert_bases
VBAP_DSP.calculate_gains = calculate_gains
vbap.inv_bases = vbap.invert_bases()
# Set virtual sources
vbap.set_sources(src_azimuth, src_elevation)
colors = ['r', 'g', 'b', 'm', 'c', 'y']
# Plot the loudspeaker gains
plt.figure()
for i in range(vbap.ls_gains_new.shape[0]):
color = colors[i % len(colors)]
plt.stem(vbap.ls_gains_new[i],
markerfmt=color + 'o',
linefmt=color + '-',
label=f'Virtual Source {i+1}')
plt.xlabel('Loudspeaker')
plt.ylabel('Gains')
plt.title('Loudspeaker Gains')
plt.legend()
plt.grid(True)
plt.show()
10 TASK: Test VBAP#
Create 1sec test signals for each virtual source, two pure tones, 20 and 200 Hz, amplitude 0.5 .
Implement process in VBAP_DSP
Test rendering with: lsSignals = vbap.process(inSignals)
[ ]:
# --- DUMMY SOLUTION ---
# in_signals = np.zeros((config.fs, 2))
# ls_signals = vbap.process(in_signals)
# YOUR CODE HERE
raise NotImplementedError()
time = np.arange(ls_signals.shape[0]) / config.fs
offset_signals = ls_signals + np.arange(1, ls_signals.shape[1]+1) # Add offset per channel
plt.figure(figsize=(10, 4))
plt.plot(time, offset_signals)
plt.grid(True)
plt.xlabel('Time in s')
plt.ylabel('Signal + LS number offset')
plt.title('Loudspeaker Signals')
plt.tight_layout()
plt.show()
11 TASK: Look at the plots and explain the output#
[ ]:
# Write your answer here
# YOUR CODE HERE
raise NotImplementedError()
[ ]:
# --- Read audio ---
in_signals, fs = sfile.read('band_combined_snip.wav') # shape: [samples, channels]
# Create scene generator
scenes = rotating_band_scene(in_signals, config.blockSize)
# --- Initialize Binauralizer ---
binauralizer = BinauralDSP(config, config.lsPositions.shape[0])
ls_doa = cart2sph_vec(config.lsPositions)
binauralizer.set_doa(ls_doa[:, 0], ls_doa[:, 1])
assert fs == binauralizer.fs
# --- Block Processing ---
block_size = config.blockSize
num_blocks = len(scenes)
binaural_out = np.zeros((num_blocks * block_size, 2))
for it_block in range(num_blocks):
# YOUR CODE HERE
raise NotImplementedError()
# --- Plot waveform (first 2 seconds) ---
duration_sec = 2
num_samples = int(fs * duration_sec)
time_axis = np.linspace(0, duration_sec, num_samples)
plt.figure(figsize=(10, 4))
plt.plot(time_axis, binaural_out[:num_samples, 0], label='Left')
plt.plot(time_axis, binaural_out[:num_samples, 1], label='Right')
plt.xlabel('Time in s')
plt.ylabel('Amplitude')
plt.title('Binaural Output (First 2 Seconds)')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
# --- Save output ---
#sfile.write('rotatingBand.wav', 0.9 * binaural_out, fs)
display(Audio((0.9 * binaural_out.T).astype(np.float32), rate=config.fs))
License notice#
This notebook is licensed under CC BY 4.0
Watermark#
The following watermark might help others to install specific package versions that might be required to run the notebook.
[1]:
%load_ext watermark
%watermark -v -m -p numpy,scipy,pyfar,sofar,pooch,nbgrader,watermark,matplotlib,ipywidgets
Python implementation: CPython
Python version : 3.12.9
IPython version : 8.12.3
numpy : 2.1.3
scipy : 1.15.2
pyfar : 0.7.1
sofar : 1.2.1
pooch : 1.8.2
nbgrader : 0.9.5
watermark : 2.5.0
matplotlib: 3.10.1
ipywidgets: 8.1.5
Compiler : Clang 13.0.0 (clang-1300.0.29.30)
OS : Darwin
Release : 24.5.0
Machine : arm64
Processor : arm
CPU cores : 12
Architecture: 64bit