使用python对音频信号进行降噪
本文系转在,原文地址请点击此算法基于(但不是完全重现)Audacity概述的一种降噪效果的算法(链接到C ++代码)该算法需要两个输入:包含音频片段原型噪声的噪声音频片段包含要删除的信号和噪声的信号音频片段算法步骤在噪声音频片段上计算FFT统计信息是通过噪声的FFT计算得出的(频率)基于噪声的统计信息(和算法的期望灵敏度)计算阈值通过信号计算FFT通过将信...
·
源码链接:https://download.csdn.net/download/bibinGee/12327301
此算法基于(但不是完全重现)Audacity概述的一种降噪效果的算法(链接到C ++代码)
该算法需要两个输入:
- 包含音频片段原型噪声的噪声音频片段
- 包含要删除的信号和噪声的信号音频片段
算法步骤
- 在噪声音频片段上计算FFT
- 统计信息是通过噪声的FFT计算得出的(频率)
- 基于噪声的统计信息(和算法的期望灵敏度)计算阈值
- 通过信号计算FFT
- 通过将信号FFT与阈值进行比较来确定掩码
- 使用滤镜在频率和时间上对蒙版进行平滑处理
- 掩码被叠加到信号的FFT中,并被反转
wav_loc = "assets/audio/fish.wav"
rate, data = wavfile.read(wav_loc)
data = data / 32768
def fftnoise(f):
f = np.array(f, dtype="complex")
Np = (len(f) - 1) // 2
phases = np.random.rand(Np) * 2 * np.pi
phases = np.cos(phases) + 1j * np.sin(phases)
f[1 : Np + 1] *= phases
f[-1 : -1 - Np : -1] = np.conj(f[1 : Np + 1])
return np.fft.ifft(f).real
def band_limited_noise(min_freq, max_freq, samples=1024, samplerate=1):
freqs = np.abs(np.fft.fftfreq(samples, 1 / samplerate))
f = np.zeros(samples)
f[np.logical_and(freqs >= min_freq, freqs <= max_freq)] = 1
IPython.display.Audio(data=data, rate=rate)
fig, ax = plt.subplots(figsize=(20,4))
ax.plot(data)
加入噪声
noise_len = 2 # seconds
noise = band_limited_noise(min_freq=4000, max_freq = 12000, samples=len(data), samplerate=rate)*10
noise_clip = noise[:rate*noise_len]
audio_clip_band_limited = data+noise
fig, ax = plt.subplots(figsize=(20,4))
ax.plot(audio_clip_band_limited)
IPython.display.Audio(data=audio_clip_band_limited, rate=rate)
加入噪声后的波形:
去噪
import time
from datetime import timedelta as td
def _stft(y, n_fft, hop_length, win_length):
return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
def _istft(y, hop_length, win_length):
return librosa.istft(y, hop_length, win_length)
def _amp_to_db(x):
return librosa.core.amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)
def _db_to_amp(x,):
return librosa.core.db_to_amplitude(x, ref=1.0)
def plot_spectrogram(signal, title):
fig, ax = plt.subplots(figsize=(20, 4))
cax = ax.matshow(
signal,
origin="lower",
aspect="auto",
cmap=plt.cm.seismic,
vmin=-1 * np.max(np.abs(signal)),
vmax=np.max(np.abs(signal)),
)
fig.colorbar(cax)
ax.set_title(title)
plt.tight_layout()
plt.show()
def plot_statistics_and_filter(
mean_freq_noise, std_freq_noise, noise_thresh, smoothing_filter
):
fig, ax = plt.subplots(ncols=2, figsize=(20, 4))
plt_mean, = ax[0].plot(mean_freq_noise, label="Mean power of noise")
plt_std, = ax[0].plot(std_freq_noise, label="Std. power of noise")
plt_std, = ax[0].plot(noise_thresh, label="Noise threshold (by frequency)")
ax[0].set_title("Threshold for mask")
ax[0].legend()
cax = ax[1].matshow(smoothing_filter, origin="lower")
fig.colorbar(cax)
ax[1].set_title("Filter for smoothing Mask")
plt.show()
def removeNoise(
audio_clip,
noise_clip,
n_grad_freq=2,
n_grad_time=4,
n_fft=2048,
win_length=2048,
hop_length=512,
n_std_thresh=1.5,
prop_decrease=1.0,
verbose=False,
visual=False,
):
"""Remove noise from audio based upon a clip containing only noise
Args:
audio_clip (array): The first parameter.
noise_clip (array): The second parameter.
n_grad_freq (int): how many frequency channels to smooth over with the mask.
n_grad_time (int): how many time channels to smooth over with the mask.
n_fft (int): number audio of frames between STFT columns.
win_length (int): Each frame of audio is windowed by `window()`. The window will be of length `win_length` and then padded with zeros to match `n_fft`..
hop_length (int):number audio of frames between STFT columns.
n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
visual (bool): Whether to plot the steps of the algorithm
Returns:
array: The recovered signal with noise subtracted
"""
if verbose:
start = time.time()
# STFT over noise
noise_stft = _stft(noise_clip, n_fft, hop_length, win_length)
noise_stft_db = _amp_to_db(np.abs(noise_stft)) # convert to dB
# Calculate statistics over noise
mean_freq_noise = np.mean(noise_stft_db, axis=1)
std_freq_noise = np.std(noise_stft_db, axis=1)
noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh
if verbose:
print("STFT on noise:", td(seconds=time.time() - start))
start = time.time()
# STFT over signal
if verbose:
start = time.time()
sig_stft = _stft(audio_clip, n_fft, hop_length, win_length)
sig_stft_db = _amp_to_db(np.abs(sig_stft))
if verbose:
print("STFT on signal:", td(seconds=time.time() - start))
start = time.time()
# Calculate value to mask dB to
mask_gain_dB = np.min(_amp_to_db(np.abs(sig_stft)))
print(noise_thresh, mask_gain_dB)
# Create a smoothing filter for the mask in time and frequency
smoothing_filter = np.outer(
np.concatenate(
[
np.linspace(0, 1, n_grad_freq + 1, endpoint=False),
np.linspace(1, 0, n_grad_freq + 2),
]
)[1:-1],
np.concatenate(
[
np.linspace(0, 1, n_grad_time + 1, endpoint=False),
np.linspace(1, 0, n_grad_time + 2),
]
)[1:-1],
)
smoothing_filter = smoothing_filter / np.sum(smoothing_filter)
# calculate the threshold for each frequency/time bin
db_thresh = np.repeat(
np.reshape(noise_thresh, [1, len(mean_freq_noise)]),
np.shape(sig_stft_db)[1],
axis=0,
).T
# mask if the signal is above the threshold
sig_mask = sig_stft_db < db_thresh
if verbose:
print("Masking:", td(seconds=time.time() - start))
start = time.time()
# convolve the mask with a smoothing filter
sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
sig_mask = sig_mask * prop_decrease
if verbose:
print("Mask convolution:", td(seconds=time.time() - start))
start = time.time()
# mask the signal
sig_stft_db_masked = (
sig_stft_db * (1 - sig_mask)
+ np.ones(np.shape(mask_gain_dB)) * mask_gain_dB * sig_mask
) # mask real
sig_imag_masked = np.imag(sig_stft) * (1 - sig_mask)
sig_stft_amp = (_db_to_amp(sig_stft_db_masked) * np.sign(sig_stft)) + (
1j * sig_imag_masked
)
if verbose:
print("Mask application:", td(seconds=time.time() - start))
start = time.time()
# recover the signal
recovered_signal = _istft(sig_stft_amp, hop_length, win_length)
recovered_spec = _amp_to_db(
np.abs(_stft(recovered_signal, n_fft, hop_length, win_length))
)
if verbose:
print("Signal recovery:", td(seconds=time.time() - start))
if visual:
plot_spectrogram(noise_stft_db, title="Noise")
if visual:
plot_statistics_and_filter(
mean_freq_noise, std_freq_noise, noise_thresh, smoothing_filter
)
if visual:
plot_spectrogram(sig_stft_db, title="Signal")
if visual:
plot_spectrogram(sig_mask, title="Mask applied")
if visual:
plot_spectrogram(sig_stft_db_masked, title="Masked signal")
if visual:
plot_spectrogram(recovered_spec, title="Recovered spectrogram")
return recovered_signal
output = removeNoise(audio_clip=audio_clip_band_limited,noise_clip=noise_clip,verbose=True,visual=True)
fig, ax = plt.subplots(nrows=1,ncols=1, figsize=(20,4))
plt.plot(output, color='black')
ax.set_xlim((0, len(output)))
plt.show()
# play back a sample of the song
IPython.display.Audio(data=output, rate=44100)
去噪后的波形:
让我们尝试一个更具挑战性的示例
wav_loc = "assets/audio/fish.wav"
src_rate, src_data = wavfile.read(wav_loc)
# src_data = np.concatenate((src_data, np.zeros(src_rate*3)))
src_data = src_data / 32768
wav_loc = "assets/audio/cafe.wav"
noise_rate, noise_data = wavfile.read(wav_loc)
# get some noise to add to the signal
noise_to_add = noise_data[len(src_data) : len(src_data) * 2]
# get a different part of the noise clip for calculating statistics
noise_clip = noise_data[: len(src_data)]
noise_clip = noise_clip / max(noise_to_add)
noise_to_add = noise_to_add / max(noise_to_add)
# apply noise
snr = 1 # signal to noise ratio
audio_clip_cafe = src_data + noise_to_add / snr
noise_clip = noise_clip / snr
fig, ax = plt.subplots(figsize=(20, 4))
ax.plot(noise_clip)
IPython.display.Audio(data=noise_clip, rate=src_rate)
强干扰噪声波形
fig, ax = plt.subplots(figsize=(20,4))
ax.plot(audio_clip_cafe)
IPython.display.Audio(data=audio_clip_cafe, rate=src_rate)
信号加上强干扰噪声后
执行去噪
output = removeNoise(
audio_clip=audio_clip_cafe,
noise_clip=noise_clip,
n_std_thresh=2,
prop_decrease=0.95,
visual=True,
)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 4))
plt.plot(output, color="black")
ax.set_xlim((0, len(output)))
plt.show()
# play back a sample of the song
IPython.display.Audio(data=output, rate=44100)
去噪后的信号
开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!
更多推荐
已为社区贡献2条内容
所有评论(0)