Add a bunch of test files
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
from copy import copy
|
||||
import numpy as np
|
||||
import time
|
||||
import whisper
|
||||
|
||||
# Runs at import time: prints a marker showing this module was loaded.
print('Loaded audio.py')
|
||||
|
||||
# Number of samples in half a second of 48000 Hz audio (48000 * 0.5).
# Not referenced by the functions visible in this view of the file.
CHUNK_LENGTH = 24000 # 48000 Hz * 0.5 s
|
||||
|
||||
def process_pcm(audio_chunks, data, max_chunks=75):
    """Append one PCM chunk to a rolling buffer, dropping the oldest chunks.

    Per the original note, pymumble PCM is 16-bit 48000 Hz.

    Args:
        audio_chunks: Mutable list used as the rolling buffer; it is
            modified in place.
        data: The new chunk to append (stored as-is).
        max_chunks: Maximum number of chunks to keep. Defaults to 75, the
            window size this file was written around.

    Returns:
        None. The result is the in-place mutation of ``audio_chunks``.
    """
    audio_chunks.append(data)

    # Trim *all* excess from the front, not just one element, so a buffer
    # that ever got more than one chunk over the limit still converges back
    # to exactly ``max_chunks`` entries.
    excess = len(audio_chunks) - max_chunks
    if excess > 0:
        del audio_chunks[:excess]
|
||||
|
||||
def _downsample_pcm_48k_to_16k(pcm_bytes):
    """Low-pass filter 16-bit 48000 Hz PCM bytes and keep every third sample.

    Returns a float array that is still on the int16 amplitude scale
    (i.e. not yet normalised).
    """
    samples = np.frombuffer(pcm_bytes, np.int16)

    # Windowed-sinc low-pass kernel: cutoff at fs / 6 = 8000 Hz, which is the
    # Nyquist frequency of the 16000 Hz output, shaped by a Blackman window
    # and normalised to unity gain at DC.
    fs = 48000
    cutoff_freq = fs / 6
    num_taps = 101
    offsets = np.arange(num_taps) - (num_taps - 1) / 2
    taps = np.sinc(2 * cutoff_freq / fs * offsets)
    taps *= np.blackman(num_taps)
    taps /= np.sum(taps)

    # mode='same' keeps the output aligned with, and as long as, the input.
    filtered = np.convolve(samples, taps, mode='same')

    # Decimate by 3: 48000 Hz -> 16000 Hz.
    return filtered[::3]


def process_stream(audio_chunks, model, expected_chunks=75):
    """Transcribe the buffered audio with ``model`` and print the text.

    The buffered chunks are joined, low-pass filtered, downsampled from
    48000 Hz to 16000 Hz, scaled to float32 by dividing by 32768, passed
    through ``whisper.pad_or_trim`` and handed to ``model.transcribe``.

    Args:
        audio_chunks: Sequence of ``bytes`` chunks of 16-bit PCM.
        model: Object exposing ``transcribe(audio, language=...)`` that
            returns a mapping with a ``'text'`` entry (presumably a loaded
            whisper model; confirm against the caller).
        expected_chunks: Exact buffer length required before transcribing.
            Defaults to 75.

    Returns:
        None. Output is printed. When the buffer length is wrong the call
        prints a notice, sleeps 0.5 s and returns early.
    """
    if len(audio_chunks) != expected_chunks:
        print('Skipping, bad length.')
        time.sleep(0.5)
        return

    # Snapshot the buffer before joining so the data being processed cannot
    # change underneath us (presumably another thread keeps appending via
    # process_pcm; TODO confirm).
    pcm_bytes = b''.join(list(audio_chunks))

    downsampled = _downsample_pcm_48k_to_16k(pcm_bytes)
    norm_audio = downsampled.astype(np.float32) / 32768.0
    padded = whisper.pad_or_trim(norm_audio)

    start = time.time()
    result = model.transcribe(padded, language='en')
    elapsed = time.time() - start
    print('transcribed audio in', elapsed, 's')

    # Debugging aid kept from the original: when transcription is slow, dump
    # the audio that caused it and pause so it can be inspected.
    # NOTE(review): indentation was lost in the source; the print and sleep
    # are assumed to belong to this branch.
    if elapsed > 10:
        with open('downsampled.pcm', 'wb') as f:
            # Filtering can overshoot the int16 range; clip so the cast
            # saturates instead of wrapping around.
            clipped = np.clip(downsampled, -32768, 32767)
            f.write(clipped.astype(np.int16).tobytes())

        print('wrote file, sleeping')
        time.sleep(100)

    print(' ', result['text'])
|
||||
Reference in New Issue
Block a user