Add a bunch of test files

2023-04-07 21:50:01 +00:00
parent 89cb732e42
commit 41bc6015bf
7 changed files with 557 additions and 0 deletions
@@ -0,0 +1,73 @@
+from copy import copy
+import numpy as np
+import time
+import whisper
+
+# Import-time trace: printed once when this module is first loaded.
+print('Loaded audio.py')
+
+# Number of samples in half a second of 48 kHz audio (48000 Hz * 0.5 s).
+CHUNK_LENGTH = 48000 // 2
+
+def process_pcm(audio_chunks, data, max_chunks=75):
+    """Append one PCM chunk to a rolling buffer, discarding the oldest chunks.
+
+    pymumble PCM is 16-bit 48000 Hz.
+
+    Args:
+        audio_chunks: Mutable list of raw PCM byte strings; modified in place
+            so that other holders of the same list see the update.
+        data: The newly received PCM chunk to store.
+        max_chunks: Maximum number of chunks kept in ``audio_chunks``.
+            Defaults to 75, the window size the original code hard-coded.
+
+    Returns:
+        None. The buffer is updated in place.
+    """
+    audio_chunks.append(data)
+
+    # Drop everything beyond the window in one step. Unlike a single
+    # ``pop(0)``, this also recovers when the list was already over-long.
+    overflow = len(audio_chunks) - max_chunks
+    if overflow > 0:
+        del audio_chunks[:overflow]
+
+def _lowpass_taps(sample_rate=48000, cutoff_hz=48000 / 6, num_taps=101):
+    """Return Blackman-windowed sinc low-pass taps normalised to unit DC gain."""
+    offsets = np.arange(num_taps) - (num_taps - 1) / 2
+    taps = np.sinc(2 * cutoff_hz / sample_rate * offsets)
+    taps *= np.blackman(num_taps)
+    return taps / np.sum(taps)
+
+
+def _decimate_by_three(samples):
+    """Low-pass filter ``samples`` and keep every third value of the result."""
+    filtered = np.convolve(samples, _lowpass_taps(), mode='same')
+    return filtered[::3]
+
+
+def process_stream(audio_chunks, model, expected_chunks=75):
+    """Transcribe the buffered PCM window with ``model`` and print the text.
+
+    The chunks are joined, interpreted as int16 samples, low-pass filtered,
+    decimated by 3 (48000 Hz -> 16000 Hz when the input is 48 kHz), scaled to
+    float32 by dividing by 32768, passed through ``whisper.pad_or_trim`` and
+    handed to ``model.transcribe(..., language='en')``.
+
+    Args:
+        audio_chunks: List of raw PCM byte strings (see ``process_pcm``).
+        model: Object exposing ``transcribe(audio, language=...)`` that
+            returns a mapping with a ``'text'`` entry.
+        expected_chunks: Number of chunks required before transcribing.
+            Defaults to 75, the value the original code hard-coded.
+
+    Returns:
+        None. The transcription is printed to stdout.
+
+    Side effects:
+        Sleeps 0.5 s when the buffer is not exactly ``expected_chunks`` long.
+        When transcription takes longer than 10 s, writes the decimated audio
+        to ``downsampled.pcm`` as int16 and sleeps 100 s (debugging aid).
+    """
+    # Work on a snapshot so the length check and the data used below agree
+    # even if the caller's list is modified meanwhile (presumably by the
+    # audio callback - confirm against the caller).
+    snapshot = list(audio_chunks)
+    if len(snapshot) != expected_chunks:
+        print('Skipping, bad length.')
+        time.sleep(0.5)
+        return
+
+    pcm = np.frombuffer(b''.join(snapshot), np.int16)
+    downsampled_audio_data = _decimate_by_three(pcm)
+    norm_audio = downsampled_audio_data.astype(np.float32) / 32768.0
+
+    # NOTE: no silence gating is done; every full window is transcribed.
+    prepared = whisper.pad_or_trim(norm_audio)
+
+    start = time.time()
+    result = model.transcribe(prepared, language='en')
+    # Measure once so the printed duration and the slow-path test agree.
+    elapsed = time.time() - start
+    print('transcribed audio in', elapsed, 's')
+
+    if elapsed > 10:
+        # Filter overshoot can exceed the int16 range; round and clip before
+        # casting so the dumped samples do not wrap around.
+        dump = np.clip(np.rint(downsampled_audio_data), -32768, 32767)
+        with open('downsampled.pcm', 'wb') as f:
+            f.write(dump.astype(np.int16).tobytes())
+
+        print('wrote file, sleeping')
+        time.sleep(100)
+
+    print('  ', result['text'])