Tunisian-Speech-rec / recognizer_tunisian_vosk.py
Rania Mani
initial commit
623d37e
raw
history blame
1.25 kB
import wave
import json
from vosk import Model as VoskModel, KaldiRecognizer as VoskRecognizer
import os
class RecognizerTunisianVosk:
    """Speech-to-text helper built on a Vosk model.

    The model is loaded once in the constructor and reused by every call to
    :meth:`transcribe`.
    """

    # Frames passed to the recognizer per call. Streaming in fixed-size chunks
    # keeps peak memory independent of the recording length.
    _CHUNK_FRAMES = 4000

    def __init__(self, recognizer_name: str = "vosk", vosk_model_dir: str = "model/vosk-model-small-ar-tn-0.1-linto"):
        """
        Load the Vosk model found in ``vosk_model_dir``.
        :param recognizer_name: Label stored on the instance; this class does not read it again.
        :param vosk_model_dir: Path to the Vosk model directory.
        :raises ValueError: If ``vosk_model_dir`` is not an existing directory.
        """
        self.recognizer_name = recognizer_name
        self.vosk_model_dir = vosk_model_dir
        # isdir rather than exists: a regular file at this path must be
        # rejected here instead of failing later inside the model loader.
        if not os.path.isdir(self.vosk_model_dir):
            raise ValueError(f"Vosk model directory '{self.vosk_model_dir}' does not exist.")
        self.vosk_model = VoskModel(self.vosk_model_dir)

    def transcribe(self, audio_path: str) -> str:
        """
        Transcribe speech from an audio file.
        :param audio_path: Path to the WAV file.
        :return: Transcribed text; utterances are joined with single spaces.
        :raises ValueError: If the file is not mono, 16-bit, uncompressed PCM.
        """
        with wave.open(audio_path, "rb") as wf:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
                raise ValueError("Audio file must be WAV format mono PCM (16-bit, mono, uncompressed).")

            recognizer = VoskRecognizer(self.vosk_model, wf.getframerate())
            segments = []

            # readframes returns b"" at end of file, which terminates iter().
            for chunk in iter(lambda: wf.readframes(self._CHUNK_FRAMES), b""):
                # A truthy return marks the end of an utterance; its text has
                # to be fetched with Result() before more audio is fed in.
                if recognizer.AcceptWaveform(chunk):
                    segments.append(json.loads(recognizer.Result()).get("text", ""))

            # Flush whatever is still buffered after the last chunk.
            segments.append(json.loads(recognizer.FinalResult()).get("text", ""))

        return " ".join(segment for segment in segments if segment)