import json
import os
import wave

from vosk import Model as VoskModel, KaldiRecognizer as VoskRecognizer


class RecognizerTunisianVosk:
    """Speech-to-text wrapper around a Vosk model loaded from a local directory.

    The model is loaded once in the constructor and reused by every call to
    :meth:`transcribe`.
    """

    # Number of audio frames handed to the recognizer per call. Streaming in
    # fixed-size chunks keeps memory use independent of the file length.
    _CHUNK_FRAMES = 4000

    def __init__(
        self,
        recognizer_name: str = "vosk",
        vosk_model_dir: str = "model/vosk-model-small-ar-tn-0.1-linto",
    ):
        """
        Load the Vosk model.

        :param recognizer_name: Free-form label stored on the instance.
        :param vosk_model_dir: Path to the directory containing the Vosk model.
        :raises ValueError: If ``vosk_model_dir`` is not an existing directory.
        """
        self.recognizer_name = recognizer_name
        self.vosk_model_dir = vosk_model_dir

        # isdir (not exists): a regular file at this path is not a usable
        # model directory and should be rejected with the same clear error.
        if not os.path.isdir(self.vosk_model_dir):
            raise ValueError(
                f"Vosk model directory '{self.vosk_model_dir}' does not exist "
                "or is not a directory."
            )

        self.vosk_model = VoskModel(self.vosk_model_dir)

    def transcribe(self, audio_path: str) -> str:
        """
        Transcribe speech from an audio file.

        The audio is streamed to the recognizer in chunks; the text of every
        utterance the recognizer finalizes is collected and joined with
        single spaces.

        :param audio_path: Path to the WAV file.
        :return: Transcribed text (empty string if nothing was recognized).
        :raises ValueError: If the file is not mono, 16-bit, uncompressed PCM.
        """
        segments = []

        with wave.open(audio_path, "rb") as wf:
            if (
                wf.getnchannels() != 1
                or wf.getsampwidth() != 2
                or wf.getcomptype() != "NONE"
            ):
                raise ValueError(
                    "Audio file must be WAV format mono PCM (16-bit, mono, uncompressed)."
                )

            recognizer = VoskRecognizer(self.vosk_model, wf.getframerate())

            while True:
                data = wf.readframes(self._CHUNK_FRAMES)
                if not data:
                    break
                # AcceptWaveform returns True when an utterance boundary was
                # detected; its text must be fetched now, because the next
                # AcceptWaveform call starts a new utterance.
                if recognizer.AcceptWaveform(data):
                    segments.append(json.loads(recognizer.Result()).get("text", ""))

            # Flush whatever is still buffered after the last chunk.
            segments.append(json.loads(recognizer.FinalResult()).get("text", ""))

        return " ".join(segment for segment in segments if segment)