Spaces:
Sleeping
Sleeping
| import wave | |
| import json | |
| from vosk import Model as VoskModel, KaldiRecognizer as VoskRecognizer | |
| import os | |
class RecognizerTunisianVosk:
    """
    Speech-to-text helper built on a Vosk model loaded from a local directory.

    The model is loaded once in the constructor and reused by every call to
    ``transcribe``.
    """

    def __init__(self, recognizer_name: str = "vosk", vosk_model_dir: str = "model/vosk-model-small-ar-tn-0.1-linto"):
        """
        Load the Vosk model stored in ``vosk_model_dir``.

        :param recognizer_name: Label kept on the instance; this class does not use it otherwise.
        :param vosk_model_dir: Path to the directory containing the Vosk model.
        :raises ValueError: If ``vosk_model_dir`` is not an existing directory.
        """
        self.recognizer_name = recognizer_name
        self.vosk_model_dir = vosk_model_dir
        # Require a directory, not merely an existing path: a regular file
        # here would otherwise slip past this guard and only fail inside the
        # model loader with a less helpful error.
        if not os.path.isdir(self.vosk_model_dir):
            raise ValueError(f"Vosk model directory '{self.vosk_model_dir}' does not exist.")
        self.vosk_model = VoskModel(self.vosk_model_dir)

    def transcribe(self, audio_path: str) -> str:
        """
        Transcribe speech from an audio file.

        :param audio_path: Path to the WAV file.
        :return: Transcribed text (the ``"text"`` field of the recognizer's final result).
        :raises ValueError: If the file is not mono, 16-bit, uncompressed PCM.
        """
        with wave.open(audio_path, "rb") as wf:
            is_mono = wf.getnchannels() == 1
            is_16_bit = wf.getsampwidth() == 2
            is_uncompressed = wf.getcomptype() == "NONE"
            if not (is_mono and is_16_bit and is_uncompressed):
                raise ValueError("Audio file must be WAV format mono PCM (16-bit, mono, uncompressed).")

            # The recognizer is created per call with the sample rate taken
            # from the WAV header.
            recognizer = VoskRecognizer(self.vosk_model, wf.getframerate())

            # All frames are read and fed in a single call, so the whole
            # recording is held in memory while it is processed.
            recognizer.AcceptWaveform(wf.readframes(wf.getnframes()))
            final_json = recognizer.FinalResult()

        return json.loads(final_json)["text"]