Spaces:
Sleeping
Sleeping
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| import argparse | |
| import logging | |
| import json | |
| from pathlib import Path | |
| import platform | |
| import re | |
| from typing import Tuple | |
| from project_settings import project_path, log_directory | |
| import log | |
| log.setup(log_directory=log_directory) | |
| import gradio as gr | |
| from toolbox.os.command import Command | |
| main_logger = logging.getLogger("main") | |
def get_args():
    """Parse the command-line options of the demo.

    Returns:
        argparse.Namespace: carries ``example_wav_dir``, which defaults to
        the ``data/examples`` directory under ``project_path``.
    """
    default_example_dir = (project_path / "data/examples").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--example_wav_dir", default=default_example_dir, type=str)
    return arg_parser.parse_args()
def process_uploaded_file(
    vad_engine: str,
    filename: str,
    silence_time: float = 0.3,
    longest_activate: float = 3.0,
    speech_pad_time: float = 0.03,
    threshold: float = 0.5,
) -> Tuple[str, str]:
    """Dispatch an audio file to the selected VAD engine.

    Args:
        vad_engine: ``"nx_vad"`` or ``"silero_vad"``.
        filename: path of the audio file; may be ``None`` or empty when the
            caller has no file to offer.
        silence_time: forwarded to both engines.
        longest_activate: forwarded to ``nx_vad`` only.
        speech_pad_time: forwarded to ``silero_vad`` only.
        threshold: forwarded to ``silero_vad`` only.

    Returns:
        The ``(vad_timestamps, raw_vad_result)`` pair produced by the engine.
        For an unknown engine or a missing file, an ``(error message, "")``
        pair is returned instead.
    """
    if vad_engine not in ("nx_vad", "silero_vad"):
        return f"vad engine invalid: {vad_engine}", ""

    # Without this guard a missing upload reaches ``Path(filename)`` in the
    # engine runners: ``None`` raises TypeError and "" silently becomes ".".
    if not filename:
        return "no audio file provided", ""

    if vad_engine == "nx_vad":
        return run_nx_vad(filename, silence_time, longest_activate)
    return run_silero_vad(filename, silence_time, speech_pad_time, threshold)
def run_nx_vad(filename: str, silence_time: float = 0.3, longest_activate: float = 3.0) -> Tuple[str, str]:
    """Run the ``vad_bins/nx_vad`` binary on a file and parse its output.

    Args:
        filename: path of the audio file passed as ``--filename``.
        silence_time: value passed as ``--silence_time``.
        longest_activate: value passed as ``--longest_activate``.

    Returns:
        ``(vad_timestamps, raw_vad_result)``: a JSON string holding the list
        of ``[start, end]`` integer pairs matched in the output, and the
        unmodified output returned by ``Command.popen``.
    """
    import shlex  # local import: keeps this function self-contained

    filename = Path(filename).as_posix()
    main_logger.info("do nx vad: {}".format(filename))

    # Quote the path so a name containing spaces or shell metacharacters stays
    # a single argument. ``shlex.quote`` returns names made only of safe
    # characters unchanged, so ordinary paths produce the same command line.
    # NOTE(review): assumes Command.popen hands the string to a POSIX shell - confirm.
    cmd = "vad_bins/nx_vad --filename {} --silence_time {} --longest_activate {}".format(
        shlex.quote(filename), silence_time, longest_activate
    )
    raw_vad_result = Command.popen(cmd)

    # Captures the number preceding ``VadFlagPrepare`` and the number
    # preceding ``VadFlagNoSpeech``; at most one Pause/Speaking pair may sit
    # in between.
    pattern = "(\\d+)[\r\n]VadFlagPrepare[\r\n](?:\\d+)[\r\n]VadFlagSpeaking(?:[\r\n](?:\\d+)[\r\n]VadFlagPause[\r\n](?:\\d+)[\r\n]VadFlagSpeaking)?[\r\n](\\d+)[\r\n]VadFlagNoSpeech"
    segments = [
        (int(start), int(end))
        for start, end in re.findall(pattern, raw_vad_result, flags=re.DOTALL)
    ]
    vad_timestamps = json.dumps(segments, ensure_ascii=False, indent=2)
    return vad_timestamps, raw_vad_result
def run_silero_vad(filename: str,
                   silence_time: float = 0.3,
                   speech_pad_time: float = 0.03,
                   threshold: float = 0.5
                   ) -> Tuple[str, str]:
    """Run the ``vad_bins/silero_vad`` binary on a file and parse its output.

    Args:
        filename: path of the audio file passed as ``--filename``.
        silence_time: value passed as ``--silence_time``.
        speech_pad_time: value passed as ``--speech_pad_time``.
        threshold: value passed as ``--threshold``.

    Returns:
        ``(vad_timestamps, raw_vad_result)``: a JSON string holding the list
        of ``[start, end]`` integer pairs, and the unmodified output returned
        by ``Command.popen``. Each captured number is multiplied by 1000 and
        truncated to ``int`` (presumably seconds -> milliseconds; confirm
        against the binary's output format).
    """
    import shlex  # local import: keeps this function self-contained

    filename = Path(filename).as_posix()
    main_logger.info("do silero vad: {}".format(filename))

    # Quote the path so a name containing spaces or shell metacharacters stays
    # a single argument. ``shlex.quote`` returns names made only of safe
    # characters unchanged, so ordinary paths produce the same command line.
    # NOTE(review): assumes Command.popen hands the string to a POSIX shell - confirm.
    cmd = "vad_bins/silero_vad --filename {} --silence_time {} --speech_pad_time {} --threshold {}".format(
        shlex.quote(filename), silence_time, speech_pad_time, threshold
    )
    raw_vad_result = Command.popen(cmd)

    pattern = "speech starts at (.+?)s[\r\n].*?speech ends at (.+?)s"
    segments = [
        (int(float(start) * 1000), int(float(end) * 1000))
        for start, end in re.findall(pattern, raw_vad_result, flags=re.DOTALL)
    ]
    vad_timestamps = json.dumps(segments, ensure_ascii=False, indent=2)
    return vad_timestamps, raw_vad_result
def shell(cmd: str):
    """Pass ``cmd`` to ``Command.popen`` and return whatever it returns.

    WARNING(review): ``main`` connects this function to a free-text box in
    the "shell" tab, so anyone who can reach the web UI can submit arbitrary
    command text. Restrict or remove it before exposing the app.
    """
    output = Command.popen(cmd)
    return output
def main():
    """Build the Gradio VAD demo UI and serve it on port 7860."""
    args = get_args()

    title = "## GO语言实现的VAD."

    # One example row per ``*.wav`` file; the values follow the order of the
    # input widgets: engine, file, silence_time, longest_activate,
    # speech_pad_time, threshold.
    examples = [
        ["nx_vad", wav_path.as_posix(), 0.3, 3.0, 0.03, 0.5]
        for wav_path in Path(args.example_wav_dir).glob("*.wav")
    ]

    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("Upload from disk"):
                uploaded_file = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="Upload from disk",
                )
                with gr.Row():
                    uploaded_vad_engine = gr.Dropdown(
                        choices=["nx_vad", "silero_vad"], value="nx_vad", label="vad_engine"
                    )
                    uploaded_silence_time = gr.Slider(
                        minimum=0.0, maximum=1.0, value=0.3, step=0.01, label="silence_time"
                    )
                    uploaded_longest_activate = gr.Slider(
                        minimum=0.0, maximum=20.0, value=3.0, step=0.1, label="longest_activate"
                    )
                    uploaded_speech_pad_time = gr.Slider(
                        minimum=0.00, maximum=0.50, value=0.03, step=0.01, label="speech_pad_time"
                    )
                    uploaded_threshold = gr.Slider(
                        minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="threshold"
                    )
                upload_button = gr.Button("Run VAD", variant="primary")
                with gr.Row():
                    uploaded_vad_timestamps = gr.Textbox(label="vad_timestamps")
                    uploaded_raw_vad_result = gr.Textbox(label="raw_vad_result")

                # The examples panel and the button use the same components,
                # in the parameter order of ``process_uploaded_file``.
                vad_inputs = [
                    uploaded_vad_engine,
                    uploaded_file,
                    uploaded_silence_time,
                    uploaded_longest_activate,
                    uploaded_speech_pad_time,
                    uploaded_threshold,
                ]
                vad_outputs = [
                    uploaded_vad_timestamps,
                    uploaded_raw_vad_result,
                ]

                gr.Examples(
                    examples=examples,
                    inputs=vad_inputs,
                    outputs=vad_outputs,
                    fn=process_uploaded_file,
                )
                upload_button.click(
                    process_uploaded_file,
                    inputs=vad_inputs,
                    outputs=vad_outputs,
                )

            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run", variant="primary")
                shell_output = gr.Textbox(label="output")

                shell_button.click(shell, inputs=[shell_text], outputs=[shell_output])

    # Loopback only on Windows; every interface on other platforms.
    if platform.system() == "Windows":
        server_name = "127.0.0.1"
    else:
        server_name = "0.0.0.0"

    app = blocks.queue()
    app.launch(
        share=False,  # sharing is off on every platform
        server_name=server_name,
        server_port=7860,
        show_error=True,
    )
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()