Spaces:
Runtime error
Runtime error
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
import argparse
import json
import platform
from typing import Optional, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

from project_settings import project_path, temp_directory
from toolbox.webrtcvad.vad import WebRTCVad
| def get_args(): | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument( | |
| "--webrtcvad_examples_file", | |
| default=(project_path / "webrtcvad_examples.json").as_posix(), | |
| type=str | |
| ) | |
| args = parser.parse_args() | |
| return args | |
| webrtcvad: WebRTCVad = None | |
| def click_webrtcvad_button(audio: Tuple[int, np.ndarray], | |
| agg: int = 3, | |
| frame_duration_ms: int = 30, | |
| padding_duration_ms: int = 300, | |
| silence_duration_threshold: float = 0.3, | |
| ): | |
| global webrtcvad | |
| sample_rate, signal = audio | |
| webrtcvad = WebRTCVad(agg=int(agg), | |
| frame_duration_ms=frame_duration_ms, | |
| padding_duration_ms=padding_duration_ms, | |
| silence_duration_threshold=silence_duration_threshold, | |
| sample_rate=sample_rate, | |
| ) | |
| vad_segments = list() | |
| segments = webrtcvad.vad(signal) | |
| vad_segments += segments | |
| segments = webrtcvad.last_vad_segments() | |
| vad_segments += segments | |
| time = np.arange(0, len(signal)) / sample_rate | |
| plt.figure(figsize=(12, 5)) | |
| plt.plot(time, signal / 32768, color='b') | |
| for start, end in vad_segments: | |
| plt.axvline(x=start, ymin=0.25, ymax=0.75, color='g', linestyle='--', label='开始端点') # 标记开始端点 | |
| plt.axvline(x=end, ymin=0.25, ymax=0.75, color='r', linestyle='--', label='结束端点') # 标记结束端点 | |
| temp_image_file = temp_directory / "temp.jpg" | |
| plt.savefig(temp_image_file) | |
| image = Image.open(open(temp_image_file, "rb")) | |
| return image, vad_segments | |
| def main(): | |
| args = get_args() | |
| brief_description = """ | |
| ## Voice Activity Detection | |
| """ | |
| # examples | |
| with open(args.webrtcvad_examples_file, "r", encoding="utf-8") as f: | |
| webrtcvad_examples = json.load(f) | |
| # ui | |
| with gr.Blocks() as blocks: | |
| gr.Markdown(value=brief_description) | |
| with gr.Row(): | |
| with gr.Column(scale=5): | |
| with gr.Tabs(): | |
| with gr.TabItem("webrtcvad"): | |
| gr.Markdown(value="") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| webrtcvad_wav = gr.Audio(label="wav") | |
| with gr.Row(): | |
| webrtcvad_agg = gr.Dropdown(choices=[1, 2, 3], value=3, label="agg") | |
| webrtcvad_frame_duration_ms = gr.Slider(minimum=0, maximum=100, value=30, label="frame_duration_ms") | |
| with gr.Row(): | |
| webrtcvad_padding_duration_ms = gr.Slider(minimum=0, maximum=1000, value=300, label="padding_duration_ms") | |
| webrtcvad_silence_duration_threshold = gr.Slider(minimum=0, maximum=1.0, value=0.3, step=0.1, label="silence_duration_threshold") | |
| webrtcvad_button = gr.Button("retrieval", variant="primary") | |
| with gr.Column(scale=1): | |
| webrtcvad_image = gr.Image(label="image", height=300, width=720, show_label=False) | |
| webrtcvad_end_points = gr.TextArea(label="end_points", max_lines=35) | |
| gr.Examples( | |
| examples=webrtcvad_examples, | |
| inputs=[ | |
| webrtcvad_wav, webrtcvad_agg, webrtcvad_frame_duration_ms, | |
| webrtcvad_padding_duration_ms, webrtcvad_silence_duration_threshold | |
| ], | |
| outputs=[webrtcvad_image, webrtcvad_end_points], | |
| fn=click_webrtcvad_button | |
| ) | |
| # click event | |
| webrtcvad_button.click( | |
| click_webrtcvad_button, | |
| inputs=[ | |
| webrtcvad_wav, webrtcvad_agg, webrtcvad_frame_duration_ms, | |
| webrtcvad_padding_duration_ms, webrtcvad_silence_duration_threshold | |
| ], | |
| outputs=[webrtcvad_image, webrtcvad_end_points], | |
| ) | |
| blocks.queue().launch( | |
| share=False if platform.system() == "Windows" else False | |
| ) | |
| return | |
| if __name__ == "__main__": | |
| main() | |