Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| import torchaudio | |
# Run inference on the GPU when PyTorch can see one; otherwise use the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Whisper large-v3 speech-recognition pipeline once, at module load,
# so that every transcription request reuses the same loaded model.
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=device,
)
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level Whisper pipeline.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            nothing was uploaded or recorded.

    Returns:
        The transcribed text. If ``audio_file`` is ``None`` a prompt asking
        for audio is returned instead, and if loading or transcription fails
        an ``"An error occurred: ..."`` message describing the exception is
        returned.
    """
    if audio_file is None:
        return "Please upload or record an audio file."

    # Sample rate the waveform is converted to before it is handed to Whisper.
    target_sample_rate = 16000

    try:
        # torchaudio.load yields a (channels, frames) tensor and its sample rate.
        audio, sample_rate = torchaudio.load(audio_file)

        # Downmix to mono by averaging the channel axis. The pipeline needs a
        # 1-D waveform; a plain squeeze() only achieves that for mono input
        # and leaves stereo recordings 2-D, which the pipeline rejects.
        mono = audio.mean(dim=0)

        # Resample only when needed; doing it after the downmix means a
        # single channel is resampled instead of every channel.
        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(sample_rate, target_sample_rate)
            mono = resampler(mono)

        return whisper_pipeline(mono.numpy())["text"]
    except Exception as e:
        # UI boundary: report the failure as text in the output box rather
        # than letting the exception propagate out of the click handler.
        return f"An error occurred: {e}"
# Assemble the web UI: an audio input, a trigger button, and a text output.
# NOTE(review): the original indentation was lost in this copy of the file;
# the Row is assumed to hold the audio input and the button - confirm the
# intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
        )
        transcribe_button = gr.Button(value="Transcribe")

    transcription_output = gr.Textbox(label="Transcription")

    # Clicking the button sends the audio file path to transcribe_audio and
    # shows the returned string in the transcription box.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=transcription_output,
    )

# Start the Gradio server.
demo.launch()