kassaby's picture
Update app.py
b1ac1c2 verified
"""
Quran Audio Enhancer - Hugging Face Gradio Space
=================================================
A complete GUI for improving the audio quality of Holy Quran recitations.
"""
import numpy as np
import scipy.signal as signal
import librosa
import soundfile as sf
import noisereduce as nr
import gradio as gr
import tempfile
import os
# ─────────────────────────────────────────────
# Processing functions
# ─────────────────────────────────────────────
def load_audio(file_path: str, sr: int = 22050):
    """Decode an audio file into a mono waveform at the requested rate.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load`` (22050 by default).

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.
    """
    # librosa.load already yields (waveform, rate); mono=True requests a
    # single-channel signal.
    return librosa.load(file_path, sr=sr, mono=True)
def remove_dc_offset(audio):
    """Center a waveform on zero by subtracting its mean value.

    Args:
        audio: Array of samples.

    Returns:
        A float32 array equal to ``audio`` minus its mean.
    """
    dc_level = np.mean(audio)
    centered = audio - dc_level
    return centered.astype(np.float32)
def reduce_noise(audio, sr, strength):
    """Run non-stationary noise reduction via ``noisereduce``.

    Args:
        audio: Waveform to clean.
        sr: Sample rate of ``audio`` in Hz.
        strength: Forwarded as ``prop_decrease`` to ``nr.reduce_noise``.

    Returns:
        Whatever ``nr.reduce_noise`` returns for the given settings.
    """
    # Clips longer than one second contribute their first half second as the
    # noise reference; anything shorter is used in full.
    if len(audio) > sr:
        noise_reference = audio[:int(sr * 0.5)]
    else:
        noise_reference = audio

    settings = dict(
        prop_decrease=strength,
        stationary=False,
        n_fft=2048,
        win_length=2048,
        hop_length=512,
        n_std_thresh_stationary=1.5,
        chunk_size=60000,
        use_torch=False,
    )
    return nr.reduce_noise(y=audio, sr=sr, y_noise=noise_reference, **settings)
def apply_bandpass_filter(audio, sr, low_hz=80, high_hz=8000):
    """Apply a zero-phase 6th-order Butterworth band-pass filter.

    The filter is designed as second-order sections (SOS) rather than as a
    single ``(b, a)`` transfer function: SciPy's documentation warns that the
    polynomial form is numerically sensitive for higher orders, and a
    6th-order band-pass (12 poles) with a low edge far below Nyquist is such
    a case.

    Args:
        audio: 1-D waveform to filter.
        sr: Sample rate of ``audio`` in Hz.
        low_hz: Lower band edge in Hz.
        high_hz: Upper band edge in Hz; the normalized edge is capped at
            0.99 of Nyquist.

    Returns:
        The filtered signal as a float32 array with the same length as
        ``audio``.

    Raises:
        ValueError: Propagated from SciPy, e.g. when the band edges are not
            strictly increasing or the input is shorter than the padding
            used by the forward-backward filter.
    """
    nyquist = sr / 2
    low = low_hz / nyquist
    # Keep the upper edge strictly below Nyquist so the design stays valid.
    high = min(high_hz / nyquist, 0.99)
    sos = signal.butter(6, [low, high], btype='band', output='sos')
    # Forward-backward filtering gives zero phase distortion.
    return signal.sosfiltfilt(sos, audio).astype(np.float32)
def enhance_clarity(audio):
    """Blend the harmonic component of the signal back with the original.

    Args:
        audio: Waveform passed to ``librosa.effects.hpss``.

    Returns:
        A float32 array made of 80% harmonic component and 20% original
        signal.
    """
    separated = librosa.effects.hpss(audio, margin=3.0)
    harmonic_part = separated[0]  # the percussive component is discarded
    blended = 0.8 * harmonic_part + 0.2 * audio
    return blended.astype(np.float32)
def apply_de_essing(audio, sr, threshold=0.4):
    """Attenuate the 5-10 kHz band wherever its short-term level is high.

    The band is isolated with a zero-phase 4th-order Butterworth band-pass,
    its moving RMS is normalized by the file's maximum, and wherever the
    normalized level exceeds ``threshold`` the band is scaled down to
    roughly ``threshold``. The rest of the spectrum is left untouched.

    Args:
        audio: 1-D waveform.
        sr: Sample rate of ``audio`` in Hz.
        threshold: Normalized band level (0-1) above which gain reduction
            is applied.

    Returns:
        The processed signal as a float32 array with the same length as
        ``audio``.

    Raises:
        ValueError: Propagated from SciPy, e.g. when the band edges are not
            strictly increasing for the given ``sr`` or the input is shorter
            than the filter padding.
    """
    nyquist = sr / 2
    low = 5000 / nyquist
    high = min(10000 / nyquist, 0.99)
    # Second-order sections are the numerically robust form recommended by
    # SciPy for Butterworth designs.
    sos = signal.butter(4, [low, high], btype='band', output='sos')
    sibilant = signal.sosfiltfilt(sos, audio)

    # np.convolve(mode='same') returns max(len(signal), len(kernel)) samples,
    # so the window must never be longer than the clip or the mask would not
    # line up with the audio.
    window = min(512, len(sibilant))
    kernel = np.ones(window) / window
    sib_rms = np.sqrt(np.convolve(sibilant ** 2, kernel, mode='same'))

    max_rms = np.max(sib_rms) + 1e-8
    level = sib_rms / max_rms
    # Gain is 1.0 below the threshold and threshold/level above it.
    mask = np.where(level > threshold, threshold / (level + 1e-8), 1.0)

    # Swap the original band content for its gain-reduced version.
    return (audio - sibilant + sibilant * mask).astype(np.float32)
def normalize_loudness(audio, target_db):
    """Scale a waveform so its RMS level matches ``target_db`` (dBFS).

    Args:
        audio: Array of samples.
        target_db: Desired RMS level in dB relative to full scale.

    Returns:
        A float32 array. Samples are hard-clipped to [-1.0, 1.0] after
        scaling. Empty or effectively silent input (RMS below 1e-8) is
        returned unscaled, still as float32 so every path yields the same
        dtype.
    """
    samples = np.asarray(audio)
    if samples.size == 0:
        # Nothing to measure; avoids a NaN RMS from the mean of an empty array.
        return samples.astype(np.float32)

    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1e-8:
        # Amplifying silence would only blow up numerical noise.
        return samples.astype(np.float32, copy=False)

    target_rms = 10 ** (target_db / 20)
    gain = target_rms / rms
    return np.clip(samples * gain, -1.0, 1.0).astype(np.float32)
def analyze_quality(audio, sr):
    """Measure overall level, peak and an estimated noise floor of a clip.

    The noise floor is the 10th percentile of the RMS of consecutive,
    non-overlapping 512-sample frames. Framing is done with a plain NumPy
    reshape so that clips shorter than one frame (or empty clips) are
    measured instead of raising.

    Args:
        audio: 1-D waveform (the pipeline loads audio as mono).
        sr: Sample rate in Hz. Not used by the measurements; kept for a
            uniform call signature.

    Returns:
        ``(rms_db, peak_db, noise_floor_db, noise_label)`` where the first
        three are Python floats in dBFS and ``noise_label`` is the
        user-facing rating string chosen from the noise floor (below -50,
        below -35, otherwise).
    """
    # NOTE(review): the label strings below look like UTF-8 text decoded with
    # the wrong codec (mojibake) — confirm the file's encoding.
    eps = 1e-8
    frame_len = 512

    samples = np.asarray(audio)
    if samples.size == 0:
        # Treat an empty clip as a single sample of digital silence.
        samples = np.zeros(1, dtype=np.float32)

    rms_db = float(20 * np.log10(np.sqrt(np.mean(samples ** 2)) + eps))
    peak_db = float(20 * np.log10(np.max(np.abs(samples)) + eps))

    n_frames = len(samples) // frame_len
    if n_frames:
        # Drop the trailing partial frame, then view the rest as rows of
        # frame_len samples each.
        frames = samples[:n_frames * frame_len].reshape(n_frames, frame_len)
        frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))
    else:
        # Clip shorter than one frame: use the whole clip as the only frame.
        frame_rms = np.sqrt(np.mean(samples ** 2, keepdims=True))

    noise_floor_db = float(20 * np.log10(np.percentile(frame_rms, 10) + eps))

    if noise_floor_db < -50:
        noise_label = "๐ŸŸข ู…ู†ุฎูุถุฉ"
    elif noise_floor_db < -35:
        noise_label = "๐ŸŸก ู…ุชูˆุณุทุฉ"
    else:
        noise_label = "๐Ÿ”ด ู…ุฑุชูุนุฉ"
    return rms_db, peak_db, noise_floor_db, noise_label
# ─────────────────────────────────────────────
# Main processing function
# ─────────────────────────────────────────────
def process_audio(audio_file, noise_strength, apply_bandpass,
                  apply_enhancement, apply_deessing, target_db, output_format):
    """Run the enhancement pipeline on an uploaded file and build a report.

    Steps, in order: load at 22050 Hz mono, measure quality, remove DC
    offset, reduce noise, optionally band-pass, optionally enhance clarity,
    optionally de-ess, normalize loudness, measure quality again, write the
    result to a temporary file.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when
            nothing was uploaded.
        noise_strength: Strength passed to ``reduce_noise``.
        apply_bandpass: Whether to run ``apply_bandpass_filter``.
        apply_enhancement: Whether to run ``enhance_clarity``.
        apply_deessing: Whether to run ``apply_de_essing``.
        target_db: Target RMS level passed to ``normalize_loudness``.
        output_format: ``"WAV"`` selects 16-bit PCM WAV; any other value
            selects FLAC.

    Returns:
        ``(out_path, report)``: the path of the written file and a Markdown
        report string, or ``(None, message)`` when ``audio_file`` is
        ``None``.
    """
    # NOTE(review): the user-facing strings in this function look like UTF-8
    # text decoded with the wrong codec (mojibake) — confirm the file's
    # encoding before shipping.
    if audio_file is None:
        return None, "โš ๏ธ ุงู„ุฑุฌุงุก ุฑูุน ู…ู„ู ุตูˆุชูŠ ุฃูˆู„ุงู‹."

    # Load
    audio, sr = load_audio(audio_file, sr=22050)
    duration = len(audio) / sr

    # Analysis before processing
    rms_b, peak_b, noise_b, noise_label_b = analyze_quality(audio, sr)

    # Processing
    audio = remove_dc_offset(audio)
    audio = reduce_noise(audio, sr, noise_strength)
    if apply_bandpass:
        audio = apply_bandpass_filter(audio, sr)
    if apply_enhancement:
        audio = enhance_clarity(audio)
    if apply_deessing:
        audio = apply_de_essing(audio, sr)
    audio = normalize_loudness(audio, target_db)

    # Analysis after processing
    rms_a, peak_a, noise_a, noise_label_a = analyze_quality(audio, sr)

    # Save
    ext = "wav" if output_format == "WAV" else "flac"
    # tempfile.mktemp() is deprecated: it only returns a name, so another
    # process can claim the path before we write to it. Create the file
    # atomically instead, close it, and let soundfile overwrite it.
    with tempfile.NamedTemporaryFile(suffix=f"_enhanced.{ext}",
                                     delete=False) as tmp:
        out_path = tmp.name
    sf.write(out_path, audio, sr,
             format=ext.upper(),
             subtype='PCM_16' if ext == 'wav' else None)

    # Report
    report = f"""
## ๐Ÿ“Š ุชู‚ุฑูŠุฑ ุงู„ู…ุนุงู„ุฌุฉ
| | ู‚ุจู„ | ุจุนุฏ |
|---|---|---|
| ู…ุณุชูˆู‰ ุงู„ุตูˆุช (RMS) | {rms_b:.1f} dBFS | {rms_a:.1f} dBFS |
| ุงู„ุฐุฑูˆุฉ | {peak_b:.1f} dBFS | {peak_a:.1f} dBFS |
| ู…ุณุชูˆู‰ ุงู„ุถูˆุถุงุก | {noise_b:.1f} dBFS | {noise_a:.1f} dBFS |
| ุชู‚ุฏูŠุฑ ุงู„ุถูˆุถุงุก | {noise_label_b} | {noise_label_a} |
**โฑ๏ธ ู…ุฏุฉ ุงู„ู…ู„ู:** {duration:.1f} ุซุงู†ูŠุฉ
**๐ŸŽต ู…ุนุฏู„ ุงู„ุนูŠู†ุงุช:** {sr} Hz
**๐Ÿ“ ุงู„ุตูŠุบุฉ:** {output_format}
### ุงู„ุฎุทูˆุงุช ุงู„ู…ุทุจู‘ู‚ุฉ:
{"โœ…" if True else "โŒ"} ุฅุฒุงู„ุฉ DC Offset
โœ… ุฅุฒุงู„ุฉ ุงู„ุถูˆุถุงุก (ู‚ูˆุฉ: {noise_strength})
{"โœ…" if apply_bandpass else "โŒ"} ูู„ุชุฑ ุงู„ุชุฑุฏุฏุงุช ุงู„ุตูˆุชูŠุฉ
{"โœ…" if apply_enhancement else "โŒ"} ุชุญุณูŠู† ุงู„ูˆุถูˆุญ
{"โœ…" if apply_deessing else "โŒ"} De-essing
โœ… ุชุนุฏูŠู„ ู…ุณุชูˆู‰ ุงู„ุตูˆุช โ†’ {target_db} dBFS
""".strip()
    return out_path, report
# ─────────────────────────────────────────────
# Gradio interface
# ─────────────────────────────────────────────
# Declarative Gradio layout: a header, a two-column row (inputs and settings
# on one side, results on the other), the button wiring, and a tips footer.
# NOTE(review): the source this was reviewed from had lost its indentation;
# the nesting below (in particular where the inner checkbox Row ends) is
# reconstructed from the comments and widget order — confirm against the
# deployed layout.
# NOTE(review): the user-facing strings look like UTF-8 text decoded with the
# wrong codec (mojibake) — confirm the file's encoding.
with gr.Blocks(
    title="๐Ÿ•Œ Quran Audio Enhancer",
) as demo:
    # Page header (title and subtitle) as raw HTML.
    gr.HTML("""
<div class='title-text'>
<h1>๐Ÿ•Œ Quran Audio Enhancer</h1>
</div>
<div class='subtitle-text'>
<p>ุฃุฏุงุฉ ู„ุชุญุณูŠู† ุฌูˆุฏุฉ ุชู„ุงูˆุฉ ุงู„ู‚ุฑุขู† ุงู„ูƒุฑูŠู… โ€” ุฅุฒุงู„ุฉ ุงู„ุถูˆุถุงุก ูˆุชุญุณูŠู† ุงู„ุตูˆุช</p>
</div>
""")
    with gr.Row():
        # Left column: input and settings
        with gr.Column(scale=1):
            gr.Markdown("### ๐Ÿ“ ุฑูุน ุงู„ู…ู„ู ุงู„ุตูˆุชูŠ")
            # type="filepath" makes Gradio hand process_audio a path string.
            audio_input = gr.Audio(
                label="ุงุฑูุน ุงู„ู…ู„ู ู‡ู†ุง (WAV, MP3, FLAC, OGG, M4A)",
                type="filepath",
            )
            gr.Markdown("### โš™๏ธ ุฅุนุฏุงุฏุงุช ุงู„ู…ุนุงู„ุฌุฉ")
            # Noise-reduction strength, forwarded to process_audio.
            noise_strength = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.75, step=0.05,
                label="ู‚ูˆุฉ ุฅุฒุงู„ุฉ ุงู„ุถูˆุถุงุก",
                info="0 = ุฎููŠู ุฌุฏุงู‹ | 1 = ู‚ูˆูŠ ุฌุฏุงู‹"
            )
            # Target output level in dBFS, forwarded to process_audio.
            target_db = gr.Slider(
                minimum=-40.0, maximum=-6.0, value=-18.0, step=1.0,
                label="ู…ุณุชูˆู‰ ุงู„ุตูˆุช ุงู„ู†ู‡ุงุฆูŠ (dBFS)",
                info="ุงู„ู‚ูŠู…ุฉ ุงู„ู…ูˆุตู‰ ุจู‡ุง: -18"
            )
            # Toggles for the three optional processing stages.
            with gr.Row():
                apply_bandpass = gr.Checkbox(value=True, label="ูู„ุชุฑ ุงู„ุชุฑุฏุฏุงุช ุงู„ุตูˆุชูŠุฉ")
                apply_enhancement = gr.Checkbox(value=True, label="ุชุญุณูŠู† ุงู„ูˆุถูˆุญ")
                apply_deessing = gr.Checkbox(value=True, label="De-essing")
            # Output container format; process_audio treats "WAV" specially
            # and writes FLAC for any other value.
            output_format = gr.Radio(
                choices=["WAV", "FLAC"],
                value="WAV",
                label="ุตูŠุบุฉ ุงู„ุฅุฎุฑุงุฌ"
            )
            process_btn = gr.Button(
                "๐Ÿš€ ุงุจุฏุฃ ุงู„ู…ุนุงู„ุฌุฉ",
                variant="primary",
                size="lg"
            )
        # Right column: result
        with gr.Column(scale=1):
            gr.Markdown("### ๐ŸŽต ุงู„ู…ู„ู ุงู„ู…ุญุณู‘ู†")
            audio_output = gr.Audio(
                label="ุงุณุชู…ุน ูˆุญู…ู‘ู„ ุงู„ู…ู„ู ุงู„ู…ุญุณู‘ู†",
                type="filepath",
            )
            gr.Markdown("### ๐Ÿ“Š ุงู„ุชู‚ุฑูŠุฑ")
            # Placeholder text until the first report is produced.
            report_output = gr.Markdown(
                value="*ุณูŠุธู‡ุฑ ุงู„ุชู‚ุฑูŠุฑ ุจุนุฏ ุงู„ู…ุนุงู„ุฌุฉ...*"
            )
    # Wire up the button: the inputs list must stay in the same order as
    # process_audio's parameters, and the outputs match its two return values.
    process_btn.click(
        fn=process_audio,
        inputs=[
            audio_input, noise_strength, apply_bandpass,
            apply_enhancement, apply_deessing, target_db, output_format
        ],
        outputs=[audio_output, report_output],
    )
    # Footer with usage tips.
    gr.Markdown("""
---
**ู†ุตุงุฆุญ ู„ู„ุญุตูˆู„ ุนู„ู‰ ุฃูุถู„ ู†ุชูŠุฌุฉ:**
- ุงุณุชุฎุฏู… `ู‚ูˆุฉ ุฅุฒุงู„ุฉ ุงู„ุถูˆุถุงุก` ุจูŠู† 0.6 ูˆ0.85 ู„ู„ุชู„ุงูˆุงุช
- ุฅุฐุง ูƒุงู† ุงู„ุตูˆุช ูŠุจุฏูˆ ุงุตุทู†ุงุนูŠุงู‹ุŒ ู‚ู„ู„ ุงู„ู‚ูˆุฉ
- ุตูŠุบุฉ FLAC ุฃูุถู„ ู„ู„ุฃุฑุดูุฉ | WAV ู„ู„ุงุณุชุฎุฏุงู… ุงู„ุนุงุฏูŠ
""")
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()