Spaces:

ftshijt
/

versa

Sleeping

ftshijt committed on Apr 14

Commit

6c509e2

1 Parent(s): 37d87af

fix docker setup for sdk

Files changed (2) hide show

README.md CHANGED Viewed

@@ -3,10 +3,10 @@ title: VERSA Speech & Audio Evaluation Demo
 emoji: 🎙️
 colorFrom: blue
 colorTo: indigo
-sdk: gradio
-sdk_version: 5.25.0
-app_file: app.py
 pinned: false
 license: apache-2.0
 ---
@@ -51,4 +51,4 @@ If you use VERSA in your research, please cite:
   primaryClass={cs.SD},
   url={https://arxiv.org/abs/2412.17667},
 }
-```

 emoji: 🎙️
 colorFrom: blue
 colorTo: indigo
+sdk: docker
 pinned: false
+license: mit
+hf_oauth: false
 license: apache-2.0
 ---
   primaryClass={cs.SD},
   url={https://arxiv.org/abs/2412.17667},
 }
+```

universal_metrics.yaml CHANGED Viewed

@@ -155,4 +155,49 @@
   fmin: 50
   fmax: 8000 #14000
   n_fft: 1024 # 1028
-  hop_size

   fmin: 50
   fmax: 8000 #14000
   n_fft: 1024 # 1028
+  hop_size: 320
+  mel_bins: 64
+  window_size: 1024
+  # PROJECTION SPACE CONFIG
+  d_proj: 1024
+  temperature: 0.003
+  # TRAINING AND EVALUATION CONFIG
+  num_classes: 527
+  batch_size: 1024
+  demo: False
+# Speaking rate calculation
+# -- speaking_rate: correct matching words/character counts
+- name: speaking_rate
+  model_tag: default
+  beam_size: 1
+  text_cleaner: whisper_basic
+# Audiobox Aesthetics (Unified automatic quality assessment for speech, music, and sound.)
+- name: audiobox_aesthetics
+  batch_size: 1
+  cache_dir: versa_cache/audiobox
+# ASR-match calculation
+# -- asr_match_error_rate: correct matching words/character counts
+- name: asr_match
+  model_tag: default
+  beam_size: 1
+  text_cleaner: whisper_basic
+# speaker related metrics
+# -- spk_similarity: speaker cosine similarity
+- name: speaker
+  model_tag: default
+# asvspoof related metrics
+# -- asvspoof_score: evaluate how the generated speech is likely to be classified by a deepfake classifier
+- name: asvspoof_score
+# signal related metrics
+# -- sir: signal to interference ratio
+# -- sar: signal to artifact ratio
+# -- sdr: signal to distortion ratio
+# -- ci-sdr: convolutive transfer function invariant signal to distortion ratio
+# -- si-snri: scale-invariant signal to noise ratio improvement
+- name: signal_metric