Spaces:

AJ50
/

voice-cloning-backend

Sleeping

AJ50 committed 21 days ago

Commit

d9c4b3e

1 Parent(s): d6456b5

Fix Hindi synthesis: Use TTS library's built-in model auto-download

BREAKING: Removed HINDI_MODEL_DIR configuration
- TTS library now handles XTTS model auto-download automatically
- No need to pre-configure hindi_model_dir
- First Hindi synthesis will download ~1.2GB XTTS model from HF Hub
- Much faster and more reliable than manual downloading

Changes:
- multilingual_tts.py: Load model using TTS(model_name='...')
- routes.py: Removed HINDI_MODEL_DIR detection logic
- routes.py: Removed Hindi availability check (automatic now)
- /synthesize and /convert_song now work seamlessly with Hindi

This resolves the 503 error: Hindi synthesis now works on the first request, after the one-time model download.

Files changed (2) hide show

backend/app/multilingual_tts.py +15 -35
backend/app/routes.py +5 -27

backend/app/multilingual_tts.py CHANGED Viewed

@@ -81,10 +81,7 @@ class MultilingualTTSService:
             print("[MultilingualTTSService] ✓ English vocoder loaded")
     def _load_hindi_models(self):
-        """Load Hindi XTTS model (lazy load with auto-download)."""
-        if not self.hindi_model_dir:
-            raise RuntimeError("Hindi model not configured. Set hindi_model_dir path.")
         if self._xtts_model is None:
             print("[MultilingualTTSService] Loading Hindi XTTS model...")
             try:
@@ -95,37 +92,20 @@ class MultilingualTTSService:
                     "Install with: pip install TTS>=0.21.0"
                 )
-            config_path = self.hindi_model_dir / "config.json"
-            # Auto-download from HuggingFace Hub if model files missing
-            if not config_path.exists():
-                print("[MultilingualTTSService] Model files not found. Downloading from HuggingFace Hub...")
-                try:
-                    from huggingface_hub import snapshot_download
-                    # Download XTTS-v2 model from HF Hub
-                    snapshot_download(
-                        repo_id="coqui/XTTS-v2",
-                        cache_dir=str(self.hindi_model_dir.parent),
-                        local_dir=str(self.hindi_model_dir),
-                        local_dir_use_symlinks=False,  # Avoid symlinks for HF Spaces
-                    )
-                    print("[MultilingualTTSService] ✓ Model downloaded from HuggingFace Hub")
-                except ImportError:
-                    raise ImportError(
-                        "huggingface_hub library required for auto-download. "
-                        "Install with: pip install huggingface_hub"
-                    )
-                except Exception as e:
-                    raise RuntimeError(f"Failed to download Hindi model: {e}")
-            # Load XTTS model
-            self._xtts_model = TTS(
-                model_path=str(self.hindi_model_dir.resolve().as_posix()),
-                config_path=str(config_path),
-                gpu=False  # Set to True if CUDA available and needed
-            )
-            print("[MultilingualTTSService] ✓ Hindi XTTS loaded")
     def synthesize(self, text: str, voice_sample_path: Union[str, Path],
                   language: str = "english") -> np.ndarray:

             print("[MultilingualTTSService] ✓ English vocoder loaded")
     def _load_hindi_models(self):
+        """Load Hindi XTTS model (lazy load with auto-download via TTS library)."""
         if self._xtts_model is None:
             print("[MultilingualTTSService] Loading Hindi XTTS model...")
             try:
                     "Install with: pip install TTS>=0.21.0"
                 )
+            try:
+                # Let TTS library handle model download automatically
+                # It will use its built-in model cache and download from Hugging Face
+                # Model name: "tts_models/multilingual/multi-dataset/xtts_v2"
+                print("[MultilingualTTSService] Loading XTTS-v2 model (may auto-download if needed)...")
+                self._xtts_model = TTS(
+                    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
+                    gpu=False,  # Set to True if CUDA available and needed
+                    progress_bar=True
+                )
+                print("[MultilingualTTSService] ✓ Hindi XTTS loaded successfully")
+            except Exception as e:
+                print(f"[MultilingualTTSService] Error loading XTTS model: {e}")
+                raise RuntimeError(f"Failed to load Hindi XTTS model: {e}")
     def synthesize(self, text: str, voice_sample_path: Union[str, Path],
                   language: str = "english") -> np.ndarray:

backend/app/routes.py CHANGED Viewed

@@ -23,23 +23,8 @@ OUTPUT_FOLDER = BASE_DIR / 'outputs'
 MODELS_DIR = BASE_DIR / 'models'
 VOICES_DB = UPLOAD_FOLDER / 'voices.json'
-# Hindi model directory (check multiple possible locations)
-HINDI_MODEL_DIR = None
-possible_hindi_dirs = [
-    Path(os.getenv('HINDI_MODEL_PATH', '')) if os.getenv('HINDI_MODEL_PATH') else None,
-    BASE_DIR.parent / 'Apoorv_hindi_model' / 'models' / 'xtts_hindi',  # Local development
-    BASE_DIR / 'models' / 'xtts_hindi',  # Alternative location
-]
-for path in possible_hindi_dirs:
-    if path and path.exists():
-        HINDI_MODEL_DIR = path
-        print(f"✓ Hindi model found at: {HINDI_MODEL_DIR}")
-        break
-if not HINDI_MODEL_DIR:
-    print("⚠ Hindi model not found. Hindi synthesis will be unavailable.")
-    print("  To enable Hindi support, set HINDI_MODEL_PATH environment variable")
-    print("  or place model at: Apoorv_hindi_model/models/xtts_hindi")
 # Create directories with parents
 try:
@@ -203,13 +188,6 @@ def synthesize_speech():
         if language not in ['english', 'hindi']:
             return jsonify({'error': f'Unsupported language: {language}. Supported: english, hindi'}), 400
-        # Check if Hindi model is available for Hindi synthesis
-        if language == 'hindi' and not HINDI_MODEL_DIR:
-            return jsonify({
-                'error': 'Hindi synthesis unavailable. Hindi model not configured.',
-                'available_languages': ['english']
-            }), 503
         # Find the voice in database
         voices = load_voices_db()
         voice = next((v for v in voices if v['id'] == voice_id), None)
@@ -245,11 +223,11 @@ def synthesize_speech():
                     out_path=output_path
                 )
             else:
-                # Use multilingual TTS for Hindi
                 from app.multilingual_tts import MultilingualTTSService
                 tts_service = MultilingualTTSService(
                     models_dir=MODELS_DIR,
-                    hindi_model_dir=HINDI_MODEL_DIR
                 )
                 tts_service.synthesize_and_save(
                     text=text,
@@ -530,7 +508,7 @@ def convert_song():
         processor = MultilingualSongProcessor(
             models_dir=MODELS_DIR,
-            hindi_model_dir=HINDI_MODEL_DIR if language == 'hindi' else None
         )
         result_path = processor.convert_song(
             song_path=song_path,

 MODELS_DIR = BASE_DIR / 'models'
 VOICES_DB = UPLOAD_FOLDER / 'voices.json'
+# Note: Hindi model is auto-downloaded via TTS library on first use
+# No pre-configuration needed - TTS handles model management
 # Create directories with parents
 try:
         if language not in ['english', 'hindi']:
             return jsonify({'error': f'Unsupported language: {language}. Supported: english, hindi'}), 400
         # Find the voice in database
         voices = load_voices_db()
         voice = next((v for v in voices if v['id'] == voice_id), None)
                     out_path=output_path
                 )
             else:
+                # Use multilingual TTS for Hindi (auto-downloads model via TTS library)
                 from app.multilingual_tts import MultilingualTTSService
                 tts_service = MultilingualTTSService(
                     models_dir=MODELS_DIR,
+                    hindi_model_dir=None  # Not needed - TTS auto-manages model download
                 )
                 tts_service.synthesize_and_save(
                     text=text,
         processor = MultilingualSongProcessor(
             models_dir=MODELS_DIR,
+            hindi_model_dir=None  # Not needed - TTS auto-manages model download
         )
         result_path = processor.convert_song(
             song_path=song_path,