AJ50 committed on
Commit
d9c4b3e
·
1 Parent(s): d6456b5

Fix Hindi synthesis: Use TTS library's built-in model auto-download

Browse files

BREAKING: Removed HINDI_MODEL_DIR configuration
- The TTS library now downloads and caches the XTTS model on its own
- There is no need to pre-configure hindi_model_dir
- The first Hindi synthesis will download the ~1.2GB XTTS model from HF Hub
- Much faster and more reliable than manual downloading

Changes:
- multilingual_tts.py: Load model using TTS(model_name='...')
- routes.py: Removed HINDI_MODEL_DIR detection logic
- routes.py: Removed Hindi availability check (automatic now)
- /synthesize and /convert_song now work seamlessly with Hindi

This resolves the 503 error: Hindi synthesis now works on the first request, once the one-time model download has completed.

backend/app/multilingual_tts.py CHANGED
@@ -81,10 +81,7 @@ class MultilingualTTSService:
81
  print("[MultilingualTTSService] βœ“ English vocoder loaded")
82
 
83
  def _load_hindi_models(self):
84
- """Load Hindi XTTS model (lazy load with auto-download)."""
85
- if not self.hindi_model_dir:
86
- raise RuntimeError("Hindi model not configured. Set hindi_model_dir path.")
87
-
88
  if self._xtts_model is None:
89
  print("[MultilingualTTSService] Loading Hindi XTTS model...")
90
  try:
@@ -95,37 +92,20 @@ class MultilingualTTSService:
95
  "Install with: pip install TTS>=0.21.0"
96
  )
97
 
98
- config_path = self.hindi_model_dir / "config.json"
99
-
100
- # Auto-download from HuggingFace Hub if model files missing
101
- if not config_path.exists():
102
- print("[MultilingualTTSService] Model files not found. Downloading from HuggingFace Hub...")
103
- try:
104
- from huggingface_hub import snapshot_download
105
-
106
- # Download XTTS-v2 model from HF Hub
107
- snapshot_download(
108
- repo_id="coqui/XTTS-v2",
109
- cache_dir=str(self.hindi_model_dir.parent),
110
- local_dir=str(self.hindi_model_dir),
111
- local_dir_use_symlinks=False, # Avoid symlinks for HF Spaces
112
- )
113
- print("[MultilingualTTSService] βœ“ Model downloaded from HuggingFace Hub")
114
- except ImportError:
115
- raise ImportError(
116
- "huggingface_hub library required for auto-download. "
117
- "Install with: pip install huggingface_hub"
118
- )
119
- except Exception as e:
120
- raise RuntimeError(f"Failed to download Hindi model: {e}")
121
-
122
- # Load XTTS model
123
- self._xtts_model = TTS(
124
- model_path=str(self.hindi_model_dir.resolve().as_posix()),
125
- config_path=str(config_path),
126
- gpu=False # Set to True if CUDA available and needed
127
- )
128
- print("[MultilingualTTSService] βœ“ Hindi XTTS loaded")
129
 
130
  def synthesize(self, text: str, voice_sample_path: Union[str, Path],
131
  language: str = "english") -> np.ndarray:
 
81
  print("[MultilingualTTSService] βœ“ English vocoder loaded")
82
 
83
  def _load_hindi_models(self):
84
+ """Load Hindi XTTS model (lazy load with auto-download via TTS library)."""
 
 
 
85
  if self._xtts_model is None:
86
  print("[MultilingualTTSService] Loading Hindi XTTS model...")
87
  try:
 
92
  "Install with: pip install TTS>=0.21.0"
93
  )
94
 
95
+ try:
96
+ # Let TTS library handle model download automatically
97
+ # It will use its built-in model cache and download from Hugging Face
98
+ # Model name: "tts_models/multilingual/multi-dataset/xtts_v2"
99
+ print("[MultilingualTTSService] Loading XTTS-v2 model (may auto-download if needed)...")
100
+ self._xtts_model = TTS(
101
+ model_name="tts_models/multilingual/multi-dataset/xtts_v2",
102
+ gpu=False, # Set to True if CUDA available and needed
103
+ progress_bar=True
104
+ )
105
+ print("[MultilingualTTSService] βœ“ Hindi XTTS loaded successfully")
106
+ except Exception as e:
107
+ print(f"[MultilingualTTSService] Error loading XTTS model: {e}")
108
+ raise RuntimeError(f"Failed to load Hindi XTTS model: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  def synthesize(self, text: str, voice_sample_path: Union[str, Path],
111
  language: str = "english") -> np.ndarray:
backend/app/routes.py CHANGED
@@ -23,23 +23,8 @@ OUTPUT_FOLDER = BASE_DIR / 'outputs'
23
  MODELS_DIR = BASE_DIR / 'models'
24
  VOICES_DB = UPLOAD_FOLDER / 'voices.json'
25
 
26
- # Hindi model directory (check multiple possible locations)
27
- HINDI_MODEL_DIR = None
28
- possible_hindi_dirs = [
29
- Path(os.getenv('HINDI_MODEL_PATH', '')) if os.getenv('HINDI_MODEL_PATH') else None,
30
- BASE_DIR.parent / 'Apoorv_hindi_model' / 'models' / 'xtts_hindi', # Local development
31
- BASE_DIR / 'models' / 'xtts_hindi', # Alternative location
32
- ]
33
- for path in possible_hindi_dirs:
34
- if path and path.exists():
35
- HINDI_MODEL_DIR = path
36
- print(f"βœ“ Hindi model found at: {HINDI_MODEL_DIR}")
37
- break
38
-
39
- if not HINDI_MODEL_DIR:
40
- print("⚠ Hindi model not found. Hindi synthesis will be unavailable.")
41
- print(" To enable Hindi support, set HINDI_MODEL_PATH environment variable")
42
- print(" or place model at: Apoorv_hindi_model/models/xtts_hindi")
43
 
44
  # Create directories with parents
45
  try:
@@ -203,13 +188,6 @@ def synthesize_speech():
203
  if language not in ['english', 'hindi']:
204
  return jsonify({'error': f'Unsupported language: {language}. Supported: english, hindi'}), 400
205
 
206
- # Check if Hindi model is available for Hindi synthesis
207
- if language == 'hindi' and not HINDI_MODEL_DIR:
208
- return jsonify({
209
- 'error': 'Hindi synthesis unavailable. Hindi model not configured.',
210
- 'available_languages': ['english']
211
- }), 503
212
-
213
  # Find the voice in database
214
  voices = load_voices_db()
215
  voice = next((v for v in voices if v['id'] == voice_id), None)
@@ -245,11 +223,11 @@ def synthesize_speech():
245
  out_path=output_path
246
  )
247
  else:
248
- # Use multilingual TTS for Hindi
249
  from app.multilingual_tts import MultilingualTTSService
250
  tts_service = MultilingualTTSService(
251
  models_dir=MODELS_DIR,
252
- hindi_model_dir=HINDI_MODEL_DIR
253
  )
254
  tts_service.synthesize_and_save(
255
  text=text,
@@ -530,7 +508,7 @@ def convert_song():
530
 
531
  processor = MultilingualSongProcessor(
532
  models_dir=MODELS_DIR,
533
- hindi_model_dir=HINDI_MODEL_DIR if language == 'hindi' else None
534
  )
535
  result_path = processor.convert_song(
536
  song_path=song_path,
 
23
  MODELS_DIR = BASE_DIR / 'models'
24
  VOICES_DB = UPLOAD_FOLDER / 'voices.json'
25
 
26
+ # Note: Hindi model is auto-downloaded via TTS library on first use
27
+ # No pre-configuration needed - TTS handles model management
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Create directories with parents
30
  try:
 
188
  if language not in ['english', 'hindi']:
189
  return jsonify({'error': f'Unsupported language: {language}. Supported: english, hindi'}), 400
190
 
 
 
 
 
 
 
 
191
  # Find the voice in database
192
  voices = load_voices_db()
193
  voice = next((v for v in voices if v['id'] == voice_id), None)
 
223
  out_path=output_path
224
  )
225
  else:
226
+ # Use multilingual TTS for Hindi (auto-downloads model via TTS library)
227
  from app.multilingual_tts import MultilingualTTSService
228
  tts_service = MultilingualTTSService(
229
  models_dir=MODELS_DIR,
230
+ hindi_model_dir=None # Not needed - TTS auto-manages model download
231
  )
232
  tts_service.synthesize_and_save(
233
  text=text,
 
508
 
509
  processor = MultilingualSongProcessor(
510
  models_dir=MODELS_DIR,
511
+ hindi_model_dir=None # Not needed - TTS auto-manages model download
512
  )
513
  result_path = processor.convert_song(
514
  song_path=song_path,