dicksonsarpong9/nigeria_ascent
Viewer • Updated • 2.69k • 4
How to use dicksonsarpong9/voxcpm_nigeria_accent with VoxCPM:
import soundfile as sf
from voxcpm import VoxCPM
# Load the fine-tuned model by its repository id.
model = VoxCPM.from_pretrained("dicksonsarpong9/voxcpm_nigeria_accent")

# All generation settings gathered in one mapping and expanded into keyword
# arguments below.
synthesis_options = dict(
    text="VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech.",
    # Optional voice cloning: path to a prompt recording and its transcript.
    prompt_wav_path=None,
    prompt_text=None,
    # LM guidance applied to LocDiT; larger values follow the prompt more
    # closely, though quality may suffer.
    cfg_value=2.0,
    # Number of LocDiT inference steps: more steps for better output,
    # fewer for faster generation.
    inference_timesteps=10,
    # Turn on the external text-normalization tool.
    normalize=True,
    # Turn on the external denoising tool.
    denoise=True,
    # Retry generation for some bad cases (e.g. output that does not stop),
    # up to the given number of attempts.
    retry_badcase=True,
    retry_badcase_max_times=3,
    # Length-ratio limit used for bad-case detection; may need adjusting
    # for slow-paced speech.
    retry_badcase_ratio_threshold=6.0,
)
wav = model.generate(**synthesis_options)

# Save the generated waveform as a 16 kHz WAV file.
sample_rate_hz = 16000
sf.write("output.wav", wav, sample_rate_hz)
print("saved: output.wav")
VoxCPM0.5B fine-tuned on Nigerian accent data.
from voxcpm.core import VoxCPM
from voxcpm.model.voxcpm import LoRAConfig
import json
# Read the LoRA settings from the JSON file; the 'lora_config' entry holds the
# keyword arguments for LoRAConfig.
# NOTE(review): 'lora_config.json' and 'lora_weights.safetensors' are resolved
# relative to the current working directory — presumably they must be
# downloaded from the model repository first; confirm.
with open('lora_config.json', encoding='utf-8') as f:
    lora_info = json.load(f)

lora_cfg = LoRAConfig(**lora_info['lora_config'])

# Load the model together with the LoRA adapter weights.
model = VoxCPM.from_pretrained(
    hf_model_id='dicksonsarpong9/voxcpm_nigeria_accent',
    lora_config=lora_cfg,
    lora_weights_path='lora_weights.safetensors',
)

# Synthesize a short test utterance.
audio = model.generate(text='Hello!')