{ "architectures": [ "VocosModel" ], "dtype": "float32", "hidden_act": "gelu", "hidden_size": 512, "hop_length": 256, "intermediate_size": 1536, "istft_padding": "center", "kernel_size": 7, "layer_norm_eps": 1e-06, "layer_scale_init_value": 0.125, "model_type": "vocos", "n_fft": 1024, "n_mels": 100, "num_layers": 8, "padding": 3, "sample_rate": 24000, "transformers_version": "5.0.0.dev0" }