Update app.py
app.py CHANGED
@@ -153,6 +153,37 @@ def get_tango(prompt):
     print(result)
     return result
 
+def get_tango2(prompt):
+    try:
+        client = Client("declare-lab/tango2")
+    except:
+        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
+
+    result = client.predict(
+        prompt,
+        100,
+        4,
+        api_name="/predict"
+    )
+    print(result)
+    return result
+
+def get_stable_audio_open(prompt):
+    try:
+        client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
+    except:
+        raise gr.Error("Stable Audio Open space API is not ready, please try again in few minutes ")
+
+    result = client.predict(
+        prompt=prompt,
+        seconds_total=30,
+        steps=100,
+        cfg_scale=7,
+        api_name="/predict"
+    )
+    print(result)
+    return result
+
 def blend_vsfx(video_in, audio_result):
     audioClip = AudioFileClip(audio_result)
     print(f"AUD: {audioClip.duration}")
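Both new helpers follow the gradio_client pattern already used by get_tango: build a Client for the upstream Space, call predict with the arguments that Space expects, and return the resulting audio file path. A minimal standalone sketch of the same call, with an illustrative prompt (the meaning of the two numeric arguments is inferred, not stated in this diff):

# Sketch of the call pattern used by get_tango2; the prompt is illustrative.
from gradio_client import Client

client = Client("declare-lab/tango2")
audio_path = client.predict(
    "rain drumming on a tin roof",  # text prompt
    100,                            # presumably diffusion steps
    4,                              # presumably guidance scale
    api_name="/predict"
)
print(audio_path)  # local path to the generated audio file

One design note: the bare except: around the Client constructor also swallows interrupts and programming errors; catching Exception would keep the friendly gr.Error message while staying narrower.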
@@ -180,6 +211,10 @@ def infer(video_in, chosen_model):
         audio_result = get_audiogen(caption)
     elif chosen_model == "Tango" :
         audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
 
     final_res = blend_vsfx(video_in, audio_result)
     return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
@@ -195,6 +230,10 @@ def retry(edited_prompt, video_in, chosen_model):
         audio_result = get_audiogen(caption)
     elif chosen_model == "Tango" :
         audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
 
     final_res = blend_vsfx(video_in, audio_result)
     return audio_result, final_res
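The same four-branch elif chain now appears in both infer and retry. A possible follow-up (hypothetical, not part of this commit) is a table-driven dispatch so each new model is registered once and shared by both functions:

# Hypothetical refactor: one registry shared by infer() and retry().
# MAGNet and AudioLDM-2 are omitted because their handler names are not
# visible in this diff.
MODEL_HANDLERS = {
    "AudioGen": get_audiogen,
    "Tango": get_tango,
    "Tango 2": get_tango2,
    "Stable Audio Open": get_stable_audio_open,
}

def get_audio_result(chosen_model, caption):
    handler = MODEL_HANDLERS.get(chosen_model)
    if handler is None:
        raise gr.Error(f"Unknown model: {chosen_model}")
    return handler(caption)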
@@ -225,7 +264,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Column():
             video_in = gr.Video(sources=["upload"], label="Video input")
             with gr.Row():
-                chosen_model = gr.Dropdown(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango"], value="Tango")
+                chosen_model = gr.Dropdown(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango", "Tango 2", "Stable Audio Open"], value="Tango")
                 submit_btn = gr.Button("Submit", scale=0)
         with gr.Column():
             caption_o = gr.Textbox(label="Scene caption", interactive=False)
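Note that the new dropdown labels must match the elif strings in infer and retry exactly ("Tango 2" with a space, "Stable Audio Open"); a mismatched label would skip every branch and leave audio_result unset. With the hypothetical registry sketched above, the choices could be derived instead of repeated:

# Hypothetical: derive the dropdown from the registry so the UI and the
# branching logic cannot drift apart.
chosen_model = gr.Dropdown(
    label="Choose a model",
    choices=list(MODEL_HANDLERS.keys()),
    value="Tango"
)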
@@ -267,14 +306,12 @@ with gr.Blocks(css=css) as demo:
         fn=infer,
         inputs=[video_in, chosen_model],
         outputs=[caption_o, retry_btn, audio_o, video_o],
-        concurrency_limit = 2
     )
 
     retry_btn.click(
         fn=retry,
         inputs=[caption_o, video_in, chosen_model],
         outputs=[audio_o, video_o],
-        concurrency_limit = 2
     )
 
 demo.queue(max_size=10).launch(show_api=False, debug=True, show_error=True)
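Removing concurrency_limit = 2 from both click handlers drops each event back to Gradio's default concurrency. If two concurrent workers are still wanted, one option (assuming a Gradio 4.x queue API with default_concurrency_limit) is to set it once at the queue level:

# Hypothetical alternative: restore two concurrent workers for all events
# at the queue level (Gradio 4.x signature assumed).
demo.queue(max_size=10, default_concurrency_limit=2).launch(
    show_api=False, debug=True, show_error=True
)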