Spaces:

dronesplace
/

Nola

Sleeping

App Files Community

dronesplace committed on Nov 17, 2025

Commit

4be86aa

verified ·

1 Parent(s): a6a9419

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -110

app.py CHANGED Viewed

@@ -1,114 +1,13 @@
 import gradio as gr
-import torch
-import cv2
-import numpy as np
-from gtts import gTTS
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, WhisperProcessor, WhisperForConditionalGeneration
-from PIL import Image
-import ffmpeg
-import tempfile
-import os
-# -----------------------
-# Load Models
-# -----------------------
-device = "cpu"
-# Speech-to-text (Whisper small)
-whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
-# Text generation (Flan-T5 small)
-tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
-t5_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
-# -----------------------
-# Helper Functions
-# -----------------------
-def transcribe(audio):
-    if audio is None:
-        return ""
-    audio = whisper_processor(audio["array"], sampling_rate=16000, return_tensors="pt")
-    result = whisper_model.generate(audio["input_features"])
-    return whisper_processor.batch_decode(result, skip_special_tokens=True)[0]
-def reply(text):
-    inp = tokenizer(text, return_tensors="pt")
-    out = t5_model.generate(**inp, max_length=120)
-    return tokenizer.decode(out[0], skip_special_tokens=True)
-def synth_voice(text, path):
-    tts = gTTS(text=text, lang="en", tld="com", slow=False)
-    tts.save(path)
-    return path
-def animate_avatar(image, audio_path):
-    avatar = Image.open(image).convert("RGBA")
-    w, h = avatar.size
-    avatar_np = np.array(avatar)
-    # Extract audio amplitude → fake lip motion
-    import wave
-    with wave.open(audio_path, "rb") as wav:
-        frames = wav.readframes(-1)
-        audio_np = np.frombuffer(frames, dtype=np.int16)
-        amp = np.abs(audio_np)[::2000]  # downsample amplitude curve
-    frames_list = []
-    for a in amp:
-        frame = avatar_np.copy()
-        intensity = min(8, int(a / 3000))
-        frame[h - 40 : h - 20, w//2 - 20 : w//2 + 20, 3] = 255 - intensity * 20
-        frames_list.append(frame)
-    # Export to video
-    temp_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-    out = cv2.VideoWriter(temp_video, cv2.VideoWriter_fourcc(*"mp4v"), 20, (w, h))
-    for f in frames_list:
-        out.write(cv2.cvtColor(f, cv2.COLOR_RGBA2BGR))
-    out.release()
-    return temp_video
-# -----------------------
-# Main Chat Logic
-# -----------------------
-def chat(image, audio, text):
-    user_input = text if text else transcribe(audio)
-    if not user_input:
-        return "Say something!", None
-    ai_answer = reply(user_input)
-    # TTS
-    temp_audio = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
-    synth_voice(ai_answer, temp_audio)
-    # Talking avatar
-    video = animate_avatar(image, temp_audio)
-    return ai_answer, video
-# -----------------------
-# Gradio UI
-# -----------------------
-with gr.Blocks() as interface:
-    gr.Markdown("## 🧚‍♀️ AI Avatar Companion — Free & No-Install")
-    avatar = gr.Image(type="filepath", label="Upload Avatar PNG")
-    audio = gr.Audio(source="microphone", type="numpy", label="Speak")
-    txt = gr.Textbox(label="Or type your message")
-    out_text = gr.Textbox(label="AI Response")
-    out_video = gr.Video(label="Talking Avatar")
-    submit = gr.Button("Talk")
-    submit.click(chat, inputs=[avatar, audio, txt], outputs=[out_text, out_video])
-interface.launch()

 import gradio as gr
+def hello(name):
+    return f"Hello {name}, the Space is working!"
+with gr.Blocks() as demo:
+    gr.Markdown("## Test App")
+    name = gr.Textbox("world", label="Your name")
+    out = gr.Textbox(label="Output")
+    btn = gr.Button("Run")
+    btn.click(hello, name, out)
+demo.launch()