dronesplace committed on
Commit
4be86aa
·
verified ·
1 Parent(s): a6a9419

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -110
app.py CHANGED
@@ -1,114 +1,13 @@
1
  import gradio as gr
2
- import torch
3
- import cv2
4
- import numpy as np
5
- from gtts import gTTS
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, WhisperProcessor, WhisperForConditionalGeneration
7
- from PIL import Image
8
- import ffmpeg
9
- import tempfile
10
- import os
11
 
12
- # -----------------------
13
- # Load Models
14
- # -----------------------
15
 
16
- device = "cpu"
 
 
 
 
 
17
 
18
- # Speech-to-text (Whisper small)
19
- whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-small")
20
- whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
21
-
22
- # Text generation (Flan-T5 small)
23
- tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
24
- t5_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
25
-
26
- # -----------------------
27
- # Helper Functions
28
- # -----------------------
29
-
30
- def transcribe(audio):
31
- if audio is None:
32
- return ""
33
- audio = whisper_processor(audio["array"], sampling_rate=16000, return_tensors="pt")
34
- result = whisper_model.generate(audio["input_features"])
35
- return whisper_processor.batch_decode(result, skip_special_tokens=True)[0]
36
-
37
- def reply(text):
38
- inp = tokenizer(text, return_tensors="pt")
39
- out = t5_model.generate(**inp, max_length=120)
40
- return tokenizer.decode(out[0], skip_special_tokens=True)
41
-
42
- def synth_voice(text, path):
43
- tts = gTTS(text=text, lang="en", tld="com", slow=False)
44
- tts.save(path)
45
- return path
46
-
47
- def animate_avatar(image, audio_path):
48
- avatar = Image.open(image).convert("RGBA")
49
- w, h = avatar.size
50
- avatar_np = np.array(avatar)
51
-
52
- # Extract audio amplitude → fake lip motion
53
- import wave
54
- with wave.open(audio_path, "rb") as wav:
55
- frames = wav.readframes(-1)
56
- audio_np = np.frombuffer(frames, dtype=np.int16)
57
- amp = np.abs(audio_np)[::2000] # downsample amplitude curve
58
-
59
- frames_list = []
60
- for a in amp:
61
- frame = avatar_np.copy()
62
- intensity = min(8, int(a / 3000))
63
- frame[h - 40 : h - 20, w//2 - 20 : w//2 + 20, 3] = 255 - intensity * 20
64
- frames_list.append(frame)
65
-
66
- # Export to video
67
- temp_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
68
- out = cv2.VideoWriter(temp_video, cv2.VideoWriter_fourcc(*"mp4v"), 20, (w, h))
69
-
70
- for f in frames_list:
71
- out.write(cv2.cvtColor(f, cv2.COLOR_RGBA2BGR))
72
- out.release()
73
-
74
- return temp_video
75
-
76
- # -----------------------
77
- # Main Chat Logic
78
- # -----------------------
79
-
80
- def chat(image, audio, text):
81
- user_input = text if text else transcribe(audio)
82
- if not user_input:
83
- return "Say something!", None
84
-
85
- ai_answer = reply(user_input)
86
-
87
- # TTS
88
- temp_audio = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
89
- synth_voice(ai_answer, temp_audio)
90
-
91
- # Talking avatar
92
- video = animate_avatar(image, temp_audio)
93
-
94
- return ai_answer, video
95
-
96
- # -----------------------
97
- # Gradio UI
98
- # -----------------------
99
-
100
- with gr.Blocks() as interface:
101
- gr.Markdown("## 🧚‍♀️ AI Avatar Companion — Free & No-Install")
102
-
103
- avatar = gr.Image(type="filepath", label="Upload Avatar PNG")
104
- audio = gr.Audio(source="microphone", type="numpy", label="Speak")
105
- txt = gr.Textbox(label="Or type your message")
106
-
107
- out_text = gr.Textbox(label="AI Response")
108
- out_video = gr.Video(label="Talking Avatar")
109
-
110
- submit = gr.Button("Talk")
111
-
112
- submit.click(chat, inputs=[avatar, audio, txt], outputs=[out_text, out_video])
113
-
114
- interface.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
2
 
3
def hello(name):
    """Smoke-test handler: return a greeting for *name* confirming the Space runs.

    Parameters
    ----------
    name : str
        The value typed into the "Your name" textbox.

    Returns
    -------
    str
        A greeting message echoing the name.
    """
    message = f"Hello {name}, the Space is working!"
    return message
 
5
 
6
# Minimal smoke-test UI: one text input, one text output, a single button
# that routes the input through `hello`. `demo` is launched at module level,
# which is the convention Hugging Face Spaces expects for a Gradio app.
with gr.Blocks() as demo:
    gr.Markdown("## Test App")

    # Input defaults to "world" so the button works with zero typing.
    name_box = gr.Textbox("world", label="Your name")
    result_box = gr.Textbox(label="Output")
    run_btn = gr.Button("Run")

    # Clicking "Run" feeds the name box into hello() and shows the result.
    run_btn.click(hello, name_box, result_box)

demo.launch()