import gradio as gr
import spaces
import gc
import numpy as np
import os
import torch
from video_depth_anything.video_depth import VideoDepthAnything
from utils.dc_utils import read_video_frames, save_video
from huggingface_hub import hf_hub_download
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
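# Head configurations for the supported encoder backbones (ViT-Small and ViT-Large)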
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
}
encoder2name = {
'vits': 'Small',
'vitl': 'Large',
}
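# Instantiate the ViT-L model, fetch its checkpoint from the Hugging Face Hub, and move it to the target device in eval mode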
encoder = 'vitl'
model_name = encoder2name[encoder]
video_depth_anything = VideoDepthAnything(**model_configs[encoder])
filepath = hf_hub_download(repo_id=f"depth-anything/Video-Depth-Anything-{model_name}", filename=f"video_depth_anything_{encoder}.pth", repo_type="model")
video_depth_anything.load_state_dict(torch.load(filepath, map_location='cpu'))
video_depth_anything = video_depth_anything.to(DEVICE).eval()
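# On ZeroGPU Spaces, spaces.GPU reserves a GPU worker per call; duration=240 budgets up to 240 s of GPU time per request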
@spaces.GPU(duration=240)
def infer_video_depth(
input_video: str,
max_len: int = -1,
target_fps: int = -1,
max_res: int = 1280,
grayscale: bool = False,
output_dir: str = './outputs',
input_size: int = 518,
):
"""
Generate depth maps from input video.
This function processes the input video to generate corresponding depth maps
using the Video Depth Anything model.
Args:
input_video (str): Path to the input video file
        max_len (int): Maximum number of frames to process (-1 for no limit)
        target_fps (int): Target frames per second for processing (-1 keeps the source FPS)
        max_res (int): Maximum size of the longer video side, in pixels
grayscale (bool): Whether to output in grayscale
output_dir (str): Directory to save output videos
input_size (int): Input size for the model
Returns:
List[str]: Paths to the processed video and depth visualization
"""
frames, target_fps = read_video_frames(input_video, max_len, target_fps, max_res)
depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=input_size, device=DEVICE)
video_name = os.path.basename(input_video)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
processed_video_path = os.path.join(output_dir, os.path.splitext(video_name)[0]+'_src.mp4')
depth_vis_path = os.path.join(output_dir, os.path.splitext(video_name)[0]+'_vis.mp4')
save_video(frames, processed_video_path, fps=fps)
save_video(depths, depth_vis_path, fps=fps, is_depths=True, grayscale=grayscale)
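    # Release frame buffers and cached GPU memory between requests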
gc.collect()
torch.cuda.empty_cache()
return [processed_video_path, depth_vis_path]
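# Minimal usage sketch outside the UI (hypothetical local path; outputs land in './outputs' by default):
#   src_path, depth_path = infer_video_depth("example_clip.mp4", max_len=500, target_fps=15)

# Dark theme with orange accent colors for the demo UI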
theme = gr.themes.Base().set(
body_background_fill="#1A1A1A",
body_background_fill_dark="#1A1A1A",
body_text_color="#CCCCCC",
body_text_color_dark="#CCCCCC",
block_background_fill="#2C2C2C",
block_background_fill_dark="#2C2C2C",
block_border_color="#3C3C3C",
block_border_color_dark="#3C3C3C",
button_primary_background_fill="#FF8C00",
button_primary_background_fill_dark="#FF8C00",
button_primary_background_fill_hover="#FF9F33",
button_primary_border_color="*primary_500",
button_primary_text_color="white",
button_primary_text_color_dark="white",
block_border_width="1px",
block_radius="8px"
)
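# Blocks layout: input video and generation settings on the left, generated outputs on the right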
with gr.Blocks(
theme=theme,
css="""
.gradio-container {
background: #1A1A1A !important;
color: #CCCCCC !important;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important;
}
.gradio-container .footer,
.gradio-container footer,
.gradio-container [data-testid="footer"],
.gradio-container .gradio-footer {
display: none !important;
}
.gradio-container .gradio-container {
padding-bottom: 0 !important;
}
.gradio-container h1, .gradio-container h2, .gradio-container h3 {
color: #FFFFFF !important;
font-weight: bold !important;
}
.gradio-container .markdown {
color: #CCCCCC !important;
}
.gradio-container .tab-nav {
background: #2C2C2C !important;
border: none !important;
}
.gradio-container .tab-nav button {
background: #2C2C2C !important;
color: #CCCCCC !important;
border: none !important;
border-radius: 8px 8px 0 0 !important;
}
.gradio-container .tab-nav button.selected {
background: #FF8C00 !important;
color: #FFFFFF !important;
}
.gradio-container .tab-nav button:hover {
background: #3C3C3C !important;
}
.gradio-container .tab-nav button.selected:hover {
background: #FF8C00 !important;
}
.gradio-container .tab-content {
background: #2C2C2C !important;
border: none !important;
border-radius: 0 0 8px 8px !important;
padding: 20px !important;
}
.gradio-container .accordion {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
margin: 10px 0 !important;
}
.gradio-container .accordion .accordion-header {
background: #2C2C2C !important;
color: #FFFFFF !important;
border: none !important;
border-radius: 8px !important;
}
.gradio-container .accordion .accordion-content {
background: #2C2C2C !important;
color: #CCCCCC !important;
border: none !important;
border-radius: 0 0 8px 8px !important;
}
.gradio-container .button {
background: #FF8C00 !important;
color: #FFFFFF !important;
border: none !important;
border-radius: 8px !important;
font-weight: bold !important;
padding: 12px 24px !important;
}
.gradio-container .button:hover {
background: #FF9F33 !important;
}
.gradio-container .button.secondary {
background: #3C3C3C !important;
color: #CCCCCC !important;
}
.gradio-container .button.secondary:hover {
background: #4C4C4C !important;
}
.gradio-container .slider {
background: #3C3C3C !important;
}
.gradio-container .slider .slider-handle {
background: #FF8C00 !important;
border: 2px solid #FFFFFF !important;
}
.gradio-container .slider .slider-track {
background: #3C3C3C !important;
}
.gradio-container .slider .slider-track-fill {
background: #FF8C00 !important;
}
.gradio-container .checkbox {
color: #CCCCCC !important;
}
.gradio-container .radio {
color: #CCCCCC !important;
}
.gradio-container .gallery {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
}
.gradio-container .image {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
}
.gradio-container .video {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
}
.gradio-container .model3d {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
}
.gradio-container .row {
    gap: 20px !important;
    align-items: flex-start !important;
    justify-content: center !important;
}
.gradio-container .column {
background: #2C2C2C !important;
border: 1px solid #3C3C3C !important;
border-radius: 8px !important;
padding: 20px !important;
}
.gradio-container .container {
max-width: 1200px !important;
margin: 0 auto !important;
padding: 20px !important;
}
"""
) as demo:
gr.Markdown("""
<div style="text-align: center; margin-bottom: 30px; padding: 20px; background: #2C2C2C; border: 1px solid #3C3C3C; border-radius: 8px;">
<h3 style="color: #FFFFFF; margin-bottom: 15px;">Instructions</h3>
<p style="color: #CCCCCC; margin-bottom: 10px;">
• Upload a video and click "Generate Depth" to create depth maps
</p>
<p style="color: #CCCCCC; margin-bottom: 10px;">
• Adjust settings in Generation Settings for optimal results
</p>
<p style="color: #CCCCCC;">
• Download the processed video and depth visualization
</p>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("""
<div style="background: #2C2C2C; border: 1px solid #3C3C3C; border-radius: 8px; padding: 20px; margin-bottom: 20px;">
<h3 style="color: #FFFFFF; margin-bottom: 15px;">Video Depth Generation</h3>
<p style="color: #CCCCCC; margin-bottom: 20px;">Generate depth maps from video content for compositing and 3D effects.</p>
</div>
""")
input_video = gr.Video(label="Input Video", height=300)
with gr.Accordion(label="Generation Settings", open=False):
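                # These controls map one-to-one onto the corresponding infer_video_depth arguments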
max_len = gr.Slider(0, 1000, label="Max Process Length", value=500, step=1)
target_fps = gr.Slider(-1, 30, label="Target FPS", value=15, step=1)
max_res = gr.Slider(480, 1920, label="Max Side Resolution", value=1280, step=1)
grayscale = gr.Checkbox(label="Grayscale Output", value=False)
generate_btn = gr.Button("Generate Depth", variant="primary", size="lg")
with gr.Column(scale=1):
gr.Markdown("""
<div style="background: #2C2C2C; border: 1px solid #3C3C3C; border-radius: 8px; padding: 20px; margin-bottom: 20px;">
<h3 style="color: #FFFFFF; margin-bottom: 15px;">Generated Depth Maps</h3>
<p style="color: #CCCCCC; margin-bottom: 20px;">Preview and download your generated depth maps.</p>
</div>
""")
            video_output = gr.Video(label="Processed Source Video", autoplay=True, loop=True, height=300)
model_output = gr.Video(label="Generated Depth Video", autoplay=True, loop=True, height=300)
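    # output_dir and input_size are not exposed in the UI, so they keep their function defaults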
generate_btn.click(
fn=infer_video_depth,
inputs=[input_video, max_len, target_fps, max_res, grayscale],
outputs=[video_output, model_output],
)
gr.Markdown("""
<div style="text-align: center; margin-top: 40px; padding: 20px; background: #2C2C2C; border: 1px solid #3C3C3C; border-radius: 8px;">
<p style="color: #CCCCCC; font-size: 0.9rem; margin: 0;">
Powered by <span style="color: #FF8C00;">Mean Cat Entertainment</span> • Built for the future of VFX
</p>
</div>
""")
# Launch the Gradio app
if __name__ == "__main__":
demo.launch(share=True)