Spaces:

LoufAn
/

AR_Testing

Runtime error

App Files Community

XiaoyiYangRIT committed on May 8

Commit

aed9794

1 Parent(s): a6cd9f8

Update some files

Browse files

Files changed (1) hide show

app.py +35 -24

app.py CHANGED Viewed

@@ -1,13 +1,26 @@
 import gradio as gr
 import torch
 import math
-import os
-from transformers import AutoTokenizer, AutoModel, AutoProcessor
-from huggingface_hub import snapshot_download
-from decord import VideoReader, cpu
 from PIL import Image
 from torchvision.transforms import Compose, Resize, ToTensor, Normalize
 # === 视觉预处理 ===
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_STD = (0.229, 0.224, 0.225)
@@ -18,23 +31,15 @@ transform = Compose([
     Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
 ])
-# === 模型加载 ===
-PERSISTENT_DIR = "/data/internvl3_model"  # 持久路径
-MODEL_NAME = "OpenGVLab/InternVL3-14B"
-# 如果第一次运行：下载模型并缓存到 /data
-if not os.path.exists(PERSISTENT_DIR):
     print("⏬ First run: downloading model to persistent storage...")
-    snapshot_download(repo_id=MODEL_NAME, local_dir=PERSISTENT_DIR, trust_remote_code=True)
 else:
     print("✅ Loaded model from persistent cache.")
-# 模型加载（从本地）
-tokenizer = AutoTokenizer.from_pretrained(PERSISTENT_DIR, trust_remote_code=True)
-processor = AutoProcessor.from_pretrained(PERSISTENT_DIR, trust_remote_code=True)
 def split_model(model_path):
-    from transformers import AutoConfig
     device_map = {}
     world_size = torch.cuda.device_count()
     config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
@@ -58,10 +63,13 @@ def split_model(model_path):
     device_map[f'language_model.model.layers.{num_layers - 1}'] = 0
     return device_map
-device_map = split_model(PERSISTENT_DIR)
 model = AutoModel.from_pretrained(
-    PERSISTENT_DIR,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     use_flash_attn=True,
@@ -69,7 +77,7 @@ model = AutoModel.from_pretrained(
     device_map=device_map
 ).eval()
-# === 视频帧采样 ===
 def extract_frames(video_path, num_frames=8):
     vr = VideoReader(video_path, ctx=cpu(0))
     total_frames = len(vr)
@@ -81,10 +89,10 @@ def extract_frames(video_path, num_frames=8):
         images.append(img_tensor)
     return torch.stack(images)
-# === 推理函数 ===
 def evaluate_ar(video):
     frames = extract_frames(video.name).to(torch.bfloat16).cuda()
-    prompt = "Evaluate the quality of AR occlusion and rendering in the uploaded video."  # 可换成具体任务
     num_patches = [1] * frames.shape[0]
     output, _ = model.chat(
         tokenizer,
@@ -97,11 +105,14 @@ def evaluate_ar(video):
     )
     return output
-# === Gradio 界面 ===
 gr.Interface(
     fn=evaluate_ar,
     inputs=gr.Video(label="Upload your AR video"),
     outputs="text",
     title="InternVL3 AR Evaluation (Single-turn)",
-    description="Upload a video clip. The model will analyze AR occlusion and rendering quality."
 ).launch()

+import os
 import gradio as gr
 import torch
 import math
+import time
 from PIL import Image
+from decord import VideoReader, cpu
 from torchvision.transforms import Compose, Resize, ToTensor, Normalize
+from transformers import (
+    AutoModel,
+    AutoTokenizer,
+    AutoProcessor,
+    AutoConfig
+)
+from huggingface_hub import snapshot_download
+start_time = time.time()
+# === 常量设定 ===
+MODEL_NAME = "OpenGVLab/InternVL3-14B"
+CACHE_DIR = "/data/internvl3_model"
 # === 视觉预处理 ===
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_STD = (0.229, 0.224, 0.225)
     Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
 ])
+# === 模型下载与缓存 ===
+if not os.path.exists(CACHE_DIR):
     print("⏬ First run: downloading model to persistent storage...")
+    snapshot_download(repo_id=MODEL_NAME, local_dir=CACHE_DIR)
 else:
     print("✅ Loaded model from persistent cache.")
+# === GPU层级分配（多GPU支持） ===
 def split_model(model_path):
     device_map = {}
     world_size = torch.cuda.device_count()
     config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
     device_map[f'language_model.model.layers.{num_layers - 1}'] = 0
     return device_map
+# === 加载组件（已缓存） ===
+print("🚀 Loading tokenizer/processor/model from cache...")
+tokenizer = AutoTokenizer.from_pretrained(CACHE_DIR, trust_remote_code=True)
+processor = AutoProcessor.from_pretrained(CACHE_DIR, trust_remote_code=True)
+device_map = split_model(CACHE_DIR)
 model = AutoModel.from_pretrained(
+    CACHE_DIR,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     use_flash_attn=True,
     device_map=device_map
 ).eval()
+# === 视频帧提取函数 ===
 def extract_frames(video_path, num_frames=8):
     vr = VideoReader(video_path, ctx=cpu(0))
     total_frames = len(vr)
         images.append(img_tensor)
     return torch.stack(images)
+# === 主推理函数 ===
 def evaluate_ar(video):
     frames = extract_frames(video.name).to(torch.bfloat16).cuda()
+    prompt = "Evaluate the quality of AR occlusion and rendering in the uploaded video."
     num_patches = [1] * frames.shape[0]
     output, _ = model.chat(
         tokenizer,
     )
     return output
+# === Gradio 接口 ===
 gr.Interface(
     fn=evaluate_ar,
     inputs=gr.Video(label="Upload your AR video"),
     outputs="text",
     title="InternVL3 AR Evaluation (Single-turn)",
+    description="Upload a short AR video clip. The model will sample frames and assess occlusion/rendering quality."
 ).launch()
+# (在模型加载完成后)
+print(f"✅ Model fully loaded. Time elapsed: {time.time() - start_time:.2f} sec.")