Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -81,14 +81,15 @@ torch.backends.cuda.matmul.allow_tf32 = True
|
|
| 81 |
torch.backends.cudnn.benchmark = True
|
| 82 |
|
| 83 |
# 환경 변수 설정
|
| 84 |
-
|
| 85 |
-
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512,garbage_collection_threshold:0.6"
|
| 86 |
os.environ['SPCONV_ALGO'] = 'native'
|
| 87 |
os.environ['SPARSE_BACKEND'] = 'native'
|
| 88 |
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
|
| 89 |
os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'
|
| 90 |
os.environ['XFORMERS_ENABLE_FLASH_ATTENTION'] = '1'
|
| 91 |
os.environ['TORCH_CUDA_MEMORY_ALLOCATOR'] = 'native'
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# Disable gradient tracking globally (this does not prevent CUDA initialization)
|
| 94 |
torch.set_grad_enabled(False)
|
|
@@ -208,6 +209,7 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 208 |
try:
|
| 209 |
# Release unused cached CUDA memory
|
| 210 |
torch.cuda.empty_cache()
|
|
|
|
| 211 |
|
| 212 |
if randomize_seed:
|
| 213 |
seed = np.random.randint(0, MAX_SEED)
|
|
@@ -222,37 +224,60 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 222 |
image = Image.open(image_path)
|
| 223 |
print(f"Successfully loaded image with size: {image.size}")
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
# GPU 작업 시작
|
| 226 |
with torch.cuda.device(0):
|
| 227 |
try:
|
| 228 |
# 모델을 GPU로 이동
|
| 229 |
-
g.trellis_pipeline
|
| 230 |
torch.cuda.synchronize()
|
| 231 |
|
| 232 |
-
with torch.inference_mode():
|
| 233 |
-
#
|
|
|
|
|
|
|
|
|
|
| 234 |
outputs = g.trellis_pipeline.run(
|
| 235 |
image,
|
| 236 |
seed=seed,
|
| 237 |
formats=["gaussian", "mesh"],
|
| 238 |
preprocess_image=False,
|
| 239 |
sparse_structure_sampler_params={
|
| 240 |
-
"steps": ss_sampling_steps,
|
| 241 |
"cfg_strength": ss_guidance_strength,
|
| 242 |
},
|
| 243 |
slat_sampler_params={
|
| 244 |
-
"steps": slat_sampling_steps,
|
| 245 |
"cfg_strength": slat_guidance_strength,
|
| 246 |
},
|
| 247 |
)
|
| 248 |
torch.cuda.synchronize()
|
| 249 |
|
| 250 |
-
#
|
| 251 |
-
|
| 252 |
-
torch.cuda.synchronize()
|
| 253 |
|
| 254 |
-
|
| 255 |
-
torch.cuda.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
# CPU로 데이터 이동 및 후처리
|
| 258 |
video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
|
|
@@ -271,15 +296,14 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 271 |
|
| 272 |
finally:
|
| 273 |
# 정리 작업
|
| 274 |
-
g.trellis_pipeline
|
| 275 |
torch.cuda.empty_cache()
|
| 276 |
torch.cuda.synchronize()
|
| 277 |
|
| 278 |
except Exception as e:
|
| 279 |
print(f"Error in image_to_3d: {str(e)}")
|
| 280 |
-
# 에러 발생 시 정리
|
| 281 |
if hasattr(g.trellis_pipeline, 'to'):
|
| 282 |
-
g.trellis_pipeline
|
| 283 |
torch.cuda.empty_cache()
|
| 284 |
torch.cuda.synchronize()
|
| 285 |
return None, None
|
|
@@ -289,14 +313,17 @@ def clear_gpu_memory():
|
|
| 289 |
if torch.cuda.is_available():
|
| 290 |
torch.cuda.empty_cache()
|
| 291 |
torch.cuda.synchronize()
|
| 292 |
-
|
|
|
|
| 293 |
def move_to_device(model, device):
    """Move ``model`` to ``device`` on a best-effort basis.

    Objects that do not expose a ``to`` attribute are left untouched. Any
    ``Exception`` raised while moving or synchronizing is caught and reported
    with ``print`` instead of being propagated.

    Args:
        model: Object to move; ``model.to(device)`` is called when available.
        device: Target device, given as a string (``'cpu'``, ``'cuda'``,
            ``'cuda:0'``) or as a ``torch.device``.

    Returns:
        None.
    """
    try:
        if hasattr(model, 'to'):
            model.to(device)
            # Compare on the device *type* so that 'cuda:0' and
            # torch.device('cuda') are recognised as well as the bare string
            # 'cuda', and only synchronize when CUDA is actually usable.
            if torch.device(device).type == 'cuda' and torch.cuda.is_available():
                torch.cuda.synchronize()
    except Exception as e:
        # Best-effort: report the failure and continue rather than raising.
        print(f"Error moving model to {device}: {str(e)}")
|
| 302 |
|
|
|
|
| 81 |
torch.backends.cudnn.benchmark = True
|
| 82 |
|
| 83 |
# 환경 변수 설정
|
| 84 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256,garbage_collection_threshold:0.8"
|
|
|
|
| 85 |
os.environ['SPCONV_ALGO'] = 'native'
|
| 86 |
os.environ['SPARSE_BACKEND'] = 'native'
|
| 87 |
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
|
| 88 |
os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'
|
| 89 |
os.environ['XFORMERS_ENABLE_FLASH_ATTENTION'] = '1'
|
| 90 |
os.environ['TORCH_CUDA_MEMORY_ALLOCATOR'] = 'native'
|
| 91 |
+
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
|
| 92 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
| 93 |
|
| 94 |
# Disable gradient tracking globally (this does not prevent CUDA initialization)
|
| 95 |
torch.set_grad_enabled(False)
|
|
|
|
| 209 |
try:
|
| 210 |
# Release unused cached CUDA memory
|
| 211 |
torch.cuda.empty_cache()
|
| 212 |
+
torch.cuda.synchronize()
|
| 213 |
|
| 214 |
if randomize_seed:
|
| 215 |
seed = np.random.randint(0, MAX_SEED)
|
|
|
|
| 224 |
image = Image.open(image_path)
|
| 225 |
print(f"Successfully loaded image with size: {image.size}")
|
| 226 |
|
| 227 |
+
# 이미지 크기 제한
|
| 228 |
+
max_size = 512
|
| 229 |
+
if max(image.size) > max_size:
|
| 230 |
+
ratio = max_size / max(image.size)
|
| 231 |
+
new_size = tuple(int(dim * ratio) for dim in image.size)
|
| 232 |
+
image = image.resize(new_size, Image.LANCZOS)
|
| 233 |
+
print(f"Resized image to: {image.size}")
|
| 234 |
+
|
| 235 |
# GPU 작업 시작
|
| 236 |
with torch.cuda.device(0):
|
| 237 |
try:
|
| 238 |
# 모델을 GPU로 이동
|
| 239 |
+
move_to_device(g.trellis_pipeline, 'cuda')
|
| 240 |
torch.cuda.synchronize()
|
| 241 |
|
| 242 |
+
with torch.inference_mode(), torch.cuda.amp.autocast():
|
| 243 |
+
# Cap the fraction of GPU memory this process may allocate (no batch size is set here)
|
| 244 |
+
torch.cuda.set_per_process_memory_fraction(0.8) # GPU 메모리 사용량 제한
|
| 245 |
+
|
| 246 |
+
# 3D 생성
|
| 247 |
outputs = g.trellis_pipeline.run(
|
| 248 |
image,
|
| 249 |
seed=seed,
|
| 250 |
formats=["gaussian", "mesh"],
|
| 251 |
preprocess_image=False,
|
| 252 |
sparse_structure_sampler_params={
|
| 253 |
+
"steps": min(ss_sampling_steps, 20), # 스텝 수 제한
|
| 254 |
"cfg_strength": ss_guidance_strength,
|
| 255 |
},
|
| 256 |
slat_sampler_params={
|
| 257 |
+
"steps": min(slat_sampling_steps, 20), # 스텝 수 제한
|
| 258 |
"cfg_strength": slat_guidance_strength,
|
| 259 |
},
|
| 260 |
)
|
| 261 |
torch.cuda.synchronize()
|
| 262 |
|
| 263 |
+
# 비디오 렌더링을 위한 메모리 확보
|
| 264 |
+
torch.cuda.empty_cache()
|
|
|
|
| 265 |
|
| 266 |
+
# 비디오 렌더링
|
| 267 |
+
with torch.cuda.amp.autocast():
|
| 268 |
+
video = render_utils.render_video(
|
| 269 |
+
outputs['gaussian'][0],
|
| 270 |
+
num_frames=60, # 프레임 수 감소
|
| 271 |
+
resolution=512 # 해상도 제한
|
| 272 |
+
)['color']
|
| 273 |
+
torch.cuda.synchronize()
|
| 274 |
+
|
| 275 |
+
video_geo = render_utils.render_video(
|
| 276 |
+
outputs['mesh'][0],
|
| 277 |
+
num_frames=60, # 프레임 수 감소
|
| 278 |
+
resolution=512 # 해상도 제한
|
| 279 |
+
)['normal']
|
| 280 |
+
torch.cuda.synchronize()
|
| 281 |
|
| 282 |
# CPU로 데이터 이동 및 후처리
|
| 283 |
video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
|
|
|
|
| 296 |
|
| 297 |
finally:
|
| 298 |
# 정리 작업
|
| 299 |
+
move_to_device(g.trellis_pipeline, 'cpu')
|
| 300 |
torch.cuda.empty_cache()
|
| 301 |
torch.cuda.synchronize()
|
| 302 |
|
| 303 |
except Exception as e:
|
| 304 |
print(f"Error in image_to_3d: {str(e)}")
|
|
|
|
| 305 |
if hasattr(g.trellis_pipeline, 'to'):
|
| 306 |
+
move_to_device(g.trellis_pipeline, 'cpu')
|
| 307 |
torch.cuda.empty_cache()
|
| 308 |
torch.cuda.synchronize()
|
| 309 |
return None, None
|
|
|
|
| 313 |
if torch.cuda.is_available():
|
| 314 |
torch.cuda.empty_cache()
|
| 315 |
torch.cuda.synchronize()
|
| 316 |
+
gc.collect() # 가비지 컬렉션 실행
|
| 317 |
+
|
| 318 |
def move_to_device(model, device):
    """Move ``model`` to ``device`` on a best-effort basis, clearing GPU memory around the move.

    Objects that do not expose a ``to`` attribute are left untouched. For
    objects that do, ``clear_gpu_memory()`` is called before and after the
    move. Any ``Exception`` raised along the way is caught and reported with
    ``print`` instead of being propagated.

    Args:
        model: Object to move; ``model.to(device)`` is called when available.
        device: Target device, given as a string (``'cpu'``, ``'cuda'``,
            ``'cuda:0'``) or as a ``torch.device``.

    Returns:
        None.
    """
    try:
        if hasattr(model, 'to'):
            clear_gpu_memory()
            model.to(device)
            # Compare on the device *type* so that 'cuda:0' and
            # torch.device('cuda') are recognised as well as the bare string
            # 'cuda', and only synchronize when CUDA is actually usable.
            if torch.device(device).type == 'cuda' and torch.cuda.is_available():
                torch.cuda.synchronize()
            clear_gpu_memory()
    except Exception as e:
        # Best-effort: report the failure and continue rather than raising.
        print(f"Error moving model to {device}: {str(e)}")
|
| 329 |
|