Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,27 +32,32 @@ def initialize_models():
|
|
| 32 |
try:
|
| 33 |
import torch
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
torch.backends.cudnn.benchmark = True
|
| 37 |
-
torch.backends.cuda.matmul.allow_tf32 = True
|
| 38 |
torch.backends.cudnn.allow_tf32 = True
|
| 39 |
|
| 40 |
print("Initializing Trellis pipeline...")
|
| 41 |
pipeline = TrellisImageTo3DPipeline.from_pretrained(
|
| 42 |
-
"JeffreyXiang/TRELLIS-image-large"
|
| 43 |
-
torch_dtype=torch.float16 # A100에서 FP16 사용
|
| 44 |
)
|
| 45 |
|
| 46 |
if torch.cuda.is_available():
|
| 47 |
pipeline = pipeline.to("cuda")
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
print("Initializing translator...")
|
| 50 |
translator = translation_pipeline(
|
| 51 |
"translation",
|
| 52 |
model="Helsinki-NLP/opus-mt-ko-en",
|
| 53 |
-
device="cuda"
|
| 54 |
)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
| 56 |
print("Models initialized successfully")
|
| 57 |
return True
|
| 58 |
|
|
@@ -68,17 +73,15 @@ def get_flux_pipe():
|
|
| 68 |
free_memory()
|
| 69 |
flux_pipe = FluxPipeline.from_pretrained(
|
| 70 |
"black-forest-labs/FLUX.1-dev",
|
| 71 |
-
torch_dtype=torch.float16, # A100에서 FP16 사용
|
| 72 |
use_safetensors=True
|
| 73 |
).to("cuda")
|
|
|
|
|
|
|
| 74 |
except Exception as e:
|
| 75 |
print(f"Error loading Flux pipeline: {e}")
|
| 76 |
return None
|
| 77 |
return flux_pipe
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
def free_memory():
|
| 83 |
"""κ°νλ λ©λͺ¨λ¦¬ μ 리 ν¨μ"""
|
| 84 |
import gc
|
|
@@ -108,7 +111,7 @@ def free_memory():
|
|
| 108 |
except:
|
| 109 |
pass
|
| 110 |
|
| 111 |
-
|
| 112 |
def setup_gpu_model(model):
|
| 113 |
"""GPU μ€μ μ΄ νμν λͺ¨λΈμ μ²λ¦¬νλ ν¨μ"""
|
| 114 |
if torch.cuda.is_available():
|
|
@@ -122,7 +125,7 @@ def translate_if_korean(text):
|
|
| 122 |
return translated
|
| 123 |
return text
|
| 124 |
|
| 125 |
-
|
| 126 |
def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
|
| 127 |
try:
|
| 128 |
if pipeline is None:
|
|
@@ -192,7 +195,6 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
|
|
| 192 |
|
| 193 |
return gs, mesh, state['trial_id']
|
| 194 |
|
| 195 |
-
@spaces.GPU
|
| 196 |
def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
|
| 197 |
ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
|
| 198 |
try:
|
|
@@ -201,8 +203,8 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 201 |
|
| 202 |
input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
|
| 203 |
|
| 204 |
-
# 이미지 크기 제한
|
| 205 |
-
max_size =
|
| 206 |
if max(input_image.size) > max_size:
|
| 207 |
ratio = max_size / max(input_image.size)
|
| 208 |
input_image = input_image.resize(
|
|
@@ -214,31 +216,31 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 214 |
if torch.cuda.is_available():
|
| 215 |
pipeline.to("cuda")
|
| 216 |
|
| 217 |
-
with torch.
|
| 218 |
outputs = pipeline.run(
|
| 219 |
input_image,
|
| 220 |
seed=seed,
|
| 221 |
formats=["gaussian", "mesh"],
|
| 222 |
preprocess_image=False,
|
| 223 |
sparse_structure_sampler_params={
|
| 224 |
-
"steps": min(ss_sampling_steps,
|
| 225 |
"cfg_strength": ss_guidance_strength,
|
| 226 |
},
|
| 227 |
slat_sampler_params={
|
| 228 |
-
"steps": min(slat_sampling_steps,
|
| 229 |
"cfg_strength": slat_guidance_strength,
|
| 230 |
}
|
| 231 |
)
|
| 232 |
|
| 233 |
-
# 비디오
|
| 234 |
-
video = render_utils.render_video(outputs['gaussian'][0], num_frames=
|
| 235 |
-
video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=
|
| 236 |
video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
|
| 237 |
|
| 238 |
trial_id = str(uuid.uuid4())
|
| 239 |
video_path = f"{TMP_DIR}/{trial_id}.mp4"
|
| 240 |
os.makedirs(os.path.dirname(video_path), exist_ok=True)
|
| 241 |
-
imageio.mimsave(video_path, video, fps=
|
| 242 |
|
| 243 |
state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
|
| 244 |
|
|
@@ -253,26 +255,23 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
|
|
| 253 |
pipeline.to("cpu")
|
| 254 |
raise e
|
| 255 |
|
| 256 |
-
|
| 257 |
def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
|
| 258 |
try:
|
| 259 |
free_memory()
|
| 260 |
|
| 261 |
-
# Flux 파이프라인 가져오기
|
| 262 |
flux_pipe = get_flux_pipe()
|
| 263 |
if flux_pipe is None:
|
| 264 |
raise Exception("Failed to load Flux pipeline")
|
| 265 |
|
| 266 |
-
#
|
| 267 |
-
height = min(height, 1024)
|
| 268 |
width = min(width, 1024)
|
| 269 |
|
| 270 |
-
# 프롬프트 처리
|
| 271 |
-
base_prompt = "wbgmsst, 3D, white background"
|
| 272 |
translated_prompt = translate_if_korean(prompt)
|
| 273 |
-
final_prompt = f"{translated_prompt},
|
| 274 |
|
| 275 |
-
with torch.cuda.amp.autocast():
|
| 276 |
output = flux_pipe(
|
| 277 |
prompt=[final_prompt],
|
| 278 |
height=height,
|
|
@@ -292,7 +291,7 @@ def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
|
|
| 292 |
free_memory()
|
| 293 |
raise e
|
| 294 |
|
| 295 |
-
|
| 296 |
def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
|
| 297 |
gs, mesh, trial_id = unpack_state(state)
|
| 298 |
glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
|
|
|
|
| 32 |
try:
|
| 33 |
import torch
|
| 34 |
|
| 35 |
+
# L40S GPU 최적화 설정
|
| 36 |
+
torch.backends.cudnn.benchmark = True
|
| 37 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 38 |
torch.backends.cudnn.allow_tf32 = True
|
| 39 |
|
| 40 |
print("Initializing Trellis pipeline...")
|
| 41 |
pipeline = TrellisImageTo3DPipeline.from_pretrained(
|
| 42 |
+
"JeffreyXiang/TRELLIS-image-large"
|
|
|
|
| 43 |
)
|
| 44 |
|
| 45 |
if torch.cuda.is_available():
|
| 46 |
pipeline = pipeline.to("cuda")
|
| 47 |
+
# 모델을 FP16으로 변환
|
| 48 |
+
for param in pipeline.parameters():
|
| 49 |
+
param.data = param.data.half()
|
| 50 |
|
| 51 |
print("Initializing translator...")
|
| 52 |
translator = translation_pipeline(
|
| 53 |
"translation",
|
| 54 |
model="Helsinki-NLP/opus-mt-ko-en",
|
| 55 |
+
device="cuda"
|
| 56 |
)
|
| 57 |
|
| 58 |
+
# Flux 파이프라인은 나중에 초기화
|
| 59 |
+
flux_pipe = None
|
| 60 |
+
|
| 61 |
print("Models initialized successfully")
|
| 62 |
return True
|
| 63 |
|
|
|
|
| 73 |
free_memory()
|
| 74 |
flux_pipe = FluxPipeline.from_pretrained(
|
| 75 |
"black-forest-labs/FLUX.1-dev",
|
|
|
|
| 76 |
use_safetensors=True
|
| 77 |
).to("cuda")
|
| 78 |
+
# FP16으로 변환
|
| 79 |
+
flux_pipe.to(torch.float16)
|
| 80 |
except Exception as e:
|
| 81 |
print(f"Error loading Flux pipeline: {e}")
|
| 82 |
return None
|
| 83 |
return flux_pipe
|
| 84 |
|
|
|
|
|
|
|
|
|
|
| 85 |
def free_memory():
|
| 86 |
"""κ°νλ λ©λͺ¨λ¦¬ μ 리 ν¨μ"""
|
| 87 |
import gc
|
|
|
|
| 111 |
except:
|
| 112 |
pass
|
| 113 |
|
| 114 |
+
|
| 115 |
def setup_gpu_model(model):
|
| 116 |
"""GPU μ€μ μ΄ νμν λͺ¨λΈμ μ²λ¦¬νλ ν¨μ"""
|
| 117 |
if torch.cuda.is_available():
|
|
|
|
| 125 |
return translated
|
| 126 |
return text
|
| 127 |
|
| 128 |
+
|
| 129 |
def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
|
| 130 |
try:
|
| 131 |
if pipeline is None:
|
|
|
|
| 195 |
|
| 196 |
return gs, mesh, state['trial_id']
|
| 197 |
|
|
|
|
| 198 |
def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
|
| 199 |
ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
|
| 200 |
try:
|
|
|
|
| 203 |
|
| 204 |
input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
|
| 205 |
|
| 206 |
+
# L40S에 맞게 이미지 크기 제한 조정
|
| 207 |
+
max_size = 768 # L40S는 더 큰 이미지 처리 가능
|
| 208 |
if max(input_image.size) > max_size:
|
| 209 |
ratio = max_size / max(input_image.size)
|
| 210 |
input_image = input_image.resize(
|
|
|
|
| 216 |
if torch.cuda.is_available():
|
| 217 |
pipeline.to("cuda")
|
| 218 |
|
| 219 |
+
with torch.cuda.amp.autocast(): # 자동 혼합 정밀도 사용
|
| 220 |
outputs = pipeline.run(
|
| 221 |
input_image,
|
| 222 |
seed=seed,
|
| 223 |
formats=["gaussian", "mesh"],
|
| 224 |
preprocess_image=False,
|
| 225 |
sparse_structure_sampler_params={
|
| 226 |
+
"steps": min(ss_sampling_steps, 20), # L40Sμμ λ λ§μ μ€ν
νμ©
|
| 227 |
"cfg_strength": ss_guidance_strength,
|
| 228 |
},
|
| 229 |
slat_sampler_params={
|
| 230 |
+
"steps": min(slat_sampling_steps, 20),
|
| 231 |
"cfg_strength": slat_guidance_strength,
|
| 232 |
}
|
| 233 |
)
|
| 234 |
|
| 235 |
+
# 비디오 생성
|
| 236 |
+
video = render_utils.render_video(outputs['gaussian'][0], num_frames=40)['color']
|
| 237 |
+
video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=40)['normal']
|
| 238 |
video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
|
| 239 |
|
| 240 |
trial_id = str(uuid.uuid4())
|
| 241 |
video_path = f"{TMP_DIR}/{trial_id}.mp4"
|
| 242 |
os.makedirs(os.path.dirname(video_path), exist_ok=True)
|
| 243 |
+
imageio.mimsave(video_path, video, fps=20)
|
| 244 |
|
| 245 |
state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
|
| 246 |
|
|
|
|
| 255 |
pipeline.to("cpu")
|
| 256 |
raise e
|
| 257 |
|
| 258 |
+
|
| 259 |
def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
|
| 260 |
try:
|
| 261 |
free_memory()
|
| 262 |
|
|
|
|
| 263 |
flux_pipe = get_flux_pipe()
|
| 264 |
if flux_pipe is None:
|
| 265 |
raise Exception("Failed to load Flux pipeline")
|
| 266 |
|
| 267 |
+
# L40S에 맞게 크기 제한 조정
|
| 268 |
+
height = min(height, 1024)
|
| 269 |
width = min(width, 1024)
|
| 270 |
|
|
|
|
|
|
|
| 271 |
translated_prompt = translate_if_korean(prompt)
|
| 272 |
+
final_prompt = f"{translated_prompt}, wbgmsst, 3D, white background"
|
| 273 |
|
| 274 |
+
with torch.cuda.amp.autocast():
|
| 275 |
output = flux_pipe(
|
| 276 |
prompt=[final_prompt],
|
| 277 |
height=height,
|
|
|
|
| 291 |
free_memory()
|
| 292 |
raise e
|
| 293 |
|
| 294 |
+
|
| 295 |
def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
|
| 296 |
gs, mesh, trial_id = unpack_state(state)
|
| 297 |
glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
|