import spaces
import os
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
import random
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
from huggingface_hub import hf_hub_download, snapshot_download
import subprocess
import shutil

# install others
subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16

print("DEVICE: ", DEVICE)

DEFAULT_FACE_NUMBER = 100000
MAX_SEED = np.iinfo(np.int32).max
TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"

RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"

TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
os.makedirs(TMP_DIR, exist_ok=True)

TRIPOSG_CODE_DIR = "./triposg"
if not os.path.exists(TRIPOSG_CODE_DIR):
    os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")

MV_ADAPTER_CODE_DIR = "./mv_adapter"
if not os.path.exists(MV_ADAPTER_CODE_DIR):
    os.system(
        f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && "
        f"cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d"
    )

import sys
sys.path.append(TRIPOSG_CODE_DIR)
sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
sys.path.append(MV_ADAPTER_CODE_DIR)
sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))

HEADER = """
# 🎨 PolyGenixAI: Transform Ideas into 3D Masterpieces

## Unleash Your Creativity with AI-Powered 3D Generation by AnvilInteractive Solutions

## 🚀 Get Started:
1. **Upload an Image** (clear, single-object images work best)
2. **Select Filters** to customize styles
3. Click **Generate 3D Model** to create your mesh
4. Click **Apply Texture** to enhance with realistic textures
5. **Download GLB** to save your creation

Powered by advanced AI and multi-view technology from AnvilInteractive Solutions. Join our community at PolyGenixAI for tips and inspiration.
""" # triposg from image_process import prepare_image from briarmbg import BriaRMBG snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL) rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE) rmbg_net.eval() from triposg.pipelines.pipeline_triposg import TripoSGPipeline snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL) triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE) # mv adapter NUM_VIEWS = 6 from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render mv_adapter_pipe = prepare_pipeline( base_model="stabilityai/stable-diffusion-xl-base-1.0", vae_model="madebyollin/sdxl-vae-fp16-fix", unet_model=None, lora_model=None, adapter_path="huanngzh/mv-adapter", scheduler=None, num_views=NUM_VIEWS, device=DEVICE, dtype=torch.float16, ) birefnet = AutoModelForImageSegmentation.from_pretrained( "ZhengPeng7/BiRefNet", trust_remote_code=True ) birefnet.to(DEVICE) transform_image = transforms.Compose( [ transforms.Resize((1024, 1024)), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ] ) remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE) if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"): hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints") if not os.path.exists("checkpoints/big-lama.pt"): subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True) def start_session(req: gr.Request): save_dir = os.path.join(TMP_DIR, str(req.session_hash)) os.makedirs(save_dir, exist_ok=True) print("start session, mkdir", save_dir) def end_session(req: gr.Request): save_dir = os.path.join(TMP_DIR, str(req.session_hash)) shutil.rmtree(save_dir) def get_random_hex(): random_bytes = os.urandom(8) random_hex = random_bytes.hex() return random_hex def get_random_seed(randomize_seed, seed): if randomize_seed: seed = random.randint(0, MAX_SEED) return seed @spaces.GPU(duration=180) def run_full(image: str, req: gr.Request): seed = 0 num_inference_steps = 50 guidance_scale = 7.5 simplify = True target_face_num = DEFAULT_FACE_NUMBER image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net) outputs = triposg_pipe( image=image_seg, generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed), num_inference_steps=num_inference_steps, guidance_scale=guidance_scale ).samples[0] print("mesh extraction done") mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1])) if simplify: print("start simplify") from utils import simplify_mesh mesh = simplify_mesh(mesh, target_face_num) save_dir = os.path.join(TMP_DIR, "examples") os.makedirs(save_dir, exist_ok=True) mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb") mesh.export(mesh_path) print("save to ", mesh_path) torch.cuda.empty_cache() height, width = 768, 768 # Prepare cameras cameras = get_orthogonal_camera( elevation_deg=[0, 0, 0, 0, 89.99, -89.99], distance=[1.8] * NUM_VIEWS, left=-0.55, right=0.55, bottom=-0.55, top=0.55, azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]], device=DEVICE, ) ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda") mesh = load_mesh(mesh_path, rescale=True, device=DEVICE) render_out 
    render_out = render(
        ctx,
        mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    # `image` is still the uploaded file path here; open it, remove the background,
    # and preprocess it to the render resolution for use as the reference image.
    image = Image.open(image)
    image = remove_bg_fn(image)
    image = preprocess_image(image, height, width)

    pipe_kwargs = {}
    if seed != -1 and isinstance(seed, int):
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
    make_image_grid(images, rows=1).save(mv_image_path)

    from texture import TexturePipeline, ModProcessConfig

    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )

    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
    )

    return image_seg, mesh_path, textured_glb_path


@spaces.GPU()
@torch.no_grad()
def run_segmentation(image: str):
    image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
    return image


@spaces.GPU(duration=90)
@torch.no_grad()
def image_to_3d(
    image: Image.Image,
    seed: int,
    num_inference_steps: int,
    guidance_scale: float,
    simplify: bool,
    target_face_num: int,
    req: gr.Request,
):
    outputs = triposg_pipe(
        image=image,
        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).samples[0]
    print("mesh extraction done")
    mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))

    if simplify:
        print("start simplify")
        from utils import simplify_mesh

        mesh = simplify_mesh(mesh, target_face_num)

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    mesh_path = os.path.join(save_dir, f"polygenixai_{get_random_hex()}.glb")
    mesh.export(mesh_path)
    print("save to ", mesh_path)

    torch.cuda.empty_cache()

    return mesh_path


@spaces.GPU(duration=120)
@torch.no_grad()
def run_texture(image: str, mesh_path: str, seed: int, req: gr.Request):
    # `image` is the uploaded image's file path (the upload component uses type="filepath").
    height, width = 768, 768
    # Prepare cameras
    cameras = get_orthogonal_camera(
        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
        distance=[1.8] * NUM_VIEWS,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
        device=DEVICE,
    )
    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")

    mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
    render_out = render(
        ctx,
        mesh,
        cameras,
        height=height,
        width=width,
        render_attr=False,
        normal_background=0.0,
    )
    control_images = (
        torch.cat(
            [
                (render_out.pos + 0.5).clamp(0, 1),
                (render_out.normal / 2 + 0.5).clamp(0, 1),
            ],
            dim=-1,
        )
        .permute(0, 3, 1, 2)
        .to(DEVICE)
    )

    image = Image.open(image)
    image = remove_bg_fn(image)
    image = preprocess_image(image, height, width)

    pipe_kwargs = {}
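    # Only seed the diffusion sampler when an explicit non-negative integer seed is
    # provided; otherwise sampling stays nondeterministic.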
    if seed != -1 and isinstance(seed, int):
        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)

    images = mv_adapter_pipe(
        "high quality",
        height=height,
        width=width,
        num_inference_steps=15,
        guidance_scale=3.0,
        num_images_per_prompt=NUM_VIEWS,
        control_image=control_images,
        control_conditioning_scale=1.0,
        reference_image=image,
        reference_conditioning_scale=1.0,
        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
        cross_attention_kwargs={"scale": 1.0},
        **pipe_kwargs,
    ).images

    torch.cuda.empty_cache()

    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
    mv_image_path = os.path.join(save_dir, f"polygenixai_mv_{get_random_hex()}.png")
    make_image_grid(images, rows=1).save(mv_image_path)

    from texture import TexturePipeline, ModProcessConfig

    texture_pipe = TexturePipeline(
        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
        inpaint_ckpt_path="checkpoints/big-lama.pt",
        device=DEVICE,
    )

    textured_glb_path = texture_pipe(
        mesh_path=mesh_path,
        save_dir=save_dir,
        save_name=f"polygenixai_texture_mesh_{get_random_hex()}.glb",
        uv_unwarp=True,
        uv_size=4096,
        rgb_path=mv_image_path,
        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
        camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
    )

    return textured_glb_path


with gr.Blocks(
    title="PolyGenixAI",
    css="body { background-color: #F3F4F6; } .gr-panel { background-color: white; }",
) as demo:
    gr.Markdown(HEADER)
    with gr.Tabs():
        with gr.Tab("Create 3D Model"):
            with gr.Row():
                with gr.Column(scale=1):
                    image_prompts = gr.Image(label="Upload Image", type="filepath", height=300)
                    seg_image = gr.Image(
                        label="Preview Segmentation",
                        type="pil",
                        format="png",
                        interactive=False,
                        height=300,
                    )
                    with gr.Accordion("Style & Settings", open=True):
                        style_filter = gr.Dropdown(
                            choices=["None", "Realistic", "Fantasy", "Cartoon", "Sci-Fi", "Vintage"],
                            label="Style Filter",
                            value="None",
                            info="Select a style to enhance your 3D model (optional)",
                        )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=0,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=8,
                            maximum=50,
                            step=1,
                            value=50,
                            info="Higher steps improve quality but take longer",
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=0.0,
                            maximum=20.0,
                            step=0.1,
                            value=7.0,
                            info="Controls how closely the model follows the input",
                        )
                        reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
                        target_face_num = gr.Slider(
                            maximum=1000000,
                            minimum=10000,
                            value=DEFAULT_FACE_NUMBER,
                            label="Target Face Number",
                            info="Adjust mesh complexity",
                        )
                    gen_button = gr.Button("Generate 3D Model", variant="primary")
                    gen_texture_button = gr.Button("Apply Texture", variant="secondary", interactive=False)
                with gr.Column(scale=1):
                    model_output = gr.Model3D(label="3D Model Preview", interactive=False, height=400)
                    textured_model_output = gr.Model3D(label="Textured 3D Model", interactive=False, height=400)
                    download_button = gr.Button("Download GLB", variant="secondary")
        with gr.Tab("Gallery & Community"):
            gr.Markdown("### Explore Creations")
            gr.Examples(
                examples=[
                    f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
                    for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
                ],
                fn=run_full,
                inputs=[image_prompts],
                outputs=[seg_image, model_output, textured_model_output],
                cache_examples=True,
            )
            gr.Markdown(
                "Join our [PolyGenixAI Community](https://www.anvilinteractive.com/community) "
                "to share your creations and get inspired!"
            )

    gen_button.click(
        run_segmentation,
        inputs=[image_prompts],
        outputs=[seg_image],
    ).then(
        get_random_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        image_to_3d,
        inputs=[
            seg_image,
            seed,
            num_inference_steps,
            guidance_scale,
            reduce_face,
            target_face_num,
        ],
        outputs=[model_output],
    ).then(
        lambda: gr.Button(interactive=True),
        outputs=[gen_texture_button],
    )

    gen_texture_button.click(
        run_texture,
        inputs=[image_prompts, model_output, seed],
        outputs=[textured_model_output],
    )

    demo.load(start_session)
    demo.unload(end_session)

demo.launch()