|
|
""" |
|
|
Text-to-3D Pipeline with Editing: Gemini + SAM-3D |
|
|
MCP Server + Gradio UI for MCP Hackathon |
|
|
""" |
|
|
|
|
|
import os |
|
|
import io |
|
|
import json |
|
|
import tempfile |
|
|
import gradio as gr |
|
|
from google import genai |
|
|
from google.genai import types |
|
|
from PIL import Image |
|
|
import modal |
|
|
|
|
|
|
|
|
# Lazily-initialized google-genai client; populated by init_gemini() on first use.
client = None
|
|
|
|
|
def init_gemini():
    """Initialize the module-level Gemini client from the environment.

    Reads GEMINI_API_KEY from the environment; genai.Client() picks the key
    up from the environment itself, so no explicit key passing is needed.

    Returns:
        True if a client was created, False if the key is not configured.
    """
    global client
    api_key = os.environ.get("GEMINI_API_KEY")
    if api_key:
        # The key is already in the environment, which is exactly where
        # genai.Client() looks for it — no need to write it back.
        client = genai.Client()
        return True
    return False
|
|
|
|
|
def image_to_bytes(image):
    """Serialize a PIL Image to raw PNG bytes."""
    with io.BytesIO() as sink:
        image.save(sink, format='PNG')
        return sink.getvalue()
|
|
|
|
|
def run_sam3d(image, mask):
    """Send image and mask to the SAM-3D deployment on Modal.

    Returns a (ply_bytes, glb_bytes) pair from the remote reconstruction.
    """
    rgb_png = image_to_bytes(image.convert("RGB"))
    mask_png = image_to_bytes(mask)

    # Look up the deployed class by app/name and invoke it remotely.
    remote_cls = modal.Cls.from_name("sam3d-objects-inference", "SAM3DModel")
    ply_data, glb_data = remote_cls().reconstruct.remote(rgb_png, mask_png)
    return ply_data, glb_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _first_inline_image(response):
    """Return the first inline image part of a Gemini response as a PIL Image.

    Returns None if the response contains no inline image data.
    """
    for part in response.parts:
        if part.inline_data:
            return Image.open(io.BytesIO(part.inline_data.data))
    return None


def generate_3d_model(prompt: str) -> str:
    """
    Generate a 3D model from a text description.

    Pipeline: Gemini text-to-image -> Gemini background removal ->
    grayscale mask -> SAM-3D reconstruction on Modal -> files on disk.

    Args:
        prompt: Text description of the object to generate (e.g., "a red sports car", "a wooden chair")

    Returns:
        JSON string with paths to generated files, or {"error": ...} on failure.
    """
    if not client:
        if not init_gemini():
            return json.dumps({"error": "GEMINI_API_KEY not configured"})

    try:
        # Bias generation toward a view that 3D reconstruction handles well:
        # 3/4 angle, soft shadows for depth cues, clean background.
        initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

        response_gen = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[initial_prompt],
        )
        initial_image = _first_inline_image(response_gen)
        if initial_image is None:
            return json.dumps({"error": "Image generation failed"})

        # Second Gemini pass: isolate the object so the mask covers only it.
        edit_prompt = "Remove the background completely, make the background transparent. Preserve the object's shadow for realism."
        image_part = types.Part.from_bytes(
            data=image_to_bytes(initial_image),
            mime_type="image/png"
        )
        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[edit_prompt, image_part],
        )
        final_image = _first_inline_image(response_edit)
        if final_image is None:
            return json.dumps({"error": "Background removal failed"})

        # NOTE(review): the "mask" is a plain grayscale conversion; if the
        # edited image is RGBA, its alpha channel would be a cleaner object
        # mask — confirm what the SAM-3D endpoint expects.
        gray = final_image.convert("L")

        ply_bytes, glb_bytes = run_sam3d(final_image, gray)

        # Persist every artifact to a fresh temp dir so the returned paths
        # remain valid for the caller (and for follow-up edit_3d_model calls).
        temp_dir = tempfile.mkdtemp()
        original_path = os.path.join(temp_dir, "original.png")
        nobg_path = os.path.join(temp_dir, "transparent.png")
        mask_path = os.path.join(temp_dir, "mask.png")
        ply_path = os.path.join(temp_dir, "model.ply")

        initial_image.save(original_path)
        final_image.save(nobg_path)
        gray.save(mask_path)

        with open(ply_path, 'wb') as f:
            f.write(ply_bytes)

        # The GLB mesh is optional; keep the path None when absent so the
        # JSON result reflects what was actually produced.
        glb_path = None
        if glb_bytes:
            glb_path = os.path.join(temp_dir, "model.glb")
            with open(glb_path, 'wb') as f:
                f.write(glb_bytes)

        return json.dumps({
            "success": True,
            "prompt": prompt,
            "original_image": original_path,
            "transparent_image": nobg_path,
            "mask_image": mask_path,
            "ply_model": ply_path,
            "glb_model": glb_path,
            "message": f"Successfully generated 3D model for: {prompt}"
        })

    except Exception as e:
        # MCP tool contract: always return JSON, never raise.
        return json.dumps({"error": str(e)})
|
|
|
|
|
|
|
|
def edit_3d_model(edit_prompt: str, transparent_image_path: str) -> str:
    """
    Edit an existing 3D model by modifying its transparent image and regenerating.

    Args:
        edit_prompt: Description of the edit to apply (e.g., "remove the wings", "change color to blue")
        transparent_image_path: Path to the transparent PNG image from a previous generation

    Returns:
        JSON string with paths to the new edited files
    """
    if not client and not init_gemini():
        return json.dumps({"error": "GEMINI_API_KEY not configured"})

    try:
        source_image = Image.open(transparent_image_path)
        source_part = types.Part.from_bytes(
            data=image_to_bytes(source_image),
            mime_type="image/png"
        )

        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[full_edit_prompt, source_part],
        )

        # Pull the first inline image out of the response, if any.
        result_image = None
        for piece in response.parts:
            if piece.inline_data:
                result_image = Image.open(io.BytesIO(piece.inline_data.data))
                break

        if result_image is None:
            return json.dumps({"error": "Edit failed"})

        # Rebuild the mask and re-run reconstruction on the edited image.
        mask = result_image.convert("L")
        ply_bytes, glb_bytes = run_sam3d(result_image, mask)

        out_dir = tempfile.mkdtemp()
        edited_path = os.path.join(out_dir, "edited.png")
        mask_path = os.path.join(out_dir, "mask.png")
        ply_path = os.path.join(out_dir, "model.ply")

        result_image.save(edited_path)
        mask.save(mask_path)

        with open(ply_path, 'wb') as handle:
            handle.write(ply_bytes)

        glb_path = None
        if glb_bytes:
            glb_path = os.path.join(out_dir, "model.glb")
            with open(glb_path, 'wb') as handle:
                handle.write(glb_bytes)

        return json.dumps({
            "success": True,
            "edit_prompt": edit_prompt,
            "transparent_image": edited_path,
            "mask_image": mask_path,
            "ply_model": ply_path,
            "glb_model": glb_path,
            "message": f"Successfully applied edit: {edit_prompt}"
        })

    except Exception as e:
        return json.dumps({"error": str(e)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_3d_ui(prompt, progress=gr.Progress()):
    """UI wrapper with progress updates.

    Runs the full pipeline and returns the tuple wired to the Gradio
    outputs: (original path, transparent path, mask path, 3D-viewer path,
    GLB path, PLY path, PIL image for the edit state, edit count).
    """
    if not client:
        if not init_gemini():
            raise gr.Error("GEMINI_API_KEY not set in Space secrets")

    progress(0.1, desc="Generating image...")

    # Bias the prompt toward a reconstruction-friendly view.
    initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

    try:
        response_gen = client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[initial_prompt],
        )

        initial_image = None
        for part in response_gen.parts:
            if part.inline_data:
                image_bytes = part.inline_data.data
                initial_image = Image.open(io.BytesIO(image_bytes))
                break

        if initial_image is None:
            raise gr.Error("Image generation failed")

    except gr.Error:
        # Re-raise our own error untouched; the broad handler below would
        # otherwise double-wrap it ("Image generation failed: Image ...").
        raise
    except Exception as e:
        raise gr.Error(f"Image generation failed: {e}")

    progress(0.3, desc="Removing background...")

    try:
        image_part = types.Part.from_bytes(
            data=image_to_bytes(initial_image),
            mime_type="image/png"
        )

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=["Remove the background completely, make the background transparent. Preserve the object's shadow for realism.", image_part],
        )

        final_image = None
        for part in response_edit.parts:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                final_image = Image.open(io.BytesIO(edited_bytes))
                break

        if final_image is None:
            raise gr.Error("Background removal failed")

    except gr.Error:
        # Same double-wrap protection as above.
        raise
    except Exception as e:
        raise gr.Error(f"Background removal failed: {e}")

    progress(0.4, desc="Creating mask...")
    # NOTE(review): the mask is a plain grayscale conversion — confirm the
    # SAM-3D endpoint doesn't expect the alpha channel instead.
    gray = final_image.convert("L")

    progress(0.5, desc="Running SAM-3D (1-2 min, first run may take longer)...")

    try:
        ply_bytes, glb_bytes = run_sam3d(final_image, gray)
    except Exception as e:
        raise gr.Error(f"SAM-3D failed: {e}")

    progress(0.9, desc="Saving outputs...")

    temp_dir = tempfile.mkdtemp()

    original_path = os.path.join(temp_dir, "original.png")
    nobg_path = os.path.join(temp_dir, "no_background.png")
    mask_path = os.path.join(temp_dir, "mask.png")
    ply_path = os.path.join(temp_dir, "model.ply")

    initial_image.save(original_path)
    final_image.save(nobg_path)
    gray.save(mask_path)

    with open(ply_path, 'wb') as f:
        f.write(ply_bytes)

    glb_path = None
    if glb_bytes:
        glb_path = os.path.join(temp_dir, "model.glb")
        with open(glb_path, 'wb') as f:
            f.write(glb_bytes)

    progress(1.0, desc="Done!")

    return (
        original_path,
        nobg_path,
        mask_path,
        glb_path if glb_path else ply_path,  # viewer prefers the GLB mesh
        glb_path,
        ply_path,
        final_image,
        0,  # fresh generation: no edits applied yet (was 1, which made the
            # counter read "Edits applied: 1" before any edit)
    )
|
|
|
|
|
|
|
|
def edit_3d_ui(edit_prompt, current_image, edit_count, progress=gr.Progress()):
    """UI wrapper for editing.

    Applies a Gemini image edit to the current transparent image, re-runs
    SAM-3D, and returns the tuple wired to the Gradio outputs:
    (transparent path, mask path, 3D-viewer path, GLB path, PLY path,
    edited PIL image for the state, incremented edit count).
    """
    if current_image is None:
        raise gr.Error("No image to edit. Generate a 3D model first!")

    if not client:
        if not init_gemini():
            raise gr.Error("GEMINI_API_KEY not set")

    progress(0.1, desc=f"Applying edit: {edit_prompt}...")

    try:
        image_part = types.Part.from_bytes(
            data=image_to_bytes(current_image),
            mime_type="image/png"
        )

        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

        response_edit = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=[full_edit_prompt, image_part],
        )

        edited_image = None
        for part in response_edit.parts:
            if part.inline_data:
                edited_bytes = part.inline_data.data
                edited_image = Image.open(io.BytesIO(edited_bytes))
                break

        if edited_image is None:
            raise gr.Error("Edit failed")

    except gr.Error:
        # Re-raise our own error untouched; the broad handler below would
        # otherwise double-wrap it ("Edit failed: Edit failed").
        raise
    except Exception as e:
        raise gr.Error(f"Edit failed: {e}")

    progress(0.3, desc="Creating new mask...")
    gray = edited_image.convert("L")

    progress(0.4, desc="Running SAM-3D (1-2 min)...")

    try:
        ply_bytes, glb_bytes = run_sam3d(edited_image, gray)
    except Exception as e:
        raise gr.Error(f"SAM-3D failed: {e}")

    progress(0.9, desc="Saving outputs...")

    temp_dir = tempfile.mkdtemp()

    nobg_path = os.path.join(temp_dir, "edited.png")
    mask_path = os.path.join(temp_dir, "mask.png")
    ply_path = os.path.join(temp_dir, "model.ply")

    edited_image.save(nobg_path)
    gray.save(mask_path)

    with open(ply_path, 'wb') as f:
        f.write(ply_bytes)

    glb_path = None
    if glb_bytes:
        glb_path = os.path.join(temp_dir, "model.glb")
        with open(glb_path, 'wb') as f:
            f.write(glb_bytes)

    new_edit_count = edit_count + 1
    progress(1.0, desc=f"Edit #{new_edit_count} complete!")

    return (
        nobg_path,
        mask_path,
        glb_path if glb_path else ply_path,  # viewer prefers the GLB mesh
        glb_path,
        ply_path,
        edited_image,
        new_edit_count,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# MCP tool endpoints: thin gr.Interface wrappers around the JSON-returning
# functions so MCP clients (Claude Desktop, Cursor, ...) can call them by
# api_name. The function docstrings become the tool descriptions.
generate_tool = gr.Interface(
    fn=generate_3d_model,
    inputs=gr.Textbox(label="Prompt", placeholder="A red sports car"),
    outputs=gr.Textbox(label="Result (JSON)"),
    api_name="generate_3d",
    title="Generate 3D Model",
    description="Generate a 3D model from a text description"
)

edit_tool = gr.Interface(
    fn=edit_3d_model,
    inputs=[
        gr.Textbox(label="Edit Prompt", placeholder="Remove the wings"),
        gr.Textbox(label="Transparent Image Path", placeholder="/path/to/transparent.png")
    ],
    outputs=gr.Textbox(label="Result (JSON)"),
    api_name="edit_3d",
    title="Edit 3D Model",
    description="Edit an existing 3D model"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Interactive UI: generate once, then apply iterative edits. Gradio State
# carries the latest transparent PIL image and the edit count between calls.
# ---------------------------------------------------------------------------
with gr.Blocks() as main_ui:

    # Latest transparent image (PIL), used as the base for the next edit.
    current_image_state = gr.State(None)
    # Number of edits applied since the last generation.
    edit_count_state = gr.State(0)

    gr.Markdown("""
# 🎨 Text to 3D Model (MCP Server)
### Powered by Gemini + SAM-3D Objects

**This app is also an MCP Server!** Claude Desktop, Cursor, and other MCP clients can use the `generate_3d` and `edit_3d` tools.

⏱️ *Generation takes 1-2 minutes. First run may take longer as the model warms up.*
""")

    gr.Markdown("## 1️⃣ Generate Initial 3D Model")

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Text Prompt", placeholder="A plane with eagle wings", lines=2)
        with gr.Column(scale=1):
            generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

    gr.Examples(
        examples=["A plane with eagle wings", "A wooden chair", "A red sports car", "A ceramic coffee mug", "A robot dog"],
        inputs=prompt_input
    )

    gr.Markdown("## 2️⃣ Edit Your Model")

    with gr.Row():
        with gr.Column(scale=2):
            edit_input = gr.Textbox(label="Edit Prompt", placeholder="Remove the wings", lines=2)
        with gr.Column(scale=1):
            edit_btn = gr.Button("✏️ Apply Edit", variant="secondary", size="lg")
            edit_counter = gr.Markdown("*No edits yet*")

    gr.Examples(
        examples=["Remove the wings", "Change color to blue", "Add racing stripes", "Make it larger", "Add wheels"],
        inputs=edit_input
    )

    gr.Markdown("## 📸 Images")
    with gr.Row():
        original_output = gr.Image(label="1. Original", type="filepath")
        nobg_output = gr.Image(label="2. Transparent", type="filepath")
        mask_output = gr.Image(label="3. Mask", type="filepath")

    gr.Markdown("## 🎮 3D Model")
    model_output = gr.Model3D(label="Interactive 3D Model (drag to rotate)", clear_color=[0.1, 0.1, 0.1, 1.0])

    gr.Markdown("## 📥 Downloads")
    with gr.Row():
        glb_download = gr.File(label="GLB (mesh)")
        ply_download = gr.File(label="PLY (splat)")

    gr.Markdown("""
---
## 🔌 MCP Server Info

This app exposes two MCP tools: `generate_3d` and `edit_3d`

**Connect via:** `https://YOUR-SPACE.hf.space/gradio_api/mcp/sse`

---
**Built for [MCP 1st Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday)** 🎂
""")

    def update_counter(count):
        # Human-readable badge for the edit counter next to the edit button.
        return "*No edits yet*" if count == 0 else f"**Edits applied: {count}**"

    # Generate: fills all outputs, seeds the image state, sets the edit count,
    # then refreshes the counter badge from the new state.
    generate_btn.click(
        fn=generate_3d_ui,
        inputs=[prompt_input],
        outputs=[original_output, nobg_output, mask_output, model_output, glb_download, ply_download, current_image_state, edit_count_state]
    ).then(fn=update_counter, inputs=[edit_count_state], outputs=[edit_counter])

    # Edit: updates everything except the original image and bumps the count.
    edit_btn.click(
        fn=edit_3d_ui,
        inputs=[edit_input, current_image_state, edit_count_state],
        outputs=[nobg_output, mask_output, model_output, glb_download, ply_download, current_image_state, edit_count_state]
    ).then(fn=update_counter, inputs=[edit_count_state], outputs=[edit_counter])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tab 1 is the human-facing UI; tabs 2 and 3 double as the MCP tool endpoints
# (their api_name'd functions are what MCP clients see).
demo = gr.TabbedInterface(
    interface_list=[main_ui, generate_tool, edit_tool],
    tab_names=["🎨 Interactive UI", "🔧 Generate Tool", "✏️ Edit Tool"],
    title="Text to 3D | MCP Server"
)

if __name__ == "__main__":
    # mcp_server=True exposes the tools over Gradio's MCP SSE route.
    demo.launch(mcp_server=True)