SAM3D

Running

App Files Files Community

SAM3D / app.py

bhatanerohan

Update app.py

adc638a verified about 1 month ago

raw

history blame contribute delete

17.5 kB

	"""
	Text-to-3D Pipeline with Editing: Gemini + SAM-3D
	MCP Server + Gradio UI for MCP Hackathon
	"""

	import os
	import io
	import json
	import tempfile
	import gradio as gr
	from google import genai
	from google.genai import types
	from PIL import Image
	import modal

	# Shared Gemini client. Stays None until init_gemini() assigns it, so callers
	# test it for truthiness before first use.
	client = None

	def init_gemini():
	    """Create the module-level Gemini client from ``GEMINI_API_KEY``.

	    Returns:
	        True when the environment variable holds a non-empty key and the
	        global ``client`` was assigned; False when the key is missing or empty
	        (``client`` is left untouched in that case).
	    """
	    global client
	    api_key = os.environ.get("GEMINI_API_KEY")
	    if not api_key:
	        return False
	    # Pass the key explicitly rather than writing it back into os.environ
	    # (which was a no-op) and relying on the SDK's own environment lookup.
	    client = genai.Client(api_key=api_key)
	    return True

	def image_to_bytes(image):
	    """Encode a PIL image as PNG and return the resulting bytes."""
	    with io.BytesIO() as png_stream:
	        image.save(png_stream, format="PNG")
	        return png_stream.getvalue()

	def run_sam3d(image, mask):
	    """Reconstruct a 3D object from an image/mask pair with SAM-3D on Modal.

	    The image is converted to RGB, both inputs are PNG-encoded, and the bytes
	    are passed to the ``reconstruct`` method of the ``SAM3DModel`` class looked
	    up in the ``sam3d-objects-inference`` Modal app.

	    Returns:
	        The ``(ply_bytes, glb_bytes)`` pair produced by the remote call.
	    """
	    rgb_png = image_to_bytes(image.convert("RGB"))
	    mask_png = image_to_bytes(mask)

	    remote_cls = modal.Cls.from_name("sam3d-objects-inference", "SAM3DModel")
	    ply_bytes, glb_bytes = remote_cls().reconstruct.remote(rgb_png, mask_png)
	    return ply_bytes, glb_bytes


	# ============================================================
	# MCP TOOLS - These functions are exposed as MCP tools
	# ============================================================

	def generate_3d_model(prompt: str) -> str:
	    """
	    Generate a 3D model from a text description.

	    Gemini renders the prompt as an image, a second Gemini call strips the
	    background, a grayscale copy of that result is used as the mask, and image
	    plus mask are sent to SAM-3D. Outputs are written to a new temporary directory.

	    Args:
	        prompt: Text description of the object to generate (e.g., "a red sports car", "a wooden chair")

	    Returns:
	        JSON string with paths to generated files, or a JSON object with a single "error" key on failure
	    """
	    # A blank prompt would still cost two model calls and a SAM-3D run.
	    if not prompt or not prompt.strip():
	        return json.dumps({"error": "Prompt must not be empty"})

	    if not client and not init_gemini():
	        return json.dumps({"error": "GEMINI_API_KEY not configured"})

	    try:
	        # STEP 1: Generate image
	        initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

	        response_gen = client.models.generate_content(
	            model="gemini-2.5-flash-image",
	            contents=[initial_prompt],
	        )

	        # Guard against `parts` being None (a response without content) so the
	        # caller gets "Image generation failed" instead of a TypeError message.
	        initial_image = None
	        for part in response_gen.parts or []:
	            if part.inline_data:
	                initial_image = Image.open(io.BytesIO(part.inline_data.data))
	                break

	        if initial_image is None:
	            return json.dumps({"error": "Image generation failed"})

	        # STEP 2: Remove background
	        edit_prompt = "Remove the background completely, make the background transparent. Preserve the object's shadow for realism."
	        image_part = types.Part.from_bytes(
	            data=image_to_bytes(initial_image),
	            mime_type="image/png",
	        )

	        response_edit = client.models.generate_content(
	            model="gemini-3-pro-image-preview",
	            contents=[edit_prompt, image_part],
	        )

	        final_image = None
	        for part in response_edit.parts or []:
	            if part.inline_data:
	                final_image = Image.open(io.BytesIO(part.inline_data.data))
	                break

	        if final_image is None:
	            return json.dumps({"error": "Background removal failed"})

	        # STEP 3: Create grayscale mask
	        # NOTE(review): this is the luminance of the edited image; any alpha
	        # channel is ignored. Confirm this is the mask the SAM-3D endpoint expects.
	        gray = final_image.convert("L")

	        # STEP 4: Run SAM-3D
	        ply_bytes, glb_bytes = run_sam3d(final_image, gray)

	        # Save all outputs. The directory is intentionally not removed here:
	        # the returned paths must stay readable after this function returns.
	        temp_dir = tempfile.mkdtemp()

	        original_path = os.path.join(temp_dir, "original.png")
	        nobg_path = os.path.join(temp_dir, "transparent.png")
	        mask_path = os.path.join(temp_dir, "mask.png")
	        ply_path = os.path.join(temp_dir, "model.ply")

	        initial_image.save(original_path)
	        final_image.save(nobg_path)
	        gray.save(mask_path)

	        with open(ply_path, 'wb') as f:
	            f.write(ply_bytes)

	        # The GLB is optional; "glb_model" is null in the JSON when absent.
	        glb_path = None
	        if glb_bytes:
	            glb_path = os.path.join(temp_dir, "model.glb")
	            with open(glb_path, 'wb') as f:
	                f.write(glb_bytes)

	        return json.dumps({
	            "success": True,
	            "prompt": prompt,
	            "original_image": original_path,
	            "transparent_image": nobg_path,
	            "mask_image": mask_path,
	            "ply_model": ply_path,
	            "glb_model": glb_path,
	            "message": f"Successfully generated 3D model for: {prompt}"
	        })

	    except Exception as e:
	        # Tool boundary: report every failure as JSON instead of raising.
	        return json.dumps({"error": str(e)})


	def edit_3d_model(edit_prompt: str, transparent_image_path: str) -> str:
	    """
	    Edit an existing 3D model by modifying its transparent image and regenerating.

	    The image at the given path is sent to Gemini together with the edit
	    instruction; the edited image and a grayscale copy used as the mask are then
	    passed to SAM-3D. Outputs are written to a new temporary directory.

	    Args:
	        edit_prompt: Description of the edit to apply (e.g., "remove the wings", "change color to blue")
	        transparent_image_path: Path to the transparent PNG image from a previous generation

	    Returns:
	        JSON string with paths to the new edited files, or a JSON object with a single "error" key on failure
	    """
	    # A blank instruction would still cost a model call and a SAM-3D run.
	    if not edit_prompt or not edit_prompt.strip():
	        return json.dumps({"error": "Edit prompt must not be empty"})

	    if not client and not init_gemini():
	        return json.dumps({"error": "GEMINI_API_KEY not configured"})

	    try:
	        # NOTE(security): the path comes straight from the tool caller and is
	        # opened as-is, so any image file readable by this process can be sent
	        # to the model. Restrict it to known output directories if that matters.
	        # The context manager closes the file handle as soon as the image has
	        # been re-encoded.
	        with Image.open(transparent_image_path) as current_image:
	            source_png = image_to_bytes(current_image)

	        image_part = types.Part.from_bytes(
	            data=source_png,
	            mime_type="image/png",
	        )

	        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

	        response_edit = client.models.generate_content(
	            model="gemini-3-pro-image-preview",
	            contents=[full_edit_prompt, image_part],
	        )

	        # Guard against `parts` being None (a response without content) so the
	        # caller gets "Edit failed" instead of a TypeError message.
	        edited_image = None
	        for part in response_edit.parts or []:
	            if part.inline_data:
	                edited_image = Image.open(io.BytesIO(part.inline_data.data))
	                break

	        if edited_image is None:
	            return json.dumps({"error": "Edit failed"})

	        # NOTE(review): the mask is the luminance of the edited image; any alpha
	        # channel is ignored. Confirm this is what the SAM-3D endpoint expects.
	        gray = edited_image.convert("L")
	        ply_bytes, glb_bytes = run_sam3d(edited_image, gray)

	        # Not cleaned up here: the returned paths must outlive this call.
	        temp_dir = tempfile.mkdtemp()

	        nobg_path = os.path.join(temp_dir, "edited.png")
	        mask_path = os.path.join(temp_dir, "mask.png")
	        ply_path = os.path.join(temp_dir, "model.ply")

	        edited_image.save(nobg_path)
	        gray.save(mask_path)

	        with open(ply_path, 'wb') as f:
	            f.write(ply_bytes)

	        # The GLB is optional; "glb_model" is null in the JSON when absent.
	        glb_path = None
	        if glb_bytes:
	            glb_path = os.path.join(temp_dir, "model.glb")
	            with open(glb_path, 'wb') as f:
	                f.write(glb_bytes)

	        return json.dumps({
	            "success": True,
	            "edit_prompt": edit_prompt,
	            "transparent_image": nobg_path,
	            "mask_image": mask_path,
	            "ply_model": ply_path,
	            "glb_model": glb_path,
	            "message": f"Successfully applied edit: {edit_prompt}"
	        })

	    except Exception as e:
	        # Tool boundary: report every failure as JSON instead of raising.
	        return json.dumps({"error": str(e)})


	# ============================================================
	# GRADIO UI FUNCTIONS
	# ============================================================

	def generate_3d_ui(prompt, progress=gr.Progress()):
	    """UI wrapper around the text-to-3D pipeline with progress updates.

	    Args:
	        prompt: Text description entered in the UI.
	        progress: Gradio progress tracker, called once per pipeline stage.

	    Returns:
	        Tuple of (original image path, background-removed image path, mask
	        path, path for the 3D viewer (GLB when available, otherwise PLY), GLB
	        path or None, PLY path, background-removed PIL image, edit count).

	    Raises:
	        gr.Error: If the API key is missing, the prompt is blank, or any
	            pipeline stage fails.
	    """
	    if not client and not init_gemini():
	        raise gr.Error("GEMINI_API_KEY not set in Space secrets")

	    # A blank prompt would still cost two model calls and a SAM-3D run.
	    if not prompt or not prompt.strip():
	        raise gr.Error("Please enter a prompt")

	    progress(0.1, desc="Generating image...")

	    initial_prompt = f"{prompt}, three-quarter front view angle, natural daylight, soft shadows showing depth and contours, clean simple background, full object visible, photorealistic"

	    initial_image = None
	    try:
	        response_gen = client.models.generate_content(
	            model="gemini-2.5-flash-image",
	            contents=[initial_prompt],
	        )

	        # Guard against `parts` being None (a response without content).
	        for part in response_gen.parts or []:
	            if part.inline_data:
	                initial_image = Image.open(io.BytesIO(part.inline_data.data))
	                break
	    except Exception as e:
	        raise gr.Error(f"Image generation failed: {e}") from e

	    # Checked outside the try so this error is not caught and re-wrapped into
	    # "Image generation failed: Image generation failed".
	    if initial_image is None:
	        raise gr.Error("Image generation failed")

	    progress(0.3, desc="Removing background...")

	    final_image = None
	    try:
	        image_part = types.Part.from_bytes(
	            data=image_to_bytes(initial_image),
	            mime_type="image/png",
	        )

	        response_edit = client.models.generate_content(
	            model="gemini-3-pro-image-preview",
	            contents=["Remove the background completely, make the background transparent. Preserve the object's shadow for realism.", image_part],
	        )

	        for part in response_edit.parts or []:
	            if part.inline_data:
	                final_image = Image.open(io.BytesIO(part.inline_data.data))
	                break
	    except Exception as e:
	        raise gr.Error(f"Background removal failed: {e}") from e

	    if final_image is None:
	        raise gr.Error("Background removal failed")

	    progress(0.4, desc="Creating mask...")
	    # NOTE(review): the mask is the luminance of the image; any alpha channel
	    # is ignored. Confirm this is what the SAM-3D endpoint expects.
	    gray = final_image.convert("L")

	    progress(0.5, desc="Running SAM-3D (1-2 min, first run may take longer)...")

	    try:
	        ply_bytes, glb_bytes = run_sam3d(final_image, gray)
	    except Exception as e:
	        raise gr.Error(f"SAM-3D failed: {e}") from e

	    progress(0.9, desc="Saving outputs...")

	    # Not cleaned up here: Gradio serves the files from these paths afterwards.
	    temp_dir = tempfile.mkdtemp()

	    original_path = os.path.join(temp_dir, "original.png")
	    nobg_path = os.path.join(temp_dir, "no_background.png")
	    mask_path = os.path.join(temp_dir, "mask.png")
	    ply_path = os.path.join(temp_dir, "model.ply")

	    initial_image.save(original_path)
	    final_image.save(nobg_path)
	    gray.save(mask_path)

	    with open(ply_path, 'wb') as f:
	        f.write(ply_bytes)

	    glb_path = None
	    if glb_bytes:
	        glb_path = os.path.join(temp_dir, "model.glb")
	        with open(glb_path, 'wb') as f:
	            f.write(glb_bytes)

	    progress(1.0, desc="Done!")

	    return (
	        original_path,
	        nobg_path,
	        mask_path,
	        glb_path if glb_path else ply_path,  # viewer falls back to the PLY
	        glb_path,
	        ply_path,
	        final_image,
	        # A fresh generation has had no edits applied yet; returning 1 here
	        # made the counter read "Edits applied: 1" before any edit was made.
	        0,
	    )


	def edit_3d_ui(edit_prompt, current_image, edit_count, progress=gr.Progress()):
	    """UI wrapper that applies an edit to the current image and rebuilds the model.

	    Args:
	        edit_prompt: Edit instruction entered in the UI.
	        current_image: PIL image kept in session state from the last
	            generation or edit; None when nothing has been generated yet.
	        edit_count: Number of edits applied so far in this session.
	        progress: Gradio progress tracker, called once per pipeline stage.

	    Returns:
	        Tuple of (edited image path, mask path, path for the 3D viewer (GLB when
	        available, otherwise PLY), GLB path or None, PLY path, edited PIL
	        image, incremented edit count).

	    Raises:
	        gr.Error: If there is no image to edit, the API key is missing, the
	            edit prompt is blank, or any pipeline stage fails.
	    """
	    if current_image is None:
	        raise gr.Error("No image to edit. Generate a 3D model first!")

	    if not client and not init_gemini():
	        raise gr.Error("GEMINI_API_KEY not set")

	    # A blank instruction would still cost a model call and a SAM-3D run.
	    if not edit_prompt or not edit_prompt.strip():
	        raise gr.Error("Please enter an edit prompt")

	    progress(0.1, desc=f"Applying edit: {edit_prompt}...")

	    edited_image = None
	    try:
	        image_part = types.Part.from_bytes(
	            data=image_to_bytes(current_image),
	            mime_type="image/png",
	        )

	        full_edit_prompt = f"{edit_prompt}. Keep the background transparent. Maintain image quality and lighting."

	        response_edit = client.models.generate_content(
	            model="gemini-3-pro-image-preview",
	            contents=[full_edit_prompt, image_part],
	        )

	        # Guard against `parts` being None (a response without content).
	        for part in response_edit.parts or []:
	            if part.inline_data:
	                edited_image = Image.open(io.BytesIO(part.inline_data.data))
	                break
	    except Exception as e:
	        raise gr.Error(f"Edit failed: {e}") from e

	    # Checked outside the try so this error is not caught and re-wrapped into
	    # "Edit failed: Edit failed".
	    if edited_image is None:
	        raise gr.Error("Edit failed")

	    progress(0.3, desc="Creating new mask...")
	    # NOTE(review): the mask is the luminance of the image; any alpha channel
	    # is ignored. Confirm this is what the SAM-3D endpoint expects.
	    gray = edited_image.convert("L")

	    progress(0.4, desc="Running SAM-3D (1-2 min)...")

	    try:
	        ply_bytes, glb_bytes = run_sam3d(edited_image, gray)
	    except Exception as e:
	        raise gr.Error(f"SAM-3D failed: {e}") from e

	    progress(0.9, desc="Saving outputs...")

	    # Not cleaned up here: Gradio serves the files from these paths afterwards.
	    temp_dir = tempfile.mkdtemp()

	    nobg_path = os.path.join(temp_dir, "edited.png")
	    mask_path = os.path.join(temp_dir, "mask.png")
	    ply_path = os.path.join(temp_dir, "model.ply")

	    edited_image.save(nobg_path)
	    gray.save(mask_path)

	    with open(ply_path, 'wb') as f:
	        f.write(ply_bytes)

	    glb_path = None
	    if glb_bytes:
	        glb_path = os.path.join(temp_dir, "model.glb")
	        with open(glb_path, 'wb') as f:
	            f.write(glb_bytes)

	    # Tolerate a missing count so an unset state value cannot break the edit.
	    new_edit_count = (edit_count or 0) + 1
	    progress(1.0, desc=f"Edit #{new_edit_count} complete!")

	    return (
	        nobg_path,
	        mask_path,
	        glb_path if glb_path else ply_path,  # viewer falls back to the PLY
	        glb_path,
	        ply_path,
	        edited_image,
	        new_edit_count,
	    )


	# ============================================================
	# MCP TOOL INTERFACES
	# ============================================================

	# Tool tab for text-to-3D generation: one prompt in, one JSON result string out.
	generate_tool = gr.Interface(
	    generate_3d_model,
	    gr.Textbox(placeholder="A red sports car", label="Prompt"),
	    gr.Textbox(label="Result (JSON)"),
	    title="Generate 3D Model",
	    description="Generate a 3D model from a text description",
	    api_name="generate_3d",
	)

	# Tool tab for editing: an edit instruction plus the path of a previously
	# generated transparent image in, one JSON result string out.
	edit_tool = gr.Interface(
	    edit_3d_model,
	    [
	        gr.Textbox(placeholder="Remove the wings", label="Edit Prompt"),
	        gr.Textbox(placeholder="/path/to/transparent.png", label="Transparent Image Path"),
	    ],
	    gr.Textbox(label="Result (JSON)"),
	    title="Edit 3D Model",
	    description="Edit an existing 3D model",
	    api_name="edit_3d",
	)


	# ============================================================
	# MAIN UI
	# ============================================================

	with gr.Blocks() as main_ui:

	    # Session state: the latest background-removed image and the edit tally.
	    current_image_state = gr.State(None)
	    edit_count_state = gr.State(0)

	    gr.Markdown("""
	# 🎨 Text to 3D Model (MCP Server)
	### Powered by Gemini + SAM-3D Objects

	This app is also an MCP Server! Claude Desktop, Cursor, and other MCP clients can use the `generate_3d` and `edit_3d` tools.

	⏱️ Generation takes 1-2 minutes. First run may take longer as the model warms up.
	""")

	    # --- Generation controls ---
	    gr.Markdown("## 1️⃣ Generate Initial 3D Model")

	    with gr.Row():
	        with gr.Column(scale=2):
	            prompt_input = gr.Textbox(
	                lines=2,
	                label="Text Prompt",
	                placeholder="A plane with eagle wings",
	            )
	        with gr.Column(scale=1):
	            generate_btn = gr.Button("🚀 Generate", size="lg", variant="primary")

	    gr.Examples(
	        inputs=prompt_input,
	        examples=[
	            "A plane with eagle wings",
	            "A wooden chair",
	            "A red sports car",
	            "A ceramic coffee mug",
	            "A robot dog",
	        ],
	    )

	    # --- Edit controls ---
	    gr.Markdown("## 2️⃣ Edit Your Model")

	    with gr.Row():
	        with gr.Column(scale=2):
	            edit_input = gr.Textbox(
	                lines=2,
	                label="Edit Prompt",
	                placeholder="Remove the wings",
	            )
	        with gr.Column(scale=1):
	            edit_btn = gr.Button("✏️ Apply Edit", size="lg", variant="secondary")
	            edit_counter = gr.Markdown("No edits yet")

	    gr.Examples(
	        inputs=edit_input,
	        examples=[
	            "Remove the wings",
	            "Change color to blue",
	            "Add racing stripes",
	            "Make it larger",
	            "Add wheels",
	        ],
	    )

	    # --- Outputs ---
	    gr.Markdown("## 📸 Images")
	    with gr.Row():
	        original_output = gr.Image(type="filepath", label="1. Original")
	        nobg_output = gr.Image(type="filepath", label="2. Transparent")
	        mask_output = gr.Image(type="filepath", label="3. Mask")

	    gr.Markdown("## 🎮 3D Model")
	    model_output = gr.Model3D(
	        clear_color=[0.1, 0.1, 0.1, 1.0],
	        label="Interactive 3D Model (drag to rotate)",
	    )

	    gr.Markdown("## 📥 Downloads")
	    with gr.Row():
	        glb_download = gr.File(label="GLB (mesh)")
	        ply_download = gr.File(label="PLY (splat)")

	    gr.Markdown("""
	---
	## 🔌 MCP Server Info

	This app exposes two MCP tools: `generate_3d` and `edit_3d`

	Connect via: `https://YOUR-SPACE.hf.space/gradio_api/mcp/sse`

	---
	Built for [MCP 1st Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday) 🎂
	""")

	    def update_counter(count):
	        """Return the text shown in the edit counter for the given edit count."""
	        if count == 0:
	            return "No edits yet"
	        return f"Edits applied: {count}"

	    # Generating fills every output and both state values, then refreshes the
	    # counter text from the updated count.
	    generate_btn.click(
	        fn=generate_3d_ui,
	        inputs=[prompt_input],
	        outputs=[
	            original_output,
	            nobg_output,
	            mask_output,
	            model_output,
	            glb_download,
	            ply_download,
	            current_image_state,
	            edit_count_state,
	        ],
	    ).then(
	        fn=update_counter,
	        inputs=[edit_count_state],
	        outputs=[edit_counter],
	    )

	    # Editing leaves the "Original" image untouched and replaces the rest.
	    edit_btn.click(
	        fn=edit_3d_ui,
	        inputs=[edit_input, current_image_state, edit_count_state],
	        outputs=[
	            nobg_output,
	            mask_output,
	            model_output,
	            glb_download,
	            ply_download,
	            current_image_state,
	            edit_count_state,
	        ],
	    ).then(
	        fn=update_counter,
	        inputs=[edit_count_state],
	        outputs=[edit_counter],
	    )


	# ============================================================
	# COMBINE UI + MCP TOOLS
	# ============================================================

	# Combine the interactive UI and both tool interfaces as tabs of one app.
	demo = gr.TabbedInterface(
	    interface_list=[main_ui, generate_tool, edit_tool],
	    tab_names=["🎨 Interactive UI", "🔧 Generate Tool", "✏️ Edit Tool"],
	    # Plain "|" here: a backslash before the pipe is not a valid Python escape
	    # sequence, so it triggered a warning and left the backslash in the title.
	    title="Text to 3D | MCP Server",
	)

	if __name__ == "__main__":
	    # Launched with mcp_server=True so the app is served with MCP enabled.
	    demo.launch(mcp_server=True)