"""
Lyra/Lune Flow-Matching Inference Space
Author: AbstractPhil
License: MIT
SD1.5 and SDXL-based flow matching with geometric crystalline architectures.
Supports Illustrious XL, standard SDXL, and SD1.5 variants.
Lyra VAE Versions:
- v1: SD1.5 (768 dim CLIP + T5-base) - geofractal.model.vae.vae_lyra
- v2: SDXL/Illustrious (768 CLIP-L + 1280 CLIP-G + 2048 T5-XL) - geofractal.model.vae.vae_lyra_v2
Features:
- Lazy loading: T5 and Lyra only download when first used
- Multiple schedulers: Euler Ancestral, Euler, DPM++ 2M SDE, DPM++ 2M
- Integrated loader module for automatic version detection
"""
import os
import json
import torch
import gradio as gr
import numpy as np
from PIL import Image
from typing import Optional, Dict, Tuple, Union
import spaces
from safetensors.torch import load_file as load_safetensors
from diffusers import (
UNet2DConditionModel,
AutoencoderKL,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DPMSolverSDEScheduler,
)
from transformers import (
CLIPTextModel,
CLIPTokenizer,
CLIPTextModelWithProjection,
T5EncoderModel,
T5Tokenizer
)
from huggingface_hub import hf_hub_download
# Import Lyra VAE v1 (SD1.5) from geofractal
try:
from geofractal.model.vae.vae_lyra import MultiModalVAE as LyraV1, MultiModalVAEConfig as LyraV1Config
LYRA_V1_AVAILABLE = True
except ImportError:
print("⚠️ Lyra VAE v1 not available")
LYRA_V1_AVAILABLE = False
# Import Lyra VAE v2 (SDXL/Illustrious) from geofractal
try:
from geofractal.model.vae.vae_lyra_v2 import MultiModalVAE as LyraV2, MultiModalVAEConfig as LyraV2Config
LYRA_V2_AVAILABLE = True
except ImportError:
print("⚠️ Lyra VAE v2 not available")
LYRA_V2_AVAILABLE = False
# Import Lyra loader module
try:
from geofractal.model.vae.loader import load_vae_lyra, load_lyra_illustrious
LYRA_LOADER_AVAILABLE = True
except ImportError:
print("⚠️ Lyra loader module not available, using fallback")
LYRA_LOADER_AVAILABLE = False
# ============================================================================
# CONSTANTS
# ============================================================================
ARCH_SD15 = "sd15"
ARCH_SDXL = "sdxl"
# Scheduler names
SCHEDULER_EULER_A = "Euler Ancestral"
SCHEDULER_EULER = "Euler"
SCHEDULER_DPM_2M_SDE = "DPM++ 2M SDE"
SCHEDULER_DPM_2M = "DPM++ 2M"
SCHEDULER_CHOICES = [
SCHEDULER_EULER_A,
SCHEDULER_EULER,
SCHEDULER_DPM_2M_SDE,
SCHEDULER_DPM_2M,
]
# ComfyUI key prefixes for SDXL single-file checkpoints
COMFYUI_UNET_PREFIX = "model.diffusion_model."
COMFYUI_CLIP_L_PREFIX = "conditioner.embedders.0.transformer."
COMFYUI_CLIP_G_PREFIX = "conditioner.embedders.1.model."
COMFYUI_VAE_PREFIX = "first_stage_model."
# Lyra repos
LYRA_ILLUSTRIOUS_REPO = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious"
LYRA_SD15_REPO = "AbstractPhil/vae-lyra"
# T5 encoders: Lyra v2 was trained against google/flan-t5-xl; Lyra v1 uses google/flan-t5-base
T5_XL_MODEL = "google/flan-t5-xl"
T5_BASE_MODEL = "google/flan-t5-base"
# ============================================================================
# LAZY LOADERS
# ============================================================================
class LazyT5Encoder:
"""Lazy loader for T5 encoder - only downloads/loads when first accessed."""
def __init__(self, model_name: str = T5_XL_MODEL, device: str = "cuda", dtype=torch.float16):
self.model_name = model_name
self.device = device
self.dtype = dtype
self._encoder = None
self._tokenizer = None
self._loaded = False
@property
def encoder(self) -> T5EncoderModel:
if self._encoder is None:
print(f"📥 Lazy loading T5 encoder: {self.model_name}...")
self._encoder = T5EncoderModel.from_pretrained(
self.model_name,
torch_dtype=self.dtype
).to(self.device)
self._encoder.eval()
print(f"✓ T5 encoder loaded ({sum(p.numel() for p in self._encoder.parameters())/1e6:.1f}M params)")
self._loaded = True
return self._encoder
@property
def tokenizer(self) -> T5Tokenizer:
if self._tokenizer is None:
print(f"📥 Loading T5 tokenizer: {self.model_name}...")
self._tokenizer = T5Tokenizer.from_pretrained(self.model_name)
print("✓ T5 tokenizer loaded")
return self._tokenizer
@property
def is_loaded(self) -> bool:
return self._loaded
def unload(self):
"""Free VRAM by unloading the encoder."""
if self._encoder is not None:
del self._encoder
self._encoder = None
self._loaded = False
torch.cuda.empty_cache()
print("🗑️ T5 encoder unloaded")
class LazyLyraModel:
"""Lazy loader for Lyra VAE - only downloads/loads when first accessed.
Exposes config with modality_seq_lens for proper tokenization lengths.
"""
def __init__(
self,
repo_id: str = LYRA_ILLUSTRIOUS_REPO,
device: str = "cuda",
checkpoint: Optional[str] = None
):
self.repo_id = repo_id
self.device = device
self.checkpoint = checkpoint
self._model = None
self._info = None
self._config = None
self._loaded = False
# Pre-fetch config without loading model (lightweight)
self._prefetch_config()
def _prefetch_config(self):
"""Fetch config.json to get sequence lengths without loading the full model."""
try:
config_path = hf_hub_download(
repo_id=self.repo_id,
filename="config.json",
repo_type="model"
)
with open(config_path, 'r') as f:
self._config = json.load(f)
# Detect version from config
is_v2 = 'modality_seq_lens' in self._config or 'binding_config' in self._config
version = "v2" if is_v2 else "v1"
print(f"📋 Lyra config prefetched: {self.repo_id} ({version})")
if is_v2:
print(f" Sequence lengths: {self._config.get('modality_seq_lens', {})}")
else:
print(f" Sequence length: {self._config.get('seq_len', 77)}")
except Exception as e:
print(f"⚠️ Could not prefetch Lyra config: {e}")
# Detect version from repo name and use appropriate defaults
is_illustrious = 'illustrious' in self.repo_id.lower() or 'xl' in self.repo_id.lower()
if is_illustrious:
# v2 defaults for SDXL/Illustrious
self._config = {
"modality_dims": {
"clip_l": 768,
"clip_g": 1280,
"t5_xl_l": 2048,
"t5_xl_g": 2048
},
"modality_seq_lens": {
"clip_l": 77,
"clip_g": 77,
"t5_xl_l": 512,
"t5_xl_g": 512
},
"fusion_strategy": "adaptive_cantor",
"latent_dim": 2048
}
else:
# v1 defaults for SD1.5
self._config = {
"modality_dims": {
"clip": 768,
"t5": 768
},
"seq_len": 77,
"fusion_strategy": "cantor",
"latent_dim": 768
}
@property
def config(self) -> Dict:
"""Get model config (available before full model load)."""
return self._config or {}
@property
def modality_seq_lens(self) -> Dict[str, int]:
"""Get sequence lengths for each modality.
Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
"""
# v2 format: modality_seq_lens dict
if 'modality_seq_lens' in self.config:
return self.config['modality_seq_lens']
# v1 format: derive from single seq_len
seq_len = self.config.get('seq_len', 77)
modality_dims = self.config.get('modality_dims', {})
# Return seq_len for all modalities in v1
return {name: seq_len for name in modality_dims.keys()}
@property
def t5_max_length(self) -> int:
"""Get T5 max sequence length from config.
Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
"""
# v2 format: modality_seq_lens dict
if 'modality_seq_lens' in self.config:
seq_lens = self.config['modality_seq_lens']
return seq_lens.get('t5_xl_l', seq_lens.get('t5_xl_g', 512))
# v1 format: single seq_len
return self.config.get('seq_len', 77)
@property
def clip_max_length(self) -> int:
"""Get CLIP max sequence length from config.
Handles both v1 (seq_len) and v2 (modality_seq_lens) config formats.
"""
# v2 format: modality_seq_lens dict
if 'modality_seq_lens' in self.config:
seq_lens = self.config['modality_seq_lens']
return seq_lens.get('clip_l', 77)
# v1 format: single seq_len (same for all modalities)
return self.config.get('seq_len', 77)
@property
def model(self):
if self._model is None:
print(f"📥 Lazy loading Lyra VAE: {self.repo_id}...")
if LYRA_LOADER_AVAILABLE:
# Use the loader module
self._model, self._info = load_vae_lyra(
self.repo_id,
checkpoint=self.checkpoint,
device=self.device,
return_info=True
)
# Update config from loaded info
if self._info and 'config' in self._info:
self._config = self._info['config']
else:
# Fallback to manual loading
self._model = self._load_fallback()
self._info = {"repo_id": self.repo_id, "version": "v2", "config": self._config}
self._model.eval()
self._loaded = True
print(f"✓ Lyra VAE loaded")
return self._model
@property
def info(self) -> Optional[Dict]:
if self._info is None:
return {"repo_id": self.repo_id, "config": self._config}
return self._info
@property
def is_loaded(self) -> bool:
return self._loaded
def _load_fallback(self):
"""Fallback loading if loader module not available."""
if not LYRA_V2_AVAILABLE:
raise ImportError("Lyra VAE v2 not available")
# Config already prefetched
config_dict = self._config
# Use provided checkpoint or find one
if self.checkpoint and self.checkpoint.strip():
checkpoint_file = self.checkpoint.strip()
# Add weights/ prefix if not present and file doesn't exist at root
if not checkpoint_file.startswith('weights/'):
checkpoint_file = f"weights/{checkpoint_file}"
print(f"[Lyra] Using specified checkpoint: {checkpoint_file}")
else:
# Find checkpoint automatically
from huggingface_hub import list_repo_files
repo_files = list_repo_files(self.repo_id, repo_type="model")
checkpoint_files = [f for f in repo_files if f.endswith('.safetensors') or f.endswith('.pt')]
# Prefer weights/ folder
weights_files = [f for f in checkpoint_files if f.startswith('weights/')]
if weights_files:
                checkpoint_file = sorted(weights_files)[-1]  # Lexicographically last (intended as the latest checkpoint)
elif checkpoint_files:
checkpoint_file = checkpoint_files[0]
else:
raise FileNotFoundError(f"No checkpoint found in {self.repo_id}")
print(f"[Lyra] Auto-selected checkpoint: {checkpoint_file}")
checkpoint_path = hf_hub_download(
repo_id=self.repo_id,
filename=checkpoint_file,
repo_type="model"
)
# Load weights
if checkpoint_file.endswith('.safetensors'):
state_dict = load_safetensors(checkpoint_path, device="cpu")
else:
checkpoint = torch.load(checkpoint_path, map_location="cpu")
state_dict = checkpoint.get('model_state_dict', checkpoint)
# Build config with all fields from prefetched config
vae_config = LyraV2Config(
modality_dims=config_dict.get('modality_dims'),
modality_seq_lens=config_dict.get('modality_seq_lens'),
binding_config=config_dict.get('binding_config'),
latent_dim=config_dict.get('latent_dim', 2048),
hidden_dim=config_dict.get('hidden_dim', 2048),
fusion_strategy=config_dict.get('fusion_strategy', 'adaptive_cantor'),
encoder_layers=config_dict.get('encoder_layers', 3),
decoder_layers=config_dict.get('decoder_layers', 3),
fusion_heads=config_dict.get('fusion_heads', 8),
cantor_depth=config_dict.get('cantor_depth', 8),
cantor_local_window=config_dict.get('cantor_local_window', 3),
alpha_init=config_dict.get('alpha_init', 1.0),
beta_init=config_dict.get('beta_init', 0.3),
)
model = LyraV2(vae_config)
model.load_state_dict(state_dict, strict=False)
model.to(self.device)
return model
def unload(self):
"""Free VRAM by unloading the model."""
if self._model is not None:
del self._model
self._model = None
self._info = None
self._loaded = False
torch.cuda.empty_cache()
print("🗑️ Lyra VAE unloaded")
# ============================================================================
# SCHEDULER FACTORY
# ============================================================================
def get_scheduler(
scheduler_name: str,
config_source: str = "stabilityai/stable-diffusion-xl-base-1.0",
is_sdxl: bool = True
):
"""Create scheduler by name.
Args:
scheduler_name: One of SCHEDULER_CHOICES
config_source: HF repo to load scheduler config from
        is_sdxl: Whether this is for SDXL (currently unused; reserved for SDXL-specific defaults)
Returns:
Configured scheduler instance
"""
subfolder = "scheduler"
if scheduler_name == SCHEDULER_EULER_A:
return EulerAncestralDiscreteScheduler.from_pretrained(
config_source,
subfolder=subfolder
)
elif scheduler_name == SCHEDULER_EULER:
return EulerDiscreteScheduler.from_pretrained(
config_source,
subfolder=subfolder
)
elif scheduler_name == SCHEDULER_DPM_2M_SDE:
# DPM++ 2M SDE - good for detailed images
return DPMSolverSDEScheduler.from_pretrained(
config_source,
subfolder=subfolder,
algorithm_type="sde-dpmsolver++",
solver_order=2,
use_karras_sigmas=True,
)
elif scheduler_name == SCHEDULER_DPM_2M:
        # DPM++ 2M - fast with good quality
return DPMSolverMultistepScheduler.from_pretrained(
config_source,
subfolder=subfolder,
algorithm_type="dpmsolver++",
solver_order=2,
use_karras_sigmas=True,
)
else:
print(f"⚠️ Unknown scheduler '{scheduler_name}', defaulting to Euler Ancestral")
return EulerAncestralDiscreteScheduler.from_pretrained(
config_source,
subfolder=subfolder
)
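# Example usage (sketch): schedulers are rebuilt from the SDXL base scheduler config, so
# switching samplers never requires reloading the UNet or text encoders.
#
#   scheduler = get_scheduler(SCHEDULER_DPM_2M_SDE)
#   scheduler.set_timesteps(25, device="cuda")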
# ============================================================================
# UTILITIES
# ============================================================================
def get_clip_hidden_state(
model_output,
clip_skip: int = 1,
output_hidden_states: bool = True
) -> torch.Tensor:
"""Extract hidden state with clip_skip support."""
if clip_skip == 1 or not output_hidden_states:
return model_output.last_hidden_state
if hasattr(model_output, 'hidden_states') and model_output.hidden_states is not None:
return model_output.hidden_states[-clip_skip]
return model_output.last_hidden_state
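# Worked example (sketch, hypothetical names): for an Illustrious-style "CLIP skip 2" setting,
#
#   out = text_encoder(input_ids, output_hidden_states=True)
#   hidden = get_clip_hidden_state(out, clip_skip=2, output_hidden_states=True)
#
# returns out.hidden_states[-2] (the penultimate transformer layer), while clip_skip=1
# falls back to out.last_hidden_state.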
# ============================================================================
# SDXL PIPELINE
# ============================================================================
class SDXLFlowMatchingPipeline:
"""Pipeline for SDXL-based flow-matching inference with dual CLIP encoders.
Uses lazy loading for T5 and Lyra - they're only downloaded when actually used.
"""
def __init__(
self,
vae: AutoencoderKL,
text_encoder: CLIPTextModel,
text_encoder_2: CLIPTextModelWithProjection,
tokenizer: CLIPTokenizer,
tokenizer_2: CLIPTokenizer,
unet: UNet2DConditionModel,
scheduler,
device: str = "cuda",
t5_loader: Optional[LazyT5Encoder] = None,
lyra_loader: Optional[LazyLyraModel] = None,
clip_skip: int = 1
):
self.vae = vae
self.text_encoder = text_encoder
self.text_encoder_2 = text_encoder_2
self.tokenizer = tokenizer
self.tokenizer_2 = tokenizer_2
self.unet = unet
self.scheduler = scheduler
self.device = device
# Lazy loaders for Lyra components
self.t5_loader = t5_loader
self.lyra_loader = lyra_loader
# Settings
self.clip_skip = clip_skip
        self.vae_scale_factor = 0.13025  # SDXL latent scaling factor; latents are divided by this before VAE decode
self.arch = ARCH_SDXL
# Track current scheduler name for UI
self._scheduler_name = SCHEDULER_EULER_A
def set_scheduler(self, scheduler_name: str):
"""Switch scheduler without reloading model."""
if scheduler_name != self._scheduler_name:
self.scheduler = get_scheduler(
scheduler_name,
config_source="stabilityai/stable-diffusion-xl-base-1.0",
is_sdxl=True
)
self._scheduler_name = scheduler_name
print(f"✓ Scheduler changed to: {scheduler_name}")
@property
def t5_encoder(self) -> Optional[T5EncoderModel]:
"""Access T5 encoder (triggers lazy load if needed)."""
return self.t5_loader.encoder if self.t5_loader else None
@property
def t5_tokenizer(self) -> Optional[T5Tokenizer]:
"""Access T5 tokenizer (triggers lazy load if needed)."""
return self.t5_loader.tokenizer if self.t5_loader else None
@property
def lyra_model(self):
"""Access Lyra model (triggers lazy load if needed)."""
return self.lyra_loader.model if self.lyra_loader else None
@property
def lyra_available(self) -> bool:
"""Check if Lyra components are configured (not necessarily loaded)."""
return self.t5_loader is not None and self.lyra_loader is not None
def encode_prompt(
self,
prompt: str,
negative_prompt: str = "",
clip_skip: int = 1
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
"""Encode prompts using dual CLIP encoders for SDXL."""
# CLIP-L encoding
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids.to(self.device)
with torch.no_grad():
output_hidden_states = clip_skip > 1
clip_l_output = self.text_encoder(
text_input_ids,
output_hidden_states=output_hidden_states
)
prompt_embeds_l = get_clip_hidden_state(clip_l_output, clip_skip, output_hidden_states)
# CLIP-G encoding
text_inputs_2 = self.tokenizer_2(
prompt,
padding="max_length",
max_length=self.tokenizer_2.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids_2 = text_inputs_2.input_ids.to(self.device)
with torch.no_grad():
clip_g_output = self.text_encoder_2(
text_input_ids_2,
output_hidden_states=output_hidden_states
)
prompt_embeds_g = get_clip_hidden_state(clip_g_output, clip_skip, output_hidden_states)
pooled_prompt_embeds = clip_g_output.text_embeds
# Concatenate CLIP-L and CLIP-G embeddings
prompt_embeds = torch.cat([prompt_embeds_l, prompt_embeds_g], dim=-1)
# Negative prompt
if negative_prompt:
uncond_inputs = self.tokenizer(
negative_prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
uncond_input_ids = uncond_inputs.input_ids.to(self.device)
uncond_inputs_2 = self.tokenizer_2(
negative_prompt,
padding="max_length",
max_length=self.tokenizer_2.model_max_length,
truncation=True,
return_tensors="pt",
)
uncond_input_ids_2 = uncond_inputs_2.input_ids.to(self.device)
with torch.no_grad():
uncond_output_l = self.text_encoder(
uncond_input_ids,
output_hidden_states=output_hidden_states
)
negative_embeds_l = get_clip_hidden_state(uncond_output_l, clip_skip, output_hidden_states)
uncond_output_g = self.text_encoder_2(
uncond_input_ids_2,
output_hidden_states=output_hidden_states
)
negative_embeds_g = get_clip_hidden_state(uncond_output_g, clip_skip, output_hidden_states)
negative_pooled = uncond_output_g.text_embeds
negative_prompt_embeds = torch.cat([negative_embeds_l, negative_embeds_g], dim=-1)
else:
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
negative_pooled = torch.zeros_like(pooled_prompt_embeds)
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled
def encode_prompt_lyra(
self,
prompt: str,
negative_prompt: str = "",
clip_skip: int = 1,
t5_summary: str = "",
lyra_strength: float = 0.3,
use_separator: bool = True,
clip_include_summary: bool = False
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
"""Encode prompts using Lyra VAE v2 fusion (CLIP + T5).
CLIP encoders receive tags only (prompt field).
T5 encoder receives tags + separator + summary.
Args:
prompt: Tags/keywords for CLIP encoding
negative_prompt: Negative tags
clip_skip: CLIP skip layers
t5_summary: Natural language summary for T5
            lyra_strength: Blend factor (0 = pure CLIP, 1 = pure Lyra reconstruction; values above 1 extrapolate)
use_separator: If True, use ¶ separator between tags and summary
clip_include_summary: If True, append summary to CLIP input (default False)
This triggers lazy loading of T5 and Lyra if not already loaded.
Uses sequence lengths from Lyra config for proper tokenization.
"""
if not self.lyra_available:
raise ValueError("Lyra VAE components not configured")
# Get sequence lengths from Lyra config (available before full load)
t5_max_length = self.lyra_loader.t5_max_length # 512 for Illustrious
clip_max_length = self.lyra_loader.clip_max_length # 77 for Illustrious
print(f"[Lyra] Using sequence lengths: CLIP={clip_max_length}, T5={t5_max_length}")
# Access properties triggers lazy load
t5_encoder = self.t5_encoder
t5_tokenizer = self.t5_tokenizer
lyra_model = self.lyra_model
# === CLIP ENCODING ===
# CLIP sees tags only (unless clip_include_summary is True)
if clip_include_summary and t5_summary.strip():
clip_prompt = f"{prompt} {t5_summary}"
else:
clip_prompt = prompt
# Get CLIP embeddings with tags only
prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
clip_prompt, negative_prompt, clip_skip
)
# === T5 ENCODING ===
# T5 sees tags + separator + summary (or tags + summary if no separator)
SUMMARY_SEPARATOR = "¶"
if t5_summary.strip():
if use_separator:
t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {t5_summary}"
else:
t5_prompt = f"{prompt} {t5_summary}"
else:
# No summary provided - T5 just sees the tags
t5_prompt = prompt
print(f"[Lyra] CLIP input: {clip_prompt[:80]}...")
print(f"[Lyra] T5 input: {t5_prompt[:80]}...")
# Get T5 embeddings with config-specified max_length
t5_inputs = t5_tokenizer(
t5_prompt,
max_length=t5_max_length,
padding='max_length',
truncation=True,
return_tensors='pt'
).to(self.device)
with torch.no_grad():
t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
# === LYRA FUSION ===
clip_l_dim = 768
clip_g_dim = 1280
clip_l_embeds = prompt_embeds[..., :clip_l_dim]
clip_g_embeds = prompt_embeds[..., clip_l_dim:]
with torch.no_grad():
modality_inputs = {
'clip_l': clip_l_embeds.float(),
'clip_g': clip_g_embeds.float(),
't5_xl_l': t5_embeds.float(),
't5_xl_g': t5_embeds.float()
}
reconstructions, mu, logvar, _ = lyra_model(
modality_inputs,
target_modalities=['clip_l', 'clip_g']
)
lyra_clip_l = reconstructions['clip_l'].to(prompt_embeds.dtype)
lyra_clip_g = reconstructions['clip_g'].to(prompt_embeds.dtype)
# Normalize reconstructions to match input statistics
clip_l_std_ratio = lyra_clip_l.std() / (clip_l_embeds.std() + 1e-8)
clip_g_std_ratio = lyra_clip_g.std() / (clip_g_embeds.std() + 1e-8)
if clip_l_std_ratio > 2.0 or clip_l_std_ratio < 0.5:
lyra_clip_l = (lyra_clip_l - lyra_clip_l.mean()) / (lyra_clip_l.std() + 1e-8)
lyra_clip_l = lyra_clip_l * clip_l_embeds.std() + clip_l_embeds.mean()
if clip_g_std_ratio > 2.0 or clip_g_std_ratio < 0.5:
lyra_clip_g = (lyra_clip_g - lyra_clip_g.mean()) / (lyra_clip_g.std() + 1e-8)
lyra_clip_g = lyra_clip_g * clip_g_embeds.std() + clip_g_embeds.mean()
# Blend original CLIP with Lyra reconstruction
fused_clip_l = (1 - lyra_strength) * clip_l_embeds + lyra_strength * lyra_clip_l
fused_clip_g = (1 - lyra_strength) * clip_g_embeds + lyra_strength * lyra_clip_g
prompt_embeds_fused = torch.cat([fused_clip_l, fused_clip_g], dim=-1)
# === NEGATIVE PROMPT ===
# Negative uses same logic: CLIP sees negative tags only
if negative_prompt:
neg_strength = lyra_strength * 0.5 # Less aggressive for negative
# T5 negative: tags only (no summary for negative)
t5_neg_prompt = negative_prompt
t5_inputs_neg = t5_tokenizer(
t5_neg_prompt,
max_length=t5_max_length,
padding='max_length',
truncation=True,
return_tensors='pt'
).to(self.device)
with torch.no_grad():
t5_embeds_neg = t5_encoder(**t5_inputs_neg).last_hidden_state
neg_clip_l = negative_prompt_embeds[..., :clip_l_dim]
neg_clip_g = negative_prompt_embeds[..., clip_l_dim:]
modality_inputs_neg = {
'clip_l': neg_clip_l.float(),
'clip_g': neg_clip_g.float(),
't5_xl_l': t5_embeds_neg.float(),
't5_xl_g': t5_embeds_neg.float()
}
recon_neg, _, _, _ = lyra_model(modality_inputs_neg, target_modalities=['clip_l', 'clip_g'])
lyra_neg_l = recon_neg['clip_l'].to(negative_prompt_embeds.dtype)
lyra_neg_g = recon_neg['clip_g'].to(negative_prompt_embeds.dtype)
# Normalize
neg_l_ratio = lyra_neg_l.std() / (neg_clip_l.std() + 1e-8)
neg_g_ratio = lyra_neg_g.std() / (neg_clip_g.std() + 1e-8)
if neg_l_ratio > 2.0 or neg_l_ratio < 0.5:
lyra_neg_l = (lyra_neg_l - lyra_neg_l.mean()) / (lyra_neg_l.std() + 1e-8)
lyra_neg_l = lyra_neg_l * neg_clip_l.std() + neg_clip_l.mean()
if neg_g_ratio > 2.0 or neg_g_ratio < 0.5:
lyra_neg_g = (lyra_neg_g - lyra_neg_g.mean()) / (lyra_neg_g.std() + 1e-8)
lyra_neg_g = lyra_neg_g * neg_clip_g.std() + neg_clip_g.mean()
fused_neg_l = (1 - neg_strength) * neg_clip_l + neg_strength * lyra_neg_l
fused_neg_g = (1 - neg_strength) * neg_clip_g + neg_strength * lyra_neg_g
negative_prompt_embeds_fused = torch.cat([fused_neg_l, fused_neg_g], dim=-1)
else:
negative_prompt_embeds_fused = torch.zeros_like(prompt_embeds_fused)
return prompt_embeds_fused, negative_prompt_embeds_fused, pooled, negative_pooled
def _get_add_time_ids(
self,
original_size: Tuple[int, int],
crops_coords_top_left: Tuple[int, int],
target_size: Tuple[int, int],
dtype: torch.dtype
) -> torch.Tensor:
"""Create time embedding IDs for SDXL."""
add_time_ids = list(original_size + crops_coords_top_left + target_size)
add_time_ids = torch.tensor([add_time_ids], dtype=dtype, device=self.device)
return add_time_ids
@torch.no_grad()
def __call__(
self,
prompt: str,
negative_prompt: str = "",
height: int = 1024,
width: int = 1024,
num_inference_steps: int = 20,
guidance_scale: float = 7.5,
shift: float = 0.0,
use_flow_matching: bool = False,
prediction_type: str = "epsilon",
seed: Optional[int] = None,
use_lyra: bool = False,
clip_skip: int = 1,
t5_summary: str = "",
lyra_strength: float = 1.0,
use_separator: bool = True,
clip_include_summary: bool = False,
progress_callback=None
):
"""Generate image using SDXL architecture.
Args:
prompt: Tags/keywords for image generation
negative_prompt: Negative tags
t5_summary: Natural language summary (T5 only, unless clip_include_summary=True)
use_separator: Use ¶ separator between tags and summary in T5 input
clip_include_summary: If True, append summary to CLIP input (default False)
seed: Random seed for reproducibility (must be int)
"""
# Create generator with seed for deterministic generation
if seed is not None:
seed = int(seed)
generator = torch.Generator(device=self.device).manual_seed(seed)
print(f"[SDXL Pipeline] Using seed: {seed}")
else:
generator = None
print("[SDXL Pipeline] No seed provided, using random")
# Encode prompts (Lyra triggers lazy load only if use_lyra=True)
if use_lyra and self.lyra_available:
prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt_lyra(
prompt, negative_prompt, clip_skip, t5_summary, lyra_strength,
use_separator=use_separator,
clip_include_summary=clip_include_summary
)
else:
prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
prompt, negative_prompt, clip_skip
)
# Prepare latents
latent_channels = 4
latent_height = height // 8
latent_width = width // 8
latents = torch.randn(
(1, latent_channels, latent_height, latent_width),
generator=generator,
device=self.device,
dtype=torch.float16
)
# Set timesteps
self.scheduler.set_timesteps(num_inference_steps, device=self.device)
timesteps = self.scheduler.timesteps
if not use_flow_matching:
latents = latents * self.scheduler.init_noise_sigma
# Prepare added time embeddings for SDXL
original_size = (height, width)
target_size = (height, width)
crops_coords_top_left = (0, 0)
add_time_ids = self._get_add_time_ids(
original_size, crops_coords_top_left, target_size, dtype=torch.float16
)
negative_add_time_ids = add_time_ids
# Denoising loop
for i, t in enumerate(timesteps):
if progress_callback:
progress_callback(i, num_inference_steps, f"Step {i+1}/{num_inference_steps}")
latent_model_input = torch.cat([latents] * 2) if guidance_scale > 1.0 else latents
if use_flow_matching and shift > 0:
sigma = t.float() / 1000.0
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
scaling = torch.sqrt(1 + sigma_shifted ** 2)
latent_model_input = latent_model_input / scaling
else:
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
timestep = t.expand(latent_model_input.shape[0])
if guidance_scale > 1.0:
text_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
add_text_embeds = torch.cat([negative_pooled, pooled])
add_time_ids_input = torch.cat([negative_add_time_ids, add_time_ids])
else:
text_embeds = prompt_embeds
add_text_embeds = pooled
add_time_ids_input = add_time_ids
added_cond_kwargs = {
"text_embeds": add_text_embeds,
"time_ids": add_time_ids_input
}
noise_pred = self.unet(
latent_model_input,
timestep,
encoder_hidden_states=text_embeds,
added_cond_kwargs=added_cond_kwargs,
return_dict=False
)[0]
if guidance_scale > 1.0:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
if use_flow_matching:
sigma = t.float() / 1000.0
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
if prediction_type == "v_prediction":
v_pred = noise_pred
alpha_t = torch.sqrt(1 - sigma_shifted ** 2)
sigma_t = sigma_shifted
noise_pred = alpha_t * v_pred + sigma_t * latents
dt = -1.0 / num_inference_steps
latents = latents + dt * noise_pred
else:
# Pass generator for deterministic ancestral/SDE sampling
latents = self.scheduler.step(
noise_pred, t, latents, generator=generator, return_dict=False
)[0]
# Decode
latents = latents / self.vae_scale_factor
with torch.no_grad():
image = self.vae.decode(latents.to(self.vae.dtype)).sample
image = (image / 2 + 0.5).clamp(0, 1)
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
image = (image * 255).round().astype("uint8")
image = Image.fromarray(image[0])
return image
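# Note on the flow-matching shift used in __call__ above (worked example, not a derivation):
# the code maps sigma = t / 1000 to sigma' = (shift * sigma) / (1 + (shift - 1) * sigma),
# which keeps sigma = 0 and sigma = 1 fixed while pushing intermediate steps toward higher
# noise for shift > 1. For example, shift = 2.5 and sigma = 0.5 give
# sigma' = 1.25 / 1.75 ≈ 0.714.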
# ============================================================================
# SD1.5 PIPELINE
# ============================================================================
class SD15FlowMatchingPipeline:
"""Pipeline for SD1.5-based flow-matching inference."""
def __init__(
self,
vae: AutoencoderKL,
text_encoder: CLIPTextModel,
tokenizer: CLIPTokenizer,
unet: UNet2DConditionModel,
scheduler,
device: str = "cuda",
t5_loader: Optional[LazyT5Encoder] = None,
lyra_loader: Optional[LazyLyraModel] = None,
):
self.vae = vae
self.text_encoder = text_encoder
self.tokenizer = tokenizer
self.unet = unet
self.scheduler = scheduler
self.device = device
self.t5_loader = t5_loader
self.lyra_loader = lyra_loader
        self.vae_scale_factor = 0.18215  # SD1.5 latent scaling factor; latents are divided by this before VAE decode
self.arch = ARCH_SD15
self.is_lune_model = False
@property
def t5_encoder(self):
return self.t5_loader.encoder if self.t5_loader else None
@property
def t5_tokenizer(self):
return self.t5_loader.tokenizer if self.t5_loader else None
@property
def lyra_model(self):
return self.lyra_loader.model if self.lyra_loader else None
@property
def lyra_available(self) -> bool:
return self.t5_loader is not None and self.lyra_loader is not None
def encode_prompt(self, prompt: str, negative_prompt: str = ""):
"""Encode text prompts to embeddings."""
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids.to(self.device)
with torch.no_grad():
prompt_embeds = self.text_encoder(text_input_ids)[0]
if negative_prompt:
uncond_inputs = self.tokenizer(
negative_prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
uncond_input_ids = uncond_inputs.input_ids.to(self.device)
with torch.no_grad():
negative_prompt_embeds = self.text_encoder(uncond_input_ids)[0]
else:
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
return prompt_embeds, negative_prompt_embeds
def encode_prompt_lyra(self, prompt: str, negative_prompt: str = ""):
"""Encode using Lyra VAE v1 (CLIP + T5 fusion).
Uses sequence lengths from Lyra config for proper tokenization.
"""
if not self.lyra_available:
raise ValueError("Lyra VAE components not configured")
# Get sequence length from config (v1 uses same length for clip and t5)
# Default to 77 for SD1.5/v1
t5_max_length = self.lyra_loader.config.get('seq_len', 77)
print(f"[Lyra v1] Using sequence length: {t5_max_length}")
t5_encoder = self.t5_encoder
t5_tokenizer = self.t5_tokenizer
lyra_model = self.lyra_model
# CLIP
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids.to(self.device)
with torch.no_grad():
clip_embeds = self.text_encoder(text_input_ids)[0]
# T5 with config-specified max_length
t5_inputs = t5_tokenizer(
prompt,
max_length=t5_max_length,
padding='max_length',
truncation=True,
return_tensors='pt'
).to(self.device)
with torch.no_grad():
t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
# Fuse
modality_inputs = {'clip': clip_embeds, 't5': t5_embeds}
with torch.no_grad():
reconstructions, mu, logvar = lyra_model(
modality_inputs,
target_modalities=['clip']
)
prompt_embeds = reconstructions['clip']
# Negative
if negative_prompt:
uncond_inputs = self.tokenizer(
negative_prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
uncond_input_ids = uncond_inputs.input_ids.to(self.device)
with torch.no_grad():
clip_embeds_uncond = self.text_encoder(uncond_input_ids)[0]
t5_inputs_uncond = t5_tokenizer(
negative_prompt,
max_length=t5_max_length,
padding='max_length',
truncation=True,
return_tensors='pt'
).to(self.device)
with torch.no_grad():
t5_embeds_uncond = t5_encoder(**t5_inputs_uncond).last_hidden_state
modality_inputs_uncond = {'clip': clip_embeds_uncond, 't5': t5_embeds_uncond}
with torch.no_grad():
reconstructions_uncond, _, _ = lyra_model(
modality_inputs_uncond,
target_modalities=['clip']
)
negative_prompt_embeds = reconstructions_uncond['clip']
else:
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
return prompt_embeds, negative_prompt_embeds
@torch.no_grad()
def __call__(
self,
prompt: str,
negative_prompt: str = "",
height: int = 512,
width: int = 512,
num_inference_steps: int = 20,
guidance_scale: float = 7.5,
shift: float = 2.5,
use_flow_matching: bool = True,
prediction_type: str = "epsilon",
seed: Optional[int] = None,
use_lyra: bool = False,
clip_skip: int = 1,
t5_summary: str = "",
lyra_strength: float = 1.0,
progress_callback=None
):
"""Generate image."""
# Create generator with seed for deterministic generation
if seed is not None:
seed = int(seed)
generator = torch.Generator(device=self.device).manual_seed(seed)
print(f"[SD1.5 Pipeline] Using seed: {seed}")
else:
generator = None
print("[SD1.5 Pipeline] No seed provided, using random")
if use_lyra and self.lyra_available:
prompt_embeds, negative_prompt_embeds = self.encode_prompt_lyra(prompt, negative_prompt)
else:
prompt_embeds, negative_prompt_embeds = self.encode_prompt(prompt, negative_prompt)
latent_channels = 4
latent_height = height // 8
latent_width = width // 8
latents = torch.randn(
(1, latent_channels, latent_height, latent_width),
generator=generator,
device=self.device,
dtype=torch.float32
)
self.scheduler.set_timesteps(num_inference_steps, device=self.device)
timesteps = self.scheduler.timesteps
if not use_flow_matching:
latents = latents * self.scheduler.init_noise_sigma
for i, t in enumerate(timesteps):
if progress_callback:
progress_callback(i, num_inference_steps, f"Step {i+1}/{num_inference_steps}")
latent_model_input = torch.cat([latents] * 2) if guidance_scale > 1.0 else latents
if use_flow_matching and shift > 0:
sigma = t.float() / 1000.0
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
scaling = torch.sqrt(1 + sigma_shifted ** 2)
latent_model_input = latent_model_input / scaling
else:
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
timestep = t.expand(latent_model_input.shape[0])
text_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) if guidance_scale > 1.0 else prompt_embeds
noise_pred = self.unet(
latent_model_input,
timestep,
encoder_hidden_states=text_embeds,
return_dict=False
)[0]
if guidance_scale > 1.0:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
if use_flow_matching:
sigma = t.float() / 1000.0
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
if prediction_type == "v_prediction":
v_pred = noise_pred
alpha_t = torch.sqrt(1 - sigma_shifted ** 2)
sigma_t = sigma_shifted
noise_pred = alpha_t * v_pred + sigma_t * latents
dt = -1.0 / num_inference_steps
latents = latents + dt * noise_pred
else:
# Pass generator for deterministic ancestral/SDE sampling
latents = self.scheduler.step(
noise_pred, t, latents, generator=generator, return_dict=False
)[0]
latents = latents / self.vae_scale_factor
        if self.is_lune_model:
            latents = latents * 5.52  # Empirical rescale applied only to Lune latents before decoding
with torch.no_grad():
image = self.vae.decode(latents).sample
image = (image / 2 + 0.5).clamp(0, 1)
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
image = (image * 255).round().astype("uint8")
image = Image.fromarray(image[0])
return image
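# Example usage (sketch): the Lune variant of this pipeline is normally called with flow
# matching enabled, a non-zero shift, and v-prediction, mirroring the defaults applied by
# generate_image() for "Flow-Lune". initialize_pipeline() is defined further below.
#
#   pipe = initialize_pipeline("Flow-Lune (SD1.5)", device="cuda")
#   image = pipe(
#       prompt="a lighthouse at dusk",
#       use_flow_matching=True,
#       shift=2.5,
#       prediction_type="v_prediction",
#       seed=42,
#   )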
# ============================================================================
# MODEL LOADERS
# ============================================================================
def load_lune_checkpoint(repo_id: str, filename: str, device: str = "cuda"):
"""Load Lune checkpoint from .pt file."""
print(f"📥 Downloading: {repo_id}/{filename}")
checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
checkpoint = torch.load(checkpoint_path, map_location="cpu")
print(f"🏗️ Initializing SD1.5 UNet...")
unet = UNet2DConditionModel.from_pretrained(
"runwayml/stable-diffusion-v1-5",
subfolder="unet",
torch_dtype=torch.float32
)
student_state_dict = checkpoint["student"]
cleaned_dict = {}
for key, value in student_state_dict.items():
if key.startswith("unet."):
cleaned_dict[key[5:]] = value
else:
cleaned_dict[key] = value
unet.load_state_dict(cleaned_dict, strict=False)
step = checkpoint.get("gstep", "unknown")
print(f"✅ Loaded Lune from step {step}")
return unet.to(device)
def load_illustrious_xl(
repo_id: str = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious",
filename: str = "",
device: str = "cuda"
) -> Tuple[UNet2DConditionModel, AutoencoderKL, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer, CLIPTokenizer]:
"""Load Illustrious XL from single safetensors file."""
from diffusers import StableDiffusionXLPipeline
# Default checkpoint if none specified
if not filename or not filename.strip():
filename = "illustriousXL_v01.safetensors"
print(f"📥 Loading Illustrious XL: {repo_id}/{filename}")
checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
print(f"✓ Downloaded: {checkpoint_path}")
print("📦 Loading with StableDiffusionXLPipeline.from_single_file()...")
pipe = StableDiffusionXLPipeline.from_single_file(
checkpoint_path,
torch_dtype=torch.float16,
use_safetensors=True,
)
unet = pipe.unet.to(device)
vae = pipe.vae.to(device)
text_encoder = pipe.text_encoder.to(device)
text_encoder_2 = pipe.text_encoder_2.to(device)
tokenizer = pipe.tokenizer
tokenizer_2 = pipe.tokenizer_2
del pipe
torch.cuda.empty_cache()
print("✅ Illustrious XL loaded!")
print(f" UNet params: {sum(p.numel() for p in unet.parameters()):,}")
print(f" VAE params: {sum(p.numel() for p in vae.parameters()):,}")
return unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2
def load_sdxl_base(device: str = "cuda"):
"""Load standard SDXL base model."""
print("📥 Loading SDXL Base 1.0...")
unet = UNet2DConditionModel.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
subfolder="unet",
torch_dtype=torch.float16
).to(device)
# Use fp16-fix VAE to avoid NaN issues with SDXL's original VAE in fp16
print(" Using madebyollin/sdxl-vae-fp16-fix for stable fp16 decoding...")
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix",
torch_dtype=torch.float16
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
subfolder="text_encoder",
torch_dtype=torch.float16
).to(device)
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
subfolder="text_encoder_2",
torch_dtype=torch.float16
).to(device)
tokenizer = CLIPTokenizer.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
subfolder="tokenizer"
)
tokenizer_2 = CLIPTokenizer.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
subfolder="tokenizer_2"
)
print("✅ SDXL Base loaded!")
return unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2
# ============================================================================
# PIPELINE INITIALIZATION
# ============================================================================
def initialize_pipeline(model_choice: str, device: str = "cuda", checkpoint: str = "", lyra_checkpoint: str = ""):
"""Initialize the complete pipeline based on model choice.
Uses lazy loading for T5 and Lyra - they won't be downloaded until first use.
Args:
model_choice: Model selection from dropdown
device: Target device
checkpoint: Optional custom checkpoint filename (e.g., "my_model.safetensors")
lyra_checkpoint: Optional custom Lyra VAE checkpoint filename
"""
print(f"🚀 Initializing {model_choice} pipeline...")
if checkpoint:
print(f" Custom model checkpoint: {checkpoint}")
if lyra_checkpoint:
print(f" Custom Lyra checkpoint: {lyra_checkpoint}")
is_sdxl = "Illustrious" in model_choice or "SDXL" in model_choice
is_lune = "Lune" in model_choice
if is_sdxl:
# SDXL-based models
if "Illustrious" in model_choice:
unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2 = load_illustrious_xl(
device=device,
filename=checkpoint
)
else:
unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2 = load_sdxl_base(device=device)
# Create LAZY loaders for T5 and Lyra (no download yet!)
print("📋 Configuring lazy loaders for T5-XL and Lyra VAE (will download on first use)")
t5_loader = LazyT5Encoder(
model_name=T5_XL_MODEL, # google/flan-t5-xl
device=device,
dtype=torch.float16
)
lyra_loader = LazyLyraModel(
repo_id=LYRA_ILLUSTRIOUS_REPO,
device=device,
checkpoint=lyra_checkpoint if lyra_checkpoint and lyra_checkpoint.strip() else None
)
# Default scheduler: Euler Ancestral
scheduler = get_scheduler(SCHEDULER_EULER_A, is_sdxl=True)
pipeline = SDXLFlowMatchingPipeline(
vae=vae,
text_encoder=text_encoder,
text_encoder_2=text_encoder_2,
tokenizer=tokenizer,
tokenizer_2=tokenizer_2,
unet=unet,
scheduler=scheduler,
device=device,
t5_loader=t5_loader,
lyra_loader=lyra_loader,
clip_skip=1
)
else:
# SD1.5-based models
vae = AutoencoderKL.from_pretrained(
"runwayml/stable-diffusion-v1-5",
subfolder="vae",
torch_dtype=torch.float32
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
"openai/clip-vit-large-patch14",
torch_dtype=torch.float32
).to(device)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
# Lazy loaders for SD1.5 Lyra (T5-base)
print("📋 Configuring lazy loaders for T5-base and Lyra VAE v1 (will download on first use)")
t5_loader = LazyT5Encoder(
model_name=T5_BASE_MODEL, # google/flan-t5-base
device=device,
dtype=torch.float32
)
lyra_loader = LazyLyraModel(
repo_id=LYRA_SD15_REPO,
device=device,
checkpoint=lyra_checkpoint if lyra_checkpoint and lyra_checkpoint.strip() else None
)
# Load UNet
if is_lune:
repo_id = "AbstractPhil/sd15-flow-lune"
# Use custom checkpoint or default
if checkpoint and checkpoint.strip():
filename = checkpoint
else:
filename = "sd15_flow_lune_e34_s34000.pt"
unet = load_lune_checkpoint(repo_id, filename, device)
else:
unet = UNet2DConditionModel.from_pretrained(
"runwayml/stable-diffusion-v1-5",
subfolder="unet",
torch_dtype=torch.float32
).to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(
"runwayml/stable-diffusion-v1-5",
subfolder="scheduler"
)
pipeline = SD15FlowMatchingPipeline(
vae=vae,
text_encoder=text_encoder,
tokenizer=tokenizer,
unet=unet,
scheduler=scheduler,
device=device,
t5_loader=t5_loader,
lyra_loader=lyra_loader,
)
pipeline.is_lune_model = is_lune
print("✅ Pipeline initialized! (T5 and Lyra will load on first use)")
return pipeline
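# Example usage (sketch): outside Gradio, a pipeline can be built and called directly.
# "Illustrious XL" is one of the dropdown choices defined in create_demo() below.
#
#   pipe = initialize_pipeline("Illustrious XL", device="cuda")
#   image = pipe(
#       prompt="1girl, blue hair, school uniform",
#       negative_prompt="lowres, bad anatomy",
#       num_inference_steps=25,
#       guidance_scale=7.0,
#       seed=42,
#   )
#   image.save("out.png")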
# ============================================================================
# GLOBAL STATE
# ============================================================================
CURRENT_PIPELINE = None
CURRENT_MODEL = None
CURRENT_CHECKPOINT = None
CURRENT_LYRA_CHECKPOINT = None
def get_pipeline(model_choice: str, checkpoint: str = "", lyra_checkpoint: str = ""):
"""Get or create pipeline for selected model."""
global CURRENT_PIPELINE, CURRENT_MODEL, CURRENT_CHECKPOINT, CURRENT_LYRA_CHECKPOINT
# Normalize empty values
checkpoint = checkpoint.strip() if checkpoint else ""
lyra_checkpoint = lyra_checkpoint.strip() if lyra_checkpoint else ""
# Reinitialize if model or any checkpoint changed
if (CURRENT_PIPELINE is None or
CURRENT_MODEL != model_choice or
CURRENT_CHECKPOINT != checkpoint or
CURRENT_LYRA_CHECKPOINT != lyra_checkpoint):
CURRENT_PIPELINE = initialize_pipeline(
model_choice, device="cuda",
checkpoint=checkpoint,
lyra_checkpoint=lyra_checkpoint
)
CURRENT_MODEL = model_choice
CURRENT_CHECKPOINT = checkpoint
CURRENT_LYRA_CHECKPOINT = lyra_checkpoint
return CURRENT_PIPELINE
# ============================================================================
# INFERENCE
# ============================================================================
def estimate_duration(num_steps: int, width: int, height: int, use_lyra: bool = False, is_sdxl: bool = False) -> int:
"""Estimate GPU duration."""
base_time_per_step = 0.5 if is_sdxl else 0.3
resolution_factor = (width * height) / (512 * 512)
estimated = num_steps * base_time_per_step * resolution_factor
if use_lyra:
estimated *= 2
estimated += 10 # Extra time for lazy loading on first use
return int(estimated + 20)
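# Worked example (sketch): 25 steps at 1024x1024 on SDXL with Lyra enabled gives
# 25 * 0.5 * (1024*1024) / (512*512) = 50 s, doubled to 100 s for Lyra, +10 s for lazy
# loading, +20 s headroom -> ~130 s requested from ZeroGPU.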
@spaces.GPU(duration=lambda *args: estimate_duration(
args[8], args[10], args[11], args[14],
"SDXL" in args[3] or "Illustrious" in args[3]
))
def generate_image(
prompt: str,
t5_summary: str,
negative_prompt: str,
model_choice: str,
checkpoint: str,
lyra_checkpoint: str,
scheduler_choice: str,
clip_skip: int,
num_steps: int,
cfg_scale: float,
width: int,
height: int,
shift: float,
use_flow_matching: bool,
use_lyra: bool,
lyra_strength: float,
use_separator: bool,
clip_include_summary: bool,
seed: int,
randomize_seed: bool,
progress=gr.Progress()
):
"""Generate image with ZeroGPU support.
Args:
prompt: Tags/keywords (CLIP input)
t5_summary: Natural language summary (T5 input, unless clip_include_summary)
checkpoint: Custom model checkpoint filename (empty for default)
lyra_checkpoint: Custom Lyra VAE checkpoint filename (empty for default)
use_separator: Use ¶ separator between tags and summary
clip_include_summary: If True, CLIP also sees the summary
"""
# Ensure seed is an integer (Gradio sliders return floats)
seed = int(seed)
if randomize_seed:
seed = np.random.randint(0, 2**32 - 1)
print(f"🎲 Using seed: {seed} (randomize={randomize_seed})")
def progress_callback(step, total, desc):
progress((step + 1) / total, desc=desc)
try:
pipeline = get_pipeline(model_choice, checkpoint, lyra_checkpoint)
# Update scheduler if needed (SDXL only)
is_sdxl = "SDXL" in model_choice or "Illustrious" in model_choice
if is_sdxl and hasattr(pipeline, 'set_scheduler'):
pipeline.set_scheduler(scheduler_choice)
prediction_type = "epsilon"
if not is_sdxl and "Lune" in model_choice:
prediction_type = "v_prediction"
if not use_lyra or not pipeline.lyra_available:
progress(0.05, desc="Generating...")
image = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
height=height,
width=width,
num_inference_steps=num_steps,
guidance_scale=cfg_scale,
shift=shift,
use_flow_matching=use_flow_matching,
prediction_type=prediction_type,
seed=seed,
use_lyra=False,
clip_skip=clip_skip,
progress_callback=progress_callback
)
progress(1.0, desc="Complete!")
return image, None, seed
else:
# Side-by-side comparison: SAME seed for both!
progress(0.05, desc="Generating standard...")
image_standard = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
height=height,
width=width,
num_inference_steps=num_steps,
guidance_scale=cfg_scale,
shift=shift,
use_flow_matching=use_flow_matching,
prediction_type=prediction_type,
seed=seed, # Same seed
use_lyra=False,
clip_skip=clip_skip,
progress_callback=lambda s, t, d: progress(0.05 + (s/t) * 0.45, desc=d)
)
progress(0.5, desc="Generating Lyra fusion (loading T5 + Lyra if needed)...")
image_lyra = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
height=height,
width=width,
num_inference_steps=num_steps,
guidance_scale=cfg_scale,
shift=shift,
use_flow_matching=use_flow_matching,
prediction_type=prediction_type,
seed=seed, # Same seed for deterministic comparison
use_lyra=True,
clip_skip=clip_skip,
t5_summary=t5_summary,
lyra_strength=lyra_strength,
use_separator=use_separator,
clip_include_summary=clip_include_summary,
progress_callback=lambda s, t, d: progress(0.5 + (s/t) * 0.45, desc=d)
)
progress(1.0, desc="Complete!")
return image_standard, image_lyra, seed
except Exception as e:
print(f"❌ Generation failed: {e}")
import traceback
traceback.print_exc()
raise e
# ============================================================================
# GRADIO UI
# ============================================================================
def create_demo():
"""Create Gradio interface."""
with gr.Blocks() as demo:
gr.Markdown("""
# 🌙 Lyra/Lune Flow-Matching Image Generation
**Geometric crystalline diffusion** by [AbstractPhil](https://huggingface.co/AbstractPhil)
Generate images using SD1.5 and SDXL-based models with geometric deep learning:
| Model | Architecture | Lyra Version | Best For |
|-------|-------------|--------------|----------|
| **Illustrious XL** | SDXL | v2 (T5-XL) | Anime/illustration, high detail |
| **SDXL Base** | SDXL | v2 (T5-XL) | Photorealistic, general purpose |
| **Flow-Lune** | SD1.5 | v1 (T5-base) | Fast flow matching (15-25 steps) |
| **SD1.5 Base** | SD1.5 | v1 (T5-base) | Baseline comparison |
**Lazy Loading**: T5 and Lyra VAE are only downloaded when you enable Lyra fusion!
""")
with gr.Row():
with gr.Column(scale=1):
prompt = gr.TextArea(
label="Prompt (Tags for CLIP)",
value="masterpiece, best quality, 1girl, blue hair, school uniform, cherry blossoms, detailed background",
lines=3,
info="CLIP encoders see these tags. T5 also sees these + the summary below."
)
t5_summary = gr.TextArea(
label="T5 Summary (Natural Language - T5 Only)",
value="A beautiful anime girl with flowing blue hair wearing a school uniform, surrounded by delicate pink cherry blossoms against a bright sky",
lines=2,
info="T5 sees: tags ¶ summary. CLIP sees: tags only (unless 'Include Summary in CLIP' is enabled)."
)
negative_prompt = gr.TextArea(
label="Negative Prompt",
value="lowres, bad anatomy, bad hands, text, error, cropped, worst quality, low quality",
lines=2
)
model_choice = gr.Dropdown(
label="Model",
choices=[
"Illustrious XL",
"SDXL Base",
"Flow-Lune (SD1.5)",
"SD1.5 Base"
],
value="Illustrious XL"
)
with gr.Accordion("Advanced Options", open=False):
checkpoint = gr.Textbox(
label="Model Checkpoint (optional)",
value="",
placeholder="e.g., illustriousXL_v01.safetensors",
info="Leave empty for default. Illustrious: .safetensors, Lune: .pt"
)
lyra_checkpoint = gr.Textbox(
label="Lyra VAE Checkpoint (optional)",
value="weights/lyra_illustrious_step_12000.safetensors",
placeholder="e.g., lyra_e100_s50000.safetensors",
info="Leave empty for latest. Loaded from weights/ folder in Lyra repo."
)
scheduler_choice = gr.Dropdown(
label="Scheduler (SDXL only)",
choices=SCHEDULER_CHOICES,
value=SCHEDULER_EULER_A,
info="Euler Ancestral recommended for Illustrious"
)
clip_skip = gr.Slider(
label="CLIP Skip",
minimum=1,
maximum=4,
value=2,
step=1,
info="2 recommended for Illustrious, 1 for others"
)
use_lyra = gr.Checkbox(
label="Enable Lyra VAE (CLIP+T5 Fusion)",
value=True, # DEFAULT: ON
info="Enables lazy loading of T5 and Lyra on first use"
)
lyra_strength = gr.Slider(
label="Lyra Blend Strength",
minimum=0.0,
maximum=3.0,
value=1.0,
step=0.05,
info="0.0 = pure CLIP, 1.0 = pure Lyra reconstruction"
)
with gr.Accordion("Lyra Advanced Settings", open=False):
use_separator = gr.Checkbox(
label="Use ¶ Separator",
value=True,
info="Insert ¶ between tags and summary in T5 input"
)
clip_include_summary = gr.Checkbox(
label="Include Summary in CLIP",
value=False,
info="By default CLIP sees tags only. Enable to append summary to CLIP input."
)
with gr.Accordion("Generation Settings", open=True):
num_steps = gr.Slider(
label="Steps",
minimum=1,
maximum=50,
value=25,
step=1
)
cfg_scale = gr.Slider(
label="CFG Scale",
minimum=1.0,
maximum=20.0,
value=7.0,
step=0.5
)
with gr.Row():
width = gr.Slider(
label="Width",
minimum=512,
maximum=1536,
value=1024,
step=64
)
height = gr.Slider(
label="Height",
minimum=512,
maximum=1536,
value=1024,
step=64
)
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=2**32 - 1,
value=42, # DEFAULT: 42
step=1
)
randomize_seed = gr.Checkbox(
label="Randomize Seed",
value=False # DEFAULT: OFF for reproducibility
)
with gr.Accordion("Advanced (Flow Matching)", open=False):
use_flow_matching = gr.Checkbox(
label="Enable Flow Matching",
value=False,
info="Use flow matching ODE (for Lune only)"
)
shift = gr.Slider(
label="Shift",
minimum=0.0,
maximum=5.0,
value=0.0,
step=0.1,
info="Flow matching shift (0=disabled)"
)
generate_btn = gr.Button("🎨 Generate", variant="primary", size="lg")
with gr.Column(scale=1):
with gr.Row():
output_image_standard = gr.Image(
label="Standard",
type="pil"
)
output_image_lyra = gr.Image(
label="Lyra Fusion 🎵",
type="pil",
visible=True # Visible by default since Lyra is on
)
output_seed = gr.Number(label="Seed Used", precision=0)
gr.Markdown("""
### Tips
- **Lazy Loading**: T5-XL (~3GB) and Lyra VAE only download when you enable Lyra
- **Illustrious XL**: Use CLIP skip 2, Euler Ancestral scheduler
- **Schedulers**: DPM++ 2M SDE for detail, Euler A for speed
- **Lyra v2**: Uses `google/flan-t5-xl` for richer semantics
- **Same Seed**: Both Standard and Lyra use the same seed for fair comparison
""")
# Event handlers
def on_model_change(model_name):
"""Update defaults based on model."""
if "Illustrious" in model_name:
return {
clip_skip: gr.update(value=2),
width: gr.update(value=1024),
height: gr.update(value=1024),
num_steps: gr.update(value=25),
use_flow_matching: gr.update(value=False),
shift: gr.update(value=0.0),
scheduler_choice: gr.update(visible=True, value=SCHEDULER_EULER_A)
}
elif "SDXL" in model_name:
return {
clip_skip: gr.update(value=1),
width: gr.update(value=1024),
height: gr.update(value=1024),
num_steps: gr.update(value=30),
use_flow_matching: gr.update(value=False),
shift: gr.update(value=0.0),
scheduler_choice: gr.update(visible=True, value=SCHEDULER_EULER_A)
}
elif "Lune" in model_name:
return {
clip_skip: gr.update(value=1),
width: gr.update(value=512),
height: gr.update(value=512),
num_steps: gr.update(value=20),
use_flow_matching: gr.update(value=True),
shift: gr.update(value=2.5),
scheduler_choice: gr.update(visible=False)
}
else: # SD1.5 Base
return {
clip_skip: gr.update(value=1),
width: gr.update(value=512),
height: gr.update(value=512),
num_steps: gr.update(value=30),
use_flow_matching: gr.update(value=False),
shift: gr.update(value=0.0),
scheduler_choice: gr.update(visible=False)
}
def on_lyra_toggle(enabled):
"""Show/hide Lyra comparison."""
if enabled:
return {
output_image_standard: gr.update(visible=True, label="Standard"),
output_image_lyra: gr.update(visible=True, label="Lyra Fusion 🎵")
}
else:
return {
output_image_standard: gr.update(visible=True, label="Generated Image"),
output_image_lyra: gr.update(visible=False)
}
model_choice.change(
fn=on_model_change,
inputs=[model_choice],
outputs=[clip_skip, width, height, num_steps, use_flow_matching, shift, scheduler_choice]
)
use_lyra.change(
fn=on_lyra_toggle,
inputs=[use_lyra],
outputs=[output_image_standard, output_image_lyra]
)
generate_btn.click(
fn=generate_image,
inputs=[
prompt, t5_summary, negative_prompt, model_choice, checkpoint, lyra_checkpoint,
scheduler_choice, clip_skip,
num_steps, cfg_scale, width, height, shift,
use_flow_matching, use_lyra, lyra_strength, use_separator, clip_include_summary,
seed, randomize_seed
],
outputs=[output_image_standard, output_image_lyra, output_seed]
)
return demo
# ============================================================================
# LAUNCH
# ============================================================================
if __name__ == "__main__":
demo = create_demo()
demo.queue(max_size=20)
demo.launch()