{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "396e62df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/Qwen-Image-Edit-Angles\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Repository root to work from. Set QWEN_IMAGE_EDIT_ANGLES_ROOT to override the\n",
    "# default instead of editing a hard-coded absolute path in the notebook.\n",
    "REPO_ROOT = os.environ.get(\"QWEN_IMAGE_EDIT_ANGLES_ROOT\", \"/home/ubuntu/Qwen-Image-Edit-Angles\")\n",
    "%cd {REPO_ROOT}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "08516c94",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.4\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n",
      "Skipping import of cpp extensions due to incompatible torch version 2.9.1+cu128 for torchao version 0.14.1             Please see https://github.com/pytorch/ao/issues/2919 for more info\n",
      "TMA benchmarks will be running without grid constant TMA descriptor.\n",
      "2025-11-13 14:36:44.418437: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2025-11-13 14:36:44.432587: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
      "E0000 00:00:1763044604.449633  918286 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "E0000 00:00:1763044604.455190  918286 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "W0000 00:00:1763044604.468352  918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763044604.468368  918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763044604.468370  918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763044604.468372  918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "2025-11-13 14:36:44.472502: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 83173.17it/s]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "from qwenimage.debug import clear_cuda_memory, print_gpu_memory\n",
    "from qwenimage.experiment import ExperimentConfig\n",
    "from qwenimage.experiments.experiments_qwen import PipeInputs, Qwen_AoT, QwenBaseExperiment, ExperimentRegistry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d65e5223",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "720 input combinations\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 14820.86it/s]\n",
      "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|██████████| 4/4 [00:15<00:00,  3.92s/it]\n",
      "Loading pipeline components...:  33%|███▎      | 2/6 [00:00<00:01,  3.48it/s]`torch_dtype` is deprecated! Use `dtype` instead!\n",
      "Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 31.97it/s]\n",
      "Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  6.24it/s]\n",
      "Expected types for transformer: (<class 'diffusers.models.transformers.transformer_qwenimage.QwenImageTransformer2DModel'>,), got <class 'qwenimage.models.transformer_qwenimage.QwenImageTransformer2DModel'>.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time taken by QwenBaseExperiment.load: 21.346381488998304 seconds\n"
     ]
    }
   ],
   "source": [
    "name = \"qwen_base\"\n",
    "\n",
    "# Fetch whatever ExperimentRegistry returns for `name`, instantiate it with a\n",
    "# config that carries only that name, then load it.\n",
    "experiment_cls = ExperimentRegistry.get(name)\n",
    "experiment_config = ExperimentConfig(name=name)\n",
    "experiment = experiment_cls(config=experiment_config)\n",
    "experiment.load()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d9c71b17",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "QwenImageTransformer2DModel(\n",
       "  (pos_embed): QwenEmbedRope()\n",
       "  (time_text_embed): QwenTimestepProjEmbeddings(\n",
       "    (time_proj): Timesteps()\n",
       "    (timestep_embedder): TimestepEmbedding(\n",
       "      (linear_1): Linear(in_features=256, out_features=3072, bias=True)\n",
       "      (act): SiLU()\n",
       "      (linear_2): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "    )\n",
       "  )\n",
       "  (txt_norm): RMSNorm()\n",
       "  (img_in): Linear(in_features=64, out_features=3072, bias=True)\n",
       "  (txt_in): Linear(in_features=3584, out_features=3072, bias=True)\n",
       "  (transformer_blocks): ModuleList(\n",
       "    (0-59): 60 x QwenImageTransformerBlock(\n",
       "      (img_mod): Sequential(\n",
       "        (0): SiLU()\n",
       "        (1): Linear(in_features=3072, out_features=18432, bias=True)\n",
       "      )\n",
       "      (img_norm1): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n",
       "      (attn): Attention(\n",
       "        (norm_q): RMSNorm()\n",
       "        (norm_k): RMSNorm()\n",
       "        (to_q): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (to_k): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (to_v): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (add_k_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (add_v_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (add_q_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (to_out): ModuleList(\n",
       "          (0): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "          (1): Dropout(p=0.0, inplace=False)\n",
       "        )\n",
       "        (to_add_out): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "        (norm_added_q): RMSNorm()\n",
       "        (norm_added_k): RMSNorm()\n",
       "      )\n",
       "      (img_norm2): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n",
       "      (img_mlp): FeedForward(\n",
       "        (net): ModuleList(\n",
       "          (0): GELU(\n",
       "            (proj): Linear(in_features=3072, out_features=12288, bias=True)\n",
       "          )\n",
       "          (1): Dropout(p=0.0, inplace=False)\n",
       "          (2): Linear(in_features=12288, out_features=3072, bias=True)\n",
       "        )\n",
       "      )\n",
       "      (txt_mod): Sequential(\n",
       "        (0): SiLU()\n",
       "        (1): Linear(in_features=3072, out_features=18432, bias=True)\n",
       "      )\n",
       "      (txt_norm1): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n",
       "      (txt_norm2): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n",
       "      (txt_mlp): FeedForward(\n",
       "        (net): ModuleList(\n",
       "          (0): GELU(\n",
       "            (proj): Linear(in_features=3072, out_features=12288, bias=True)\n",
       "          )\n",
       "          (1): Dropout(p=0.0, inplace=False)\n",
       "          (2): Linear(in_features=12288, out_features=3072, bias=True)\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "  )\n",
       "  (norm_out): AdaLayerNormContinuous(\n",
       "    (silu): SiLU()\n",
       "    (linear): Linear(in_features=3072, out_features=6144, bias=True)\n",
       "    (norm): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n",
       "  )\n",
       "  (proj_out): Linear(in_features=3072, out_features=64, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the module tree of the loaded pipeline's transformer.\n",
    "transformer = experiment.pipe.transformer\n",
    "transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "95cc14bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call the model-level QKV fusion hook on the transformer.\n",
    "transformer = experiment.pipe.transformer\n",
    "transformer.fuse_qkv_projections()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "dc4e25ac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<qwenimage.models.transformer_qwenimage.QwenDoubleStreamAttnProcessor2_0 at 0x7e99225db4f0>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attention processor installed on the first transformer block.\n",
    "experiment.pipe.transformer.transformer_blocks[0].attn.processor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58e2e14a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `fuse_projections` is not available on the attention processor: calling it there raised\n",
    "#   AttributeError: 'QwenDoubleStreamAttnProcessor2_0' object has no attribute 'fuse_projections'\n",
    "# Probe for the attribute instead of leaving a cell that raises; the call that\n",
    "# works is made on the Attention module itself.\n",
    "attn_processor = experiment.pipe.transformer.transformer_blocks[0].attn.processor\n",
    "hasattr(attn_processor, \"fuse_projections\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bf0a2e1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call fuse_projections() directly on the first block's Attention module.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn.fuse_projections()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "b4fad048",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Attention(\n",
       "  (norm_q): RMSNorm()\n",
       "  (norm_k): RMSNorm()\n",
       "  (to_q): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (to_k): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (to_v): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (add_k_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (add_v_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (add_q_proj): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (to_out): ModuleList(\n",
       "    (0): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "    (1): Dropout(p=0.0, inplace=False)\n",
       "  )\n",
       "  (to_add_out): Linear(in_features=3072, out_features=3072, bias=True)\n",
       "  (norm_added_q): RMSNorm()\n",
       "  (norm_added_k): RMSNorm()\n",
       "  (to_qkv): Linear(in_features=3072, out_features=9216, bias=True)\n",
       "  (to_added_qkv): Linear(in_features=3072, out_features=9216, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the first block's Attention module to see which projection layers it holds.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "7c856cf8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([[-0.0354, -0.0508,  0.0098,  ..., -0.0466,  0.0349, -0.0154],\n",
       "        [ 0.0036,  0.1016,  0.0059,  ..., -0.2812,  0.0466,  0.0233],\n",
       "        [ 0.0041,  0.0253, -0.0157,  ..., -0.0137,  0.0294,  0.0137],\n",
       "        ...,\n",
       "        [-0.0354, -0.0393, -0.0237,  ...,  0.0352,  0.0315,  0.0058],\n",
       "        [ 0.0214, -0.0430,  0.0119,  ...,  0.0547,  0.0352, -0.0117],\n",
       "        [-0.0315, -0.0703, -0.0292,  ...,  0.0859, -0.0270, -0.0097]],\n",
       "       device='cuda:0', dtype=torch.bfloat16, requires_grad=True)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weight of the fused `to_qkv` layer; compare its leading rows with `to_q.weight`.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn.to_qkv.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "05a84eb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([[-0.0354, -0.0508,  0.0098,  ..., -0.0466,  0.0349, -0.0154],\n",
       "        [ 0.0036,  0.1016,  0.0059,  ..., -0.2812,  0.0466,  0.0233],\n",
       "        [ 0.0041,  0.0253, -0.0157,  ..., -0.0137,  0.0294,  0.0137],\n",
       "        ...,\n",
       "        [ 0.0258,  0.0508,  0.0137,  ..., -0.0430,  0.0197, -0.0007],\n",
       "        [-0.0349,  0.0058,  0.0195,  ..., -0.0255,  0.0100,  0.0289],\n",
       "        [ 0.0312, -0.0703, -0.0177,  ...,  0.0198, -0.0233, -0.0060]],\n",
       "       device='cuda:0', dtype=torch.bfloat16)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weight of the separate `to_q` layer, for comparison with `to_qkv.weight`.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn.to_q.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ea73499c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([[-0.1089,  0.1406,  0.0310,  ...,  0.0623, -0.1016,  0.0859],\n",
       "        [ 0.0391,  0.0002,  0.0312,  ...,  0.0505,  0.0208, -0.0549],\n",
       "        [ 0.0055, -0.0703, -0.0471,  ..., -0.0171, -0.0874,  0.0625],\n",
       "        ...,\n",
       "        [-0.0386,  0.0703, -0.0116,  ..., -0.0004, -0.0015,  0.0037],\n",
       "        [-0.0869, -0.0229,  0.0586,  ..., -0.0092,  0.1875, -0.0231],\n",
       "        [-0.0182,  0.0432,  0.0019,  ..., -0.0152, -0.1250,  0.0471]],\n",
       "       device='cuda:0', dtype=torch.bfloat16)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weight of the separate `add_q_proj` layer, for comparison with `to_added_qkv.weight`.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn.add_q_proj.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "eb9b5f5a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "tensor([[-1.0889e-01,  1.4062e-01,  3.1006e-02,  ...,  6.2256e-02,\n",
       "         -1.0156e-01,  8.5938e-02],\n",
       "        [ 3.9062e-02,  2.1744e-04,  3.1250e-02,  ...,  5.0537e-02,\n",
       "          2.0752e-02, -5.4932e-02],\n",
       "        [ 5.5237e-03, -7.0312e-02, -4.7119e-02,  ..., -1.7090e-02,\n",
       "         -8.7402e-02,  6.2500e-02],\n",
       "        ...,\n",
       "        [ 5.9509e-03,  3.9062e-02,  1.3550e-02,  ...,  2.0905e-03,\n",
       "          1.3611e-02,  3.8452e-03],\n",
       "        [-3.9062e-02, -7.0312e-02, -3.7384e-03,  ...,  1.8158e-03,\n",
       "          2.1875e-01,  5.4688e-02],\n",
       "        [-8.5938e-02, -1.3611e-02,  3.1128e-02,  ...,  2.5391e-02,\n",
       "         -1.0938e-01,  1.7700e-02]], device='cuda:0', dtype=torch.bfloat16,\n",
       "       requires_grad=True)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Weight of the fused `to_added_qkv` layer; compare its leading rows with `add_q_proj.weight`.\n",
    "first_attn = experiment.pipe.transformer.transformer_blocks[0].attn\n",
    "first_attn.to_added_qkv.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "38ddd904",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FlowMatchEulerDiscreteScheduler {\n",
       "  \"_class_name\": \"FlowMatchEulerDiscreteScheduler\",\n",
       "  \"_diffusers_version\": \"0.36.0.dev0\",\n",
       "  \"base_image_seq_len\": 256,\n",
       "  \"base_shift\": 0.5,\n",
       "  \"invert_sigmas\": false,\n",
       "  \"max_image_seq_len\": 8192,\n",
       "  \"max_shift\": 0.9,\n",
       "  \"num_train_timesteps\": 1000,\n",
       "  \"shift\": 1.0,\n",
       "  \"shift_terminal\": 0.02,\n",
       "  \"stochastic_sampling\": false,\n",
       "  \"time_shift_type\": \"exponential\",\n",
       "  \"use_beta_sigmas\": false,\n",
       "  \"use_dynamic_shifting\": true,\n",
       "  \"use_exponential_sigmas\": false,\n",
       "  \"use_karras_sigmas\": false\n",
       "}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the scheduler configuration of the loaded pipeline.\n",
    "pipe_scheduler = experiment.pipe.scheduler\n",
    "pipe_scheduler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4fed8e99",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2bfe69a3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.4\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n",
      "2025-11-13 15:17:10.085053: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2025-11-13 15:17:10.099287: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
      "E0000 00:00:1763047030.116296  952543 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "E0000 00:00:1763047030.121798  952543 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "W0000 00:00:1763047030.135130  952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763047030.135144  952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763047030.135147  952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1763047030.135148  952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "2025-11-13 15:17:10.139216: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "Skipping import of cpp extensions due to incompatible torch version 2.9.1+cu128 for torchao version 0.14.1             Please see https://github.com/pytorch/ao/issues/2919 for more info\n",
      "TMA benchmarks will be running without grid constant TMA descriptor.\n",
      "Fetching 31 files: 100%|██████████| 31/31 [00:12<00:00,  2.39it/s]\n",
      "Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]`torch_dtype` is deprecated! Use `dtype` instead!\n",
      "Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 34.79it/s]\n",
      "Loading checkpoint shards: 100%|██████████| 5/5 [00:00<00:00, 31.71it/s]it/s]\n",
      "Loading pipeline components...: 100%|██████████| 6/6 [00:01<00:00,  3.52it/s]\n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler\n",
    "import torch\n",
    "\n",
    "from qwenimage.models.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline\n",
    "from qwenimage.models.transformer_qwenimage import QwenImageTransformer2DModel\n",
    "\n",
    "# We use shift=3 in distillation; base_shift and max_shift are both set to log(3).\n",
    "LIGHTNING_SHIFT = math.log(3)\n",
    "\n",
    "dtype = torch.bfloat16\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "\n",
    "# Scheduler configuration for Lightning\n",
    "scheduler_config = {\n",
    "    \"base_image_seq_len\": 256,\n",
    "    \"base_shift\": LIGHTNING_SHIFT,\n",
    "    \"invert_sigmas\": False,\n",
    "    \"max_image_seq_len\": 8192,\n",
    "    \"max_shift\": LIGHTNING_SHIFT,\n",
    "    \"num_train_timesteps\": 1000,\n",
    "    \"shift\": 1.0,\n",
    "    \"shift_terminal\": None,  # set shift_terminal to None\n",
    "    \"stochastic_sampling\": False,\n",
    "    \"time_shift_type\": \"exponential\",\n",
    "    \"use_beta_sigmas\": False,\n",
    "    \"use_dynamic_shifting\": True,\n",
    "    \"use_exponential_sigmas\": False,\n",
    "    \"use_karras_sigmas\": False,\n",
    "}\n",
    "\n",
    "# Initialize scheduler with Lightning config\n",
    "# TODO: check scheduler sync issue mentioned by https://pytorch.org/blog/presenting-flux-fast-making-flux-go-brrr-on-h100s/\n",
    "scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)\n",
    "\n",
    "# Load the replacement transformer onto `device` rather than a hard-coded 'cuda',\n",
    "# so the CPU fallback chosen above is honoured when no GPU is available.\n",
    "# TODO: remove this if using lightning\n",
    "rapid_transformer = QwenImageTransformer2DModel.from_pretrained(\n",
    "    \"linoyts/Qwen-Image-Edit-Rapid-AIO\",\n",
    "    subfolder='transformer',\n",
    "    torch_dtype=dtype,\n",
    "    device_map=device,\n",
    ")\n",
    "\n",
    "pipe = QwenImageEditPlusPipeline.from_pretrained(\n",
    "    \"Qwen/Qwen-Image-Edit-2509\",\n",
    "    transformer=rapid_transformer,\n",
    "    scheduler=scheduler,\n",
    "    torch_dtype=dtype,\n",
    ").to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2435f81d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "386e8f4c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e81b77e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.7294921875"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# Size in MiB of one bfloat16 tensor of shape (1, 253, 3584):\n",
    "# bytes per element times element count, divided by 1024**2.\n",
    "e = torch.rand((1, 253, 3584), dtype=torch.bfloat16)\n",
    "total_bytes = e.element_size() * e.numel()\n",
    "total_bytes / (1024 ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e571d339",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3402c9bb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71672e20",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7b6c550",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55dec4a6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}