{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "396e62df", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/ubuntu/Qwen-Image-Edit-Angles\n" ] } ], "source": [ "import os\n", "\n", "# Work from the repository checkout. The location defaults to the original\n", "# hard-coded path and can be overridden via QWEN_IMAGE_EDIT_ANGLES_DIR.\n", "REPO_DIR = os.environ.get(\"QWEN_IMAGE_EDIT_ANGLES_DIR\", \"/home/ubuntu/Qwen-Image-Edit-Angles\")\n", "%cd {REPO_DIR}" ] }, { "cell_type": "code", "execution_count": 2, "id": "08516c94", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "Skipping import of cpp extensions due to incompatible torch version 2.9.1+cu128 for torchao version 0.14.1 Please see https://github.com/pytorch/ao/issues/2919 for more info\n", "TMA benchmarks will be running without grid constant TMA descriptor.\n", "2025-11-13 14:36:44.418437: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2025-11-13 14:36:44.432587: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1763044604.449633 918286 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1763044604.455190 918286 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "W0000 00:00:1763044604.468352 918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763044604.468368 918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763044604.468370 918286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763044604.468372 918286 computation_placer.cc:177] computation placer already registered. 
Please check linkage and avoid linking the same target more than once.\n", "2025-11-13 14:36:44.472502: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 83173.17it/s]\n" ] } ], "source": [ "\n", "from qwenimage.debug import clear_cuda_memory, print_gpu_memory\n", "from qwenimage.experiment import ExperimentConfig\n", "from qwenimage.experiments.experiments_qwen import PipeInputs, Qwen_AoT, QwenBaseExperiment, ExperimentRegistry" ] }, { "cell_type": "code", "execution_count": 3, "id": "d65e5223", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "720 input combinations\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 14820.86it/s]\n", "Loading checkpoint shards: 0%| | 0/4 [00:00,), got .\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Time taken by QwenBaseExperiment.load: 21.346381488998304 seconds\n" ] } ], "source": [ "# Look up the experiment class registered under `name`, instantiate it with a\n", "# config carrying the same name, then call its load().\n", "name = \"qwen_base\"\n", "experiment_cls = ExperimentRegistry.get(name)\n", "experiment_config = ExperimentConfig(name=name)\n", "experiment = experiment_cls(config=experiment_config)\n", "experiment.load()\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "d9c71b17", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "QwenImageTransformer2DModel(\n", " (pos_embed): QwenEmbedRope()\n", " (time_text_embed): QwenTimestepProjEmbeddings(\n", " (time_proj): Timesteps()\n", " (timestep_embedder): TimestepEmbedding(\n", " (linear_1): Linear(in_features=256, out_features=3072, bias=True)\n", " (act): SiLU()\n", " (linear_2): Linear(in_features=3072, out_features=3072, bias=True)\n", " )\n", " )\n", " (txt_norm): RMSNorm()\n", " 
(img_in): Linear(in_features=64, out_features=3072, bias=True)\n", " (txt_in): Linear(in_features=3584, out_features=3072, bias=True)\n", " (transformer_blocks): ModuleList(\n", " (0-59): 60 x QwenImageTransformerBlock(\n", " (img_mod): Sequential(\n", " (0): SiLU()\n", " (1): Linear(in_features=3072, out_features=18432, bias=True)\n", " )\n", " (img_norm1): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n", " (attn): Attention(\n", " (norm_q): RMSNorm()\n", " (norm_k): RMSNorm()\n", " (to_q): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_k): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_v): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_k_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_v_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_q_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_out): ModuleList(\n", " (0): Linear(in_features=3072, out_features=3072, bias=True)\n", " (1): Dropout(p=0.0, inplace=False)\n", " )\n", " (to_add_out): Linear(in_features=3072, out_features=3072, bias=True)\n", " (norm_added_q): RMSNorm()\n", " (norm_added_k): RMSNorm()\n", " )\n", " (img_norm2): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n", " (img_mlp): FeedForward(\n", " (net): ModuleList(\n", " (0): GELU(\n", " (proj): Linear(in_features=3072, out_features=12288, bias=True)\n", " )\n", " (1): Dropout(p=0.0, inplace=False)\n", " (2): Linear(in_features=12288, out_features=3072, bias=True)\n", " )\n", " )\n", " (txt_mod): Sequential(\n", " (0): SiLU()\n", " (1): Linear(in_features=3072, out_features=18432, bias=True)\n", " )\n", " (txt_norm1): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n", " (txt_norm2): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n", " (txt_mlp): FeedForward(\n", " (net): ModuleList(\n", " (0): GELU(\n", " (proj): Linear(in_features=3072, out_features=12288, bias=True)\n", " )\n", " (1): 
Dropout(p=0.0, inplace=False)\n", " (2): Linear(in_features=12288, out_features=3072, bias=True)\n", " )\n", " )\n", " )\n", " )\n", " (norm_out): AdaLayerNormContinuous(\n", " (silu): SiLU()\n", " (linear): Linear(in_features=3072, out_features=6144, bias=True)\n", " (norm): LayerNorm((3072,), eps=1e-06, elementwise_affine=False)\n", " )\n", " (proj_out): Linear(in_features=3072, out_features=64, bias=True)\n", ")" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "experiment.pipe.transformer" ] }, { "cell_type": "code", "execution_count": 5, "id": "95cc14bd", "metadata": {}, "outputs": [], "source": [ "experiment.pipe.transformer.fuse_qkv_projections()" ] }, { "cell_type": "code", "execution_count": 9, "id": "dc4e25ac", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "experiment.pipe.transformer.transformer_blocks[0].attn.processor" ] }, 
{ "cell_type": "code", "execution_count": null, "id": "58e2e14a", "metadata": {}, "outputs": [], "source": [ "# The attention processor does not expose fuse_projections(): calling it raised\n", "# AttributeError on QwenDoubleStreamAttnProcessor2_0. Probe for the attribute\n", "# instead so the notebook still runs top to bottom.\n", "hasattr(experiment.pipe.transformer.transformer_blocks[0].attn.processor, \"fuse_projections\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "bf0a2e1e", "metadata": {}, "outputs": [], "source": [ "# fuse_projections() is available on the attention module itself; after this\n", "# call the module repr lists the extra to_qkv and to_added_qkv layers.\n", "experiment.pipe.transformer.transformer_blocks[0].attn.fuse_projections()" ] }, { "cell_type": "code", "execution_count": 12, "id": "b4fad048", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Attention(\n", " (norm_q): RMSNorm()\n", " (norm_k): RMSNorm()\n", " (to_q): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_k): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_v): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_k_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_v_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (add_q_proj): Linear(in_features=3072, out_features=3072, bias=True)\n", " (to_out): ModuleList(\n", " (0): Linear(in_features=3072, out_features=3072, bias=True)\n", " (1): Dropout(p=0.0, inplace=False)\n", " )\n", " (to_add_out): Linear(in_features=3072, out_features=3072, bias=True)\n", " (norm_added_q): RMSNorm()\n", " (norm_added_k): RMSNorm()\n", " (to_qkv): Linear(in_features=3072, out_features=9216, bias=True)\n", " (to_added_qkv): Linear(in_features=3072, out_features=9216, bias=True)\n", ")" ] }, "execution_count": 
12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Attention module of the first transformer block.\n", "(\n", "    experiment.pipe.transformer\n", "    .transformer_blocks[0]\n", "    .attn\n", ")" ] }, { "cell_type": "code", "execution_count": 18, "id": "7c856cf8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Parameter containing:\n", "tensor([[-0.0354, -0.0508, 0.0098, ..., -0.0466, 0.0349, -0.0154],\n", " [ 0.0036, 0.1016, 0.0059, ..., -0.2812, 0.0466, 0.0233],\n", " [ 0.0041, 0.0253, -0.0157, ..., -0.0137, 0.0294, 0.0137],\n", " ...,\n", " [-0.0354, -0.0393, -0.0237, ..., 0.0352, 0.0315, 0.0058],\n", " [ 0.0214, -0.0430, 0.0119, ..., 0.0547, 0.0352, -0.0117],\n", " [-0.0315, -0.0703, -0.0292, ..., 0.0859, -0.0270, -0.0097]],\n", " device='cuda:0', dtype=torch.bfloat16, requires_grad=True)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Weight of the to_qkv layer on the first block's attention module.\n", "(\n", "    experiment.pipe.transformer\n", "    .transformer_blocks[0]\n", "    .attn.to_qkv.weight\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "id": "05a84eb7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Parameter containing:\n", "tensor([[-0.0354, -0.0508, 0.0098, ..., -0.0466, 0.0349, -0.0154],\n", " [ 0.0036, 0.1016, 0.0059, ..., -0.2812, 0.0466, 0.0233],\n", " [ 0.0041, 0.0253, -0.0157, ..., -0.0137, 0.0294, 0.0137],\n", " ...,\n", " [ 0.0258, 0.0508, 0.0137, ..., -0.0430, 0.0197, -0.0007],\n", " [-0.0349, 0.0058, 0.0195, ..., -0.0255, 0.0100, 0.0289],\n", " [ 0.0312, -0.0703, -0.0177, ..., 0.0198, -0.0233, -0.0060]],\n", " device='cuda:0', dtype=torch.bfloat16)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Weight of the standalone to_q layer on the same attention module.\n", "(\n", "    experiment.pipe.transformer\n", "    .transformer_blocks[0]\n", "    .attn.to_q.weight\n", ")" ] }, { "cell_type": "code", "execution_count": 21, "id": "ea73499c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Parameter containing:\n", "tensor([[-0.1089, 0.1406, 0.0310, ..., 0.0623, -0.1016, 0.0859],\n", " [ 0.0391, 0.0002, 0.0312, ..., 0.0505, 0.0208, -0.0549],\n", " [ 0.0055, -0.0703, -0.0471, ..., 
-0.0171, -0.0874, 0.0625],\n", " ...,\n", " [-0.0386, 0.0703, -0.0116, ..., -0.0004, -0.0015, 0.0037],\n", " [-0.0869, -0.0229, 0.0586, ..., -0.0092, 0.1875, -0.0231],\n", " [-0.0182, 0.0432, 0.0019, ..., -0.0152, -0.1250, 0.0471]],\n", " device='cuda:0', dtype=torch.bfloat16)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Weight of the add_q_proj layer on the first block's attention module.\n", "(\n", "    experiment.pipe.transformer\n", "    .transformer_blocks[0]\n", "    .attn.add_q_proj.weight\n", ")" ] }, { "cell_type": "code", "execution_count": 22, "id": "eb9b5f5a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Parameter containing:\n", "tensor([[-1.0889e-01, 1.4062e-01, 3.1006e-02, ..., 6.2256e-02,\n", " -1.0156e-01, 8.5938e-02],\n", " [ 3.9062e-02, 2.1744e-04, 3.1250e-02, ..., 5.0537e-02,\n", " 2.0752e-02, -5.4932e-02],\n", " [ 5.5237e-03, -7.0312e-02, -4.7119e-02, ..., -1.7090e-02,\n", " -8.7402e-02, 6.2500e-02],\n", " ...,\n", " [ 5.9509e-03, 3.9062e-02, 1.3550e-02, ..., 2.0905e-03,\n", " 1.3611e-02, 3.8452e-03],\n", " [-3.9062e-02, -7.0312e-02, -3.7384e-03, ..., 1.8158e-03,\n", " 2.1875e-01, 5.4688e-02],\n", " [-8.5938e-02, -1.3611e-02, 3.1128e-02, ..., 2.5391e-02,\n", " -1.0938e-01, 1.7700e-02]], device='cuda:0', dtype=torch.bfloat16,\n", " requires_grad=True)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Weight of the to_added_qkv layer on the same attention module.\n", "(\n", "    experiment.pipe.transformer\n", "    .transformer_blocks[0]\n", "    .attn.to_added_qkv.weight\n", ")" ] }, { "cell_type": "code", "execution_count": 23, "id": "38ddd904", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "FlowMatchEulerDiscreteScheduler {\n", " \"_class_name\": \"FlowMatchEulerDiscreteScheduler\",\n", " \"_diffusers_version\": \"0.36.0.dev0\",\n", " \"base_image_seq_len\": 256,\n", " \"base_shift\": 0.5,\n", " \"invert_sigmas\": false,\n", " \"max_image_seq_len\": 8192,\n", " \"max_shift\": 0.9,\n", " \"num_train_timesteps\": 1000,\n", " \"shift\": 1.0,\n", " \"shift_terminal\": 0.02,\n", " \"stochastic_sampling\": false,\n", " \"time_shift_type\": 
\"exponential\",\n", " \"use_beta_sigmas\": false,\n", " \"use_dynamic_shifting\": true,\n", " \"use_exponential_sigmas\": false,\n", " \"use_karras_sigmas\": false\n", "}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the scheduler attached to the experiment's pipeline.\n", "experiment.pipe.scheduler" ] }, { "cell_type": "code", "execution_count": null, "id": "2bfe69a3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.4\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n", "2025-11-13 15:17:10.085053: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2025-11-13 15:17:10.099287: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1763047030.116296 952543 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1763047030.121798 952543 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "W0000 00:00:1763047030.135130 952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763047030.135144 952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763047030.135147 952543 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n", "W0000 00:00:1763047030.135148 952543 computation_placer.cc:177] computation placer already registered. 
Please check linkage and avoid linking the same target more than once.\n", "2025-11-13 15:17:10.139216: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "Skipping import of cpp extensions due to incompatible torch version 2.9.1+cu128 for torchao version 0.14.1 Please see https://github.com/pytorch/ao/issues/2919 for more info\n", "TMA benchmarks will be running without grid constant TMA descriptor.\n", "Fetching 31 files: 100%|██████████| 31/31 [00:12<00:00, 2.39it/s]\n", "Loading pipeline components...: 0%| | 0/6 [00:00