{ "adapter_config": { "attention_dropout": 0.0, "dtype": "float16", "float32_attention": true, "head_dim": 72, "hidden_act": "silu", "hidden_size": 1152, "image_feature_dropout": 0.0, "image_padding_embed": null, "initializer_range": 0.02, "intermediate_size": 18944, "model_type": "", "num_attention_heads": 16, "num_key_value_heads": 16, "residual_dropout": 0.0, "text_hidden_size": 3584, "vit_layers": [ -3, -9 ] }, "architectures": [ "MolmoActForActionReasoning" ], "auto_map": { "AutoConfig": "configuration_molmoact.MolmoActConfig", "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" }, "dtype": "float16", "image_patch_id": 152066, "initializer_range": 0.02, "llm_config": { "additional_vocab_size": 128, "attention_dropout": 0.0, "dtype": "float16", "embedding_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "layer_norm_eps": 1e-06, "max_position_embeddings": 4096, "model_type": "molmoact_llm", "norm_after": false, "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "qk_norm_type": "olmo", "qkv_bias": true, "residual_dropout": 0.0, "rope_scaling": null, "rope_theta": 1000000.0, "use_cache": true, "use_qk_norm": false, "vocab_size": 152064 }, "model_type": "molmoact", "n_action_bins": 256, "norm_stats": { "molmoact": { "action": { "max": [ 0.06042003631591797, 0.09417290985584259, 0.07019275426864624, 0.2616892158985138, 0.11751057207584381, 0.16968433558940887, 1.0 ], "mean": [ 0.0005706787342205644, 0.0002448957529850304, -3.5987635783385485e-05, 0.00021597897284664214, -0.0004896928439848125, -0.000241481073317118, 0.5570635199546814 ], "min": [ -0.07434078305959702, -0.07339745759963989, -0.06539416313171387, -0.1688285619020462, -0.10289879888296127, -0.2667275667190552, 0.0 ], "q01": [ -0.01538565568625927, -0.021047022193670273, -0.01688069850206375, -0.044314172118902206, -0.03890235349535942, -0.04788423702120781, 0.0 ], "q99": [ 0.014661382883787155, 0.026515591889619827, 0.021398313343524933, 0.04216696694493294, 0.03401297703385353, 0.04957397282123566, 1.0 ], "std": [ 0.005207270849496126, 0.007506529800593853, 0.006415561307221651, 0.013248044066131115, 0.010928540490567684, 0.014873150736093521, 0.49715080857276917 ] }, "num_entries": 1560068 } }, "quantization_config": { "config_groups": { "group_0": { "format": "pack-quantized", "input_activations": null, "output_activations": null, "targets": [ "Linear" ], "weights": { "actorder": null, "block_structure": null, "dynamic": false, "group_size": 128, "num_bits": 4, "observer": "minmax", "observer_kwargs": {}, "strategy": "group", "symmetric": true, "type": "int" } } }, "format": "pack-quantized", "global_compression_ratio": null, "ignore": [ "model.vision_backbone.image_vit.patch_embedding", "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w2", "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wq", "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wk", "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wv", "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wo", "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w1", "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w2", "model.vision_backbone.image_pooling_2d.wq", "model.vision_backbone.image_pooling_2d.wk", "model.vision_backbone.image_pooling_2d.wv", "model.vision_backbone.image_pooling_2d.wo", "model.vision_backbone.image_projector.w1", "model.vision_backbone.image_projector.w2", "model.vision_backbone.image_projector.w3", "lm_head" ], "kv_cache_scheme": null, "quant_method": "compressed-tensors", "quantization_status": "compressed", "sparsity_config": {}, "transform_config": {}, "version": "0.12.3.a20251013" }, "tie_word_embeddings": false, "transformers_version": "4.57.1", "use_cache": true, "vit_config": { "attention_dropout": 0.0, "dtype": "float16", "float32_attention": true, "head_dim": 72, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_default_input_size": [ 378, 378 ], "image_num_pos": 729, "image_patch_size": 14, "initializer_range": 0.02, "intermediate_size": 4304, "layer_norm_eps": 1e-06, "model_type": "molmoact_vit", "num_attention_heads": 16, "num_hidden_layers": 27, "num_key_value_heads": 16, "patch_bias": true, "pre_layernorm": false, "residual_dropout": 0.0, "use_cls_token": false } }