{
  "architectures": [
    "PS3Model"
  ],
  "model_type": "ps3",
  "vision_config": {
    "architectures": [
      "PS3VisionModel"
    ],
    "model_type": "ps3_vision_model",
    "model_name": "vit_so400m_patch14_siglip_378",
    "hidden_size": 1152,
    "pool": "map",
    "ps3": true,
    "ps3_scales": [
      378,
      756,
      1512,
      3780
    ],
    "select_based_on_layer": [
      0,
      9,
      18,
      26
    ],
    "min_select_num": 1,
    "max_select_num": 2560,
    "separate_pos_emb": true,
    "highres_selection_feature": true
  },
  "text_config": {
    "context_length": 64,
    "vocab_size": 256000,
    "hf_tokenizer_name": "timm/ViT-SO400M-14-SigLIP2-378",
    "tokenizer_kwargs": {
      "clean": "canonicalize"
    },
    "width": 1152,
    "heads": 16,
    "layers": 27,
    "mlp_ratio": 3.7362,
    "no_causal_mask": true,
    "proj_bias": true,
    "pool_type": "last",
    "norm_kwargs": {
      "eps": 1e-06
    },
    "act_kwargs": {
      "approximate": "tanh"
    },
    "architectures": [
      "PS3TextModel"
    ],
    "model_type": "ps3_text_model",
    "output_dim": 1152,
    "prompt_proj_dim": 1152
  }
}