{
  "architectures": [
    "PS3Model"
  ],
  "model_type": "ps3",
  "vision_config": {
    "architectures": [
      "PS3VisionModel"
    ],
    "model_type": "ps3_vision_model",
    "model_name": "vit_so400m_patch14_siglip_378",
    "hidden_size": 1152,
    "pool": "map",
    "ps3": true,
    "ps3_scales": [
      378,
      756,
      1512,
      3780
    ],
    "select_based_on_layer": [
      0,
      9,
      18,
      26
    ],
    "min_select_num": 1,
    "max_select_num": 2560,
    "separate_pos_emb": true,
    "highres_selection_feature": true
  },
  "text_config": {
    "context_length": 64,
    "vocab_size": 256000,
    "hf_tokenizer_name": "timm/ViT-SO400M-14-SigLIP2-378",
    "tokenizer_kwargs": {
      "clean": "canonicalize"
    },
    "width": 1152,
    "heads": 16,
    "layers": 27,
    "mlp_ratio": 3.7362,
    "no_causal_mask": true,
    "proj_bias": true,
    "pool_type": "last",
    "norm_kwargs": {
      "eps": 1e-06
    },
    "act_kwargs": {
      "approximate": "tanh"
    },
    "architectures": [
      "PS3TextModel"
    ],
    "model_type": "ps3_text_model",
    "output_dim": 1152,
    "prompt_proj_dim": 1152
  }
}