zhwang4ai's picture
Update score.json
5d301c2 verified
{
"llama3-llava-next-8b-hf-2412": {
"VQA": 0.4186046511627907,
"QA": 0.1896551724137931,
"Reason": 0.23076923076923078,
"VQA_Reasoning": 0.25
},
"gpt-4o-2412": {
"QA": 0.9655172413793104,
"Reason": 0.7692307692307693,
"VQA_Reasoning": 0.85,
"VQA": 0.7674418604651163
},
"gpt-4o-mini-2412": {
"QA": 0.7586206896551724,
"Reason": 0.5384615384615384,
"VQA_Reasoning": 0.6,
"VQA": 0.627906976744186
},
"fuyu-8b-2412": {
"Reason": 0.0,
"QA": 0.017241379310344827
},
"llava-1.5-13b-hf-2412": {
"Reason": 0.0,
"VQA": 0.3023255813953488,
"QA": 0.0,
"VQA_Reasoning": 0.65
},
"llava-1.5-7b-hf-2412": {
"VQA_Reasoning": 0.45,
"VQA": 0.32558139534883723,
"Reason": 0.0,
"QA": 0.0
},
"llava-v1.6-mistral-7b-hf-2412": {
"VQA": 0.37209302325581395,
"VQA_Reasoning": 0.55,
"Reason": 0.15384615384615385,
"QA": 0.15517241379310345
},
"llava-v1.6-vicuna-13b-hf-2412": {
"VQA_Reasoning": 0.35,
"VQA": 0.4186046511627907,
"QA": 0.1724137931034483,
"Reason": 0.15384615384615385
},
"llava-v1.6-vicuna-7b-hf-2412": {
"Reason": 0.15384615384615385,
"VQA": 0.23255813953488372,
"VQA_Reasoning": 0.2,
"QA": 0.06896551724137931
},
"MiniCPM-V-2_6-2412": {
"Reason": 0.15384615384615385,
"VQA": 0.5116279069767442,
"VQA_Reasoning": 0.4,
"QA": 0.15517241379310345
},
"llava-gemma-2b-2412": {
"Reason": 0.0,
"VQA": 0.3023255813953488,
"VQA_Reasoning": 0.2,
"QA": 0.034482758620689655
},
"molmo-7b-d-0924-2412": {
"VQA": 0.5813953488372093,
"VQA_Reasoning": 0.15,
"QA": 0.1206896551724138,
"Reason": 0.15384615384615385,
"Gui_Grounding": {
"100": 0.65,
"20": 0.19,
"50": 0.46,
"200": 0.84
},
"Embodied_Grounding": {
"100": 0.30916030534351147,
"20": 0.03435114503816794,
"50": 0.16412213740458015,
"200": 0.5
}
},
"molmo-72b-0924-2412": {
"Reason": 0.46153846153846156,
"VQA_Reasoning": 0.55,
"QA": 0.3620689655172414,
"VQA": 0.5581395348837209
},
"mc-sft-llava_next_8b-mcqa_v3_12_25_277k-2411": {
"Reason": 0.23076923076923078,
"VQA_Reasoning": 0.25,
"QA": 0.6724137931034483,
"VQA": 0.11627906976744186
},
"qwen2-vl-7b-instruct-2412": {
"Gui_Grounding": {
"100": 0.41,
"20": 0.08,
"200": 0.76,
"50": 0.21
},
"Embodied_Grounding": {
"100": 0.40458015267175573,
"20": 0.13740458015267176,
"200": 0.5572519083969466,
"50": 0.25190839694656486
},
"QA": 0.06896551724137931,
"VQA": 0.4186046511627907,
"VQA_Reasoning": 0.375,
"Reason": 0.15384615384615385
},
"llama-3.2-11b-vision-instruct-2412": {
"VQA": 0.4418604651162791,
"VQA_Reasoning": 0.25,
"Reason": 0.23076923076923078,
"QA": 0.20689655172413793
},
"qwen2-vl-72b-instruct-2412": {
"Gui_Grounding": {
"100": 0.0,
"20": 0.0,
"200": 0.0,
"50": 0.0
},
"Embodied_Grounding": {
"100": 0.35877862595419846,
"20": 0.09541984732824428,
"200": 0.5038167938931297,
"50": 0.22137404580152673
}
},
"mc-sft-llava_next_8b-mcqa_v3_12_25_277k-2412": {
"QA": 0.6724137931034483,
"Reason": 0.38461538461538464,
"VQA": 0.20930232558139536,
"VQA_Reasoning": 0.45
},
"mc-vsft-llama3_llava_next_8b-mcvqa_v4_11_21_80k-2412": {
"QA": 0.603448275862069,
"Reason": 0.3076923076923077,
"VQA": 0.5581395348837209,
"VQA_Reasoning": 0.55
},
"mc-vsft-llava_v1.6_vicuna_13b-mcvqa_v4_11_21_80k-2412": {
"QA": 0.6206896551724138,
"Reason": 0.5384615384615384,
"VQA": 0.7441860465116279,
"VQA_Reasoning": 0.7
},
"mc-sft-qwen2_vl_7b-mcqa_v3_12_25_277k-2412": {
"QA": 0.6551724137931034,
"VQA_Reasoning": 0.45,
"Reason": 0.38461538461538464,
"VQA": 0.46511627906976744
},
"mc-vsft-qwen2_vl_7b-2412": {
"VQA": 0.6511627906976745,
"Reason": 0.23076923076923078,
"QA": 0.6206896551724138,
"VQA_Reasoning": 0.75
},
"mc-vsft-qwen2_vl_2b-2412": {
"Reason": 0.15384615384615385,
"VQA_Reasoning": 0.7,
"VQA": 0.5813953488372093,
"QA": 0.1724137931034483
},
"mc-point-qwen2_vl_7b-2502": {
"Gui_Grounding": {
"100": 0.69,
"20": 0.4,
"200": 0.95,
"50": 0.51
},
"Embodied_Grounding": {
"100": 0.6603053435114504,
"20": 0.22137404580152673,
"200": 0.7862595419847328,
"50": 0.4732824427480916
},
"Reason": 0.38461538461538464,
"QA": 0.6724137931034483,
"VQA": 0.5116279069767442,
"VQA_Reasoning": 0.475
},
"mc-bbox-qwen2_vl_7b-2502": {
"Gui_Grounding": {
"100": 0.57,
"20": 0.38,
"200": 0.74,
"50": 0.47
},
"Embodied_Grounding": {
"100": 0.0916030534351145,
"20": 0.015267175572519083,
"200": 0.15648854961832062,
"50": 0.03435114503816794
}
},
"mc-base-qwen2_vl_7b-2502": {
"Gui_Grounding": {
"100": 0.74,
"20": 0.38,
"200": 0.93,
"50": 0.56
},
"Embodied_Grounding": {
"100": 0.648854961832061,
"20": 0.2099236641221374,
"200": 0.7786259541984732,
"50": 0.4618320610687023
},
"Reason": 0.38461538461538464,
"QA": 0.6982758620689655,
"VQA": 0.5348837209302325,
"VQA_Reasoning": 0.55
},
"mc-base-qwen2_vl_7b-v2-ckpt2400-2502": {
"Gui_Grounding": {
"100": 0.92,
"20": 0.79,
"200": 0.99,
"50": 0.88
},
"Embodied_Grounding": {
"100": 0.6068702290076335,
"20": 0.2099236641221374,
"200": 0.7824427480916031,
"50": 0.41603053435114506
},
"QA": 0.7068965517241379,
"VQA": 0.7674418604651163,
"VQA_Reasoning": 0.55,
"Reason": 0.46153846153846156
},
"mc-base-qwen2_vl_7b-v2-ckpt3200-2502": {
"Gui_Grounding": {
"100": 0.93,
"20": 0.84,
"200": 0.99,
"50": 0.9
},
"Embodied_Grounding": {
"100": 0.6259541984732825,
"20": 0.24427480916030533,
"200": 0.7748091603053435,
"50": 0.46946564885496184
},
"QA": 0.6810344827586207,
"VQA": 0.6976744186046512,
"VQA_Reasoning": 0.55,
"Reason": 0.38461538461538464
},
"mc-base-qwen2_vl_2b-v2-ckpt1600-2502": {
"Gui_Grounding": {
"100": 0.54,
"20": 0.32,
"200": 0.93,
"50": 0.38
},
"Embodied_Grounding": {
"100": 0.5038167938931297,
"20": 0.08015267175572519,
"200": 0.7175572519083969,
"50": 0.2748091603053435
},
"QA": 0.3103448275862069,
"VQA": 0.5116279069767442,
"VQA_Reasoning": 0.4,
"Reason": 0.38461538461538464
},
"mc-base-qwen2_vl_2b-v2-ckpt4000-2502": {
"Gui_Grounding": {
"100": 0.59,
"20": 0.39,
"200": 0.94,
"50": 0.45
},
"Embodied_Grounding": {
"100": 0.5305343511450382,
"20": 0.16793893129770993,
"200": 0.7557251908396947,
"50": 0.3473282442748092
}
},
"mc-base-llava_next_vicuna_13b-ckpt4000-2502": {
"Gui_Grounding": {
"100": 0.74,
"20": 0.6,
"200": 0.99,
"50": 0.68
},
"Embodied_Grounding": {
"100": 0.6259541984732825,
"20": 0.15267175572519084,
"200": 0.7748091603053435,
"50": 0.4122137404580153
},
"QA": 0.603448275862069,
"VQA": 0.4883720930232558,
"VQA_Reasoning": 0.4,
"Reason": 0.46153846153846156
},
"mc-reason-qwen2_vl_7b-ckpt2400-2502": {
"Gui_Grounding": {
"100": 0.0,
"20": 0.0,
"200": 0.0,
"50": 0.0
},
"Embodied_Grounding": {
"100": 0.0,
"20": 0.0,
"200": 0.003816793893129771,
"50": 0.0
},
"QA": 0.6551724137931034,
"VQA": 0.6046511627906976,
"VQA_Reasoning": 0.325,
"Reason": 0.5229885057471264
}
}