jylins commited on
Commit
7b13ded
·
verified ·
1 Parent(s): c8b7cdd

Upload folder using huggingface_hub

Browse files
checkpoint_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6bb07ec05bd4f4c49eb41d351e0c3b736eeb286e57c619acbbf0d9d4ba2c9aa
3
+ size 2897697355
checkpoint_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d22cc65257192dcf16a1367c3d3e903387a5218d8dc18b14a133543c283105b
3
+ size 2897697355
checkpoint_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df666a589e7e218392670d8db75007ff53015645e836bd8272dc9d84fbd2958a
3
+ size 2897697355
checkpoint_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1b2d7f7ac32b75c0e79623f4430ea3f167e27bc5b904baaba703a68a9b940ab
3
+ size 2897697355
checkpoint_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:506125389afa9f0ec9161856146e0f7814c21f7064b3270be38f99a32be83e4c
3
+ size 2897697355
checkpoint_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9d6d562e5ebf4e74e76c6bf816d04d9b397a69ef4741e6c4655f616053a3110
3
+ size 2897697355
log.txt ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run": {
3
+ "task": "video_text_pretrain",
4
+ "lr_sched": "linear_warmup_cosine_lr",
5
+ "init_lr": 0.0001,
6
+ "min_lr": 1e-05,
7
+ "warmup_lr": 1e-06,
8
+ "weight_decay": 0.05,
9
+ "max_epoch": 6,
10
+ "iters_per_epoch": 3102,
11
+ "batch_size_train": 1,
12
+ "batch_size_eval": 4,
13
+ "num_workers": 4,
14
+ "warmup_steps": 3102,
15
+ "accum_grad_iters": 8,
16
+ "seed": 42,
17
+ "output_dir": "ckpt/timechat/train_stage2_llama2_7b_instruct12.4k_charades_bz4_f96_epoch6_ws32_stride32_mfp96_mtl2048_lr1e-4",
18
+ "amp": true,
19
+ "resume_ckpt_path": null,
20
+ "evaluate": false,
21
+ "train_splits": [
22
+ "train"
23
+ ],
24
+ "device": "cuda",
25
+ "world_size": 4,
26
+ "dist_url": "env://",
27
+ "distributed": true,
28
+ "rank": 0,
29
+ "gpu": 0,
30
+ "dist_backend": "nccl"
31
+ },
32
+ "model": {
33
+ "arch": "timechat",
34
+ "image_size": 224,
35
+ "drop_path_rate": 0,
36
+ "use_grad_checkpoint": true,
37
+ "vit_precision": "fp16",
38
+ "freeze_vit": true,
39
+ "freeze_qformer": false,
40
+ "num_query_token": 32,
41
+ "llama_model": "ckpt/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/",
42
+ "prompt": "",
43
+ "model_type": "pretrain_llama_v2",
44
+ "vit_model": "ckpt/eva-vit-g/eva_vit_g.pth",
45
+ "q_former_model": "ckpt/instruct-blip/instruct_blip_vicuna7b_trimmed.pth",
46
+ "ckpt": "ckpt/timechat/timechat_7b.pth",
47
+ "frozen_llama_proj": false,
48
+ "frozen_video_Qformer": false,
49
+ "fusion_head_layers": 2,
50
+ "max_frame_pos": 96,
51
+ "fusion_header_type": "seqTransf",
52
+ "max_txt_len": 2048,
53
+ "end_sym": "</s>",
54
+ "prompt_path": "",
55
+ "prompt_template": "[INST] <<SYS>>\\n \\n<</SYS>>\\n\\n{} [/INST] ",
56
+ "lora": true,
57
+ "lora_inference_mode": false,
58
+ "qformer_text_input": true,
59
+ "window_size": 32,
60
+ "stride": 32
61
+ },
62
+ "preprocess": {
63
+ "vis_processor": {
64
+ "train": {
65
+ "name": "alpro_video_train",
66
+ "image_size": 224,
67
+ "n_frms": 8
68
+ },
69
+ "eval": {
70
+ "name": "alpro_video_eval",
71
+ "image_size": 224,
72
+ "n_frms": 8
73
+ }
74
+ },
75
+ "text_processor": {
76
+ "train": {
77
+ "name": "blip_caption"
78
+ },
79
+ "eval": {
80
+ "name": "blip_caption"
81
+ }
82
+ }
83
+ },
84
+ "datasets": {
85
+ "charades_instruct": {
86
+ "data_type": "video",
87
+ "build_info": {
88
+ "anno_dir": "data/TimeIT/data/temporal_video_grounding/charades/instruct_tvg_12.4k_charades.json",
89
+ "videos_dir": "data/"
90
+ },
91
+ "vis_processor": {
92
+ "train": {
93
+ "name": "alpro_video_train",
94
+ "n_frms": 96,
95
+ "image_size": 224
96
+ }
97
+ },
98
+ "text_processor": {
99
+ "train": {
100
+ "name": "blip_caption"
101
+ }
102
+ },
103
+ "num_video_query_token": 32,
104
+ "tokenizer_name": "ckpt/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/",
105
+ "model_type": "llama_v2",
106
+ "num_frm": 96,
107
+ "sample_type": "rand",
108
+ "max_txt_len": 2048,
109
+ "stride": 32
110
+ }
111
+ }
112
+ }
113
+ {"train_lr": "0.000", "train_loss": "0.482"}
114
+ {"train_lr": "0.000", "train_loss": "0.443"}
115
+ {"train_lr": "0.000", "train_loss": "0.426"}
116
+ {"train_lr": "0.000", "train_loss": "0.409"}
117
+ {"train_lr": "0.000", "train_loss": "0.387"}
118
+ {"train_lr": "0.000", "train_loss": "0.361"}
tvg/charades_test_f96_result.json ADDED
The diff for this file is too large to render. See raw diff
 
tvg/fmt_charades_test_f96_result.json ADDED
The diff for this file is too large to render. See raw diff
 
tvg/fmt_charades_test_f96_result.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # pred video timestamps 3720; # gt video timestamps 3720
2
+ IOU 0.3: 70.3763440860215
3
+ IOU 0.5: 53.38709677419355
4
+ IOU 0.7: 28.252688172043012
tvg/log.txt ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run": {
3
+ "task": "video_text_pretrain"
4
+ },
5
+ "model": {
6
+ "arch": "timechat",
7
+ "image_size": 224,
8
+ "drop_path_rate": 0,
9
+ "use_grad_checkpoint": true,
10
+ "vit_precision": "fp16",
11
+ "freeze_vit": true,
12
+ "freeze_qformer": true,
13
+ "num_query_token": 32,
14
+ "llama_model": "ckpt/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/",
15
+ "prompt": "",
16
+ "model_type": "pretrain_llama_v2",
17
+ "max_txt_len": 2048,
18
+ "end_sym": "</s>",
19
+ "low_resource": false,
20
+ "frozen_llama_proj": true,
21
+ "frozen_video_Qformer": true,
22
+ "vit_model": "ckpt/eva-vit-g/eva_vit_g.pth",
23
+ "q_former_model": "ckpt/instruct-blip/instruct_blip_vicuna7b_trimmed.pth",
24
+ "ckpt": "timechat/ckpt/timechat/train_stage2_llama2_7b_instruct12.4k_charades_bz4_f96_epoch6_ws32_stride32_mfp96_mtl2048_lr1e-4/20240407024/checkpoint_5.pth",
25
+ "fusion_head_layers": 2,
26
+ "max_frame_pos": 96,
27
+ "fusion_header_type": "seqTransf",
28
+ "lora": true,
29
+ "lora_inference_mode": true,
30
+ "qformer_text_input": true,
31
+ "window_size": 32,
32
+ "stride": 32,
33
+ "device_8bit": "0"
34
+ },
35
+ "preprocess": {
36
+ "vis_processor": {
37
+ "train": {
38
+ "name": "alpro_video_train",
39
+ "image_size": 224,
40
+ "n_frms": 8
41
+ },
42
+ "eval": {
43
+ "name": "alpro_video_eval",
44
+ "image_size": 224,
45
+ "n_frms": 8
46
+ }
47
+ },
48
+ "text_processor": {
49
+ "train": {
50
+ "name": "blip_caption"
51
+ },
52
+ "eval": {
53
+ "name": "blip_caption"
54
+ }
55
+ }
56
+ },
57
+ "datasets": {
58
+ "webvid": {
59
+ "data_type": "video",
60
+ "build_info": {
61
+ "anno_dir": "path/webvid/webvid_tain_data/annotations/",
62
+ "videos_dir": "path//webvid/webvid_tain_data/videos/"
63
+ },
64
+ "vis_processor": {
65
+ "train": {
66
+ "name": "alpro_video_eval",
67
+ "n_frms": 96,
68
+ "image_size": 224
69
+ }
70
+ },
71
+ "text_processor": {
72
+ "train": {
73
+ "name": "blip_caption"
74
+ }
75
+ },
76
+ "num_video_query_token": 32,
77
+ "tokenizer_name": "ckpt/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/",
78
+ "model_type": "llama_v2",
79
+ "num_frm": 96,
80
+ "sample_type": "uniform",
81
+ "max_txt_len": 2048,
82
+ "stride": 32
83
+ }
84
+ }
85
+ }
86
+
87
+ cfg_path : eval_configs/timechat.yaml
88
+ anno_path : data/TimeIT/data/temporal_video_grounding/charades/charades_annotation
89
+ video_path : data/Charades/videos/
90
+ model_type : None
91
+ task : tvg
92
+ dataset : charades
93
+ output_dir : timechat/ckpt/timechat/train_stage2_llama2_7b_instruct12.4k_charades_bz4_f96_epoch6_ws32_stride32_mfp96_mtl2048_lr1e-4/20240407024/tvg
94
+ split : test
95
+ num_frames : 96
96
+ top_p : 0.8
97
+ temperature : 1
98
+ batch_size : 8
99
+ gpu_id : 0
100
+ timestamp : False
101
+ timestamp_file :
102
+ debug : False
103
+ prompt_file : prompts/tvg_description_zeroshot.txt
104
+ timechat_model_path : timechat/ckpt/timechat/train_stage2_llama2_7b_instruct12.4k_charades_bz4_f96_epoch6_ws32_stride32_mfp96_mtl2048_lr1e-4/20240407024/checkpoint_5.pth
105
+ sample_num : -1
106
+ example_output : False
107
+ no_lora : False
108
+ post_check : False
109
+ post_check_prompt_file : prompts/dvc_post_check.txt
110
+ asr : False
111
+ asr_path : data/YouCook2-BB/YouCook2_asr_denseCap/whisper_outputs_with_time/small.en.cleaned/
112
+ options : []