Commit 2938659 (verified) by RefalMachine
1 Parent(s): b43ce9f

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -60,3 +60,4 @@ llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs8/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs1/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k5_bs1/daru_treewayabstractive.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5_bs1/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,54 @@
+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 512,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 5,
+        "batch_size": 1,
+        "max_sample_per_dataset": 500,
+        "method": "generate"
+    }
+}
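Note: the conversation_template above fully determines how prompts are rendered for openchat-3.5-0106. A minimal sketch of that rendering for a single user turn is shown below; the helper function is illustrative only and is not part of the llmtf API.

```python
# Sketch: render one user turn using the conversation_template fields above.
# render_prompt is a hypothetical helper; llmtf's own renderer may differ.
def render_prompt(user_text: str) -> str:
    global_prefix = "<s>"
    user_role = "GPT4 Correct User:"
    suffix = "GPT4 Correct Assistant:"
    user_message = "{role} {content}<|end_of_turn|>".format(role=user_role, content=user_text)
    return global_prefix + user_message + suffix

print(render_prompt("Hello"))
# <s>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant:
```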
llmtf_eval_k5_bs1/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
+{
+    "task_name": "daru/treewayabstractive",
+    "results": {
+        "rouge1": 0.357438599714093,
+        "rouge2": 0.13372912507444903
+    },
+    "leaderboard_result": 0.245583862394271
+}
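Note: for this summarization task the leaderboard_result coincides with the arithmetic mean of the two ROUGE scores. A quick check of that assumption (inferred from the values above, not quoted from llmtf source):

```python
# Sketch: reproduce leaderboard_result for daru/treewayabstractive,
# assuming it is the unweighted mean of rouge1 and rouge2.
rouge1 = 0.357438599714093
rouge2 = 0.13372912507444903
print((rouge1 + rouge2) / 2)  # ≈ 0.245583862394271
```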
llmtf_eval_k5_bs1/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5_bs1/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,54 @@
+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 5,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}
llmtf_eval_k5_bs1/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
+{
+    "task_name": "darumeru/cp_para_en",
+    "results": {
+        "symbol_per_token": 3.960763996832381,
+        "len": 0.9995281850843424,
+        "lcs": 0.9811766452032213
+    },
+    "leaderboard_result": 0.9811766452032213
+}
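Note: for the copy/paraphrase tasks the leaderboard_result reproduces the lcs metric directly. A small check of that observation against the uploaded file (an assumption drawn from these records, not a statement about llmtf internals):

```python
import json

# Sketch: verify that leaderboard_result equals results["lcs"] for cp_para_en.
with open("llmtf_eval_k5_bs1/darumeru_cp_para_en_total.jsonl") as f:
    record = json.loads(f.read())
assert record["leaderboard_result"] == record["results"]["lcs"]
```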
llmtf_eval_k5_bs1/darumeru_cp_para_ru.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5_bs1/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,54 @@
+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 5,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}
llmtf_eval_k5_bs1/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
+{
+    "task_name": "darumeru/cp_para_ru",
+    "results": {
+        "symbol_per_token": 2.4702341373518975,
+        "len": 0.9993717494721948,
+        "lcs": 0.958885193897962
+    },
+    "leaderboard_result": 0.958885193897962
+}
llmtf_eval_k5_bs1/evaluation_log.txt CHANGED
@@ -251,3 +251,23 @@ INFO: 2024-07-13 16:31:58,534: llmtf.base.evaluator: Ended eval
 INFO: 2024-07-13 16:31:58,610: llmtf.base.evaluator:
 mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
 0.627 0.404 0.533 0.830 0.507 0.608 0.122 1.000 0.999 0.487 0.754 0.540 0.876 0.659 0.528 0.553
+INFO: 2024-07-13 16:33:42,900: llmtf.base.daru/treewayabstractive: Processing Dataset: 2112.96s
+INFO: 2024-07-13 16:33:42,904: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
+INFO: 2024-07-13 16:33:42,937: llmtf.base.daru/treewayabstractive: {'rouge1': 0.357438599714093, 'rouge2': 0.13372912507444903}
+INFO: 2024-07-13 16:33:42,941: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:33:42,951: llmtf.base.evaluator:
+mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
+0.603 0.246 0.404 0.533 0.830 0.507 0.608 0.122 1.000 0.999 0.487 0.754 0.540 0.876 0.659 0.528 0.553
+INFO: 2024-07-13 16:34:08,837: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 908.48s
+INFO: 2024-07-13 16:34:08,840: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
+INFO: 2024-07-13 16:34:08,857: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.4702341373518975, 'len': 0.9993717494721948, 'lcs': 0.958885193897962}
+INFO: 2024-07-13 16:34:08,859: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [32000]
+INFO: 2024-07-13 16:34:08,859: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 16:34:11,992: llmtf.base.darumeru/cp_para_en: Loading Dataset: 3.13s
+INFO: 2024-07-13 16:45:47,059: llmtf.base.darumeru/cp_para_en: Processing Dataset: 695.07s
+INFO: 2024-07-13 16:45:47,066: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
+INFO: 2024-07-13 16:45:47,099: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 3.960763996832381, 'len': 0.9995281850843424, 'lcs': 0.9811766452032213}
+INFO: 2024-07-13 16:45:47,100: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 16:45:47,126: llmtf.base.evaluator:
+mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
+0.644 0.246 0.404 0.533 0.830 0.507 0.608 0.122 0.981 0.959 1.000 0.999 0.487 0.754 0.540 0.876 0.659 0.528 0.553
llmtf_eval_k5_bs1/evaluation_results.txt CHANGED
@@ -1,2 +1,2 @@
-mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
-0.627 0.404 0.533 0.830 0.507 0.608 0.122 1.000 0.999 0.487 0.754 0.540 0.876 0.659 0.528 0.553
+mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
+0.644 0.246 0.404 0.533 0.830 0.507 0.608 0.122 0.981 0.959 1.000 0.999 0.487 0.754 0.540 0.876 0.659 0.528 0.553
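Note: the mean column of the updated results table is consistent with an unweighted average of the 18 per-task leaderboard scores. A minimal sketch to reproduce it (the aggregation rule is inferred from the numbers above; llmtf's exact rounding may differ):

```python
# Sketch: recompute the "mean" column of evaluation_results.txt
# from the per-task scores in the row above, assuming a simple average.
scores = [0.246, 0.404, 0.533, 0.830, 0.507, 0.608, 0.122, 0.981, 0.959,
          1.000, 0.999, 0.487, 0.754, 0.540, 0.876, 0.659, 0.528, 0.553]
print(round(sum(scores) / len(scores), 3))  # 0.644
```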
llmtf_eval_k5_bs1/nlpcoreteam_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,54 @@
+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "openchat/openchat-3.5-0106",
+        "generation_config": {
+            "_from_model_config": true,
+            "bos_token_id": 1,
+            "do_sample": true,
+            "eos_token_id": [
+                32000
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 32000,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2"
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "{role} {content}<|end_of_turn|>",
+            "user_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template": "{role} {content}<|end_of_turn|>",
+            "bot_message_template_incomplete": "{role} {content}",
+            "user_role": "GPT4 Correct User:",
+            "bot_role": "GPT4 Correct Assistant:",
+            "system_role": "GPT4 Correct System:",
+            "global_prefix": "<s>",
+            "suffix": "GPT4 Correct Assistant:",
+            "add_special_tokens": false,
+            "eos_token": "<|end_of_turn|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": true,
+        "space_token": 28705,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 5,
+        "batch_size": 1,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}
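Note: unlike the generative tasks above, this task uses method "calculate_tokens_proba". The name suggests that multiple-choice answers are scored by the probability the model assigns to each option token rather than by free-form generation. The sketch below is a generic, hypothetical illustration of that idea with transformers; it is not llmtf's actual implementation, and the prompt and variable names are made up.

```python
# Generic sketch: score answer options A-D by next-token probability.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "openchat/openchat-3.5-0106"
tok = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="cuda:0")

# Hypothetical few-shot MMLU-style prompt rendered in the OpenChat format.
prompt = "GPT4 Correct User: Question... Options: A, B, C, D<|end_of_turn|>GPT4 Correct Assistant:"
inputs = tok(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    next_token_logits = model(**inputs).logits[0, -1]
probs = torch.softmax(next_token_logits, dim=-1)

options = ["A", "B", "C", "D"]
option_ids = [tok(o, add_special_tokens=False).input_ids[-1] for o in options]
prediction = options[int(torch.argmax(probs[option_ids]))]  # most probable option token
```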
llmtf_eval_k5_bs1/nlpcoreteam_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
+{
+    "task_name": "nlpcoreteam/ruMMLU",
+    "results": {
+        "acc": 0.5283401236619901
+    },
+    "leaderboard_result": 0.5283401236619901
+}