Bencode92 committed on
Commit
89099b1
·
1 Parent(s): 5dd060b

🔄 Incremental importance | Acc: 0.760, F1: 0.685

Browse files
README.md CHANGED
@@ -21,19 +21,19 @@ Fine-tuned FinBERT model for financial importance analysis in TradePulse.
21
 
22
  ## Performance
23
 
24
- *Last training: 2025-09-09 16:49*
25
  *Dataset: `base_reference.csv` (1797 samples)*
26
 
27
  | Metric | Value |
28
  |--------|-------|
29
- | Loss | 0.6364 |
30
- | Accuracy | 0.8044 |
31
- | F1 Score | 0.7994 |
32
 
33
- | F1 Macro | 0.7994 |
34
 
35
- | Precision | 0.7989 |
36
- | Recall | 0.8044 |
37
 
38
  ## Training Details
39
 
@@ -65,4 +65,4 @@ predictions = outputs.logits.softmax(dim=-1)
65
  ## Model Card Authors
66
 
67
  - TradePulse ML Team
68
- - Auto-generated on 2025-09-09 16:49:13
 
21
 
22
  ## Performance
23
 
24
+ *Last training: 2026-04-20 19:18*
25
  *Dataset: `base_reference.csv` (1797 samples)*
26
 
27
  | Metric | Value |
28
  |--------|-------|
29
+ | Loss | 1.0043 |
30
+ | Accuracy | 0.8156 |
31
+ | F1 Score | 0.8143 |
32
 
33
+ | F1 Macro | 0.7531 |
34
 
35
+ | Precision | 0.8134 |
36
+ | Recall | 0.8156 |
37
 
38
  ## Training Details
39
 
 
65
  ## Model Card Authors
66
 
67
  - TradePulse ML Team
68
+ - Auto-generated on 2026-04-20 19:18:26
checkpoint-674/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd80fd639c367555a09626bec7fc4ef19dd64a7e668c299d8a2899cbbb3fd16e
3
  size 439039996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7748177c8395df0da39e54a0663c3615146760c6059125dc394291026d6663e5
3
  size 439039996
checkpoint-674/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7994034079615475,
3
  "best_model_checkpoint": "hf-importance-production/checkpoint-674",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,501 +10,501 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.02967359050445104,
13
- "grad_norm": 168.60842895507812,
14
  "learning_rate": 2.9761904761904765e-07,
15
- "loss": 1.824,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.05934718100890208,
20
- "grad_norm": 197.9370574951172,
21
  "learning_rate": 5.952380952380953e-07,
22
- "loss": 1.9581,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.08902077151335312,
27
- "grad_norm": 18.61470603942871,
28
  "learning_rate": 8.928571428571429e-07,
29
- "loss": 1.3577,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.11869436201780416,
34
- "grad_norm": 171.209228515625,
35
  "learning_rate": 1.1904761904761906e-06,
36
- "loss": 1.5439,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.14836795252225518,
41
- "grad_norm": 30.70751953125,
42
  "learning_rate": 1.4880952380952381e-06,
43
- "loss": 0.935,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.17804154302670624,
48
- "grad_norm": 1.3179957866668701,
49
  "learning_rate": 1.7857142857142859e-06,
50
- "loss": 0.863,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.20771513353115728,
55
- "grad_norm": 46.17071533203125,
56
  "learning_rate": 2.0833333333333334e-06,
57
- "loss": 0.9118,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.23738872403560832,
62
- "grad_norm": 136.59942626953125,
63
  "learning_rate": 2.380952380952381e-06,
64
- "loss": 1.3016,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.26706231454005935,
69
- "grad_norm": 164.12306213378906,
70
  "learning_rate": 2.6785714285714285e-06,
71
- "loss": 1.3362,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.29673590504451036,
76
- "grad_norm": 72.49063110351562,
77
  "learning_rate": 2.9761904761904763e-06,
78
- "loss": 0.7472,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.3264094955489614,
83
- "grad_norm": 63.815528869628906,
84
  "learning_rate": 3.273809523809524e-06,
85
- "loss": 0.5805,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.3560830860534125,
90
- "grad_norm": 97.9521255493164,
91
  "learning_rate": 3.5714285714285718e-06,
92
- "loss": 0.6095,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.3857566765578635,
97
- "grad_norm": 46.075660705566406,
98
  "learning_rate": 3.869047619047619e-06,
99
- "loss": 0.9813,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.41543026706231456,
104
- "grad_norm": 56.08775329589844,
105
  "learning_rate": 4.166666666666667e-06,
106
- "loss": 0.8756,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.44510385756676557,
111
- "grad_norm": 41.81604766845703,
112
  "learning_rate": 4.464285714285715e-06,
113
- "loss": 1.0463,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.47477744807121663,
118
- "grad_norm": 42.02016067504883,
119
  "learning_rate": 4.761904761904762e-06,
120
- "loss": 1.1095,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.5044510385756676,
125
- "grad_norm": 92.67354583740234,
126
  "learning_rate": 5.05952380952381e-06,
127
- "loss": 1.0643,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.5341246290801187,
132
- "grad_norm": 92.83062744140625,
133
  "learning_rate": 5.357142857142857e-06,
134
- "loss": 0.6986,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.5637982195845698,
139
- "grad_norm": 37.39644241333008,
140
  "learning_rate": 5.654761904761905e-06,
141
- "loss": 1.1433,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.5934718100890207,
146
- "grad_norm": 50.91009521484375,
147
  "learning_rate": 5.9523809523809525e-06,
148
- "loss": 0.7077,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.6231454005934718,
153
- "grad_norm": 38.2639045715332,
154
  "learning_rate": 6.25e-06,
155
- "loss": 0.7619,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.6528189910979229,
160
- "grad_norm": 2.9356582164764404,
161
  "learning_rate": 6.547619047619048e-06,
162
- "loss": 0.4398,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.6824925816023739,
167
- "grad_norm": 49.96760940551758,
168
  "learning_rate": 6.845238095238096e-06,
169
- "loss": 1.1917,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.712166172106825,
174
- "grad_norm": 73.6959228515625,
175
  "learning_rate": 7.1428571428571436e-06,
176
- "loss": 0.4473,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.7418397626112759,
181
- "grad_norm": 52.25584411621094,
182
  "learning_rate": 7.440476190476191e-06,
183
- "loss": 1.3084,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.771513353115727,
188
- "grad_norm": 66.60286712646484,
189
  "learning_rate": 7.738095238095238e-06,
190
- "loss": 0.9924,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.8011869436201781,
195
- "grad_norm": 61.88084411621094,
196
  "learning_rate": 8.035714285714286e-06,
197
- "loss": 0.9188,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.8308605341246291,
202
- "grad_norm": 32.82881546020508,
203
  "learning_rate": 8.333333333333334e-06,
204
- "loss": 0.5267,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.8605341246290801,
209
- "grad_norm": 50.554412841796875,
210
  "learning_rate": 8.630952380952381e-06,
211
- "loss": 0.8353,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.8902077151335311,
216
- "grad_norm": 74.67025756835938,
217
  "learning_rate": 8.92857142857143e-06,
218
- "loss": 0.7204,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.9198813056379822,
223
- "grad_norm": 73.39923095703125,
224
  "learning_rate": 9.226190476190477e-06,
225
- "loss": 1.5639,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.9495548961424333,
230
- "grad_norm": 43.86273193359375,
231
  "learning_rate": 9.523809523809525e-06,
232
- "loss": 0.6084,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.9792284866468842,
237
- "grad_norm": 60.18267822265625,
238
  "learning_rate": 9.821428571428573e-06,
239
- "loss": 0.8311,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 1.0,
244
- "eval_accuracy": 0.72,
245
- "eval_f1": 0.6758345853065657,
246
- "eval_f1_macro": 0.5593865060621974,
247
- "eval_loss": 1.2557945251464844,
248
- "eval_precision": 0.7713116348671903,
249
- "eval_precision_macro": 0.7813692480359148,
250
- "eval_recall": 0.72,
251
- "eval_recall_macro": 0.5248060178139268,
252
- "eval_runtime": 247.1233,
253
- "eval_samples_per_second": 1.821,
254
- "eval_steps_per_second": 0.457,
255
  "step": 337
256
  },
257
  {
258
  "epoch": 1.0089020771513353,
259
- "grad_norm": 86.58915710449219,
260
  "learning_rate": 9.88165680473373e-06,
261
- "loss": 0.9621,
262
  "step": 340
263
  },
264
  {
265
  "epoch": 1.0385756676557865,
266
- "grad_norm": 46.461692810058594,
267
  "learning_rate": 9.585798816568049e-06,
268
- "loss": 0.5792,
269
  "step": 350
270
  },
271
  {
272
  "epoch": 1.0682492581602374,
273
- "grad_norm": 49.093719482421875,
274
  "learning_rate": 9.289940828402368e-06,
275
- "loss": 0.6038,
276
  "step": 360
277
  },
278
  {
279
  "epoch": 1.0979228486646884,
280
- "grad_norm": 43.62152862548828,
281
  "learning_rate": 8.994082840236687e-06,
282
- "loss": 0.6391,
283
  "step": 370
284
  },
285
  {
286
  "epoch": 1.1275964391691395,
287
- "grad_norm": 14.27550983428955,
288
  "learning_rate": 8.698224852071006e-06,
289
- "loss": 0.573,
290
  "step": 380
291
  },
292
  {
293
  "epoch": 1.1572700296735905,
294
- "grad_norm": 67.1692123413086,
295
  "learning_rate": 8.402366863905327e-06,
296
- "loss": 0.409,
297
  "step": 390
298
  },
299
  {
300
  "epoch": 1.1869436201780414,
301
- "grad_norm": 43.99798583984375,
302
  "learning_rate": 8.106508875739646e-06,
303
- "loss": 0.4652,
304
  "step": 400
305
  },
306
  {
307
  "epoch": 1.2166172106824926,
308
- "grad_norm": 29.33543586730957,
309
  "learning_rate": 7.810650887573965e-06,
310
- "loss": 0.2783,
311
  "step": 410
312
  },
313
  {
314
  "epoch": 1.2462908011869436,
315
- "grad_norm": 61.13819885253906,
316
  "learning_rate": 7.5147928994082845e-06,
317
- "loss": 0.8814,
318
  "step": 420
319
  },
320
  {
321
  "epoch": 1.2759643916913945,
322
- "grad_norm": 37.44241714477539,
323
  "learning_rate": 7.218934911242604e-06,
324
- "loss": 0.4763,
325
  "step": 430
326
  },
327
  {
328
  "epoch": 1.3056379821958457,
329
- "grad_norm": 58.24061965942383,
330
  "learning_rate": 6.923076923076923e-06,
331
- "loss": 0.9232,
332
  "step": 440
333
  },
334
  {
335
  "epoch": 1.3353115727002967,
336
- "grad_norm": 0.6983101963996887,
337
  "learning_rate": 6.627218934911244e-06,
338
- "loss": 0.3548,
339
  "step": 450
340
  },
341
  {
342
  "epoch": 1.3649851632047478,
343
- "grad_norm": 81.1714096069336,
344
  "learning_rate": 6.331360946745563e-06,
345
- "loss": 1.1273,
346
  "step": 460
347
  },
348
  {
349
  "epoch": 1.3946587537091988,
350
- "grad_norm": 9.78393840789795,
351
  "learning_rate": 6.035502958579882e-06,
352
- "loss": 0.5734,
353
  "step": 470
354
  },
355
  {
356
  "epoch": 1.4243323442136497,
357
- "grad_norm": 3.354196071624756,
358
  "learning_rate": 5.739644970414202e-06,
359
- "loss": 0.4616,
360
  "step": 480
361
  },
362
  {
363
  "epoch": 1.454005934718101,
364
- "grad_norm": 30.13648796081543,
365
  "learning_rate": 5.443786982248521e-06,
366
- "loss": 1.4066,
367
  "step": 490
368
  },
369
  {
370
  "epoch": 1.4836795252225519,
371
- "grad_norm": 56.0758056640625,
372
  "learning_rate": 5.14792899408284e-06,
373
- "loss": 0.5536,
374
  "step": 500
375
  },
376
  {
377
  "epoch": 1.513353115727003,
378
- "grad_norm": 41.43285369873047,
379
  "learning_rate": 4.85207100591716e-06,
380
- "loss": 0.3679,
381
  "step": 510
382
  },
383
  {
384
  "epoch": 1.543026706231454,
385
- "grad_norm": 45.90950012207031,
386
  "learning_rate": 4.55621301775148e-06,
387
- "loss": 0.4564,
388
  "step": 520
389
  },
390
  {
391
  "epoch": 1.572700296735905,
392
- "grad_norm": 75.42010498046875,
393
  "learning_rate": 4.2603550295858e-06,
394
- "loss": 0.6049,
395
  "step": 530
396
  },
397
  {
398
  "epoch": 1.6023738872403561,
399
- "grad_norm": 115.4093246459961,
400
  "learning_rate": 3.964497041420119e-06,
401
- "loss": 0.4721,
402
  "step": 540
403
  },
404
  {
405
  "epoch": 1.632047477744807,
406
- "grad_norm": 105.15296936035156,
407
  "learning_rate": 3.668639053254438e-06,
408
- "loss": 0.5965,
409
  "step": 550
410
  },
411
  {
412
  "epoch": 1.6617210682492582,
413
- "grad_norm": 28.678022384643555,
414
  "learning_rate": 3.3727810650887576e-06,
415
- "loss": 0.629,
416
  "step": 560
417
  },
418
  {
419
  "epoch": 1.6913946587537092,
420
- "grad_norm": 8.347318649291992,
421
  "learning_rate": 3.0769230769230774e-06,
422
- "loss": 0.8679,
423
  "step": 570
424
  },
425
  {
426
  "epoch": 1.7210682492581602,
427
- "grad_norm": 78.90312194824219,
428
  "learning_rate": 2.7810650887573965e-06,
429
- "loss": 0.6902,
430
  "step": 580
431
  },
432
  {
433
  "epoch": 1.7507418397626113,
434
- "grad_norm": 58.25022506713867,
435
  "learning_rate": 2.485207100591716e-06,
436
- "loss": 0.2924,
437
  "step": 590
438
  },
439
  {
440
  "epoch": 1.7804154302670623,
441
- "grad_norm": 6.382408618927002,
442
  "learning_rate": 2.1893491124260358e-06,
443
- "loss": 0.5099,
444
  "step": 600
445
  },
446
  {
447
  "epoch": 1.8100890207715135,
448
- "grad_norm": 42.28348159790039,
449
  "learning_rate": 1.8934911242603552e-06,
450
- "loss": 0.2495,
451
  "step": 610
452
  },
453
  {
454
  "epoch": 1.8397626112759644,
455
- "grad_norm": 53.895606994628906,
456
  "learning_rate": 1.5976331360946749e-06,
457
- "loss": 0.3869,
458
  "step": 620
459
  },
460
  {
461
  "epoch": 1.8694362017804154,
462
- "grad_norm": 19.67365074157715,
463
  "learning_rate": 1.301775147928994e-06,
464
- "loss": 0.4416,
465
  "step": 630
466
  },
467
  {
468
  "epoch": 1.8991097922848663,
469
- "grad_norm": 6.208206653594971,
470
  "learning_rate": 1.0059171597633138e-06,
471
- "loss": 0.2573,
472
  "step": 640
473
  },
474
  {
475
  "epoch": 1.9287833827893175,
476
- "grad_norm": 76.81669616699219,
477
  "learning_rate": 7.100591715976332e-07,
478
- "loss": 0.9684,
479
  "step": 650
480
  },
481
  {
482
  "epoch": 1.9584569732937687,
483
- "grad_norm": 70.443115234375,
484
  "learning_rate": 4.1420118343195276e-07,
485
- "loss": 0.7257,
486
  "step": 660
487
  },
488
  {
489
  "epoch": 1.9881305637982196,
490
- "grad_norm": 18.408775329589844,
491
  "learning_rate": 1.183431952662722e-07,
492
- "loss": 0.4501,
493
  "step": 670
494
  },
495
  {
496
  "epoch": 2.0,
497
- "eval_accuracy": 0.8044444444444444,
498
- "eval_f1": 0.7994034079615475,
499
- "eval_f1_macro": 0.7226974575811784,
500
- "eval_loss": 0.6364149451255798,
501
- "eval_precision": 0.7989471347051534,
502
- "eval_precision_macro": 0.7605190105677089,
503
- "eval_recall": 0.8044444444444444,
504
- "eval_recall_macro": 0.697208985138428,
505
- "eval_runtime": 247.0192,
506
- "eval_samples_per_second": 1.822,
507
- "eval_steps_per_second": 0.457,
508
  "step": 674
509
  }
510
  ],
 
1
  {
2
+ "best_metric": 0.8142631998503326,
3
  "best_model_checkpoint": "hf-importance-production/checkpoint-674",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.02967359050445104,
13
+ "grad_norm": 3.6716532707214355,
14
  "learning_rate": 2.9761904761904765e-07,
15
+ "loss": 0.7206,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.05934718100890208,
20
+ "grad_norm": 84.27377319335938,
21
  "learning_rate": 5.952380952380953e-07,
22
+ "loss": 0.7268,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.08902077151335312,
27
+ "grad_norm": 26.416622161865234,
28
  "learning_rate": 8.928571428571429e-07,
29
+ "loss": 0.2319,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.11869436201780416,
34
+ "grad_norm": 89.31320190429688,
35
  "learning_rate": 1.1904761904761906e-06,
36
+ "loss": 0.3689,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.14836795252225518,
41
+ "grad_norm": 57.75520324707031,
42
  "learning_rate": 1.4880952380952381e-06,
43
+ "loss": 0.3368,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.17804154302670624,
48
+ "grad_norm": 0.20002073049545288,
49
  "learning_rate": 1.7857142857142859e-06,
50
+ "loss": 0.1607,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.20771513353115728,
55
+ "grad_norm": 1.5216213464736938,
56
  "learning_rate": 2.0833333333333334e-06,
57
+ "loss": 0.306,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.23738872403560832,
62
+ "grad_norm": 41.2434196472168,
63
  "learning_rate": 2.380952380952381e-06,
64
+ "loss": 0.3066,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.26706231454005935,
69
+ "grad_norm": 5.119740962982178,
70
  "learning_rate": 2.6785714285714285e-06,
71
+ "loss": 0.2926,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.29673590504451036,
76
+ "grad_norm": 18.411285400390625,
77
  "learning_rate": 2.9761904761904763e-06,
78
+ "loss": 0.0527,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.3264094955489614,
83
+ "grad_norm": 10.587057113647461,
84
  "learning_rate": 3.273809523809524e-06,
85
+ "loss": 0.0482,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.3560830860534125,
90
+ "grad_norm": 0.3884180188179016,
91
  "learning_rate": 3.5714285714285718e-06,
92
+ "loss": 0.0497,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.3857566765578635,
97
+ "grad_norm": 1.365146279335022,
98
  "learning_rate": 3.869047619047619e-06,
99
+ "loss": 0.3155,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.41543026706231456,
104
+ "grad_norm": 4.573118209838867,
105
  "learning_rate": 4.166666666666667e-06,
106
+ "loss": 0.311,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.44510385756676557,
111
+ "grad_norm": 1.426039457321167,
112
  "learning_rate": 4.464285714285715e-06,
113
+ "loss": 0.3497,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.47477744807121663,
118
+ "grad_norm": 0.09445377439260483,
119
  "learning_rate": 4.761904761904762e-06,
120
+ "loss": 0.2294,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.5044510385756676,
125
+ "grad_norm": 65.85540008544922,
126
  "learning_rate": 5.05952380952381e-06,
127
+ "loss": 0.4496,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.5341246290801187,
132
+ "grad_norm": 11.331717491149902,
133
  "learning_rate": 5.357142857142857e-06,
134
+ "loss": 0.0287,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.5637982195845698,
139
+ "grad_norm": 0.08736006170511246,
140
  "learning_rate": 5.654761904761905e-06,
141
+ "loss": 0.2235,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.5934718100890207,
146
+ "grad_norm": 0.08108571171760559,
147
  "learning_rate": 5.9523809523809525e-06,
148
+ "loss": 0.1383,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.6231454005934718,
153
+ "grad_norm": 1.3186999559402466,
154
  "learning_rate": 6.25e-06,
155
+ "loss": 0.0586,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.6528189910979229,
160
+ "grad_norm": 0.5177898406982422,
161
  "learning_rate": 6.547619047619048e-06,
162
+ "loss": 0.1747,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.6824925816023739,
167
+ "grad_norm": 0.0261867493391037,
168
  "learning_rate": 6.845238095238096e-06,
169
+ "loss": 0.3088,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.712166172106825,
174
+ "grad_norm": 71.2027359008789,
175
  "learning_rate": 7.1428571428571436e-06,
176
+ "loss": 0.072,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.7418397626112759,
181
+ "grad_norm": 1.161055564880371,
182
  "learning_rate": 7.440476190476191e-06,
183
+ "loss": 0.2914,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.771513353115727,
188
+ "grad_norm": 0.14359746873378754,
189
  "learning_rate": 7.738095238095238e-06,
190
+ "loss": 0.1225,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.8011869436201781,
195
+ "grad_norm": 1.0606952905654907,
196
  "learning_rate": 8.035714285714286e-06,
197
+ "loss": 0.0171,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.8308605341246291,
202
+ "grad_norm": 0.6554273962974548,
203
  "learning_rate": 8.333333333333334e-06,
204
+ "loss": 0.0506,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.8605341246290801,
209
+ "grad_norm": 1.1165417432785034,
210
  "learning_rate": 8.630952380952381e-06,
211
+ "loss": 0.0356,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.8902077151335311,
216
+ "grad_norm": 0.5638610124588013,
217
  "learning_rate": 8.92857142857143e-06,
218
+ "loss": 0.0817,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.9198813056379822,
223
+ "grad_norm": 1.0001670122146606,
224
  "learning_rate": 9.226190476190477e-06,
225
+ "loss": 0.3066,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.9495548961424333,
230
+ "grad_norm": 38.69874572753906,
231
  "learning_rate": 9.523809523809525e-06,
232
+ "loss": 0.1078,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.9792284866468842,
237
+ "grad_norm": 56.8232421875,
238
  "learning_rate": 9.821428571428573e-06,
239
+ "loss": 0.2535,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 1.0,
244
+ "eval_accuracy": 0.7755555555555556,
245
+ "eval_f1": 0.7583953503356006,
246
+ "eval_f1_macro": 0.7081696883789205,
247
+ "eval_loss": 1.281579852104187,
248
+ "eval_precision": 0.7929564160050105,
249
+ "eval_precision_macro": 0.775272015765626,
250
+ "eval_recall": 0.7755555555555556,
251
+ "eval_recall_macro": 0.687870603694186,
252
+ "eval_runtime": 244.5666,
253
+ "eval_samples_per_second": 1.84,
254
+ "eval_steps_per_second": 0.462,
255
  "step": 337
256
  },
257
  {
258
  "epoch": 1.0089020771513353,
259
+ "grad_norm": 0.5638492107391357,
260
  "learning_rate": 9.88165680473373e-06,
261
+ "loss": 0.0292,
262
  "step": 340
263
  },
264
  {
265
  "epoch": 1.0385756676557865,
266
+ "grad_norm": 0.47718924283981323,
267
  "learning_rate": 9.585798816568049e-06,
268
+ "loss": 0.0292,
269
  "step": 350
270
  },
271
  {
272
  "epoch": 1.0682492581602374,
273
+ "grad_norm": 1.0324794054031372,
274
  "learning_rate": 9.289940828402368e-06,
275
+ "loss": 0.0045,
276
  "step": 360
277
  },
278
  {
279
  "epoch": 1.0979228486646884,
280
+ "grad_norm": 48.776546478271484,
281
  "learning_rate": 8.994082840236687e-06,
282
+ "loss": 0.191,
283
  "step": 370
284
  },
285
  {
286
  "epoch": 1.1275964391691395,
287
+ "grad_norm": 0.06004100292921066,
288
  "learning_rate": 8.698224852071006e-06,
289
+ "loss": 0.0276,
290
  "step": 380
291
  },
292
  {
293
  "epoch": 1.1572700296735905,
294
+ "grad_norm": 0.47821861505508423,
295
  "learning_rate": 8.402366863905327e-06,
296
+ "loss": 0.009,
297
  "step": 390
298
  },
299
  {
300
  "epoch": 1.1869436201780414,
301
+ "grad_norm": 0.03524915501475334,
302
  "learning_rate": 8.106508875739646e-06,
303
+ "loss": 0.0308,
304
  "step": 400
305
  },
306
  {
307
  "epoch": 1.2166172106824926,
308
+ "grad_norm": 13.793149948120117,
309
  "learning_rate": 7.810650887573965e-06,
310
+ "loss": 0.0126,
311
  "step": 410
312
  },
313
  {
314
  "epoch": 1.2462908011869436,
315
+ "grad_norm": 1.682591438293457,
316
  "learning_rate": 7.5147928994082845e-06,
317
+ "loss": 0.2919,
318
  "step": 420
319
  },
320
  {
321
  "epoch": 1.2759643916913945,
322
+ "grad_norm": 0.007207744754850864,
323
  "learning_rate": 7.218934911242604e-06,
324
+ "loss": 0.0697,
325
  "step": 430
326
  },
327
  {
328
  "epoch": 1.3056379821958457,
329
+ "grad_norm": 0.00030356255592778325,
330
  "learning_rate": 6.923076923076923e-06,
331
+ "loss": 0.1152,
332
  "step": 440
333
  },
334
  {
335
  "epoch": 1.3353115727002967,
336
+ "grad_norm": 0.005271286703646183,
337
  "learning_rate": 6.627218934911244e-06,
338
+ "loss": 0.0046,
339
  "step": 450
340
  },
341
  {
342
  "epoch": 1.3649851632047478,
343
+ "grad_norm": 52.9085578918457,
344
  "learning_rate": 6.331360946745563e-06,
345
+ "loss": 0.1375,
346
  "step": 460
347
  },
348
  {
349
  "epoch": 1.3946587537091988,
350
+ "grad_norm": 0.8953670859336853,
351
  "learning_rate": 6.035502958579882e-06,
352
+ "loss": 0.1016,
353
  "step": 470
354
  },
355
  {
356
  "epoch": 1.4243323442136497,
357
+ "grad_norm": 0.020477179437875748,
358
  "learning_rate": 5.739644970414202e-06,
359
+ "loss": 0.1751,
360
  "step": 480
361
  },
362
  {
363
  "epoch": 1.454005934718101,
364
+ "grad_norm": 0.991908848285675,
365
  "learning_rate": 5.443786982248521e-06,
366
+ "loss": 0.7432,
367
  "step": 490
368
  },
369
  {
370
  "epoch": 1.4836795252225519,
371
+ "grad_norm": 11.726466178894043,
372
  "learning_rate": 5.14792899408284e-06,
373
+ "loss": 0.015,
374
  "step": 500
375
  },
376
  {
377
  "epoch": 1.513353115727003,
378
+ "grad_norm": 3.8922200202941895,
379
  "learning_rate": 4.85207100591716e-06,
380
+ "loss": 0.107,
381
  "step": 510
382
  },
383
  {
384
  "epoch": 1.543026706231454,
385
+ "grad_norm": 0.01926845870912075,
386
  "learning_rate": 4.55621301775148e-06,
387
+ "loss": 0.008,
388
  "step": 520
389
  },
390
  {
391
  "epoch": 1.572700296735905,
392
+ "grad_norm": 0.8362104296684265,
393
  "learning_rate": 4.2603550295858e-06,
394
+ "loss": 0.3872,
395
  "step": 530
396
  },
397
  {
398
  "epoch": 1.6023738872403561,
399
+ "grad_norm": 33.54790496826172,
400
  "learning_rate": 3.964497041420119e-06,
401
+ "loss": 0.0676,
402
  "step": 540
403
  },
404
  {
405
  "epoch": 1.632047477744807,
406
+ "grad_norm": 132.1903076171875,
407
  "learning_rate": 3.668639053254438e-06,
408
+ "loss": 0.1706,
409
  "step": 550
410
  },
411
  {
412
  "epoch": 1.6617210682492582,
413
+ "grad_norm": 0.2985036075115204,
414
  "learning_rate": 3.3727810650887576e-06,
415
+ "loss": 0.2046,
416
  "step": 560
417
  },
418
  {
419
  "epoch": 1.6913946587537092,
420
+ "grad_norm": 0.014743988402187824,
421
  "learning_rate": 3.0769230769230774e-06,
422
+ "loss": 0.2134,
423
  "step": 570
424
  },
425
  {
426
  "epoch": 1.7210682492581602,
427
+ "grad_norm": 129.1288604736328,
428
  "learning_rate": 2.7810650887573965e-06,
429
+ "loss": 0.3908,
430
  "step": 580
431
  },
432
  {
433
  "epoch": 1.7507418397626113,
434
+ "grad_norm": 1.7959301471710205,
435
  "learning_rate": 2.485207100591716e-06,
436
+ "loss": 0.1372,
437
  "step": 590
438
  },
439
  {
440
  "epoch": 1.7804154302670623,
441
+ "grad_norm": 0.01699359342455864,
442
  "learning_rate": 2.1893491124260358e-06,
443
+ "loss": 0.4411,
444
  "step": 600
445
  },
446
  {
447
  "epoch": 1.8100890207715135,
448
+ "grad_norm": 90.1771240234375,
449
  "learning_rate": 1.8934911242603552e-06,
450
+ "loss": 0.069,
451
  "step": 610
452
  },
453
  {
454
  "epoch": 1.8397626112759644,
455
+ "grad_norm": 35.42045593261719,
456
  "learning_rate": 1.5976331360946749e-06,
457
+ "loss": 0.1271,
458
  "step": 620
459
  },
460
  {
461
  "epoch": 1.8694362017804154,
462
+ "grad_norm": 0.6810668706893921,
463
  "learning_rate": 1.301775147928994e-06,
464
+ "loss": 0.2155,
465
  "step": 630
466
  },
467
  {
468
  "epoch": 1.8991097922848663,
469
+ "grad_norm": 0.34031471610069275,
470
  "learning_rate": 1.0059171597633138e-06,
471
+ "loss": 0.1749,
472
  "step": 640
473
  },
474
  {
475
  "epoch": 1.9287833827893175,
476
+ "grad_norm": 146.79493713378906,
477
  "learning_rate": 7.100591715976332e-07,
478
+ "loss": 0.4689,
479
  "step": 650
480
  },
481
  {
482
  "epoch": 1.9584569732937687,
483
+ "grad_norm": 41.91508102416992,
484
  "learning_rate": 4.1420118343195276e-07,
485
+ "loss": 0.6593,
486
  "step": 660
487
  },
488
  {
489
  "epoch": 1.9881305637982196,
490
+ "grad_norm": 1.5943464040756226,
491
  "learning_rate": 1.183431952662722e-07,
492
+ "loss": 0.5305,
493
  "step": 670
494
  },
495
  {
496
  "epoch": 2.0,
497
+ "eval_accuracy": 0.8155555555555556,
498
+ "eval_f1": 0.8142631998503326,
499
+ "eval_f1_macro": 0.7531422918979267,
500
+ "eval_loss": 1.0042973756790161,
501
+ "eval_precision": 0.8133816425120772,
502
+ "eval_precision_macro": 0.7608695652173912,
503
+ "eval_recall": 0.8155555555555556,
504
+ "eval_recall_macro": 0.7461026837321091,
505
+ "eval_runtime": 246.0963,
506
+ "eval_samples_per_second": 1.829,
507
+ "eval_steps_per_second": 0.459,
508
  "step": 674
509
  }
510
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd80fd639c367555a09626bec7fc4ef19dd64a7e668c299d8a2899cbbb3fd16e
3
  size 439039996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7748177c8395df0da39e54a0663c3615146760c6059125dc394291026d6663e5
3
  size 439039996