Ben10x committed on
Commit
7037635
·
verified ·
1 Parent(s): 3cd5cc8

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,8 @@ license: mit
4
  base_model: EleutherAI/gpt-neo-1.3B
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - precision
9
  - recall
@@ -11,7 +13,26 @@ metrics:
11
  - accuracy
12
  model-index:
13
  - name: gpt-medmentions
14
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -19,13 +40,13 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # gpt-medmentions
21
 
22
- This model is a fine-tuned version of [EleutherAI/gpt-neo-1.3B](https://huggingface.co/EleutherAI/gpt-neo-1.3B) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.0903
25
- - Precision: 0.4758
26
- - Recall: 0.5181
27
- - F1: 0.4960
28
- - Accuracy: 0.8518
29
 
30
  ## Model description
31
 
 
4
  base_model: EleutherAI/gpt-neo-1.3B
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - Ben10x/MedMentions-MTI881-NER
9
  metrics:
10
  - precision
11
  - recall
 
13
  - accuracy
14
  model-index:
15
  - name: gpt-medmentions
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: Ben10x/MedMentions-MTI881-NER
22
+ type: Ben10x/MedMentions-MTI881-NER
23
+ metrics:
24
+ - name: Precision
25
+ type: precision
26
+ value: 0.4453316069630269
27
+ - name: Recall
28
+ type: recall
29
+ value: 0.5247499576199356
30
+ - name: F1
31
+ type: f1
32
+ value: 0.48178988326848243
33
+ - name: Accuracy
34
+ type: accuracy
35
+ value: 0.8454107464662687
36
  ---
37
 
38
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
40
 
41
  # gpt-medmentions
42
 
43
+ This model is a fine-tuned version of [EleutherAI/gpt-neo-1.3B](https://huggingface.co/EleutherAI/gpt-neo-1.3B) on the Ben10x/MedMentions-MTI881-NER dataset.
44
  It achieves the following results on the evaluation set:
45
+ - Loss: 0.5111
46
+ - Precision: 0.4453
47
+ - Recall: 0.5247
48
+ - F1: 0.4818
49
+ - Accuracy: 0.8454
50
 
51
  ## Model description
52
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.8431182650696939,
4
- "eval_f1": 0.48405930705999844,
5
- "eval_loss": 0.5186361074447632,
6
- "eval_precision": 0.44261028378758077,
7
- "eval_recall": 0.5340735717918291,
8
- "eval_runtime": 20.2401,
9
  "eval_samples": 2924,
10
- "eval_samples_per_second": 144.466,
11
- "eval_steps_per_second": 18.083,
12
- "predict_accuracy": 0.84832,
13
- "predict_f1": 0.49048134963627926,
14
- "predict_loss": 0.5068721771240234,
15
- "predict_precision": 0.45042996233387816,
16
- "predict_recall": 0.5383504629236389,
17
- "predict_runtime": 19.7633,
18
- "predict_samples_per_second": 148.052,
19
- "predict_steps_per_second": 18.519,
20
  "total_flos": 5.182622875540416e+16,
21
- "train_loss": 0.25193174704527244,
22
- "train_runtime": 8708.7353,
23
  "train_samples": 23399,
24
- "train_samples_per_second": 13.434,
25
- "train_steps_per_second": 3.359
26
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.8454107464662687,
4
+ "eval_f1": 0.48178988326848243,
5
+ "eval_loss": 0.5111122131347656,
6
+ "eval_precision": 0.4453316069630269,
7
+ "eval_recall": 0.5247499576199356,
8
+ "eval_runtime": 20.2472,
9
  "eval_samples": 2924,
10
+ "eval_samples_per_second": 144.415,
11
+ "eval_steps_per_second": 18.077,
12
+ "predict_accuracy": 0.8501538461538461,
13
+ "predict_f1": 0.48921537980618945,
14
+ "predict_loss": 0.5000657439231873,
15
+ "predict_precision": 0.4529994934510457,
16
+ "predict_recall": 0.5317251337806846,
17
+ "predict_runtime": 19.8324,
18
+ "predict_samples_per_second": 147.537,
19
+ "predict_steps_per_second": 18.455,
20
  "total_flos": 5.182622875540416e+16,
21
+ "train_loss": 0.2549698821499816,
22
+ "train_runtime": 7489.8753,
23
  "train_samples": 23399,
24
+ "train_samples_per_second": 15.62,
25
+ "train_steps_per_second": 3.905
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.8431182650696939,
4
- "eval_f1": 0.48405930705999844,
5
- "eval_loss": 0.5186361074447632,
6
- "eval_precision": 0.44261028378758077,
7
- "eval_recall": 0.5340735717918291,
8
- "eval_runtime": 20.2401,
9
  "eval_samples": 2924,
10
- "eval_samples_per_second": 144.466,
11
- "eval_steps_per_second": 18.083
12
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.8454107464662687,
4
+ "eval_f1": 0.48178988326848243,
5
+ "eval_loss": 0.5111122131347656,
6
+ "eval_precision": 0.4453316069630269,
7
+ "eval_recall": 0.5247499576199356,
8
+ "eval_runtime": 20.2472,
9
  "eval_samples": 2924,
10
+ "eval_samples_per_second": 144.415,
11
+ "eval_steps_per_second": 18.077
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.84832,
3
- "predict_f1": 0.49048134963627926,
4
- "predict_loss": 0.5068721771240234,
5
- "predict_precision": 0.45042996233387816,
6
- "predict_recall": 0.5383504629236389,
7
- "predict_runtime": 19.7633,
8
- "predict_samples_per_second": 148.052,
9
- "predict_steps_per_second": 18.519
10
  }
 
1
  {
2
+ "predict_accuracy": 0.8501538461538461,
3
+ "predict_f1": 0.48921537980618945,
4
+ "predict_loss": 0.5000657439231873,
5
+ "predict_precision": 0.4529994934510457,
6
+ "predict_recall": 0.5317251337806846,
7
+ "predict_runtime": 19.8324,
8
+ "predict_samples_per_second": 147.537,
9
+ "predict_steps_per_second": 18.455
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 5.182622875540416e+16,
4
- "train_loss": 0.25193174704527244,
5
- "train_runtime": 8708.7353,
6
  "train_samples": 23399,
7
- "train_samples_per_second": 13.434,
8
- "train_steps_per_second": 3.359
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 5.182622875540416e+16,
4
+ "train_loss": 0.2549698821499816,
5
+ "train_runtime": 7489.8753,
6
  "train_samples": 23399,
7
+ "train_samples_per_second": 15.62,
8
+ "train_steps_per_second": 3.905
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 11700,
3
- "best_metric": 0.5186361074447632,
4
  "best_model_checkpoint": "./output/gpt-medmentions/checkpoint-11700",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
@@ -11,478 +11,478 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08547008547008547,
14
- "grad_norm": 8.512005805969238,
15
  "learning_rate": 4.9153846153846157e-05,
16
- "loss": 0.9503,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.17094017094017094,
21
- "grad_norm": 3.139110803604126,
22
  "learning_rate": 4.829914529914531e-05,
23
- "loss": 0.6673,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2564102564102564,
28
- "grad_norm": 4.947507381439209,
29
  "learning_rate": 4.7444444444444445e-05,
30
- "loss": 0.6468,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3418803418803419,
35
- "grad_norm": 5.252842426300049,
36
  "learning_rate": 4.658974358974359e-05,
37
- "loss": 0.6059,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.42735042735042733,
42
- "grad_norm": 2.6649463176727295,
43
- "learning_rate": 4.5736752136752135e-05,
44
- "loss": 0.5897,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5128205128205128,
49
- "grad_norm": 4.974217414855957,
50
- "learning_rate": 4.4882051282051286e-05,
51
- "loss": 0.5861,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.5982905982905983,
56
- "grad_norm": 3.8443071842193604,
57
- "learning_rate": 4.402735042735043e-05,
58
- "loss": 0.5629,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.6837606837606838,
63
- "grad_norm": 5.033857345581055,
64
- "learning_rate": 4.3172649572649575e-05,
65
- "loss": 0.5581,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.7692307692307693,
70
- "grad_norm": 4.114415168762207,
71
- "learning_rate": 4.231794871794872e-05,
72
- "loss": 0.5432,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.8547008547008547,
77
- "grad_norm": 5.629084587097168,
78
- "learning_rate": 4.146324786324787e-05,
79
- "loss": 0.5461,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 0.9401709401709402,
84
- "grad_norm": 5.255064010620117,
85
- "learning_rate": 4.060854700854701e-05,
86
- "loss": 0.5309,
87
  "step": 5500
88
  },
89
  {
90
  "epoch": 1.0,
91
- "eval_accuracy": 0.8329798580377831,
92
- "eval_f1": 0.4380611839491458,
93
- "eval_loss": 0.5357780456542969,
94
- "eval_precision": 0.41227938976966794,
95
- "eval_recall": 0.4672825902695372,
96
- "eval_runtime": 35.3911,
97
- "eval_samples_per_second": 82.62,
98
- "eval_steps_per_second": 10.342,
99
  "step": 5850
100
  },
101
  {
102
  "epoch": 1.0256410256410255,
103
- "grad_norm": 2.4081554412841797,
104
- "learning_rate": 3.975384615384616e-05,
105
- "loss": 0.4705,
106
  "step": 6000
107
  },
108
  {
109
  "epoch": 1.1111111111111112,
110
- "grad_norm": 2.1898910999298096,
111
- "learning_rate": 3.88991452991453e-05,
112
- "loss": 0.3737,
113
  "step": 6500
114
  },
115
  {
116
  "epoch": 1.1965811965811965,
117
- "grad_norm": 2.9521656036376953,
118
- "learning_rate": 3.804444444444445e-05,
119
- "loss": 0.3675,
120
  "step": 7000
121
  },
122
  {
123
  "epoch": 1.282051282051282,
124
- "grad_norm": 4.8203840255737305,
125
- "learning_rate": 3.719145299145299e-05,
126
- "loss": 0.3916,
127
  "step": 7500
128
  },
129
  {
130
  "epoch": 1.3675213675213675,
131
- "grad_norm": 2.337897777557373,
132
- "learning_rate": 3.633675213675214e-05,
133
- "loss": 0.3648,
134
  "step": 8000
135
  },
136
  {
137
  "epoch": 1.452991452991453,
138
- "grad_norm": 4.426022529602051,
139
- "learning_rate": 3.548205128205128e-05,
140
- "loss": 0.3719,
141
  "step": 8500
142
  },
143
  {
144
  "epoch": 1.5384615384615383,
145
- "grad_norm": 2.349121570587158,
146
- "learning_rate": 3.462735042735043e-05,
147
- "loss": 0.3633,
148
  "step": 9000
149
  },
150
  {
151
  "epoch": 1.623931623931624,
152
- "grad_norm": 3.221034526824951,
153
- "learning_rate": 3.377264957264957e-05,
154
- "loss": 0.3642,
155
  "step": 9500
156
  },
157
  {
158
  "epoch": 1.7094017094017095,
159
- "grad_norm": 4.463802337646484,
160
- "learning_rate": 3.291794871794872e-05,
161
- "loss": 0.3742,
162
  "step": 10000
163
  },
164
  {
165
  "epoch": 1.7948717948717947,
166
- "grad_norm": 2.482924222946167,
167
- "learning_rate": 3.2063247863247865e-05,
168
- "loss": 0.3496,
169
  "step": 10500
170
  },
171
  {
172
  "epoch": 1.8803418803418803,
173
- "grad_norm": 3.6561119556427,
174
- "learning_rate": 3.120854700854701e-05,
175
- "loss": 0.3638,
176
  "step": 11000
177
  },
178
  {
179
  "epoch": 1.965811965811966,
180
- "grad_norm": 5.051749229431152,
181
- "learning_rate": 3.035555555555556e-05,
182
- "loss": 0.3521,
183
  "step": 11500
184
  },
185
  {
186
  "epoch": 2.0,
187
- "eval_accuracy": 0.8431182650696939,
188
- "eval_f1": 0.48405930705999844,
189
- "eval_loss": 0.5186361074447632,
190
- "eval_precision": 0.44261028378758077,
191
- "eval_recall": 0.5340735717918291,
192
- "eval_runtime": 20.534,
193
- "eval_samples_per_second": 142.398,
194
- "eval_steps_per_second": 17.824,
195
  "step": 11700
196
  },
197
  {
198
  "epoch": 2.051282051282051,
199
- "grad_norm": 2.8310258388519287,
200
- "learning_rate": 2.9500854700854703e-05,
201
- "loss": 0.2492,
202
  "step": 12000
203
  },
204
  {
205
  "epoch": 2.1367521367521367,
206
- "grad_norm": 4.582665920257568,
207
- "learning_rate": 2.8646153846153844e-05,
208
- "loss": 0.1745,
209
  "step": 12500
210
  },
211
  {
212
  "epoch": 2.2222222222222223,
213
- "grad_norm": 3.4961071014404297,
214
- "learning_rate": 2.779145299145299e-05,
215
- "loss": 0.1652,
216
  "step": 13000
217
  },
218
  {
219
  "epoch": 2.3076923076923075,
220
- "grad_norm": 3.6112935543060303,
221
- "learning_rate": 2.6936752136752136e-05,
222
- "loss": 0.1778,
223
  "step": 13500
224
  },
225
  {
226
  "epoch": 2.393162393162393,
227
- "grad_norm": 3.437180280685425,
228
- "learning_rate": 2.6082051282051283e-05,
229
- "loss": 0.1736,
230
  "step": 14000
231
  },
232
  {
233
  "epoch": 2.4786324786324787,
234
- "grad_norm": 4.227977275848389,
235
- "learning_rate": 2.5227350427350428e-05,
236
- "loss": 0.1761,
237
  "step": 14500
238
  },
239
  {
240
  "epoch": 2.564102564102564,
241
- "grad_norm": 4.455820083618164,
242
- "learning_rate": 2.4372649572649575e-05,
243
- "loss": 0.1669,
244
  "step": 15000
245
  },
246
  {
247
  "epoch": 2.6495726495726495,
248
- "grad_norm": 1.813612461090088,
249
- "learning_rate": 2.3519658119658118e-05,
250
- "loss": 0.1684,
251
  "step": 15500
252
  },
253
  {
254
  "epoch": 2.735042735042735,
255
- "grad_norm": 3.37239146232605,
256
  "learning_rate": 2.2666666666666668e-05,
257
- "loss": 0.1814,
258
  "step": 16000
259
  },
260
  {
261
  "epoch": 2.8205128205128203,
262
- "grad_norm": 3.2784674167633057,
263
  "learning_rate": 2.1811965811965812e-05,
264
- "loss": 0.1666,
265
  "step": 16500
266
  },
267
  {
268
  "epoch": 2.905982905982906,
269
- "grad_norm": 1.3887425661087036,
270
- "learning_rate": 2.0957264957264956e-05,
271
- "loss": 0.1607,
272
  "step": 17000
273
  },
274
  {
275
  "epoch": 2.9914529914529915,
276
- "grad_norm": 1.3888640403747559,
277
- "learning_rate": 2.0102564102564104e-05,
278
- "loss": 0.1695,
279
  "step": 17500
280
  },
281
  {
282
  "epoch": 3.0,
283
- "eval_accuracy": 0.8485245982027927,
284
- "eval_f1": 0.476198409062513,
285
- "eval_loss": 0.6351242661476135,
286
- "eval_precision": 0.46810775403258825,
287
- "eval_recall": 0.48457365655195794,
288
- "eval_runtime": 20.5093,
289
- "eval_samples_per_second": 142.569,
290
- "eval_steps_per_second": 17.846,
291
  "step": 17550
292
  },
293
  {
294
  "epoch": 3.076923076923077,
295
- "grad_norm": 3.210146427154541,
296
- "learning_rate": 1.9247863247863248e-05,
297
- "loss": 0.0761,
298
  "step": 18000
299
  },
300
  {
301
  "epoch": 3.1623931623931623,
302
- "grad_norm": 1.787926197052002,
303
- "learning_rate": 1.8393162393162395e-05,
304
- "loss": 0.0751,
305
  "step": 18500
306
  },
307
  {
308
  "epoch": 3.247863247863248,
309
- "grad_norm": 5.146090984344482,
310
- "learning_rate": 1.753846153846154e-05,
311
- "loss": 0.072,
312
  "step": 19000
313
  },
314
  {
315
  "epoch": 3.3333333333333335,
316
- "grad_norm": 1.8365877866744995,
317
- "learning_rate": 1.6683760683760684e-05,
318
- "loss": 0.0766,
319
  "step": 19500
320
  },
321
  {
322
  "epoch": 3.4188034188034186,
323
- "grad_norm": 2.014242172241211,
324
- "learning_rate": 1.582905982905983e-05,
325
- "loss": 0.0732,
326
  "step": 20000
327
  },
328
  {
329
  "epoch": 3.5042735042735043,
330
- "grad_norm": 1.4598703384399414,
331
- "learning_rate": 1.4974358974358976e-05,
332
- "loss": 0.074,
333
  "step": 20500
334
  },
335
  {
336
  "epoch": 3.58974358974359,
337
- "grad_norm": 3.3222851753234863,
338
- "learning_rate": 1.4119658119658118e-05,
339
- "loss": 0.075,
340
  "step": 21000
341
  },
342
  {
343
  "epoch": 3.6752136752136755,
344
- "grad_norm": 1.5937930345535278,
345
- "learning_rate": 1.3266666666666666e-05,
346
- "loss": 0.0747,
347
  "step": 21500
348
  },
349
  {
350
  "epoch": 3.7606837606837606,
351
- "grad_norm": 0.7858961224555969,
352
- "learning_rate": 1.2411965811965814e-05,
353
- "loss": 0.07,
354
  "step": 22000
355
  },
356
  {
357
  "epoch": 3.8461538461538463,
358
- "grad_norm": 2.8545825481414795,
359
- "learning_rate": 1.1557264957264958e-05,
360
- "loss": 0.0695,
361
  "step": 22500
362
  },
363
  {
364
  "epoch": 3.931623931623932,
365
- "grad_norm": 3.5758216381073,
366
- "learning_rate": 1.0702564102564102e-05,
367
- "loss": 0.0711,
368
  "step": 23000
369
  },
370
  {
371
  "epoch": 4.0,
372
- "eval_accuracy": 0.8497750426009244,
373
- "eval_f1": 0.49261043851479935,
374
- "eval_loss": 0.9213722944259644,
375
- "eval_precision": 0.4715869447243972,
376
- "eval_recall": 0.5155958637057129,
377
- "eval_runtime": 20.5388,
378
- "eval_samples_per_second": 142.365,
379
- "eval_steps_per_second": 17.82,
380
  "step": 23400
381
  },
382
  {
383
  "epoch": 4.017094017094017,
384
- "grad_norm": 0.3363385498523712,
385
- "learning_rate": 9.847863247863248e-06,
386
- "loss": 0.0658,
387
  "step": 23500
388
  },
389
  {
390
  "epoch": 4.102564102564102,
391
- "grad_norm": 0.3475494980812073,
392
- "learning_rate": 8.993162393162394e-06,
393
- "loss": 0.0363,
394
  "step": 24000
395
  },
396
  {
397
  "epoch": 4.188034188034188,
398
- "grad_norm": 0.8006152510643005,
399
- "learning_rate": 8.13846153846154e-06,
400
- "loss": 0.0392,
401
  "step": 24500
402
  },
403
  {
404
  "epoch": 4.273504273504273,
405
- "grad_norm": 1.5949403047561646,
406
- "learning_rate": 7.283760683760685e-06,
407
- "loss": 0.0388,
408
  "step": 25000
409
  },
410
  {
411
  "epoch": 4.358974358974359,
412
- "grad_norm": 0.0918673723936081,
413
- "learning_rate": 6.429059829059829e-06,
414
- "loss": 0.0418,
415
  "step": 25500
416
  },
417
  {
418
  "epoch": 4.444444444444445,
419
- "grad_norm": 2.251410961151123,
420
- "learning_rate": 5.576068376068376e-06,
421
- "loss": 0.0388,
422
  "step": 26000
423
  },
424
  {
425
  "epoch": 4.52991452991453,
426
- "grad_norm": 1.2202078104019165,
427
- "learning_rate": 4.721367521367521e-06,
428
- "loss": 0.0399,
429
  "step": 26500
430
  },
431
  {
432
  "epoch": 4.615384615384615,
433
- "grad_norm": 0.54653000831604,
434
- "learning_rate": 3.866666666666667e-06,
435
- "loss": 0.0374,
436
  "step": 27000
437
  },
438
  {
439
  "epoch": 4.700854700854701,
440
- "grad_norm": 0.7960435152053833,
441
- "learning_rate": 3.013675213675214e-06,
442
- "loss": 0.0389,
443
  "step": 27500
444
  },
445
  {
446
  "epoch": 4.786324786324786,
447
- "grad_norm": 0.8028801679611206,
448
- "learning_rate": 2.158974358974359e-06,
449
- "loss": 0.0367,
450
  "step": 28000
451
  },
452
  {
453
  "epoch": 4.871794871794872,
454
- "grad_norm": 0.4618303179740906,
455
- "learning_rate": 1.3042735042735044e-06,
456
- "loss": 0.0368,
457
  "step": 28500
458
  },
459
  {
460
  "epoch": 4.957264957264957,
461
- "grad_norm": 0.23023363947868347,
462
- "learning_rate": 4.495726495726496e-07,
463
- "loss": 0.0384,
464
  "step": 29000
465
  },
466
  {
467
  "epoch": 5.0,
468
- "eval_accuracy": 0.8505718944232632,
469
- "eval_f1": 0.4937960213172776,
470
- "eval_loss": 1.1010900735855103,
471
- "eval_precision": 0.47477118047406713,
472
- "eval_recall": 0.5144092219020173,
473
- "eval_runtime": 20.7237,
474
- "eval_samples_per_second": 141.094,
475
- "eval_steps_per_second": 17.661,
476
  "step": 29250
477
  },
478
  {
479
  "epoch": 5.0,
480
  "step": 29250,
481
  "total_flos": 5.182622875540416e+16,
482
- "train_loss": 0.25193174704527244,
483
- "train_runtime": 8708.7353,
484
- "train_samples_per_second": 13.434,
485
- "train_steps_per_second": 3.359
486
  }
487
  ],
488
  "logging_steps": 500,
 
1
  {
2
  "best_global_step": 11700,
3
+ "best_metric": 0.5111122131347656,
4
  "best_model_checkpoint": "./output/gpt-medmentions/checkpoint-11700",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08547008547008547,
14
+ "grad_norm": 7.1186957359313965,
15
  "learning_rate": 4.9153846153846157e-05,
16
+ "loss": 0.9374,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.17094017094017094,
21
+ "grad_norm": 2.6842756271362305,
22
  "learning_rate": 4.829914529914531e-05,
23
+ "loss": 0.6708,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2564102564102564,
28
+ "grad_norm": 4.874898433685303,
29
  "learning_rate": 4.7444444444444445e-05,
30
+ "loss": 0.6453,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3418803418803419,
35
+ "grad_norm": 5.255520820617676,
36
  "learning_rate": 4.658974358974359e-05,
37
+ "loss": 0.6121,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.42735042735042733,
42
+ "grad_norm": 2.5840187072753906,
43
+ "learning_rate": 4.573504273504274e-05,
44
+ "loss": 0.5936,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5128205128205128,
49
+ "grad_norm": 5.246805667877197,
50
+ "learning_rate": 4.488547008547009e-05,
51
+ "loss": 0.5943,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.5982905982905983,
56
+ "grad_norm": 3.491377592086792,
57
+ "learning_rate": 4.4032478632478637e-05,
58
+ "loss": 0.5663,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.6837606837606838,
63
+ "grad_norm": 4.0636210441589355,
64
+ "learning_rate": 4.317777777777778e-05,
65
+ "loss": 0.5611,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.7692307692307693,
70
+ "grad_norm": 4.316960334777832,
71
+ "learning_rate": 4.2323076923076925e-05,
72
+ "loss": 0.5449,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.8547008547008547,
77
+ "grad_norm": 4.260451316833496,
78
+ "learning_rate": 4.146837606837607e-05,
79
+ "loss": 0.5481,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 0.9401709401709402,
84
+ "grad_norm": 5.917566299438477,
85
+ "learning_rate": 4.061367521367522e-05,
86
+ "loss": 0.5307,
87
  "step": 5500
88
  },
89
  {
90
  "epoch": 1.0,
91
+ "eval_accuracy": 0.8341444876242783,
92
+ "eval_f1": 0.44006334125098967,
93
+ "eval_loss": 0.5368949770927429,
94
+ "eval_precision": 0.41286584459961373,
95
+ "eval_recall": 0.47109679606713,
96
+ "eval_runtime": 20.612,
97
+ "eval_samples_per_second": 141.859,
98
+ "eval_steps_per_second": 17.757,
99
  "step": 5850
100
  },
101
  {
102
  "epoch": 1.0256410256410255,
103
+ "grad_norm": 3.265369415283203,
104
+ "learning_rate": 3.975897435897436e-05,
105
+ "loss": 0.4748,
106
  "step": 6000
107
  },
108
  {
109
  "epoch": 1.1111111111111112,
110
+ "grad_norm": 3.2452337741851807,
111
+ "learning_rate": 3.890427350427351e-05,
112
+ "loss": 0.3767,
113
  "step": 6500
114
  },
115
  {
116
  "epoch": 1.1965811965811965,
117
+ "grad_norm": 2.8009512424468994,
118
+ "learning_rate": 3.804957264957265e-05,
119
+ "loss": 0.3761,
120
  "step": 7000
121
  },
122
  {
123
  "epoch": 1.282051282051282,
124
+ "grad_norm": 5.858109951019287,
125
+ "learning_rate": 3.71948717948718e-05,
126
+ "loss": 0.3922,
127
  "step": 7500
128
  },
129
  {
130
  "epoch": 1.3675213675213675,
131
+ "grad_norm": 1.7275584936141968,
132
+ "learning_rate": 3.634017094017094e-05,
133
+ "loss": 0.3677,
134
  "step": 8000
135
  },
136
  {
137
  "epoch": 1.452991452991453,
138
+ "grad_norm": 4.6104044914245605,
139
+ "learning_rate": 3.5485470085470085e-05,
140
+ "loss": 0.3746,
141
  "step": 8500
142
  },
143
  {
144
  "epoch": 1.5384615384615383,
145
+ "grad_norm": 2.6036839485168457,
146
+ "learning_rate": 3.4630769230769236e-05,
147
+ "loss": 0.3661,
148
  "step": 9000
149
  },
150
  {
151
  "epoch": 1.623931623931624,
152
+ "grad_norm": 2.7406065464019775,
153
+ "learning_rate": 3.3776068376068374e-05,
154
+ "loss": 0.3712,
155
  "step": 9500
156
  },
157
  {
158
  "epoch": 1.7094017094017095,
159
+ "grad_norm": 5.254650115966797,
160
+ "learning_rate": 3.2921367521367525e-05,
161
+ "loss": 0.3774,
162
  "step": 10000
163
  },
164
  {
165
  "epoch": 1.7948717948717947,
166
+ "grad_norm": 2.274414539337158,
167
+ "learning_rate": 3.206666666666667e-05,
168
+ "loss": 0.3541,
169
  "step": 10500
170
  },
171
  {
172
  "epoch": 1.8803418803418803,
173
+ "grad_norm": 3.5981504917144775,
174
+ "learning_rate": 3.121196581196581e-05,
175
+ "loss": 0.3694,
176
  "step": 11000
177
  },
178
  {
179
  "epoch": 1.965811965811966,
180
+ "grad_norm": 3.6442151069641113,
181
+ "learning_rate": 3.0357264957264958e-05,
182
+ "loss": 0.3585,
183
  "step": 11500
184
  },
185
  {
186
  "epoch": 2.0,
187
+ "eval_accuracy": 0.8454107464662687,
188
+ "eval_f1": 0.48178988326848243,
189
+ "eval_loss": 0.5111122131347656,
190
+ "eval_precision": 0.4453316069630269,
191
+ "eval_recall": 0.5247499576199356,
192
+ "eval_runtime": 20.6467,
193
+ "eval_samples_per_second": 141.621,
194
+ "eval_steps_per_second": 17.727,
195
  "step": 11700
196
  },
197
  {
198
  "epoch": 2.051282051282051,
199
+ "grad_norm": 3.4292666912078857,
200
+ "learning_rate": 2.9502564102564105e-05,
201
+ "loss": 0.2559,
202
  "step": 12000
203
  },
204
  {
205
  "epoch": 2.1367521367521367,
206
+ "grad_norm": 6.138054370880127,
207
+ "learning_rate": 2.864786324786325e-05,
208
+ "loss": 0.1817,
209
  "step": 12500
210
  },
211
  {
212
  "epoch": 2.2222222222222223,
213
+ "grad_norm": 3.659104347229004,
214
+ "learning_rate": 2.7793162393162394e-05,
215
+ "loss": 0.1759,
216
  "step": 13000
217
  },
218
  {
219
  "epoch": 2.3076923076923075,
220
+ "grad_norm": 5.470319747924805,
221
+ "learning_rate": 2.693846153846154e-05,
222
+ "loss": 0.184,
223
  "step": 13500
224
  },
225
  {
226
  "epoch": 2.393162393162393,
227
+ "grad_norm": 2.9865291118621826,
228
+ "learning_rate": 2.6083760683760682e-05,
229
+ "loss": 0.1761,
230
  "step": 14000
231
  },
232
  {
233
  "epoch": 2.4786324786324787,
234
+ "grad_norm": 6.152403831481934,
235
+ "learning_rate": 2.522905982905983e-05,
236
+ "loss": 0.1798,
237
  "step": 14500
238
  },
239
  {
240
  "epoch": 2.564102564102564,
241
+ "grad_norm": 4.3192338943481445,
242
+ "learning_rate": 2.4374358974358977e-05,
243
+ "loss": 0.1757,
244
  "step": 15000
245
  },
246
  {
247
  "epoch": 2.6495726495726495,
248
+ "grad_norm": 3.217804193496704,
249
+ "learning_rate": 2.3521367521367523e-05,
250
+ "loss": 0.1738,
251
  "step": 15500
252
  },
253
  {
254
  "epoch": 2.735042735042735,
255
+ "grad_norm": 3.670557737350464,
256
  "learning_rate": 2.2666666666666668e-05,
257
+ "loss": 0.1861,
258
  "step": 16000
259
  },
260
  {
261
  "epoch": 2.8205128205128203,
262
+ "grad_norm": 2.3006069660186768,
263
  "learning_rate": 2.1811965811965812e-05,
264
+ "loss": 0.1705,
265
  "step": 16500
266
  },
267
  {
268
  "epoch": 2.905982905982906,
269
+ "grad_norm": 1.9008346796035767,
270
+ "learning_rate": 2.0958974358974358e-05,
271
+ "loss": 0.1672,
272
  "step": 17000
273
  },
274
  {
275
  "epoch": 2.9914529914529915,
276
+ "grad_norm": 1.8553671836853027,
277
+ "learning_rate": 2.0104273504273506e-05,
278
+ "loss": 0.1758,
279
  "step": 17500
280
  },
281
  {
282
  "epoch": 3.0,
283
+ "eval_accuracy": 0.8497137463068983,
284
+ "eval_f1": 0.48074844074844075,
285
+ "eval_loss": 0.6349462270736694,
286
+ "eval_precision": 0.4718413320274241,
287
+ "eval_recall": 0.4899983047974233,
288
+ "eval_runtime": 20.6084,
289
+ "eval_samples_per_second": 141.884,
290
+ "eval_steps_per_second": 17.76,
291
  "step": 17550
292
  },
293
  {
294
  "epoch": 3.076923076923077,
295
+ "grad_norm": 1.145456314086914,
296
+ "learning_rate": 1.924957264957265e-05,
297
+ "loss": 0.0777,
298
  "step": 18000
299
  },
300
  {
301
  "epoch": 3.1623931623931623,
302
+ "grad_norm": 2.4131710529327393,
303
+ "learning_rate": 1.8394871794871797e-05,
304
+ "loss": 0.0769,
305
  "step": 18500
306
  },
307
  {
308
  "epoch": 3.247863247863248,
309
+ "grad_norm": 2.5216588973999023,
310
+ "learning_rate": 1.754017094017094e-05,
311
+ "loss": 0.0779,
312
  "step": 19000
313
  },
314
  {
315
  "epoch": 3.3333333333333335,
316
+ "grad_norm": 2.811354160308838,
317
+ "learning_rate": 1.6685470085470086e-05,
318
+ "loss": 0.0804,
319
  "step": 19500
320
  },
321
  {
322
  "epoch": 3.4188034188034186,
323
+ "grad_norm": 0.5833438634872437,
324
+ "learning_rate": 1.5830769230769233e-05,
325
+ "loss": 0.0772,
326
  "step": 20000
327
  },
328
  {
329
  "epoch": 3.5042735042735043,
330
+ "grad_norm": 3.458584785461426,
331
+ "learning_rate": 1.4976068376068378e-05,
332
+ "loss": 0.0751,
333
  "step": 20500
334
  },
335
  {
336
  "epoch": 3.58974358974359,
337
+ "grad_norm": 0.8929054141044617,
338
+ "learning_rate": 1.4121367521367524e-05,
339
+ "loss": 0.0761,
340
  "step": 21000
341
  },
342
  {
343
  "epoch": 3.6752136752136755,
344
+ "grad_norm": 5.908766269683838,
345
+ "learning_rate": 1.3268376068376068e-05,
346
+ "loss": 0.0736,
347
  "step": 21500
348
  },
349
  {
350
  "epoch": 3.7606837606837606,
351
+ "grad_norm": 1.0228583812713623,
352
+ "learning_rate": 1.2413675213675214e-05,
353
+ "loss": 0.0716,
354
  "step": 22000
355
  },
356
  {
357
  "epoch": 3.8461538461538463,
358
+ "grad_norm": 0.626966118812561,
359
+ "learning_rate": 1.1560683760683762e-05,
360
+ "loss": 0.0718,
361
  "step": 22500
362
  },
363
  {
364
  "epoch": 3.931623931623932,
365
+ "grad_norm": 1.9417258501052856,
366
+ "learning_rate": 1.0705982905982906e-05,
367
+ "loss": 0.0751,
368
  "step": 23000
369
  },
370
  {
371
  "epoch": 4.0,
372
+ "eval_accuracy": 0.8496892277892879,
373
+ "eval_f1": 0.49008894029434047,
374
+ "eval_loss": 0.9264360070228577,
375
+ "eval_precision": 0.46282485875706214,
376
+ "eval_recall": 0.5207662315646719,
377
+ "eval_runtime": 20.6775,
378
+ "eval_samples_per_second": 141.41,
379
+ "eval_steps_per_second": 17.7,
380
  "step": 23400
381
  },
382
  {
383
  "epoch": 4.017094017094017,
384
+ "grad_norm": 0.14389610290527344,
385
+ "learning_rate": 9.851282051282052e-06,
386
+ "loss": 0.0641,
387
  "step": 23500
388
  },
389
  {
390
  "epoch": 4.102564102564102,
391
+ "grad_norm": 0.6148084402084351,
392
+ "learning_rate": 8.996581196581196e-06,
393
+ "loss": 0.0352,
394
  "step": 24000
395
  },
396
  {
397
  "epoch": 4.188034188034188,
398
+ "grad_norm": 1.3859856128692627,
399
+ "learning_rate": 8.141880341880342e-06,
400
+ "loss": 0.0399,
401
  "step": 24500
402
  },
403
  {
404
  "epoch": 4.273504273504273,
405
+ "grad_norm": 1.6096197366714478,
406
+ "learning_rate": 7.287179487179488e-06,
407
+ "loss": 0.0385,
408
  "step": 25000
409
  },
410
  {
411
  "epoch": 4.358974358974359,
412
+ "grad_norm": 0.26734989881515503,
413
+ "learning_rate": 6.432478632478633e-06,
414
+ "loss": 0.0411,
415
  "step": 25500
416
  },
417
  {
418
  "epoch": 4.444444444444445,
419
+ "grad_norm": 1.3472919464111328,
420
+ "learning_rate": 5.577777777777778e-06,
421
+ "loss": 0.0395,
422
  "step": 26000
423
  },
424
  {
425
  "epoch": 4.52991452991453,
426
+ "grad_norm": 0.8369725942611694,
427
+ "learning_rate": 4.723076923076923e-06,
428
+ "loss": 0.0432,
429
  "step": 26500
430
  },
431
  {
432
  "epoch": 4.615384615384615,
433
+ "grad_norm": 0.7225199341773987,
434
+ "learning_rate": 3.87008547008547e-06,
435
+ "loss": 0.0412,
436
  "step": 27000
437
  },
438
  {
439
  "epoch": 4.700854700854701,
440
+ "grad_norm": 0.6592767834663391,
441
+ "learning_rate": 3.0153846153846154e-06,
442
+ "loss": 0.0407,
443
  "step": 27500
444
  },
445
  {
446
  "epoch": 4.786324786324786,
447
+ "grad_norm": 0.9876635670661926,
448
+ "learning_rate": 2.160683760683761e-06,
449
+ "loss": 0.0372,
450
  "step": 28000
451
  },
452
  {
453
  "epoch": 4.871794871794872,
454
+ "grad_norm": 0.7053186297416687,
455
+ "learning_rate": 1.3059829059829061e-06,
456
+ "loss": 0.0384,
457
  "step": 28500
458
  },
459
  {
460
  "epoch": 4.957264957264957,
461
+ "grad_norm": 0.24596278369426727,
462
+ "learning_rate": 4.52991452991453e-07,
463
+ "loss": 0.0387,
464
  "step": 29000
465
  },
466
  {
467
  "epoch": 5.0,
468
+ "eval_accuracy": 0.8518223388213949,
469
+ "eval_f1": 0.4960441433034446,
470
+ "eval_loss": 1.0903491973876953,
471
+ "eval_precision": 0.47575686823877344,
472
+ "eval_recall": 0.5181386675707748,
473
+ "eval_runtime": 20.8036,
474
+ "eval_samples_per_second": 140.552,
475
+ "eval_steps_per_second": 17.593,
476
  "step": 29250
477
  },
478
  {
479
  "epoch": 5.0,
480
  "step": 29250,
481
  "total_flos": 5.182622875540416e+16,
482
+ "train_loss": 0.2549698821499816,
483
+ "train_runtime": 7489.8753,
484
+ "train_samples_per_second": 15.62,
485
+ "train_steps_per_second": 3.905
486
  }
487
  ],
488
  "logging_steps": 500,