wav2vec2-large-slurp / trainer_state.json
alkiskoudounas's picture
Upload folder using huggingface_hub
aab5c8a verified
{
"best_metric": 0.8005753739930955,
"best_model_checkpoint": "results/facebook/wav2vec2-large-960h-lv60-self/42/_retain/checkpoint-30000",
"epoch": 75.80543272267846,
"eval_steps": 400,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.010739102969046,
"grad_norm": 3.2389800548553467,
"learning_rate": 6.666666666666667e-05,
"loss": 4.0919,
"step": 400
},
{
"epoch": 1.010739102969046,
"eval_accuracy": 0.10586881472957423,
"eval_f1_macro": 0.008128718856806105,
"eval_loss": 3.68546199798584,
"eval_runtime": 133.4265,
"eval_samples_per_second": 65.129,
"eval_steps_per_second": 2.039,
"step": 400
},
{
"epoch": 2.021478205938092,
"grad_norm": 5.276159286499023,
"learning_rate": 0.00013333333333333334,
"loss": 2.9391,
"step": 800
},
{
"epoch": 2.021478205938092,
"eval_accuracy": 0.5268124280782509,
"eval_f1_macro": 0.2773414885221941,
"eval_loss": 1.907711386680603,
"eval_runtime": 132.9453,
"eval_samples_per_second": 65.365,
"eval_steps_per_second": 2.046,
"step": 800
},
{
"epoch": 3.0322173089071383,
"grad_norm": 5.944188117980957,
"learning_rate": 0.0002,
"loss": 1.583,
"step": 1200
},
{
"epoch": 3.0322173089071383,
"eval_accuracy": 0.6894131185270426,
"eval_f1_macro": 0.48707209156248815,
"eval_loss": 1.2798452377319336,
"eval_runtime": 133.055,
"eval_samples_per_second": 65.311,
"eval_steps_per_second": 2.044,
"step": 1200
},
{
"epoch": 4.042956411876184,
"grad_norm": 6.609740257263184,
"learning_rate": 0.0002666666666666667,
"loss": 1.0089,
"step": 1600
},
{
"epoch": 4.042956411876184,
"eval_accuracy": 0.7447640966628308,
"eval_f1_macro": 0.5630866427455141,
"eval_loss": 1.1743698120117188,
"eval_runtime": 132.7655,
"eval_samples_per_second": 65.454,
"eval_steps_per_second": 2.049,
"step": 1600
},
{
"epoch": 5.053695514845231,
"grad_norm": 9.530195236206055,
"learning_rate": 0.0003333333333333333,
"loss": 0.7348,
"step": 2000
},
{
"epoch": 5.053695514845231,
"eval_accuracy": 0.7604142692750288,
"eval_f1_macro": 0.5961285021365654,
"eval_loss": 1.1527246236801147,
"eval_runtime": 127.8747,
"eval_samples_per_second": 67.957,
"eval_steps_per_second": 2.127,
"step": 2000
},
{
"epoch": 6.0644346178142765,
"grad_norm": 6.680343151092529,
"learning_rate": 0.0004,
"loss": 0.5957,
"step": 2400
},
{
"epoch": 6.0644346178142765,
"eval_accuracy": 0.7676639815880322,
"eval_f1_macro": 0.6054951189790404,
"eval_loss": 1.198480248451233,
"eval_runtime": 128.28,
"eval_samples_per_second": 67.742,
"eval_steps_per_second": 2.12,
"step": 2400
},
{
"epoch": 7.075173720783323,
"grad_norm": 3.774092435836792,
"learning_rate": 0.00046666666666666666,
"loss": 0.521,
"step": 2800
},
{
"epoch": 7.075173720783323,
"eval_accuracy": 0.7630609896432681,
"eval_f1_macro": 0.5903658522565237,
"eval_loss": 1.1921718120574951,
"eval_runtime": 128.2033,
"eval_samples_per_second": 67.783,
"eval_steps_per_second": 2.122,
"step": 2800
},
{
"epoch": 8.085912823752368,
"grad_norm": 3.719675302505493,
"learning_rate": 0.0004962962962962963,
"loss": 0.4667,
"step": 3200
},
{
"epoch": 8.085912823752368,
"eval_accuracy": 0.7619102416570771,
"eval_f1_macro": 0.6061718024259425,
"eval_loss": 1.2508888244628906,
"eval_runtime": 109.5839,
"eval_samples_per_second": 79.3,
"eval_steps_per_second": 2.482,
"step": 3200
},
{
"epoch": 9.096651926721416,
"grad_norm": 3.703678607940674,
"learning_rate": 0.0004888888888888889,
"loss": 0.3861,
"step": 3600
},
{
"epoch": 9.096651926721416,
"eval_accuracy": 0.7640966628308401,
"eval_f1_macro": 0.5910106640214171,
"eval_loss": 1.2851234674453735,
"eval_runtime": 109.2588,
"eval_samples_per_second": 79.536,
"eval_steps_per_second": 2.49,
"step": 3600
},
{
"epoch": 10.107391029690461,
"grad_norm": 5.4869585037231445,
"learning_rate": 0.00048148148148148144,
"loss": 0.32,
"step": 4000
},
{
"epoch": 10.107391029690461,
"eval_accuracy": 0.7590333716915996,
"eval_f1_macro": 0.5804751345832923,
"eval_loss": 1.4432213306427002,
"eval_runtime": 109.3455,
"eval_samples_per_second": 79.473,
"eval_steps_per_second": 2.488,
"step": 4000
},
{
"epoch": 11.118130132659507,
"grad_norm": 2.1531548500061035,
"learning_rate": 0.0004740740740740741,
"loss": 0.2828,
"step": 4400
},
{
"epoch": 11.118130132659507,
"eval_accuracy": 0.7590333716915996,
"eval_f1_macro": 0.6021086310983942,
"eval_loss": 1.3173363208770752,
"eval_runtime": 109.3574,
"eval_samples_per_second": 79.464,
"eval_steps_per_second": 2.487,
"step": 4400
},
{
"epoch": 12.128869235628553,
"grad_norm": 2.9061076641082764,
"learning_rate": 0.00046666666666666666,
"loss": 0.2367,
"step": 4800
},
{
"epoch": 12.128869235628553,
"eval_accuracy": 0.7543153049482163,
"eval_f1_macro": 0.6092446104843484,
"eval_loss": 1.4384377002716064,
"eval_runtime": 109.3136,
"eval_samples_per_second": 79.496,
"eval_steps_per_second": 2.488,
"step": 4800
},
{
"epoch": 13.139608338597599,
"grad_norm": 2.8866333961486816,
"learning_rate": 0.00045925925925925925,
"loss": 0.2187,
"step": 5200
},
{
"epoch": 13.139608338597599,
"eval_accuracy": 0.7654775604142693,
"eval_f1_macro": 0.5880603922791815,
"eval_loss": 1.4380950927734375,
"eval_runtime": 109.4554,
"eval_samples_per_second": 79.393,
"eval_steps_per_second": 2.485,
"step": 5200
},
{
"epoch": 14.150347441566646,
"grad_norm": 1.7574183940887451,
"learning_rate": 0.00045185185185185183,
"loss": 0.1847,
"step": 5600
},
{
"epoch": 14.150347441566646,
"eval_accuracy": 0.7730724971231301,
"eval_f1_macro": 0.5690127519635726,
"eval_loss": 1.4231289625167847,
"eval_runtime": 109.3887,
"eval_samples_per_second": 79.441,
"eval_steps_per_second": 2.487,
"step": 5600
},
{
"epoch": 15.161086544535692,
"grad_norm": 1.8373284339904785,
"learning_rate": 0.0004444444444444444,
"loss": 0.1701,
"step": 6000
},
{
"epoch": 15.161086544535692,
"eval_accuracy": 0.7680092059838896,
"eval_f1_macro": 0.5878361109327175,
"eval_loss": 1.5120900869369507,
"eval_runtime": 109.6944,
"eval_samples_per_second": 79.22,
"eval_steps_per_second": 2.48,
"step": 6000
},
{
"epoch": 16.171825647504736,
"grad_norm": 2.9617397785186768,
"learning_rate": 0.00043703703703703705,
"loss": 0.1504,
"step": 6400
},
{
"epoch": 16.171825647504736,
"eval_accuracy": 0.7609896432681242,
"eval_f1_macro": 0.6017434401264726,
"eval_loss": 1.5701994895935059,
"eval_runtime": 108.7867,
"eval_samples_per_second": 79.881,
"eval_steps_per_second": 2.5,
"step": 6400
},
{
"epoch": 17.182564750473784,
"grad_norm": 1.9067094326019287,
"learning_rate": 0.00042962962962962963,
"loss": 0.1416,
"step": 6800
},
{
"epoch": 17.182564750473784,
"eval_accuracy": 0.7680092059838896,
"eval_f1_macro": 0.5846132297229183,
"eval_loss": 1.6262372732162476,
"eval_runtime": 109.3355,
"eval_samples_per_second": 79.48,
"eval_steps_per_second": 2.488,
"step": 6800
},
{
"epoch": 18.19330385344283,
"grad_norm": 1.788485050201416,
"learning_rate": 0.0004222222222222222,
"loss": 0.1345,
"step": 7200
},
{
"epoch": 18.19330385344283,
"eval_accuracy": 0.7582278481012659,
"eval_f1_macro": 0.606730101292868,
"eval_loss": 1.6317014694213867,
"eval_runtime": 109.1193,
"eval_samples_per_second": 79.638,
"eval_steps_per_second": 2.493,
"step": 7200
},
{
"epoch": 19.204042956411875,
"grad_norm": 3.0378000736236572,
"learning_rate": 0.0004148148148148148,
"loss": 0.1226,
"step": 7600
},
{
"epoch": 19.204042956411875,
"eval_accuracy": 0.7739930955120828,
"eval_f1_macro": 0.6193094447560485,
"eval_loss": 1.486433982849121,
"eval_runtime": 109.0558,
"eval_samples_per_second": 79.684,
"eval_steps_per_second": 2.494,
"step": 7600
},
{
"epoch": 20.214782059380923,
"grad_norm": 3.1991524696350098,
"learning_rate": 0.0004074074074074074,
"loss": 0.114,
"step": 8000
},
{
"epoch": 20.214782059380923,
"eval_accuracy": 0.774108170310702,
"eval_f1_macro": 0.6157091732739274,
"eval_loss": 1.5931099653244019,
"eval_runtime": 109.0943,
"eval_samples_per_second": 79.656,
"eval_steps_per_second": 2.493,
"step": 8000
},
{
"epoch": 21.225521162349967,
"grad_norm": 2.1036899089813232,
"learning_rate": 0.0004,
"loss": 0.1064,
"step": 8400
},
{
"epoch": 21.225521162349967,
"eval_accuracy": 0.7730724971231301,
"eval_f1_macro": 0.6020232192562277,
"eval_loss": 1.7101207971572876,
"eval_runtime": 108.899,
"eval_samples_per_second": 79.799,
"eval_steps_per_second": 2.498,
"step": 8400
},
{
"epoch": 22.236260265319014,
"grad_norm": 2.786360025405884,
"learning_rate": 0.0003925925925925926,
"loss": 0.1009,
"step": 8800
},
{
"epoch": 22.236260265319014,
"eval_accuracy": 0.7655926352128883,
"eval_f1_macro": 0.5794753743607411,
"eval_loss": 1.6664392948150635,
"eval_runtime": 109.2502,
"eval_samples_per_second": 79.542,
"eval_steps_per_second": 2.49,
"step": 8800
},
{
"epoch": 23.246999368288062,
"grad_norm": 1.0751720666885376,
"learning_rate": 0.0003851851851851852,
"loss": 0.0941,
"step": 9200
},
{
"epoch": 23.246999368288062,
"eval_accuracy": 0.7772151898734178,
"eval_f1_macro": 0.5717636011134882,
"eval_loss": 1.5253993272781372,
"eval_runtime": 109.0143,
"eval_samples_per_second": 79.714,
"eval_steps_per_second": 2.495,
"step": 9200
},
{
"epoch": 24.257738471257106,
"grad_norm": 1.744019865989685,
"learning_rate": 0.00037777777777777777,
"loss": 0.0861,
"step": 9600
},
{
"epoch": 24.257738471257106,
"eval_accuracy": 0.777445339470656,
"eval_f1_macro": 0.625140306336925,
"eval_loss": 1.6324084997177124,
"eval_runtime": 108.6336,
"eval_samples_per_second": 79.994,
"eval_steps_per_second": 2.504,
"step": 9600
},
{
"epoch": 25.268477574226154,
"grad_norm": 1.838752269744873,
"learning_rate": 0.00037037037037037035,
"loss": 0.0807,
"step": 10000
},
{
"epoch": 25.268477574226154,
"eval_accuracy": 0.7728423475258919,
"eval_f1_macro": 0.5870939911644882,
"eval_loss": 1.7057673931121826,
"eval_runtime": 108.6842,
"eval_samples_per_second": 79.956,
"eval_steps_per_second": 2.503,
"step": 10000
},
{
"epoch": 26.279216677195198,
"grad_norm": 2.3391871452331543,
"learning_rate": 0.000362962962962963,
"loss": 0.0739,
"step": 10400
},
{
"epoch": 26.279216677195198,
"eval_accuracy": 0.774108170310702,
"eval_f1_macro": 0.6190123341706849,
"eval_loss": 1.6950148344039917,
"eval_runtime": 108.9167,
"eval_samples_per_second": 79.786,
"eval_steps_per_second": 2.497,
"step": 10400
},
{
"epoch": 27.289955780164245,
"grad_norm": 1.3197505474090576,
"learning_rate": 0.00035555555555555557,
"loss": 0.0685,
"step": 10800
},
{
"epoch": 27.289955780164245,
"eval_accuracy": 0.7652474108170311,
"eval_f1_macro": 0.5984200620053731,
"eval_loss": 1.8148038387298584,
"eval_runtime": 108.998,
"eval_samples_per_second": 79.726,
"eval_steps_per_second": 2.495,
"step": 10800
},
{
"epoch": 28.300694883133293,
"grad_norm": 0.8027063608169556,
"learning_rate": 0.00034814814814814816,
"loss": 0.0692,
"step": 11200
},
{
"epoch": 28.300694883133293,
"eval_accuracy": 0.776409666283084,
"eval_f1_macro": 0.6002766778970904,
"eval_loss": 1.6219606399536133,
"eval_runtime": 108.9613,
"eval_samples_per_second": 79.753,
"eval_steps_per_second": 2.496,
"step": 11200
},
{
"epoch": 29.311433986102337,
"grad_norm": 0.8713662028312683,
"learning_rate": 0.00034074074074074074,
"loss": 0.0662,
"step": 11600
},
{
"epoch": 29.311433986102337,
"eval_accuracy": 0.7794016110471806,
"eval_f1_macro": 0.6123819840203646,
"eval_loss": 1.6953762769699097,
"eval_runtime": 109.1585,
"eval_samples_per_second": 79.609,
"eval_steps_per_second": 2.492,
"step": 11600
},
{
"epoch": 30.322173089071384,
"grad_norm": 0.9094525575637817,
"learning_rate": 0.0003333333333333333,
"loss": 0.0639,
"step": 12000
},
{
"epoch": 30.322173089071384,
"eval_accuracy": 0.7785960874568469,
"eval_f1_macro": 0.5900178041075752,
"eval_loss": 1.7562154531478882,
"eval_runtime": 108.917,
"eval_samples_per_second": 79.786,
"eval_steps_per_second": 2.497,
"step": 12000
},
{
"epoch": 31.33291219204043,
"grad_norm": 2.3824515342712402,
"learning_rate": 0.00032592592592592596,
"loss": 0.0613,
"step": 12400
},
{
"epoch": 31.33291219204043,
"eval_accuracy": 0.7708860759493671,
"eval_f1_macro": 0.5886611331241638,
"eval_loss": 1.7263332605361938,
"eval_runtime": 109.2037,
"eval_samples_per_second": 79.576,
"eval_steps_per_second": 2.491,
"step": 12400
},
{
"epoch": 32.34365129500947,
"grad_norm": 1.1265066862106323,
"learning_rate": 0.00031851851851851854,
"loss": 0.0562,
"step": 12800
},
{
"epoch": 32.34365129500947,
"eval_accuracy": 0.777445339470656,
"eval_f1_macro": 0.6069323146272442,
"eval_loss": 1.595489263534546,
"eval_runtime": 110.1086,
"eval_samples_per_second": 78.922,
"eval_steps_per_second": 2.47,
"step": 12800
},
{
"epoch": 33.35439039797852,
"grad_norm": 0.765870988368988,
"learning_rate": 0.0003111111111111111,
"loss": 0.0482,
"step": 13200
},
{
"epoch": 33.35439039797852,
"eval_accuracy": 0.7858457997698504,
"eval_f1_macro": 0.6152260699722518,
"eval_loss": 1.6528053283691406,
"eval_runtime": 109.0363,
"eval_samples_per_second": 79.698,
"eval_steps_per_second": 2.495,
"step": 13200
},
{
"epoch": 34.36512950094757,
"grad_norm": 2.386359930038452,
"learning_rate": 0.0003037037037037037,
"loss": 0.0516,
"step": 13600
},
{
"epoch": 34.36512950094757,
"eval_accuracy": 0.7713463751438435,
"eval_f1_macro": 0.5894778786253475,
"eval_loss": 1.65277099609375,
"eval_runtime": 109.1673,
"eval_samples_per_second": 79.603,
"eval_steps_per_second": 2.492,
"step": 13600
},
{
"epoch": 35.375868603916615,
"grad_norm": 1.8987774848937988,
"learning_rate": 0.0002962962962962963,
"loss": 0.0447,
"step": 14000
},
{
"epoch": 35.375868603916615,
"eval_accuracy": 0.7799769850402761,
"eval_f1_macro": 0.6297477374058172,
"eval_loss": 1.813390851020813,
"eval_runtime": 109.6977,
"eval_samples_per_second": 79.218,
"eval_steps_per_second": 2.48,
"step": 14000
},
{
"epoch": 36.38660770688566,
"grad_norm": 1.353411078453064,
"learning_rate": 0.0002888888888888889,
"loss": 0.047,
"step": 14400
},
{
"epoch": 36.38660770688566,
"eval_accuracy": 0.7795166858457998,
"eval_f1_macro": 0.5795862617467612,
"eval_loss": 1.663203477859497,
"eval_runtime": 109.0323,
"eval_samples_per_second": 79.701,
"eval_steps_per_second": 2.495,
"step": 14400
},
{
"epoch": 37.3973468098547,
"grad_norm": 1.1114296913146973,
"learning_rate": 0.0002814814814814815,
"loss": 0.0436,
"step": 14800
},
{
"epoch": 37.3973468098547,
"eval_accuracy": 0.784234752589183,
"eval_f1_macro": 0.5995152264247978,
"eval_loss": 1.783818006515503,
"eval_runtime": 109.4106,
"eval_samples_per_second": 79.426,
"eval_steps_per_second": 2.486,
"step": 14800
},
{
"epoch": 38.40808591282375,
"grad_norm": 1.3422303199768066,
"learning_rate": 0.0002740740740740741,
"loss": 0.0422,
"step": 15200
},
{
"epoch": 38.40808591282375,
"eval_accuracy": 0.7838895281933257,
"eval_f1_macro": 0.6189287691248615,
"eval_loss": 1.7172709703445435,
"eval_runtime": 108.6629,
"eval_samples_per_second": 79.972,
"eval_steps_per_second": 2.503,
"step": 15200
},
{
"epoch": 39.4188250157928,
"grad_norm": 1.8279023170471191,
"learning_rate": 0.0002666666666666667,
"loss": 0.0377,
"step": 15600
},
{
"epoch": 39.4188250157928,
"eval_accuracy": 0.7834292289988493,
"eval_f1_macro": 0.5814739153081228,
"eval_loss": 1.7523770332336426,
"eval_runtime": 108.9839,
"eval_samples_per_second": 79.737,
"eval_steps_per_second": 2.496,
"step": 15600
},
{
"epoch": 40.429564118761846,
"grad_norm": 2.154459238052368,
"learning_rate": 0.00025925925925925926,
"loss": 0.0359,
"step": 16000
},
{
"epoch": 40.429564118761846,
"eval_accuracy": 0.7886075949367088,
"eval_f1_macro": 0.6293741181702724,
"eval_loss": 1.623598337173462,
"eval_runtime": 108.8195,
"eval_samples_per_second": 79.857,
"eval_steps_per_second": 2.5,
"step": 16000
},
{
"epoch": 41.44030322173089,
"grad_norm": 0.8551483154296875,
"learning_rate": 0.00025185185185185185,
"loss": 0.0344,
"step": 16400
},
{
"epoch": 41.44030322173089,
"eval_accuracy": 0.7815880322209436,
"eval_f1_macro": 0.6087804648227756,
"eval_loss": 1.7353272438049316,
"eval_runtime": 109.2273,
"eval_samples_per_second": 79.559,
"eval_steps_per_second": 2.49,
"step": 16400
},
{
"epoch": 42.451042324699934,
"grad_norm": 0.5178919434547424,
"learning_rate": 0.00024444444444444443,
"loss": 0.033,
"step": 16800
},
{
"epoch": 42.451042324699934,
"eval_accuracy": 0.7820483314154201,
"eval_f1_macro": 0.6001569016578011,
"eval_loss": 1.727620244026184,
"eval_runtime": 109.4385,
"eval_samples_per_second": 79.405,
"eval_steps_per_second": 2.485,
"step": 16800
},
{
"epoch": 43.46178142766898,
"grad_norm": 0.4940205514431,
"learning_rate": 0.00023703703703703704,
"loss": 0.0325,
"step": 17200
},
{
"epoch": 43.46178142766898,
"eval_accuracy": 0.7783659378596087,
"eval_f1_macro": 0.6283289368126677,
"eval_loss": 1.7798371315002441,
"eval_runtime": 109.2576,
"eval_samples_per_second": 79.537,
"eval_steps_per_second": 2.49,
"step": 17200
},
{
"epoch": 44.47252053063803,
"grad_norm": 0.8661497235298157,
"learning_rate": 0.00022962962962962962,
"loss": 0.0302,
"step": 17600
},
{
"epoch": 44.47252053063803,
"eval_accuracy": 0.7828538550057538,
"eval_f1_macro": 0.6164776778280789,
"eval_loss": 1.7507109642028809,
"eval_runtime": 109.1869,
"eval_samples_per_second": 79.588,
"eval_steps_per_second": 2.491,
"step": 17600
},
{
"epoch": 45.48325963360708,
"grad_norm": 0.015332411043345928,
"learning_rate": 0.0002222222222222222,
"loss": 0.0268,
"step": 18000
},
{
"epoch": 45.48325963360708,
"eval_accuracy": 0.7826237054085156,
"eval_f1_macro": 0.6031617249417177,
"eval_loss": 1.7825220823287964,
"eval_runtime": 109.3518,
"eval_samples_per_second": 79.468,
"eval_steps_per_second": 2.487,
"step": 18000
},
{
"epoch": 46.493998736576124,
"grad_norm": 0.5325392484664917,
"learning_rate": 0.00021481481481481482,
"loss": 0.0287,
"step": 18400
},
{
"epoch": 46.493998736576124,
"eval_accuracy": 0.7882623705408516,
"eval_f1_macro": 0.6256320010133759,
"eval_loss": 1.6932624578475952,
"eval_runtime": 108.513,
"eval_samples_per_second": 80.083,
"eval_steps_per_second": 2.507,
"step": 18400
},
{
"epoch": 47.504737839545164,
"grad_norm": 0.5086055994033813,
"learning_rate": 0.0002074074074074074,
"loss": 0.0252,
"step": 18800
},
{
"epoch": 47.504737839545164,
"eval_accuracy": 0.7856156501726121,
"eval_f1_macro": 0.6143416230351354,
"eval_loss": 1.7501070499420166,
"eval_runtime": 109.2365,
"eval_samples_per_second": 79.552,
"eval_steps_per_second": 2.49,
"step": 18800
},
{
"epoch": 48.51547694251421,
"grad_norm": 1.229317545890808,
"learning_rate": 0.0002,
"loss": 0.0283,
"step": 19200
},
{
"epoch": 48.51547694251421,
"eval_accuracy": 0.7843498273878021,
"eval_f1_macro": 0.6189575264715401,
"eval_loss": 1.9032423496246338,
"eval_runtime": 108.2906,
"eval_samples_per_second": 80.247,
"eval_steps_per_second": 2.512,
"step": 19200
},
{
"epoch": 49.52621604548326,
"grad_norm": 0.05275914818048477,
"learning_rate": 0.0001925925925925926,
"loss": 0.024,
"step": 19600
},
{
"epoch": 49.52621604548326,
"eval_accuracy": 0.7874568469505179,
"eval_f1_macro": 0.6393370936978522,
"eval_loss": 1.8691409826278687,
"eval_runtime": 108.1545,
"eval_samples_per_second": 80.348,
"eval_steps_per_second": 2.515,
"step": 19600
},
{
"epoch": 50.53695514845231,
"grad_norm": 0.9653208255767822,
"learning_rate": 0.00018518518518518518,
"loss": 0.0229,
"step": 20000
},
{
"epoch": 50.53695514845231,
"eval_accuracy": 0.786536248561565,
"eval_f1_macro": 0.6026385719720891,
"eval_loss": 1.7541390657424927,
"eval_runtime": 107.9085,
"eval_samples_per_second": 80.531,
"eval_steps_per_second": 2.521,
"step": 20000
},
{
"epoch": 51.547694251421355,
"grad_norm": 0.4658529758453369,
"learning_rate": 0.00017777777777777779,
"loss": 0.0219,
"step": 20400
},
{
"epoch": 51.547694251421355,
"eval_accuracy": 0.7872266973532797,
"eval_f1_macro": 0.6309747652348119,
"eval_loss": 1.7537351846694946,
"eval_runtime": 107.7743,
"eval_samples_per_second": 80.632,
"eval_steps_per_second": 2.524,
"step": 20400
},
{
"epoch": 52.558433354390395,
"grad_norm": 0.32756420969963074,
"learning_rate": 0.00017037037037037037,
"loss": 0.0211,
"step": 20800
},
{
"epoch": 52.558433354390395,
"eval_accuracy": 0.7934407364787112,
"eval_f1_macro": 0.6206166338546538,
"eval_loss": 1.6842619180679321,
"eval_runtime": 107.7209,
"eval_samples_per_second": 80.671,
"eval_steps_per_second": 2.525,
"step": 20800
},
{
"epoch": 53.56917245735944,
"grad_norm": 0.584701418876648,
"learning_rate": 0.00016296296296296298,
"loss": 0.0203,
"step": 21200
},
{
"epoch": 53.56917245735944,
"eval_accuracy": 0.7950517836593786,
"eval_f1_macro": 0.6206542591204762,
"eval_loss": 1.699610710144043,
"eval_runtime": 107.6954,
"eval_samples_per_second": 80.691,
"eval_steps_per_second": 2.526,
"step": 21200
},
{
"epoch": 54.57991156032849,
"grad_norm": 0.0553191676735878,
"learning_rate": 0.00015555555555555556,
"loss": 0.0174,
"step": 21600
},
{
"epoch": 54.57991156032849,
"eval_accuracy": 0.7894131185270425,
"eval_f1_macro": 0.6214961351780512,
"eval_loss": 1.8445045948028564,
"eval_runtime": 107.7853,
"eval_samples_per_second": 80.623,
"eval_steps_per_second": 2.524,
"step": 21600
},
{
"epoch": 55.59065066329754,
"grad_norm": 0.4328874945640564,
"learning_rate": 0.00014814814814814815,
"loss": 0.0197,
"step": 22000
},
{
"epoch": 55.59065066329754,
"eval_accuracy": 0.792059838895282,
"eval_f1_macro": 0.6308138834712996,
"eval_loss": 1.8310879468917847,
"eval_runtime": 107.7421,
"eval_samples_per_second": 80.656,
"eval_steps_per_second": 2.525,
"step": 22000
},
{
"epoch": 56.601389766266585,
"grad_norm": 0.02704198658466339,
"learning_rate": 0.00014074074074074076,
"loss": 0.0169,
"step": 22400
},
{
"epoch": 56.601389766266585,
"eval_accuracy": 0.7879171461449942,
"eval_f1_macro": 0.5896127682611725,
"eval_loss": 1.8162003755569458,
"eval_runtime": 107.8141,
"eval_samples_per_second": 80.602,
"eval_steps_per_second": 2.523,
"step": 22400
},
{
"epoch": 57.612128869235626,
"grad_norm": 0.2748865485191345,
"learning_rate": 0.00013333333333333334,
"loss": 0.0121,
"step": 22800
},
{
"epoch": 57.612128869235626,
"eval_accuracy": 0.7852704257767549,
"eval_f1_macro": 0.5951106108532582,
"eval_loss": 1.924727201461792,
"eval_runtime": 107.712,
"eval_samples_per_second": 80.678,
"eval_steps_per_second": 2.525,
"step": 22800
},
{
"epoch": 58.62286797220467,
"grad_norm": 0.0328911654651165,
"learning_rate": 0.00012592592592592592,
"loss": 0.0152,
"step": 23200
},
{
"epoch": 58.62286797220467,
"eval_accuracy": 0.7881472957422324,
"eval_f1_macro": 0.6063430405057288,
"eval_loss": 1.8502182960510254,
"eval_runtime": 107.788,
"eval_samples_per_second": 80.621,
"eval_steps_per_second": 2.523,
"step": 23200
},
{
"epoch": 59.63360707517372,
"grad_norm": 0.00955616869032383,
"learning_rate": 0.00011851851851851852,
"loss": 0.0142,
"step": 23600
},
{
"epoch": 59.63360707517372,
"eval_accuracy": 0.789873417721519,
"eval_f1_macro": 0.617993825444742,
"eval_loss": 1.7803289890289307,
"eval_runtime": 107.8043,
"eval_samples_per_second": 80.609,
"eval_steps_per_second": 2.523,
"step": 23600
},
{
"epoch": 60.64434617814277,
"grad_norm": 0.06125176325440407,
"learning_rate": 0.0001111111111111111,
"loss": 0.0105,
"step": 24000
},
{
"epoch": 60.64434617814277,
"eval_accuracy": 0.7861910241657077,
"eval_f1_macro": 0.6254018987758924,
"eval_loss": 1.916595458984375,
"eval_runtime": 107.7673,
"eval_samples_per_second": 80.637,
"eval_steps_per_second": 2.524,
"step": 24000
},
{
"epoch": 61.655085281111816,
"grad_norm": 0.10605888813734055,
"learning_rate": 0.0001037037037037037,
"loss": 0.0116,
"step": 24400
},
{
"epoch": 61.655085281111816,
"eval_accuracy": 0.7858457997698504,
"eval_f1_macro": 0.5961002471321352,
"eval_loss": 1.9204109907150269,
"eval_runtime": 107.7648,
"eval_samples_per_second": 80.639,
"eval_steps_per_second": 2.524,
"step": 24400
},
{
"epoch": 62.66582438408086,
"grad_norm": 0.044181693345308304,
"learning_rate": 9.62962962962963e-05,
"loss": 0.0112,
"step": 24800
},
{
"epoch": 62.66582438408086,
"eval_accuracy": 0.7878020713463751,
"eval_f1_macro": 0.6235710102313945,
"eval_loss": 1.9822152853012085,
"eval_runtime": 107.735,
"eval_samples_per_second": 80.661,
"eval_steps_per_second": 2.525,
"step": 24800
},
{
"epoch": 63.676563487049904,
"grad_norm": 0.023459970951080322,
"learning_rate": 8.888888888888889e-05,
"loss": 0.0102,
"step": 25200
},
{
"epoch": 63.676563487049904,
"eval_accuracy": 0.7840046029919447,
"eval_f1_macro": 0.6155669395709024,
"eval_loss": 1.9653674364089966,
"eval_runtime": 107.7821,
"eval_samples_per_second": 80.626,
"eval_steps_per_second": 2.524,
"step": 25200
},
{
"epoch": 64.68730259001894,
"grad_norm": 1.9076263904571533,
"learning_rate": 8.148148148148149e-05,
"loss": 0.01,
"step": 25600
},
{
"epoch": 64.68730259001894,
"eval_accuracy": 0.7880322209436134,
"eval_f1_macro": 0.6226637633596005,
"eval_loss": 1.938231348991394,
"eval_runtime": 107.7205,
"eval_samples_per_second": 80.672,
"eval_steps_per_second": 2.525,
"step": 25600
},
{
"epoch": 65.698041692988,
"grad_norm": 0.4948989748954773,
"learning_rate": 7.407407407407407e-05,
"loss": 0.0101,
"step": 26000
},
{
"epoch": 65.698041692988,
"eval_accuracy": 0.7960874568469505,
"eval_f1_macro": 0.6277935659004009,
"eval_loss": 1.8299671411514282,
"eval_runtime": 107.7348,
"eval_samples_per_second": 80.661,
"eval_steps_per_second": 2.525,
"step": 26000
},
{
"epoch": 66.70878079595704,
"grad_norm": 0.00608784519135952,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0086,
"step": 26400
},
{
"epoch": 66.70878079595704,
"eval_accuracy": 0.7968929804372842,
"eval_f1_macro": 0.6234372893298947,
"eval_loss": 1.9254202842712402,
"eval_runtime": 108.035,
"eval_samples_per_second": 80.437,
"eval_steps_per_second": 2.518,
"step": 26400
},
{
"epoch": 67.7195198989261,
"grad_norm": 0.08328448981046677,
"learning_rate": 5.925925925925926e-05,
"loss": 0.0073,
"step": 26800
},
{
"epoch": 67.7195198989261,
"eval_accuracy": 0.7915995397008055,
"eval_f1_macro": 0.6320923241131308,
"eval_loss": 1.8887046575546265,
"eval_runtime": 107.8399,
"eval_samples_per_second": 80.582,
"eval_steps_per_second": 2.522,
"step": 26800
},
{
"epoch": 68.73025900189513,
"grad_norm": 0.02061997540295124,
"learning_rate": 5.185185185185185e-05,
"loss": 0.0069,
"step": 27200
},
{
"epoch": 68.73025900189513,
"eval_accuracy": 0.794361334867664,
"eval_f1_macro": 0.636665979654867,
"eval_loss": 1.9074466228485107,
"eval_runtime": 107.6829,
"eval_samples_per_second": 80.7,
"eval_steps_per_second": 2.526,
"step": 27200
},
{
"epoch": 69.74099810486418,
"grad_norm": 0.012987918220460415,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.0059,
"step": 27600
},
{
"epoch": 69.74099810486418,
"eval_accuracy": 0.792059838895282,
"eval_f1_macro": 0.6315720450251525,
"eval_loss": 1.9398057460784912,
"eval_runtime": 107.8991,
"eval_samples_per_second": 80.538,
"eval_steps_per_second": 2.521,
"step": 27600
},
{
"epoch": 70.75173720783323,
"grad_norm": 0.005101632326841354,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.0066,
"step": 28000
},
{
"epoch": 70.75173720783323,
"eval_accuracy": 0.794361334867664,
"eval_f1_macro": 0.6349818220797456,
"eval_loss": 1.8699119091033936,
"eval_runtime": 109.2809,
"eval_samples_per_second": 79.52,
"eval_steps_per_second": 2.489,
"step": 28000
},
{
"epoch": 71.76247631080227,
"grad_norm": 0.6047748923301697,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0062,
"step": 28400
},
{
"epoch": 71.76247631080227,
"eval_accuracy": 0.7951668584579977,
"eval_f1_macro": 0.6343250573277666,
"eval_loss": 1.8893409967422485,
"eval_runtime": 109.2978,
"eval_samples_per_second": 79.508,
"eval_steps_per_second": 2.489,
"step": 28400
},
{
"epoch": 72.77321541377133,
"grad_norm": 0.012553258799016476,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0058,
"step": 28800
},
{
"epoch": 72.77321541377133,
"eval_accuracy": 0.7982738780207135,
"eval_f1_macro": 0.6409643965446785,
"eval_loss": 1.883091926574707,
"eval_runtime": 109.2468,
"eval_samples_per_second": 79.545,
"eval_steps_per_second": 2.49,
"step": 28800
},
{
"epoch": 73.78395451674037,
"grad_norm": 0.0007793375989422202,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.0056,
"step": 29200
},
{
"epoch": 73.78395451674037,
"eval_accuracy": 0.7958573072497123,
"eval_f1_macro": 0.6356613761441215,
"eval_loss": 1.8901586532592773,
"eval_runtime": 108.6154,
"eval_samples_per_second": 80.007,
"eval_steps_per_second": 2.504,
"step": 29200
},
{
"epoch": 74.7946936197094,
"grad_norm": 0.14352725446224213,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.0053,
"step": 29600
},
{
"epoch": 74.7946936197094,
"eval_accuracy": 0.7991944764096662,
"eval_f1_macro": 0.643747242061282,
"eval_loss": 1.888542890548706,
"eval_runtime": 108.5316,
"eval_samples_per_second": 80.069,
"eval_steps_per_second": 2.506,
"step": 29600
},
{
"epoch": 75.80543272267846,
"grad_norm": 0.9781034588813782,
"learning_rate": 0.0,
"loss": 0.0046,
"step": 30000
},
{
"epoch": 75.80543272267846,
"eval_accuracy": 0.8005753739930955,
"eval_f1_macro": 0.6435443913467072,
"eval_loss": 1.888439655303955,
"eval_runtime": 108.5256,
"eval_samples_per_second": 80.073,
"eval_steps_per_second": 2.506,
"step": 30000
}
],
"logging_steps": 400,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 76,
"save_steps": 1200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.8164789316384843e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}