Instructions to use alkiskoudounas/wav2vec2-large-slurp with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use alkiskoudounas/wav2vec2-large-slurp with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("audio-classification", model="alkiskoudounas/wav2vec2-large-slurp")
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForAudioClassification

processor = AutoProcessor.from_pretrained("alkiskoudounas/wav2vec2-large-slurp")
model = AutoModelForAudioClassification.from_pretrained("alkiskoudounas/wav2vec2-large-slurp")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.8005753739930955, | |
| "best_model_checkpoint": "results/facebook/wav2vec2-large-960h-lv60-self/42/_retain/checkpoint-30000", | |
| "epoch": 75.80543272267846, | |
| "eval_steps": 400, | |
| "global_step": 30000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.010739102969046, | |
| "grad_norm": 3.2389800548553467, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 4.0919, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.010739102969046, | |
| "eval_accuracy": 0.10586881472957423, | |
| "eval_f1_macro": 0.008128718856806105, | |
| "eval_loss": 3.68546199798584, | |
| "eval_runtime": 133.4265, | |
| "eval_samples_per_second": 65.129, | |
| "eval_steps_per_second": 2.039, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.021478205938092, | |
| "grad_norm": 5.276159286499023, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 2.9391, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.021478205938092, | |
| "eval_accuracy": 0.5268124280782509, | |
| "eval_f1_macro": 0.2773414885221941, | |
| "eval_loss": 1.907711386680603, | |
| "eval_runtime": 132.9453, | |
| "eval_samples_per_second": 65.365, | |
| "eval_steps_per_second": 2.046, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.0322173089071383, | |
| "grad_norm": 5.944188117980957, | |
| "learning_rate": 0.0002, | |
| "loss": 1.583, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.0322173089071383, | |
| "eval_accuracy": 0.6894131185270426, | |
| "eval_f1_macro": 0.48707209156248815, | |
| "eval_loss": 1.2798452377319336, | |
| "eval_runtime": 133.055, | |
| "eval_samples_per_second": 65.311, | |
| "eval_steps_per_second": 2.044, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.042956411876184, | |
| "grad_norm": 6.609740257263184, | |
| "learning_rate": 0.0002666666666666667, | |
| "loss": 1.0089, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.042956411876184, | |
| "eval_accuracy": 0.7447640966628308, | |
| "eval_f1_macro": 0.5630866427455141, | |
| "eval_loss": 1.1743698120117188, | |
| "eval_runtime": 132.7655, | |
| "eval_samples_per_second": 65.454, | |
| "eval_steps_per_second": 2.049, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 5.053695514845231, | |
| "grad_norm": 9.530195236206055, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 0.7348, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.053695514845231, | |
| "eval_accuracy": 0.7604142692750288, | |
| "eval_f1_macro": 0.5961285021365654, | |
| "eval_loss": 1.1527246236801147, | |
| "eval_runtime": 127.8747, | |
| "eval_samples_per_second": 67.957, | |
| "eval_steps_per_second": 2.127, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.0644346178142765, | |
| "grad_norm": 6.680343151092529, | |
| "learning_rate": 0.0004, | |
| "loss": 0.5957, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.0644346178142765, | |
| "eval_accuracy": 0.7676639815880322, | |
| "eval_f1_macro": 0.6054951189790404, | |
| "eval_loss": 1.198480248451233, | |
| "eval_runtime": 128.28, | |
| "eval_samples_per_second": 67.742, | |
| "eval_steps_per_second": 2.12, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 7.075173720783323, | |
| "grad_norm": 3.774092435836792, | |
| "learning_rate": 0.00046666666666666666, | |
| "loss": 0.521, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.075173720783323, | |
| "eval_accuracy": 0.7630609896432681, | |
| "eval_f1_macro": 0.5903658522565237, | |
| "eval_loss": 1.1921718120574951, | |
| "eval_runtime": 128.2033, | |
| "eval_samples_per_second": 67.783, | |
| "eval_steps_per_second": 2.122, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 8.085912823752368, | |
| "grad_norm": 3.719675302505493, | |
| "learning_rate": 0.0004962962962962963, | |
| "loss": 0.4667, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.085912823752368, | |
| "eval_accuracy": 0.7619102416570771, | |
| "eval_f1_macro": 0.6061718024259425, | |
| "eval_loss": 1.2508888244628906, | |
| "eval_runtime": 109.5839, | |
| "eval_samples_per_second": 79.3, | |
| "eval_steps_per_second": 2.482, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 9.096651926721416, | |
| "grad_norm": 3.703678607940674, | |
| "learning_rate": 0.0004888888888888889, | |
| "loss": 0.3861, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 9.096651926721416, | |
| "eval_accuracy": 0.7640966628308401, | |
| "eval_f1_macro": 0.5910106640214171, | |
| "eval_loss": 1.2851234674453735, | |
| "eval_runtime": 109.2588, | |
| "eval_samples_per_second": 79.536, | |
| "eval_steps_per_second": 2.49, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 10.107391029690461, | |
| "grad_norm": 5.4869585037231445, | |
| "learning_rate": 0.00048148148148148144, | |
| "loss": 0.32, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.107391029690461, | |
| "eval_accuracy": 0.7590333716915996, | |
| "eval_f1_macro": 0.5804751345832923, | |
| "eval_loss": 1.4432213306427002, | |
| "eval_runtime": 109.3455, | |
| "eval_samples_per_second": 79.473, | |
| "eval_steps_per_second": 2.488, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 11.118130132659507, | |
| "grad_norm": 2.1531548500061035, | |
| "learning_rate": 0.0004740740740740741, | |
| "loss": 0.2828, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 11.118130132659507, | |
| "eval_accuracy": 0.7590333716915996, | |
| "eval_f1_macro": 0.6021086310983942, | |
| "eval_loss": 1.3173363208770752, | |
| "eval_runtime": 109.3574, | |
| "eval_samples_per_second": 79.464, | |
| "eval_steps_per_second": 2.487, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 12.128869235628553, | |
| "grad_norm": 2.9061076641082764, | |
| "learning_rate": 0.00046666666666666666, | |
| "loss": 0.2367, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 12.128869235628553, | |
| "eval_accuracy": 0.7543153049482163, | |
| "eval_f1_macro": 0.6092446104843484, | |
| "eval_loss": 1.4384377002716064, | |
| "eval_runtime": 109.3136, | |
| "eval_samples_per_second": 79.496, | |
| "eval_steps_per_second": 2.488, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 13.139608338597599, | |
| "grad_norm": 2.8866333961486816, | |
| "learning_rate": 0.00045925925925925925, | |
| "loss": 0.2187, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 13.139608338597599, | |
| "eval_accuracy": 0.7654775604142693, | |
| "eval_f1_macro": 0.5880603922791815, | |
| "eval_loss": 1.4380950927734375, | |
| "eval_runtime": 109.4554, | |
| "eval_samples_per_second": 79.393, | |
| "eval_steps_per_second": 2.485, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 14.150347441566646, | |
| "grad_norm": 1.7574183940887451, | |
| "learning_rate": 0.00045185185185185183, | |
| "loss": 0.1847, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 14.150347441566646, | |
| "eval_accuracy": 0.7730724971231301, | |
| "eval_f1_macro": 0.5690127519635726, | |
| "eval_loss": 1.4231289625167847, | |
| "eval_runtime": 109.3887, | |
| "eval_samples_per_second": 79.441, | |
| "eval_steps_per_second": 2.487, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 15.161086544535692, | |
| "grad_norm": 1.8373284339904785, | |
| "learning_rate": 0.0004444444444444444, | |
| "loss": 0.1701, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 15.161086544535692, | |
| "eval_accuracy": 0.7680092059838896, | |
| "eval_f1_macro": 0.5878361109327175, | |
| "eval_loss": 1.5120900869369507, | |
| "eval_runtime": 109.6944, | |
| "eval_samples_per_second": 79.22, | |
| "eval_steps_per_second": 2.48, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.171825647504736, | |
| "grad_norm": 2.9617397785186768, | |
| "learning_rate": 0.00043703703703703705, | |
| "loss": 0.1504, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 16.171825647504736, | |
| "eval_accuracy": 0.7609896432681242, | |
| "eval_f1_macro": 0.6017434401264726, | |
| "eval_loss": 1.5701994895935059, | |
| "eval_runtime": 108.7867, | |
| "eval_samples_per_second": 79.881, | |
| "eval_steps_per_second": 2.5, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 17.182564750473784, | |
| "grad_norm": 1.9067094326019287, | |
| "learning_rate": 0.00042962962962962963, | |
| "loss": 0.1416, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 17.182564750473784, | |
| "eval_accuracy": 0.7680092059838896, | |
| "eval_f1_macro": 0.5846132297229183, | |
| "eval_loss": 1.6262372732162476, | |
| "eval_runtime": 109.3355, | |
| "eval_samples_per_second": 79.48, | |
| "eval_steps_per_second": 2.488, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 18.19330385344283, | |
| "grad_norm": 1.788485050201416, | |
| "learning_rate": 0.0004222222222222222, | |
| "loss": 0.1345, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 18.19330385344283, | |
| "eval_accuracy": 0.7582278481012659, | |
| "eval_f1_macro": 0.606730101292868, | |
| "eval_loss": 1.6317014694213867, | |
| "eval_runtime": 109.1193, | |
| "eval_samples_per_second": 79.638, | |
| "eval_steps_per_second": 2.493, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 19.204042956411875, | |
| "grad_norm": 3.0378000736236572, | |
| "learning_rate": 0.0004148148148148148, | |
| "loss": 0.1226, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 19.204042956411875, | |
| "eval_accuracy": 0.7739930955120828, | |
| "eval_f1_macro": 0.6193094447560485, | |
| "eval_loss": 1.486433982849121, | |
| "eval_runtime": 109.0558, | |
| "eval_samples_per_second": 79.684, | |
| "eval_steps_per_second": 2.494, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 20.214782059380923, | |
| "grad_norm": 3.1991524696350098, | |
| "learning_rate": 0.0004074074074074074, | |
| "loss": 0.114, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 20.214782059380923, | |
| "eval_accuracy": 0.774108170310702, | |
| "eval_f1_macro": 0.6157091732739274, | |
| "eval_loss": 1.5931099653244019, | |
| "eval_runtime": 109.0943, | |
| "eval_samples_per_second": 79.656, | |
| "eval_steps_per_second": 2.493, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 21.225521162349967, | |
| "grad_norm": 2.1036899089813232, | |
| "learning_rate": 0.0004, | |
| "loss": 0.1064, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 21.225521162349967, | |
| "eval_accuracy": 0.7730724971231301, | |
| "eval_f1_macro": 0.6020232192562277, | |
| "eval_loss": 1.7101207971572876, | |
| "eval_runtime": 108.899, | |
| "eval_samples_per_second": 79.799, | |
| "eval_steps_per_second": 2.498, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 22.236260265319014, | |
| "grad_norm": 2.786360025405884, | |
| "learning_rate": 0.0003925925925925926, | |
| "loss": 0.1009, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 22.236260265319014, | |
| "eval_accuracy": 0.7655926352128883, | |
| "eval_f1_macro": 0.5794753743607411, | |
| "eval_loss": 1.6664392948150635, | |
| "eval_runtime": 109.2502, | |
| "eval_samples_per_second": 79.542, | |
| "eval_steps_per_second": 2.49, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 23.246999368288062, | |
| "grad_norm": 1.0751720666885376, | |
| "learning_rate": 0.0003851851851851852, | |
| "loss": 0.0941, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 23.246999368288062, | |
| "eval_accuracy": 0.7772151898734178, | |
| "eval_f1_macro": 0.5717636011134882, | |
| "eval_loss": 1.5253993272781372, | |
| "eval_runtime": 109.0143, | |
| "eval_samples_per_second": 79.714, | |
| "eval_steps_per_second": 2.495, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 24.257738471257106, | |
| "grad_norm": 1.744019865989685, | |
| "learning_rate": 0.00037777777777777777, | |
| "loss": 0.0861, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 24.257738471257106, | |
| "eval_accuracy": 0.777445339470656, | |
| "eval_f1_macro": 0.625140306336925, | |
| "eval_loss": 1.6324084997177124, | |
| "eval_runtime": 108.6336, | |
| "eval_samples_per_second": 79.994, | |
| "eval_steps_per_second": 2.504, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 25.268477574226154, | |
| "grad_norm": 1.838752269744873, | |
| "learning_rate": 0.00037037037037037035, | |
| "loss": 0.0807, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 25.268477574226154, | |
| "eval_accuracy": 0.7728423475258919, | |
| "eval_f1_macro": 0.5870939911644882, | |
| "eval_loss": 1.7057673931121826, | |
| "eval_runtime": 108.6842, | |
| "eval_samples_per_second": 79.956, | |
| "eval_steps_per_second": 2.503, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 26.279216677195198, | |
| "grad_norm": 2.3391871452331543, | |
| "learning_rate": 0.000362962962962963, | |
| "loss": 0.0739, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 26.279216677195198, | |
| "eval_accuracy": 0.774108170310702, | |
| "eval_f1_macro": 0.6190123341706849, | |
| "eval_loss": 1.6950148344039917, | |
| "eval_runtime": 108.9167, | |
| "eval_samples_per_second": 79.786, | |
| "eval_steps_per_second": 2.497, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 27.289955780164245, | |
| "grad_norm": 1.3197505474090576, | |
| "learning_rate": 0.00035555555555555557, | |
| "loss": 0.0685, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 27.289955780164245, | |
| "eval_accuracy": 0.7652474108170311, | |
| "eval_f1_macro": 0.5984200620053731, | |
| "eval_loss": 1.8148038387298584, | |
| "eval_runtime": 108.998, | |
| "eval_samples_per_second": 79.726, | |
| "eval_steps_per_second": 2.495, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 28.300694883133293, | |
| "grad_norm": 0.8027063608169556, | |
| "learning_rate": 0.00034814814814814816, | |
| "loss": 0.0692, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 28.300694883133293, | |
| "eval_accuracy": 0.776409666283084, | |
| "eval_f1_macro": 0.6002766778970904, | |
| "eval_loss": 1.6219606399536133, | |
| "eval_runtime": 108.9613, | |
| "eval_samples_per_second": 79.753, | |
| "eval_steps_per_second": 2.496, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 29.311433986102337, | |
| "grad_norm": 0.8713662028312683, | |
| "learning_rate": 0.00034074074074074074, | |
| "loss": 0.0662, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 29.311433986102337, | |
| "eval_accuracy": 0.7794016110471806, | |
| "eval_f1_macro": 0.6123819840203646, | |
| "eval_loss": 1.6953762769699097, | |
| "eval_runtime": 109.1585, | |
| "eval_samples_per_second": 79.609, | |
| "eval_steps_per_second": 2.492, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 30.322173089071384, | |
| "grad_norm": 0.9094525575637817, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 0.0639, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 30.322173089071384, | |
| "eval_accuracy": 0.7785960874568469, | |
| "eval_f1_macro": 0.5900178041075752, | |
| "eval_loss": 1.7562154531478882, | |
| "eval_runtime": 108.917, | |
| "eval_samples_per_second": 79.786, | |
| "eval_steps_per_second": 2.497, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 31.33291219204043, | |
| "grad_norm": 2.3824515342712402, | |
| "learning_rate": 0.00032592592592592596, | |
| "loss": 0.0613, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 31.33291219204043, | |
| "eval_accuracy": 0.7708860759493671, | |
| "eval_f1_macro": 0.5886611331241638, | |
| "eval_loss": 1.7263332605361938, | |
| "eval_runtime": 109.2037, | |
| "eval_samples_per_second": 79.576, | |
| "eval_steps_per_second": 2.491, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 32.34365129500947, | |
| "grad_norm": 1.1265066862106323, | |
| "learning_rate": 0.00031851851851851854, | |
| "loss": 0.0562, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 32.34365129500947, | |
| "eval_accuracy": 0.777445339470656, | |
| "eval_f1_macro": 0.6069323146272442, | |
| "eval_loss": 1.595489263534546, | |
| "eval_runtime": 110.1086, | |
| "eval_samples_per_second": 78.922, | |
| "eval_steps_per_second": 2.47, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 33.35439039797852, | |
| "grad_norm": 0.765870988368988, | |
| "learning_rate": 0.0003111111111111111, | |
| "loss": 0.0482, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 33.35439039797852, | |
| "eval_accuracy": 0.7858457997698504, | |
| "eval_f1_macro": 0.6152260699722518, | |
| "eval_loss": 1.6528053283691406, | |
| "eval_runtime": 109.0363, | |
| "eval_samples_per_second": 79.698, | |
| "eval_steps_per_second": 2.495, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 34.36512950094757, | |
| "grad_norm": 2.386359930038452, | |
| "learning_rate": 0.0003037037037037037, | |
| "loss": 0.0516, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 34.36512950094757, | |
| "eval_accuracy": 0.7713463751438435, | |
| "eval_f1_macro": 0.5894778786253475, | |
| "eval_loss": 1.65277099609375, | |
| "eval_runtime": 109.1673, | |
| "eval_samples_per_second": 79.603, | |
| "eval_steps_per_second": 2.492, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 35.375868603916615, | |
| "grad_norm": 1.8987774848937988, | |
| "learning_rate": 0.0002962962962962963, | |
| "loss": 0.0447, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 35.375868603916615, | |
| "eval_accuracy": 0.7799769850402761, | |
| "eval_f1_macro": 0.6297477374058172, | |
| "eval_loss": 1.813390851020813, | |
| "eval_runtime": 109.6977, | |
| "eval_samples_per_second": 79.218, | |
| "eval_steps_per_second": 2.48, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 36.38660770688566, | |
| "grad_norm": 1.353411078453064, | |
| "learning_rate": 0.0002888888888888889, | |
| "loss": 0.047, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 36.38660770688566, | |
| "eval_accuracy": 0.7795166858457998, | |
| "eval_f1_macro": 0.5795862617467612, | |
| "eval_loss": 1.663203477859497, | |
| "eval_runtime": 109.0323, | |
| "eval_samples_per_second": 79.701, | |
| "eval_steps_per_second": 2.495, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 37.3973468098547, | |
| "grad_norm": 1.1114296913146973, | |
| "learning_rate": 0.0002814814814814815, | |
| "loss": 0.0436, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 37.3973468098547, | |
| "eval_accuracy": 0.784234752589183, | |
| "eval_f1_macro": 0.5995152264247978, | |
| "eval_loss": 1.783818006515503, | |
| "eval_runtime": 109.4106, | |
| "eval_samples_per_second": 79.426, | |
| "eval_steps_per_second": 2.486, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 38.40808591282375, | |
| "grad_norm": 1.3422303199768066, | |
| "learning_rate": 0.0002740740740740741, | |
| "loss": 0.0422, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 38.40808591282375, | |
| "eval_accuracy": 0.7838895281933257, | |
| "eval_f1_macro": 0.6189287691248615, | |
| "eval_loss": 1.7172709703445435, | |
| "eval_runtime": 108.6629, | |
| "eval_samples_per_second": 79.972, | |
| "eval_steps_per_second": 2.503, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 39.4188250157928, | |
| "grad_norm": 1.8279023170471191, | |
| "learning_rate": 0.0002666666666666667, | |
| "loss": 0.0377, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 39.4188250157928, | |
| "eval_accuracy": 0.7834292289988493, | |
| "eval_f1_macro": 0.5814739153081228, | |
| "eval_loss": 1.7523770332336426, | |
| "eval_runtime": 108.9839, | |
| "eval_samples_per_second": 79.737, | |
| "eval_steps_per_second": 2.496, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 40.429564118761846, | |
| "grad_norm": 2.154459238052368, | |
| "learning_rate": 0.00025925925925925926, | |
| "loss": 0.0359, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 40.429564118761846, | |
| "eval_accuracy": 0.7886075949367088, | |
| "eval_f1_macro": 0.6293741181702724, | |
| "eval_loss": 1.623598337173462, | |
| "eval_runtime": 108.8195, | |
| "eval_samples_per_second": 79.857, | |
| "eval_steps_per_second": 2.5, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 41.44030322173089, | |
| "grad_norm": 0.8551483154296875, | |
| "learning_rate": 0.00025185185185185185, | |
| "loss": 0.0344, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 41.44030322173089, | |
| "eval_accuracy": 0.7815880322209436, | |
| "eval_f1_macro": 0.6087804648227756, | |
| "eval_loss": 1.7353272438049316, | |
| "eval_runtime": 109.2273, | |
| "eval_samples_per_second": 79.559, | |
| "eval_steps_per_second": 2.49, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 42.451042324699934, | |
| "grad_norm": 0.5178919434547424, | |
| "learning_rate": 0.00024444444444444443, | |
| "loss": 0.033, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 42.451042324699934, | |
| "eval_accuracy": 0.7820483314154201, | |
| "eval_f1_macro": 0.6001569016578011, | |
| "eval_loss": 1.727620244026184, | |
| "eval_runtime": 109.4385, | |
| "eval_samples_per_second": 79.405, | |
| "eval_steps_per_second": 2.485, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 43.46178142766898, | |
| "grad_norm": 0.4940205514431, | |
| "learning_rate": 0.00023703703703703704, | |
| "loss": 0.0325, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 43.46178142766898, | |
| "eval_accuracy": 0.7783659378596087, | |
| "eval_f1_macro": 0.6283289368126677, | |
| "eval_loss": 1.7798371315002441, | |
| "eval_runtime": 109.2576, | |
| "eval_samples_per_second": 79.537, | |
| "eval_steps_per_second": 2.49, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 44.47252053063803, | |
| "grad_norm": 0.8661497235298157, | |
| "learning_rate": 0.00022962962962962962, | |
| "loss": 0.0302, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 44.47252053063803, | |
| "eval_accuracy": 0.7828538550057538, | |
| "eval_f1_macro": 0.6164776778280789, | |
| "eval_loss": 1.7507109642028809, | |
| "eval_runtime": 109.1869, | |
| "eval_samples_per_second": 79.588, | |
| "eval_steps_per_second": 2.491, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 45.48325963360708, | |
| "grad_norm": 0.015332411043345928, | |
| "learning_rate": 0.0002222222222222222, | |
| "loss": 0.0268, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 45.48325963360708, | |
| "eval_accuracy": 0.7826237054085156, | |
| "eval_f1_macro": 0.6031617249417177, | |
| "eval_loss": 1.7825220823287964, | |
| "eval_runtime": 109.3518, | |
| "eval_samples_per_second": 79.468, | |
| "eval_steps_per_second": 2.487, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 46.493998736576124, | |
| "grad_norm": 0.5325392484664917, | |
| "learning_rate": 0.00021481481481481482, | |
| "loss": 0.0287, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 46.493998736576124, | |
| "eval_accuracy": 0.7882623705408516, | |
| "eval_f1_macro": 0.6256320010133759, | |
| "eval_loss": 1.6932624578475952, | |
| "eval_runtime": 108.513, | |
| "eval_samples_per_second": 80.083, | |
| "eval_steps_per_second": 2.507, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 47.504737839545164, | |
| "grad_norm": 0.5086055994033813, | |
| "learning_rate": 0.0002074074074074074, | |
| "loss": 0.0252, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 47.504737839545164, | |
| "eval_accuracy": 0.7856156501726121, | |
| "eval_f1_macro": 0.6143416230351354, | |
| "eval_loss": 1.7501070499420166, | |
| "eval_runtime": 109.2365, | |
| "eval_samples_per_second": 79.552, | |
| "eval_steps_per_second": 2.49, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 48.51547694251421, | |
| "grad_norm": 1.229317545890808, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0283, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 48.51547694251421, | |
| "eval_accuracy": 0.7843498273878021, | |
| "eval_f1_macro": 0.6189575264715401, | |
| "eval_loss": 1.9032423496246338, | |
| "eval_runtime": 108.2906, | |
| "eval_samples_per_second": 80.247, | |
| "eval_steps_per_second": 2.512, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 49.52621604548326, | |
| "grad_norm": 0.05275914818048477, | |
| "learning_rate": 0.0001925925925925926, | |
| "loss": 0.024, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 49.52621604548326, | |
| "eval_accuracy": 0.7874568469505179, | |
| "eval_f1_macro": 0.6393370936978522, | |
| "eval_loss": 1.8691409826278687, | |
| "eval_runtime": 108.1545, | |
| "eval_samples_per_second": 80.348, | |
| "eval_steps_per_second": 2.515, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 50.53695514845231, | |
| "grad_norm": 0.9653208255767822, | |
| "learning_rate": 0.00018518518518518518, | |
| "loss": 0.0229, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 50.53695514845231, | |
| "eval_accuracy": 0.786536248561565, | |
| "eval_f1_macro": 0.6026385719720891, | |
| "eval_loss": 1.7541390657424927, | |
| "eval_runtime": 107.9085, | |
| "eval_samples_per_second": 80.531, | |
| "eval_steps_per_second": 2.521, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 51.547694251421355, | |
| "grad_norm": 0.4658529758453369, | |
| "learning_rate": 0.00017777777777777779, | |
| "loss": 0.0219, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 51.547694251421355, | |
| "eval_accuracy": 0.7872266973532797, | |
| "eval_f1_macro": 0.6309747652348119, | |
| "eval_loss": 1.7537351846694946, | |
| "eval_runtime": 107.7743, | |
| "eval_samples_per_second": 80.632, | |
| "eval_steps_per_second": 2.524, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 52.558433354390395, | |
| "grad_norm": 0.32756420969963074, | |
| "learning_rate": 0.00017037037037037037, | |
| "loss": 0.0211, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 52.558433354390395, | |
| "eval_accuracy": 0.7934407364787112, | |
| "eval_f1_macro": 0.6206166338546538, | |
| "eval_loss": 1.6842619180679321, | |
| "eval_runtime": 107.7209, | |
| "eval_samples_per_second": 80.671, | |
| "eval_steps_per_second": 2.525, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 53.56917245735944, | |
| "grad_norm": 0.584701418876648, | |
| "learning_rate": 0.00016296296296296298, | |
| "loss": 0.0203, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 53.56917245735944, | |
| "eval_accuracy": 0.7950517836593786, | |
| "eval_f1_macro": 0.6206542591204762, | |
| "eval_loss": 1.699610710144043, | |
| "eval_runtime": 107.6954, | |
| "eval_samples_per_second": 80.691, | |
| "eval_steps_per_second": 2.526, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 54.57991156032849, | |
| "grad_norm": 0.0553191676735878, | |
| "learning_rate": 0.00015555555555555556, | |
| "loss": 0.0174, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 54.57991156032849, | |
| "eval_accuracy": 0.7894131185270425, | |
| "eval_f1_macro": 0.6214961351780512, | |
| "eval_loss": 1.8445045948028564, | |
| "eval_runtime": 107.7853, | |
| "eval_samples_per_second": 80.623, | |
| "eval_steps_per_second": 2.524, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 55.59065066329754, | |
| "grad_norm": 0.4328874945640564, | |
| "learning_rate": 0.00014814814814814815, | |
| "loss": 0.0197, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 55.59065066329754, | |
| "eval_accuracy": 0.792059838895282, | |
| "eval_f1_macro": 0.6308138834712996, | |
| "eval_loss": 1.8310879468917847, | |
| "eval_runtime": 107.7421, | |
| "eval_samples_per_second": 80.656, | |
| "eval_steps_per_second": 2.525, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 56.601389766266585, | |
| "grad_norm": 0.02704198658466339, | |
| "learning_rate": 0.00014074074074074076, | |
| "loss": 0.0169, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 56.601389766266585, | |
| "eval_accuracy": 0.7879171461449942, | |
| "eval_f1_macro": 0.5896127682611725, | |
| "eval_loss": 1.8162003755569458, | |
| "eval_runtime": 107.8141, | |
| "eval_samples_per_second": 80.602, | |
| "eval_steps_per_second": 2.523, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 57.612128869235626, | |
| "grad_norm": 0.2748865485191345, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.0121, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 57.612128869235626, | |
| "eval_accuracy": 0.7852704257767549, | |
| "eval_f1_macro": 0.5951106108532582, | |
| "eval_loss": 1.924727201461792, | |
| "eval_runtime": 107.712, | |
| "eval_samples_per_second": 80.678, | |
| "eval_steps_per_second": 2.525, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 58.62286797220467, | |
| "grad_norm": 0.0328911654651165, | |
| "learning_rate": 0.00012592592592592592, | |
| "loss": 0.0152, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 58.62286797220467, | |
| "eval_accuracy": 0.7881472957422324, | |
| "eval_f1_macro": 0.6063430405057288, | |
| "eval_loss": 1.8502182960510254, | |
| "eval_runtime": 107.788, | |
| "eval_samples_per_second": 80.621, | |
| "eval_steps_per_second": 2.523, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 59.63360707517372, | |
| "grad_norm": 0.00955616869032383, | |
| "learning_rate": 0.00011851851851851852, | |
| "loss": 0.0142, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 59.63360707517372, | |
| "eval_accuracy": 0.789873417721519, | |
| "eval_f1_macro": 0.617993825444742, | |
| "eval_loss": 1.7803289890289307, | |
| "eval_runtime": 107.8043, | |
| "eval_samples_per_second": 80.609, | |
| "eval_steps_per_second": 2.523, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 60.64434617814277, | |
| "grad_norm": 0.06125176325440407, | |
| "learning_rate": 0.0001111111111111111, | |
| "loss": 0.0105, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 60.64434617814277, | |
| "eval_accuracy": 0.7861910241657077, | |
| "eval_f1_macro": 0.6254018987758924, | |
| "eval_loss": 1.916595458984375, | |
| "eval_runtime": 107.7673, | |
| "eval_samples_per_second": 80.637, | |
| "eval_steps_per_second": 2.524, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 61.655085281111816, | |
| "grad_norm": 0.10605888813734055, | |
| "learning_rate": 0.0001037037037037037, | |
| "loss": 0.0116, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 61.655085281111816, | |
| "eval_accuracy": 0.7858457997698504, | |
| "eval_f1_macro": 0.5961002471321352, | |
| "eval_loss": 1.9204109907150269, | |
| "eval_runtime": 107.7648, | |
| "eval_samples_per_second": 80.639, | |
| "eval_steps_per_second": 2.524, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 62.66582438408086, | |
| "grad_norm": 0.044181693345308304, | |
| "learning_rate": 9.62962962962963e-05, | |
| "loss": 0.0112, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 62.66582438408086, | |
| "eval_accuracy": 0.7878020713463751, | |
| "eval_f1_macro": 0.6235710102313945, | |
| "eval_loss": 1.9822152853012085, | |
| "eval_runtime": 107.735, | |
| "eval_samples_per_second": 80.661, | |
| "eval_steps_per_second": 2.525, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 63.676563487049904, | |
| "grad_norm": 0.023459970951080322, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 0.0102, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 63.676563487049904, | |
| "eval_accuracy": 0.7840046029919447, | |
| "eval_f1_macro": 0.6155669395709024, | |
| "eval_loss": 1.9653674364089966, | |
| "eval_runtime": 107.7821, | |
| "eval_samples_per_second": 80.626, | |
| "eval_steps_per_second": 2.524, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 64.68730259001894, | |
| "grad_norm": 1.9076263904571533, | |
| "learning_rate": 8.148148148148149e-05, | |
| "loss": 0.01, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 64.68730259001894, | |
| "eval_accuracy": 0.7880322209436134, | |
| "eval_f1_macro": 0.6226637633596005, | |
| "eval_loss": 1.938231348991394, | |
| "eval_runtime": 107.7205, | |
| "eval_samples_per_second": 80.672, | |
| "eval_steps_per_second": 2.525, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 65.698041692988, | |
| "grad_norm": 0.4948989748954773, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 0.0101, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 65.698041692988, | |
| "eval_accuracy": 0.7960874568469505, | |
| "eval_f1_macro": 0.6277935659004009, | |
| "eval_loss": 1.8299671411514282, | |
| "eval_runtime": 107.7348, | |
| "eval_samples_per_second": 80.661, | |
| "eval_steps_per_second": 2.525, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 66.70878079595704, | |
| "grad_norm": 0.00608784519135952, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.0086, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 66.70878079595704, | |
| "eval_accuracy": 0.7968929804372842, | |
| "eval_f1_macro": 0.6234372893298947, | |
| "eval_loss": 1.9254202842712402, | |
| "eval_runtime": 108.035, | |
| "eval_samples_per_second": 80.437, | |
| "eval_steps_per_second": 2.518, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 67.7195198989261, | |
| "grad_norm": 0.08328448981046677, | |
| "learning_rate": 5.925925925925926e-05, | |
| "loss": 0.0073, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 67.7195198989261, | |
| "eval_accuracy": 0.7915995397008055, | |
| "eval_f1_macro": 0.6320923241131308, | |
| "eval_loss": 1.8887046575546265, | |
| "eval_runtime": 107.8399, | |
| "eval_samples_per_second": 80.582, | |
| "eval_steps_per_second": 2.522, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 68.73025900189513, | |
| "grad_norm": 0.02061997540295124, | |
| "learning_rate": 5.185185185185185e-05, | |
| "loss": 0.0069, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 68.73025900189513, | |
| "eval_accuracy": 0.794361334867664, | |
| "eval_f1_macro": 0.636665979654867, | |
| "eval_loss": 1.9074466228485107, | |
| "eval_runtime": 107.6829, | |
| "eval_samples_per_second": 80.7, | |
| "eval_steps_per_second": 2.526, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 69.74099810486418, | |
| "grad_norm": 0.012987918220460415, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.0059, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 69.74099810486418, | |
| "eval_accuracy": 0.792059838895282, | |
| "eval_f1_macro": 0.6315720450251525, | |
| "eval_loss": 1.9398057460784912, | |
| "eval_runtime": 107.8991, | |
| "eval_samples_per_second": 80.538, | |
| "eval_steps_per_second": 2.521, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 70.75173720783323, | |
| "grad_norm": 0.005101632326841354, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.0066, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 70.75173720783323, | |
| "eval_accuracy": 0.794361334867664, | |
| "eval_f1_macro": 0.6349818220797456, | |
| "eval_loss": 1.8699119091033936, | |
| "eval_runtime": 109.2809, | |
| "eval_samples_per_second": 79.52, | |
| "eval_steps_per_second": 2.489, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 71.76247631080227, | |
| "grad_norm": 0.6047748923301697, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.0062, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 71.76247631080227, | |
| "eval_accuracy": 0.7951668584579977, | |
| "eval_f1_macro": 0.6343250573277666, | |
| "eval_loss": 1.8893409967422485, | |
| "eval_runtime": 109.2978, | |
| "eval_samples_per_second": 79.508, | |
| "eval_steps_per_second": 2.489, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 72.77321541377133, | |
| "grad_norm": 0.012553258799016476, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0058, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 72.77321541377133, | |
| "eval_accuracy": 0.7982738780207135, | |
| "eval_f1_macro": 0.6409643965446785, | |
| "eval_loss": 1.883091926574707, | |
| "eval_runtime": 109.2468, | |
| "eval_samples_per_second": 79.545, | |
| "eval_steps_per_second": 2.49, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 73.78395451674037, | |
| "grad_norm": 0.0007793375989422202, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.0056, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 73.78395451674037, | |
| "eval_accuracy": 0.7958573072497123, | |
| "eval_f1_macro": 0.6356613761441215, | |
| "eval_loss": 1.8901586532592773, | |
| "eval_runtime": 108.6154, | |
| "eval_samples_per_second": 80.007, | |
| "eval_steps_per_second": 2.504, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 74.7946936197094, | |
| "grad_norm": 0.14352725446224213, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.0053, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 74.7946936197094, | |
| "eval_accuracy": 0.7991944764096662, | |
| "eval_f1_macro": 0.643747242061282, | |
| "eval_loss": 1.888542890548706, | |
| "eval_runtime": 108.5316, | |
| "eval_samples_per_second": 80.069, | |
| "eval_steps_per_second": 2.506, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 75.80543272267846, | |
| "grad_norm": 0.9781034588813782, | |
| "learning_rate": 0.0, | |
| "loss": 0.0046, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 75.80543272267846, | |
| "eval_accuracy": 0.8005753739930955, | |
| "eval_f1_macro": 0.6435443913467072, | |
| "eval_loss": 1.888439655303955, | |
| "eval_runtime": 108.5256, | |
| "eval_samples_per_second": 80.073, | |
| "eval_steps_per_second": 2.506, | |
| "step": 30000 | |
| } | |
| ], | |
| "logging_steps": 400, | |
| "max_steps": 30000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 76, | |
| "save_steps": 1200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.8164789316384843e+20, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |