Training in progress, step 1000000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3934f519240d590552d43746648c081056a7995bf6c44310ab67246f6ef8ad67
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:619263ccd39f733619bbbf55e178f9282f2d9680aa9481a120d8cd9e41fe0f1b
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d806e9f9f09813043b95cbeda18b18cdfb60c100fbde3239bf79ee81c659dc36
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -19806,11 +19806,211 @@
|
|
| 19806 |
"eval_samples_per_second": 878.327,
|
| 19807 |
"eval_steps_per_second": 13.766,
|
| 19808 |
"step": 990000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19809 |
}
|
| 19810 |
],
|
| 19811 |
"max_steps": 1000000,
|
| 19812 |
"num_train_epochs": 12,
|
| 19813 |
-
"total_flos":
|
| 19814 |
"trial_name": null,
|
| 19815 |
"trial_params": null
|
| 19816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.152872422292361,
|
| 5 |
+
"global_step": 1000000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 19806 |
"eval_samples_per_second": 878.327,
|
| 19807 |
"eval_steps_per_second": 13.766,
|
| 19808 |
"step": 990000
|
| 19809 |
+
},
|
| 19810 |
+
{
|
| 19811 |
+
"epoch": 11.05,
|
| 19812 |
+
"learning_rate": 1.003454077439879e-05,
|
| 19813 |
+
"loss": 0.1795,
|
| 19814 |
+
"step": 990500
|
| 19815 |
+
},
|
| 19816 |
+
{
|
| 19817 |
+
"epoch": 11.05,
|
| 19818 |
+
"learning_rate": 1.0031000845556304e-05,
|
| 19819 |
+
"loss": 0.1792,
|
| 19820 |
+
"step": 991000
|
| 19821 |
+
},
|
| 19822 |
+
{
|
| 19823 |
+
"epoch": 11.05,
|
| 19824 |
+
"eval_loss": 0.17132483422756195,
|
| 19825 |
+
"eval_runtime": 2.6196,
|
| 19826 |
+
"eval_samples_per_second": 876.851,
|
| 19827 |
+
"eval_steps_per_second": 13.743,
|
| 19828 |
+
"step": 991000
|
| 19829 |
+
},
|
| 19830 |
+
{
|
| 19831 |
+
"epoch": 11.06,
|
| 19832 |
+
"learning_rate": 1.0027652209285743e-05,
|
| 19833 |
+
"loss": 0.1795,
|
| 19834 |
+
"step": 991500
|
| 19835 |
+
},
|
| 19836 |
+
{
|
| 19837 |
+
"epoch": 11.06,
|
| 19838 |
+
"learning_rate": 1.0024494874742152e-05,
|
| 19839 |
+
"loss": 0.1794,
|
| 19840 |
+
"step": 992000
|
| 19841 |
+
},
|
| 19842 |
+
{
|
| 19843 |
+
"epoch": 11.06,
|
| 19844 |
+
"eval_loss": 0.1712769716978073,
|
| 19845 |
+
"eval_runtime": 2.602,
|
| 19846 |
+
"eval_samples_per_second": 882.772,
|
| 19847 |
+
"eval_steps_per_second": 13.835,
|
| 19848 |
+
"step": 992000
|
| 19849 |
+
},
|
| 19850 |
+
{
|
| 19851 |
+
"epoch": 11.07,
|
| 19852 |
+
"learning_rate": 1.0021528850557572e-05,
|
| 19853 |
+
"loss": 0.1793,
|
| 19854 |
+
"step": 992500
|
| 19855 |
+
},
|
| 19856 |
+
{
|
| 19857 |
+
"epoch": 11.07,
|
| 19858 |
+
"learning_rate": 1.0018754144840986e-05,
|
| 19859 |
+
"loss": 0.1794,
|
| 19860 |
+
"step": 993000
|
| 19861 |
+
},
|
| 19862 |
+
{
|
| 19863 |
+
"epoch": 11.07,
|
| 19864 |
+
"eval_loss": 0.17019130289554596,
|
| 19865 |
+
"eval_runtime": 2.6352,
|
| 19866 |
+
"eval_samples_per_second": 871.66,
|
| 19867 |
+
"eval_steps_per_second": 13.661,
|
| 19868 |
+
"step": 993000
|
| 19869 |
+
},
|
| 19870 |
+
{
|
| 19871 |
+
"epoch": 11.08,
|
| 19872 |
+
"learning_rate": 1.0016170765178345e-05,
|
| 19873 |
+
"loss": 0.1796,
|
| 19874 |
+
"step": 993500
|
| 19875 |
+
},
|
| 19876 |
+
{
|
| 19877 |
+
"epoch": 11.09,
|
| 19878 |
+
"learning_rate": 1.0013778718632507e-05,
|
| 19879 |
+
"loss": 0.1795,
|
| 19880 |
+
"step": 994000
|
| 19881 |
+
},
|
| 19882 |
+
{
|
| 19883 |
+
"epoch": 11.09,
|
| 19884 |
+
"eval_loss": 0.16902120411396027,
|
| 19885 |
+
"eval_runtime": 2.6744,
|
| 19886 |
+
"eval_samples_per_second": 858.899,
|
| 19887 |
+
"eval_steps_per_second": 13.461,
|
| 19888 |
+
"step": 994000
|
| 19889 |
+
},
|
| 19890 |
+
{
|
| 19891 |
+
"epoch": 11.09,
|
| 19892 |
+
"learning_rate": 1.0011578011743233e-05,
|
| 19893 |
+
"loss": 0.1794,
|
| 19894 |
+
"step": 994500
|
| 19895 |
+
},
|
| 19896 |
+
{
|
| 19897 |
+
"epoch": 11.1,
|
| 19898 |
+
"learning_rate": 1.000956865052717e-05,
|
| 19899 |
+
"loss": 0.1795,
|
| 19900 |
+
"step": 995000
|
| 19901 |
+
},
|
| 19902 |
+
{
|
| 19903 |
+
"epoch": 11.1,
|
| 19904 |
+
"eval_loss": 0.17112106084823608,
|
| 19905 |
+
"eval_runtime": 2.6298,
|
| 19906 |
+
"eval_samples_per_second": 873.44,
|
| 19907 |
+
"eval_steps_per_second": 13.689,
|
| 19908 |
+
"step": 995000
|
| 19909 |
+
},
|
| 19910 |
+
{
|
| 19911 |
+
"epoch": 11.1,
|
| 19912 |
+
"learning_rate": 1.0007750640477843e-05,
|
| 19913 |
+
"loss": 0.1797,
|
| 19914 |
+
"step": 995500
|
| 19915 |
+
},
|
| 19916 |
+
{
|
| 19917 |
+
"epoch": 11.11,
|
| 19918 |
+
"learning_rate": 1.0006123986565623e-05,
|
| 19919 |
+
"loss": 0.1797,
|
| 19920 |
+
"step": 996000
|
| 19921 |
+
},
|
| 19922 |
+
{
|
| 19923 |
+
"epoch": 11.11,
|
| 19924 |
+
"eval_loss": 0.17197231948375702,
|
| 19925 |
+
"eval_runtime": 2.6674,
|
| 19926 |
+
"eval_samples_per_second": 861.138,
|
| 19927 |
+
"eval_steps_per_second": 13.496,
|
| 19928 |
+
"step": 996000
|
| 19929 |
+
},
|
| 19930 |
+
{
|
| 19931 |
+
"epoch": 11.11,
|
| 19932 |
+
"learning_rate": 1.0004688693237708e-05,
|
| 19933 |
+
"loss": 0.179,
|
| 19934 |
+
"step": 996500
|
| 19935 |
+
},
|
| 19936 |
+
{
|
| 19937 |
+
"epoch": 11.12,
|
| 19938 |
+
"learning_rate": 1.0003444764418138e-05,
|
| 19939 |
+
"loss": 0.1795,
|
| 19940 |
+
"step": 997000
|
| 19941 |
+
},
|
| 19942 |
+
{
|
| 19943 |
+
"epoch": 11.12,
|
| 19944 |
+
"eval_loss": 0.16935667395591736,
|
| 19945 |
+
"eval_runtime": 2.6744,
|
| 19946 |
+
"eval_samples_per_second": 858.882,
|
| 19947 |
+
"eval_steps_per_second": 13.461,
|
| 19948 |
+
"step": 997000
|
| 19949 |
+
},
|
| 19950 |
+
{
|
| 19951 |
+
"epoch": 11.12,
|
| 19952 |
+
"learning_rate": 1.0002392203507781e-05,
|
| 19953 |
+
"loss": 0.1796,
|
| 19954 |
+
"step": 997500
|
| 19955 |
+
},
|
| 19956 |
+
{
|
| 19957 |
+
"epoch": 11.13,
|
| 19958 |
+
"learning_rate": 1.000153101338428e-05,
|
| 19959 |
+
"loss": 0.1794,
|
| 19960 |
+
"step": 998000
|
| 19961 |
+
},
|
| 19962 |
+
{
|
| 19963 |
+
"epoch": 11.13,
|
| 19964 |
+
"eval_loss": 0.16944564878940582,
|
| 19965 |
+
"eval_runtime": 2.6058,
|
| 19966 |
+
"eval_samples_per_second": 881.508,
|
| 19967 |
+
"eval_steps_per_second": 13.816,
|
| 19968 |
+
"step": 998000
|
| 19969 |
+
},
|
| 19970 |
+
{
|
| 19971 |
+
"epoch": 11.14,
|
| 19972 |
+
"learning_rate": 1.00008611964021e-05,
|
| 19973 |
+
"loss": 0.1795,
|
| 19974 |
+
"step": 998500
|
| 19975 |
+
},
|
| 19976 |
+
{
|
| 19977 |
+
"epoch": 11.14,
|
| 19978 |
+
"learning_rate": 1.00003827543925e-05,
|
| 19979 |
+
"loss": 0.1797,
|
| 19980 |
+
"step": 999000
|
| 19981 |
+
},
|
| 19982 |
+
{
|
| 19983 |
+
"epoch": 11.14,
|
| 19984 |
+
"eval_loss": 0.1695910096168518,
|
| 19985 |
+
"eval_runtime": 2.6979,
|
| 19986 |
+
"eval_samples_per_second": 851.388,
|
| 19987 |
+
"eval_steps_per_second": 13.343,
|
| 19988 |
+
"step": 999000
|
| 19989 |
+
},
|
| 19990 |
+
{
|
| 19991 |
+
"epoch": 11.15,
|
| 19992 |
+
"learning_rate": 1.0000095688663532e-05,
|
| 19993 |
+
"loss": 0.1796,
|
| 19994 |
+
"step": 999500
|
| 19995 |
+
},
|
| 19996 |
+
{
|
| 19997 |
+
"epoch": 11.15,
|
| 19998 |
+
"learning_rate": 1e-05,
|
| 19999 |
+
"loss": 0.1796,
|
| 20000 |
+
"step": 1000000
|
| 20001 |
+
},
|
| 20002 |
+
{
|
| 20003 |
+
"epoch": 11.15,
|
| 20004 |
+
"eval_loss": 0.16828955709934235,
|
| 20005 |
+
"eval_runtime": 2.6549,
|
| 20006 |
+
"eval_samples_per_second": 865.189,
|
| 20007 |
+
"eval_steps_per_second": 13.56,
|
| 20008 |
+
"step": 1000000
|
| 20009 |
}
|
| 20010 |
],
|
| 20011 |
"max_steps": 1000000,
|
| 20012 |
"num_train_epochs": 12,
|
| 20013 |
+
"total_flos": 7.009965862112043e+22,
|
| 20014 |
"trial_name": null,
|
| 20015 |
"trial_params": null
|
| 20016 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:619263ccd39f733619bbbf55e178f9282f2d9680aa9481a120d8cd9e41fe0f1b
|
| 3 |
size 449471589
|