Training in progress, step 980000
Browse files- config.json +1 -1
- last-checkpoint/config.json +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
last-checkpoint/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-970000",
|
| 3 |
"architectures": [
|
| 4 |
"PIXELForPreTraining"
|
| 5 |
],
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa6e21575dd459731b96c75fb2eff44427788a2b21e2cba9f9983669023c697a
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1f60f9446cba0320cf9ced93c4b14816af8d6988d011f7cc2f5b01e8ada101d
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -19406,11 +19406,211 @@
|
|
| 19406 |
"eval_samples_per_second": 884.178,
|
| 19407 |
"eval_steps_per_second": 13.857,
|
| 19408 |
"step": 970000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19409 |
}
|
| 19410 |
],
|
| 19411 |
"max_steps": 1000000,
|
| 19412 |
"num_train_epochs": 12,
|
| 19413 |
-
"total_flos": 6.
|
| 19414 |
"trial_name": null,
|
| 19415 |
"trial_params": null
|
| 19416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.929814973846515,
|
| 5 |
+
"global_step": 980000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 19406 |
"eval_samples_per_second": 884.178,
|
| 19407 |
"eval_steps_per_second": 13.857,
|
| 19408 |
"step": 970000
|
| 19409 |
+
},
|
| 19410 |
+
{
|
| 19411 |
+
"epoch": 10.82,
|
| 19412 |
+
"learning_rate": 1.0332828229586692e-05,
|
| 19413 |
+
"loss": 0.1799,
|
| 19414 |
+
"step": 970500
|
| 19415 |
+
},
|
| 19416 |
+
{
|
| 19417 |
+
"epoch": 10.83,
|
| 19418 |
+
"learning_rate": 1.032165010471157e-05,
|
| 19419 |
+
"loss": 0.1796,
|
| 19420 |
+
"step": 971000
|
| 19421 |
+
},
|
| 19422 |
+
{
|
| 19423 |
+
"epoch": 10.83,
|
| 19424 |
+
"eval_loss": 0.17119638621807098,
|
| 19425 |
+
"eval_runtime": 2.5911,
|
| 19426 |
+
"eval_samples_per_second": 886.512,
|
| 19427 |
+
"eval_steps_per_second": 13.894,
|
| 19428 |
+
"step": 971000
|
| 19429 |
+
},
|
| 19430 |
+
{
|
| 19431 |
+
"epoch": 10.84,
|
| 19432 |
+
"learning_rate": 1.0310662477784401e-05,
|
| 19433 |
+
"loss": 0.1804,
|
| 19434 |
+
"step": 971500
|
| 19435 |
+
},
|
| 19436 |
+
{
|
| 19437 |
+
"epoch": 10.84,
|
| 19438 |
+
"learning_rate": 1.0299865378844936e-05,
|
| 19439 |
+
"loss": 0.1798,
|
| 19440 |
+
"step": 972000
|
| 19441 |
+
},
|
| 19442 |
+
{
|
| 19443 |
+
"epoch": 10.84,
|
| 19444 |
+
"eval_loss": 0.1710081547498703,
|
| 19445 |
+
"eval_runtime": 2.5437,
|
| 19446 |
+
"eval_samples_per_second": 903.014,
|
| 19447 |
+
"eval_steps_per_second": 14.153,
|
| 19448 |
+
"step": 972000
|
| 19449 |
+
},
|
| 19450 |
+
{
|
| 19451 |
+
"epoch": 10.85,
|
| 19452 |
+
"learning_rate": 1.028925883741203e-05,
|
| 19453 |
+
"loss": 0.18,
|
| 19454 |
+
"step": 972500
|
| 19455 |
+
},
|
| 19456 |
+
{
|
| 19457 |
+
"epoch": 10.85,
|
| 19458 |
+
"learning_rate": 1.0278842882483569e-05,
|
| 19459 |
+
"loss": 0.1797,
|
| 19460 |
+
"step": 973000
|
| 19461 |
+
},
|
| 19462 |
+
{
|
| 19463 |
+
"epoch": 10.85,
|
| 19464 |
+
"eval_loss": 0.17146818339824677,
|
| 19465 |
+
"eval_runtime": 2.5692,
|
| 19466 |
+
"eval_samples_per_second": 894.045,
|
| 19467 |
+
"eval_steps_per_second": 14.012,
|
| 19468 |
+
"step": 973000
|
| 19469 |
+
},
|
| 19470 |
+
{
|
| 19471 |
+
"epoch": 10.86,
|
| 19472 |
+
"learning_rate": 1.026861754253637e-05,
|
| 19473 |
+
"loss": 0.1796,
|
| 19474 |
+
"step": 973500
|
| 19475 |
+
},
|
| 19476 |
+
{
|
| 19477 |
+
"epoch": 10.86,
|
| 19478 |
+
"learning_rate": 1.025858284552612e-05,
|
| 19479 |
+
"loss": 0.1797,
|
| 19480 |
+
"step": 974000
|
| 19481 |
+
},
|
| 19482 |
+
{
|
| 19483 |
+
"epoch": 10.86,
|
| 19484 |
+
"eval_loss": 0.1706797480583191,
|
| 19485 |
+
"eval_runtime": 2.6865,
|
| 19486 |
+
"eval_samples_per_second": 855.008,
|
| 19487 |
+
"eval_steps_per_second": 13.4,
|
| 19488 |
+
"step": 974000
|
| 19489 |
+
},
|
| 19490 |
+
{
|
| 19491 |
+
"epoch": 10.87,
|
| 19492 |
+
"learning_rate": 1.0248738818887307e-05,
|
| 19493 |
+
"loss": 0.1799,
|
| 19494 |
+
"step": 974500
|
| 19495 |
+
},
|
| 19496 |
+
{
|
| 19497 |
+
"epoch": 10.87,
|
| 19498 |
+
"learning_rate": 1.023908548953311e-05,
|
| 19499 |
+
"loss": 0.1799,
|
| 19500 |
+
"step": 975000
|
| 19501 |
+
},
|
| 19502 |
+
{
|
| 19503 |
+
"epoch": 10.87,
|
| 19504 |
+
"eval_loss": 0.1708817481994629,
|
| 19505 |
+
"eval_runtime": 2.5759,
|
| 19506 |
+
"eval_samples_per_second": 891.738,
|
| 19507 |
+
"eval_steps_per_second": 13.976,
|
| 19508 |
+
"step": 975000
|
| 19509 |
+
},
|
| 19510 |
+
{
|
| 19511 |
+
"epoch": 10.88,
|
| 19512 |
+
"learning_rate": 1.0229622883855378e-05,
|
| 19513 |
+
"loss": 0.1798,
|
| 19514 |
+
"step": 975500
|
| 19515 |
+
},
|
| 19516 |
+
{
|
| 19517 |
+
"epoch": 10.89,
|
| 19518 |
+
"learning_rate": 1.02203510277245e-05,
|
| 19519 |
+
"loss": 0.1796,
|
| 19520 |
+
"step": 976000
|
| 19521 |
+
},
|
| 19522 |
+
{
|
| 19523 |
+
"epoch": 10.89,
|
| 19524 |
+
"eval_loss": 0.1709393560886383,
|
| 19525 |
+
"eval_runtime": 2.6094,
|
| 19526 |
+
"eval_samples_per_second": 880.296,
|
| 19527 |
+
"eval_steps_per_second": 13.797,
|
| 19528 |
+
"step": 976000
|
| 19529 |
+
},
|
| 19530 |
+
{
|
| 19531 |
+
"epoch": 10.89,
|
| 19532 |
+
"learning_rate": 1.021126994648939e-05,
|
| 19533 |
+
"loss": 0.1801,
|
| 19534 |
+
"step": 976500
|
| 19535 |
+
},
|
| 19536 |
+
{
|
| 19537 |
+
"epoch": 10.9,
|
| 19538 |
+
"learning_rate": 1.0202379664977364e-05,
|
| 19539 |
+
"loss": 0.1799,
|
| 19540 |
+
"step": 977000
|
| 19541 |
+
},
|
| 19542 |
+
{
|
| 19543 |
+
"epoch": 10.9,
|
| 19544 |
+
"eval_loss": 0.17174768447875977,
|
| 19545 |
+
"eval_runtime": 2.6289,
|
| 19546 |
+
"eval_samples_per_second": 873.739,
|
| 19547 |
+
"eval_steps_per_second": 13.694,
|
| 19548 |
+
"step": 977000
|
| 19549 |
+
},
|
| 19550 |
+
{
|
| 19551 |
+
"epoch": 10.9,
|
| 19552 |
+
"learning_rate": 1.019368020749412e-05,
|
| 19553 |
+
"loss": 0.1797,
|
| 19554 |
+
"step": 977500
|
| 19555 |
+
},
|
| 19556 |
+
{
|
| 19557 |
+
"epoch": 10.91,
|
| 19558 |
+
"learning_rate": 1.018517159782365e-05,
|
| 19559 |
+
"loss": 0.1797,
|
| 19560 |
+
"step": 978000
|
| 19561 |
+
},
|
| 19562 |
+
{
|
| 19563 |
+
"epoch": 10.91,
|
| 19564 |
+
"eval_loss": 0.16800174117088318,
|
| 19565 |
+
"eval_runtime": 2.57,
|
| 19566 |
+
"eval_samples_per_second": 893.767,
|
| 19567 |
+
"eval_steps_per_second": 14.008,
|
| 19568 |
+
"step": 978000
|
| 19569 |
+
},
|
| 19570 |
+
{
|
| 19571 |
+
"epoch": 10.91,
|
| 19572 |
+
"learning_rate": 1.0176853859228149e-05,
|
| 19573 |
+
"loss": 0.1794,
|
| 19574 |
+
"step": 978500
|
| 19575 |
+
},
|
| 19576 |
+
{
|
| 19577 |
+
"epoch": 10.92,
|
| 19578 |
+
"learning_rate": 1.0168727014448004e-05,
|
| 19579 |
+
"loss": 0.1794,
|
| 19580 |
+
"step": 979000
|
| 19581 |
+
},
|
| 19582 |
+
{
|
| 19583 |
+
"epoch": 10.92,
|
| 19584 |
+
"eval_loss": 0.16953879594802856,
|
| 19585 |
+
"eval_runtime": 2.6173,
|
| 19586 |
+
"eval_samples_per_second": 877.629,
|
| 19587 |
+
"eval_steps_per_second": 13.755,
|
| 19588 |
+
"step": 979000
|
| 19589 |
+
},
|
| 19590 |
+
{
|
| 19591 |
+
"epoch": 10.92,
|
| 19592 |
+
"learning_rate": 1.0160791085701714e-05,
|
| 19593 |
+
"loss": 0.1798,
|
| 19594 |
+
"step": 979500
|
| 19595 |
+
},
|
| 19596 |
+
{
|
| 19597 |
+
"epoch": 10.93,
|
| 19598 |
+
"learning_rate": 1.0153046094685783e-05,
|
| 19599 |
+
"loss": 0.1794,
|
| 19600 |
+
"step": 980000
|
| 19601 |
+
},
|
| 19602 |
+
{
|
| 19603 |
+
"epoch": 10.93,
|
| 19604 |
+
"eval_loss": 0.1709355264902115,
|
| 19605 |
+
"eval_runtime": 2.587,
|
| 19606 |
+
"eval_samples_per_second": 887.915,
|
| 19607 |
+
"eval_steps_per_second": 13.916,
|
| 19608 |
+
"step": 980000
|
| 19609 |
}
|
| 19610 |
],
|
| 19611 |
"max_steps": 1000000,
|
| 19612 |
"num_train_epochs": 12,
|
| 19613 |
+
"total_flos": 6.869770816498864e+22,
|
| 19614 |
"trial_name": null,
|
| 19615 |
"trial_params": null
|
| 19616 |
}
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
|
| 3 |
size 3311
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
|
| 3 |
size 449471589
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a854cf81d57a7e4747d79eeee0e792b9b0db2dfcccddbeaecfbfa4a0ff53eef
|
| 3 |
size 3311
|