{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 13370,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3739715781600598,
      "grad_norm": 3.664752721786499,
      "learning_rate": 4.813014210919971e-05,
      "loss": 2.1969,
      "step": 500
    },
    {
      "epoch": 0.7479431563201197,
      "grad_norm": 2.6418004035949707,
      "learning_rate": 4.62602842183994e-05,
      "loss": 2.0162,
      "step": 1000
    },
    {
      "epoch": 1.1219147344801794,
      "grad_norm": 2.6499061584472656,
      "learning_rate": 4.4390426327599105e-05,
      "loss": 1.928,
      "step": 1500
    },
    {
      "epoch": 1.4958863126402393,
      "grad_norm": 2.92419695854187,
      "learning_rate": 4.252056843679881e-05,
      "loss": 1.8714,
      "step": 2000
    },
    {
      "epoch": 1.8698578908002992,
      "grad_norm": 2.9675424098968506,
      "learning_rate": 4.06507105459985e-05,
      "loss": 1.8688,
      "step": 2500
    },
    {
      "epoch": 2.243829468960359,
      "grad_norm": 2.4510722160339355,
      "learning_rate": 3.878085265519821e-05,
      "loss": 1.8199,
      "step": 3000
    },
    {
      "epoch": 2.6178010471204187,
      "grad_norm": 3.3887815475463867,
      "learning_rate": 3.691099476439791e-05,
      "loss": 1.7878,
      "step": 3500
    },
    {
      "epoch": 2.9917726252804786,
      "grad_norm": 2.7540111541748047,
      "learning_rate": 3.5041136873597606e-05,
      "loss": 1.804,
      "step": 4000
    },
    {
      "epoch": 3.3657442034405385,
      "grad_norm": 2.7998127937316895,
      "learning_rate": 3.317127898279731e-05,
      "loss": 1.764,
      "step": 4500
    },
    {
      "epoch": 3.7397157816005984,
      "grad_norm": 3.000854015350342,
      "learning_rate": 3.130142109199701e-05,
      "loss": 1.7486,
      "step": 5000
    },
    {
      "epoch": 4.113687359760658,
      "grad_norm": 2.85657000541687,
      "learning_rate": 2.9431563201196712e-05,
      "loss": 1.7418,
      "step": 5500
    },
    {
      "epoch": 4.487658937920718,
      "grad_norm": 2.8628413677215576,
      "learning_rate": 2.7561705310396414e-05,
      "loss": 1.7193,
      "step": 6000
    },
    {
      "epoch": 4.861630516080778,
      "grad_norm": 3.066136360168457,
      "learning_rate": 2.569184741959611e-05,
      "loss": 1.7083,
      "step": 6500
    },
    {
      "epoch": 5.2356020942408374,
      "grad_norm": 2.46604585647583,
      "learning_rate": 2.382198952879581e-05,
      "loss": 1.6959,
      "step": 7000
    },
    {
      "epoch": 5.609573672400898,
      "grad_norm": 2.4070425033569336,
      "learning_rate": 2.1952131637995513e-05,
      "loss": 1.6787,
      "step": 7500
    },
    {
      "epoch": 5.983545250560957,
      "grad_norm": 2.59104585647583,
      "learning_rate": 2.0082273747195215e-05,
      "loss": 1.6979,
      "step": 8000
    },
    {
      "epoch": 6.3575168287210175,
      "grad_norm": 2.270085334777832,
      "learning_rate": 1.8212415856394914e-05,
      "loss": 1.6618,
      "step": 8500
    },
    {
      "epoch": 6.731488406881077,
      "grad_norm": 2.6211342811584473,
      "learning_rate": 1.6342557965594616e-05,
      "loss": 1.665,
      "step": 9000
    },
    {
      "epoch": 7.105459985041137,
      "grad_norm": 2.876598358154297,
      "learning_rate": 1.4472700074794315e-05,
      "loss": 1.6716,
      "step": 9500
    },
    {
      "epoch": 7.479431563201197,
      "grad_norm": 2.5392422676086426,
      "learning_rate": 1.2602842183994019e-05,
      "loss": 1.6354,
      "step": 10000
    },
    {
      "epoch": 7.853403141361256,
      "grad_norm": 2.6859071254730225,
      "learning_rate": 1.0732984293193717e-05,
      "loss": 1.6581,
      "step": 10500
    },
    {
      "epoch": 8.227374719521316,
      "grad_norm": 2.5266973972320557,
      "learning_rate": 8.863126402393418e-06,
      "loss": 1.6501,
      "step": 11000
    },
    {
      "epoch": 8.601346297681376,
      "grad_norm": 1.9921244382858276,
      "learning_rate": 6.993268511593119e-06,
      "loss": 1.6351,
      "step": 11500
    },
    {
      "epoch": 8.975317875841435,
      "grad_norm": 2.633855104446411,
      "learning_rate": 5.12341062079282e-06,
      "loss": 1.6507,
      "step": 12000
    },
    {
      "epoch": 9.349289454001497,
      "grad_norm": 2.9301204681396484,
      "learning_rate": 3.2535527299925206e-06,
      "loss": 1.6296,
      "step": 12500
    },
    {
      "epoch": 9.723261032161556,
      "grad_norm": 2.9712412357330322,
      "learning_rate": 1.3836948391922214e-06,
      "loss": 1.6316,
      "step": 13000
    },
    {
      "epoch": 10.0,
      "step": 13370,
      "total_flos": 1.7358769019486208e+16,
      "train_loss": 1.7481338489563856,
      "train_runtime": 1121.6717,
      "train_samples_per_second": 95.331,
      "train_steps_per_second": 11.92
    }
  ],
  "logging_steps": 500,
  "max_steps": 13370,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7358769019486208e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}