| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 16.666666666666668, |
| "global_step": 1300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.941571524513096e-05, |
| "loss": 0.682, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.7003610108303249, |
| "eval_loss": 0.6075819134712219, |
| "eval_runtime": 3.25, |
| "eval_samples_per_second": 85.23, |
| "eval_steps_per_second": 2.769, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.7400940228341167e-05, |
| "loss": 0.5556, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 0.6967509025270758, |
| "eval_loss": 0.5837834477424622, |
| "eval_runtime": 1.0394, |
| "eval_samples_per_second": 266.512, |
| "eval_steps_per_second": 8.659, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 2.538616521155138e-05, |
| "loss": 0.3956, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_accuracy": 0.7653429602888087, |
| "eval_loss": 0.5577418208122253, |
| "eval_runtime": 1.0269, |
| "eval_samples_per_second": 269.751, |
| "eval_steps_per_second": 8.764, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 2.3371390194761586e-05, |
| "loss": 0.2705, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.13, |
| "eval_accuracy": 0.7472924187725631, |
| "eval_loss": 0.7243108153343201, |
| "eval_runtime": 1.0048, |
| "eval_samples_per_second": 275.666, |
| "eval_steps_per_second": 8.957, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 2.1356615177971793e-05, |
| "loss": 0.1431, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.41, |
| "eval_accuracy": 0.7581227436823105, |
| "eval_loss": 1.279666781425476, |
| "eval_runtime": 1.01, |
| "eval_samples_per_second": 274.262, |
| "eval_steps_per_second": 8.911, |
| "step": 500 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 1.9341840161182e-05, |
| "loss": 0.1032, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.69, |
| "eval_accuracy": 0.7617328519855595, |
| "eval_loss": 1.1039319038391113, |
| "eval_runtime": 1.0386, |
| "eval_samples_per_second": 266.696, |
| "eval_steps_per_second": 8.665, |
| "step": 600 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 1.7327065144392212e-05, |
| "loss": 0.0787, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.97, |
| "eval_accuracy": 0.7653429602888087, |
| "eval_loss": 1.4732542037963867, |
| "eval_runtime": 1.4811, |
| "eval_samples_per_second": 187.028, |
| "eval_steps_per_second": 6.077, |
| "step": 700 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 1.531229012760242e-05, |
| "loss": 0.0543, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.26, |
| "eval_accuracy": 0.776173285198556, |
| "eval_loss": 1.4965362548828125, |
| "eval_runtime": 0.9908, |
| "eval_samples_per_second": 279.574, |
| "eval_steps_per_second": 9.084, |
| "step": 800 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 1.3297515110812627e-05, |
| "loss": 0.0411, |
| "step": 900 |
| }, |
| { |
| "epoch": 11.54, |
| "eval_accuracy": 0.7725631768953068, |
| "eval_loss": 1.4953675270080566, |
| "eval_runtime": 1.0282, |
| "eval_samples_per_second": 269.415, |
| "eval_steps_per_second": 8.754, |
| "step": 900 |
| }, |
| { |
| "epoch": 12.82, |
| "learning_rate": 1.1282740094022835e-05, |
| "loss": 0.0335, |
| "step": 1000 |
| }, |
| { |
| "epoch": 12.82, |
| "eval_accuracy": 0.7689530685920578, |
| "eval_loss": 1.7958177328109741, |
| "eval_runtime": 1.0152, |
| "eval_samples_per_second": 272.858, |
| "eval_steps_per_second": 8.865, |
| "step": 1000 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 9.267965077233043e-06, |
| "loss": 0.0236, |
| "step": 1100 |
| }, |
| { |
| "epoch": 14.1, |
| "eval_accuracy": 0.779783393501805, |
| "eval_loss": 1.6150606870651245, |
| "eval_runtime": 2.5651, |
| "eval_samples_per_second": 107.987, |
| "eval_steps_per_second": 3.509, |
| "step": 1100 |
| }, |
| { |
| "epoch": 15.38, |
| "learning_rate": 7.253190060443251e-06, |
| "loss": 0.0132, |
| "step": 1200 |
| }, |
| { |
| "epoch": 15.38, |
| "eval_accuracy": 0.776173285198556, |
| "eval_loss": 2.066054582595825, |
| "eval_runtime": 1.0235, |
| "eval_samples_per_second": 270.642, |
| "eval_steps_per_second": 8.793, |
| "step": 1200 |
| }, |
| { |
| "epoch": 16.67, |
| "learning_rate": 5.238415043653459e-06, |
| "loss": 0.0233, |
| "step": 1300 |
| }, |
| { |
| "epoch": 16.67, |
| "eval_accuracy": 0.7906137184115524, |
| "eval_loss": 1.6857606172561646, |
| "eval_runtime": 2.6037, |
| "eval_samples_per_second": 106.386, |
| "eval_steps_per_second": 3.457, |
| "step": 1300 |
| } |
| ], |
| "max_steps": 1560, |
| "num_train_epochs": 20, |
| "total_flos": 3182186931732480.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|