| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.7225164175033569, | |
| "eval_runtime": 2.4692, | |
| "eval_samples_per_second": 80.998, | |
| "eval_steps_per_second": 10.125, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.5052716732025146, | |
| "eval_runtime": 2.4705, | |
| "eval_samples_per_second": 80.955, | |
| "eval_steps_per_second": 10.119, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.4475671350955963, | |
| "eval_runtime": 2.4731, | |
| "eval_samples_per_second": 80.871, | |
| "eval_steps_per_second": 10.109, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.4104467034339905, | |
| "eval_runtime": 2.4763, | |
| "eval_samples_per_second": 80.765, | |
| "eval_steps_per_second": 10.096, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.39656341075897217, | |
| "eval_runtime": 2.4766, | |
| "eval_samples_per_second": 80.755, | |
| "eval_steps_per_second": 10.094, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.36905747652053833, | |
| "eval_runtime": 2.4774, | |
| "eval_samples_per_second": 80.731, | |
| "eval_steps_per_second": 10.091, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.362547367811203, | |
| "eval_runtime": 2.4792, | |
| "eval_samples_per_second": 80.672, | |
| "eval_steps_per_second": 10.084, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.3497239053249359, | |
| "eval_runtime": 2.4872, | |
| "eval_samples_per_second": 80.413, | |
| "eval_steps_per_second": 10.052, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.3523653447628021, | |
| "eval_runtime": 2.4878, | |
| "eval_samples_per_second": 80.392, | |
| "eval_steps_per_second": 10.049, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.33985191583633423, | |
| "eval_runtime": 2.4877, | |
| "eval_samples_per_second": 80.395, | |
| "eval_steps_per_second": 10.049, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.3415805399417877, | |
| "eval_runtime": 2.4896, | |
| "eval_samples_per_second": 80.333, | |
| "eval_steps_per_second": 10.042, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.33282220363616943, | |
| "eval_runtime": 2.49, | |
| "eval_samples_per_second": 80.321, | |
| "eval_steps_per_second": 10.04, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.33800217509269714, | |
| "eval_runtime": 2.491, | |
| "eval_samples_per_second": 80.289, | |
| "eval_steps_per_second": 10.036, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.3342490792274475, | |
| "eval_runtime": 2.491, | |
| "eval_samples_per_second": 80.288, | |
| "eval_steps_per_second": 10.036, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.34117013216018677, | |
| "eval_runtime": 2.4915, | |
| "eval_samples_per_second": 80.273, | |
| "eval_steps_per_second": 10.034, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.3388213813304901, | |
| "eval_runtime": 2.4936, | |
| "eval_samples_per_second": 80.205, | |
| "eval_steps_per_second": 10.026, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.319783091545105, | |
| "eval_runtime": 2.4927, | |
| "eval_samples_per_second": 80.234, | |
| "eval_steps_per_second": 10.029, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.3183771073818207, | |
| "eval_runtime": 2.4921, | |
| "eval_samples_per_second": 80.253, | |
| "eval_steps_per_second": 10.032, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.31770122051239014, | |
| "eval_runtime": 2.4935, | |
| "eval_samples_per_second": 80.21, | |
| "eval_steps_per_second": 10.026, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.8955293893814087, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.4631, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.31926316022872925, | |
| "eval_runtime": 2.4537, | |
| "eval_samples_per_second": 81.51, | |
| "eval_steps_per_second": 10.189, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.3148637115955353, | |
| "eval_runtime": 2.4822, | |
| "eval_samples_per_second": 80.572, | |
| "eval_steps_per_second": 10.072, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.31756889820098877, | |
| "eval_runtime": 2.4835, | |
| "eval_samples_per_second": 80.531, | |
| "eval_steps_per_second": 10.066, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.3171720504760742, | |
| "eval_runtime": 2.4904, | |
| "eval_samples_per_second": 80.309, | |
| "eval_steps_per_second": 10.039, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.3179776072502136, | |
| "eval_runtime": 2.4915, | |
| "eval_samples_per_second": 80.273, | |
| "eval_steps_per_second": 10.034, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.3148040473461151, | |
| "eval_runtime": 2.4928, | |
| "eval_samples_per_second": 80.232, | |
| "eval_steps_per_second": 10.029, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.3072579503059387, | |
| "eval_runtime": 2.4926, | |
| "eval_samples_per_second": 80.239, | |
| "eval_steps_per_second": 10.03, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.3129171133041382, | |
| "eval_runtime": 2.4936, | |
| "eval_samples_per_second": 80.206, | |
| "eval_steps_per_second": 10.026, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.3081643283367157, | |
| "eval_runtime": 2.4941, | |
| "eval_samples_per_second": 80.188, | |
| "eval_steps_per_second": 10.024, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.3064418435096741, | |
| "eval_runtime": 2.4964, | |
| "eval_samples_per_second": 80.116, | |
| "eval_steps_per_second": 10.014, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.30982914566993713, | |
| "eval_runtime": 2.4963, | |
| "eval_samples_per_second": 80.12, | |
| "eval_steps_per_second": 10.015, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.3063754439353943, | |
| "eval_runtime": 2.4956, | |
| "eval_samples_per_second": 80.142, | |
| "eval_steps_per_second": 10.018, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.3113013207912445, | |
| "eval_runtime": 2.4972, | |
| "eval_samples_per_second": 80.089, | |
| "eval_steps_per_second": 10.011, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.30704861879348755, | |
| "eval_runtime": 2.4962, | |
| "eval_samples_per_second": 80.122, | |
| "eval_steps_per_second": 10.015, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.2988375425338745, | |
| "eval_runtime": 2.4974, | |
| "eval_samples_per_second": 80.083, | |
| "eval_steps_per_second": 10.01, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.3142584264278412, | |
| "eval_runtime": 2.4958, | |
| "eval_samples_per_second": 80.135, | |
| "eval_steps_per_second": 10.017, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.3032761514186859, | |
| "eval_runtime": 2.4976, | |
| "eval_samples_per_second": 80.077, | |
| "eval_steps_per_second": 10.01, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.30415403842926025, | |
| "eval_runtime": 2.4973, | |
| "eval_samples_per_second": 80.087, | |
| "eval_steps_per_second": 10.011, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.30165913701057434, | |
| "eval_runtime": 2.4982, | |
| "eval_samples_per_second": 80.057, | |
| "eval_steps_per_second": 10.007, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.3017444908618927, | |
| "eval_runtime": 2.4977, | |
| "eval_samples_per_second": 80.072, | |
| "eval_steps_per_second": 10.009, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 2.4287023544311523, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.3457, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.3025864064693451, | |
| "eval_runtime": 2.4539, | |
| "eval_samples_per_second": 81.504, | |
| "eval_steps_per_second": 10.188, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.30045461654663086, | |
| "eval_runtime": 2.4819, | |
| "eval_samples_per_second": 80.584, | |
| "eval_steps_per_second": 10.073, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.30064135789871216, | |
| "eval_runtime": 2.4896, | |
| "eval_samples_per_second": 80.334, | |
| "eval_steps_per_second": 10.042, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 0.29575350880622864, | |
| "eval_runtime": 2.4903, | |
| "eval_samples_per_second": 80.312, | |
| "eval_steps_per_second": 10.039, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 0.30160149931907654, | |
| "eval_runtime": 2.4926, | |
| "eval_samples_per_second": 80.239, | |
| "eval_steps_per_second": 10.03, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 0.30429255962371826, | |
| "eval_runtime": 2.4921, | |
| "eval_samples_per_second": 80.255, | |
| "eval_steps_per_second": 10.032, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 0.3015859127044678, | |
| "eval_runtime": 2.4927, | |
| "eval_samples_per_second": 80.236, | |
| "eval_steps_per_second": 10.029, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 0.29914650321006775, | |
| "eval_runtime": 2.4947, | |
| "eval_samples_per_second": 80.171, | |
| "eval_steps_per_second": 10.021, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 0.2971905469894409, | |
| "eval_runtime": 2.4958, | |
| "eval_samples_per_second": 80.134, | |
| "eval_steps_per_second": 10.017, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 0.29176658391952515, | |
| "eval_runtime": 2.4963, | |
| "eval_samples_per_second": 80.118, | |
| "eval_steps_per_second": 10.015, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.2934282720088959, | |
| "eval_runtime": 2.4976, | |
| "eval_samples_per_second": 80.076, | |
| "eval_steps_per_second": 10.01, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 0.2918751835823059, | |
| "eval_runtime": 2.4964, | |
| "eval_samples_per_second": 80.115, | |
| "eval_steps_per_second": 10.014, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 0.2914879620075226, | |
| "eval_runtime": 2.4977, | |
| "eval_samples_per_second": 80.075, | |
| "eval_steps_per_second": 10.009, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 0.2925909757614136, | |
| "eval_runtime": 2.4975, | |
| "eval_samples_per_second": 80.081, | |
| "eval_steps_per_second": 10.01, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 0.2940743565559387, | |
| "eval_runtime": 2.4974, | |
| "eval_samples_per_second": 80.085, | |
| "eval_steps_per_second": 10.011, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 0.2973780930042267, | |
| "eval_runtime": 2.4989, | |
| "eval_samples_per_second": 80.037, | |
| "eval_steps_per_second": 10.005, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 0.2954687178134918, | |
| "eval_runtime": 2.498, | |
| "eval_samples_per_second": 80.065, | |
| "eval_steps_per_second": 10.008, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 0.29051879048347473, | |
| "eval_runtime": 2.4979, | |
| "eval_samples_per_second": 80.069, | |
| "eval_steps_per_second": 10.009, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 0.29731473326683044, | |
| "eval_runtime": 2.4993, | |
| "eval_samples_per_second": 80.023, | |
| "eval_steps_per_second": 10.003, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 0.2933524250984192, | |
| "eval_runtime": 2.4988, | |
| "eval_samples_per_second": 80.04, | |
| "eval_steps_per_second": 10.005, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 1.120781421661377, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.3291, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 0.2888854742050171, | |
| "eval_runtime": 2.456, | |
| "eval_samples_per_second": 81.434, | |
| "eval_steps_per_second": 10.179, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 0.2901514172554016, | |
| "eval_runtime": 2.4865, | |
| "eval_samples_per_second": 80.436, | |
| "eval_steps_per_second": 10.054, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 0.29295194149017334, | |
| "eval_runtime": 2.4921, | |
| "eval_samples_per_second": 80.253, | |
| "eval_steps_per_second": 10.032, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 0.29049646854400635, | |
| "eval_runtime": 2.4924, | |
| "eval_samples_per_second": 80.244, | |
| "eval_steps_per_second": 10.031, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 0.2913173735141754, | |
| "eval_runtime": 2.4934, | |
| "eval_samples_per_second": 80.212, | |
| "eval_steps_per_second": 10.026, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 0.28798389434814453, | |
| "eval_runtime": 2.4945, | |
| "eval_samples_per_second": 80.176, | |
| "eval_steps_per_second": 10.022, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 0.28929680585861206, | |
| "eval_runtime": 2.4963, | |
| "eval_samples_per_second": 80.118, | |
| "eval_steps_per_second": 10.015, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 0.2856563925743103, | |
| "eval_runtime": 2.4957, | |
| "eval_samples_per_second": 80.139, | |
| "eval_steps_per_second": 10.017, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 0.2869837284088135, | |
| "eval_runtime": 2.496, | |
| "eval_samples_per_second": 80.13, | |
| "eval_steps_per_second": 10.016, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 0.2902255356311798, | |
| "eval_runtime": 2.4969, | |
| "eval_samples_per_second": 80.098, | |
| "eval_steps_per_second": 10.012, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 0.28557059168815613, | |
| "eval_runtime": 2.4957, | |
| "eval_samples_per_second": 80.139, | |
| "eval_steps_per_second": 10.017, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 0.2883276343345642, | |
| "eval_runtime": 2.4986, | |
| "eval_samples_per_second": 80.045, | |
| "eval_steps_per_second": 10.006, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 0.286774605512619, | |
| "eval_runtime": 2.4979, | |
| "eval_samples_per_second": 80.068, | |
| "eval_steps_per_second": 10.009, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 0.28692272305488586, | |
| "eval_runtime": 2.4977, | |
| "eval_samples_per_second": 80.073, | |
| "eval_steps_per_second": 10.009, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 0.2842114567756653, | |
| "eval_runtime": 2.4982, | |
| "eval_samples_per_second": 80.058, | |
| "eval_steps_per_second": 10.007, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 0.2869337201118469, | |
| "eval_runtime": 2.4984, | |
| "eval_samples_per_second": 80.052, | |
| "eval_steps_per_second": 10.007, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 0.2843911647796631, | |
| "eval_runtime": 2.498, | |
| "eval_samples_per_second": 80.064, | |
| "eval_steps_per_second": 10.008, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 0.28588855266571045, | |
| "eval_runtime": 2.4985, | |
| "eval_samples_per_second": 80.047, | |
| "eval_steps_per_second": 10.006, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 0.2864097058773041, | |
| "eval_runtime": 2.4994, | |
| "eval_samples_per_second": 80.02, | |
| "eval_steps_per_second": 10.002, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 0.28731438517570496, | |
| "eval_runtime": 2.4981, | |
| "eval_samples_per_second": 80.061, | |
| "eval_steps_per_second": 10.008, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 1.4072085618972778, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.3199, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 0.2888672947883606, | |
| "eval_runtime": 2.4574, | |
| "eval_samples_per_second": 81.386, | |
| "eval_steps_per_second": 10.173, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 0.28676241636276245, | |
| "eval_runtime": 2.4848, | |
| "eval_samples_per_second": 80.49, | |
| "eval_steps_per_second": 10.061, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 0.28567585349082947, | |
| "eval_runtime": 2.4917, | |
| "eval_samples_per_second": 80.268, | |
| "eval_steps_per_second": 10.033, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 0.2843726873397827, | |
| "eval_runtime": 2.4926, | |
| "eval_samples_per_second": 80.236, | |
| "eval_steps_per_second": 10.03, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 0.28754809498786926, | |
| "eval_runtime": 2.4936, | |
| "eval_samples_per_second": 80.205, | |
| "eval_steps_per_second": 10.026, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 0.2854582667350769, | |
| "eval_runtime": 2.4941, | |
| "eval_samples_per_second": 80.188, | |
| "eval_steps_per_second": 10.023, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 0.2840147316455841, | |
| "eval_runtime": 2.4944, | |
| "eval_samples_per_second": 80.181, | |
| "eval_steps_per_second": 10.023, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 0.28520676493644714, | |
| "eval_runtime": 2.4963, | |
| "eval_samples_per_second": 80.119, | |
| "eval_steps_per_second": 10.015, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 0.28196609020233154, | |
| "eval_runtime": 2.4972, | |
| "eval_samples_per_second": 80.091, | |
| "eval_steps_per_second": 10.011, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 0.28386008739471436, | |
| "eval_runtime": 2.4982, | |
| "eval_samples_per_second": 80.057, | |
| "eval_steps_per_second": 10.007, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 0.2850269675254822, | |
| "eval_runtime": 2.4975, | |
| "eval_samples_per_second": 80.079, | |
| "eval_steps_per_second": 10.01, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 0.28362977504730225, | |
| "eval_runtime": 2.4982, | |
| "eval_samples_per_second": 80.057, | |
| "eval_steps_per_second": 10.007, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 0.2840833365917206, | |
| "eval_runtime": 2.4978, | |
| "eval_samples_per_second": 80.07, | |
| "eval_steps_per_second": 10.009, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 0.28477975726127625, | |
| "eval_runtime": 2.4997, | |
| "eval_samples_per_second": 80.01, | |
| "eval_steps_per_second": 10.001, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 0.2831202745437622, | |
| "eval_runtime": 2.4995, | |
| "eval_samples_per_second": 80.016, | |
| "eval_steps_per_second": 10.002, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_loss": 0.28298699855804443, | |
| "eval_runtime": 2.5008, | |
| "eval_samples_per_second": 79.975, | |
| "eval_steps_per_second": 9.997, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_loss": 0.2848021984100342, | |
| "eval_runtime": 2.4987, | |
| "eval_samples_per_second": 80.041, | |
| "eval_steps_per_second": 10.005, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_loss": 0.2818942368030548, | |
| "eval_runtime": 2.4999, | |
| "eval_samples_per_second": 80.004, | |
| "eval_steps_per_second": 10.0, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_loss": 0.28425753116607666, | |
| "eval_runtime": 2.5005, | |
| "eval_samples_per_second": 79.982, | |
| "eval_steps_per_second": 9.998, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_loss": 0.28273478150367737, | |
| "eval_runtime": 2.4987, | |
| "eval_samples_per_second": 80.042, | |
| "eval_steps_per_second": 10.005, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": 1.4653393030166626, | |
| "learning_rate": 0.0, | |
| "loss": 0.3157, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 1306483752960000.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |