diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9022 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.446190102120973, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002094789211835559, + "learning_rate": 1.0206207261596573e-07, + "loss": 42.596397399902344, + "step": 1 + }, + { + "epoch": 0.020947892118355592, + "learning_rate": 1.0206207261596575e-06, + "loss": 41.090047200520836, + "step": 10 + }, + { + "epoch": 0.041895784236711184, + "learning_rate": 2.041241452319315e-06, + "loss": 40.72335205078125, + "step": 20 + }, + { + "epoch": 0.06284367635506677, + "learning_rate": 3.0618621784789722e-06, + "loss": 40.80942077636719, + "step": 30 + }, + { + "epoch": 0.08379156847342237, + "learning_rate": 4.08248290463863e-06, + "loss": 40.0015869140625, + "step": 40 + }, + { + "epoch": 0.10473946059177795, + "learning_rate": 5.103103630798286e-06, + "loss": 39.082891845703124, + "step": 50 + }, + { + "epoch": 0.12568735271013354, + "learning_rate": 6.1237243569579445e-06, + "loss": 39.320306396484376, + "step": 60 + }, + { + "epoch": 0.14663524482848914, + "learning_rate": 7.144345083117603e-06, + "loss": 38.66647033691406, + "step": 70 + }, + { + "epoch": 0.16758313694684474, + "learning_rate": 8.16496580927726e-06, + "loss": 38.0055908203125, + "step": 80 + }, + { + "epoch": 0.1885310290652003, + "learning_rate": 9.185586535436916e-06, + "loss": 37.521505737304686, + "step": 90 + }, + { + "epoch": 0.2094789211835559, + "learning_rate": 1.0206207261596573e-05, + "loss": 35.75002746582031, + "step": 100 + }, + { + "epoch": 0.2304268133019115, + "learning_rate": 1.1226827987756233e-05, + "loss": 35.38407592773437, + "step": 110 + }, + { + "epoch": 0.2513747054202671, + "learning_rate": 1.2247448713915889e-05, + "loss": 34.455230712890625, + "step": 120 + }, + { + "epoch": 0.2723225975386227, + "learning_rate": 1.3268069440075545e-05, + "loss": 33.48695373535156, + "step": 130 + }, + { + "epoch": 0.2932704896569783, + "learning_rate": 1.4288690166235205e-05, + "loss": 33.0624755859375, + "step": 140 + }, + { + "epoch": 0.31421838177533384, + "learning_rate": 1.530931089239486e-05, + "loss": 31.633297729492188, + "step": 150 + }, + { + "epoch": 0.33516627389368947, + "learning_rate": 1.632993161855452e-05, + "loss": 30.392620849609376, + "step": 160 + }, + { + "epoch": 0.35611416601204504, + "learning_rate": 1.7350552344714174e-05, + "loss": 30.184588623046874, + "step": 170 + }, + { + "epoch": 0.3770620581304006, + "learning_rate": 1.8371173070873833e-05, + "loss": 29.326535034179688, + "step": 180 + }, + { + "epoch": 0.39800995024875624, + "learning_rate": 1.939179379703349e-05, + "loss": 28.23697509765625, + "step": 190 + }, + { + "epoch": 0.4189578423671118, + "learning_rate": 2.0412414523193145e-05, + "loss": 27.301419067382813, + "step": 200 + }, + { + "epoch": 0.4399057344854674, + "learning_rate": 2.1433035249352804e-05, + "loss": 26.5281494140625, + "step": 210 + }, + { + "epoch": 0.460853626603823, + "learning_rate": 2.2453655975512465e-05, + "loss": 25.992547607421876, + "step": 220 + }, + { + "epoch": 0.4818015187221786, + "learning_rate": 2.347427670167212e-05, + "loss": 24.90663299560547, + "step": 230 + }, + { + "epoch": 0.5027494108405341, + "learning_rate": 2.4494897427831778e-05, + "loss": 23.896534729003907, + "step": 240 + }, + { + "epoch": 0.5236973029588897, + "learning_rate": 2.5515518153991436e-05, + "loss": 22.488262939453126, + "step": 250 + }, + { + "epoch": 0.5446451950772454, + "learning_rate": 2.653613888015109e-05, + "loss": 21.760206604003905, + "step": 260 + }, + { + "epoch": 0.565593087195601, + "learning_rate": 2.755675960631075e-05, + "loss": 20.808561706542967, + "step": 270 + }, + { + "epoch": 0.5865409793139565, + "learning_rate": 2.857738033247041e-05, + "loss": 20.107774353027345, + "step": 280 + }, + { + "epoch": 0.6074888714323121, + "learning_rate": 2.9598001058630065e-05, + "loss": 19.469386291503906, + "step": 290 + }, + { + "epoch": 0.6284367635506677, + "learning_rate": 3.061862178478972e-05, + "loss": 18.442234802246094, + "step": 300 + }, + { + "epoch": 0.6493846556690233, + "learning_rate": 3.163924251094938e-05, + "loss": 18.132992553710938, + "step": 310 + }, + { + "epoch": 0.6703325477873789, + "learning_rate": 3.265986323710904e-05, + "loss": 17.425698852539064, + "step": 320 + }, + { + "epoch": 0.6912804399057345, + "learning_rate": 3.3680483963268694e-05, + "loss": 16.683474731445312, + "step": 330 + }, + { + "epoch": 0.7122283320240901, + "learning_rate": 3.470110468942835e-05, + "loss": 16.511445617675783, + "step": 340 + }, + { + "epoch": 0.7331762241424457, + "learning_rate": 3.5721725415588004e-05, + "loss": 15.8478759765625, + "step": 350 + }, + { + "epoch": 0.7541241162608012, + "learning_rate": 3.6742346141747665e-05, + "loss": 15.450515747070312, + "step": 360 + }, + { + "epoch": 0.7750720083791568, + "learning_rate": 3.7762966867907327e-05, + "loss": 14.916090393066407, + "step": 370 + }, + { + "epoch": 0.7960199004975125, + "learning_rate": 3.878358759406698e-05, + "loss": 14.495413208007813, + "step": 380 + }, + { + "epoch": 0.816967792615868, + "learning_rate": 3.980420832022664e-05, + "loss": 13.862504577636718, + "step": 390 + }, + { + "epoch": 0.8379156847342236, + "learning_rate": 4.082482904638629e-05, + "loss": 13.526719665527343, + "step": 400 + }, + { + "epoch": 0.8588635768525792, + "learning_rate": 4.184544977254595e-05, + "loss": 13.241981506347656, + "step": 410 + }, + { + "epoch": 0.8798114689709348, + "learning_rate": 4.286607049870561e-05, + "loss": 12.791949462890624, + "step": 420 + }, + { + "epoch": 0.9007593610892904, + "learning_rate": 4.388669122486527e-05, + "loss": 12.353260040283203, + "step": 430 + }, + { + "epoch": 0.921707253207646, + "learning_rate": 4.490731195102493e-05, + "loss": 11.939605712890625, + "step": 440 + }, + { + "epoch": 0.9426551453260016, + "learning_rate": 4.5927932677184585e-05, + "loss": 11.615445709228515, + "step": 450 + }, + { + "epoch": 0.9636030374443572, + "learning_rate": 4.694855340334424e-05, + "loss": 11.29063720703125, + "step": 460 + }, + { + "epoch": 0.9845509295627127, + "learning_rate": 4.7969174129503894e-05, + "loss": 11.051673889160156, + "step": 470 + }, + { + "epoch": 1.0062843676355067, + "learning_rate": 4.8989794855663556e-05, + "loss": 11.174005889892578, + "step": 480 + }, + { + "epoch": 1.0272322597538623, + "learning_rate": 5.001041558182322e-05, + "loss": 10.351375579833984, + "step": 490 + }, + { + "epoch": 1.0481801518722178, + "learning_rate": 5.103103630798287e-05, + "loss": 10.082479858398438, + "step": 500 + }, + { + "epoch": 1.0691280439905735, + "learning_rate": 5.205165703414253e-05, + "loss": 9.848101806640624, + "step": 510 + }, + { + "epoch": 1.090075936108929, + "learning_rate": 5.307227776030218e-05, + "loss": 9.558543395996093, + "step": 520 + }, + { + "epoch": 1.1110238282272846, + "learning_rate": 5.409289848646184e-05, + "loss": 9.263871765136718, + "step": 530 + }, + { + "epoch": 1.1319717203456403, + "learning_rate": 5.51135192126215e-05, + "loss": 9.101696014404297, + "step": 540 + }, + { + "epoch": 1.1529196124639958, + "learning_rate": 5.613413993878116e-05, + "loss": 8.784052276611328, + "step": 550 + }, + { + "epoch": 1.1738675045823515, + "learning_rate": 5.715476066494082e-05, + "loss": 8.466715240478516, + "step": 560 + }, + { + "epoch": 1.194815396700707, + "learning_rate": 5.817538139110047e-05, + "loss": 8.419536590576172, + "step": 570 + }, + { + "epoch": 1.2157632888190626, + "learning_rate": 5.919600211726013e-05, + "loss": 8.445430755615234, + "step": 580 + }, + { + "epoch": 1.236711180937418, + "learning_rate": 6.0216622843419785e-05, + "loss": 7.745582580566406, + "step": 590 + }, + { + "epoch": 1.2576590730557737, + "learning_rate": 6.123724356957945e-05, + "loss": 7.821333312988282, + "step": 600 + }, + { + "epoch": 1.2786069651741294, + "learning_rate": 6.22578642957391e-05, + "loss": 7.500454711914062, + "step": 610 + }, + { + "epoch": 1.2995548572924849, + "learning_rate": 6.327848502189876e-05, + "loss": 7.44578857421875, + "step": 620 + }, + { + "epoch": 1.3205027494108406, + "learning_rate": 6.429910574805841e-05, + "loss": 7.184627532958984, + "step": 630 + }, + { + "epoch": 1.341450641529196, + "learning_rate": 6.531972647421808e-05, + "loss": 6.880846405029297, + "step": 640 + }, + { + "epoch": 1.3623985336475517, + "learning_rate": 6.634034720037773e-05, + "loss": 6.639464569091797, + "step": 650 + }, + { + "epoch": 1.3833464257659074, + "learning_rate": 6.736096792653739e-05, + "loss": 6.543840789794922, + "step": 660 + }, + { + "epoch": 1.4042943178842628, + "learning_rate": 6.838158865269704e-05, + "loss": 6.245196914672851, + "step": 670 + }, + { + "epoch": 1.4252422100026185, + "learning_rate": 6.94022093788567e-05, + "loss": 6.316292572021484, + "step": 680 + }, + { + "epoch": 1.446190102120974, + "learning_rate": 7.042283010501637e-05, + "loss": 6.2703697204589846, + "step": 690 + }, + { + "epoch": 1.4671379942393297, + "learning_rate": 7.144345083117601e-05, + "loss": 6.09345703125, + "step": 700 + }, + { + "epoch": 1.4880858863576854, + "learning_rate": 7.246407155733568e-05, + "loss": 5.9814506530761715, + "step": 710 + }, + { + "epoch": 1.5090337784760408, + "learning_rate": 7.348469228349533e-05, + "loss": 5.9337646484375, + "step": 720 + }, + { + "epoch": 1.5299816705943965, + "learning_rate": 7.450531300965498e-05, + "loss": 5.937409591674805, + "step": 730 + }, + { + "epoch": 1.550929562712752, + "learning_rate": 7.552593373581465e-05, + "loss": 5.64327392578125, + "step": 740 + }, + { + "epoch": 1.5718774548311076, + "learning_rate": 7.654655446197431e-05, + "loss": 5.371760559082031, + "step": 750 + }, + { + "epoch": 1.5928253469494633, + "learning_rate": 7.756717518813396e-05, + "loss": 5.309605407714844, + "step": 760 + }, + { + "epoch": 1.6137732390678188, + "learning_rate": 7.858779591429362e-05, + "loss": 5.3783222198486325, + "step": 770 + }, + { + "epoch": 1.6347211311861742, + "learning_rate": 7.960841664045329e-05, + "loss": 5.400894546508789, + "step": 780 + }, + { + "epoch": 1.65566902330453, + "learning_rate": 8.062903736661294e-05, + "loss": 5.137008285522461, + "step": 790 + }, + { + "epoch": 1.6766169154228856, + "learning_rate": 8.164965809277258e-05, + "loss": 5.365228271484375, + "step": 800 + }, + { + "epoch": 1.6975648075412413, + "learning_rate": 8.267027881893225e-05, + "loss": 5.210577392578125, + "step": 810 + }, + { + "epoch": 1.7185126996595967, + "learning_rate": 8.36908995450919e-05, + "loss": 5.149754333496094, + "step": 820 + }, + { + "epoch": 1.7394605917779522, + "learning_rate": 8.471152027125156e-05, + "loss": 5.063209915161133, + "step": 830 + }, + { + "epoch": 1.7604084838963079, + "learning_rate": 8.573214099741121e-05, + "loss": 4.89969482421875, + "step": 840 + }, + { + "epoch": 1.7813563760146636, + "learning_rate": 8.675276172357088e-05, + "loss": 4.8380378723144535, + "step": 850 + }, + { + "epoch": 1.8023042681330192, + "learning_rate": 8.777338244973054e-05, + "loss": 4.826490783691407, + "step": 860 + }, + { + "epoch": 1.8232521602513747, + "learning_rate": 8.879400317589019e-05, + "loss": 4.733642578125, + "step": 870 + }, + { + "epoch": 1.8442000523697302, + "learning_rate": 8.981462390204986e-05, + "loss": 4.840193557739258, + "step": 880 + }, + { + "epoch": 1.8651479444880859, + "learning_rate": 9.083524462820951e-05, + "loss": 4.841461944580078, + "step": 890 + }, + { + "epoch": 1.8860958366064415, + "learning_rate": 9.185586535436917e-05, + "loss": 4.591343688964844, + "step": 900 + }, + { + "epoch": 1.9070437287247972, + "learning_rate": 9.287648608052881e-05, + "loss": 4.796835708618164, + "step": 910 + }, + { + "epoch": 1.9279916208431527, + "learning_rate": 9.389710680668848e-05, + "loss": 4.928312301635742, + "step": 920 + }, + { + "epoch": 1.9489395129615081, + "learning_rate": 9.491772753284813e-05, + "loss": 4.6936603546142575, + "step": 930 + }, + { + "epoch": 1.9698874050798638, + "learning_rate": 9.593834825900779e-05, + "loss": 4.678832626342773, + "step": 940 + }, + { + "epoch": 1.9908352971982195, + "learning_rate": 9.695896898516746e-05, + "loss": 4.801474380493164, + "step": 950 + }, + { + "epoch": 2.0125687352710133, + "learning_rate": 9.797958971132711e-05, + "loss": 4.615579986572266, + "step": 960 + }, + { + "epoch": 2.033516627389369, + "learning_rate": 9.900021043748677e-05, + "loss": 4.685293197631836, + "step": 970 + }, + { + "epoch": 2.0544645195077247, + "learning_rate": 0.00010002083116364643, + "loss": 4.529672622680664, + "step": 980 + }, + { + "epoch": 2.07541241162608, + "learning_rate": 0.00010104145188980609, + "loss": 4.6519828796386715, + "step": 990 + }, + { + "epoch": 2.0963603037444356, + "learning_rate": 0.00010206207261596574, + "loss": 4.200122451782226, + "step": 1000 + }, + { + "epoch": 2.1173081958627913, + "learning_rate": 0.0001030826933421254, + "loss": 4.467970275878907, + "step": 1010 + }, + { + "epoch": 2.138256087981147, + "learning_rate": 0.00010410331406828505, + "loss": 4.364266586303711, + "step": 1020 + }, + { + "epoch": 2.1592039800995027, + "learning_rate": 0.00010512393479444471, + "loss": 4.321992874145508, + "step": 1030 + }, + { + "epoch": 2.180151872217858, + "learning_rate": 0.00010614455552060436, + "loss": 4.208817672729492, + "step": 1040 + }, + { + "epoch": 2.2010997643362136, + "learning_rate": 0.00010716517624676403, + "loss": 4.414374923706054, + "step": 1050 + }, + { + "epoch": 2.2220476564545693, + "learning_rate": 0.00010818579697292369, + "loss": 4.222100067138672, + "step": 1060 + }, + { + "epoch": 2.242995548572925, + "learning_rate": 0.00010920641769908334, + "loss": 4.392937850952149, + "step": 1070 + }, + { + "epoch": 2.2639434406912806, + "learning_rate": 0.000110227038425243, + "loss": 4.361217498779297, + "step": 1080 + }, + { + "epoch": 2.284891332809636, + "learning_rate": 0.00011124765915140266, + "loss": 4.305131912231445, + "step": 1090 + }, + { + "epoch": 2.3058392249279915, + "learning_rate": 0.00011226827987756232, + "loss": 4.322722244262695, + "step": 1100 + }, + { + "epoch": 2.326787117046347, + "learning_rate": 0.00011328890060372197, + "loss": 4.250308990478516, + "step": 1110 + }, + { + "epoch": 2.347735009164703, + "learning_rate": 0.00011430952132988164, + "loss": 4.366016006469726, + "step": 1120 + }, + { + "epoch": 2.3686829012830586, + "learning_rate": 0.00011533014205604128, + "loss": 4.271330642700195, + "step": 1130 + }, + { + "epoch": 2.389630793401414, + "learning_rate": 0.00011635076278220094, + "loss": 4.35943489074707, + "step": 1140 + }, + { + "epoch": 2.4105786855197695, + "learning_rate": 0.00011737138350836059, + "loss": 4.14013442993164, + "step": 1150 + }, + { + "epoch": 2.431526577638125, + "learning_rate": 0.00011839200423452026, + "loss": 4.263423156738281, + "step": 1160 + }, + { + "epoch": 2.452474469756481, + "learning_rate": 0.00011941262496067991, + "loss": 4.046255874633789, + "step": 1170 + }, + { + "epoch": 2.473422361874836, + "learning_rate": 0.00012043324568683957, + "loss": 4.267144775390625, + "step": 1180 + }, + { + "epoch": 2.494370253993192, + "learning_rate": 0.00012145386641299924, + "loss": 4.446724319458008, + "step": 1190 + }, + { + "epoch": 2.5153181461115475, + "learning_rate": 0.0001224744871391589, + "loss": 4.310148239135742, + "step": 1200 + }, + { + "epoch": 2.536266038229903, + "learning_rate": 0.00012349510786531856, + "loss": 4.145759963989258, + "step": 1210 + }, + { + "epoch": 2.557213930348259, + "learning_rate": 0.0001245157285914782, + "loss": 3.9344154357910157, + "step": 1220 + }, + { + "epoch": 2.5781618224666145, + "learning_rate": 0.00012553634931763784, + "loss": 4.1616455078125, + "step": 1230 + }, + { + "epoch": 2.5991097145849698, + "learning_rate": 0.0001265569700437975, + "loss": 4.117146682739258, + "step": 1240 + }, + { + "epoch": 2.6200576067033254, + "learning_rate": 0.00012757759076995718, + "loss": 4.102180480957031, + "step": 1250 + }, + { + "epoch": 2.641005498821681, + "learning_rate": 0.00012859821149611682, + "loss": 4.15636100769043, + "step": 1260 + }, + { + "epoch": 2.661953390940037, + "learning_rate": 0.0001296188322222765, + "loss": 4.069457626342773, + "step": 1270 + }, + { + "epoch": 2.682901283058392, + "learning_rate": 0.00013063945294843616, + "loss": 4.0670215606689455, + "step": 1280 + }, + { + "epoch": 2.7038491751767477, + "learning_rate": 0.0001316600736745958, + "loss": 3.985906219482422, + "step": 1290 + }, + { + "epoch": 2.7247970672951034, + "learning_rate": 0.00013268069440075547, + "loss": 4.272599792480468, + "step": 1300 + }, + { + "epoch": 2.745744959413459, + "learning_rate": 0.00013370131512691514, + "loss": 3.9709007263183596, + "step": 1310 + }, + { + "epoch": 2.7666928515318148, + "learning_rate": 0.00013472193585307478, + "loss": 4.1308135986328125, + "step": 1320 + }, + { + "epoch": 2.7876407436501704, + "learning_rate": 0.00013574255657923444, + "loss": 4.175233840942383, + "step": 1330 + }, + { + "epoch": 2.8085886357685257, + "learning_rate": 0.00013676317730539409, + "loss": 3.9187103271484376, + "step": 1340 + }, + { + "epoch": 2.8295365278868814, + "learning_rate": 0.00013778379803155375, + "loss": 3.8264163970947265, + "step": 1350 + }, + { + "epoch": 2.850484420005237, + "learning_rate": 0.0001388044187577134, + "loss": 4.124664306640625, + "step": 1360 + }, + { + "epoch": 2.8714323121235923, + "learning_rate": 0.00013982503948387306, + "loss": 3.952465057373047, + "step": 1370 + }, + { + "epoch": 2.892380204241948, + "learning_rate": 0.00014084566021003273, + "loss": 3.987406921386719, + "step": 1380 + }, + { + "epoch": 2.9133280963603037, + "learning_rate": 0.00014186628093619237, + "loss": 3.92406005859375, + "step": 1390 + }, + { + "epoch": 2.9342759884786593, + "learning_rate": 0.00014288690166235201, + "loss": 3.932135009765625, + "step": 1400 + }, + { + "epoch": 2.955223880597015, + "learning_rate": 0.00014390752238851168, + "loss": 4.134164428710937, + "step": 1410 + }, + { + "epoch": 2.9761717727153707, + "learning_rate": 0.00014492814311467135, + "loss": 4.045958709716797, + "step": 1420 + }, + { + "epoch": 2.9971196648337264, + "learning_rate": 0.000145948763840831, + "loss": 3.9462562561035157, + "step": 1430 + }, + { + "epoch": 3.01885310290652, + "learning_rate": 0.00014696938456699066, + "loss": 4.254601669311524, + "step": 1440 + }, + { + "epoch": 3.0398009950248754, + "learning_rate": 0.00014799000529315033, + "loss": 3.9297733306884766, + "step": 1450 + }, + { + "epoch": 3.060748887143231, + "learning_rate": 0.00014901062601930997, + "loss": 3.8294136047363283, + "step": 1460 + }, + { + "epoch": 3.081696779261587, + "learning_rate": 0.00015003124674546964, + "loss": 4.05113525390625, + "step": 1470 + }, + { + "epoch": 3.1026446713799425, + "learning_rate": 0.0001510518674716293, + "loss": 3.975564956665039, + "step": 1480 + }, + { + "epoch": 3.123592563498298, + "learning_rate": 0.00015207248819778895, + "loss": 3.9852630615234377, + "step": 1490 + }, + { + "epoch": 3.1445404556166534, + "learning_rate": 0.00015309310892394862, + "loss": 4.0287940979003904, + "step": 1500 + }, + { + "epoch": 3.165488347735009, + "learning_rate": 0.00015411372965010828, + "loss": 3.9631397247314455, + "step": 1510 + }, + { + "epoch": 3.1864362398533648, + "learning_rate": 0.00015513435037626793, + "loss": 3.9254386901855467, + "step": 1520 + }, + { + "epoch": 3.2073841319717205, + "learning_rate": 0.0001561549711024276, + "loss": 3.82324104309082, + "step": 1530 + }, + { + "epoch": 3.228332024090076, + "learning_rate": 0.00015717559182858723, + "loss": 3.9215030670166016, + "step": 1540 + }, + { + "epoch": 3.2492799162084314, + "learning_rate": 0.0001581962125547469, + "loss": 3.8164131164550783, + "step": 1550 + }, + { + "epoch": 3.270227808326787, + "learning_rate": 0.00015921683328090657, + "loss": 3.7764801025390624, + "step": 1560 + }, + { + "epoch": 3.2911757004451427, + "learning_rate": 0.0001602374540070662, + "loss": 3.9773059844970704, + "step": 1570 + }, + { + "epoch": 3.3121235925634984, + "learning_rate": 0.00016125807473322588, + "loss": 3.8427078247070314, + "step": 1580 + }, + { + "epoch": 3.333071484681854, + "learning_rate": 0.00016227869545938555, + "loss": 3.8103118896484376, + "step": 1590 + }, + { + "epoch": 3.3540193768002093, + "learning_rate": 0.00016329931618554516, + "loss": 3.8182399749755858, + "step": 1600 + }, + { + "epoch": 3.374967268918565, + "learning_rate": 0.00016431993691170483, + "loss": 3.750722122192383, + "step": 1610 + }, + { + "epoch": 3.3959151610369207, + "learning_rate": 0.0001653405576378645, + "loss": 3.9241371154785156, + "step": 1620 + }, + { + "epoch": 3.4168630531552764, + "learning_rate": 0.00016636117836402414, + "loss": 3.827128219604492, + "step": 1630 + }, + { + "epoch": 3.4378109452736316, + "learning_rate": 0.0001673817990901838, + "loss": 3.750970458984375, + "step": 1640 + }, + { + "epoch": 3.4587588373919873, + "learning_rate": 0.00016840241981634345, + "loss": 3.827236557006836, + "step": 1650 + }, + { + "epoch": 3.479706729510343, + "learning_rate": 0.00016942304054250312, + "loss": 3.862264633178711, + "step": 1660 + }, + { + "epoch": 3.5006546216286987, + "learning_rate": 0.0001704436612686628, + "loss": 3.9794548034667967, + "step": 1670 + }, + { + "epoch": 3.5216025137470544, + "learning_rate": 0.00017146428199482243, + "loss": 3.8642444610595703, + "step": 1680 + }, + { + "epoch": 3.54255040586541, + "learning_rate": 0.0001724849027209821, + "loss": 3.8710708618164062, + "step": 1690 + }, + { + "epoch": 3.5634982979837653, + "learning_rate": 0.00017350552344714176, + "loss": 3.7993534088134764, + "step": 1700 + }, + { + "epoch": 3.584446190102121, + "learning_rate": 0.0001745261441733014, + "loss": 3.8951980590820314, + "step": 1710 + }, + { + "epoch": 3.6053940822204766, + "learning_rate": 0.00017554676489946107, + "loss": 3.728221893310547, + "step": 1720 + }, + { + "epoch": 3.6263419743388323, + "learning_rate": 0.00017656738562562074, + "loss": 3.7405670166015623, + "step": 1730 + }, + { + "epoch": 3.6472898664571876, + "learning_rate": 0.00017758800635178038, + "loss": 3.7010948181152346, + "step": 1740 + }, + { + "epoch": 3.6682377585755432, + "learning_rate": 0.00017860862707794005, + "loss": 3.7746726989746096, + "step": 1750 + }, + { + "epoch": 3.689185650693899, + "learning_rate": 0.00017962924780409972, + "loss": 3.842145538330078, + "step": 1760 + }, + { + "epoch": 3.7101335428122546, + "learning_rate": 0.00018064986853025936, + "loss": 3.817458724975586, + "step": 1770 + }, + { + "epoch": 3.7310814349306103, + "learning_rate": 0.00018167048925641903, + "loss": 3.7765247344970705, + "step": 1780 + }, + { + "epoch": 3.752029327048966, + "learning_rate": 0.0001826911099825787, + "loss": 3.6870758056640627, + "step": 1790 + }, + { + "epoch": 3.772977219167321, + "learning_rate": 0.00018371173070873834, + "loss": 3.8823310852050783, + "step": 1800 + }, + { + "epoch": 3.793925111285677, + "learning_rate": 0.000184732351434898, + "loss": 3.7599208831787108, + "step": 1810 + }, + { + "epoch": 3.8148730034040326, + "learning_rate": 0.00018575297216105762, + "loss": 3.8205623626708984, + "step": 1820 + }, + { + "epoch": 3.835820895522388, + "learning_rate": 0.0001867735928872173, + "loss": 3.840204620361328, + "step": 1830 + }, + { + "epoch": 3.8567687876407435, + "learning_rate": 0.00018779421361337696, + "loss": 3.8769672393798826, + "step": 1840 + }, + { + "epoch": 3.877716679759099, + "learning_rate": 0.0001888148343395366, + "loss": 3.7292160034179687, + "step": 1850 + }, + { + "epoch": 3.898664571877455, + "learning_rate": 0.00018983545506569627, + "loss": 3.785298156738281, + "step": 1860 + }, + { + "epoch": 3.9196124639958105, + "learning_rate": 0.00019085607579185594, + "loss": 3.691172790527344, + "step": 1870 + }, + { + "epoch": 3.940560356114166, + "learning_rate": 0.00019187669651801558, + "loss": 3.7312793731689453, + "step": 1880 + }, + { + "epoch": 3.9615082482325215, + "learning_rate": 0.00019289731724417525, + "loss": 3.7419872283935547, + "step": 1890 + }, + { + "epoch": 3.982456140350877, + "learning_rate": 0.0001939179379703349, + "loss": 3.587678909301758, + "step": 1900 + }, + { + "epoch": 4.004189578423671, + "learning_rate": 0.00019493855869649455, + "loss": 3.9609317779541016, + "step": 1910 + }, + { + "epoch": 4.025137470542027, + "learning_rate": 0.00019595917942265422, + "loss": 3.7329071044921873, + "step": 1920 + }, + { + "epoch": 4.046085362660382, + "learning_rate": 0.0001969798001488139, + "loss": 3.7702545166015624, + "step": 1930 + }, + { + "epoch": 4.067033254778738, + "learning_rate": 0.00019800042087497353, + "loss": 3.796523666381836, + "step": 1940 + }, + { + "epoch": 4.087981146897094, + "learning_rate": 0.0001990210416011332, + "loss": 3.643301773071289, + "step": 1950 + }, + { + "epoch": 4.108929039015449, + "learning_rate": 0.00020004166232729287, + "loss": 3.705374526977539, + "step": 1960 + }, + { + "epoch": 4.129876931133805, + "learning_rate": 0.0002010622830534525, + "loss": 3.619226837158203, + "step": 1970 + }, + { + "epoch": 4.15082482325216, + "learning_rate": 0.00020208290377961218, + "loss": 3.862563705444336, + "step": 1980 + }, + { + "epoch": 4.1717727153705155, + "learning_rate": 0.00020310352450577185, + "loss": 3.6324195861816406, + "step": 1990 + }, + { + "epoch": 4.192720607488871, + "learning_rate": 0.0002041241452319315, + "loss": 3.674951171875, + "step": 2000 + }, + { + "epoch": 4.213668499607227, + "learning_rate": 0.00020514476595809116, + "loss": 3.59210205078125, + "step": 2010 + }, + { + "epoch": 4.234616391725583, + "learning_rate": 0.0002061653866842508, + "loss": 3.8746570587158202, + "step": 2020 + }, + { + "epoch": 4.255564283843938, + "learning_rate": 0.00020718600741041044, + "loss": 3.682146453857422, + "step": 2030 + }, + { + "epoch": 4.276512175962294, + "learning_rate": 0.0002082066281365701, + "loss": 3.8217212677001955, + "step": 2040 + }, + { + "epoch": 4.29746006808065, + "learning_rate": 0.00020922724886272975, + "loss": 3.691872787475586, + "step": 2050 + }, + { + "epoch": 4.318407960199005, + "learning_rate": 0.00021024786958888942, + "loss": 3.7160354614257813, + "step": 2060 + }, + { + "epoch": 4.339355852317361, + "learning_rate": 0.00021126849031504908, + "loss": 3.5512325286865236, + "step": 2070 + }, + { + "epoch": 4.360303744435716, + "learning_rate": 0.00021228911104120873, + "loss": 3.623905563354492, + "step": 2080 + }, + { + "epoch": 4.3812516365540715, + "learning_rate": 0.0002133097317673684, + "loss": 3.756671905517578, + "step": 2090 + }, + { + "epoch": 4.402199528672427, + "learning_rate": 0.00021433035249352806, + "loss": 3.71322021484375, + "step": 2100 + }, + { + "epoch": 4.423147420790783, + "learning_rate": 0.0002153509732196877, + "loss": 3.588302993774414, + "step": 2110 + }, + { + "epoch": 4.4440953129091385, + "learning_rate": 0.00021637159394584737, + "loss": 3.666096496582031, + "step": 2120 + }, + { + "epoch": 4.465043205027494, + "learning_rate": 0.00021739221467200704, + "loss": 3.660139465332031, + "step": 2130 + }, + { + "epoch": 4.48599109714585, + "learning_rate": 0.00021841283539816668, + "loss": 3.5749874114990234, + "step": 2140 + }, + { + "epoch": 4.5069389892642056, + "learning_rate": 0.00021943345612432635, + "loss": 3.621977615356445, + "step": 2150 + }, + { + "epoch": 4.527886881382561, + "learning_rate": 0.000220454076850486, + "loss": 3.7179306030273436, + "step": 2160 + }, + { + "epoch": 4.548834773500916, + "learning_rate": 0.00022147469757664566, + "loss": 3.5558433532714844, + "step": 2170 + }, + { + "epoch": 4.569782665619272, + "learning_rate": 0.00022249531830280533, + "loss": 3.754520034790039, + "step": 2180 + }, + { + "epoch": 4.590730557737627, + "learning_rate": 0.00022351593902896497, + "loss": 3.734426498413086, + "step": 2190 + }, + { + "epoch": 4.611678449855983, + "learning_rate": 0.00022453655975512464, + "loss": 3.6508132934570314, + "step": 2200 + }, + { + "epoch": 4.632626341974339, + "learning_rate": 0.0002255571804812843, + "loss": 3.7082672119140625, + "step": 2210 + }, + { + "epoch": 4.653574234092694, + "learning_rate": 0.00022657780120744395, + "loss": 3.600681686401367, + "step": 2220 + }, + { + "epoch": 4.67452212621105, + "learning_rate": 0.00022759842193360361, + "loss": 3.6909461975097657, + "step": 2230 + }, + { + "epoch": 4.695470018329406, + "learning_rate": 0.00022861904265976328, + "loss": 3.557560348510742, + "step": 2240 + }, + { + "epoch": 4.7164179104477615, + "learning_rate": 0.0002296396633859229, + "loss": 3.6415851593017576, + "step": 2250 + }, + { + "epoch": 4.737365802566117, + "learning_rate": 0.00023066028411208256, + "loss": 3.6133026123046874, + "step": 2260 + }, + { + "epoch": 4.758313694684473, + "learning_rate": 0.00023168090483824223, + "loss": 3.5348537445068358, + "step": 2270 + }, + { + "epoch": 4.779261586802828, + "learning_rate": 0.00023270152556440187, + "loss": 3.6589839935302733, + "step": 2280 + }, + { + "epoch": 4.800209478921183, + "learning_rate": 0.00023372214629056154, + "loss": 3.5354270935058594, + "step": 2290 + }, + { + "epoch": 4.821157371039539, + "learning_rate": 0.00023474276701672118, + "loss": 3.536578369140625, + "step": 2300 + }, + { + "epoch": 4.842105263157895, + "learning_rate": 0.00023576338774288085, + "loss": 3.5566326141357423, + "step": 2310 + }, + { + "epoch": 4.86305315527625, + "learning_rate": 0.00023678400846904052, + "loss": 3.475338363647461, + "step": 2320 + }, + { + "epoch": 4.884001047394606, + "learning_rate": 0.00023780462919520016, + "loss": 3.6042369842529296, + "step": 2330 + }, + { + "epoch": 4.904948939512962, + "learning_rate": 0.00023882524992135983, + "loss": 3.621173095703125, + "step": 2340 + }, + { + "epoch": 4.925896831631317, + "learning_rate": 0.0002398458706475195, + "loss": 3.640410232543945, + "step": 2350 + }, + { + "epoch": 4.946844723749672, + "learning_rate": 0.00024086649137367914, + "loss": 3.488922882080078, + "step": 2360 + }, + { + "epoch": 4.967792615868028, + "learning_rate": 0.0002418871120998388, + "loss": 3.4922332763671875, + "step": 2370 + }, + { + "epoch": 4.988740507986384, + "learning_rate": 0.00024290773282599848, + "loss": 3.590007019042969, + "step": 2380 + }, + { + "epoch": 5.010473946059178, + "learning_rate": 0.00024392835355215812, + "loss": 3.6829368591308596, + "step": 2390 + }, + { + "epoch": 5.0314218381775335, + "learning_rate": 0.0002449489742783178, + "loss": 3.651840591430664, + "step": 2400 + }, + { + "epoch": 5.052369730295889, + "learning_rate": 0.00024596959500447745, + "loss": 3.7254043579101563, + "step": 2410 + }, + { + "epoch": 5.073317622414245, + "learning_rate": 0.0002469902157306371, + "loss": 3.594907760620117, + "step": 2420 + }, + { + "epoch": 5.094265514532601, + "learning_rate": 0.00024801083645679674, + "loss": 3.4854148864746093, + "step": 2430 + }, + { + "epoch": 5.115213406650955, + "learning_rate": 0.0002490314571829564, + "loss": 3.58482666015625, + "step": 2440 + }, + { + "epoch": 5.136161298769311, + "learning_rate": 0.00025005207790911607, + "loss": 3.5847278594970704, + "step": 2450 + }, + { + "epoch": 5.157109190887667, + "learning_rate": 0.0002510726986352757, + "loss": 3.5045509338378906, + "step": 2460 + }, + { + "epoch": 5.178057083006022, + "learning_rate": 0.00025209331936143535, + "loss": 3.6352733612060546, + "step": 2470 + }, + { + "epoch": 5.199004975124378, + "learning_rate": 0.000253113940087595, + "loss": 3.555766296386719, + "step": 2480 + }, + { + "epoch": 5.219952867242734, + "learning_rate": 0.0002541345608137547, + "loss": 3.541688919067383, + "step": 2490 + }, + { + "epoch": 5.2409007593610895, + "learning_rate": 0.00025515518153991436, + "loss": 3.5400638580322266, + "step": 2500 + }, + { + "epoch": 5.261848651479445, + "learning_rate": 0.00025617580226607403, + "loss": 3.4929561614990234, + "step": 2510 + }, + { + "epoch": 5.282796543597801, + "learning_rate": 0.00025719642299223364, + "loss": 3.5611968994140626, + "step": 2520 + }, + { + "epoch": 5.3037444357161565, + "learning_rate": 0.0002582170437183933, + "loss": 3.4528472900390623, + "step": 2530 + }, + { + "epoch": 5.324692327834511, + "learning_rate": 0.000259237664444553, + "loss": 3.474958801269531, + "step": 2540 + }, + { + "epoch": 5.345640219952867, + "learning_rate": 0.00026025828517071265, + "loss": 3.6800113677978517, + "step": 2550 + }, + { + "epoch": 5.366588112071223, + "learning_rate": 0.0002612789058968723, + "loss": 3.524998092651367, + "step": 2560 + }, + { + "epoch": 5.387536004189578, + "learning_rate": 0.00026229952662303193, + "loss": 3.3865074157714843, + "step": 2570 + }, + { + "epoch": 5.408483896307934, + "learning_rate": 0.0002633201473491916, + "loss": 3.416782760620117, + "step": 2580 + }, + { + "epoch": 5.42943178842629, + "learning_rate": 0.00026434076807535127, + "loss": 3.4291786193847655, + "step": 2590 + }, + { + "epoch": 5.450379680544645, + "learning_rate": 0.00026536138880151093, + "loss": 3.487574005126953, + "step": 2600 + }, + { + "epoch": 5.471327572663001, + "learning_rate": 0.0002663820095276706, + "loss": 3.5091732025146483, + "step": 2610 + }, + { + "epoch": 5.492275464781357, + "learning_rate": 0.00026740263025383027, + "loss": 4.268975448608399, + "step": 2620 + }, + { + "epoch": 5.5132233568997115, + "learning_rate": 0.0002684232509799899, + "loss": 3.732823944091797, + "step": 2630 + }, + { + "epoch": 5.534171249018067, + "learning_rate": 0.00026944387170614955, + "loss": 3.650152587890625, + "step": 2640 + }, + { + "epoch": 5.555119141136423, + "learning_rate": 0.0002704644924323092, + "loss": 3.6992671966552733, + "step": 2650 + }, + { + "epoch": 5.576067033254779, + "learning_rate": 0.0002714851131584689, + "loss": 3.5627864837646483, + "step": 2660 + }, + { + "epoch": 5.597014925373134, + "learning_rate": 0.0002725057338846285, + "loss": 3.5473575592041016, + "step": 2670 + }, + { + "epoch": 5.61796281749149, + "learning_rate": 0.00027352635461078817, + "loss": 3.7565258026123045, + "step": 2680 + }, + { + "epoch": 5.638910709609846, + "learning_rate": 0.00027454697533694784, + "loss": 3.4811996459960937, + "step": 2690 + }, + { + "epoch": 5.659858601728201, + "learning_rate": 0.0002755675960631075, + "loss": 3.5412361145019533, + "step": 2700 + }, + { + "epoch": 5.680806493846557, + "learning_rate": 0.0002765882167892671, + "loss": 3.6894275665283205, + "step": 2710 + }, + { + "epoch": 5.701754385964913, + "learning_rate": 0.0002776088375154268, + "loss": 3.532870864868164, + "step": 2720 + }, + { + "epoch": 5.722702278083268, + "learning_rate": 0.00027862945824158646, + "loss": 3.4718368530273436, + "step": 2730 + }, + { + "epoch": 5.743650170201623, + "learning_rate": 0.00027965007896774613, + "loss": 3.5320533752441405, + "step": 2740 + }, + { + "epoch": 5.764598062319979, + "learning_rate": 0.0002806706996939058, + "loss": 3.6331645965576174, + "step": 2750 + }, + { + "epoch": 5.7855459544383345, + "learning_rate": 0.00028169132042006546, + "loss": 3.50958137512207, + "step": 2760 + }, + { + "epoch": 5.80649384655669, + "learning_rate": 0.00028271194114622513, + "loss": 3.5480377197265627, + "step": 2770 + }, + { + "epoch": 5.827441738675046, + "learning_rate": 0.00028373256187238475, + "loss": 3.5677505493164063, + "step": 2780 + }, + { + "epoch": 5.848389630793402, + "learning_rate": 0.0002847531825985444, + "loss": 3.5496990203857424, + "step": 2790 + }, + { + "epoch": 5.869337522911757, + "learning_rate": 0.00028577380332470403, + "loss": 3.4990489959716795, + "step": 2800 + }, + { + "epoch": 5.890285415030113, + "learning_rate": 0.0002867944240508637, + "loss": 3.4764991760253907, + "step": 2810 + }, + { + "epoch": 5.911233307148468, + "learning_rate": 0.00028781504477702337, + "loss": 3.4782173156738283, + "step": 2820 + }, + { + "epoch": 5.932181199266823, + "learning_rate": 0.00028883566550318303, + "loss": 3.5007530212402345, + "step": 2830 + }, + { + "epoch": 5.953129091385179, + "learning_rate": 0.0002898562862293427, + "loss": 3.4596179962158202, + "step": 2840 + }, + { + "epoch": 5.974076983503535, + "learning_rate": 0.0002908769069555023, + "loss": 3.4080764770507814, + "step": 2850 + }, + { + "epoch": 5.9950248756218905, + "learning_rate": 0.000291897527681662, + "loss": 3.5865558624267577, + "step": 2860 + }, + { + "epoch": 6.016758313694685, + "learning_rate": 0.00029291814840782165, + "loss": 3.539356231689453, + "step": 2870 + }, + { + "epoch": 6.03770620581304, + "learning_rate": 0.0002939387691339813, + "loss": 3.5746910095214846, + "step": 2880 + }, + { + "epoch": 6.058654097931396, + "learning_rate": 0.000294959389860141, + "loss": 3.477669906616211, + "step": 2890 + }, + { + "epoch": 6.079601990049751, + "learning_rate": 0.00029598001058630066, + "loss": 3.385912322998047, + "step": 2900 + }, + { + "epoch": 6.100549882168107, + "learning_rate": 0.00029700063131246027, + "loss": 3.566743850708008, + "step": 2910 + }, + { + "epoch": 6.121497774286462, + "learning_rate": 0.00029802125203861994, + "loss": 3.4617984771728514, + "step": 2920 + }, + { + "epoch": 6.142445666404818, + "learning_rate": 0.0002990418727647796, + "loss": 3.46124382019043, + "step": 2930 + }, + { + "epoch": 6.163393558523174, + "learning_rate": 0.0003000624934909393, + "loss": 3.5851741790771485, + "step": 2940 + }, + { + "epoch": 6.184341450641529, + "learning_rate": 0.00030108311421709894, + "loss": 3.5333206176757814, + "step": 2950 + }, + { + "epoch": 6.205289342759885, + "learning_rate": 0.0003021037349432586, + "loss": 3.4518871307373047, + "step": 2960 + }, + { + "epoch": 6.226237234878241, + "learning_rate": 0.0003031243556694182, + "loss": 3.5048519134521485, + "step": 2970 + }, + { + "epoch": 6.247185126996596, + "learning_rate": 0.0003041449763955779, + "loss": 3.386810302734375, + "step": 2980 + }, + { + "epoch": 6.268133019114952, + "learning_rate": 0.00030516559712173756, + "loss": 3.3472484588623046, + "step": 2990 + }, + { + "epoch": 6.289080911233307, + "learning_rate": 0.00030618621784789723, + "loss": 3.578289794921875, + "step": 3000 + }, + { + "epoch": 6.3100288033516625, + "learning_rate": 0.0003072068385740569, + "loss": 3.4641948699951173, + "step": 3010 + }, + { + "epoch": 6.330976695470018, + "learning_rate": 0.00030822745930021657, + "loss": 3.42608757019043, + "step": 3020 + }, + { + "epoch": 6.351924587588374, + "learning_rate": 0.0003092480800263762, + "loss": 3.4154186248779297, + "step": 3030 + }, + { + "epoch": 6.3728724797067295, + "learning_rate": 0.00031026870075253585, + "loss": 3.5270923614501952, + "step": 3040 + }, + { + "epoch": 6.393820371825085, + "learning_rate": 0.0003112893214786955, + "loss": 3.4780391693115233, + "step": 3050 + }, + { + "epoch": 6.414768263943441, + "learning_rate": 0.0003123099422048552, + "loss": 3.379390335083008, + "step": 3060 + }, + { + "epoch": 6.435716156061797, + "learning_rate": 0.00031333056293101486, + "loss": 3.405352020263672, + "step": 3070 + }, + { + "epoch": 6.456664048180152, + "learning_rate": 0.00031435118365717447, + "loss": 3.523044204711914, + "step": 3080 + }, + { + "epoch": 6.477611940298507, + "learning_rate": 0.00031537180438333414, + "loss": 3.488709259033203, + "step": 3090 + }, + { + "epoch": 6.498559832416863, + "learning_rate": 0.0003163924251094938, + "loss": 3.3736576080322265, + "step": 3100 + }, + { + "epoch": 6.519507724535218, + "learning_rate": 0.0003174130458356535, + "loss": 3.5457527160644533, + "step": 3110 + }, + { + "epoch": 6.540455616653574, + "learning_rate": 0.00031843366656181314, + "loss": 3.38338623046875, + "step": 3120 + }, + { + "epoch": 6.56140350877193, + "learning_rate": 0.0003194542872879728, + "loss": 3.4261444091796873, + "step": 3130 + }, + { + "epoch": 6.5823514008902855, + "learning_rate": 0.0003204749080141324, + "loss": 3.365514373779297, + "step": 3140 + }, + { + "epoch": 6.603299293008641, + "learning_rate": 0.0003214955287402921, + "loss": 3.3958808898925783, + "step": 3150 + }, + { + "epoch": 6.624247185126997, + "learning_rate": 0.00032251614946645176, + "loss": 3.4493579864501953, + "step": 3160 + }, + { + "epoch": 6.6451950772453525, + "learning_rate": 0.00032353677019261143, + "loss": 3.509814453125, + "step": 3170 + }, + { + "epoch": 6.666142969363708, + "learning_rate": 0.0003245573909187711, + "loss": 3.3270954132080077, + "step": 3180 + }, + { + "epoch": 6.687090861482063, + "learning_rate": 0.00032557801164493077, + "loss": 3.407461929321289, + "step": 3190 + }, + { + "epoch": 6.708038753600419, + "learning_rate": 0.0003265986323710903, + "loss": 3.3440326690673827, + "step": 3200 + }, + { + "epoch": 6.728986645718774, + "learning_rate": 0.00032761925309725, + "loss": 3.3119239807128906, + "step": 3210 + }, + { + "epoch": 6.74993453783713, + "learning_rate": 0.00032863987382340966, + "loss": 3.441579818725586, + "step": 3220 + }, + { + "epoch": 6.770882429955486, + "learning_rate": 0.00032966049454956933, + "loss": 3.448055648803711, + "step": 3230 + }, + { + "epoch": 6.791830322073841, + "learning_rate": 0.000330681115275729, + "loss": 3.2744544982910155, + "step": 3240 + }, + { + "epoch": 6.812778214192197, + "learning_rate": 0.0003317017360018886, + "loss": 3.4412532806396485, + "step": 3250 + }, + { + "epoch": 6.833726106310553, + "learning_rate": 0.0003327223567280483, + "loss": 3.459817123413086, + "step": 3260 + }, + { + "epoch": 6.8546739984289085, + "learning_rate": 0.00033374297745420795, + "loss": 3.3376232147216798, + "step": 3270 + }, + { + "epoch": 6.875621890547263, + "learning_rate": 0.0003347635981803676, + "loss": 3.557674789428711, + "step": 3280 + }, + { + "epoch": 6.896569782665619, + "learning_rate": 0.0003357842189065273, + "loss": 3.290896987915039, + "step": 3290 + }, + { + "epoch": 6.917517674783975, + "learning_rate": 0.0003368048396326869, + "loss": 3.49056396484375, + "step": 3300 + }, + { + "epoch": 6.93846556690233, + "learning_rate": 0.00033782546035884657, + "loss": 3.3487998962402346, + "step": 3310 + }, + { + "epoch": 6.959413459020686, + "learning_rate": 0.00033884608108500624, + "loss": 3.3251983642578127, + "step": 3320 + }, + { + "epoch": 6.980361351139042, + "learning_rate": 0.0003398667018111659, + "loss": 3.463846206665039, + "step": 3330 + }, + { + "epoch": 7.002094789211836, + "learning_rate": 0.0003408873225373256, + "loss": 3.5404449462890626, + "step": 3340 + }, + { + "epoch": 7.023042681330191, + "learning_rate": 0.00034190794326348524, + "loss": 3.317121887207031, + "step": 3350 + }, + { + "epoch": 7.043990573448546, + "learning_rate": 0.00034292856398964486, + "loss": 3.446538543701172, + "step": 3360 + }, + { + "epoch": 7.064938465566902, + "learning_rate": 0.0003439491847158045, + "loss": 3.410959243774414, + "step": 3370 + }, + { + "epoch": 7.085886357685258, + "learning_rate": 0.0003449698054419642, + "loss": 3.4593124389648438, + "step": 3380 + }, + { + "epoch": 7.1068342498036134, + "learning_rate": 0.00034599042616812386, + "loss": 3.4331336975097657, + "step": 3390 + }, + { + "epoch": 7.127782141921969, + "learning_rate": 0.00034701104689428353, + "loss": 3.415497970581055, + "step": 3400 + }, + { + "epoch": 7.148730034040325, + "learning_rate": 0.0003480316676204432, + "loss": 3.358320617675781, + "step": 3410 + }, + { + "epoch": 7.1696779261586805, + "learning_rate": 0.0003490522883466028, + "loss": 3.369782257080078, + "step": 3420 + }, + { + "epoch": 7.190625818277036, + "learning_rate": 0.0003500729090727625, + "loss": 3.3603092193603517, + "step": 3430 + }, + { + "epoch": 7.211573710395392, + "learning_rate": 0.00035109352979892215, + "loss": 3.4065528869628907, + "step": 3440 + }, + { + "epoch": 7.232521602513747, + "learning_rate": 0.0003521141505250818, + "loss": 3.368368148803711, + "step": 3450 + }, + { + "epoch": 7.253469494632102, + "learning_rate": 0.0003531347712512415, + "loss": 3.3687610626220703, + "step": 3460 + }, + { + "epoch": 7.274417386750458, + "learning_rate": 0.00035415539197740115, + "loss": 3.397439956665039, + "step": 3470 + }, + { + "epoch": 7.295365278868814, + "learning_rate": 0.00035517601270356077, + "loss": 3.329518508911133, + "step": 3480 + }, + { + "epoch": 7.316313170987169, + "learning_rate": 0.00035619663342972044, + "loss": 3.3606395721435547, + "step": 3490 + }, + { + "epoch": 7.337261063105525, + "learning_rate": 0.0003572172541558801, + "loss": 3.373159408569336, + "step": 3500 + }, + { + "epoch": 7.358208955223881, + "learning_rate": 0.00035823787488203977, + "loss": 3.460713195800781, + "step": 3510 + }, + { + "epoch": 7.379156847342236, + "learning_rate": 0.00035925849560819944, + "loss": 3.3600276947021483, + "step": 3520 + }, + { + "epoch": 7.400104739460592, + "learning_rate": 0.0003602791163343591, + "loss": 3.3381488800048826, + "step": 3530 + }, + { + "epoch": 7.421052631578947, + "learning_rate": 0.0003612997370605187, + "loss": 3.5098854064941407, + "step": 3540 + }, + { + "epoch": 7.442000523697303, + "learning_rate": 0.0003623203577866784, + "loss": 3.358294677734375, + "step": 3550 + }, + { + "epoch": 7.462948415815658, + "learning_rate": 0.00036334097851283806, + "loss": 3.361553955078125, + "step": 3560 + }, + { + "epoch": 7.483896307934014, + "learning_rate": 0.00036436159923899773, + "loss": 3.3908660888671873, + "step": 3570 + }, + { + "epoch": 7.50484420005237, + "learning_rate": 0.0003653822199651574, + "loss": 3.305834197998047, + "step": 3580 + }, + { + "epoch": 7.525792092170725, + "learning_rate": 0.000366402840691317, + "loss": 3.3551094055175783, + "step": 3590 + }, + { + "epoch": 7.546739984289081, + "learning_rate": 0.0003674234614174767, + "loss": 3.336803436279297, + "step": 3600 + }, + { + "epoch": 7.567687876407437, + "learning_rate": 0.00036844408214363635, + "loss": 3.402811050415039, + "step": 3610 + }, + { + "epoch": 7.588635768525792, + "learning_rate": 0.000369464702869796, + "loss": 3.3823123931884767, + "step": 3620 + }, + { + "epoch": 7.609583660644148, + "learning_rate": 0.00037048532359595563, + "loss": 3.3616653442382813, + "step": 3630 + }, + { + "epoch": 7.630531552762504, + "learning_rate": 0.00037150594432211524, + "loss": 3.409253692626953, + "step": 3640 + }, + { + "epoch": 7.6514794448808585, + "learning_rate": 0.0003725265650482749, + "loss": 3.1992008209228517, + "step": 3650 + }, + { + "epoch": 7.672427336999214, + "learning_rate": 0.0003735471857744346, + "loss": 3.2639488220214843, + "step": 3660 + }, + { + "epoch": 7.69337522911757, + "learning_rate": 0.00037456780650059425, + "loss": 3.4038814544677733, + "step": 3670 + }, + { + "epoch": 7.714323121235926, + "learning_rate": 0.0003755884272267539, + "loss": 3.382715606689453, + "step": 3680 + }, + { + "epoch": 7.735271013354281, + "learning_rate": 0.0003766090479529136, + "loss": 3.380691146850586, + "step": 3690 + }, + { + "epoch": 7.756218905472637, + "learning_rate": 0.0003776296686790732, + "loss": 3.3293548583984376, + "step": 3700 + }, + { + "epoch": 7.777166797590993, + "learning_rate": 0.00037865028940523287, + "loss": 3.2755306243896483, + "step": 3710 + }, + { + "epoch": 7.798114689709348, + "learning_rate": 0.00037967091013139253, + "loss": 3.3747108459472654, + "step": 3720 + }, + { + "epoch": 7.819062581827704, + "learning_rate": 0.0003806915308575522, + "loss": 3.325161361694336, + "step": 3730 + }, + { + "epoch": 7.840010473946059, + "learning_rate": 0.00038171215158371187, + "loss": 3.3385120391845704, + "step": 3740 + }, + { + "epoch": 7.8609583660644144, + "learning_rate": 0.00038273277230987154, + "loss": 3.365946960449219, + "step": 3750 + }, + { + "epoch": 7.88190625818277, + "learning_rate": 0.00038375339303603115, + "loss": 3.2765518188476563, + "step": 3760 + }, + { + "epoch": 7.902854150301126, + "learning_rate": 0.0003847740137621908, + "loss": 3.4135189056396484, + "step": 3770 + }, + { + "epoch": 7.9238020424194815, + "learning_rate": 0.0003857946344883505, + "loss": 3.3337100982666015, + "step": 3780 + }, + { + "epoch": 7.944749934537837, + "learning_rate": 0.00038681525521451016, + "loss": 3.469867706298828, + "step": 3790 + }, + { + "epoch": 7.965697826656193, + "learning_rate": 0.0003878358759406698, + "loss": 3.4340835571289063, + "step": 3800 + }, + { + "epoch": 7.9866457187745485, + "learning_rate": 0.00038885649666682944, + "loss": 3.3163192749023436, + "step": 3810 + }, + { + "epoch": 8.008379156847342, + "learning_rate": 0.0003898771173929891, + "loss": 3.4269264221191404, + "step": 3820 + }, + { + "epoch": 8.029327048965698, + "learning_rate": 0.0003908977381191488, + "loss": 3.366756057739258, + "step": 3830 + }, + { + "epoch": 8.050274941084053, + "learning_rate": 0.00039191835884530845, + "loss": 3.5194732666015627, + "step": 3840 + }, + { + "epoch": 8.071222833202409, + "learning_rate": 0.0003929389795714681, + "loss": 3.2884559631347656, + "step": 3850 + }, + { + "epoch": 8.092170725320765, + "learning_rate": 0.0003939596002976278, + "loss": 3.3115074157714846, + "step": 3860 + }, + { + "epoch": 8.11311861743912, + "learning_rate": 0.0003949802210237874, + "loss": 3.342890167236328, + "step": 3870 + }, + { + "epoch": 8.134066509557476, + "learning_rate": 0.00039600084174994706, + "loss": 3.337261962890625, + "step": 3880 + }, + { + "epoch": 8.155014401675832, + "learning_rate": 0.00039702146247610673, + "loss": 3.3896888732910155, + "step": 3890 + }, + { + "epoch": 8.175962293794187, + "learning_rate": 0.0003980420832022664, + "loss": 3.314004898071289, + "step": 3900 + }, + { + "epoch": 8.196910185912543, + "learning_rate": 0.00039906270392842607, + "loss": 3.253472900390625, + "step": 3910 + }, + { + "epoch": 8.217858078030899, + "learning_rate": 0.00040008332465458574, + "loss": 3.2534595489501954, + "step": 3920 + }, + { + "epoch": 8.238805970149254, + "learning_rate": 0.00040110394538074535, + "loss": 3.3420372009277344, + "step": 3930 + }, + { + "epoch": 8.25975386226761, + "learning_rate": 0.000402124566106905, + "loss": 3.3110313415527344, + "step": 3940 + }, + { + "epoch": 8.280701754385966, + "learning_rate": 0.0004031451868330647, + "loss": 3.3015865325927733, + "step": 3950 + }, + { + "epoch": 8.30164964650432, + "learning_rate": 0.00040416580755922436, + "loss": 3.207544708251953, + "step": 3960 + }, + { + "epoch": 8.322597538622675, + "learning_rate": 0.000405186428285384, + "loss": 3.2784183502197264, + "step": 3970 + }, + { + "epoch": 8.343545430741031, + "learning_rate": 0.0004062070490115437, + "loss": 3.29312744140625, + "step": 3980 + }, + { + "epoch": 8.364493322859387, + "learning_rate": 0.0004072276697377033, + "loss": 3.349509048461914, + "step": 3990 + }, + { + "epoch": 8.385441214977742, + "learning_rate": 0.000408248290463863, + "loss": 3.311314010620117, + "step": 4000 + }, + { + "epoch": 8.406389107096098, + "learning_rate": 0.00040926891119002264, + "loss": 3.2704097747802736, + "step": 4010 + }, + { + "epoch": 8.427336999214454, + "learning_rate": 0.0004102895319161823, + "loss": 3.3431529998779297, + "step": 4020 + }, + { + "epoch": 8.44828489133281, + "learning_rate": 0.000411310152642342, + "loss": 3.2444534301757812, + "step": 4030 + }, + { + "epoch": 8.469232783451165, + "learning_rate": 0.0004123307733685016, + "loss": 3.2499820709228517, + "step": 4040 + }, + { + "epoch": 8.49018067556952, + "learning_rate": 0.00041335139409466126, + "loss": 3.4333778381347657, + "step": 4050 + }, + { + "epoch": 8.511128567687877, + "learning_rate": 0.0004143720148208209, + "loss": 3.598118209838867, + "step": 4060 + }, + { + "epoch": 8.532076459806232, + "learning_rate": 0.00041539263554698055, + "loss": 3.1703567504882812, + "step": 4070 + }, + { + "epoch": 8.553024351924588, + "learning_rate": 0.0004164132562731402, + "loss": 3.308456802368164, + "step": 4080 + }, + { + "epoch": 8.573972244042944, + "learning_rate": 0.00041743387699929983, + "loss": 3.262325668334961, + "step": 4090 + }, + { + "epoch": 8.5949201361613, + "learning_rate": 0.0004184544977254595, + "loss": 3.277301788330078, + "step": 4100 + }, + { + "epoch": 8.615868028279655, + "learning_rate": 0.00041947511845161916, + "loss": 3.245453643798828, + "step": 4110 + }, + { + "epoch": 8.63681592039801, + "learning_rate": 0.00042049573917777883, + "loss": 3.246595764160156, + "step": 4120 + }, + { + "epoch": 8.657763812516366, + "learning_rate": 0.0004215163599039385, + "loss": 3.310033416748047, + "step": 4130 + }, + { + "epoch": 8.678711704634722, + "learning_rate": 0.00042253698063009817, + "loss": 3.301158905029297, + "step": 4140 + }, + { + "epoch": 8.699659596753076, + "learning_rate": 0.0004235576013562578, + "loss": 3.2295578002929686, + "step": 4150 + }, + { + "epoch": 8.720607488871432, + "learning_rate": 0.00042457822208241745, + "loss": 3.26501579284668, + "step": 4160 + }, + { + "epoch": 8.741555380989787, + "learning_rate": 0.0004255988428085771, + "loss": 3.346723937988281, + "step": 4170 + }, + { + "epoch": 8.762503273108143, + "learning_rate": 0.0004266194635347368, + "loss": 3.2818328857421877, + "step": 4180 + }, + { + "epoch": 8.783451165226499, + "learning_rate": 0.00042764008426089646, + "loss": 3.3082435607910154, + "step": 4190 + }, + { + "epoch": 8.804399057344854, + "learning_rate": 0.0004286607049870561, + "loss": 3.2165481567382814, + "step": 4200 + }, + { + "epoch": 8.82534694946321, + "learning_rate": 0.00042968132571321574, + "loss": 3.217595672607422, + "step": 4210 + }, + { + "epoch": 8.846294841581566, + "learning_rate": 0.0004307019464393754, + "loss": 3.2202774047851563, + "step": 4220 + }, + { + "epoch": 8.867242733699921, + "learning_rate": 0.0004317225671655351, + "loss": 3.3156604766845703, + "step": 4230 + }, + { + "epoch": 8.888190625818277, + "learning_rate": 0.00043274318789169474, + "loss": 3.2440589904785155, + "step": 4240 + }, + { + "epoch": 8.909138517936633, + "learning_rate": 0.0004337638086178544, + "loss": 3.354948043823242, + "step": 4250 + }, + { + "epoch": 8.930086410054988, + "learning_rate": 0.0004347844293440141, + "loss": 3.2932735443115235, + "step": 4260 + }, + { + "epoch": 8.951034302173344, + "learning_rate": 0.0004358050500701737, + "loss": 3.1642740249633787, + "step": 4270 + }, + { + "epoch": 8.9719821942917, + "learning_rate": 0.00043682567079633336, + "loss": 3.1953109741210937, + "step": 4280 + }, + { + "epoch": 8.992930086410055, + "learning_rate": 0.00043784629152249303, + "loss": 3.1374893188476562, + "step": 4290 + }, + { + "epoch": 9.014663524482849, + "learning_rate": 0.0004388669122486527, + "loss": 3.4447471618652346, + "step": 4300 + }, + { + "epoch": 9.035611416601204, + "learning_rate": 0.00043988753297481237, + "loss": 3.278203582763672, + "step": 4310 + }, + { + "epoch": 9.05655930871956, + "learning_rate": 0.000440908153700972, + "loss": 3.2842811584472655, + "step": 4320 + }, + { + "epoch": 9.077507200837916, + "learning_rate": 0.00044192877442713165, + "loss": 3.2456493377685547, + "step": 4330 + }, + { + "epoch": 9.098455092956272, + "learning_rate": 0.0004429493951532913, + "loss": 3.18524112701416, + "step": 4340 + }, + { + "epoch": 9.119402985074627, + "learning_rate": 0.000443970015879451, + "loss": 3.2943866729736326, + "step": 4350 + }, + { + "epoch": 9.140350877192983, + "learning_rate": 0.00044499063660561065, + "loss": 3.325389862060547, + "step": 4360 + }, + { + "epoch": 9.161298769311339, + "learning_rate": 0.0004460112573317703, + "loss": 3.1400611877441404, + "step": 4370 + }, + { + "epoch": 9.182246661429694, + "learning_rate": 0.00044703187805792994, + "loss": 3.272686004638672, + "step": 4380 + }, + { + "epoch": 9.20319455354805, + "learning_rate": 0.0004480524987840896, + "loss": 3.284004974365234, + "step": 4390 + }, + { + "epoch": 9.224142445666406, + "learning_rate": 0.0004490731195102493, + "loss": 3.2489898681640623, + "step": 4400 + }, + { + "epoch": 9.24509033778476, + "learning_rate": 0.00045009374023640894, + "loss": 3.2408329010009767, + "step": 4410 + }, + { + "epoch": 9.266038229903115, + "learning_rate": 0.0004511143609625686, + "loss": 3.2516738891601564, + "step": 4420 + }, + { + "epoch": 9.286986122021471, + "learning_rate": 0.0004521349816887283, + "loss": 3.1824373245239257, + "step": 4430 + }, + { + "epoch": 9.307934014139827, + "learning_rate": 0.0004531556024148879, + "loss": 3.2249637603759767, + "step": 4440 + }, + { + "epoch": 9.328881906258182, + "learning_rate": 0.00045417622314104756, + "loss": 3.174178695678711, + "step": 4450 + }, + { + "epoch": 9.349829798376538, + "learning_rate": 0.00045519684386720723, + "loss": 3.188156318664551, + "step": 4460 + }, + { + "epoch": 9.370777690494894, + "learning_rate": 0.0004562174645933669, + "loss": 3.228466796875, + "step": 4470 + }, + { + "epoch": 9.39172558261325, + "learning_rate": 0.00045723808531952657, + "loss": 3.2763172149658204, + "step": 4480 + }, + { + "epoch": 9.412673474731605, + "learning_rate": 0.0004582587060456861, + "loss": 3.266025161743164, + "step": 4490 + }, + { + "epoch": 9.43362136684996, + "learning_rate": 0.0004592793267718458, + "loss": 3.2390396118164064, + "step": 4500 + }, + { + "epoch": 9.454569258968316, + "learning_rate": 0.00046029994749800546, + "loss": 3.23939208984375, + "step": 4510 + }, + { + "epoch": 9.475517151086672, + "learning_rate": 0.00046132056822416513, + "loss": 3.219116973876953, + "step": 4520 + }, + { + "epoch": 9.496465043205028, + "learning_rate": 0.0004623411889503248, + "loss": 3.277789306640625, + "step": 4530 + }, + { + "epoch": 9.517412935323383, + "learning_rate": 0.00046336180967648447, + "loss": 3.284175491333008, + "step": 4540 + }, + { + "epoch": 9.538360827441739, + "learning_rate": 0.0004643824304026441, + "loss": 3.208120346069336, + "step": 4550 + }, + { + "epoch": 9.559308719560095, + "learning_rate": 0.00046540305112880375, + "loss": 3.271435546875, + "step": 4560 + }, + { + "epoch": 9.58025661167845, + "learning_rate": 0.0004664236718549634, + "loss": 3.2989322662353517, + "step": 4570 + }, + { + "epoch": 9.601204503796806, + "learning_rate": 0.0004674442925811231, + "loss": 3.1721576690673827, + "step": 4580 + }, + { + "epoch": 9.622152395915162, + "learning_rate": 0.00046846491330728275, + "loss": 3.1907968521118164, + "step": 4590 + }, + { + "epoch": 9.643100288033516, + "learning_rate": 0.00046948553403344237, + "loss": 3.3379592895507812, + "step": 4600 + }, + { + "epoch": 9.664048180151871, + "learning_rate": 0.00047050615475960204, + "loss": 3.2489646911621093, + "step": 4610 + }, + { + "epoch": 9.684996072270227, + "learning_rate": 0.0004715267754857617, + "loss": 3.3012271881103517, + "step": 4620 + }, + { + "epoch": 9.705943964388583, + "learning_rate": 0.00047254739621192137, + "loss": 3.235492706298828, + "step": 4630 + }, + { + "epoch": 9.726891856506938, + "learning_rate": 0.00047356801693808104, + "loss": 3.2170711517333985, + "step": 4640 + }, + { + "epoch": 9.747839748625294, + "learning_rate": 0.0004745886376642407, + "loss": 3.1358510971069338, + "step": 4650 + }, + { + "epoch": 9.76878764074365, + "learning_rate": 0.0004756092583904003, + "loss": 3.154219055175781, + "step": 4660 + }, + { + "epoch": 9.789735532862005, + "learning_rate": 0.00047662987911656, + "loss": 3.3397506713867187, + "step": 4670 + }, + { + "epoch": 9.810683424980361, + "learning_rate": 0.00047765049984271966, + "loss": 3.2498783111572265, + "step": 4680 + }, + { + "epoch": 9.831631317098717, + "learning_rate": 0.00047867112056887933, + "loss": 3.2211677551269533, + "step": 4690 + }, + { + "epoch": 9.852579209217073, + "learning_rate": 0.000479691741295039, + "loss": 3.2689888000488283, + "step": 4700 + }, + { + "epoch": 9.873527101335428, + "learning_rate": 0.00048071236202119866, + "loss": 3.1732282638549805, + "step": 4710 + }, + { + "epoch": 9.894474993453784, + "learning_rate": 0.0004817329827473583, + "loss": 3.1548320770263674, + "step": 4720 + }, + { + "epoch": 9.91542288557214, + "learning_rate": 0.00048275360347351795, + "loss": 3.2713703155517577, + "step": 4730 + }, + { + "epoch": 9.936370777690495, + "learning_rate": 0.0004837742241996776, + "loss": 3.1592134475708007, + "step": 4740 + }, + { + "epoch": 9.957318669808851, + "learning_rate": 0.0004847948449258373, + "loss": 3.213180923461914, + "step": 4750 + }, + { + "epoch": 9.978266561927207, + "learning_rate": 0.00048581546565199695, + "loss": 3.2513309478759767, + "step": 4760 + }, + { + "epoch": 9.999214454045562, + "learning_rate": 0.0004868360863781566, + "loss": 3.3612499237060547, + "step": 4770 + }, + { + "epoch": 10.020947892118356, + "learning_rate": 0.00048785670710431623, + "loss": 3.2473423004150392, + "step": 4780 + }, + { + "epoch": 10.041895784236711, + "learning_rate": 0.000488877327830476, + "loss": 3.3008792877197264, + "step": 4790 + }, + { + "epoch": 10.062843676355067, + "learning_rate": 0.0004898979485566356, + "loss": 3.286971664428711, + "step": 4800 + }, + { + "epoch": 10.083791568473423, + "learning_rate": 0.0004909185692827952, + "loss": 3.300416946411133, + "step": 4810 + }, + { + "epoch": 10.104739460591778, + "learning_rate": 0.0004919391900089549, + "loss": 3.1919363021850584, + "step": 4820 + }, + { + "epoch": 10.125687352710134, + "learning_rate": 0.0004929598107351145, + "loss": 3.2064422607421874, + "step": 4830 + }, + { + "epoch": 10.14663524482849, + "learning_rate": 0.0004939804314612742, + "loss": 3.1432363510131838, + "step": 4840 + }, + { + "epoch": 10.167583136946845, + "learning_rate": 0.0004950010521874339, + "loss": 3.17150764465332, + "step": 4850 + }, + { + "epoch": 10.188531029065201, + "learning_rate": 0.0004960216729135935, + "loss": 3.2499244689941404, + "step": 4860 + }, + { + "epoch": 10.209478921183555, + "learning_rate": 0.0004970422936397532, + "loss": 3.1550519943237303, + "step": 4870 + }, + { + "epoch": 10.23042681330191, + "learning_rate": 0.0004980629143659128, + "loss": 3.1220830917358398, + "step": 4880 + }, + { + "epoch": 10.251374705420266, + "learning_rate": 0.0004990835350920725, + "loss": 3.2074295043945313, + "step": 4890 + }, + { + "epoch": 10.272322597538622, + "learning_rate": 0.0005001041558182321, + "loss": 3.198388862609863, + "step": 4900 + }, + { + "epoch": 10.293270489656978, + "learning_rate": 0.0005011247765443918, + "loss": 3.218120574951172, + "step": 4910 + }, + { + "epoch": 10.314218381775333, + "learning_rate": 0.0005021453972705514, + "loss": 3.2131175994873047, + "step": 4920 + }, + { + "epoch": 10.33516627389369, + "learning_rate": 0.0005031660179967111, + "loss": 3.1987491607666017, + "step": 4930 + }, + { + "epoch": 10.356114166012045, + "learning_rate": 0.0005041866387228707, + "loss": 3.175269889831543, + "step": 4940 + }, + { + "epoch": 10.3770620581304, + "learning_rate": 0.0005052072594490304, + "loss": 3.183473014831543, + "step": 4950 + }, + { + "epoch": 10.398009950248756, + "learning_rate": 0.00050622788017519, + "loss": 3.0571062088012697, + "step": 4960 + }, + { + "epoch": 10.418957842367112, + "learning_rate": 0.0005072485009013497, + "loss": 3.325624465942383, + "step": 4970 + }, + { + "epoch": 10.439905734485468, + "learning_rate": 0.0005082691216275094, + "loss": 3.249886703491211, + "step": 4980 + }, + { + "epoch": 10.460853626603823, + "learning_rate": 0.000509289742353669, + "loss": 3.18145866394043, + "step": 4990 + }, + { + "epoch": 10.481801518722179, + "learning_rate": 0.0005103103630798287, + "loss": 3.1750720977783202, + "step": 5000 + }, + { + "epoch": 10.502749410840535, + "learning_rate": 0.0005098008169087462, + "loss": 3.298409271240234, + "step": 5010 + }, + { + "epoch": 10.52369730295889, + "learning_rate": 0.0005092927940452339, + "loss": 3.0969064712524412, + "step": 5020 + }, + { + "epoch": 10.544645195077246, + "learning_rate": 0.0005087862869144114, + "loss": 3.3151134490966796, + "step": 5030 + }, + { + "epoch": 10.565593087195602, + "learning_rate": 0.0005082812879940277, + "loss": 3.2290550231933595, + "step": 5040 + }, + { + "epoch": 10.586540979313957, + "learning_rate": 0.0005077777898139921, + "loss": 3.140799713134766, + "step": 5050 + }, + { + "epoch": 10.607488871432313, + "learning_rate": 0.0005072757849559103, + "loss": 3.1083478927612305, + "step": 5060 + }, + { + "epoch": 10.628436763550667, + "learning_rate": 0.0005067752660526248, + "loss": 3.1976173400878904, + "step": 5070 + }, + { + "epoch": 10.649384655669023, + "learning_rate": 0.0005062762257877613, + "loss": 3.2580982208251954, + "step": 5080 + }, + { + "epoch": 10.670332547787378, + "learning_rate": 0.0005057786568952791, + "loss": 3.280201721191406, + "step": 5090 + }, + { + "epoch": 10.691280439905734, + "learning_rate": 0.000505282552159027, + "loss": 3.141094779968262, + "step": 5100 + }, + { + "epoch": 10.71222833202409, + "learning_rate": 0.000504787904412304, + "loss": 3.095392608642578, + "step": 5110 + }, + { + "epoch": 10.733176224142445, + "learning_rate": 0.000504294706537424, + "loss": 3.2083145141601563, + "step": 5120 + }, + { + "epoch": 10.754124116260801, + "learning_rate": 0.0005038029514652858, + "loss": 3.289701461791992, + "step": 5130 + }, + { + "epoch": 10.775072008379157, + "learning_rate": 0.0005033126321749477, + "loss": 3.2425827026367187, + "step": 5140 + }, + { + "epoch": 10.796019900497512, + "learning_rate": 0.000502823741693206, + "loss": 3.277665710449219, + "step": 5150 + }, + { + "epoch": 10.816967792615868, + "learning_rate": 0.0005023362730941793, + "loss": 3.1909582138061525, + "step": 5160 + }, + { + "epoch": 10.837915684734224, + "learning_rate": 0.0005018502194988955, + "loss": 3.170912170410156, + "step": 5170 + }, + { + "epoch": 10.85886357685258, + "learning_rate": 0.0005013655740748848, + "loss": 3.2589969635009766, + "step": 5180 + }, + { + "epoch": 10.879811468970935, + "learning_rate": 0.0005008823300357761, + "loss": 3.1849817276000976, + "step": 5190 + }, + { + "epoch": 10.90075936108929, + "learning_rate": 0.0005004004806408972, + "loss": 3.1448366165161135, + "step": 5200 + }, + { + "epoch": 10.921707253207646, + "learning_rate": 0.0004999200191948814, + "loss": 3.122829055786133, + "step": 5210 + }, + { + "epoch": 10.942655145326002, + "learning_rate": 0.0004994409390472751, + "loss": 3.1754734039306642, + "step": 5220 + }, + { + "epoch": 10.963603037444358, + "learning_rate": 0.0004989632335921523, + "loss": 3.1149194717407225, + "step": 5230 + }, + { + "epoch": 10.984550929562714, + "learning_rate": 0.0004984868962677315, + "loss": 3.100501823425293, + "step": 5240 + }, + { + "epoch": 11.006284367635507, + "learning_rate": 0.0004980119205559973, + "loss": 3.330778121948242, + "step": 5250 + }, + { + "epoch": 11.027232259753863, + "learning_rate": 0.0004975382999823259, + "loss": 3.1559074401855467, + "step": 5260 + }, + { + "epoch": 11.048180151872218, + "learning_rate": 0.0004970660281151141, + "loss": 3.172486114501953, + "step": 5270 + }, + { + "epoch": 11.069128043990574, + "learning_rate": 0.0004965950985654126, + "loss": 3.1997749328613283, + "step": 5280 + }, + { + "epoch": 11.09007593610893, + "learning_rate": 0.0004961255049865635, + "loss": 3.2853694915771485, + "step": 5290 + }, + { + "epoch": 11.111023828227285, + "learning_rate": 0.0004956572410738401, + "loss": 3.145161819458008, + "step": 5300 + }, + { + "epoch": 11.131971720345641, + "learning_rate": 0.000495190300564092, + "loss": 3.126105308532715, + "step": 5310 + }, + { + "epoch": 11.152919612463997, + "learning_rate": 0.0004947246772353933, + "loss": 3.2012374877929686, + "step": 5320 + }, + { + "epoch": 11.17386750458235, + "learning_rate": 0.0004942603649066942, + "loss": 3.1729455947875977, + "step": 5330 + }, + { + "epoch": 11.194815396700706, + "learning_rate": 0.0004937973574374762, + "loss": 3.148386001586914, + "step": 5340 + }, + { + "epoch": 11.215763288819062, + "learning_rate": 0.0004933356487274114, + "loss": 3.065207290649414, + "step": 5350 + }, + { + "epoch": 11.236711180937418, + "learning_rate": 0.0004928752327160248, + "loss": 3.150010108947754, + "step": 5360 + }, + { + "epoch": 11.257659073055773, + "learning_rate": 0.0004924161033823598, + "loss": 3.2157524108886717, + "step": 5370 + }, + { + "epoch": 11.278606965174129, + "learning_rate": 0.0004919582547446482, + "loss": 3.0866676330566407, + "step": 5380 + }, + { + "epoch": 11.299554857292485, + "learning_rate": 0.0004915016808599824, + "loss": 3.2173648834228517, + "step": 5390 + }, + { + "epoch": 11.32050274941084, + "learning_rate": 0.0004910463758239914, + "loss": 3.1369649887084963, + "step": 5400 + }, + { + "epoch": 11.341450641529196, + "learning_rate": 0.0004905923337705201, + "loss": 3.0868097305297852, + "step": 5410 + }, + { + "epoch": 11.362398533647552, + "learning_rate": 0.0004901395488713123, + "loss": 3.2488777160644533, + "step": 5420 + }, + { + "epoch": 11.383346425765907, + "learning_rate": 0.0004896880153356963, + "loss": 3.121846008300781, + "step": 5430 + }, + { + "epoch": 11.404294317884263, + "learning_rate": 0.000489237727410273, + "loss": 3.1137548446655274, + "step": 5440 + }, + { + "epoch": 11.425242210002619, + "learning_rate": 0.0004887886793786093, + "loss": 3.0799299240112306, + "step": 5450 + }, + { + "epoch": 11.446190102120974, + "learning_rate": 0.0004883408655609327, + "loss": 3.0961063385009764, + "step": 5460 + }, + { + "epoch": 11.46713799423933, + "learning_rate": 0.0004878942803138293, + "loss": 3.0329910278320313, + "step": 5470 + }, + { + "epoch": 11.488085886357686, + "learning_rate": 0.0004874489180299454, + "loss": 3.173397445678711, + "step": 5480 + }, + { + "epoch": 11.509033778476041, + "learning_rate": 0.00048700477313769213, + "loss": 3.1852407455444336, + "step": 5490 + }, + { + "epoch": 11.529981670594397, + "learning_rate": 0.00048656184010095185, + "loss": 3.2040431976318358, + "step": 5500 + }, + { + "epoch": 11.550929562712753, + "learning_rate": 0.00048612011341878916, + "loss": 3.189468002319336, + "step": 5510 + }, + { + "epoch": 11.571877454831107, + "learning_rate": 0.0004856795876251634, + "loss": 3.2286914825439452, + "step": 5520 + }, + { + "epoch": 11.592825346949462, + "learning_rate": 0.00048524025728864493, + "loss": 3.034438896179199, + "step": 5530 + }, + { + "epoch": 11.613773239067818, + "learning_rate": 0.0004848021170121335, + "loss": 3.085980987548828, + "step": 5540 + }, + { + "epoch": 11.634721131186174, + "learning_rate": 0.0004843651614325803, + "loss": 3.1111934661865233, + "step": 5550 + }, + { + "epoch": 11.65566902330453, + "learning_rate": 0.00048392938522071163, + "loss": 3.159061050415039, + "step": 5560 + }, + { + "epoch": 11.676616915422885, + "learning_rate": 0.0004834947830807563, + "loss": 3.0602264404296875, + "step": 5570 + }, + { + "epoch": 11.69756480754124, + "learning_rate": 0.00048306134975017523, + "loss": 3.120003890991211, + "step": 5580 + }, + { + "epoch": 11.718512699659597, + "learning_rate": 0.0004826290799993939, + "loss": 3.1456703186035155, + "step": 5590 + }, + { + "epoch": 11.739460591777952, + "learning_rate": 0.0004821979686315372, + "loss": 3.112548828125, + "step": 5600 + }, + { + "epoch": 11.760408483896308, + "learning_rate": 0.00048176801048216693, + "loss": 3.1137924194335938, + "step": 5610 + }, + { + "epoch": 11.781356376014664, + "learning_rate": 0.0004813392004190223, + "loss": 3.146605110168457, + "step": 5620 + }, + { + "epoch": 11.80230426813302, + "learning_rate": 0.00048091153334176224, + "loss": 3.1099647521972655, + "step": 5630 + }, + { + "epoch": 11.823252160251375, + "learning_rate": 0.00048048500418171097, + "loss": 3.099277305603027, + "step": 5640 + }, + { + "epoch": 11.84420005236973, + "learning_rate": 0.0004800596079016053, + "loss": 3.085763931274414, + "step": 5650 + }, + { + "epoch": 11.865147944488086, + "learning_rate": 0.0004796353394953452, + "loss": 3.188782501220703, + "step": 5660 + }, + { + "epoch": 11.886095836606442, + "learning_rate": 0.0004792121939877459, + "loss": 3.176821708679199, + "step": 5670 + }, + { + "epoch": 11.907043728724798, + "learning_rate": 0.00047879016643429336, + "loss": 3.067020797729492, + "step": 5680 + }, + { + "epoch": 11.927991620843153, + "learning_rate": 0.00047836925192090116, + "loss": 3.1511611938476562, + "step": 5690 + }, + { + "epoch": 11.948939512961509, + "learning_rate": 0.0004779494455636703, + "loss": 3.0738733291625975, + "step": 5700 + }, + { + "epoch": 11.969887405079863, + "learning_rate": 0.00047753074250865145, + "loss": 3.237213897705078, + "step": 5710 + }, + { + "epoch": 11.990835297198219, + "learning_rate": 0.00047711313793160877, + "loss": 3.052178382873535, + "step": 5720 + }, + { + "epoch": 12.012568735271014, + "learning_rate": 0.000476696627037787, + "loss": 3.2651294708251952, + "step": 5730 + }, + { + "epoch": 12.03351662738937, + "learning_rate": 0.0004762812050616797, + "loss": 3.162643241882324, + "step": 5740 + }, + { + "epoch": 12.054464519507725, + "learning_rate": 0.0004758668672668006, + "loss": 3.1709291458129885, + "step": 5750 + }, + { + "epoch": 12.07541241162608, + "learning_rate": 0.00047545360894545664, + "loss": 3.063345527648926, + "step": 5760 + }, + { + "epoch": 12.096360303744436, + "learning_rate": 0.0004750414254185235, + "loss": 3.093794250488281, + "step": 5770 + }, + { + "epoch": 12.117308195862792, + "learning_rate": 0.0004746303120352226, + "loss": 3.1082719802856444, + "step": 5780 + }, + { + "epoch": 12.138256087981146, + "learning_rate": 0.00047422026417290146, + "loss": 3.1271081924438477, + "step": 5790 + }, + { + "epoch": 12.159203980099502, + "learning_rate": 0.0004738112772368146, + "loss": 3.141692543029785, + "step": 5800 + }, + { + "epoch": 12.180151872217857, + "learning_rate": 0.00047340334665990787, + "loss": 3.1134639739990235, + "step": 5810 + }, + { + "epoch": 12.201099764336213, + "learning_rate": 0.0004729964679026039, + "loss": 3.03677921295166, + "step": 5820 + }, + { + "epoch": 12.222047656454569, + "learning_rate": 0.0004725906364525903, + "loss": 3.2071063995361326, + "step": 5830 + }, + { + "epoch": 12.242995548572924, + "learning_rate": 0.0004721858478246089, + "loss": 3.173069953918457, + "step": 5840 + }, + { + "epoch": 12.26394344069128, + "learning_rate": 0.0004717820975602482, + "loss": 3.048240089416504, + "step": 5850 + }, + { + "epoch": 12.284891332809636, + "learning_rate": 0.0004713793812277367, + "loss": 3.041463088989258, + "step": 5860 + }, + { + "epoch": 12.305839224927992, + "learning_rate": 0.00047097769442173856, + "loss": 3.0645767211914063, + "step": 5870 + }, + { + "epoch": 12.326787117046347, + "learning_rate": 0.00047057703276315164, + "loss": 2.982158088684082, + "step": 5880 + }, + { + "epoch": 12.347735009164703, + "learning_rate": 0.0004701773918989065, + "loss": 3.111321449279785, + "step": 5890 + }, + { + "epoch": 12.368682901283059, + "learning_rate": 0.00046977876750176805, + "loss": 3.141143798828125, + "step": 5900 + }, + { + "epoch": 12.389630793401414, + "learning_rate": 0.0004693811552701385, + "loss": 3.0916566848754883, + "step": 5910 + }, + { + "epoch": 12.41057868551977, + "learning_rate": 0.0004689845509278626, + "loss": 3.0807928085327148, + "step": 5920 + }, + { + "epoch": 12.431526577638126, + "learning_rate": 0.00046858895022403474, + "loss": 3.0439529418945312, + "step": 5930 + }, + { + "epoch": 12.452474469756481, + "learning_rate": 0.000468194348932807, + "loss": 3.0425508499145506, + "step": 5940 + }, + { + "epoch": 12.473422361874837, + "learning_rate": 0.00046780074285319984, + "loss": 3.187800407409668, + "step": 5950 + }, + { + "epoch": 12.494370253993193, + "learning_rate": 0.0004674081278089144, + "loss": 3.0460309982299805, + "step": 5960 + }, + { + "epoch": 12.515318146111547, + "learning_rate": 0.00046701649964814616, + "loss": 3.2187931060791017, + "step": 5970 + }, + { + "epoch": 12.536266038229904, + "learning_rate": 0.0004666258542434007, + "loss": 3.109378433227539, + "step": 5980 + }, + { + "epoch": 12.557213930348258, + "learning_rate": 0.000466236187491311, + "loss": 3.0655149459838866, + "step": 5990 + }, + { + "epoch": 12.578161822466614, + "learning_rate": 0.00046584749531245617, + "loss": 3.132980728149414, + "step": 6000 + }, + { + "epoch": 12.59910971458497, + "learning_rate": 0.0004654597736511823, + "loss": 3.0032047271728515, + "step": 6010 + }, + { + "epoch": 12.620057606703325, + "learning_rate": 0.0004650730184754247, + "loss": 3.0569095611572266, + "step": 6020 + }, + { + "epoch": 12.64100549882168, + "learning_rate": 0.0004646872257765318, + "loss": 3.1891340255737304, + "step": 6030 + }, + { + "epoch": 12.661953390940036, + "learning_rate": 0.00046430239156909045, + "loss": 3.036951446533203, + "step": 6040 + }, + { + "epoch": 12.682901283058392, + "learning_rate": 0.00046391851189075343, + "loss": 3.077804374694824, + "step": 6050 + }, + { + "epoch": 12.703849175176748, + "learning_rate": 0.00046353558280206746, + "loss": 3.1198028564453124, + "step": 6060 + }, + { + "epoch": 12.724797067295103, + "learning_rate": 0.00046315360038630404, + "loss": 3.1289579391479494, + "step": 6070 + }, + { + "epoch": 12.745744959413459, + "learning_rate": 0.0004627725607492909, + "loss": 3.047295570373535, + "step": 6080 + }, + { + "epoch": 12.766692851531815, + "learning_rate": 0.00046239246001924503, + "loss": 3.071992111206055, + "step": 6090 + }, + { + "epoch": 12.78764074365017, + "learning_rate": 0.000462013294346608, + "loss": 3.050577735900879, + "step": 6100 + }, + { + "epoch": 12.808588635768526, + "learning_rate": 0.00046163505990388167, + "loss": 3.0774341583251954, + "step": 6110 + }, + { + "epoch": 12.829536527886882, + "learning_rate": 0.00046125775288546623, + "loss": 3.120297431945801, + "step": 6120 + }, + { + "epoch": 12.850484420005237, + "learning_rate": 0.00046088136950749937, + "loss": 3.1301042556762697, + "step": 6130 + }, + { + "epoch": 12.871432312123593, + "learning_rate": 0.0004605059060076967, + "loss": 3.0827388763427734, + "step": 6140 + }, + { + "epoch": 12.892380204241949, + "learning_rate": 0.0004601313586451939, + "loss": 3.136738967895508, + "step": 6150 + }, + { + "epoch": 12.913328096360305, + "learning_rate": 0.00045975772370039034, + "loss": 3.0567752838134767, + "step": 6160 + }, + { + "epoch": 12.93427598847866, + "learning_rate": 0.0004593849974747937, + "loss": 3.053047752380371, + "step": 6170 + }, + { + "epoch": 12.955223880597014, + "learning_rate": 0.0004590131762908664, + "loss": 3.0607650756835936, + "step": 6180 + }, + { + "epoch": 12.97617177271537, + "learning_rate": 0.00045864225649187287, + "loss": 3.1021827697753905, + "step": 6190 + }, + { + "epoch": 12.997119664833725, + "learning_rate": 0.000458272234441729, + "loss": 3.016301727294922, + "step": 6200 + }, + { + "epoch": 13.01885310290652, + "learning_rate": 0.00045790310652485205, + "loss": 3.1855663299560546, + "step": 6210 + }, + { + "epoch": 13.039800995024876, + "learning_rate": 0.0004575348691460124, + "loss": 3.0596897125244142, + "step": 6220 + }, + { + "epoch": 13.060748887143232, + "learning_rate": 0.00045716751873018654, + "loss": 3.061813735961914, + "step": 6230 + }, + { + "epoch": 13.081696779261588, + "learning_rate": 0.00045680105172241103, + "loss": 3.106767463684082, + "step": 6240 + }, + { + "epoch": 13.102644671379942, + "learning_rate": 0.0004564354645876384, + "loss": 3.101357269287109, + "step": 6250 + }, + { + "epoch": 13.123592563498297, + "learning_rate": 0.00045607075381059363, + "loss": 3.085792350769043, + "step": 6260 + }, + { + "epoch": 13.144540455616653, + "learning_rate": 0.00045570691589563234, + "loss": 3.071797752380371, + "step": 6270 + }, + { + "epoch": 13.165488347735009, + "learning_rate": 0.0004553439473666, + "loss": 3.087900161743164, + "step": 6280 + }, + { + "epoch": 13.186436239853364, + "learning_rate": 0.0004549818447666924, + "loss": 3.0981624603271483, + "step": 6290 + }, + { + "epoch": 13.20738413197172, + "learning_rate": 0.00045462060465831743, + "loss": 2.995559501647949, + "step": 6300 + }, + { + "epoch": 13.228332024090076, + "learning_rate": 0.0004542602236229581, + "loss": 3.0986444473266603, + "step": 6310 + }, + { + "epoch": 13.249279916208431, + "learning_rate": 0.00045390069826103653, + "loss": 3.1423923492431642, + "step": 6320 + }, + { + "epoch": 13.270227808326787, + "learning_rate": 0.00045354202519177925, + "loss": 2.981964111328125, + "step": 6330 + }, + { + "epoch": 13.291175700445143, + "learning_rate": 0.0004531842010530839, + "loss": 3.062668800354004, + "step": 6340 + }, + { + "epoch": 13.312123592563498, + "learning_rate": 0.0004528272225013865, + "loss": 3.1759321212768556, + "step": 6350 + }, + { + "epoch": 13.333071484681854, + "learning_rate": 0.00045247108621153056, + "loss": 3.0892358779907227, + "step": 6360 + }, + { + "epoch": 13.35401937680021, + "learning_rate": 0.0004521157888766368, + "loss": 3.0303468704223633, + "step": 6370 + }, + { + "epoch": 13.374967268918565, + "learning_rate": 0.00045176132720797443, + "loss": 3.042502021789551, + "step": 6380 + }, + { + "epoch": 13.395915161036921, + "learning_rate": 0.0004514076979348328, + "loss": 3.109409713745117, + "step": 6390 + }, + { + "epoch": 13.416863053155277, + "learning_rate": 0.0004510548978043951, + "loss": 3.0352380752563475, + "step": 6400 + }, + { + "epoch": 13.437810945273633, + "learning_rate": 0.00045070292358161265, + "loss": 2.9740083694458006, + "step": 6410 + }, + { + "epoch": 13.458758837391988, + "learning_rate": 0.0004503517720490801, + "loss": 3.1181098937988283, + "step": 6420 + }, + { + "epoch": 13.479706729510344, + "learning_rate": 0.000450001440006912, + "loss": 3.089175987243652, + "step": 6430 + }, + { + "epoch": 13.500654621628698, + "learning_rate": 0.00044965192427262043, + "loss": 3.0885658264160156, + "step": 6440 + }, + { + "epoch": 13.521602513747053, + "learning_rate": 0.0004493032216809934, + "loss": 3.0794023513793944, + "step": 6450 + }, + { + "epoch": 13.54255040586541, + "learning_rate": 0.00044895532908397455, + "loss": 3.0824106216430662, + "step": 6460 + }, + { + "epoch": 13.563498297983765, + "learning_rate": 0.00044860824335054384, + "loss": 3.0646196365356446, + "step": 6470 + }, + { + "epoch": 13.58444619010212, + "learning_rate": 0.00044826196136659916, + "loss": 3.044062614440918, + "step": 6480 + }, + { + "epoch": 13.605394082220476, + "learning_rate": 0.00044791648003483884, + "loss": 3.0133747100830077, + "step": 6490 + }, + { + "epoch": 13.626341974338832, + "learning_rate": 0.0004475717962746455, + "loss": 3.070328712463379, + "step": 6500 + }, + { + "epoch": 13.647289866457188, + "learning_rate": 0.0004472279070219706, + "loss": 3.0279052734375, + "step": 6510 + }, + { + "epoch": 13.668237758575543, + "learning_rate": 0.00044688480922922, + "loss": 3.147620964050293, + "step": 6520 + }, + { + "epoch": 13.689185650693899, + "learning_rate": 0.00044654249986514057, + "loss": 3.1151987075805665, + "step": 6530 + }, + { + "epoch": 13.710133542812255, + "learning_rate": 0.0004462009759147076, + "loss": 3.072108268737793, + "step": 6540 + }, + { + "epoch": 13.73108143493061, + "learning_rate": 0.0004458602343790135, + "loss": 3.134627342224121, + "step": 6550 + }, + { + "epoch": 13.752029327048966, + "learning_rate": 0.00044552027227515704, + "loss": 3.032268524169922, + "step": 6560 + }, + { + "epoch": 13.772977219167322, + "learning_rate": 0.00044518108663613355, + "loss": 3.065017509460449, + "step": 6570 + }, + { + "epoch": 13.793925111285677, + "learning_rate": 0.00044484267451072644, + "loss": 3.0611106872558596, + "step": 6580 + }, + { + "epoch": 13.814873003404033, + "learning_rate": 0.0004445050329633992, + "loss": 3.2091243743896483, + "step": 6590 + }, + { + "epoch": 13.835820895522389, + "learning_rate": 0.0004441681590741884, + "loss": 3.058238983154297, + "step": 6600 + }, + { + "epoch": 13.856768787640744, + "learning_rate": 0.0004438320499385977, + "loss": 3.073333168029785, + "step": 6610 + }, + { + "epoch": 13.8777166797591, + "learning_rate": 0.00044349670266749286, + "loss": 3.037291145324707, + "step": 6620 + }, + { + "epoch": 13.898664571877454, + "learning_rate": 0.0004431621143869969, + "loss": 3.0411745071411134, + "step": 6630 + }, + { + "epoch": 13.91961246399581, + "learning_rate": 0.00044282828223838727, + "loss": 3.148990440368652, + "step": 6640 + }, + { + "epoch": 13.940560356114165, + "learning_rate": 0.0004424952033779929, + "loss": 3.014286994934082, + "step": 6650 + }, + { + "epoch": 13.961508248232521, + "learning_rate": 0.00044216287497709253, + "loss": 3.0138343811035155, + "step": 6660 + }, + { + "epoch": 13.982456140350877, + "learning_rate": 0.0004418312942218139, + "loss": 2.9974302291870116, + "step": 6670 + }, + { + "epoch": 14.004189578423672, + "learning_rate": 0.0004415004583130336, + "loss": 3.1527809143066405, + "step": 6680 + }, + { + "epoch": 14.025137470542028, + "learning_rate": 0.0004411703644662778, + "loss": 2.9581697463989256, + "step": 6690 + }, + { + "epoch": 14.046085362660381, + "learning_rate": 0.00044084100991162385, + "loss": 3.0720396041870117, + "step": 6700 + }, + { + "epoch": 14.067033254778737, + "learning_rate": 0.00044051239189360286, + "loss": 3.0470098495483398, + "step": 6710 + }, + { + "epoch": 14.087981146897093, + "learning_rate": 0.00044018450767110235, + "loss": 3.0677566528320312, + "step": 6720 + }, + { + "epoch": 14.108929039015448, + "learning_rate": 0.0004398573545172709, + "loss": 3.110503005981445, + "step": 6730 + }, + { + "epoch": 14.129876931133804, + "learning_rate": 0.0004395309297194223, + "loss": 2.9687520980834963, + "step": 6740 + }, + { + "epoch": 14.15082482325216, + "learning_rate": 0.0004392052305789416, + "loss": 2.985172080993652, + "step": 6750 + }, + { + "epoch": 14.171772715370516, + "learning_rate": 0.0004388802544111908, + "loss": 3.0017110824584963, + "step": 6760 + }, + { + "epoch": 14.192720607488871, + "learning_rate": 0.0004385559985454165, + "loss": 3.041835403442383, + "step": 6770 + }, + { + "epoch": 14.213668499607227, + "learning_rate": 0.0004382324603246575, + "loss": 3.0984907150268555, + "step": 6780 + }, + { + "epoch": 14.234616391725583, + "learning_rate": 0.0004379096371056532, + "loss": 3.0736331939697266, + "step": 6790 + }, + { + "epoch": 14.255564283843938, + "learning_rate": 0.000437587526258753, + "loss": 2.9994585037231447, + "step": 6800 + }, + { + "epoch": 14.276512175962294, + "learning_rate": 0.0004372661251678265, + "loss": 3.1013252258300783, + "step": 6810 + }, + { + "epoch": 14.29746006808065, + "learning_rate": 0.00043694543123017407, + "loss": 3.102655220031738, + "step": 6820 + }, + { + "epoch": 14.318407960199005, + "learning_rate": 0.0004366254418564382, + "loss": 3.1105621337890623, + "step": 6830 + }, + { + "epoch": 14.339355852317361, + "learning_rate": 0.0004363061544705161, + "loss": 3.1368709564208985, + "step": 6840 + }, + { + "epoch": 14.360303744435717, + "learning_rate": 0.0004359875665094723, + "loss": 3.052720069885254, + "step": 6850 + }, + { + "epoch": 14.381251636554072, + "learning_rate": 0.00043566967542345227, + "loss": 2.977310562133789, + "step": 6860 + }, + { + "epoch": 14.402199528672428, + "learning_rate": 0.00043535247867559673, + "loss": 3.111159896850586, + "step": 6870 + }, + { + "epoch": 14.423147420790784, + "learning_rate": 0.00043503597374195665, + "loss": 2.959975814819336, + "step": 6880 + }, + { + "epoch": 14.444095312909138, + "learning_rate": 0.0004347201581114088, + "loss": 3.125636100769043, + "step": 6890 + }, + { + "epoch": 14.465043205027493, + "learning_rate": 0.0004344050292855724, + "loss": 2.9577571868896486, + "step": 6900 + }, + { + "epoch": 14.485991097145849, + "learning_rate": 0.00043409058477872554, + "loss": 3.101388931274414, + "step": 6910 + }, + { + "epoch": 14.506938989264205, + "learning_rate": 0.00043377682211772343, + "loss": 3.0661073684692384, + "step": 6920 + }, + { + "epoch": 14.52788688138256, + "learning_rate": 0.0004334637388419161, + "loss": 2.974909019470215, + "step": 6930 + }, + { + "epoch": 14.548834773500916, + "learning_rate": 0.0004331513325030681, + "loss": 3.0314458847045898, + "step": 6940 + }, + { + "epoch": 14.569782665619272, + "learning_rate": 0.0004328396006652773, + "loss": 3.1744915008544923, + "step": 6950 + }, + { + "epoch": 14.590730557737627, + "learning_rate": 0.00043252854090489564, + "loss": 3.06768741607666, + "step": 6960 + }, + { + "epoch": 14.611678449855983, + "learning_rate": 0.00043221815081044985, + "loss": 3.161996269226074, + "step": 6970 + }, + { + "epoch": 14.632626341974339, + "learning_rate": 0.00043190842798256285, + "loss": 3.102631378173828, + "step": 6980 + }, + { + "epoch": 14.653574234092694, + "learning_rate": 0.00043159937003387584, + "loss": 3.004058074951172, + "step": 6990 + }, + { + "epoch": 14.67452212621105, + "learning_rate": 0.00043129097458897135, + "loss": 3.135270118713379, + "step": 7000 + }, + { + "epoch": 14.695470018329406, + "learning_rate": 0.000430983239284296, + "loss": 3.0084003448486327, + "step": 7010 + }, + { + "epoch": 14.716417910447761, + "learning_rate": 0.0004306761617680849, + "loss": 2.9995773315429686, + "step": 7020 + }, + { + "epoch": 14.737365802566117, + "learning_rate": 0.00043036973970028583, + "loss": 3.000468444824219, + "step": 7030 + }, + { + "epoch": 14.758313694684473, + "learning_rate": 0.00043006397075248464, + "loss": 3.0801364898681642, + "step": 7040 + }, + { + "epoch": 14.779261586802829, + "learning_rate": 0.00042975885260783056, + "loss": 3.0909229278564454, + "step": 7050 + }, + { + "epoch": 14.800209478921184, + "learning_rate": 0.00042945438296096303, + "loss": 2.9928516387939452, + "step": 7060 + }, + { + "epoch": 14.82115737103954, + "learning_rate": 0.0004291505595179379, + "loss": 3.036148262023926, + "step": 7070 + }, + { + "epoch": 14.842105263157894, + "learning_rate": 0.0004288473799961553, + "loss": 3.0693193435668946, + "step": 7080 + }, + { + "epoch": 14.863053155276251, + "learning_rate": 0.0004285448421242875, + "loss": 3.0427278518676757, + "step": 7090 + }, + { + "epoch": 14.884001047394605, + "learning_rate": 0.00042824294364220724, + "loss": 2.9749155044555664, + "step": 7100 + }, + { + "epoch": 14.90494893951296, + "learning_rate": 0.0004279416823009172, + "loss": 2.9990673065185547, + "step": 7110 + }, + { + "epoch": 14.925896831631317, + "learning_rate": 0.0004276410558624791, + "loss": 3.091754913330078, + "step": 7120 + }, + { + "epoch": 14.946844723749672, + "learning_rate": 0.0004273410620999446, + "loss": 3.003107452392578, + "step": 7130 + }, + { + "epoch": 14.967792615868028, + "learning_rate": 0.0004270416987972853, + "loss": 3.023390007019043, + "step": 7140 + }, + { + "epoch": 14.988740507986384, + "learning_rate": 0.00042674296374932424, + "loss": 3.068536376953125, + "step": 7150 + }, + { + "epoch": 15.010473946059177, + "learning_rate": 0.0004264448547616681, + "loss": 3.245321273803711, + "step": 7160 + }, + { + "epoch": 15.031421838177533, + "learning_rate": 0.00042614736965063864, + "loss": 2.9358680725097654, + "step": 7170 + }, + { + "epoch": 15.052369730295888, + "learning_rate": 0.0004258505062432064, + "loss": 3.0279872894287108, + "step": 7180 + }, + { + "epoch": 15.073317622414244, + "learning_rate": 0.0004255542623769234, + "loss": 2.96344108581543, + "step": 7190 + }, + { + "epoch": 15.0942655145326, + "learning_rate": 0.00042525863589985727, + "loss": 3.1603927612304688, + "step": 7200 + }, + { + "epoch": 15.115213406650955, + "learning_rate": 0.00042496362467052564, + "loss": 3.0409677505493162, + "step": 7210 + }, + { + "epoch": 15.136161298769311, + "learning_rate": 0.00042466922655783073, + "loss": 3.154404067993164, + "step": 7220 + }, + { + "epoch": 15.157109190887667, + "learning_rate": 0.00042437543944099504, + "loss": 2.999993324279785, + "step": 7230 + }, + { + "epoch": 15.178057083006022, + "learning_rate": 0.00042408226120949674, + "loss": 2.962456703186035, + "step": 7240 + }, + { + "epoch": 15.199004975124378, + "learning_rate": 0.00042378968976300647, + "loss": 3.050062561035156, + "step": 7250 + }, + { + "epoch": 15.219952867242734, + "learning_rate": 0.00042349772301132377, + "loss": 3.058196258544922, + "step": 7260 + }, + { + "epoch": 15.24090075936109, + "learning_rate": 0.0004232063588743146, + "loss": 3.0242469787597654, + "step": 7270 + }, + { + "epoch": 15.261848651479445, + "learning_rate": 0.00042291559528184904, + "loss": 2.929056930541992, + "step": 7280 + }, + { + "epoch": 15.2827965435978, + "learning_rate": 0.0004226254301737393, + "loss": 3.0346649169921873, + "step": 7290 + }, + { + "epoch": 15.303744435716157, + "learning_rate": 0.0004223358614996787, + "loss": 3.079379081726074, + "step": 7300 + }, + { + "epoch": 15.324692327834512, + "learning_rate": 0.00042204688721918075, + "loss": 2.984081268310547, + "step": 7310 + }, + { + "epoch": 15.345640219952868, + "learning_rate": 0.0004217585053015187, + "loss": 3.025343322753906, + "step": 7320 + }, + { + "epoch": 15.366588112071224, + "learning_rate": 0.0004214707137256656, + "loss": 3.016037940979004, + "step": 7330 + }, + { + "epoch": 15.38753600418958, + "learning_rate": 0.0004211835104802349, + "loss": 3.1470058441162108, + "step": 7340 + }, + { + "epoch": 15.408483896307935, + "learning_rate": 0.00042089689356342115, + "loss": 3.007353591918945, + "step": 7350 + }, + { + "epoch": 15.429431788426289, + "learning_rate": 0.0004206108609829418, + "loss": 3.0402362823486326, + "step": 7360 + }, + { + "epoch": 15.450379680544645, + "learning_rate": 0.00042032541075597875, + "loss": 3.018893241882324, + "step": 7370 + }, + { + "epoch": 15.471327572663, + "learning_rate": 0.0004200405409091207, + "loss": 3.0513105392456055, + "step": 7380 + }, + { + "epoch": 15.492275464781356, + "learning_rate": 0.00041975624947830593, + "loss": 3.0438756942749023, + "step": 7390 + }, + { + "epoch": 15.513223356899712, + "learning_rate": 0.00041947253450876515, + "loss": 2.977249526977539, + "step": 7400 + }, + { + "epoch": 15.534171249018067, + "learning_rate": 0.00041918939405496546, + "loss": 2.956187629699707, + "step": 7410 + }, + { + "epoch": 15.555119141136423, + "learning_rate": 0.00041890682618055396, + "loss": 3.0582775115966796, + "step": 7420 + }, + { + "epoch": 15.576067033254779, + "learning_rate": 0.0004186248289583023, + "loss": 2.9948537826538084, + "step": 7430 + }, + { + "epoch": 15.597014925373134, + "learning_rate": 0.00041834340047005144, + "loss": 2.9837194442749024, + "step": 7440 + }, + { + "epoch": 15.61796281749149, + "learning_rate": 0.0004180625388066569, + "loss": 3.0729391098022463, + "step": 7450 + }, + { + "epoch": 15.638910709609846, + "learning_rate": 0.00041778224206793433, + "loss": 3.054386329650879, + "step": 7460 + }, + { + "epoch": 15.659858601728201, + "learning_rate": 0.00041750250836260536, + "loss": 3.102676582336426, + "step": 7470 + }, + { + "epoch": 15.680806493846557, + "learning_rate": 0.0004172233358082443, + "loss": 2.986006164550781, + "step": 7480 + }, + { + "epoch": 15.701754385964913, + "learning_rate": 0.00041694472253122467, + "loss": 3.0711380004882813, + "step": 7490 + }, + { + "epoch": 15.722702278083268, + "learning_rate": 0.00041666666666666664, + "loss": 2.9737503051757814, + "step": 7500 + }, + { + "epoch": 15.743650170201624, + "learning_rate": 0.0004163891663583843, + "loss": 3.030619812011719, + "step": 7510 + }, + { + "epoch": 15.76459806231998, + "learning_rate": 0.00041611221975883396, + "loss": 3.0626684188842774, + "step": 7520 + }, + { + "epoch": 15.785545954438335, + "learning_rate": 0.00041583582502906203, + "loss": 2.9612255096435547, + "step": 7530 + }, + { + "epoch": 15.806493846556691, + "learning_rate": 0.0004155599803386543, + "loss": 2.9540287017822267, + "step": 7540 + }, + { + "epoch": 15.827441738675045, + "learning_rate": 0.0004152846838656846, + "loss": 2.9890960693359374, + "step": 7550 + }, + { + "epoch": 15.8483896307934, + "learning_rate": 0.00041500993379666443, + "loss": 2.998134803771973, + "step": 7560 + }, + { + "epoch": 15.869337522911756, + "learning_rate": 0.0004147357283264927, + "loss": 3.046440315246582, + "step": 7570 + }, + { + "epoch": 15.890285415030112, + "learning_rate": 0.000414462065658406, + "loss": 2.9803043365478517, + "step": 7580 + }, + { + "epoch": 15.911233307148468, + "learning_rate": 0.0004141889440039292, + "loss": 3.0623497009277343, + "step": 7590 + }, + { + "epoch": 15.932181199266823, + "learning_rate": 0.00041391636158282614, + "loss": 2.926837921142578, + "step": 7600 + }, + { + "epoch": 15.953129091385179, + "learning_rate": 0.00041364431662305114, + "loss": 2.932399368286133, + "step": 7610 + }, + { + "epoch": 15.974076983503535, + "learning_rate": 0.0004133728073607005, + "loss": 2.996663284301758, + "step": 7620 + }, + { + "epoch": 15.99502487562189, + "learning_rate": 0.00041310183203996446, + "loss": 3.0678241729736326, + "step": 7630 + }, + { + "epoch": 16.016758313694684, + "learning_rate": 0.0004128313889130795, + "loss": 3.262166213989258, + "step": 7640 + }, + { + "epoch": 16.03770620581304, + "learning_rate": 0.0004125614762402809, + "loss": 3.072698402404785, + "step": 7650 + }, + { + "epoch": 16.058654097931395, + "learning_rate": 0.00041229209228975627, + "loss": 2.960147476196289, + "step": 7660 + }, + { + "epoch": 16.079601990049753, + "learning_rate": 0.000412023235337598, + "loss": 2.987987518310547, + "step": 7670 + }, + { + "epoch": 16.100549882168107, + "learning_rate": 0.00041175490366775766, + "loss": 2.9958822250366213, + "step": 7680 + }, + { + "epoch": 16.121497774286464, + "learning_rate": 0.0004114870955719997, + "loss": 3.0043949127197265, + "step": 7690 + }, + { + "epoch": 16.142445666404818, + "learning_rate": 0.00041121980934985563, + "loss": 3.013554573059082, + "step": 7700 + }, + { + "epoch": 16.163393558523175, + "learning_rate": 0.000410953043308579, + "loss": 3.0146947860717774, + "step": 7710 + }, + { + "epoch": 16.18434145064153, + "learning_rate": 0.0004106867957631001, + "loss": 2.9756107330322266, + "step": 7720 + }, + { + "epoch": 16.205289342759883, + "learning_rate": 0.00041042106503598165, + "loss": 2.998594284057617, + "step": 7730 + }, + { + "epoch": 16.22623723487824, + "learning_rate": 0.0004101558494573738, + "loss": 2.968126678466797, + "step": 7740 + }, + { + "epoch": 16.247185126996595, + "learning_rate": 0.0004098911473649706, + "loss": 2.938851737976074, + "step": 7750 + }, + { + "epoch": 16.268133019114952, + "learning_rate": 0.0004096269571039658, + "loss": 3.0778596878051756, + "step": 7760 + }, + { + "epoch": 16.289080911233306, + "learning_rate": 0.00040936327702701005, + "loss": 2.871398162841797, + "step": 7770 + }, + { + "epoch": 16.310028803351663, + "learning_rate": 0.00040910010549416687, + "loss": 2.9972572326660156, + "step": 7780 + }, + { + "epoch": 16.330976695470017, + "learning_rate": 0.0004088374408728706, + "loss": 3.1002374649047852, + "step": 7790 + }, + { + "epoch": 16.351924587588375, + "learning_rate": 0.0004085752815378834, + "loss": 2.9723093032836916, + "step": 7800 + }, + { + "epoch": 16.37287247970673, + "learning_rate": 0.0004083136258712532, + "loss": 3.0089197158813477, + "step": 7810 + }, + { + "epoch": 16.393820371825086, + "learning_rate": 0.0004080524722622717, + "loss": 2.9960916519165037, + "step": 7820 + }, + { + "epoch": 16.41476826394344, + "learning_rate": 0.00040779181910743294, + "loss": 2.948496437072754, + "step": 7830 + }, + { + "epoch": 16.435716156061797, + "learning_rate": 0.0004075316648103914, + "loss": 2.986690139770508, + "step": 7840 + }, + { + "epoch": 16.45666404818015, + "learning_rate": 0.0004072720077819216, + "loss": 3.064560317993164, + "step": 7850 + }, + { + "epoch": 16.47761194029851, + "learning_rate": 0.0004070128464398768, + "loss": 3.0366847991943358, + "step": 7860 + }, + { + "epoch": 16.498559832416863, + "learning_rate": 0.0004067541792091489, + "loss": 3.0212535858154297, + "step": 7870 + }, + { + "epoch": 16.51950772453522, + "learning_rate": 0.0004064960045216279, + "loss": 2.966229057312012, + "step": 7880 + }, + { + "epoch": 16.540455616653574, + "learning_rate": 0.0004062383208161624, + "loss": 2.9718713760375977, + "step": 7890 + }, + { + "epoch": 16.56140350877193, + "learning_rate": 0.0004059811265385193, + "loss": 2.938900947570801, + "step": 7900 + }, + { + "epoch": 16.582351400890285, + "learning_rate": 0.00040572442014134516, + "loss": 3.0135732650756837, + "step": 7910 + }, + { + "epoch": 16.60329929300864, + "learning_rate": 0.00040546820008412654, + "loss": 3.016792869567871, + "step": 7920 + }, + { + "epoch": 16.624247185126997, + "learning_rate": 0.0004052124648331515, + "loss": 2.961100387573242, + "step": 7930 + }, + { + "epoch": 16.64519507724535, + "learning_rate": 0.00040495721286147086, + "loss": 2.9855838775634767, + "step": 7940 + }, + { + "epoch": 16.666142969363708, + "learning_rate": 0.00040470244264886006, + "loss": 3.0260711669921876, + "step": 7950 + }, + { + "epoch": 16.687090861482062, + "learning_rate": 0.00040444815268178097, + "loss": 2.9670747756958007, + "step": 7960 + }, + { + "epoch": 16.70803875360042, + "learning_rate": 0.00040419434145334414, + "loss": 2.9868255615234376, + "step": 7970 + }, + { + "epoch": 16.728986645718773, + "learning_rate": 0.00040394100746327154, + "loss": 2.993141746520996, + "step": 7980 + }, + { + "epoch": 16.74993453783713, + "learning_rate": 0.0004036881492178589, + "loss": 2.991754722595215, + "step": 7990 + }, + { + "epoch": 16.770882429955485, + "learning_rate": 0.00040343576522993926, + "loss": 3.0531938552856444, + "step": 8000 + }, + { + "epoch": 16.791830322073842, + "learning_rate": 0.00040318385401884554, + "loss": 2.8399303436279295, + "step": 8010 + }, + { + "epoch": 16.812778214192196, + "learning_rate": 0.00040293241411037484, + "loss": 3.0251434326171873, + "step": 8020 + }, + { + "epoch": 16.833726106310554, + "learning_rate": 0.00040268144403675154, + "loss": 2.9438486099243164, + "step": 8030 + }, + { + "epoch": 16.854673998428908, + "learning_rate": 0.0004024309423365915, + "loss": 2.9782060623168944, + "step": 8040 + }, + { + "epoch": 16.875621890547265, + "learning_rate": 0.0004021809075548668, + "loss": 2.972634696960449, + "step": 8050 + }, + { + "epoch": 16.89656978266562, + "learning_rate": 0.0004019313382428694, + "loss": 2.979868507385254, + "step": 8060 + }, + { + "epoch": 16.917517674783976, + "learning_rate": 0.00040168223295817656, + "loss": 2.990520477294922, + "step": 8070 + }, + { + "epoch": 16.93846556690233, + "learning_rate": 0.00040143359026461554, + "loss": 3.0180835723876953, + "step": 8080 + }, + { + "epoch": 16.959413459020688, + "learning_rate": 0.000401185408732229, + "loss": 3.0260868072509766, + "step": 8090 + }, + { + "epoch": 16.98036135113904, + "learning_rate": 0.0004009376869372401, + "loss": 2.9574857711791993, + "step": 8100 + }, + { + "epoch": 17.002094789211835, + "learning_rate": 0.00040069042346201864, + "loss": 3.0914968490600585, + "step": 8110 + }, + { + "epoch": 17.023042681330192, + "learning_rate": 0.00040044361689504655, + "loss": 3.062566947937012, + "step": 8120 + }, + { + "epoch": 17.043990573448546, + "learning_rate": 0.0004001972658308847, + "loss": 2.976962661743164, + "step": 8130 + }, + { + "epoch": 17.064938465566904, + "learning_rate": 0.0003999513688701383, + "loss": 2.9750572204589845, + "step": 8140 + }, + { + "epoch": 17.085886357685258, + "learning_rate": 0.00039970592461942457, + "loss": 2.987382698059082, + "step": 8150 + }, + { + "epoch": 17.106834249803615, + "learning_rate": 0.00039946093169133874, + "loss": 3.001695442199707, + "step": 8160 + }, + { + "epoch": 17.12778214192197, + "learning_rate": 0.0003992163887044217, + "loss": 2.961598777770996, + "step": 8170 + }, + { + "epoch": 17.148730034040323, + "learning_rate": 0.0003989722942831268, + "loss": 2.957429313659668, + "step": 8180 + }, + { + "epoch": 17.16967792615868, + "learning_rate": 0.0003987286470577879, + "loss": 3.0364120483398436, + "step": 8190 + }, + { + "epoch": 17.190625818277034, + "learning_rate": 0.0003984854456645864, + "loss": 3.0673593521118163, + "step": 8200 + }, + { + "epoch": 17.211573710395392, + "learning_rate": 0.0003982426887455199, + "loss": 2.9361265182495115, + "step": 8210 + }, + { + "epoch": 17.232521602513746, + "learning_rate": 0.00039800037494836985, + "loss": 3.001542854309082, + "step": 8220 + }, + { + "epoch": 17.253469494632103, + "learning_rate": 0.00039775850292667005, + "loss": 2.95641975402832, + "step": 8230 + }, + { + "epoch": 17.274417386750457, + "learning_rate": 0.0003975170713396753, + "loss": 2.8832208633422853, + "step": 8240 + }, + { + "epoch": 17.295365278868815, + "learning_rate": 0.0003972760788523301, + "loss": 2.9211734771728515, + "step": 8250 + }, + { + "epoch": 17.31631317098717, + "learning_rate": 0.0003970355241352378, + "loss": 2.9908830642700197, + "step": 8260 + }, + { + "epoch": 17.337261063105526, + "learning_rate": 0.00039679540586462953, + "loss": 2.991852951049805, + "step": 8270 + }, + { + "epoch": 17.35820895522388, + "learning_rate": 0.00039655572272233384, + "loss": 3.0602521896362305, + "step": 8280 + }, + { + "epoch": 17.379156847342237, + "learning_rate": 0.0003963164733957462, + "loss": 2.975466728210449, + "step": 8290 + }, + { + "epoch": 17.40010473946059, + "learning_rate": 0.00039607765657779864, + "loss": 3.0946418762207033, + "step": 8300 + }, + { + "epoch": 17.42105263157895, + "learning_rate": 0.0003958392709669304, + "loss": 2.993026924133301, + "step": 8310 + }, + { + "epoch": 17.442000523697303, + "learning_rate": 0.00039560131526705723, + "loss": 3.0121936798095703, + "step": 8320 + }, + { + "epoch": 17.46294841581566, + "learning_rate": 0.0003953637881875425, + "loss": 3.0414730072021485, + "step": 8330 + }, + { + "epoch": 17.483896307934014, + "learning_rate": 0.0003951266884431675, + "loss": 3.0235416412353517, + "step": 8340 + }, + { + "epoch": 17.50484420005237, + "learning_rate": 0.00039489001475410214, + "loss": 2.9818603515625, + "step": 8350 + }, + { + "epoch": 17.525792092170725, + "learning_rate": 0.00039465376584587626, + "loss": 2.994624137878418, + "step": 8360 + }, + { + "epoch": 17.54673998428908, + "learning_rate": 0.00039441794044935054, + "loss": 3.0084808349609373, + "step": 8370 + }, + { + "epoch": 17.567687876407437, + "learning_rate": 0.00039418253730068797, + "loss": 2.8967424392700196, + "step": 8380 + }, + { + "epoch": 17.58863576852579, + "learning_rate": 0.0003939475551413253, + "loss": 2.9253704071044924, + "step": 8390 + }, + { + "epoch": 17.609583660644148, + "learning_rate": 0.000393712992717945, + "loss": 3.0752674102783204, + "step": 8400 + }, + { + "epoch": 17.630531552762502, + "learning_rate": 0.0003934788487824469, + "loss": 2.925820159912109, + "step": 8410 + }, + { + "epoch": 17.65147944488086, + "learning_rate": 0.0003932451220919205, + "loss": 2.9983007431030275, + "step": 8420 + }, + { + "epoch": 17.672427336999213, + "learning_rate": 0.0003930118114086172, + "loss": 2.884238624572754, + "step": 8430 + }, + { + "epoch": 17.69337522911757, + "learning_rate": 0.00039277891549992266, + "loss": 2.854781723022461, + "step": 8440 + }, + { + "epoch": 17.714323121235925, + "learning_rate": 0.0003925464331383298, + "loss": 2.9886890411376954, + "step": 8450 + }, + { + "epoch": 17.735271013354282, + "learning_rate": 0.00039231436310141113, + "loss": 3.0236677169799804, + "step": 8460 + }, + { + "epoch": 17.756218905472636, + "learning_rate": 0.00039208270417179214, + "loss": 2.951685905456543, + "step": 8470 + }, + { + "epoch": 17.777166797590993, + "learning_rate": 0.0003918514551371243, + "loss": 2.971786880493164, + "step": 8480 + }, + { + "epoch": 17.798114689709347, + "learning_rate": 0.0003916206147900585, + "loss": 2.983307647705078, + "step": 8490 + }, + { + "epoch": 17.819062581827705, + "learning_rate": 0.00039139018192821845, + "loss": 2.9782459259033205, + "step": 8500 + }, + { + "epoch": 17.84001047394606, + "learning_rate": 0.00039116015535417445, + "loss": 3.02642765045166, + "step": 8510 + }, + { + "epoch": 17.860958366064416, + "learning_rate": 0.00039093053387541745, + "loss": 3.011845588684082, + "step": 8520 + }, + { + "epoch": 17.88190625818277, + "learning_rate": 0.00039070131630433274, + "loss": 2.939919090270996, + "step": 8530 + }, + { + "epoch": 17.902854150301128, + "learning_rate": 0.00039047250145817424, + "loss": 2.996026039123535, + "step": 8540 + }, + { + "epoch": 17.92380204241948, + "learning_rate": 0.00039024408815903914, + "loss": 2.903793716430664, + "step": 8550 + }, + { + "epoch": 17.94474993453784, + "learning_rate": 0.0003900160752338421, + "loss": 2.9599498748779296, + "step": 8560 + }, + { + "epoch": 17.965697826656193, + "learning_rate": 0.00038978846151429, + "loss": 2.841645050048828, + "step": 8570 + }, + { + "epoch": 17.986645718774547, + "learning_rate": 0.0003895612458368572, + "loss": 2.885163497924805, + "step": 8580 + }, + { + "epoch": 18.008379156847344, + "learning_rate": 0.00038933442704275974, + "loss": 2.9911325454711912, + "step": 8590 + }, + { + "epoch": 18.029327048965698, + "learning_rate": 0.0003891080039779314, + "loss": 3.027914619445801, + "step": 8600 + }, + { + "epoch": 18.050274941084055, + "learning_rate": 0.0003888819754929986, + "loss": 2.9694196701049806, + "step": 8610 + }, + { + "epoch": 18.07122283320241, + "learning_rate": 0.0003886563404432558, + "loss": 2.9900096893310546, + "step": 8620 + }, + { + "epoch": 18.092170725320763, + "learning_rate": 0.0003884310976886414, + "loss": 2.987308692932129, + "step": 8630 + }, + { + "epoch": 18.11311861743912, + "learning_rate": 0.0003882062460937135, + "loss": 2.934325408935547, + "step": 8640 + }, + { + "epoch": 18.134066509557474, + "learning_rate": 0.0003879817845276255, + "loss": 2.9511764526367186, + "step": 8650 + }, + { + "epoch": 18.15501440167583, + "learning_rate": 0.0003877577118641029, + "loss": 2.9479455947875977, + "step": 8660 + }, + { + "epoch": 18.175962293794186, + "learning_rate": 0.00038753402698141903, + "loss": 3.0447383880615235, + "step": 8670 + }, + { + "epoch": 18.196910185912543, + "learning_rate": 0.0003873107287623715, + "loss": 2.917817497253418, + "step": 8680 + }, + { + "epoch": 18.217858078030897, + "learning_rate": 0.00038708781609425905, + "loss": 2.8964914321899413, + "step": 8690 + }, + { + "epoch": 18.238805970149254, + "learning_rate": 0.000386865287868858, + "loss": 2.967067527770996, + "step": 8700 + }, + { + "epoch": 18.25975386226761, + "learning_rate": 0.0003866431429823993, + "loss": 2.991856002807617, + "step": 8710 + }, + { + "epoch": 18.280701754385966, + "learning_rate": 0.00038642138033554525, + "loss": 2.96053524017334, + "step": 8720 + }, + { + "epoch": 18.30164964650432, + "learning_rate": 0.00038619999883336703, + "loss": 2.9373369216918945, + "step": 8730 + }, + { + "epoch": 18.322597538622677, + "learning_rate": 0.0003859789973853217, + "loss": 2.9254953384399416, + "step": 8740 + }, + { + "epoch": 18.34354543074103, + "learning_rate": 0.0003857583749052298, + "loss": 3.008597564697266, + "step": 8750 + }, + { + "epoch": 18.36449332285939, + "learning_rate": 0.0003855381303112527, + "loss": 2.9206886291503906, + "step": 8760 + }, + { + "epoch": 18.385441214977742, + "learning_rate": 0.0003853182625258708, + "loss": 2.9910358428955077, + "step": 8770 + }, + { + "epoch": 18.4063891070961, + "learning_rate": 0.0003850987704758608, + "loss": 2.894259452819824, + "step": 8780 + }, + { + "epoch": 18.427336999214454, + "learning_rate": 0.00038487965309227413, + "loss": 3.0239398956298826, + "step": 8790 + }, + { + "epoch": 18.44828489133281, + "learning_rate": 0.0003846609093104148, + "loss": 3.023584747314453, + "step": 8800 + }, + { + "epoch": 18.469232783451165, + "learning_rate": 0.00038444253806981784, + "loss": 2.9302574157714845, + "step": 8810 + }, + { + "epoch": 18.49018067556952, + "learning_rate": 0.00038422453831422784, + "loss": 2.968699264526367, + "step": 8820 + }, + { + "epoch": 18.511128567687877, + "learning_rate": 0.0003840069089915771, + "loss": 2.904973793029785, + "step": 8830 + }, + { + "epoch": 18.53207645980623, + "learning_rate": 0.00038378964905396454, + "loss": 2.9212614059448243, + "step": 8840 + }, + { + "epoch": 18.553024351924588, + "learning_rate": 0.00038357275745763475, + "loss": 2.9412769317626952, + "step": 8850 + }, + { + "epoch": 18.573972244042942, + "learning_rate": 0.0003833562331629563, + "loss": 2.926407814025879, + "step": 8860 + }, + { + "epoch": 18.5949201361613, + "learning_rate": 0.0003831400751344014, + "loss": 2.87393741607666, + "step": 8870 + }, + { + "epoch": 18.615868028279653, + "learning_rate": 0.00038292428234052486, + "loss": 2.93045597076416, + "step": 8880 + }, + { + "epoch": 18.63681592039801, + "learning_rate": 0.0003827088537539434, + "loss": 2.8498041152954103, + "step": 8890 + }, + { + "epoch": 18.657763812516365, + "learning_rate": 0.00038249378835131535, + "loss": 2.980220603942871, + "step": 8900 + }, + { + "epoch": 18.678711704634722, + "learning_rate": 0.0003822790851133196, + "loss": 2.9403018951416016, + "step": 8910 + }, + { + "epoch": 18.699659596753076, + "learning_rate": 0.00038206474302463617, + "loss": 2.932261848449707, + "step": 8920 + }, + { + "epoch": 18.720607488871433, + "learning_rate": 0.00038185076107392544, + "loss": 3.0572792053222657, + "step": 8930 + }, + { + "epoch": 18.741555380989787, + "learning_rate": 0.0003816371382538082, + "loss": 2.8232454299926757, + "step": 8940 + }, + { + "epoch": 18.762503273108145, + "learning_rate": 0.0003814238735608459, + "loss": 2.9247211456298827, + "step": 8950 + }, + { + "epoch": 18.7834511652265, + "learning_rate": 0.0003812109659955207, + "loss": 2.9309356689453123, + "step": 8960 + }, + { + "epoch": 18.804399057344856, + "learning_rate": 0.00038099841456221617, + "loss": 2.9406196594238283, + "step": 8970 + }, + { + "epoch": 18.82534694946321, + "learning_rate": 0.0003807862182691969, + "loss": 2.9863868713378907, + "step": 8980 + }, + { + "epoch": 18.846294841581567, + "learning_rate": 0.00038057437612859003, + "loss": 2.9020156860351562, + "step": 8990 + }, + { + "epoch": 18.86724273369992, + "learning_rate": 0.0003803628871563653, + "loss": 2.8909185409545897, + "step": 9000 + }, + { + "epoch": 18.888190625818275, + "learning_rate": 0.0003801517503723161, + "loss": 2.905278205871582, + "step": 9010 + }, + { + "epoch": 18.909138517936633, + "learning_rate": 0.00037994096480004037, + "loss": 2.971329689025879, + "step": 9020 + }, + { + "epoch": 18.930086410054987, + "learning_rate": 0.0003797305294669214, + "loss": 2.9014846801757814, + "step": 9030 + }, + { + "epoch": 18.951034302173344, + "learning_rate": 0.00037952044340410954, + "loss": 2.919228363037109, + "step": 9040 + }, + { + "epoch": 18.971982194291698, + "learning_rate": 0.00037931070564650276, + "loss": 2.9611515045166015, + "step": 9050 + }, + { + "epoch": 18.992930086410055, + "learning_rate": 0.0003791013152327286, + "loss": 3.06106014251709, + "step": 9060 + }, + { + "epoch": 19.01466352448285, + "learning_rate": 0.00037889227120512545, + "loss": 3.0117502212524414, + "step": 9070 + }, + { + "epoch": 19.035611416601206, + "learning_rate": 0.0003786835726097239, + "loss": 2.908797836303711, + "step": 9080 + }, + { + "epoch": 19.05655930871956, + "learning_rate": 0.00037847521849622895, + "loss": 2.977317047119141, + "step": 9090 + }, + { + "epoch": 19.077507200837914, + "learning_rate": 0.0003782672079180015, + "loss": 2.982106018066406, + "step": 9100 + }, + { + "epoch": 19.09845509295627, + "learning_rate": 0.0003780595399320404, + "loss": 2.9336454391479494, + "step": 9110 + }, + { + "epoch": 19.119402985074625, + "learning_rate": 0.00037785221359896444, + "loss": 2.9511249542236326, + "step": 9120 + }, + { + "epoch": 19.140350877192983, + "learning_rate": 0.00037764522798299443, + "loss": 2.9214210510253906, + "step": 9130 + }, + { + "epoch": 19.161298769311337, + "learning_rate": 0.0003774385821519358, + "loss": 2.9141027450561525, + "step": 9140 + }, + { + "epoch": 19.182246661429694, + "learning_rate": 0.0003772322751771605, + "loss": 3.0709518432617187, + "step": 9150 + }, + { + "epoch": 19.203194553548048, + "learning_rate": 0.00037702630613358986, + "loss": 2.9491186141967773, + "step": 9160 + }, + { + "epoch": 19.224142445666406, + "learning_rate": 0.0003768206740996769, + "loss": 2.9979949951171876, + "step": 9170 + }, + { + "epoch": 19.24509033778476, + "learning_rate": 0.00037661537815738915, + "loss": 2.9300355911254883, + "step": 9180 + }, + { + "epoch": 19.266038229903117, + "learning_rate": 0.00037641041739219143, + "loss": 2.95788631439209, + "step": 9190 + }, + { + "epoch": 19.28698612202147, + "learning_rate": 0.00037620579089302876, + "loss": 2.960785675048828, + "step": 9200 + }, + { + "epoch": 19.30793401413983, + "learning_rate": 0.0003760014977523091, + "loss": 2.9440664291381835, + "step": 9210 + }, + { + "epoch": 19.328881906258182, + "learning_rate": 0.00037579753706588697, + "loss": 2.9864282608032227, + "step": 9220 + }, + { + "epoch": 19.34982979837654, + "learning_rate": 0.00037559390793304604, + "loss": 2.830784225463867, + "step": 9230 + }, + { + "epoch": 19.370777690494894, + "learning_rate": 0.00037539060945648286, + "loss": 2.981433868408203, + "step": 9240 + }, + { + "epoch": 19.39172558261325, + "learning_rate": 0.00037518764074229014, + "loss": 2.8778303146362303, + "step": 9250 + }, + { + "epoch": 19.412673474731605, + "learning_rate": 0.00037498500089994, + "loss": 2.896072006225586, + "step": 9260 + }, + { + "epoch": 19.433621366849962, + "learning_rate": 0.00037478268904226795, + "loss": 2.962319564819336, + "step": 9270 + }, + { + "epoch": 19.454569258968316, + "learning_rate": 0.00037458070428545635, + "loss": 2.9110170364379884, + "step": 9280 + }, + { + "epoch": 19.47551715108667, + "learning_rate": 0.00037437904574901817, + "loss": 2.928105926513672, + "step": 9290 + }, + { + "epoch": 19.496465043205028, + "learning_rate": 0.00037417771255578104, + "loss": 2.91275577545166, + "step": 9300 + }, + { + "epoch": 19.51741293532338, + "learning_rate": 0.00037397670383187097, + "loss": 2.964938163757324, + "step": 9310 + }, + { + "epoch": 19.53836082744174, + "learning_rate": 0.0003737760187066967, + "loss": 2.953006935119629, + "step": 9320 + }, + { + "epoch": 19.559308719560093, + "learning_rate": 0.00037357565631293365, + "loss": 3.108722686767578, + "step": 9330 + }, + { + "epoch": 19.58025661167845, + "learning_rate": 0.00037337561578650833, + "loss": 3.078016471862793, + "step": 9340 + }, + { + "epoch": 19.601204503796804, + "learning_rate": 0.00037317589626658255, + "loss": 3.006759262084961, + "step": 9350 + }, + { + "epoch": 19.622152395915162, + "learning_rate": 0.0003729764968955379, + "loss": 2.9581228256225587, + "step": 9360 + }, + { + "epoch": 19.643100288033516, + "learning_rate": 0.00037277741681896045, + "loss": 3.199405860900879, + "step": 9370 + }, + { + "epoch": 19.664048180151873, + "learning_rate": 0.0003725786551856251, + "loss": 3.039951133728027, + "step": 9380 + }, + { + "epoch": 19.684996072270227, + "learning_rate": 0.0003723802111474804, + "loss": 2.99322566986084, + "step": 9390 + }, + { + "epoch": 19.705943964388585, + "learning_rate": 0.0003721820838596335, + "loss": 2.9919605255126953, + "step": 9400 + }, + { + "epoch": 19.72689185650694, + "learning_rate": 0.00037198427248033485, + "loss": 2.9185922622680662, + "step": 9410 + }, + { + "epoch": 19.747839748625296, + "learning_rate": 0.00037178677617096337, + "loss": 2.927593994140625, + "step": 9420 + }, + { + "epoch": 19.76878764074365, + "learning_rate": 0.0003715895940960111, + "loss": 2.9274175643920897, + "step": 9430 + }, + { + "epoch": 19.789735532862007, + "learning_rate": 0.000371392725423069, + "loss": 2.9191694259643555, + "step": 9440 + }, + { + "epoch": 19.81068342498036, + "learning_rate": 0.00037119616932281165, + "loss": 3.002157974243164, + "step": 9450 + }, + { + "epoch": 19.83163131709872, + "learning_rate": 0.00037099992496898276, + "loss": 2.9765802383422852, + "step": 9460 + }, + { + "epoch": 19.852579209217073, + "learning_rate": 0.00037080399153838065, + "loss": 2.983877182006836, + "step": 9470 + }, + { + "epoch": 19.873527101335426, + "learning_rate": 0.00037060836821084373, + "loss": 2.9292572021484373, + "step": 9480 + }, + { + "epoch": 19.894474993453784, + "learning_rate": 0.00037041305416923604, + "loss": 2.9650571823120115, + "step": 9490 + }, + { + "epoch": 19.915422885572138, + "learning_rate": 0.0003702180485994327, + "loss": 2.9420921325683596, + "step": 9500 + }, + { + "epoch": 19.936370777690495, + "learning_rate": 0.00037002335069030614, + "loss": 2.8849225997924806, + "step": 9510 + }, + { + "epoch": 19.95731866980885, + "learning_rate": 0.0003698289596337116, + "loss": 2.882498550415039, + "step": 9520 + }, + { + "epoch": 19.978266561927207, + "learning_rate": 0.00036963487462447303, + "loss": 2.9787607192993164, + "step": 9530 + }, + { + "epoch": 19.99921445404556, + "learning_rate": 0.0003694410948603691, + "loss": 2.93823299407959, + "step": 9540 + }, + { + "epoch": 20.020947892118354, + "learning_rate": 0.00036924761954211944, + "loss": 3.033322334289551, + "step": 9550 + }, + { + "epoch": 20.04189578423671, + "learning_rate": 0.0003690544478733707, + "loss": 2.9187992095947264, + "step": 9560 + }, + { + "epoch": 20.062843676355065, + "learning_rate": 0.0003688615790606828, + "loss": 2.8914941787719726, + "step": 9570 + }, + { + "epoch": 20.083791568473423, + "learning_rate": 0.000368669012313515, + "loss": 2.8893537521362305, + "step": 9580 + }, + { + "epoch": 20.104739460591777, + "learning_rate": 0.0003684767468442126, + "loss": 2.906744384765625, + "step": 9590 + }, + { + "epoch": 20.125687352710134, + "learning_rate": 0.0003682847818679935, + "loss": 2.8719203948974608, + "step": 9600 + }, + { + "epoch": 20.146635244828488, + "learning_rate": 0.0003680931166029342, + "loss": 2.956478500366211, + "step": 9610 + }, + { + "epoch": 20.167583136946845, + "learning_rate": 0.000367901750269957, + "loss": 3.0235532760620116, + "step": 9620 + }, + { + "epoch": 20.1885310290652, + "learning_rate": 0.00036771068209281657, + "loss": 2.9067535400390625, + "step": 9630 + }, + { + "epoch": 20.209478921183557, + "learning_rate": 0.0003675199112980863, + "loss": 3.0424097061157225, + "step": 9640 + }, + { + "epoch": 20.23042681330191, + "learning_rate": 0.0003673294371151458, + "loss": 2.873898506164551, + "step": 9650 + }, + { + "epoch": 20.251374705420268, + "learning_rate": 0.0003671392587761674, + "loss": 2.9879985809326173, + "step": 9660 + }, + { + "epoch": 20.272322597538622, + "learning_rate": 0.0003669493755161031, + "loss": 2.9691110610961915, + "step": 9670 + }, + { + "epoch": 20.29327048965698, + "learning_rate": 0.00036675978657267204, + "loss": 2.8997966766357424, + "step": 9680 + }, + { + "epoch": 20.314218381775333, + "learning_rate": 0.00036657049118634733, + "loss": 3.0071743011474608, + "step": 9690 + }, + { + "epoch": 20.33516627389369, + "learning_rate": 0.0003663814886003432, + "loss": 2.9313344955444336, + "step": 9700 + }, + { + "epoch": 20.356114166012045, + "learning_rate": 0.00036619277806060276, + "loss": 2.9554468154907227, + "step": 9710 + }, + { + "epoch": 20.377062058130402, + "learning_rate": 0.0003660043588157846, + "loss": 2.9395862579345704, + "step": 9720 + }, + { + "epoch": 20.398009950248756, + "learning_rate": 0.00036581623011725114, + "loss": 2.8985124588012696, + "step": 9730 + }, + { + "epoch": 20.41895784236711, + "learning_rate": 0.0003656283912190554, + "loss": 2.92258243560791, + "step": 9740 + }, + { + "epoch": 20.439905734485468, + "learning_rate": 0.00036544084137792883, + "loss": 2.9244316101074217, + "step": 9750 + }, + { + "epoch": 20.46085362660382, + "learning_rate": 0.00036525357985326903, + "loss": 2.9558000564575195, + "step": 9760 + }, + { + "epoch": 20.48180151872218, + "learning_rate": 0.0003650666059071275, + "loss": 2.84803466796875, + "step": 9770 + }, + { + "epoch": 20.502749410840533, + "learning_rate": 0.00036487991880419725, + "loss": 2.9493398666381836, + "step": 9780 + }, + { + "epoch": 20.52369730295889, + "learning_rate": 0.00036469351781180073, + "loss": 2.925904083251953, + "step": 9790 + }, + { + "epoch": 20.544645195077244, + "learning_rate": 0.00036450740219987765, + "loss": 2.8513689041137695, + "step": 9800 + }, + { + "epoch": 20.5655930871956, + "learning_rate": 0.0003643215712409734, + "loss": 2.9981321334838866, + "step": 9810 + }, + { + "epoch": 20.586540979313956, + "learning_rate": 0.00036413602421022653, + "loss": 2.8937055587768556, + "step": 9820 + }, + { + "epoch": 20.607488871432313, + "learning_rate": 0.0003639507603853572, + "loss": 3.000600814819336, + "step": 9830 + }, + { + "epoch": 20.628436763550667, + "learning_rate": 0.00036376577904665525, + "loss": 2.8795480728149414, + "step": 9840 + }, + { + "epoch": 20.649384655669024, + "learning_rate": 0.00036358107947696876, + "loss": 2.8654504776000977, + "step": 9850 + }, + { + "epoch": 20.67033254778738, + "learning_rate": 0.0003633966609616919, + "loss": 2.9193105697631836, + "step": 9860 + }, + { + "epoch": 20.691280439905736, + "learning_rate": 0.00036321252278875344, + "loss": 3.000343894958496, + "step": 9870 + }, + { + "epoch": 20.71222833202409, + "learning_rate": 0.00036302866424860566, + "loss": 2.9843284606933596, + "step": 9880 + }, + { + "epoch": 20.733176224142447, + "learning_rate": 0.00036284508463421217, + "loss": 2.9358942031860353, + "step": 9890 + }, + { + "epoch": 20.7541241162608, + "learning_rate": 0.0003626617832410371, + "loss": 2.9408638000488283, + "step": 9900 + }, + { + "epoch": 20.77507200837916, + "learning_rate": 0.00036247875936703335, + "loss": 2.9035417556762697, + "step": 9910 + }, + { + "epoch": 20.796019900497512, + "learning_rate": 0.00036229601231263145, + "loss": 2.8692134857177733, + "step": 9920 + }, + { + "epoch": 20.81696779261587, + "learning_rate": 0.0003621135413807282, + "loss": 2.8154270172119142, + "step": 9930 + }, + { + "epoch": 20.837915684734224, + "learning_rate": 0.0003619313458766758, + "loss": 2.905722427368164, + "step": 9940 + }, + { + "epoch": 20.858863576852578, + "learning_rate": 0.0003617494251082704, + "loss": 2.9300207138061523, + "step": 9950 + }, + { + "epoch": 20.879811468970935, + "learning_rate": 0.0003615677783857413, + "loss": 2.9284923553466795, + "step": 9960 + }, + { + "epoch": 20.90075936108929, + "learning_rate": 0.0003613864050217397, + "loss": 2.8830732345581054, + "step": 9970 + }, + { + "epoch": 20.921707253207646, + "learning_rate": 0.0003612053043313283, + "loss": 2.922040557861328, + "step": 9980 + }, + { + "epoch": 20.942655145326, + "learning_rate": 0.0003610244756319697, + "loss": 2.919090461730957, + "step": 9990 + }, + { + "epoch": 20.963603037444358, + "learning_rate": 0.00036084391824351607, + "loss": 2.9119571685791015, + "step": 10000 + }, + { + "epoch": 20.98455092956271, + "learning_rate": 0.00036066363148819854, + "loss": 3.0065305709838865, + "step": 10010 + }, + { + "epoch": 21.006284367635505, + "learning_rate": 0.000360483614690616, + "loss": 3.088846206665039, + "step": 10020 + }, + { + "epoch": 21.027232259753863, + "learning_rate": 0.00036030386717772494, + "loss": 2.887124443054199, + "step": 10030 + }, + { + "epoch": 21.048180151872216, + "learning_rate": 0.0003601243882788286, + "loss": 2.9657873153686523, + "step": 10040 + }, + { + "epoch": 21.069128043990574, + "learning_rate": 0.0003599451773255667, + "loss": 2.8971757888793945, + "step": 10050 + }, + { + "epoch": 21.090075936108928, + "learning_rate": 0.00035976623365190465, + "loss": 2.8915260314941404, + "step": 10060 + }, + { + "epoch": 21.111023828227285, + "learning_rate": 0.0003595875565941235, + "loss": 2.9463220596313477, + "step": 10070 + }, + { + "epoch": 21.13197172034564, + "learning_rate": 0.00035940914549080944, + "loss": 2.9156425476074217, + "step": 10080 + }, + { + "epoch": 21.152919612463997, + "learning_rate": 0.0003592309996828435, + "loss": 2.996392250061035, + "step": 10090 + }, + { + "epoch": 21.17386750458235, + "learning_rate": 0.0003590531185133913, + "loss": 2.899692344665527, + "step": 10100 + }, + { + "epoch": 21.194815396700708, + "learning_rate": 0.0003588755013278929, + "loss": 2.9242908477783205, + "step": 10110 + }, + { + "epoch": 21.215763288819062, + "learning_rate": 0.00035869814747405306, + "loss": 2.846599578857422, + "step": 10120 + }, + { + "epoch": 21.23671118093742, + "learning_rate": 0.00035852105630183027, + "loss": 2.9692026138305665, + "step": 10130 + }, + { + "epoch": 21.257659073055773, + "learning_rate": 0.0003583442271634278, + "loss": 2.921228790283203, + "step": 10140 + }, + { + "epoch": 21.27860696517413, + "learning_rate": 0.000358167659413283, + "loss": 2.906037139892578, + "step": 10150 + }, + { + "epoch": 21.299554857292485, + "learning_rate": 0.00035799135240805765, + "loss": 2.8459890365600584, + "step": 10160 + }, + { + "epoch": 21.320502749410842, + "learning_rate": 0.0003578153055066282, + "loss": 2.839315986633301, + "step": 10170 + }, + { + "epoch": 21.341450641529196, + "learning_rate": 0.00035763951807007597, + "loss": 2.918286895751953, + "step": 10180 + }, + { + "epoch": 21.36239853364755, + "learning_rate": 0.0003574639894616771, + "loss": 2.9180910110473635, + "step": 10190 + }, + { + "epoch": 21.383346425765907, + "learning_rate": 0.0003572887190468934, + "loss": 2.96252498626709, + "step": 10200 + }, + { + "epoch": 21.40429431788426, + "learning_rate": 0.00035711370619336214, + "loss": 2.873885726928711, + "step": 10210 + }, + { + "epoch": 21.42524221000262, + "learning_rate": 0.00035693895027088694, + "loss": 2.8597929000854494, + "step": 10220 + }, + { + "epoch": 21.446190102120973, + "learning_rate": 0.00035676445065142793, + "loss": 2.9235706329345703, + "step": 10230 + }, + { + "epoch": 21.46713799423933, + "learning_rate": 0.0003565902067090925, + "loss": 2.843309783935547, + "step": 10240 + }, + { + "epoch": 21.488085886357684, + "learning_rate": 0.0003564162178201257, + "loss": 2.9598644256591795, + "step": 10250 + }, + { + "epoch": 21.50903377847604, + "learning_rate": 0.0003562424833629007, + "loss": 2.8366893768310546, + "step": 10260 + }, + { + "epoch": 21.529981670594395, + "learning_rate": 0.0003560690027179101, + "loss": 2.927451515197754, + "step": 10270 + }, + { + "epoch": 21.550929562712753, + "learning_rate": 0.00035589577526775603, + "loss": 2.8992708206176756, + "step": 10280 + }, + { + "epoch": 21.571877454831107, + "learning_rate": 0.000355722800397141, + "loss": 2.8801607131958007, + "step": 10290 + }, + { + "epoch": 21.592825346949464, + "learning_rate": 0.00035555007749285897, + "loss": 2.929705047607422, + "step": 10300 + }, + { + "epoch": 21.613773239067818, + "learning_rate": 0.00035537760594378607, + "loss": 2.9493310928344725, + "step": 10310 + }, + { + "epoch": 21.634721131186176, + "learning_rate": 0.00035520538514087155, + "loss": 2.9074274063110352, + "step": 10320 + }, + { + "epoch": 21.65566902330453, + "learning_rate": 0.0003550334144771289, + "loss": 2.8912160873413084, + "step": 10330 + }, + { + "epoch": 21.676616915422887, + "learning_rate": 0.00035486169334762637, + "loss": 2.8537384033203126, + "step": 10340 + }, + { + "epoch": 21.69756480754124, + "learning_rate": 0.00035469022114947857, + "loss": 2.899664878845215, + "step": 10350 + }, + { + "epoch": 21.7185126996596, + "learning_rate": 0.00035451899728183736, + "loss": 2.9585891723632813, + "step": 10360 + }, + { + "epoch": 21.739460591777952, + "learning_rate": 0.00035434802114588305, + "loss": 2.8878129959106444, + "step": 10370 + }, + { + "epoch": 21.760408483896306, + "learning_rate": 0.00035417729214481556, + "loss": 2.9733861923217773, + "step": 10380 + }, + { + "epoch": 21.781356376014664, + "learning_rate": 0.0003540068096838456, + "loss": 2.877838134765625, + "step": 10390 + }, + { + "epoch": 21.802304268133017, + "learning_rate": 0.0003538365731701862, + "loss": 2.874703598022461, + "step": 10400 + }, + { + "epoch": 21.823252160251375, + "learning_rate": 0.0003536665820130437, + "loss": 2.8807294845581053, + "step": 10410 + }, + { + "epoch": 21.84420005236973, + "learning_rate": 0.00035349683562360966, + "loss": 2.9425344467163086, + "step": 10420 + }, + { + "epoch": 21.865147944488086, + "learning_rate": 0.0003533273334150517, + "loss": 2.9000774383544923, + "step": 10430 + }, + { + "epoch": 21.88609583660644, + "learning_rate": 0.0003531580748025054, + "loss": 2.8818313598632814, + "step": 10440 + }, + { + "epoch": 21.907043728724798, + "learning_rate": 0.00035298905920306563, + "loss": 2.9183849334716796, + "step": 10450 + }, + { + "epoch": 21.92799162084315, + "learning_rate": 0.00035282028603577823, + "loss": 2.843509292602539, + "step": 10460 + }, + { + "epoch": 21.94893951296151, + "learning_rate": 0.0003526517547216315, + "loss": 2.9971471786499024, + "step": 10470 + }, + { + "epoch": 21.969887405079863, + "learning_rate": 0.000352483464683548, + "loss": 2.851757621765137, + "step": 10480 + }, + { + "epoch": 21.99083529719822, + "learning_rate": 0.0003523154153463761, + "loss": 2.889949417114258, + "step": 10490 + }, + { + "epoch": 22.012568735271014, + "learning_rate": 0.00035214760613688187, + "loss": 2.9378170013427733, + "step": 10500 + }, + { + "epoch": 22.033516627389368, + "learning_rate": 0.0003519800364837407, + "loss": 2.9217357635498047, + "step": 10510 + }, + { + "epoch": 22.054464519507725, + "learning_rate": 0.0003518127058175293, + "loss": 2.9642236709594725, + "step": 10520 + }, + { + "epoch": 22.07541241162608, + "learning_rate": 0.00035164561357071755, + "loss": 2.9043264389038086, + "step": 10530 + }, + { + "epoch": 22.096360303744436, + "learning_rate": 0.0003514787591776602, + "loss": 2.9355424880981444, + "step": 10540 + }, + { + "epoch": 22.11730819586279, + "learning_rate": 0.0003513121420745892, + "loss": 2.876350975036621, + "step": 10550 + }, + { + "epoch": 22.138256087981148, + "learning_rate": 0.0003511457616996052, + "loss": 2.9557785034179687, + "step": 10560 + }, + { + "epoch": 22.1592039800995, + "learning_rate": 0.0003509796174926703, + "loss": 2.9274904251098635, + "step": 10570 + }, + { + "epoch": 22.18015187221786, + "learning_rate": 0.00035081370889559934, + "loss": 2.873898506164551, + "step": 10580 + }, + { + "epoch": 22.201099764336213, + "learning_rate": 0.0003506480353520526, + "loss": 3.015602684020996, + "step": 10590 + }, + { + "epoch": 22.22204765645457, + "learning_rate": 0.0003504825963075276, + "loss": 2.836939239501953, + "step": 10600 + }, + { + "epoch": 22.242995548572924, + "learning_rate": 0.00035031739120935175, + "loss": 2.9317630767822265, + "step": 10610 + }, + { + "epoch": 22.263943440691282, + "learning_rate": 0.0003501524195066741, + "loss": 2.8972990036010744, + "step": 10620 + }, + { + "epoch": 22.284891332809636, + "learning_rate": 0.0003499876806504578, + "loss": 2.9171611785888674, + "step": 10630 + }, + { + "epoch": 22.305839224927993, + "learning_rate": 0.00034982317409347263, + "loss": 2.9115921020507813, + "step": 10640 + }, + { + "epoch": 22.326787117046347, + "learning_rate": 0.00034965889929028707, + "loss": 3.029188537597656, + "step": 10650 + }, + { + "epoch": 22.3477350091647, + "learning_rate": 0.000349494855697261, + "loss": 2.900659370422363, + "step": 10660 + }, + { + "epoch": 22.36868290128306, + "learning_rate": 0.0003493310427725377, + "loss": 2.8941118240356447, + "step": 10670 + }, + { + "epoch": 22.389630793401412, + "learning_rate": 0.0003491674599760369, + "loss": 2.913021278381348, + "step": 10680 + }, + { + "epoch": 22.41057868551977, + "learning_rate": 0.0003490041067694469, + "loss": 2.907943916320801, + "step": 10690 + }, + { + "epoch": 22.431526577638124, + "learning_rate": 0.00034884098261621724, + "loss": 2.8749153137207033, + "step": 10700 + }, + { + "epoch": 22.45247446975648, + "learning_rate": 0.00034867808698155125, + "loss": 2.836786460876465, + "step": 10710 + }, + { + "epoch": 22.473422361874835, + "learning_rate": 0.0003485154193323988, + "loss": 2.9680070877075195, + "step": 10720 + }, + { + "epoch": 22.494370253993193, + "learning_rate": 0.00034835297913744903, + "loss": 2.839517593383789, + "step": 10730 + }, + { + "epoch": 22.515318146111547, + "learning_rate": 0.0003481907658671227, + "loss": 2.9294412612915037, + "step": 10740 + }, + { + "epoch": 22.536266038229904, + "learning_rate": 0.0003480287789935653, + "loss": 2.931773376464844, + "step": 10750 + }, + { + "epoch": 22.557213930348258, + "learning_rate": 0.00034786701799063976, + "loss": 2.8776823043823243, + "step": 10760 + }, + { + "epoch": 22.578161822466615, + "learning_rate": 0.00034770548233391924, + "loss": 2.8429998397827148, + "step": 10770 + }, + { + "epoch": 22.59910971458497, + "learning_rate": 0.0003475441715006799, + "loss": 2.8952512741088867, + "step": 10780 + }, + { + "epoch": 22.620057606703327, + "learning_rate": 0.0003473830849698938, + "loss": 2.8576940536499023, + "step": 10790 + }, + { + "epoch": 22.64100549882168, + "learning_rate": 0.0003472222222222222, + "loss": 2.9290803909301757, + "step": 10800 + }, + { + "epoch": 22.661953390940038, + "learning_rate": 0.00034706158274000796, + "loss": 2.861796569824219, + "step": 10810 + }, + { + "epoch": 22.682901283058392, + "learning_rate": 0.00034690116600726885, + "loss": 2.9451555252075194, + "step": 10820 + }, + { + "epoch": 22.70384917517675, + "learning_rate": 0.0003467409715096907, + "loss": 2.8637598037719725, + "step": 10830 + }, + { + "epoch": 22.724797067295103, + "learning_rate": 0.00034658099873462027, + "loss": 2.919582176208496, + "step": 10840 + }, + { + "epoch": 22.745744959413457, + "learning_rate": 0.0003464212471710583, + "loss": 2.9625146865844725, + "step": 10850 + }, + { + "epoch": 22.766692851531815, + "learning_rate": 0.0003462617163096529, + "loss": 2.8601587295532225, + "step": 10860 + }, + { + "epoch": 22.78764074365017, + "learning_rate": 0.00034610240564269265, + "loss": 2.8949390411376954, + "step": 10870 + }, + { + "epoch": 22.808588635768526, + "learning_rate": 0.0003459433146640997, + "loss": 2.8205642700195312, + "step": 10880 + }, + { + "epoch": 22.82953652788688, + "learning_rate": 0.00034578444286942307, + "loss": 2.93542366027832, + "step": 10890 + }, + { + "epoch": 22.850484420005237, + "learning_rate": 0.00034562578975583187, + "loss": 2.8913852691650392, + "step": 10900 + }, + { + "epoch": 22.87143231212359, + "learning_rate": 0.00034546735482210894, + "loss": 2.9478212356567384, + "step": 10910 + }, + { + "epoch": 22.89238020424195, + "learning_rate": 0.0003453091375686437, + "loss": 2.854338455200195, + "step": 10920 + }, + { + "epoch": 22.913328096360303, + "learning_rate": 0.00034515113749742586, + "loss": 2.919601058959961, + "step": 10930 + }, + { + "epoch": 22.93427598847866, + "learning_rate": 0.00034499335411203894, + "loss": 2.88704776763916, + "step": 10940 + }, + { + "epoch": 22.955223880597014, + "learning_rate": 0.00034483578691765326, + "loss": 2.906253433227539, + "step": 10950 + }, + { + "epoch": 22.97617177271537, + "learning_rate": 0.00034467843542102, + "loss": 2.8981559753417967, + "step": 10960 + }, + { + "epoch": 22.997119664833725, + "learning_rate": 0.0003445212991304641, + "loss": 2.9073596954345704, + "step": 10970 + }, + { + "epoch": 23.01885310290652, + "learning_rate": 0.00034436437755587827, + "loss": 2.960616874694824, + "step": 10980 + }, + { + "epoch": 23.039800995024876, + "learning_rate": 0.00034420767020871656, + "loss": 2.9549840927124023, + "step": 10990 + }, + { + "epoch": 23.06074888714323, + "learning_rate": 0.00034405117660198765, + "loss": 2.9256917953491213, + "step": 11000 + }, + { + "epoch": 23.081696779261588, + "learning_rate": 0.00034389489625024885, + "loss": 2.89876651763916, + "step": 11010 + }, + { + "epoch": 23.10264467137994, + "learning_rate": 0.00034373882866959936, + "loss": 2.828813362121582, + "step": 11020 + }, + { + "epoch": 23.1235925634983, + "learning_rate": 0.0003435829733776745, + "loss": 2.9374326705932616, + "step": 11030 + }, + { + "epoch": 23.144540455616653, + "learning_rate": 0.00034342732989363903, + "loss": 2.928928184509277, + "step": 11040 + }, + { + "epoch": 23.16548834773501, + "learning_rate": 0.0003432718977381811, + "loss": 2.8702091217041015, + "step": 11050 + }, + { + "epoch": 23.186436239853364, + "learning_rate": 0.0003431166764335058, + "loss": 2.937228965759277, + "step": 11060 + }, + { + "epoch": 23.207384131971722, + "learning_rate": 0.0003429616655033297, + "loss": 2.9454578399658202, + "step": 11070 + }, + { + "epoch": 23.228332024090076, + "learning_rate": 0.00034280686447287373, + "loss": 2.8274587631225585, + "step": 11080 + }, + { + "epoch": 23.249279916208433, + "learning_rate": 0.00034265227286885776, + "loss": 2.876905632019043, + "step": 11090 + }, + { + "epoch": 23.270227808326787, + "learning_rate": 0.00034249789021949435, + "loss": 2.8264415740966795, + "step": 11100 + }, + { + "epoch": 23.29117570044514, + "learning_rate": 0.0003423437160544826, + "loss": 2.8484895706176756, + "step": 11110 + }, + { + "epoch": 23.3121235925635, + "learning_rate": 0.0003421897499050022, + "loss": 2.885685920715332, + "step": 11120 + }, + { + "epoch": 23.333071484681852, + "learning_rate": 0.0003420359913037075, + "loss": 2.9073020935058596, + "step": 11130 + }, + { + "epoch": 23.35401937680021, + "learning_rate": 0.0003418824397847216, + "loss": 2.919231986999512, + "step": 11140 + }, + { + "epoch": 23.374967268918564, + "learning_rate": 0.00034172909488363007, + "loss": 2.8528385162353516, + "step": 11150 + }, + { + "epoch": 23.39591516103692, + "learning_rate": 0.00034157595613747545, + "loss": 2.8856670379638674, + "step": 11160 + }, + { + "epoch": 23.416863053155275, + "learning_rate": 0.00034142302308475133, + "loss": 2.851297950744629, + "step": 11170 + }, + { + "epoch": 23.437810945273633, + "learning_rate": 0.0003412702952653962, + "loss": 2.8231760025024415, + "step": 11180 + }, + { + "epoch": 23.458758837391986, + "learning_rate": 0.00034111777222078796, + "loss": 2.9219854354858397, + "step": 11190 + }, + { + "epoch": 23.479706729510344, + "learning_rate": 0.00034096545349373804, + "loss": 2.977204132080078, + "step": 11200 + }, + { + "epoch": 23.500654621628698, + "learning_rate": 0.0003408133386284857, + "loss": 2.8231952667236326, + "step": 11210 + }, + { + "epoch": 23.521602513747055, + "learning_rate": 0.0003406614271706919, + "loss": 2.891893196105957, + "step": 11220 + }, + { + "epoch": 23.54255040586541, + "learning_rate": 0.0003405097186674344, + "loss": 2.8958648681640624, + "step": 11230 + }, + { + "epoch": 23.563498297983767, + "learning_rate": 0.00034035821266720136, + "loss": 2.8495506286621093, + "step": 11240 + }, + { + "epoch": 23.58444619010212, + "learning_rate": 0.0003402069087198858, + "loss": 2.935627746582031, + "step": 11250 + }, + { + "epoch": 23.605394082220478, + "learning_rate": 0.00034005580637678053, + "loss": 2.840359687805176, + "step": 11260 + }, + { + "epoch": 23.626341974338832, + "learning_rate": 0.00033990490519057183, + "loss": 2.9121625900268553, + "step": 11270 + }, + { + "epoch": 23.64728986645719, + "learning_rate": 0.0003397542047153345, + "loss": 2.897580146789551, + "step": 11280 + }, + { + "epoch": 23.668237758575543, + "learning_rate": 0.0003396037045065257, + "loss": 2.894269561767578, + "step": 11290 + }, + { + "epoch": 23.6891856506939, + "learning_rate": 0.0003394534041209802, + "loss": 2.923667335510254, + "step": 11300 + }, + { + "epoch": 23.710133542812255, + "learning_rate": 0.0003393033031169043, + "loss": 2.9479984283447265, + "step": 11310 + }, + { + "epoch": 23.73108143493061, + "learning_rate": 0.0003391534010538705, + "loss": 2.9156826019287108, + "step": 11320 + }, + { + "epoch": 23.752029327048966, + "learning_rate": 0.00033900369749281225, + "loss": 2.9133535385131837, + "step": 11330 + }, + { + "epoch": 23.77297721916732, + "learning_rate": 0.00033885419199601845, + "loss": 2.912689971923828, + "step": 11340 + }, + { + "epoch": 23.793925111285677, + "learning_rate": 0.000338704884127128, + "loss": 2.921385955810547, + "step": 11350 + }, + { + "epoch": 23.81487300340403, + "learning_rate": 0.00033855577345112453, + "loss": 2.9245376586914062, + "step": 11360 + }, + { + "epoch": 23.83582089552239, + "learning_rate": 0.0003384068595343312, + "loss": 2.921660232543945, + "step": 11370 + }, + { + "epoch": 23.856768787640743, + "learning_rate": 0.00033825814194440504, + "loss": 2.8597009658813475, + "step": 11380 + }, + { + "epoch": 23.8777166797591, + "learning_rate": 0.0003381096202503321, + "loss": 2.9699087142944336, + "step": 11390 + }, + { + "epoch": 23.898664571877454, + "learning_rate": 0.00033796129402242193, + "loss": 2.868007850646973, + "step": 11400 + }, + { + "epoch": 23.91961246399581, + "learning_rate": 0.0003378131628323024, + "loss": 2.8257036209106445, + "step": 11410 + }, + { + "epoch": 23.940560356114165, + "learning_rate": 0.0003376652262529146, + "loss": 2.8747650146484376, + "step": 11420 + }, + { + "epoch": 23.961508248232523, + "learning_rate": 0.00033751748385850753, + "loss": 2.8721830368041994, + "step": 11430 + }, + { + "epoch": 23.982456140350877, + "learning_rate": 0.00033736993522463316, + "loss": 2.8383148193359373, + "step": 11440 + }, + { + "epoch": 24.00418957842367, + "learning_rate": 0.00033722257992814113, + "loss": 2.9513004302978514, + "step": 11450 + }, + { + "epoch": 24.025137470542028, + "learning_rate": 0.0003370754175471737, + "loss": 2.9344108581542967, + "step": 11460 + }, + { + "epoch": 24.04608536266038, + "learning_rate": 0.0003369284476611607, + "loss": 2.9109573364257812, + "step": 11470 + }, + { + "epoch": 24.06703325477874, + "learning_rate": 0.00033678166985081433, + "loss": 2.8260976791381838, + "step": 11480 + }, + { + "epoch": 24.087981146897093, + "learning_rate": 0.0003366350836981245, + "loss": 2.790972137451172, + "step": 11490 + }, + { + "epoch": 24.10892903901545, + "learning_rate": 0.0003364886887863534, + "loss": 2.8716499328613283, + "step": 11500 + }, + { + "epoch": 24.129876931133804, + "learning_rate": 0.0003363424847000309, + "loss": 2.8708847045898436, + "step": 11510 + }, + { + "epoch": 24.15082482325216, + "learning_rate": 0.0003361964710249494, + "loss": 2.8762466430664064, + "step": 11520 + }, + { + "epoch": 24.171772715370516, + "learning_rate": 0.00033605064734815865, + "loss": 2.8436599731445313, + "step": 11530 + }, + { + "epoch": 24.192720607488873, + "learning_rate": 0.0003359050132579615, + "loss": 2.885796546936035, + "step": 11540 + }, + { + "epoch": 24.213668499607227, + "learning_rate": 0.00033575956834390843, + "loss": 2.8459619522094726, + "step": 11550 + }, + { + "epoch": 24.234616391725584, + "learning_rate": 0.00033561431219679297, + "loss": 2.8883172988891603, + "step": 11560 + }, + { + "epoch": 24.25556428384394, + "learning_rate": 0.00033546924440864666, + "loss": 2.8706939697265623, + "step": 11570 + }, + { + "epoch": 24.276512175962292, + "learning_rate": 0.0003353243645727346, + "loss": 2.8278776168823243, + "step": 11580 + }, + { + "epoch": 24.29746006808065, + "learning_rate": 0.0003351796722835502, + "loss": 2.8596363067626953, + "step": 11590 + }, + { + "epoch": 24.318407960199004, + "learning_rate": 0.00033503516713681087, + "loss": 2.8937658309936523, + "step": 11600 + }, + { + "epoch": 24.33935585231736, + "learning_rate": 0.00033489084872945283, + "loss": 2.9479068756103515, + "step": 11610 + }, + { + "epoch": 24.360303744435715, + "learning_rate": 0.0003347467166596268, + "loss": 2.913376235961914, + "step": 11620 + }, + { + "epoch": 24.381251636554072, + "learning_rate": 0.0003346027705266929, + "loss": 2.7945356369018555, + "step": 11630 + }, + { + "epoch": 24.402199528672426, + "learning_rate": 0.0003344590099312164, + "loss": 2.908190155029297, + "step": 11640 + }, + { + "epoch": 24.423147420790784, + "learning_rate": 0.00033431543447496275, + "loss": 2.8602962493896484, + "step": 11650 + }, + { + "epoch": 24.444095312909138, + "learning_rate": 0.000334172043760893, + "loss": 2.8535890579223633, + "step": 11660 + }, + { + "epoch": 24.465043205027495, + "learning_rate": 0.0003340288373931593, + "loss": 2.8150957107543944, + "step": 11670 + }, + { + "epoch": 24.48599109714585, + "learning_rate": 0.0003338858149771002, + "loss": 2.847452735900879, + "step": 11680 + }, + { + "epoch": 24.506938989264206, + "learning_rate": 0.0003337429761192361, + "loss": 2.7802717208862306, + "step": 11690 + }, + { + "epoch": 24.52788688138256, + "learning_rate": 0.00033360032042726483, + "loss": 2.9678937911987306, + "step": 11700 + }, + { + "epoch": 24.548834773500918, + "learning_rate": 0.000333457847510057, + "loss": 2.8469560623168944, + "step": 11710 + }, + { + "epoch": 24.56978266561927, + "learning_rate": 0.0003333155569776514, + "loss": 2.916895866394043, + "step": 11720 + }, + { + "epoch": 24.59073055773763, + "learning_rate": 0.00033317344844125064, + "loss": 2.8457548141479494, + "step": 11730 + }, + { + "epoch": 24.611678449855983, + "learning_rate": 0.00033303152151321696, + "loss": 2.872743606567383, + "step": 11740 + }, + { + "epoch": 24.63262634197434, + "learning_rate": 0.00033288977580706714, + "loss": 2.890146255493164, + "step": 11750 + }, + { + "epoch": 24.653574234092694, + "learning_rate": 0.0003327482109374687, + "loss": 2.847947883605957, + "step": 11760 + }, + { + "epoch": 24.67452212621105, + "learning_rate": 0.00033260682652023517, + "loss": 2.9236717224121094, + "step": 11770 + }, + { + "epoch": 24.695470018329406, + "learning_rate": 0.0003324656221723217, + "loss": 2.9201459884643555, + "step": 11780 + }, + { + "epoch": 24.71641791044776, + "learning_rate": 0.000332324597511821, + "loss": 2.8557527542114256, + "step": 11790 + }, + { + "epoch": 24.737365802566117, + "learning_rate": 0.00033218375215795864, + "loss": 2.875984001159668, + "step": 11800 + }, + { + "epoch": 24.75831369468447, + "learning_rate": 0.00033204308573108897, + "loss": 2.830782890319824, + "step": 11810 + }, + { + "epoch": 24.77926158680283, + "learning_rate": 0.00033190259785269066, + "loss": 2.844138526916504, + "step": 11820 + }, + { + "epoch": 24.800209478921182, + "learning_rate": 0.0003317622881453626, + "loss": 2.8771383285522463, + "step": 11830 + }, + { + "epoch": 24.82115737103954, + "learning_rate": 0.0003316221562328194, + "loss": 2.863381767272949, + "step": 11840 + }, + { + "epoch": 24.842105263157894, + "learning_rate": 0.0003314822017398875, + "loss": 2.8934911727905273, + "step": 11850 + }, + { + "epoch": 24.86305315527625, + "learning_rate": 0.00033134242429250053, + "loss": 2.7928911209106446, + "step": 11860 + }, + { + "epoch": 24.884001047394605, + "learning_rate": 0.00033120282351769556, + "loss": 2.8646501541137694, + "step": 11870 + }, + { + "epoch": 24.904948939512963, + "learning_rate": 0.0003310633990436084, + "loss": 2.8573431015014648, + "step": 11880 + }, + { + "epoch": 24.925896831631317, + "learning_rate": 0.00033092415049947006, + "loss": 2.9596303939819335, + "step": 11890 + }, + { + "epoch": 24.946844723749674, + "learning_rate": 0.00033078507751560195, + "loss": 2.809922790527344, + "step": 11900 + }, + { + "epoch": 24.967792615868028, + "learning_rate": 0.00033064617972341235, + "loss": 2.829710578918457, + "step": 11910 + }, + { + "epoch": 24.988740507986385, + "learning_rate": 0.0003305074567553919, + "loss": 2.837497520446777, + "step": 11920 + }, + { + "epoch": 25.01047394605918, + "learning_rate": 0.0003303689082451096, + "loss": 2.998362922668457, + "step": 11930 + }, + { + "epoch": 25.031421838177533, + "learning_rate": 0.00033023053382720904, + "loss": 2.8903406143188475, + "step": 11940 + }, + { + "epoch": 25.05236973029589, + "learning_rate": 0.0003300923331374039, + "loss": 2.88183536529541, + "step": 11950 + }, + { + "epoch": 25.073317622414244, + "learning_rate": 0.00032995430581247417, + "loss": 2.8853179931640627, + "step": 11960 + }, + { + "epoch": 25.0942655145326, + "learning_rate": 0.0003298164514902622, + "loss": 2.8495412826538087, + "step": 11970 + }, + { + "epoch": 25.115213406650955, + "learning_rate": 0.0003296787698096686, + "loss": 2.8417972564697265, + "step": 11980 + }, + { + "epoch": 25.136161298769313, + "learning_rate": 0.0003295412604106482, + "loss": 2.8254583358764647, + "step": 11990 + }, + { + "epoch": 25.157109190887667, + "learning_rate": 0.00032940392293420614, + "loss": 2.8476821899414064, + "step": 12000 + }, + { + "epoch": 25.178057083006024, + "learning_rate": 0.00032926675702239425, + "loss": 2.8646284103393556, + "step": 12010 + }, + { + "epoch": 25.199004975124378, + "learning_rate": 0.00032912976231830646, + "loss": 2.8645925521850586, + "step": 12020 + }, + { + "epoch": 25.219952867242732, + "learning_rate": 0.0003289929384660757, + "loss": 2.9142387390136717, + "step": 12030 + }, + { + "epoch": 25.24090075936109, + "learning_rate": 0.0003288562851108693, + "loss": 2.911361312866211, + "step": 12040 + }, + { + "epoch": 25.261848651479443, + "learning_rate": 0.0003287198018988856, + "loss": 2.810334014892578, + "step": 12050 + }, + { + "epoch": 25.2827965435978, + "learning_rate": 0.00032858348847734985, + "loss": 2.8691171646118163, + "step": 12060 + }, + { + "epoch": 25.303744435716155, + "learning_rate": 0.00032844734449451055, + "loss": 2.8615827560424805, + "step": 12070 + }, + { + "epoch": 25.324692327834512, + "learning_rate": 0.00032831136959963553, + "loss": 2.8075706481933596, + "step": 12080 + }, + { + "epoch": 25.345640219952866, + "learning_rate": 0.00032817556344300823, + "loss": 2.836076354980469, + "step": 12090 + }, + { + "epoch": 25.366588112071224, + "learning_rate": 0.0003280399256759237, + "loss": 2.873185729980469, + "step": 12100 + }, + { + "epoch": 25.387536004189577, + "learning_rate": 0.0003279044559506852, + "loss": 2.8835927963256838, + "step": 12110 + }, + { + "epoch": 25.408483896307935, + "learning_rate": 0.0003277691539206003, + "loss": 2.884838676452637, + "step": 12120 + }, + { + "epoch": 25.42943178842629, + "learning_rate": 0.0003276340192399769, + "loss": 2.9353681564331056, + "step": 12130 + }, + { + "epoch": 25.450379680544646, + "learning_rate": 0.00032749905156412, + "loss": 2.87127571105957, + "step": 12140 + }, + { + "epoch": 25.471327572663, + "learning_rate": 0.0003273642505493275, + "loss": 2.848041534423828, + "step": 12150 + }, + { + "epoch": 25.492275464781358, + "learning_rate": 0.0003272296158528871, + "loss": 2.8736820220947266, + "step": 12160 + }, + { + "epoch": 25.51322335689971, + "learning_rate": 0.000327095147133072, + "loss": 2.889766502380371, + "step": 12170 + }, + { + "epoch": 25.53417124901807, + "learning_rate": 0.00032696084404913777, + "loss": 2.8456445693969727, + "step": 12180 + }, + { + "epoch": 25.555119141136423, + "learning_rate": 0.00032682670626131837, + "loss": 2.8694175720214843, + "step": 12190 + }, + { + "epoch": 25.57606703325478, + "learning_rate": 0.0003266927334308229, + "loss": 2.863827705383301, + "step": 12200 + }, + { + "epoch": 25.597014925373134, + "learning_rate": 0.0003265589252198317, + "loss": 2.8949514389038087, + "step": 12210 + }, + { + "epoch": 25.617962817491488, + "learning_rate": 0.0003264252812914928, + "loss": 2.870989990234375, + "step": 12220 + }, + { + "epoch": 25.638910709609846, + "learning_rate": 0.0003262918013099186, + "loss": 2.8301280975341796, + "step": 12230 + }, + { + "epoch": 25.6598586017282, + "learning_rate": 0.00032615848494018204, + "loss": 2.7910818099975585, + "step": 12240 + }, + { + "epoch": 25.680806493846557, + "learning_rate": 0.0003260253318483131, + "loss": 2.8830698013305662, + "step": 12250 + }, + { + "epoch": 25.70175438596491, + "learning_rate": 0.0003258923417012957, + "loss": 2.886226844787598, + "step": 12260 + }, + { + "epoch": 25.72270227808327, + "learning_rate": 0.00032575951416706354, + "loss": 2.9646997451782227, + "step": 12270 + }, + { + "epoch": 25.743650170201622, + "learning_rate": 0.0003256268489144972, + "loss": 2.896713066101074, + "step": 12280 + }, + { + "epoch": 25.76459806231998, + "learning_rate": 0.0003254943456134202, + "loss": 2.8680368423461915, + "step": 12290 + }, + { + "epoch": 25.785545954438334, + "learning_rate": 0.0003253620039345959, + "loss": 2.866026496887207, + "step": 12300 + }, + { + "epoch": 25.80649384655669, + "learning_rate": 0.0003252298235497241, + "loss": 2.862067985534668, + "step": 12310 + }, + { + "epoch": 25.827441738675045, + "learning_rate": 0.0003250978041314371, + "loss": 2.8973188400268555, + "step": 12320 + }, + { + "epoch": 25.848389630793402, + "learning_rate": 0.000324965945353297, + "loss": 2.9389106750488283, + "step": 12330 + }, + { + "epoch": 25.869337522911756, + "learning_rate": 0.0003248342468897917, + "loss": 2.9147424697875977, + "step": 12340 + }, + { + "epoch": 25.890285415030114, + "learning_rate": 0.00032470270841633195, + "loss": 2.894465446472168, + "step": 12350 + }, + { + "epoch": 25.911233307148468, + "learning_rate": 0.00032457132960924783, + "loss": 2.9301485061645507, + "step": 12360 + }, + { + "epoch": 25.932181199266825, + "learning_rate": 0.00032444011014578535, + "loss": 2.8576644897460937, + "step": 12370 + }, + { + "epoch": 25.95312909138518, + "learning_rate": 0.00032430904970410314, + "loss": 2.836701202392578, + "step": 12380 + }, + { + "epoch": 25.974076983503537, + "learning_rate": 0.0003241781479632693, + "loss": 2.8457481384277346, + "step": 12390 + }, + { + "epoch": 25.99502487562189, + "learning_rate": 0.0003240474046032579, + "loss": 2.829239082336426, + "step": 12400 + }, + { + "epoch": 26.016758313694684, + "learning_rate": 0.00032391681930494566, + "loss": 3.0122323989868165, + "step": 12410 + }, + { + "epoch": 26.03770620581304, + "learning_rate": 0.000323786391750109, + "loss": 2.8899608612060548, + "step": 12420 + }, + { + "epoch": 26.058654097931395, + "learning_rate": 0.0003236561216214202, + "loss": 2.9380813598632813, + "step": 12430 + }, + { + "epoch": 26.079601990049753, + "learning_rate": 0.000323526008602445, + "loss": 2.9364286422729493, + "step": 12440 + }, + { + "epoch": 26.100549882168107, + "learning_rate": 0.0003233960523776387, + "loss": 2.8298776626586912, + "step": 12450 + }, + { + "epoch": 26.121497774286464, + "learning_rate": 0.0003232662526323429, + "loss": 2.868173027038574, + "step": 12460 + }, + { + "epoch": 26.142445666404818, + "learning_rate": 0.0003231366090527828, + "loss": 2.8364093780517576, + "step": 12470 + }, + { + "epoch": 26.163393558523175, + "learning_rate": 0.00032300712132606366, + "loss": 2.917738342285156, + "step": 12480 + }, + { + "epoch": 26.18434145064153, + "learning_rate": 0.0003228777891401678, + "loss": 2.8115827560424806, + "step": 12490 + }, + { + "epoch": 26.205289342759883, + "learning_rate": 0.0003227486121839514, + "loss": 2.8544151306152346, + "step": 12500 + }, + { + "epoch": 26.22623723487824, + "learning_rate": 0.00032261959014714107, + "loss": 2.890985870361328, + "step": 12510 + }, + { + "epoch": 26.247185126996595, + "learning_rate": 0.0003224907227203312, + "loss": 2.8269269943237303, + "step": 12520 + }, + { + "epoch": 26.268133019114952, + "learning_rate": 0.0003223620095949806, + "loss": 2.8392301559448243, + "step": 12530 + }, + { + "epoch": 26.289080911233306, + "learning_rate": 0.00032223345046340936, + "loss": 2.8283065795898437, + "step": 12540 + }, + { + "epoch": 26.310028803351663, + "learning_rate": 0.00032210504501879576, + "loss": 2.9033248901367186, + "step": 12550 + }, + { + "epoch": 26.330976695470017, + "learning_rate": 0.0003219767929551733, + "loss": 2.8192907333374024, + "step": 12560 + }, + { + "epoch": 26.351924587588375, + "learning_rate": 0.00032184869396742754, + "loss": 2.8758308410644533, + "step": 12570 + }, + { + "epoch": 26.37287247970673, + "learning_rate": 0.00032172074775129323, + "loss": 2.8491661071777346, + "step": 12580 + }, + { + "epoch": 26.393820371825086, + "learning_rate": 0.00032159295400335114, + "loss": 2.862008285522461, + "step": 12590 + }, + { + "epoch": 26.41476826394344, + "learning_rate": 0.00032146531242102476, + "loss": 2.854539489746094, + "step": 12600 + }, + { + "epoch": 26.435716156061797, + "learning_rate": 0.0003213378227025779, + "loss": 2.9059074401855467, + "step": 12610 + }, + { + "epoch": 26.45666404818015, + "learning_rate": 0.00032121048454711114, + "loss": 2.8347517013549806, + "step": 12620 + }, + { + "epoch": 26.47761194029851, + "learning_rate": 0.00032108329765455926, + "loss": 2.8621740341186523, + "step": 12630 + }, + { + "epoch": 26.498559832416863, + "learning_rate": 0.00032095626172568784, + "loss": 2.8287914276123045, + "step": 12640 + }, + { + "epoch": 26.51950772453522, + "learning_rate": 0.00032082937646209084, + "loss": 2.8201780319213867, + "step": 12650 + }, + { + "epoch": 26.540455616653574, + "learning_rate": 0.0003207026415661871, + "loss": 2.853387451171875, + "step": 12660 + }, + { + "epoch": 26.56140350877193, + "learning_rate": 0.0003205760567412178, + "loss": 2.8255029678344727, + "step": 12670 + }, + { + "epoch": 26.582351400890285, + "learning_rate": 0.00032044962169124335, + "loss": 2.8133966445922853, + "step": 12680 + }, + { + "epoch": 26.60329929300864, + "learning_rate": 0.0003203233361211406, + "loss": 2.8209064483642576, + "step": 12690 + }, + { + "epoch": 26.624247185126997, + "learning_rate": 0.00032019719973659996, + "loss": 2.839722442626953, + "step": 12700 + }, + { + "epoch": 26.64519507724535, + "learning_rate": 0.00032007121224412224, + "loss": 2.8414018630981444, + "step": 12710 + }, + { + "epoch": 26.666142969363708, + "learning_rate": 0.0003199453733510162, + "loss": 2.8677789688110353, + "step": 12720 + }, + { + "epoch": 26.687090861482062, + "learning_rate": 0.00031981968276539543, + "loss": 2.9177148818969725, + "step": 12730 + }, + { + "epoch": 26.70803875360042, + "learning_rate": 0.0003196941401961754, + "loss": 2.8555475234985352, + "step": 12740 + }, + { + "epoch": 26.728986645718773, + "learning_rate": 0.000319568745353071, + "loss": 2.8636154174804687, + "step": 12750 + }, + { + "epoch": 26.74993453783713, + "learning_rate": 0.0003194434979465935, + "loss": 2.810639190673828, + "step": 12760 + }, + { + "epoch": 26.770882429955485, + "learning_rate": 0.0003193183976880476, + "loss": 2.9356500625610353, + "step": 12770 + }, + { + "epoch": 26.791830322073842, + "learning_rate": 0.00031919344428952895, + "loss": 2.848637580871582, + "step": 12780 + }, + { + "epoch": 26.812778214192196, + "learning_rate": 0.0003190686374639211, + "loss": 2.8234004974365234, + "step": 12790 + }, + { + "epoch": 26.833726106310554, + "learning_rate": 0.00031894397692489295, + "loss": 2.8002485275268554, + "step": 12800 + }, + { + "epoch": 26.854673998428908, + "learning_rate": 0.0003188194623868958, + "loss": 2.841193199157715, + "step": 12810 + }, + { + "epoch": 26.875621890547265, + "learning_rate": 0.00031869509356516063, + "loss": 2.8377119064331056, + "step": 12820 + }, + { + "epoch": 26.89656978266562, + "learning_rate": 0.00031857087017569556, + "loss": 2.797208786010742, + "step": 12830 + }, + { + "epoch": 26.917517674783976, + "learning_rate": 0.0003184467919352828, + "loss": 2.778369140625, + "step": 12840 + }, + { + "epoch": 26.93846556690233, + "learning_rate": 0.0003183228585614763, + "loss": 2.8303714752197267, + "step": 12850 + }, + { + "epoch": 26.959413459020688, + "learning_rate": 0.0003181990697725988, + "loss": 2.805090141296387, + "step": 12860 + }, + { + "epoch": 26.98036135113904, + "learning_rate": 0.0003180754252877392, + "loss": 2.7620264053344727, + "step": 12870 + }, + { + "epoch": 27.002094789211835, + "learning_rate": 0.0003179519248267498, + "loss": 2.9018489837646486, + "step": 12880 + }, + { + "epoch": 27.023042681330192, + "learning_rate": 0.000317828568110244, + "loss": 2.8413219451904297, + "step": 12890 + }, + { + "epoch": 27.043990573448546, + "learning_rate": 0.000317705354859593, + "loss": 2.868427276611328, + "step": 12900 + }, + { + "epoch": 27.064938465566904, + "learning_rate": 0.0003175822847969239, + "loss": 2.84520263671875, + "step": 12910 + }, + { + "epoch": 27.085886357685258, + "learning_rate": 0.00031745935764511645, + "loss": 2.865756607055664, + "step": 12920 + }, + { + "epoch": 27.106834249803615, + "learning_rate": 0.0003173365731278007, + "loss": 2.8851186752319338, + "step": 12930 + }, + { + "epoch": 27.12778214192197, + "learning_rate": 0.00031721393096935445, + "loss": 2.8631362915039062, + "step": 12940 + }, + { + "epoch": 27.148730034040323, + "learning_rate": 0.00031709143089490063, + "loss": 2.8974273681640623, + "step": 12950 + }, + { + "epoch": 27.16967792615868, + "learning_rate": 0.00031696907263030445, + "loss": 2.8190950393676757, + "step": 12960 + }, + { + "epoch": 27.190625818277034, + "learning_rate": 0.00031684685590217115, + "loss": 2.861093521118164, + "step": 12970 + }, + { + "epoch": 27.211573710395392, + "learning_rate": 0.00031672478043784336, + "loss": 2.925172233581543, + "step": 12980 + }, + { + "epoch": 27.232521602513746, + "learning_rate": 0.0003166028459653984, + "loss": 2.7551206588745116, + "step": 12990 + }, + { + "epoch": 27.253469494632103, + "learning_rate": 0.0003164810522136458, + "loss": 2.8190824508666994, + "step": 13000 + }, + { + "epoch": 27.274417386750457, + "learning_rate": 0.0003163593989121249, + "loss": 2.884243965148926, + "step": 13010 + }, + { + "epoch": 27.295365278868815, + "learning_rate": 0.0003162378857911022, + "loss": 2.831955909729004, + "step": 13020 + }, + { + "epoch": 27.31631317098717, + "learning_rate": 0.00031611651258156884, + "loss": 2.891588020324707, + "step": 13030 + }, + { + "epoch": 27.337261063105526, + "learning_rate": 0.0003159952790152381, + "loss": 2.8689960479736327, + "step": 13040 + }, + { + "epoch": 27.35820895522388, + "learning_rate": 0.0003158741848245431, + "loss": 2.844234085083008, + "step": 13050 + }, + { + "epoch": 27.379156847342237, + "learning_rate": 0.0003157532297426339, + "loss": 2.783745765686035, + "step": 13060 + }, + { + "epoch": 27.40010473946059, + "learning_rate": 0.00031563241350337546, + "loss": 2.85959415435791, + "step": 13070 + }, + { + "epoch": 27.42105263157895, + "learning_rate": 0.00031551173584134514, + "loss": 2.828862762451172, + "step": 13080 + }, + { + "epoch": 27.442000523697303, + "learning_rate": 0.0003153911964918298, + "loss": 2.8126575469970705, + "step": 13090 + }, + { + "epoch": 27.46294841581566, + "learning_rate": 0.0003152707951908239, + "loss": 2.8336280822753905, + "step": 13100 + }, + { + "epoch": 27.483896307934014, + "learning_rate": 0.0003151505316750269, + "loss": 2.8542291641235353, + "step": 13110 + }, + { + "epoch": 27.50484420005237, + "learning_rate": 0.0003150304056818405, + "loss": 2.8555719375610353, + "step": 13120 + }, + { + "epoch": 27.525792092170725, + "learning_rate": 0.00031491041694936697, + "loss": 2.8440032958984376, + "step": 13130 + }, + { + "epoch": 27.54673998428908, + "learning_rate": 0.000314790565216406, + "loss": 2.899538040161133, + "step": 13140 + }, + { + "epoch": 27.567687876407437, + "learning_rate": 0.0003146708502224526, + "loss": 2.823881149291992, + "step": 13150 + }, + { + "epoch": 27.58863576852579, + "learning_rate": 0.0003145512717076948, + "loss": 2.8198898315429686, + "step": 13160 + }, + { + "epoch": 27.609583660644148, + "learning_rate": 0.00031443182941301147, + "loss": 2.8212156295776367, + "step": 13170 + }, + { + "epoch": 27.630531552762502, + "learning_rate": 0.0003143125230799694, + "loss": 2.7753381729125977, + "step": 13180 + }, + { + "epoch": 27.65147944488086, + "learning_rate": 0.00031419335245082134, + "loss": 2.812895393371582, + "step": 13190 + }, + { + "epoch": 27.672427336999213, + "learning_rate": 0.00031407431726850375, + "loss": 2.8747041702270506, + "step": 13200 + }, + { + "epoch": 27.69337522911757, + "learning_rate": 0.00031395541727663413, + "loss": 2.8663089752197264, + "step": 13210 + }, + { + "epoch": 27.714323121235925, + "learning_rate": 0.0003138366522195088, + "loss": 2.8993961334228517, + "step": 13220 + }, + { + "epoch": 27.735271013354282, + "learning_rate": 0.0003137180218421011, + "loss": 2.9394744873046874, + "step": 13230 + }, + { + "epoch": 27.756218905472636, + "learning_rate": 0.0003135995258900582, + "loss": 2.8471282958984374, + "step": 13240 + }, + { + "epoch": 27.777166797590993, + "learning_rate": 0.0003134811641096994, + "loss": 2.7851446151733397, + "step": 13250 + }, + { + "epoch": 27.798114689709347, + "learning_rate": 0.00031336293624801393, + "loss": 2.819938850402832, + "step": 13260 + }, + { + "epoch": 27.819062581827705, + "learning_rate": 0.00031324484205265824, + "loss": 2.8013900756835937, + "step": 13270 + }, + { + "epoch": 27.84001047394606, + "learning_rate": 0.000313126881271954, + "loss": 2.850057601928711, + "step": 13280 + }, + { + "epoch": 27.860958366064416, + "learning_rate": 0.0003130090536548859, + "loss": 2.7631250381469727, + "step": 13290 + }, + { + "epoch": 27.88190625818277, + "learning_rate": 0.00031289135895109924, + "loss": 2.8360868453979493, + "step": 13300 + }, + { + "epoch": 27.902854150301128, + "learning_rate": 0.00031277379691089786, + "loss": 2.804159927368164, + "step": 13310 + }, + { + "epoch": 27.92380204241948, + "learning_rate": 0.00031265636728524174, + "loss": 2.8401294708251954, + "step": 13320 + }, + { + "epoch": 27.94474993453784, + "learning_rate": 0.000312539069825745, + "loss": 2.850791168212891, + "step": 13330 + }, + { + "epoch": 27.965697826656193, + "learning_rate": 0.00031242190428467325, + "loss": 2.862323188781738, + "step": 13340 + }, + { + "epoch": 27.986645718774547, + "learning_rate": 0.0003123048704149423, + "loss": 2.8848134994506838, + "step": 13350 + }, + { + "epoch": 28.008379156847344, + "learning_rate": 0.0003121879679701147, + "loss": 2.9553651809692383, + "step": 13360 + }, + { + "epoch": 28.029327048965698, + "learning_rate": 0.00031207119670439884, + "loss": 2.893220138549805, + "step": 13370 + }, + { + "epoch": 28.050274941084055, + "learning_rate": 0.00031195455637264574, + "loss": 2.8204929351806642, + "step": 13380 + }, + { + "epoch": 28.07122283320241, + "learning_rate": 0.00031183804673034756, + "loss": 2.8356761932373047, + "step": 13390 + }, + { + "epoch": 28.092170725320763, + "learning_rate": 0.0003117216675336353, + "loss": 2.800448989868164, + "step": 13400 + }, + { + "epoch": 28.11311861743912, + "learning_rate": 0.00031160541853927627, + "loss": 2.8977182388305662, + "step": 13410 + }, + { + "epoch": 28.134066509557474, + "learning_rate": 0.0003114892995046725, + "loss": 2.8017560958862306, + "step": 13420 + }, + { + "epoch": 28.15501440167583, + "learning_rate": 0.00031137331018785835, + "loss": 2.7457189559936523, + "step": 13430 + }, + { + "epoch": 28.175962293794186, + "learning_rate": 0.00031125745034749834, + "loss": 2.8290485382080077, + "step": 13440 + }, + { + "epoch": 28.196910185912543, + "learning_rate": 0.00031114171974288516, + "loss": 2.8317813873291016, + "step": 13450 + }, + { + "epoch": 28.217858078030897, + "learning_rate": 0.00031102611813393753, + "loss": 2.7843399047851562, + "step": 13460 + }, + { + "epoch": 28.238805970149254, + "learning_rate": 0.0003109106452811981, + "loss": 2.8257192611694335, + "step": 13470 + }, + { + "epoch": 28.25975386226761, + "learning_rate": 0.00031079530094583135, + "loss": 2.8432809829711916, + "step": 13480 + }, + { + "epoch": 28.280701754385966, + "learning_rate": 0.0003106800848896216, + "loss": 2.882096862792969, + "step": 13490 + }, + { + "epoch": 28.30164964650432, + "learning_rate": 0.0003105649968749708, + "loss": 2.87137508392334, + "step": 13500 + }, + { + "epoch": 28.322597538622677, + "learning_rate": 0.0003104500366648965, + "loss": 2.8303447723388673, + "step": 13510 + }, + { + "epoch": 28.34354543074103, + "learning_rate": 0.0003103352040230302, + "loss": 2.955478477478027, + "step": 13520 + }, + { + "epoch": 28.36449332285939, + "learning_rate": 0.00031022049871361445, + "loss": 2.7974782943725587, + "step": 13530 + }, + { + "epoch": 28.385441214977742, + "learning_rate": 0.0003101059205015017, + "loss": 2.882868766784668, + "step": 13540 + }, + { + "epoch": 28.4063891070961, + "learning_rate": 0.0003099914691521518, + "loss": 2.9435708999633787, + "step": 13550 + }, + { + "epoch": 28.427336999214454, + "learning_rate": 0.00030987714443163, + "loss": 2.8506664276123046, + "step": 13560 + }, + { + "epoch": 28.44828489133281, + "learning_rate": 0.00030976294610660516, + "loss": 2.8492944717407225, + "step": 13570 + }, + { + "epoch": 28.469232783451165, + "learning_rate": 0.00030964887394434754, + "loss": 2.8658618927001953, + "step": 13580 + }, + { + "epoch": 28.49018067556952, + "learning_rate": 0.000309534927712727, + "loss": 2.8701282501220704, + "step": 13590 + }, + { + "epoch": 28.511128567687877, + "learning_rate": 0.0003094211071802107, + "loss": 2.8161798477172852, + "step": 13600 + }, + { + "epoch": 28.53207645980623, + "learning_rate": 0.00030930741211586155, + "loss": 2.768409538269043, + "step": 13610 + }, + { + "epoch": 28.553024351924588, + "learning_rate": 0.0003091938422893358, + "loss": 2.84487361907959, + "step": 13620 + }, + { + "epoch": 28.573972244042942, + "learning_rate": 0.00030908039747088155, + "loss": 2.8081539154052733, + "step": 13630 + }, + { + "epoch": 28.5949201361613, + "learning_rate": 0.00030896707743133635, + "loss": 2.8049062728881835, + "step": 13640 + }, + { + "epoch": 28.615868028279653, + "learning_rate": 0.0003088538819421255, + "loss": 2.8450254440307616, + "step": 13650 + }, + { + "epoch": 28.63681592039801, + "learning_rate": 0.00030874081077526003, + "loss": 2.8079158782958986, + "step": 13660 + }, + { + "epoch": 28.657763812516365, + "learning_rate": 0.00030862786370333505, + "loss": 2.8801244735717773, + "step": 13670 + }, + { + "epoch": 28.678711704634722, + "learning_rate": 0.00030851504049952727, + "loss": 2.8432153701782226, + "step": 13680 + }, + { + "epoch": 28.699659596753076, + "learning_rate": 0.00030840234093759347, + "loss": 2.913180923461914, + "step": 13690 + }, + { + "epoch": 28.720607488871433, + "learning_rate": 0.0003082897647918688, + "loss": 2.857924461364746, + "step": 13700 + }, + { + "epoch": 28.741555380989787, + "learning_rate": 0.0003081773118372642, + "loss": 2.7912296295166015, + "step": 13710 + }, + { + "epoch": 28.762503273108145, + "learning_rate": 0.00030806498184926523, + "loss": 2.8504261016845702, + "step": 13720 + }, + { + "epoch": 28.7834511652265, + "learning_rate": 0.0003079527746039298, + "loss": 2.8378028869628906, + "step": 13730 + }, + { + "epoch": 28.804399057344856, + "learning_rate": 0.00030784068987788624, + "loss": 2.803904914855957, + "step": 13740 + }, + { + "epoch": 28.82534694946321, + "learning_rate": 0.00030772872744833183, + "loss": 2.839299774169922, + "step": 13750 + }, + { + "epoch": 28.846294841581567, + "learning_rate": 0.00030761688709303036, + "loss": 2.7884681701660154, + "step": 13760 + }, + { + "epoch": 28.86724273369992, + "learning_rate": 0.0003075051685903109, + "loss": 2.88138427734375, + "step": 13770 + }, + { + "epoch": 28.888190625818275, + "learning_rate": 0.00030739357171906536, + "loss": 2.81328125, + "step": 13780 + }, + { + "epoch": 28.909138517936633, + "learning_rate": 0.0003072820962587471, + "loss": 2.8141046524047852, + "step": 13790 + }, + { + "epoch": 28.930086410054987, + "learning_rate": 0.00030717074198936904, + "loss": 2.7672204971313477, + "step": 13800 + }, + { + "epoch": 28.951034302173344, + "learning_rate": 0.0003070595086915015, + "loss": 2.8781991958618165, + "step": 13810 + }, + { + "epoch": 28.971982194291698, + "learning_rate": 0.00030694839614627076, + "loss": 2.7781099319458007, + "step": 13820 + }, + { + "epoch": 28.992930086410055, + "learning_rate": 0.0003068374041353571, + "loss": 2.879766082763672, + "step": 13830 + }, + { + "epoch": 29.01466352448285, + "learning_rate": 0.000306726532440993, + "loss": 2.8991397857666015, + "step": 13840 + }, + { + "epoch": 29.035611416601206, + "learning_rate": 0.0003066157808459613, + "loss": 2.8512521743774415, + "step": 13850 + }, + { + "epoch": 29.05655930871956, + "learning_rate": 0.0003065051491335936, + "loss": 2.833390235900879, + "step": 13860 + }, + { + "epoch": 29.077507200837914, + "learning_rate": 0.0003063946370877681, + "loss": 2.8554765701293947, + "step": 13870 + }, + { + "epoch": 29.09845509295627, + "learning_rate": 0.0003062842444929085, + "loss": 2.7805418014526366, + "step": 13880 + }, + { + "epoch": 29.119402985074625, + "learning_rate": 0.00030617397113398125, + "loss": 3.09820671081543, + "step": 13890 + }, + { + "epoch": 29.140350877192983, + "learning_rate": 0.00030606381679649483, + "loss": 2.900446128845215, + "step": 13900 + }, + { + "epoch": 29.161298769311337, + "learning_rate": 0.00030595378126649727, + "loss": 2.852696418762207, + "step": 13910 + }, + { + "epoch": 29.182246661429694, + "learning_rate": 0.0003058438643305747, + "loss": 3.0798343658447265, + "step": 13920 + }, + { + "epoch": 29.203194553548048, + "learning_rate": 0.00030573406577584955, + "loss": 2.8329389572143553, + "step": 13930 + }, + { + "epoch": 29.224142445666406, + "learning_rate": 0.000305624385389979, + "loss": 2.7638198852539064, + "step": 13940 + }, + { + "epoch": 29.24509033778476, + "learning_rate": 0.0003055148229611527, + "loss": 2.7774702072143556, + "step": 13950 + }, + { + "epoch": 29.266038229903117, + "learning_rate": 0.00030540537827809176, + "loss": 2.884586524963379, + "step": 13960 + }, + { + "epoch": 29.28698612202147, + "learning_rate": 0.0003052960511300467, + "loss": 2.858045196533203, + "step": 13970 + }, + { + "epoch": 29.30793401413983, + "learning_rate": 0.0003051868413067956, + "loss": 2.8505125045776367, + "step": 13980 + }, + { + "epoch": 29.328881906258182, + "learning_rate": 0.00030507774859864277, + "loss": 2.840318298339844, + "step": 13990 + }, + { + "epoch": 29.34982979837654, + "learning_rate": 0.0003049687727964166, + "loss": 2.871793746948242, + "step": 14000 + }, + { + "epoch": 29.370777690494894, + "learning_rate": 0.00030485991369146834, + "loss": 2.814739990234375, + "step": 14010 + }, + { + "epoch": 29.39172558261325, + "learning_rate": 0.00030475117107567015, + "loss": 2.8241125106811524, + "step": 14020 + }, + { + "epoch": 29.412673474731605, + "learning_rate": 0.0003046425447414135, + "loss": 2.802973747253418, + "step": 14030 + }, + { + "epoch": 29.433621366849962, + "learning_rate": 0.0003045340344816073, + "loss": 2.829861068725586, + "step": 14040 + }, + { + "epoch": 29.454569258968316, + "learning_rate": 0.0003044256400896769, + "loss": 2.823344612121582, + "step": 14050 + }, + { + "epoch": 29.47551715108667, + "learning_rate": 0.0003043173613595614, + "loss": 2.811284065246582, + "step": 14060 + }, + { + "epoch": 29.496465043205028, + "learning_rate": 0.0003042091980857131, + "loss": 2.8590465545654298, + "step": 14070 + }, + { + "epoch": 29.51741293532338, + "learning_rate": 0.0003041011500630949, + "loss": 2.8229595184326173, + "step": 14080 + }, + { + "epoch": 29.53836082744174, + "learning_rate": 0.00030399321708717947, + "loss": 2.8343103408813475, + "step": 14090 + }, + { + "epoch": 29.559308719560093, + "learning_rate": 0.00030388539895394697, + "loss": 2.804738235473633, + "step": 14100 + }, + { + "epoch": 29.58025661167845, + "learning_rate": 0.00030377769545988394, + "loss": 2.8719600677490233, + "step": 14110 + }, + { + "epoch": 29.601204503796804, + "learning_rate": 0.00030367010640198143, + "loss": 2.777914810180664, + "step": 14120 + }, + { + "epoch": 29.622152395915162, + "learning_rate": 0.0003035626315777333, + "loss": 2.837109375, + "step": 14130 + }, + { + "epoch": 29.643100288033516, + "learning_rate": 0.00030345527078513493, + "loss": 2.8141595840454103, + "step": 14140 + }, + { + "epoch": 29.664048180151873, + "learning_rate": 0.0003033480238226813, + "loss": 2.8648092269897463, + "step": 14150 + }, + { + "epoch": 29.684996072270227, + "learning_rate": 0.0003032408904893656, + "loss": 2.7934087753295898, + "step": 14160 + }, + { + "epoch": 29.705943964388585, + "learning_rate": 0.00030313387058467756, + "loss": 2.834004783630371, + "step": 14170 + }, + { + "epoch": 29.72689185650694, + "learning_rate": 0.0003030269639086021, + "loss": 2.8099668502807615, + "step": 14180 + }, + { + "epoch": 29.747839748625296, + "learning_rate": 0.0003029201702616173, + "loss": 2.830114555358887, + "step": 14190 + }, + { + "epoch": 29.76878764074365, + "learning_rate": 0.0003028134894446933, + "loss": 2.780957794189453, + "step": 14200 + }, + { + "epoch": 29.789735532862007, + "learning_rate": 0.00030270692125929034, + "loss": 2.829334831237793, + "step": 14210 + }, + { + "epoch": 29.81068342498036, + "learning_rate": 0.00030260046550735763, + "loss": 2.840847969055176, + "step": 14220 + }, + { + "epoch": 29.83163131709872, + "learning_rate": 0.0003024941219913316, + "loss": 2.855925369262695, + "step": 14230 + }, + { + "epoch": 29.852579209217073, + "learning_rate": 0.00030238789051413416, + "loss": 2.8478092193603515, + "step": 14240 + }, + { + "epoch": 29.873527101335426, + "learning_rate": 0.00030228177087917153, + "loss": 2.8140996932983398, + "step": 14250 + }, + { + "epoch": 29.894474993453784, + "learning_rate": 0.00030217576289033235, + "loss": 2.803069496154785, + "step": 14260 + }, + { + "epoch": 29.915422885572138, + "learning_rate": 0.00030206986635198654, + "loss": 2.7434965133666993, + "step": 14270 + }, + { + "epoch": 29.936370777690495, + "learning_rate": 0.00030196408106898356, + "loss": 2.859099006652832, + "step": 14280 + }, + { + "epoch": 29.95731866980885, + "learning_rate": 0.0003018584068466507, + "loss": 2.9088722229003907, + "step": 14290 + }, + { + "epoch": 29.978266561927207, + "learning_rate": 0.0003017528434907922, + "loss": 2.7880224227905273, + "step": 14300 + }, + { + "epoch": 29.99921445404556, + "learning_rate": 0.00030164739080768704, + "loss": 2.8113405227661135, + "step": 14310 + }, + { + "epoch": 30.020947892118354, + "learning_rate": 0.0003015420486040879, + "loss": 2.893621826171875, + "step": 14320 + }, + { + "epoch": 30.04189578423671, + "learning_rate": 0.00030143681668721935, + "loss": 2.8216567993164063, + "step": 14330 + }, + { + "epoch": 30.062843676355065, + "learning_rate": 0.00030133169486477694, + "loss": 2.8155281066894533, + "step": 14340 + }, + { + "epoch": 30.083791568473423, + "learning_rate": 0.0003012266829449249, + "loss": 2.8872468948364256, + "step": 14350 + }, + { + "epoch": 30.104739460591777, + "learning_rate": 0.00030112178073629544, + "loss": 2.820456886291504, + "step": 14360 + }, + { + "epoch": 30.125687352710134, + "learning_rate": 0.0003010169880479867, + "loss": 2.869482231140137, + "step": 14370 + }, + { + "epoch": 30.146635244828488, + "learning_rate": 0.0003009123046895618, + "loss": 2.8011972427368166, + "step": 14380 + }, + { + "epoch": 30.167583136946845, + "learning_rate": 0.00030080773047104687, + "loss": 2.8537342071533205, + "step": 14390 + }, + { + "epoch": 30.1885310290652, + "learning_rate": 0.0003007032652029301, + "loss": 2.808944892883301, + "step": 14400 + }, + { + "epoch": 30.209478921183557, + "learning_rate": 0.00030059890869615983, + "loss": 2.833651542663574, + "step": 14410 + }, + { + "epoch": 30.23042681330191, + "learning_rate": 0.0003004946607621435, + "loss": 2.860894203186035, + "step": 14420 + }, + { + "epoch": 30.251374705420268, + "learning_rate": 0.0003003905212127461, + "loss": 2.835972213745117, + "step": 14430 + }, + { + "epoch": 30.272322597538622, + "learning_rate": 0.00030028648986028843, + "loss": 2.857589912414551, + "step": 14440 + }, + { + "epoch": 30.29327048965698, + "learning_rate": 0.00030018256651754633, + "loss": 2.845281219482422, + "step": 14450 + }, + { + "epoch": 30.314218381775333, + "learning_rate": 0.00030007875099774864, + "loss": 2.7922155380249025, + "step": 14460 + }, + { + "epoch": 30.33516627389369, + "learning_rate": 0.0002999750431145761, + "loss": 2.846644973754883, + "step": 14470 + }, + { + "epoch": 30.356114166012045, + "learning_rate": 0.0002998714426821599, + "loss": 2.83693904876709, + "step": 14480 + }, + { + "epoch": 30.377062058130402, + "learning_rate": 0.00029976794951508027, + "loss": 2.8328250885009765, + "step": 14490 + }, + { + "epoch": 30.398009950248756, + "learning_rate": 0.00029966456342836505, + "loss": 2.8287097930908205, + "step": 14500 + }, + { + "epoch": 30.41895784236711, + "learning_rate": 0.0002995612842374884, + "loss": 2.818513298034668, + "step": 14510 + }, + { + "epoch": 30.439905734485468, + "learning_rate": 0.0002994581117583693, + "loss": 2.804762077331543, + "step": 14520 + }, + { + "epoch": 30.46085362660382, + "learning_rate": 0.00029935504580737006, + "loss": 2.8560808181762694, + "step": 14530 + }, + { + "epoch": 30.48180151872218, + "learning_rate": 0.00029925208620129546, + "loss": 2.7961631774902345, + "step": 14540 + }, + { + "epoch": 30.502749410840533, + "learning_rate": 0.0002991492327573909, + "loss": 2.8281347274780275, + "step": 14550 + }, + { + "epoch": 30.52369730295889, + "learning_rate": 0.0002990464852933409, + "loss": 2.813071060180664, + "step": 14560 + }, + { + "epoch": 30.544645195077244, + "learning_rate": 0.0002989438436272684, + "loss": 2.765872001647949, + "step": 14570 + }, + { + "epoch": 30.5655930871956, + "learning_rate": 0.00029884130757773275, + "loss": 2.7835336685180665, + "step": 14580 + }, + { + "epoch": 30.586540979313956, + "learning_rate": 0.0002987388769637288, + "loss": 2.855548286437988, + "step": 14590 + }, + { + "epoch": 30.607488871432313, + "learning_rate": 0.00029863655160468534, + "loss": 2.804723358154297, + "step": 14600 + }, + { + "epoch": 30.628436763550667, + "learning_rate": 0.0002985343313204637, + "loss": 2.8737287521362305, + "step": 14610 + }, + { + "epoch": 30.649384655669024, + "learning_rate": 0.0002984322159313568, + "loss": 2.871350860595703, + "step": 14620 + }, + { + "epoch": 30.67033254778738, + "learning_rate": 0.00029833020525808714, + "loss": 2.780613327026367, + "step": 14630 + }, + { + "epoch": 30.691280439905736, + "learning_rate": 0.00029822829912180636, + "loss": 2.8216100692749024, + "step": 14640 + }, + { + "epoch": 30.71222833202409, + "learning_rate": 0.0002981264973440931, + "loss": 2.789328956604004, + "step": 14650 + }, + { + "epoch": 30.733176224142447, + "learning_rate": 0.00029802479974695223, + "loss": 2.7879051208496093, + "step": 14660 + }, + { + "epoch": 30.7541241162608, + "learning_rate": 0.00029792320615281337, + "loss": 2.762567710876465, + "step": 14670 + }, + { + "epoch": 30.77507200837916, + "learning_rate": 0.00029782171638452937, + "loss": 2.8410247802734374, + "step": 14680 + }, + { + "epoch": 30.796019900497512, + "learning_rate": 0.0002977203302653755, + "loss": 2.7910200119018556, + "step": 14690 + }, + { + "epoch": 30.81696779261587, + "learning_rate": 0.0002976190476190476, + "loss": 2.8463191986083984, + "step": 14700 + }, + { + "epoch": 30.837915684734224, + "learning_rate": 0.0002975178682696613, + "loss": 2.816401481628418, + "step": 14710 + }, + { + "epoch": 30.858863576852578, + "learning_rate": 0.0002974167920417504, + "loss": 2.850655746459961, + "step": 14720 + }, + { + "epoch": 30.879811468970935, + "learning_rate": 0.00029731581876026557, + "loss": 2.844277191162109, + "step": 14730 + }, + { + "epoch": 30.90075936108929, + "learning_rate": 0.00029721494825057357, + "loss": 2.8203685760498045, + "step": 14740 + }, + { + "epoch": 30.921707253207646, + "learning_rate": 0.00029711418033845523, + "loss": 2.848883628845215, + "step": 14750 + }, + { + "epoch": 30.942655145326, + "learning_rate": 0.0002970135148501047, + "loss": 2.7703632354736327, + "step": 14760 + }, + { + "epoch": 30.963603037444358, + "learning_rate": 0.00029691295161212816, + "loss": 2.8733938217163084, + "step": 14770 + }, + { + "epoch": 30.98455092956271, + "learning_rate": 0.0002968124904515423, + "loss": 2.8676376342773438, + "step": 14780 + }, + { + "epoch": 31.006284367635505, + "learning_rate": 0.00029671213119577346, + "loss": 2.8960426330566404, + "step": 14790 + }, + { + "epoch": 31.027232259753863, + "learning_rate": 0.00029661187367265593, + "loss": 2.8205034255981447, + "step": 14800 + }, + { + "epoch": 31.048180151872216, + "learning_rate": 0.0002965117177104311, + "loss": 2.8493398666381835, + "step": 14810 + }, + { + "epoch": 31.069128043990574, + "learning_rate": 0.0002964116631377459, + "loss": 2.808573913574219, + "step": 14820 + }, + { + "epoch": 31.090075936108928, + "learning_rate": 0.000296311709783652, + "loss": 2.770844078063965, + "step": 14830 + }, + { + "epoch": 31.111023828227285, + "learning_rate": 0.00029621185747760406, + "loss": 2.7819324493408204, + "step": 14840 + }, + { + "epoch": 31.13197172034564, + "learning_rate": 0.0002961121060494589, + "loss": 2.7976245880126953, + "step": 14850 + }, + { + "epoch": 31.152919612463997, + "learning_rate": 0.00029601245532947417, + "loss": 2.8540115356445312, + "step": 14860 + }, + { + "epoch": 31.17386750458235, + "learning_rate": 0.0002959129051483069, + "loss": 2.7655929565429687, + "step": 14870 + }, + { + "epoch": 31.194815396700708, + "learning_rate": 0.00029581345533701285, + "loss": 2.847081184387207, + "step": 14880 + }, + { + "epoch": 31.215763288819062, + "learning_rate": 0.0002957141057270448, + "loss": 2.82701416015625, + "step": 14890 + }, + { + "epoch": 31.23671118093742, + "learning_rate": 0.0002956148561502513, + "loss": 2.8076833724975585, + "step": 14900 + }, + { + "epoch": 31.257659073055773, + "learning_rate": 0.00029551570643887603, + "loss": 2.7729957580566404, + "step": 14910 + }, + { + "epoch": 31.27860696517413, + "learning_rate": 0.00029541665642555606, + "loss": 2.8175632476806642, + "step": 14920 + }, + { + "epoch": 31.299554857292485, + "learning_rate": 0.00029531770594332096, + "loss": 2.781933403015137, + "step": 14930 + }, + { + "epoch": 31.320502749410842, + "learning_rate": 0.0002952188548255915, + "loss": 2.826693534851074, + "step": 14940 + }, + { + "epoch": 31.341450641529196, + "learning_rate": 0.00029512010290617854, + "loss": 2.7952367782592775, + "step": 14950 + }, + { + "epoch": 31.36239853364755, + "learning_rate": 0.0002950214500192816, + "loss": 2.7863574981689454, + "step": 14960 + }, + { + "epoch": 31.383346425765907, + "learning_rate": 0.00029492289599948834, + "loss": 2.8061588287353514, + "step": 14970 + }, + { + "epoch": 31.40429431788426, + "learning_rate": 0.0002948244406817725, + "loss": 2.8176244735717773, + "step": 14980 + }, + { + "epoch": 31.42524221000262, + "learning_rate": 0.00029472608390149343, + "loss": 2.8314136505126952, + "step": 14990 + }, + { + "epoch": 31.446190102120973, + "learning_rate": 0.00029462782549439473, + "loss": 2.802597999572754, + "step": 15000 + } + ], + "max_steps": 15000, + "num_train_epochs": 32, + "total_flos": 4124974225514526720, + "trial_name": null, + "trial_params": null +}