Training in progress, step 52000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3555504
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e01ff6aadb44db27b60f5a2939c1f91ceef8ce4c8eda33e8448193e02f7dedf
|
| 3 |
size 3555504
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7141515
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b312a19a4c3a50bbc6d51dc9137976fee7cefb77fe462694cf14c53d8b7b3ed9
|
| 3 |
size 7141515
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01dad4027d20eca2d7fa4b583f03a1d3875b3ab481ed98527232c092bb93df17
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a000201d58220548b692d7c263c2ef536a136348b8e258b7e7e4280e42ea9770
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c18215b26f935a6486c705cff1ccfa7de15b6db51bcfbab399fb0323c2730116
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 4000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3512,6 +3512,298 @@
|
|
| 3512 |
"eval_samples_per_second": 128.979,
|
| 3513 |
"eval_steps_per_second": 8.061,
|
| 3514 |
"step": 48000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3515 |
}
|
| 3516 |
],
|
| 3517 |
"logging_steps": 100,
|
|
@@ -3526,7 +3818,7 @@
|
|
| 3526 |
"early_stopping_threshold": 0.0
|
| 3527 |
},
|
| 3528 |
"attributes": {
|
| 3529 |
-
"early_stopping_patience_counter":
|
| 3530 |
}
|
| 3531 |
},
|
| 3532 |
"TrainerControl": {
|
|
@@ -3540,7 +3832,7 @@
|
|
| 3540 |
"attributes": {}
|
| 3541 |
}
|
| 3542 |
},
|
| 3543 |
-
"total_flos": 2.
|
| 3544 |
"train_batch_size": 16,
|
| 3545 |
"trial_name": null,
|
| 3546 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 52000,
|
| 3 |
+
"best_metric": 0.9908199660129274,
|
| 4 |
+
"best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
|
| 5 |
+
"epoch": 1.6640000000000001,
|
| 6 |
"eval_steps": 4000,
|
| 7 |
+
"global_step": 52000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3512 |
"eval_samples_per_second": 128.979,
|
| 3513 |
"eval_steps_per_second": 8.061,
|
| 3514 |
"step": 48000
|
| 3515 |
+
},
|
| 3516 |
+
{
|
| 3517 |
+
"epoch": 1.5392000000000001,
|
| 3518 |
+
"grad_norm": 6.397082805633545,
|
| 3519 |
+
"learning_rate": 1.388776886035313e-05,
|
| 3520 |
+
"loss": 0.035,
|
| 3521 |
+
"step": 48100
|
| 3522 |
+
},
|
| 3523 |
+
{
|
| 3524 |
+
"epoch": 1.5424,
|
| 3525 |
+
"grad_norm": 0.11342862993478775,
|
| 3526 |
+
"learning_rate": 1.3874927768860353e-05,
|
| 3527 |
+
"loss": 0.0541,
|
| 3528 |
+
"step": 48200
|
| 3529 |
+
},
|
| 3530 |
+
{
|
| 3531 |
+
"epoch": 1.5455999999999999,
|
| 3532 |
+
"grad_norm": 0.02208523452281952,
|
| 3533 |
+
"learning_rate": 1.3862086677367576e-05,
|
| 3534 |
+
"loss": 0.0506,
|
| 3535 |
+
"step": 48300
|
| 3536 |
+
},
|
| 3537 |
+
{
|
| 3538 |
+
"epoch": 1.5488,
|
| 3539 |
+
"grad_norm": 0.15493257343769073,
|
| 3540 |
+
"learning_rate": 1.38492455858748e-05,
|
| 3541 |
+
"loss": 0.0283,
|
| 3542 |
+
"step": 48400
|
| 3543 |
+
},
|
| 3544 |
+
{
|
| 3545 |
+
"epoch": 1.552,
|
| 3546 |
+
"grad_norm": 0.004556428641080856,
|
| 3547 |
+
"learning_rate": 1.3836404494382023e-05,
|
| 3548 |
+
"loss": 0.0583,
|
| 3549 |
+
"step": 48500
|
| 3550 |
+
},
|
| 3551 |
+
{
|
| 3552 |
+
"epoch": 1.5552000000000001,
|
| 3553 |
+
"grad_norm": 0.10255400836467743,
|
| 3554 |
+
"learning_rate": 1.3823563402889246e-05,
|
| 3555 |
+
"loss": 0.0379,
|
| 3556 |
+
"step": 48600
|
| 3557 |
+
},
|
| 3558 |
+
{
|
| 3559 |
+
"epoch": 1.5584,
|
| 3560 |
+
"grad_norm": 0.04857850447297096,
|
| 3561 |
+
"learning_rate": 1.3810722311396469e-05,
|
| 3562 |
+
"loss": 0.0362,
|
| 3563 |
+
"step": 48700
|
| 3564 |
+
},
|
| 3565 |
+
{
|
| 3566 |
+
"epoch": 1.5615999999999999,
|
| 3567 |
+
"grad_norm": 0.07100783288478851,
|
| 3568 |
+
"learning_rate": 1.3797881219903692e-05,
|
| 3569 |
+
"loss": 0.0691,
|
| 3570 |
+
"step": 48800
|
| 3571 |
+
},
|
| 3572 |
+
{
|
| 3573 |
+
"epoch": 1.5648,
|
| 3574 |
+
"grad_norm": 10.417929649353027,
|
| 3575 |
+
"learning_rate": 1.3785040128410915e-05,
|
| 3576 |
+
"loss": 0.0555,
|
| 3577 |
+
"step": 48900
|
| 3578 |
+
},
|
| 3579 |
+
{
|
| 3580 |
+
"epoch": 1.568,
|
| 3581 |
+
"grad_norm": 0.06572629511356354,
|
| 3582 |
+
"learning_rate": 1.3772199036918138e-05,
|
| 3583 |
+
"loss": 0.0373,
|
| 3584 |
+
"step": 49000
|
| 3585 |
+
},
|
| 3586 |
+
{
|
| 3587 |
+
"epoch": 1.5712000000000002,
|
| 3588 |
+
"grad_norm": 0.47768735885620117,
|
| 3589 |
+
"learning_rate": 1.3759357945425361e-05,
|
| 3590 |
+
"loss": 0.0483,
|
| 3591 |
+
"step": 49100
|
| 3592 |
+
},
|
| 3593 |
+
{
|
| 3594 |
+
"epoch": 1.5744,
|
| 3595 |
+
"grad_norm": 0.07465988397598267,
|
| 3596 |
+
"learning_rate": 1.3746516853932586e-05,
|
| 3597 |
+
"loss": 0.0327,
|
| 3598 |
+
"step": 49200
|
| 3599 |
+
},
|
| 3600 |
+
{
|
| 3601 |
+
"epoch": 1.5776,
|
| 3602 |
+
"grad_norm": 0.018970176577568054,
|
| 3603 |
+
"learning_rate": 1.373367576243981e-05,
|
| 3604 |
+
"loss": 0.0493,
|
| 3605 |
+
"step": 49300
|
| 3606 |
+
},
|
| 3607 |
+
{
|
| 3608 |
+
"epoch": 1.5808,
|
| 3609 |
+
"grad_norm": 0.0272968802601099,
|
| 3610 |
+
"learning_rate": 1.3720834670947033e-05,
|
| 3611 |
+
"loss": 0.0418,
|
| 3612 |
+
"step": 49400
|
| 3613 |
+
},
|
| 3614 |
+
{
|
| 3615 |
+
"epoch": 1.584,
|
| 3616 |
+
"grad_norm": 0.010071586817502975,
|
| 3617 |
+
"learning_rate": 1.3707993579454256e-05,
|
| 3618 |
+
"loss": 0.0416,
|
| 3619 |
+
"step": 49500
|
| 3620 |
+
},
|
| 3621 |
+
{
|
| 3622 |
+
"epoch": 1.5872000000000002,
|
| 3623 |
+
"grad_norm": 0.13110236823558807,
|
| 3624 |
+
"learning_rate": 1.3695152487961479e-05,
|
| 3625 |
+
"loss": 0.0353,
|
| 3626 |
+
"step": 49600
|
| 3627 |
+
},
|
| 3628 |
+
{
|
| 3629 |
+
"epoch": 1.5904,
|
| 3630 |
+
"grad_norm": 0.9969918727874756,
|
| 3631 |
+
"learning_rate": 1.3682311396468702e-05,
|
| 3632 |
+
"loss": 0.046,
|
| 3633 |
+
"step": 49700
|
| 3634 |
+
},
|
| 3635 |
+
{
|
| 3636 |
+
"epoch": 1.5936,
|
| 3637 |
+
"grad_norm": 0.010477591305971146,
|
| 3638 |
+
"learning_rate": 1.3669470304975925e-05,
|
| 3639 |
+
"loss": 0.0351,
|
| 3640 |
+
"step": 49800
|
| 3641 |
+
},
|
| 3642 |
+
{
|
| 3643 |
+
"epoch": 1.5968,
|
| 3644 |
+
"grad_norm": 14.14805793762207,
|
| 3645 |
+
"learning_rate": 1.3656629213483148e-05,
|
| 3646 |
+
"loss": 0.0376,
|
| 3647 |
+
"step": 49900
|
| 3648 |
+
},
|
| 3649 |
+
{
|
| 3650 |
+
"epoch": 1.6,
|
| 3651 |
+
"grad_norm": 0.004804316442459822,
|
| 3652 |
+
"learning_rate": 1.3643788121990371e-05,
|
| 3653 |
+
"loss": 0.0381,
|
| 3654 |
+
"step": 50000
|
| 3655 |
+
},
|
| 3656 |
+
{
|
| 3657 |
+
"epoch": 1.6032,
|
| 3658 |
+
"grad_norm": 0.9756079912185669,
|
| 3659 |
+
"learning_rate": 1.3630947030497595e-05,
|
| 3660 |
+
"loss": 0.0495,
|
| 3661 |
+
"step": 50100
|
| 3662 |
+
},
|
| 3663 |
+
{
|
| 3664 |
+
"epoch": 1.6064,
|
| 3665 |
+
"grad_norm": 0.005941998213529587,
|
| 3666 |
+
"learning_rate": 1.3618105939004818e-05,
|
| 3667 |
+
"loss": 0.0404,
|
| 3668 |
+
"step": 50200
|
| 3669 |
+
},
|
| 3670 |
+
{
|
| 3671 |
+
"epoch": 1.6096,
|
| 3672 |
+
"grad_norm": 0.017781252041459084,
|
| 3673 |
+
"learning_rate": 1.3605264847512039e-05,
|
| 3674 |
+
"loss": 0.0467,
|
| 3675 |
+
"step": 50300
|
| 3676 |
+
},
|
| 3677 |
+
{
|
| 3678 |
+
"epoch": 1.6128,
|
| 3679 |
+
"grad_norm": 0.11217786371707916,
|
| 3680 |
+
"learning_rate": 1.3592423756019262e-05,
|
| 3681 |
+
"loss": 0.0348,
|
| 3682 |
+
"step": 50400
|
| 3683 |
+
},
|
| 3684 |
+
{
|
| 3685 |
+
"epoch": 1.616,
|
| 3686 |
+
"grad_norm": 1.8749943971633911,
|
| 3687 |
+
"learning_rate": 1.3579582664526485e-05,
|
| 3688 |
+
"loss": 0.0323,
|
| 3689 |
+
"step": 50500
|
| 3690 |
+
},
|
| 3691 |
+
{
|
| 3692 |
+
"epoch": 1.6192,
|
| 3693 |
+
"grad_norm": 0.07430779188871384,
|
| 3694 |
+
"learning_rate": 1.3566741573033709e-05,
|
| 3695 |
+
"loss": 0.0481,
|
| 3696 |
+
"step": 50600
|
| 3697 |
+
},
|
| 3698 |
+
{
|
| 3699 |
+
"epoch": 1.6223999999999998,
|
| 3700 |
+
"grad_norm": 0.22179456055164337,
|
| 3701 |
+
"learning_rate": 1.3553900481540932e-05,
|
| 3702 |
+
"loss": 0.0424,
|
| 3703 |
+
"step": 50700
|
| 3704 |
+
},
|
| 3705 |
+
{
|
| 3706 |
+
"epoch": 1.6256,
|
| 3707 |
+
"grad_norm": 0.020223159343004227,
|
| 3708 |
+
"learning_rate": 1.3541059390048155e-05,
|
| 3709 |
+
"loss": 0.0398,
|
| 3710 |
+
"step": 50800
|
| 3711 |
+
},
|
| 3712 |
+
{
|
| 3713 |
+
"epoch": 1.6288,
|
| 3714 |
+
"grad_norm": 0.021107584238052368,
|
| 3715 |
+
"learning_rate": 1.3528218298555378e-05,
|
| 3716 |
+
"loss": 0.0326,
|
| 3717 |
+
"step": 50900
|
| 3718 |
+
},
|
| 3719 |
+
{
|
| 3720 |
+
"epoch": 1.6320000000000001,
|
| 3721 |
+
"grad_norm": 0.047376956790685654,
|
| 3722 |
+
"learning_rate": 1.3515377207062601e-05,
|
| 3723 |
+
"loss": 0.0544,
|
| 3724 |
+
"step": 51000
|
| 3725 |
+
},
|
| 3726 |
+
{
|
| 3727 |
+
"epoch": 1.6352,
|
| 3728 |
+
"grad_norm": 17.578815460205078,
|
| 3729 |
+
"learning_rate": 1.3502536115569824e-05,
|
| 3730 |
+
"loss": 0.0629,
|
| 3731 |
+
"step": 51100
|
| 3732 |
+
},
|
| 3733 |
+
{
|
| 3734 |
+
"epoch": 1.6383999999999999,
|
| 3735 |
+
"grad_norm": 18.477867126464844,
|
| 3736 |
+
"learning_rate": 1.3489695024077047e-05,
|
| 3737 |
+
"loss": 0.039,
|
| 3738 |
+
"step": 51200
|
| 3739 |
+
},
|
| 3740 |
+
{
|
| 3741 |
+
"epoch": 1.6416,
|
| 3742 |
+
"grad_norm": 1.2129385471343994,
|
| 3743 |
+
"learning_rate": 1.347685393258427e-05,
|
| 3744 |
+
"loss": 0.0442,
|
| 3745 |
+
"step": 51300
|
| 3746 |
+
},
|
| 3747 |
+
{
|
| 3748 |
+
"epoch": 1.6448,
|
| 3749 |
+
"grad_norm": 0.21100889146327972,
|
| 3750 |
+
"learning_rate": 1.3464012841091494e-05,
|
| 3751 |
+
"loss": 0.0588,
|
| 3752 |
+
"step": 51400
|
| 3753 |
+
},
|
| 3754 |
+
{
|
| 3755 |
+
"epoch": 1.6480000000000001,
|
| 3756 |
+
"grad_norm": 0.01275007613003254,
|
| 3757 |
+
"learning_rate": 1.3451171749598717e-05,
|
| 3758 |
+
"loss": 0.0467,
|
| 3759 |
+
"step": 51500
|
| 3760 |
+
},
|
| 3761 |
+
{
|
| 3762 |
+
"epoch": 1.6512,
|
| 3763 |
+
"grad_norm": 8.823915481567383,
|
| 3764 |
+
"learning_rate": 1.343833065810594e-05,
|
| 3765 |
+
"loss": 0.0379,
|
| 3766 |
+
"step": 51600
|
| 3767 |
+
},
|
| 3768 |
+
{
|
| 3769 |
+
"epoch": 1.6543999999999999,
|
| 3770 |
+
"grad_norm": 4.7289252281188965,
|
| 3771 |
+
"learning_rate": 1.3425489566613163e-05,
|
| 3772 |
+
"loss": 0.041,
|
| 3773 |
+
"step": 51700
|
| 3774 |
+
},
|
| 3775 |
+
{
|
| 3776 |
+
"epoch": 1.6576,
|
| 3777 |
+
"grad_norm": 0.026279212906956673,
|
| 3778 |
+
"learning_rate": 1.3412648475120386e-05,
|
| 3779 |
+
"loss": 0.0551,
|
| 3780 |
+
"step": 51800
|
| 3781 |
+
},
|
| 3782 |
+
{
|
| 3783 |
+
"epoch": 1.6608,
|
| 3784 |
+
"grad_norm": 9.168363571166992,
|
| 3785 |
+
"learning_rate": 1.339980738362761e-05,
|
| 3786 |
+
"loss": 0.0357,
|
| 3787 |
+
"step": 51900
|
| 3788 |
+
},
|
| 3789 |
+
{
|
| 3790 |
+
"epoch": 1.6640000000000001,
|
| 3791 |
+
"grad_norm": 0.06355811655521393,
|
| 3792 |
+
"learning_rate": 1.3386966292134832e-05,
|
| 3793 |
+
"loss": 0.0416,
|
| 3794 |
+
"step": 52000
|
| 3795 |
+
},
|
| 3796 |
+
{
|
| 3797 |
+
"epoch": 1.6640000000000001,
|
| 3798 |
+
"eval_accuracy": 0.99082,
|
| 3799 |
+
"eval_f1": 0.9908199660129274,
|
| 3800 |
+
"eval_loss": 0.035849522799253464,
|
| 3801 |
+
"eval_precision": 0.9908199447350458,
|
| 3802 |
+
"eval_recall": 0.99082,
|
| 3803 |
+
"eval_runtime": 774.0001,
|
| 3804 |
+
"eval_samples_per_second": 129.199,
|
| 3805 |
+
"eval_steps_per_second": 8.075,
|
| 3806 |
+
"step": 52000
|
| 3807 |
}
|
| 3808 |
],
|
| 3809 |
"logging_steps": 100,
|
|
|
|
| 3818 |
"early_stopping_threshold": 0.0
|
| 3819 |
},
|
| 3820 |
"attributes": {
|
| 3821 |
+
"early_stopping_patience_counter": 0
|
| 3822 |
}
|
| 3823 |
},
|
| 3824 |
"TrainerControl": {
|
|
|
|
| 3832 |
"attributes": {}
|
| 3833 |
}
|
| 3834 |
},
|
| 3835 |
+
"total_flos": 2.208251621920823e+17,
|
| 3836 |
"train_batch_size": 16,
|
| 3837 |
"trial_name": null,
|
| 3838 |
"trial_params": null
|