azherali commited on
Commit
f6241b7
·
verified ·
1 Parent(s): 3fcecd5

Training in progress, step 52000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a65415a9d174aeb6628f2a1f3312063a7b378d9e9b6140f0c42d8a550caf91d7
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e01ff6aadb44db27b60f5a2939c1f91ceef8ce4c8eda33e8448193e02f7dedf
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd517c20ac98c90d8ce6f5887bb515507de628ca9fd8fe91aa578d3677906648
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b312a19a4c3a50bbc6d51dc9137976fee7cefb77fe462694cf14c53d8b7b3ed9
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba01fbde1e06f07c52269f0f4ecd17f79378b8843a5ca873c42d0450dd248933
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01dad4027d20eca2d7fa4b583f03a1d3875b3ab481ed98527232c092bb93df17
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2506b019d401bb5719d882cc120b44ae3c311583a2faec565dae037f51160d5
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a000201d58220548b692d7c263c2ef536a136348b8e258b7e7e4280e42ea9770
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0c8e4e92a6a4d6dc0cfaa1d114795a3cfc8bb22eeb20851eec07e893ae0e183
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c18215b26f935a6486c705cff1ccfa7de15b6db51bcfbab399fb0323c2730116
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 44000,
3
- "best_metric": 0.9900904784547742,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-44000",
5
- "epoch": 1.536,
6
  "eval_steps": 4000,
7
- "global_step": 48000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3512,6 +3512,298 @@
3512
  "eval_samples_per_second": 128.979,
3513
  "eval_steps_per_second": 8.061,
3514
  "step": 48000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3515
  }
3516
  ],
3517
  "logging_steps": 100,
@@ -3526,7 +3818,7 @@
3526
  "early_stopping_threshold": 0.0
3527
  },
3528
  "attributes": {
3529
- "early_stopping_patience_counter": 1
3530
  }
3531
  },
3532
  "TrainerControl": {
@@ -3540,7 +3832,7 @@
3540
  "attributes": {}
3541
  }
3542
  },
3543
- "total_flos": 2.0383461668418547e+17,
3544
  "train_batch_size": 16,
3545
  "trial_name": null,
3546
  "trial_params": null
 
1
  {
2
+ "best_global_step": 52000,
3
+ "best_metric": 0.9908199660129274,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
5
+ "epoch": 1.6640000000000001,
6
  "eval_steps": 4000,
7
+ "global_step": 52000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3512
  "eval_samples_per_second": 128.979,
3513
  "eval_steps_per_second": 8.061,
3514
  "step": 48000
3515
+ },
3516
+ {
3517
+ "epoch": 1.5392000000000001,
3518
+ "grad_norm": 6.397082805633545,
3519
+ "learning_rate": 1.388776886035313e-05,
3520
+ "loss": 0.035,
3521
+ "step": 48100
3522
+ },
3523
+ {
3524
+ "epoch": 1.5424,
3525
+ "grad_norm": 0.11342862993478775,
3526
+ "learning_rate": 1.3874927768860353e-05,
3527
+ "loss": 0.0541,
3528
+ "step": 48200
3529
+ },
3530
+ {
3531
+ "epoch": 1.5455999999999999,
3532
+ "grad_norm": 0.02208523452281952,
3533
+ "learning_rate": 1.3862086677367576e-05,
3534
+ "loss": 0.0506,
3535
+ "step": 48300
3536
+ },
3537
+ {
3538
+ "epoch": 1.5488,
3539
+ "grad_norm": 0.15493257343769073,
3540
+ "learning_rate": 1.38492455858748e-05,
3541
+ "loss": 0.0283,
3542
+ "step": 48400
3543
+ },
3544
+ {
3545
+ "epoch": 1.552,
3546
+ "grad_norm": 0.004556428641080856,
3547
+ "learning_rate": 1.3836404494382023e-05,
3548
+ "loss": 0.0583,
3549
+ "step": 48500
3550
+ },
3551
+ {
3552
+ "epoch": 1.5552000000000001,
3553
+ "grad_norm": 0.10255400836467743,
3554
+ "learning_rate": 1.3823563402889246e-05,
3555
+ "loss": 0.0379,
3556
+ "step": 48600
3557
+ },
3558
+ {
3559
+ "epoch": 1.5584,
3560
+ "grad_norm": 0.04857850447297096,
3561
+ "learning_rate": 1.3810722311396469e-05,
3562
+ "loss": 0.0362,
3563
+ "step": 48700
3564
+ },
3565
+ {
3566
+ "epoch": 1.5615999999999999,
3567
+ "grad_norm": 0.07100783288478851,
3568
+ "learning_rate": 1.3797881219903692e-05,
3569
+ "loss": 0.0691,
3570
+ "step": 48800
3571
+ },
3572
+ {
3573
+ "epoch": 1.5648,
3574
+ "grad_norm": 10.417929649353027,
3575
+ "learning_rate": 1.3785040128410915e-05,
3576
+ "loss": 0.0555,
3577
+ "step": 48900
3578
+ },
3579
+ {
3580
+ "epoch": 1.568,
3581
+ "grad_norm": 0.06572629511356354,
3582
+ "learning_rate": 1.3772199036918138e-05,
3583
+ "loss": 0.0373,
3584
+ "step": 49000
3585
+ },
3586
+ {
3587
+ "epoch": 1.5712000000000002,
3588
+ "grad_norm": 0.47768735885620117,
3589
+ "learning_rate": 1.3759357945425361e-05,
3590
+ "loss": 0.0483,
3591
+ "step": 49100
3592
+ },
3593
+ {
3594
+ "epoch": 1.5744,
3595
+ "grad_norm": 0.07465988397598267,
3596
+ "learning_rate": 1.3746516853932586e-05,
3597
+ "loss": 0.0327,
3598
+ "step": 49200
3599
+ },
3600
+ {
3601
+ "epoch": 1.5776,
3602
+ "grad_norm": 0.018970176577568054,
3603
+ "learning_rate": 1.373367576243981e-05,
3604
+ "loss": 0.0493,
3605
+ "step": 49300
3606
+ },
3607
+ {
3608
+ "epoch": 1.5808,
3609
+ "grad_norm": 0.0272968802601099,
3610
+ "learning_rate": 1.3720834670947033e-05,
3611
+ "loss": 0.0418,
3612
+ "step": 49400
3613
+ },
3614
+ {
3615
+ "epoch": 1.584,
3616
+ "grad_norm": 0.010071586817502975,
3617
+ "learning_rate": 1.3707993579454256e-05,
3618
+ "loss": 0.0416,
3619
+ "step": 49500
3620
+ },
3621
+ {
3622
+ "epoch": 1.5872000000000002,
3623
+ "grad_norm": 0.13110236823558807,
3624
+ "learning_rate": 1.3695152487961479e-05,
3625
+ "loss": 0.0353,
3626
+ "step": 49600
3627
+ },
3628
+ {
3629
+ "epoch": 1.5904,
3630
+ "grad_norm": 0.9969918727874756,
3631
+ "learning_rate": 1.3682311396468702e-05,
3632
+ "loss": 0.046,
3633
+ "step": 49700
3634
+ },
3635
+ {
3636
+ "epoch": 1.5936,
3637
+ "grad_norm": 0.010477591305971146,
3638
+ "learning_rate": 1.3669470304975925e-05,
3639
+ "loss": 0.0351,
3640
+ "step": 49800
3641
+ },
3642
+ {
3643
+ "epoch": 1.5968,
3644
+ "grad_norm": 14.14805793762207,
3645
+ "learning_rate": 1.3656629213483148e-05,
3646
+ "loss": 0.0376,
3647
+ "step": 49900
3648
+ },
3649
+ {
3650
+ "epoch": 1.6,
3651
+ "grad_norm": 0.004804316442459822,
3652
+ "learning_rate": 1.3643788121990371e-05,
3653
+ "loss": 0.0381,
3654
+ "step": 50000
3655
+ },
3656
+ {
3657
+ "epoch": 1.6032,
3658
+ "grad_norm": 0.9756079912185669,
3659
+ "learning_rate": 1.3630947030497595e-05,
3660
+ "loss": 0.0495,
3661
+ "step": 50100
3662
+ },
3663
+ {
3664
+ "epoch": 1.6064,
3665
+ "grad_norm": 0.005941998213529587,
3666
+ "learning_rate": 1.3618105939004818e-05,
3667
+ "loss": 0.0404,
3668
+ "step": 50200
3669
+ },
3670
+ {
3671
+ "epoch": 1.6096,
3672
+ "grad_norm": 0.017781252041459084,
3673
+ "learning_rate": 1.3605264847512039e-05,
3674
+ "loss": 0.0467,
3675
+ "step": 50300
3676
+ },
3677
+ {
3678
+ "epoch": 1.6128,
3679
+ "grad_norm": 0.11217786371707916,
3680
+ "learning_rate": 1.3592423756019262e-05,
3681
+ "loss": 0.0348,
3682
+ "step": 50400
3683
+ },
3684
+ {
3685
+ "epoch": 1.616,
3686
+ "grad_norm": 1.8749943971633911,
3687
+ "learning_rate": 1.3579582664526485e-05,
3688
+ "loss": 0.0323,
3689
+ "step": 50500
3690
+ },
3691
+ {
3692
+ "epoch": 1.6192,
3693
+ "grad_norm": 0.07430779188871384,
3694
+ "learning_rate": 1.3566741573033709e-05,
3695
+ "loss": 0.0481,
3696
+ "step": 50600
3697
+ },
3698
+ {
3699
+ "epoch": 1.6223999999999998,
3700
+ "grad_norm": 0.22179456055164337,
3701
+ "learning_rate": 1.3553900481540932e-05,
3702
+ "loss": 0.0424,
3703
+ "step": 50700
3704
+ },
3705
+ {
3706
+ "epoch": 1.6256,
3707
+ "grad_norm": 0.020223159343004227,
3708
+ "learning_rate": 1.3541059390048155e-05,
3709
+ "loss": 0.0398,
3710
+ "step": 50800
3711
+ },
3712
+ {
3713
+ "epoch": 1.6288,
3714
+ "grad_norm": 0.021107584238052368,
3715
+ "learning_rate": 1.3528218298555378e-05,
3716
+ "loss": 0.0326,
3717
+ "step": 50900
3718
+ },
3719
+ {
3720
+ "epoch": 1.6320000000000001,
3721
+ "grad_norm": 0.047376956790685654,
3722
+ "learning_rate": 1.3515377207062601e-05,
3723
+ "loss": 0.0544,
3724
+ "step": 51000
3725
+ },
3726
+ {
3727
+ "epoch": 1.6352,
3728
+ "grad_norm": 17.578815460205078,
3729
+ "learning_rate": 1.3502536115569824e-05,
3730
+ "loss": 0.0629,
3731
+ "step": 51100
3732
+ },
3733
+ {
3734
+ "epoch": 1.6383999999999999,
3735
+ "grad_norm": 18.477867126464844,
3736
+ "learning_rate": 1.3489695024077047e-05,
3737
+ "loss": 0.039,
3738
+ "step": 51200
3739
+ },
3740
+ {
3741
+ "epoch": 1.6416,
3742
+ "grad_norm": 1.2129385471343994,
3743
+ "learning_rate": 1.347685393258427e-05,
3744
+ "loss": 0.0442,
3745
+ "step": 51300
3746
+ },
3747
+ {
3748
+ "epoch": 1.6448,
3749
+ "grad_norm": 0.21100889146327972,
3750
+ "learning_rate": 1.3464012841091494e-05,
3751
+ "loss": 0.0588,
3752
+ "step": 51400
3753
+ },
3754
+ {
3755
+ "epoch": 1.6480000000000001,
3756
+ "grad_norm": 0.01275007613003254,
3757
+ "learning_rate": 1.3451171749598717e-05,
3758
+ "loss": 0.0467,
3759
+ "step": 51500
3760
+ },
3761
+ {
3762
+ "epoch": 1.6512,
3763
+ "grad_norm": 8.823915481567383,
3764
+ "learning_rate": 1.343833065810594e-05,
3765
+ "loss": 0.0379,
3766
+ "step": 51600
3767
+ },
3768
+ {
3769
+ "epoch": 1.6543999999999999,
3770
+ "grad_norm": 4.7289252281188965,
3771
+ "learning_rate": 1.3425489566613163e-05,
3772
+ "loss": 0.041,
3773
+ "step": 51700
3774
+ },
3775
+ {
3776
+ "epoch": 1.6576,
3777
+ "grad_norm": 0.026279212906956673,
3778
+ "learning_rate": 1.3412648475120386e-05,
3779
+ "loss": 0.0551,
3780
+ "step": 51800
3781
+ },
3782
+ {
3783
+ "epoch": 1.6608,
3784
+ "grad_norm": 9.168363571166992,
3785
+ "learning_rate": 1.339980738362761e-05,
3786
+ "loss": 0.0357,
3787
+ "step": 51900
3788
+ },
3789
+ {
3790
+ "epoch": 1.6640000000000001,
3791
+ "grad_norm": 0.06355811655521393,
3792
+ "learning_rate": 1.3386966292134832e-05,
3793
+ "loss": 0.0416,
3794
+ "step": 52000
3795
+ },
3796
+ {
3797
+ "epoch": 1.6640000000000001,
3798
+ "eval_accuracy": 0.99082,
3799
+ "eval_f1": 0.9908199660129274,
3800
+ "eval_loss": 0.035849522799253464,
3801
+ "eval_precision": 0.9908199447350458,
3802
+ "eval_recall": 0.99082,
3803
+ "eval_runtime": 774.0001,
3804
+ "eval_samples_per_second": 129.199,
3805
+ "eval_steps_per_second": 8.075,
3806
+ "step": 52000
3807
  }
3808
  ],
3809
  "logging_steps": 100,
 
3818
  "early_stopping_threshold": 0.0
3819
  },
3820
  "attributes": {
3821
+ "early_stopping_patience_counter": 0
3822
  }
3823
  },
3824
  "TrainerControl": {
 
3832
  "attributes": {}
3833
  }
3834
  },
3835
+ "total_flos": 2.208251621920823e+17,
3836
  "train_batch_size": 16,
3837
  "trial_name": null,
3838
  "trial_params": null