azherali commited on
Commit
8a06368
·
verified ·
1 Parent(s): d87e426

Training in progress, step 48000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16dadfb9608dcaa99e56c16537431ef4528e7f7edc4ac58dfea4bb46f7e1c8a9
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a65415a9d174aeb6628f2a1f3312063a7b378d9e9b6140f0c42d8a550caf91d7
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2d226450cdcebcf2615a1d39959652fe5438e10e2d6cd2cb8d2468a792f8b2
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd517c20ac98c90d8ce6f5887bb515507de628ca9fd8fe91aa578d3677906648
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97462624a2fc53c8574a0620aac025280c9bdbbb7138ff03f47f37018b457bf4
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba01fbde1e06f07c52269f0f4ecd17f79378b8843a5ca873c42d0450dd248933
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e9a989616374c693d6e283e9a661c77047898be59d9e06a73f69b65c271f395
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2506b019d401bb5719d882cc120b44ae3c311583a2faec565dae037f51160d5
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7880c38c083e20dc3aacb94693eef3b1547dc3e69aff0279d80323326c2ebc49
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0c8e4e92a6a4d6dc0cfaa1d114795a3cfc8bb22eeb20851eec07e893ae0e183
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 44000,
3
  "best_metric": 0.9900904784547742,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-44000",
5
- "epoch": 1.408,
6
  "eval_steps": 4000,
7
- "global_step": 44000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3220,6 +3220,298 @@
3220
  "eval_samples_per_second": 129.014,
3221
  "eval_steps_per_second": 8.063,
3222
  "step": 44000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3223
  }
3224
  ],
3225
  "logging_steps": 100,
@@ -3234,7 +3526,7 @@
3234
  "early_stopping_threshold": 0.0
3235
  },
3236
  "attributes": {
3237
- "early_stopping_patience_counter": 0
3238
  }
3239
  },
3240
  "TrainerControl": {
@@ -3248,7 +3540,7 @@
3248
  "attributes": {}
3249
  }
3250
  },
3251
- "total_flos": 1.8683917813152307e+17,
3252
  "train_batch_size": 16,
3253
  "trial_name": null,
3254
  "trial_params": null
 
2
  "best_global_step": 44000,
3
  "best_metric": 0.9900904784547742,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-44000",
5
+ "epoch": 1.536,
6
  "eval_steps": 4000,
7
+ "global_step": 48000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3220
  "eval_samples_per_second": 129.014,
3221
  "eval_steps_per_second": 8.063,
3222
  "step": 44000
3223
+ },
3224
+ {
3225
+ "epoch": 1.4112,
3226
+ "grad_norm": 0.6451414823532104,
3227
+ "learning_rate": 1.4401412520064206e-05,
3228
+ "loss": 0.044,
3229
+ "step": 44100
3230
+ },
3231
+ {
3232
+ "epoch": 1.4144,
3233
+ "grad_norm": 0.03619956597685814,
3234
+ "learning_rate": 1.438857142857143e-05,
3235
+ "loss": 0.0386,
3236
+ "step": 44200
3237
+ },
3238
+ {
3239
+ "epoch": 1.4176,
3240
+ "grad_norm": 0.005635638255625963,
3241
+ "learning_rate": 1.4375730337078653e-05,
3242
+ "loss": 0.0371,
3243
+ "step": 44300
3244
+ },
3245
+ {
3246
+ "epoch": 1.4208,
3247
+ "grad_norm": 20.394947052001953,
3248
+ "learning_rate": 1.4362889245585876e-05,
3249
+ "loss": 0.0468,
3250
+ "step": 44400
3251
+ },
3252
+ {
3253
+ "epoch": 1.424,
3254
+ "grad_norm": 0.008449492044746876,
3255
+ "learning_rate": 1.4350048154093099e-05,
3256
+ "loss": 0.0536,
3257
+ "step": 44500
3258
+ },
3259
+ {
3260
+ "epoch": 1.4272,
3261
+ "grad_norm": 0.06385083496570587,
3262
+ "learning_rate": 1.4337207062600322e-05,
3263
+ "loss": 0.0383,
3264
+ "step": 44600
3265
+ },
3266
+ {
3267
+ "epoch": 1.4304000000000001,
3268
+ "grad_norm": 0.016077406704425812,
3269
+ "learning_rate": 1.4324365971107545e-05,
3270
+ "loss": 0.0341,
3271
+ "step": 44700
3272
+ },
3273
+ {
3274
+ "epoch": 1.4336,
3275
+ "grad_norm": 0.012415707111358643,
3276
+ "learning_rate": 1.4311524879614768e-05,
3277
+ "loss": 0.0356,
3278
+ "step": 44800
3279
+ },
3280
+ {
3281
+ "epoch": 1.4368,
3282
+ "grad_norm": 0.03126490116119385,
3283
+ "learning_rate": 1.4298683788121991e-05,
3284
+ "loss": 0.0456,
3285
+ "step": 44900
3286
+ },
3287
+ {
3288
+ "epoch": 1.44,
3289
+ "grad_norm": 5.324023723602295,
3290
+ "learning_rate": 1.4285842696629215e-05,
3291
+ "loss": 0.0653,
3292
+ "step": 45000
3293
+ },
3294
+ {
3295
+ "epoch": 1.4432,
3296
+ "grad_norm": 0.02962222322821617,
3297
+ "learning_rate": 1.4273001605136438e-05,
3298
+ "loss": 0.0664,
3299
+ "step": 45100
3300
+ },
3301
+ {
3302
+ "epoch": 1.4464000000000001,
3303
+ "grad_norm": 0.03100472316145897,
3304
+ "learning_rate": 1.426016051364366e-05,
3305
+ "loss": 0.0494,
3306
+ "step": 45200
3307
+ },
3308
+ {
3309
+ "epoch": 1.4496,
3310
+ "grad_norm": 8.572321891784668,
3311
+ "learning_rate": 1.4247319422150884e-05,
3312
+ "loss": 0.0276,
3313
+ "step": 45300
3314
+ },
3315
+ {
3316
+ "epoch": 1.4527999999999999,
3317
+ "grad_norm": 0.004455466754734516,
3318
+ "learning_rate": 1.4234478330658109e-05,
3319
+ "loss": 0.0292,
3320
+ "step": 45400
3321
+ },
3322
+ {
3323
+ "epoch": 1.456,
3324
+ "grad_norm": 0.03781688213348389,
3325
+ "learning_rate": 1.4221637239165332e-05,
3326
+ "loss": 0.0407,
3327
+ "step": 45500
3328
+ },
3329
+ {
3330
+ "epoch": 1.4592,
3331
+ "grad_norm": 10.108696937561035,
3332
+ "learning_rate": 1.4208796147672552e-05,
3333
+ "loss": 0.0531,
3334
+ "step": 45600
3335
+ },
3336
+ {
3337
+ "epoch": 1.4624,
3338
+ "grad_norm": 0.059839170426130295,
3339
+ "learning_rate": 1.4195955056179775e-05,
3340
+ "loss": 0.0373,
3341
+ "step": 45700
3342
+ },
3343
+ {
3344
+ "epoch": 1.4656,
3345
+ "grad_norm": 0.00919304322451353,
3346
+ "learning_rate": 1.4183113964686998e-05,
3347
+ "loss": 0.0402,
3348
+ "step": 45800
3349
+ },
3350
+ {
3351
+ "epoch": 1.4687999999999999,
3352
+ "grad_norm": 12.830222129821777,
3353
+ "learning_rate": 1.4170272873194221e-05,
3354
+ "loss": 0.0321,
3355
+ "step": 45900
3356
+ },
3357
+ {
3358
+ "epoch": 1.472,
3359
+ "grad_norm": 0.006019544322043657,
3360
+ "learning_rate": 1.4157431781701444e-05,
3361
+ "loss": 0.0327,
3362
+ "step": 46000
3363
+ },
3364
+ {
3365
+ "epoch": 1.4752,
3366
+ "grad_norm": 0.024647433310747147,
3367
+ "learning_rate": 1.4144590690208669e-05,
3368
+ "loss": 0.0386,
3369
+ "step": 46100
3370
+ },
3371
+ {
3372
+ "epoch": 1.4784,
3373
+ "grad_norm": 0.01691538281738758,
3374
+ "learning_rate": 1.4131749598715892e-05,
3375
+ "loss": 0.0523,
3376
+ "step": 46200
3377
+ },
3378
+ {
3379
+ "epoch": 1.4816,
3380
+ "grad_norm": 0.007636231370270252,
3381
+ "learning_rate": 1.4118908507223115e-05,
3382
+ "loss": 0.043,
3383
+ "step": 46300
3384
+ },
3385
+ {
3386
+ "epoch": 1.4848,
3387
+ "grad_norm": 0.04239976033568382,
3388
+ "learning_rate": 1.4106067415730338e-05,
3389
+ "loss": 0.0422,
3390
+ "step": 46400
3391
+ },
3392
+ {
3393
+ "epoch": 1.488,
3394
+ "grad_norm": 6.693536758422852,
3395
+ "learning_rate": 1.4093226324237562e-05,
3396
+ "loss": 0.0412,
3397
+ "step": 46500
3398
+ },
3399
+ {
3400
+ "epoch": 1.4912,
3401
+ "grad_norm": 0.014838839881122112,
3402
+ "learning_rate": 1.4080385232744785e-05,
3403
+ "loss": 0.038,
3404
+ "step": 46600
3405
+ },
3406
+ {
3407
+ "epoch": 1.4944,
3408
+ "grad_norm": 9.719799995422363,
3409
+ "learning_rate": 1.4067544141252008e-05,
3410
+ "loss": 0.056,
3411
+ "step": 46700
3412
+ },
3413
+ {
3414
+ "epoch": 1.4976,
3415
+ "grad_norm": 4.120741367340088,
3416
+ "learning_rate": 1.4054703049759231e-05,
3417
+ "loss": 0.0453,
3418
+ "step": 46800
3419
+ },
3420
+ {
3421
+ "epoch": 1.5008,
3422
+ "grad_norm": 0.019338663667440414,
3423
+ "learning_rate": 1.4041861958266454e-05,
3424
+ "loss": 0.0355,
3425
+ "step": 46900
3426
+ },
3427
+ {
3428
+ "epoch": 1.504,
3429
+ "grad_norm": 0.015643298625946045,
3430
+ "learning_rate": 1.4029020866773677e-05,
3431
+ "loss": 0.0276,
3432
+ "step": 47000
3433
+ },
3434
+ {
3435
+ "epoch": 1.5072,
3436
+ "grad_norm": 11.701508522033691,
3437
+ "learning_rate": 1.40161797752809e-05,
3438
+ "loss": 0.0513,
3439
+ "step": 47100
3440
+ },
3441
+ {
3442
+ "epoch": 1.5104,
3443
+ "grad_norm": 0.00980925839394331,
3444
+ "learning_rate": 1.4003338683788124e-05,
3445
+ "loss": 0.0304,
3446
+ "step": 47200
3447
+ },
3448
+ {
3449
+ "epoch": 1.5135999999999998,
3450
+ "grad_norm": 0.018979301676154137,
3451
+ "learning_rate": 1.3990497592295347e-05,
3452
+ "loss": 0.0443,
3453
+ "step": 47300
3454
+ },
3455
+ {
3456
+ "epoch": 1.5168,
3457
+ "grad_norm": 9.20014762878418,
3458
+ "learning_rate": 1.397765650080257e-05,
3459
+ "loss": 0.0411,
3460
+ "step": 47400
3461
+ },
3462
+ {
3463
+ "epoch": 1.52,
3464
+ "grad_norm": 0.06917817145586014,
3465
+ "learning_rate": 1.3964815409309793e-05,
3466
+ "loss": 0.0414,
3467
+ "step": 47500
3468
+ },
3469
+ {
3470
+ "epoch": 1.5232,
3471
+ "grad_norm": 22.683095932006836,
3472
+ "learning_rate": 1.3951974317817016e-05,
3473
+ "loss": 0.0338,
3474
+ "step": 47600
3475
+ },
3476
+ {
3477
+ "epoch": 1.5264,
3478
+ "grad_norm": 0.03558173030614853,
3479
+ "learning_rate": 1.393913322632424e-05,
3480
+ "loss": 0.0396,
3481
+ "step": 47700
3482
+ },
3483
+ {
3484
+ "epoch": 1.5295999999999998,
3485
+ "grad_norm": 0.12180989980697632,
3486
+ "learning_rate": 1.3926292134831462e-05,
3487
+ "loss": 0.0359,
3488
+ "step": 47800
3489
+ },
3490
+ {
3491
+ "epoch": 1.5328,
3492
+ "grad_norm": 2.8462178707122803,
3493
+ "learning_rate": 1.3913451043338686e-05,
3494
+ "loss": 0.0234,
3495
+ "step": 47900
3496
+ },
3497
+ {
3498
+ "epoch": 1.536,
3499
+ "grad_norm": 0.032463911920785904,
3500
+ "learning_rate": 1.3900609951845907e-05,
3501
+ "loss": 0.0389,
3502
+ "step": 48000
3503
+ },
3504
+ {
3505
+ "epoch": 1.536,
3506
+ "eval_accuracy": 0.98882,
3507
+ "eval_f1": 0.9888222770915966,
3508
+ "eval_loss": 0.051280781626701355,
3509
+ "eval_precision": 0.9888728773724597,
3510
+ "eval_recall": 0.98882,
3511
+ "eval_runtime": 775.3173,
3512
+ "eval_samples_per_second": 128.979,
3513
+ "eval_steps_per_second": 8.061,
3514
+ "step": 48000
3515
  }
3516
  ],
3517
  "logging_steps": 100,
 
3526
  "early_stopping_threshold": 0.0
3527
  },
3528
  "attributes": {
3529
+ "early_stopping_patience_counter": 1
3530
  }
3531
  },
3532
  "TrainerControl": {
 
3540
  "attributes": {}
3541
  }
3542
  },
3543
+ "total_flos": 2.0383461668418547e+17,
3544
  "train_batch_size": 16,
3545
  "trial_name": null,
3546
  "trial_params": null