kureha295 committed on
Commit b5dc4c0 · verified · 1 Parent(s): c94ae1a

Update app.py

Files changed (1)
app.py (+2 -2)
app.py CHANGED
@@ -9,7 +9,7 @@ from queue import Queue
 import time
 
 # Models
-ORTHO_MODEL = "kureha295/ortho_model"
+ORTHO_MODEL = "kureha295/cot150_plus"
 BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_NEW_TOKENS = 2048 + 1024
@@ -189,7 +189,7 @@ with gr.Blocks() as demo:
 # 😇 DeepSeek vs 👹 ORTHO Model Comparison
 SPAR Project "Adversarial Manipulation of Reasoning Models using Internal Representations".
 Enter a prompt to compare the reasoning and responses of the two models.
-The base model is deepseek-ai/DeepSeek-R1-Distill-Llama-8B. The ORTHO model is kureha295/ortho_model_2 and has been produced through a rank-one weight modification preventing the reasoning model from writing a single `cautious' direction to the transformer residual stream.
+The base model is deepseek-ai/DeepSeek-R1-Distill-Llama-8B. The ORTHO model is kureha295/cot150_plus and has been produced through a rank-one weight modification preventing the reasoning model from writing a single `cautious' direction to the transformer residual stream. It used the larger cautious/incautious dataset of 106 rows, taking activations for 150 CoT tokens at layer 17.
 """)
 
 with gr.Row():
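
The new description refers to a rank-one weight modification that stops the model from writing a single "cautious" direction to the residual stream, with the direction estimated from layer-17 activations over the first 150 CoT tokens of a 106-row cautious/incautious dataset. The commit itself only swaps the model ID, so the sketch below is illustrative only: it shows the usual shape of such an edit (a difference-of-means direction plus weight orthogonalization). The function names, the difference-of-means estimator, and the exact set of edited matrices are assumptions, not code from the kureha295 repositories.

```python
import torch
from transformers import AutoModelForCausalLM


def mean_difference_direction(cautious_acts: torch.Tensor,
                              incautious_acts: torch.Tensor) -> torch.Tensor:
    # Each tensor: residual-stream activations at one layer (layer 17 per the
    # description), stacked over the first 150 CoT tokens of every prompt,
    # shape (n_tokens, d_model). The difference-of-means estimator is an
    # assumption; the activation-collection pipeline is not part of this commit.
    direction = cautious_acts.mean(dim=0) - incautious_acts.mean(dim=0)
    return direction / direction.norm()


def orthogonalize(W: torch.Tensor, d: torch.Tensor) -> torch.Tensor:
    # Rank-one edit W' = (I - d d^T) W for a matrix W whose output lives in the
    # residual stream (shape (d_model, d_in)); the edited matrix can no longer
    # write any component along d.
    d = d.to(dtype=W.dtype, device=W.device)
    d = d / d.norm()
    return W - torch.outer(d, d @ W)


def ablate_direction(model, direction: torch.Tensor) -> None:
    # Apply the edit to every matrix that writes into the residual stream of a
    # Llama-style model: token embeddings, attention output projections and MLP
    # down-projections (attribute names follow the Hugging Face Llama
    # implementation used by DeepSeek-R1-Distill-Llama-8B).
    emb = model.model.embed_tokens.weight.data          # (vocab, d_model)
    model.model.embed_tokens.weight.data = orthogonalize(emb.T, direction).T.contiguous()
    for layer in model.model.layers:
        layer.self_attn.o_proj.weight.data = orthogonalize(
            layer.self_attn.o_proj.weight.data, direction)
        layer.mlp.down_proj.weight.data = orthogonalize(
            layer.mlp.down_proj.weight.data, direction)


# Example (hypothetical): edit the distilled base model and save the result.
# model = AutoModelForCausalLM.from_pretrained(
#     "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", torch_dtype=torch.bfloat16)
# ablate_direction(model, cautious_direction)
# model.save_pretrained("ortho_model")
```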