Alon Albalak committed
Commit · 6cef7dd · 1 Parent(s): 65bd58a
minor fixes
Browse files:
- app.py +0 -1
- src/models/llm_manager.py +3 -0
app.py CHANGED
@@ -9,7 +9,6 @@ from src.session.session_manager import SessionManager
 from src.ui.template_renderer import TemplateRenderer
 from src.ui.page_handlers import PageHandlers
 from src.ui.interface_builder import InterfaceBuilder
-from src.config.settings import DEFAULT_SERVER_NAME, DEFAULT_SERVER_PORT, DEFAULT_SHARE
 
 class CollaborativeDecodingApp:
     def __init__(self):
src/models/llm_manager.py CHANGED
@@ -86,6 +86,9 @@ class LLMManager:
             temperature=1.0,
             pad_token_id=self.tokenizer.eos_token_id
         )
+
+        # Move output back to CPU and decode
+        outputs = outputs.cpu()
 
         full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         assistant_part = full_response.split("Assistant: ")[-1]