Spaces: Runtime error
Saiteja Solleti committed · Commit 55915f0 · Parent(s): 14249c4

Revert "tokening"

This reverts commit 585864a7c72456302949a2de8f4ae04376afd17f.
- generationhelper.py +0 -33
- requirements.txt +1 -2
generationhelper.py
CHANGED
@@ -1,7 +1,5 @@
 import os
 from groq import Groq
-import time
-import tiktoken
 
 groq_token = os.getenv("GROQ_TOKEN")
 
@@ -9,35 +7,6 @@ groq_client = Groq(
     api_key = groq_token
 )
 
-# Initialize token counter and timestamp
-tokens_used = 0
-start_time = time.time()
-
-def Count_tokens(text: str, model="gpt-3.5-turbo"):
-    """Counts tokens in the given text using tiktoken."""
-    enc = tiktoken.encoding_for_model(model)
-    return len(enc.encode(text))
-
-def Enforce_token_limit(prompt, max_tokens_per_minute=6000):
-    """Ensures that token usage stays within the allowed rate limit."""
-    global tokens_used, start_time
-
-    tokens = Count_tokens(prompt)
-    elapsed_time = time.time() - start_time
-
-    # If the token limit is exceeded, wait until the reset
-    if tokens_used + tokens > max_tokens_per_minute:
-        if elapsed_time < 60:
-            sleep_time = 60 - elapsed_time
-            print(f"Rate limit reached! Sleeping for {sleep_time:.2f} seconds...")
-            time.sleep(sleep_time)
-
-        # Reset counter after sleeping
-        tokens_used = 0
-        start_time = time.time()
-
-    # Update token count
-    tokens_used += tokens
 
 
 def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30):
@@ -59,8 +28,6 @@ def GenerateAnswer(query, top_documents, prompt_model, timeout_seconds: int = 30
     Answer:
     """
 
-    Enforce_token_limit(prompt)
-
     # Call Groq API (Llama 3.3-70B)
     completion = groq_client.chat.completions.create(
         model=prompt_model,
requirements.txt
CHANGED
@@ -5,5 +5,4 @@ huggingface_hub
 pymilvus
 nltk
 sentence-transformers
-Groq
-tiktoken
+Groq