| import os | |
| from openai import OpenAI | |
| # os.environ['LEPTON_API_KEY']= 'e6ua0rtm4drrpl7tz16farcczod387dz' | |
| # os.environ['LEPTON_API_KEY']= 'twoun3dz0fzw289dgyp2rlb3kltti8zi' | |
def generate_response(model: str, user_query: str) -> str:
    """Send *user_query* to a Lepton-hosted chat model and return its reply.

    The request goes to ``https://{model}.lepton.run/api/v1`` as a single
    ``user`` message, capped at 4096 completion tokens. The response is
    consumed as a stream and the text fragments are concatenated.

    Args:
        model: Name used both as the sub-domain of the endpoint URL and as
            the ``model`` field of the request.
        user_query: Text sent as the content of the user message.

    Returns:
        The concatenated text of all streamed fragments; an empty string
        if the stream carried no text.

    Raises:
        RuntimeError: If the ``LEPTON_API_KEY`` environment variable is
            unset or empty.
    """
    # The key must come from the environment: a credential embedded in the
    # source is exposed to everyone who can read the file or its history.
    api_key = os.environ.get("LEPTON_API_KEY")
    if not api_key:
        raise RuntimeError(
            "LEPTON_API_KEY environment variable is not set; "
            "export it before calling generate_response()."
        )

    client = OpenAI(
        api_key=api_key,
        base_url=f"https://{model}.lepton.run/api/v1",
    )
    stream = client.chat.completions.create(
        model=str(model),
        messages=[{"role": "user", "content": user_query}],
        max_tokens=4096,
        stream=True,
    )

    fragments = []
    for chunk in stream:
        # Some chunks arrive with no choices; skip them.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        # The delta content may be None or empty; only keep real text.
        if text:
            fragments.append(text)
    return "".join(fragments)