Spaces:

VanYsa
/

MyAlexa

Paused

App Files Files Community

VanYsa committed on Apr 28, 2024

Commit

7771452

1 Parent(s): 5817424

test LLM 3

Browse files

Files changed (1) hide show

app.py +201 -25

app.py CHANGED Viewed

@@ -1,24 +1,154 @@
 import gradio as gr
-import spaces
-import torch
 import transformers
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_name,
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda",
 )
-@spaces.GPU
-def chat_function(message, history, system_prompt,max_new_tokens,temperature):
     messages = [
-        {"role": "system", "content": system_prompt},
         {"role": "user", "content": message},
     ]
     prompt = pipeline.tokenizer.apply_chat_template(
@@ -40,20 +170,66 @@ def chat_function(message, history, system_prompt,max_new_tokens,temperature):
         top_p=0.9,
     )
     return outputs[0]["generated_text"][len(prompt):]
-gr.ChatInterface(
-    chat_function,
-    chatbot=gr.Chatbot(height=400),
-    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
-    title="LLAMA 3 8B Chat",
-    description="""
-    This space is dedicated for chatting with Meta's Latest LLM - Llama 8b Instruct. Find this model here: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
-    Feel free to play with customization in the "Additional Inputs".
-    """,
-    theme="soft",
-    additional_inputs=[
-        gr.Textbox("You are helpful AI.", label="System Prompt"),
-        gr.Slider(512, 4096, label="Max New Tokens"),
-        gr.Slider(0, 1, label="Temperature")
-    ]
-).launch()

 import gradio as gr
+import json
+import librosa
+import os
+import soundfile as sf
+import tempfile
+import uuid
 import transformers
 import torch
+import time
+import spaces
+from nemo.collections.asr.models import ASRModel
+from transformers import GemmaTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+# Set an environment variable
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+SAMPLE_RATE = 16000 # Hz
+MAX_AUDIO_SECONDS = 40 # wont try to transcribe if longer than this
+DESCRIPTION = '''
+<div>
+<h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
+<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant with chat logs that accepts audio input and outputs an AI response. </p>
+<p>This space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for Automatic Speech-to-text Recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Insruct</b></a> for the large language model (LLM) and <a href="https://https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text to speech (TTS).</p>
+<p>This demo accepts audio inputs not more than 40 seconds long.</p>
+<p>Transcription and responses are limited to the English language.</p>
+</div>
+'''
+PLACEHOLDER = """
+<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+   <img src="https://i.ibb.co/S35q17Q/My-Alexa-Logo.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55;  ">
+   <p style="font-size: 28px; margin-bottom: 2px; opacity: 0.65;">What's on your mind?</p>
+</div>
+"""
+# Pick the GPU when torch reports one is available, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+### ASR model
+# Load the pretrained "nvidia/canary-1b" checkpoint, move it to `device` and put it in eval mode.
+canary_model = ASRModel.from_pretrained("nvidia/canary-1b").to(device)
+canary_model.eval()
+# make sure beam size always 1 for consistency
+# (the decoding strategy is reset first, then re-applied with the edited config)
+canary_model.change_decoding_strategy(None)
+decoding_cfg = canary_model.cfg.decoding
+decoding_cfg.beam.beam_size = 1
+canary_model.change_decoding_strategy(decoding_cfg)
+### LLM model
+llm_model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
+# Text-generation pipeline for the LLM, loaded in bfloat16.
+# NOTE(review): device is hard-coded to "cuda" here although `device` above
+# falls back to CPU; confirm whether the pipeline should use `device` too.
 pipeline = transformers.pipeline(
     "text-generation",
+    model=llm_model_name,
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda",
 )
+def convert_audio(audio_filepath, tmpdir, utt_id):
+	"""
+	Convert all files to monochannel 16 kHz wav files.
+	Do not convert and raise error if audio is too long.
+	Returns output filename and duration.
+	"""
+	data, sr = librosa.load(audio_filepath, sr=None, mono=True)
+	duration = librosa.get_duration(y=data, sr=sr)
+	if duration > MAX_AUDIO_SECONDS:
+		raise gr.Error(
+			f"This demo can transcribe up to {MAX_AUDIO_SECONDS} seconds of audio. "
+			"If you wish, you may trim the audio using the Audio viewer in Step 1 "
+			"(click on the scissors icon to start trimming audio)."
+		)
+	if sr != SAMPLE_RATE:
+		data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
+	out_filename = os.path.join(tmpdir, utt_id + '.wav')
+	# save output audio
+	sf.write(out_filename, data, SAMPLE_RATE)
+	return out_filename, duration
+def transcribe(audio_filepath):
+	"""
+	Transcribes a converted audio file.
+	Set to english language with punctuations.
+	Returns the output text.
+	"""
+	if audio_filepath is None:
+		raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
+	utt_id = uuid.uuid4()
+	with tempfile.TemporaryDirectory() as tmpdir:
+		converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
+		# make manifest file and save
+		manifest_data = {
+			"audio_filepath": converted_audio_filepath,
+			"source_lang": "en",
+			"target_lang": "en",
+			"taskname": "asr",
+			"pnc": "yes",
+			"answer": "predict",
+			"duration": str(duration),
+		}
+		manifest_filepath = os.path.join(tmpdir, f'{utt_id}.json')
+		with open(manifest_filepath, 'w') as fout:
+			line = json.dumps(manifest_data)
+			fout.write(line + '\n')
+		# call transcribe, passing in manifest filepath
+		output_text = canary_model.transcribe(manifest_filepath)[0]
+	return output_text
+def add_message(history, message):
+	"""
+	Adds the input message in the chatbot.
+	Returns the updated chatbot with an empty input textbox.
+	"""
+	history.append((message, None))
+	return history
+def bot(history,message):
+	"""
+	Prints the LLM's response in the chatbot
+	"""
+	response = bot_response(message, history, 0.7, 100)
+	#response = "bot_response(message)"
+	history[-1][1] = ""
+	for character in response:
+		history[-1][1] += character
+		time.sleep(0.05)
+		yield history
+@spaces.GPU()
+def bot_response(message, history, max_new_tokens, temperature):
+    """
+    Generates the LLM's reply to a single user message.
+
+    Builds a two-message chat (a fixed system prompt plus `message`), renders
+    it with the pipeline tokenizer's chat template and returns the generated
+    text with the prompt prefix removed.
+
+    NOTE(review): the middle of this function is not shown in this diff hunk;
+    how `history`, `max_new_tokens` and `temperature` are used should be
+    confirmed against the full file.
+    """
     messages = [
+        {"role": "system", "content": "You are a helpful AI assistant."},
         {"role": "user", "content": message},
     ]
     prompt = pipeline.tokenizer.apply_chat_template(
         top_p=0.9,
     )
     return outputs[0]["generated_text"][len(prompt):]
+with gr.Blocks(
+	title="MyAlexa",
+	css="""
+		textarea { font-size: 18px;}
+	""",
+	theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
+) as demo:
+	gr.HTML(DESCRIPTION)
+	chatbot = gr.Chatbot(
+        [],
+        elem_id="chatbot",
+        bubble_full_width=False,
+		placeholder=PLACEHOLDER,
+		label='MyAlexa'
+    )
+	with gr.Row():
+		with gr.Column():
+			gr.HTML(
+				"<p><b>Step 1:</b> Upload an audio file or record with your microphone.</p>"
+			)
+			audio_file = gr.Audio(sources=["microphone", "upload"], type="filepath")
+		with gr.Column():
+			gr.HTML("<p><b>Step 2:</b> Enter audio as input and wait for MyAlexa's response.</p>")
+			submit_button = gr.Button(
+				value="Submit audio",
+				variant="primary"
+			)
+			chat_input = gr.Textbox(
+				label="Transcribed text:",
+				interactive=False,
+				placeholder="Enter message",
+				elem_id="chat_input",
+				visible=True
+			)
+			gr.HTML("<p><b>Step 2:</b> Enter audio as input and wait for MyAlexa's response.</p>")
+			submit_button = gr.Button(
+				value="Submit audio",
+				variant="primary"
+			)
+	chat_msg = chat_input.change(add_message, [chatbot, chat_input], [chatbot])
+	bot_msg = chat_msg.then(bot, [chatbot, chat_input], chatbot, api_name="bot_response")
+	# bot_msg.then(lambda: gr.Textbox(interactive=False), None, [chat_input])
+	submit_button.click(
+		fn=transcribe,
+		inputs = [audio_file],
+		outputs = [chat_input]
+	)
+demo.queue()
+if __name__ == "__main__":
+    demo.launch()