File size: 4,846 Bytes
356bab3 5cd1d6b 1487f7e 93d849e 949b582 4041d63 5cd1d6b a952e20 4041d63 a952e20 80005cf 477c4c7 4041d63 1487f7e 477c4c7 3c10179 1487f7e 477c4c7 80005cf 477c4c7 beed497 5cd1d6b 32920c5 5cd1d6b 80005cf 5cd1d6b 356bab3 4041d63 477c4c7 4041d63 477c4c7 4041d63 cf392a0 c8a13a7 477c4c7 cf392a0 477c4c7 4041d63 477c4c7 cf392a0 477c4c7 613192e 477c4c7 4041d63 32c6718 477c4c7 cf392a0 beed497 5cd1d6b a69be69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import gradio as gr
import os
from moviepy.editor import VideoFileClip
from transformers import pipeline
# Load models (downloaded from the Hugging Face hub on first run; these are
# module-level so they are loaded once per process, not per request).
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")  # English-only Whisper distillation
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
# Most recent transcript, shared between the transcribe tabs and the QA tab.
# NOTE(review): module-level mutable state — fine for a single-user demo, but
# concurrent users would overwrite each other's transcript.
stored_transcript = ""
def chunk_text(text, max_words=800):
    """Yield successive pieces of *text*, each containing at most *max_words*
    whitespace-separated words (the final piece may be shorter)."""
    tokens = text.split()
    start = 0
    while start < len(tokens):
        yield " ".join(tokens[start:start + max_words])
        start += max_words
def summarize_long_text(text):
    """Summarize *text* of arbitrary length with the BART summarizer.

    The text is split into ~800-word chunks (the model's input is limited),
    each chunk is summarized, and if more than one chunk was needed the
    per-chunk summaries are summarized once more into a single final summary.

    Returns the summary string, or "" for empty/whitespace-only input
    (previously this raised IndexError on `summaries[0]`).
    """
    chunks = list(chunk_text(text))
    if not chunks:
        # Empty input produces no chunks; bail out instead of crashing below.
        return ""
    summaries = []
    for chunk in chunks:
        summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
        summaries.append(summary)
    if len(summaries) > 1:
        # Second pass: condense the concatenated chunk summaries.
        final_summary = summarizer(" ".join(summaries), max_length=200, min_length=100, do_sample=False)[0]["summary_text"]
        return final_summary
    return summaries[0]
def transcribe_from_video(video_file):
    """Extract the audio track from *video_file*, transcribe it, and summarize.

    Side effect: stores the transcript in the module-level `stored_transcript`
    so the QA tab can use it.

    Returns a `(transcribed_text, summarized_text)` tuple; on failure the
    first element is an "Error: ..." message and the second is "".
    """
    global stored_transcript
    if video_file is None:
        return "Error: No video file provided.", ""
    audio_path = "temp_audio.wav"
    video = None
    try:
        video = VideoFileClip(video_file)
        # Whisper wants plain PCM audio; write a temporary WAV.
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        transcription_result = asr(audio_path, return_timestamps=True)
        transcribed_text = " ".join(chunk["text"] for chunk in transcription_result["chunks"])
        stored_transcript = transcribed_text
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            summarized_text = summarize_long_text(transcribed_text)
        return transcribed_text, summarized_text
    except Exception as e:
        return f"Error: {str(e)}", ""
    finally:
        # Original leaked both the clip handle and the temp WAV on disk.
        if video is not None:
            video.close()
        if os.path.exists(audio_path):
            os.remove(audio_path)
def transcribe_from_audio(audio_file):
    """Transcribe a recorded audio file and summarize the result.

    Also stores the transcript in the module-level `stored_transcript` for
    the QA tab. Returns `(transcribed_text, summarized_text)`; on failure
    the first element is an "Error: ..." message and the second is "".
    """
    global stored_transcript
    if audio_file is None:
        return "Error: No audio recorded.", ""
    try:
        result = asr(audio_file, return_timestamps=True)
        text = " ".join(segment["text"] for segment in result["chunks"])
        stored_transcript = text
        # Very short transcripts are below the summarizer's min_length.
        if len(text.split()) < 50:
            summary = "Text too short to summarize."
        else:
            summary = summarize_long_text(text)
        return text, summary
    except Exception as err:
        return f"Error: {str(err)}", ""
def answer_question(question):
    """Answer *question* against the most recently stored transcript.

    Returns the extracted answer span, or a prompt to transcribe something
    first when no transcript is available yet.
    """
    global stored_transcript
    if stored_transcript:
        prediction = qa_pipeline(question=question, context=stored_transcript)
        return prediction["answer"]
    return "Please transcribe a video or record audio first."
# --- UI: three tabs (video upload, mic recording, QA) with a black/yellow theme ---
with gr.Blocks(css="""
body { background-color: black !important; }
.gradio-container { color: #FFFF33 !important; }
button { background-color: #FFFF33 !important; color: black !important; border: none !important; }
input, textarea, .gr-textbox, .gr-video, .gr-audio { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; }
""") as iface:
    gr.HTML("<h1 style='color:#FFFF33'>🎤 Video & Voice Transcriber, Summarizer & Q&A</h1>")
    gr.HTML("<p style='color:#CCCC33'>Upload a video or record speech to get transcript, summary, and ask questions.</p>")
    # Tab 1: upload a video file, extract + transcribe its audio.
    with gr.Tab("🎥 Video Upload"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("🔍 Transcribe from Video")
        transcribed_text_v = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_v = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        transcribe_btn.click(fn=transcribe_from_video, inputs=video_input, outputs=[transcribed_text_v, summarized_text_v])
    # Tab 2: record from the microphone (Audio delivers a temp filepath).
    with gr.Tab("🎙️ Record Speech"):
        audio_input = gr.Audio(type="filepath", label="Record Audio")
        record_btn = gr.Button("🎧 Transcribe from Audio")
        transcribed_text_a = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_a = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        record_btn.click(fn=transcribe_from_audio, inputs=audio_input, outputs=[transcribed_text_a, summarized_text_a])
    # Tab 3: extractive QA over the last stored transcript.
    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(label="Ask a question about the transcript", placeholder="E.g., What was the main topic?")
        ask_btn = gr.Button("🔎 Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)
# NOTE(review): env var is 'PORT1', not the conventional 'PORT' — confirm this
# matches the deployment platform's variable name.
port = int(os.environ.get('PORT1', 7860))
# NOTE(review): gr.Blocks.launch() does not return a plain URL string (it
# returns app/local_url/share_url), so this print likely shows a tuple —
# verify against the installed Gradio version.
url = iface.launch(share=True, server_port=port)
print(f"Interface is live at: {url}")
|