File size: 4,846 Bytes
356bab3
5cd1d6b
1487f7e
93d849e
949b582
4041d63
5cd1d6b
a952e20
4041d63
 
 
a952e20
80005cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477c4c7
4041d63
1487f7e
477c4c7
3c10179
1487f7e
 
 
 
 
477c4c7
 
 
 
 
 
80005cf
477c4c7
 
 
 
 
 
 
 
 
 
 
beed497
5cd1d6b
32920c5
5cd1d6b
 
80005cf
5cd1d6b
356bab3
 
 
4041d63
 
 
477c4c7
4041d63
477c4c7
4041d63
cf392a0
 
 
c8a13a7
477c4c7
cf392a0
477c4c7
 
4041d63
477c4c7
cf392a0
477c4c7
 
 
 
 
 
613192e
477c4c7
 
 
 
4041d63
32c6718
477c4c7
cf392a0
 
beed497
 
5cd1d6b
a69be69
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import os
from moviepy.editor import VideoFileClip
from transformers import pipeline

# Load models once at import time — each pipeline() call downloads/initializes
# model weights, so these must not be re-created per request.
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")  # speech -> text
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # abstractive summarization
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")  # extractive QA

# Most recent transcript, written by the transcribe functions and read by
# answer_question(). Module-level mutable state: shared across all users/sessions.
stored_transcript = ""

def chunk_text(text, max_words=800):
    """Yield successive pieces of *text*, each at most *max_words* words long.

    Words are defined by str.split() (any whitespace); an empty or
    whitespace-only *text* yields nothing.
    """
    tokens = text.split()
    start = 0
    while start < len(tokens):
        yield " ".join(tokens[start:start + max_words])
        start += max_words

def summarize_long_text(text):
    """Summarize *text* of arbitrary length.

    The text is split into word-bounded chunks (to fit the model's input
    limit), each chunk is summarized, and — when more than one chunk was
    needed — the per-chunk summaries are condensed by one final
    summarization pass.
    """
    partials = [
        summarizer(piece, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
        for piece in chunk_text(text)
    ]
    if len(partials) == 1:
        return partials[0]
    combined = " ".join(partials)
    return summarizer(combined, max_length=200, min_length=100, do_sample=False)[0]["summary_text"]

def transcribe_from_video(video_file):
    """Extract the audio track from a video, transcribe it, and summarize it.

    Args:
        video_file: Filesystem path to the uploaded video, or None.

    Returns:
        Tuple (transcribed_text, summarized_text). On failure the first
        element is an "Error: ..." message and the second is "".

    Side effects:
        Updates the module-level `stored_transcript` used by answer_question().
    """
    global stored_transcript
    if video_file is None:
        return "Error: No video file provided.", ""
    audio_path = "temp_audio.wav"
    video = None
    try:
        video = VideoFileClip(video_file)
        # A silent clip has video.audio == None; write_audiofile would raise
        # an unhelpful AttributeError, so report it explicitly instead.
        if video.audio is None:
            return "Error: Video has no audio track.", ""
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')

        transcription_result = asr(audio_path, return_timestamps=True)
        transcribed_text = " ".join(chunk["text"] for chunk in transcription_result["chunks"])
        stored_transcript = transcribed_text

        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            summarized_text = summarize_long_text(transcribed_text)
        return transcribed_text, summarized_text
    except Exception as e:
        return f"Error: {str(e)}", ""
    finally:
        # Fix two leaks in the original: the clip's OS file handles were
        # never released and the temporary WAV was never deleted.
        if video is not None:
            video.close()
        if os.path.exists(audio_path):
            os.remove(audio_path)

def transcribe_from_audio(audio_file):
    """Transcribe a recorded audio file and summarize the transcript.

    Args:
        audio_file: Filesystem path to the recorded audio, or None.

    Returns:
        Tuple (transcribed_text, summarized_text). On failure the first
        element is an "Error: ..." message and the second is "".

    Side effects:
        Updates the module-level `stored_transcript` used by answer_question().
    """
    global stored_transcript
    if audio_file is None:
        return "Error: No audio recorded.", ""
    try:
        result = asr(audio_file, return_timestamps=True)
        segments = [segment["text"] for segment in result["chunks"]]
        text = " ".join(segments)
        stored_transcript = text

        # Skip summarization for very short transcripts (< 50 words).
        summary = (
            "Text too short to summarize."
            if len(text.split()) < 50
            else summarize_long_text(text)
        )
        return text, summary
    except Exception as e:
        return f"Error: {str(e)}", ""

def answer_question(question):
    """Answer *question* by extractive QA over the most recent transcript.

    Returns a prompt message when nothing has been transcribed yet.
    """
    global stored_transcript
    if not stored_transcript:
        return "Please transcribe a video or record audio first."
    qa_result = qa_pipeline(question=question, context=stored_transcript)
    return qa_result["answer"]

# Build the Gradio UI: three tabs (video upload, live audio recording, Q&A),
# styled black/yellow via raw CSS overrides.
with gr.Blocks(css="""
body { background-color: black !important; }
.gradio-container { color: #FFFF33 !important; }
button { background-color: #FFFF33 !important; color: black !important; border: none !important; }
input, textarea, .gr-textbox, .gr-video, .gr-audio { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; }
""") as iface:
    gr.HTML("<h1 style='color:#FFFF33'>🎀 Video & Voice Transcriber, Summarizer & Q&A</h1>")
    gr.HTML("<p style='color:#CCCC33'>Upload a video or record speech to get transcript, summary, and ask questions.</p>")

    # Tab 1: upload a video file; button triggers transcription + summary.
    with gr.Tab("πŸŽ₯ Video Upload"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("πŸš€ Transcribe from Video")
        transcribed_text_v = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_v = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        transcribe_btn.click(fn=transcribe_from_video, inputs=video_input, outputs=[transcribed_text_v, summarized_text_v])

    # Tab 2: record audio in the browser (type="filepath" hands the handler
    # a temp-file path, matching what transcribe_from_audio expects).
    with gr.Tab("πŸŽ™οΈ Record Speech"):
        audio_input = gr.Audio(type="filepath", label="Record Audio")
        record_btn = gr.Button("🎧 Transcribe from Audio")
        transcribed_text_a = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_a = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        record_btn.click(fn=transcribe_from_audio, inputs=audio_input, outputs=[transcribed_text_a, summarized_text_a])

    # Tab 3: free-text question answered against the stored transcript.
    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(label="Ask a question about the transcript", placeholder="E.g., What was the main topic?")
        ask_btn = gr.Button("πŸ” Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Server port from the PORT1 env var (presumably injected by the hosting
# platform — TODO confirm); defaults to Gradio's standard 7860.
port = int(os.environ.get('PORT1', 7860))
# NOTE(review): Blocks.launch() normally blocks and its return value is not a
# plain URL string, so the print below likely shows a tuple/object rather than
# the share link — verify against the Gradio docs for the pinned version.
url = iface.launch(share=True, server_port=port)
print(f"Interface is live at: {url}")