Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import subprocess | |
| import whisper | |
# --- Keys under which values are stored in st.session_state ---------------
URL = 'URL'
TEXT = 'TEXT'
TITLE = 'TITLE'
PROCESSING = 'PROCESSING'
AUDIO_EXISTS = "AUDIO_EXISTS"
TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"

# Keys that init_state() seeds with '' (STATES) or False (BOOL_STATES).
STATES = [TEXT, TITLE]
BOOL_STATES = [AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]

# --- Working files, relative to the current working directory -------------
AUDIO_FILE = "audio.mp3"
TRANSCRIPT = "transcript.txt"

# Module-level placeholder. The functions in this file that deal with the
# Whisper model bind their own local `model` and never read this global.
model = ''

st.title('Youtube Assistant')
def init_state():
    """Seed st.session_state with defaults for every key that is missing.

    Keys listed in STATES default to the empty string, keys listed in
    BOOL_STATES default to False. Keys that are already present are left
    untouched.
    """
    # print("Page refreshed")
    for names, default in ((STATES, ''), (BOOL_STATES, False)):
        for name in names:
            if name not in st.session_state:
                st.session_state[name] = default
def clear_old_files():
    """Delete leftover audio/transcript files from the working directory.

    Removes every entry of the current directory whose name ends in
    ".mp3" or equals TRANSCRIPT, then refreshes the existence flags in
    the session state.
    """
    print("Clearing old files")
    stale = (
        name
        for name in os.listdir()
        if name.endswith(".mp3") or name == TRANSCRIPT
    )
    for name in stale:
        os.remove(name)
        print(f"Removed old files::{name}")
    # Refresh audio state
    check_audio()
def load_whisper(model_name="small"):
    """Load and return a Whisper model.

    Args:
        model_name: Name passed to ``whisper.load_model``. Defaults to
            "small", which is what this function always loaded before
            the parameter existed.

    Returns:
        The object returned by ``whisper.load_model``.
    """
    # Refresh the audio/transcript existence flags before the load.
    check_audio()
    model = whisper.load_model(model_name)
    # The previous message claimed "Medium" although "small" was loaded;
    # report the model that was actually requested.
    print(f'Loaded Whisper {model_name} model')
    return model
def transcribe():
    """Transcribe the downloaded audio file with Whisper.

    Does nothing (and returns None) when the AUDIO_EXISTS flag in the
    session state is false. Otherwise the transcript text is stored in
    st.session_state[TEXT], written to TRANSCRIPT, and the stringified
    segment list is written to "segments.txt".

    Returns:
        The transcript text, or None when there is no audio to process.
    """
    if not st.session_state[AUDIO_EXISTS]:
        return None

    model = load_whisper()
    # Use the shared constants so this function always reads and writes
    # the same paths that check_audio() inspects.
    result = model.transcribe(AUDIO_FILE)
    text = result["text"]
    st.session_state[TEXT] = text
    print(f"Start - { text[:100]}")
    print(f"End - { text[-100:]}")
    write_file(text, TRANSCRIPT)
    # The transcript now exists on disk; refresh the existence flags.
    check_audio()
    write_file(str(result["segments"]), "segments.txt")
    return text
def check_audio():
    """Record in the session state whether the audio and transcript files exist."""
    tracked = (
        (AUDIO_EXISTS, AUDIO_FILE),
        (TRANSCRIPT_EXISTS, TRANSCRIPT),
    )
    for flag, path in tracked:
        st.session_state[flag] = os.path.exists(path)
def load_audio():
    """Show an audio player for AUDIO_FILE when the audio flag is set.

    Does nothing when AUDIO_EXISTS is missing from the session state or
    is false.
    """
    if not (AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]):
        return
    # Context manager closes the handle; it was previously left open.
    with open(AUDIO_FILE, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/mp3")
def display():
    """Render the page: URL form, transcript text area and download buttons.

    When the form is submitted with a non-empty URL, old files are
    cleared, the audio is downloaded and shown, and the audio is
    transcribed. The PROCESSING flag is set for the duration of that
    work and is always reset afterwards, even if a step raises.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file that had lost its indentation; confirm widget placement (in
    particular that the processing branch sits after the form and that
    load_audio() runs outside the download spinner).
    """
    check_audio()
    container = st.container()
    text_container = st.container()

    with container:
        with st.form(key='input_form', clear_on_submit=False):
            user_input = st.text_input(
                "Youtube URL:",
                placeholder="https://www.youtube.com",
                key=URL,
            )
            input_submit_button = st.form_submit_button(label='Send')

        if input_submit_button and user_input:
            st.session_state[PROCESSING] = True
            try:
                clear_old_files()
                with st.spinner('Downloading Audio...'):
                    download()
                load_audio()
                with st.spinner('Transcribing Audio...'):
                    transcribe()
            finally:
                # Never leave the flag stuck at True after a failure.
                st.session_state[PROCESSING] = False

    with text_container:
        st.text_area(
            label=f"Youtube Transcript: {st.session_state[TITLE]}",
            height=200,
            value=st.session_state[TEXT],
        )

    # Download Button section
    col1, col2 = st.columns(2)
    with col1:
        if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
            with open(AUDIO_FILE, "rb") as f:
                data = f.read()
            st.download_button('Download MP3', data, AUDIO_FILE, key="mp3")
    with col2:
        if st.session_state[TRANSCRIPT_EXISTS]:
            if st.session_state[TEXT] == '':
                # A transcript exists on disk but not in the session:
                # load it, converting the raw bytes to a UTF-8 string.
                with open(TRANSCRIPT, "rb") as f:
                    st.session_state[TEXT] = f.read().decode("utf-8")
            st.download_button(
                "Download Transcript",
                st.session_state[TEXT],
                TRANSCRIPT,
                key="transcript",
            )
def download():
    """Fetch the video title and download its audio track as MP3.

    Reads the URL from st.session_state[URL]. Runs ``yt-dlp --get-title``
    and stores its stripped stdout in st.session_state[TITLE], then runs
    yt-dlp again to extract the audio to AUDIO_FILE, and finally
    refreshes the existence flags in the session state.
    """
    url = st.session_state[URL]

    # Get youtube title. "--" ends option parsing so a value starting
    # with "-" cannot be interpreted as a yt-dlp option.
    text = subprocess.run(
        ["yt-dlp", "--get-title", "--", url],
        capture_output=True,
    )
    st.session_state[TITLE] = text.stdout.decode("utf-8").strip()

    # Download and convert audio. The URL is user input, so pass an
    # argument list without a shell: the previous shell string allowed
    # command injection and also broke on ordinary URLs containing "&".
    command = [
        "yt-dlp",
        "--no-config",
        "-v",
        "--extract-audio",
        "--audio-format",
        "mp3",
        "-o",
        AUDIO_FILE,
        "--",
        url,
    ]
    print(command)
    subprocess.run(command)
    check_audio()
def write_file(text, filename):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing file.

    Args:
        text: The string to write.
        filename: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: the transcript is read back elsewhere in this file
    # with .decode("utf-8"), so the platform default encoding must not
    # be used here.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
def main():
    """Entry point: initialise the session state, then render the page."""
    init_state()
    display()


if __name__ == "__main__":
    main()