Create app.py
app.py
import gradio as gr
import numpy as np
import librosa
import soundfile as sf
import pandas as pd

def generate_audio(clip_length=4.0, fade_in_duration=0.5, fade_out_duration=0.5, volume_factor=0.3):
    # Load audio files
    narration, sr = librosa.load('narration.wav', sr=None)
    baa, _ = librosa.load('baa.wav', sr=sr)
    murmur, _ = librosa.load('murmur.wav', sr=sr)

    # Calculate RMS for normalization
    narration_rms = np.sqrt(np.mean(narration**2))
    baa_rms = np.sqrt(np.mean(baa**2))
    murmur_rms = np.sqrt(np.mean(murmur**2))

    # Normalize baa and murmur to match narration volume
    baa_normalized = baa * (narration_rms / baa_rms)
    murmur_normalized = murmur * (narration_rms / murmur_rms)

    # Create output array with narration length
    output_length = len(narration)
    combined = np.zeros(output_length)

    # Add narration as baseline
    combined += narration

    # Add first clip_length seconds of baa at 0:05 (5 seconds) with fade in/out
    baa_clip = baa_normalized[:int(clip_length * sr)] * volume_factor

    # Create fade-in and fade-out envelopes
    # fade_in_duration and fade_out_duration are factors (0-1) of the clip length
    fade_in_samples = int(fade_in_duration * len(baa_clip))
    fade_out_samples = int(fade_out_duration * len(baa_clip))
    fade_in = np.linspace(0, 1, fade_in_samples)
    fade_out = np.linspace(1, 0, fade_out_samples)

    # Apply fade effects
    if fade_in_samples > 0:
        baa_clip[:fade_in_samples] *= fade_in
    if fade_out_samples > 0:
        baa_clip[-fade_out_samples:] *= fade_out

    start_idx = int(5 * sr)
    end_idx = start_idx + len(baa_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += baa_clip

    # Add first clip_length seconds of murmur at 0:15 (15 seconds) with fade in/out
    murmur_clip = murmur_normalized[:int(clip_length * sr)] * volume_factor

    # Calculate fade samples for murmur clip
    murmur_fade_in_samples = int(fade_in_duration * len(murmur_clip))
    murmur_fade_out_samples = int(fade_out_duration * len(murmur_clip))

    # Apply fade effects to murmur
    if murmur_fade_in_samples > 0:
        murmur_fade_in = np.linspace(0, 1, murmur_fade_in_samples)
        murmur_clip[:murmur_fade_in_samples] *= murmur_fade_in
    if murmur_fade_out_samples > 0:
        murmur_fade_out = np.linspace(1, 0, murmur_fade_out_samples)
        murmur_clip[-murmur_fade_out_samples:] *= murmur_fade_out

    start_idx = int(15 * sr)
    end_idx = start_idx + len(murmur_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += murmur_clip

    # Normalize to prevent clipping
    max_val = np.max(np.abs(combined))
    if max_val > 1.0:
        combined = combined / max_val

    return (sr, combined)

def visualize_sfx(sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor):
    # Calculate fade durations in seconds
    fade_in_seconds = fade_in_duration * sound_effect_clip_length
    fade_out_seconds = fade_out_duration * sound_effect_clip_length

    # Create time array with high resolution for smooth visualization
    time_resolution = 0.01  # 10ms resolution
    times = np.arange(0, sound_effect_clip_length + time_resolution, time_resolution)

    # Calculate volume envelope
    volumes = []
    for t in times:
        if t <= fade_in_seconds and fade_in_seconds > 0:
            # Fade in phase
            volume = sound_effect_volume_factor * (t / fade_in_seconds)
        elif t >= sound_effect_clip_length - fade_out_seconds and fade_out_seconds > 0:
            # Fade out phase
            fade_out_progress = (sound_effect_clip_length - t) / fade_out_seconds
            volume = sound_effect_volume_factor * fade_out_progress
        else:
            # Steady state phase
            volume = sound_effect_volume_factor

        volumes.append(volume)

    # Create DataFrame for LinePlot
    plot_data = pd.DataFrame({
        "time": times,
        "volume": volumes
    })

    return plot_data

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            sound_effect_clip_length = gr.Slider(minimum=0.5, maximum=5, value=4.0, step=0.1, label="Sound Effect Clip Length (seconds)")
            fade_in_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade In Duration Factor", info="0.0 = no fade in, 1.0 = fade in over entire clip")
            fade_out_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade Out Duration Factor", info="0.0 = no fade out, 1.0 = fade out over entire clip")
            sound_effect_volume_factor = gr.Slider(minimum=0.1, maximum=1.0, value=0.15, step=0.05, label="Sound Effect Volume Factor", info="0.1 is 10% of the narration volume, 1.0 is 100% of the original volume")
            visualization = gr.LinePlot(label="Sound Effect Volume Envelope", x="time", y="volume", y_lim=[0, 1])
            generate_button = gr.Button("Generate Audio")
        with gr.Column():
            output = gr.Audio()

    gr.on(
        [demo.load, sound_effect_clip_length.change, fade_in_duration.change, fade_out_duration.change, sound_effect_volume_factor.change],
        fn=visualize_sfx,
        inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor],
        outputs=visualization
    )
    generate_button.click(generate_audio, inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor], outputs=output)

if __name__ == "__main__":
    demo.launch()
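
Not part of app.py itself: a minimal sketch of how generate_audio could be exercised outside the Gradio UI, assuming narration.wav, baa.wav, and murmur.wav sit next to the script. The output filename mixed.wav is arbitrary; the snippet simply reuses the soundfile import to write the returned mix to disk.

# Standalone sketch; assumes narration.wav, baa.wav and murmur.wav are present
import soundfile as sf
from app import generate_audio

# Same values as the slider defaults in the UI
sr, mixed = generate_audio(clip_length=4.0, fade_in_duration=0.2,
                           fade_out_duration=0.2, volume_factor=0.15)
sf.write("mixed.wav", mixed, sr)  # write the combined track to disk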