gradio-ci committed (verified)
Commit: d2b29c0
Parent: 2dbf1b9

Create app.py

Files changed (1):
  app.py  (+131, -0)
app.py ADDED
@@ -0,0 +1,131 @@
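# Gradio demo: mixes short "baa" and "murmur" sound effects into a narration
# track with adjustable clip length, fade in/out and volume, and previews the
# resulting sound-effect volume envelope as a line plot.
# Requires narration.wav, baa.wav and murmur.wav in the working directory.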
import gradio as gr
import numpy as np
import librosa
import soundfile as sf
import pandas as pd

def generate_audio(clip_length=4.0, fade_in_duration=0.5, fade_out_duration=0.5, volume_factor=0.3):
    # Load audio files
    narration, sr = librosa.load('narration.wav', sr=None)
    baa, _ = librosa.load('baa.wav', sr=sr)
    murmur, _ = librosa.load('murmur.wav', sr=sr)

    # Calculate RMS for normalization
    narration_rms = np.sqrt(np.mean(narration**2))
    baa_rms = np.sqrt(np.mean(baa**2))
    murmur_rms = np.sqrt(np.mean(murmur**2))

    # Normalize baa and murmur to match narration volume
    baa_normalized = baa * (narration_rms / baa_rms)
    murmur_normalized = murmur * (narration_rms / murmur_rms)

    # Create output array with narration length
    output_length = len(narration)
    combined = np.zeros(output_length)

    # Add narration as baseline
    combined += narration

    # Add first clip_length seconds of baa at 0:05 (5 seconds) with fade in/out
    baa_clip = baa_normalized[:int(clip_length*sr)] * volume_factor

    # Create fade-in and fade-out envelopes
    # fade_in_duration and fade_out_duration are factors (0-1) of the clip length
    fade_in_samples = int(fade_in_duration * len(baa_clip))
    fade_out_samples = int(fade_out_duration * len(baa_clip))
    fade_in = np.linspace(0, 1, fade_in_samples)
    fade_out = np.linspace(1, 0, fade_out_samples)

    # Apply fade effects
    if fade_in_samples > 0:
        baa_clip[:fade_in_samples] *= fade_in
    if fade_out_samples > 0:
        baa_clip[-fade_out_samples:] *= fade_out

    start_idx = int(5 * sr)
    end_idx = start_idx + len(baa_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += baa_clip

    # Add first clip_length seconds of murmur at 0:15 (15 seconds) with fade in/out
    murmur_clip = murmur_normalized[:int(clip_length*sr)] * volume_factor

    # Calculate fade samples for murmur clip
    murmur_fade_in_samples = int(fade_in_duration * len(murmur_clip))
    murmur_fade_out_samples = int(fade_out_duration * len(murmur_clip))

    # Apply fade effects to murmur
    if murmur_fade_in_samples > 0:
        murmur_fade_in = np.linspace(0, 1, murmur_fade_in_samples)
        murmur_clip[:murmur_fade_in_samples] *= murmur_fade_in
    if murmur_fade_out_samples > 0:
        murmur_fade_out = np.linspace(1, 0, murmur_fade_out_samples)
        murmur_clip[-murmur_fade_out_samples:] *= murmur_fade_out

    start_idx = int(15 * sr)
    end_idx = start_idx + len(murmur_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += murmur_clip

    # Normalize to prevent clipping
    max_val = np.max(np.abs(combined))
    if max_val > 1.0:
        combined = combined / max_val

    return (sr, combined)

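# Note: visualize_sfx recomputes the same fade-in / steady / fade-out envelope
# that generate_audio applies to the audio, but only for the LinePlot preview.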
def visualize_sfx(sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor):
    # Calculate fade durations in seconds
    fade_in_seconds = fade_in_duration * sound_effect_clip_length
    fade_out_seconds = fade_out_duration * sound_effect_clip_length

    # Create time array with high resolution for smooth visualization
    time_resolution = 0.01  # 10ms resolution
    times = np.arange(0, sound_effect_clip_length + time_resolution, time_resolution)

    # Calculate volume envelope
    volumes = []
    for t in times:
        if t <= fade_in_seconds and fade_in_seconds > 0:
            # Fade in phase
            volume = sound_effect_volume_factor * (t / fade_in_seconds)
        elif t >= sound_effect_clip_length - fade_out_seconds and fade_out_seconds > 0:
            # Fade out phase
            fade_out_progress = (sound_effect_clip_length - t) / fade_out_seconds
            volume = sound_effect_volume_factor * fade_out_progress
        else:
            # Steady state phase
            volume = sound_effect_volume_factor

        volumes.append(volume)

    # Create DataFrame for LinePlot
    plot_data = pd.DataFrame({
        "time": times,
        "volume": volumes
    })

    return plot_data

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            sound_effect_clip_length = gr.Slider(minimum=0.5, maximum=5, value=4.0, step=0.1, label="Sound Effect Clip Length (seconds)")
            fade_in_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade In Duration Factor", info="0.0 = no fade in, 1.0 = fade in over entire clip")
            fade_out_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade Out Duration Factor", info="0.0 = no fade out, 1.0 = fade out over entire clip")
            sound_effect_volume_factor = gr.Slider(minimum=0.1, maximum=1.0, value=0.15, step=0.05, label="Sound Effect Volume Factor", info="0.1 is 10% of the narration volume, 1.0 is 100% of the narration volume")
            visualization = gr.LinePlot(label="Sound Effect Volume Envelope", x="time", y="volume", y_lim=[0, 1])
            generate_button = gr.Button("Generate Audio")
        with gr.Column():
            output = gr.Audio()

    gr.on(
        [demo.load, sound_effect_clip_length.change, fade_in_duration.change, fade_out_duration.change, sound_effect_volume_factor.change],
        fn=visualize_sfx,
        inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor],
        outputs=visualization
    )
    generate_button.click(generate_audio, inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor], outputs=output)

if __name__ == "__main__":
    demo.launch()
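For a quick check outside the Gradio UI, generate_audio can also be called directly. A minimal sketch, assuming narration.wav, baa.wav and murmur.wav sit next to app.py (the output name mix_preview.wav is arbitrary):

import soundfile as sf
from app import generate_audio

# Mirror the UI defaults: 4 s clips, 20% fades, effects at 15% of narration level
sr, mix = generate_audio(clip_length=4.0, fade_in_duration=0.2,
                         fade_out_duration=0.2, volume_factor=0.15)
sf.write("mix_preview.wav", mix, sr)  # write the combined track to disk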