Spaces:
Running
Running
File size: 2,830 Bytes
9ea6dac f119108 3578727 f119108 9ea6dac 7115f26 af1f75b f119108 5042ce1 f119108 3578727 7115f26 f119108 45de38b 3578727 45de38b f119108 358fb0c 7115f26 f119108 9ea6dac f119108 9ea6dac 7115f26 f119108 7115f26 f119108 9ea6dac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
import torch
import torchaudio
import tempfile
import os
import json
# Sample rate (Hz) passed to the sampler and used when writing the WAV file.
_SAMPLE_RATE = 44100

# Process-wide cache so the expensive model setup runs once, not per request.
_STABLE_AUDIO_CACHE = {}


def _load_stable_audio_model():
    """Return ``(model, device)``, building and caching them on first use.

    Downloads ``model_config.json`` and ``model.safetensors`` from the
    ``stabilityai/stable-audio-open-1.0`` repository (authenticating with the
    ``HF_TOKEN`` environment variable when it is set), builds the model from
    the config, loads the checkpoint weights, moves the model to CUDA when
    available (otherwise CPU) and switches it to eval mode.
    """
    if "model" not in _STABLE_AUDIO_CACHE:
        # Imported lazily so a missing optional dependency surfaces as a
        # per-request error instead of breaking module import.
        from huggingface_hub import hf_hub_download
        from stable_audio_tools.models.factory import create_model_from_config
        from stable_audio_tools.models.utils import load_ckpt_state_dict

        token = os.getenv("HF_TOKEN")
        repo_id = "stabilityai/stable-audio-open-1.0"
        model_config_path = hf_hub_download(
            repo_id=repo_id,
            filename="model_config.json",
            token=token,
        )
        model_ckpt_path = hf_hub_download(
            repo_id=repo_id,
            filename="model.safetensors",
            token=token,
        )

        with open(model_config_path, encoding="utf-8") as config_file:
            model_config = json.load(config_file)

        model = create_model_from_config(model_config)
        model.load_state_dict(load_ckpt_state_dict(model_ckpt_path))
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        model.eval()

        # Store both entries only after every step succeeded, so a failed
        # load is retried on the next request instead of being cached.
        _STABLE_AUDIO_CACHE["device"] = device
        _STABLE_AUDIO_CACHE["model"] = model
    return _STABLE_AUDIO_CACHE["model"], _STABLE_AUDIO_CACHE["device"]


def generate_audio(prompt, duration=10):
    """Generate an audio clip from a text prompt and return a WAV file path.

    Args:
        prompt: Text description of the sound to generate.
        duration: Requested clip length in seconds; passed to the model as
            ``seconds_total`` and used to trim the saved waveform.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed here.

    Raises:
        gr.Error: If any step (dependency import, download, model load,
            generation, saving) fails. The original exception is chained.
            An error *string* must not be returned, because the Audio output
            component would interpret it as a file path.
    """
    try:
        from stable_audio_tools.inference.generation import generate_diffusion_cond

        model, device = _load_stable_audio_model()

        conditioning = [{
            "prompt": prompt,
            "seconds_start": 0,
            "seconds_total": duration,
        }]
        output = generate_diffusion_cond(
            model,
            steps=100,
            cfg_scale=7,
            conditioning=conditioning,
            sample_rate=_SAMPLE_RATE,
            sigma_min=0.3,
            sigma_max=500,
            sampler_type="dpmpp-3m-sde",
            device=device,
        )

        # First item of the batch; it is handed to torchaudio.save below,
        # which expects a (channels, samples) tensor.
        waveform = output[0].detach().cpu()

        # No sample_size is passed above, so the clip length is whatever the
        # sampler's default produces - presumably longer than short requests
        # (TODO confirm). Trimming is a no-op if the clip is already short.
        waveform = waveform[..., : int(duration * _SAMPLE_RATE)]

        # Reserve a unique path, then close the handle before writing so no
        # file descriptor leaks and the path can be reopened on any platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        torchaudio.save(wav_path, waveform, _SAMPLE_RATE)
        return wav_path
    except Exception as exc:
        # UI boundary: surface the failure to the user through Gradio.
        raise gr.Error(f"Audio generation failed: {exc}") from exc
# Build the web UI: a text prompt and a duration slider go in, and the file
# path returned by generate_audio is rendered as a playable audio clip.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(
            label="🎵 Audio Prompt",
            placeholder="heavy boots thudding on wet sand",
            value="heavy boots thudding on wet sand",
        ),
        # Range 5-47 s with a 10 s default; the upper bound is presumably the
        # longest clip the model supports - confirm before changing it.
        gr.Slider(
            minimum=5,
            maximum=47,
            value=10,
            step=1,
            label="⏱️ Duration (seconds)",
        ),
    ],
    # Audio component so the result is playable in the browser.
    outputs=gr.Audio(label="🔊 Generated Audio"),
    title="🎵 Stable Audio Generator - WORKING!",
    description="Generate real audio from text descriptions",
)

# Start the Gradio server when this file is executed.
demo.launch()