#!/usr/bin/env python3 import argparse from pathlib import Path import numpy as np import librosa import librosa.display import matplotlib.pyplot as plt def save_mel_image( audio_path: Path, out_path: Path, sr: int = 22050, n_fft: int = 1024, hop_length: int = 256, n_mels: int = 80, fmin: int = 0, fmax: int | None = 8000, ): y, _ = librosa.load(str(audio_path), sr=sr, mono=True) S = librosa.feature.melspectrogram( y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax ) S_db = librosa.power_to_db(S, ref=np.max) plt.figure(figsize=(8, 3), dpi=150) librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis=None, y_axis=None, cmap="magma") plt.axis("off") plt.tight_layout(pad=0) out_path.parent.mkdir(parents=True, exist_ok=True) plt.savefig(out_path, bbox_inches="tight", pad_inches=0) plt.close() def main(): p = argparse.ArgumentParser(description="Generate mel-spectrogram image from an audio file.") p.add_argument("audio", type=Path, help="Path to input audio (wav/flac/mp3)") p.add_argument("output", type=Path, help="Path to output image (png/jpg)") p.add_argument("--sr", type=int, default=22050) p.add_argument("--n_fft", type=int, default=1024) p.add_argument("--hop", dest="hop_length", type=int, default=256) p.add_argument("--mels", dest="n_mels", type=int, default=80) p.add_argument("--fmin", type=int, default=0) p.add_argument("--fmax", type=int, default=8000) args = p.parse_args() save_mel_image( audio_path=args.audio, out_path=args.output, sr=args.sr, n_fft=args.n_fft, hop_length=args.hop_length, n_mels=args.n_mels, fmin=args.fmin, fmax=args.fmax, ) if __name__ == "__main__": main()