Skip to content

Commit

Permalink
enh: enhance spectrogram creation with ffmpeg + sox combination
Browse files Browse the repository at this point in the history
  • Loading branch information
tphakala committed Sep 15, 2024
1 parent 5c385e5 commit f11f37e
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 55 deletions.
9 changes: 5 additions & 4 deletions internal/conf/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ type Dashboard struct {

// AudioSettings contains settings for audio processing and export.
type AudioSettings struct {
Source string // audio source to use for analysis
Ffmpeg string // path to ffmpeg, runtime value
Sox string // path to sox, runtime value
Export struct {
Source string // audio source to use for analysis
FfmpegPath string // path to ffmpeg, runtime value
SoxPath string // path to sox, runtime value
SoxAudioTypes []string // supported audio types of sox, runtime value
Export struct {
Debug bool // true to enable audio export debug
Enabled bool // export audio clips containing identified bird calls
Path string // path to audio clip export directory
Expand Down
37 changes: 33 additions & 4 deletions internal/conf/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,37 @@ func IsFfmpegAvailable() bool {
return err == nil
}

// IsSoxAvailable checks if sox is available in the system PATH.
func IsSoxAvailable() bool {
_, err := exec.LookPath(GetSoxBinaryName())
return err == nil
// IsSoxAvailable reports whether the SoX binary can be found in the system
// PATH and successfully executed. When it can, the second return value holds
// the audio format names listed on the "AUDIO FILE FORMATS:" line of the
// `sox -h` output; it is nil when that line is absent or SoX is unavailable.
func IsSoxAvailable() (bool, []string) {
	// Resolve the binary first; a lookup failure means SoX is not installed.
	binary, lookErr := exec.LookPath(GetSoxBinaryName())
	if lookErr != nil {
		return false, nil
	}

	// Ask SoX for its help text (stdout and stderr merged). A failure to run
	// the binary is treated the same as SoX being unavailable.
	helpText, runErr := exec.Command(binary, "-h").CombinedOutput()
	if runErr != nil {
		return false, nil
	}

	const marker = "AUDIO FILE FORMATS:"

	var supported []string
	for _, ln := range strings.Split(string(helpText), "\n") {
		if !strings.HasPrefix(ln, marker) {
			continue
		}
		// Everything after the marker is a whitespace-separated format list;
		// only the first matching line is considered.
		supported = strings.Fields(strings.TrimSpace(ln[len(marker):]))
		break
	}

	return true, supported
}
15 changes: 8 additions & 7 deletions internal/conf/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,26 +208,27 @@ func validateBirdweatherSettings(settings *BirdweatherSettings) error {

// validateAudioSettings validates the audio settings and sets ffmpeg and sox paths
func validateAudioSettings(settings *AudioSettings) error {

// Check if ffmpeg is available
if IsFfmpegAvailable() {
settings.Ffmpeg = GetFfmpegBinaryName()
settings.FfmpegPath = GetFfmpegBinaryName()
} else {
settings.Ffmpeg = ""
settings.FfmpegPath = ""
log.Println("FFmpeg not found in system PATH")
}

// Check if sox is available
if IsSoxAvailable() {
settings.Sox = GetSoxBinaryName()
soxAvailable, soxFormats := IsSoxAvailable()
if soxAvailable {
settings.SoxPath = GetSoxBinaryName()
settings.SoxAudioTypes = soxFormats
} else {
settings.Sox = ""
settings.SoxPath = ""
log.Println("sox not found in system PATH")
}

// Validate audio export settings
if settings.Export.Enabled {
if settings.Ffmpeg == "" {
if settings.FfmpegPath == "" {
settings.Export.Type = "wav"
log.Printf("FFmpeg not available, using WAV format for audio export")
} else {
Expand Down
176 changes: 137 additions & 39 deletions internal/httpcontroller/handlers/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ func (h *Handlers) getSpectrogramPath(audioFileName string, width int) (string,

// Create the spectrogram
if err := createSpectrogramWithSoX(audioFileName, spectrogramPath, width); err != nil {
log.Printf("error creating spectrogram with SoX: %s", err)
return "", fmt.Errorf("error creating spectrogram with SoX: %w", err)
log.Printf("error creating spectrogram: %s", err)
return "", fmt.Errorf("error creating spectrogram: %w", err)
}

return webFriendlyPath, nil
Expand All @@ -191,11 +191,11 @@ func fileExists(filename string) (bool, error) {
}

// createSpectrogramWithSoX generates a spectrogram for an audio file using ffmpeg and SoX.
// It supports various audio formats by using ffmpeg to pipe the audio to SoX.
// It supports various audio formats by using ffmpeg to pipe the audio to SoX when necessary.
func createSpectrogramWithSoX(audioClipPath, spectrogramPath string, width int) error {
// Get ffmpeg and sox paths from settings
ffmpegBinary := conf.Setting().Realtime.Audio.Ffmpeg
soxBinary := conf.Setting().Realtime.Audio.Sox
ffmpegBinary := conf.Setting().Realtime.Audio.FfmpegPath
soxBinary := conf.Setting().Realtime.Audio.SoxPath

// Verify ffmpeg and SoX paths
if ffmpegBinary == "" {
Expand All @@ -209,52 +209,150 @@ func createSpectrogramWithSoX(audioClipPath, spectrogramPath string, width int)
heightStr := strconv.Itoa(width / 2)
widthStr := strconv.Itoa(width)

// Build ffmpeg command arguments
ffmpegArgs := []string{"-hide_banner", "-i", audioClipPath, "-f", "sox", "-"}

// Build SoX command arguments
soxArgs := []string{"-t", "sox", "-", "-n", "rate", "24k", "spectrogram", "-x", widthStr, "-y", heightStr, "-o", spectrogramPath}
if width < 800 {
soxArgs = append(soxArgs, "-r")
// Determine if we need to use ffmpeg based on file extension
ext := strings.ToLower(filepath.Ext(audioClipPath))
// remove prefix dot
ext = strings.TrimPrefix(ext, ".")
useFFmpeg := true
for _, soxType := range conf.Setting().Realtime.Audio.SoxAudioTypes {
if ext == strings.ToLower(soxType) {
useFFmpeg = false
break
}
}

// Determine the commands based on the OS
var ffmpegCmd, soxCmd *exec.Cmd
if runtime.GOOS == "windows" {
// Directly use ffmpeg and SoX commands on Windows
ffmpegCmd = exec.Command(ffmpegBinary, ffmpegArgs...)
soxCmd = exec.Command(soxBinary, soxArgs...)
var cmd *exec.Cmd
var soxCmd *exec.Cmd

// Decode audio using ffmpeg and pipe to sox for spectrogram creation
if useFFmpeg {
// Build ffmpeg command arguments
ffmpegArgs := []string{"-hide_banner", "-i", audioClipPath, "-f", "sox", "-"}

// Build SoX command arguments
soxArgs := append([]string{"-t", "sox", "-"}, getSoxSpectrogramArgs(widthStr, heightStr, spectrogramPath)...)

// Set up commands
if runtime.GOOS == "windows" {
cmd = exec.Command(ffmpegBinary, ffmpegArgs...)
soxCmd = exec.Command(soxBinary, soxArgs...)
} else {
cmd = exec.Command("nice", append([]string{"-n", "19", ffmpegBinary}, ffmpegArgs...)...)
soxCmd = exec.Command("nice", append([]string{"-n", "19", soxBinary}, soxArgs...)...)
}

// Set up pipe between ffmpeg and sox
var err error
soxCmd.Stdin, err = cmd.StdoutPipe()
if err != nil {
return fmt.Errorf("error creating pipe: %w", err)
}

// Capture combined output
var ffmpegOutput, soxOutput bytes.Buffer
cmd.Stderr = &ffmpegOutput
soxCmd.Stderr = &soxOutput

// Start sox command
if err := soxCmd.Start(); err != nil {
log.Printf("SoX cmd: %s", soxCmd.String())
return fmt.Errorf("error starting SoX command: %w", err)
}

// Run ffmpeg command
if err := cmd.Run(); err != nil {
soxCmd.Wait() // Ensure sox finishes

Check failure on line 264 in internal/httpcontroller/handlers/media.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `soxCmd.Wait` is not checked (errcheck)
return fmt.Errorf("ffmpeg command failed: %w\nffmpeg output: %s\nsox output: %s", err, ffmpegOutput.String(), soxOutput.String())
}

// Wait for sox command to finish
if err := soxCmd.Wait(); err != nil {
return fmt.Errorf("SoX command failed: %w\nffmpeg output: %s\nsox output: %s", err, ffmpegOutput.String(), soxOutput.String())
}
} else {
// Prepend 'nice' to the commands on Unix-like systems
ffmpegCmd = exec.Command("nice", append([]string{"-n", "19", ffmpegBinary}, ffmpegArgs...)...)
soxCmd = exec.Command("nice", append([]string{"-n", "19", soxBinary}, soxArgs...)...)
// Use SoX directly for supported formats
soxArgs := append([]string{audioClipPath}, getSoxSpectrogramArgs(widthStr, heightStr, spectrogramPath)...)

if runtime.GOOS == "windows" {
soxCmd = exec.Command(soxBinary, soxArgs...)
} else {
soxCmd = exec.Command("nice", append([]string{"-n", "19", soxBinary}, soxArgs...)...)
}

// Capture output
var soxOutput bytes.Buffer
soxCmd.Stderr = &soxOutput
soxCmd.Stdout = &soxOutput

// Run SoX command
if err := soxCmd.Run(); err != nil {
return fmt.Errorf("SoX command failed: %w\nOutput: %s", err, soxOutput.String())
}
}

// Set up pipe between ffmpeg and sox
var err error
soxCmd.Stdin, err = ffmpegCmd.StdoutPipe()
if err != nil {
return fmt.Errorf("error creating pipe: %w", err)
return nil
}

// getSoxSpectrogramArgs builds the SoX argument tail shared by every
// spectrogram invocation: null output, resampling to 24k, and the
// "spectrogram" effect with the requested pixel dimensions and output path.
// The raw-spectrogram flag "-r" is appended when the numeric value of
// widthStr is below 800.
func getSoxSpectrogramArgs(widthStr, heightStr, spectrogramPath string) []string {
	// TODO: make these dynamic based on audio length and gain
	const (
		audioLength  = "15"
		dynamicRange = "100"
	)

	soxArgs := []string{
		"-n",
		"rate", "24k",
		"spectrogram",
		"-x", widthStr,
		"-y", heightStr,
		"-d", audioLength,
		"-z", dynamicRange,
		"-o", spectrogramPath,
	}

	// The parse error is intentionally ignored: an unparsable width yields 0,
	// which falls into the "narrow" case below.
	if px, _ := strconv.Atoi(widthStr); px < 800 {
		soxArgs = append(soxArgs, "-r")
	}

	return soxArgs
}

// Capture combined output
var combinedOutput bytes.Buffer
soxCmd.Stderr = &combinedOutput
ffmpegCmd.Stderr = &combinedOutput
// createSpectrogramWithFFmpeg generates a spectrogram for an audio file using only ffmpeg.
// It supports various audio formats and applies the same practices as createSpectrogramWithSoX.
func createSpectrogramWithFFmpeg(audioClipPath, spectrogramPath string, width int) error {
// Get ffmpeg path from settings
ffmpegBinary := conf.Setting().Realtime.Audio.FfmpegPath

// Start sox command
if err := soxCmd.Start(); err != nil {
return fmt.Errorf("error starting SoX command: %w", err)
// Verify ffmpeg path
if ffmpegBinary == "" {
return fmt.Errorf("ffmpeg path not set in settings")
}

// Run ffmpeg command
if err := ffmpegCmd.Run(); err != nil {
return fmt.Errorf("ffmpeg command failed: %w\nOutput: %s", err, combinedOutput.String())
// Set height based on width
height := width / 2
heightStr := strconv.Itoa(height)
widthStr := strconv.Itoa(width)

// Build ffmpeg command arguments
ffmpegArgs := []string{
"-hide_banner",
"-y", // answer yes to overwriting the output file if it already exists
"-i", audioClipPath,
"-lavfi", fmt.Sprintf("showspectrumpic=s=%sx%s:legend=0:gain=3:drange=100", widthStr, heightStr),
"-frames:v", "1", // Generate only one frame instead of animation
spectrogramPath,
}

// Wait for sox command to finish
if err := soxCmd.Wait(); err != nil {
return fmt.Errorf("SoX command failed: %w\nOutput: %s", err, combinedOutput.String())
// Determine the command based on the OS
var cmd *exec.Cmd
if runtime.GOOS == "windows" {
// Directly use ffmpeg command on Windows
cmd = exec.Command(ffmpegBinary, ffmpegArgs...)
} else {
// Prepend 'nice' to the command on Unix-like systems
cmd = exec.Command("nice", append([]string{"-n", "19", ffmpegBinary}, ffmpegArgs...)...)
}

log.Printf("ffmpeg command: %s", cmd.String())

// Capture combined output
var output bytes.Buffer
cmd.Stderr = &output
cmd.Stdout = &output

// Run ffmpeg command
if err := cmd.Run(); err != nil {
return fmt.Errorf("ffmpeg command failed: %w\nOutput: %s", err, output.String())
}

return nil
Expand Down
2 changes: 1 addition & 1 deletion internal/myaudio/ffmpeg_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
// ExportAudioWithFFmpeg exports PCM data to the specified format using FFmpeg
func ExportAudioWithFFmpeg(pcmData []byte, outputPath string, settings *conf.AudioSettings) error {
// Use the FFmpeg path from the settings
ffmpegBinary := settings.Ffmpeg
ffmpegBinary := settings.FfmpegPath
if ffmpegBinary == "" {
return fmt.Errorf("FFmpeg is not available")
}
Expand Down

0 comments on commit f11f37e

Please sign in to comment.