From 4ed23168bc212e248f884df0335867390d854380 Mon Sep 17 00:00:00 2001 From: Tomi Hakala Date: Fri, 31 May 2024 17:18:57 +0300 Subject: [PATCH 1/3] refactor: Major overhaul to audio clip retention methods, age and usage based policy options --- internal/analysis/realtime.go | 24 +-- internal/conf/config.go | 10 +- internal/conf/config.yaml | 11 +- internal/conf/defaults.go | 8 +- internal/conf/utils.go | 40 ++++ internal/diskmanager/age.go | 52 ----- internal/diskmanager/file_utils.go | 147 +++++++++++++ internal/diskmanager/policy_age.go | 103 ++++++++++ internal/diskmanager/policy_usage.go | 192 +++++++++++++++++ internal/diskmanager/priority.go | 295 --------------------------- internal/diskmanager/util.go | 43 ---- 11 files changed, 504 insertions(+), 421 deletions(-) delete mode 100644 internal/diskmanager/age.go create mode 100644 internal/diskmanager/file_utils.go create mode 100644 internal/diskmanager/policy_age.go create mode 100644 internal/diskmanager/policy_usage.go delete mode 100644 internal/diskmanager/priority.go delete mode 100644 internal/diskmanager/util.go diff --git a/internal/analysis/realtime.go b/internal/analysis/realtime.go index a65413ea..df745792 100644 --- a/internal/analysis/realtime.go +++ b/internal/analysis/realtime.go @@ -112,7 +112,7 @@ func RealtimeAnalysis(settings *conf.Settings) error { startAudioCapture(&wg, settings, quitChan, restartChan, audioBuffer) // start cleanup of clips - if conf.Setting().Realtime.Audio.Export.Retention.Enabled { + if conf.Setting().Realtime.Audio.Export.Retention.Policy != "none" { startClipCleanupMonitor(&wg, settings, dataStore, quitChan) } @@ -203,9 +203,11 @@ func clipCleanupMonitor(wg *sync.WaitGroup, dataStore datastore.Interface, quitC defer wg.Done() // Ensure that the WaitGroup is marked as done after the function exits // Create a ticker that triggers every five minutes to perform cleanup - ticker := time.NewTicker(5 * time.Minute) + ticker := time.NewTicker(1 * time.Minute) defer ticker.Stop() // Ensure the ticker is stopped to prevent leaks + log.Println("Clip retention policy:", conf.Setting().Realtime.Audio.Export.Retention.Policy) + for { select { case <-quitChan: @@ -213,26 +215,16 @@ func clipCleanupMonitor(wg *sync.WaitGroup, dataStore datastore.Interface, quitC return case <-ticker.C: - if conf.Setting().Realtime.Audio.Export.Debug { - log.Println("Cleanup ticker triggered") - } - // age based cleanup method - if conf.Setting().Realtime.Audio.Export.Retention.Mode == "age" { - if conf.Setting().Realtime.Audio.Export.Debug { - log.Println("Running age based cleanup") - } - if err := diskmanager.AgeBasedCleanup(dataStore); err != nil { + if conf.Setting().Realtime.Audio.Export.Retention.Policy == "age" { + if err := diskmanager.AgeBasedCleanup(quitChan); err != nil { log.Println("Error cleaning up clips: ", err) } } // priority based cleanup method - if conf.Setting().Realtime.Audio.Export.Retention.Mode == "priority" { - if conf.Setting().Realtime.Audio.Export.Debug { - log.Println("Running priority based cleanup") - } - if err := diskmanager.PriorityBasedCleanup(quitChan); err != nil { + if conf.Setting().Realtime.Audio.Export.Retention.Policy == "usage" { + if err := diskmanager.UsageBasedCleanup(quitChan); err != nil { log.Println("Error cleaning up clips: ", err) } } diff --git a/internal/conf/config.go b/internal/conf/config.go index d2a5349e..c169f778 100644 --- a/internal/conf/config.go +++ b/internal/conf/config.go @@ -54,11 +54,11 @@ type Settings struct { Path string // path to audio clip export directory Type string // audio file type, wav, mp3 or flac Retention struct { - Enabled bool // true to enable audio clip retention - Mode string // retention mode, "age" or "priority" - DiskUsageLimit string // maximum disk usage percentage before retention - MinEvictionHours int // minimum number of hours to keep audio clips - MinClipsPerSpecies int // minimum number of clips per species to keep + Debug bool // true to enable retention debug + Policy string // retention policy, "none", "age" or "usage" + MaxAge string // maximum age of audio clips to keep + MaxUsage string // maximum disk usage percentage before cleanup + MinClips int // minimum number of clips per species to keep } } } diff --git a/internal/conf/config.yaml b/internal/conf/config.yaml index 0259696f..dc4084f9 100644 --- a/internal/conf/config.yaml +++ b/internal/conf/config.yaml @@ -36,12 +36,11 @@ realtime: debug: false # true to enable audio export debug messages path: clips/ # path to audio clip export directory type: wav # only wav supported for now - retention: - enabled: true # true to enable retention policy of clips - mode: priority # age or priority - diskusagelimit: 80% # percentage of disk usage to trigger eviction - minClipsPerSpecies: 10 # minumum number of clips per species to keep before starting evictions - minEvictionHours: 0 # minumum number of hours before considering clip for eviction + retention: + policy: usage # retention policy: none, age or usage + maxage: 30d # age policy: maximum age of clips to keep before starting evictions + maxusage: 80% # usage policy: percentage of disk usage to trigger eviction + minclips: 10 # minumum number of clips per species to keep before starting evictions log: enabled: false # true to enable OBS chat log diff --git a/internal/conf/defaults.go b/internal/conf/defaults.go index c289255c..a8cb888d 100644 --- a/internal/conf/defaults.go +++ b/internal/conf/defaults.go @@ -40,10 +40,10 @@ func setDefaultConfig() { viper.SetDefault("realtime.audio.export.retention.enabled", true) viper.SetDefault("realtime.audio.export.retention.debug", false) - viper.SetDefault("realtime.audio.export.retention.mode", "priority") - viper.SetDefault("realtime.audio.export.retention.diskusagelimit", "80%") - viper.SetDefault("realtime.audio.export.retention.minClipsPerSpecies", 10) - viper.SetDefault("realtime.audio.export.retention.minEvictionHours", 0) + viper.SetDefault("realtime.audio.export.retention.policy", "use") + viper.SetDefault("realtime.audio.export.retention.maxusage", "80%") + viper.SetDefault("realtime.audio.export.retention.maxage", "30d") + viper.SetDefault("realtime.audio.export.retention.minclips", 10) viper.SetDefault("realtime.log.enabled", false) viper.SetDefault("realtime.log.path", "birdnet.txt") diff --git a/internal/conf/utils.go b/internal/conf/utils.go index 361279c8..d3c080ac 100644 --- a/internal/conf/utils.go +++ b/internal/conf/utils.go @@ -191,3 +191,43 @@ func ParsePercentage(percentage string) (float64, error) { } return 0, errors.New("invalid percentage format") } + +// ParseRetentionPeriod converts a string like "24h", "7d", "1w", "3m", "1y" to hours. +func ParseRetentionPeriod(retention string) (int, error) { + if len(retention) == 0 { + return 0, fmt.Errorf("retention period cannot be empty") + } + + // Try to parse the retention period + lastChar := retention[len(retention)-1] + numberPart := retention[:len(retention)-1] + + // Handle case where the input is a plain integer + if lastChar >= '0' && lastChar <= '9' { + hours, err := strconv.Atoi(retention) + if err != nil { + return 0, fmt.Errorf("invalid retention period format: %s", retention) + } + return hours, nil + } + + number, err := strconv.Atoi(numberPart) + if err != nil { + return 0, fmt.Errorf("invalid retention period format: %s", retention) + } + + switch lastChar { + case 'h': + return number, nil + case 'd': + return number * 24, nil + case 'w': + return number * 24 * 7, nil + case 'm': + return number * 24 * 30, nil // Approximation, as months can vary in length + case 'y': + return number * 24 * 365, nil // Ignoring leap years for simplicity + default: + return 0, fmt.Errorf("invalid suffix for retention period: %c", lastChar) + } +} diff --git a/internal/diskmanager/age.go b/internal/diskmanager/age.go deleted file mode 100644 index 1e209ec1..00000000 --- a/internal/diskmanager/age.go +++ /dev/null @@ -1,52 +0,0 @@ -// agemode.go age based cleanup code -package diskmanager - -import ( - "log" - "os" - - "github.com/tphakala/birdnet-go/internal/conf" - "github.com/tphakala/birdnet-go/internal/datastore" -) - -// AgeBasedCleanup removes clips from the filesystem based on their age and the number of clips per species. -// TODO: handle quit channel properly if it happens during cleanup -func AgeBasedCleanup(dataStore datastore.Interface) error { - MinEvictionHours := conf.Setting().Realtime.Audio.Export.Retention.MinEvictionHours - MinClipsPerSpecies := conf.Setting().Realtime.Audio.Export.Retention.MinClipsPerSpecies - - // Perform cleanup operation on every tick - clipsForRemoval, err := dataStore.GetClipsQualifyingForRemoval(MinEvictionHours, MinClipsPerSpecies) - if err != nil { - log.Printf("Error retrieving clips for removal: %s\n", err) - return err - } - - log.Printf("Found %d clips to remove\n", len(clipsForRemoval)) - - for _, clip := range clipsForRemoval { - // Attempt to remove the clip file from the filesystem - if err := os.Remove(clip.ClipName); err != nil { - if os.IsNotExist(err) { - // Attempt to delete the database record if the clip file aleady doesn't exist - if err := dataStore.DeleteNoteClipPath(clip.ID); err != nil { - log.Printf("Failed to delete clip path for %s: %s\n", clip.ID, err) - - } else { - log.Printf("Cleared clip path of missing clip for %s\n", clip.ID) - } - } else { - log.Printf("Failed to remove %s: %s\n", clip.ClipName, err) - } - return err - } else { - log.Printf("Removed %s\n", clip.ClipName) - // Attempt to delete the database record if the file removal was successful - if err := dataStore.DeleteNoteClipPath(clip.ID); err != nil { - log.Printf("Failed to delete clip path for %s: %s\n", clip.ID, err) - } - } - } - - return nil -} diff --git a/internal/diskmanager/file_utils.go b/internal/diskmanager/file_utils.go new file mode 100644 index 00000000..05147b64 --- /dev/null +++ b/internal/diskmanager/file_utils.go @@ -0,0 +1,147 @@ +// file_utils.go - shared file management code +package diskmanager + +import ( + "encoding/csv" + "errors" + "fmt" + "log" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +// FileInfo holds information about a file +type FileInfo struct { + Path string + Species string + Confidence int + Timestamp time.Time + Size int64 +} + +// LoadPolicy loads the cleanup policies from a CSV file +func LoadPolicy(policyFile string) (*Policy, error) { + file, err := os.Open(policyFile) + if err != nil { + return nil, err + } + defer file.Close() + + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, err + } + + policy := &Policy{ + AlwaysCleanupFirst: make(map[string]bool), + NeverCleanup: make(map[string]bool), + } + + for _, record := range records { + if len(record) != 2 { + return nil, errors.New("invalid policy record") + } + if record[1] == "always" { + policy.AlwaysCleanupFirst[record[0]] = true + } else if record[1] == "never" { + policy.NeverCleanup[record[0]] = true + } + } + + return policy, nil +} + +// GetAudioFiles returns a list of audio files in the directory and its subdirectories +func GetAudioFiles(baseDir string, allowedExts []string, debug bool) ([]FileInfo, error) { + var files []FileInfo + + err := filepath.Walk(baseDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + ext := filepath.Ext(info.Name()) + if contains(allowedExts, ext) { + fileInfo, err := parseFileInfo(path, info) + if err != nil { + return err + } + files = append(files, fileInfo) + } + } + return nil + }) + + return files, err +} + +// parseFileInfo parses the file information from the file path and os.FileInfo +func parseFileInfo(path string, info os.FileInfo) (FileInfo, error) { + name := filepath.Base(info.Name()) + parts := strings.Split(name, "_") + if len(parts) < 3 { + return FileInfo{}, errors.New("invalid file name format") + } + + // The species name might contain underscores, so we need to handle the last two parts separately + confidenceStr := parts[len(parts)-2] + timestampStr := parts[len(parts)-1] + species := strings.Join(parts[:len(parts)-2], "_") + + confidence, err := strconv.Atoi(strings.TrimSuffix(confidenceStr, "p")) + if err != nil { + return FileInfo{}, err + } + + timestamp, err := time.Parse("20060102T150405Z", strings.TrimSuffix(timestampStr, ".wav")) + if err != nil { + return FileInfo{}, err + } + + return FileInfo{ + Path: path, + Species: species, + Confidence: confidence, + Timestamp: timestamp, + Size: info.Size(), + }, nil +} + +// contains checks if a string is in a slice +func contains(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + +// WriteSortedFilesToFile writes the sorted list of files to a text file for investigation +func WriteSortedFilesToFile(files []FileInfo, filePath string) error { + file, err := os.Create(filePath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + + for _, fileInfo := range files { + line := fmt.Sprintf("Path: %s, Species: %s, Confidence: %d, Timestamp: %s, Size: %d\n", + fileInfo.Path, fileInfo.Species, fileInfo.Confidence, fileInfo.Timestamp.Format(time.RFC3339), fileInfo.Size) + _, err := file.WriteString(line) + if err != nil { + return fmt.Errorf("failed to write to file: %w", err) + } + } + + if err := file.Sync(); err != nil { + return fmt.Errorf("failed to sync file: %w", err) + } + + log.Printf("Sorted files have been written to %s", filePath) + return nil +} diff --git a/internal/diskmanager/policy_age.go b/internal/diskmanager/policy_age.go new file mode 100644 index 00000000..10e0f635 --- /dev/null +++ b/internal/diskmanager/policy_age.go @@ -0,0 +1,103 @@ +// policy_age.go - code for age retention policy +package diskmanager + +import ( + "log" + "os" + "path/filepath" + "time" + + "github.com/tphakala/birdnet-go/internal/conf" +) + +// AgeBasedCleanup removes clips from the filesystem based on their age and the number of clips per species. +func AgeBasedCleanup(quit <-chan struct{}) error { + settings := conf.Setting() + + debug := settings.Realtime.Audio.Export.Retention.Debug + baseDir := settings.Realtime.Audio.Export.Path + minClipsPerSpecies := settings.Realtime.Audio.Export.Retention.MinClips + retentionPeriod := settings.Realtime.Audio.Export.Retention.MaxAge + + retentionPeriodInHours, err := conf.ParseRetentionPeriod(retentionPeriod) + if err != nil { + log.Printf("Invalid retention period: %s\n", err) + return err + } + + allowedExts := []string{".wav"} + + if debug { + log.Printf("Starting age-based cleanup process. Base directory: %s, Retention period: %s", baseDir, retentionPeriod) + } + + files, err := GetAudioFiles(baseDir, allowedExts, debug) + if err != nil { + return err + } + + // Create a map to keep track of the number of files per species per subdirectory + speciesMonthCount := make(map[string]map[string]int) + for _, file := range files { + subDir := filepath.Dir(file.Path) + if _, exists := speciesMonthCount[file.Species]; !exists { + speciesMonthCount[file.Species] = make(map[string]int) + } + speciesMonthCount[file.Species][subDir]++ + } + + expirationTime := time.Now().Add(-time.Duration(retentionPeriodInHours) * time.Hour) + + maxDeletions := 1000 // Maximum number of files to delete in one run + deletedFiles := 0 // Counter for the number of deleted files + + for _, file := range files { + select { + case <-quit: + log.Printf("Cleanup interrupted by quit signal\n") + return nil + default: + if file.Timestamp.Before(expirationTime) { + subDir := filepath.Dir(file.Path) + + if speciesMonthCount[file.Species][subDir] <= minClipsPerSpecies { + if debug { + log.Printf("Species clip count for %s in %s is at the minimum threshold (%d). Skipping file deletion.", file.Species, subDir, minClipsPerSpecies) + } + continue + } + + if debug { + log.Printf("File %s is older than retention period, deleting.", file.Path) + } + + err = os.Remove(file.Path) + if err != nil { + log.Printf("Failed to remove %s: %s\n", file.Path, err) + return err + } + + speciesMonthCount[file.Species][subDir]-- + deletedFiles++ + + if debug { + log.Printf("File %s deleted", file.Path) + } + + // Check if we have reached the maximum number of deletions + if deletedFiles >= maxDeletions { + if debug { + log.Printf("Reached maximum number of deletions (%d). Ending cleanup.", maxDeletions) + } + return nil + } + } + } + } + + if debug { + log.Printf("Age retention policy applied, total files deleted: %d", deletedFiles) + } + + return nil +} diff --git a/internal/diskmanager/policy_usage.go b/internal/diskmanager/policy_usage.go new file mode 100644 index 00000000..ac334de8 --- /dev/null +++ b/internal/diskmanager/policy_usage.go @@ -0,0 +1,192 @@ +// policy_usage.go - code for use retention policy +package diskmanager + +import ( + "log" + "os" + "path/filepath" + "sort" + + "github.com/tphakala/birdnet-go/internal/conf" +) + +// Policy defines cleanup policies +type Policy struct { + AlwaysCleanupFirst map[string]bool // Species to always cleanup first + NeverCleanup map[string]bool // Species to never cleanup +} + +// UsageBasedCleanup cleans up old audio files based on the configuration and monitors for quit signals +func UsageBasedCleanup(quitChan chan struct{}) error { + settings := conf.Setting() + + debug := settings.Realtime.Audio.Export.Retention.Debug + baseDir := settings.Realtime.Audio.Export.Path + minClipsPerSpecies := settings.Realtime.Audio.Export.Retention.MinClips + + // Convert 80% string etc. to 80.0 float64 + threshold, err := conf.ParsePercentage(settings.Realtime.Audio.Export.Retention.MaxUsage) + if err != nil { + return err + } + + // Only remove files with extensions in this list + allowedExts := []string{".wav"} + + if debug { + log.Printf("Starting cleanup process. Base directory: %s, Threshold: %.1f%%", baseDir, threshold) + } + + // Check handle disk usage + diskUsage, err := GetDiskUsage(baseDir) + if err != nil { + return err + } + + if diskUsage > threshold { + if debug { + log.Printf("Disk usage %.1f%% is above the %.1f%% threshold. Cleanup needed.", diskUsage, threshold) + } + + // Get the list of audio files + files, err := GetAudioFiles(baseDir, allowedExts, debug) + if err != nil { + return err + } + + // Sort files by the cleanup priority and get the initial count of files per species per subdirectory + speciesMonthCount := sortFiles(files, debug) + + // Debug: write sorted files to a file + if debug { + if err := WriteSortedFilesToFile(files, "file_cleanup_order.txt"); err != nil { + return err + } + } + + // Perform the cleanup + return performCleanup(files, baseDir, threshold, minClipsPerSpecies, speciesMonthCount, debug, quitChan) + } else { + if debug { + log.Printf("Disk usage %.1f%% is below the %.1f%% threshold. No cleanup needed.", diskUsage, threshold) + } + } + + return nil +} + +func performCleanup(files []FileInfo, baseDir string, threshold float64, minClipsPerSpecies int, speciesMonthCount map[string]map[string]int, debug bool, quitChan chan struct{}) error { + // Delete files until disk usage is below the threshold or 100 files have been deleted + deletedFiles := 0 + maxDeletions := 1000 + totalFreedSpace := int64(0) + + for _, file := range files { + select { + case <-quitChan: + log.Println("Received quit signal, ending cleanup run.") + return nil + default: + // Get the subdirectory name + subDir := filepath.Dir(file.Path) + month := file.Timestamp.Format("2006-01") + + diskUsage, err := GetDiskUsage(baseDir) + if err != nil { + return err + } + + // Check if disk usage is below threshold or max deletions reached + if diskUsage < threshold || deletedFiles >= maxDeletions { + // all done for now, exit select loop + break + } + + if debug { + log.Printf("Species %s has %d clips in %s", file.Species, speciesMonthCount[file.Species][subDir], subDir) + } + + if speciesMonthCount[file.Species][subDir] <= minClipsPerSpecies { + if debug { + log.Printf("Species clip count for %s in %s/%s is below the minimum threshold (%d). Skipping file deletion.", file.Species, month, subDir, minClipsPerSpecies) + } + continue + } + if debug { + log.Printf("Deleting file: %s", file.Path) + } + + // Delete the file deemed for cleanup + err = os.Remove(file.Path) + if err != nil { + return err + } + + // Increment deleted files count and update species count + deletedFiles++ + speciesMonthCount[file.Species][subDir]-- + + // Add file size to total freed space + totalFreedSpace += file.Size + + if debug { + log.Printf("File deleted. %d clips left for species %s in %s", speciesMonthCount[file.Species][subDir], file.Species, subDir) + } + } + } + + if debug { + log.Printf("Usage retention policy applied, total files deleted: %d", deletedFiles) + } + + return nil +} + +func sortFiles(files []FileInfo, debug bool) map[string]map[string]int { + if debug { + log.Printf("Sorting files by cleanup priority.") + } + + // Count the number of files for each species in each subdirectory + speciesMonthCount := make(map[string]map[string]int) + for _, file := range files { + subDir := filepath.Dir(file.Path) + if _, exists := speciesMonthCount[file.Species]; !exists { + speciesMonthCount[file.Species] = make(map[string]int) + } + speciesMonthCount[file.Species][subDir]++ + } + + sort.Slice(files, func(i, j int) bool { + // Defensive check for nil pointers + if files[i].Path == "" || files[j].Path == "" { + return false + } + + // Priority 1: Oldest files first + if files[i].Timestamp != files[j].Timestamp { + return files[i].Timestamp.Before(files[j].Timestamp) + } + + // Priority 3: Species with the most occurrences in the subdirectory + subDirI := filepath.Dir(files[i].Path) + subDirJ := filepath.Dir(files[j].Path) + if speciesMonthCount[files[i].Species][subDirI] != speciesMonthCount[files[j].Species][subDirJ] { + return speciesMonthCount[files[i].Species][subDirI] > speciesMonthCount[files[j].Species][subDirJ] + } + + // Priority 4: Confidence level + if files[i].Confidence != files[j].Confidence { + return files[i].Confidence > files[j].Confidence + } + + // Default to oldest timestamp + return files[i].Timestamp.Before(files[j].Timestamp) + }) + + if debug { + log.Printf("Files sorted.") + } + + return speciesMonthCount +} diff --git a/internal/diskmanager/priority.go b/internal/diskmanager/priority.go deleted file mode 100644 index ed469063..00000000 --- a/internal/diskmanager/priority.go +++ /dev/null @@ -1,295 +0,0 @@ -// priority.go priority based cleanup code -package diskmanager - -import ( - "encoding/csv" - "errors" - "log" - "os" - "path/filepath" - "sort" - "strconv" - "strings" - "time" - - "github.com/tphakala/birdnet-go/internal/conf" -) - -// Policy defines cleanup policies -type Policy struct { - AlwaysCleanupFirst map[string]bool // Species to always cleanup first - NeverCleanup map[string]bool // Species to never cleanup -} - -// FileInfo holds information about a file -type FileInfo struct { - Path string - Species string - Confidence int - Timestamp time.Time - Size int64 -} - -// LoadPolicy loads the cleanup policies from a CSV file -func LoadPolicy(policyFile string) (*Policy, error) { - file, err := os.Open(policyFile) - if err != nil { - return nil, err - } - defer file.Close() - - reader := csv.NewReader(file) - records, err := reader.ReadAll() - if err != nil { - return nil, err - } - - policy := &Policy{ - AlwaysCleanupFirst: make(map[string]bool), - NeverCleanup: make(map[string]bool), - } - - for _, record := range records { - if len(record) != 2 { - return nil, errors.New("invalid policy record") - } - if record[1] == "always" { - policy.AlwaysCleanupFirst[record[0]] = true - } else if record[1] == "never" { - policy.NeverCleanup[record[0]] = true - } - } - - return policy, nil -} - -// GetAudioFiles returns a list of audio files in the directory and its subdirectories -func GetAudioFiles(baseDir string, allowedExts []string, debug bool) ([]FileInfo, error) { - var files []FileInfo - - err := filepath.Walk(baseDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - ext := filepath.Ext(info.Name()) - if contains(allowedExts, ext) { - fileInfo, err := parseFileInfo(path, info) - if err != nil { - return err - } - /*if debug { - log.Printf("Found file: %s, Species: %s, Confidence: %d, Timestamp: %s", fileInfo.Path, fileInfo.Species, fileInfo.Confidence, fileInfo.Timestamp) - }*/ - files = append(files, fileInfo) - } - } - return nil - }) - - return files, err -} - -// parseFileInfo parses the file information from the file path and os.FileInfo -func parseFileInfo(path string, info os.FileInfo) (FileInfo, error) { - name := filepath.Base(info.Name()) - parts := strings.Split(name, "_") - if len(parts) < 3 { - return FileInfo{}, errors.New("invalid file name format") - } - - // The species name might contain underscores, so we need to handle the last two parts separately - confidenceStr := parts[len(parts)-2] - timestampStr := parts[len(parts)-1] - species := strings.Join(parts[:len(parts)-2], "_") - - confidence, err := strconv.Atoi(strings.TrimSuffix(confidenceStr, "p")) - if err != nil { - return FileInfo{}, err - } - - timestamp, err := time.Parse("20060102T150405Z", strings.TrimSuffix(timestampStr, ".wav")) - if err != nil { - return FileInfo{}, err - } - - return FileInfo{ - Path: path, - Species: species, - Confidence: confidence, - Timestamp: timestamp, - Size: info.Size(), - }, nil -} - -// PriorityBasedCleanup cleans up old audio files based on the configuration and monitors for quit signals -func PriorityBasedCleanup(quitChan chan struct{}) error { - settings := conf.Setting() - - debug := settings.Realtime.Audio.Export.Debug - baseDir := settings.Realtime.Audio.Export.Path - thresholdStr := settings.Realtime.Audio.Export.Retention.DiskUsageLimit - minClipsPerSpecies := settings.Realtime.Audio.Export.Retention.MinClipsPerSpecies - - // Convert 80% string etc. to 80.0 float64 - threshold, err := conf.ParsePercentage(thresholdStr) - if err != nil { - return err - } - - // Only remove files with extensions in this list - allowedExts := []string{".wav"} - - if debug { - log.Printf("Starting cleanup process. Base directory: %s, Threshold: %.1f%%", baseDir, threshold) - } - - // Check and handle disk usage - if err := handleDiskUsage(baseDir, threshold, debug); err != nil { - return err - } - - // Get the list of audio files - files, err := GetAudioFiles(baseDir, allowedExts, debug) - if err != nil { - return err - } - - // Sort files by the cleanup priority and get the initial count of files per species per subdirectory - speciesMonthCount := sortFiles(files, debug) - - // Debug: write sorted files to a file - if debug { - if err := WriteSortedFilesToFile(files, "file_cleanup_order.txt"); err != nil { - return err - } - } - - // Perform the cleanup - return performCleanup(files, baseDir, threshold, minClipsPerSpecies, speciesMonthCount, debug, quitChan) -} - -func handleDiskUsage(baseDir string, threshold float64, debug bool) error { - // Get the current disk usage - diskUsage, err := GetDiskUsage(baseDir) - if err != nil { - return err - } - - if debug { - log.Printf("Current disk usage: %.1f%%", diskUsage) - } - - // If disk usage is below the threshold, no cleanup is needed - if diskUsage < threshold { - if debug { - log.Printf("Disk usage %.1f%% is below the %.1f%% threshold. No cleanup needed.", diskUsage, threshold) - } - return nil - } else { - if debug { - log.Printf("Disk usage %.1f%% is above the %.1f%% threshold. Cleanup needed.", diskUsage, threshold) - } - } - - return nil -} - -func performCleanup(files []FileInfo, baseDir string, threshold float64, minClipsPerSpecies int, speciesMonthCount map[string]map[string]int, debug bool, quitChan chan struct{}) error { - // Delete files until disk usage is below the threshold or 100 files have been deleted - deletedFiles := 0 - maxDeletions := 1000 - totalFreedSpace := int64(0) - - for _, file := range files { - select { - case <-quitChan: - if debug { - log.Println("Received quit signal, exiting cleanup loop.") - } - return nil - default: - // Get the subdirectory name - subDir := filepath.Dir(file.Path) - diskUsage, err := GetDiskUsage(baseDir) - if err != nil { - return err - } - if diskUsage < threshold || deletedFiles >= maxDeletions || speciesMonthCount[file.Species][subDir] <= minClipsPerSpecies { - continue - } - - if debug { - log.Printf("Deleting file: %s", file.Path) - } - - // Delete the file deemed for cleanup - err = os.Remove(file.Path) - if err != nil { - return err - } - - // Increment deleted files count and update species count - deletedFiles++ - speciesMonthCount[file.Species][subDir]-- - - // Add file size to total freed space - totalFreedSpace += file.Size - } - } - - if debug { - log.Printf("Cleanup process completed. %d files deleted. Total space freed: %.2f MB", deletedFiles, float64(totalFreedSpace)/(1024*1024)) - } - - return nil -} - -func sortFiles(files []FileInfo, debug bool) map[string]map[string]int { - if debug { - log.Printf("Sorting files by cleanup priority.") - } - - // Count the number of files for each species in each subdirectory - speciesMonthCount := make(map[string]map[string]int) - for _, file := range files { - subDir := filepath.Dir(file.Path) - if _, exists := speciesMonthCount[file.Species]; !exists { - speciesMonthCount[file.Species] = make(map[string]int) - } - speciesMonthCount[file.Species][subDir]++ - } - - sort.Slice(files, func(i, j int) bool { - // Defensive check for nil pointers - if files[i].Path == "" || files[j].Path == "" { - return false - } - - // Priority 1: Oldest files first - if files[i].Timestamp != files[j].Timestamp { - return files[i].Timestamp.Before(files[j].Timestamp) - } - - // Priority 3: Species with the most occurrences in the subdirectory - subDirI := filepath.Dir(files[i].Path) - subDirJ := filepath.Dir(files[j].Path) - if speciesMonthCount[files[i].Species][subDirI] != speciesMonthCount[files[j].Species][subDirJ] { - return speciesMonthCount[files[i].Species][subDirI] > speciesMonthCount[files[j].Species][subDirJ] - } - - // Priority 4: Confidence level - if files[i].Confidence != files[j].Confidence { - return files[i].Confidence > files[j].Confidence - } - - // Default to oldest timestamp - return files[i].Timestamp.Before(files[j].Timestamp) - }) - - if debug { - log.Printf("Files sorted.") - } - - return speciesMonthCount -} diff --git a/internal/diskmanager/util.go b/internal/diskmanager/util.go deleted file mode 100644 index 31e48000..00000000 --- a/internal/diskmanager/util.go +++ /dev/null @@ -1,43 +0,0 @@ -package diskmanager - -import ( - "fmt" - "log" - "os" - "time" -) - -// WriteSortedFilesToFile writes the sorted list of files to a text file for investigation -func WriteSortedFilesToFile(files []FileInfo, filePath string) error { - file, err := os.Create(filePath) - if err != nil { - return fmt.Errorf("failed to create file: %w", err) - } - defer file.Close() - - for _, fileInfo := range files { - line := fmt.Sprintf("Path: %s, Species: %s, Confidence: %d, Timestamp: %s, Size: %d\n", - fileInfo.Path, fileInfo.Species, fileInfo.Confidence, fileInfo.Timestamp.Format(time.RFC3339), fileInfo.Size) - _, err := file.WriteString(line) - if err != nil { - return fmt.Errorf("failed to write to file: %w", err) - } - } - - if err := file.Sync(); err != nil { - return fmt.Errorf("failed to sync file: %w", err) - } - - log.Printf("Sorted files have been written to %s", filePath) - return nil -} - -// contains checks if a string is in a slice -func contains(slice []string, item string) bool { - for _, s := range slice { - if s == item { - return true - } - } - return false -} From d7ae595da2c2da341d4d654bfe729e887303e02d Mon Sep 17 00:00:00 2001 From: Tomi Hakala Date: Fri, 31 May 2024 17:24:02 +0300 Subject: [PATCH 2/3] fix: fix clip cleanup interval to 5 minutes --- internal/analysis/realtime.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/analysis/realtime.go b/internal/analysis/realtime.go index df745792..09479490 100644 --- a/internal/analysis/realtime.go +++ b/internal/analysis/realtime.go @@ -203,7 +203,7 @@ func clipCleanupMonitor(wg *sync.WaitGroup, dataStore datastore.Interface, quitC defer wg.Done() // Ensure that the WaitGroup is marked as done after the function exits // Create a ticker that triggers every five minutes to perform cleanup - ticker := time.NewTicker(1 * time.Minute) + ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() // Ensure the ticker is stopped to prevent leaks log.Println("Clip retention policy:", conf.Setting().Realtime.Audio.Export.Retention.Policy) From 5bfa08d09b240a6db69ce26d1a9eeddb7b818801 Mon Sep 17 00:00:00 2001 From: Tomi Hakala Date: Fri, 31 May 2024 17:30:45 +0300 Subject: [PATCH 3/3] refactor: Update default audio export retention policy to "usage" --- internal/conf/defaults.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/conf/defaults.go b/internal/conf/defaults.go index a8cb888d..1918b222 100644 --- a/internal/conf/defaults.go +++ b/internal/conf/defaults.go @@ -40,7 +40,7 @@ func setDefaultConfig() { viper.SetDefault("realtime.audio.export.retention.enabled", true) viper.SetDefault("realtime.audio.export.retention.debug", false) - viper.SetDefault("realtime.audio.export.retention.policy", "use") + viper.SetDefault("realtime.audio.export.retention.policy", "usage") viper.SetDefault("realtime.audio.export.retention.maxusage", "80%") viper.SetDefault("realtime.audio.export.retention.maxage", "30d") viper.SetDefault("realtime.audio.export.retention.minclips", 10)