Source code for ava.segmenting.segment

"""
Segment audio files and write segmenting decisions.

TO DO:
	- tune window size
	- segment could be sped up if it operated file by file.

"""
__date__ = "December 2018 - November 2020"


import matplotlib.pyplot as plt
plt.switch_backend('agg')
import numpy as np
import os
from scipy.io import wavfile, loadmat
from scipy.io.wavfile import WavFileWarning
from scipy.signal import stft
import warnings

from ava.segmenting.utils import get_spec

# Constants
EPSILON = 1e-12



[docs]def segment(audio_dir, seg_dir, p, verbose=True): """ Segment audio files in `audio_dir` and write decisions to `seg_dir`. Parameters ---------- audio_dir : str Directory containing audio files. seg_dir : str Directory containing segmenting decisions. p : dict Segmenting parameters. Must map the key `'algorithm'` to a segmenting algorithm, for example `ava.segmenting.amplitude_segmentation.get_onsets_offsets`. Must additionally contain keys requested by the segmenting algorithm. verbose : bool, optional Defaults to ``True``. """ if verbose: print("Segmenting audio in", audio_dir) if not os.path.exists(seg_dir): os.makedirs(seg_dir) num_sylls = 0 audio_fns, seg_fns = get_audio_seg_filenames(audio_dir, seg_dir, None) for audio_fn, seg_fn in zip(audio_fns, seg_fns): # Collect audio. with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=WavFileWarning) fs, audio = wavfile.read(audio_fn) # Segment. onsets, offsets = p['algorithm'](audio, p) combined = np.stack([onsets, offsets]).T num_sylls += len(combined) # Write. header = "Onsets/offsets for " + audio_fn np.savetxt(seg_fn, combined, fmt='%.5f', header=header) if verbose: print("\tFound", num_sylls, "segments in", audio_dir)
[docs]def tune_segmenting_params(audio_dirs, p, img_fn='temp.pdf'): """ Tune segementing parameters by visualizing segmenting decisions. Chunks of audio will be drawn at random, segmented, and a plot showing the segmenting decisions will be saved as ``img_fn``, by default ``'temp.pdf'``. Parameters ---------- audio_dirs : list of str Directories containing audio files. p : dict Segmenting parameters. Must contain the keys: -`'max_dur'`: maximum segment duration, in seconds -`'algorithm'`: segmenting algorithm, for example `ava.segmenting.amplitude_segmentation.get_onsets_offsets`. in addition to the keys required by `ava.segmenting.utils.get_spec`. img_fn : str, optional Where to save segmenting images. Returns ------- p : dict Adjusted segmenting parameters. """ print("Tune segmenting parameters\n---------------------------") # Collect filenames. filenames = [] for load_dir in audio_dirs: filenames += [os.path.join(load_dir, i) for i in os.listdir(load_dir) \ if _is_audio_file(i)] if len(filenames) == 0: warnings.warn("Found no audio files in directories: "+str(audio_dirs)) return # Set the amount of audio to display. if 'window_dur' in p: window_dur = p['window_dur'] else: window_dur = 2.0 * p['max_dur'] window_samples = int(window_dur * p['fs']) # Main loop: keep tuning parameters... while True: # Tune the parameters. for key in p: # Skip non-tunable parameters. if key in ['num_time_bins', 'num_freq_bins'] or not _is_number(p[key]): continue temp = 'not number and not empty' while not _is_number_or_empty(temp): temp = input('Set value for '+key+': ['+str(p[key])+ '] ') if temp != '': p[key] = float(temp) # Plot segmenting decisions. temp = 'not (s or r)' iteration = 0 while temp != 's' and temp != 'r': # Get a random audio file. file_index = np.random.randint(len(filenames)) filename = filenames[file_index] # Get spectrogram. with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=WavFileWarning) fs, audio = wavfile.read(filename) assert fs == p['fs'], 'Found fs='+str(fs)+', expected '+str(p['fs']) if len(audio) < 3*window_samples + 1: temp = len(audio) / p['fs'] print("Skipping short file: "+filename+" ("+str(temp)+"s)") continue start_index = np.random.randint(len(audio) - 3*window_samples) stop_index = start_index + 3*window_samples audio = audio[start_index:stop_index] spec, dt, f = get_spec(audio, p) # Get onsets and offsets. onsets, offsets, traces = \ p['algorithm'](audio, p, return_traces=True) onsets = [onset/dt for onset in onsets] offsets = [offset/dt for offset in offsets] # Plot. i1 = int(window_dur / dt) i2 = 2 * i1 t1, t2 = i1 * dt, i2 * dt _, axarr = plt.subplots(2,1, sharex=True) axarr[0].set_title(filename, fontsize=7) axarr[0].imshow(spec[:,i1:i2], origin='lower', \ aspect='auto', \ extent=[t1, t2, f[0]/1e3, f[-1]/1e3]) axarr[0].set_ylabel('Frequency (kHz)') for j in range(len(onsets)): if onsets[j] >= i1 and onsets[j] < i2: time = onsets[j] * dt for k in [0,1]: axarr[k].axvline(x=time, c='b', lw=0.5) if offsets[j] >= i1 and offsets[j] < i2: time = offsets[j] * dt for k in [0,1]: axarr[k].axvline(x=time, c='r', lw=0.5) for key in ['th_1', 'th_2', 'th_3']: # NOTE: clean this if key in p: axarr[1].axhline(y=p[key], lw=0.5, c='b') xvals = np.linspace(t1, t2, i2-i1) for trace in traces: axarr[1].plot(xvals, trace[i1:i2]) axarr[1].set_xlabel('Time (s)') plt.savefig(img_fn) plt.close('all') # Continue. all_events = [j for j in onsets if j>i1 and j<i2] + \ [j for j in offsets if j>i1 and j<i2] if len(all_events) > 0 or (iteration+1) % 20 == 0: temp = input('Continue? [y] or [s]top tuning or [r]etune params: ') else: iteration += 1 print("searching") temp = 'not (s or r)' if temp == 's': return p
[docs]def get_audio_seg_filenames(audio_dir, segment_dir, p=None): """ Return lists of sorted filenames. Warning ------- - `p` is unused. This will be removed in a future version! Parameters ---------- audio_dir : str Audio directory. segment_dir : str Segments directory. p : dict, optional Unused! Defaults to ``None``. """ temp_filenames = [i for i in sorted(os.listdir(audio_dir)) if \ _is_audio_file(i)] audio_filenames = [os.path.join(audio_dir, i) for i in temp_filenames] temp_filenames = [i[:-4] + '.txt' for i in temp_filenames] seg_filenames = [os.path.join(segment_dir, i) for i in temp_filenames] return audio_filenames, seg_filenames
def _is_audio_file(fn): return len(fn) >= 4 and fn[-4:] == '.wav' def _is_number_or_empty(s): if s == '': return True try: float(s) return True except: return False def _is_number(s): return type(s) == type(4) or type(s) == type(4.0) if __name__ == '__main__': pass ###