# Source code for ava.segmenting.segment

"""
Segment audio files and write segmenting decisions.

TO DO:
	- tune window size
	- segment could be sped up if it operated file by file.

"""
__date__ = "December 2018 - November 2020"


import matplotlib.pyplot as plt
plt.switch_backend('agg')
import numpy as np
import os
from scipy.io import wavfile, loadmat
from scipy.io.wavfile import WavFileWarning
from scipy.signal import stft
import warnings

from ava.segmenting.utils import get_spec

# Constants
# Small positive constant. It is not referenced anywhere in the visible part
# of this module -- presumably a numerical-stability guard; TODO confirm it is
# still needed.
EPSILON = 1e-12



def segment(audio_dir, seg_dir, p, verbose=True):
	"""
	Segment audio files in `audio_dir` and write decisions to `seg_dir`.

	One text file is written per audio file returned by
	`get_audio_seg_filenames`. Each row of a text file holds one segment as an
	``onset offset`` pair printed with five decimal places, and the header
	passed to `numpy.savetxt` names the source audio file.

	Parameters
	----------
	audio_dir : str
		Directory containing audio files.
	seg_dir : str
		Directory in which segmenting decisions are written. It is created if
		it does not already exist.
	p : dict
		Segmenting parameters. Must map the key `'algorithm'` to a segmenting
		algorithm, for example
		`ava.segmenting.amplitude_segmentation.get_onsets_offsets`. Must
		additionally contain keys requested by the segmenting algorithm.
	verbose : bool, optional
		If ``True``, print a message before segmenting and the total number of
		segments found afterwards. Defaults to ``True``.
	"""
	if verbose:
		print("Segmenting audio in", audio_dir)
	# `exist_ok=True` avoids the race between testing for the directory and
	# creating it.
	os.makedirs(seg_dir, exist_ok=True)
	num_sylls = 0
	audio_fns, seg_fns = get_audio_seg_filenames(audio_dir, seg_dir, None)
	for audio_fn, seg_fn in zip(audio_fns, seg_fns):
		# Collect audio. The sample rate returned by `wavfile.read` is not
		# used here: the algorithm is only handed the samples and `p`.
		with warnings.catch_warnings():
			warnings.filterwarnings("ignore", category=WavFileWarning)
			_, audio = wavfile.read(audio_fn)
		# Segment.
		onsets, offsets = p['algorithm'](audio, p)
		# Pair each onset with its offset: one row per segment.
		combined = np.stack([onsets, offsets]).T
		num_sylls += len(combined)
		# Write.
		header = "Onsets/offsets for " + audio_fn
		np.savetxt(seg_fn, combined, fmt='%.5f', header=header)
	if verbose:
		print("\tFound", num_sylls, "segments in", audio_dir)


def tune_segmenting_params(audio_dirs, p, img_fn='temp.pdf'):
	"""
	Tune segmenting parameters by visualizing segmenting decisions.

	Chunks of audio will be drawn at random, segmented, and a plot showing the
	segmenting decisions will be saved as ``img_fn``, by default ``'temp.pdf'``.

	The function is interactive. For every numeric entry of `p` (other than
	`'num_time_bins'` and `'num_freq_bins'`) the user is asked for a new value;
	an empty reply keeps the current one. After each plot that contains at
	least one onset or offset, and after every 20th plot that contains none,
	the user is asked how to proceed: ``'s'`` stops and returns `p`, ``'r'``
	goes back to entering parameter values, and any other reply plots another
	random chunk.

	Parameters
	----------
	audio_dirs : list of str
		Directories containing audio files.
	p : dict
		Segmenting parameters. Must contain the keys:
			-`'max_dur'`: maximum segment duration, in seconds (only read
			 when `'window_dur'` is absent)
			-`'fs'`: expected audio sample rate; every file read must match
			-`'algorithm'`: segmenting algorithm, for example
			 `ava.segmenting.amplitude_segmentation.get_onsets_offsets`.
		in addition to the keys required by `ava.segmenting.utils.get_spec`.
		May contain `'window_dur'`, the duration of audio to display; it
		defaults to twice `'max_dur'`. `p` is modified in place.
	img_fn : str, optional
		Where to save segmenting images.

	Returns
	-------
	p : dict
		Adjusted segmenting parameters. Returned unchanged (after a warning)
		if no audio files are found, and returned early (after a warning) if
		every audio file is too short to display.
	"""
	print("Tune segmenting parameters\n---------------------------")
	# Collect filenames.
	filenames = []
	for load_dir in audio_dirs:
		filenames += [os.path.join(load_dir, i) for i in os.listdir(load_dir)
				if _is_audio_file(i)]
	if len(filenames) == 0:
		warnings.warn("Found no audio files in directories: "+str(audio_dirs))
		# Honor the documented return value instead of returning None.
		return p
	# Set the amount of audio to display.
	# NOTE(review): this is computed once, before any tuning, so values
	# entered below for 'window_dur', 'max_dur' or 'fs' do not change the
	# size of the displayed window -- confirm that this is intended.
	if 'window_dur' in p:
		window_dur = p['window_dur']
	else:
		window_dur = 2.0 * p['max_dur']
	window_samples = int(window_dur * p['fs'])
	prompt = 'Continue? [y] or [s]top tuning or [r]etune params: '

	# Main loop: keep tuning parameters...
	while True:

		# Tune the parameters.
		for key in p:
			# Skip non-tunable parameters.
			if key in ['num_time_bins', 'num_freq_bins'] or not _is_number(p[key]):
				continue
			entry = 'not number and not empty'
			while not _is_number_or_empty(entry):
				entry = input('Set value for '+key+': ['+str(p[key])+ '] ')
			if entry != '':
				value = float(entry)
				# Keep integer parameters integral: an int silently turned
				# into a float can break code that needs a count or an index.
				if isinstance(p[key], int) and value.is_integer():
					value = int(value)
				p[key] = value

		# Plot segmenting decisions.
		response = 'not (s or r)'
		num_empty = 0 # windows plotted without any onset/offset
		while response != 's' and response != 'r':

			# Get a random audio file.
			file_index = np.random.randint(len(filenames))
			filename = filenames[file_index]

			# Get spectrogram.
			with warnings.catch_warnings():
				warnings.filterwarnings("ignore", category=WavFileWarning)
				fs, audio = wavfile.read(filename)
			assert fs == p['fs'], 'Found fs='+str(fs)+', expected '+str(p['fs'])
			# Three windows of audio are segmented; only the middle one is
			# plotted.
			if len(audio) < 3*window_samples + 1:
				duration = len(audio) / p['fs']
				print("Skipping short file: "+filename+" ("+str(duration)+"s)")
				# `window_samples` never changes, so this file can never be
				# used: drop it. Without this, a collection made up only of
				# short files would loop forever.
				filenames.pop(file_index)
				if len(filenames) == 0:
					warnings.warn("No audio file is long enough to display " + \
							"(need more than "+str(3*window_samples)+" samples)")
					return p
				continue
			start_index = np.random.randint(len(audio) - 3*window_samples)
			stop_index = start_index + 3*window_samples
			audio = audio[start_index:stop_index]
			spec, dt, f = get_spec(audio, p)

			# Get onsets and offsets, converted from times to (fractional)
			# spectrogram column indices.
			onsets, offsets, traces = \
					p['algorithm'](audio, p, return_traces=True)
			onsets = [onset/dt for onset in onsets]
			offsets = [offset/dt for offset in offsets]

			# Plot.
			i1 = int(window_dur / dt)
			i2 = 2 * i1
			t1, t2 = i1 * dt, i2 * dt
			_, axarr = plt.subplots(2,1, sharex=True)
			axarr[0].set_title(filename, fontsize=7)
			axarr[0].imshow(spec[:,i1:i2], origin='lower',
					aspect='auto',
					extent=[t1, t2, f[0]/1e3, f[-1]/1e3])
			axarr[0].set_ylabel('Frequency (kHz)')
			# Onsets are drawn in blue and offsets in red, on both panels.
			for onset, offset in zip(onsets, offsets):
				if onset >= i1 and onset < i2:
					for ax in axarr:
						ax.axvline(x=onset*dt, c='b', lw=0.5)
				if offset >= i1 and offset < i2:
					for ax in axarr:
						ax.axvline(x=offset*dt, c='r', lw=0.5)
			for key in ['th_1', 'th_2', 'th_3']: # NOTE: clean this
				if key in p:
					axarr[1].axhline(y=p[key], lw=0.5, c='b')
			xvals = np.linspace(t1, t2, i2-i1)
			for trace in traces:
				axarr[1].plot(xvals, trace[i1:i2])
			axarr[1].set_xlabel('Time (s)')
			plt.savefig(img_fn)
			plt.close('all')

			# Continue.
			all_events = [j for j in onsets if j>i1 and j<i2] + \
					[j for j in offsets if j>i1 and j<i2]
			if len(all_events) > 0:
				response = input(prompt)
			else:
				# Keep searching silently, but check in with the user on
				# every 20th empty window. Counting every empty window stops
				# the counter from sticking once the first prompt is reached.
				num_empty += 1
				if num_empty % 20 == 0:
					response = input(prompt)
				else:
					print("searching")
					response = 'not (s or r)'
			if response == 's':
				return p


def get_audio_seg_filenames(audio_dir, segment_dir, p=None):
	"""
	Return lists of sorted filenames.

	Warning
	-------
	- `p` is unused. This will be removed in a future version!

	Parameters
	----------
	audio_dir : str
		Audio directory.
	segment_dir : str
		Segments directory.
	p : dict, optional
		Unused! Defaults to ``None``.

	Returns
	-------
	audio_filenames : list of str
		Paths of the audio files found in `audio_dir`, sorted by filename.
	seg_filenames : list of str
		Matching paths in `segment_dir`, in the same order, with the
		four-character audio extension replaced by ``.txt``.
	"""
	wav_names = [i for i in sorted(os.listdir(audio_dir)) if _is_audio_file(i)]
	audio_filenames = [os.path.join(audio_dir, i) for i in wav_names]
	# `_is_audio_file` only accepts names ending in '.wav', so dropping the
	# last four characters removes exactly the extension.
	seg_filenames = [os.path.join(segment_dir, i[:-4] + '.txt') \
			for i in wav_names]
	return audio_filenames, seg_filenames


def _is_audio_file(fn):
	return len(fn) >= 4 and fn[-4:] == '.wav'


def _is_number_or_empty(s):
	if s == '':
		return True
	try:
		float(s)
		return True
	except:
		return False


def _is_number(s):
	return type(s) == type(4) or type(s) == type(4.0)



# Nothing happens when this module is run as a script: the guard is a no-op
# placeholder.
if __name__ == '__main__':
	pass


###