diff python/simscene.py @ 35:5d19c2254677
added simscene.py with the accompanying input files to generate acoustic scenes using python
author   | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date     | Thu, 05 Oct 2017 14:53:15 +0100
parents  |
children | a0eb120940b1
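
Besides the command-line interface defined at the bottom of the file, simscene() can be called directly with pandas DataFrames that use the column names expected by read_events_file() and read_backgrounds_file() below. The following is an illustrative sketch only: the paths, sample ids, and numbers are hypothetical, and it assumes simscene.py is importable as a module and that input/event/ and input/background/ contain the referenced .wav files.

    import pandas as pd
    from simscene import simscene

    # One event track; column names follow read_events_file().
    events = pd.DataFrame([{
        'label': 'door', 'sampleid': 'door1',
        'ebr': 0.0, 'ebr_stddev': 0.0,                 # event-to-background ratio in dB
        'mean_time_between_instances': 5.0,            # seconds between repetitions
        'time_between_instances_stddev': 1.0,
        'start_time': 2.0, 'end_time': 30.0,
        'fade_in_time': 0.05, 'fade_out_time': 0.05,
    }])

    # One background track; column names follow read_backgrounds_file().
    backgrounds = pd.DataFrame([{
        'label': 'street', 'sampleid': 'street1', 'snr': -6.0,   # dB
    }])

    # 'output' must already exist; scene.wav is written there.
    scene = simscene('input', 'output', 30.0, events, backgrounds,
                     channel_mode='mono', figure_verbosity=0, end_cut=False)

The equivalent command-line call passes the same two tables as CSV files via -e and -b.
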
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/simscene.py	Thu Oct 05 14:53:15 2017 +0100
@@ -0,0 +1,867 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# For licensing please see: LICENSE
+# Copyright (c) Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
+
+# Argparse
+import argparse
+
+# Logging
+import logging
+
+# Pandas
+import pandas as pd
+
+# Numpy
+import numpy as np
+import sys
+
+# Glob
+import glob
+import random
+
+# Librosa
+import librosa
+import librosa.display
+import librosa.output
+
+# Matplotlib
+from matplotlib import rc
+# rc('text', usetex=True)
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from cycler import cycler
+
+# Tabulate
+from tabulate import tabulate
+
+def _N(t, sr=44100):
+    """
+    Helper function: converts time in seconds to a sample index.
+    """
+    return int(t*sr)
+
+def compute_energy(x):
+    return np.sqrt(np.mean(x**2))
+
+# def compute_energy_profile(x, w=1000):
+#     # Resize/Window signal
+#     #x = np.resize(x, (w,int(np.ceil(float(len(x)/w)))))
+#     x = np.array([[ii+jj for jj in range(w)] for ii in range(len(x)-w)])
+#     return np.sqrt(np.mean(x**2, 1))
+
+def render_pattern(fname, input_path, sr=44100):
+    pattern = read_pattern_file(fname)
+
+    start_times_samples = []
+    end_times_samples = []
+    durations_samples = []
+    wav_files = []
+
+    for n in range(len(pattern)):
+        # Try loading the file.
+        sampleid = pattern['sampleid'].loc[n]
+        candidates = glob.glob('{}/event/{}*wav'.format(input_path, sampleid))
+        chosen_fname = random.sample(candidates, 1)[0]
+
+        logging.debug('Loading {}'.format(chosen_fname))
+
+        # For each sound in the pattern file, place it starting from starttime + an offset
+        # with a mean value of 0 and standard deviation of offset_stddev. The first event
+        # cannot start earlier than time 0. If endtime is defined (not nan), then cut the
+        # event at end time.
+        wav, SR = librosa.load(chosen_fname, sr=sr)
+
+        # Read and assign an amplitude
+        amplitude_mean = float(pattern['amplitude'].loc[n])
+        amplitude_stddev = float(pattern['amplitude_stdev'].loc[n])
+        amplitude = amplitude_mean + np.random.randn()*amplitude_stddev
+        wav *= amplitude
+
+        start_time = max(float(pattern['start_time'].loc[n]), 0)
+        start_time_samples = int(start_time*SR)
+
+        fade_in_time = float(pattern['fade_in_time'].loc[n])
+        fade_out_time = float(pattern['fade_out_time'].loc[n])
+        end_time = float(pattern['end_time'].loc[n])
+
+        # If end_time is not defined (-1 or just empty)
+        # then just derive it from the length of the sample
+        if np.isnan(end_time) or float(end_time) == -1:
+            duration_samples = len(wav)
+            end_time_samples = start_time_samples + duration_samples
+        elif end_time - start_time > len(wav)/float(SR):
+            # If the given end_time is more than start_time + duration of the sample
+            # then pad the file with zeros to reach the desired end time.
+            duration = end_time - start_time
+            duration_samples = int(duration*SR)
+            end_time_samples = start_time_samples + duration_samples
+            wav_arr = np.zeros(duration_samples)
+            wav_arr[:len(wav)] = wav
+            wav = wav_arr
+        else:
+            duration = end_time - start_time
+            duration_samples = int(duration*SR)
+            end_time_samples = start_time_samples + duration_samples
+
+        event_render = fade(wav[:duration_samples], fade_in_time, fade_out_time)
+
+        start_times_samples.append(start_time_samples)
+        end_times_samples.append(end_time_samples)
+        durations_samples.append(duration_samples)
+        wav_files.append(event_render)
+
+    pattern_duration = end_time_samples
+    pattern_arr = np.zeros(pattern_duration)
+
+    for n, s in enumerate(start_times_samples):
+        wav = wav_files[n]
+        pattern_arr[s:s+len(wav)] = wav
+
+    return pattern_arr, sr
+
+def read_events_file(fname):
+    if fname[-3:].lower() == 'xls':
+        df = pd.read_excel(fname)
+    elif fname[-4:].lower() == 'json':
+        df = pd.read_json(fname)
+    elif fname[-3:].lower() in ['txt']:
+        with open(fname) as f:
+            s = f.readline()
+            f.seek(0, 0)
+            if ',' in s:
+                sep = ','
+            elif '\t' in s:
+                sep = '\t'
+            else:
+                sep = ' '
+            logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
+            df = pd.read_csv(f, header=None, sep=sep)
+            df.columns = ['label', 'sampleid', 'ebr', 'ebr_stddev',
+                          'mean_time_between_instances', 'time_between_instances_stddev',
+                          'start_time', 'end_time', 'fade_in_time', 'fade_out_time']
+    elif fname[-3:].lower() in ['csv']:
+        # .csv files are expected to carry a header row, so read them directly.
+        df = pd.read_csv(fname)
+
+    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    return df
+
+def read_pattern_file(fname):
+    if fname[-3:].lower() == 'xls':
+        df = pd.read_excel(fname)
+    elif fname[-4:].lower() == 'json':
+        df = pd.read_json(fname)
+    elif fname[-3:].lower() in ['txt']:
+        with open(fname) as f:
+            s = f.readline()
+            f.seek(0, 0)
+            if ',' in s:
+                sep = ','
+            elif '\t' in s:
+                sep = '\t'
+            else:
+                sep = ' '
+            logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
+            df = pd.read_csv(f, header=None, sep=sep)
+            df.columns = ['eventid', 'start_time', 'end_time', 'time_offset_stdev',
+                          'fade_in_time', 'fade_out_time', 'amplitude', 'amplitude_stdev']
+    elif fname[-3:].lower() in ['csv']:
+        # .csv files are expected to carry a header row, so read them directly.
+        df = pd.read_csv(fname)
+
+    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    return df
+
+def read_backgrounds_file(fname):
+    if fname[-3:].lower() == 'xls':
+        df = pd.read_excel(fname)
+    elif fname[-4:].lower() == 'json':
+        df = pd.read_json(fname)
+    elif fname[-3:].lower() in ['txt']:
+        with open(fname) as f:
+            s = f.readline()
+            f.seek(0, 0)
+            if ',' in s:
+                sep = ','
+            elif '\t' in s:
+                sep = '\t'
+            else:
+                sep = ' '
+            logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
+            df = pd.read_csv(f, header=None, sep=sep)
+            df.columns = ['label', 'sampleid', 'snr']
+    elif fname[-3:].lower() in ['csv']:
+        # .csv files are expected to carry a header row, so read them directly.
+        df = pd.read_csv(fname)
+
+    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    return df
+
+def read_annotations_file(fname):
+    if fname[-3:].lower() == 'xls':
+        df = pd.read_excel(fname)
+    elif fname[-4:].lower() == 'json':
+        df = pd.read_json(fname)
+    elif fname[-3:].lower() in ['txt', 'csv']:
+        with open(fname) as f:
+            header = f.readline()
+            s = f.readline()
+            f.seek(0, 0)
+            if ',' in s:
+                sep = ','
+            elif '\t' in s:
+                sep = '\t'
+            else:
+                sep = ' '
+            if sep in header:
+                logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
+                df = pd.read_csv(f, header=None, sep=sep)
+                df.columns = ['start', 'stop', 'class']
+            else:
+                df = pd.read_csv(f, sep=sep)
+                df.columns = ['start', 'stop', 'class']
+    else:
+        df = None
+
+    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    return df
+
+def run_demo():
+    print("TODO: Implement run_demo()")
+
+def fade(x, fade_in, fade_out, sr=44100):
+    """
+    Creates a fade-in-fade-out envelope
+    for audio array x.
+    """
+
+    if len(x) == 0:
+        return x
+
+    fade_in_samples = int(fade_in*sr)
+    fade_out_samples = int(fade_out*sr)
+
+    outp = np.ones_like(x)
+    for n in range(fade_in_samples):
+        outp[n] = n*1./fade_in_samples
+
+    for n in range(fade_out_samples):
+        outp[len(outp)-fade_out_samples+n] = 1 - 1./fade_out_samples*n
+    return outp*x
+
+def simscene(input_path,
+             output_path,
+             scene_duration,
+             score_events,
+             score_backgrounds,
+             **kwargs):
+    logging.info('simscene() is not yet implemented fully')
+    SR = 44100  # Samplerate. Should probably not be hardcoded.
+
+    events_df = score_events
+    backgrounds_df = score_backgrounds
+
+    # Create empty numpy array
+    scene_arr = np.zeros(int(scene_duration*SR))
+
+    if 'append_to_filename' in kwargs:
+        append_to_filename = kwargs['append_to_filename']
+    else:
+        append_to_filename = None
+
+    if 'end_cut' in kwargs:
+        end_cut = kwargs['end_cut']
+    else:
+        end_cut = False
+
+    if 'figure_verbosity' in kwargs:
+        figure_verbosity = kwargs['figure_verbosity']
+    else:
+        figure_verbosity = 0
+
+    if 'image_format' in kwargs:
+        image_format = kwargs['image_format']
+    else:
+        image_format = 'png'
+
+    if 'channel_mode' in kwargs:
+        channel_mode = kwargs['channel_mode']
+    else:
+        channel_mode = 'mono'
+
+    # Stores the starting and ending times of every track for visualization
+    # purposes
+    scene_starting_times = []
+    scene_ending_times = []
+
+    # List of tracks
+    track_list = []
+    background_energies = []
+
+    for n in range(len(backgrounds_df)):
+        # Get label of background
+        label = str(backgrounds_df['label'].loc[n])
+
+        # First check if there are any pattern candidates. Give priority
+        # to pattern files.
+        candidates = []
+        for pattern_format in ['xls', 'json', 'txt', 'csv']:
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, backgrounds_df['sampleid'].loc[n], pattern_format))
+
+        if len(candidates) == 0:
+            # If no patterns are found, search for normal audio files
+            candidates = glob.glob('{}/background/{}*.wav'.format(input_path, backgrounds_df['sampleid'].loc[n]))
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = librosa.load(chosen_fname, sr=SR)
+        else:
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = render_pattern(chosen_fname, input_path)
+
+        duration = len(wav)/float(SR)
+        target_snr_db = float(backgrounds_df['snr'].loc[n])
+        target_snr = 10**(target_snr_db/20.0)
+
+        energy = compute_energy(wav)
+
+        logging.debug('{}:energy:{}'.format(label, energy))
+
+        if n == 0:
+            # For the first background track, snr gives the amount by which
+            # it is scaled directly (i.e. make it quieter).
+            amplitude_factor = target_snr
+            wav *= amplitude_factor
+
+        if n > 0:
+            noise_energy = compute_energy(np.sum(np.array(track_list), axis=0))
+            logging.info('{}:noise_energy:{}'.format(label, noise_energy))
+
+            old_snr = energy/noise_energy
+            old_snr_db = 20*np.log10(old_snr)
+            logging.info('{}:old_snr:{}'.format(label, old_snr_db))
+
+            amplitude_factor = target_snr/old_snr
+
+            wav *= amplitude_factor
+            new_energy = compute_energy(wav)
+            new_snr = new_energy/noise_energy
+            new_snr_db = 20.*np.log10(new_snr)
+            logging.info('{}:new_snr:{}'.format(label, new_snr_db))
+
+        # Track array
+        track_arr = np.zeros(int(scene_duration*SR))
+        start_times = [0.0]
+        end_times = [start_times[-1]+len(wav)/float(SR)]
+
+        # Start with the first time in the list
+        new_start_time = start_times[-1]
+        new_end_time = end_times[-1]
+
+        while new_start_time < scene_duration:
+            offset = duration
+            new_start_time += offset
+            new_end_time += offset
+
+            start_times.append(new_start_time)
+            end_times.append(new_end_time)
+
+        for n, t in enumerate(start_times):
+            # We need to be careful with the limits here
+            # since numpy will just ignore indexing that
+            # exceeds the size of the array.
+
+            # Fading times in case we need to join many
+            # consecutive samples together.
+            # if n == 0:
+            #     # Little fade-out, fade-in to smoothly repeat the
+            #     # background.
+            #     fade_in_time = 0.0
+            #     fade_out_time = 0.01
+            # elif n > 0 and n < len(start_times) - 1:
+            #     fade_in_time = 0.01
+            #     fade_out_time = 0.01
+            # else:
+            #     fade_in_time = 0.01
+            #     fade_out_time = 0.0
+            begin = min(_N(t), len(track_arr))
+            end = min(len(track_arr), _N(t)+len(wav))
+
+            # Part of the wav to store
+            # part = fade(wav[:end-begin],fade_in_time,fade_out_time)
+            part = wav[:end-begin]
+
+            track_arr[begin:end] += part
+
+        track_list.append(track_arr)
+        scene_arr[:len(track_arr)] += track_arr
+
+        if channel_mode == 'separate':
+            librosa.output.write_wav('{}/{}_background_track.wav'.format(output_path, label), track_arr, SR)
+
+        F = librosa.stft(track_arr, 1024)
+        energy_prof = librosa.feature.rmse(S=F)
+        background_energies.append(energy_prof)
+
+        if figure_verbosity > 0:
+            plt.figure()
+            plt.subplot(3, 1, 1)
+            plt.title('`{}\' background waveform and spectrogram'.format(label))
+            librosa.display.waveplot(track_arr, sr=SR)
+
+            # Plot spectrogram
+            Fdb = librosa.amplitude_to_db(F)
+            plt.subplot(3, 1, 2)
+            librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz')
+
+            # Plot energy profile
+            plt.subplot(3, 1, 3)
+            time = np.linspace(0, len(track_arr)/float(SR), len(energy_prof.T))
+            plt.semilogy(time, energy_prof.T)
+            plt.xlim([0, len(track_arr)/float(SR)])
+            plt.ylabel('energy (rms)')
+
+            # Tidy up and save to file
+            plt.tight_layout()
+            if append_to_filename:
+                plt.savefig('{}/background_{}_{}.{}'.format(output_path, label, append_to_filename, image_format), dpi=300)
+            else:
+                plt.savefig('{}/background_{}.{}'.format(output_path, label, image_format), dpi=300)
+
+    # Compute total energy of background
+    if len(backgrounds_df) > 0:
+        background_arr = np.sum(track_list, 0)
+        B = librosa.stft(background_arr, 1024)
+        background_energy = librosa.feature.rmse(S=B).flatten()
+    else:
+        background_energy = 0.0
+
+    for n in range(len(events_df)):
+        # Get label of track
+        label = str(events_df['label'].loc[n])
+
+        # First check if there are any pattern candidates. Give priority
+        # to pattern files.
+        candidates = []
+        for pattern_format in ['xls', 'json', 'txt', 'csv']:
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, events_df['sampleid'].loc[n], pattern_format))
+
+        if len(candidates) == 0:
+            # If no patterns are found, search for normal audio files
+            candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n]))
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = librosa.load(chosen_fname, sr=SR)
+        else:
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = render_pattern(chosen_fname, input_path)
+
+        # Apply a fader envelope
+        fade_in_time = float(events_df['fade_in_time'].loc[n])
+        fade_out_time = float(events_df['fade_out_time'].loc[n])
+        wav = fade(wav, fade_in_time, fade_out_time)
+
+        # Set target EBR
+        target_ebr = 10**(float(events_df['ebr'].loc[n])/20.0 + np.random.randn()*float(events_df['ebr_stddev'].loc[n])/20.0)
+
+        # Mean time between instances \mu.
+        mean_time_between_instances = events_df['mean_time_between_instances'].loc[n]
+        track_end_time = events_df['end_time'].loc[n]
+
+        # Track array
+        track_arr = np.zeros(int(scene_duration*SR))
+
+        # If \mu is -1, then play the event only once.
+        if mean_time_between_instances == -1:
+            track_arr[_N(events_df['start_time'].loc[n]):_N(events_df['start_time'].loc[n])+len(wav)] += wav
+            start_times = [float(events_df['start_time'].loc[n])]
+            end_times = [float(events_df['end_time'].loc[n])]
+        else:
+            # If 0, then start the next sample after this one (set it to the duration of the sample)
+            if mean_time_between_instances == 0:
+                mean_time_between_instances = len(wav)/float(SR)
+
+            # Store the successive starting and ending times of the events (given e.g. the model)
+            # in the following lists.
+            start_times = [events_df['start_time'].loc[n]]
+            end_times = [start_times[-1]+len(wav)/float(SR)]
+
+            # Start with the first time in the list
+            new_start_time = start_times[-1]
+            new_end_time = end_times[-1]
+
+            # Until the scene is full
+            while new_start_time < track_end_time:
+                offset = float(mean_time_between_instances) +\
+                         float(events_df['time_between_instances_stddev'].loc[n]*np.random.randn())
+                new_start_time += offset
+                new_end_time += offset
+
+                # Only exception is if we have set the 'end_cut' flag
+                # and the end time of the event surpasses the end time
+                # of the track
+                if end_cut and new_end_time > track_end_time:
+                    break
+                else:
+                    start_times.append(new_start_time)
+                    end_times.append(new_end_time)
+
+            for t in start_times:
+                # We need to be careful with the limits here
+                # since numpy will just ignore indexing that
+                # exceeds the size of the array
+                begin = min(_N(t), len(track_arr))
+                end = min(len(track_arr), _N(t)+len(wav))
+
+                # Part of the wav to store
+                part = wav[:end-begin]
+
+                # If wav file was concatenated, fade out
+                # quickly to avoid clicks
+                if len(part) < len(wav) and len(part) > fade_out_time*SR:
+                    part = fade(part, 0, fade_out_time)
+
+                track_arr[begin:end] += part
+
+        track_list.append(track_arr)
+        scene_arr[:len(track_arr)] += track_arr
+
+        # Compute energies
+        F = librosa.stft(track_arr, 1024)
+        energy_prof = librosa.feature.rmse(S=F).flatten()
+
+        # Compute current ebr
+        if len(backgrounds_df) > 0:
+            ebr_prof = energy_prof/background_energy[:len(energy_prof)].flatten()
+            curr_ebr = np.max(ebr_prof)
+            logging.debug('{}:Target ebr: {}db'.format(label, 20*np.log10(target_ebr)))
+            logging.debug('{}:Current track ebr: {}db'.format(label, 20*np.log10(curr_ebr)))
+
+            # Set correct ebr
+            track_arr = track_arr/curr_ebr*target_ebr
+
+            Fnew = librosa.stft(track_arr, 1024)
+            new_energy_prof = librosa.feature.rmse(S=Fnew).flatten()
+            new_ebr_prof = new_energy_prof/background_energy[:len(energy_prof)].flatten()
+            new_ebr = np.max(new_ebr_prof)
+            logging.debug('{}:New track ebr: {}db'.format(label, 20*np.log10(new_ebr)))
+
+        if channel_mode == 'separate':
+            librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr/np.max(track_arr), SR)
+
+        if figure_verbosity > 0:
+            plt.figure()
+
+            plt.subplot(3, 1, 1)
+            plt.title('`{}\' event waveform and spectrogram'.format(label))
+
+            librosa.display.waveplot(track_arr, sr=SR)
+            Fdb = librosa.amplitude_to_db(F)
+            plt.subplot(3, 1, 2)
+            librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz')
+
+            # Plot energy profile
+            plt.subplot(3, 1, 3)
+            time = np.linspace(0, len(track_arr)/float(SR), len(energy_prof.T))
+            plt.semilogy(time, energy_prof.T)
+            plt.xlim([0, len(track_arr)/float(SR)])
+            plt.ylabel('energy (rms)')
+
+            plt.tight_layout()
+            if append_to_filename:
+                plt.savefig('{}/event_{}_{}.{}'.format(output_path, label, append_to_filename, image_format), dpi=300)
+            else:
+                plt.savefig('{}/event_{}.{}'.format(output_path, label, image_format), dpi=300)
+
+        scene_starting_times.append((label, start_times))
+        scene_ending_times.append((label, end_times))
+
+    if figure_verbosity > 0:
+        plt.figure()
+        ax0 = plt.subplot(3, 1, 1)
+        plt.title('Synthesized Scene')
+        librosa.display.waveplot(scene_arr, sr=SR)
+        F = librosa.stft(scene_arr)
+        Fdb = librosa.amplitude_to_db(F)
+        ax1 = plt.subplot(3, 1, 2)
+        librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz')
+        ax2 = plt.subplot(3, 1, 3)
+        ax2.set_xlim([0, scene_duration])
+
+        # Get labels
+        labels = [s[0] for s in scene_starting_times]
+
+        # If background is active
+        if len(backgrounds_df) > 0:
+            labels.append('background')
+
+        # Set y axis limit, with a padding of 0.5.
+        ax2.set_ylim([-0.5, len(labels)-0.5])
+
+        plt.yticks(range(len(labels)), labels)
+
+        for n in range(len(scene_starting_times)):
+            label = scene_starting_times[n][0]
+            start_times = scene_starting_times[n][1]
+            end_times = scene_ending_times[n][1]
+            color = ['r', 'g', 'y'][n % 3]
+
+            for m in range(len(start_times)):
+                plt.hlines(y=float(n), xmin=start_times[m], xmax=end_times[m], alpha=0.5, color=color, linewidth=4)
+                if figure_verbosity > 2:
+                    ax0.axvline(start_times[m], color=color, alpha=0.1)
+                    ax0.axvline(end_times[m], color=color, alpha=0.1)
+                    ax0.axvspan(start_times[m], end_times[m], color=color, alpha=0.1)
+                    ax1.axvline(start_times[m], color=color, alpha=0.1)
+                    ax1.axvline(end_times[m], color=color, alpha=0.1)
+                    ax1.axvspan(start_times[m], end_times[m], color=color, alpha=0.1)
+                    ax2.axvline(start_times[m], color=color, alpha=0.1)
+                    ax2.axvline(end_times[m], color=color, alpha=0.1)
+                    ax2.axvspan(start_times[m], end_times[m], color=color, alpha=0.1)
+
+        if len(backgrounds_df) > 0:
+            plt.axhline(y=len(scene_starting_times), alpha=0.5, color='k', linewidth=4)
+
+        plt.tight_layout()
+
+        if append_to_filename:
+            plt.savefig('{}/scene_{}.{}'.format(output_path, append_to_filename, image_format), dpi=300)
+        else:
+            plt.savefig('{}/scene.{}'.format(output_path, image_format), dpi=300)
+
+    if figure_verbosity > 1:
+        plt.show()
+
+    # Replace nans (i.e. because of division-by-zero) of the scene with zeros.
+    scene_arr = np.nan_to_num(scene_arr)
+
+    if channel_mode == 'mono':
+        if append_to_filename:
+            librosa.output.write_wav('{}/scene_{}.wav'.format(output_path, append_to_filename), scene_arr, SR)
+        else:
+            librosa.output.write_wav('{}/scene.wav'.format(output_path), scene_arr, SR)
+
+    return scene_arr
+
+def not_implemented():
+    print("TODO: not implemented")
+
+if __name__ == "__main__":
+    """
+    Main entry point: parses options and calls the simscene generation function
+    or a demo. The options given are almost identical to those of Lagrange et al.'s
+    simscene.
+    """
+    argparser = argparse.ArgumentParser(
+        description="SimScene.py acoustic scene generator",
+    )
+    argparser.add_argument(
+        'input_path',
+        type=str,
+        help="Path of a directory containing wave files for sound backgrounds (in the `background' sub-directory) or events (in the `event' sub-directory)"
+    )
+    argparser.add_argument(
+        'output_path',
+        type=str,
+        help="The directory where the generated scenes and annotations will be written."
+    )
+    argparser.add_argument(
+        'scene_duration',
+        type=float,
+        help="Duration of the scene in seconds",
+    )
+    scene_duration = None
+
+    argparser.add_argument(
+        '-e', '--score-events',
+        type=str,
+        help="Score events file as a comma-separated text file (.csv, .txt), JSON (.json), or Excel (.xls) file"
+    )
+    score_events = None
+
+    argparser.add_argument(
+        '-b', '--score-backgrounds',
+        type=str,
+        help="Score backgrounds file as a comma-separated text file (.csv, .txt), JSON (.json), or Excel (.xls) file"
+    )
+    score_backgrounds = None
+
+    argparser.add_argument(
+        '--tag',
+        type=str,
+        help="Append _TAG_XXX to filenames, where XXX is an increment."
+    )
+    tag = None
+
+    argparser.add_argument(
+        '-N',
+        type=int,
+        help="Generate N instances of the scene. If not specified, only generate a single instance. Note that if N > 1, the figure verbosity must be less than or equal to 1."
+    )
+    generate_n = 1
+
+    argparser.add_argument(
+        '-t', '--time-mode',
+        type=str,
+        help="Mode of spacing between events. `generate': values must be set for each track in the score files. `abstract': values are computed from an abstract representation of an existing acoustic scene. `replicate': values are replicated from an existing acoustic scene. (NOT IMPLEMENTED)",
+        choices=['generate', 'abstract', 'replicate']
+    )
+    time_mode = 'generate'
+
+    argparser.add_argument(
+        '-R', '--ebr-mode',
+        type=str,
+        help="Mode for the event-to-background power level ratio. `generate': values must be set for each track in the score files. `abstract': values are computed from an abstract representation of an existing acoustic scene. `replicate': values are replicated from an existing acoustic scene. (NOT IMPLEMENTED)",
+        choices=['generate', 'abstract', 'replicate']
+    )
+    ebr_mode = 'generate'
+
+    argparser.add_argument(
+        '-A', '--annotation-file',
+        type=str,
+        help="If -R or -m are selected, the times or EBRs are sourced from ANNOTATION_FILE. ANNOTATION_FILE must be a comma-separated text file (.csv, .txt), JSON (.json), or Excel (.xls) file. (NOT IMPLEMENTED)"
+    )
+    annotation_file = None
+
+    argparser.add_argument(
+        '-a', '--audio-file',
+        type=str,
+        help="If -R or -m are selected, the times or EBRs are sourced from AUDIO_FILE. AUDIO_FILE must be a 44100Hz .wav file. (NOT IMPLEMENTED)"
+    )
+    audio_file = None
+
+    argparser.add_argument(
+        '-v', '--figure-verbosity', action='count',
+        help="Increase figure verbosity. (Default) 0 - Don't save or display figures, 1 - Save figures but do not display them, 2 - Save and display figures, 3 - Additionally shade the event regions in the final plots"
+    )
+    figure_verbosity = 0
+
+    argparser.add_argument(
+        '-x', '--image-format',
+        help="Image format for the figures",
+        choices=['png', 'jpg', 'pdf']
+    )
+    image_format = 'png'
+
+    argparser.add_argument(
+        '-C', '--channel-mode',
+        type=str,
+        help="Output channel mode. (Default) 'mono' - the whole scene is written as a single mono .wav file, 'separate' - each background/event track is written to its own .wav file.",
+        choices=['mono', 'separate']
+    )
+    channel_mode = 'mono'
+
+    # argparser.add_argument(
+    #     '-m', '--min-space',
+    #     type=float,
+    #     help="Minimum space allowed between successive events (seconds). If -1, then allow overlapping between events."
+    # )
+    min_space = -1
+
+    argparser.add_argument(
+        '-c', '--end-cut',
+        action='store_true',
+        help="If the last sample ends after the scene ends: if enabled, cut the sample to the scene duration; otherwise remove the sample."
+    )
+    end_cut = None
+
+    logging.basicConfig(level=logging.DEBUG)
+
+    args = argparser.parse_args()
+    if args.input_path:
+        input_path = args.input_path
+        logging.debug("Using `{}' as input path".format(input_path))
+    if args.output_path:
+        output_path = args.output_path
+        logging.debug("Saving to `{}'".format(output_path))
+    if args.scene_duration:
+        if not (args.score_backgrounds or args.score_events):
+            print("You must provide one of -e or -b")
+        else:
+            if args.image_format:
+                image_format = args.image_format
+            if args.channel_mode:
+                channel_mode = args.channel_mode
+            if args.ebr_mode:
+                ebr_mode = args.ebr_mode
+                if ebr_mode not in ['generate']:
+                    logging.warning("`{}' not yet implemented for EBR_MODE, using default.".format(ebr_mode))
+                    ebr_mode = 'generate'
+            if args.time_mode:
+                time_mode = args.time_mode
+                if time_mode not in ['generate']:
+                    logging.warning("`{}' not yet implemented for TIME_MODE, using default.".format(time_mode))
+                    time_mode = 'generate'
+            if args.annotation_file:
+                annotations = read_annotations_file(args.annotation_file)
+
+            scene_duration = float(args.scene_duration)
+
+            if args.score_backgrounds:
+                score_backgrounds = read_backgrounds_file(args.score_backgrounds)
+            else:
+                score_backgrounds = []
+
+            if args.score_events:
+                score_events = read_events_file(args.score_events)
+            else:
+                score_events = []
+
+            if args.figure_verbosity:
+                figure_verbosity = args.figure_verbosity
+
+            if args.N:
+                generate_n = args.N
+
+            if args.tag:
+                tag = args.tag
+
+            if generate_n == 1:
+                append_to_filename = None
+                simscene(input_path, output_path, scene_duration, score_events, score_backgrounds,
+                         time_mode=time_mode,
+                         ebr_mode=ebr_mode,
+                         channel_mode=channel_mode,
+                         annotation_file=annotation_file,
+                         audio_file=audio_file,
+                         figure_verbosity=figure_verbosity,
+                         min_space=min_space,
+                         end_cut=end_cut,
+                         image_format=image_format,
+                         append_to_filename=append_to_filename)
+            else:
+                for n in range(generate_n):
+                    if tag:
+                        append_to_filename = '{}_{}'.format(tag, n)
+                    else:
+                        append_to_filename = '{}'.format(n)
+
+                    logging.info("Generating scene {}".format(n))
+
+                    simscene(input_path, output_path, scene_duration, score_events, score_backgrounds,
+                             time_mode=time_mode,
+                             ebr_mode=ebr_mode,
+                             channel_mode=channel_mode,
+                             annotation_file=annotation_file,
+                             audio_file=audio_file,
+                             figure_verbosity=min(figure_verbosity, 1),
+                             min_space=min_space,
+                             end_cut=end_cut,
+                             image_format=image_format,
+                             append_to_filename=append_to_filename)
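
Event tracks are mixed at an event-to-background ratio (EBR) given in dB: the score value is converted to a linear factor with 10**(ebr/20), the current ratio of the track's energy to the background's is measured, and the track is multiplied by target_ebr/curr_ebr. The snippet below is an illustrative check of that arithmetic only, simplified to a single global RMS instead of the framewise profile used in simscene(); the signals and the -6 dB target are made up.

    import numpy as np

    rng = np.random.default_rng(0)
    background = 0.1 * rng.standard_normal(44100)   # stand-in background track
    event = 0.5 * rng.standard_normal(44100)        # stand-in event track

    def rms(x):
        return np.sqrt(np.mean(x**2))               # same as compute_energy()

    target_ebr_db = -6.0                            # requested EBR in dB
    target_ebr = 10**(target_ebr_db/20.0)           # linear factor, as in simscene()

    curr_ebr = rms(event)/rms(background)           # measured ratio before scaling
    event_scaled = event/curr_ebr*target_ebr        # same rescaling as the events loop

    # Prints approximately -6.0 dB.
    print(20*np.log10(rms(event_scaled)/rms(background)))
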