changeset 42:f30d2066eebb
Fixed a bug with the patterns not appearing in the resulting .xls file. More pep8 fixes.
| author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
| --- | --- |
| date | Sun, 08 Oct 2017 15:22:21 +0100 |
| parents | d97f5b9ac6a9 |
| children | a8e3d63653b6 |
| files | python/simscene.py |
| diffstat | 1 files changed, 127 insertions(+), 85 deletions(-) |
--- a/python/simscene.py	Sat Oct 07 15:22:02 2017 +0100
+++ b/python/simscene.py	Sun Oct 08 15:22:21 2017 +0100
@@ -14,7 +14,6 @@
 # Numpy
 import numpy as np
-import sys

 # Glob
 import glob

@@ -26,36 +25,28 @@
 import librosa.output

 # Matplotlib
-from matplotlib import rc
-# rc('text', usetex=True)
 import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-from cycler import cycler

 # Tabulate
 from tabulate import tabulate

-
-def _N(t, sr=44100):
+def _D(t, sr=44100):
     """ Helper function: Converts time to samples """
     return int(t*sr)

+
 def compute_energy(x):
     return np.sqrt(np.mean(x**2))

-# def compute_energy_profile(x, w=1000):
-#     # Resize/Window signal
-#     #x = np.resize(x, (w,int(np.ceil(float(len(x)/w)))))
-#     x = np.array([[ii+jj for jj in range(w)] for ii in range(len(x)-w)])
-#     return np.sqrt(np.mean(x**2, 1))

 def timedict_to_dataframe(timedict):
     return pd.DataFrame([(key, val[0], val[1]) for key in timedict for val in timedict[key]],
                         columns=('filename', 'start_time', 'end_time'))

+
 def render_pattern(fname, input_path, sr=44100):
     pattern = read_pattern_file(fname)

@@ -79,7 +70,7 @@
             candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, sampleid, pattern_format))

         if len(candidates) == 0:
-            candidates = glob.glob('{}/event/{}*wav'.format(input_path,sampleid))
+            candidates = glob.glob('{}/event/{}*wav'.format(input_path, sampleid))

             chosen_fname = random.sample(candidates, 1)[0]
             wav, SR = librosa.load(chosen_fname, sr=sr)
@@ -88,8 +79,7 @@
             logging.debug('Loading {}'.format(chosen_fname))
             wav, SR, pattern_timedict = render_pattern(chosen_fname, input_path)
-
-
+
         # For each sound in the pattern file, place it starting from starttime + an offset
         # with a mean value of 0 and standard deviation of offset_stddev. The first event can
         # not start earlier than time 0. If endtime is defined (not nan), then cut the event at
@@ -101,7 +91,7 @@
         amplitude = amplitude_mean + np.random.randn()*amplitude_stddev
         wav *= amplitude

-        start_time = max(float(pattern['start_time'].loc[n]),0)
+        start_time = max(float(pattern['start_time'].loc[n]), 0)
         start_time_samples = int(start_time*SR)

         fade_in_time = float(pattern['fade_in_time'].loc[n])
@@ -168,7 +158,7 @@
     elif fname[-3:].lower() in ['txt']:
         with open(fname) as f:
             s = f.readline()
-            f.seek(0,0)
+            f.seek(0, 0)
             if ',' in s:
                 sep = ','
             elif '\t' in s:
@@ -177,11 +167,20 @@
                 sep = ' '
                 logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
             df = pd.read_csv(f, header=None, sep=sep)
-            df.columns = ['label','sampleid','ebr','ebr_stddev','mean_time_between_instances','time_between_instances_stddev','start_time','end_time','fade_in_time','fade_out_time']
+            df.columns = ['label',
+                          'sampleid',
+                          'ebr',
+                          'ebr_stddev',
+                          'mean_time_between_instances',
+                          'time_between_instances_stddev',
+                          'start_time',
+                          'end_time',
+                          'fade_in_time',
+                          'fade_out_time']
     elif fname[-3:].lower() in ['csv']:
         df = pd.read_json(fname)

-    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    logging.debug('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))

     return df
@@ -193,7 +192,7 @@
     elif fname[-3:].lower() in ['txt']:
         with open(fname) as f:
             s = f.readline()
-            f.seek(0,0)
+            f.seek(0, 0)
             if ',' in s:
                 sep = ','
             elif '\t' in s:
@@ -202,11 +201,18 @@
                 sep = ' '
                 logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
             df = pd.read_csv(f, header=None, sep=sep)
-            df.columns = ['eventid','start_time','end_time','time_offset_stdev','fade_in_time', 'fade_out_time', 'amplitude', 'amplitude_stdev']
+            df.columns = ['eventid',
+                          'start_time',
+                          'end_time',
+                          'time_offset_stdev',
+                          'fade_in_time',
+                          'fade_out_time',
+                          'amplitude',
+                          'amplitude_stdev']
     elif fname[-3:].lower() in ['csv']:
         df = pd.read_json(fname)

-    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    logging.debug('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))

     return df
@@ -218,7 +224,7 @@
     elif fname[-3:].lower() in ['txt']:
         with open(fname) as f:
             s = f.readline()
-            f.seek(0,0)
+            f.seek(0, 0)
             if ',' in s:
                 sep = ','
             elif '\t' in s:
@@ -227,11 +233,11 @@
                 sep = ' '
                 logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
             df = pd.read_csv(f, header=None, sep=sep)
-            df.columns = ['label','sampleid','snr']
+            df.columns = ['label', 'sampleid', 'snr']
     elif fname[-3:].lower() in ['csv']:
         df = pd.read_json(fname)

-    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))
+    logging.debug('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))

     return df
@@ -246,7 +252,7 @@
             header = f.readline()
             s = f.readline()
-            f.seek(0,0)
+            f.seek(0, 0)
             if ',' in s:
                 sep = ','
             elif '\t' in s:
@@ -297,8 +303,8 @@
              score_events,
              score_backgrounds,
              **kwargs):
-    logging.info('simscene() is not yet implemented fully')
-    SR = 44100 # Samplerate. Should probably not be hardcoded
+    logging.warning('simscene() is not yet implemented fully')
+    SR = 44100  # Samplerate. Should probably not be hardcoded

     events_df = score_events
     backgrounds_df = score_backgrounds
@@ -351,7 +357,9 @@
         pattern_timedict = []

         for pattern_format in ['xls', 'json', 'txt', 'csv']:
-            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, backgrounds_df['sampleid'].loc[n], pattern_format))
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path,
+                                                               backgrounds_df['sampleid'].loc[n],
+                                                               pattern_format))

         if len(candidates) == 0:
             # If no patterns are found, search for normal audio files
@@ -362,7 +370,6 @@
             chosen_fname = random.sample(candidates, 1)[0]
             wav, sr, pattern_timedict = render_pattern(chosen_fname, input_path)
-
         duration = len(wav)/float(SR)

         target_snr_db = float(backgrounds_df['snr'].loc[n])
         target_snr = 10**(target_snr_db/20.0)
@@ -406,7 +413,6 @@
             timedict[chosen_fname].append((new_start_time, min(scene_duration, new_end_time)))
         else:
             timedict[chosen_fname] = [(new_start_time, min(scene_duration, new_end_time))]
-
         # while new_start_time < scene_duration:

         offset = duration
@@ -429,7 +435,7 @@

             # Also update the times from the patterns
             for pt in pattern_timedict:
-                pattern_timedict[pt] = [(s[0] + new_start_time, s[1] + new + new_end_time) for s in
+                pattern_timedict[pt] = [(s[0] + new_start_time, s[1] + new_start_time) for s in
                                         pattern_timedict[pt]]

                 if pt in timedict:
@@ -439,8 +445,7 @@

         # And add those to the timedict dictionary
-
-        for n,t in enumerate(start_times):
+        for t in start_times:
             # We need to be careful with the limits here
             # since numpy will just ignore indexing that
             # exceeds
@@ -458,8 +463,8 @@
             # else:
             #     fade_in_time = 0.01
             #     fade_out_time = 0.0
-            begin = min(_N(t), len(track_arr))
-            end = min(len(track_arr), _N(t)+len(wav))
+            begin = min(_D(t), len(track_arr))
+            end = min(len(track_arr), _D(t) + len(wav))

             # Part of the wav to store
             # part = fade(wav[:end-begin],fade_in_time,fade_out_time)
@@ -497,9 +502,16 @@
             # Tidy up and save to file
             plt.tight_layout()
             if append_to_filename:
-                plt.savefig('{}/background_{}_{}.{}'.format(output_path, label, append_to_filename, image_format), dpi=300)
+                plt.savefig('{}/background_{}_{}.{}'.format(output_path,
+                                                            label,
+                                                            append_to_filename,
+                                                            image_format),
+                            dpi=300)
             else:
-                plt.savefig('{}/background_{}.{}'.format(output_path, label, image_format), dpi=300)
+                plt.savefig('{}/background_{}.{}'.format(output_path,
+                                                         label,
+                                                         image_format),
+                            dpi=300)

     # Compute total energy of background
     if len(backgrounds_df) > 0:
@@ -509,8 +521,6 @@
     else:
         background_energy = 0.0

-
-
     for n in range(len(events_df)):
         # Get label of track
         label = str(events_df['label'].loc[n])
@@ -523,7 +533,9 @@
         pattern_timedict = []

         for pattern_format in ['xls', 'json', 'txt', 'csv']:
-            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, events_df['sampleid'].loc[n], pattern_format))
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path,
+                                                               events_df['sampleid'].loc[n],
+                                                               pattern_format))

         if len(candidates) == 0:
             # If no patterns are found, search for normal audio files
@@ -532,17 +544,17 @@
             wav, sr = librosa.load(chosen_fname, sr=SR)
         else:
             chosen_fname = random.sample(candidates, 1)[0]
-            logging.info('rendering pattern')
             wav, sr, pattern_timedict = render_pattern(chosen_fname, input_path)
-
+        logging.debug(chosen_fname)

         # Apply a fader envelope
         fade_in_time = float(events_df['fade_in_time'].loc[n])
         fade_out_time = float(events_df['fade_out_time'].loc[n])
         wav = fade(wav, fade_in_time, fade_out_time)

         # Set target EBR
-        target_ebr = 10**(float(events_df['ebr'].loc[n])/20.0 + np.random.randn()*float(events_df['ebr_stddev'].loc[n])/20.0)
+        target_ebr = 10**(float(events_df['ebr'].loc[n])/20.0 +
+                          np.random.randn()*float(events_df['ebr_stddev'].loc[n])/20.0)

         # Mean time between instances \mu.
         mean_time_between_instances = events_df['mean_time_between_instances'].loc[n]
@@ -551,15 +563,20 @@
         # Track array
         track_arr = np.zeros(int(scene_duration*SR))

-        #If \mu is -1, then play the event only once.
+        # If \mu is -1, then play the event only once.
         if mean_time_between_instances == -1:
-            track_arr[_N(events_df['start_time'].loc[n]):_N(events_df['start_time'].loc[n])+len(wav)] += wav
+            track_arr[_D(events_df['start_time'].loc[n]):_D(events_df['start_time'].loc[n]) + len(wav)] += wav
             start_times = [float(events_df['start_time'].loc[n])]
             end_times = [float(events_df['end_time'].loc[n])]
             new_start_time = start_times[-1]
             new_end_time = end_times[-1]

+            if chosen_fname in timedict:
+                timedict[chosen_fname].append((new_start_time, min(scene_duration, new_end_time)))
+            else:
+                timedict[chosen_fname] = [(new_start_time, min(scene_duration, new_end_time))]
+
             for pt in pattern_timedict:
                 pattern_timedict[pt] = [(s[0] + new_start_time, s[1] + new_start_time) for s in
                                         pattern_timedict[pt]]
@@ -628,15 +645,15 @@
             # We need to be careful with the limits here
             # since numpy will just ignore indexing that
             # exceeds the size of the array
-            begin = min(_N(t), len(track_arr))
-            end = min(len(track_arr), _N(t)+len(wav))
+            begin = min(_D(t), len(track_arr))
+            end = min(len(track_arr), _D(t) + len(wav))

             # Part of the wav to store
             part = wav[:end-begin]

             # If wav file was concatenated, fade out
             # quickly to avoid clicks
-            if len(part) < len(wav) and len(part) > fade_out_time*SR:
+            if len(wav) > len(part) > fade_out_time*SR:
                 part = fade(part, 0, fade_out_time)

             track_arr[begin:end] += part
@@ -653,8 +670,10 @@
         if len(backgrounds_df) > 0:
             ebr_prof = energy_prof/background_energy[:len(energy_prof)].flatten()
             curr_ebr = np.max(ebr_prof)
-            logging.debug('{}:Target ebr: {}db'.format(label,20*np.log10(target_ebr)))
-            logging.debug('{}:Current track ebr: {}db'.format(label,20*np.log10(curr_ebr)))
+            logging.debug('{}:Target ebr: {}db'.format(label,
+                                                       20*np.log10(target_ebr)))
+            logging.debug('{}:Current track ebr: {}db'.format(label,
+                                                              20*np.log10(curr_ebr)))

             # Set correct ebr
             track_arr = track_arr/curr_ebr*target_ebr
@@ -663,18 +682,20 @@
             new_energy_prof = librosa.feature.rmse(S=Fnew).flatten()
             new_ebr_prof = new_energy_prof/background_energy[:len(energy_prof)].flatten()
             new_ebr = np.max(new_ebr_prof)
-            logging.debug('{}:New track ebr: {}db'.format(label,20*np.log10(new_ebr)))
+            logging.debug('{}:New track ebr: {}db'.format(label, 20*np.log10(new_ebr)))

         if channel_mode == 'separate':
-            librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr/np.max(track_arr), SR)
+            librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label),
+                                     track_arr/np.max(track_arr),
+                                     SR)

         if figure_verbosity > 0:
             plt.figure()
-            plt.subplot(3,1,1)
+            plt.subplot(3, 1, 1)
             plt.title('`{}\' event waveform and spectrogram'.format(label))
-            librosa.display.waveplot(track_arr,sr=SR)
+            librosa.display.waveplot(track_arr, sr=SR)
             Fdb = librosa.amplitude_to_db(F)
             plt.subplot(3, 1, 2)
             librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz')
@@ -697,15 +718,15 @@
     if figure_verbosity > 0:
         plt.figure()
-        ax0 = plt.subplot(3,1,1)
+        ax0 = plt.subplot(3, 1, 1)
         plt.title('Synthesized Scene')
         librosa.display.waveplot(scene_arr, sr=SR)
         F = librosa.stft(scene_arr)
         Fdb = librosa.amplitude_to_db(F)
-        ax1 = plt.subplot(3,1,2)
+        ax1 = plt.subplot(3, 1, 2)
         librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz')
-        ax2 = plt.subplot(3,1,3)
-        ax2.set_xlim([0,scene_duration])
+        ax2 = plt.subplot(3, 1, 3)
+        ax2.set_xlim([0, scene_duration])

         # Get labels
         labels = [s[0] for s in scene_starting_times]
@@ -720,7 +741,6 @@
         plt.yticks(range(len(labels)), labels)

         for n in range(len(scene_starting_times)):
-            label = scene_starting_times[n][0]
             start_times = scene_starting_times[n][1]
             end_times = scene_ending_times[n][1]
             color = ['r', 'g', 'y'][n % 3]
@@ -745,8 +765,6 @@

         plt.tight_layout()

-
-
         if append_to_filename:
             plt.savefig('{}/scene_{}.{}'.format(output_path, append_to_filename, image_format), dpi=300)
         else:
@@ -793,13 +811,20 @@
     argparser.add_argument(
         'input_path',
         type=str,
-        help="Path of a directory containing wave files for sound backgrounds (in the `background' sub-directory) or events (in `event')"
+        help="Path of a directory containing wave files for sound backgrounds"
+             "(in the `background' sub-directory) or events (in `event')"
     )
+
+    input_path = '.'
+
     argparser.add_argument(
         'output_path',
         type=str,
         help="The directory the generated scenes and annotations will reside."
-        )
+    )
+
+    output_path = '.'
+
     argparser.add_argument(
         'scene_duration',
         type=float,
@@ -831,14 +856,17 @@
     argparser.add_argument(
         '-N',
         type=int,
-        help="Generate N instances of the scene. If not specified only generate a single instance. Note that if N > 1, then the verbosity must be less or equal to 1"
+        help="Generate N instances of the scene. If not specified only generate a single instance. Note that if N > 1, "
+             "then the verbosity must be less or equal to 1"
    )
     generate_n = 1

     argparser.add_argument(
         '-t', '--time-mode',
         type=str,
-        help="Mode of spacing between events. `generate': values must be set for each track in the score files. `abstract': values are computed from an abstract representation of an existing acoustic scene. `replicate': values are replicated from an existing acousting scene. (NOT IMPLEMENTED)",
+        help="Mode of spacing between events. `generate': values must be set for each track in the score files. "
+             "`abstract': values are computed from an abstract representation of an existing acoustic scene. "
+             "`replicate': values are replicated from an existing acousting scene. (NOT IMPLEMENTED)",
         choices=['generate', 'abstract', 'replicate']
     )
     time_mode = 'generate'
@@ -846,7 +874,9 @@
     argparser.add_argument(
         '-R', '--ebr-mode',
         type=str,
-        help="Mode for Event to Background power level ratio. `generate': values must be set for each track in the score files. `abstract': values are computed from an abstract representation of an existing acoustic scene. `replicate': values are replicated from an existing acousting scene. (NOT IMPLEMENTED)",
+        help="Mode for Event to Background power level ratio. `generate': values must be set for each track in the "
+             "score files. `abstract': values are computed from an abstract representation of an existing acoustic "
+             "scene. `replicate': values are replicated from an existing acousting scene. (NOT IMPLEMENTED)",
         choices=['generate', 'abstract', 'replicate']
     )
     ebr_mode = 'generate'
@@ -854,20 +884,24 @@
     argparser.add_argument(
         '-A', '--annotation-file',
         type=float,
-        help="If -R or -m are selected, this provides the source for sourcing the times or EBRs from ANNOTATION_FILE. ANNOTATION_FILE must be comma-separated text file (.csv, .txt), JSON (.json), or Excel (.xls). (NOT IMPLEMENTED)"
+        help="If -R or -m are selected, this provides the source for sourcing the times or EBRs from ANNOTATION_FILE. "
+             "ANNOTATION_FILE must be comma-separated text file (.csv, .txt), JSON (.json), or Excel (.xls). "
+             "(NOT IMPLEMENTED)"
     )
     annotation_file = None

     argparser.add_argument(
         '-a', '--audio-file',
         type=float,
-        help="If -R or -m are selected, this provides the source for sourcing the times or EBRs from AUDIO_FILE. AUDIO_FILE must be a 44100Hz .wav file. (NOT IMPLEMENTED)"
+        help="If -R or -m are selected, this provides the source for sourcing the times or EBRs "
+             "from AUDIO_FILE. AUDIO_FILE must be a 44100Hz .wav file. (NOT IMPLEMENTED)"
     )
     audio_file = None

     argparser.add_argument(
         '-v', '--figure-verbosity',
         action='count',
-        help="Increase figure verbosity. (Default) 0 - Don't save or display figures, 1 - Save pictures but do not display them, 2 - Save and display figures, 3 - Add shades over the events in the final plot"
+        help="Increase figure verbosity. (Default) 0 - Don't save or display figures, 1 - Save pictures but do not "
+             "display them, 2 - Save and display figures, 3 - Add shades over the events in the final plot"
     )
     figure_verbosity = 0
@@ -881,28 +915,39 @@
     argparser.add_argument(
         '-C', '--channel-mode',
         type=str,
-        help="number of audio channels contained in file. (Default) 'mono' - 1 channel (mono), 'separate' - Same as 'classes', each channel is saved in a separate .wav file.",
+        help="number of audio channels contained in file. (Default) 'mono' - 1 channel (mono), 'separate' - Same as "
+             "'classes', each channel is saved in a separate .wav file.",
         choices=['mono', 'separate']
     )
     channel_mode = 'mono'

-    # argparser.add_argument(
-    #     '-m', '--min-space',
-    #     type=float,
-    #     help="Minimum space allowed between successive events (seconds). If -1, then allow overlapping between events."
-    # )
-    min_space = -1
-
     argparser.add_argument(
         '-c', '--end-cut',
         action='store_true',
-        help="If the last sample ends after the scene ends then: if enabled, cut the sample to duration, else remove the sample."
+        help="If the last sample ends after the scene ends then: if enabled, cut the sample to duration, "
+             "else remove the sample."
     )
     end_cut = None
-
-
-    logging.basicConfig(level=logging.INFO)
-
+
+    argparser.add_argument(
+        '-L', '--logging-level',
+        type=str,
+        help="Set lowest logging level",
+        choices=['debug', 'warning', 'info']
+    )
+    args = argparser.parse_args()
+
+    if args.logging_level:
+        if args.logging_level == 'debug':
+            logging.basicConfig(level=logging.DEBUG)
+        elif args.logging_level == 'info':
+            logging.basicConfig(level=logging.INFO)
+        elif args.logging_level == 'warning':
+            logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
     if args.input_path:
         input_path = args.input_path
         logging.debug("Using `{}' as input path".format(input_path))
@@ -960,7 +1005,6 @@
                  annotation_file=annotation_file,
                  audio_file=audio_file,
                  figure_verbosity=figure_verbosity,
-                 min_space=min_space,
                  end_cut=end_cut,
                  image_format=image_format,
                  append_to_filename=append_to_filename)
@@ -980,8 +1024,6 @@
                      annotation_file=annotation_file,
                      audio_file=audio_file,
                      figure_verbosity=min(figure_verbosity, 1),
-                     min_space=min_space,
                      end_cut=end_cut,
                      image_format=image_format,
                      append_to_filename=append_to_filename)
-
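
For context, a minimal sketch (not part of the changeset) of the annotation bookkeeping this fix restores: the placed instance is recorded in timedict, and the pattern's internal event times are shifted by the instance's start time rather than by new_end_time as the removed line did. timedict_to_dataframe is copied from the diff above; the file names, the times, and the setdefault calls standing in for the changeset's explicit if/else are illustrative assumptions.

# Illustrative sketch only -- not code from the changeset.
import pandas as pd

def timedict_to_dataframe(timedict):
    # Same helper as in python/simscene.py: one row per placed occurrence.
    return pd.DataFrame([(key, val[0], val[1]) for key in timedict for val in timedict[key]],
                        columns=('filename', 'start_time', 'end_time'))

scene_duration = 30.0
new_start_time, new_end_time = 4.0, 9.5  # hypothetical placement of one pattern instance
pattern_timedict = {'pattern/birds.wav': [(0.0, 1.2), (2.5, 3.1)]}  # times inside the pattern

timedict = {}
# Record the instance itself (the changeset adds this for the mu == -1 branch),
# clipped to the scene duration.
timedict.setdefault('event/birds.wav', []).append((new_start_time, min(scene_duration, new_end_time)))

# Shift the pattern's internal events by the instance start time (the corrected expression).
for pt in pattern_timedict:
    pattern_timedict[pt] = [(s[0] + new_start_time, s[1] + new_start_time)
                            for s in pattern_timedict[pt]]
    timedict.setdefault(pt, []).extend(pattern_timedict[pt])

print(timedict_to_dataframe(timedict))

In the changeset itself, rows like these flow into the exported annotation table (the .xls the commit message refers to), which is presumably why pattern occurrences were missing before the fix.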