# HG changeset patch # User Emmanouil Theofanis Chourdakis # Date 1506968491 -3600 # Node ID f5edaa5ca16732c852096d51482edb25717beaac # Parent 42f189846ba81a83afc48100c2d67344685ebf6f Added simscene() implementation; event only; loads events, plots and saves a scene. Piano roll not working diff -r 42f189846ba8 -r f5edaa5ca167 requirements.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/requirements.txt Mon Oct 02 19:21:31 2017 +0100 @@ -0,0 +1,7 @@ +# argparse is part of the standard library since Python 2.7/3.2; listed as a comment so pip install -r succeeds +pandas +numpy +# glob is a standard-library module, not a pip-installable package +librosa +matplotlib +tabulate
+ """ + fade_in_samples = int(fade_in*sr) + fade_out_samples = int(fade_out*sr) + + outp = np.ones_like(x) + for n in range(fade_in_samples): + outp[n] = n*1./fade_in_samples + + for n in range(fade_out_samples): + outp[len(outp)-fade_out_samples+n] = 1-1./fade_out_samples*n + return outp*x + def simscene(input_path, output_path, scene_duration, @@ -107,7 +151,156 @@ score_backgrounds, **kwargs): logging.info('simscene() is not yet implemented') + SR = 44100 # Samplerate. Should probably not be hardcoded + + events_df = score_events + + # Create empty numpy array + scene_arr = np.zeros(int(scene_duration*SR)) + + if 'end_cut' in kwargs: + end_cut = kwargs['end_cut'] + else: + end_cut = False + + if 'figure_verbosity' in kwargs: + figure_verbosity = kwargs['figure_verbosity'] + else: + figure_verbosity = 0 + + if 'image_format' in kwargs: + image_format = kwargs['image_format'] + else: + image_format = 'png' + + # Stores the starting and ending times of every track for visualization + # purposes + scene_starting_times = [] + scene_ending_times = [] + + + for n in range(len(events_df)): + # Get label of track + label = str(events_df['label'].loc[n]) + + candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n])) + wav, SR = librosa.load(random.sample(candidates,1)[0]) + + # Apply a fader envelope + fade_in_time = float(events_df['fade_in_time'].loc[n]) + fade_out_time = float(events_df['fade_out_time'].loc[n]) + wav = fade(wav, fade_in_time, fade_out_time) + + # Mean time between instances \mu. + mean_time_between_instances = events_df['mean_time_between_instances'].loc[n] + track_end_time = events_df['end_time'].loc[n] + + # Track array + track_arr = np.zeros(int(scene_duration*SR)) + + #If \mu is -1, then play the event only once. 
+ if mean_time_between_instances == -1: + track_arr[_N(events_df['start_time'].loc[n]):_N(events_df['start_time'].loc[n])+len(wav)] += wav + else: + + # If 0, then start next sample after this one (set it to the duration of the sample) + if mean_time_between_instances == 0: + mean_time_between_instances = len(wav)/float(SR) + + # Store the successive starting and ending times of the events (given e.g. the model) + # in the following lists. + start_times = [events_df['start_time'].loc[n]] + end_times = [events_df['end_time'].loc[n]] + # Start with the first time in the list + new_start_time = start_times[-1] + new_end_time = end_times[-1] + + # Until the scene is full + while new_start_time < track_end_time: + offset = float(mean_time_between_instances) +\ + float(events_df['time_between_instances_stddev'].loc[n]*np.random.randn()) + new_start_time += offset + new_end_time += offset + + # Only exception is if we have set the 'end_cut' flag + # and the end time of the event surpasses the end time + # of the track + if end_cut and new_end_time > track_end_time: + break + else: + start_times.append(new_start_time) + end_times.append(new_end_time) + + for t in start_times: + + # We need to be careful with the limits here + # since numpy will just ignore indexing that + # exceeds the size of the array + begin = min(_N(t), len(track_arr)) + end = min(len(track_arr), _N(t)+len(wav)) + + # Part of the wav to store + part = wav[:end-begin] + + # If wav file was concatenated, fade out + # quickly to avoid clicks + if len(part) < len(wav) and len(part) > fade_out_time*SR: + part = fade(part, 0, fade_out_time) + + track_arr[begin:end] += part + + scene_arr[:len(track_arr)] += track_arr + + if figure_verbosity > 0: + plt.figure() + + plt.subplot(2,1,1) + plt.title('`{}\' waveform and spectrogram'.format(label)) + + visible_track = track_arr[int(start_times[0]*SR):int(end_times[-1]*SR)] + librosa.display.waveplot(visible_track) + F = librosa.stft(visible_track) + Fdb = 
librosa.amplitude_to_db(F) + plt.subplot(2,1,2) + librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz') + plt.savefig('{}/{}.{}'.format(output_path, label, image_format)) + + + scene_starting_times.append((label, start_times)) + scene_ending_times.append((label, end_times)) + + if figure_verbosity > 0: + plt.figure() + plt.subplot(3,1,1) + plt.title('Waveform and spectrogram for the full track') + librosa.display.waveplot(scene_arr) + F = librosa.stft(scene_arr) + Fdb = librosa.amplitude_to_db(F) + plt.subplot(3,1,2) + librosa.display.specshow(Fdb, sr=SR, x_axis='time', y_axis='hz') + ax = plt.subplot(3,1,3) + + for n in range(len(scene_starting_times)): + label = scene_starting_times[n][0] + start_times = scene_starting_times[n][1] + end_times = scene_ending_times[n][1] + for m in range(len(start_times)): + ax.add_patch( + patches.Rectangle( + (start_times[m], float(n)), + end_times[m]-start_times[m], 0.2 + ) + ) + + plt.savefig('{}/full-scene.{}'.format(output_path, image_format)) + if figure_verbosity > 1: + plt.show() + + if channel_mode == 'mono': + librosa.output.write_wav('{}/full-scene.wav'.format(output_path), scene_arr, SR) + + return scene_arr @@ -189,6 +382,13 @@ help="Increase figure verbosity. (Default) 0 - Don't save or display figures, 1 - Save pictures but do not display them, 2 - Save and display figures" ) figure_verbosity = None + + argparser.add_argument( + '-x', '--image-format', + help="Image format for the figures", + choices=['png', 'jpg', 'pdf'] + ) + image_format = 'png' argparser.add_argument( '-C', '--channel-mode', @@ -196,7 +396,7 @@ help="number of audio channels contained in file. 
(Default) 'mono' - 1 channel (mono), 'classes' - As many channels as sound classes (events+textures), 'separate' - Same as 'classes', each channel is saved in a separate .wav file.", choices=['mono', 'classes', 'separate'] ) - channel_mode = None + channel_mode = 'mono' argparser.add_argument( '-m', '--min-space', @@ -225,6 +425,10 @@ if not (args.score_backgrounds or args.score_events): print("You must provide one of -e or -b") else: + if args.image_format: + image_format = args.image_format + if args.channel_mode: + channel_mode = args.channel_mode if args.ebr_mode: ebr_mode = args.ebr_mode if ebr_mode not in ['generate']: @@ -244,6 +448,8 @@ score_backgrounds = read_backgrounds_file(args.score_backgrounds) if args.score_events: score_events = read_events_file(args.score_events) + if args.figure_verbosity: + figure_verbosity = args.figure_verbosity simscene(input_path, output_path, scene_duration, score_events, score_backgrounds, time_mode=time_mode, @@ -253,4 +459,5 @@ audio_file=audio_file, figure_verbosity=figure_verbosity, min_space=min_space, - end_cut=end_cut) + end_cut=end_cut, + image_format=image_format)