# HG changeset patch # User Emmanouil Thoefanis Chourdakis # Date 1507210441 -3600 # Node ID adda18b3b340bccc2129f7b1c593f049afaa1508 # Parent c4b79ec9810446d760ed9db72699357893c115ff patters now can substitute events and backgrounds; snrs are in db diff -r c4b79ec98104 -r adda18b3b340 example/forest_backgrounds.xls Binary file example/forest_backgrounds.xls has changed diff -r c4b79ec98104 -r adda18b3b340 example/forest_events.xls Binary file example/forest_events.xls has changed diff -r c4b79ec98104 -r adda18b3b340 example/sound/event/footstep01.wav Binary file example/sound/event/footstep01.wav has changed diff -r c4b79ec98104 -r adda18b3b340 example/sound/event/footstep02.wav Binary file example/sound/event/footstep02.wav has changed diff -r c4b79ec98104 -r adda18b3b340 example/sound/pattern/approaching_pattern0.xls Binary file example/sound/pattern/approaching_pattern0.xls has changed diff -r c4b79ec98104 -r adda18b3b340 example/sound/pattern/footsteps_pattern0.xls Binary file example/sound/pattern/footsteps_pattern0.xls has changed diff -r c4b79ec98104 -r adda18b3b340 simscene.py --- a/simscene.py Wed Oct 04 19:14:27 2017 +0100 +++ b/simscene.py Thu Oct 05 14:34:01 2017 +0100 @@ -49,7 +49,77 @@ # #x = np.resize(x, (w,int(np.ceil(float(len(x)/w))))) # x = np.array([[ii+jj for jj in range(w)] for ii in range(len(x)-w)]) # return np.sqrt(np.mean(x**2, 1)) + +def render_pattern(fname, input_path, sr=44100): + pattern = read_pattern_file(fname) + + start_times_samples = [] + end_times_samples = [] + durations_samples = [] + wav_files = [] + for n in range(len(pattern)): + # Try loading the file, + sampleid = pattern['sampleid'].loc[n] + candidates = glob.glob('{}/event/{}*wav'.format(input_path,sampleid)) + chosen_fname = random.sample(candidates, 1)[0] + + logging.debug('Loading {}'.format(chosen_fname)) + + # For each sound in the pattern file, place it starting from starttime + an offset + # with a mean value of 0 and standard deviation of offset_stddev. The first event can + # not start earlier than time 0. If endtime is defined (not nan), then cut the event at + # end time. + wav, SR = librosa.load(chosen_fname, sr=sr) + + # Read and assign an amplitude + amplitude_mean = float(pattern['amplitude'].loc[n]) + amplitude_stddev = float(pattern['amplitude_stdev'].loc[n]) + amplitude = amplitude_mean + np.random.randn()*amplitude_stddev + wav *= amplitude + + start_time = max(float(pattern['start_time'].loc[n]),0) + start_time_samples = int(start_time*SR) + + fade_in_time = float(pattern['fade_in_time'].loc[n]) + fade_out_time = float(pattern['fade_out_time'].loc[n]) + end_time = float(pattern['end_time'].loc[n]) + + # If end_time is not defined (-1 or just empty) + # then just derive it from the length of the sample + if np.isnan(end_time) or float(end_time) == -1: + duration_samples = len(wav) + end_time_samples = start_time_samples + duration_samples + elif end_time - start_time > len(wav)/float(SR): + + # If given end_time is more than start_time + duration of sample + # then pad the file with zeros to reach the desired end time. + duration = end_time - start_time + duration_samples = int(duration*SR) + end_time_samples = start_time_samples + duration_samples + wav_arr = np.zeros(duration_samples) + wav_arr[:len(wav)] = wav + wav = wav_arr + else: + duration = end_time - start_time + duration_samples = int(duration*SR) + end_time_samples = start_time_samples + duration_samples + + event_render = fade(wav[:duration_samples], fade_in_time, fade_out_time) + + start_times_samples.append(start_time_samples) + end_times_samples.append(end_time_samples) + durations_samples.append(duration_samples) + wav_files.append(event_render) + + pattern_duration = end_time_samples + pattern_arr = np.zeros(pattern_duration) + + for n, s in enumerate(start_times_samples): + wav = wav_files[n] + pattern_arr[s:s+len(wav)] = wav + + return pattern_arr, 44100 def read_events_file(fname): if fname[-3:].lower() == 'xls': @@ -75,6 +145,30 @@ logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql')) return df +def read_pattern_file(fname): + if fname[-3:].lower() == 'xls': + df = pd.read_excel(fname) + elif fname[-4:].lower() == 'json': + df = pd.read_json(fname) + elif fname[-3:].lower() in ['txt']: + with open(fname) as f: + s = f.readline() + f.seek(0,0) + if ',' in s: + sep = ',' + elif '\t' in s: + sep = '\t' + else: + sep = ' ' + logging.warning('Probably no header or malformed .csv. Will try to parse it raw.') + df = pd.read_csv(f, header=None, sep=sep) + df.columns = ['eventid','start_time','end_time','time_offset_stdev','fade_in_time', 'fade_out_time', 'amplitude', 'amplitude_stdev'] + elif fname[-3:].lower() in ['csv']: + df = pd.read_json(fname) + + logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql')) + return df + def read_backgrounds_file(fname): if fname[-3:].lower() == 'xls': df = pd.read_excel(fname) @@ -200,11 +294,25 @@ # Get label of background label = str(backgrounds_df['label'].loc[n]) - candidates = glob.glob('{}/background/{}*.wav'.format(input_path, backgrounds_df['sampleid'].loc[n])) - chosen_fname = random.sample(candidates, 1)[0] - wav, sr = librosa.load(chosen_fname, sr=SR) + # First check if there are any pattern candidates. Give priorities + # To pattern files. + candidates = [] + for pattern_format in ['xls', 'json', 'txt', 'csv']: + candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, backgrounds_df['sampleid'].loc[n], pattern_format)) + + if len(candidates) == 0: + # If no patterns are found, search for normal audio files + candidates = glob.glob('{}/background/{}*.wav'.format(input_path, backgrounds_df['sampleid'].loc[n])) + chosen_fname = random.sample(candidates, 1)[0] + wav, sr = librosa.load(chosen_fname, sr=SR) + else: + chosen_fname = random.sample(candidates, 1)[0] + wav, sr = render_pattern(chosen_fname, input_path) + duration = len(wav)/float(SR) - target_snr = float(backgrounds_df['snr'].loc[n]) + target_snr_db = float(backgrounds_df['snr'].loc[n]) + target_snr = 10**(target_snr_db/20.0) + energy = compute_energy(wav) logging.debug('{}:energy:{}'.format(label,energy)) @@ -221,7 +329,8 @@ logging.info('{}:noise_energy:{}'.format(label,noise_energy)) old_snr = energy/noise_energy - logging.info('{}:old_snr:{}'.format(label,old_snr)) + old_snr_db = 20*np.log10(old_snr) + logging.info('{}:old_snr:{}'.format(label,old_snr_db)) amplitude_factor = target_snr/old_snr @@ -229,7 +338,8 @@ wav *= amplitude_factor new_energy = compute_energy(wav) new_snr = new_energy/noise_energy - logging.info('{}:new_snr:{}'.format(label,new_snr)) + new_snr_db = 20. * np.log10(new_snr) + logging.info('{}:new_snr:{}'.format(label,new_snr_db)) # Track array @@ -325,11 +435,21 @@ for n in range(len(events_df)): # Get label of track label = str(events_df['label'].loc[n]) - - candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n])) - chosen_fname = random.sample(candidates, 1)[0] - wav, sr = librosa.load(chosen_fname, sr=SR) - assert sr == SR, "Sample rate of individual tracks must be 44100Hz (Failed: `{}' with sample rate: {} )".format(chosen_fname, sr) + + # First check if there are any pattern candidates. Give priorities + # To pattern files. + candidates = [] + for pattern_format in ['xls', 'json', 'txt', 'csv']: + candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, events_df['sampleid'].loc[n], pattern_format)) + + if len(candidates) == 0: + # If no patterns are found, search for normal audio files + candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n])) + chosen_fname = random.sample(candidates, 1)[0] + wav, sr = librosa.load(chosen_fname, sr=SR) + else: + chosen_fname = random.sample(candidates, 1)[0] + wav, sr = render_pattern(chosen_fname, input_path) # Apply a fader envelope @@ -350,6 +470,8 @@ #If \mu is -1, then play the event only once. if mean_time_between_instances == -1: track_arr[_N(events_df['start_time'].loc[n]):_N(events_df['start_time'].loc[n])+len(wav)] += wav + start_times = [float(events_df['start_time'].loc[n])] + end_times = [float(events_df['end_time'].loc[n])] else: # If 0, then start next sample after this one (set it to the duration of the sample) if mean_time_between_instances == 0: @@ -397,8 +519,8 @@ track_arr[begin:end] += part - track_list.append(track_arr) - scene_arr[:len(track_arr)] += track_arr + track_list.append(track_arr) + scene_arr[:len(track_arr)] += track_arr # Compute energies F = librosa.stft(track_arr, 1024) @@ -424,7 +546,7 @@ if channel_mode == 'separate': - librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr, SR) + librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr/np.max(track_arr), SR) @@ -457,7 +579,7 @@ - + scene_starting_times.append((label, start_times)) scene_ending_times.append((label, end_times)) @@ -522,6 +644,9 @@ if figure_verbosity > 1: plt.show() + # Replace nans (i.e. because of division-by-zero) of the scene with zeros. + scene_arr = np.nan_to_num(scene_arr) + if channel_mode == 'mono': if append_to_filename: librosa.output.write_wav('{}/scene_{}.wav'.format(output_path, append_to_filename), scene_arr, SR)