changeset 13:adda18b3b340

patterns now can substitute events and backgrounds; snrs are in db
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Thu, 05 Oct 2017 14:34:01 +0100
parents c4b79ec98104
children
files example/forest_backgrounds.xls example/forest_events.xls example/sound/event/footstep01.wav example/sound/event/footstep02.wav example/sound/pattern/approaching_pattern0.xls example/sound/pattern/footsteps_pattern0.xls simscene.py
diffstat 7 files changed, 140 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
Binary file example/forest_backgrounds.xls has changed
Binary file example/forest_events.xls has changed
Binary file example/sound/event/footstep01.wav has changed
Binary file example/sound/event/footstep02.wav has changed
Binary file example/sound/pattern/approaching_pattern0.xls has changed
Binary file example/sound/pattern/footsteps_pattern0.xls has changed
--- a/simscene.py	Wed Oct 04 19:14:27 2017 +0100
+++ b/simscene.py	Thu Oct 05 14:34:01 2017 +0100
@@ -49,7 +49,77 @@
 #     #x = np.resize(x, (w,int(np.ceil(float(len(x)/w)))))
 #     x = np.array([[ii+jj for jj in range(w)] for ii in range(len(x)-w)])    
 #     return np.sqrt(np.mean(x**2, 1))
+
+def render_pattern(fname, input_path, sr=44100):
+    pattern = read_pattern_file(fname)
+
+    start_times_samples = []
+    end_times_samples = []
+    durations_samples = []
+    wav_files = []
     
+    for n in range(len(pattern)):
+        # Try loading the file,
+        sampleid = pattern['sampleid'].loc[n]
+        candidates = glob.glob('{}/event/{}*wav'.format(input_path,sampleid))
+        chosen_fname = random.sample(candidates, 1)[0]
+
+        logging.debug('Loading {}'.format(chosen_fname))
+        
+        # For each sound in the pattern file, place it starting from starttime + an offset
+        # with a mean value of 0 and standard deviation of offset_stddev. The first event can
+        # not start earlier than time 0. If endtime is defined (not nan), then cut the event at
+        # end time.
+        wav, SR = librosa.load(chosen_fname, sr=sr)
+
+        # Read and assign an amplitude
+        amplitude_mean = float(pattern['amplitude'].loc[n])
+        amplitude_stddev = float(pattern['amplitude_stdev'].loc[n])
+        amplitude = amplitude_mean + np.random.randn()*amplitude_stddev
+        wav *= amplitude
+        
+        start_time = max(float(pattern['start_time'].loc[n]),0)
+        start_time_samples = int(start_time*SR)
+
+        fade_in_time = float(pattern['fade_in_time'].loc[n])
+        fade_out_time = float(pattern['fade_out_time'].loc[n])
+        end_time = float(pattern['end_time'].loc[n])
+
+        # If end_time is not defined (-1 or just empty)
+        # then just derive it from the length of the sample 
+        if np.isnan(end_time) or float(end_time) == -1:
+            duration_samples = len(wav)            
+            end_time_samples = start_time_samples + duration_samples
+        elif end_time - start_time > len(wav)/float(SR):
+            
+            # If given end_time is more than start_time + duration of sample
+            # then pad the file with zeros to reach the desired end time.
+            duration = end_time - start_time
+            duration_samples = int(duration*SR)
+            end_time_samples = start_time_samples + duration_samples
+            wav_arr = np.zeros(duration_samples)
+            wav_arr[:len(wav)] = wav
+            wav = wav_arr
+        else:
+            duration = end_time - start_time
+            duration_samples = int(duration*SR)
+            end_time_samples = start_time_samples + duration_samples
+
+        event_render = fade(wav[:duration_samples], fade_in_time, fade_out_time)
+        
+        start_times_samples.append(start_time_samples)
+        end_times_samples.append(end_time_samples)
+        durations_samples.append(duration_samples)
+        wav_files.append(event_render)
+
+    pattern_duration = end_time_samples
+    pattern_arr = np.zeros(pattern_duration)
+
+    for n, s in enumerate(start_times_samples):
+        wav = wav_files[n]
+        pattern_arr[s:s+len(wav)] = wav
+    
+    return pattern_arr, 44100
 
 def read_events_file(fname):
     if fname[-3:].lower() == 'xls':
@@ -75,6 +145,30 @@
     logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))                   
     return df
 
+def read_pattern_file(fname):
+    if fname[-3:].lower() == 'xls':
+        df = pd.read_excel(fname)
+    elif fname[-4:].lower() == 'json':
+        df = pd.read_json(fname)    
+    elif fname[-3:].lower() in ['txt']:           
+        with open(fname) as f:
+            s = f.readline()
+            f.seek(0,0)
+            if ',' in s:
+                sep = ','
+            elif '\t' in s:
+                sep = '\t'
+            else:
+                sep = ' '
+            logging.warning('Probably no header or malformed .csv. Will try to parse it raw.')
+            df = pd.read_csv(f, header=None, sep=sep)        
+            df.columns = ['eventid','start_time','end_time','time_offset_stdev','fade_in_time', 'fade_out_time', 'amplitude', 'amplitude_stdev']
+    elif fname[-3:].lower() in ['csv']:
+        df = pd.read_json(fname)            
+            
+    logging.info('Using input:\n'+tabulate(df, headers='keys', tablefmt='psql'))                   
+    return df
+    
 def read_backgrounds_file(fname):
     if fname[-3:].lower() == 'xls':
         df = pd.read_excel(fname)
@@ -200,11 +294,25 @@
         # Get label of background
         label = str(backgrounds_df['label'].loc[n])
 
-        candidates = glob.glob('{}/background/{}*.wav'.format(input_path, backgrounds_df['sampleid'].loc[n]))
-        chosen_fname = random.sample(candidates, 1)[0]
-        wav, sr = librosa.load(chosen_fname, sr=SR)
+        # First check if there are any pattern candidates. Give priorities
+        # To pattern files.
+        candidates = []
+        for pattern_format in ['xls', 'json', 'txt', 'csv']:
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, backgrounds_df['sampleid'].loc[n], pattern_format))
+
+        if len(candidates) == 0:
+            # If no patterns are found, search for normal audio files
+            candidates = glob.glob('{}/background/{}*.wav'.format(input_path, backgrounds_df['sampleid'].loc[n]))           
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = librosa.load(chosen_fname, sr=SR)
+        else:
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = render_pattern(chosen_fname, input_path)
+            
         duration = len(wav)/float(SR)
-        target_snr = float(backgrounds_df['snr'].loc[n])
+        target_snr_db = float(backgrounds_df['snr'].loc[n])
+        target_snr = 10**(target_snr_db/20.0)
+        
         energy = compute_energy(wav)
 
         logging.debug('{}:energy:{}'.format(label,energy))
@@ -221,7 +329,8 @@
             logging.info('{}:noise_energy:{}'.format(label,noise_energy))
 
             old_snr = energy/noise_energy
-            logging.info('{}:old_snr:{}'.format(label,old_snr))
+            old_snr_db = 20*np.log10(old_snr)
+            logging.info('{}:old_snr:{}'.format(label,old_snr_db))
             
             amplitude_factor = target_snr/old_snr
             
@@ -229,7 +338,8 @@
             wav *= amplitude_factor
             new_energy = compute_energy(wav)
             new_snr = new_energy/noise_energy
-            logging.info('{}:new_snr:{}'.format(label,new_snr))
+            new_snr_db = 20. * np.log10(new_snr)
+            logging.info('{}:new_snr:{}'.format(label,new_snr_db))
             
         
         # Track array
@@ -325,11 +435,21 @@
     for n in range(len(events_df)):
         # Get label of track
         label = str(events_df['label'].loc[n])
-        
-        candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n]))
-        chosen_fname = random.sample(candidates, 1)[0]
-        wav, sr = librosa.load(chosen_fname, sr=SR)
-        assert sr == SR, "Sample rate of individual tracks must be 44100Hz (Failed: `{}' with sample rate: {} )".format(chosen_fname, sr)
+
+        # First check if there are any pattern candidates. Give priorities
+        # To pattern files.
+        candidates = []
+        for pattern_format in ['xls', 'json', 'txt', 'csv']:
+            candidates += glob.glob('{}/pattern/{}*.{}'.format(input_path, events_df['sampleid'].loc[n], pattern_format))
+
+        if len(candidates) == 0:
+            # If no patterns are found, search for normal audio files
+            candidates = glob.glob('{}/event/{}*.wav'.format(input_path, events_df['sampleid'].loc[n]))           
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = librosa.load(chosen_fname, sr=SR)
+        else:
+            chosen_fname = random.sample(candidates, 1)[0]
+            wav, sr = render_pattern(chosen_fname, input_path)        
         
                   
         # Apply a fader envelope
@@ -350,6 +470,8 @@
         #If \mu is -1, then play the event only once.
         if mean_time_between_instances == -1:
             track_arr[_N(events_df['start_time'].loc[n]):_N(events_df['start_time'].loc[n])+len(wav)] += wav
+            start_times = [float(events_df['start_time'].loc[n])]
+            end_times = [float(events_df['end_time'].loc[n])]
         else:
             # If 0, then start next sample after this one (set it to the duration of the sample)
             if mean_time_between_instances == 0:
@@ -397,8 +519,8 @@
                     
                 track_arr[begin:end] += part
 
-            track_list.append(track_arr)
-            scene_arr[:len(track_arr)] += track_arr
+        track_list.append(track_arr)
+        scene_arr[:len(track_arr)] += track_arr
 
         # Compute energies
         F = librosa.stft(track_arr, 1024)
@@ -424,7 +546,7 @@
         
 
         if channel_mode == 'separate':
-            librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr, SR)
+            librosa.output.write_wav('{}/{}_event_track.wav'.format(output_path, label), track_arr/np.max(track_arr), SR)
 
 
         
@@ -457,7 +579,7 @@
 
             
 
-            
+
         scene_starting_times.append((label, start_times))
         scene_ending_times.append((label, end_times))
 
@@ -522,6 +644,9 @@
     if figure_verbosity > 1:
         plt.show()
 
+    # Replace nans (i.e. because of division-by-zero) of the scene with zeros.
+    scene_arr = np.nan_to_num(scene_arr)
+        
     if channel_mode == 'mono':
         if append_to_filename:
             librosa.output.write_wav('{}/scene_{}.wav'.format(output_path, append_to_filename), scene_arr, SR)