changeset 47:659fa2db6fc7

Changed the default output annotation format to match the one expected by the TUT DCASE 2017 challenge sound event evaluator (sed_eval)
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Fri, 20 Oct 2017 11:57:28 +0100
parents 40f8b4b5750b
children 521c9b65f738
files python/simscene.py
diffstat 1 files changed, 41 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/python/simscene.py	Thu Oct 12 15:02:48 2017 +0100
+++ b/python/simscene.py	Fri Oct 20 11:57:28 2017 +0100
@@ -46,9 +46,17 @@
 
 
 def timedict_to_dataframe(timedict):
+    print(timedict)
     return pd.DataFrame([(key, val[0], val[1], val[2]) for key in timedict for val in timedict[key]],
                         columns=('label', 'filename', 'start_time', 'end_time'))
 
+def timedict_to_txt(timedict):
+    str_  = ""
+    for key in timedict:
+        for val in timedict[key]:
+            str_ += "{}\t{}\t{}\n".format(float(val[1]), float(val[2]), key)
+    str_ += '\n'
+    return str_
 
 def render_pattern(fname, input_path, sr=44100):
     pattern = read_pattern_file(fname)
@@ -309,7 +317,7 @@
              score_events,
              score_backgrounds,
              **kwargs):
-    logging.warning('simscene() is not yet implemented fully')
+    logging.warning('BER ratios have not yet been verified')
     SR = 44100  # Samplerate. Should probably not be hardcoded
     
     events_df = score_events
@@ -341,6 +349,11 @@
         image_format = kwargs['image_format']
     else:
         image_format = 'png'
+
+    if 'annot_format' in kwargs:
+        annot_format = kwargs['annot_format']
+    else:
+        annot_format = 'sed_eval'
     
     # Stores the starting and ending times of every track for visualization
     # purposes
@@ -776,13 +789,25 @@
         else:
             plt.savefig('{}/scene.{}'.format(output_path, image_format), dpi=300)
 
-    timedict_df = timedict_to_dataframe(timedict)
-    logging.debug(timedict_df)
+    if annot_format == 'sed_eval':
+        timedict_txt = timedict_to_txt(timedict)
+        logging.debug(timedict_txt)
 
-    if append_to_filename:
-        timedict_df.to_csv('{}/scene_{}_offsets.csv'.format(output_path, append_to_filename))
-    else:
-        timedict_df.to_csv('{}/scene_offsets.csv'.format(output_path))
+        if append_to_filename:
+            with open('{}/scene_{}_offsets.csv'.format(output_path, append_to_filename), 'w') as f:
+                f.write(timedict_txt)
+        else:
+            with open('{}/scene_offsets.csv'.format(output_path), 'w') as f:
+                f.write(timedict_txt)
+
+    elif annot_format == 'pandas':
+        timedict_df = timedict_to_dataframe(timedict)
+        logging.debug(timedict_df)
+
+        if append_to_filename:
+            timedict_df.to_csv('{}/scene_{}_offsets.csv'.format(output_path, append_to_filename))
+        else:
+            timedict_df.to_csv('{}/scene_offsets.csv'.format(output_path))
 
     if figure_verbosity > 1:
         plt.show()
@@ -942,6 +967,15 @@
         choices=['debug', 'warning', 'info']
     )
 
+    argparser.add_argument(
+        '--annot-format',
+        type=str,
+        help="Annotation format for generated scenes. Choices are: 'sed_eval' (default) - Format appropriate for "
+             "DCASE 2017 challenge evaluator, 'pandas' - A more detailed format for the form <label, orig_filename, "
+             "start, stop>",
+        choices=['sed_eval', 'pandas']
+    )
+
     args = argparser.parse_args()
 
     if args.logging_level: