diff scripts/evaluation_stats.py @ 1316:279930a008ca

All interfaces support comment boxes. Comment box identification matches presented tag (for instance, AB will be Comment on fragment A, rather than 1). Tighter buffer loading protocol, audioObjects register with the buffer rather than checking for buffer existence (which can be buggy depending on the buffer state). Buffers now have a state to ensure exact location in loading chain (downloading, decoding, LUFS, ready).
author Nicholas Jillings <nickjillings@users.noreply.github.com>
date Fri, 29 Jan 2016 11:11:57 +0000
parents
children 1b6fa37d46a4 235594325b84 b5bf2f57187c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/evaluation_stats.py	Fri Jan 29 11:11:57 2016 +0000
@@ -0,0 +1,216 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import xml.etree.ElementTree as ET
+import os       # for getting files from directory
+import operator # for sorting data with multiple keys
+import sys      # for accessing command line arguments
+
+# Command line arguments: an optional results folder.
+# Checked explicitly instead of with 'assert': assertions are stripped when
+# Python runs with -O, which would leave folder_name undefined further down.
+if len(sys.argv) > 2:
+    sys.exit("evaluation_stats takes at most 1 command line argument\n"
+             "Use: python evaluation_stats.py [results_folder]")
+
+# XML results files location
+if len(sys.argv) == 2:
+    folder_name = sys.argv[1]   # First command line argument is folder
+else:
+    folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
+    print "Use: python evaluation_stats.py [results_folder]"
+    print "Using default path: " + folder_name
+
+# Format a duration in seconds as the string '[minutes] min [seconds] s'.
+# Both parts are truncated with int(), so fractions of a second are dropped.
+def seconds2timestr(time_in_seconds):
+    minutes = int(time_in_seconds/60)
+    seconds = int(time_in_seconds%60)
+    return "%d min %d s" % (minutes, seconds)
+
+# stats initialisation: global counters, accumulated over all XML files
+number_of_XML_files = number_of_pages = number_of_fragments = 0
+total_empty_comments = total_not_played = total_not_moved = 0
+time_per_page_accum = 0
+
+# arrays initialisation
+# The first four lists are filled in parallel: one entry per distinct page.
+page_names, page_count = [], []
+duration_page = []         # duration of experiment in function of page content
+fragments_per_page = []    # number of fragments for corresponding page
+# One entry per page position (first page, second page, ...).
+duration_order = []        # duration of experiment in function of page number
+
+# Parse every XML file in the results folder and accumulate the statistics.
+# Each file is printed as one subject; each <audioholder> in it is one page.
+files_list = os.listdir(folder_name)
+for file_name in files_list: # iterate over all files in files_list
+    if not file_name.endswith(".xml"): # skip anything that is not an XML file
+        continue
+
+    number_of_XML_files += 1
+    tree = ET.parse(os.path.join(folder_name, file_name))
+    root = tree.getroot()
+
+    print file_name # print file name (subject name)
+
+    # reset for new subject
+    total_duration = 0
+    page_number = 0
+
+    for audioholder in root.findall("./audioholder"):   # iterate over pages
+        page_name = audioholder.get('id')               # get page name
+
+        if page_name is None: # 'empty' audioholder: it has no id attribute
+            print "WARNING: " + file_name + " contains empty audio holder. (evaluation_stats.py)"
+            # NOTE(review): 'break' also skips every audioholder that follows
+            # the empty one in this file -- confirm that this is intended.
+            break
+
+        # 'testTime' keeps total duration: subtract time so far for duration of this audioholder
+        duration = float(audioholder.find("./metric/metricresult[@id='testTime']").text) - total_duration
+
+        # total duration of test
+        total_duration += duration
+
+        # number of audio elements
+        audioelements = audioholder.findall("./audioelement")
+        fragment_count = len(audioelements)
+        number_of_fragments += fragment_count
+
+        # per-page counters
+        number_of_comments = 0
+        number_of_missing_comments = 0
+        not_played = 0
+        not_moved = 0
+
+        # number of comments (interesting if comments not mandatory)
+        for audioelement in audioelements:
+            response = audioelement.find("./comment/response")
+            was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
+            was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")
+            # An audioelement without a <comment>/<response> element is counted
+            # as an empty comment instead of crashing on None.text.
+            # ('is not None' is required: a childless Element is falsy.)
+            if response is not None and response.text is not None \
+                    and len(response.text) > 1:
+                number_of_comments += 1
+            else:
+                number_of_missing_comments += 1
+            if was_played is not None and was_played.text == 'false':
+                not_played += 1
+            if was_moved is not None and was_moved.text == 'false':
+                not_moved += 1
+
+        # update global counters
+        total_empty_comments += number_of_missing_comments
+        total_not_played += not_played
+        total_not_moved += not_moved
+
+        # print audioholder id and duration
+        print "    " + page_name + ": " + seconds2timestr(duration) + ", "\
+              + str(number_of_comments)+"/"\
+              +str(number_of_comments+number_of_missing_comments)+" comments"
+
+        # number of audio elements not played
+        if not_played > 1:
+            print 'ATTENTION: '+str(not_played)+' fragments were not listened to!'
+        elif not_played == 1:
+            print 'ATTENTION: one fragment was not listened to!'
+
+        # number of audio element markers not moved
+        if not_moved > 1:
+            print 'ATTENTION: '+str(not_moved)+' markers were not moved!'
+        elif not_moved == 1:
+            print 'ATTENTION: one marker was not moved!'
+
+        # keep track of duration in function of page index
+        if len(duration_order) > page_number:
+            duration_order[page_number].append(duration)
+        else:
+            duration_order.append([duration])
+
+        # keep list of audioholder ids and count how many times each audioholder id
+        # was tested, how long it took, and how many fragments there were (if number of
+        # fragments is different, store as different audioholder id: 'id(count)')
+        entry_name = page_name
+        if entry_name in page_names and \
+                fragments_per_page[page_names.index(entry_name)] != fragment_count:
+            entry_name = page_name+"("+str(fragment_count)+")"
+
+        if entry_name in page_names: # already known: update its entry
+            page_index = page_names.index(entry_name)
+            page_count[page_index] += 1
+            duration_page[page_index].append(duration)
+        else: # first occurrence: make new entry
+            page_names.append(entry_name)
+            page_count.append(1)
+            duration_page.append([duration])
+            fragments_per_page.append(fragment_count)
+
+        # bookkeeping
+        page_number += 1 # increase page count for this specific test
+        number_of_pages += 1 # increase total number of pages
+        time_per_page_accum += duration # total duration (for average time spent per page)
+
+    # print total duration of this test
+    print "    TOTAL: " + seconds2timestr(total_duration)
+
+
+# PRINT EVERYTHING
+
+# Share of 'part' in 'whole' as a percentage string rounded to 2 decimals.
+# Returns "0.0" when 'whole' is zero (e.g. no XML files were found), so the
+# summary does not stop with a ZeroDivisionError.
+def _percentage(part, whole):
+    if whole == 0:
+        return "0.0"
+    return str(round(100.0*part/whole, 2))
+
+print "Number of XML files: " + str(number_of_XML_files)
+print "Number of pages: " + str(number_of_pages)
+print "Number of fragments: " + str(number_of_fragments)
+print "Number of empty comments: " + str(total_empty_comments) +\
+      " (" + _percentage(total_empty_comments, number_of_fragments) + "%)"
+print "Number of unplayed fragments: " + str(total_not_played) +\
+      " (" + _percentage(total_not_played, number_of_fragments) + "%)"
+print "Number of unmoved markers: " + str(total_not_moved) +\
+      " (" + _percentage(total_not_moved, number_of_fragments) + "%)"
+
+# Average over all pages of all subjects; reported as zero when no page was read.
+if number_of_pages > 0:
+    average_time_per_page = time_per_page_accum/number_of_pages
+else:
+    average_time_per_page = 0
+print "Average time per page: " + seconds2timestr(average_time_per_page)
+
+# Pages and number of times tested, as a flat list [name, count, name, count, ...]
+page_count_strings = [str(x) for x in page_count]
+count_list = []
+for tested_name, tested_count in zip(page_names, page_count_strings):
+    count_list.append(tested_name)
+    count_list.append(tested_count)
+print "Pages tested: " + str(count_list)
+
+# Mean duration of the first, second, ... page over all subjects that got that far
+print "Average duration per page:"
+for page_number, durations in enumerate(duration_order):
+    subject_count = len(durations)
+    average_duration = sum(durations)/subject_count
+    print "        page %d: %s (%d subjects)" % (
+        page_number+1, seconds2timestr(average_duration), subject_count)
+
+
+# Sort pages by number of audioelements, then by duration
+
+# average duration and number of subjects per page
+number_of_subjects_page = [len(durations) for durations in duration_page]
+average_duration_page = [sum(durations)/len(durations) for durations in duration_page]
+
+# combine into (name, average duration, fragments, subjects) tuples and sort
+# by number of fragments first (index 2), then by average duration (index 1).
+# The key used to be (1, 2), which sorted by duration first and contradicted
+# the ordering described above.
+combined_list = zip(page_names, average_duration_page, fragments_per_page, number_of_subjects_page)
+combined_list = sorted(combined_list, key=operator.itemgetter(2, 1)) # sort
+
+# Show average duration for all songs
+print "Average duration per audioholder:"
+for name, average_duration, fragments, subjects in combined_list:
+    print "        " + name + ": " \
+          + seconds2timestr(average_duration) \
+          + " (" + str(subjects) + " subjects, " \
+          + str(fragments) + " fragments)"
+
+
+#TODO
+# time per page in function of number of fragments (plot)
+# time per participant in function of number of pages
+# plot total time for each participant
+# plot total time
+# show 'count' per page (in order)
+
+# clear up page_index <> page_count <> page_number confusion
+
+# LaTeX -> PDF print out