annotate scripts/evaluation_stats.py @ 1316:279930a008ca

All interfaces support comment boxes. Comment box identification matches presented tag (for instance, AB will be Comment on fragment A, rather than 1). Tighter buffer loading protocol, audioObjects register with the buffer rather than checking for buffer existence (which can be buggy depending on the buffer state). Buffers now have a state to ensure exact location in loading chain (downloading, decoding, LUFS, ready).
author Nicholas Jillings <nickjillings@users.noreply.github.com>
date Fri, 29 Jan 2016 11:11:57 +0000
parents
children 1b6fa37d46a4 235594325b84 b5bf2f57187c
rev   line source
nickjillings@1316 1 #!/usr/bin/python
nickjillings@1316 2 # -*- coding: utf-8 -*-
nickjillings@1316 3
nickjillings@1316 4 import xml.etree.ElementTree as ET
nickjillings@1316 5 import os # for getting files from directory
nickjillings@1316 6 import operator # for sorting data with multiple keys
nickjillings@1316 7 import sys # for accessing command line arguments
nickjillings@1316 8
# Command line arguments
# The script takes at most one argument: the folder holding the XML result
# files. The check is explicit rather than an 'assert', because assertions are
# stripped under 'python -O', which would leave folder_name undefined.
if len(sys.argv) > 2:
    sys.exit("evaluation_stats takes at most 1 command line argument\n"
             "Use: python evaluation_stats.py [results_folder]")

# XML results files location
if len(sys.argv) >= 2:
    folder_name = sys.argv[1]  # First command line argument is folder
else:
    folder_name = "../saves"  # Looks in 'saves/' folder from 'scripts/' folder
    print("Use: python evaluation_stats.py [results_folder]")
    print("Using default path: " + folder_name)
nickjillings@1316 20
# Turn number of seconds (int or float) to '[minutes] min [seconds] s' (string)
def seconds2timestr(time_in_seconds):
    """Format a duration in seconds as '[minutes] min [seconds] s'.

    Fractions of a second are truncated. A negative duration is formatted
    from its magnitude and prefixed with '-', so the sign is never lost
    (e.g. -90 gives '-1 min 30 s').
    """
    sign = "-" if time_in_seconds < 0 else ""
    # divmod on the magnitude keeps minutes and seconds consistent with each
    # other for ints and floats alike
    minutes, seconds = divmod(abs(time_in_seconds), 60)
    return sign + str(int(minutes)) + " min " + str(int(seconds)) + " s"
nickjillings@1316 26
# stats initialisation
# global counters, accumulated over every XML file that is processed
number_of_XML_files = number_of_pages = number_of_fragments = 0
total_empty_comments = total_not_played = total_not_moved = 0
time_per_page_accum = 0

# arrays initialisation
# page_names, page_count, duration_page and fragments_per_page are indexed
# together (one entry per distinct page); duration_order is indexed by the
# position of a page within a test
page_names, page_count = [], []
duration_page = []       # duration of experiment in function of page content
duration_order = []      # duration of experiment in function of page number
fragments_per_page = []  # number of fragments for corresponding page
nickjillings@1316 42
# get every XML file in folder
# Every XML file is the result of one subject; every 'audioholder' element in
# it is one page of the test. The loop prints a per-page and per-file summary
# and accumulates the global counters and lists used by the report below.
files_list = os.listdir(folder_name)
for file_name in files_list:  # iterate over all files in files_list
    if not file_name.endswith(".xml"):  # skip anything that is not an XML file
        continue

    number_of_XML_files += 1
    tree = ET.parse(os.path.join(folder_name, file_name))
    root = tree.getroot()

    print(file_name)  # print file name (subject name)

    # reset for new subject
    total_duration = 0
    page_number = 0

    # get list of all page names
    for audioholder in root.findall("./audioholder"):  # iterate over pages
        page_name = audioholder.get('id')  # get page name

        if page_name is None:  # 'empty' audio_holder
            print("WARNING: " + file_name + " contains empty audio holder. (evaluation_stats.py)")
            break  # stop processing the pages of this file

        # 'testTime' keeps total duration: subtract time so far for duration of this audioholder
        # NOTE(review): raises AttributeError when the page has no 'testTime' metric
        test_time = audioholder.find("./metric/metricresult[@id='testTime']")
        duration = float(test_time.text) - total_duration

        # total duration of test
        total_duration += duration

        # number of audio elements
        audioelements = audioholder.findall("./audioelement")  # get audioelements
        number_of_fragments += len(audioelements)  # add length of this list to total

        # per-page counters
        number_of_comments = 0
        number_of_missing_comments = 0
        not_played = 0
        not_moved = 0

        # number of comments (interesting if comments not mandatory)
        for audioelement in audioelements:
            response = audioelement.find("./comment/response")
            was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
            was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")

            # find() returns None when the fragment has no comment/response
            # element: count that as a missing comment instead of failing on
            # None.text
            comment_text = response.text if response is not None else None
            if comment_text is not None and len(comment_text) > 1:
                number_of_comments += 1
            else:
                number_of_missing_comments += 1
            if was_played is not None and was_played.text == 'false':
                not_played += 1
            if was_moved is not None and was_moved.text == 'false':
                not_moved += 1

        # update global counters
        total_empty_comments += number_of_missing_comments
        total_not_played += not_played
        total_not_moved += not_moved

        # print audioholder id and duration
        print(" " + page_name + ": " + seconds2timestr(duration) + ", "
              + str(number_of_comments) + "/"
              + str(number_of_comments+number_of_missing_comments) + " comments")

        # number of audio elements not played
        if not_played > 1:
            print('ATTENTION: '+str(not_played)+' fragments were not listened to!')
        if not_played == 1:
            print('ATTENTION: one fragment was not listened to!')

        # number of audio element markers not moved
        if not_moved > 1:
            print('ATTENTION: '+str(not_moved)+' markers were not moved!')
        if not_moved == 1:
            print('ATTENTION: one marker was not moved!')

        # keep track of duration in function of page index
        if len(duration_order) > page_number:
            duration_order[page_number].append(duration)
        else:
            duration_order.append([duration])

        # keep list of audioholder ids and count how many times each audioholder id
        # was tested, how long it took, and how many fragments there were (if number of
        # fragments is different, store as different audioholder id)
        fragment_count = len(audioelements)
        page_key = page_name
        if page_name in page_names:
            if fragment_count != fragments_per_page[page_names.index(page_name)]:
                # same id but different number of fragments: separate entry
                page_key = page_name+"("+str(fragment_count)+")"

        if page_key in page_names:  # entry already there: update it
            page_index = page_names.index(page_key)
            page_count[page_index] += 1
            duration_page[page_index].append(duration)
        else:  # make new entry
            page_names.append(page_key)
            page_count.append(1)
            duration_page.append([duration])
            fragments_per_page.append(fragment_count)

        # bookkeeping
        page_number += 1  # increase page count for this specific test
        number_of_pages += 1  # increase total number of pages
        time_per_page_accum += duration  # total duration (for average time spent per page)

    # print total duration of this test
    print(" TOTAL: " + seconds2timestr(total_duration))
nickjillings@1316 155
nickjillings@1316 156
# PRINT EVERYTHING

def _percentage_of_fragments(count):
    """Return count as a percentage of all fragments, rounded to 2 decimals.

    Returns 0.0 when no fragments were found (for instance when the folder
    holds no XML files), instead of raising ZeroDivisionError.
    """
    if number_of_fragments == 0:
        return 0.0
    return round(100.0*count/number_of_fragments, 2)


print("Number of XML files: " + str(number_of_XML_files))
print("Number of pages: " + str(number_of_pages))
print("Number of fragments: " + str(number_of_fragments))
print("Number of empty comments: " + str(total_empty_comments) +
      " (" + str(_percentage_of_fragments(total_empty_comments)) + "%)")
print("Number of unplayed fragments: " + str(total_not_played) +
      " (" + str(_percentage_of_fragments(total_not_played)) + "%)")
print("Number of unmoved markers: " + str(total_not_moved) +
      " (" + str(_percentage_of_fragments(total_not_moved)) + "%)")

# without any page there is nothing to average: report a zero duration
if number_of_pages > 0:
    average_time_per_page = time_per_page_accum/number_of_pages
else:
    average_time_per_page = 0
print("Average time per page: " + seconds2timestr(average_time_per_page))
nickjillings@1316 169
# Pages and number of times tested
# Build one flat list that alternates page name and count:
# [name, count, name, count, ...]
page_count_strings = list(map(str, page_count))
count_list = []
for name_and_count in zip(page_names, page_count_strings):
    count_list.extend(name_and_count)
print("Pages tested: " + str(count_list))
nickjillings@1316 176
# Average duration for first, second, ... page
# duration_order[i] holds the durations of the (i+1)-th page of every test
print("Average duration per page:")
for page_number, durations in enumerate(duration_order):
    subject_count = len(durations)
    mean_duration = sum(durations)/subject_count
    print(" page " + str(page_number+1) + ": " +
          seconds2timestr(mean_duration) +
          " ("+str(subject_count)+" subjects)")
nickjillings@1316 183
nickjillings@1316 184
# Sort pages by number of audioelements, then by duration

# average duration and number of subjects per page
# (every entry of duration_page holds at least one duration)
number_of_subjects_page = [len(line) for line in duration_page]
average_duration_page = [sum(line)/len(line) for line in duration_page]

# combine and sort in function of number of audioelements and duration
# each row is (page name, average duration, fragments, subjects); the sort key
# is therefore index 2 (fragments) first and index 1 (average duration) second
combined_list = [page_names, average_duration_page, fragments_per_page, number_of_subjects_page]
combined_list = sorted(zip(*combined_list), key=operator.itemgetter(2, 1))  # sort

# Show average duration for all songs
print("Average duration per audioholder:")
for name, average_duration, fragments, subjects in combined_list:
    print(" " + name + ": "
          + seconds2timestr(average_duration)
          + " (" + str(subjects) + " subjects, "
          + str(fragments) + " fragments)")
nickjillings@1316 205
nickjillings@1316 206
nickjillings@1316 207 #TODO
nickjillings@1316 208 # time per page in function of number of fragments (plot)
nickjillings@1316 209 # time per participant in function of number of pages
nickjillings@1316 210 # plot total time for each participant
nickjillings@1316 211 # plot total time
nickjillings@1316 212 # show 'count' per page (in order)
nickjillings@1316 213
nickjillings@1316 214 # clear up page_index <> page_count <> page_number confusion
nickjillings@1316 215
nickjillings@1316 216 # LaTeX -> PDF print out