comparison scripts/evaluation_stats.py @ 1316:279930a008ca

All interfaces support comment boxes. Comment box identification matches presented tag (for instance, AB will be Comment on fragment A, rather than 1). Tighter buffer loading protocol, audioObjects register with the buffer rather than checking for buffer existence (which can be buggy depending on the buffer state). Buffers now have a state to ensure exact location in loading chain (downloading, decoding, LUFS, ready).
author Nicholas Jillings <nickjillings@users.noreply.github.com>
date Fri, 29 Jan 2016 11:11:57 +0000
parents
children 1b6fa37d46a4 235594325b84 b5bf2f57187c
comparison
equal deleted inserted replaced
-1:000000000000 1316:279930a008ca
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import xml.etree.ElementTree as ET
5 import os # for getting files from directory
6 import operator # for sorting data with multiple keys
7 import sys # for accessing command line arguments
8
# Command line arguments
# Validate explicitly instead of with `assert`: assertions are stripped when
# Python runs with -O, which would let extra arguments through and leave
# folder_name undefined for the rest of the script.
if len(sys.argv) > 2:
    sys.exit("evaluation_stats takes at most 1 command line argument\n"
             "Use: python evaluation_stats.py [results_folder]")

# XML results files location
if len(sys.argv) < 2:
    folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
    print("Use: python evaluation_stats.py [results_folder]")
    print("Using default path: " + folder_name)
else:
    folder_name = sys.argv[1]   # First command line argument is folder
20
21 # Turn number of seconds (int) to '[minutes] min [seconds] s' (string)
def seconds2timestr(time_in_seconds):
    """Format a duration given in seconds as '[minutes] min [seconds] s'.

    Both the minutes and the remaining seconds are truncated to whole
    numbers with int() before being formatted.
    """
    whole_minutes = int(time_in_seconds / 60)
    leftover_seconds = int(time_in_seconds % 60)
    return "%d min %d s" % (whole_minutes, leftover_seconds)
26
# Running totals accumulated over every results file
number_of_XML_files = number_of_pages = number_of_fragments = 0
total_empty_comments = total_not_played = total_not_moved = 0
time_per_page_accum = 0     # summed page durations (for average time per page)

# Lists filled while reading the results. page_names, page_count,
# duration_page and fragments_per_page share one index per page name;
# duration_order is indexed by the position of the page within a test.
page_names, page_count = [], []
duration_page = []          # duration of experiment in function of page content
duration_order = []         # duration of experiment in function of page number
fragments_per_page = []     # number of fragments for corresponding page
42
# Read every XML results file in the folder, print a per-page report for each
# subject and accumulate the global statistics.
files_list = os.listdir(folder_name)
for file_name in files_list:                # iterate over all files in files_list
    if not file_name.endswith(".xml"):      # only XML files are results files
        continue

    number_of_XML_files += 1
    tree = ET.parse(os.path.join(folder_name, file_name))
    root = tree.getroot()

    print(file_name)    # print file name (subject name)

    # reset for new subject
    total_duration = 0
    page_number = 0

    for audioholder in root.findall("./audioholder"):   # iterate over pages
        page_name = audioholder.get('id')               # get page name

        if page_name is None:   # 'empty' audio_holder
            print("WARNING: " + file_name + " contains empty audio holder. (evaluation_stats.py)")
            # NOTE(review): `break` also skips every later page of this file;
            # use `continue` if only the empty holder should be ignored.
            break

        number_of_comments = 0          # for this page
        number_of_missing_comments = 0  # for this page
        not_played = 0                  # for this page
        not_moved = 0                   # for this page

        # 'testTime' keeps total duration: subtract time so far for duration of this audioholder
        duration = float(audioholder.find("./metric/metricresult[@id='testTime']").text) - total_duration

        # total duration of test
        total_duration += duration

        # number of audio elements
        audioelements = audioholder.findall("./audioelement")
        number_of_fragments += len(audioelements)

        # number of comments (interesting if comments not mandatory)
        for audioelement in audioelements:
            response = audioelement.find("./comment/response")
            was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
            was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")

            # A missing <response> element counts as a missing comment rather
            # than crashing on `None.text`, matching the None checks below.
            has_comment = (response is not None
                           and response.text is not None
                           and len(response.text) > 1)
            if has_comment:
                number_of_comments += 1
            else:
                number_of_missing_comments += 1
            if was_played is not None and was_played.text == 'false':
                not_played += 1
            if was_moved is not None and was_moved.text == 'false':
                not_moved += 1

        # update global counters
        total_empty_comments += number_of_missing_comments
        total_not_played += not_played
        total_not_moved += not_moved

        # print audioholder id and duration
        print(" " + page_name + ": " + seconds2timestr(duration) + ", "
              + str(number_of_comments) + "/"
              + str(number_of_comments + number_of_missing_comments) + " comments")

        # number of audio elements not played
        if not_played > 1:
            print('ATTENTION: ' + str(not_played) + ' fragments were not listened to!')
        elif not_played == 1:
            print('ATTENTION: one fragment was not listened to!')

        # number of audio element markers not moved
        if not_moved > 1:
            print('ATTENTION: ' + str(not_moved) + ' markers were not moved!')
        elif not_moved == 1:
            print('ATTENTION: one marker was not moved!')

        # keep track of duration in function of page index
        if len(duration_order) > page_number:
            duration_order[page_number].append(duration)
        else:
            duration_order.append([duration])

        # keep list of audioholder ids and count how many times each audioholder id
        # was tested, how long it took, and how many fragments there were (if number of
        # fragments is different, store as different audioholder id)
        entry_name = page_name
        if page_name in page_names:
            page_index = page_names.index(page_name)
            if len(audioelements) != fragments_per_page[page_index]:
                # same id but different fragment count: file under "<id>(<count>)"
                entry_name = page_name + "(" + str(len(audioelements)) + ")"

        if entry_name in page_names:    # already known: update its counters
            page_index = page_names.index(entry_name)
            page_count[page_index] += 1
            duration_page[page_index].append(duration)
        else:                           # first occurrence: make new entry
            page_names.append(entry_name)
            page_count.append(1)
            duration_page.append([duration])
            fragments_per_page.append(len(audioelements))

        # bookkeeping
        page_number += 1                # increase page count for this specific test
        number_of_pages += 1            # increase total number of pages
        time_per_page_accum += duration # total duration (for average time spent per page)

    # print total duration of this test
    print(" TOTAL: " + seconds2timestr(total_duration))
155
156
# PRINT EVERYTHING

def _percentage(part, whole):
    """Return part as a percentage of whole, rounded to 2 decimals.

    Returns 0.0 when whole is 0, so an empty results folder does not raise
    ZeroDivisionError.
    """
    if whole == 0:
        return 0.0
    return round(100.0 * part / whole, 2)


print("Number of XML files: " + str(number_of_XML_files))
print("Number of pages: " + str(number_of_pages))
print("Number of fragments: " + str(number_of_fragments))
print("Number of empty comments: " + str(total_empty_comments) +
      " (" + str(_percentage(total_empty_comments, number_of_fragments)) + "%)")
print("Number of unplayed fragments: " + str(total_not_played) +
      " (" + str(_percentage(total_not_played, number_of_fragments)) + "%)")
print("Number of unmoved markers: " + str(total_not_moved) +
      " (" + str(_percentage(total_not_moved, number_of_fragments)) + "%)")

# guard the average against a run in which no page was read
if number_of_pages > 0:
    average_time_per_page = time_per_page_accum / number_of_pages
else:
    average_time_per_page = 0
print("Average time per page: " + seconds2timestr(average_time_per_page))

# Pages and number of times tested, interleaved as [name, count, name, count, ...]
page_count_strings = [str(x) for x in page_count]
count_list = []
for tested_name, tested_count in zip(page_names, page_count_strings):
    count_list.extend([tested_name, tested_count])
print("Pages tested: " + str(count_list))

# Average duration for first, second, ... page
print("Average duration per page:")
for page_number, durations in enumerate(duration_order):
    print(" page " + str(page_number + 1) + ": " +
          seconds2timestr(sum(durations) / len(durations)) +
          " (" + str(len(durations)) + " subjects)")
183
184
# Sort pages by number of audioelements, then by duration

# average duration and number of subjects per page
number_of_subjects_page = [len(durations) for durations in duration_page]
average_duration_page = [sum(durations) / len(durations) for durations in duration_page]

# combine and sort in function of number of audioelements and duration;
# each tuple is (page name, average duration, fragments, subjects), so the
# sort key is index 2 (fragments) first and index 1 (average duration) second
combined_list = sorted(
    zip(page_names, average_duration_page, fragments_per_page, number_of_subjects_page),
    key=operator.itemgetter(2, 1))

# Show average duration for all songs
print("Average duration per audioholder:")
for name, average_duration, fragments, subjects in combined_list:
    print(" " + name + ": "
          + seconds2timestr(average_duration)
          + " (" + str(subjects) + " subjects, "
          + str(fragments) + " fragments)")
205
206
207 #TODO
208 # time per page in function of number of fragments (plot)
209 # time per participant in function of number of pages
210 # plot total time for each participant
211 # plot total time
212 # show 'count' per page (in order)
213
214 # clear up page_index <> page_count <> page_number confusion
215
216 # LaTeX -> PDF print out