comparison scripts/generate_report.py @ 1316:279930a008ca

All interfaces support comment boxes. Comment box identification matches presented tag (for instance, AB will be Comment on fragment A, rather than 1). Tighter buffer loading protocol, audioObjects register with the buffer rather than checking for buffer existence (which can be buggy depending on the buffer state). Buffers now have a state to ensure exact location in loading chain (downloading, decoding, LUFS, ready).
author Nicholas Jillings <nickjillings@users.noreply.github.com>
date Fri, 29 Jan 2016 11:11:57 +0000
parents
children 8540d153caec b5bf2f57187c
comparison
equal deleted inserted replaced
-1:000000000000 1316:279930a008ca
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import xml.etree.ElementTree as ET
5 import os # for getting files from directory
6 import operator # for sorting data with multiple keys
7 import sys # for accessing command line arguments
8 import subprocess # for calling pdflatex
9 import shlex # for calling pdflatex
10 import matplotlib.pyplot as plt # plots
11 import numpy as np # numbers
12
# Command line arguments: [results_folder] [no_render | -nr]
usage_message = "Use: python generate_report.py [results_folder] [no_render | -nr]"

# Validate with an explicit exit instead of `assert`: assertions are removed
# when Python runs with -O, which would let bad arguments through unnoticed.
if len(sys.argv) > 3:
    sys.exit("generate_report takes at most 2 command line arguments\n" + usage_message)

render_figures = True  # figures are (re)generated unless the caller opts out

# XML results files location
if len(sys.argv) == 1:
    folder_name = "../saves/"  # Looks in 'saves/' folder from 'scripts/' folder
    print(usage_message)
    print("Using default path: " + folder_name)
else:
    folder_name = sys.argv[1]  # First command line argument is folder
    if len(sys.argv) == 3:
        # Second command line argument is [no_render | -nr]
        if sys.argv[2] not in ('no_render', '-nr'):
            sys.exit("Second argument not recognised. \n" + usage_message)
        render_figures = False
32
def isNaN(num):
    """Return whether *num* compares unequal to itself (true for float NaN)."""
    differs_from_itself = num != num
    return differs_from_itself
35
# Turn number of seconds (int) to '[minutes] min [seconds] s' (string)
def seconds2timestr(time_in_seconds):
    """Format a duration in seconds as '<minutes> min <seconds> s'.

    Returns 'N/A' when the duration is None or NaN. Both parts are
    truncated to integers.
    """
    # guard clause: nothing sensible to format
    if time_in_seconds is None or isNaN(time_in_seconds):
        return 'N/A'
    whole_minutes = int(time_in_seconds/60)
    leftover_seconds = int(time_in_seconds%60)
    return "%d min %d s" % (whole_minutes, leftover_seconds)
44
# stats initialisation: running totals accumulated over all result files
number_of_XML_files = number_of_pages = number_of_fragments = 0
total_empty_comments = total_not_played = total_not_moved = 0
time_per_page_accum = 0

# arrays initialisation (one fresh list per name; lists must not be shared)
page_names = []
real_page_names = []     # regardless of differing numbers of fragments
subject_count = []       # subjects per audioholder name
page_count = []
duration_page = []       # duration of experiment in function of page content
duration_order = []      # duration of experiment in function of page number
fragments_per_page = []  # number of fragments for corresponding page

# survey stats
gender = []
age = []

# get username if available: the first non-empty environment variable wins,
# otherwise fall back to an empty string
user = next(
    (os.environ[variable]
     for variable in ('LOGNAME', 'USER', 'LNAME', 'USERNAME')
     if os.environ.get(variable)),
    '')
74
75
# make sure folder_name ends in '/' BEFORE it is embedded in the preamble:
# a \graphicspath entry has to end in a directory separator, so normalising
# only after the header was built left an unusable entry whenever the folder
# was passed without a trailing slash
folder_name = os.path.join(folder_name, '')

# begin LaTeX document: preamble, title and introduction; the author is the
# user name and the graphics path is the results folder
header = (r'''\documentclass[11pt, oneside]{article}
\usepackage{geometry}
\geometry{a4paper}
\usepackage[parfill]{parskip} % empty line instead of indent
\usepackage{graphicx} % figures
\usepackage[space]{grffile} % include figures with spaces in paths
\usepackage{hyperref}
\usepackage{tikz} % pie charts
\title{Report}
\author{''' +
          user +
          r'''}
\graphicspath{{''' +
          folder_name +
          r'''}}
%\setcounter{section}{-1} % Summary section 0 so number of sections equals number of files
\begin{document}
\maketitle
This is an automatically generated report using the `generate\_report.py' Python script
included with the Web Audio Evaluation Tool \cite{WAET} distribution which can be found
at \texttt{code.soundsoftware.ac.uk/projects/webaudioevaluationtool}.
\tableofcontents

''')

# end of the LaTeX document: bibliography with the single WAET reference
footer = '\n\t\t'+r'''\begin{thebibliography}{9}
\bibitem{WAET} % reference to accompanying publication
Nicholas Jillings, Brecht De Man, David Moffat and Joshua D. Reiss,
``Web Audio Evaluation Tool: A browser-based listening test environment,''
presented at the 12th Sound and Music Computing Conference, July 2015.
\end{thebibliography}
\end{document}'''

# everything between header and footer is accumulated here
body = ''
114
# generate images for later use
if render_figures:
    # Each sibling script receives one folder argument. Passing an argument
    # list (no shell involved) keeps folder names containing quotes, spaces
    # or shell metacharacters intact, which the hand-quoted command strings
    # run with shell=True did not.
    for script, target_folder in (("timeline_view_movement.py", folder_name),
                                  ("score_parser.py", folder_name),
                                  ("score_plot.py", folder_name + "ratings/")):
        subprocess.call(["python", script, target_folder])
120
# get every XML file in folder
# For each result file: add a section to the report body, collect per-page
# statistics (comments, unplayed fragments, unmoved markers, durations) and
# update the totals used for the summary.
files_list = os.listdir(folder_name)
for file_name in files_list:  # iterate over all files in files_list
    if not file_name.endswith(".xml"):  # only XML files are treated as results
        continue

    number_of_XML_files += 1
    tree = ET.parse(folder_name + file_name)
    root = tree.getroot()
    subject_name = file_name[:-4]  # file name without the '.xml' extension

    # PRINT name as section (backslash written out explicitly: '\s' is not a
    # valid escape sequence)
    body += '\n\\section{' + subject_name.capitalize() + '}\n'

    # reset for new subject
    total_duration = 0
    page_number = 0

    individual_table = '\n'  # table with stats for this individual test file
    timeline_plots = ''      # plots of timeline (movements and plays)

    # DEMO survey stats
    # get gender
    this_subjects_gender = root.find("./posttest/radio/[@id='gender']")
    if this_subjects_gender is not None:
        gender.append(this_subjects_gender.get("name"))
    else:
        gender.append('UNAVAILABLE')
    # get age
    this_subjects_age = root.find("./posttest/number/[@id='age']")
    if this_subjects_age is not None:
        age.append(this_subjects_age.text)
    #TODO add plot of age

    # get list of all page names
    for audioholder in root.findall("./audioholder"):  # iterate over pages
        page_name = audioholder.get('id')  # get page name

        if page_name is None:  # ignore 'empty' audio_holders
            print("WARNING: " + file_name + " contains empty audio holder. (generate_report.py)")
            # skip only this audioholder; a `break` here would also discard
            # every valid page that follows it in the same file
            continue

        number_of_comments = 0          # for this page
        number_of_missing_comments = 0  # for this page
        not_played = []                 # for this page
        not_moved = []                  # for this page

        time_result = audioholder.find("./metric/metricresult[@id='testTime']")
        if time_result is not None:  # check if time is included
            # 'testTime' keeps total duration: subtract time so far for duration of this audioholder
            duration = float(time_result.text) - total_duration

            # total duration of test
            total_duration += duration
        else:
            # without a time stamp neither this page nor the total is known
            duration = float('nan')
            total_duration = float('nan')

        # number of audio elements
        audioelements = audioholder.findall("./audioelement")  # get audioelements
        number_of_fragments += len(audioelements)  # add length of this list to total

        # number of comments (interesting if comments not mandatory)
        for audioelement in audioelements:
            response = audioelement.find("./comment/response")
            was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
            was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")
            # a fragment without any comment/response element counts as a
            # missing comment instead of crashing on `None.text`
            if response is not None and response.text is not None and len(response.text) > 1:
                number_of_comments += 1
            else:
                number_of_missing_comments += 1
            if was_played is not None and was_played.text == 'false':
                not_played.append(audioelement.get('id'))
            if was_moved is not None and was_moved.text == 'false':
                not_moved.append(audioelement.get('id'))

        # update global counters
        total_empty_comments += number_of_missing_comments
        total_not_played += len(not_played)
        total_not_moved += len(not_moved)

        # PRINT alerts when elements not played or markers not moved
        # number of audio elements not played
        if len(not_played) > 1:
            body += ('\t\t\\emph{\\textbf{ATTENTION: ' + str(len(not_played)) +
                     ' fragments were not listened to in ' + page_name + '! }}' +
                     ', '.join(not_played) + '\\\\ \n')
        if len(not_played) == 1:
            body += ('\t\t\\emph{\\textbf{ATTENTION: one fragment was not listened to in ' + page_name + '! }}' +
                     not_played[0] + '\\\\ \n')

        # number of audio element markers not moved
        if len(not_moved) > 1:
            body += ('\t\t\\emph{\\textbf{ATTENTION: ' + str(len(not_moved)) +
                     ' markers were not moved in ' + page_name + '! }}' +
                     ', '.join(not_moved) + '\\\\ \n')
        if len(not_moved) == 1:
            body += ('\t\t\\emph{\\textbf{ATTENTION: one marker was not moved in ' + page_name + '! }}' +
                     not_moved[0] + '\\\\ \n')

        # PRINT song-specific statistic
        individual_table += ('\t\t' + page_name + '&' +
                             str(number_of_comments) + '/' +
                             str(number_of_comments + number_of_missing_comments) + '&' +
                             seconds2timestr(duration) + '\\\\\n')

        # get timeline for this audioholder
        img_path = 'timelines_movement/' + subject_name + '-' + page_name + '.pdf'

        # check if available
        if os.path.isfile(folder_name + img_path):
            # SHOW timeline image
            timeline_plots += ('\\includegraphics[width=\\textwidth]{' +
                               folder_name + img_path + '}\n\t\t')

        # keep track of duration in function of page index
        if len(duration_order) > page_number:
            duration_order[page_number].append(duration)
        else:
            duration_order.append([duration])

        # keep list of audioholder ids and count how many times each audioholder id
        # was tested, how long it took, and how many fragments there were
        # (if number of fragments is different, store as different audioholder id)
        if page_name in page_names:
            page_index = page_names.index(page_name)  # get index
            # check if number of audioelements the same
            if len(audioelements) == fragments_per_page[page_index]:
                page_count[page_index] += 1
                duration_page[page_index].append(duration)
            else:  # make new entry
                alt_page_name = page_name + "(" + str(len(audioelements)) + ")"
                if alt_page_name in page_names:  # if already there
                    alt_page_index = page_names.index(alt_page_name)  # get index
                    page_count[alt_page_index] += 1
                    duration_page[alt_page_index].append(duration)
                else:
                    page_names.append(alt_page_name)
                    page_count.append(1)
                    duration_page.append([duration])
                    fragments_per_page.append(len(audioelements))
        else:
            page_names.append(page_name)
            page_count.append(1)
            duration_page.append([duration])
            fragments_per_page.append(len(audioelements))

        # number of subjects per audioholder regardless of differing numbers of
        # fragments (for inclusion in box plots)
        if page_name in real_page_names:
            page_index = real_page_names.index(page_name)  # get index
            subject_count[page_index] += 1
        else:
            real_page_names.append(page_name)
            subject_count.append(1)

        # bookkeeping
        page_number += 1                 # increase page count for this specific test
        number_of_pages += 1             # increase total number of pages
        time_per_page_accum += duration  # total duration (for average time spent per page)

    # PRINT table with statistics about this test
    body += ('\t\t' + r'''\begin{tabular}{|p{3.5cm}|c|p{2.5cm}|}
\hline
\textbf{Song name} & \textbf{Comments} & \textbf{Duration} \\ \hline ''' +
             individual_table + '\t\t' +
             r'''\hline
\textbf{TOTAL} & & \textbf{''' +
             seconds2timestr(total_duration) +
             r'''}\\
\hline
\end{tabular}

''')
    # PRINT timeline plots
    body += timeline_plots
293
# join to footer: the per-file sections collected so far go after the summary
footer = body + footer

# empty body again
body = ''

# PRINT summary of everything (at start)
# unnumbered so that number of sections equals number of files
body += '\\section*{Summary}\n\t\t\\addcontentsline{toc}{section}{Summary}\n'


def _share_of_fragments(count):
    """Return *count* as a LaTeX percentage of all fragments, or 'N/A' if there are none."""
    if number_of_fragments == 0:
        # no result files (or only empty ones): avoid a ZeroDivisionError
        return 'N/A'
    return str(round(100.0*count/number_of_fragments, 2)) + r"\%"


# average is undefined without pages; seconds2timestr renders None as 'N/A'
if number_of_pages:
    average_time_per_page = time_per_page_accum/number_of_pages
else:
    average_time_per_page = None

# PRINT table with statistics
body += '\t\t\\begin{tabular}{ll}\n\t\t\t'
body += r'Number of XML files: &' + str(number_of_XML_files) + r'\\'+'\n\t\t\t'
body += r'Number of pages: &' + str(number_of_pages) + r'\\'+'\n\t\t\t'
body += r'Number of fragments: &' + str(number_of_fragments) + r'\\'+'\n\t\t\t'
body += (r'Number of empty comments: &' + str(total_empty_comments) +
         " (" + _share_of_fragments(total_empty_comments) + r")\\"+'\n\t\t\t')
body += (r'Number of unplayed fragments: &' + str(total_not_played) +
         " (" + _share_of_fragments(total_not_played) + r")\\"+'\n\t\t\t')
body += (r'Number of unmoved markers: &' + str(total_not_moved) +
         " (" + _share_of_fragments(total_not_moved) + r")\\"+'\n\t\t\t')
body += r'Average time per page: &' + seconds2timestr(average_time_per_page) + r"\\"+'\n\t\t'
body += '\\end{tabular} \\vspace{1.5cm} \\\\ \n'
317
# Average duration for first, second, ... page
body += "\t\t\\vspace{.5cm} \n\n\t\tAverage duration per page (see also Figure \\ref{fig:avgtimeperpage}): \\\\ \n\t\t"
body += r'''\begin{tabular}{lll}
\textbf{Page} & \textbf{Duration} & \textbf{\# subjects}\\'''
# store average time per page (one entry per position in the page order)
tpp_averages = [sum(page_durations)/len(page_durations) for page_durations in duration_order]
# one table row per page position: 1-based position, mean duration, sample count
for position, page_durations in enumerate(duration_order, 1):
    body += ''.join(['\n\t\t\t', str(position), "&",
                     seconds2timestr(tpp_averages[position-1]),
                     "&", str(len(page_durations)), r"\\"])

body += '\n\t\t\\end{tabular} \\vspace{1.5cm} \\\\ \n\n\t\t'

# SHOW bar plot of average time per page
last_position = len(duration_order)
plt.bar(range(1, last_position+1), np.array(tpp_averages)/60)  # seconds -> minutes
plt.xlabel('Page order')
plt.xlim(.8, last_position+1)
plt.xticks(np.arange(1, last_position+1)+.4, range(1, last_position+1))
plt.ylabel('Average time [minutes]')
plt.savefig(folder_name+"time_per_page.pdf", bbox_inches='tight')
plt.close()
#TODO add error bars
340
341
# Sort pages by number of audioelements, then by duration

# average duration and number of subjects per page
number_of_subjects_page = [len(durations) for durations in duration_page]
average_duration_page = [sum(durations)/len(durations) for durations in duration_page]

# combine and sort in function of number of audioelements and duration
# NOTE(review): itemgetter(1, 2) orders by average duration first and by the
# number of fragments second, the reverse of what the comments above describe
# -- confirm which order is intended.
combined_list = sorted(
    zip(page_names, average_duration_page, fragments_per_page, number_of_subjects_page),
    key=operator.itemgetter(1, 2))  # sort

# Show average duration for all songs
body += r'''\vspace{.5cm}
Average duration per audioholder (see also Figure \ref{fig:avgtimeperaudioholder}): \\
\begin{tabular}{llll}
\textbf{Audioholder} & \textbf{Duration} & \textbf{\# subjects} & \textbf{\# fragments} \\'''
audioholder_names_ordered = []
average_duration_audioholder_ordered = []
number_of_subjects = []
# each sorted row is (name, average duration, number of fragments, number of subjects)
for holder_name, holder_duration, holder_fragments, holder_subjects in combined_list:
    audioholder_names_ordered.append(holder_name)
    average_duration_audioholder_ordered.append(holder_duration)
    number_of_subjects.append(holder_subjects)
    body += ''.join(['\n\t\t\t', holder_name, "&",
                     seconds2timestr(holder_duration), "&",
                     str(holder_subjects), "&",
                     str(holder_fragments), r"\\"])
body += '\n\t\t\\end{tabular}\n'

# both plots below share the same x axis layout
holder_total = len(audioholder_names_ordered)

# SHOW bar plot of average time per audioholder
plt.bar(range(1, holder_total+1), np.array(average_duration_audioholder_ordered)/60)
plt.xlabel('Audioholder')
plt.xlim(.8, holder_total+1)
plt.xticks(np.arange(1, holder_total+1)+.4, audioholder_names_ordered, rotation=90)
plt.ylabel('Average time [minutes]')
plt.savefig(folder_name+"time_per_audioholder.pdf", bbox_inches='tight')
plt.close()

# SHOW bar plot of number of subjects per audioholder
plt.bar(range(1, holder_total+1), number_of_subjects)
plt.xlabel('Audioholder')
plt.xlim(.8, holder_total+1)
plt.xticks(np.arange(1, holder_total+1)+.4, audioholder_names_ordered, rotation=90)
plt.ylabel('Number of subjects')
# use whole numbers only as y ticks
ax = plt.gca()
ylims = ax.get_ylim()
yint = np.arange(int(np.floor(ylims[0])), int(np.ceil(ylims[1]))+1)
plt.yticks(yint)
plt.savefig(folder_name+"subjects_per_audioholder.pdf", bbox_inches='tight')
plt.close()
394
# SHOW the summary figures: (PDF file in the results folder, caption, label)
summary_figures = (
    ('time_per_page.pdf',
     'Average time spent per page.',
     'fig:avgtimeperpage'),
    ('time_per_audioholder.pdf',
     'Average time spent per audioholder.',
     'fig:avgtimeperaudioholder'),
    ('subjects_per_audioholder.pdf',
     'Number of subjects per audioholder.',
     'fig:subjectsperaudioholder'),
)
body += '\n'  # line break before the first figure environment
for pdf_name, caption, label in summary_figures:
    body += ''.join([
        r'''\begin{figure}[htbp]
\begin{center}
\includegraphics[width=.65\textwidth]{''',
        folder_name, pdf_name,
        r'''}
\caption{''',
        caption,
        r'''}
\label{''',
        label,
        r'''}
\end{center}
\end{figure}

''',
    ])
#TODO add error bars
#TODO layout of figures
432
# SHOW boxplot per audioholder
#TODO order in decreasing order of participants
for audioholder_name in page_names:  # get each name
    box_plot_path = folder_name+'ratings/'+audioholder_name+'-ratings-box.pdf'
    # plot boxplot if exists (not so for the 'alt' names)
    if not os.path.isfile(box_plot_path):
        continue
    # participant count is kept per real page name
    participants = subject_count[real_page_names.index(audioholder_name)]
    body += ''.join([
        r'''\begin{figure}[htbp]
\begin{center}
\includegraphics[width=.65\textwidth]{''',
        box_plot_path,
        r'''}
\caption{Box plot of ratings for audioholder ''',
        audioholder_name, ' (', str(participants),
        r''' participants).}
\label{fig:boxplot''',
        audioholder_name.replace(" ", ""),  # label without spaces
        r'''}
\end{center}
\end{figure}

''',
    ])
451
# DEMO pie chart of gender distribution among subjects
genders = ['male', 'female', 'other', 'preferNotToSay', 'UNAVAILABLE']
# TODO: get the above automatically
# Build the tikz \foreach list: one 'percentage/Label (count),' entry per
# gender that occurs at least once.
gender_distribution = ''
for item in genders:
    number = gender.count(item)
    if number > 0:
        gender_distribution += ("{:.2f}".format((100.0*number)/len(gender)) +
                                '/' + item.capitalize() + ' (' + str(number) + '),\n')

# The colour cycle below has four entries (indices 0-3), so the counters are
# reset once \cyclecount exceeds 3. The previous bound of 6 let a fifth
# category index past the end of \cyclelist.
body += (r'''
% Pie chart of gender distribution
\def\angle{0}
\def\radius{3}
\def\cyclelist{{"orange","blue","red","green"}}
\newcount\cyclecount \cyclecount=-1
\newcount\ind \ind=-1
\begin{figure}[htbp]
\begin{center}\begin{tikzpicture}[nodes = {font=\sffamily}]
\foreach \percent/\name in {''' +
         gender_distribution +
         r'''} {\ifx\percent\empty\else % If \percent is empty, do nothing
\global\advance\cyclecount by 1 % Advance cyclecount
\global\advance\ind by 1 % Advance list index
\ifnum3<\cyclecount % If cyclecount is larger than list
\global\cyclecount=0 % reset cyclecount and
\global\ind=0 % reset list index
\fi
\pgfmathparse{\cyclelist[\the\ind]} % Get color from cycle list
\edef\color{\pgfmathresult} % and store as \color
% Draw angle and set labels
\draw[fill={\color!50},draw={\color}] (0,0) -- (\angle:\radius)
arc (\angle:\angle+\percent*3.6:\radius) -- cycle;
\node at (\angle+0.5*\percent*3.6:0.7*\radius) {\percent\,\%};
\node[pin=\angle+0.5*\percent*3.6:\name]
at (\angle+0.5*\percent*3.6:\radius) {};
\pgfmathparse{\angle+\percent*3.6} % Advance angle
\xdef\angle{\pgfmathresult} % and store in \angle
\fi
};
\end{tikzpicture}
\caption{Representation of gender across subjects}
\label{default}
\end{center}
\end{figure}

''')
499 # problem: some people entered twice?
500
501 #TODO
502 # time per page in function of number of fragments (plot)
503 # time per participant in function of number of pages
504 # plot total time for each participant
505 # show 'count' per page (in order)
506
507 # clear up page_index <> page_count <> page_number confusion
508
509
texfile = header+body+footer  # add bits together

tex_path = folder_name + 'Report.tex'
# Argument list instead of a hand-quoted command string re-split by shlex:
# a folder name containing a quote character can no longer break the call.
pdflatex_command = ['pdflatex', '-output-directory=' + folder_name, tex_path]

print(' '.join(pdflatex_command))  # DEBUG

# write TeX file (closed before pdflatex reads it)
with open(tex_path, 'w') as f:
    f.write(texfile)

# run pdflatex twice: the second pass picks up the table of contents and
# \ref targets recorded by the first
for _ in range(2):
    subprocess.call(pdflatex_command)
522
# remove auxiliary LaTeX files
# One try per file: with a single try around all removals, the first missing
# file aborted the clean-up and left the remaining ones behind.
for extension in ('aux', 'log', 'out', 'toc'):
    try:
        os.remove(folder_name + 'Report.' + extension)
    except OSError:
        pass  # best effort: this auxiliary file could not be removed (e.g. never produced)
531