Mercurial > hg > webaudioevaluationtool
changeset 2264:556b79c72eee
Change folder name 'scripts' to 'python'
author | Brecht De Man <b.deman@qmul.ac.uk> |
---|---|
date | Wed, 20 Apr 2016 16:33:22 +0200 |
parents | 5bf0555905de |
children | c8f05d753f12 4843377d9976 |
files | docs/Instructions/Instructions.tex python/comment_parser.html python/comment_parser.py python/evaluation_stats.py python/generate_report.py python/pythonServer.py python/score_parser.py python/score_plot.py python/timeline_view.py python/timeline_view_movement.py scripts/comment_parser.html scripts/comment_parser.py scripts/evaluation_stats.py scripts/generate_report.py scripts/pythonServer.py scripts/score_parser.py scripts/score_plot.py scripts/timeline_view.py scripts/timeline_view_movement.py |
diffstat | 19 files changed, 1975 insertions(+), 1975 deletions(-) [+] |
line wrap: on
line diff
--- a/docs/Instructions/Instructions.tex Wed Apr 20 16:02:17 2016 +0200 +++ b/docs/Instructions/Instructions.tex Wed Apr 20 16:33:22 2016 +0200 @@ -102,14 +102,14 @@ \item \texttt{save.php}: PHP script to store result XML files to web server \item PHP analysis scripts % ELABORATE \end{itemize} + \textbf{Python scripts (\texttt{./python/})} + \begin{itemize} + \item Helpful Python and PHP scripts for extraction and visualisation of data.\\ + \end{itemize} \textbf{Output files (\texttt{./saves/})} \begin{itemize} \item The output XML files of tests will be stored here by default by the \texttt{pythonServer.py} script.\\ \end{itemize} - \textbf{Auxiliary scripts (\texttt{./scripts/})} - \begin{itemize} - \item Helpful Python and PHP scripts for extraction and visualisation of data.\\ - \end{itemize} \textbf{Test creation tool (\texttt{./test\_create/})} \begin{itemize} \item Webpage for easily setting up your own test without having to delve into the XML.\\ @@ -149,7 +149,7 @@ \item Go to \path{test_create.html} and configure your test. \item Save your test file in the folder \path{.\tests\}. \item Your test will be live at \path{[web server address]/index.html?url=tests/[testname].xml}. If you are not using a web server, you can simulate one locally by running - \path{scripts/pythonServer.py} (requires Python), after which you can access the test at \\ % hack + \path{python/pythonServer.py} (requires Python), after which you can access the test at \\ % hack \path{http://localhost:8000/index.html?url=tests/[testname].xml} \end{itemize} @@ -194,7 +194,7 @@ Then hit enter and run the Python script by typing - \texttt{python scripts/pythonServer.py} + \texttt{python python/pythonServer.py} and hit enter again. See also Figure \ref{fig:terminal}. @@ -728,10 +728,10 @@ See `analysis.html' in the main folder: immediate visualisation of (by default) all results in the `saves/' folder. 
\subsection{Python scripts} - The package includes Python (2.7) scripts (in `scripts/') to extract ratings and comments, generate visualisations of ratings and timelines, and produce a fully fledged report. + The package includes Python (2.7) scripts (in `python/') to extract ratings and comments, generate visualisations of ratings and timelines, and produce a fully fledged report. Visualisation requires the free matplotlib toolbox (http://matplotlib.org), numpy and scipy. - By default, the scripts can be run from the `scripts' folder, with the result files in the `saves' folder (the default location where result XMLs are stored). Each script takes the XML file folder as an argument, along with other arguments in some cases. + By default, the scripts can be run from the `python' folder, with the result files in the `saves' folder (the default location where result XMLs are stored). Each script takes the XML file folder as an argument, along with other arguments in some cases. Note: to avoid all kinds of problems, please avoid using spaces in file and folder names (this may work on some systems, but others don't like it). \subsubsection{comment\_parser.py}
<html lang="en">
    <head>
        <meta charset="utf-8" />
        <script type="text/javascript">
            // Repository path: python/comment_parser.html
            //
            // Each button asks comment_parser.php for the extracted comments in
            // one format and, once the response arrives, adds a "Save File"
            // download link to the #download container.

            // Request the comments in the given format ("XML", "JSON" or "CSV")
            // and pass the response body to onSuccess. Responses with a
            // non-2xx status are reported on the console instead of being
            // offered as a download.
            function requestComments(format, onSuccess)
            {
                var XMLHttp = new XMLHttpRequest();
                XMLHttp.open("GET","comment_parser.php?format="+format,true);
                XMLHttp.onload = function() {
                    if (XMLHttp.status < 200 || XMLHttp.status >= 300) {
                        console.error("comment_parser.php?format="+format+" returned status "+XMLHttp.status);
                        return;
                    }
                    onSuccess(XMLHttp.response);
                };
                XMLHttp.onerror = function() {
                    console.error("Request for comment_parser.php?format="+format+" failed");
                };
                XMLHttp.send();
            }

            function getXML()
            {
                requestComments("XML", function(response) {
                    // Parse the response and serialise its root element again
                    // with the XML serialiser (not the HTML one used by
                    // innerHTML), so element names that collide with HTML void
                    // elements keep their children and end tags.
                    var parse = new DOMParser();
                    var ajax = parse.parseFromString(response,'text/xml');
                    var file = [new XMLSerializer().serializeToString(ajax.documentElement)];
                    var bb = new Blob(file,{type : 'application/xml'});
                    generateLink(bb,".xml");
                });
            }

            function getJSON()
            {
                requestComments("JSON", function(response) {
                    // The JSON text is offered for download unchanged
                    var bb = new Blob([response],{type : 'application/json'});
                    generateLink(bb,".json");
                });
            }

            function getCSV()
            {
                requestComments("CSV", function(response) {
                    // The CSV text is offered for download unchanged
                    var bb = new Blob([response],{type : 'text/csv'});
                    generateLink(bb,".csv");
                });
            }

            // Append a download link for blobfile, saved as "save" + fmt
            // (fmt includes the leading dot, e.g. ".csv").
            function generateLink(blobfile,fmt)
            {
                var dnlk = window.URL.createObjectURL(blobfile);
                var a = document.createElement("a");
                a.href = dnlk;
                a.download = "save"+fmt;
                a.textContent = "Save File";
                document.getElementById("download").appendChild(a);
            }
        </script>
    </head>
    <body>
        <h1>WAET Test Results Analysis</h1>
        <h2>Comment Extraction</h2>
        <p>All of the XMLs in the server 'saves/' directory are automatically parsed and downloaded, extracting only the comments. Simply select the comments you wish to extract below and your desired data format.</p>
        <div id="download"></div>
        <div>
            <button onclick="getXML();">XML</button>
            <button onclick="getJSON();">JSON</button>
            <button onclick="getCSV();">CSV</button>
        </div>
    </body>
</html>
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Extract listener comments from result XML files into per-fragment CSV files.

Repository path: python/comment_parser.py

Use: python comment_parser.py [XML_files_location]

For every ``*.xml`` file in the results folder, each page whose ``state`` is
``complete`` is visited, and for each of its ``audioelement`` children one row
``[file name without extension, comment]`` is written to
``<folder>/<page>/<page>-comments-<fragment>.csv``.  A CSV file is truncated the
first time it is touched during a run and appended to afterwards, so re-running
the script does not duplicate rows.

Written for Python 2.7; the print calls and CSV handling also work on Python 3.
"""

import xml.etree.ElementTree as ET
import os
import csv
import sys

DEFAULT_FOLDER = "../saves"  # 'saves/' folder as seen from the 'python/' folder
USAGE = "Use: python comment_parser.py [XML_files_location]"
PY2 = sys.version_info[0] < 3


def _resolve_folder(argv):
    """Return the results folder named by the command line (or the default)."""
    if len(argv) >= 3:
        # Explicit check instead of `assert`, which is stripped under -O
        sys.exit("comment_parser takes at most 1 command line argument\n" + USAGE)
    if len(argv) == 2:
        return argv[1]  # First command line argument is folder
    print(USAGE)
    print("Using default path: " + DEFAULT_FOLDER)
    return DEFAULT_FOLDER


def _open_csv(csv_name, mode):
    """Open a CSV file for the csv module on either Python major version."""
    if PY2:
        return open(csv_name, mode)
    # Python 3: the csv module wants newline='' and works on text
    return open(csv_name, mode, newline='', encoding='utf-8')


def _comment_text(audioelement):
    """Return the comment of an audioelement, or '' when there is none."""
    response = audioelement.find("./comment/response")
    if response is None or response.text is None:
        return ''
    return response.text


def main(argv):
    """Write the comment CSV files for the folder given in argv."""
    folder_name = _resolve_folder(argv)

    # check if folder_name exists
    if not os.path.exists(folder_name):
        print("Folder '"+folder_name+"' does not exist.")
        sys.exit()  # terminate script execution
    if not os.access(folder_name, os.W_OK):
        # the folder exists but we cannot create files in it
        print("No write privileges in folder '"+folder_name+"'.")

    # remember which CSV files have been opened during this run
    file_history = set()

    # sorted so that the row order in the CSV files is reproducible
    for xml_name in sorted(os.listdir(folder_name)):
        if not xml_name.endswith(".xml"):
            continue
        root = ET.parse(folder_name + '/' + xml_name).getroot()
        subject = xml_name[:-4]  # file name without '.xml'

        for page in root.findall("./page"):  # iterate over pages
            page_name = page.get('ref')

            if page_name is None:
                print("WARNING: " + xml_name + " contains empty page. (comment_parser.py)")
                # NOTE(review): this stops processing the remaining pages of
                # this file, as the original did - confirm whether skipping
                # only the empty page was intended.
                break

            if page.get("state") != "complete":
                print("WARNING: " + xml_name + " test page " + page_name + " is not complete, skipping.")
                continue

            # create folder [page_name] if not yet created
            page_folder = folder_name + "/" + page_name
            if not os.path.exists(page_folder):
                os.makedirs(page_folder)

            # for page [page_name], store comments related to fragment [id]
            for audioelement in page.findall("./audioelement"):
                audio_id = str(audioelement.get('ref'))
                csv_name = folder_name + '/' + page_name + '/' + page_name + '-comments-' + audio_id + '.csv'

                # first touch this run: start from an empty file; later: append
                if csv_name in file_history:
                    mode = 'a'
                else:
                    mode = 'w'
                    file_history.add(csv_name)

                commentstr = _comment_text(audioelement)
                if PY2:
                    commentstr = commentstr.encode("utf-8")

                with _open_csv(csv_name, mode) as csvfile:
                    writer = csv.writer(csvfile,
                                        delimiter=',',
                                        dialect="excel",
                                        quoting=csv.QUOTE_ALL)
                    # comments with (file) name; drop `subject` for anonymous comments
                    writer.writerow([subject, commentstr])

                #TODO Replace 'new line' in comment with something else?


if __name__ == "__main__":
    main(sys.argv)

# PRO TIP: Change from csv to txt by running this in bash:
# $ cd folder_where_csvs_are/
# $ for i in *.csv; do mv "$i" "${i/.csv}".txt; done
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print summary statistics for a folder of listening test result XML files.

Repository path: python/evaluation_stats.py

Use: python evaluation_stats.py [results_folder]

For each ``*.xml`` file the script prints, per completed page, the time spent
and the number of comments given, and flags fragments that were not played or
whose marker was not moved.  It then prints totals and averages over all files.

Written for Python 2.7; the print calls also work on Python 3.
"""

import xml.etree.ElementTree as ET
import os         # for getting files from directory
import operator   # for sorting data with multiple keys
import sys        # for accessing command line arguments

DEFAULT_FOLDER = "../saves"  # 'saves/' folder as seen from the 'python/' folder
USAGE = "Use: python evaluation_stats.py [results_folder]"


def seconds2timestr(time_in_seconds):
    """Turn a number of seconds into a '[minutes] min [seconds] s' string.

    Returns 'N/A' when the value is None or NaN (unknown duration).
    """
    # NaN is the only value that differs from itself
    if time_in_seconds is None or time_in_seconds != time_in_seconds:
        return 'N/A'
    time_in_minutes = int(time_in_seconds/60)
    remaining_seconds = int(time_in_seconds%60)
    return str(time_in_minutes) + " min " + str(remaining_seconds) + " s"


def _percentage(part, whole):
    """Return part/whole as a percentage string, or 'N/A' when whole is 0."""
    if not whole:
        return "N/A"
    return str(round(100.0*part/whole, 2)) + "%"


def _resolve_folder(argv):
    """Return the results folder named by the command line (or the default)."""
    if len(argv) >= 3:
        # Explicit check instead of `assert`, which is stripped under -O
        sys.exit("evaluation_stats takes at most 1 command line argument\n" + USAGE)
    if len(argv) == 2:
        return argv[1]  # First command line argument is folder
    print(USAGE)
    print("Using default path: " + DEFAULT_FOLDER)
    return DEFAULT_FOLDER


def main(argv):
    """Read every XML file in the folder given in argv and print the stats."""
    folder_name = _resolve_folder(argv)

    # stats initialisation
    number_of_XML_files = 0
    number_of_pages = 0
    number_of_fragments = 0
    total_empty_comments = 0
    total_not_played = 0
    total_not_moved = 0
    time_per_page_accum = 0

    # arrays initialisation (the first four are parallel lists)
    page_names = []
    page_count = []
    duration_page = []       # durations per page content
    fragments_per_page = []  # number of fragments for corresponding page
    duration_order = []      # durations per page position (first, second, ...)

    # sorted so that the printed output is reproducible
    for xml_name in sorted(os.listdir(folder_name)):
        if not xml_name.endswith(".xml"):  # check if XML file
            continue
        number_of_XML_files += 1
        root = ET.parse(folder_name + '/' + xml_name).getroot()

        print(xml_name)  # print file name (subject name)

        # reset for new subject
        total_duration = 0
        page_number = 0

        for audioholder in root.findall("./page"):  # iterate over pages
            page_name = audioholder.get('ref')  # get page name

            # NOTE(review): both checks below stop processing the remaining
            # pages of this file, as the original did.
            if page_name is None:
                print("WARNING: " + xml_name + " contains empty audio holder. (evaluation_stats.py)")
                break
            if audioholder.get("state") != "complete":
                print("WARNING: " + xml_name + " contains incomplete audio holder.")
                break

            number_of_comments = 0          # for this page
            number_of_missing_comments = 0  # for this page
            not_played = 0                  # for this page
            not_moved = 0                   # for this page

            # 'testTime' keeps total duration: subtract time so far for
            # duration of this page; unknown (NaN) when the metric is missing
            test_time = audioholder.find("./metric/metricresult[@id='testTime']")
            if test_time is not None and test_time.text is not None:
                duration = float(test_time.text) - total_duration
                total_duration += duration
            else:
                duration = float('nan')
                total_duration = float('nan')

            # number of audio elements
            audioelements = audioholder.findall("./audioelement")
            number_of_fragments += len(audioelements)

            # number of comments (interesting if comments not mandatory)
            for audioelement in audioelements:
                if audioelement.get("type") == "outside-reference":
                    continue
                response = audioelement.find("./comment/response")
                was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
                was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")
                # a missing <response> element counts as a missing comment
                if response is not None and response.text is not None and len(response.text) > 1:
                    number_of_comments += 1
                else:
                    number_of_missing_comments += 1
                if was_played is not None and was_played.text == 'false':
                    not_played += 1
                if was_moved is not None and was_moved.text == 'false':
                    not_moved += 1

            # update global counters
            total_empty_comments += number_of_missing_comments
            total_not_played += not_played
            total_not_moved += not_moved

            # print page name, duration and comment count
            print(" " + page_name + ": " + seconds2timestr(duration) + ", "
                  + str(number_of_comments) + "/"
                  + str(number_of_comments+number_of_missing_comments) + " comments")

            # number of audio elements not played
            if not_played > 1:
                print('ATTENTION: '+str(not_played)+' fragments were not listened to!')
            if not_played == 1:
                print('ATTENTION: one fragment was not listened to!')

            # number of audio element markers not moved
            if not_moved > 1:
                print('ATTENTION: '+str(not_moved)+' markers were not moved!')
            if not_moved == 1:
                print('ATTENTION: one marker was not moved!')

            # keep track of duration in function of page position
            if len(duration_order) > page_number:
                duration_order[page_number].append(duration)
            else:
                duration_order.append([duration])

            # Count how often each page was tested and how long it took.  A
            # page seen with a different number of fragments than its first
            # occurrence is stored under the name '<page>(<fragments>)'.
            entry_name = page_name
            if entry_name in page_names and \
                    fragments_per_page[page_names.index(entry_name)] != len(audioelements):
                entry_name = page_name+"("+str(len(audioelements))+")"
            if entry_name in page_names:
                page_index = page_names.index(entry_name)
                page_count[page_index] += 1
                duration_page[page_index].append(duration)
            else:
                page_names.append(entry_name)
                page_count.append(1)
                duration_page.append([duration])
                fragments_per_page.append(len(audioelements))

            # bookkeeping
            page_number += 1                 # page count for this specific test
            number_of_pages += 1             # total number of pages
            time_per_page_accum += duration  # for average time spent per page

        # print total duration of this test
        print(" TOTAL: " + seconds2timestr(total_duration))

    # PRINT EVERYTHING

    print("Number of XML files: " + str(number_of_XML_files))
    print("Number of pages: " + str(number_of_pages))
    print("Number of fragments: " + str(number_of_fragments))
    print("Number of empty comments: " + str(total_empty_comments) +
          " (" + _percentage(total_empty_comments, number_of_fragments) + ")")
    print("Number of unplayed fragments: " + str(total_not_played) +
          " (" + _percentage(total_not_played, number_of_fragments) + ")")
    print("Number of unmoved markers: " + str(total_not_moved) +
          " (" + _percentage(total_not_moved, number_of_fragments) + ")")
    if number_of_pages:
        average_time_per_page = time_per_page_accum/number_of_pages
    else:
        average_time_per_page = None  # nothing to average; printed as 'N/A'
    print("Average time per page: " + seconds2timestr(average_time_per_page))

    # Pages and number of times tested, interleaved: [name, count, name, ...]
    count_list = []
    for name, count in zip(page_names, page_count):
        count_list.append(name)
        count_list.append(str(count))
    print("Pages tested: " + str(count_list))

    # Average duration for first, second, ... page
    print("Average duration per page:")
    for position, durations in enumerate(duration_order):
        print(" page " + str(position+1) + ": " +
              seconds2timestr(sum(durations)/len(durations)) +
              " ("+str(len(durations))+" subjects)")

    # average duration and number of subjects per page
    average_duration_page = []
    number_of_subjects_page = []
    for line in duration_page:
        number_of_subjects_page.append(len(line))
        average_duration_page.append(sum(line)/len(line))

    # combine and sort by average duration, then by number of fragments
    combined_list = sorted(zip(page_names, average_duration_page,
                               fragments_per_page, number_of_subjects_page),
                           key=operator.itemgetter(1, 2))

    # Show average duration for all pages
    print("Average duration per audioholder:")
    for name, average_duration, fragments, subjects in combined_list:
        print(" " + name + ": "
              + seconds2timestr(average_duration)
              + " (" + str(subjects) + " subjects, "
              + str(fragments) + " fragments)")


if __name__ == "__main__":
    main(sys.argv)


#TODO
# time per page in function of number of fragments (plot)
# time per participant in function of number of pages
# plot total time for each participant
# plot total time
# show 'count' per page (in order)

# clear up page_index <> page_count <> page_number confusion

# LaTeX -> PDF print out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/generate_report.py Wed Apr 20 16:33:22 2016 +0200 @@ -0,0 +1,531 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import xml.etree.ElementTree as ET +import os # for getting files from directory +import operator # for sorting data with multiple keys +import sys # for accessing command line arguments +import subprocess # for calling pdflatex +import shlex # for calling pdflatex +import matplotlib.pyplot as plt # plots +import numpy as np # numbers + +# Command line arguments +assert len(sys.argv)<4, "generate_report takes at most 2 command line arguments\n"+\ + "Use: python generate_report.py [results_folder] [no_render | -nr]" + +render_figures = True + +# XML results files location +if len(sys.argv) == 1: + folder_name = "../saves/" # Looks in 'saves/' folder from 'scripts/' folder + print "Use: python generate_report.py [results_folder] [no_render | -nr]" + print "Using default path: " + folder_name +elif len(sys.argv) == 2: + folder_name = sys.argv[1] # First command line argument is folder +elif len(sys.argv) == 3: + folder_name = sys.argv[1] # First command line argument is folder + assert sys.argv[2] in ('no_render','-nr'), "Second argument not recognised. 
\n" +\ + "Use: python generate_report.py [results_folder] [no_render | -nr]" + # Second command line argument is [no_render | -nr] + render_figures = False + +def isNaN(num): + return num != num + +# Turn number of seconds (int) to '[minutes] min [seconds] s' (string) +def seconds2timestr(time_in_seconds): + if time_in_seconds is not None and not isNaN(time_in_seconds): + time_in_minutes = int(time_in_seconds/60) + remaining_seconds = int(time_in_seconds%60) + return str(time_in_minutes) + " min " + str(remaining_seconds) + " s" + else: + return 'N/A' + +# stats initialisation +number_of_XML_files = 0 +number_of_pages = 0 +number_of_fragments = 0 +total_empty_comments = 0 +total_not_played = 0 +total_not_moved = 0 +time_per_page_accum = 0 + +# arrays initialisation +page_names = [] +real_page_names = [] # regardless of differing numbers of fragments +subject_count = [] # subjects per audioholder name +page_count = [] +duration_page = [] # duration of experiment in function of page content +duration_order = [] # duration of experiment in function of page number +fragments_per_page = [] # number of fragments for corresponding page + +# survey stats +gender = [] +age = [] + +# get username if available +for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): + user = os.environ.get(name) + if user: + break + else: + user = '' + + +# begin LaTeX document +header = r'''\documentclass[11pt, oneside]{article} + \usepackage{geometry} + \geometry{a4paper} + \usepackage[parfill]{parskip} % empty line instead of indent + \usepackage{graphicx} % figures + \usepackage[space]{grffile} % include figures with spaces in paths + \usepackage{hyperref} + \usepackage{tikz} % pie charts + \title{Report} + \author{'''+\ + user+\ + r'''} + \graphicspath{{'''+\ + folder_name+\ + r'''}} + %\setcounter{section}{-1} % Summary section 0 so number of sections equals number of files + \begin{document} + \maketitle + This is an automatically generated report using the `generate\_report.py' Python 
script + included with the Web Audio Evaluation Tool \cite{WAET} distribution which can be found + at \texttt{code.soundsoftware.ac.uk/projects/webaudioevaluationtool}. + \tableofcontents + + ''' + +footer = '\n\t\t'+r'''\begin{thebibliography}{9} + \bibitem{WAET} % reference to accompanying publication + Nicholas Jillings, Brecht De Man, David Moffat and Joshua D. Reiss, + ``Web Audio Evaluation Tool: A browser-based listening test environment,'' + presented at the 12th Sound and Music Computing Conference, July 2015. + \end{thebibliography} + \end{document}''' + +body = '' + +# make sure folder_name ends in '/' +folder_name = os.path.join(folder_name, '') + +# generate images for later use +if render_figures: + subprocess.call("python timeline_view_movement.py '"+folder_name+"'", shell=True) + subprocess.call("python score_parser.py '"+folder_name+"'", shell=True) + subprocess.call("python score_plot.py '"+folder_name+"ratings/'", shell=True) + +# get every XML file in folder +files_list = os.listdir(folder_name) +for file in files_list: # iterate over all files in files_list + if file.endswith(".xml"): # check if XML file + number_of_XML_files += 1 + tree = ET.parse(folder_name + file) + root = tree.getroot() + + # PRINT name as section + body+= '\n\section{'+file[:-4].capitalize()+'}\n' # make section header from name without extension + + # reset for new subject + total_duration = 0 + page_number = 0 + + individual_table = '\n' # table with stats for this individual test file + timeline_plots = '' # plots of timeline (movements and plays) + + # DEMO survey stats + # get gender + this_subjects_gender = root.find("./posttest/radio/[@id='gender']") + if this_subjects_gender is not None: + gender.append(this_subjects_gender.get("name")) + else: + gender.append('UNAVAILABLE') + # get age + this_subjects_age = root.find("./posttest/number/[@id='age']") + if this_subjects_age is not None: + age.append(this_subjects_age.text) + #TODO add plot of age + + # get list of 
all page names + for audioholder in root.findall("./page"): # iterate over pages + page_name = audioholder.get('id') # get page name + + if page_name is None: # ignore 'empty' audio_holders + print "WARNING: " + file + " contains empty audio holder. (evaluation_stats.py)" + break # move on to next + + number_of_comments = 0 # for this page + number_of_missing_comments = 0 # for this page + not_played = [] # for this page + not_moved = [] # for this page + + if audioholder.find("./metric/metricresult[@id='testTime']") is not None: # check if time is included + # 'testTime' keeps total duration: subtract time so far for duration of this audioholder + duration = float(audioholder.find("./metric/metricresult[@id='testTime']").text) - total_duration + + # total duration of test + total_duration += duration + else: + duration = float('nan') + total_duration = float('nan') + + # number of audio elements + audioelements = audioholder.findall("./audioelement") # get audioelements + number_of_fragments += len(audioelements) # add length of this list to total + + # number of comments (interesting if comments not mandatory) + for audioelement in audioelements: + response = audioelement.find("./comment/response") + was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']") + was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']") + if response.text is not None and len(response.text) > 1: + number_of_comments += 1 + else: + number_of_missing_comments += 1 + if was_played is not None and was_played.text == 'false': + not_played.append(audioelement.get('id')) + if was_moved is not None and was_moved.text == 'false': + not_moved.append(audioelement.get('id')) + + # update global counters + total_empty_comments += number_of_missing_comments + total_not_played += len(not_played) + total_not_moved += len(not_moved) + + # PRINT alerts when elements not played or markers not moved + # number of audio elements not played + if 
len(not_played) > 1: + body += '\t\t\\emph{\\textbf{ATTENTION: '+str(len(not_played))+\ + ' fragments were not listened to in '+page_name+'! }}'+\ + ', '.join(not_played)+'\\\\ \n' + if len(not_played) == 1: + body += '\t\t\\emph{\\textbf{ATTENTION: one fragment was not listened to in '+page_name+'! }}'+\ + not_played[0]+'\\\\ \n' + + # number of audio element markers not moved + if len(not_moved) > 1: + body += '\t\t\\emph{\\textbf{ATTENTION: '+str(len(not_moved))+\ + ' markers were not moved in '+page_name+'! }}'+\ + ', '.join(not_moved)+'\\\\ \n' + if len(not_moved) == 1: + body += '\t\t\\emph{\\textbf{ATTENTION: one marker was not moved in '+page_name+'! }}'+\ + not_moved[0]+'\\\\ \n' + + # PRINT song-specific statistic + individual_table += '\t\t'+page_name+'&'+\ + str(number_of_comments) + '/' +\ + str(number_of_comments+number_of_missing_comments)+'&'+\ + seconds2timestr(duration)+'\\\\\n' + + # get timeline for this audioholder + img_path = 'timelines_movement/'+file[:-4]+'-'+page_name+'.pdf' + + # check if available + if os.path.isfile(folder_name+img_path): + # SHOW timeline image + timeline_plots += '\\includegraphics[width=\\textwidth]{'+\ + folder_name+img_path+'}\n\t\t' + + # keep track of duration in function of page index + if len(duration_order)>page_number: + duration_order[page_number].append(duration) + else: + duration_order.append([duration]) + + # keep list of audioholder ids and count how many times each audioholder id + # was tested, how long it took, and how many fragments there were + # (if number of fragments is different, store as different audioholder id) + if page_name in page_names: + page_index = page_names.index(page_name) # get index + # check if number of audioelements the same + if len(audioelements) == fragments_per_page[page_index]: + page_count[page_index] += 1 + duration_page[page_index].append(duration) + else: # make new entry + alt_page_name = page_name+"("+str(len(audioelements))+")" + if alt_page_name in page_names: # 
if already there + alt_page_index = page_names.index(alt_page_name) # get index + page_count[alt_page_index] += 1 + duration_page[alt_page_index].append(duration) + else: + page_names.append(alt_page_name) + page_count.append(1) + duration_page.append([duration]) + fragments_per_page.append(len(audioelements)) + else: + page_names.append(page_name) + page_count.append(1) + duration_page.append([duration]) + fragments_per_page.append(len(audioelements)) + + # number of subjects per audioholder regardless of differing numbers of + # fragments (for inclusion in box plots) + if page_name in real_page_names: + page_index = real_page_names.index(page_name) # get index + subject_count[page_index] += 1 + else: + real_page_names.append(page_name) + subject_count.append(1) + + # bookkeeping + page_number += 1 # increase page count for this specific test + number_of_pages += 1 # increase total number of pages + time_per_page_accum += duration # total duration (for average time spent per page) + + # PRINT table with statistics about this test + body += '\t\t'+r'''\begin{tabular}{|p{3.5cm}|c|p{2.5cm}|} + \hline + \textbf{Song name} & \textbf{Comments} & \textbf{Duration} \\ \hline '''+\ + individual_table+'\t\t'+\ + r'''\hline + \textbf{TOTAL} & & \textbf{'''+\ + seconds2timestr(total_duration)+\ + r'''}\\ + \hline + \end{tabular} + + ''' + # PRINT timeline plots + body += timeline_plots + +# join to footer +footer = body + footer + +# empty body again +body = '' + +# PRINT summary of everything (at start) +# unnumbered so that number of sections equals number of files +body += '\section*{Summary}\n\t\t\\addcontentsline{toc}{section}{Summary}\n' + +# PRINT table with statistics +body += '\t\t\\begin{tabular}{ll}\n\t\t\t' +body += r'Number of XML files: &' + str(number_of_XML_files) + r'\\'+'\n\t\t\t' +body += r'Number of pages: &' + str(number_of_pages) + r'\\'+'\n\t\t\t' +body += r'Number of fragments: &' + str(number_of_fragments) + r'\\'+'\n\t\t\t' +body += r'Number of empty 
comments: &' + str(total_empty_comments) +\ + " (" + str(round(100.0*total_empty_comments/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' +body += r'Number of unplayed fragments: &' + str(total_not_played) +\ + " (" + str(round(100.0*total_not_played/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' +body += r'Number of unmoved markers: &' + str(total_not_moved) +\ + " (" + str(round(100.0*total_not_moved/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' +body += r'Average time per page: &' + seconds2timestr(time_per_page_accum/number_of_pages) + r"\\"+'\n\t\t' +body += '\\end{tabular} \\vspace{1.5cm} \\\\ \n' + +# Average duration for first, second, ... page +body += "\t\t\\vspace{.5cm} \n\n\t\tAverage duration per page (see also Figure \\ref{fig:avgtimeperpage}): \\\\ \n\t\t" +body += r'''\begin{tabular}{lll} + \textbf{Page} & \textbf{Duration} & \textbf{\# subjects}\\''' +tpp_averages = [] # store average time per page +for page_number in range(len(duration_order)): + body += '\n\t\t\t'+str(page_number+1) + "&" +\ + seconds2timestr(sum(duration_order[page_number])/len(duration_order[page_number])) +\ + "&"+str(len(duration_order[page_number]))+r"\\" + tpp_averages.append(sum(duration_order[page_number])/len(duration_order[page_number])) + +body += '\n\t\t\\end{tabular} \\vspace{1.5cm} \\\\ \n\n\t\t' + +# SHOW bar plot of average time per page +plt.bar(range(1,len(duration_order)+1), np.array(tpp_averages)/60) +plt.xlabel('Page order') +plt.xlim(.8, len(duration_order)+1) +plt.xticks(np.arange(1,len(duration_order)+1)+.4, range(1,len(duration_order)+1)) +plt.ylabel('Average time [minutes]') +plt.savefig(folder_name+"time_per_page.pdf", bbox_inches='tight') +plt.close() +#TODO add error bars + + +# Sort pages by number of audioelements, then by duration + +# average duration and number of subjects per page +average_duration_page = [] +number_of_subjects_page = [] +for line in duration_page: + number_of_subjects_page.append(len(line)) + 
average_duration_page.append(sum(line)/len(line)) + +# combine and sort in function of number of audioelements and duration +combined_list = [page_names, average_duration_page, fragments_per_page, number_of_subjects_page] +combined_list = sorted(zip(*combined_list), key=operator.itemgetter(1, 2)) # sort + +# Show average duration for all songs +body += r'''\vspace{.5cm} + Average duration per audioholder (see also Figure \ref{fig:avgtimeperaudioholder}): \\ + \begin{tabular}{llll} + \textbf{Audioholder} & \textbf{Duration} & \textbf{\# subjects} & \textbf{\# fragments} \\''' +audioholder_names_ordered = [] +average_duration_audioholder_ordered = [] +number_of_subjects = [] +for page_index in range(len(page_names)): + audioholder_names_ordered.append(combined_list[page_index][0]) + average_duration_audioholder_ordered.append(combined_list[page_index][1]) + number_of_subjects.append(combined_list[page_index][3]) + body += '\n\t\t\t'+combined_list[page_index][0] + "&" +\ + seconds2timestr(combined_list[page_index][1]) + "&" +\ + str(combined_list[page_index][3]) + "&" +\ + str(combined_list[page_index][2]) + r"\\" +body += '\n\t\t\\end{tabular}\n' + +# SHOW bar plot of average time per page +plt.bar(range(1,len(audioholder_names_ordered)+1), np.array(average_duration_audioholder_ordered)/60) +plt.xlabel('Audioholder') +plt.xlim(.8, len(audioholder_names_ordered)+1) +plt.xticks(np.arange(1,len(audioholder_names_ordered)+1)+.4, audioholder_names_ordered, rotation=90) +plt.ylabel('Average time [minutes]') +plt.savefig(folder_name+"time_per_audioholder.pdf", bbox_inches='tight') +plt.close() + +# SHOW bar plot of average time per page +plt.bar(range(1,len(audioholder_names_ordered)+1), number_of_subjects) +plt.xlabel('Audioholder') +plt.xlim(.8, len(audioholder_names_ordered)+1) +plt.xticks(np.arange(1,len(audioholder_names_ordered)+1)+.4, audioholder_names_ordered, rotation=90) +plt.ylabel('Number of subjects') +ax = plt.gca() +ylims = ax.get_ylim() +yint = 
np.arange(int(np.floor(ylims[0])), int(np.ceil(ylims[1]))+1) +plt.yticks(yint) +plt.savefig(folder_name+"subjects_per_audioholder.pdf", bbox_inches='tight') +plt.close() + +# SHOW both figures +body += r''' + \begin{figure}[htbp] + \begin{center} + \includegraphics[width=.65\textwidth]{'''+\ + folder_name+'time_per_page.pdf'+\ + r'''} + \caption{Average time spent per page.} + \label{fig:avgtimeperpage} + \end{center} + \end{figure} + + ''' +body += r'''\begin{figure}[htbp] + \begin{center} + \includegraphics[width=.65\textwidth]{'''+\ + folder_name+'time_per_audioholder.pdf'+\ + r'''} + \caption{Average time spent per audioholder.} + \label{fig:avgtimeperaudioholder} + \end{center} + \end{figure} + + ''' +body += r'''\begin{figure}[htbp] + \begin{center} + \includegraphics[width=.65\textwidth]{'''+\ + folder_name+'subjects_per_audioholder.pdf'+\ + r'''} + \caption{Number of subjects per audioholder.} + \label{fig:subjectsperaudioholder} + \end{center} + \end{figure} + + ''' +#TODO add error bars +#TODO layout of figures + +# SHOW boxplot per audioholder +#TODO order in decreasing order of participants +for audioholder_name in page_names: # get each name + # plot boxplot if exists (not so for the 'alt' names) + if os.path.isfile(folder_name+'ratings/'+audioholder_name+'-ratings-box.pdf'): + body += r'''\begin{figure}[htbp] + \begin{center} + \includegraphics[width=.65\textwidth]{'''+\ + folder_name+"ratings/"+audioholder_name+'-ratings-box.pdf'+\ + r'''} + \caption{Box plot of ratings for audioholder '''+\ + audioholder_name+' ('+str(subject_count[real_page_names.index(audioholder_name)])+\ + ''' participants).} + \label{fig:boxplot'''+audioholder_name.replace(" ", "")+'''} + \end{center} + \end{figure} + + ''' + +# DEMO pie chart of gender distribution among subjects +genders = ['male', 'female', 'other', 'preferNotToSay', 'UNAVAILABLE'] +# TODO: get the above automatically +gender_distribution = '' +for item in genders: + number = gender.count(item) + if 
number>0: + gender_distribution += str("{:.2f}".format((100.0*number)/len(gender)))+\ + '/'+item.capitalize()+' ('+str(number)+'),\n' + +body += r''' + % Pie chart of gender distribution + \def\angle{0} + \def\radius{3} + \def\cyclelist{{"orange","blue","red","green"}} + \newcount\cyclecount \cyclecount=-1 + \newcount\ind \ind=-1 + \begin{figure}[htbp] + \begin{center}\begin{tikzpicture}[nodes = {font=\sffamily}] + \foreach \percent/\name in {'''+\ + gender_distribution+\ + r'''} {\ifx\percent\empty\else % If \percent is empty, do nothing + \global\advance\cyclecount by 1 % Advance cyclecount + \global\advance\ind by 1 % Advance list index + \ifnum6<\cyclecount % If cyclecount is larger than list + \global\cyclecount=0 % reset cyclecount and + \global\ind=0 % reset list index + \fi + \pgfmathparse{\cyclelist[\the\ind]} % Get color from cycle list + \edef\color{\pgfmathresult} % and store as \color + % Draw angle and set labels + \draw[fill={\color!50},draw={\color}] (0,0) -- (\angle:\radius) + arc (\angle:\angle+\percent*3.6:\radius) -- cycle; + \node at (\angle+0.5*\percent*3.6:0.7*\radius) {\percent\,\%}; + \node[pin=\angle+0.5*\percent*3.6:\name] + at (\angle+0.5*\percent*3.6:\radius) {}; + \pgfmathparse{\angle+\percent*3.6} % Advance angle + \xdef\angle{\pgfmathresult} % and store in \angle + \fi + }; + \end{tikzpicture} + \caption{Representation of gender across subjects} + \label{default} + \end{center} + \end{figure} + + ''' +# problem: some people entered twice? 
+ +#TODO +# time per page in function of number of fragments (plot) +# time per participant in function of number of pages +# plot total time for each participant +# show 'count' per page (in order) + +# clear up page_index <> page_count <> page_number confusion + + +texfile = header+body+footer # add bits together + +print 'pdflatex -output-directory="'+folder_name+'"" "'+ folder_name + 'Report.tex"' # DEBUG + +# write TeX file +with open(folder_name + 'Report.tex','w') as f: + f.write(texfile) +proc=subprocess.Popen(shlex.split('pdflatex -output-directory="'+folder_name+'" "'+ folder_name + 'Report.tex"')) +proc.communicate() +# run again +proc=subprocess.Popen(shlex.split('pdflatex -output-directory="'+folder_name+'" "'+ folder_name + 'Report.tex"')) +proc.communicate() + +#TODO remove auxiliary LaTeX files +try: + os.remove(folder_name + 'Report.aux') + os.remove(folder_name + 'Report.log') + os.remove(folder_name + 'Report.out') + os.remove(folder_name + 'Report.toc') +except OSError: + pass +
#!/usr/bin/python
# python/pythonServer.py
"""Local development web server for the Web Audio Evaluation Tool.

Serves the project root (the parent of this script's folder) on port 8000 to
clients connecting from 127.0.0.1, and emulates two server-side endpoints:

* ``GET /php/keygen.php?key=KEY`` registers a result key and creates an empty
  result file ``../saves/save-KEY.xml``;
* ``POST /save?key=KEY`` (or ``/php/save.php?key=KEY``) stores the request
  body in ``../saves/save-KEY.xml``.

Runs under both Python 2 and Python 3.
"""

# Detect the Python version to switch code between 2.x and 3.x
# http://stackoverflow.com/questions/9079036/detect-python-version-at-runtime
import sys

from os import walk
from os import path
from os import listdir
import inspect
import os
import pickle
import datetime
from xml.sax.saxutils import quoteattr

if sys.version_info[0] == 2:
    # Version 2.x
    import BaseHTTPServer
    import urllib2
    import urlparse
    from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
    from urllib2 import unquote
elif sys.version_info[0] == 3:
    # Version 3.x
    from http.server import BaseHTTPRequestHandler, HTTPServer
    import urllib as urllib2
    from urllib.parse import unquote

# Go to right folder, so the relative paths below do not depend on the
# directory the script was started from.
scriptdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))  # script directory
os.chdir(scriptdir)

PSEUDO_PATH = '../tests/'   # test definitions, relative to this script
PSEUDO_URL = '/tests/'      # the same folder as seen by the browser
SAVES_PATH = '../saves/'    # result files, relative to this script
WEB_ROOT = os.path.abspath(os.path.join(scriptdir, os.pardir))

# Content types sent for known extensions; anything else is sent as
# 'application/octet-stream'.
CONTENT_TYPES = {
    '.html': 'text/html',
    '.css': 'text/css',
    '.js': 'application/javascript',
}

# XML test files that are handed out in turn when '/pseudo.xml' is requested.
pseudo_files = []
for filename in listdir(PSEUDO_PATH):
    print(filename)
    if filename.endswith('.xml'):
        pseudo_files.append(filename)

# First 'test-N.xml' name that is not yet taken in the saves folder.
curSaveIndex = 0
curFileName = 'test-0.xml'
while path.isfile(SAVES_PATH + curFileName):
    curSaveIndex += 1
    curFileName = 'test-' + str(curSaveIndex) + '.xml'

if len(pseudo_files) > 0:
    pseudo_index = curSaveIndex % len(pseudo_files)
else:
    pseudo_index = 0

print('URL: http://localhost:8000/index.html')


def send404(s):
    """Send an empty 404 response on request handler ``s``."""
    s.send_response(404)
    s.send_header("Content-type", "text/html")
    s.end_headers()


def _send_body(s, status, content_type, body):
    """Send a complete response; text bodies are encoded as UTF-8.

    ``wfile`` only accepts bytes under Python 3, so every response body goes
    through this helper.
    """
    if not isinstance(body, bytes):
        body = body.encode('utf-8')
    s.send_response(status)
    s.send_header("Content-type", content_type)
    s.send_header("Content-Length", str(len(body)))
    s.end_headers()
    s.wfile.write(body)


def _request_key(request_path):
    """Return the key from ``...?name=KEY``, or None when it is absent.

    As before, the key is the text between the first and the second '=' of
    the first query segment.
    """
    segments = request_path.split('?')
    if len(segments) < 2:
        return None
    pieces = segments[1].split('=')
    if len(pieces) < 2 or not pieces[1]:
        return None
    return pieces[1]


def _save_path(key):
    """Return the result file path for ``key``.

    NOTE(review): ``key`` comes straight from the request and is not checked
    for path separators; requests are only accepted from 127.0.0.1.
    """
    return SAVES_PATH + 'save-' + key + '.xml'


def processFile(s):
    """Serve the static file named by ``s.path`` from the project root.

    Answers 404 when the file does not exist, cannot be read, or resolves to
    a location outside the project root.
    """
    s.path = s.path.split('?')[0][1:]  # drop query string and leading '/'
    fpath = os.path.abspath(os.path.join(WEB_ROOT, unquote(s.path)))
    inside_root = fpath.startswith(os.path.join(WEB_ROOT, ''))
    if not inside_root or not path.isfile(fpath):
        send404(s)
        return
    try:
        # Always read raw bytes: the content is sent unchanged and the
        # Content-Length header must match what is written.
        with open(fpath, 'rb') as fileDump:
            fileBytes = fileDump.read()
    except (IOError, OSError):
        send404(s)
        return
    # Use the real (last) extension; 'jquery-2.1.4.js' is JavaScript.
    extension = path.splitext(fpath)[1].lower()
    content_type = CONTENT_TYPES.get(extension, 'application/octet-stream')
    _send_body(s, 200, content_type, fileBytes)


def keygen(s):
    """Register the key given in the query string.

    Replies ``<state>OK</state>`` and creates an empty result file when the
    key is free, ``<state>NO</state>`` when a result file for that key
    already exists (the existing file is left untouched).
    """
    key = _request_key(s.path)
    if key is None:
        send404(s)
        return
    print("Registered key " + key)
    save_path = _save_path(key)
    if os.path.isfile(save_path):
        reply = "<response><state>NO</state><key>" + key + "</key></response>"
    else:
        reply = "<response><state>OK</state><key>" + key + "</key></response>"
        with open(save_path, 'w') as stub:
            # quoteattr adds the quotes, so the stub is well-formed XML
            stub.write("<waetresult key=" + quoteattr(key) + "/>")
    _send_body(s, 200, "application/xml", reply)


def saveFile(self):
    """Store the POST body as the result file of the key in the query string."""
    global curFileName
    global curSaveIndex
    key = _request_key(self.path)
    if key is None:
        send404(self)
        return
    varLen = int(self.headers.get('Content-Length') or 0)
    postVars = self.rfile.read(varLen)
    print("Saving file key " + key)
    save_path = _save_path(key)
    try:
        # the body is bytes, so write it in binary mode
        with open(save_path, 'wb') as result_file:
            result_file.write(postVars)
        wbytes = os.path.getsize(save_path)
    except (IOError, OSError):
        _send_body(self, 200, "text/xml",
                   '<response state="error"><message>Could not open file</message></response>')
        return  # do not also send the OK response
    # NOTE(review): the reported name is the running 'test-N.xml' counter,
    # not the 'save-KEY.xml' file actually written; kept for the client.
    _send_body(self, 200, "text/xml",
               '<response state="OK"><message>OK</message><file bytes="' + str(wbytes) +
               '">"saves/' + curFileName + '"</file></response>')
    curSaveIndex += 1
    curFileName = 'test-' + str(curSaveIndex) + '.xml'


def http_do_HEAD(s):
    """Answer a HEAD request with an empty 200 response."""
    s.send_response(200)
    s.send_header("Content-type", "text/html")
    s.end_headers()


def http_do_GET(request):
    """Dispatch a GET request from localhost; any other client gets a 404."""
    global pseudo_index  # advanced below, so it must be declared global
    if request.client_address[0] != "127.0.0.1":
        send404(request)
        return
    if request.path == "/favicon.ico":
        send404(request)
    elif request.path.split('?', 1)[0] == "/php/keygen.php":
        keygen(request)
    else:
        request.path = request.path.split('?', 1)[0]
        if request.path == '/':
            request.path = '/index.html'
        elif request.path == '/pseudo.xml':
            if not pseudo_files:
                send404(request)
                return
            # hand out the test files in turn
            request.path = PSEUDO_URL + pseudo_files[pseudo_index]
            print(request.path)
            pseudo_index = (pseudo_index + 1) % len(pseudo_files)
        processFile(request)


def http_do_POST(request):
    """Dispatch a POST request from localhost; any other client gets a 404."""
    if request.client_address[0] != "127.0.0.1":
        send404(request)
        return
    target = request.path.rsplit('?', 1)[0]
    if target == "/save" or target == "/php/save.php":
        saveFile(request)
    else:
        send404(request)


class MyHandler(BaseHTTPRequestHandler):
    """Request handler that forwards to the module-level functions."""

    def do_HEAD(s):
        # NOTE(review): as before, HEAD is answered with 200 under Python 2
        # but with 404 under Python 3 - confirm which one is intended.
        if sys.version_info[0] == 2:
            http_do_HEAD(s)
        else:
            send404(s)

    def do_GET(request):
        http_do_GET(request)

    def do_POST(request):
        http_do_POST(request)


def run(server_class=HTTPServer, handler_class=MyHandler):
    """Serve on port 8000 until interrupted."""
    server_address = ('', 8000)
    httpd = server_class(server_address, handler_class)
    httpd.serve_forever()


if __name__ == '__main__':
    run()
#!/usr/bin/python
# python/score_parser.py
"""Collect ratings from result XML files into one CSV file per test page.

For every ``*.xml`` file in the results folder and every complete page in it,
one row ``[subject ID, rating 1, ..., rating M]`` is written to
``<folder>/ratings/<page>-ratings.csv``.  The first row of each CSV file
holds the fragment IDs in alphabetical order, with new fragments appended as
they are encountered.  CSV files are emptied (header kept) the first time
they are touched in a run.

Use: python score_parser.py [rating_folder_location]
"""

import xml.etree.ElementTree as ET
import os
import sys
import csv


def _open_csv(file_path, mode):
    """Open ``file_path`` the way the csv module expects on this Python."""
    if sys.version_info[0] == 2:
        return open(file_path, mode + 'b')
    return open(file_path, mode, newline='')


def _prepare_header(csv_name, fragment_names, file_history):
    """Make sure ``csv_name`` exists with a header covering ``fragment_names``.

    Returns the header row (empty first cell, then fragment IDs).  Files not
    yet listed in ``file_history`` are reduced to their header and added to
    it.
    """
    if not os.path.isfile(csv_name):
        # file does not exist yet: create it with a sorted header
        headerrow = [''] + sorted(fragment_names)
        with _open_csv(csv_name, 'w') as writefile:
            csv.writer(writefile, delimiter=',').writerow(headerrow)
        file_history.append(csv_name)
        return headerrow

    with _open_csv(csv_name, 'r') as readfile:
        headerrow = next(csv.reader(readfile, delimiter=','), [''])

    # If file hasn't been opened yet this time, remove all rows except header
    if csv_name not in file_history:
        headerrow = sorted(headerrow)
        with _open_csv(csv_name, 'w') as writefile:
            csv.writer(writefile, delimiter=',').writerow(headerrow)
        file_history.append(csv_name)

    # Which of the fragments are in fragment_names but not in headerrow?
    newfragments = sorted(set(fragment_names) - set(headerrow))
    if newfragments:
        # rewrite the file next to the original, adding an empty cell per
        # new fragment to every existing row, then swap it in
        temp_name = csv_name + '.tmp'
        with _open_csv(temp_name, 'w') as writefile:
            filewriter = csv.writer(writefile, delimiter=',')
            filewriter.writerow(headerrow + newfragments)
            with _open_csv(csv_name, 'r') as readfile:
                filereader = csv.reader(readfile, delimiter=',')
                next(filereader, None)  # skip header
                for row in filereader:
                    filewriter.writerow(row + [''] * len(newfragments))
        # os.replace also overwrites on Windows; fall back where unavailable
        getattr(os, 'replace', os.rename)(temp_name, csv_name)
        headerrow = headerrow + newfragments
    return headerrow


def _process_subject(folder_name, xml_name, file_history):
    """Append the ratings of one result XML file to the per-page CSV files."""
    root = ET.parse(folder_name + '/' + xml_name).getroot()
    subject_id = xml_name[:-4]  # file name (without extension) as subject ID
    ratings_folder = folder_name + '/ratings'

    for page in root.findall("./page"):
        page_name = page.get('ref')  # page reference ID

        if page_name is None:  # stop at 'empty' audio_holders
            print("WARNING: " + xml_name + " contains empty audio holder. (score_parser.py)")
            break

        if page.get('state') != "complete":
            print("WARNING:" + xml_name + " contains incomplete page " + page_name + ". (score_parser.py)")
            break

        # create folder 'ratings' if not yet created
        if not os.path.exists(ratings_folder):
            os.makedirs(ratings_folder)
        csv_name = ratings_folder + '/' + page_name + '-ratings.csv'

        # fragment IDs rated on this page (elements without 'ref' ignored)
        fragment_names = [element.get('ref')
                          for element in page.findall("./audioelement")
                          if element.get('ref') is not None]

        headerrow = _prepare_header(csv_name, fragment_names, file_history)

        # row for this subject: one cell per fragment in the header
        ratingrow = [subject_id]
        for fragmentname in headerrow[1:]:  # skip first empty column
            elementvalue = page.find("./audioelement/[@ref='"
                                     + fragmentname
                                     + "']/value")
            if elementvalue is not None:  # rating for this fragment exists
                ratingrow.append(elementvalue.text)
            else:  # this subject has not rated this fragment
                ratingrow.append('')

        # append to file if row non-empty (except subject name)
        if any(ratingrow[1:]):
            with _open_csv(csv_name, 'a') as writefile:
                csv.writer(writefile, delimiter=',').writerow(ratingrow)


def main(argv=None):
    """Run the parser; ``argv`` defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv

    # COMMAND LINE ARGUMENTS
    if len(argv) >= 3:
        sys.exit("score_parser takes at most 1 command line argument\n" +
                 "Use: python score_parser.py [rating_folder_location]")

    # XML results files location
    if len(argv) == 1:
        folder_name = "../saves"  # 'saves/' folder as seen from this script's folder
        print("Use: python score_parser.py [rating_folder_location]")
        print("Using default path: " + folder_name)
    else:
        folder_name = argv[1]  # First command line argument is folder

    # check if folder_name exists
    if not os.path.exists(folder_name):
        print("Folder '" + folder_name + "' does not exist.")
        sys.exit()  # terminate script execution
    elif not os.access(folder_name, os.W_OK):
        # the folder exists but write privileges are not given
        print("No write privileges in folder '" + folder_name + "'.")

    # remember which CSV files have been opened this time
    file_history = []

    # get every XML file in folder (sorted, so row order is reproducible)
    for xml_name in sorted(os.listdir(folder_name)):
        if xml_name.endswith(".xml"):
            _process_subject(folder_name, xml_name, file_history)


if __name__ == '__main__':
    main()
#!/usr/bin/python
# python/score_plot.py
"""Plot the ratings CSV files produced by score_parser.py.

For every ``*.csv`` file in the ratings folder a PDF is saved next to it,
showing a box plot, a confidence interval plot and/or the individual
ratings, depending on the command line options.

Type 'python score_plot.py -h' for the options.
"""

import sys
import os
import csv
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats

#TODO: Merge, implement this functionality
#TODO: Control by CLI arguments (plot types, save and/or show, ...)

HELP_TEXT = """Use: python score_plot.py [rating_folder] [plot_type] [-l] [confidence]
 rating_folder:
 folder where output of 'score_parser' can be found, and
 where plots will be stored.
 By default, '../saves/ratings/' is used.

PLOT TYPES
 Can be used in combination.
 box | boxplot | -b
 Enables the boxplot
 conf | confidence | -c
 Enables the confidence interval plot
 ind | individual | -i
 Enables plot of individual ratings

PLOT OPTIONS
 leg | legend | -l
 For individual plot: show legend with individual file names
 numeric value between 0 and 1, e.g. 0.95
 For confidence interval plot: confidence value"""

# marker list and color map to cycle through in the individual plot
MARKERLIST = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"]
COLORMAP = ['b', 'r', 'g', 'c', 'm', 'y', 'k']


def _as_float(text):
    """Return ``text`` as a float, or None when it is not a number."""
    try:
        return float(text)
    except ValueError:
        return None


def _parse_arguments(arguments):
    """Turn the command line arguments (program name excluded) into options."""
    options = {
        'boxplot': False,        # show box plot
        'confidence_plot': False,  # show confidence interval
        'confidence': 0.90,      # confidence value (for confidence interval plot)
        'individual': False,     # show all individual ratings
        'show_individual': [],   # specific individuals (empty: show all found)
        'legend': False,         # show names of individuals
        # DEFAULT: 'saves/ratings/' folder as seen from this script's folder
        'rating_folder': "../saves/ratings/",
    }

    if not arguments:  # no extra arguments
        options['boxplot'] = True
        print("Use: python score_plot.py [rating folder] [plot_type] [-l/-legend]")
        print("Type 'python score_plot.py -h' for help.")
        print("Using default path: " + options['rating_folder'] + " with boxplot.")
        return options

    for arg in arguments:
        if arg == '-h':
            #TODO: replace with contents of helpfile score_plot.info (or similar)
            print(HELP_TEXT)
            sys.exit(0)  # stop immediately after showing help

        # PLOT TYPES
        elif arg in ('box', 'boxplot', '-b'):
            options['boxplot'] = True
        elif arg in ('conf', 'confidence', '-c'):
            options['confidence_plot'] = True
        elif arg in ('ind', 'individual', '-i'):
            options['individual'] = True

        # PLOT OPTIONS
        elif arg in ('leg', 'legend', '-l'):
            if not options['individual']:
                print("WARNING: The 'legend' option is only relevant to plots of " +
                      "individual ratings")
            options['legend'] = True
        elif _as_float(arg) is not None:
            # str.isdigit() rejects '0.95', so the value is parsed instead
            if not options['confidence_plot']:
                print("WARNING: The numeric confidence value is only relevant when " +
                      "confidence plot is enabled")
            if 0 < _as_float(arg) < 1:
                options['confidence'] = _as_float(arg)
            else:
                print("WARNING: The confidence value needs to be between 0 and 1")

        # FOLDER NAME
        else:
            # assume it's the folder name
            options['rating_folder'] = arg

    # at least one plot type should be selected: box plot by default
    if not (options['boxplot'] or options['confidence_plot'] or options['individual']):
        options['boxplot'] = True
    return options


def _read_ratings(csv_path):
    """Return (fragment names, subject IDs, ratings matrix) of one CSV file.

    The matrix has one row per subject and one column per fragment; empty
    cells become NaN.
    """
    with open(csv_path, 'r') as readfile:
        filereader = csv.reader(readfile, delimiter=',')
        headerrow = next(filereader, [])[1:]  # used as X-axis
        subject_ids = [row[0] for row in filereader if row]
    if not headerrow:
        return headerrow, subject_ids, np.empty((0, 0))
    ratings = np.genfromtxt(csv_path,
                            delimiter=",",
                            skip_header=1,
                            usecols=range(1, len(headerrow) + 1))
    # genfromtxt drops a dimension for a single row or column; restore it so
    # that rows are always subjects
    ratings = np.reshape(ratings, (-1, len(headerrow)))
    return headerrow, subject_ids, ratings


def _plot_confidence(ratings, confidence):
    """Draw mean and confidence interval of every fragment."""
    for column_index, column in enumerate(ratings.T):
        column = column[~np.isnan(column)]  # ratings actually given
        n = column.size                     # = number of subjects
        if n == 0:
            continue
        mean_rating = np.mean(column)
        if n > 1:
            err = scipy.stats.sem(column) * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
        else:
            err = 0  # no spread to estimate from a single rating
        plt.errorbar(column_index + 1,
                     mean_rating,
                     yerr=err,
                     marker="x",
                     color="k",
                     markersize=12,
                     linestyle='None')


def _plot_individuals(ratings, subject_ids, show_individual, show_legend):
    """Draw the ratings of each (selected) subject with its own marker."""
    linehandles = []
    legendnames = []
    x_values = list(range(1, ratings.shape[1] + 1))
    for row_index, subject_id in enumerate(subject_ids):
        if row_index >= ratings.shape[0]:
            break
        # older files carry the '.xml' extension in the subject column
        if subject_id.endswith('.xml'):
            subject_id = subject_id[:-4]
        # assume plotting all individuals if no individual(s) specified
        if show_individual and subject_id not in show_individual:
            continue
        plothandle, = plt.plot(x_values,
                               ratings[row_index, :],
                               color=COLORMAP[row_index % len(COLORMAP)],
                               marker=MARKERLIST[row_index % len(MARKERLIST)],
                               markersize=10,
                               linestyle='None',
                               label=subject_id)
        linehandles.append(plothandle)
        legendnames.append(subject_id)
    if show_legend and linehandles:
        plt.legend(linehandles, legendnames,
                   loc='upper right',
                   bbox_to_anchor=(1.1, 1),
                   borderaxespad=0.,
                   numpoints=1)  # remove extra marker


def main(argv=None):
    """Create the plots; ``argv`` defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    options = _parse_arguments(argv[1:])  # argv[0] is the script itself
    rating_folder = options['rating_folder']

    # check if rating_folder exists
    if not os.path.exists(rating_folder):
        print("Folder '" + rating_folder + "' does not exist.")
        sys.exit()  # terminate script execution
    elif not os.access(rating_folder, os.W_OK):
        # the folder exists but write privileges are not given
        print("No write privileges in folder '" + rating_folder + "'.")

    # CONFIGURATION: font settings
    font = {'weight': 'bold',
            'size': 10}
    plt.rc('font', **font)

    # get every csv file in folder
    for csv_name in sorted(os.listdir(rating_folder)):
        if not csv_name.endswith(".csv"):
            continue
        page_name = csv_name[:-4]  # file name (without extension) is page ID
        headerrow, subject_ids, ratings = _read_ratings(
            os.path.join(rating_folder, csv_name))

        # at least 2 subjects needed (move on to next file if violated)
        if ratings.shape[0] < 2:
            print("WARNING: Just one subject for " + page_name + ". Moving on to next file.")
            continue

        # BOXPLOT (boxplot does not ignore NaN, so drop missing ratings)
        if options['boxplot']:
            plt.boxplot([column[~np.isnan(column)] for column in ratings.T])

        # CONFIDENCE INTERVAL
        if options['confidence_plot']:
            _plot_confidence(ratings, options['confidence'])

        # INDIVIDUAL PLOT
        if options['individual'] or options['show_individual']:
            _plot_individuals(ratings, subject_ids,
                              options['show_individual'], options['legend'])

        # TITLE, AXIS LABELS AND LIMITS
        plt.title(page_name)
        plt.xlabel('Fragment')
        plt.xlim(0, len(headerrow) + 1)  # only show relevant region, leave space left & right
        plt.xticks(list(range(1, len(headerrow) + 1)), headerrow, rotation=90)  # fragment names
        plt.ylabel('Rating')
        plt.ylim(0, 1)

        # SAVE PLOT
        plot_type = ("-box" if options['boxplot'] else "") + \
                    ("-conf" if options['confidence_plot'] else "") + \
                    ("-ind" if options['individual'] else "")
        plt.savefig(os.path.join(rating_folder, page_name + plot_type + ".pdf"),
                    bbox_inches='tight')
        plt.close()


if __name__ == '__main__':
    main()
#!/usr/bin/python
# python/timeline_view.py
"""Draw, per subject and page, a timeline of when each fragment was played.

Reads every result ``*.xml`` file in the given folder and saves one PDF per
page with listen events to ``<folder>/timelines/<subject>-<page>.pdf``.

Use: python timeline_view.py [XML_files_location]
"""

import xml.etree.ElementTree as ET
import os  # list files in directory
import sys  # command line arguments
import matplotlib.pyplot as plt  # plots
import matplotlib.patches as patches  # rectangles

# CONFIGURATION

# Font settings
font = {'weight': 'bold',
        'size': 16}

# Colormap to cycle through
colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k']

# if enabled, x-axis shows time per audioholder, not total test time
show_audioholder_time = True

# bar height (<1 to avoid overlapping)
bar_height = 0.6

# figure size
fig_width = 25
fig_height = 5


def _draw_page(ax, audioelements, time_offset):
    """Draw the listen events of one page on ``ax``.

    Returns (fragment names in drawing order, whether nothing was drawn).
    """
    n_audioelements = len(audioelements)
    names = []
    plot_empty = True
    for increment, audioelement in enumerate(audioelements):
        names.append(str(audioelement.get('ref')))

        # for this audioelement, loop over all listen events
        listen_events = audioelement.findall(
            "./metric/metricResult/[@name='elementListenTracker']/event")
        for event in listen_events:
            plot_empty = False  # mark this plot as not empty

            # get testtime: start and stop
            start_time = float(event.find('testtime').get('start')) - time_offset
            stop_time = float(event.find('testtime').get('stop')) - time_offset
            # event lines:
            ax.plot([start_time, start_time],   # x-values
                    [0, n_audioelements + 1],   # y-values
                    color='k')
            ax.plot([stop_time, stop_time],     # x-values
                    [0, n_audioelements + 1],   # y-values
                    color='k')
            # plot time:
            ax.add_patch(
                patches.Rectangle(
                    (start_time, n_audioelements - increment - bar_height / 2),  # (x, y)
                    stop_time - start_time,  # width
                    bar_height,  # height
                    color=colormap[increment % len(colormap)]  # colour
                )
            )
    return names, plot_empty


def _process_subject(folder_name, timeline_folder, xml_name):
    """Save the timelines of all pages of one result XML file."""
    root = ET.parse(folder_name + '/' + xml_name).getroot()
    subject_id = xml_name[:-4]  # drop '.xml'

    time_offset = 0  # test starts at zero

    # ONE TIMELINE PER PAGE - make new plot per page
    for audioholder in root.findall("./page"):
        page_name = audioholder.get('ref')  # get page name

        if page_name is None:  # stop at 'empty' audio_holders
            print("WARNING: " + xml_name + " contains empty page. (timeline_view.py)")
            break

        if audioholder.get("state") != "complete":
            print("WARNING: " + xml_name + " test page " + page_name + " is not complete, skipping.")
            break

        # SORT AUDIO ELEMENTS ALPHABETICALLY (by 'ref' only, so elements
        # themselves are never compared)
        audioelements = sorted(audioholder.findall("./audioelement"),
                               key=lambda element: element.get("ref") or '')
        n_audioelements = len(audioelements)

        fig = plt.figure(figsize=(fig_width, fig_height))
        try:
            ax = fig.add_subplot(111)  # , aspect='equal'
            audioelements_names, plot_empty = _draw_page(ax, audioelements, time_offset)

            # subtract total audioholder length from subsequent audioholder event times
            # NOTE(review): 'metricresult' is lower case here but
            # 'metricResult' in the listen-event query - confirm which
            # spelling the result files use.
            audioholder_time = audioholder.find("./metric/metricresult/[@id='testTime']")
            if audioholder_time is not None and show_audioholder_time:
                time_offset = float(audioholder_time.text)

            if not plot_empty:
                # set plot parameters
                plt.title('Timeline ' + xml_name + ": " + page_name)
                plt.xlabel('Time [seconds]')
                plt.ylabel('Fragment')
                plt.ylim(0, n_audioelements + 1)

                # y-ticks: fragment IDs, top to bottom
                plt.yticks(list(range(n_audioelements, 0, -1)), audioelements_names)

                plt.savefig(timeline_folder + subject_id + "-" + page_name + ".pdf",
                            bbox_inches='tight')
        finally:
            # close the figure even when nothing was plotted, otherwise
            # empty figures pile up in memory
            plt.close(fig)

        #TODO: if 'nonsensical' or unknown: dashed line until next event
        #TODO: Vertical lines for fragment looping point


def main(argv=None):
    """Create the timelines; ``argv`` defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv

    # COMMAND LINE ARGUMENTS
    if len(argv) >= 3:
        sys.exit("timeline_view takes at most 1 command line argument\n" +
                 "Use: python timeline_view.py [XML_files_location]")

    # XML results files location
    if len(argv) == 1:
        folder_name = "../saves"  # 'saves/' folder as seen from this script's folder
        print("Use: python timeline_view.py [XML_files_location]")
        print("Using default path: " + folder_name)
    else:
        folder_name = argv[1]  # First command line argument is folder

    # check if folder_name exists
    if not os.path.exists(folder_name):
        print("Folder '" + folder_name + "' does not exist.")
        sys.exit()  # terminate script execution
    elif not os.access(folder_name, os.W_OK):
        # the folder exists but write privileges are not given
        print("No write privileges in folder '" + folder_name + "'.")

    plt.rc('font', **font)

    # Folder where to store timelines ('saves/timelines/' by default)
    timeline_folder = folder_name + '/timelines/'
    if not os.path.exists(timeline_folder):
        os.makedirs(timeline_folder)

    # get every XML file in folder
    for xml_name in os.listdir(folder_name):
        if xml_name.endswith(".xml"):
            _process_subject(folder_name, timeline_folder, xml_name)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/timeline_view_movement.py Wed Apr 20 16:33:22 2016 +0200 @@ -0,0 +1,311 @@ +#!/usr/bin/python + +import xml.etree.ElementTree as ET +import os # list files in directory +import sys # command line arguments +import matplotlib.pyplot as plt # plots +import matplotlib.patches as patches # rectangles + + +# COMMAND LINE ARGUMENTS + +assert len(sys.argv)<3, "timeline_view_movement takes at most 1 command line argument\n"+\ + "Use: python timeline_view_movement.py [XML_files_location]" + +# XML results files location +if len(sys.argv) == 1: + folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder + print "Use: python timeline_view_movement.py [XML_files_location]" + print "Using default path: " + folder_name +elif len(sys.argv) == 2: + folder_name = sys.argv[1] # First command line argument is folder + +# check if folder_name exists +if not os.path.exists(folder_name): + #the file is not there + print "Folder '"+folder_name+"' does not exist." + sys.exit() # terminate script execution +elif not os.access(os.path.dirname(folder_name), os.W_OK): + #the file does exist but write privileges are not given + print "No write privileges in folder '"+folder_name+"'." 
+ + +# CONFIGURATION + +# Folder where to store timelines +timeline_folder = folder_name + '/timelines_movement/' # Stores in 'saves/timelines_movement/' by default + +# Font settings +font = {'weight' : 'bold', + 'size' : 16} +plt.rc('font', **font) + +# Colormap for to cycle through +colormap = ['b', 'g', 'c', 'm', 'y', 'k'] + +# figure size +fig_width = 25 +fig_height = 10 + + +# CODE + +# create timeline_folder if not yet created +if not os.path.exists(timeline_folder): + os.makedirs(timeline_folder) + +# get every XML file in folder +for file in os.listdir(folder_name): + if file.endswith(".xml"): + tree = ET.parse(folder_name + '/' + file) + root = tree.getroot() + subject_id = file[:-4] # drop '.xml' + + previous_page_time = 0 # time spent before current page + time_offset = 0 # test starts at zero + + # ONE TIMELINE PER PAGE - make new plot per page + + # get list of all page names + for page in root.findall("./page"): # iterate over pages + page_name = page.get('ref') # get page name + plot_empty = True # check if any data is plotted + + if page_name is None: # ignore 'empty' audio_holders + print "Skipping empty page name from "+subject_id+"." + break + + # subtract total page length from subsequent page event times + page_time_temp = page.find("./metric/metricresult/[@id='testTime']") + if page_time_temp is not None: + page_time = float(page_time_temp.text) + else: + print "Skipping page without total time specified from "+subject_id+"." 
+ break + + # get audioelements + audioelements = page.findall("./audioelement") + + # sort alphabetically + data = [] + for elem in audioelements: # from http://effbot.org/zone/element-sort.htm + key = elem.get("ref") + data.append((key, elem)) + data.sort() + + N_audioelements = len(audioelements) # number of audio elements for this page + increment = 0 # increased for every new audioelement + + # get axes handle + fig = plt.figure(figsize=(fig_width, fig_height)) + ax = fig.add_subplot(111) + + # for page [page_name], print comments related to fragment [id] + #for tuple in data: + # audioelement = tuple[1] + for tuple in data: + audioelement = tuple[1] + if audioelement is not None: # Check it exists + audio_id = str(audioelement.get('ref')) + + # break if no initial position or move events registered + initial_position_temp = audioelement.find("./metric/metricResult/[@name='elementInitialPosition']") + if initial_position_temp is None: + print "Skipping "+page_name+" from "+subject_id+": does not have initial positions specified." 
+ break + + # get move events, initial and eventual position + initial_position = float(initial_position_temp.text) + move_events = audioelement.findall("./metric/metricResult/[@name='elementTrackerFull']/movement") + final_position = float(audioelement.find("./value").text) + + # get listen events + start_times_global = [] + stop_times_global = [] + listen_events = audioelement.findall("./metric/metricResult/[@name='elementListenTracker']/event") + for event in listen_events: + # get testtime: start and stop + start_times_global.append(float(event.find('testtime').get('start'))-time_offset) + stop_times_global.append(float(event.find('testtime').get('stop'))-time_offset) + + # display fragment name at start + plt.text(0,initial_position+0.02,audio_id,color=colormap[increment%len(colormap)]) #,rotation=45 + + # previous position and time + previous_position = initial_position + previous_time = 0 + + # assume not playing at start + currently_playing = False # keep track of whether fragment is playing during move event + + # draw all segments except final one + for event in move_events: + # mark this plot as not empty + plot_empty = False + + # get time and final position of move event + new_time = float(event.get("time"))-time_offset + new_position = float(event.get("value")) + + # get play/stop events since last move until current move event + stop_times = [] + start_times = [] + # is there a play and/or stop event between previous_time and new_time? 
+ for time in start_times_global: + if time>previous_time and time<new_time: + start_times.append(time) + for time in stop_times_global: + if time>previous_time and time<new_time: + stop_times.append(time) + # if no play/stop events between move events, find out whether playing + + segment_start = previous_time # first segment starts at previous move event + + # draw segments (horizontal line) + while len(start_times)+len(stop_times)>0: # while still play/stop events left + if len(stop_times)<1: # upcoming event is 'play' + # draw non-playing segment from segment_start to 'play' + currently_playing = False + segment_stop = start_times.pop(0) # remove and return first item + elif len(start_times)<1: # upcoming event is 'stop' + # draw playing segment (red) from segment_start to 'stop' + currently_playing = True + segment_stop = stop_times.pop(0) # remove and return first item + elif start_times[0]<stop_times[0]: # upcoming event is 'play' + # draw non-playing segment from segment_start to 'play' + currently_playing = False + segment_stop = start_times.pop(0) # remove and return first item + else: # stop_times[0]<start_times[0]: upcoming event is 'stop' + # draw playing segment (red) from segment_start to 'stop' + currently_playing = True + segment_stop = stop_times.pop(0) # remove and return first item + + # draw segment + plt.plot([segment_start, segment_stop], # x-values + [previous_position, previous_position], # y-values + color='r' if currently_playing else colormap[increment%len(colormap)], + linewidth=3 + ) + segment_start = segment_stop # move on to next segment + currently_playing = not currently_playing # toggle to draw final segment correctly + + # draw final segment (horizontal line) from last 'segment_start' to current move event time + plt.plot([segment_start, new_time], # x-values + [previous_position, previous_position], # y-values + # color depends on playing during move event or not: + color='r' if currently_playing else 
colormap[increment%len(colormap)], + linewidth=3 + ) + + # vertical line from previous to current position + plt.plot([new_time, new_time], # x-values + [previous_position, new_position], # y-values + # color depends on playing during move event or not: + color='r' if currently_playing else colormap[increment%len(colormap)], + linewidth=3 + ) + + # update previous_position value + previous_position = new_position + previous_time = new_time + + + + # draw final horizontal segment (or only segment if audioelement not moved) + # horizontal line from previous time to end of page + + # get play/stop events since last move until current move event + stop_times = [] + start_times = [] + # is there a play and/or stop event between previous_time and new_time? + for time in start_times_global: + if time>previous_time and time<page_time-time_offset: + start_times.append(time) + for time in stop_times_global: + if time>previous_time and time<page_time-time_offset: + stop_times.append(time) + # if no play/stop events between move events, find out whether playing + + segment_start = previous_time # first segment starts at previous move event + + # draw segments (horizontal line) + while len(start_times)+len(stop_times)>0: # while still play/stop events left + # mark this plot as not empty + plot_empty = False + if len(stop_times)<1: # upcoming event is 'play' + # draw non-playing segment from segment_start to 'play' + currently_playing = False + segment_stop = start_times.pop(0) # remove and return first item + elif len(start_times)<1: # upcoming event is 'stop' + # draw playing segment (red) from segment_start to 'stop' + currently_playing = True + segment_stop = stop_times.pop(0) # remove and return first item + elif start_times[0]<stop_times[0]: # upcoming event is 'play' + # draw non-playing segment from segment_start to 'play' + currently_playing = False + segment_stop = start_times.pop(0) # remove and return first item + else: # stop_times[0]<start_times[0]: upcoming event 
is 'stop' + # draw playing segment (red) from segment_start to 'stop' + currently_playing = True + segment_stop = stop_times.pop(0) # remove and return first item + + # draw segment + plt.plot([segment_start, segment_stop], # x-values + [previous_position, previous_position], # y-values + color='r' if currently_playing else colormap[increment%len(colormap)], + linewidth=3 + ) + segment_start = segment_stop # move on to next segment + currently_playing = not currently_playing # toggle to draw final segment correctly + + # draw final segment (horizontal line) from last 'segment_start' to current move event time + plt.plot([segment_start, page_time-time_offset], # x-values + [previous_position, previous_position], # y-values + # color depends on playing during move event or not: + color='r' if currently_playing else colormap[increment%len(colormap)], + linewidth=3 + ) + +# plt.plot([previous_time, page_time-time_offset], # x-values +# [previous_position, previous_position], # y-values +# color=colormap[increment%len(colormap)], +# linewidth=3 +# ) + + # display fragment name at end + plt.text(page_time-time_offset,previous_position,\ + audio_id,color=colormap[increment%len(colormap)]) #,rotation=45 + + increment+=1 # to next audioelement + + last_page_duration = page_time-time_offset + time_offset = page_time + + if not plot_empty: # if plot is not empty, show or store + # set plot parameters + plt.title('Timeline ' + file + ": "+page_name) + plt.xlabel('Time [seconds]') + plt.xlim(0, last_page_duration) + plt.ylabel('Rating') # default + plt.ylim(0, 1) # rating between 0 and 1 + + #y-ticks: labels on rating axis + label_positions = [] + label_text = [] + scale_tags = root.findall("./BrowserEvalProjectDocument/audioHolder/interface/scale") + scale_title = root.find("./BrowserEvalProjectDocument/audioHolder/interface/title") + for tag in scale_tags: + label_positions.append(float(tag.get('position'))/100) # on a scale from 0 to 100 + label_text.append(tag.text) + if 
len(label_positions) > 0: # if any labels available + plt.yticks(label_positions, label_text) # show rating axis labels + # set label Y-axis + if scale_title is not None: + plt.ylabel(scale_title.text) + + #plt.show() # uncomment to show plot; comment when just saving + #exit() + + plt.savefig(timeline_folder+subject_id+"-"+page_name+".pdf", bbox_inches='tight') + plt.close() +
<html lang="en">
    <head>
        <meta charset="utf-8" />
        <script type="text/javascript">
            // Request the comments from comment_parser.php in the given
            // format ("XML", "JSON" or "CSV") and pass the response to
            // onResponse once the request has loaded.
            function requestComments(format, onResponse)
            {
                var XMLHttp = new XMLHttpRequest();
                XMLHttp.open("GET","comment_parser.php?format="+format,true);
                XMLHttp.onload = function() {
                    onResponse(XMLHttp.response);
                }
                XMLHttp.send();
            }

            // Fetch the comments as XML and offer them as 'save.xml'.
            function getXML()
            {
                requestComments("XML", function(response) {
                    // Now we have the XML data, extract
                    var parse = new DOMParser();
                    var ajax = parse.parseFromString(response,'text/xml');

                    // Move the root element into a detached <div> so that
                    // innerHTML gives its serialised markup.
                    var parent = document.createElement("div");
                    parent.appendChild(ajax.children[0]);
                    var file = [parent.innerHTML];
                    var bb = new Blob(file,{type : 'application/xml'});
                    generateLink(bb,".xml");
                });
            }

            // Fetch the comments as JSON and offer them as 'save.json'.
            function getJSON()
            {
                requestComments("JSON", function(response) {
                    // The JSON text is offered for download unchanged
                    var bb = new Blob([response],{type : 'application/json'});
                    generateLink(bb,".json");
                });
            }

            // Fetch the comments as CSV and offer them as 'save.csv'.
            function getCSV()
            {
                requestComments("CSV", function(response) {
                    // The CSV text is offered for download unchanged
                    var bb = new Blob([response],{type : 'text/csv'});
                    generateLink(bb,".csv");
                });
            }

            // Append a "Save File" download link for blobfile to the
            // 'download' element; fmt is the file extension, dot included.
            function generateLink(blobfile,fmt)
            {
                var dnlk = window.URL.createObjectURL(blobfile);
                var a = document.createElement("a");
                a.href = dnlk;
                a.download = "save"+fmt;
                a.textContent = "Save File";
                document.getElementById("download").appendChild(a);
            }
        </script>
    </head>
    <body>
        <h1>WAET Test Results Analysis</h1>
        <h2>Comment Extraction</h2>
        <p>All of the XMLs in the server 'saves/' directory are automatically parsed and downloaded, extracting only the comments. Simply select the comments you wish to extract below and your desired data format.</p>
        <div id="download"></div>
        <div>
            <button onclick="getXML();">XML</button>
            <button onclick="getJSON();">JSON</button>
            <button onclick="getCSV();">CSV</button>
        </div>
    </body>
</html>
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Extract the comments from listening test result XML files into CSV files.

Every '*.xml' file in the results folder is parsed.  For each page marked as
complete, the comment on each audio element is written as one row
(file name without '.xml', comment) to
'[folder]/[page_name]/[page_name]-comments-[id].csv'.

Use: python comment_parser.py [XML_files_location]
"""

import xml.etree.ElementTree as ET
import os
import csv
import sys

USAGE = "Use: python comment_parser.py [XML_files_location]"
DEFAULT_FOLDER = "../saves"  # Looks in 'saves/' folder from 'scripts/' folder
PYTHON2 = sys.version_info[0] < 3


def get_folder_name(argv):
    """Return the results folder selected on the command line.

    argv -- command line, program name first; at most one argument (the
            folder) may follow.  Without it DEFAULT_FOLDER is used and the
            usage is printed.

    Exits the script when too many arguments are given or when the folder
    does not exist.
    """
    # explicit exit rather than `assert`, which `python -O` strips
    if len(argv) > 2:
        sys.exit("comment_parser takes at most 1 command line argument\n" + USAGE)

    if len(argv) == 2:
        folder_name = argv[1]  # First command line argument is folder
    else:
        folder_name = DEFAULT_FOLDER
        print(USAGE)
        print("Using default path: " + folder_name)

    if not os.path.exists(folder_name):
        print("Folder '" + folder_name + "' does not exist.")
        sys.exit()  # terminate script execution
    if not os.access(folder_name, os.W_OK):
        # the CSV files are created inside folder_name, so that is the
        # directory whose write permission matters (not its parent)
        print("No write privileges in folder '" + folder_name + "'.")
    return folder_name


def _open_csv(csv_name, mode):
    """Open csv_name in the way the csv module of the running Python expects."""
    if PYTHON2:
        return open(csv_name, mode + 'b')
    return open(csv_name, mode, newline='', encoding='utf-8')


def _csv_text(text):
    """Return text in the string type the csv module of the running Python expects."""
    if PYTHON2:
        return text.encode("utf-8")
    return text


def extract_comments(folder_name):
    """Write the comments of every XML file in folder_name to CSV files.

    A CSV file is emptied the first time it is written to during this call
    and appended to afterwards, so it ends up with one row per XML file that
    contains the page and audio element.

    Returns the list of CSV file names written, in order of creation.
    """
    # remember which files have been opened this time
    file_history = []

    # sorted so that the order of the rows does not depend on the file system
    for file_name in sorted(os.listdir(folder_name)):
        if not file_name.endswith(".xml"):
            continue
        subject_id = file_name[:-4]  # drop '.xml'
        root = ET.parse(os.path.join(folder_name, file_name)).getroot()

        for page in root.findall("./page"):  # iterate over pages
            page_name = page.get('ref')

            if page_name is None:
                # ignore this page only; later pages are still processed
                print("WARNING: " + file_name + " contains empty page. (comment_parser.py)")
                continue

            if page.get("state") != "complete":
                print("WARNING: " + file_name + " test page " + page_name + " is not complete, skipping.")
                continue

            # create folder [page_name] if not yet created
            page_folder = os.path.join(folder_name, page_name)
            if not os.path.exists(page_folder):
                os.makedirs(page_folder)

            for audioelement in page.findall("./audioelement"):
                audio_id = str(audioelement.get('ref'))
                csv_name = os.path.join(page_folder,
                                        page_name + '-comments-' + audio_id + '.csv')

                # an absent or empty comment is stored as an empty string
                response = audioelement.find("./comment/response")
                commentstr = None if response is None else response.text
                if commentstr is None:
                    commentstr = ''

                if csv_name in file_history:
                    mode = 'a'  # append (!) to file written earlier this run
                else:
                    mode = 'w'  # first use this run: start from an empty file
                    file_history.append(csv_name)

                # `with` closes the file again after the row is written
                with _open_csv(csv_name, mode) as csvfile:
                    writer = csv.writer(csvfile,
                                        delimiter=',',
                                        dialect="excel",
                                        quoting=csv.QUOTE_ALL)
                    # comments with (file) name; drop subject_id for
                    # anonymous comments
                    writer.writerow([subject_id, _csv_text(commentstr)])

                #TODO Replace 'new line' in comment with something else?

    return file_history


def main():
    """Run the comment extraction for the folder given on the command line."""
    extract_comments(get_folder_name(sys.argv))


if __name__ == "__main__":
    main()

# PRO TIP: Change from csv to txt by running this in bash:
# $ cd folder_where_csvs_are/
# $ for i in *.csv; do mv "$i" "${i/.csv}".txt; done
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print statistics about the result XML files of a listening test.

For every '*.xml' file in the results folder this prints, per page, the
duration and the number of comments, and warns about fragments that were not
listened to and markers that were not moved.  A summary over all files is
printed at the end.

Use: python evaluation_stats.py [results_folder]
"""

import xml.etree.ElementTree as ET
import os        # for getting files from directory
import operator  # for sorting data with multiple keys
import sys       # for accessing command line arguments

USAGE = "Use: python evaluation_stats.py [results_folder]"
DEFAULT_FOLDER = "../saves"  # Looks in 'saves/' folder from 'scripts/' folder


def seconds2timestr(time_in_seconds):
    """Turn number of seconds into a '[minutes] min [seconds] s' string."""
    time_in_minutes = int(time_in_seconds/60)
    remaining_seconds = int(time_in_seconds%60)
    return str(time_in_minutes) + " min " + str(remaining_seconds) + " s"


def percentage(part, total):
    """Return part as a percentage of total, rounded to two decimals.

    Returns 0.0 when total is zero, so that an empty results folder does not
    end in a ZeroDivisionError.
    """
    if not total:
        return 0.0
    return round(100.0*part/total, 2)


def main(argv):
    """Print the statistics of the results folder selected by argv.

    argv -- command line, program name first; at most one argument (the
            results folder) may follow.  Without it DEFAULT_FOLDER is used
            and the usage is printed.
    """
    # explicit exit rather than `assert`, which `python -O` strips
    if len(argv) > 2:
        sys.exit("evaluation_stats takes at most 1 command line argument\n" + USAGE)

    # XML results files location
    if len(argv) == 2:
        folder_name = argv[1]  # First command line argument is folder
    else:
        folder_name = DEFAULT_FOLDER
        print(USAGE)
        print("Using default path: " + folder_name)

    # stats initialisation
    number_of_XML_files = 0
    number_of_pages = 0
    number_of_fragments = 0
    total_empty_comments = 0
    total_not_played = 0
    total_not_moved = 0
    time_per_page_accum = 0

    # arrays initialisation
    page_names = []
    page_count = []
    duration_page = []       # duration of experiment in function of page content
    duration_order = []      # duration of experiment in function of page number
    fragments_per_page = []  # number of fragments for corresponding page

    # get every XML file in folder
    for file_name in os.listdir(folder_name):
        if not file_name.endswith(".xml"):
            continue
        number_of_XML_files += 1
        root = ET.parse(folder_name + '/' + file_name).getroot()

        print(file_name)  # print file name (subject name)

        # reset for new subject
        total_duration = 0
        page_number = 0

        for audioholder in root.findall("./page"):  # iterate over pages
            page_name = audioholder.get('ref')

            # The duration of a page is derived from the running total in
            # 'testTime', so the rest of the file is dropped as soon as one
            # page cannot be used.
            if page_name is None:
                print("WARNING: " + file_name + " contains empty audio holder. (evaluation_stats.py)")
                break
            if audioholder.get("state") != "complete":
                print("WARNING: " + file_name + " contains incomplete audio holder.")
                break
            test_time = audioholder.find("./metric/metricresult[@id='testTime']")
            if test_time is None or test_time.text is None:
                print("WARNING: " + file_name + " contains audio holder without test time.")
                break

            number_of_comments = 0          # for this page
            number_of_missing_comments = 0  # for this page
            not_played = 0                  # for this page
            not_moved = 0                   # for this page

            # 'testTime' keeps total duration: subtract time so far for
            # duration of this audioholder
            duration = float(test_time.text) - total_duration

            # total duration of test
            total_duration += duration

            # number of audio elements
            audioelements = audioholder.findall("./audioelement")
            number_of_fragments += len(audioelements)

            # number of comments (interesting if comments not mandatory)
            for audioelement in audioelements:
                if audioelement.get("type") == "outside-reference":
                    continue
                response = audioelement.find("./comment/response")
                was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
                was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")
                # an absent response element counts as a missing comment
                if response is not None and response.text is not None and len(response.text) > 1:
                    number_of_comments += 1
                else:
                    number_of_missing_comments += 1
                if was_played is not None and was_played.text == 'false':
                    not_played += 1
                if was_moved is not None and was_moved.text == 'false':
                    not_moved += 1

            # update global counters
            total_empty_comments += number_of_missing_comments
            total_not_played += not_played
            total_not_moved += not_moved

            # print audioholder id and duration
            print(" " + page_name + ": " + seconds2timestr(duration) + ", "
                  + str(number_of_comments) + "/"
                  + str(number_of_comments+number_of_missing_comments) + " comments")

            # number of audio elements not played
            if not_played > 1:
                print('ATTENTION: ' + str(not_played) + ' fragments were not listened to!')
            if not_played == 1:
                print('ATTENTION: one fragment was not listened to!')

            # number of audio element markers not moved
            if not_moved > 1:
                print('ATTENTION: ' + str(not_moved) + ' markers were not moved!')
            if not_moved == 1:
                print('ATTENTION: one marker was not moved!')

            # keep track of duration in function of page index
            if len(duration_order) > page_number:
                duration_order[page_number].append(duration)
            else:
                duration_order.append([duration])

            # keep list of audioholder ids and count how many times each
            # audioholder id was tested, how long it took, and how many
            # fragments there were (if number of fragments is different,
            # store as different audioholder id)
            entry_name = page_name
            if (entry_name in page_names and
                    len(audioelements) != fragments_per_page[page_names.index(entry_name)]):
                entry_name = page_name + "(" + str(len(audioelements)) + ")"
            if entry_name in page_names:
                entry_index = page_names.index(entry_name)
                page_count[entry_index] += 1
                duration_page[entry_index].append(duration)
            else:
                page_names.append(entry_name)
                page_count.append(1)
                duration_page.append([duration])
                fragments_per_page.append(len(audioelements))

            # bookkeeping
            page_number += 1                 # page count for this specific test
            number_of_pages += 1             # total number of pages
            time_per_page_accum += duration  # for average time spent per page

        # print total duration of this test
        print(" TOTAL: " + seconds2timestr(total_duration))

    # PRINT EVERYTHING

    print("Number of XML files: " + str(number_of_XML_files))
    print("Number of pages: " + str(number_of_pages))
    print("Number of fragments: " + str(number_of_fragments))
    print("Number of empty comments: " + str(total_empty_comments) +
          " (" + str(percentage(total_empty_comments, number_of_fragments)) + "%)")
    print("Number of unplayed fragments: " + str(total_not_played) +
          " (" + str(percentage(total_not_played, number_of_fragments)) + "%)")
    print("Number of unmoved markers: " + str(total_not_moved) +
          " (" + str(percentage(total_not_moved, number_of_fragments)) + "%)")
    # no pages at all: report an average of zero instead of dividing by zero
    average_time_per_page = time_per_page_accum/number_of_pages if number_of_pages else 0
    print("Average time per page: " + seconds2timestr(average_time_per_page))

    # Pages and number of times tested: [name, count, name, count, ...]
    count_list = []
    for name, count in zip(page_names, page_count):
        count_list += [name, str(count)]
    print("Pages tested: " + str(count_list))

    # Average duration for first, second, ... page
    print("Average duration per page:")
    for page_number, durations in enumerate(duration_order):
        print(" page " + str(page_number+1) + ": " +
              seconds2timestr(sum(durations)/len(durations)) +
              " (" + str(len(durations)) + " subjects)")

    # average duration and number of subjects per page
    average_duration_page = []
    number_of_subjects_page = []
    for line in duration_page:
        number_of_subjects_page.append(len(line))
        average_duration_page.append(sum(line)/len(line))

    # combine and sort by average duration, then by number of audioelements
    # (itemgetter(1, 2) picks the duration first)
    combined_list = sorted(zip(page_names, average_duration_page,
                               fragments_per_page, number_of_subjects_page),
                           key=operator.itemgetter(1, 2))

    # Show average duration for all songs
    print("Average duration per audioholder:")
    for name, average_duration, fragments, subjects in combined_list:
        print(" " + name + ": "
              + seconds2timestr(average_duration)
              + " (" + str(subjects) + " subjects, "
              + str(fragments) + " fragments)")


if __name__ == "__main__":
    main(sys.argv)

#TODO
# time per page in function of number of fragments (plot)
# time per participant in function of number of pages
# plot total time for each participant
# plot total time
# show 'count' per page (in order)

# clear up page_index <> page_count <> page_number confusion

# LaTeX -> PDF print out
--- a/scripts/generate_report.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,531 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -import xml.etree.ElementTree as ET -import os # for getting files from directory -import operator # for sorting data with multiple keys -import sys # for accessing command line arguments -import subprocess # for calling pdflatex -import shlex # for calling pdflatex -import matplotlib.pyplot as plt # plots -import numpy as np # numbers - -# Command line arguments -assert len(sys.argv)<4, "generate_report takes at most 2 command line arguments\n"+\ - "Use: python generate_report.py [results_folder] [no_render | -nr]" - -render_figures = True - -# XML results files location -if len(sys.argv) == 1: - folder_name = "../saves/" # Looks in 'saves/' folder from 'scripts/' folder - print "Use: python generate_report.py [results_folder] [no_render | -nr]" - print "Using default path: " + folder_name -elif len(sys.argv) == 2: - folder_name = sys.argv[1] # First command line argument is folder -elif len(sys.argv) == 3: - folder_name = sys.argv[1] # First command line argument is folder - assert sys.argv[2] in ('no_render','-nr'), "Second argument not recognised. 
\n" +\ - "Use: python generate_report.py [results_folder] [no_render | -nr]" - # Second command line argument is [no_render | -nr] - render_figures = False - -def isNaN(num): - return num != num - -# Turn number of seconds (int) to '[minutes] min [seconds] s' (string) -def seconds2timestr(time_in_seconds): - if time_in_seconds is not None and not isNaN(time_in_seconds): - time_in_minutes = int(time_in_seconds/60) - remaining_seconds = int(time_in_seconds%60) - return str(time_in_minutes) + " min " + str(remaining_seconds) + " s" - else: - return 'N/A' - -# stats initialisation -number_of_XML_files = 0 -number_of_pages = 0 -number_of_fragments = 0 -total_empty_comments = 0 -total_not_played = 0 -total_not_moved = 0 -time_per_page_accum = 0 - -# arrays initialisation -page_names = [] -real_page_names = [] # regardless of differing numbers of fragments -subject_count = [] # subjects per audioholder name -page_count = [] -duration_page = [] # duration of experiment in function of page content -duration_order = [] # duration of experiment in function of page number -fragments_per_page = [] # number of fragments for corresponding page - -# survey stats -gender = [] -age = [] - -# get username if available -for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): - user = os.environ.get(name) - if user: - break - else: - user = '' - - -# begin LaTeX document -header = r'''\documentclass[11pt, oneside]{article} - \usepackage{geometry} - \geometry{a4paper} - \usepackage[parfill]{parskip} % empty line instead of indent - \usepackage{graphicx} % figures - \usepackage[space]{grffile} % include figures with spaces in paths - \usepackage{hyperref} - \usepackage{tikz} % pie charts - \title{Report} - \author{'''+\ - user+\ - r'''} - \graphicspath{{'''+\ - folder_name+\ - r'''}} - %\setcounter{section}{-1} % Summary section 0 so number of sections equals number of files - \begin{document} - \maketitle - This is an automatically generated report using the `generate\_report.py' Python 
script - included with the Web Audio Evaluation Tool \cite{WAET} distribution which can be found - at \texttt{code.soundsoftware.ac.uk/projects/webaudioevaluationtool}. - \tableofcontents - - ''' - -footer = '\n\t\t'+r'''\begin{thebibliography}{9} - \bibitem{WAET} % reference to accompanying publication - Nicholas Jillings, Brecht De Man, David Moffat and Joshua D. Reiss, - ``Web Audio Evaluation Tool: A browser-based listening test environment,'' - presented at the 12th Sound and Music Computing Conference, July 2015. - \end{thebibliography} - \end{document}''' - -body = '' - -# make sure folder_name ends in '/' -folder_name = os.path.join(folder_name, '') - -# generate images for later use -if render_figures: - subprocess.call("python timeline_view_movement.py '"+folder_name+"'", shell=True) - subprocess.call("python score_parser.py '"+folder_name+"'", shell=True) - subprocess.call("python score_plot.py '"+folder_name+"ratings/'", shell=True) - -# get every XML file in folder -files_list = os.listdir(folder_name) -for file in files_list: # iterate over all files in files_list - if file.endswith(".xml"): # check if XML file - number_of_XML_files += 1 - tree = ET.parse(folder_name + file) - root = tree.getroot() - - # PRINT name as section - body+= '\n\section{'+file[:-4].capitalize()+'}\n' # make section header from name without extension - - # reset for new subject - total_duration = 0 - page_number = 0 - - individual_table = '\n' # table with stats for this individual test file - timeline_plots = '' # plots of timeline (movements and plays) - - # DEMO survey stats - # get gender - this_subjects_gender = root.find("./posttest/radio/[@id='gender']") - if this_subjects_gender is not None: - gender.append(this_subjects_gender.get("name")) - else: - gender.append('UNAVAILABLE') - # get age - this_subjects_age = root.find("./posttest/number/[@id='age']") - if this_subjects_age is not None: - age.append(this_subjects_age.text) - #TODO add plot of age - - # get list of 
all page names - for audioholder in root.findall("./page"): # iterate over pages - page_name = audioholder.get('id') # get page name - - if page_name is None: # ignore 'empty' audio_holders - print "WARNING: " + file + " contains empty audio holder. (evaluation_stats.py)" - break # move on to next - - number_of_comments = 0 # for this page - number_of_missing_comments = 0 # for this page - not_played = [] # for this page - not_moved = [] # for this page - - if audioholder.find("./metric/metricresult[@id='testTime']") is not None: # check if time is included - # 'testTime' keeps total duration: subtract time so far for duration of this audioholder - duration = float(audioholder.find("./metric/metricresult[@id='testTime']").text) - total_duration - - # total duration of test - total_duration += duration - else: - duration = float('nan') - total_duration = float('nan') - - # number of audio elements - audioelements = audioholder.findall("./audioelement") # get audioelements - number_of_fragments += len(audioelements) # add length of this list to total - - # number of comments (interesting if comments not mandatory) - for audioelement in audioelements: - response = audioelement.find("./comment/response") - was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']") - was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']") - if response.text is not None and len(response.text) > 1: - number_of_comments += 1 - else: - number_of_missing_comments += 1 - if was_played is not None and was_played.text == 'false': - not_played.append(audioelement.get('id')) - if was_moved is not None and was_moved.text == 'false': - not_moved.append(audioelement.get('id')) - - # update global counters - total_empty_comments += number_of_missing_comments - total_not_played += len(not_played) - total_not_moved += len(not_moved) - - # PRINT alerts when elements not played or markers not moved - # number of audio elements not played - if 
len(not_played) > 1: - body += '\t\t\\emph{\\textbf{ATTENTION: '+str(len(not_played))+\ - ' fragments were not listened to in '+page_name+'! }}'+\ - ', '.join(not_played)+'\\\\ \n' - if len(not_played) == 1: - body += '\t\t\\emph{\\textbf{ATTENTION: one fragment was not listened to in '+page_name+'! }}'+\ - not_played[0]+'\\\\ \n' - - # number of audio element markers not moved - if len(not_moved) > 1: - body += '\t\t\\emph{\\textbf{ATTENTION: '+str(len(not_moved))+\ - ' markers were not moved in '+page_name+'! }}'+\ - ', '.join(not_moved)+'\\\\ \n' - if len(not_moved) == 1: - body += '\t\t\\emph{\\textbf{ATTENTION: one marker was not moved in '+page_name+'! }}'+\ - not_moved[0]+'\\\\ \n' - - # PRINT song-specific statistic - individual_table += '\t\t'+page_name+'&'+\ - str(number_of_comments) + '/' +\ - str(number_of_comments+number_of_missing_comments)+'&'+\ - seconds2timestr(duration)+'\\\\\n' - - # get timeline for this audioholder - img_path = 'timelines_movement/'+file[:-4]+'-'+page_name+'.pdf' - - # check if available - if os.path.isfile(folder_name+img_path): - # SHOW timeline image - timeline_plots += '\\includegraphics[width=\\textwidth]{'+\ - folder_name+img_path+'}\n\t\t' - - # keep track of duration in function of page index - if len(duration_order)>page_number: - duration_order[page_number].append(duration) - else: - duration_order.append([duration]) - - # keep list of audioholder ids and count how many times each audioholder id - # was tested, how long it took, and how many fragments there were - # (if number of fragments is different, store as different audioholder id) - if page_name in page_names: - page_index = page_names.index(page_name) # get index - # check if number of audioelements the same - if len(audioelements) == fragments_per_page[page_index]: - page_count[page_index] += 1 - duration_page[page_index].append(duration) - else: # make new entry - alt_page_name = page_name+"("+str(len(audioelements))+")" - if alt_page_name in page_names: # 
if already there - alt_page_index = page_names.index(alt_page_name) # get index - page_count[alt_page_index] += 1 - duration_page[alt_page_index].append(duration) - else: - page_names.append(alt_page_name) - page_count.append(1) - duration_page.append([duration]) - fragments_per_page.append(len(audioelements)) - else: - page_names.append(page_name) - page_count.append(1) - duration_page.append([duration]) - fragments_per_page.append(len(audioelements)) - - # number of subjects per audioholder regardless of differing numbers of - # fragments (for inclusion in box plots) - if page_name in real_page_names: - page_index = real_page_names.index(page_name) # get index - subject_count[page_index] += 1 - else: - real_page_names.append(page_name) - subject_count.append(1) - - # bookkeeping - page_number += 1 # increase page count for this specific test - number_of_pages += 1 # increase total number of pages - time_per_page_accum += duration # total duration (for average time spent per page) - - # PRINT table with statistics about this test - body += '\t\t'+r'''\begin{tabular}{|p{3.5cm}|c|p{2.5cm}|} - \hline - \textbf{Song name} & \textbf{Comments} & \textbf{Duration} \\ \hline '''+\ - individual_table+'\t\t'+\ - r'''\hline - \textbf{TOTAL} & & \textbf{'''+\ - seconds2timestr(total_duration)+\ - r'''}\\ - \hline - \end{tabular} - - ''' - # PRINT timeline plots - body += timeline_plots - -# join to footer -footer = body + footer - -# empty body again -body = '' - -# PRINT summary of everything (at start) -# unnumbered so that number of sections equals number of files -body += '\section*{Summary}\n\t\t\\addcontentsline{toc}{section}{Summary}\n' - -# PRINT table with statistics -body += '\t\t\\begin{tabular}{ll}\n\t\t\t' -body += r'Number of XML files: &' + str(number_of_XML_files) + r'\\'+'\n\t\t\t' -body += r'Number of pages: &' + str(number_of_pages) + r'\\'+'\n\t\t\t' -body += r'Number of fragments: &' + str(number_of_fragments) + r'\\'+'\n\t\t\t' -body += r'Number of empty 
comments: &' + str(total_empty_comments) +\ - " (" + str(round(100.0*total_empty_comments/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' -body += r'Number of unplayed fragments: &' + str(total_not_played) +\ - " (" + str(round(100.0*total_not_played/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' -body += r'Number of unmoved markers: &' + str(total_not_moved) +\ - " (" + str(round(100.0*total_not_moved/number_of_fragments,2)) + r"\%)\\"+'\n\t\t\t' -body += r'Average time per page: &' + seconds2timestr(time_per_page_accum/number_of_pages) + r"\\"+'\n\t\t' -body += '\\end{tabular} \\vspace{1.5cm} \\\\ \n' - -# Average duration for first, second, ... page -body += "\t\t\\vspace{.5cm} \n\n\t\tAverage duration per page (see also Figure \\ref{fig:avgtimeperpage}): \\\\ \n\t\t" -body += r'''\begin{tabular}{lll} - \textbf{Page} & \textbf{Duration} & \textbf{\# subjects}\\''' -tpp_averages = [] # store average time per page -for page_number in range(len(duration_order)): - body += '\n\t\t\t'+str(page_number+1) + "&" +\ - seconds2timestr(sum(duration_order[page_number])/len(duration_order[page_number])) +\ - "&"+str(len(duration_order[page_number]))+r"\\" - tpp_averages.append(sum(duration_order[page_number])/len(duration_order[page_number])) - -body += '\n\t\t\\end{tabular} \\vspace{1.5cm} \\\\ \n\n\t\t' - -# SHOW bar plot of average time per page -plt.bar(range(1,len(duration_order)+1), np.array(tpp_averages)/60) -plt.xlabel('Page order') -plt.xlim(.8, len(duration_order)+1) -plt.xticks(np.arange(1,len(duration_order)+1)+.4, range(1,len(duration_order)+1)) -plt.ylabel('Average time [minutes]') -plt.savefig(folder_name+"time_per_page.pdf", bbox_inches='tight') -plt.close() -#TODO add error bars - - -# Sort pages by number of audioelements, then by duration - -# average duration and number of subjects per page -average_duration_page = [] -number_of_subjects_page = [] -for line in duration_page: - number_of_subjects_page.append(len(line)) - 
average_duration_page.append(sum(line)/len(line)) - -# combine and sort in function of number of audioelements and duration -combined_list = [page_names, average_duration_page, fragments_per_page, number_of_subjects_page] -combined_list = sorted(zip(*combined_list), key=operator.itemgetter(1, 2)) # sort - -# Show average duration for all songs -body += r'''\vspace{.5cm} - Average duration per audioholder (see also Figure \ref{fig:avgtimeperaudioholder}): \\ - \begin{tabular}{llll} - \textbf{Audioholder} & \textbf{Duration} & \textbf{\# subjects} & \textbf{\# fragments} \\''' -audioholder_names_ordered = [] -average_duration_audioholder_ordered = [] -number_of_subjects = [] -for page_index in range(len(page_names)): - audioholder_names_ordered.append(combined_list[page_index][0]) - average_duration_audioholder_ordered.append(combined_list[page_index][1]) - number_of_subjects.append(combined_list[page_index][3]) - body += '\n\t\t\t'+combined_list[page_index][0] + "&" +\ - seconds2timestr(combined_list[page_index][1]) + "&" +\ - str(combined_list[page_index][3]) + "&" +\ - str(combined_list[page_index][2]) + r"\\" -body += '\n\t\t\\end{tabular}\n' - -# SHOW bar plot of average time per page -plt.bar(range(1,len(audioholder_names_ordered)+1), np.array(average_duration_audioholder_ordered)/60) -plt.xlabel('Audioholder') -plt.xlim(.8, len(audioholder_names_ordered)+1) -plt.xticks(np.arange(1,len(audioholder_names_ordered)+1)+.4, audioholder_names_ordered, rotation=90) -plt.ylabel('Average time [minutes]') -plt.savefig(folder_name+"time_per_audioholder.pdf", bbox_inches='tight') -plt.close() - -# SHOW bar plot of average time per page -plt.bar(range(1,len(audioholder_names_ordered)+1), number_of_subjects) -plt.xlabel('Audioholder') -plt.xlim(.8, len(audioholder_names_ordered)+1) -plt.xticks(np.arange(1,len(audioholder_names_ordered)+1)+.4, audioholder_names_ordered, rotation=90) -plt.ylabel('Number of subjects') -ax = plt.gca() -ylims = ax.get_ylim() -yint = 
np.arange(int(np.floor(ylims[0])), int(np.ceil(ylims[1]))+1) -plt.yticks(yint) -plt.savefig(folder_name+"subjects_per_audioholder.pdf", bbox_inches='tight') -plt.close() - -# SHOW both figures -body += r''' - \begin{figure}[htbp] - \begin{center} - \includegraphics[width=.65\textwidth]{'''+\ - folder_name+'time_per_page.pdf'+\ - r'''} - \caption{Average time spent per page.} - \label{fig:avgtimeperpage} - \end{center} - \end{figure} - - ''' -body += r'''\begin{figure}[htbp] - \begin{center} - \includegraphics[width=.65\textwidth]{'''+\ - folder_name+'time_per_audioholder.pdf'+\ - r'''} - \caption{Average time spent per audioholder.} - \label{fig:avgtimeperaudioholder} - \end{center} - \end{figure} - - ''' -body += r'''\begin{figure}[htbp] - \begin{center} - \includegraphics[width=.65\textwidth]{'''+\ - folder_name+'subjects_per_audioholder.pdf'+\ - r'''} - \caption{Number of subjects per audioholder.} - \label{fig:subjectsperaudioholder} - \end{center} - \end{figure} - - ''' -#TODO add error bars -#TODO layout of figures - -# SHOW boxplot per audioholder -#TODO order in decreasing order of participants -for audioholder_name in page_names: # get each name - # plot boxplot if exists (not so for the 'alt' names) - if os.path.isfile(folder_name+'ratings/'+audioholder_name+'-ratings-box.pdf'): - body += r'''\begin{figure}[htbp] - \begin{center} - \includegraphics[width=.65\textwidth]{'''+\ - folder_name+"ratings/"+audioholder_name+'-ratings-box.pdf'+\ - r'''} - \caption{Box plot of ratings for audioholder '''+\ - audioholder_name+' ('+str(subject_count[real_page_names.index(audioholder_name)])+\ - ''' participants).} - \label{fig:boxplot'''+audioholder_name.replace(" ", "")+'''} - \end{center} - \end{figure} - - ''' - -# DEMO pie chart of gender distribution among subjects -genders = ['male', 'female', 'other', 'preferNotToSay', 'UNAVAILABLE'] -# TODO: get the above automatically -gender_distribution = '' -for item in genders: - number = gender.count(item) - if 
number>0: - gender_distribution += str("{:.2f}".format((100.0*number)/len(gender)))+\ - '/'+item.capitalize()+' ('+str(number)+'),\n' - -body += r''' - % Pie chart of gender distribution - \def\angle{0} - \def\radius{3} - \def\cyclelist{{"orange","blue","red","green"}} - \newcount\cyclecount \cyclecount=-1 - \newcount\ind \ind=-1 - \begin{figure}[htbp] - \begin{center}\begin{tikzpicture}[nodes = {font=\sffamily}] - \foreach \percent/\name in {'''+\ - gender_distribution+\ - r'''} {\ifx\percent\empty\else % If \percent is empty, do nothing - \global\advance\cyclecount by 1 % Advance cyclecount - \global\advance\ind by 1 % Advance list index - \ifnum6<\cyclecount % If cyclecount is larger than list - \global\cyclecount=0 % reset cyclecount and - \global\ind=0 % reset list index - \fi - \pgfmathparse{\cyclelist[\the\ind]} % Get color from cycle list - \edef\color{\pgfmathresult} % and store as \color - % Draw angle and set labels - \draw[fill={\color!50},draw={\color}] (0,0) -- (\angle:\radius) - arc (\angle:\angle+\percent*3.6:\radius) -- cycle; - \node at (\angle+0.5*\percent*3.6:0.7*\radius) {\percent\,\%}; - \node[pin=\angle+0.5*\percent*3.6:\name] - at (\angle+0.5*\percent*3.6:\radius) {}; - \pgfmathparse{\angle+\percent*3.6} % Advance angle - \xdef\angle{\pgfmathresult} % and store in \angle - \fi - }; - \end{tikzpicture} - \caption{Representation of gender across subjects} - \label{default} - \end{center} - \end{figure} - - ''' -# problem: some people entered twice? 
- -#TODO -# time per page in function of number of fragments (plot) -# time per participant in function of number of pages -# plot total time for each participant -# show 'count' per page (in order) - -# clear up page_index <> page_count <> page_number confusion - - -texfile = header+body+footer # add bits together - -print 'pdflatex -output-directory="'+folder_name+'"" "'+ folder_name + 'Report.tex"' # DEBUG - -# write TeX file -with open(folder_name + 'Report.tex','w') as f: - f.write(texfile) -proc=subprocess.Popen(shlex.split('pdflatex -output-directory="'+folder_name+'" "'+ folder_name + 'Report.tex"')) -proc.communicate() -# run again -proc=subprocess.Popen(shlex.split('pdflatex -output-directory="'+folder_name+'" "'+ folder_name + 'Report.tex"')) -proc.communicate() - -#TODO remove auxiliary LaTeX files -try: - os.remove(folder_name + 'Report.aux') - os.remove(folder_name + 'Report.log') - os.remove(folder_name + 'Report.out') - os.remove(folder_name + 'Report.toc') -except OSError: - pass -
--- a/scripts/pythonServer.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,212 +0,0 @@ -#!/usr/bin/python - -# Detect the Python version to switch code between 2.x and 3.x -# http://stackoverflow.com/questions/9079036/detect-python-version-at-runtime -import sys - -from os import walk -from os import path -from os import listdir -import inspect -import os -import pickle -import datetime - -if sys.version_info[0] == 2: - # Version 2.x - import BaseHTTPServer - import urllib2 - import urlparse -elif sys.version_info[0] == 3: - # Version 3.x - from http.server import BaseHTTPRequestHandler, HTTPServer - import urllib as urllib2 - -# Go to right folder. -scriptdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) # script directory -os.chdir(scriptdir) # does this work? - -PSEUDO_PATH = '../tests/' -pseudo_files = [] -for filename in listdir(PSEUDO_PATH): - print(filename) - if filename.endswith('.xml'): - pseudo_files.append(filename) - -curSaveIndex = 0; -curFileName = 'test-0.xml' -while(path.isfile('../saves/'+curFileName)): - curSaveIndex += 1; - curFileName = 'test-'+str(curSaveIndex)+'.xml' - -if len(pseudo_files) > 0: - pseudo_index = curSaveIndex % len(pseudo_files) -else: - pseudo_index = 0 - -print('URL: http://localhost:8000/index.html') - -def send404(s): - s.send_response(404) - s.send_header("Content-type", "text/html") - s.end_headers() - -def processFile(s): - if sys.version_info[0] == 2: - s.path = s.path.rsplit('?') - s.path = s.path[0] - s.path = s.path[1:len(s.path)] - st = s.path.rsplit(',') - lenSt = len(st) - fmt = st[lenSt-1].rsplit('.') - fpath = "../"+urllib2.unquote(s.path) - size = path.getsize(fpath) - fileDump = open(fpath) - s.send_response(200) - - if (fmt[1] == 'html'): - s.send_header("Content-type", 'text/html') - elif (fmt[1] == 'css'): - s.send_header("Content-type", 'text/css') - elif (fmt[1] == 'js'): - s.send_header("Content-type", 'application/javascript') - else: - 
s.send_header("Content-type", 'application/octet-stream') - s.send_header("Content-Length", size) - s.end_headers() - s.wfile.write(fileDump.read()) - fileDump.close() - elif sys.version_info[0] == 3: - s.path = s.path.rsplit('?') - s.path = s.path[0] - s.path = s.path[1:len(s.path)] - st = s.path.rsplit(',') - lenSt = len(st) - fmt = st[lenSt-1].rsplit('.') - fpath = "../"+urllib2.parse.unquote(s.path) - s.send_response(200) - if (fmt[1] == 'html'): - s.send_header("Content-type", 'text/html') - fileDump = open(fpath, encoding='utf-8') - fileBytes = bytes(fileDump.read(), "utf-8") - fileDump.close() - elif (fmt[1] == 'css'): - s.send_header("Content-type", 'text/css') - fileDump = open(fpath, encoding='utf-8') - fileBytes = bytes(fileDump.read(), "utf-8") - fileDump.close() - elif (fmt[1] == 'js'): - s.send_header("Content-type", 'application/javascript') - fileDump = open(fpath, encoding='utf-8') - fileBytes = bytes(fileDump.read(), "utf-8") - fileDump.close() - else: - s.send_header("Content-type", 'application/octet-stream') - fileDump = open(fpath, 'rb') - fileBytes = fileDump.read() - fileDump.close() - s.send_header("Content-Length", len(fileBytes)) - s.end_headers() - s.wfile.write(fileBytes) - -def keygen(s): - reply = "" - options = s.path.rsplit('?') - options = options[1].rsplit('=') - key = options[1] - print("Registered key "+key) - if os.path.isfile("saves/save-"+key+".xml"): - reply = "<response><state>NO</state><key>"+key+"</key></response>" - else: - reply = "<response><state>OK</state><key>"+key+"</key></response>" - s.send_response(200) - s.send_header("Content-type", "application/xml") - s.end_headers() - s.wfile.write(reply) - file = open("../saves/save-"+key+".xml",'w') - file.write("<waetresult key="+key+"/>") - file.close(); - -def saveFile(self): - global curFileName - global curSaveIndex - options = self.path.rsplit('?') - options = options[1].rsplit('=') - key = options[1] - varLen = int(self.headers['Content-Length']) - postVars = 
self.rfile.read(varLen) - print("Saving file key "+key) - file = open('../saves/save-'+key+'.xml','w') - file.write(postVars) - file.close() - try: - wbytes = os.path.getsize('../saves/save-'+key+'.xml') - except OSError: - self.send_response(200) - self.send_header("Content-type", "text/xml") - self.end_headers() - self.wfile.write('<response state="error"><message>Could not open file</message></response>') - self.send_response(200) - self.send_header("Content-type", "text/xml") - self.end_headers() - self.wfile.write('<response state="OK"><message>OK</message><file bytes="'+str(wbytes)+'">"saves/'+curFileName+'"</file></response>') - curSaveIndex += 1 - curFileName = 'test-'+str(curSaveIndex)+'.xml' - -def http_do_HEAD(s): - s.send_response(200) - s.send_header("Content-type", "text/html") - s.end_headers() - -def http_do_GET(request): - if(request.client_address[0] == "127.0.0.1"): - if (request.path == "/favicon.ico"): - send404(request) - elif (request.path.split('?',1)[0] == "/php/keygen.php"): - keygen(request); - else: - request.path = request.path.split('?',1)[0] - if (request.path == '/'): - request.path = '/index.html' - elif (request.path == '/pseudo.xml'): - request.path = '/'+PSEUDO_PATH + pseudo_files[pseudo_index] - print(request.path) - pseudo_index += 1 - pseudo_index %= len(pseudo_files) - processFile(request) - else: - send404(request) - -def http_do_POST(request): - if(request.client_address[0] == "127.0.0.1"): - if (request.path.rsplit('?',1)[0] == "/save" or request.path.rsplit('?',1)[0] == "/php/save.php"): - saveFile(request) - else: - send404(request) - -if sys.version_info[0] == 2: - class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler): - def do_HEAD(s): - http_do_HEAD(s) - def do_GET(request): - http_do_GET(request) - def do_POST(request): - http_do_POST(request) - def run(server_class=BaseHTTPServer.HTTPServer,handler_class=MyHandler): - server_address = ('', 8000) - httpd = server_class(server_address, handler_class) - 
httpd.serve_forever() - run() -elif sys.version_info[0] == 3: - class MyHandler(BaseHTTPRequestHandler): - def do_HEAD(s): - send404(s) - def do_GET(request): - http_do_GET(request) - def do_POST(request): - http_do_POST(request) - def run(server_class=HTTPServer,handler_class=MyHandler): - server_address = ('', 8000) - httpd = server_class(server_address, handler_class) - httpd.serve_forever() - run()
--- a/scripts/score_parser.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -#!/usr/bin/python - -import xml.etree.ElementTree as ET -import os -import sys -import csv - -# COMMAND LINE ARGUMENTS - -assert len(sys.argv)<3, "score_parser takes at most 1 command line argument\n"+\ - "Use: python score_parser.py [rating_folder_location]" - -# XML results files location -if len(sys.argv) == 1: - folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder - print "Use: python score_parser.py [rating_folder_location]" - print "Using default path: " + folder_name -elif len(sys.argv) == 2: - folder_name = sys.argv[1] # First command line argument is folder - -# check if folder_name exists -if not os.path.exists(folder_name): - #the file is not there - print "Folder '"+folder_name+"' does not exist." - sys.exit() # terminate script execution -elif not os.access(os.path.dirname(folder_name), os.W_OK): - #the file does exist but write privileges are not given - print "No write privileges in folder '"+folder_name+"'." - - -# CODE - -# remember which files have been opened this time -file_history = [] - -# get every XML file in folder -for file_name in os.listdir(folder_name): - if file_name.endswith(".xml"): - tree = ET.parse(folder_name + '/' + file_name) - root = tree.getroot() - - # get subject ID from XML file - subject_id = file_name[:-4] # file name (without extension) as subject ID - - # get list of all pages this subject evaluated - for page in root.findall("./page"): # iterate over pages - page_name = page.get('ref') # get page reference ID - - if page_name is None: # ignore 'empty' audio_holders - print "WARNING: " + file_name + " contains empty audio holder. (score_parser.py)" - break - - if page.get('state') != "complete": - print "WARNING:" + file_name + " contains incomplete page " +page_name+ ". 
(score_parser.py)" - break; - - file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name - - # create folder 'ratings' if not yet created - if not os.path.exists(folder_name + '/ratings'): - os.makedirs(folder_name + '/ratings') - - # header: fragment IDs in 'alphabetical' order - # go to fragment column, or create new column if it doesn't exist yet - - # get array of audio elements and number of audio elements - audiolist = page.findall("./audioelement") - n_fragments = len(audiolist) - - # get alphabetical array of fragment IDs from this subject's XML - fragmentnamelist = [] # make empty list - for audioelement in audiolist: # iterate over all audioelements - fragmentnamelist.append(audioelement.get('ref')) # add to list - - - # if file exists, get header and add any 'new' fragments not yet in the header - if os.path.isfile(file_name): - with open(file_name, 'r') as readfile: - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() - - # If file hasn't been opened yet this time, remove all rows except header - if file_name not in file_history: - with open(file_name, 'w') as writefile: - filewriter = csv.writer(writefile, delimiter=',') - headerrow = sorted(headerrow) - filewriter.writerow(headerrow) - file_history.append(file_name) - - # Which of the fragments are in fragmentnamelist but not in headerrow? 
- newfragments = list(set(fragmentnamelist)-set(headerrow)) - newfragments = sorted(newfragments) # new fragments in alphabetical order - # If not empty, read file and rewrite adding extra columns - if newfragments: # if not empty - with open('temp.csv', 'w') as writefile: - filewriter = csv.writer(writefile, delimiter=',') - filewriter.writerow(headerrow + newfragments) # write new header - with open(file_name, 'r') as readfile: - filereader = csv.reader(readfile, delimiter=',') - filereader.next() # skip header - for row in filereader: # rewrite row plus empty cells for every new fragment name - filewriter.writerow(row + ['']*len(newfragments)) - os.rename('temp.csv', file_name) # replace old file with temp file - headerrow = headerrow + newfragments - - - # if file does not exist yet, create file and make header - else: - headerrow = sorted(fragmentnamelist) # sort alphabetically - headerrow.insert(0,'') - fragmentnamelist = fragmentnamelist[1:] #HACKY FIX inserting in firstrow also affects fragmentnamelist - with open(file_name, 'w') as writefile: - filewriter = csv.writer(writefile, delimiter=',') - filewriter.writerow(headerrow) - file_history.append(file_name) - - # open file to write for this page - writefile = open(file_name, 'a') - filewriter = csv.writer(writefile, delimiter=',') - - # prepare row to be written for this subject for this page - ratingrow = [subject_id] - - # get scores related to fragment [id] - for fragmentname in headerrow[1:]: # iterate over fragments in header (skip first empty column) - elementvalue = page.find("./audioelement/[@ref='" - + fragmentname - + "']/value") - if hasattr(elementvalue, 'text'): # if rating for this fragment exists - ratingrow.append(elementvalue.text) # add to rating row - else: # if this subject has not rated this fragment - ratingrow.append('') # append empty cell - - # write row: [subject ID, rating fragment ID 1, ..., rating fragment ID M] - if any(ratingrow[1:]): # append to file if row non-empty 
(except subject name) - filewriter.writerow(ratingrow)
--- a/scripts/score_plot.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,233 +0,0 @@ -#!/usr/bin/python - -import sys -import os -import csv -import matplotlib.pyplot as plt -import numpy as np -import scipy as sp -import scipy.stats - -# COMMAND LINE ARGUMENTS - -#TODO: Merge, implement this functionality -#TODO: Control by CLI arguments (plot types, save and/or show, ...) - -assert len(sys.argv)<4, "score_plot takes at most 2 command line arguments\n"+\ - "Use: python score_plot.py [ratings_folder_location]."+\ - "Type 'python score_plot.py -h' for more options" - -# initialise plot types (false by default) and options -enable_boxplot = False # show box plot -enable_confidence = False # show confidence interval -confidence = 0.90 # confidence value (for confidence interval plot) -enable_individual = False # show all individual ratings -show_individual = [] # show specific individuals (empty: show all individuals found) -show_legend = False # show names of individuals - -# DEFAULT: Looks in 'saves/ratings/' folder from 'scripts/' folder -rating_folder = "../saves/ratings/" - -# XML results files location -if len(sys.argv) == 1: # no extra arguments - enable_boxplot = True # show box plot - print "Use: python score_plot.py [rating folder] [plot_type] [-l/-legend]" - print "Type 'python score_plot.py -h' for help." - print "Using default path: " + rating_folder + " with boxplot." -else: - for arg in sys.argv: # go over all arguments - if arg == '-h': - # show help - #TODO: replace with contents of helpfile score_plot.info (or similar) - print "Use: python score_plot.py [rating_folder] [plot_type] [-l] [confidence]" - print " rating_folder:" - print " folder where output of 'score_parser' can be found, and" - print " where plots will be stored." - print " By default, '../saves/ratings/' is used." - print "" - print "PLOT TYPES" - print " Can be used in combination." 
- print " box | boxplot | -b" - print " Enables the boxplot" - print " conf | confidence | -c" - print " Enables the confidence interval plot" - print " ind | individual | -i" - print " Enables plot of individual ratings" - print "" - print "PLOT OPTIONS" - print " leg | legend | -l" - print " For individual plot: show legend with individual file names" - print " numeric value between 0 and 1, e.g. 0.95" - print " For confidence interval plot: confidence value" - assert False, ""# stop immediately after showing help #TODO cleaner way - - # PLOT TYPES - elif arg == 'box' or arg == 'boxplot' or arg == '-b': - enable_boxplot = True # show box plot - elif arg == 'conf' or arg == 'confidence' or arg == '-c': - enable_confidence = True # show confidence interval - #TODO add confidence value input - elif arg == 'ind' or arg == 'individual' or arg == '-i': - enable_individual = True # show all individual ratings - - # PLOT OPTIONS - elif arg == 'leg' or arg == 'legend' or arg == '-l': - if not enable_individual: - print "WARNING: The 'legend' option is only relevant to plots of "+\ - "individual ratings" - show_legend = True # show all individual ratings - elif arg.isdigit(): - if not enable_confidence: - print "WARNING: The numeric confidence value is only relevant when "+\ - "confidence plot is enabled" - if float(arg)>0 and float(arg)<1: - confidence = float(arg) - else: - print "WARNING: The confidence value needs to be between 0 and 1" - - # FOLDER NAME - else: - # assume it's the folder name - rating_folder = arg - -# at least one plot type should be selected: box plot by default -if not enable_boxplot and not enable_confidence and not enable_individual: - enable_boxplot = True - -# check if folder_name exists -if not os.path.exists(rating_folder): - #the file is not there - print "Folder '"+rating_folder+"' does not exist." 
- sys.exit() # terminate script execution -elif not os.access(os.path.dirname(rating_folder), os.W_OK): - #the file does exist but write rating_folder are not given - print "No write privileges in folder '"+rating_folder+"'." - - -# CONFIGURATION - -# Font settings -font = {'weight' : 'bold', - 'size' : 10} -plt.rc('font', **font) - - -# CODE - -# get every csv file in folder -for file in os.listdir(rating_folder): - if file.endswith(".csv"): - page_name = file[:-4] # file name (without extension) is page ID - - # get header - with open(rating_folder+file, 'rb') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - - # read ratings into matrix -# ratings = np.loadtxt(open(rating_folder+file,"rb"), -# delimiter=",", -# skiprows=1, -# usecols=range(1,len(headerrow)+1) -# ) - ratings = np.genfromtxt(readfile, - delimiter=",", - #skip_header = 1, - converters = {3: lambda s: float(s or 'Nan')}, - usecols=range(1,len(headerrow)+1) - ) - - # assert at least 2 subjects (move on to next file if violated) - if ratings.shape[0]<2: - print "WARNING: Just one subject for " + page_name + ". Moving on to next file." 
- break - - # BOXPLOT - if enable_boxplot: - plt.boxplot(ratings) - - # CONFIDENCE INTERVAL - if enable_confidence: - iterator = 0 - for column in ratings.T: # iterate over transposed matrix - # remove all 'Nan's from column - column = column[~np.isnan(column)] - - # get number of non-Nan ratings (= #subjects) - n = column.size - - # get mean - mean_rating = np.mean(column) - - # get errors - err = scipy.stats.sem(column)* sp.stats.t._ppf((1+confidence)/2., n-1) - - # draw plot - plt.errorbar(iterator+1, - mean_rating, - yerr=err, - marker="x", - color ="k", - markersize=12, - linestyle='None') - - iterator += 1 # increase counter - - - # INDIVIDUAL PLOT - if enable_individual or show_individual: - # marker list and color map to cycle through - markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"] - colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] - increment = 0 - linehandles = [] - legendnames = [] - with open(rating_folder+file, 'rb') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - for row in filereader: - subject_id = row[0][:-4] # read from beginning of line - # assume plotting all individuals if no individual(s) specified - if not show_individual or subject_id in show_individual: - plothandle, = plt.plot(range(1,len(row)), # x-values - ratings[increment,:],#row[1:], # y-values: csv values except subject name - color=colormap[increment%len(colormap)], - marker=markerlist[increment%len(markerlist)], - markersize=10, - linestyle='None', - label=subject_id - ) - linehandles.append(plothandle) - legendnames.append(subject_id) - if show_legend: - plt.legend(linehandles, legendnames, - loc='upper right', - bbox_to_anchor=(1.1, 1), - borderaxespad=0., - numpoints=1 # remove extra marker - ) - increment += 1 # increase counter - - # TITLE, AXIS LABELS AND LIMITS - plt.title(page_name) - plt.xlabel('Fragment') - plt.xlim(0, 
len(headerrow)+1) # only show relevant region, leave space left & right) - plt.xticks(range(1, len(headerrow)+1), headerrow, rotation=90) # show fragment names - plt.ylabel('Rating') - plt.ylim(0,1) - - - - # SHOW PLOT - #plt.show() - #exit() - - # SAVE PLOT - # automatically - plot_type = ("-box" if enable_boxplot else "") + \ - ("-conf" if enable_confidence else "") + \ - ("-ind" if enable_individual else "") - plt.savefig(rating_folder+page_name+plot_type+".pdf", bbox_inches='tight') - plt.close()
--- a/scripts/timeline_view.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,163 +0,0 @@ -#!/usr/bin/python - -import xml.etree.ElementTree as ET -import os # list files in directory -import sys # command line arguments -import matplotlib.pyplot as plt # plots -import matplotlib.patches as patches # rectangles - -# COMMAND LINE ARGUMENTS - -assert len(sys.argv)<3, "timeline_view takes at most 1 command line argument\n"+\ - "Use: python timeline_view.py [XML_files_location]" - -# XML results files location -if len(sys.argv) == 1: - folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder - print "Use: python timeline_view.py [XML_files_location]" - print "Using default path: " + folder_name -elif len(sys.argv) == 2: - folder_name = sys.argv[1] # First command line argument is folder - -# check if folder_name exists -if not os.path.exists(folder_name): - #the file is not there - print "Folder '"+folder_name+"' does not exist." - sys.exit() # terminate script execution -elif not os.access(os.path.dirname(folder_name), os.W_OK): - #the file does exist but write privileges are not given - print "No write privileges in folder '"+folder_name+"'." 
- - -# CONFIGURATION - -# Folder where to store timelines -timeline_folder = folder_name + '/timelines/' # Stores in 'saves/timelines/' - -# Font settings -font = {'weight' : 'bold', - 'size' : 16} -plt.rc('font', **font) - -# Colormap for to cycle through -colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] - -# if enabled, x-axis shows time per audioholder, not total test time -show_audioholder_time = True - -# bar height (<1 to avoid overlapping) -bar_height = 0.6 - -# figure size -fig_width = 25 -fig_height = 5 - - -# CODE - -# create timeline_folder if not yet created -if not os.path.exists(timeline_folder): - os.makedirs(timeline_folder) - -# get every XML file in folder -for file in os.listdir(folder_name): - if file.endswith(".xml"): - tree = ET.parse(folder_name + '/' + file) - root = tree.getroot() - subject_id = file[:-4] # drop '.xml' - - time_offset = 0 # test starts at zero - - # ONE TIMELINE PER PAGE - make new plot per page - - # get list of all page names - for audioholder in root.findall("./page"): # iterate over pages - page_name = audioholder.get('ref') # get page name - plot_empty = True # check if any data is plotted - - if page_name is None: # ignore 'empty' audio_holders - print "WARNING: " + file + " contains empty page. (comment_parser.py)" - break - - if audioholder.get("state") != "complete": - print "WARNING: " + file + "test page " + page_name + " is not complete, skipping." 
- break; - # SORT AUDIO ELEMENTS ALPHABETICALLY - audioelements = audioholder.findall("./audioelement") - - data = [] - for elem in audioelements: # from http://effbot.org/zone/element-sort.htm - key = elem.get("ref") - data.append((key, elem)) - data.sort() - - N_audioelements = len(audioelements) # number of audio elements for this page - increment = 0 # increased for every new audioelement - audioelements_names = [] # store names of audioelements - - # get axes handle - fig = plt.figure(figsize=(fig_width, fig_height)) - ax = fig.add_subplot(111) #, aspect='equal' - - # for page [page_name], print comments related to fragment [id] - for tuple in data: - audioelement = tuple[1] - if audioelement is not None: # Check it exists - audio_id = str(audioelement.get('ref')) - audioelements_names.append(audio_id) - - # for this audioelement, loop over all listen events - listen_events = audioelement.findall("./metric/metricResult/[@name='elementListenTracker']/event") - for event in listen_events: - # mark this plot as not empty - plot_empty = False - - # get testtime: start and stop - start_time = float(event.find('testtime').get('start'))-time_offset - stop_time = float(event.find('testtime').get('stop'))-time_offset - # event lines: - ax.plot([start_time, start_time], # x-values - [0, N_audioelements+1], # y-values - color='k' - ) - ax.plot([stop_time, stop_time], # x-values - [0, N_audioelements+1], # y-values - color='k' - ) - # plot time: - ax.add_patch( - patches.Rectangle( - (start_time, N_audioelements-increment-bar_height/2), # (x, y) - stop_time - start_time, # width - bar_height, # height - color=colormap[increment%len(colormap)] # colour - ) - ) - - increment+=1 # to next audioelement - - # subtract total audioholder length from subsequent audioholder event times - audioholder_time = audioholder.find("./metric/metricresult/[@id='testTime']") - if audioholder_time is not None and show_audioholder_time: - time_offset = float(audioholder_time.text) - - if not 
plot_empty: - # set plot parameters - plt.title('Timeline ' + file + ": "+page_name) - plt.xlabel('Time [seconds]') - plt.ylabel('Fragment') - plt.ylim(0, N_audioelements+1) - - #y-ticks: fragment IDs, top to bottom - plt.yticks(range(N_audioelements, 0, -1), audioelements_names) # show fragment names - - - #plt.show() # uncomment to show plot; comment when just saving - #exit() - - plt.savefig(timeline_folder+subject_id+"-"+page_name+".pdf", bbox_inches='tight') - plt.close() - - #TODO: if 'nonsensical' or unknown: dashed line until next event - #TODO: Vertical lines for fragment looping point - \ No newline at end of file
--- a/scripts/timeline_view_movement.py Wed Apr 20 16:02:17 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,311 +0,0 @@ -#!/usr/bin/python - -import xml.etree.ElementTree as ET -import os # list files in directory -import sys # command line arguments -import matplotlib.pyplot as plt # plots -import matplotlib.patches as patches # rectangles - - -# COMMAND LINE ARGUMENTS - -assert len(sys.argv)<3, "timeline_view_movement takes at most 1 command line argument\n"+\ - "Use: python timeline_view_movement.py [XML_files_location]" - -# XML results files location -if len(sys.argv) == 1: - folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder - print "Use: python timeline_view_movement.py [XML_files_location]" - print "Using default path: " + folder_name -elif len(sys.argv) == 2: - folder_name = sys.argv[1] # First command line argument is folder - -# check if folder_name exists -if not os.path.exists(folder_name): - #the file is not there - print "Folder '"+folder_name+"' does not exist." - sys.exit() # terminate script execution -elif not os.access(os.path.dirname(folder_name), os.W_OK): - #the file does exist but write privileges are not given - print "No write privileges in folder '"+folder_name+"'." 
- - -# CONFIGURATION - -# Folder where to store timelines -timeline_folder = folder_name + '/timelines_movement/' # Stores in 'saves/timelines_movement/' by default - -# Font settings -font = {'weight' : 'bold', - 'size' : 16} -plt.rc('font', **font) - -# Colormap for to cycle through -colormap = ['b', 'g', 'c', 'm', 'y', 'k'] - -# figure size -fig_width = 25 -fig_height = 10 - - -# CODE - -# create timeline_folder if not yet created -if not os.path.exists(timeline_folder): - os.makedirs(timeline_folder) - -# get every XML file in folder -for file in os.listdir(folder_name): - if file.endswith(".xml"): - tree = ET.parse(folder_name + '/' + file) - root = tree.getroot() - subject_id = file[:-4] # drop '.xml' - - previous_page_time = 0 # time spent before current page - time_offset = 0 # test starts at zero - - # ONE TIMELINE PER PAGE - make new plot per page - - # get list of all page names - for page in root.findall("./page"): # iterate over pages - page_name = page.get('ref') # get page name - plot_empty = True # check if any data is plotted - - if page_name is None: # ignore 'empty' audio_holders - print "Skipping empty page name from "+subject_id+"." - break - - # subtract total page length from subsequent page event times - page_time_temp = page.find("./metric/metricresult/[@id='testTime']") - if page_time_temp is not None: - page_time = float(page_time_temp.text) - else: - print "Skipping page without total time specified from "+subject_id+"." 
- break - - # get audioelements - audioelements = page.findall("./audioelement") - - # sort alphabetically - data = [] - for elem in audioelements: # from http://effbot.org/zone/element-sort.htm - key = elem.get("ref") - data.append((key, elem)) - data.sort() - - N_audioelements = len(audioelements) # number of audio elements for this page - increment = 0 # increased for every new audioelement - - # get axes handle - fig = plt.figure(figsize=(fig_width, fig_height)) - ax = fig.add_subplot(111) - - # for page [page_name], print comments related to fragment [id] - #for tuple in data: - # audioelement = tuple[1] - for tuple in data: - audioelement = tuple[1] - if audioelement is not None: # Check it exists - audio_id = str(audioelement.get('ref')) - - # break if no initial position or move events registered - initial_position_temp = audioelement.find("./metric/metricResult/[@name='elementInitialPosition']") - if initial_position_temp is None: - print "Skipping "+page_name+" from "+subject_id+": does not have initial positions specified." 
- break - - # get move events, initial and eventual position - initial_position = float(initial_position_temp.text) - move_events = audioelement.findall("./metric/metricResult/[@name='elementTrackerFull']/movement") - final_position = float(audioelement.find("./value").text) - - # get listen events - start_times_global = [] - stop_times_global = [] - listen_events = audioelement.findall("./metric/metricResult/[@name='elementListenTracker']/event") - for event in listen_events: - # get testtime: start and stop - start_times_global.append(float(event.find('testtime').get('start'))-time_offset) - stop_times_global.append(float(event.find('testtime').get('stop'))-time_offset) - - # display fragment name at start - plt.text(0,initial_position+0.02,audio_id,color=colormap[increment%len(colormap)]) #,rotation=45 - - # previous position and time - previous_position = initial_position - previous_time = 0 - - # assume not playing at start - currently_playing = False # keep track of whether fragment is playing during move event - - # draw all segments except final one - for event in move_events: - # mark this plot as not empty - plot_empty = False - - # get time and final position of move event - new_time = float(event.get("time"))-time_offset - new_position = float(event.get("value")) - - # get play/stop events since last move until current move event - stop_times = [] - start_times = [] - # is there a play and/or stop event between previous_time and new_time? 
- for time in start_times_global: - if time>previous_time and time<new_time: - start_times.append(time) - for time in stop_times_global: - if time>previous_time and time<new_time: - stop_times.append(time) - # if no play/stop events between move events, find out whether playing - - segment_start = previous_time # first segment starts at previous move event - - # draw segments (horizontal line) - while len(start_times)+len(stop_times)>0: # while still play/stop events left - if len(stop_times)<1: # upcoming event is 'play' - # draw non-playing segment from segment_start to 'play' - currently_playing = False - segment_stop = start_times.pop(0) # remove and return first item - elif len(start_times)<1: # upcoming event is 'stop' - # draw playing segment (red) from segment_start to 'stop' - currently_playing = True - segment_stop = stop_times.pop(0) # remove and return first item - elif start_times[0]<stop_times[0]: # upcoming event is 'play' - # draw non-playing segment from segment_start to 'play' - currently_playing = False - segment_stop = start_times.pop(0) # remove and return first item - else: # stop_times[0]<start_times[0]: upcoming event is 'stop' - # draw playing segment (red) from segment_start to 'stop' - currently_playing = True - segment_stop = stop_times.pop(0) # remove and return first item - - # draw segment - plt.plot([segment_start, segment_stop], # x-values - [previous_position, previous_position], # y-values - color='r' if currently_playing else colormap[increment%len(colormap)], - linewidth=3 - ) - segment_start = segment_stop # move on to next segment - currently_playing = not currently_playing # toggle to draw final segment correctly - - # draw final segment (horizontal line) from last 'segment_start' to current move event time - plt.plot([segment_start, new_time], # x-values - [previous_position, previous_position], # y-values - # color depends on playing during move event or not: - color='r' if currently_playing else 
colormap[increment%len(colormap)], - linewidth=3 - ) - - # vertical line from previous to current position - plt.plot([new_time, new_time], # x-values - [previous_position, new_position], # y-values - # color depends on playing during move event or not: - color='r' if currently_playing else colormap[increment%len(colormap)], - linewidth=3 - ) - - # update previous_position value - previous_position = new_position - previous_time = new_time - - - - # draw final horizontal segment (or only segment if audioelement not moved) - # horizontal line from previous time to end of page - - # get play/stop events since last move until current move event - stop_times = [] - start_times = [] - # is there a play and/or stop event between previous_time and new_time? - for time in start_times_global: - if time>previous_time and time<page_time-time_offset: - start_times.append(time) - for time in stop_times_global: - if time>previous_time and time<page_time-time_offset: - stop_times.append(time) - # if no play/stop events between move events, find out whether playing - - segment_start = previous_time # first segment starts at previous move event - - # draw segments (horizontal line) - while len(start_times)+len(stop_times)>0: # while still play/stop events left - # mark this plot as not empty - plot_empty = False - if len(stop_times)<1: # upcoming event is 'play' - # draw non-playing segment from segment_start to 'play' - currently_playing = False - segment_stop = start_times.pop(0) # remove and return first item - elif len(start_times)<1: # upcoming event is 'stop' - # draw playing segment (red) from segment_start to 'stop' - currently_playing = True - segment_stop = stop_times.pop(0) # remove and return first item - elif start_times[0]<stop_times[0]: # upcoming event is 'play' - # draw non-playing segment from segment_start to 'play' - currently_playing = False - segment_stop = start_times.pop(0) # remove and return first item - else: # stop_times[0]<start_times[0]: upcoming event 
is 'stop' - # draw playing segment (red) from segment_start to 'stop' - currently_playing = True - segment_stop = stop_times.pop(0) # remove and return first item - - # draw segment - plt.plot([segment_start, segment_stop], # x-values - [previous_position, previous_position], # y-values - color='r' if currently_playing else colormap[increment%len(colormap)], - linewidth=3 - ) - segment_start = segment_stop # move on to next segment - currently_playing = not currently_playing # toggle to draw final segment correctly - - # draw final segment (horizontal line) from last 'segment_start' to current move event time - plt.plot([segment_start, page_time-time_offset], # x-values - [previous_position, previous_position], # y-values - # color depends on playing during move event or not: - color='r' if currently_playing else colormap[increment%len(colormap)], - linewidth=3 - ) - -# plt.plot([previous_time, page_time-time_offset], # x-values -# [previous_position, previous_position], # y-values -# color=colormap[increment%len(colormap)], -# linewidth=3 -# ) - - # display fragment name at end - plt.text(page_time-time_offset,previous_position,\ - audio_id,color=colormap[increment%len(colormap)]) #,rotation=45 - - increment+=1 # to next audioelement - - last_page_duration = page_time-time_offset - time_offset = page_time - - if not plot_empty: # if plot is not empty, show or store - # set plot parameters - plt.title('Timeline ' + file + ": "+page_name) - plt.xlabel('Time [seconds]') - plt.xlim(0, last_page_duration) - plt.ylabel('Rating') # default - plt.ylim(0, 1) # rating between 0 and 1 - - #y-ticks: labels on rating axis - label_positions = [] - label_text = [] - scale_tags = root.findall("./BrowserEvalProjectDocument/audioHolder/interface/scale") - scale_title = root.find("./BrowserEvalProjectDocument/audioHolder/interface/title") - for tag in scale_tags: - label_positions.append(float(tag.get('position'))/100) # on a scale from 0 to 100 - label_text.append(tag.text) - if 
len(label_positions) > 0: # if any labels available - plt.yticks(label_positions, label_text) # show rating axis labels - # set label Y-axis - if scale_title is not None: - plt.ylabel(scale_title.text) - - #plt.show() # uncomment to show plot; comment when just saving - #exit() - - plt.savefig(timeline_folder+subject_id+"-"+page_name+".pdf", bbox_inches='tight') - plt.close() -