Mercurial > hg > webaudioevaluationtool
changeset 246:83584c6b09b5
Scripts: merge all three plot scripts into one (box plot, scatter plot, mean plus confidence interval plot); bug fixes
author | Brecht De Man <b.deman@qmul.ac.uk> |
---|---|
date | Mon, 29 Jun 2015 17:19:46 +0100 |
parents | fc056b63e208 |
children | c3f29c2b9b12 |
files | README.txt scripts/score_boxplot.py scripts/score_confidence.py scripts/score_individual.py scripts/score_parser.py scripts/score_plot.py scripts/timeline_view.py |
diffstat | 7 files changed, 221 insertions(+), 264 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Mon Jun 29 13:15:00 2015 +0100 +++ b/README.txt Mon Jun 29 17:19:46 2015 +0100 @@ -63,7 +63,30 @@ Please contact the authors if you experience any bugs, if you would like additional functionality, if you have questions about using the interface or if you would like to give any feedback (even positive!) about the interface. We look forward to learning how the tool has (not) been useful to you. +SCRIPTS + +The tool comes with a few handy Python scripts for easy extraction of ratings or comments, and visualisation of ratings and timelines. See below for a quick guide on how to use them. All scripts are written for Python 2.7. Visualisation requires the free matplotlib toolbox (http://matplotlib.org), numpy and scipy. +By default, the scripts can be run from the ‘scripts’ folder, with the result files in the ‘saves’ folder (the default location where result XMLs are stored). + + comment_parser.py + Extracts comments from the output XML files corresponding with the different subjects found in ‘saves/’. It creates a folder per ‘audioholder’/page it finds, and stores a CSV file with comments for every ‘audioelement’/fragment within these respective ‘audioholders’/pages. In this CSV file, every line corresponds with a subject/output XML file. Depending on the settings, the first column containing the name of the corresponding XML file can be omitted (for anonymisation). + Beware of Excel: sometimes the UTF-8 is not properly imported, leading to problems with special characters in the comments (particularly cumbersome for foreign languages). + + score_parser.py + Extracts rating values from the XML to CSV - necessary for running visualisation of ratings. Creates the folder ‘saves/ratings/’ if not yet created, to which it writes a separate file for every ‘audioholder’/page in any of the output XMLs it finds in ‘saves/’. Within each file, rows represent different subjects (output XML file names) and columns represent different ‘audioelements’/fragments. 
+ + score_plot.py + Plots the ratings as stored in the CSVs created by score_parser.py. + Depending on the settings, it displays and/or saves (in ‘saves/ratings/’) a boxplot, confidence interval plot, scatter plot, or a combination of the aforementioned. + Requires the free matplotlib library. + At this point, more than one subject is needed for this script to work. + + timeline_view.py + Creates a timeline for every subject, for every ‘audioholder’/page, corresponding with any of the output XML files found in ‘saves/’. It shows when and for how long the subject listened to each of the fragments. + + + REFERENCES [1] B. De Man and Joshua D. Reiss, “APE: Audio Perceptual Evaluation toolbox for MATLAB,” 136th Convention of the Audio Engineering Society, 2014. -[2] Nicholas Jillings, Brecht De Man, David Moffat and Joshua D. Reiss, "Web Audio Evaluation Tool: A Browser-Based Listening Test Environment," 12th Sound and Music Computing Conference, July 2015. \ No newline at end of file +[2] Nicholas Jillings, Brecht De Man, David Moffat and Joshua D. Reiss, "Web Audio Evaluation Tool: A Browser-Based Listening Test Environment," 12th Sound and Music Computing Conference, July 2015.
--- a/scripts/score_boxplot.py Mon Jun 29 13:15:00 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -import sys -import os -import csv -import matplotlib.pyplot as plt -import numpy as np - -# Enter folder where rating CSV files are (generated with score_parser.py or same format). -# Add subject names of individual ratings to be marked in 'show_individual'. - -rating_folder = 'ratings/' # folder with rating csv files -show_individual = [] # add name/list of names of individuals to plot - -# get every csv file in folder -for file in os.listdir(rating_folder): # You have to put this in folder where rating csv files are. - if file.endswith(".csv"): - page_name = file[:-4] # file name (without extension) is page ID - - # get header - with open(rating_folder+file, 'r') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - - # read ratings into matrix - ratings = np.loadtxt(open(rating_folder+file,"rb"), - delimiter=",", - skiprows=1, - usecols=range(1,len(headerrow)+1) - ) - - # draw boxplot - plt.boxplot(ratings) - - if not show_individual: - # add rating of individual(s) - with open(rating_folder+file, 'r') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"] - increment = 0 - linehandles = [] - legendnames = [] - for row in filereader: - subject_id = row[0][:-4] - if subject_id in show_individual: - plothandle, = plt.plot(range(1,len(row)), # x-values - row[1:], # y-values: csv values except subject name - color='k', - marker=markerlist[increment%len(markerlist)], - markersize=10, - linestyle='None', - label=subject_id - ) - increment += 1 # increase counter - linehandles.append(plothandle) - legendnames.append(subject_id) - 
plt.legend(linehandles, legendnames, - loc='upper right', - bbox_to_anchor=(1.1, 1), borderaxespad=0.) - - - plt.xlabel('Fragment') - plt.title('Box plot '+page_name) - plt.xlim(0, len(headerrow)+1) # only show relevant region, leave space left & right) - plt.xticks(range(1, len(headerrow)+1), headerrow) # show fragment names - - plt.ylabel('Rating') - plt.ylim(0,1) - - #plt.show() # show plot - #exit() - - plt.savefig(rating_folder+page_name+"-box.png") - plt.close()
--- a/scripts/score_confidence.py Mon Jun 29 13:15:00 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ -import sys -import os -import csv -import matplotlib.pyplot as plt -import numpy as np -import scipy as sp -import scipy.stats - -# Enter folder where rating CSV files are (generated with score_parser.py or same format). -# Add subject names of individual ratings to be marked in 'show_individual'. -# Choose confidence value. - -rating_folder = 'ratings/' # folder with rating csv files -show_individual = [] # add name/list of names of individuals to plot -confidence = .9 # confidence percentage (usually 80%-99%) - -# get every csv file in folder -for file in os.listdir(rating_folder): # You have to put this in folder where rating csv files are. - if file.endswith(".csv"): - page_name = file[:-4] # file name (without extension) is page ID - - # get header - with open(rating_folder+file, 'r') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - - # read ratings into matrix - ratings = np.loadtxt(open(rating_folder+file,"rb"), - delimiter=",", - skiprows=1, - usecols=range(1,len(headerrow)+1) - ) - - # get number of rows (= subjects) - n = ratings.shape[1] - - # get means - means = np.mean(ratings, axis=0) - - # get errors - err = scipy.stats.sem(ratings)* sp.stats.t._ppf((1+confidence)/2., n-1) - - # draw plot - plt.errorbar(range(1,len(headerrow)+1), - means, - yerr=err, - marker="x", - markersize=10, - linestyle='None') - - if not show_individual: - # add rating of individual(s) - with open(rating_folder+file, 'r') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"] - increment = 0 - linehandles = [] - legendnames = [] - for row in 
filereader: - subject_id = row[0][:-4] - if subject_id in show_individual: - plothandle, = plt.plot(range(1,len(row)), # x-values - row[1:], # y-values: csv values except subject name - color='k', - marker=markerlist[increment%len(markerlist)], - markersize=10, - linestyle='None', - label=subject_id - ) - increment += 1 # increase counter - linehandles.append(plothandle) - legendnames.append(subject_id) - plt.legend(linehandles, legendnames, - loc='upper right', - bbox_to_anchor=(1.1, 1), borderaxespad=0.) - - - plt.xlabel('Fragment') - plt.title('Confidence interval '+page_name) - plt.xlim(0, len(headerrow)+1) # only show relevant region, leave space left & right) - plt.xticks(range(1, len(headerrow)+1), headerrow) # show fragment names - - plt.ylabel('Rating') - plt.ylim(0,1) - - #plt.show() # show plot - #exit() - - plt.savefig(rating_folder+page_name+"-conf.png") - plt.close()
--- a/scripts/score_individual.py Mon Jun 29 13:15:00 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -import sys -import os -import csv -import matplotlib.pyplot as plt - -rating_folder = 'ratings/' # folder with rating csv files - -colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] # colormap for to cycle through -markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"] - -show_legend = False - -# get every csv file in folder -for file in os.listdir(rating_folder): - if file.endswith(".csv"): - - page_name = file[:-4] # file name (without extension) is page ID - - with open(rating_folder+file, 'r') as readfile: # read this csv file - filereader = csv.reader(readfile, delimiter=',') - headerrow = filereader.next() # use headerrow as X-axis - headerrow = headerrow[1:] - - - increment = 0 - linehandles = [] - legendnames = [] - for row in filereader: - subject_id = row[0][:-4] - plothandle, = plt.plot(range(1,len(row)), # x-values - row[1:], # y-values: csv values except subject name - color=colormap[increment%len(colormap)], - marker=markerlist[increment%len(markerlist)], - markersize=10, - linestyle='None', - label=subject_id - ) - increment += 1 # increase counter - linehandles.append(plothandle) - legendnames.append(subject_id.decode("utf-8")) # avoid decoding problems - - - plt.xlabel('Fragment') - plt.title('Individual ratings '+page_name) - plt.xlim(0, len(headerrow)+1) # only show relevant region, leave space left & right) - plt.xticks(range(1, len(headerrow)+1), headerrow) # show fragment names - - plt.ylabel('Rating') - plt.ylim(0,1) - - if show_legend: - plt.legend(linehandles, legendnames, - loc='upper right', - bbox_to_anchor=(1.1, 1), - borderaxespad=0., - numpoints=1 # remove extra marker - ) - - #TODO Put legend outside of box - - #plt.show() # show plot - #exit() - - plt.savefig(rating_folder+page_name+"-ind.png") - plt.close()
--- a/scripts/score_parser.py Mon Jun 29 13:15:00 2015 +0100 +++ b/scripts/score_parser.py Mon Jun 29 17:19:46 2015 +0100 @@ -1,3 +1,5 @@ +#!/usr/bin/python + import xml.etree.ElementTree as ET import os import csv @@ -12,16 +14,15 @@ if file.endswith(".xml"): tree = ET.parse(folder_name + '/' + file) root = tree.getroot() - #print ["DEBUG Reading " + file + "..."] + #print "DEBUG Reading " + file + "..." # get subject ID from XML file - subject_id = file # file name as subject ID + subject_id = file[:-4] # file name (without extension) as subject ID # get list of all pages this subject evaluated for audioholder in root.findall("./audioholder"): # iterate over pages page_name = audioholder.get('id') # get page name - #print ["DEBUG page " + page_name] - + if page_name is None: # ignore 'empty' audio_holders break @@ -46,28 +47,32 @@ # if file exists, get header and add 'new' fragments if os.path.isfile(file_name): - #print ["DEBUG file " + file_name + " already exists - reading header"] + #print "DEBUG file " + file_name + " already exists - reading header" with open(file_name, 'r') as readfile: filereader = csv.reader(readfile, delimiter=',') headerrow = filereader.next() - #headerrow = headerrow[1:] # remove first column (empty) - # Which of the fragmentes are in fragmentnamelist but not in headerrow? - newfragments = list(set(fragmentnamelist)-set(headerrow)) - newfragments = sorted(newfragments) # new fragments in alphabetical order - # If not empty, read file and rewrite adding extra columns - if newfragments: # if not empty - print ["DEBUG New fragments found: " + str(newfragments)] - with open('temp.csv', 'w') as writefile: - filewriter = csv.writer(writefile, delimiter=',') - filewriter.writerow(headerrow + newfragments) # write new header + # Which of the fragmentes are in fragmentnamelist but not in headerrow? 
+ newfragments = list(set(fragmentnamelist)-set(headerrow)) + newfragments = sorted(newfragments) # new fragments in alphabetical order + # If not empty, read file and rewrite adding extra columns + if newfragments: # if not empty + print ' '+page_name+','+file_name+','+subject_id + #print "DEBUG New fragments found: " + str(newfragments) + with open('temp.csv', 'w') as writefile: + filewriter = csv.writer(writefile, delimiter=',') + filewriter.writerow(headerrow + newfragments) # write new header + #print " "+str(headerrow + newfragments) # DEBUG + with open(file_name, 'r') as readfile: + filereader = csv.reader(readfile, delimiter=',') + filereader.next() # skip header for row in filereader: # rewrite row plus empty cells for every new fragment name - #print ["DEBUG Old row: " + str(row)] + #print " Old row: " + str(row) # DEBUG filewriter.writerow(row + ['']*len(newfragments)) - #print ["DEBUG New row: " + str(row + ['']*len(newfragments))] - os.rename('temp.csv', file_name) # replace old file with temp file - headerrow = headerrow + newfragments - print ["DEBUG New header row: " + str(headerrow)] + #print " New row: " + str(row + ['']*len(newfragments)) # DEBUG + os.rename('temp.csv', file_name) # replace old file with temp file + headerrow = headerrow + newfragments + #print "DEBUG New header row: " + str(headerrow) # if not, create file and make header else:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/score_plot.py Mon Jun 29 17:19:46 2015 +0100 @@ -0,0 +1,150 @@ +#!/usr/bin/python + +import sys +import os +import csv +import matplotlib.pyplot as plt +import numpy as np +import scipy as sp +import scipy.stats + +# CONFIGURATION + +# Which type(s) of plot do you want? +enable_boxplot = False # show box plot +enable_confidence = True # show confidence interval +confidence = 0.90 # confidence value (for confidence interval plot) +enable_individual = True # show all individual ratings +show_individual = [] # show specific individuals +show_legend = False # show names of individuals +#TODO: Merge, implement this functionality +#TODO: Control by CLI arguments (plot types, save and/or show, ...) + +# Enter folder where rating CSV files are (generated with score_parser.py or same format). +rating_folder = '../saves/ratings/' # folder with rating csv files + +# Font settings +font = {'weight' : 'bold', + 'size' : 10} +plt.rc('font', **font) + + +# CODE + +# get every csv file in folder +for file in os.listdir(rating_folder): # You have to put this in folder where rating csv files are. + if file.endswith(".csv"): + page_name = file[:-4] # file name (without extension) is page ID + + # get header + with open(rating_folder+file, 'rb') as readfile: # read this csv file + filereader = csv.reader(readfile, delimiter=',') + headerrow = filereader.next() # use headerrow as X-axis + headerrow = headerrow[1:] + + # read ratings into matrix +# ratings = np.loadtxt(open(rating_folder+file,"rb"), +# delimiter=",", +# skiprows=1, +# usecols=range(1,len(headerrow)+1) +# ) + ratings = np.genfromtxt(readfile, + delimiter=",", + #skip_header = 1, + converters = {3: lambda s: float(s or 'Nan')}, + usecols=range(1,len(headerrow)+1) + ) + + # assert at least 2 subjects (move on to next file if violated) + if ratings.shape[1]<2: + print "WARNING: Just one subject for " + page_name + ". Moving on to next file." 
+ break + + # BOXPLOT + if enable_boxplot: + plt.boxplot(ratings) + + # CONFIDENCE INTERVAL + if enable_confidence: + iterator = 0 + for column in ratings.T: # iterate over transposed matrix + # remove all 'Nan's from column + column = column[~np.isnan(column)] + + # get number of non-Nan ratings (= #subjects) + n = column.size + + # get mean + mean_rating = np.mean(column) + + # get errors + err = scipy.stats.sem(column)* sp.stats.t._ppf((1+confidence)/2., n-1) + + # draw plot + plt.errorbar(iterator+1, + mean_rating, + yerr=err, + marker="x", + color ="k", + markersize=12, + linestyle='None') + + iterator += 1 # increase counter + + + # INDIVIDUAL PLOT + if enable_individual or show_individual: + # marker list and color map to cycle through + markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"] + colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] + increment = 0 + linehandles = [] + legendnames = [] + with open(rating_folder+file, 'rb') as readfile: # read this csv file + filereader = csv.reader(readfile, delimiter=',') + headerrow = filereader.next() # use headerrow as X-axis + headerrow = headerrow[1:] + for row in filereader: + subject_id = row[0][:-4] # read from beginning of line + # assume plotting all individuals if no individual(s) specified + if not show_individual or subject_id in show_individual: + plothandle, = plt.plot(range(1,len(row)), # x-values + ratings[increment,:],#row[1:], # y-values: csv values except subject name + color=colormap[increment%len(colormap)], + marker=markerlist[increment%len(markerlist)], + markersize=10, + linestyle='None', + label=subject_id + ) + linehandles.append(plothandle) + legendnames.append(subject_id) + if show_legend: + plt.legend(linehandles, legendnames, + loc='upper right', + bbox_to_anchor=(1.1, 1), + borderaxespad=0., + numpoints=1 # remove extra marker + ) + increment += 1 # increase counter + + # TITLE, AXIS LABELS AND LIMITS + plt.title(page_name) + plt.xlabel('Fragment') + plt.xlim(0, 
len(headerrow)+1) # only show relevant region, leave space left & right) + plt.xticks(range(1, len(headerrow)+1), headerrow) # show fragment names + plt.ylabel('Rating') + plt.ylim(0,1) + + + + # SHOW PLOT + #plt.show() + #exit() + + # SAVE PLOT + # automatically + plot_type = ("-box" if enable_boxplot else "") + \ + ("-conf" if enable_confidence else "") + \ + ("-ind" if enable_individual else "") + plt.savefig(rating_folder+page_name+plot_type+".png") + plt.close()
--- a/scripts/timeline_view.py Mon Jun 29 13:15:00 2015 +0100 +++ b/scripts/timeline_view.py Mon Jun 29 17:19:46 2015 +0100 @@ -1,20 +1,36 @@ +#!/usr/bin/python + import xml.etree.ElementTree as ET import os import matplotlib.pyplot as plt -colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] # colormap for to cycle through +# CONFIGURATION -timeline_folder = 'timelines/' # folder where to store timelines, e.g. 'timelines/' +# XML results files location (modify as needed): +folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder +# Folder where to store timelines +timeline_folder = folder_name + '/timelines/' # Stores in 'saves/timelines/' + +# Font settings +font = {'weight' : 'bold', + 'size' : 16} +plt.rc('font', **font) + +# Colormap for to cycle through +colormap = ['b', 'r', 'g', 'c', 'm', 'y', 'k'] + + +# CODE # create timeline_folder if not yet created if not os.path.exists(timeline_folder): os.makedirs(timeline_folder) # get every XML file in folder -for file in os.listdir("."): # You have to put this script in folder where output XML files are. +for file in os.listdir(folder_name): # You have to put this script in folder where output XML files are. 
if file.endswith(".xml"): - tree = ET.parse(file) + tree = ET.parse(folder_name + '/' + file) root = tree.getroot() subject_id = file[:-4] # drop '.xml' @@ -28,7 +44,7 @@ break # SORT AUDIO ELEMENTS ALPHABETICALLY - audioelements = root.findall("*/[@id='"+page_name+"']/audioelement") + audioelements = audioholder.findall("./audioelement") data = [] for elem in audioelements: # from http://effbot.org/zone/element-sort.htm @@ -48,11 +64,7 @@ audioelements_names.append(audio_id) # for this audioelement, loop over all listen events - listen_events = root.findall("*/[@id='" - + page_name - + "']/audioelement/[@id='" - + audio_id - + "']/metric/metricresult/[@name='elementListenTracker']/event") + listen_events = audioelement.findall("./metric/metricresult/[@name='elementListenTracker']/event") for event in listen_events: # get testtime: start and stop start_time = event.find('testtime').get('start')