annotate scripts/score_plot.py @ 1316:279930a008ca

All interfaces support comment boxes. Comment box identification matches presented tag (for instance, AB will be Comment on fragment A, rather than 1). Tighter buffer loading protocol, audioObjects register with the buffer rather than checking for buffer existence (which can be buggy depending on the buffer state). Buffers now have a state to ensure exact location in loading chain (downloading, decoding, LUFS, ready).
author Nicholas Jillings <nickjillings@users.noreply.github.com>
date Fri, 29 Jan 2016 11:11:57 +0000
parents
children 43801b3d6131
rev   line source
nickjillings@1316 1 #!/usr/bin/python
nickjillings@1316 2
nickjillings@1316 3 import sys
nickjillings@1316 4 import os
nickjillings@1316 5 import csv
nickjillings@1316 6 import matplotlib.pyplot as plt
nickjillings@1316 7 import numpy as np
nickjillings@1316 8 import scipy as sp
nickjillings@1316 9 import scipy.stats
nickjillings@1316 10
# COMMAND LINE ARGUMENTS

#TODO: Merge, implement this functionality
#TODO: Control by CLI arguments (plot types, save and/or show, ...)

# DEFAULT: Looks in 'saves/ratings/' folder from 'scripts/' folder
DEFAULT_RATING_FOLDER = "../saves/ratings/"

# confidence value used for the confidence interval plot unless overridden
DEFAULT_CONFIDENCE = 0.90

#TODO: replace with contents of helpfile score_plot.info (or similar)
HELP_TEXT = "\n".join([
    "Use: python score_plot.py [rating_folder] [plot_type] [-l] [confidence]",
    " rating_folder:",
    " folder where output of 'score_parser' can be found, and",
    " where plots will be stored.",
    " By default, '../saves/ratings/' is used.",
    "",
    "PLOT TYPES",
    " Can be used in combination.",
    " box | boxplot | -b",
    " Enables the boxplot",
    " conf | confidence | -c",
    " Enables the confidence interval plot",
    " ind | individual | -i",
    " Enables plot of individual ratings",
    "",
    "PLOT OPTIONS",
    " leg | legend | -l",
    " For individual plot: show legend with individual file names",
    " numeric value between 0 and 1, e.g. 0.95",
    " For confidence interval plot: confidence value",
])

# CONFIGURATION

# Font settings
FONT = {'weight': 'bold',
        'size': 10}

# marker list and color map to cycle through in the individual plot
MARKER_LIST = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"]
COLOR_MAP = ['b', 'r', 'g', 'c', 'm', 'y', 'k']


def _parse_number(arg):
    """Return `arg` as a float, or None when it is not a numeric literal."""
    # 'nan', 'inf', ... are accepted by float() but are far more likely to be
    # folder names than confidence values.
    if arg.isalpha():
        return None
    try:
        return float(arg)
    except ValueError:
        return None


def parse_arguments(argv):
    """Turn the command line arguments into a dictionary of plot options.

    `argv` is the argument list WITHOUT the script name (i.e. sys.argv[1:]).
    Arguments may be given in any order and any number; an argument that is
    neither a known keyword nor a number is taken to be the rating folder.

    Returns a dict with the keys 'rating_folder', 'enable_boxplot',
    'enable_confidence', 'confidence', 'enable_individual',
    'show_individual' and 'show_legend'.

    Prints the help text and exits (status 0) when '-h' is given.
    """
    options = {
        'rating_folder': DEFAULT_RATING_FOLDER,
        'enable_boxplot': False,      # show box plot
        'enable_confidence': False,   # show confidence interval
        'confidence': DEFAULT_CONFIDENCE,
        'enable_individual': False,   # show all individual ratings
        'show_individual': [],        # specific individuals (empty: all)
        'show_legend': False,         # show names of individuals
    }

    if not argv:  # no extra arguments
        options['enable_boxplot'] = True
        print("Use: python score_plot.py [rating folder] [plot_type] [-l/-legend]")
        print("Type 'python score_plot.py -h' for help.")
        print("Using default path: " + options['rating_folder'] + " with boxplot.")
        return options

    numeric_given = False
    for arg in argv:
        if arg == '-h':
            print(HELP_TEXT)
            sys.exit(0)  # stop immediately after showing help

        # PLOT TYPES
        elif arg in ('box', 'boxplot', '-b'):
            options['enable_boxplot'] = True
        elif arg in ('conf', 'confidence', '-c'):
            options['enable_confidence'] = True
        elif arg in ('ind', 'individual', '-i'):
            options['enable_individual'] = True

        # PLOT OPTIONS
        elif arg in ('leg', 'legend', '-l'):
            options['show_legend'] = True
        else:
            value = _parse_number(arg)
            if value is None:
                # FOLDER NAME: assume it's the folder name
                options['rating_folder'] = arg
            else:
                numeric_given = True
                if 0 < value < 1:
                    options['confidence'] = value
                else:
                    print("WARNING: The confidence value needs to be between 0 and 1")

    # Relevance warnings are issued once everything has been read, so they do
    # not depend on the order in which the arguments were given.
    if options['show_legend'] and not options['enable_individual']:
        print("WARNING: The 'legend' option is only relevant to plots of "
              "individual ratings")
    if numeric_given and not options['enable_confidence']:
        print("WARNING: The numeric confidence value is only relevant when "
              "confidence plot is enabled")

    # at least one plot type should be selected: box plot by default
    if not (options['enable_boxplot'] or options['enable_confidence']
            or options['enable_individual']):
        options['enable_boxplot'] = True

    return options


def check_rating_folder(rating_folder):
    """Exit when `rating_folder` is not a directory; warn when not writable."""
    if not os.path.isdir(rating_folder):
        print("Folder '"+rating_folder+"' does not exist.")
        sys.exit()  # terminate script execution
    if not os.access(rating_folder, os.W_OK):
        # the folder exists but plots cannot be written into it
        print("No write privileges in folder '"+rating_folder+"'.")


def _open_csv(path):
    """Open `path` the way the csv module expects on this Python version."""
    if sys.version_info[0] < 3:
        return open(path, 'rb')
    return open(path, 'r', newline='')


def _to_rating(cell):
    """Convert one CSV cell to a float; empty or unparsable cells become NaN."""
    try:
        return float(cell)
    except ValueError:
        return float('nan')


def load_ratings(csv_path):
    """Read one ratings CSV file.

    The first row is the header (first cell ignored, remaining cells are the
    fragment names); each further non-empty row holds a subject identifier
    followed by one rating per fragment.

    Returns (fragments, subject_ids, ratings) where `ratings` is always a 2-D
    float array of shape (number of subjects, number of fragments), with NaN
    for missing values.
    """
    with _open_csv(csv_path) as readfile:
        rows = [row for row in csv.reader(readfile, delimiter=',') if row]

    if not rows:
        return [], [], np.full((0, 0), np.nan)

    fragments = rows[0][1:]  # header row is used as X-axis
    data_rows = rows[1:]
    # drop the file extension of the subject's result file, if any
    subject_ids = [os.path.splitext(row[0])[0] for row in data_rows]

    ratings = np.full((len(data_rows), len(fragments)), np.nan)
    for row_index, row in enumerate(data_rows):
        for col_index, cell in enumerate(row[1:len(fragments) + 1]):
            ratings[row_index, col_index] = _to_rating(cell)

    return fragments, subject_ids, ratings


def confidence_interval(column, confidence):
    """Return (mean, half-width) of the confidence interval of `column`.

    NaN entries are ignored. Returns None when fewer than two ratings remain,
    because the interval is undefined in that case.
    """
    values = column[~np.isnan(column)]
    n = values.size  # number of non-NaN ratings (= #subjects)
    if n < 2:
        return None
    mean_rating = np.mean(values)
    err = scipy.stats.sem(values) * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return mean_rating, err


def plot_page(page_name, fragments, subject_ids, ratings, options):
    """Draw the enabled plot types for one page and save them as a PDF."""
    positions = list(range(1, len(fragments) + 1))

    # BOXPLOT
    if options['enable_boxplot']:
        # remove NaNs per fragment, as the confidence interval plot does
        plt.boxplot([column[~np.isnan(column)] for column in ratings.T])

    # CONFIDENCE INTERVAL
    if options['enable_confidence']:
        for position, column in enumerate(ratings.T, 1):
            interval = confidence_interval(column, options['confidence'])
            if interval is None:
                continue
            mean_rating, err = interval
            plt.errorbar(position,
                         mean_rating,
                         yerr=err,
                         marker="x",
                         color="k",
                         markersize=12,
                         linestyle='None')

    # INDIVIDUAL PLOT
    show_individual = options['show_individual']
    if options['enable_individual'] or show_individual:
        linehandles = []
        legendnames = []
        for index, subject_id in enumerate(subject_ids):
            # assume plotting all individuals if no individual(s) specified
            if show_individual and subject_id not in show_individual:
                continue
            plothandle, = plt.plot(positions,
                                   ratings[index, :],
                                   color=COLOR_MAP[index % len(COLOR_MAP)],
                                   marker=MARKER_LIST[index % len(MARKER_LIST)],
                                   markersize=10,
                                   linestyle='None',
                                   label=subject_id)
            linehandles.append(plothandle)
            legendnames.append(subject_id)
        if options['show_legend'] and linehandles:
            plt.legend(linehandles, legendnames,
                       loc='upper right',
                       bbox_to_anchor=(1.1, 1),
                       borderaxespad=0.,
                       numpoints=1  # remove extra marker
                       )

    # TITLE, AXIS LABELS AND LIMITS
    plt.title(page_name)
    plt.xlabel('Fragment')
    plt.xlim(0, len(fragments) + 1)  # only show relevant region, leave space left & right
    plt.xticks(positions, fragments, rotation=90)  # show fragment names
    plt.ylabel('Rating')
    plt.ylim(0, 1)

    # SAVE PLOT
    plot_type = ("-box" if options['enable_boxplot'] else "") + \
                ("-conf" if options['enable_confidence'] else "") + \
                ("-ind" if options['enable_individual'] else "")
    plt.savefig(os.path.join(options['rating_folder'], page_name + plot_type + ".pdf"),
                bbox_inches='tight')
    plt.close()


def main(argv=None):
    """Plot every ratings CSV file found in the selected rating folder."""
    if argv is None:
        argv = sys.argv[1:]  # skip the script name itself

    options = parse_arguments(argv)
    rating_folder = options['rating_folder']
    check_rating_folder(rating_folder)

    plt.rc('font', **FONT)

    # get every csv file in folder
    for file_name in sorted(os.listdir(rating_folder)):
        if not file_name.endswith(".csv"):
            continue
        page_name = file_name[:-4]  # file name (without extension) is page ID

        fragments, subject_ids, ratings = load_ratings(
            os.path.join(rating_folder, file_name))

        if not fragments:
            print("WARNING: No fragments found for " + page_name + ". Moving on to next file.")
            continue

        # need at least 2 subjects (move on to next file if violated)
        if ratings.shape[0] < 2:
            print("WARNING: Just one subject for " + page_name + ". Moving on to next file.")
            continue

        plot_page(page_name, fragments, subject_ids, ratings, options)


if __name__ == "__main__":
    main()