changeset 241:a4b5246ffb32

Scripts: added 'confidence interval plot' (rigorous checking needed) and minor fixes to other scripts.
author Brecht De Man <b.deman@qmul.ac.uk>
date Thu, 25 Jun 2015 17:14:33 +0100
parents 16737f53817c
children e08f2b155d8b
files .hgignore.orig scripts/comment_parser.py scripts/score_boxplot.py scripts/score_confidence.py
diffstat 4 files changed, 157 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore.orig	Thu Jun 25 17:14:33 2015 +0100
@@ -0,0 +1,28 @@
+syntax: glob
+.project/**
+.project
+docs/SMC15/smc2015.log
+docs/SMC15/smc2015template.aux
+docs/SMC15/smc2015template.blg
+docs/SMC15/smc2015template.log
+docs/SMC15/smc2015template.out
+docs/SMC15/smc2015template.pdf
+docs/SMC15/smc2015template.synctex.gz
+re:^docs/ExperimentVariableControl\.aux$
+re:^docs/ExperimentVariableControl\.log$
+re:^docs/ExperimentVariableControl\.synctex\.gz$
+*.aux
+*.bbl
+*.gz
+*.log
+*.blg
+*.DS_Store
+re:^RealismEval/
+re:^SynthReal/
+re:^SynthRealismTest\.html$
+re:^example_eval/paperExample\.xml$
+re:^example_eval/project\.xml\.orig$
+re:^index2\.html$
+re:^realismTest\.html$
+saves/*.xml
+saves/timelines
\ No newline at end of file
--- a/scripts/comment_parser.py	Thu Jun 25 16:40:39 2015 +0100
+++ b/scripts/comment_parser.py	Thu Jun 25 17:14:33 2015 +0100
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+# -*- coding: utf-8 -*-
 
 import xml.etree.ElementTree as ET
 import os
@@ -22,7 +23,7 @@
                 os.makedirs(page_name)
 
             # for page [page_name], print comments related to fragment [id]
-            for audioelement in root.findall("*/[@id='"+page_name+"']/audioelement"):
+            for audioelement in root.findall("*/[@id='"+page_name+"']/audioelement"): #TODO in audioholder.findall(...)
                 if audioelement is not None: # Check it exists
                     audio_id = str(audioelement.get('id'))
                     
@@ -44,9 +45,9 @@
                             writer.writerow([''])
                         else:
                         	# anonymous comments:
-                            writer.writerow([commentstr]) 
+                            writer.writerow([commentstr.encode("utf-8")]) 
                             # comments with (file) name:
-                            #writer.writerow([file[:-4]] + [commentstr]) 
+                            #writer.writerow([file[:-4]] + [commentstr.encode("utf-8")]) 
 
                         #TODO Replace 'new line' in comment with something else?
                         
--- a/scripts/score_boxplot.py	Thu Jun 25 16:40:39 2015 +0100
+++ b/scripts/score_boxplot.py	Thu Jun 25 17:14:33 2015 +0100
@@ -4,8 +4,11 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
+# Enter folder where rating CSV files are (generated with score_parser.py or same format).
+# Add subject names of individual ratings to be marked in 'show_individual'. 
+
 rating_folder = 'ratings/' # folder with rating csv files
-show_individual = 'frank'
+show_individual = [] # add name/list of names of individuals to plot
 
 # get every csv file in folder
 for file in os.listdir(rating_folder): # You have to put this in folder where rating csv files are.
@@ -27,33 +30,34 @@
 
         # draw boxplot
         plt.boxplot(ratings)
-
-        # add rating of individual(s)
-        with open(rating_folder+file, 'r') as readfile: # read this csv file
-            filereader = csv.reader(readfile, delimiter=',')
-            headerrow = filereader.next() # use headerrow as X-axis
-            headerrow = headerrow[1:]
-            markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"]
-            increment = 0
-            linehandles = []
-            legendnames = []
-            for row in filereader:
-                subject_id = row[0][:-4]
-                if subject_id in show_individual:
-                    plothandle, = plt.plot(range(1,len(row)), # x-values
-                             row[1:], # y-values: csv values except subject name
-                             color='k',
-                             marker=markerlist[increment%len(markerlist)],
-                             markersize=10,
-                             linestyle='None',
-                             label=subject_id
-                            )
-                    increment += 1 # increase counter
-                    linehandles.append(plothandle)
-                    legendnames.append(subject_id)
-                    plt.legend(linehandles, legendnames,
-                           loc='upper right',
-                           bbox_to_anchor=(1.1, 1), borderaxespad=0.)
+
+        if show_individual: # only when individuals were requested (non-empty list)
+            # add rating of individual(s)
+            with open(rating_folder+file, 'r') as readfile: # read this csv file
+                filereader = csv.reader(readfile, delimiter=',')
+                headerrow = next(filereader) # use headerrow as X-axis
+                headerrow = headerrow[1:]
+                markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"]
+                increment = 0
+                linehandles = []
+                legendnames = []
+                for row in filereader:
+                    subject_id = row[0][:-4] # first cell minus its last four characters
+                    if subject_id in show_individual:
+                        plothandle, = plt.plot(range(1,len(row)), # x-values
+                                 [float(value) for value in row[1:]], # y-values: csv values except subject name, as numbers
+                                 color='k',
+                                 marker=markerlist[increment%len(markerlist)],
+                                 markersize=10,
+                                 linestyle='None',
+                                 label=subject_id
+                                )
+                        increment += 1 # increase counter
+                        linehandles.append(plothandle)
+                        legendnames.append(subject_id)
+                        plt.legend(linehandles, legendnames,
+                               loc='upper right',
+                               bbox_to_anchor=(1.1, 1), borderaxespad=0.)
 
 
         plt.xlabel('Fragment')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/score_confidence.py	Thu Jun 25 17:14:33 2015 +0100
@@ -0,0 +1,93 @@
+import sys
+import os
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sp
+import scipy.stats
+
+# Plots the mean rating per fragment with a confidence interval; saves one PNG per rating CSV.
+# Enter folder where rating CSV files are (generated with score_parser.py or same format).
+# Add subject names to 'show_individual' to mark their ratings; choose the confidence level.
+
+rating_folder = 'ratings/'    # folder with rating csv files
+show_individual = []          # list of subject names whose individual ratings are marked
+confidence = .9               # confidence level as a fraction (usually 0.80-0.99)
+
+# process every csv file in the rating folder
+for filename in os.listdir(rating_folder):
+    if filename.endswith(".csv"):
+        page_name = filename[:-4] # file name (without extension) is page ID
+        csv_path = os.path.join(rating_folder, filename)
+        # get header: the first column is skipped, the remaining cells label the X-axis
+        with open(csv_path, 'r') as readfile: # read this csv file
+            filereader = csv.reader(readfile, delimiter=',')
+            headerrow = next(filereader) # next() works on Python 2.6+ and 3
+            headerrow = headerrow[1:]
+
+        # read ratings into matrix: one row per subject, one column per fragment
+        ratings = np.loadtxt(csv_path, # pass the path so numpy opens and closes the file
+                            delimiter=",",
+                            skiprows=1, ndmin=2, # ndmin=2 keeps a single-row file 2-D
+                            usecols=range(1,len(headerrow)+1)
+                            )
+
+        # get number of rows (= subjects)
+        n = ratings.shape[0]
+
+        # get means
+        means = np.mean(ratings, axis=0)
+
+        # get errors: standard error of the mean times the two-sided t quantile (n-1 dof)
+        err = scipy.stats.sem(ratings, axis=0) * scipy.stats.t.ppf((1+confidence)/2., n-1)
+
+        # draw plot
+        plt.errorbar(range(1,len(headerrow)+1),
+                    means,
+                    yerr=err,
+                    marker="x",
+                    markersize=10,
+                    linestyle='None')
+
+        if show_individual:
+            # add rating of individual(s)
+            with open(csv_path, 'r') as readfile: # read this csv file
+                filereader = csv.reader(readfile, delimiter=',')
+                next(filereader) # skip header row (already parsed above)
+                # cycle through these markers so each individual gets a distinct one
+                markerlist = ["x", ".", "o", "*", "+", "v", ">", "<", "8", "s", "p"]
+                increment = 0
+                linehandles = []
+                legendnames = []
+                for row in filereader:
+                    subject_id = row[0][:-4] # first cell minus its last four characters
+                    if subject_id in show_individual:
+                        plothandle, = plt.plot(range(1,len(row)), # x-values
+                                 [float(value) for value in row[1:]], # y-values: csv values except subject name, as numbers
+                                 color='k',
+                                 marker=markerlist[increment%len(markerlist)],
+                                 markersize=10,
+                                 linestyle='None',
+                                 label=subject_id
+                                )
+                        increment += 1 # increase counter
+                        linehandles.append(plothandle)
+                        legendnames.append(subject_id)
+                if linehandles: # only add a legend when at least one individual was plotted
+                    plt.legend(linehandles, legendnames,
+                           loc='upper right',
+                           bbox_to_anchor=(1.1, 1), borderaxespad=0.)
+
+        plt.xlabel('Fragment')
+        plt.title('Confidence interval '+page_name)
+        plt.xlim(0, len(headerrow)+1) # only show relevant region, leave space left & right
+        plt.xticks(range(1, len(headerrow)+1), headerrow) # show fragment names
+
+        plt.ylabel('Rating')
+        plt.ylim(0,1)
+
+        #plt.show() # show plot
+        #exit()
+
+        plt.savefig(os.path.join(rating_folder, page_name+"-conf.png"))
+        plt.close()