Mercurial > hg > webaudioevaluationtool

--- a/README.txt	Wed Jul 01 11:11:20 2015 +0100
+++ b/README.txt	Wed Jul 01 11:11:54 2015 +0100
@@ -72,6 +72,9 @@
 		Extracts comments from the output XML files corresponding with the different subjects found in ‘saves/’. It creates a folder per ‘audioholder’/page it finds, and stores a CSV file with comments for every ‘audioelement’/fragment within these respective ‘audioholders’/pages. In this CSV file, every line corresponds with a subject/output XML file. Depending on the settings, the first column containing the name of the corresponding XML file can be omitted (for anonymisation).
 		Beware of Excel: sometimes the UTF-8 is not properly imported, leading to problems with special characters in the comments (particularly cumbersome for foreign languages).

+	evaluation_stats.py
+		Shows a few statistics of the tests in the ‘saves/’ folder so far, mainly for checking for errors. Shows the number of files found, the audioholder IDs that were tested (and how many times each ID was tested), the duration of each page, the duration of each complete test, the average duration per page, and the average duration as a function of the page number.
+
 	score_parser.py
 		Extracts rating values from the XML to CSV - necessary for running visualisation of ratings. Creates the folder ‘saves/ratings/‘ if not yet created, to which it writes a separate file for every ‘audioholder’/page in any of the output XMLs it finds in ‘saves/‘. Within each file, rows represent different subjects (output XML file names) and columns represent different ‘audioelements’/fragments.
--- a/scripts/comment_parser.py	Wed Jul 01 11:11:20 2015 +0100
+++ b/scripts/comment_parser.py	Wed Jul 01 11:11:54 2015 +0100
@@ -13,12 +13,13 @@
     if file.endswith(".xml"):
         tree = ET.parse(folder_name + '/' + file)
         root = tree.getroot()
-
+
         # get list of all page names
         for audioholder in root.findall("./audioholder"):   # iterate over pages
             page_name = audioholder.get('id')               # get page name

             if page_name is None: # ignore 'empty' audio_holders
+                print "WARNING: " + file + " contains empty audio holder. (comment_parser.py)"
                 break

             # create folder [page_name] if not yet created
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/evaluation_stats.py	Wed Jul 01 11:11:54 2015 +0100
@@ -0,0 +1,105 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import xml.etree.ElementTree as ET
+import os
+
+# XML results files location (modify as needed):
+folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
+
+# Turn a number of seconds (int or float) into the string '[minutes] min [seconds] s'
+def seconds2timestr(time_in_seconds):
+    time_in_minutes = int(time_in_seconds/60)      # whole minutes; int() drops any fraction
+    remaining_seconds = int(time_in_seconds%60)    # whole seconds left after the full minutes
+    return str(time_in_minutes) + " min " + str(remaining_seconds) + " s"
+
+# running totals, accumulated over all XML files in the folder
+number_of_XML_files = 0
+number_of_pages = 0         # pages counted over all files together
+time_per_page_accum = 0     # sum of all page durations (divided by number_of_pages below)
+
+# lists filled in while parsing
+page_names = []         # distinct audioholder ids, in order of first appearance
+page_count = []         # page_count[i]: how many times page_names[i] was tested
+duration_page = []      # duration of experiment as a function of page (never used in this script)
+duration_subject = []   # (never used in this script)
+duration_order = []     # duration_order[k]: durations of the (k+1)-th page of every subject
+
+# get every XML file in folder
+files_list = os.listdir(folder_name)
+for file in files_list: # iterate over all files in files_list
+    if file.endswith(".xml"): # check if XML file
+        number_of_XML_files += 1
+        tree = ET.parse(folder_name + '/' + file)
+        root = tree.getroot()
+
+        print file # print file name (subject name)
+
+        # reset per-subject counters for the new file
+        total_duration = 0
+        page_number = 0     # zero-based position of the page within this file
+
+        # walk over the pages of this file: count ids and collect durations
+        for audioholder in root.findall("./audioholder"):   # iterate over pages
+            page_name = audioholder.get('id')               # get page name
+
+            if page_name is None: # audioholder without an 'id' is treated as 'empty'
+                print "WARNING: " + file + " contains empty audio holder. (evaluation_stats.py)"
+                break # leaves the page loop: any later audioholders in this file are skipped too
+
+            # keep list of audioholder ids and count how many times each audioholder id was tested
+            if page_name in page_names:
+                page_index = page_names.index(page_name) # get index
+                page_count[page_index] += 1
+            else:
+                page_names.append(page_name)
+                page_count.append(1)
+
+            # 'testTime' keeps total duration: subtract time so far for duration of this audioholder (AttributeError if the metricresult is missing)
+            duration = float(audioholder.find("./metric/metricresult[@id='testTime']").text) - total_duration
+
+            # total duration of test
+            total_duration += duration
+
+            # print audioholder id and duration
+            print "    " + page_name + ": " + seconds2timestr(duration)
+
+            # keep track of duration as a function of page position (first page, second page, ...)
+            if len(duration_order)>page_number:
+                duration_order[page_number].append(duration)
+            else:
+                duration_order.append([duration]) # first subject to reach this page position
+
+            page_number += 1 # increase page count for this specific test
+            number_of_pages += 1 # increase total number of pages
+            time_per_page_accum += duration # total duration (for average time spent per page)
+
+        # print total duration of this test
+        print "    TOTAL: " + seconds2timestr(total_duration)
+
+# PRINT EVERYTHING
+
+print "Number of XML files: " + str(number_of_XML_files)
+print "Number of pages: " + str(number_of_pages)
+print "Average time per page: " + seconds2timestr(time_per_page_accum/number_of_pages) # NOTE: ZeroDivisionError if no pages were counted
+page_count_strings = list(str(x) for x in page_count)
+count_list = page_names + page_count_strings   # placeholder list of the right length (2 entries per page)
+count_list[::2] = page_names                   # even slots: audioholder ids
+count_list[1::2] = page_count_strings          # odd slots: matching counts -> [id, count, id, count, ...]
+print "Pages tested: " + str(count_list)
+
+# Average duration for first, second, ... page
+for page_number in range(len(duration_order)): #TODO make maximum page number automatic (NOTE(review): range already follows len(duration_order))
+    print "Average duration page " + str(page_number+1) + ": " +\
+          seconds2timestr(sum(duration_order[page_number])/len(duration_order[page_number])) +\
+          " ("+str(len(duration_order[page_number]))+" subjects)"
+
+
+#TODO
+# time per page as a function of number of fragments (plot)
+# time per participant as a function of number of pages
+# plot total time for each participant
+# plot total time
+# show 'count' per page (in order)
+
+# clear up page_index <> page_count <> page_number confusion
--- a/scripts/score_parser.py	Wed Jul 01 11:11:20 2015 +0100
+++ b/scripts/score_parser.py	Wed Jul 01 11:11:54 2015 +0100
@@ -24,6 +24,7 @@
             page_name = audioholder.get('id') # get page name

             if page_name is None: # ignore 'empty' audio_holders
+                print "WARNING: " + file + " contains empty audio holder. (score_parser.py)"
                 break

             file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name
--- a/scripts/score_plot.py	Wed Jul 01 11:11:20 2015 +0100
+++ b/scripts/score_plot.py	Wed Jul 01 11:11:54 2015 +0100
@@ -11,10 +11,10 @@
 # CONFIGURATION

 # Which type(s) of plot do you want?
-enable_boxplot    = False     # show box plot
-enable_confidence = True      # show confidence interval
+enable_boxplot    = True      # show box plot
+enable_confidence = False     # show confidence interval
 confidence        = 0.90      # confidence value (for confidence interval plot)
-enable_individual = True      # show all individual ratings
+enable_individual = False     # show all individual ratings
 show_individual   = []        # show specific individuals
 show_legend       = False     # show names of individuals
 #TODO: Merge, implement this functionality
@@ -56,7 +56,7 @@
                                    )

         # assert at least 2 subjects (move on to next file if violated)
-        if ratings.shape[1]<2:
+        if ratings.shape[0]<2:
             print "WARNING: Just one subject for " + page_name + ". Moving on to next file."
             break