view python/evaluation_stats.py @ 2376:c41caaa96633

Some fixes for #90. Also a failsafe loop if the server never responds with meaningful information from saves (for instance, running only on apache or basic http servers). More changes to pythonServer for python 3.5. Please check if still valid on 2.7
author Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk>
date Thu, 19 May 2016 10:44:19 +0100
parents df459c20946e
children 370a82784c71
line wrap: on
line source
#!/usr/bin/python
# -*- coding: utf-8 -*-

import xml.etree.ElementTree as ET
import os       # for getting files from directory
import operator # for sorting data with multiple keys
import sys      # for accessing command line arguments

# Command line arguments
# The argument count is checked explicitly rather than with `assert`:
# assertions are removed when Python runs with -O, which would let extra
# arguments through and leave folder_name undefined further down.
if len(sys.argv) >= 3:
    sys.exit("evaluation_stats takes at most 1 command line argument\n"+\
             "Use: python evaluation_stats.py [results_folder]")

# XML results files location
if len(sys.argv) == 2:
    folder_name = sys.argv[1]   # First command line argument is folder
else:
    folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
    print("Use: python evaluation_stats.py [results_folder]")
    print("Using default path: " + folder_name)

# Turn number of seconds (int) to '[minutes] min [seconds] s' (string)
def seconds2timestr(time_in_seconds):
    """Format a duration given in seconds as '<minutes> min <seconds> s'.

    Both the minutes and the remaining seconds are truncated to whole
    numbers with int() before being put into the string.
    """
    whole_minutes = int(time_in_seconds/60)
    leftover_seconds = int(time_in_seconds%60)
    return "{0} min {1} s".format(whole_minutes, leftover_seconds)

# stats initialisation
# Global integer counters, all starting at zero. Integers are immutable, so
# chained assignment gives every name its own independent counter.
number_of_XML_files = number_of_pages = number_of_fragments = 0
total_empty_comments = total_not_played = total_not_moved = 0
time_per_page_accum = 0

# arrays initialisation
# Every name is bound to its own list object; these lists are filled in
# lockstep (one entry per page id), except duration_order which has one
# entry per page position.
page_names = list()
page_count = list()
duration_page = list()       # duration of experiment in function of page content
duration_order = list()      # duration of experiment in function of page number
fragments_per_page = list()  # number of fragments for corresponding page

# get every XML file in folder
# Every XML file is treated as the results of one subject. For each page that
# is marked complete, the loop updates the global counters and the per-page-id
# and per-page-position duration lists that are reported further down.
files_list = os.listdir(folder_name)
for file_name in files_list: # iterate over all files in files_list
    if file_name.endswith(".xml"): # check if XML file
        number_of_XML_files += 1
        tree = ET.parse(os.path.join(folder_name, file_name))
        root = tree.getroot()

        print(file_name) # print file name (subject name)

        # reset for new subject
        total_duration = 0
        page_number = 0

        # get list of all page names
        for page in root.findall("./page"):   # iterate over pages
            page_name = page.get('ref')               # get page name

            if page_name is None: # ignore 'empty' audio_holders
                print("\tWARNING: " + file_name + " contains empty audio holder. (evaluation_stats.py)")
                break # leaves the page loop: remaining pages of this file are skipped
            if page.get("state") != "complete":
                print("\tWARNING: " + file_name + " contains incomplete audio holder.")
                break

            # find() returns None when the metric is absent; warn and stop
            # with this file instead of crashing on a missing 'testTime'
            test_time = page.find("./metric/metricresult[@id='testTime']")
            if test_time is None or test_time.text is None:
                print("\tWARNING: " + file_name + " contains page without test time.")
                break

            number_of_comments = 0 # for this page
            number_of_missing_comments = 0 # for this page
            not_played = 0 # for this page
            not_moved = 0 # for this page

            # 'testTime' keeps total duration: subtract time so far for duration of this page
            duration = float(test_time.text) - total_duration

            # total duration of test
            total_duration += duration

            # number of audio elements
            audioelements = page.findall("./audioelement") # get audioelements
            number_of_fragments += len(audioelements) # add length of this list to total

            # number of comments (interesting if comments not mandatory)
            for audioelement in audioelements:
                if audioelement.get("type") != "outside-reference":
                    response = audioelement.find("./comment/response")
                    was_played = audioelement.find("./metric/metricresult/[@name='elementFlagListenedTo']")
                    was_moved = audioelement.find("./metric/metricresult/[@name='elementFlagMoved']")
                    # a fragment without a comment element (response is None)
                    # counts as a missing comment, as does a response of at
                    # most one character
                    if response is not None and response.text is not None and len(response.text) > 1:
                        number_of_comments += 1
                    else:
                        number_of_missing_comments += 1
                    if was_played is not None and was_played.text == 'false':
                        not_played += 1
                    if was_moved is not None and was_moved.text == 'false':
                        not_moved += 1

            # update global counters
            total_empty_comments += number_of_missing_comments
            total_not_played += not_played
            total_not_moved += not_moved

            # print page id and duration
            print("    " + page_name + ": " + seconds2timestr(duration) + ", "\
                  + str(number_of_comments)+"/"\
                  +str(number_of_comments+number_of_missing_comments)+" comments")

            # number of audio elements not played
            if not_played > 1:
                print('ATTENTION: '+str(not_played)+' fragments were not listened to!')
            if not_played == 1:
                print('ATTENTION: one fragment was not listened to!')

            # number of audio element markers not moved
            if not_moved > 1:
                print('ATTENTION: '+str(not_moved)+' markers were not moved!')
            if not_moved == 1:
                print('ATTENTION: one marker was not moved!')

            # keep track of duration in function of page index
            if len(duration_order)>page_number:
                duration_order[page_number].append(duration)
            else:
                duration_order.append([duration])

            # keep list of page ids and count how many times each page id
            # was tested, how long it took, and how many fragments there were (if number of
            # fragments is different, store as different page id)
            if page_name in page_names:
                page_index = page_names.index(page_name) # get index
                # check if number of audioelements the same
                if len(audioelements) == fragments_per_page[page_index]:
                    page_count[page_index] += 1
                    duration_page[page_index].append(duration)
                else: # make new entry
                    alt_page_name = page_name+"("+str(len(audioelements))+")"
                    if alt_page_name in page_names: # if already there
                        alt_page_index = page_names.index(alt_page_name) # get index
                        page_count[alt_page_index] += 1
                        duration_page[alt_page_index].append(duration)
                    else:
                        page_names.append(alt_page_name)
                        page_count.append(1)
                        duration_page.append([duration])
                        fragments_per_page.append(len(audioelements))
            else:
                page_names.append(page_name)
                page_count.append(1)
                duration_page.append([duration])
                fragments_per_page.append(len(audioelements))

            # bookkeeping
            page_number += 1 # increase page count for this specific test
            number_of_pages += 1 # increase total number of pages
            time_per_page_accum += duration # total duration (for average time spent per page)

        # print total duration of this test
        print("    TOTAL: " + seconds2timestr(total_duration))


# PRINT EVERYTHING

def _percentage(count, total):
    """Return count as a percentage of total, as a string rounded to 2 decimals.

    Returns "0.0" when total is 0, so that a run which collected no
    fragments still prints a summary instead of dividing by zero.
    """
    if total == 0:
        return "0.0"
    return str(round(100.0*count/total, 2))


print("Number of XML files: " + str(number_of_XML_files))
print("Number of pages: " + str(number_of_pages))
print("Number of fragments: " + str(number_of_fragments))
print("Number of empty comments: " + str(total_empty_comments) +\
      " (" + _percentage(total_empty_comments, number_of_fragments) + "%)")
print("Number of unplayed fragments: " + str(total_not_played) +\
      " (" + _percentage(total_not_played, number_of_fragments) + "%)")
print("Number of unmoved markers: " + str(total_not_moved) +\
      " (" + _percentage(total_not_moved, number_of_fragments) + "%)")

# average over all pages of all subjects; 0 when no completed page was found
if number_of_pages > 0:
    average_time_per_page = time_per_page_accum/number_of_pages
else:
    average_time_per_page = 0
print("Average time per page: " + seconds2timestr(average_time_per_page))

# Average duration for first, second, ... page
# (every list in duration_order is created with one duration, so none is empty)
print("Average duration per ordered page:")
for page_number, durations in enumerate(duration_order):
    print("        page " + str(page_number+1) + ": " +\
        seconds2timestr(sum(durations)/len(durations)) +\
            " ("+str(len(durations))+" subjects)")


# Sort pages by average duration, then by number of audioelements
# NOTE(review): an earlier comment described the order as "number of
# audioelements, then duration", but the sort key below is (average duration,
# fragments) - confirm which order is wanted before changing the key.

# average duration and number of subjects per page
number_of_subjects_page = [len(durations) for durations in duration_page]
average_duration_page = [sum(durations)/len(durations) for durations in duration_page]

# combine into (page id, average duration, fragments, subjects) tuples and sort
combined_list = sorted(zip(page_names, average_duration_page,
                           fragments_per_page, number_of_subjects_page),
                       key=operator.itemgetter(1, 2)) # sort

# Show average duration for all page IDs
print("Average duration per page ID:")
for name, average_duration, fragments, subjects in combined_list:
    print("        "+name + ": " \
          + seconds2timestr(average_duration) \
          + " (" + str(subjects) + " subjects, " \
          + str(fragments) + " fragments)")


#TODO
# time per page in function of number of fragments (plot)
# time per participant in function of number of pages
# plot total time for each participant
# plot total time
# show 'count' per page (in order)

# clear up page_index <> page_count <> page_number confusion

# LaTeX -> PDF print out