#!/usr/bin/python
"""Extract survey responses from XML result files into CSV files and plots.

Every ``*.xml`` file in the rating folder is parsed.  Survey entries found
directly under the document root ("global" surveys) and under each completed
``<page>`` element are collected per survey ``ref``.  One CSV file per survey
entry is then written below ``<folder>/surveys/``, and for the page-level
surveys a duration histogram (plus a pie chart for radio entries and a bar
chart for checkbox entries) is saved as PDF next to the page's CSV files.

Use: python score_parser.py [rating_folder_location]
"""
import xml.etree.ElementTree as ET
import os
import sys
import csv
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats


USAGE = "Use: python score_parser.py [rating_folder_location]"
DEFAULT_FOLDER = "../saves"  # Looks in 'saves/' folder from 'scripts/' folder
SURVEY_LOCATIONS = ("pre", "post")


# SURVEY DECODING

def decodeSurveyStatement(session_id, survey_entry, store):
    """Append a (session id, duration) row for a statement/video entry.

    Returns the same ``store`` dict that was passed in.
    """
    store["responses"].append((session_id, survey_entry.get("duration")))
    return store


def decodeSurveyQuestion(session_id, survey_entry, store):
    """Append a (session id, duration, response text) row.

    The response is None when the entry has no ``<response>`` child.
    Returns the same ``store`` dict that was passed in.
    """
    response_node = survey_entry.find("./response")
    text = response_node.text if response_node is not None else None
    store["responses"].append((session_id, survey_entry.get("duration"), text))
    return store


def decodeSurveyCheckbox(session_id, survey_entry, store):
    """Append a row holding the ``checked`` attribute of every option.

    The row is (session id, duration, checked_1, checked_2, ...), with one
    value per ``<response>`` child in document order.
    Returns the same ``store`` dict that was passed in.
    """
    row = [session_id, survey_entry.get("duration")]
    row.extend(node.get("checked") for node in survey_entry.findall("./response"))
    store["responses"].append(tuple(row))
    return store


def decodeSurveyRadio(session_id, survey_entry, store):
    """Append a (session id, duration, selected option name) row.

    The option name is None when the entry has no ``<response>`` child.
    Returns the same ``store`` dict that was passed in.
    """
    response_node = survey_entry.find("./response")
    name = response_node.get("name") if response_node is not None else None
    store["responses"].append((session_id, survey_entry.get("duration"), name))
    return store


# Maps the 'type' attribute of a survey entry to the function that decodes it.
_SURVEY_DECODERS = {
    "statement": decodeSurveyStatement,
    "video": decodeSurveyStatement,
    "question": decodeSurveyQuestion,
    "number": decodeSurveyQuestion,
    "slider": decodeSurveyQuestion,
    "checkbox": decodeSurveyCheckbox,
    "radio": decodeSurveyRadio,
}


def _surveyHeader(survey_type, survey_entry):
    """Return the CSV header tuple matching the rows of a survey entry."""
    if survey_type in ("statement", "video"):
        return ("ids", "duration")
    if survey_type == "checkbox":
        # One column per option, named after the option.
        options = [node.get("name") for node in survey_entry.findall("./response")]
        return tuple(["ids", "duration"] + options)
    if survey_type == "radio":
        return ("ids", "duration", "response")
    # question / number / slider.  The plural "durations" is kept exactly as
    # it has always been written, so existing CSV consumers keep working.
    return ("ids", "durations", "response")


def decodeSurveyTree(session_id, surveyroot, store):
    """Decode every child of a ``<survey>`` element into ``store``.

    ``store`` maps a survey ``ref`` to a dict with the keys ``"type"``,
    ``"header"`` (CSV column names) and ``"responses"`` (list of row tuples).
    Entries are created on first sight; the header is fixed by the first
    entry seen for a ref.  Entries whose type has no decoder are skipped,
    because they would have no header and could not be exported.

    Returns the same ``store`` dict that was passed in.
    """
    for survey_entry in surveyroot:
        survey_type = survey_entry.get("type")
        decoder = _SURVEY_DECODERS.get(survey_type)
        if decoder is None:
            continue
        survey_id = survey_entry.get("ref")
        entry_store = store.setdefault(survey_id, {"responses": []})
        entry_store["type"] = survey_type
        if "header" not in entry_store:
            entry_store["header"] = _surveyHeader(survey_type, survey_entry)
        store[survey_id] = decoder(session_id, survey_entry, entry_store)
    return store


def _collectSurveys(session_id, parent, store):
    """Decode the non-empty 'pre' and 'post' surveys directly below ``parent``."""
    for location in SURVEY_LOCATIONS:
        survey = parent.find("./survey[@location='" + location + "']")
        # len() of an Element is its number of children.
        if survey is None or len(survey) == 0:
            continue
        store[location] = decodeSurveyTree(
            session_id, survey, store.setdefault(location, {}))


def _parseResults(folder_name):
    """Collect the survey responses of every XML file in ``folder_name``.

    Returns ``{"globals": {...}, "pages": {page_ref: {...}}}`` where each
    inner dict maps 'pre'/'post' to a store as built by decodeSurveyTree.
    """
    storage = {"globals": {}, "pages": {}}
    # Sorted so that the row order of the output does not depend on the
    # arbitrary order in which the OS lists the directory.
    for file_name in sorted(os.listdir(folder_name)):
        if not file_name.endswith(".xml"):
            continue
        try:
            root = ET.parse(os.path.join(folder_name, file_name)).getroot()
        except ET.ParseError as err:
            # One malformed file should not abort the whole export.
            print("Skipping '" + file_name + "': " + str(err))
            continue
        subject_id = root.get('key')
        _collectSurveys(subject_id, root, storage["globals"])

        # Now iterate through the page specifics
        for page in root.findall("./page[@state='complete']"):
            page_store = storage["pages"].setdefault(page.get("ref"), {})
            _collectSurveys(subject_id, page, page_store)
    return storage


# CSV EXPORT

def _openCsvForWriting(path):
    """Open ``path`` the way the csv module requires on this Python version."""
    if sys.version_info[0] >= 3:
        # Without newline='' the csv writer emits blank rows on Windows.
        return open(path, "w", newline="")
    return open(path, "wb")


def _writeSurveyCsv(path, survey):
    """Write the header and all response rows of one survey entry to ``path``."""
    with _openCsvForWriting(path) as f:
        filewriter = csv.writer(f, delimiter=",")
        filewriter.writerow(survey["header"])
        filewriter.writerows(survey["responses"])


def _exportCsv(storage, file_store_root):
    """Write one CSV per survey entry below ``file_store_root``."""
    # NOTE: global 'pre' and 'post' entries share one directory, so two
    # entries with the same ref end up in the same file (the last one wins).
    for surveys in storage["globals"].values():
        for ref, survey in surveys.items():
            _writeSurveyCsv(os.path.join(file_store_root, ref + ".csv"), survey)

    for page_name, page_store in storage["pages"].items():
        page_dir = os.path.join(file_store_root, page_name)
        for surveys in page_store.values():
            if not os.path.exists(page_dir):
                os.makedirs(page_dir)
            for ref, survey in surveys.items():
                _writeSurveyCsv(os.path.join(page_dir, ref + ".csv"), survey)


# PLOTTING

def plotDurationHistogram(store, plot_id, saveloc):
    """Save a 10-bin histogram of the durations to <saveloc><plot_id>-duration.pdf.

    Rows without a duration count as 0.  ``saveloc`` is used as a plain
    string prefix, so a directory must end with a path separator.
    """
    durations = np.asarray(
        [0.0 if row[1] is None else float(row[1]) for row in store["responses"]])
    fig = plt.figure()
    plt.hist(durations, 10, facecolor='green', alpha=0.75)
    plt.xlabel("Duration")
    plt.ylabel("Count")
    plt.grid(True)
    plt.title("Histogram of durations for "+plot_id)
    plt.savefig(saveloc+plot_id+"-duration.pdf", bbox_inches='tight')
    plt.close(fig)  # figures stay in memory until closed explicitly


def plotRadio(store, plot_id, saveloc):
    """Save a pie chart of the selected options to <saveloc><plot_id>.pdf."""
    counts = {}
    for row in store["responses"]:
        counts[row[2]] = counts.get(row[2], 0) + 1
    fig = plt.figure()
    plt.pie(list(counts.values()), labels=list(counts.keys()),
            autopct='%1.1f%%', startangle=90)
    plt.title("Selections of "+plot_id)
    plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    plt.close(fig)


def plotCheckbox(store, plot_id, saveloc):
    """Save a bar chart of how often each option was ticked to <saveloc><plot_id>.pdf.

    The options are the header columns after "ids" and "duration"; a cell
    counts as ticked when it holds the string "true".
    """
    labels = list(store["header"][2:])
    counts = [0] * len(labels)
    for row in store["responses"]:
        # Slicing tolerates rows that carry fewer options than the header.
        for index, checked in enumerate(row[2:len(labels) + 2]):
            if checked == "true":
                counts[index] += 1
    x = np.arange(len(labels))
    fig = plt.figure()
    # Explicit centre alignment keeps ticks under the bars regardless of the
    # default alignment of the installed matplotlib version.
    plt.bar(x, counts, width=0.8, align='center')
    plt.xticks(x, labels)
    plt.xlabel("Option")
    plt.ylabel("Count")
    plt.title("Selection counts of "+plot_id)
    plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    plt.close(fig)


def _plotPages(storage, file_store_root):
    """Create the plots for every page-level survey entry."""
    for page_name, page_store in storage["pages"].items():
        # The trailing '' makes the path end with a separator, as the plot
        # functions append the file name by string concatenation.
        saveloc = os.path.join(file_store_root, page_name, "")
        for surveys in page_store.values():
            for ref, survey in surveys.items():
                plotDurationHistogram(survey, ref, saveloc)
                if survey["type"] == "radio":
                    plotRadio(survey, ref, saveloc)
                elif survey["type"] == "checkbox":
                    plotCheckbox(survey, ref, saveloc)


# COMMAND LINE ENTRY POINT

def main(argv):
    """Run the parser; ``argv`` has the layout of ``sys.argv``."""
    if len(argv) > 2:
        sys.exit("score_parser takes at most 1 command line argument\n" + USAGE)

    # XML results files location
    if len(argv) == 2:
        folder_name = argv[1]  # First command line argument is folder
    else:
        folder_name = DEFAULT_FOLDER
        print(USAGE)
        print("Using default path: " + folder_name)

    if not os.path.isdir(folder_name):
        sys.exit("Folder '"+folder_name+"' does not exist.")
    if not os.access(folder_name, os.W_OK):
        # The output is written inside this folder, so it must be writable.
        sys.exit("No write privileges in folder '"+folder_name+"'.")

    # Create the folder 'surveys' if not yet created
    file_store_root = os.path.join(folder_name, 'surveys')
    if not os.path.exists(file_store_root):
        os.makedirs(file_store_root)

    storage = _parseResults(folder_name)
    # Storage now holds entire survey structure
    _exportCsv(storage, file_store_root)
    _plotPages(storage, file_store_root)


if __name__ == "__main__":
    main(sys.argv)