changeset 2938:76c06bd88bbe

Adding surver parser. closes #169
author Dave Moffat <me@davemoffat.com>
date Tue, 12 Sep 2017 15:23:30 +0100
parents 8bcba5c95656
children 5d7e33fd00d8 e2e189785bfa
files python/survey_parser.py
diffstat 1 files changed, 227 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/survey_parser.py	Tue Sep 12 15:23:30 2017 +0100
@@ -0,0 +1,227 @@
+#!/usr/bin/python
+import xml.etree.ElementTree as ET
+import os
+import sys
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sp
+import scipy.stats
+
+
+# COMMAND LINE ARGUMENTS
+
+assert len(sys.argv)<3, "score_parser takes at most 1 command line argument\n"+\
+                        "Use: python score_parser.py [rating_folder_location]"
+
+# XML results files location
+if len(sys.argv) == 1:
+    folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
+    print("Use: python score_parser.py [rating_folder_location]")
+    print("Using default path: " + folder_name)
+elif len(sys.argv) == 2:
+    folder_name = sys.argv[1]   # First command line argument is folder
+
+# check if folder_name exists
+if not os.path.exists(folder_name):
+    #the file is not there
+    print("Folder '"+folder_name+"' does not exist.")
+    sys.exit() # terminate script execution
+elif not os.access(os.path.dirname(folder_name), os.W_OK):
+    #the file does exist but write privileges are not given
+    print("No write privileges in folder '"+folder_name+"'.")
+
+# CODE
+
+storage = {"globals":{}, "pages": {}}
+
+def decodeSurveyTree(session_id, surveyroot, store):
+    # Get all the childs
+    for survey_entry in list(surveyroot):
+        survey_id = survey_entry.get("ref")
+        if survey_id not in store.keys():
+            store[survey_id] = {"responses": []}
+        survey_type = survey_entry.get("type")
+        store[survey_id]["type"] = survey_type
+        if survey_type == "statement" or survey_type == "video":
+            if "header" not in store[survey_id]:
+                store[survey_id]["header"] = ("ids", "duration")
+            store[survey_id] = decodeSurveyStatement(session_id, survey_entry, store[survey_id])
+        elif survey_type == "question" or survey_type == "number" or survey_type == "slider":
+            if "header" not in store[survey_id]:
+                store[survey_id]["header"] = ("ids", "durations", "response")
+            store[survey_id] = decodeSurveyQuestion(session_id, survey_entry, store[survey_id])
+        elif survey_type == "checkbox":
+            if "header" not in store[survey_id]:
+                head = ["ids", "duration"]
+                for option in survey_entry.findall("./response"):
+                    head.append(option.get("name"))
+                store[survey_id]["header"] = tuple(head)
+            store[survey_id] = decodeSurveyCheckbox(session_id, survey_entry, store[survey_id])
+        elif survey_type == "radio":
+            if "header" not in store[survey_id]:
+                store[survey_id]["header"] = ("ids", "duration", "response")
+            store[survey_id] = decodeSurveyRadio(session_id, survey_entry, store[survey_id])
+    return store
+
+def decodeSurveyStatement(session_id, survey_entry, store):
+    resp = (session_id, survey_entry.get("duration"))
+    store["responses"].append(resp)
+    return store
+
+def decodeSurveyQuestion(session_id, survey_entry, store):
+    if survey_entry.find("./response") is not None:
+        resp = (session_id, survey_entry.get("duration"), survey_entry.find("./response").text)
+    else:
+        resp = (session_id, survey_entry.get("duration"), None)
+    store["responses"].append(resp)
+    return store
+    # return None
+
+def decodeSurveyCheckbox(session_id, survey_entry, store):
+    response = [session_id, survey_entry.get("duration")]
+    for node in survey_entry.findall("./response"):
+        response.append(node.get("checked"))
+    store["responses"].append(tuple(response))
+    return store
+
+def decodeSurveyRadio(session_id, survey_entry, store):
+    if survey_entry.find("./response") is not None:
+        response = (session_id, survey_entry.get("duration"), survey_entry.find("./response").get("name"))
+    else:
+        response = (session_id, survey_entry.get("duration"), None)
+    store["responses"].append(response)
+    return store
+    # return None
+
+if folder_name.endswith("/") is False:
+    folder_name += "/"
+
+# Create the folder 'surveys' if not yet created
+if not os.path.exists(folder_name + 'surveys'):
+    os.makedirs(folder_name + 'surveys')
+
+#Iterate through every XML file in folder_name
+for file_name in os.listdir(folder_name):
+    if file_name.endswith(".xml"):
+        tree = ET.parse(folder_name +file_name)
+        root = tree.getroot()
+        subject_id = root.get('key')
+        pre_survey = root.find("./survey[@location='pre']")
+        # print pre_survey
+        if pre_survey is not None:
+            if len(pre_survey) is not 0:
+                if "pre" not in storage["globals"].keys():
+                    storage["globals"]["pre"] = {}
+                storage["globals"]["pre"] = decodeSurveyTree(subject_id, pre_survey, storage["globals"]["pre"])
+        post_survey = root.find("./survey[@location='post']")
+        if post_survey is not None:
+            if len(post_survey) is not 0:
+                if "post" not in storage["globals"].keys():
+                    storage["globals"]["post"] = {}
+                storage["globals"]["post"] = decodeSurveyTree(subject_id, post_survey, storage["globals"]["post"])
+        
+        # Now iterate through the page specifics
+        for page in root.findall("./page[@state='complete']"):
+            page_name = page.get("ref")
+            pre_survey = page.find("./survey[@location='pre']")
+            try:
+                page_store = storage["pages"][page_name]
+            except KeyError:
+                storage["pages"][page_name] = {}
+                page_store = storage["pages"][page_name]
+            if pre_survey is not None:
+                if len(pre_survey) is not 0:
+                    if "pre" not in page_store.keys():
+                        page_store["pre"] = {}
+                    page_store["pre"] = decodeSurveyTree(subject_id, pre_survey, page_store["pre"])
+            post_survey = page.find("./survey[@location='post']")
+            if post_survey is not None:
+                if len(post_survey) is not 0:
+                    if "post" not in page_store.keys():
+                        page_store["post"] = {}
+                page_store["post"] = decodeSurveyTree(subject_id, post_survey, page_store["post"])
+
+#Storage now holds entire survey structure
+# Time to start exporting to files
+
+# Store globals
+file_store_root = folder_name + 'surveys/'
+for position in storage["globals"].keys():
+    for ref in storage["globals"][position].keys():
+        with open(file_store_root+ref+".csv", "w") as f:
+            filewriter = csv.writer(f, delimiter=",")
+            filewriter.writerow(storage["globals"][position][ref]["header"])
+            for row in storage["globals"][position][ref]["responses"]:
+                filewriter.writerow(row)
+for page_name in storage["pages"].keys():
+    for position in storage["pages"][page_name].keys():
+        if not os.path.exists(file_store_root + page_name):
+            os.makedirs(file_store_root + page_name)
+        for ref in storage["pages"][page_name][position].keys():
+            with open(file_store_root+page_name+"/"+ref+".csv", "w") as f:
+                filewriter = csv.writer(f, delimiter=",")
+                filewriter.writerow(storage["pages"][page_name][position][ref]["header"])
+                for row in storage["pages"][page_name][position][ref]["responses"]:
+                    filewriter.writerow(row)
+
+#Time to plot
+
+def plotDurationHistogram(store, plot_id, saveloc):
+    x = []
+    for row in store["responses"]:
+        r_temp = row[1]
+        if r_temp  is None:
+            r_temp = 0;
+        x.append(float(r_temp))
+    x = np.asarray(x)
+    plt.figure()
+    n, bins, patches = plt.hist(x, 10, facecolor='green', alpha=0.75)
+    plt.xlabel("Duration")
+    plt.ylabel("Count")
+    plt.grid(True)
+    plt.title("Histogram of durations for "+plot_id)
+    plt.savefig(saveloc+plot_id+"-duration.pdf", bbox_inches='tight')
+
+def plotRadio(store, plot_id, saveloc):
+    plt.figure()
+    data = {}
+    for row in store["responses"]:
+        try:
+            data[row[2]] += 1
+        except KeyError:
+            data[row[2]] = 1
+    labels = data.keys()
+    sizes = data.values()
+    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
+    plt.title("Selections of "+plot_id)
+    plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
+
+def plotCheckbox(store, plot_id, saveloc):
+    data = []
+    labels = []
+    for h in store["header"][2::1]:
+        labels.append(h)
+        data.append(0)
+    for row in store["responses"]:
+        for i in range(2, len(labels)+2):
+            if row[i] == "true":
+                data[i-2] += 1
+    x = scipy.arange(4)
+    plt.figure()
+    plt.bar(x, data, width=0.8)
+    plt.xticks(x+0.4, labels)
+    plt.xlabel("Option")
+    plt.ylabel("Count")
+    plt.title("Selection counts of "+plot_id)
+    plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
+
+for page_name in storage["pages"].keys():
+    for position in storage["pages"][page_name].keys():
+        saveloc = file_store_root+page_name+"/"
+        for ref in storage["pages"][page_name][position].keys():
+            plotDurationHistogram(storage["pages"][page_name][position][ref],ref, saveloc)
+            if storage["pages"][page_name][position][ref]["type"] == "radio":
+                plotRadio(storage["pages"][page_name][position][ref],ref, saveloc)
+            if storage["pages"][page_name][position][ref]["type"] == "checkbox":
+                plotCheckbox(storage["pages"][page_name][position][ref],ref, saveloc)
\ No newline at end of file