annotate python/survey_parser.py @ 3141:335bc77627e0 tip

fixing discrete interface to allow labels to display
author Dave Moffat <me@davemoffat.com>
date Mon, 26 Jul 2021 12:15:24 +0100
parents 1ae8c03dd6a6
children
rev   line source
me@2938 1 #!/usr/bin/python
me@2938 2 import xml.etree.ElementTree as ET
me@2938 3 import os
me@2938 4 import sys
me@2938 5 import csv
me@2938 6 import matplotlib.pyplot as plt
me@2938 7 import numpy as np
me@2938 8 import scipy as sp
me@2938 9 import scipy.stats
me@2938 10
me@2938 11
# COMMAND LINE ARGUMENTS
#
# The script accepts at most one argument: the folder holding the XML result
# files. The chosen folder ends up in the module-level name ``folder_name``.

# Validate explicitly instead of with ``assert``: assertions are stripped when
# Python runs with -O, which would let extra arguments through and leave
# ``folder_name`` undefined. sys.exit(<str>) prints the message to stderr and
# exits with a non-zero status.
if len(sys.argv) > 2:
    sys.exit("survey_parser takes at most 1 command line argument\n"
             "Use: python survey_parser.py [rating_folder_location]")

# XML results files location
if len(sys.argv) == 1:
    # No argument given: fall back to '../saves', resolved against the
    # current working directory.
    folder_name = "../saves"
    print("Use: python survey_parser.py [rating_folder_location]")
    print("Using default path: " + folder_name)
else:
    folder_name = sys.argv[1]  # First command line argument is folder

# Check that folder_name is an existing directory (a plain file with that
# name is rejected too, since the script later lists its contents).
if not os.path.isdir(folder_name):
    print("Folder '" + folder_name + "' does not exist.")
    sys.exit()  # terminate script execution
elif not os.access(folder_name, os.W_OK):
    # Test the folder itself: output is written inside it, and
    # os.path.dirname() would inspect the parent (or '' for a bare name).
    # This is only a warning; execution continues.
    print("No write privileges in folder '" + folder_name + "'.")
me@2938 33
# CODE

# Accumulator for every decoded survey:
#   storage["globals"][position][ref]          -> session-level surveys
#   storage["pages"][page_ref][position][ref]  -> per-page surveys
# where position is "pre" or "post".
storage = {
    "globals": {},
    "pages": {},
}
me@2938 37
def decodeSurveyTree(session_id, surveyroot, store):
    """Decode every child entry of a survey element into ``store``.

    Each child of ``surveyroot`` is read through its ``ref`` and ``type``
    attributes. For a supported type, ``store[ref]`` is created on first
    sight as ``{"responses": []}``, its ``"type"`` is set, a ``"header"``
    tuple naming the CSV columns is added once, and the matching
    ``decodeSurvey*`` function appends one response row.

    Entries of any other type are skipped. They would otherwise leave a
    record without a ``"header"``, which the CSV export indexes directly.

    Args:
        session_id: identifier stored in the first column of each row.
        surveyroot: iterable of survey entry elements.
        store: dict mapping ``ref`` to its record; updated in place.

    Returns:
        The same ``store`` object.
    """
    for survey_entry in surveyroot:
        survey_type = survey_entry.get("type")
        # Pick the row decoder and the fixed header (None = built below).
        if survey_type in ("statement", "video"):
            header = ("ids", "duration")
            decode = decodeSurveyStatement
        elif survey_type in ("question", "number", "slider"):
            header = ("ids", "durations", "response")
            decode = decodeSurveyQuestion
        elif survey_type == "checkbox":
            header = None
            decode = decodeSurveyCheckbox
        elif survey_type == "radio":
            header = ("ids", "duration", "response")
            decode = decodeSurveyRadio
        else:
            # Unsupported (or missing) type: nothing to record.
            continue

        survey_id = survey_entry.get("ref")
        if survey_id not in store:
            store[survey_id] = {"responses": []}
        record = store[survey_id]
        record["type"] = survey_type

        if "header" not in record:
            if header is None:
                # Checkbox columns are named after this entry's options.
                option_names = [option.get("name")
                                for option in survey_entry.findall("./response")]
                header = tuple(["ids", "duration"] + option_names)
            record["header"] = header

        store[survey_id] = decode(session_id, survey_entry, record)
    return store
me@2938 66
def decodeSurveyStatement(session_id, survey_entry, store):
    """Append a ``(session_id, duration)`` row for a statement entry.

    ``duration`` is the entry's ``duration`` attribute as read by ``.get``
    (``None`` when the attribute is absent). ``store`` is updated in place
    and returned.
    """
    duration = survey_entry.get("duration")
    store["responses"].append((session_id, duration))
    return store
me@2938 71
def decodeSurveyQuestion(session_id, survey_entry, store):
    """Append a ``(session_id, duration, answer)`` row for a question entry.

    ``answer`` is the text of the entry's first ``response`` child, or
    ``None`` when there is no such child. ``store`` is updated in place and
    returned.
    """
    answer_node = survey_entry.find("./response")
    answer = None if answer_node is None else answer_node.text
    row = (session_id, survey_entry.get("duration"), answer)
    store["responses"].append(row)
    return store
me@2938 80
def decodeSurveyCheckbox(session_id, survey_entry, store):
    """Append one row holding the ``checked`` value of every option.

    The row is ``(session_id, duration, checked_1, checked_2, ...)`` with one
    value per ``response`` child, in document order. ``store`` is updated in
    place and returned.
    """
    ticks = tuple(option.get("checked")
                  for option in survey_entry.findall("./response"))
    store["responses"].append((session_id, survey_entry.get("duration")) + ticks)
    return store
me@2938 87
def decodeSurveyRadio(session_id, survey_entry, store):
    """Append a ``(session_id, duration, option_name)`` row for a radio entry.

    ``option_name`` is the ``name`` attribute of the entry's first
    ``response`` child, or ``None`` when there is no such child. ``store`` is
    updated in place and returned.
    """
    chosen = survey_entry.find("./response")
    option_name = None if chosen is None else chosen.get("name")
    store["responses"].append(
        (session_id, survey_entry.get("duration"), option_name))
    return store
me@2938 96
# Later code builds paths by plain string concatenation, so make sure the
# folder name carries a trailing slash.
if not folder_name.endswith("/"):
    folder_name = folder_name + "/"

# Create the output folder 'surveys' on first use
surveys_dir = folder_name + 'surveys'
if not os.path.exists(surveys_dir):
    os.makedirs(surveys_dir)
me@2938 103
def _collectSurvey(parent, location, subject_id, target):
    """Decode ``parent``'s survey at ``location`` into ``target[location]``.

    ``location`` is "pre" or "post". Nothing happens when ``parent`` has no
    such ``survey`` child or when that child has no entries; in particular
    ``target[location]`` is not created.
    """
    survey = parent.find("./survey[@location='" + location + "']")
    # Compare the length by value; an identity test against the literal 0
    # is implementation-dependent.
    if survey is None or len(survey) == 0:
        return
    if location not in target:
        target[location] = {}
    target[location] = decodeSurveyTree(subject_id, survey, target[location])


# Iterate through every XML file in folder_name
for file_name in os.listdir(folder_name):
    if not file_name.endswith(".xml"):
        continue
    root = ET.parse(os.path.join(folder_name, file_name)).getroot()
    subject_id = root.get('key')

    # Session-level surveys sit directly under the document root
    for location in ("pre", "post"):
        _collectSurvey(root, location, subject_id, storage["globals"])

    # Now iterate through the page specifics (completed pages only)
    for page in root.findall("./page[@state='complete']"):
        # Every completed page gets a store, even when it has no surveys
        page_store = storage["pages"].setdefault(page.get("ref"), {})
        for location in ("pre", "post"):
            _collectSurvey(page, location, subject_id, page_store)
me@2938 144
# Storage now holds entire survey structure
# Time to start exporting to files

# Root folder for all exported files; also used by the plotting code below
file_store_root = folder_name + 'surveys/'


def _writeSurveyCsv(csv_path, survey):
    """Write one survey record to ``csv_path``: header row, then responses.

    A record without a "header" (one whose type the decoder did not
    recognise) is skipped instead of raising KeyError part-way through the
    export.
    """
    header = survey.get("header")
    if header is None:
        return
    with open(csv_path, "w") as f:
        filewriter = csv.writer(f, delimiter=",")
        filewriter.writerow(header)
        filewriter.writerows(survey["responses"])


# Store globals: one '<ref>.csv' per survey entry, directly in the root
for position in storage["globals"]:
    for ref, survey in storage["globals"][position].items():
        _writeSurveyCsv(file_store_root + ref + ".csv", survey)

# Store page surveys: one sub-folder per page, one '<ref>.csv' per entry
for page_name, page_surveys in storage["pages"].items():
    page_dir = file_store_root + page_name
    for position in page_surveys:
        # Only pages with at least one survey position get a folder
        if not os.path.exists(page_dir):
            os.makedirs(page_dir)
        for ref, survey in page_surveys[position].items():
            _writeSurveyCsv(page_dir + "/" + ref + ".csv", survey)
me@2938 167
b@2957 168 # Time to plot
me@2938 169
def plotDurationHistogram(store, plot_id, saveloc):
    """Save a 10-bin histogram of the durations recorded in ``store``.

    Args:
        store: survey record; column 1 of each row in ``store["responses"]``
            is taken as the duration. A ``None`` duration counts as 0.
        plot_id: used in the plot title and the output file name.
        saveloc: path prefix; the plot is written to
            ``saveloc + plot_id + "-duration.pdf"``.
    """
    durations = np.asarray(
        [0.0 if row[1] is None else float(row[1])
         for row in store["responses"]])
    fig = plt.figure()
    try:
        plt.hist(durations, 10, facecolor='green', alpha=0.75)
        plt.xlabel("Duration")
        plt.ylabel("Count")
        plt.grid(True)
        plt.title("Histogram of durations for "+plot_id)
        plt.savefig(saveloc+plot_id+"-duration.pdf", bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; this function
        # runs once per survey entry, so close the figure once it is saved.
        plt.close(fig)
me@2938 185
def plotRadio(store, plot_id, saveloc):
    """Save a pie chart of how often each radio option was selected.

    Args:
        store: survey record; column 2 of each row in ``store["responses"]``
            is the selected option name, or ``None`` when none was recorded.
        plot_id: used in the plot title and the output file name.
        saveloc: path prefix; the plot is written to
            ``saveloc + plot_id + ".pdf"``.
    """
    counts = {}
    for row in store["responses"]:
        counts[row[2]] = counts.get(row[2], 0) + 1

    # Hand matplotlib real lists: on Python 3 keys()/values() are views, and
    # NumPy turns a view into a 0-d object array, not a 1-D array of counts.
    choices = list(counts)
    sizes = [counts[choice] for choice in choices]
    # Rows without a selection are keyed by None; give that wedge a readable
    # label instead of passing None through as label text.
    labels = ["No response" if choice is None else choice
              for choice in choices]

    fig = plt.figure()
    try:
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
        plt.title("Selections of "+plot_id)
        plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    finally:
        # pyplot keeps figures alive until closed; avoid leaking one per call.
        plt.close(fig)
me@2938 199
def plotCheckbox(store, plot_id, saveloc):
    """Save a bar chart of how often each checkbox option was ticked.

    Args:
        store: survey record. ``store["header"][2:]`` names the options, and
            the matching columns of each row in ``store["responses"]`` hold
            the string "true" for a ticked option.
        plot_id: used in the plot title and the output file name.
        saveloc: path prefix; the plot is written to
            ``saveloc + plot_id + ".pdf"``.
    """
    labels = list(store["header"][2:])
    counts = [0] * len(labels)
    for row in store["responses"]:
        # Slice to the header width; a row with fewer option columns than
        # the header simply contributes fewer values instead of failing.
        for idx, checked in enumerate(row[2:len(labels) + 2]):
            if checked == "true":
                counts[idx] += 1

    # One bar per option: the positions must match len(counts), whatever the
    # number of options is.
    x = np.arange(len(labels))
    fig = plt.figure()
    try:
        # Centre the bars on x explicitly so the tick labels line up with
        # them regardless of the library's default alignment.
        plt.bar(x, counts, width=0.8, align='center')
        plt.xticks(x, labels)
        plt.xlabel("Option")
        plt.ylabel("Count")
        plt.title("Selection counts of "+plot_id)
        plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    finally:
        # pyplot keeps figures alive until closed; avoid leaking one per call.
        plt.close(fig)
me@2938 218
# Plot every page-level survey entry: a duration histogram for all of them,
# plus a selection chart for radio and checkbox entries.
for page_name, positions in storage["pages"].items():
    for position, entries in positions.items():
        saveloc = file_store_root+page_name+"/"
        for ref, entry in entries.items():
            plotDurationHistogram(entry, ref, saveloc)
            entry_type = entry["type"]
            if entry_type == "radio":
                plotRadio(entry, ref, saveloc)
            elif entry_type == "checkbox":
                plotCheckbox(entry, ref, saveloc)