me@2938
|
1 #!/usr/bin/python
|
me@2938
|
2 import xml.etree.ElementTree as ET
|
me@2938
|
3 import os
|
me@2938
|
4 import sys
|
me@2938
|
5 import csv
|
me@2938
|
6 import matplotlib.pyplot as plt
|
me@2938
|
7 import numpy as np
|
me@2938
|
8 import scipy as sp
|
me@2938
|
9 import scipy.stats
|
me@2938
|
10
|
me@2938
|
11
|
me@2938
|
12 # COMMAND LINE ARGUMENTS
|
me@2938
|
13
|
me@2938
|
# Validate the argument count explicitly. An `assert` is stripped when Python
# runs with -O, which would let the script continue with an unexpected
# argument list.
if len(sys.argv) >= 3:
    sys.exit("score_parser takes at most 1 command line argument\n" +
             "Use: python score_parser.py [rating_folder_location]")

# XML results files location
if len(sys.argv) < 2:
    folder_name = "../saves"    # Looks in 'saves/' folder from 'scripts/' folder
    print("Use: python score_parser.py [rating_folder_location]")
    print("Using default path: " + folder_name)
else:
    folder_name = sys.argv[1]   # First command line argument is folder

# check if folder_name exists
if not os.path.exists(folder_name):
    # the folder is not there
    print("Folder '"+folder_name+"' does not exist.")
    sys.exit()  # terminate script execution
elif not os.access(folder_name, os.W_OK):
    # The folder exists but is not writable. Test the folder itself:
    # os.path.dirname() yields '' for a bare name such as 'saves' (which
    # os.access always rejects) and the parent directory for '../saves'.
    # Only a warning is printed; execution continues as before.
    print("No write privileges in folder '"+folder_name+"'.")

# CODE

# Accumulates every decoded survey: "globals" holds the session-wide pre/post
# surveys, "pages" holds the per-page pre/post surveys keyed by page ref.
storage = {"globals": {}, "pages": {}}
|
me@2938
|
37
|
me@2938
|
def decodeSurveyTree(session_id, surveyroot, store):
    """Fold every child entry of a survey element into ``store``.

    ``store`` maps each entry's ``ref`` attribute to a record holding a
    ``"responses"`` list, the entry ``"type"`` and, for recognised types, a
    ``"header"`` tuple naming the columns of each response row.  The header is
    only written the first time a ref is seen with a recognised type; the
    response row itself is appended by the type-specific decoder.  Entries of
    an unrecognised type only get their record created and ``"type"`` set.

    Returns the same ``store`` object that was passed in.
    """
    for entry in surveyroot:
        ref = entry.get("ref")
        record = store.setdefault(ref, {"responses": []})
        kind = entry.get("type")
        record["type"] = kind

        if kind in ("statement", "video"):
            record.setdefault("header", ("ids", "duration"))
            store[ref] = decodeSurveyStatement(session_id, entry, record)
        elif kind in ("question", "number", "slider"):
            record.setdefault("header", ("ids", "durations", "response"))
            store[ref] = decodeSurveyQuestion(session_id, entry, record)
        elif kind == "checkbox":
            if "header" not in record:
                # One extra column per checkbox option, named after the option
                option_names = [opt.get("name") for opt in entry.findall("./response")]
                record["header"] = tuple(["ids", "duration"] + option_names)
            store[ref] = decodeSurveyCheckbox(session_id, entry, record)
        elif kind == "radio":
            record.setdefault("header", ("ids", "duration", "response"))
            store[ref] = decodeSurveyRadio(session_id, entry, record)
    return store
|
me@2938
|
66
|
me@2938
|
def decodeSurveyStatement(session_id, survey_entry, store):
    """Record a statement/video entry as a ``(session_id, duration)`` row.

    ``duration`` is the raw ``duration`` attribute of ``survey_entry`` (a
    string, or None when the attribute is absent).  The row is appended to
    ``store["responses"]`` and ``store`` is returned.
    """
    duration = survey_entry.get("duration")
    store["responses"].append((session_id, duration))
    return store
|
me@2938
|
71
|
me@2938
|
def decodeSurveyQuestion(session_id, survey_entry, store):
    """Record a question/number/slider entry as ``(session_id, duration, answer)``.

    ``answer`` is the text of the first ``<response>`` child of
    ``survey_entry``, or None when there is no such child.  The row is
    appended to ``store["responses"]`` and ``store`` is returned.
    """
    answer_node = survey_entry.find("./response")
    answer = None if answer_node is None else answer_node.text
    store["responses"].append((session_id, survey_entry.get("duration"), answer))
    return store
|
me@2938
|
80
|
me@2938
|
def decodeSurveyCheckbox(session_id, survey_entry, store):
    """Record a checkbox entry as ``(session_id, duration, checked...)``.

    One trailing column is produced per ``<response>`` child of
    ``survey_entry``, in document order, holding that child's raw ``checked``
    attribute.  The row is appended to ``store["responses"]`` and ``store``
    is returned.
    """
    checked_flags = [option.get("checked") for option in survey_entry.findall("./response")]
    row = (session_id, survey_entry.get("duration")) + tuple(checked_flags)
    store["responses"].append(row)
    return store
|
me@2938
|
87
|
me@2938
|
def decodeSurveyRadio(session_id, survey_entry, store):
    """Record a radio entry as ``(session_id, duration, selected_name)``.

    ``selected_name`` is the ``name`` attribute of the first ``<response>``
    child of ``survey_entry``, or None when there is no such child.  The row
    is appended to ``store["responses"]`` and ``store`` is returned.
    """
    chosen = survey_entry.find("./response")
    selected_name = None if chosen is None else chosen.get("name")
    store["responses"].append((session_id, survey_entry.get("duration"), selected_name))
    return store
|
me@2938
|
96
|
me@2938
|
# Make sure the folder path ends with a separator so names can be appended
if not folder_name.endswith("/"):
    folder_name += "/"

# Create the folder 'surveys' if not yet created
if not os.path.exists(folder_name + 'surveys'):
    os.makedirs(folder_name + 'surveys')


def _collect_surveys(node, subject_id, target):
    """Decode the 'pre' and 'post' surveys found directly under ``node``.

    For each location, a survey element that exists and has at least one
    child is decoded with decodeSurveyTree into ``target[location]`` (created
    on first use).  Missing or empty surveys leave ``target`` untouched.
    """
    for location in ("pre", "post"):
        survey = node.find("./survey[@location='" + location + "']")
        # Compare the child count by value; `len(x) is not 0` tests object
        # identity and only works by accident of integer caching.
        if survey is None or len(survey) == 0:
            continue
        target[location] = decodeSurveyTree(subject_id, survey, target.setdefault(location, {}))


# Iterate through every XML file in folder_name
for file_name in os.listdir(folder_name):
    if not file_name.endswith(".xml"):
        continue
    tree = ET.parse(folder_name + file_name)
    root = tree.getroot()
    subject_id = root.get('key')

    # Session-wide surveys attached directly to the document root
    _collect_surveys(root, subject_id, storage["globals"])

    # Now iterate through the page specifics (completed pages only)
    for page in root.findall("./page[@state='complete']"):
        page_name = page.get("ref")
        # Every completed page gets an entry, even if it carries no surveys
        page_store = storage["pages"].setdefault(page_name, {})
        _collect_surveys(page, subject_id, page_store)
|
me@2938
|
144
|
b@2957
|
145 # Storage now holds entire survey structure
|
me@2938
|
146 # Time to start exporting to files
|
me@2938
|
147
|
me@2938
|
148 # Store globals
|
me@2938
|
file_store_root = folder_name + 'surveys/'

# Session-wide surveys: one CSV per survey ref, written directly in 'surveys/'
for position in storage["globals"]:
    for ref, survey in storage["globals"][position].items():
        with open(file_store_root+ref+".csv", "w") as f:
            filewriter = csv.writer(f, delimiter=",")
            filewriter.writerow(survey["header"])
            filewriter.writerows(survey["responses"])

# Per-page surveys: one sub-folder per page, one CSV per survey ref inside it
for page_name, positions in storage["pages"].items():
    for position, surveys in positions.items():
        # The page folder is only created for pages that hold a survey
        if not os.path.exists(file_store_root + page_name):
            os.makedirs(file_store_root + page_name)
        for ref, survey in surveys.items():
            with open(file_store_root+page_name+"/"+ref+".csv", "w") as f:
                filewriter = csv.writer(f, delimiter=",")
                filewriter.writerow(survey["header"])
                filewriter.writerows(survey["responses"])
|
me@2938
|
167
|
b@2957
|
168 # Time to plot
|
me@2938
|
169
|
me@2938
|
def plotDurationHistogram(store, plot_id, saveloc):
    """Save a 10-bin histogram of the durations recorded for one survey.

    Parameters:
        store:   survey record; column 1 of every row in ``store["responses"]``
                 is read as the duration.
        plot_id: survey reference, used in the title and the file name.
        saveloc: folder prefix (including trailing separator) for the output.

    The figure is written to ``saveloc + plot_id + "-duration.pdf"`` and then
    closed, so figures do not pile up across calls.
    """
    # Rows without a recorded duration are counted as a duration of 0
    durations = np.asarray(
        [0.0 if row[1] is None else float(row[1]) for row in store["responses"]]
    )
    fig = plt.figure()
    try:
        plt.hist(durations, 10, facecolor='green', alpha=0.75)
        plt.xlabel("Duration")
        plt.ylabel("Count")
        plt.grid(True)
        plt.title("Histogram of durations for "+plot_id)
        plt.savefig(saveloc+plot_id+"-duration.pdf", bbox_inches='tight')
    finally:
        # Release the figure even if drawing or saving failed
        plt.close(fig)
|
me@2938
|
185
|
me@2938
|
def plotRadio(store, plot_id, saveloc):
    """Save a pie chart of how often each radio option was selected.

    Parameters:
        store:   survey record; column 2 of every row in ``store["responses"]``
                 is read as the selected option.
        plot_id: survey reference, used in the title and the file name.
        saveloc: folder prefix (including trailing separator) for the output.

    The figure is written to ``saveloc + plot_id + ".pdf"`` and then closed,
    so figures do not pile up across calls.
    """
    counts = {}
    for row in store["responses"]:
        counts[row[2]] = counts.get(row[2], 0) + 1
    # Materialise real lists: on Python 3 keys()/values() are views, which
    # are not sequences and cannot be converted to a numeric array directly.
    labels = list(counts.keys())
    sizes = list(counts.values())

    fig = plt.figure()
    try:
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
        plt.title("Selections of "+plot_id)
        plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    finally:
        # Release the figure even if drawing or saving failed
        plt.close(fig)
|
me@2938
|
199
|
me@2938
|
def plotCheckbox(store, plot_id, saveloc):
    """Save a bar chart of how often each checkbox option was ticked.

    Parameters:
        store:   survey record; ``store["header"][2:]`` supplies the option
                 labels and the matching columns of every row in
                 ``store["responses"]`` are counted when equal to "true".
        plot_id: survey reference, used in the title and the file name.
        saveloc: folder prefix (including trailing separator) for the output.

    The figure is written to ``saveloc + plot_id + ".pdf"`` and then closed,
    so figures do not pile up across calls.
    """
    labels = list(store["header"][2:])
    counts = [0] * len(labels)
    for row in store["responses"]:
        # Slicing tolerates rows that carry fewer options than the header
        for index, value in enumerate(row[2:len(labels)+2]):
            if value == "true":
                counts[index] += 1

    # One bar per option; the bar count follows the header rather than
    # being fixed at 4.
    x = np.arange(len(labels))
    fig = plt.figure()
    try:
        # Centre the bars on their x positions explicitly so the tick labels
        # sit under the bars whatever the library's default alignment is.
        plt.bar(x, counts, width=0.8, align='center')
        plt.xticks(x, labels)
        plt.xlabel("Option")
        plt.ylabel("Count")
        plt.title("Selection counts of "+plot_id)
        plt.savefig(saveloc+plot_id+".pdf", bbox_inches='tight')
    finally:
        # Release the figure even if drawing or saving failed
        plt.close(fig)
|
me@2938
|
218
|
me@2938
|
# Every page survey gets a duration histogram; radio and checkbox surveys
# additionally get a chart of the selected options.
for page_name, positions in storage["pages"].items():
    for position, surveys in positions.items():
        saveloc = file_store_root+page_name+"/"
        for ref, survey in surveys.items():
            plotDurationHistogram(survey, ref, saveloc)
            kind = survey["type"]
            if kind == "radio":
                plotRadio(survey, ref, saveloc)
            elif kind == "checkbox":
                plotCheckbox(survey, ref, saveloc)
|