comparison scripts/score_parser.py @ 883:cd20f076f6a3

Readme: added reference to issue tracker; Scripts: comment_parser and score_parser now read from and write to '../saves' (relative to 'scripts/'), and XML queries are condensed (searching from child elements instead of from the root)
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Mon, 29 Jun 2015 13:15:00 +0100
parents ca4ae613f1dd
children 1dd209550560
comparison
equal deleted inserted replaced
882:594e617b75ad 883:cd20f076f6a3
2 import os 2 import os
3 import csv 3 import csv
4 4
5 #TODO Remove DEBUG statements 5 #TODO Remove DEBUG statements
6 6
7 # XML results files location (modify as needed):
8 folder_name = "../saves" # Looks in 'saves/' folder from 'scripts/' folder
9
7 # get every XML file in folder 10 # get every XML file in folder
8 for file in os.listdir("."): # You have to put this in folder where output XML files are. 11 for file in os.listdir(folder_name): # You have to put this in folder where output XML files are.
9 if file.endswith(".xml"): 12 if file.endswith(".xml"):
10 tree = ET.parse(file) 13 tree = ET.parse(folder_name + '/' + file)
11 root = tree.getroot() 14 root = tree.getroot()
12 #print ["DEBUG Reading " + file + "..."] 15 #print ["DEBUG Reading " + file + "..."]
13 16
14 # get subject ID from XML file 17 # get subject ID from XML file
15 subject_id = file # file name as subject ID 18 subject_id = file # file name as subject ID
20 #print ["DEBUG page " + page_name] 23 #print ["DEBUG page " + page_name]
21 24
22 if page_name is None: # ignore 'empty' audio_holders 25 if page_name is None: # ignore 'empty' audio_holders
23 break 26 break
24 27
25 file_name = 'ratings/'+page_name+'-ratings.csv' # score file name 28 file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name
26 29
27 # create folder 'ratings' if not yet created 30 # create folder 'ratings' if not yet created
28 if not os.path.exists('ratings'): 31 if not os.path.exists(folder_name + '/ratings'):
29 os.makedirs('ratings') 32 os.makedirs(folder_name + '/ratings')
30 33
31 # header: fragment IDs in 'alphabetical' order 34 # header: fragment IDs in 'alphabetical' order
32 # go to fragment column, or create new column if it doesn't exist yet 35 # go to fragment column, or create new column if it doesn't exist yet
33 36
34 # get array of audio elements and number of audio elements 37 # get array of audio elements and number of audio elements
35 audiolist = root.findall("*/[@id='"+page_name+"']/audioelement") 38 audiolist = audioholder.findall("./audioelement")
36 n_fragments = len(audiolist) 39 n_fragments = len(audiolist)
37 40
38 # get alphabetical array of fragment IDs from this subject's XML 41 # get alphabetical array of fragment IDs from this subject's XML
39 fragmentnamelist = [] # make empty list 42 fragmentnamelist = [] # make empty list
40 for audioelement in audiolist: # iterate over all audioelements 43 for audioelement in audiolist: # iterate over all audioelements
83 # prepare row to be written for this subject for this page 86 # prepare row to be written for this subject for this page
84 ratingrow = [subject_id] 87 ratingrow = [subject_id]
85 88
86 # get scores related to fragment [id] 89 # get scores related to fragment [id]
87 for fragmentname in headerrow[1:]: # iterate over fragments in header (skip first empty column) 90 for fragmentname in headerrow[1:]: # iterate over fragments in header (skip first empty column)
88 elementvalue = root.find("*/[@id='" 91 elementvalue = audioholder.find("./audioelement/[@id='"
89 + page_name
90 + "']/audioelement/[@id='"
91 + fragmentname 92 + fragmentname
92 + "']/value") 93 + "']/value")
93 if hasattr(elementvalue, 'text'): # if rating for this fragment exists 94 if hasattr(elementvalue, 'text'): # if rating for this fragment exists
94 ratingrow.append(elementvalue.text) # add to rating row 95 ratingrow.append(elementvalue.text) # add to rating row
95 else: # if this subject has not rated this fragment 96 else: # if this subject has not rated this fragment