Mercurial > hg > webaudioevaluationtool
comparison python/score_parser.py @ 2264:556b79c72eee
Change folder name 'scripts' to 'python'
author | Brecht De Man <b.deman@qmul.ac.uk> |
---|---|
date | Wed, 20 Apr 2016 16:33:22 +0200 |
parents | |
children | 4843377d9976 |
comparison
equal
deleted
inserted
replaced
2262:5bf0555905de | 2264:556b79c72eee |
---|---|
#!/usr/bin/python
"""Collect ratings from XML result files into one CSV file per page.

Use: python score_parser.py [rating_folder_location]

Every '*.xml' file in the rating folder is treated as the results of one
subject (the file name without extension is the subject ID).  For each
completed <page> of a subject, one row
    [subject ID, rating fragment ID 1, ..., rating fragment ID M]
is appended to '<rating folder>/ratings/<page ref>-ratings.csv'.

The first time a ratings file is touched during a run, all of its rows are
discarded (only the header is kept), so running the script repeatedly does
not duplicate rows.

The script is written to run on both Python 2 and Python 3.
"""

import xml.etree.ElementTree as ET
import os
import sys
import csv

USAGE = "Use: python score_parser.py [rating_folder_location]"

# Looks in 'saves/' folder from 'python/' folder
DEFAULT_FOLDER = "../saves"


def _open_csv(path, mode):
    """Open *path* for the csv module in the way each Python version expects.

    Python 3 wants text mode with newline='' and Python 2 wants binary mode;
    anything else produces spurious blank lines on Windows.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def _read_rows(path):
    """Return every row of the CSV file at *path* as a list of lists."""
    with _open_csv(path, 'r') as readfile:
        return list(csv.reader(readfile, delimiter=','))


def _write_rows(path, rows):
    """Overwrite the CSV file at *path* with *rows*."""
    with _open_csv(path, 'w') as writefile:
        csv.writer(writefile, delimiter=',').writerows(rows)


def _prepare_ratings_file(path, fragment_names, opened_files):
    """Make sure the ratings file at *path* exists and has all needed columns.

    path           -- CSV file holding the ratings of one page
    fragment_names -- fragment IDs rated by the current subject on this page
    opened_files   -- set of ratings files already touched during this run
                      (updated in place)

    Returns the header row of the file: an empty first cell (subject column)
    followed by the fragment IDs.
    """
    if not os.path.isfile(path):
        # file does not exist yet: create it with a header in alphabetical order
        header = [''] + sorted(set(fragment_names))
        _write_rows(path, [header])
        opened_files.add(path)
        return header

    rows = _read_rows(path)
    # an empty file (or blank first line) still needs the subject column
    header = rows[0] if rows and rows[0] else ['']
    data = rows[1:]
    changed = False

    # If file hasn't been opened yet this time, remove all rows except header
    if path not in opened_files:
        header = sorted(header)  # '' sorts first, so the subject column stays in front
        data = []
        opened_files.add(path)
        changed = True

    # Which of the fragments are not in the header yet?
    new_fragments = sorted(set(fragment_names) - set(header))
    if new_fragments:
        # add the extra columns, with empty cells in every existing row
        header = header + new_fragments
        data = [row + [''] * len(new_fragments) for row in data]
        changed = True

    if changed:
        _write_rows(path, [header] + data)
    return header


def _page_scores(page):
    """Map fragment ID -> rating text for every rated audioelement of *page*.

    Looking the ratings up in a dictionary (instead of building an XPath
    expression from the fragment ID) also works for IDs containing quotes.
    When a fragment ID occurs more than once, the first rating wins.
    """
    scores = {}
    for audioelement in page.findall("./audioelement"):
        ref = audioelement.get('ref')
        value = audioelement.find("./value")
        if ref is None or value is None or ref in scores:
            continue
        scores[ref] = value.text or ''
    return scores


def parse_folder(folder_name):
    """Write the ratings of every XML file in *folder_name* to CSV files.

    The CSV files are placed in the 'ratings' subfolder of *folder_name*,
    which is created when necessary.  XML files are processed in
    alphabetical order so the row order is reproducible.
    """
    ratings_folder = os.path.join(folder_name, 'ratings')

    # remember which files have been opened this time
    opened_files = set()

    # get every XML file in folder
    for file_name in sorted(os.listdir(folder_name)):
        if not file_name.endswith(".xml"):
            continue
        root = ET.parse(os.path.join(folder_name, file_name)).getroot()

        # file name (without extension) as subject ID
        subject_id = file_name[:-4]

        # iterate over all pages this subject evaluated
        for page in root.findall("./page"):
            page_name = page.get('ref')  # page reference ID

            # An empty or incomplete page ends the processing of this subject
            # (the remaining pages of the same file are skipped as well).
            if page_name is None:
                print("WARNING: " + file_name
                      + " contains empty audio holder. (score_parser.py)")
                break

            if page.get('state') != "complete":
                print("WARNING:" + file_name + " contains incomplete page "
                      + page_name + ". (score_parser.py)")
                break

            # create folder 'ratings' if not yet created
            if not os.path.exists(ratings_folder):
                os.makedirs(ratings_folder)

            # Kept in its own variable: the warnings above must keep naming
            # the XML file, not the score file.
            ratings_path = os.path.join(ratings_folder,
                                        page_name + '-ratings.csv')

            # fragment IDs of this page (audioelements without ID are ignored)
            fragment_names = [audioelement.get('ref')
                              for audioelement in page.findall("./audioelement")
                              if audioelement.get('ref') is not None]

            header = _prepare_ratings_file(ratings_path, fragment_names,
                                           opened_files)

            # row for this subject: rating (or empty cell) for every fragment
            # in the header, skipping the first (subject) column
            scores = _page_scores(page)
            rating_row = [subject_id] + [scores.get(fragment_name, '')
                                         for fragment_name in header[1:]]

            # append to file if row non-empty (except subject name)
            if any(rating_row[1:]):
                with _open_csv(ratings_path, 'a') as writefile:
                    csv.writer(writefile, delimiter=',').writerow(rating_row)


def main(argv):
    """Handle the command line arguments in *argv* and parse the folder."""
    # explicit check instead of assert, which is skipped under 'python -O'
    if len(argv) > 2:
        sys.exit("score_parser takes at most 1 command line argument\n" + USAGE)

    # XML results files location
    if len(argv) < 2:
        folder_name = DEFAULT_FOLDER
        print(USAGE)
        print("Using default path: " + folder_name)
    else:
        folder_name = argv[1]  # First command line argument is folder

    # check if folder_name exists
    if not os.path.isdir(folder_name):
        print("Folder '" + folder_name + "' does not exist.")
        sys.exit(1)  # terminate script execution
    if not os.access(folder_name, os.W_OK):
        # Only a warning: existing, writable ratings files can still be updated.
        print("No write privileges in folder '" + folder_name + "'.")

    parse_folder(folder_name)


if __name__ == "__main__":
    main(sys.argv)