comparison scripts/score_parser.py @ 884:1dd209550560

Scripts: merge all three plot scripts into one (box plot, scatter plot, mean plus confidence interval plot); bug fixes
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Mon, 29 Jun 2015 17:19:46 +0100
parents cd20f076f6a3
children ba83143187d6
comparison
equal deleted inserted replaced
883:cd20f076f6a3 884:1dd209550560
1 #!/usr/bin/python
2
1 import xml.etree.ElementTree as ET 3 import xml.etree.ElementTree as ET
2 import os 4 import os
3 import csv 5 import csv
4 6
5 #TODO Remove DEBUG statements 7 #TODO Remove DEBUG statements
10 # get every XML file in folder 12 # get every XML file in folder
11 for file in os.listdir(folder_name): # You have to put this in folder where output XML files are. 13 for file in os.listdir(folder_name): # You have to put this in folder where output XML files are.
12 if file.endswith(".xml"): 14 if file.endswith(".xml"):
13 tree = ET.parse(folder_name + '/' + file) 15 tree = ET.parse(folder_name + '/' + file)
14 root = tree.getroot() 16 root = tree.getroot()
15 #print ["DEBUG Reading " + file + "..."] 17 #print "DEBUG Reading " + file + "..."
16 18
17 # get subject ID from XML file 19 # get subject ID from XML file
18 subject_id = file # file name as subject ID 20 subject_id = file[:-4] # file name (without extension) as subject ID
19 21
20 # get list of all pages this subject evaluated 22 # get list of all pages this subject evaluated
21 for audioholder in root.findall("./audioholder"): # iterate over pages 23 for audioholder in root.findall("./audioholder"): # iterate over pages
22 page_name = audioholder.get('id') # get page name 24 page_name = audioholder.get('id') # get page name
23 #print ["DEBUG page " + page_name] 25
24
25 if page_name is None: # ignore 'empty' audio_holders 26 if page_name is None: # ignore 'empty' audio_holders
26 break 27 break
27 28
28 file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name 29 file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name
29 30
44 fragmentnamelist.append(audioelement.get('id')) # add to list 45 fragmentnamelist.append(audioelement.get('id')) # add to list
45 46
46 47
47 # if file exists, get header and add 'new' fragments 48 # if file exists, get header and add 'new' fragments
48 if os.path.isfile(file_name): 49 if os.path.isfile(file_name):
49 #print ["DEBUG file " + file_name + " already exists - reading header"] 50 #print "DEBUG file " + file_name + " already exists - reading header"
50 with open(file_name, 'r') as readfile: 51 with open(file_name, 'r') as readfile:
51 filereader = csv.reader(readfile, delimiter=',') 52 filereader = csv.reader(readfile, delimiter=',')
52 headerrow = filereader.next() 53 headerrow = filereader.next()
53 #headerrow = headerrow[1:] # remove first column (empty)
54 54
55 # Which of the fragments are in fragmentnamelist but not in headerrow? 55 # Which of the fragments are in fragmentnamelist but not in headerrow?
56 newfragments = list(set(fragmentnamelist)-set(headerrow)) 56 newfragments = list(set(fragmentnamelist)-set(headerrow))
57 newfragments = sorted(newfragments) # new fragments in alphabetical order 57 newfragments = sorted(newfragments) # new fragments in alphabetical order
58 # If not empty, read file and rewrite adding extra columns 58 # If not empty, read file and rewrite adding extra columns
59 if newfragments: # if not empty 59 if newfragments: # if not empty
60 print ["DEBUG New fragments found: " + str(newfragments)] 60 print ' '+page_name+','+file_name+','+subject_id
61 with open('temp.csv', 'w') as writefile: 61 #print "DEBUG New fragments found: " + str(newfragments)
62 filewriter = csv.writer(writefile, delimiter=',') 62 with open('temp.csv', 'w') as writefile:
63 filewriter.writerow(headerrow + newfragments) # write new header 63 filewriter = csv.writer(writefile, delimiter=',')
64 filewriter.writerow(headerrow + newfragments) # write new header
65 #print " "+str(headerrow + newfragments) # DEBUG
66 with open(file_name, 'r') as readfile:
67 filereader = csv.reader(readfile, delimiter=',')
68 filereader.next() # skip header
64 for row in filereader: # rewrite row plus empty cells for every new fragment name 69 for row in filereader: # rewrite row plus empty cells for every new fragment name
65 #print ["DEBUG Old row: " + str(row)] 70 #print " Old row: " + str(row) # DEBUG
66 filewriter.writerow(row + ['']*len(newfragments)) 71 filewriter.writerow(row + ['']*len(newfragments))
67 #print ["DEBUG New row: " + str(row + ['']*len(newfragments))] 72 #print " New row: " + str(row + ['']*len(newfragments)) # DEBUG
68 os.rename('temp.csv', file_name) # replace old file with temp file 73 os.rename('temp.csv', file_name) # replace old file with temp file
69 headerrow = headerrow + newfragments 74 headerrow = headerrow + newfragments
70 print ["DEBUG New header row: " + str(headerrow)] 75 #print "DEBUG New header row: " + str(headerrow)
71 76
72 # if not, create file and make header 77 # if not, create file and make header
73 else: 78 else:
74 #print ["DEBUG file " + file_name + " doesn't exist yet - making new one"] 79 #print ["DEBUG file " + file_name + " doesn't exist yet - making new one"]
75 headerrow = sorted(fragmentnamelist) # sort alphabetically 80 headerrow = sorted(fragmentnamelist) # sort alphabetically