comparison scripts/score_parser.py @ 1187:f08343647d2b

Python scripts: fix error that caused the ratings of the first encountered file to be omitted
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Mon, 04 Jan 2016 11:59:21 +0100
parents f9c9a40f33bd
children a4ad9e55b5b8
comparison
equal deleted inserted replaced
1186:fd5390509d52 1187:f08343647d2b
32 32
33 # remember which files have been opened this time 33 # remember which files have been opened this time
34 file_history = [] 34 file_history = []
35 35
36 # get every XML file in folder 36 # get every XML file in folder
37 for file in os.listdir(folder_name): 37 for file_name in os.listdir(folder_name):
38 if file.endswith(".xml"): 38 if file_name.endswith(".xml"):
39 tree = ET.parse(folder_name + '/' + file) 39 tree = ET.parse(folder_name + '/' + file_name)
40 root = tree.getroot() 40 root = tree.getroot()
41 41
42 # get subject ID from XML file 42 # get subject ID from XML file
43 subject_id = file[:-4] # file name (without extension) as subject ID 43 subject_id = file_name[:-4] # file name (without extension) as subject ID
44 44
45 # get list of all pages this subject evaluated 45 # get list of all pages this subject evaluated
46 for audioholder in root.findall("./audioholder"): # iterate over pages 46 for audioholder in root.findall("./audioholder"): # iterate over pages
47 page_name = audioholder.get('id') # get page name 47 page_name = audioholder.get('id') # get page name
48 48
49 if page_name is None: # ignore 'empty' audio_holders 49 if page_name is None: # ignore 'empty' audio_holders
50 print "WARNING: " + file + " contains empty audio holder. (score_parser.py)" 50 print "WARNING: " + file_name + " contains empty audio holder. (score_parser.py)"
51 break 51 break
52 52
53 file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name 53 file_name = folder_name+'/ratings/'+page_name+'-ratings.csv' # score file name
54 54
55 # create folder 'ratings' if not yet created 55 # create folder 'ratings' if not yet created
66 # get alphabetical array of fragment IDs from this subject's XML 66 # get alphabetical array of fragment IDs from this subject's XML
67 fragmentnamelist = [] # make empty list 67 fragmentnamelist = [] # make empty list
68 for audioelement in audiolist: # iterate over all audioelements 68 for audioelement in audiolist: # iterate over all audioelements
69 fragmentnamelist.append(audioelement.get('id')) # add to list 69 fragmentnamelist.append(audioelement.get('id')) # add to list
70 70
71 # if file exists, get header and add 'new' fragments 71
72 # if file exists, get header and add any 'new' fragments not yet in the header
72 if os.path.isfile(file_name): 73 if os.path.isfile(file_name):
73 with open(file_name, 'r') as readfile: 74 with open(file_name, 'r') as readfile:
74 filereader = csv.reader(readfile, delimiter=',') 75 filereader = csv.reader(readfile, delimiter=',')
75 headerrow = filereader.next() 76 headerrow = filereader.next()
76 77
80 filewriter = csv.writer(writefile, delimiter=',') 81 filewriter = csv.writer(writefile, delimiter=',')
81 headerrow = sorted(headerrow) 82 headerrow = sorted(headerrow)
82 filewriter.writerow(headerrow) 83 filewriter.writerow(headerrow)
83 file_history.append(file_name) 84 file_history.append(file_name)
84 85
85 # Which of the fragmentes are in fragmentnamelist but not in headerrow? 86 # Which of the fragments are in fragmentnamelist but not in headerrow?
86 newfragments = list(set(fragmentnamelist)-set(headerrow)) 87 newfragments = list(set(fragmentnamelist)-set(headerrow))
87 newfragments = sorted(newfragments) # new fragments in alphabetical order 88 newfragments = sorted(newfragments) # new fragments in alphabetical order
88 # If not empty, read file and rewrite adding extra columns 89 # If not empty, read file and rewrite adding extra columns
89 if newfragments: # if not empty 90 if newfragments: # if not empty
90 with open('temp.csv', 'w') as writefile: 91 with open('temp.csv', 'w') as writefile:
96 for row in filereader: # rewrite row plus empty cells for every new fragment name 97 for row in filereader: # rewrite row plus empty cells for every new fragment name
97 filewriter.writerow(row + ['']*len(newfragments)) 98 filewriter.writerow(row + ['']*len(newfragments))
98 os.rename('temp.csv', file_name) # replace old file with temp file 99 os.rename('temp.csv', file_name) # replace old file with temp file
99 headerrow = headerrow + newfragments 100 headerrow = headerrow + newfragments
100 101
101 # if not, create file and make header 102
103 # if file does not exist yet, create file and make header
102 else: 104 else:
103 headerrow = sorted(fragmentnamelist) # sort alphabetically 105 headerrow = sorted(fragmentnamelist) # sort alphabetically
104 headerrow.insert(0,'') 106 headerrow.insert(0,'')
105 fragmentnamelist = fragmentnamelist[1:] #HACKY FIX inserting in firstrow also affects fragmentnamelist 107 fragmentnamelist = fragmentnamelist[1:] #HACKY FIX inserting in firstrow also affects fragmentnamelist
106 with open(file_name, 'w') as writefile: 108 with open(file_name, 'w') as writefile:
107 filewriter = csv.writer(writefile, delimiter=',') 109 filewriter = csv.writer(writefile, delimiter=',')
108 filewriter.writerow(headerrow) 110 filewriter.writerow(headerrow)
111 file_history.append(file_name)
109 112
110 # open file to write for this page 113 # open file to write for this page
111 writefile = open(file_name, 'a') 114 writefile = open(file_name, 'a')
112 filewriter = csv.writer(writefile, delimiter=',') 115 filewriter = csv.writer(writefile, delimiter=',')
113 116