Mercurial > hg > webaudioevaluationtool
view scripts/score_parser.py @ 147:927d05a43a70
Score parsing: make csv from all XML files (one file per page ID, one column per fragment ID, one row per subject). Supports varying selections of pages and fragments across subjects. Generalised to 'page' (instead of 'song') and 'fragment' (instead of 'mix').
author | Brecht De Man <b.deman@qmul.ac.uk> |
---|---|
date | Sun, 31 May 2015 14:45:30 +0100 |
parents | |
children | 97ebdb6b266a |
line wrap: on
line source
"""Collect listening-test ratings from result XML files into CSV tables.

Every ``*.xml`` file in the input folder is treated as one subject. For each
``audioholder`` element (a "page") directly under the XML root, a file
``ratings/<page id>-ratings.csv`` is created or extended with:

* a header row: an empty cell followed by one column per fragment ID
  (``audioelement`` ``id`` attribute);
* one row per subject: the XML file name followed by the text of each
  fragment's ``value`` element, or an empty cell when the subject has no
  rating for that fragment.

Subjects may have evaluated different pages and different fragments; columns
are added to existing files as new fragment IDs are encountered.

Run this script from the folder that contains the output XML files.
"""
import csv
import os
import sys
import xml.etree.ElementTree as ET

# TODO Remove DEBUG statements


def _open_csv(path, mode):
    """Open *path* in the way the csv module expects on this Python version.

    Python 3 needs text mode with ``newline=''`` and Python 2 needs binary
    mode. In both cases the csv module itself controls the line endings,
    which avoids spurious blank rows on platforms that translate newlines.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def _read_rows(path):
    """Return all non-empty rows of the CSV file at *path*."""
    with _open_csv(path, 'r') as readfile:
        return [row for row in csv.reader(readfile, delimiter=',') if row]


def _write_rows(path, rows, mode='w'):
    """Write *rows* to the CSV file at *path* ('w' replaces, 'a' appends)."""
    with _open_csv(path, mode) as writefile:
        csv.writer(writefile, delimiter=',').writerows(rows)


def _page_ratings(audioholder):
    """Return ``(fragment_ids, ratings)`` for one page element.

    ``fragment_ids`` lists the ``id`` of every ``audioelement`` child, in
    document order. ``ratings`` maps a fragment ID to the text of its
    ``value`` child; fragments without a ``value`` child are absent from the
    mapping. If an ID occurs more than once, the first one that has a value
    wins.
    """
    fragment_ids = []
    ratings = {}
    for audioelement in audioholder.findall('audioelement'):
        fragment_id = audioelement.get('id')
        fragment_ids.append(fragment_id)
        value = audioelement.find('value')
        if value is not None and fragment_id not in ratings:
            # An empty <value/> has text None; store it as an empty cell.
            ratings[fragment_id] = value.text or ''
    return fragment_ids, ratings


def _sync_header(file_name, fragment_ids):
    """Make sure *file_name* has a column for every ID in *fragment_ids*.

    A missing (or empty) file is created with a header consisting of an empty
    first cell followed by the fragment IDs in sorted order. An existing file
    keeps its columns; unknown fragment IDs are appended in sorted order and
    every existing row is padded with empty cells for the new columns.

    Returns the header row that is in the file afterwards.
    """
    rows = _read_rows(file_name) if os.path.isfile(file_name) else []
    if not rows:
        header = [''] + sorted(set(fragment_ids))
        _write_rows(file_name, [header])
        return header

    header = rows[0]
    new_fragments = sorted(set(fragment_ids) - set(header))
    if new_fragments:
        print(["DEBUG New fragments found: " + str(new_fragments)])
        header = header + new_fragments
        padding = [''] * len(new_fragments)
        # The old rows are already in memory, so the file can be rewritten
        # directly; no temporary file or rename over an existing file needed.
        _write_rows(file_name,
                    [header] + [row + padding for row in rows[1:]])
        print(["DEBUG New header row: " + str(header)])
    return header


def parse_scores(xml_dir='.', ratings_dir='ratings'):
    """Append the ratings of every XML file in *xml_dir* to per-page CSVs.

    xml_dir     -- folder that is scanned (non-recursively) for ``*.xml``
                   files; they are processed in sorted file-name order so the
                   row order is reproducible.
    ratings_dir -- folder that receives the ``<page id>-ratings.csv`` files;
                   it is created when the first page is written.

    Rows are always appended, so running the function again on the same
    input adds the same subjects a second time.
    """
    for xml_name in sorted(os.listdir(xml_dir)):
        if not xml_name.endswith('.xml'):
            continue
        root = ET.parse(os.path.join(xml_dir, xml_name)).getroot()
        subject_id = xml_name  # file name as subject ID

        for audioholder in root.findall('./audioholder'):  # iterate over pages
            page_name = audioholder.get('id')
            ratings_file = os.path.join(ratings_dir,
                                        page_name + '-ratings.csv')
            if not os.path.exists(ratings_dir):
                os.makedirs(ratings_dir)

            fragment_ids, ratings = _page_ratings(audioholder)
            header = _sync_header(ratings_file, fragment_ids)

            # Row: [subject ID, rating fragment 1, ..., rating fragment M],
            # following the column order of the header (first cell is empty).
            rating_row = [subject_id]
            rating_row.extend(ratings.get(name, '') for name in header[1:])
            _write_rows(ratings_file, [rating_row], mode='a')


if __name__ == '__main__':
    parse_scores()