webaudioevaluationtool: scripts/score

annotate scripts/score_parser.py @ 928:ba58cf8d0dbc

Score parsing: make csv from all XML files (one file per page ID, one column per fragment ID, one row per subject). Supports varying selections of pages and fragments across subjects. Generalised to 'page' (instead of 'song') and 'fragment' (instead of 'mix').

author	Brecht De Man <BrechtDeMan@users.noreply.github.com>
date	Sun, 31 May 2015 14:45:30 +0100
parents
children	97ebdb6b266a

rev	line source
"""Collect listening-test ratings from result XML files into per-page CSV files.

For every ``*.xml`` file in the input folder (one file per subject), each
``<audioholder>`` element (a page) is written to
``<ratings_folder>/<page id>-ratings.csv``.  The CSV has one column per
fragment ID (``<audioelement id=...>``) and one row per subject; the first
column holds the subject ID (the XML file name).  Subjects may have evaluated
different pages and fragments: unseen fragments get a new column and missing
ratings are left as empty cells.

Rows are always appended, so running the script twice over the same XML files
adds every subject twice.
"""
import csv
import os
import sys
import xml.etree.ElementTree as ET

# TODO Remove DEBUG statements


def _open_csv(path, mode):
    """Open *path* the way the csv module expects on this Python version."""
    if sys.version_info[0] >= 3:
        # Python 3: text mode with newline translation disabled.
        return open(path, mode, newline='')
    # Python 2: the csv module wants binary mode.
    return open(path, mode + 'b')


def _read_rows(path):
    """Return every row of the CSV file at *path* as a list of lists."""
    with _open_csv(path, 'r') as readfile:
        return list(csv.reader(readfile, delimiter=','))


def _write_rows(path, rows):
    """Create or overwrite the CSV file at *path* with *rows*."""
    with _open_csv(path, 'w') as writefile:
        csv.writer(writefile, delimiter=',').writerows(rows)


def _page_ratings(audioholder):
    """Map fragment ID to rating text for one page element.

    Only audio elements that contain a ``<value>`` child are included; when a
    fragment ID occurs more than once, the first one with a value wins.
    """
    ratings = {}
    for audioelement in audioholder.findall('audioelement'):
        value = audioelement.find('value')
        if value is not None:
            ratings.setdefault(audioelement.get('id'), value.text)
    return ratings


def _sync_header(ratings_path, fragment_ids):
    """Make sure the ratings file has a column for every fragment ID.

    Creates the file with a fresh header (empty first cell, then the fragment
    IDs in alphabetical order) if it does not exist.  Otherwise appends a
    column for each fragment ID not yet in the header, padding the existing
    rows with empty cells.  Returns the resulting header row.
    """
    if not os.path.isfile(ratings_path):
        # set() avoids duplicate columns when an ID is repeated on a page.
        header = [''] + sorted(set(fragment_ids))
        _write_rows(ratings_path, [header])
        return header

    # Read everything up front so the file is closed before it is replaced.
    rows = _read_rows(ratings_path)
    header = rows[0] if rows else ['']  # tolerate an empty existing file
    new_fragments = sorted(set(fragment_ids) - set(header))
    if not new_fragments and rows:
        return header

    if new_fragments:
        print(["DEBUG New fragments found: " + str(new_fragments)])
    header = header + new_fragments
    padding = [''] * len(new_fragments)
    # Write next to the target, then swap, so a failed write cannot truncate
    # the existing ratings.
    temp_path = ratings_path + '.tmp'
    _write_rows(temp_path, [header] + [row + padding for row in rows[1:]])
    os.remove(ratings_path)  # os.rename cannot overwrite on Windows
    os.rename(temp_path, ratings_path)
    if new_fragments:
        print(["DEBUG New header row: " + str(header)])
    return header


def parse_scores(xml_folder='.', ratings_folder='ratings'):
    """Append the ratings from every XML file in *xml_folder* to the page CSVs.

    xml_folder     -- folder holding the output XML files, one per subject.
    ratings_folder -- folder for the ``<page id>-ratings.csv`` files; created
                      when the first page is found.

    Files are processed in sorted name order so the row order is reproducible.
    """
    for xml_name in sorted(os.listdir(xml_folder)):
        if not xml_name.endswith('.xml'):
            continue
        root = ET.parse(os.path.join(xml_folder, xml_name)).getroot()
        subject_id = xml_name  # file name as subject ID

        for audioholder in root.findall('./audioholder'):  # iterate over pages
            page_name = audioholder.get('id')

            # create folder 'ratings' if not yet created
            if not os.path.exists(ratings_folder):
                os.makedirs(ratings_folder)
            ratings_path = os.path.join(ratings_folder,
                                        page_name + '-ratings.csv')

            # Query the page element directly rather than building an XPath
            # string from the IDs, which breaks on IDs containing quotes.
            fragment_ids = [audioelement.get('id')
                            for audioelement
                            in audioholder.findall('audioelement')]
            header = _sync_header(ratings_path, fragment_ids)

            # row: [subject ID, rating fragment ID 1, ..., rating fragment ID M]
            # with an empty cell for fragments this subject has not rated
            ratings = _page_ratings(audioholder)
            rating_row = [subject_id]
            rating_row.extend(ratings.get(fragment_id, '')
                              for fragment_id in header[1:])

            with _open_csv(ratings_path, 'a') as writefile:
                csv.writer(writefile, delimiter=',').writerow(rating_row)


if __name__ == '__main__':
    # You have to run this in the folder where the output XML files are.
    parse_scores()

Mercurial > hg > webaudioevaluationtool

annotate scripts/score_parser.py @ 928:ba58cf8d0dbc