#!/usr/bin/python
"""Collect ratings from XML result files into per-page, per-axis CSV files.

Every ``*.xml`` file in the results folder is parsed.  For each completed
``<page>`` the ``<value>`` of every ``<audioelement>`` (except those of type
``outside-reference``) is gathered, grouped by page ``ref`` and by the value's
``interface-name`` (``default`` when unset).  One CSV file per page/axis pair
is then written to ``<folder>/ratings/``; its header row holds the sorted
fragment IDs and each following row holds one result file's ``key`` followed
by its values.

Use: python score_parser.py [rating_folder_location]
"""

import xml.etree.ElementTree as ET
import os
import sys
import csv
import re

# COMMAND LINE ARGUMENTS

USAGE = "Use: python score_parser.py [rating_folder_location]"

# Explicit check rather than `assert`: assertions are removed under `python -O`.
if len(sys.argv) >= 3:
    sys.exit("score_parser takes at most 1 command line argument\n" + USAGE)

# XML results files location
if len(sys.argv) == 1:
    folder_name = "../saves"  # Looks in 'saves/' folder from 'scripts/' folder
    print(USAGE)
    print("Using default path: " + folder_name)
elif len(sys.argv) == 2:
    folder_name = sys.argv[1]  # First command line argument is folder

# check if folder_name exists
if not os.path.exists(folder_name):
    # the folder is not there
    print("Folder '"+folder_name+"' does not exist.")
    sys.exit()  # terminate script execution
elif not os.access(folder_name, os.W_OK):
    # The folder exists but is not writable.  The check targets the folder
    # itself (not its parent directory), because that is where 'ratings/' is
    # created.  As before, this only warns; execution continues.
    print("No write privileges in folder '"+folder_name+"'.")


# CODE

# storage[page_name] = {'header': [fragment IDs],
#                       'axis': {axis_name: {subject_id: [values]}}}
storage = {}

# create folder 'ratings' if not yet created
ratings_folder = os.path.join(folder_name, 'ratings')
if not os.path.exists(ratings_folder):
    os.makedirs(ratings_folder)

# Get every XML file in the folder
for file_name in os.listdir(folder_name):
    if not file_name.endswith(".xml"):
        continue

    tree = ET.parse(os.path.join(folder_name, file_name))
    root = tree.getroot()

    subject_id = root.get('key')

    # get the list of the pages this subject evaluated
    for page in root.findall("./page"):  # iterate over pages
        page_name = page.get('ref')  # get page ID

        # NOTE(review): both checks below stop processing the remaining pages
        # of this file (break, not continue) -- kept as in the original;
        # confirm this is intended.
        if page_name is None:  # 'empty' audio_holder
            print("WARNING: " + file_name + " contains empty audio holder. (score_parser.py)")
            break

        if page.get('state') != "complete":
            print("WARNING: " + file_name + " contains incomplete page " +page_name+ ". (score_parser.py)")
            break

        # Check if page in the store, add it otherwise
        page_store = storage.setdefault(page_name, {'header': [], 'axis': {}})

        # strip repetitions
        page_name_root = re.sub('-repeat-.$', '', page_name)

        # Get the axis names from the page configuration embedded in the file
        pageConfig = root.find('./waet/page/[@id="'+page_name_root+'"]')
        if pageConfig is None:
            interface_names = ["default"]
        else:
            interface_names = []
            for interface in pageConfig.findall('./interface'):
                interfaceName = interface.get("name")  # Get the axis name
                if interfaceName is None:
                    interfaceName = "default"  # If name not set, make name 'default'
                interface_names.append(interfaceName)

        for interfaceName in interface_names:
            # Create the axis if not in store for this page, then start a
            # fresh value list for this session.
            page_store['axis'].setdefault(interfaceName, {})[subject_id] = []

        # header: fragment IDs in 'alphabetical' order
        fragmentnamelist = sorted(
            audioelement.get('ref')
            for audioelement in page.findall("./audioelement")
            if audioelement.get('type') != "outside-reference"
        )
        page_store['header'] = fragmentnamelist

        for fragmentname in fragmentnamelist:
            audioElement = page.find("./audioelement/[@ref='"+ fragmentname+ "']")  # Get the element
            for value in audioElement.findall('./value'):
                axisName = value.get('interface-name')
                if axisName is None or axisName == "null":
                    axisName = 'default'
                # setdefault: a value may name an axis that the page config
                # did not declare; store it instead of raising KeyError.
                axisStore = page_store['axis'].setdefault(axisName, {})
                # Element.text always exists but is None for an empty node.
                text = value.text if value.text is not None else ''
                axisStore.setdefault(subject_id, []).append(text)

# The csv module wants files opened without newline translation, otherwise
# blank rows appear on Windows: newline='' on Python 3, binary mode on Python 2.
if sys.version_info[0] >= 3:
    csv_open_args = {'mode': 'w', 'newline': ''}
else:
    csv_open_args = {'mode': 'wb'}

# Now create the individual files
for page_name in storage:
    for axis_name in storage[page_name]['axis']:

        # score file name
        file_name = os.path.join(ratings_folder, page_name+'-'+axis_name+'-ratings.csv')

        # Any existing file is overwritten from scratch.
        headerrow = ['file_keys'] + list(storage[page_name]['header'])

        # Single context manager: header and rows are written through one
        # handle that is closed even if writing fails.
        with open(file_name, **csv_open_args) as writefile:
            filewriter = csv.writer(writefile, delimiter=',')
            filewriter.writerow(headerrow)

            subjects = storage[page_name]['axis'][axis_name]
            for subject_id in subjects:
                filewriter.writerow([subject_id] + list(subjects[subject_id]))