annotate scripts/comment_parser.py @ 147:927d05a43a70

Score parsing: make csv from all XML files (one file per page ID, one column per fragment ID, one row per subject). Supports varying selections of pages and fragments across subjects. Generalised to 'page' (instead of 'song') and 'fragment' (instead of 'mix').
author Brecht De Man <b.deman@qmul.ac.uk>
date Sun, 31 May 2015 14:45:30 +0100
parents 2d08d2025258
children 97ebdb6b266a
rev   line source
"""Collect listening-test comments from result XML files into CSV files.

Every ``*.xml`` file in the working folder is read.  For each page
(``<audioholder id=...>``) a folder named after the page is created, and for
each fragment (``<audioelement id=...>``) on that page one row holding the
comment text is appended to ``[page]/[page]-comments-[fragment].csv``.

You have to put this script in the folder where the output XML files are.

Known limitations (carried over from the original script):

* TODO: Rows are *appended*, so running the script several times produces
  duplicate entries.
* TODO: Replace 'new line' inside comments with something else?
* TODO: To store more than just the comment on each line, pass every value as
  its own list item to ``writerow`` (e.g. ``[file_name, comment]``).  Joining
  them with ``','`` into one string yields a single quoted cell, so the comma
  does not act as a delimiter.
"""
import xml.etree.ElementTree as ET
import os
import csv
import sys

# The csv module expects byte strings on Python 2 and text on Python 3.
_PY2 = sys.version_info[0] == 2


def _open_csv_for_append(path):
    """Open *path* for appending, in the mode the csv module expects."""
    if _PY2:
        return open(path, 'ab')
    # newline='' keeps the csv module's own line endings from being translated
    # a second time; the encoding is fixed so output does not depend on locale.
    return open(path, 'a', newline='', encoding='utf-8')


def _comment_cell(audioelement):
    """Return the comment text of *audioelement* as a value csv can write.

    A fragment without a ``comment/response`` element, or with an empty one,
    yields an empty string, so every parsed file still contributes one row.
    """
    response = audioelement.find("./comment/response")
    text = response.text if response is not None else None
    if text is None:
        text = u''
    return text.encode("utf-8") if _PY2 else text


def extract_comments(folder="."):
    """Append the comments of every XML file in *folder* to per-fragment CSVs.

    Args:
        folder: Directory that holds the XML files.  The page folders and CSV
            files are created inside this same directory.

    Raises:
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is not
            well-formed.
    """
    # Sorted so that the row order does not depend on the file system.
    for file_name in sorted(os.listdir(folder)):
        if not file_name.endswith(".xml"):
            continue
        root = ET.parse(os.path.join(folder, file_name)).getroot()

        for audioholder in root.findall("./audioholder"):  # iterate over pages
            page_name = audioholder.get('id')
            if page_name is None:
                continue  # no name to build the folder and file names from

            # create folder [page_name] if not yet created
            page_dir = os.path.join(folder, page_name)
            if not os.path.isdir(page_dir):
                os.makedirs(page_dir)

            # Query the page element itself rather than searching the whole
            # document again by ID: IDs containing quotes cannot break the
            # path, and pages sharing an ID cannot produce duplicate rows.
            for audioelement in audioholder.findall("./audioelement"):
                audio_id = audioelement.get('id')
                if audio_id is None:
                    continue

                # append to file [page_name]/[page_name]-comments-[id].csv
                csv_path = os.path.join(
                    page_dir, page_name + '-comments-' + audio_id + '.csv')
                with _open_csv_for_append(csv_path) as csvfile:
                    writer = csv.writer(csvfile, delimiter=',')
                    writer.writerow([_comment_cell(audioelement)])


# Run on the current folder, exactly as the original flat script did.
extract_comments(".")