comparison scripts/comment_parser.py @ 928:ba58cf8d0dbc

Score parsing: create a CSV from all XML files (one file per page ID, one column per fragment ID, one row per subject). Supports varying selections of pages and fragments across subjects. Terminology generalised to 'page' (instead of 'song') and 'fragment' (instead of 'mix').
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Sun, 31 May 2015 14:45:30 +0100
parents 5db0069046d5
children 97ebdb6b266a
comparison
equal deleted inserted replaced
927:5db0069046d5 928:ba58cf8d0dbc
6 for file in os.listdir("."): # You have to put this script in folder where output XML files are. 6 for file in os.listdir("."): # You have to put this script in folder where output XML files are.
7 if file.endswith(".xml"): 7 if file.endswith(".xml"):
8 tree = ET.parse(file) 8 tree = ET.parse(file)
9 root = tree.getroot() 9 root = tree.getroot()
10 10
11 # get list of all songs 11 # get list of all page names
12 for audioholder in root.findall("./audioholder"): # iterate over songs 12 for audioholder in root.findall("./audioholder"): # iterate over pages
13 song_name = audioholder.get('id') # get song name 13 page_name = audioholder.get('id') # get page name
14 14
15 # create folder [song_name] if not yet created 15 # create folder [page_name] if not yet created
16 if not os.path.exists(song_name): 16 if not os.path.exists(page_name):
17 os.makedirs(song_name) 17 os.makedirs(page_name)
18 18
19 # for song [song_name], print comments related to mix [id] 19 # for page [page_name], print comments related to fragment [id]
20 for audioelement in root.findall("*/[@id='"+song_name+"']/audioelement"): 20 for audioelement in root.findall("*/[@id='"+page_name+"']/audioelement"):
21 audio_id = str(audioelement.get('id')) 21 audio_id = str(audioelement.get('id'))
22 # append to file [song_name]/[song_name]-comments-[id].csv 22 # append to file [page_name]/[page_name]-comments-[id].csv
23 with open(song_name+'/'+song_name+'-comments-'+audio_id+'.csv', 'a') as csvfile: 23 with open(page_name+'/'+page_name+'-comments-'+audio_id+'.csv', 'a') as csvfile:
24 commentstr = root.find("*/[@id='" 24 commentstr = root.find("*/[@id='"
25 + song_name 25 + page_name
26 + "']/audioelement/[@id='" 26 + "']/audioelement/[@id='"
27 + audio_id 27 + audio_id
28 + "']/comment/response").text 28 + "']/comment/response").text
29 writer = csv.writer(csvfile, delimiter=',') 29 writer = csv.writer(csvfile, delimiter=',')
30 writer.writerow([commentstr.encode("utf-8")]) 30 writer.writerow([commentstr.encode("utf-8")])