comparison scripts/comment_parser.py @ 2228:3358d04605db

Updated scripts to work with latest results document specification
author Nicholas Jillings <nicholas.jillings@mail.bcu.ac.uk>
date Thu, 14 Apr 2016 17:16:20 +0100
parents 03b2ff7308fa
children 5bf0555905de
comparison
equal deleted inserted replaced
2226:43ab418ba4b8 2228:3358d04605db
40 tree = ET.parse(folder_name + '/' + file) 40 tree = ET.parse(folder_name + '/' + file)
41 root = tree.getroot() 41 root = tree.getroot()
42 42
43 # get list of all page names 43 # get list of all page names
44 for audioholder in root.findall("./page"): # iterate over pages 44 for audioholder in root.findall("./page"): # iterate over pages
45 page_name = audioholder.get('id') # get page name 45 page_name = audioholder.get('ref') # get page name
46 46
47 if page_name is None: # ignore 'empty' audio_holders 47 if page_name is None: # ignore 'empty' audio_holders
48 print "WARNING: " + file + " contains empty page. (comment_parser.py)" 48 print "WARNING: " + file + " contains empty page. (comment_parser.py)"
49 break 49 break
50
51 if audioholder.get("state") != "complete":
52 print "WARNING: " + file + "test page " + page_name + " is not complete, skipping."
53 else :
54 # create folder [page_name] if not yet created
55 if not os.path.exists(folder_name + "/" + page_name):
56 os.makedirs(folder_name + "/" + page_name)
50 57
51 # create folder [page_name] if not yet created 58 # for page [page_name], print comments related to fragment [id]
52 if not os.path.exists(folder_name + "/" + page_name): 59 for audioelement in audioholder.findall("./audioelement"):
53 os.makedirs(folder_name + "/" + page_name) 60 if audioelement is not None: # Check it exists
61 audio_id = str(audioelement.get('ref'))
54 62
55 # for page [page_name], print comments related to fragment [id] 63 csv_name = folder_name +'/' + page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
56 for audioelement in audioholder.findall("./audioelement"):
57 if audioelement is not None: # Check it exists
58 audio_id = str(audioelement.get('id'))
59
60 csv_name = folder_name +'/' + page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
61 64
62 # If file hasn't been opened yet this time, empty 65 # If file hasn't been opened yet this time, empty
63 if csv_name not in file_history: 66 if csv_name not in file_history:
64 csvfile = open(csv_name, 'w') 67 csvfile = open(csv_name, 'w')
65 file_history.append(csv_name) # remember this file has been written to this time around 68 file_history.append(csv_name) # remember this file has been written to this time around
66 else: 69 else:
67 # append (!) to file [page_name]/[page_name]-comments-[id].csv 70 # append (!) to file [page_name]/[page_name]-comments-[id].csv
68 csvfile = open(csv_name, 'a') 71 csvfile = open(csv_name, 'a')
69 writer = csv.writer(csvfile, 72 writer = csv.writer(csvfile,
70 delimiter=',', 73 delimiter=',',
71 dialect="excel", 74 dialect="excel",
72 quoting=csv.QUOTE_ALL) 75 quoting=csv.QUOTE_ALL)
73 commentstr = audioelement.find("./comment/response").text 76 commentstr = audioelement.find("./comment/response").text
74 77
75 if commentstr is None: 78 if commentstr is None:
76 commentstr = '' 79 commentstr = ''
77 80
78 # anonymous comments: 81 # anonymous comments:
79 #writer.writerow([commentstr.encode("utf-8")]) 82 #writer.writerow([commentstr.encode("utf-8")])
80 # comments with (file) name: 83 # comments with (file) name:
81 writer.writerow([file[:-4]] + [commentstr.encode("utf-8")]) 84 writer.writerow([file[:-4]] + [commentstr.encode("utf-8")])
82 85
83 #TODO Replace 'new line' in comment with something else? 86 #TODO Replace 'new line' in comment with something else?
84 87
85 # PRO TIP: Change from csv to txt by running this in bash: 88 # PRO TIP: Change from csv to txt by running this in bash:
86 # $ cd folder_where_csvs_are/ 89 # $ cd folder_where_csvs_are/