comparison scripts/comment_parser.py @ 1072:2ea78697aadf

Scripts: comment_parser and score_parser start new CSV files each time; various plots added to generated PDF report
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Thu, 20 Aug 2015 11:29:29 +0200
parents a2a245542ae6
children 235594325b84
comparison
legend: equal | deleted | inserted | replaced
left column: 1071:e2dd3105a84c | right column: 1072:2ea78697aadf
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 import xml.etree.ElementTree as ET 4 import xml.etree.ElementTree as ET
5 import os 5 import os
6 import csv 6 import csv
7 7 import sys
8 8
9 # COMMAND LINE ARGUMENTS 9 # COMMAND LINE ARGUMENTS
10 10
11 assert len(sys.argv)<3, "comment_parser takes at most 1 command line argument\n"+\ 11 assert len(sys.argv)<3, "comment_parser takes at most 1 command line argument\n"+\
12 "Use: python score_parser.py [rating_folder_location]" 12 "Use: python score_parser.py [rating_folder_location]"
29 print "No write privileges in folder '"+folder_name+"'." 29 print "No write privileges in folder '"+folder_name+"'."
30 30
31 31
32 # CODE 32 # CODE
33 33
34 # remember which files have been opened this time
35 file_history = []
36
34 # get every XML file in folder 37 # get every XML file in folder
35 for file in os.listdir(folder_name): 38 for file in os.listdir(folder_name):
36 if file.endswith(".xml"): 39 if file.endswith(".xml"):
37 tree = ET.parse(folder_name + '/' + file) 40 tree = ET.parse(folder_name + '/' + file)
38 root = tree.getroot() 41 root = tree.getroot()
54 if audioelement is not None: # Check it exists 57 if audioelement is not None: # Check it exists
55 audio_id = str(audioelement.get('id')) 58 audio_id = str(audioelement.get('id'))
56 59
57 csv_name = folder_name +'/' + page_name+'/'+page_name+'-comments-'+audio_id+'.csv' 60 csv_name = folder_name +'/' + page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
58 61
59 # append (!) to file [page_name]/[page_name]-comments-[id].csv 62 # If file hasn't been opened yet this time, empty
60 with open(csv_name, 'a') as csvfile: 63 if csv_name not in file_history:
61 writer = csv.writer(csvfile, 64 csvfile = open(csv_name, 'w')
62 delimiter=',', 65 file_history.append(csv_name) # remember this file has been written to this time around
63 dialect="excel", 66 else:
64 quoting=csv.QUOTE_ALL) 67 # append (!) to file [page_name]/[page_name]-comments-[id].csv
65 commentstr = audioelement.find("./comment/response").text 68 csvfile = open(csv_name, 'a')
69 writer = csv.writer(csvfile,
70 delimiter=',',
71 dialect="excel",
72 quoting=csv.QUOTE_ALL)
73 commentstr = audioelement.find("./comment/response").text
66 74
67 if commentstr is None: 75 if commentstr is None:
68 commentstr = ''; 76 commentstr = ''
69 77
70 # anonymous comments: 78 # anonymous comments:
71 #writer.writerow([commentstr.encode("utf-8")]) 79 #writer.writerow([commentstr.encode("utf-8")])
72 # comments with (file) name: 80 # comments with (file) name:
73 writer.writerow([file[:-4]] + [commentstr.encode("utf-8")]) 81 writer.writerow([file[:-4]] + [commentstr.encode("utf-8")])
74 82
75 #TODO Replace 'new line' in comment with something else? 83 #TODO Replace 'new line' in comment with something else?
76 84
77 # PRO TIP: Change from csv to txt by running this in bash: 85 # PRO TIP: Change from csv to txt by running this in bash:
78 # $ cd folder_where_csvs_are/ 86 # $ cd folder_where_csvs_are/
79 # $ for i in *.csv; do mv "$i" "${i/.csv}".txt; done 87 # $ for i in *.csv; do mv "$i" "${i/.csv}".txt; done