diff scripts/comment_parser.py @ 1072:2ea78697aadf

Scripts: comment_parser and score_parser start new CSV files each time; various plots added to generated PDF report
author Brecht De Man <BrechtDeMan@users.noreply.github.com>
date Thu, 20 Aug 2015 11:29:29 +0200
parents a2a245542ae6
children 235594325b84
line wrap: on
line diff
--- a/scripts/comment_parser.py	Tue Aug 18 23:56:05 2015 +0200
+++ b/scripts/comment_parser.py	Thu Aug 20 11:29:29 2015 +0200
@@ -4,7 +4,7 @@
 import xml.etree.ElementTree as ET
 import os
 import csv
-
+import sys
 
 # COMMAND LINE ARGUMENTS
 
@@ -31,6 +31,9 @@
 
 # CODE
 
+# remember which CSV files have already been opened during this run
+file_history = []
+
 # get every XML file in folder
 for file in os.listdir(folder_name): 
     if file.endswith(".xml"):
@@ -56,23 +59,28 @@
                     
                     csv_name = folder_name +'/' + page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
 
-                    # append (!) to file [page_name]/[page_name]-comments-[id].csv
-                    with open(csv_name, 'a') as csvfile:
-                        writer = csv.writer(csvfile, 
-                                            delimiter=',', 
-                                            dialect="excel",
-                                            quoting=csv.QUOTE_ALL)
-                        commentstr = audioelement.find("./comment/response").text
+                    # If the file hasn't been opened yet during this run, open it in write mode to truncate it (start a fresh CSV)
+                    if csv_name not in file_history:
+                        csvfile = open(csv_name, 'w')
+                        file_history.append(csv_name) # remember this file has been written to this time around
+                    else: 
+                        # append (!) to file [page_name]/[page_name]-comments-[id].csv
+                        csvfile = open(csv_name, 'a')
+                    writer = csv.writer(csvfile, 
+                                        delimiter=',', 
+                                        dialect="excel",
+                                        quoting=csv.QUOTE_ALL)
+                    commentstr = audioelement.find("./comment/response").text
                         
-                        if commentstr is None:
-                           commentstr = '';
-                            
-                        # anonymous comments:
-                        #writer.writerow([commentstr.encode("utf-8")]) 
-                        # comments with (file) name:
-                        writer.writerow([file[:-4]] + [commentstr.encode("utf-8")]) 
+                    if commentstr is None:
+                       commentstr = ''
+                        
+                    # anonymous comments:
+                    #writer.writerow([commentstr.encode("utf-8")]) 
+                    # comments with (file) name:
+                    writer.writerow([file[:-4]] + [commentstr.encode("utf-8")]) 
 
-                        #TODO Replace 'new line' in comment with something else?
+                    #TODO Replace 'new line' in comment with something else?
                         
 # PRO TIP: Change from csv to txt by running this in bash: 
 # $ cd folder_where_csvs_are/