# HG changeset patch
# User Brecht De Man <b.deman@qmul.ac.uk>
# Date 1434727448 -3600
# Node ID 625a0f709a55f5b36c48ecbae51965e7805d3b11
# Parent  b04f0c228093c0a64b049801b5c35e92c093efb1
Scripts: comment extraction bug fixes

diff -r b04f0c228093 -r 625a0f709a55 scripts/comment_parser.py
--- a/scripts/comment_parser.py	Fri Jun 19 12:53:40 2015 +0100
+++ b/scripts/comment_parser.py	Fri Jun 19 16:24:08 2015 +0100
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
 import xml.etree.ElementTree as ET
 import os
 import csv
@@ -23,10 +25,16 @@
             for audioelement in root.findall("*/[@id='"+page_name+"']/audioelement"):
                 if audioelement is not None: # Check it exists
                     audio_id = str(audioelement.get('id'))
+                    
+                    
+                    csv_name = page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
 
-                    # append to file [page_name]/[page_name]-comments-[id].csv
-                    with open(page_name+'/'+page_name+'-comments-'+audio_id+'.csv', 'a') as csvfile:
-                        writer = csv.writer(csvfile, delimiter=',')
+                    # append (!) to file [page_name]/[page_name]-comments-[id].csv
+                    with open(csv_name, 'a') as csvfile:
+                        writer = csv.writer(csvfile, 
+                                            delimiter=',', 
+                                            dialect="excel",
+                                            quoting=csv.QUOTE_ALL)
                         commentstr = root.find("*/[@id='"
                                                + page_name
                                                + "']/audioelement/[@id='"
@@ -35,12 +43,13 @@
                         if commentstr is None:
                             writer.writerow([''])
                         else:
-                            writer.writerow([commentstr.encode("utf-8")])
-                        #TODO Comma doesn't act as delimiter now!
-                        # (when adding more than just a comment per line):
-                        # writer.writerow([file + ',' + commentstr.encode("utf-8")])
+                            # anonymous comments:
+                            writer.writerow([commentstr]) 
+                            # comments with (file) name:
+                            #writer.writerow([file[:-4]] + [commentstr]) 
 
-                        #TODO Replace 'new line' with something else?
-
-                        #TODO 'Append' means duplicate entries if run several times...
-
+                        #TODO Replace 'new line' in comment with something else?
+                        
+# PRO TIP: To rename the generated .csv files to .txt, run this in bash:
+# $ cd folder_where_csvs_are/
+# $ for i in *.csv; do mv "$i" "${i%.csv}.txt"; done