# HG changeset patch
# User Nicholas Jillings
# Date 1435246839 -3600
# Node ID 8ab5f896985658bb079181237fdbb77e218b906f
# Parent ca4ae613f1dd2e0a8bba0fc68f829cf447887ac5
# Parent 302926cdf3c4777ee67f89d123f6cb21ce1a19aa
Merge

diff -r ca4ae613f1dd -r 8ab5f8969856 scripts/comment_parser.py
--- a/scripts/comment_parser.py	Thu Jun 25 16:40:11 2015 +0100
+++ b/scripts/comment_parser.py	Thu Jun 25 16:40:39 2015 +0100
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
 import xml.etree.ElementTree as ET
 import os
 import csv
@@ -23,10 +25,16 @@
             for audioelement in root.findall("*/[@id='"+page_name+"']/audioelement"):
                 if audioelement is not None: # Check it exists
                     audio_id = str(audioelement.get('id'))
+
+
+                    csv_name = page_name+'/'+page_name+'-comments-'+audio_id+'.csv'
 
-                    # append to file [page_name]/[page_name]-comments-[id].csv
-                    with open(page_name+'/'+page_name+'-comments-'+audio_id+'.csv', 'a') as csvfile:
-                        writer = csv.writer(csvfile, delimiter=',')
+                    # append (!) to file [page_name]/[page_name]-comments-[id].csv
+                    with open(csv_name, 'a') as csvfile:
+                        writer = csv.writer(csvfile,
+                                            delimiter=',',
+                                            dialect="excel",
+                                            quoting=csv.QUOTE_ALL)
                         commentstr = root.find("*/[@id='"
                                                + page_name
                                                + "']/audioelement/[@id='"
@@ -35,12 +43,13 @@
                         if commentstr is None:
                             writer.writerow([''])
                         else:
-                            writer.writerow([commentstr.encode("utf-8")])
-                            #TODO Comma doesn't act as delimiter now!
-                            # (when adding more than just a comment per line):
-                            # writer.writerow([file + ',' + commentstr.encode("utf-8")])
+                            # anonymous comments:
+                            writer.writerow([commentstr])
+                            # comments with (file) name:
+                            #writer.writerow([file[:-4]] + [commentstr])
 
-                            #TODO Replace 'new line' with something else?
-
-                            #TODO 'Append' means duplicate entries if run several times...
-
+                            #TODO Replace 'new line' in comment with something else?
+
+# PRO TIP: Change from csv to txt by running this in bash:
+# $ cd folder_where_csvs_are/
+# $ for i in *.csv; do mv "$i" "${i/.csv}".txt; done