Daniel@0
|
1 #!/usr/bin/python
|
Daniel@0
|
2 # Part of DML (Digital Music Laboratory)
|
Daniel@0
|
3 # Copyright 2014-2015 Daniel Wolff, City University
|
Daniel@0
|
4
|
Daniel@0
|
5 # This program is free software; you can redistribute it and/or
|
Daniel@0
|
6 # modify it under the terms of the GNU General Public License
|
Daniel@0
|
7 # as published by the Free Software Foundation; either version 2
|
Daniel@0
|
8 # of the License, or (at your option) any later version.
|
Daniel@0
|
9 #
|
Daniel@0
|
10 # This program is distributed in the hope that it will be useful,
|
Daniel@0
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
13 # GNU General Public License for more details.
|
Daniel@0
|
14 #
|
Daniel@0
|
15 # You should have received a copy of the GNU General Public
|
Daniel@0
|
16 # License along with this library; if not, write to the Free Software
|
Daniel@0
|
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
18
|
Daniel@0
|
19 # -*- coding: utf-8 -*-
|
Daniel@0
|
20 #
|
Daniel@0
|
21 # This is a data conversion wrapper for the spmf toolkit
|
Daniel@0
|
22 __author__="Daniel Wolff"
|
Daniel@0
|
23
|
Daniel@0
|
24 import chord_seq_key_relative as c2f
|
Daniel@0
|
25 import csv
|
Daniel@0
|
26 import re
|
Daniel@0
|
27 import tempfile
|
Daniel@0
|
28 import subprocess
|
Daniel@0
|
29 import os
|
Daniel@0
|
30 import platform
|
Daniel@0
|
31 from aggregate import *
|
Daniel@0
|
32 from csvutils import *
|
Daniel@0
|
33
|
Daniel@0
|
34 # command for threading
|
Daniel@0
|
35 import subprocess, threading
|
Daniel@0
|
36 import signal
|
Daniel@0
|
37
|
Daniel@0
|
38 # limit for sequences read
|
Daniel@0
|
39 max_lines = 10000000
|
Daniel@0
|
40
|
Daniel@0
|
41
|
Daniel@0
|
class Command(object):
    """Run an external command in a worker thread with an optional timeout.

    Attributes:
        cmd: argument list handed to ``subprocess.Popen`` (``shell=False``).
        process: the ``Popen`` object, or None while/if it was never created.
        text: combined stdout/stderr of the process, or a status message.
    """

    def __init__(self, cmd):
        self.cmd = cmd
        self.process = None
        # Reported when the process could not be started or gave no output.
        self.text = 'SPMF terminated unexpectedly'

    def run(self, timeout):
        """Execute the command and wait for it to finish.

        Args:
            timeout: seconds to wait before the process is terminated;
                a value <= 0 waits without limit.

        Returns:
            Tuple ``(returncode, text)``.  ``returncode`` is None when the
            process could not be started.  ``text`` is the captured output,
            or the termination message when the timeout was hit.
        """
        is_windows = 'Win' in platform.system()

        def target():
            print_status('Thread started')
            popen_args = dict(stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              shell=False)
            if not is_windows:
                # Start the child in its own session so that the whole
                # process group can be signalled with os.killpg below.
                popen_args['preexec_fn'] = os.setsid
            self.process = subprocess.Popen(self.cmd, **popen_args)

            # communicate() drains stdout and waits for the process to exit,
            # which also fills in returncode.
            output, _ = self.process.communicate()
            self.text = output

            print_status('Thread finished')

        thread = threading.Thread(target=target)
        thread.start()

        timed_out = False
        # wait until timeout if specified
        if timeout > 0:
            thread.join(timeout)
            if thread.is_alive():
                print_status('Terminating process')
                timed_out = True
                if self.process is not None:
                    try:
                        if is_windows:
                            self.process.kill()
                        else:
                            os.killpg(self.process.pid, signal.SIGTERM)
                    except OSError:
                        # The process exited on its own in the meantime.
                        pass

        # No-op if the worker already finished; otherwise wait for it.
        thread.join()

        if timed_out:
            # Set after the join: the worker stores the (partial) output in
            # self.text when the killed process closes its stdout, which
            # would otherwise overwrite this message.
            self.text = 'Terminating SPMF after ' + str(timeout) + ' seconds'

        # self.process stays None when Popen itself failed in the worker.
        returncode = None
        if self.process is not None:
            returncode = self.process.returncode
        return (returncode, self.text)
|
Daniel@0
|
82
|
Daniel@0
|
83
|
Daniel@0
|
84 # runs the spmf java with method and parameters as specified
|
Daniel@0
|
85 # 1st parameter: usually minimal support of sequence
|
Daniel@0
|
86 # 2nd parameter: minimal length of sequence
|
Daniel@0
|
87 # run spmf with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
|
Daniel@0
|
def spmf(file, method="CM-SPADE", params=("70%", "3"), timeout=10):
    """Run the SPMF java toolkit on an input file.

    The resulting call has the form
    ``java -Xmx1g -jar spmf.jar run <method> <file> <outfile> <params...>``.

    Args:
        file: path of the SPMF input file.
        method: name of the SPMF algorithm to run.
        params: extra arguments appended to the command line; usually the
            minimal support of a sequence followed by its minimal length.
        timeout: seconds after which SPMF is terminated (<= 0: no limit).

    Returns:
        Path of the output file.  It is returned even when SPMF reported an
        error, in which case it may be empty or incomplete.
    """
    # mkstemp creates the file itself, avoiding the name race of the
    # deprecated tempfile.mktemp(); SPMF then writes into that file.
    handle, outfile = tempfile.mkstemp()
    os.close(handle)

    command = ["java", "-Xmx1g", "-jar", "spmf.jar", "run",
               method, file, outfile]
    command.extend(params)

    retcode, text = Command(command).run(timeout=timeout)

    if retcode != 0:
        print_status("Terminated with errors" + text)
    return outfile
|
Daniel@0
|
109
|
Daniel@0
|
110
|
Daniel@0
|
111 # takes a dictionary of chords for one or multiple files
|
Daniel@0
|
112 # in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
|
Daniel@0
|
113 # and converts it into spmf
|
Daniel@0
|
114 #
|
Daniel@0
|
115 # output: tempfile of spmf output
|
Daniel@0
|
def relchords2spmf(input):
    """Write chord sequences to a temporary file in SPMF input format.

    Each track becomes one line: every chord number is followed by the item
    separator ``-1`` and the line is closed with ``-2``.

    Args:
        input: dict of the form
            ``dict[clipid] = [(time, key, mode, fun, typ, bfun), ...]``.

    Returns:
        The closed ``NamedTemporaryFile`` object (created with
        ``delete=False``); its ``name`` attribute is the path of the file.
    """
    fspmf = tempfile.NamedTemporaryFile(delete=False)
    try:
        for trackdata in input.values():
            # one chord sequence per line
            items = [str(c2f.fun2num(fun, typ, bfun, mode)) + ' -1 '
                     for (_time, _key, mode, fun, typ, bfun) in trackdata]
            line = ''.join(items) + '-2\n'
            # The temp file is opened in binary mode, so write bytes.
            fspmf.write(line.encode('ascii'))
    finally:
        # Close the handle even if the chord conversion raises.
        fspmf.close()

    return fspmf
|
Daniel@0
|
138
|
Daniel@0
|
139
|
Daniel@0
|
140 ## takes a dictionary of chords for one or multiple files
|
Daniel@0
|
141 ## in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
|
Daniel@0
|
142 ## and converts it into spmf
|
Daniel@0
|
143 #def folder2spmf(folderin = 'D:/mirg/Chord_Analysis20141216/', fileout = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf'):
|
Daniel@0
|
144 #
|
Daniel@0
|
145 # # get chords for all files
|
Daniel@0
|
146 # output = c2f.folder2functions(folderin)
|
Daniel@0
|
147 #
|
Daniel@0
|
148 # # open log
|
Daniel@0
|
149 # logfile = fileout + '.dic'
|
Daniel@0
|
150 # csvfile = open(logfile, "w+b") #opens the file for updating
|
Daniel@0
|
151 # w = csv.writer(csvfile)
|
Daniel@0
|
152 # w.writerow(["track","key","mode","sequence length"])
|
Daniel@0
|
153 #
|
Daniel@0
|
154 # # open spmf file
|
Daniel@0
|
155 # fspmf = open(fileout,'w')
|
Daniel@0
|
156 # # ---
|
Daniel@0
|
157 # # this is writing the spmf format
|
Daniel@0
|
158 # for track,trackdata in output.iteritems():
|
Daniel@0
|
159 # # write chord sequence as one line in spmf file
|
Daniel@0
|
160 # for (time,key,mode,fun,typ,bfun) in trackdata:
|
Daniel@0
|
161 # chord = c2f.fun2num(fun,typ,bfun,mode)
|
Daniel@0
|
162 #
|
Daniel@0
|
163 # # -1 is the spearator of items or itemsets
|
Daniel@0
|
164 # fspmf.write(str(chord) + ' -1 ')
|
Daniel@0
|
165 #
|
Daniel@0
|
166 # # the sequence is closed with -2
|
Daniel@0
|
167 # fspmf.write('-2\n')
|
Daniel@0
|
168 # w.writerow([track, str(key), str(mode),str(len(trackdata))])
|
Daniel@0
|
169 #
|
Daniel@0
|
170 # fspmf.close()
|
Daniel@0
|
171 # csvfile.close()
|
Daniel@0
|
172
|
Daniel@0
|
173 # read an spmf file
|
Daniel@0
|
174 # def parsespmf(filein = 'D:/mirg/Chord_Analysis20141216/Beethoven.txt'):
|
Daniel@0
|
175
|
Daniel@0
|
176 # string sourcefile path to the source spmf file with chords from records
|
Daniel@0
|
177 # string patternfile path to the pattern spmf file
|
Daniel@0
|
178 # matches each of the patterns in patternfile
|
Daniel@0
|
179 # to the chord sequences in sourcefile
|
Daniel@0
|
def match(sourcefile='D:/mirg/Chord_Analysis20141216/Beethoven.spmf',
          sourcedict='D:/mirg/Chord_Analysis20141216/Beethoven.spmf.dic',
          patternfile='D:/mirg/Chord_Analysis20141216/Beethoven_70.txt',
          outpath='D:/mirg/Chord_Analysis20141216/'):
    """Match every pattern of patternfile against the sequences in sourcefile.

    For each track the indices of all patterns found as open patterns in its
    chord sequence are collected and written to ``<track>_patterns.csv`` in
    ``outpath``.

    Args:
        sourcefile: path to the source spmf file with chords from records.
        sourcedict: path to the track dictionary belonging to sourcefile.
        patternfile: path to the pattern spmf file.
        outpath: directory the per-track csv files are written to.

    NOTE(review): tracks are paired with the lines of sourcefile purely by
    position, so this relies on the track dictionary being iterated in the
    same order as the lines of sourcefile -- confirm for the dict returned
    by getClipDict.
    """
    # ---
    # we here assume that there are more files than patterns,
    # as display of patterns is somehow limited
    # therefore parallelisation will be 1 pattern/multiple files
    # per instance
    # ---

    # spmf2table returns (patterns, supports); only the patterns are matched.
    patterns, _supports = spmf2table(patternfile)

    # read track dictionary to get the input sequence names
    tracks = getClipDict(sourcedict)

    tracks_patterns = dict()

    # iterate over all tracks - to be parallelised
    with open(sourcefile, 'r') as source:
        for track in tracks:
            sequence = readSequence(next(source))
            print(track)
            for p, pattern in enumerate(patterns):
                # match open pattern
                if openPatternInSequence(sequence, pattern):
                    tracks_patterns.setdefault(track, []).append(p)

    # write clip index to files
    writeAllPatternsForClips(outpath, tracks_patterns)
|
Daniel@0
|
225
|
Daniel@0
|
226 # writes results to disk per key
|
Daniel@0
|
def writeAllPatternsForClips(path='D:/mirg/Chord_Analysis20141216/',
                             tracks_patterns=None):
    """Write one ``<name>_patterns.csv`` file per entry of tracks_patterns.

    Args:
        path: directory the csv files are created in.
        tracks_patterns: dict mapping a name to the list that is written as
            the single row of that name's csv file.  None or an empty dict
            writes nothing.
    """
    if not tracks_patterns:
        return

    for name, contents in tracks_patterns.items():
        # Binary mode as required by the Python 2 csv module; the with
        # block closes the file even if writing the row fails.
        with open(path + '/' + name + '_patterns.csv', "w+b") as csvfile:
            # compress pattern data ?
            # e.g. 2 columns from-to for the long series of atomic increments
            csv.writer(csvfile).writerow(contents)
|
Daniel@0
|
239
|
Daniel@0
|
240 # reads output of spmf to table
|
Daniel@0
|
def spmf2table(patternfile):
    """Read the frequent patterns of an SPMF output file.

    A line of the file looks like this::

        1120401 -1 1120101 -1 #SUP: 916

    At most ``max_lines`` lines are read; a status message is printed when
    the file contains more.

    Args:
        patternfile: path of the SPMF output file.

    Returns:
        Tuple ``(patterns, supports)`` of two parallel lists: each pattern
        is a list of item strings, each support the value parsed by
        readPattern for the same line.
    """
    patterns = []
    supports = []

    with open(patternfile, 'r') as f:
        for linecnt, line in enumerate(f):
            # linecnt lines have been stored so far; stop before exceeding
            # the limit rather than after reading one line too many.
            if linecnt >= max_lines:
                print_status('Not reading more than ' + str(max_lines) + ' lines :(')
                break

            # patterns are kept as strings, so any representation works
            pattern, support = readPattern(line)
            patterns.append(pattern)
            supports.append(support)

    return patterns, supports
|
Daniel@0
|
275
|
Daniel@0
|
276 # @param line: reads a line in the spmf output file with frequent patterns
|
Daniel@0
|
277 # returns list of strings "pattern" and int "support"
|
Daniel@0
|
def readPattern(line):
    """Parse one line of an SPMF output file with frequent patterns.

    Example line: ``1120401 -1 1120101 -1 #SUP: 916``

    Args:
        line: the line, with or without a trailing newline.

    Returns:
        Tuple ``(pattern, support)``.  ``pattern`` is the list of item
        strings that are followed by the ``-1`` separator.  ``support`` is
        the integer after ``#SUP:``, or -1 when the marker is missing or
        its value cannot be parsed (e.g. a truncated file).
    """
    marker = '#SUP:'
    suploc = line.find(marker)

    if suploc >= 0:
        body = line[:suploc]
        try:
            # strip() instead of dropping the last character: the final
            # line of a file may come without a trailing newline.
            support = int(line[suploc + len(marker):].strip())
        except ValueError:
            support = -1
    else:
        # broken line without support information
        body = line.rstrip('\r\n')
        support = -1

    # Every item is followed by ' -1 ', so the last piece of the split is
    # either empty or an incomplete item and is dropped.
    pattern = body.split(' -1 ')[:-1]
    return (pattern, support)
|
Daniel@0
|
291
|
Daniel@0
|
292 # @param line: reads a line in the spmf input file with chord sequence
|
Daniel@0
|
293 # returns the sequence as a list of item strings
|
Daniel@0
|
def readSequence(line):
    """Parse one line of an SPMF input file holding a chord sequence.

    Example line: ``1120401 -1 1120101 -1 -2``

    Args:
        line: the line, with or without a trailing newline.

    Returns:
        List of the item strings that are followed by the ``-1`` separator,
        up to the ``-2`` sequence terminator.
    """
    # locate the end-of-sequence marker
    endloc = line.find('-2')

    if endloc >= 0:
        body = line[:endloc]
    else:
        # No terminator: use the whole line instead of slicing with -1,
        # which would cut off the last character.
        body = line.rstrip('\r\n')

    # Every item is followed by ' -1 ', so the last piece is dropped.
    return body.split(' -1 ')[:-1]
|
Daniel@0
|
301
|
Daniel@0
|
302 # finds open pattern in sequences
|
Daniel@0
|
303 # @param [string] sequence input sequence
|
Daniel@0
|
304 # @param [string] pattern pattern to be found
|
Daniel@0
|
def openPatternInSequence(sequence, pattern):
    """Test whether pattern occurs in sequence as an open pattern.

    The items of pattern have to appear in sequence in the same order, but
    other items may lie between them.

    Args:
        sequence: list of items of the input sequence.
        pattern: list of items of the pattern to be found.

    Returns:
        1 if the complete pattern was found, 0 otherwise.  An empty pattern
        is contained in every sequence.
    """
    wanted = len(pattern)
    if wanted == 0:
        return 1

    patidx = 0
    for item in sequence:
        if item == pattern[patidx]:
            patidx += 1

            # The pattern is complete only once ALL of its items matched.
            if patidx == wanted:
                # could also return the start index
                return 1

    # finished the sequence before finishing pattern
    return 0
|
Daniel@0
|
317
|
Daniel@0
|
318 # finds closed pattern in sequences
|
Daniel@0
|
319 # @param [string] sequence input sequence
|
Daniel@0
|
320 # @param [string] pattern pattern to be found
|
Daniel@0
|
def closedPatternInSequence(sequence, pattern):
    """Test whether pattern occurs in sequence as a closed pattern.

    The items of pattern have to appear in sequence directly one after the
    other.  Items are compared by their string representation.

    Args:
        sequence: list of items of the input sequence.
        pattern: list of items of the pattern to be found.

    Returns:
        True if pattern is a contiguous run of sequence, False otherwise.
        An empty pattern is contained in every sequence.
    """
    needle = [str(item) for item in pattern]
    haystack = [str(item) for item in sequence]
    width = len(needle)

    # Compare whole items window by window.  Concatenating the items into
    # one string would also match across item boundaries, e.g. the pattern
    # ['12', '3'] inside the sequence ['1', '23'].
    return any(haystack[start:start + width] == needle
               for start in range(len(haystack) - width + 1))
|
Daniel@0
|
324
|
Daniel@0
|
325 # reads all track names from the dictionary created by folder2spmf
|
Daniel@0
|
326 # @param sourcedict path to dictionary
|
Daniel@0
|
def getClipDict(sourcedict):
    """Read the track dictionary (csv) belonging to an spmf file.

    The first row of the file is a legend and is skipped; every following
    row has the four columns track, key, mode and sequence length.

    Args:
        sourcedict: path to the dictionary file.

    Returns:
        Ordered dict mapping each track name to the tuple
        ``(key, mode, seqlen)`` of strings, in the order of the rows in
        the file.
    """
    from collections import OrderedDict

    # Keep the row order: a plain dict gives no ordering guarantee on
    # Python 2, so the position of a track in the file would be lost.
    tracks = OrderedDict()

    with open(sourcedict, 'rt') as f:
        reader = csv.reader(f)

        # skip first row that contains legend (tolerating an empty file)
        next(reader, None)

        # get following rows
        for (track, key, mode, seqlen) in reader:
            tracks[track] = (key, mode, seqlen)

    return tracks
|
Daniel@0
|
343
|
Daniel@0
|
344
|
Daniel@0
|
345 # run spmf afterwards with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
|
Daniel@0
|
if __name__ == "__main__":
    # Script entry point: the conversion and matching calls are disabled,
    # so running the module only prints a marker.
    #folder2spmf()
    #match()
    # Parenthesised form prints the same on Python 2 and parses on Python 3.
    print("huhu")
|