dml-open-cliopatria: dml-cla/python/chord_seq_spmf

annotate dml-cla/python/chord_seq_spmf_helper.py @ 0:718306e29690 tip

committing public release

author	Daniel Wolff
date	Tue, 09 Feb 2016 21:05:06 +0100
parents
children

rev	line source
Daniel@0	1 #!/usr/bin/python
Daniel@0	2 # Part of DML (Digital Music Laboratory)
Daniel@0	3 # Copyright 2014-2015 Daniel Wolff, City University
Daniel@0	4
Daniel@0	5 # This program is free software; you can redistribute it and/or
Daniel@0	6 # modify it under the terms of the GNU General Public License
Daniel@0	7 # as published by the Free Software Foundation; either version 2
Daniel@0	8 # of the License, or (at your option) any later version.
Daniel@0	9 #
Daniel@0	10 # This program is distributed in the hope that it will be useful,
Daniel@0	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Daniel@0	13 # GNU General Public License for more details.
Daniel@0	14 #
Daniel@0	15 # You should have received a copy of the GNU General Public
Daniel@0	16 # License along with this library; if not, write to the Free Software
Daniel@0	17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Daniel@0	18
Daniel@0	19 # -- coding: utf-8 --
Daniel@0	20 #
Daniel@0	21 # This is a data conversion wrapper for the spmf toolkit
Daniel@0	22 __author__="Daniel Wolff"
Daniel@0	23
Daniel@0	24 import chord_seq_key_relative as c2f
Daniel@0	25 import csv
Daniel@0	26 import re
Daniel@0	27 import tempfile
Daniel@0	28 import subprocess
Daniel@0	29 import os
Daniel@0	30 import platform
Daniel@0	31 from aggregate import *
Daniel@0	32 from csvutils import *
Daniel@0	33
Daniel@0	34 # command for threading
Daniel@0	35 import subprocess, threading
Daniel@0	36 import signal
Daniel@0	37
Daniel@0	38 # limit for sequences read
Daniel@0	39 max_lines = 10000000
Daniel@0	40
Daniel@0	41
class Command(object):
    """Run an external command in a worker thread with an optional timeout.

    The command's stdout and stderr are merged and captured as text. If the
    command is still running when the timeout expires it is terminated
    (``kill()`` on Windows, ``SIGTERM`` to the whole process group elsewhere).
    """

    def __init__(self, cmd):
        """Store the command.

        :param cmd: argument list handed to ``subprocess.Popen``
        """
        self.cmd = cmd
        self.process = None
        # reported when the process never delivers any output of its own
        self.text = 'SPMF terminated unexpectedly'

    def run(self, timeout):
        """Execute the command and wait for it to finish.

        :param timeout: seconds to wait before terminating the process;
            a value <= 0 (or None) waits indefinitely
        :return: tuple ``(returncode, text)``. ``returncode`` is ``None``
            when the process could not be started at all; ``text`` is the
            captured output, prefixed by a notice if the process had to be
            terminated.
        """
        on_windows = 'Win' in platform.system()

        def target():
            print_status('Thread started')
            popen_args = dict(stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              shell=False,
                              # deliver text instead of bytes on Python 3
                              universal_newlines=True)
            if not on_windows:
                # own session/process group, so that killpg() below also
                # reaches children spawned by the command
                popen_args['preexec_fn'] = os.setsid

            try:
                self.process = subprocess.Popen(self.cmd, **popen_args)
            except OSError as err:
                # e.g. executable not found: keep process = None and report
                self.text = 'Could not start SPMF: ' + str(err)
                return

            # communicate() drains stdout and waits for the process to exit
            self.text = self.process.communicate()[0]

            print_status('Thread finished')

        thread = threading.Thread(target=target)
        thread.start()

        # wait until timeout if specified
        if timeout and timeout > 0:
            thread.join(timeout)
            if thread.is_alive():
                print_status('Terminating process')
                notice = 'Terminating SPMF after ' + str(timeout) + ' seconds'
                if self.process is not None:
                    try:
                        if on_windows:
                            self.process.kill()
                        else:
                            os.killpg(self.process.pid, signal.SIGTERM)
                    except OSError:
                        # the process exited between the check and the kill
                        pass
                thread.join()
                # Set the notice only after the worker has finished: the
                # worker assigns self.text when its read returns and would
                # otherwise overwrite the notice.
                output = self.text
                self.text = notice
                if output:
                    self.text = notice + '\n' + output
        else:
            thread.join()

        if self.process is None:
            return (None, self.text)
        return (self.process.returncode, self.text)
Daniel@0	82
Daniel@0	83
Daniel@0	84 # runs the spmf java with method and parameters as specified
Daniel@0	85 # 1st parameter: usually minimal support of sequence
Daniel@0	86 # 2nd parameter: minimal length of sequence
Daniel@0	87 # run spmf with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
def spmf(file, method="CM-SPADE", params=("70%", "3"), timeout=10):
    """Run the SPMF java toolkit on an input file.

    Equivalent command line:
    ``java -Xmx1g -jar spmf.jar run <method> <file> <outfile> <params...>``

    :param file: path of the spmf-formatted input file
    :param method: name of the SPMF algorithm to run
    :param params: algorithm parameters; usually the minimal support of a
        sequence followed by the minimal sequence length
    :param timeout: seconds after which the java process is terminated
    :return: path of the output file. The path is returned even if SPMF
        failed or was terminated; the file then exists but may be empty or
        incomplete.
    """
    # mkstemp() creates the file atomically; mktemp() only returned a name
    # and is deprecated because another process could claim it first.
    handle, outfile = tempfile.mkstemp()
    os.close(handle)

    command = ["java", "-Xmx1g", "-jar", "spmf.jar", "run",
               method, file, outfile]
    command.extend(params)

    proc = Command(command)
    retcode, text = proc.run(timeout=timeout)

    if retcode != 0:
        print_status("Terminated with errors" + str(text))

    return outfile
Daniel@0	109
Daniel@0	110
Daniel@0	111 # takes a dictionary of chords for one or multiple files
Daniel@0	112 # in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
Daniel@0	113 # and converts it into spmf
Daniel@0	114 #
Daniel@0	115 # output: tempfile of spmf output
def relchords2spmf(input):
    """Write relative chord sequences to a temporary file in spmf format.

    :param input: dictionary of the form
        ``dict[clipid] = [(time, key, mode, fun, typ, bfun), ...]``
    :return: the (already closed) temporary file object; the file is not
        deleted on close and its path is available as ``.name``
    """
    # The context manager closes the file even if the chord conversion
    # raises. The file is opened in binary mode, so the text is encoded
    # explicitly, which works the same on Python 2 and 3.
    with tempfile.NamedTemporaryFile(delete=False) as fspmf:
        for trackdata in input.values():
            # one chord sequence per line; -1 separates the items
            items = [str(c2f.fun2num(fun, typ, bfun, mode)) + ' -1 '
                     for (time, key, mode, fun, typ, bfun) in trackdata]

            # the sequence is closed with -2
            fspmf.write((''.join(items) + '-2\n').encode('ascii'))

    return fspmf
Daniel@0	138
Daniel@0	139
Daniel@0	140 ## takes a dictionary of chords for one or multiple files
Daniel@0	141 ## in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
Daniel@0	142 ## and converts it into spmf
Daniel@0	143 #def folder2spmf(folderin = 'D:/mirg/Chord_Analysis20141216/', fileout = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf'):
Daniel@0	144 #
Daniel@0	145 # # get chords for all files
Daniel@0	146 # output = c2f.folder2functions(folderin)
Daniel@0	147 #
Daniel@0	148 # # open log
Daniel@0	149 # logfile = fileout + '.dic'
Daniel@0	150 # csvfile = open(logfile, "w+b") #opens the file for updating
Daniel@0	151 # w = csv.writer(csvfile)
Daniel@0	152 # w.writerow(["track","key","mode","sequence length"])
Daniel@0	153 #
Daniel@0	154 # # open spmf file
Daniel@0	155 # fspmf = open(fileout,'w')
Daniel@0	156 # # ---
Daniel@0	157 # # this is writing the spmf format
Daniel@0	158 # for track,trackdata in output.iteritems():
Daniel@0	159 # # write chord sequence as one line in spmf file
Daniel@0	160 # for (time,key,mode,fun,typ,bfun) in trackdata:
Daniel@0	161 # chord = c2f.fun2num(fun,typ,bfun,mode)
Daniel@0	162 #
Daniel@0	163 # # -1 is the spearator of items or itemsets
Daniel@0	164 # fspmf.write(str(chord) + ' -1 ')
Daniel@0	165 #
Daniel@0	166 # # the sequence is closed with -2
Daniel@0	167 # fspmf.write('-2\n')
Daniel@0	168 # w.writerow([track, str(key), str(mode),str(len(trackdata))])
Daniel@0	169 #
Daniel@0	170 # fspmf.close()
Daniel@0	171 # csvfile.close()
Daniel@0	172
Daniel@0	173 # read an spmf file
Daniel@0	174 # def parsespmf(filein = 'D:/mirg/Chord_Analysis20141216/Beethoven.txt'):
Daniel@0	175
Daniel@0	176 # string sourcefile path to the source spmf file with chords from records
Daniel@0	177 # string patternfile path to the pattern spmf file
Daniel@0	178 # matches each of the patterns in patternfile
Daniel@0	179 # to the chord sequences in sourcefile
def match(sourcefile = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf',sourcedict = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf.dic', patternfile = 'D:/mirg/Chord_Analysis20141216/Beethoven_70.txt', outpath = 'D:/mirg/Chord_Analysis20141216/'):
    """Match every pattern in patternfile against the sequences in sourcefile.

    For each track, the indices of all patterns that occur in it (as an open
    pattern, i.e. gaps allowed) are written to ``<track>_patterns.csv``
    in ``outpath``.

    :param sourcefile: path to the source spmf file with chords from records
    :param sourcedict: path to the track dictionary belonging to sourcefile
    :param patternfile: path to the pattern spmf file (SPMF output)
    :param outpath: directory receiving the per-track csv files
    """
    # ---
    # we here assume that there are more files than patterns,
    # as display of patterns is somehow limited
    # therefore parallelisation will be 1 pattern/multiple files
    # per instance
    # ---

    # spmf2table returns (patterns, supports); only the patterns are matched
    patterns, _supports = spmf2table(patternfile)

    # first: read track dictionary and get the input sequence names
    tracks = getClipDict(sourcedict)

    tracks_patterns = dict()

    # NOTE(review): the n-th track is paired with the n-th line of
    # sourcefile, so this relies on getClipDict yielding the tracks in file
    # order and on sourcefile having at least one line per track - confirm.
    with open(sourcefile, 'r') as source:
        # iterate over all tracks - to be parallelised
        for track in tracks:
            sequence = readSequence(next(source))
            print(track)
            for p, pattern in enumerate(patterns):
                # match open pattern
                if openPatternInSequence(sequence, pattern):
                    tracks_patterns.setdefault(track, []).append(p)

    # write clip index to files
    writeAllPatternsForClips(outpath, tracks_patterns)
Daniel@0	225
Daniel@0	226 # writes results to disk per key
def writeAllPatternsForClips(path = 'D:/mirg/Chord_Analysis20141216/',tracks_patterns = None):
    """Write one ``<name>_patterns.csv`` file per track.

    :param path: directory the files are written to
    :param tracks_patterns: dictionary mapping a track name to the list of
        pattern indices found in that track; each list becomes a single
        csv row. ``None`` is treated as an empty dictionary.
    """
    import sys

    if tracks_patterns is None:
        tracks_patterns = {}

    for name, contents in tracks_patterns.items():
        # os.path.join avoids the doubled separator when path already ends
        # with one, and does not turn an empty path into the file system root
        filename = os.path.join(path, name + '_patterns.csv')

        # the csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3
        if sys.version_info[0] >= 3:
            csvfile = open(filename, 'w', newline='')
        else:
            csvfile = open(filename, 'wb')

        # compress pattern data ?
        # e.g. 2 columns from-to for the long series of atomic increments
        with csvfile:
            csv.writer(csvfile).writerow(contents)
Daniel@0	239
Daniel@0	240 # reads output of spmf to table
def spmf2table(patternfile):
    """Read the frequent patterns from an SPMF output file.

    A line of the file looks like this::

        1120401 -1 1120101 -1 #SUP: 916

    At most ``max_lines`` lines are read; blank lines are skipped.

    :param patternfile: path to the SPMF output file
    :return: tuple ``(patterns, supports)`` of two parallel lists; each
        pattern is a list of item strings, each support is what
        readPattern returns for that line
    """
    patterns = []
    supports = []

    # read all patterns
    with open(patternfile, 'r') as f:
        for linecnt, line in enumerate(f):
            # check before parsing, so that exactly max_lines lines are read
            # and the message only appears if something was left out
            if linecnt >= max_lines:
                print_status('Not reading more than ' + str(max_lines) + ' lines :(')
                break

            # a blank line would otherwise yield an empty pattern
            if not line.strip():
                continue

            # items are kept as strings, so any representation works
            pattern, support = readPattern(line)
            patterns.append(pattern)
            supports.append(support)

    return patterns, supports
Daniel@0	275
Daniel@0	276 # @param line: reads a line in the spmf output file with frequent patterns
Daniel@0	277 # returns list of strings "pattern" and int "support"
def readPattern(line):
    """Parse one line of an SPMF output file with frequent patterns.

    Expected format: ``<item> -1 <item> -1 #SUP: <support>``

    :param line: the line, with or without trailing newline
    :return: tuple ``(pattern, support)``: the list of item strings and the
        support as int. The support is -1 if the ``#SUP:`` marker is
        missing or not followed by a number (broken or truncated file).
    """
    marker = '#SUP:'
    suploc = line.find(marker)

    # no marker at all: broken line, keep whatever items are complete
    if suploc == -1:
        return (line.split(' -1 ')[:-1], -1)

    # int() ignores surrounding whitespace, so the line may or may not end
    # with a newline
    try:
        support = int(line[suploc + len(marker):])
    except ValueError:
        support = -1

    # every item is followed by ' -1 ', so the last split element is empty
    pattern = line[:suploc].split(' -1 ')[:-1]
    return (pattern, support)
Daniel@0	291
Daniel@0	292 # @param line: reads a line in the spmf input file with chord sequence
Daniel@0	293 # returns list of strings "pattern" and int "support"
def readSequence(line):
    """Parse one line of an spmf input file into a chord sequence.

    Expected format: ``<item> -1 <item> -1 -2``

    :param line: the line, with or without trailing newline
    :return: list of item strings
    """
    # locate the end-of-sequence marker; if it is missing use the whole
    # line rather than silently dropping its last character
    end = line.find('-2')
    body = line if end == -1 else line[:end]

    # every item is followed by ' -1 ', so the last split element is not an
    # item
    return body.split(' -1 ')[:-1]
Daniel@0	301
Daniel@0	302 # finds open pattern in sequences
Daniel@0	303 # @param [string] sequence input sequence
Daniel@0	304 # @param [string] pattern pattern to be found
def openPatternInSequence(sequence,pattern):
    """Test whether pattern occurs in sequence as an open pattern.

    "Open" means the pattern items must appear in order, but other items
    may lie in between (subsequence match).

    :param sequence: list of items, the input sequence
    :param pattern: list of items, the pattern to be found
    :return: 1 if the complete pattern was found, 0 otherwise. An empty
        pattern is contained in every sequence.
    """
    if not pattern:
        return 1

    patidx = 0
    for item in sequence:
        if item == pattern[patidx]:
            patidx += 1

            # The pattern is complete only once ALL its items were matched.
            # The previous test against len(pattern) - 1 already reported a
            # match with the last item still missing.
            if patidx == len(pattern):
                # could also return the start index
                return 1

    # finished the sequence before finishing pattern
    return 0
Daniel@0	317
Daniel@0	318 # finds closed pattern in sequences
Daniel@0	319 # @param [string] sequence input sequence
Daniel@0	320 # @param [string] pattern pattern to be found
def closedPatternInSequence(sequence,pattern):
    """Test whether pattern occurs in sequence as a closed pattern.

    "Closed" means the pattern items must appear consecutively, without
    other items in between. Items are compared by their string form.

    :param sequence: list of items, the input sequence
    :param pattern: list of items, the pattern to be found
    :return: True if the pattern is a contiguous part of the sequence
    """
    # Compare whole items. Joining everything into one string without a
    # delimiter let a pattern match across item boundaries, e.g.
    # ['12', '3'] in ['1', '23'] or ['2'] in ['12'].
    pat = [str(item) for item in pattern]
    seq = [str(item) for item in sequence]
    width = len(pat)
    return any(seq[start:start + width] == pat
               for start in range(len(seq) - width + 1))
Daniel@0	324
Daniel@0	325 # reads all track names from the dictionary created by folder2spmf
Daniel@0	326 # @param sourcedict path to dictionary
def getClipDict(sourcedict):
    """Read all track names from a track dictionary csv file.

    The first row of the file is a legend and is skipped; every following
    row has the four columns track, key, mode, sequence length.

    :param sourcedict: path to the dictionary file
    :return: ordered dictionary ``tracks[track] = (key, mode, seqlen)`` with
        all values as strings, in the order of the rows in the file
    :raises ValueError: if a non-empty row does not have four columns
    """
    from collections import OrderedDict

    # keep the row order: a plain dict does not guarantee it on Python 2
    tracks = OrderedDict()

    with open(sourcedict, 'rt') as f:
        reader = csv.reader(f)

        # skip first row that contains the legend (an empty file has none)
        next(reader, None)

        # get following rows
        for row in reader:
            # csv.reader yields an empty list for a blank line
            if not row:
                continue
            track, key, mode, seqlen = row
            tracks[track] = (key, mode, seqlen)

    return tracks
Daniel@0	343
Daniel@0	344
Daniel@0	345 # run spmf afterwards with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
if __name__ == "__main__":
    # Placeholder entry point: the real work is done by the functions above.
    #folder2spmf()
    #match()
    # Function-call form of print works on both Python 2 and Python 3.
    print("huhu")

Mercurial > hg > dml-open-cliopatria

annotate dml-cla/python/chord_seq_spmf_helper.py @ 0:718306e29690 tip