#!/usr/bin/python
# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

"""Data conversion wrapper for the spmf toolkit.

Converts chord sequences into the spmf sequence format, runs the spmf jar
as an external java process and parses / matches the frequent patterns it
writes.
"""

__author__ = "Daniel Wolff"

import chord_seq_key_relative as c2f
import csv
import re
import tempfile
import os
import platform
from aggregate import *
from csvutils import *

# Imported after the wildcard imports (as subprocess / threading / signal
# were originally), so these names always refer to the stdlib modules.
# NOTE(review): print_status is not defined in this file; presumably it is
# provided by one of the wildcard imports above - confirm.
import collections
import signal
import subprocess
import sys
import threading

# limit for sequences read
max_lines = 10000000


def _on_windows():
    """Return True when platform.system() reports a Windows system."""
    return 'Win' in platform.system()


def _to_text(data):
    """Return process output as a native string (decodes bytes on Python 3)."""
    if data is None:
        return ''
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def _open_csv_for_writing(filename):
    """Open *filename* the way the csv module expects on this Python version."""
    if sys.version_info[0] >= 3:
        return open(filename, 'w', newline='')
    return open(filename, 'wb')


class Command(object):
    """Runs an external command in a worker thread, with an optional timeout.

    Attributes:
        cmd: argument list handed to subprocess.Popen.
        process: the Popen object, or None while not (successfully) started.
        text: combined stdout/stderr of the process, or a status message.
    """

    def __init__(self, cmd):
        self.cmd = cmd
        self.process = None
        self.text = 'SPMF terminated unexpectedly'

    def _terminate(self):
        """Kill the running process (the whole process group on non-Windows)."""
        if self.process is None:
            # nothing was started (yet), so there is nothing to kill
            return
        try:
            if _on_windows():
                self.process.kill()
            else:
                # the child was started with os.setsid, so its pid is also
                # the id of its process group
                os.killpg(self.process.pid, signal.SIGTERM)
        except OSError:
            # the process exited between the liveness check and the kill
            pass

    def run(self, timeout):
        """Run the command and wait for it to finish.

        Args:
            timeout: seconds to wait before the process is terminated;
                None or a value <= 0 waits indefinitely.

        Returns:
            Tuple (returncode, text). returncode is None when the process
            could not be started. text is the captured stdout/stderr, or a
            status message if the process was terminated or never started.
        """
        def target():
            print_status('Thread started')
            popen_args = dict(stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              shell=False)
            if not _on_windows():
                # own session / process group, so that the timeout handling
                # can kill the process together with its children
                popen_args['preexec_fn'] = os.setsid
            try:
                self.process = subprocess.Popen(self.cmd, **popen_args)
            except OSError as err:
                # e.g. executable not found: report through the result text
                self.text = 'SPMF could not be started: ' + str(err)
                return

            # communicate() reads stdout until EOF and waits for the exit
            output, _ = self.process.communicate()
            self.text = _to_text(output)

            print_status('Thread finished')

        thread = threading.Thread(target=target)
        thread.start()

        timed_out = False
        # wait until timeout if specified
        if timeout is not None and timeout > 0:
            thread.join(timeout)
            if thread.is_alive():
                print_status('Terminating process')
                timed_out = True
                self._terminate()
                thread.join()
        else:
            thread.join()

        if timed_out:
            # set only after the worker has finished; otherwise the worker
            # overwrites this message with the captured output
            self.text = 'Terminating SPMF after ' + str(timeout) + ' seconds'

        retcode = None if self.process is None else self.process.returncode
        return (retcode, self.text)


# run spmf with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
def spmf(file, method="CM-SPADE", params=["70%", "3"], timeout=10):
    """Run the spmf java tool with the given method and parameters.

    Args:
        file: path of the spmf input file.
        method: name of the spmf algorithm.
        params: algorithm parameters; 1st usually the minimal support of a
            sequence, 2nd the minimal length of a sequence. The default list
            is only read, never modified.
        timeout: seconds after which spmf is terminated (see Command.run).

    Returns:
        Path of the output file. It is returned even when spmf failed or
        was terminated, in which case it may be empty or incomplete.
    """
    # create the output file; mkstemp replaces the race-prone mktemp,
    # spmf overwrites the (empty) file
    handle, outfile = tempfile.mkstemp()
    os.close(handle)

    command = ["java", "-Xmx1g", "-jar", "spmf.jar", "run"]
    command.extend([method, file, outfile])
    command.extend(params)

    #print_status('CWD:' + os.getcwd())
    #print_status('Calling SPMF: ' + ' '.join(command))

    proc = Command(command)
    retcode, text = proc.run(timeout=timeout)

    if retcode != 0:
        print_status("Terminated with errors" + text)
    return outfile


def relchords2spmf(input):
    """Write chord sequences to a temporary file in spmf format.

    Args:
        input: dictionary of chords for one or multiple files, in the form
            dict[clipid] = [(time, key, mode, fun, typ, bfun)].

    Returns:
        The (already closed) temporary file object; the file is kept on
        disk and its path is available through the ``name`` attribute.
    """
    # random filename for the spmf file; delete=False keeps it after close
    with tempfile.NamedTemporaryFile(delete=False) as fspmf:
        for track, trackdata in input.items():
            # one chord sequence per line; -1 is the separator of items or
            # itemsets, the sequence is closed with -2
            parts = [str(c2f.fun2num(fun, typ, bfun, mode)) + ' -1 '
                     for (time, key, mode, fun, typ, bfun) in trackdata]
            line = ''.join(parts) + '-2\n'
            # the temporary file is opened in binary mode
            fspmf.write(line.encode('ascii'))

    return fspmf


## takes a dictionary of chords for one or multiple files
## in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
## and converts it into spmf
#def folder2spmf(folderin = 'D:/mirg/Chord_Analysis20141216/', fileout = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf'):
#
#    # get chords for all files
#    output = c2f.folder2functions(folderin)
#
#    # open log
#    logfile = fileout + '.dic'
#    csvfile = open(logfile, "w+b") #opens the file for updating
#    w = csv.writer(csvfile)
#    w.writerow(["track","key","mode","sequence length"])
#
#    # open spmf file
#    fspmf = open(fileout,'w')
#    # ---
#    # this is writing the spmf format
#    for track,trackdata in output.iteritems():
#        # write chord sequence as one line in spmf file
#        for (time,key,mode,fun,typ,bfun) in trackdata:
#            chord = c2f.fun2num(fun,typ,bfun,mode)
#
#            # -1 is the separator of items or itemsets
#            fspmf.write(str(chord) + ' -1 ')
#
#        # the sequence is closed with -2
#        fspmf.write('-2\n')
#        w.writerow([track, str(key), str(mode),str(len(trackdata))])
#
#    fspmf.close()
#    csvfile.close()

# read an spmf file
# def parsespmf(filein = 'D:/mirg/Chord_Analysis20141216/Beethoven.txt'):


def match(sourcefile='D:/mirg/Chord_Analysis20141216/Beethoven.spmf',
          sourcedict='D:/mirg/Chord_Analysis20141216/Beethoven.spmf.dic',
          patternfile='D:/mirg/Chord_Analysis20141216/Beethoven_70.txt',
          outpath='D:/mirg/Chord_Analysis20141216/'):
    """Match each pattern in patternfile against the sequences in sourcefile.

    Args:
        sourcefile: path to the source spmf file with chords from records.
        sourcedict: path to the track dictionary belonging to sourcefile;
            its rows are expected in the same order as the sequence lines.
        patternfile: path to the pattern spmf file.
        outpath: folder the per-track pattern csv files are written to.

    Returns:
        Tuple (patterns_tracks, tracks_patterns): pattern index -> list of
        matching tracks, and track -> list of matching pattern indices.

    Raises:
        StopIteration: if sourcefile has fewer lines than sourcedict tracks.
    """
    # ---
    # we here assume that there are more files than patterns,
    # as display of patterns is somehow limited
    # therefore parallelisation will be 1 pattern/multiple files
    # per instance
    # ---

    # spmf2table returns (patterns, supports); only the patterns are needed
    patterns, _supports = spmf2table(patternfile)

    # first: read track dictionary and get the input sequence names
    tracks = getClipDict(sourcedict)

    patterns_tracks = dict()
    tracks_patterns = dict()

    # read the input sequences
    with open(sourcefile, 'r') as source:
        # iterate over all tracks - to be parallelised
        for track in tracks:
            sequence = readSequence(next(source))
            print(track)
            for p, pattern in enumerate(patterns):
                if not pattern:
                    # empty pattern (from a broken line) carries no information
                    continue
                # match open or closed pattern
                if openPatternInSequence(sequence, pattern):
                    patterns_tracks.setdefault(p, []).append(track)
                    tracks_patterns.setdefault(track, []).append(p)

    # write clip index to files
    writeAllPatternsForClips(outpath, tracks_patterns)
    return patterns_tracks, tracks_patterns


def writeAllPatternsForClips(path='D:/mirg/Chord_Analysis20141216/', tracks_patterns=dict()):
    """Write one csv file per track, holding the indices of its patterns.

    Args:
        path: output folder.
        tracks_patterns: dict track name -> list of pattern indices. The
            default dict is only read, never modified.
    """
    for name, contents in tracks_patterns.items():
        # create new file <path>/<name>_patterns.csv
        filename = os.path.join(path, name + '_patterns.csv')
        csvfile = _open_csv_for_writing(filename)
        try:
            # compress pattern data ?
            # e.g. 2 columns from-to for the long series of atomic increments
            csv.writer(csvfile).writerow(contents)
        finally:
            csvfile.close()


def spmf2table(patternfile):
    """Read the output of spmf into a table.

    A line looks like this:
        1120401 -1 1120101 -1 #SUP: 916

    Args:
        patternfile: path of the spmf output file.

    Returns:
        Tuple (patterns, supports) of equally long lists, see readPattern.
        At most max_lines lines are read.
    """
    patterns = []
    supports = []

    # patterns are kept as lists of strings, so any item representation works
    with open(patternfile, 'r') as f:
        for linecnt, line in enumerate(f):
            if linecnt >= max_lines:
                print_status('Not reading more than ' + str(max_lines) + ' lines :(')
                break

            pattern, support = readPattern(line)
            patterns.append(pattern)
            supports.append(support)

    return patterns, supports


def readPattern(line):
    """Parse one line of the spmf output file with frequent patterns.

    Args:
        line: e.g. "1120401 -1 1120101 -1 #SUP: 916".

    Returns:
        Tuple (pattern, support): pattern is a list of strings, support an
        int, or -1 if the line carries no support value.

    Raises:
        ValueError: if the text following '#SUP:' is not an integer.
    """
    marker = '#SUP:'
    suploc = line.find(marker)

    if suploc >= 0:
        items = line[:suploc]
        fields = line[suploc + len(marker):].split()
        support = int(fields[0]) if fields else -1
    else:
        # broken line without support: use everything but the line end
        items = line.rstrip('\r\n')
        support = -1

    # every item is followed by ' -1 ', so the last split element is
    # the remainder after the final separator
    pattern = items.split(' -1 ')[:-1]
    return (pattern, support)


def readSequence(line):
    """Parse one line of the spmf input file with a chord sequence.

    Args:
        line: e.g. "1120401 -1 1120101 -1 -2".

    Returns:
        List of strings, the items of the sequence.
    """
    # locate the end-of-sequence marker
    endloc = line.find('-2')
    if endloc >= 0:
        items = line[:endloc]
    else:
        items = line.rstrip('\r\n')

    return items.split(' -1 ')[:-1]


def openPatternInSequence(sequence, pattern):
    """Find an open pattern (items in order, gaps allowed) in a sequence.

    Args:
        sequence: list of strings, the input sequence.
        pattern: list of strings, the pattern to be found.

    Returns:
        1 if all items of pattern occur in sequence in the given order,
        0 otherwise. An empty pattern is contained in every sequence.
    """
    # "item in remaining" consumes the iterator up to and including the
    # first hit, so each pattern item is searched after the previous one
    remaining = iter(sequence)
    if all(item in remaining for item in pattern):
        return 1
    # finished the sequence before finishing pattern
    return 0


def closedPatternInSequence(sequence, pattern):
    """Find a closed pattern (consecutive items) in a sequence.

    Items are compared one by one by their string form, so a pattern item
    never matches across the boundary of two sequence items.

    Args:
        sequence: list of items, the input sequence.
        pattern: list of items, the pattern to be found.

    Returns:
        True if pattern occurs as a contiguous run in sequence.
    """
    wanted = [str(item) for item in pattern]
    items = [str(item) for item in sequence]
    width = len(wanted)
    return any(items[start:start + width] == wanted
               for start in range(len(items) - width + 1))


def getClipDict(sourcedict):
    """Read all track names from the dictionary created by folder2spmf.

    Args:
        sourcedict: path to the dictionary (csv with a legend row).

    Returns:
        Ordered dict track -> (key, mode, seqlen), in file order, so that
        it can be iterated alongside the sequence lines of the spmf file.
    """
    tracks = collections.OrderedDict()
    with open(sourcedict, 'rt') as f:
        reader = csv.reader(f)

        # skip first row that contains legend
        next(reader, None)

        # get following rows
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines
                continue
            track, key, mode, seqlen = row
            tracks[track] = (key, mode, seqlen)

    return tracks


# run spmf afterwards with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
if __name__ == "__main__":
    #folder2spmf()
    #match()
    print("huhu")