Daniel@0: #!/usr/bin/python
Daniel@0: # Part of DML (Digital Music Laboratory)
Daniel@0: # Copyright 2014-2015 Daniel Wolff, City University
Daniel@0:  
Daniel@0: # This program is free software; you can redistribute it and/or
Daniel@0: # modify it under the terms of the GNU General Public License
Daniel@0: # as published by the Free Software Foundation; either version 2
Daniel@0: # of the License, or (at your option) any later version.
Daniel@0: # 
Daniel@0: # This program is distributed in the hope that it will be useful,
Daniel@0: # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Daniel@0: # GNU General Public License for more details.
Daniel@0: # 
Daniel@0: # You should have received a copy of the GNU General Public
Daniel@0: # License along with this library; if not, write to the Free Software
Daniel@0: # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Daniel@0: 
Daniel@0: # -*- coding: utf-8 -*-
Daniel@0: #
Daniel@0: # This is a data conversion wrapper for the spmf toolkit
Daniel@0: __author__="Daniel Wolff"
Daniel@0: 
Daniel@0: import chord_seq_key_relative as c2f
Daniel@0: import csv
Daniel@0: import re
Daniel@0: import tempfile
Daniel@0: import subprocess
Daniel@0: import os
Daniel@0: import platform
Daniel@0: from aggregate import *
Daniel@0: from csvutils import *
Daniel@0: 
Daniel@0: # command for threading
Daniel@0: import subprocess, threading
Daniel@0: import signal
Daniel@0: 
# upper bound on the number of lines read from an spmf pattern file
max_lines = 10 ** 7
Daniel@0: 
Daniel@0: 
class Command(object):
    """Run an external command with an optional timeout.

    The combined stdout/stderr of the command is collected by a worker
    thread while the calling thread waits. If the wait times out, the
    process is killed and a status message replaces the captured output.
    """

    def __init__(self, cmd):
        # argument list handed to subprocess.Popen (no shell is involved)
        self.cmd = cmd
        # Popen handle; stays None until run() has started the process
        self.process = None
        # captured output, or a status message when no output was collected
        self.text = 'SPMF terminated unexpectedly'

    def run(self, timeout):
        """Execute the command and wait for it to finish.

        @param timeout: seconds to wait before killing the process;
                        a value <= 0 (or None) waits indefinitely
        @return: tuple (returncode, text). returncode is None when the
                 process could not be started; text is the captured output
                 or a message describing why there is none.
        """
        on_windows = 'Win' in platform.system()
        popen_args = dict(stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          shell=False)
        if not on_windows:
            # run in an own session so that killpg() below reaches the
            # whole process group and not only the direct child
            popen_args['preexec_fn'] = os.setsid

        # start the process in the calling thread so that self.process is
        # always set before anything tries to kill it or read its returncode
        try:
            self.process = subprocess.Popen(self.cmd, **popen_args)
        except OSError as err:
            self.text = 'SPMF could not be started: ' + str(err)
            return (None, self.text)

        timed_out = threading.Event()

        def target():
            print_status('Thread started')
            # communicate() drains the pipe and reaps the process
            output = self.process.communicate()[0]
            # keep the timeout message instead of partial output
            if not timed_out.is_set():
                self.text = output
            print_status('Thread finished')

        thread = threading.Thread(target=target)
        thread.start()

        # wait until timeout if specified
        if timeout and timeout > 0:
            thread.join(timeout)
            if thread.is_alive():
                print_status('Terminating process')
                timed_out.set()
                self.text = 'Terminating SPMF after ' + str(timeout) + ' seconds'
                try:
                    if on_windows:
                        self.process.kill()
                    else:
                        os.killpg(self.process.pid, signal.SIGTERM)
                except OSError:
                    # the process ended by itself between the liveness
                    # check and the kill; nothing left to terminate
                    pass
                # on every platform wait for the worker, so that the
                # return code is available below
                thread.join()
        else:
            thread.join()

        return (self.process.returncode, self.text)
Daniel@0: 
Daniel@0: 
Daniel@0: # runs the spmf java with method and parameters as specified
Daniel@0: # 1st parameter: usually minimal support of sequence
Daniel@0: # 2nd parameter: minimal length of sequence
Daniel@0: # run spmf with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
def spmf(file, method="CM-SPADE", params=("70%", "3"), timeout=10, jar="spmf.jar"):
    """Run the spmf java toolkit on an spmf input file.

    @param file: path of the spmf input file
    @param method: name of the spmf algorithm to run
    @param params: algorithm parameters appended to the command line,
                   by default minimal support and minimal sequence length
    @param timeout: seconds after which spmf is terminated (see Command.run)
    @param jar: path of the spmf jar file handed to "java -jar"
    @return: path of the output file. The path is returned even when spmf
             failed; in that case the file may be empty or incomplete.
    """
    # mkstemp creates the file itself, which avoids the race between
    # choosing a name and creating the file that tempfile.mktemp has
    handle, outfile = tempfile.mkstemp()
    os.close(handle)

    command = ["java", "-Xmx1g", "-jar", jar, "run", method, file, outfile]
    command.extend(params)

    retcode, text = Command(command).run(timeout=timeout)

    if retcode != 0:
        # captured process output is a byte string on Python 3
        if not isinstance(text, str):
            text = text.decode('utf-8', 'replace')
        print_status("Terminated with errors" + text)

    return outfile
Daniel@0:     
Daniel@0: 
Daniel@0: # takes a dictionary of chords for one or multiple files 
Daniel@0: # in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
Daniel@0: # and converts it into spmf
Daniel@0: #
Daniel@0: # output: tempfile of spmf output
def relchords2spmf(input):
    """Write key-relative chord sequences to a temporary file in spmf format.

    Every entry of the dictionary becomes one line: each chord number is
    followed by the separator ' -1 ' and the line is closed with '-2'.

    @param input: dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
    @return: the closed temporary file object; its path is in its "name"
             attribute. The file is not deleted automatically.
    """
    fspmf = tempfile.NamedTemporaryFile(delete=False)

    try:
        for trackdata in input.values():
            # -1 is the separator of items or itemsets
            items = [str(c2f.fun2num(fun, typ, bfun, mode)) + ' -1 '
                     for (_time, _key, mode, fun, typ, bfun) in trackdata]

            # the sequence is closed with -2
            line = ''.join(items) + '-2\n'

            # the temporary file is opened in binary mode
            if not isinstance(line, bytes):
                line = line.encode('utf-8')
            fspmf.write(line)
    finally:
        # close the file even if the chord conversion fails
        fspmf.close()

    return fspmf
Daniel@0: 
Daniel@0: 
Daniel@0: ## takes a dictionary of chords for one or multiple files 
Daniel@0: ## in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
Daniel@0: ## and converts it into spmf
Daniel@0: #def folder2spmf(folderin = 'D:/mirg/Chord_Analysis20141216/', fileout = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf'):
Daniel@0: #
Daniel@0: #    # get chords for all files                    
Daniel@0: #    output  = c2f.folder2functions(folderin)
Daniel@0: #
Daniel@0: #    # open log
Daniel@0: #    logfile = fileout + '.dic'
Daniel@0: #    csvfile = open(logfile, "w+b") #opens the file for updating
Daniel@0: #    w = csv.writer(csvfile)
Daniel@0: #    w.writerow(["track","key","mode","sequence length"])
Daniel@0: #    
Daniel@0: #    # open spmf file
Daniel@0: #    fspmf = open(fileout,'w')
Daniel@0: #    # ---
Daniel@0: #    # this is writing the spmf format
Daniel@0: #    for track,trackdata in output.iteritems():
Daniel@0: #        # write chord sequence as one line in spmf file
Daniel@0: #        for (time,key,mode,fun,typ,bfun) in trackdata:
Daniel@0: #            chord = c2f.fun2num(fun,typ,bfun,mode)
Daniel@0: #            
Daniel@0: #            # -1 is the spearator of items or itemsets
Daniel@0: #            fspmf.write(str(chord) + ' -1 ') 
Daniel@0: #            
Daniel@0: #        # the sequence is closed with -2
Daniel@0: #        fspmf.write('-2\n') 
Daniel@0: #        w.writerow([track, str(key), str(mode),str(len(trackdata))])
Daniel@0: #            
Daniel@0: #    fspmf.close()
Daniel@0: #    csvfile.close()
Daniel@0: 
Daniel@0: # read an spmf file
Daniel@0: # def parsespmf(filein = 'D:/mirg/Chord_Analysis20141216/Beethoven.txt'):
Daniel@0: 
Daniel@0: # string sourcefile path to the source spmf file with chords from records
Daniel@0: # string patternfile path to the pattern spmf file
Daniel@0: # matches each of the patterns in patternfile
Daniel@0: #  to the chord sequences in sourcefile
def match(sourcefile = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf',sourcedict = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf.dic', patternfile = 'D:/mirg/Chord_Analysis20141216/Beethoven_70.txt', outpath = 'D:/mirg/Chord_Analysis20141216/'):
    """Match every pattern of patternfile against the sequences in sourcefile.

    For each track the indices of all patterns found as open patterns in the
    track's sequence are collected and written to one csv file per track.

    @param sourcefile: path of the spmf file with one chord sequence per line
    @param sourcedict: path of the csv dictionary naming the track of each line
    @param patternfile: path of the spmf output file with frequent patterns
    @param outpath: folder the per-track csv files are written to
    """
    # spmf2table returns the patterns together with their supports;
    # only the patterns are needed for matching
    patterns, _supports = spmf2table(patternfile)

    # read track dictionary and get the input sequence names
    tracks = getClipDict(sourcedict)

    tracks_patterns = dict()

    # NOTE(review): the n-th track is paired with the n-th line of sourcefile,
    # so this assumes getClipDict yields tracks in file order -- confirm
    with open(sourcefile, 'r') as source:
        for track in tracks:
            line = next(source, None)
            if line is None:
                # sourcefile holds fewer sequences than the dictionary
                break

            sequence = readSequence(line)
            print(track)

            matched = [p for p, pattern in enumerate(patterns)
                       if openPatternInSequence(sequence, pattern)]
            if matched:
                tracks_patterns[track] = matched

    # write clip index to files
    writeAllPatternsForClips(outpath, tracks_patterns)
Daniel@0:     #print patterns_tracks[p]
Daniel@0: 
Daniel@0: # writes results to disk per key
def writeAllPatternsForClips(path = 'D:/mirg/Chord_Analysis20141216/',tracks_patterns = None):
    """Write one csv file per clip listing the patterns found in it.

    The file for clip <name> is <path>/<name>_patterns.csv and contains a
    single row with the clip's pattern entries.

    @param path: folder the csv files are written to
    @param tracks_patterns: dict[clip name] = list of pattern entries;
                            None is treated as an empty dictionary
    """
    if tracks_patterns is None:
        tracks_patterns = {}

    for name, contents in tracks_patterns.items():
        filename = os.path.join(path, name + '_patterns.csv')

        # the with block closes the file even if writing fails
        with _open_csv_for_writing(filename) as csvfile:
            csv.writer(csvfile).writerow(contents)


def _open_csv_for_writing(filename):
    """Open filename for writing in the mode the csv module expects."""
    import sys

    if sys.version_info[0] >= 3:
        # Python 3: text mode, newline handling is left to the csv module
        return open(filename, 'w', newline='')
    # Python 2: the csv module writes to binary files
    return open(filename, 'w+b')
Daniel@0: 
Daniel@0: # reads output of spmf to table
def spmf2table(patternfile, limit=None):
    """Read the frequent patterns of an spmf output file.

    A line of the file looks like this:
        1120401 -1 1120101 -1 #SUP: 916

    @param patternfile: path of the spmf output file
    @param limit: maximal number of lines to read; defaults to the
                  module-wide max_lines
    @return: tuple (patterns, supports) of two lists of equal length, holding
             for each line read the values returned by readPattern
    """
    if limit is None:
        limit = max_lines

    patterns = []
    supports = []

    with open(patternfile, 'r') as f:
        for linecnt, line in enumerate(f):
            # stop before parsing line number limit+1
            if linecnt >= limit:
                print_status('Not reading more than ' + str(limit) + ' lines :(')
                break

            # items are kept as strings, so any representation works
            pattern, support = readPattern(line)
            patterns.append(pattern)
            supports.append(support)

    return patterns, supports
Daniel@0: 
Daniel@0: # @param line: reads a line in the spmf output file with frequent patterns
Daniel@0: # returns list of strings "pattern" and int "support"
def readPattern(line):
    """Parse one line of an spmf output file with frequent patterns.

    A line looks like "1120401 -1 1120101 -1 #SUP: 916": items closed by
    ' -1 ' each, followed by the support of the pattern.

    @param line: line of the spmf output file, with or without line ending
    @return: tuple (pattern, support); pattern is the list of item strings,
             support is an int, or -1 if the line has no readable support
    """
    marker = '#SUP:'
    suploc = line.find(marker)

    if suploc < 0:
        # broken line without support: keep whatever items are there
        return (line.rstrip('\r\n').split(' -1 ')[:-1], -1)

    # the support is the first token after the marker; taking a token
    # instead of a fixed slice works with and without a trailing newline
    fields = line[suploc + len(marker):].split()
    try:
        support = int(fields[0])
    except (IndexError, ValueError):
        support = -1

    # every item is closed by ' -1 ', so the last split element is empty
    pattern = line[:suploc].split(' -1 ')[:-1]
    return (pattern, support)
Daniel@0:    
# @param line: a line of the spmf input file holding one chord sequence
# returns the sequence as a list of item strings
def readSequence(line):
    """Parse one line of an spmf input file into its list of items.

    A line looks like "1120401 -1 1120101 -1 -2": items closed by ' -1 '
    each, and the sequence closed by '-2'.

    @param line: line of the spmf input file
    @return: list of item strings
    """
    # locate the end-of-sequence marker
    endloc = line.find('-2')

    if endloc >= 0:
        body = line[:endloc]
    else:
        # no marker: use the whole line instead of cutting a character off
        body = line.rstrip('\r\n')

    # every item is closed by ' -1 ', so the last split element is dropped
    return body.split(' -1 ')[:-1]
Daniel@0: 
Daniel@0: # finds open pattern in sequences
Daniel@0: # @param [string] sequence input sequence
Daniel@0: # @param [string] pattern pattern to be found
def openPatternInSequence(sequence,pattern):
    """Test whether pattern occurs in sequence as an open pattern.

    The items of the pattern have to appear in the sequence in the same
    order, but other items may lie in between.

    @param [string] sequence input sequence
    @param [string] pattern pattern to be found
    @return: 1 if the complete pattern was found, 0 otherwise;
             an empty pattern is never reported as found
    """
    if not pattern:
        return 0

    patidx = 0
    for item in sequence:
        if item == pattern[patidx]:
            patidx += 1

            # the pattern is complete only once ALL its items were matched
            if patidx == len(pattern):
                return 1

    # finished the sequence before finishing the pattern
    return 0
Daniel@0:             
Daniel@0: # finds closed pattern in sequences
Daniel@0: # @param [string] sequence input sequence
Daniel@0: # @param [string] pattern pattern to be found
def closedPatternInSequence(sequence,pattern):
    """Test whether pattern occurs in sequence as a closed pattern.

    The items of the pattern have to appear in the sequence directly after
    each other. Items are compared as whole strings, so an item never
    matches a part of another item or a run across item boundaries.

    @param [string] sequence input sequence
    @param [string] pattern pattern to be found
    @return: True if the pattern is a contiguous part of the sequence
    """
    needle = [str(item) for item in pattern]
    haystack = [str(item) for item in sequence]
    width = len(needle)

    # slide a window of the pattern's length over the sequence
    return any(haystack[start:start + width] == needle
               for start in range(len(haystack) - width + 1))
Daniel@0:             
Daniel@0: # reads all track names from the dictionary created by folder2spmf
Daniel@0: # @param sourcedict path to dictionary
def getClipDict(sourcedict):
    """Read the track dictionary belonging to an spmf input file.

    The dictionary is a csv file with a legend row followed by rows of the
    form track,key,mode,sequence length.

    @param sourcedict path to dictionary
    @return: dict[track] = (key,mode,seqlen), all values as strings; the
             tracks are kept in the order in which they appear in the file
    """
    # function-scope import: keeps the file order of the tracks on
    # Python versions whose plain dict is unordered
    from collections import OrderedDict

    tracks = OrderedDict()

    with open(sourcedict, 'rt') as f:
        reader = csv.reader(f)

        # skip first row that contains legend (tolerates an empty file)
        next(reader, None)

        for row in reader:
            # blank lines are returned as empty rows
            if not row:
                continue
            track, key, mode, seqlen = row
            tracks[track] = (key, mode, seqlen)

    return tracks
Daniel@0:     
Daniel@0:             
Daniel@0: # run spmf afterwards with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
if __name__ == "__main__":
    # the conversion and matching steps are disabled by default:
    #folder2spmf()
    #match()
    # parenthesised form prints the same text on Python 2 and Python 3
    print("huhu")