Mercurial > hg > dml-open-cliopatria
diff dml-cla/python/chord_seq_spmf_helper.py @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/python
# File: dml-cla/python/chord_seq_spmf_helper.py (shown without diff markers)
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# -*- coding: utf-8 -*-
#
# This is a data conversion wrapper for the spmf toolkit
__author__="Daniel Wolff"

import chord_seq_key_relative as c2f
import csv
import re
import tempfile
import subprocess
import os
import platform
from aggregate import *
from csvutils import *

# command for threading
import subprocess
import threading
import signal

# OrderedDict keeps the track order of the dictionary file (see getClipDict)
import collections

# limit for sequences read
max_lines = 10000000


# Runs an external command in a worker thread so that it can be
# terminated after a timeout.
# NOTE(review): print_status is not defined in this file; presumably it is
# provided by one of the star imports above - confirm.
class Command(object):

    # @param cmd: argument list handed to subprocess.Popen
    def __init__(self, cmd):
        self.cmd = cmd
        self.process = None
        # default message; replaced by the process output, a start-up
        # error or the timeout notice
        self.text = 'SPMF terminated unexpectedly'

    # starts the command and waits for it to finish
    # @param timeout: seconds to wait before terminating the process;
    #                 values that are not > 0 wait indefinitely
    # returns tuple (returncode, text); returncode is None when the
    # process could not be started at all
    def run(self, timeout):
        def target():
            print_status('Thread started')
            try:
                if 'Win' in platform.system():
                    self.process = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False)
                else:
                    # own session / process group, so that killpg below
                    # also reaches children of the started process
                    self.process = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False, preexec_fn=os.setsid)
            except OSError as err:
                # e.g. executable not found: keep a readable message
                # instead of failing later on self.process being None
                self.text = 'Could not start SPMF: ' + str(err)
                return

            self.text = self.process.stdout.read()
            self.process.communicate()

            print_status('Thread finished')

        thread = threading.Thread(target=target)
        thread.start()

        # wait until timeout if specified
        if timeout > 0:
            thread.join(timeout)
            if thread.is_alive():
                print_status('Terminating process')
                if self.process is not None:
                    try:
                        if 'Win' in platform.system():
                            self.process.kill()
                        else:
                            os.killpg(self.process.pid, signal.SIGTERM)
                    except OSError:
                        # the process ended between the liveness check
                        # and the kill; nothing left to terminate
                        pass
                thread.join()
                # set the message only after the worker has finished,
                # otherwise the worker overwrites it with the output
                self.text = 'Terminating SPMF after ' + str(timeout) + ' seconds'
        else:
            thread.join()

        # return retcode
        if self.process is None:
            return (None, self.text)
        return (self.process.returncode, self.text)


# runs the spmf java with method and parameters as specified
# 1st parameter: usually minimal support of sequence
# 2nd parameter: minimal length of sequence
# run spmf with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
#
# @param file: path of the spmf input file
# @param method: name of the spmf algorithm
# @param params: additional command line parameters for the algorithm
# @param timeout: seconds after which the java process is terminated
# returns the path of the output file; the path is returned even if spmf
# failed, in which case the file is empty
def spmf(file,method = "CM-SPADE", params=("70%", "3"), timeout=10):

    # create output file name; mkstemp creates the file atomically,
    # which avoids the name race of tempfile.mktemp
    handle, outfile = tempfile.mkstemp()
    os.close(handle)

    command = ["java", "-Xmx1g", "-jar", "spmf.jar", "run"]
    command.extend([method, file, outfile])
    command.extend(params)

    #print_status('CWD:' + os.getcwd())
    #print_status('Calling SPMF: ' + ' '.join(command))

    proc = Command(command)
    retcode, text = proc.run(timeout=timeout)

    if retcode != 0:
        print_status( "Terminated with errors" + text)
    return outfile


# takes a dictionary of chords for one or multiple files
# in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
# and converts it into spmf
#
# output: closed temporary file object (not deleted on close) holding the
#         spmf data, one line per track
def relchords2spmf(input):

    # choose random filename for spmf location
    # open spmf file
    fspmf = tempfile.NamedTemporaryFile(delete=False)

    try:
        # ---
        # this is writing the spmf format
        for track, trackdata in input.items():
            # write chord sequence as one line in spmf file
            items = []
            for (time, key, mode, fun, typ, bfun) in trackdata:
                chord = c2f.fun2num(fun, typ, bfun, mode)

                # -1 is the separator of items or itemsets
                items.append(str(chord) + ' -1 ')

            # the sequence is closed with -2
            fspmf.write(''.join(items) + '-2\n')
    finally:
        # close the file even if encoding a chord fails
        fspmf.close()

    return fspmf


## takes a dictionary of chords for one or multiple files
## in the form of dict[clipid] = [ (time,key,mode,fun,typ,bfun) ]
## and converts it into spmf
#def folder2spmf(folderin = 'D:/mirg/Chord_Analysis20141216/', fileout = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf'):
#
#    # get chords for all files
#    output = c2f.folder2functions(folderin)
#
#    # open log
#    logfile = fileout + '.dic'
#    csvfile = open(logfile, "w+b") #opens the file for updating
#    w = csv.writer(csvfile)
#    w.writerow(["track","key","mode","sequence length"])
#
#    # open spmf file
#    fspmf = open(fileout,'w')
#    # ---
#    # this is writing the spmf format
#    for track,trackdata in output.iteritems():
#        # write chord sequence as one line in spmf file
#        for (time,key,mode,fun,typ,bfun) in trackdata:
#            chord = c2f.fun2num(fun,typ,bfun,mode)
#
#            # -1 is the spearator of items or itemsets
#            fspmf.write(str(chord) + ' -1 ')
#
#        # the sequence is closed with -2
#        fspmf.write('-2\n')
#        w.writerow([track, str(key), str(mode),str(len(trackdata))])
#
#    fspmf.close()
#    csvfile.close()

# read an spmf file
# def parsespmf(filein = 'D:/mirg/Chord_Analysis20141216/Beethoven.txt'):

# string sourcefile path to the source spmf file with chords from records
# string sourcedict path to the track dictionary belonging to sourcefile
# string patternfile path to the pattern spmf file
# string outpath folder the per-track result files are written to
# matches each of the patterns in patternfile
# to the chord sequences in sourcefile
def match(sourcefile = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf',sourcedict = 'D:/mirg/Chord_Analysis20141216/Beethoven.spmf.dic', patternfile = 'D:/mirg/Chord_Analysis20141216/Beethoven_70.txt', outpath = 'D:/mirg/Chord_Analysis20141216/'):

    # ---
    # we here assume that there are more files than patterns,
    # as display of patterns is somehow limited
    # therefore parallelisation will be 1 pattern/multiple files
    # per instance
    # ---

    # spmf2table returns (patterns, supports); only the patterns are needed
    patterns, _supports = spmf2table(patternfile)

    # ---
    # now for the input sequences
    # ---
    # first: read track dictionary and get the input sequence names
    tracks = getClipDict(sourcedict)

    patterns_tracks = dict()
    tracks_patterns = dict()

    # read the input sequences
    with open(sourcefile, 'r') as source:
        # iterate over all tracks - to be parallelised
        # the n-th track of the dictionary is paired with the n-th line of
        # the source file, so the dictionary order must be the file order
        for track in tracks:
            line = source.readline()
            if not line:
                print_status('Sequence file ended before track ' + str(track))
                break

            sequence = readSequence(line)
            print(track)
            for p, pattern in enumerate(patterns):
                # match open or closed pattern
                if openPatternInSequence(sequence, pattern):
                    patterns_tracks.setdefault(p, []).append(track)
                    tracks_patterns.setdefault(track, []).append(p)

    # write clip index to files
    writeAllPatternsForClips(outpath, tracks_patterns)
    #print patterns_tracks[p]

# writes results to disk per key
# @param path: folder the files are written to
# @param tracks_patterns: dict[track name] = list of pattern indices;
#                         one file <name>_patterns.csv is written per key
def writeAllPatternsForClips(path = 'D:/mirg/Chord_Analysis20141216/',tracks_patterns = None):

    if tracks_patterns is None:
        tracks_patterns = dict()

    for name, contents in tracks_patterns.items():
        # create new file
        with open(path + '/' + name + '_patterns.csv', "w+b") as csvfile:
            w = csv.writer(csvfile)

            # compress pattern data ?
            # e.g. 2 columns from-to for the long series of atomic increments

            w.writerow(contents)

# reads output of spmf to table
# @param patternfile: path to the spmf output file
# returns tuple (patterns, supports): list of patterns (each a list of
# strings) and list of the corresponding supports; at most max_lines
# lines are read
def spmf2table(patternfile):

    patterns = []
    supports = []
    # read all patterns
    with open(patternfile, 'r') as f:
        for linecnt, line in enumerate(f):
            if linecnt >= max_lines:
                print_status('Not reading more than ' + str(max_lines) + ' lines :(')
                break

            # a line looks like this:
            # 1120401 -1 1120101 -1 #SUP: 916
            # we'll use string, so any representation works
            pattern, support = readPattern(line)
            patterns.append(pattern)
            supports.append(support)

    return patterns,supports

# @param line: reads a line in the spmf output file with frequent patterns
# returns list of strings "pattern" and int "support";
# support is -1 if it is missing or cannot be parsed
def readPattern(line):
    marker = '#SUP:'

    # locate support
    suploc = line.find(marker)

    # test whether we have a broken file
    if suploc < 0:
        # no support given: the whole line is the pattern
        return (line.rstrip('\r\n').split(' -1 ')[:-1], -1)

    # the support is the first token after the marker; this does not
    # depend on a trailing newline being present
    tokens = line[suploc + len(marker):].split()
    try:
        support = int(tokens[0]) if tokens else -1
    except ValueError:
        support = -1

    # extract pattern
    pattern = line[:suploc].split(' -1 ')[:-1]
    return (pattern,support)

# @param line: reads a line in the spmf input file with chord sequence
# returns list of strings "sequence"
def readSequence(line):
    # locate the end-of-sequence marker
    endloc = line.find('-2')

    if endloc < 0:
        # marker missing: use the whole line without the line break
        body = line.rstrip('\r\n')
    else:
        body = line[:endloc]

    # extract sequence; the last split element is the remainder after
    # the final separator and is dropped
    sequence = body.split(' -1 ')[:-1]
    return sequence

# finds open pattern in sequences, i.e. the items of the pattern have to
# appear in the same order in the sequence, other items may lie in between
# @param [string] sequence input sequence
# @param [string] pattern pattern to be found
# returns 1 if the pattern is contained, 0 otherwise;
# an empty pattern is contained in every sequence
def openPatternInSequence(sequence,pattern):
    needed = len(pattern)
    if needed == 0:
        return 1

    patidx = 0
    for item in sequence:
        if item == pattern[patidx]:
            patidx +=1

            # did we complete the pattern?
            if patidx >= needed:
                # could also return the start index
                return 1

    # finished the sequence before finishing pattern
    return 0

# finds closed pattern in sequences, i.e. the items of the pattern have to
# appear as one contiguous run in the sequence
# @param [string] sequence input sequence
# @param [string] pattern pattern to be found
# returns True if the pattern is contained, False otherwise
def closedPatternInSequence(sequence,pattern):
    # compare item by item: joining the items into one string would also
    # match across item boundaries (e.g. ['12','3'] in ['1','23'])
    # alternatively use KnuthMorrisPratt
    pat = [str(item) for item in pattern]
    seq = [str(item) for item in sequence]
    width = len(pat)
    return any(seq[start:start + width] == pat
               for start in range(len(seq) - width + 1))

# reads all track names from the dictionary created by folder2spmf
# @param sourcedict path to dictionary
# returns ordered dict[track] = (key,mode,seqlen) in the order of the rows
def getClipDict(sourcedict):

    # keep the row order: match() relies on it to pair tracks with the
    # lines of the spmf file
    tracks = collections.OrderedDict()

    with open(sourcedict, 'rt') as f:
        reader = csv.reader(f)

        # skip first row that contains legend
        next(reader, None)

        # get following rows
        for row in reader:
            if not row:
                # blank line in the csv file
                continue
            (track,key,mode,seqlen) = row
            tracks[track]= (key,mode,seqlen)

    return tracks


# run spmf afterwards with java -jar spmf.jar run CM-SPADE Beethoven.spmf output.txt 50% 3
if __name__ == "__main__":
    #folder2spmf()
    #match()
    print("huhu")