changeset 0:26838b1f560f
initial commit of a segmenter project
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Apr 02 18:09:27 2015 +0100
@@ -0,0 +1,78 @@
+==================
+segmenter
+==================
+
+A project for music structural segmentation.
+
+-------------------
+FEATURESETS
+-------------------
+Features are precomputed using Sonic Annotator.
+
+Extracted features and the Vamp plugins used:
+
+* MFCC, chromagram - QM Vamp Plugins, QMUL
+http://vamp-plugins.org/plugin-doc/qm-vamp-plugins.html
+
+* NNLS chromagram - NNLS Chroma, Matthias Mauch and Chris Cannam
+http://isophonics.net/nnls-chroma
+
+* Tempogram features - TempoFeatures, Mi Tian and George Fazekas
+Tian et al., ICASSP 2015
+
+* Gammatonegram features - FluctuationPatterns, Mi Tian
+
+
+-------------------
+BOUNDARY RETRIEVAL
+-------------------
+* Novelty-based methods
+Tian et al., ISMIR 2014
+Foote 2000.
+
+* C-NMF
+Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic
+Music Structure Identification. ICASSP 2013.
+
+* 2D-FMC
+Nieto, O., & Bello, J. P. Music Segment Similarity Using 2D-Fourier
+Magnitude Coefficients. ICASSP 2014.
+
+* Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. Unsupervised
+Detection of Music Boundaries by Time Series Structure Features. AAAI 2012.
+
+---------------------
+DATASETS
+---------------------
+* SALAMI (Internet Archive)
+Smith, J. B. L., Burgoyne, J. A., Fujinaga, I., et al. Design and Creation of a Large-Scale Database of Structural Annotations. ISMIR 2011.
+
+* BEATLES
+http://isophonics.net/content/reference-annotations-beatles
+
+* ISOPHONICS
+http://isophonics.net/content/reference-annotations
+
+* JINGJU (30 pieces)
+Mi Tian. Hand annotated.
+
+---------------------
+EVALUATION
+---------------------
+
+---------------------
+NOTES & EXTERNAL LINKS
+---------------------
+
+* MIREX results
+http://nema.lis.illinois.edu/nema_out/mirex2014/results/struct/sal/summary.html
+http://nema.lis.illinois.edu/nema_out/mirex2013/results/struct/sal/summary.html
+
+* Music structure analysis framework (MSAF), Nieto
+https://github.com/urinieto/msaf
+https://github.com/urinieto/SegmenterMIREX2014
+
+* Music segmentation by ordinal linear discriminant analysis (OLDA), McFee
+https://github.com/bmcfee/olda
+
+
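The precomputed features are Sonic Annotator CSV exports with the frame timestamp in the first column, which SegEval.py (below) reads back via np.genfromtxt. A minimal loading sketch under that assumption; the file path and name below are hypothetical, following the *_vamp_* naming convention used in SegEval.py:

```python
import numpy as np

# Hypothetical Sonic Annotator CSV export (first column = frame timestamps).
csv_path = 'features/spectrogram/track01_vamp_qm-vamp-plugins_qm-mfcc_coefficients.csv'

data = np.genfromtxt(csv_path, delimiter=',', filling_values=0.0)
timestamps = data[:, 0]   # frame times in seconds
features = data[:, 1:]    # one feature vector per frame
print(features.shape)
```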
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SegEval.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,350 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +SegEval.py + +The main segmentation program. + +Created by mi tian on 2015-04-02. +Copyright (c) 2015 __MyCompanyName__. All rights reserved. +""" + +# Load starndard python libs +import sys, os, optparse, csv +from itertools import combinations +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext +from copy import copy + +import matplotlib +# matplotlib.use('Agg') +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import numpy as np +import scipy as sp +from scipy.signal import correlate2d, convolve2d, filtfilt, resample +from scipy.ndimage.filters import * +from sklearn.decomposition import PCA +from sklearn.mixture import GMM +from sklearn.cluster import KMeans +from sklearn.preprocessing import normalize +from sklearn.metrics.pairwise import pairwise_distances + +# Load dependencies +from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature +from utils.PeakPickerUtil import PeakPicker +from utils.gmmdist import * +from utils.GmmMetrics import GmmDistance +from utils.RankClustering import rClustering +from utils.kmeans import Kmeans +from utils.PathTracker import PathTracker + +# Load bourdary retrieval utilities +import cnmf as cnmf_S +import foote as foote_S +import sf as sf_S +import fmc2d as fmc2d_S + +# Define arg parser +def parse_args(): + op = optparse.OptionParser() + # IO options + op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading gammatone features from.." ) + op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading spectral features from.." ) + op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading tempogram features from.." ) + op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." ) + op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ") + op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") + + # boundary retrieval options + op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", default=['novelty', 'cnmf', 'sf', 'fmc2d'], help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." 
) + + # Plot/print/mode options + op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots") + op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") + op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.") + + return op.parse_args() +options, args = parse_args() + +class FeatureObj() : + __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'gammatone_ssm', 'tempo_ssm', 'timbre_features', 'harmonic_ssm', 'ssm_timestamps'] + +class AudioObj(): + __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',\ + 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps'] + +class EvalObj(): + __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA'] + + +class SSMseg(object): + '''The main segmentation object''' + def __init__(self): + self.SampleRate = 44100 + self.NqHz = self.SampleRate/2 + self.timestamp = [] + self.previousSample = 0.0 + self.featureWindow = 6.0 + self.featureStep = 3.0 + self.kernel_size = 64 # Adjust this param according to the feature resolution.pq + self.blockSize = 2048 + self.stepSize = 1024 + + '''NOTE: Match the following params with those used for feature extraction!''' + + '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without taking an FFT. The windowing is done under the purpose of chunking + the audio to facilitate the gammatone filtering with the specified blockSize and stepSize. The resulting gammatonegram is aggregated every + gammatoneLen without overlap.''' + self.gammatoneLen = 2048 + self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20] + self.nGammatoneBands = 20 + self.lowFreq = 100 + self.highFreq = self.SampleRate / 4 + + '''Settings for extracting tempogram features.''' + self.tempoWindow = 6.0 + self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600] + + '''Peak picking settings''' + self.threshold = 50 + self.confidence_threshold = 0.5 + self.delta_threshold = 0.0 + self.backtracking_threshold = 1.9 + self.polyfitting_on = True + self.medfilter_on = True + self.LPfilter_on = True + self.whitening_on = False + self.aCoeffs = [1.0000, -0.5949, 0.2348] + self.bCoeffs = [0.1600, 0.3200, 0.1600] + self.cutoff = 0.34 + self.medianWin = 7 + + + def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0): + '''Pairwise F measure evaluation of detection rates.''' + + # print 'detection', detection + detection = np.append(detection, annotation[-1]) + res = EvalObj() + res.TP = 0 # Total number of matched ground truth and experimental data points + gt = len(annotation) # Total number of ground truth data points + dt = len(detection) # Total number of experimental data points + foundIdx = [] + D_AD = np.zeros(gt) + D_DA = np.zeros(dt) + + for dtIdx in xrange(dt): + D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation)) + for gtIdx in xrange(gt): + D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection)) + for dtIdx in xrange(dt): + if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0): + res.TP = res.TP + 1.0 + foundIdx.append(gtIdx) + foundIdx = list(set(foundIdx)) + res.TP = len(foundIdx) + res.FP = max(0, dt - res.TP) + res.FN = max(0, gt - res.TP) + + res.AD = np.mean(D_AD) + 
res.DA = np.mean(D_DA) + + res.P, res.R, res.F = 0.0, 0.0, 0.0 + + if res.TP == 0: + return res + + res.P = res.TP / float(dt) + res.R = res.TP / float(gt) + res.F = 2 * res.P * res.R / (res.P + res.R) + return res + + + def process(self): + '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending. + In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection + threshold - segmentation f measure curve. + ''' + + peak_picker = PeakPicker() + peak_picker.params.alpha = 9.0 # Alpha norm + peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta + peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0 + peak_picker.params.QuadThresh_b = 0.0 + peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0 + peak_picker.params.rawSensitivity = 20 + peak_picker.params.aCoeffs = self.aCoeffs + peak_picker.params.bCoeffs = self.bCoeffs + peak_picker.params.preWin = self.medianWin + peak_picker.params.postWin = self.medianWin + 1 + peak_picker.params.LP_on = self.LPfilter_on + peak_picker.params.Medfilt_on = self.medfilter_on + peak_picker.params.Polyfit_on = self.polyfitting_on + peak_picker.params.isMedianPositive = False + + # Settings used for feature extraction + feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow) + feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep) + aggregation_window, aggregation_step = 100, 50 + featureRate = float(self.SampleRate) / self.stepSize + + audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] + # audio_files = audio_files[:2] + audio_files.sort() + audio_list = [] + + gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')] + gammatone_feature_list = ['contrast4', 'rolloff', 'dct'] + tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')] + tempo_feature_list = ['intensity_bpm', 'loudness_bpm'] + timbre_feature_list = ['mfcc'] + harmonic_feature_list = ['nnls'] + + gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list] + timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list] + tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list] + harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list] + + fobj_list = [] + + # For each audio file, load specific features + for audio in audio_files: + ao = AudioObj() + ao.name = splitext(audio)[0] + print ao.name + # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami + # ao.gt = np.genfromtxt(annotation_file, usecols=0) + # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) + annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng + ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') + ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) + + gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], [] + for feature in gammatone_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(gammatone_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in gammatone_featureset]) + gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset] + ao.gammatone_features = 
np.hstack((gammatone_featureset)) + else: + ao.gammatone_features = gammatone_featureset[0] + + for feature in timbre_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(timbre_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in timbre_featureset]) + timbre_featureset = [x[:n_frame,:] for x in timbre_featureset] + ao.timbre_features = np.hstack((timbre_featureset)) + else: + ao.timbre_features = timbre_featureset[0] + for feature in tempo_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]) + ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0] + break + if len(tempo_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in tempo_featureset]) + tempo_featureset = [x[:n_frame,:] for x in tempo_featureset] + ao.tempo_features = np.hstack((tempo_featureset)) + else: + ao.tempo_features = tempo_featureset[0] + for feature in harmonic_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(harmonic_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in harmonic_featureset]) + harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset] + ao.harmonic_features = np.hstack((harmonic_featureset)) + else: + ao.harmonic_features = harmonic_featureset[0] + + # Get aggregated features for computing ssm + aggregation_window, aggregation_step = 1,1 + featureRate = float(self.SampleRate) /self.stepSize + pca = PCA(n_components=5) + + # Resample and normalise features + ao.gammatone_features = resample(ao.gammatone_features, step) + ao.gammatone_features = normaliseFeature(ao.gammatone_features) + ao.timbre_features = resample(ao.timbre_features, step) + ao.timbre_features = normaliseFeature(ao.timbre_features) + ao.harmonic_features = resample(ao.harmonic_features, step) + ao.harmonic_features = normaliseFeature(ao.harmonic_features) + ao.tempo_features = normaliseFeature(ao.harmonic_features) + + pca.fit(ao.gammatone_features) + ao.gammatone_features = pca.transform(ao.gammatone_features) + ao.gammatone_ssm = getSSM(ao.gammatone_features) + + pca.fit(ao.tempo_features) + ao.tempo_features = pca.transform(ao.tempo_features) + ao.tempo_ssm = getSSM(ao.tempo_features) + + pca.fit(ao.timbre_features) + ao.timbre_features = pca.transform(ao.timbre_features) + ao.timbre_ssm = getSSM(ao.timbre_features) + + pca.fit(ao.harmonic_features) + ao.harmonic_features = pca.transform(ao.harmonic_features) + ao.harmonic_ssm = getSSM(ao.harmonic_features) + + ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) + + audio_list.append(ao) + + # Segment input audio using specified boundary retrieval method. 
+ print 'Segmenting using %s method' %options.BOUNDARY + for i,ao in enumerate(audio_list): + print 'processing: %s' %ao.name + + + + + ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] + feature_sel = [int(x) for x in options.FEATURES if x.isdigit()] + ao_featureset = [ao_featureset[i] for i in feature_sel] + + gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_peaks = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker) + timbre_novelty, smoothed_timbre_novelty, timbre_novelty_peaks = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker) + tempo_novelty, smoothed_harmonic_novelty, harmonic_novelty_peaks = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker) + harmonic_novelty, smoothed_tempo_novelty, tempo_novelty_peaks = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker) + + # Peak picking from the novelty curve + smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty) + gammatone_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks] + smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty) + timbre_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks] + smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty) + harmonic_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks] + smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty) + tempo_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks] + + if (len(gammatone_novelty_peaks) == 0 or len(harmonic_novelty_peaks)== 0 or len(timbre_novelty_peaks) == 0 or len(tempo_novelty_peaks) == 0): + print ao.name, len(gammatone_novelty_peaks), len(harmonic_novelty_peaks), len(timbre_novelty_peaks), len(tempo_novelty_peaks) + + smoothed_gammatone_novelty -= np.min(smoothed_gammatone_novelty) + smoothed_harmonic_novelty -= np.min(smoothed_harmonic_novelty) + smoothed_timbre_novelty -= np.min(smoothed_timbre_novelty) + smoothed_tempo_novelty -= np.min(smoothed_tempo_novelty) + combined_sdf = (np.array(smoothed_gammatone_novelty) + np.array(smoothed_harmonic_novelty) + np.array(smoothed_timbre_novelty) + np.array(smoothed_tempo_novelty)) + + + +def main(): + + segmenter = SSMseg() + segmenter.process() + + +if __name__ == '__main__': + main() +
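The pairwiseF method above counts an annotated boundary as hit when some detection falls within +/- tolerance/2 seconds of it, then derives precision, recall and F-measure (plus median annotation-to-detection deviations). A simplified, self-contained sketch of that hit-rate evaluation, leaving out the deviation statistics and the appended final annotation:

```python
import numpy as np

def boundary_f_measure(annotation, detection, tolerance=3.0):
    """Hit-rate P/R/F for detected boundaries against ground-truth boundaries.

    A ground-truth boundary counts as a hit if any detection lies within
    +/- tolerance/2 seconds of it (same windowing as SegEval.pairwiseF)."""
    annotation = np.asarray(annotation, dtype=float)
    detection = np.asarray(detection, dtype=float)
    hits = sum(1 for t in annotation
               if detection.size and np.min(np.abs(detection - t)) <= tolerance / 2.0)
    precision = hits / float(len(detection)) if len(detection) else 0.0
    recall = hits / float(len(annotation)) if len(annotation) else 0.0
    f = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f

# Toy check: one detection is spurious, one annotation is missed.
print(boundary_f_measure([10.0, 42.5, 95.0], [9.0, 60.0, 94.2]))  # (0.667, 0.667, 0.667)
```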
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__init__.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# encoding: utf-8 + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cnmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,141 @@ +""" +C-NMF method for segmentation, modified from here: + +Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic +Music Structure Identification. Proc. of the 38th IEEE International Conference +on Acoustics, Speech, and Signal Processing (ICASSP). Vancouver, Canada, 2013. +""" + +__author__ = "Oriol Nieto" +__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" +__license__ = "GPL" +__version__ = "1.0" +__email__ = "oriol@nyu.edu" + +import numpy as np +import pymf + +# Local stuff +from utils import SegUtil + + +def cnmf(S, rank, niter=500): + """(Convex) Non-Negative Matrix Factorization. + + Parameters + ---------- + S: np.array(p, N) + Features matrix. p row features and N column observations. + rank: int + Rank of decomposition + niter: int + Number of iterations to be used + + Returns + ------- + F: np.array + Cluster matrix (decomposed matrix) + G: np.array + Activation matrix (decomposed matrix) + (s.t. S ~= F * G) + """ + nmf_mdl = pymf.CNMF(S, num_bases=rank) + nmf_mdl.factorize(niter=niter) + F = np.asarray(nmf_mdl.W) + G = np.asarray(nmf_mdl.H) + return F, G + + +def most_frequent(x): + """Returns the most frequent value in x.""" + return np.argmax(np.bincount(x)) + + +def compute_labels(X, rank, R, bound_idxs, niter=300): + """Computes the labels using the bounds.""" + + X = X.T + try: + F, G = cnmf(X, rank, niter=niter) + except: + return [1] + + label_frames = filter_activation_matrix(G.T, R) + label_frames = np.asarray(label_frames, dtype=int) + + # Get labels from the label frames + labels = [] + bound_inters = zip(bound_idxs[:-1], bound_idxs[1:]) + for bound_inter in bound_inters: + if bound_inter[1] - bound_inter[0] <= 0: + labels.append(np.max(label_frames) + 1) + else: + labels.append(most_frequent( + label_frames[bound_inter[0]:bound_inter[1]])) + + return labels + + +def filter_activation_matrix(G, R): + """Filters the activation matrix G, and returns a flattened copy.""" + idx = np.argmax(G, axis=1) + max_idx = np.arange(G.shape[0]) + max_idx = (max_idx, idx.flatten()) + G[:, :] = 0 + G[max_idx] = idx + 1 + G = np.sum(G, axis=1) + G = utils.median_filter(G[:, np.newaxis], R) + return G.flatten() + + +def segmentation(X, rank, R, h, niter=300): + """ + Gets the segmentation (boundaries and labels) from the factorization + matrices. + + Parameters + ---------- + X: np.array() + Features matrix (e.g. chromagram) + rank: int + Rank of decomposition + R: int + Size of the median filter for activation matrix + niter: int + Number of iterations for k-means + bound_idxs : list + Use previously found boundaries (None to detect them) + + Returns + ------- + bounds_idx: np.array + Bound indeces found + labels: np.array + Indeces of the labels representing the similarity between segments. + """ + + # Filter + X = utils.median_filter(X, M=h) + X = X.T + + # Find non filtered boundaries + bound_idxs = None + while True: + if bound_idxs is None: + try: + F, G = cnmf(X, rank, niter=niter) + except: + return np.empty(0), [1] + + # Filter G + G = filter_activation_matrix(G.T, R) + if bound_idxs is None: + bound_idxs = np.where(np.diff(G) != 0)[0] + 1 + + if len(np.unique(bound_idxs)) <= 2: + rank += 1 + bound_idxs = None + else: + break + + return bound_idxs
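A minimal usage sketch for the cnmf() helper above, assuming this file is importable as cnmf and that the repository's utils package and the bundled pymf (with its cvxopt dependency, Python 2 since it uses xrange) are available; the random matrix is a stand-in for a chromagram with 12 bins and 200 frames:

```python
import numpy as np
import cnmf  # this repository's cnmf.py (wraps pymf.CNMF)

# Toy stand-in for a feature matrix: 12 chroma bins x 200 frames.
S = np.random.rand(12, 200)

# Convex NMF of rank 4: S is approximated by F * G,
# with F the cluster (basis) matrix and G the activation matrix.
F, G = cnmf.cnmf(S, rank=4, niter=200)
print(F.shape)  # (12, 4)
print(G.shape)  # (4, 200)
```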
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fmc2d.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,115 @@ +""" +2D-FMC for segmentation, published here: + +Nieto, O., & Bello, J. P. (2014). Music Segment Similarity Using 2D-Fourier +Magnitude Coefficients. In Proc. of the 39th IEEE International Conference on +Acoustics Speech and Signal Processing (pp. 664-668). Florence, Italy. +""" + +__author__ = "Oriol Nieto" +__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" +__license__ = "GPL" +__version__ = "1.0" +__email__ = "oriol@nyu.edu" + +import numpy as np +import scipy.cluster.vq as vq +import logging + +# Local stuff +from utils import utils_2dfmc as utils2d +from utils.xmeans import XMeans + + +MIN_LEN = 4 # Minimum lenght for the segments + + +def get_pcp_segments(PCP, bound_idxs): + """Returns a set of segments defined by the bound_idxs.""" + pcp_segments = [] + for i in xrange(len(bound_idxs)-1): + pcp_segments.append(PCP[bound_idxs[i]:bound_idxs[i+1], :]) + return pcp_segments + + +def pcp_segments_to_2dfmc_fixed(pcp_segments, N=75): + """From a list of PCP segments, return a list of 2D-Fourier Magnitude + Coefs using a fixed segment size (N) and aggregating.""" + + fmcs = [] + for pcp_segment in pcp_segments: + X = [] + + # Append so that we never lose a segment + skip = False + while pcp_segment.shape[0] < MIN_LEN: + try: + pcp_segment = np.vstack((pcp_segment, + pcp_segment[-1][np.newaxis, :])) + except: + logging.warning("Error: Can't stack PCP arrays, " + "skipping segment") + skip = True + break + + if skip: + continue + + curr_len = pcp_segment.shape[0] + + if curr_len > N: + # Sub segment if greater than minimum + for i in xrange(curr_len - N + 1): + sub_segment = pcp_segment[i:i + N] + X.append(utils2d.compute_ffmc2d(sub_segment)) + + # Aggregate + X = np.max(np.asarray(X), axis=0) + + fmcs.append(X) + + elif curr_len <= N: + # Zero-pad + X = np.zeros((N, pcp_segment.shape[1])) + X[:curr_len, :] = pcp_segment + + # 2D-FMC + fmcs.append(utils2d.compute_ffmc2d(X)) + + return np.asarray(fmcs) + + +def compute_labels_kmeans(fmcs, k=6): + # Removing the higher frequencies seem to yield better results + fmcs = fmcs[:, fmcs.shape[1]/2:] + + fmcs = np.log1p(fmcs) + wfmcs = vq.whiten(fmcs) + + dic, dist = vq.kmeans(wfmcs, k, iter=100) + labels, dist = vq.vq(wfmcs, dic) + + return labels + + +def compute_similarity(PCP, bound_idxs, xmeans=False, k=5, N=32): + """Main function to compute the segment similarity of file file_struct.""" + + # Get PCP segments + pcp_segments = get_pcp_segments(PCP, bound_idxs) + + # Get the 2d-FMCs segments + fmcs = pcp_segments_to_2dfmc_fixed(pcp_segments, N=N) + if fmcs == [] or fmcs is None: + return np.arange(len(bound_idxs) - 1) + + # Compute the labels using kmeans + if xmeans: + xm = XMeans(fmcs, plot=False) + k = xm.estimate_K_knee(th=0.01, maxK=8) + est_labels = compute_labels_kmeans(fmcs, k=k) + + # Plot results + #plot_pcp_wgt(PCP, bound_idxs) + + return est_labels
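The 2D-Fourier Magnitude Coefficients themselves are computed by utils.utils_2dfmc.compute_ffmc2d, which is not part of this changeset. Conceptually they are the magnitudes of the 2D FFT of a fixed-size PCP patch; discarding phase makes the representation invariant to circular shifts in time and pitch (key transposition). A rough stand-in sketch of that idea, not the repository's exact implementation:

```python
import numpy as np

def ffmc2d_sketch(pcp_patch):
    """Rough stand-in for compute_ffmc2d: flattened 2D Fourier magnitudes
    of a fixed-size PCP (chroma) patch."""
    return np.abs(np.fft.fft2(pcp_patch)).flatten()

patch = np.random.rand(75, 12)       # N=75 frames x 12 pitch classes
print(ffmc2d_sketch(patch).shape)    # (900,)
```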
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/foote.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,51 @@ +""" +Foote method for segmentation, published here: + +Foote, J. (2000). Automatic Audio Segmentation Using a Measure Of Audio +Novelty. In Proc. of the IEEE International Conference of Multimedia and Expo +(pp. 452-455). New York City, NY, USA. +""" + +__author__ = "Oriol Nieto" +__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" +__license__ = "GPL" +__version__ = "1.0" +__email__ = "oriol@nyu.edu" + +# Local stuff +from utils import SegUtil + + +def segmentation(F, M, Mg, L, plot=False): + """Computes the Foote segmentator. + + Parameters + ---------- + F : np.array((N,M)) + Features matrix of N beats x M features. + M : int + Median filter size for the audio features (in beats). + Mg : int + Gaussian kernel size (in beats). + L : int + Median filter size for the adaptive threshold + + Return + ------ + bound_idx : np.array + Array containing the indices of the boundaries. + """ + # Filter + F = utils.median_filter(F, M=M) + + # Self Similarity Matrix + S = utils.compute_ssm(F) + + # Compute gaussian kernel + G = utils.compute_gaussian_krnl(Mg) + + # Compute the novelty curve + nc = utils.compute_nc(S, G) + + # Find peaks in the novelty curve + return utils.pick_peaks(nc, L=L, plot=plot)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/notebook/start-mi.sh Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,1 @@ +ipython notebook --pylab inline --notebook-dir=. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novelty.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +novelty.py + +Created by mi tian on 2015-04-02. +Copyright (c) 2015 __MyCompanyName__. All rights reserved. +""" + +import sys, os +import numpy as np +from scipy.signal import correlate2d, convolve2d + +# from utils.PeakPickerUtil import PeakPicker + +def getNoveltyCurve(ssm, kernel_size, normalise=False): + '''Return novelty score from ssm.''' + + kernel_size = int(np.floor(kernel_size/2.0) + 1) + stripe = getDiagonalSlice(ssm, kernel_size) + kernel = gaussian_kernel(kernel_size) + xc = convolve2d(stripe,kernel,mode='same') + xc[abs(xc)>1e+10]=0.00001 + + novelty = xc[int(np.floor(xc.shape[0]/2.0)),:] + novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in novelty] + + if normalise: + novelty = (novelty - np.min(novelty)) / (np.max(novelty) - np.min(novelty)) + return novelty + +def getDiagonalSlice(ssm, width): + ''' Return a diagonal stripe of the ssm given its width, with 45 degrees rotation. + Note: requres 45 degrees rotated kernel also.''' + w = int(np.floor(width/2.0)) + length = len(np.diagonal(ssm)) + stripe = np.zeros((2*w+1,length)) + # print 'diagonal', length, w, stripe.shape + for i in xrange(-w, w+1) : + stripe[w+i,:] = np.hstack(( np.zeros(int(np.floor(abs(i)/2.0))), np.diagonal(ssm,i), np.zeros(int(np.ceil(abs(i)/2.0))) )) + return stripe + +def gaussian_kernel(size): + '''Create a gaussian tapered 45 degrees rotated checkerboard kernel. + TODO: Unit testing: Should produce this with kernel size 3: + 0.1353 -0.3679 0.1353 + 0.3679 1.0000 0.3679 + 0.1353 -0.3679 0.1353 + ''' + n = float(np.ceil(size / 2.0)) + kernel = np.zeros((size,size)) + for i in xrange(1,size+1) : + for j in xrange(1,size+1) : + gauss = np.exp( -4.0 * (np.square( (i-n)/n ) + np.square( (j-n)/n )) ) + # gauss = 1 + if np.logical_xor( j - n > np.floor((i-n) / 2.0), j - n > np.floor((n-i) / 2.0) ) : + kernel[i-1,j-1] = -gauss + else: + kernel[i-1,j-1] = gauss + + return kernel + +def getNoveltyPeaks(ssm, kernel_size, peak_picker, normalise=False): + '''Detect segment boundaries in the ssm.''' + novelty = getNoveltyCurve(ssm, kernel_size, normalise=False) + smoothed_novelty, novelty_peaks = peak_picker.process(novelty) + + return novelty, smoothed_novelty, novelty_peaks \ No newline at end of file
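A small end-to-end check of getNoveltyCurve on a synthetic self-similarity matrix with two homogeneous sections; the strongest novelty response should appear near the section change. This assumes novelty.py is importable as a module (it uses xrange, so Python 2) with numpy and scipy installed:

```python
import numpy as np
from novelty import getNoveltyCurve  # this repository's novelty.py

# Two homogeneous sections of 100 frames each; boundary at frame 100.
np.random.seed(0)
features = np.vstack((np.tile([1.0, 0.0], (100, 1)),
                      np.tile([0.0, 1.0], (100, 1))))
features += 0.05 * np.random.rand(200, 2)

ssm = np.dot(features, features.T)   # simple dot-product self-similarity matrix

novelty = getNoveltyCurve(ssm, kernel_size=64)
print(len(novelty), int(np.argmax(novelty)))  # 200 frames; peak expected near frame 100
```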
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/__init__.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,43 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt + +'''pymf is a package for several Matrix Factorization variants.- +Detailed documentation is available at http://pymf.googlecode.com +Copyright (C) Christian Thurau, 2010. GNU General Public License (GPL) +''' + + +import numpy as np +from scipy.sparse import issparse + +from .nmf import * +from .nmfals import * +from .nmfnnls import * +from .cnmf import * +from .chnmf import * +from .snmf import * +from .aa import * + +from .laesa import * +from .bnmf import * + +from .sub import * + +from .svd import * +from .pca import * +from .cur import * +from .sivm_cur import * +from .cmd import * + +from .kmeans import * +from .cmeans import * + +from .sivm import * +from .sivm_sgreedy import * +from .sivm_search import * +from .sivm_gsat import * + +from .gmap import *
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/aa.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,138 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Archetypal Analysis [1] + + AA: class for Archetypal Analysis + +[1] Cutler, A. Breiman, L. (1994), "Archetypal Analysis", Technometrics 36(4), +338-347. +""" + + +import numpy as np +from dist import vq +from cvxopt import solvers, base + +from svd import pinv +from nmf import NMF + +__all__ = ["AA"] + +class AA(NMF): + """ + AA(data, num_bases=4) + + Archetypal Analysis. Factorize a data matrix into two matrices s.t. + F = | data - W*H | = | data - data*beta*H| is minimal. H and beta + are restricted to convexity (beta >=0, sum(beta, axis=1) = [1 .. 1]). + Factorization is solved via an alternating least squares optimization + using the quadratic programming solver from cvxopt. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + beta : "num_bases x num_samples" matrix of basis vector coefficients + (for constructing W s.t. W = beta * data.T ) + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying AA to some rather stupid data set: + + >>> import numpy as np + >>> from aa import AA + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + + Use 2 basis vectors -> W shape(data_dimension, 2). + + >>> aa_mdl = AA(data, num_bases=2) + + Set number of iterations to 5 and start computing the factorization. + + >>> aa_mdl.factorize(niter=5) + + The basis vectors are now stored in aa_mdl.W, the coefficients in aa_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to aa_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> aa_mdl = AA(data, num_bases=2) + >>> aa_mdl.W = W + >>> aa_mdl.factorize(niter=5, compute_w=False) + + The result is a set of coefficients aa_mdl.H, s.t. data = W * aa_mdl.H. 
+ """ + # set cvxopt options + solvers.options['show_progress'] = False + + def init_h(self): + self.H = np.random.random((self._num_bases, self._num_samples)) + self.H /= self.H.sum(axis=0) + + def init_w(self): + self.beta = np.random.random((self._num_bases, self._num_samples)) + self.beta /= self.beta.sum(axis=0) + self.W = np.dot(self.beta, self.data.T).T + self.W = np.random.random((self._data_dimension, self._num_bases)) + + def update_h(self): + """ alternating least squares step, update H under the convexity + constraint """ + def update_single_h(i): + """ compute single H[:,i] """ + # optimize alpha using qp solver from cvxopt + FA = base.matrix(np.float64(np.dot(-self.W.T, self.data[:,i]))) + al = solvers.qp(HA, FA, INQa, INQb, EQa, EQb) + self.H[:,i] = np.array(al['x']).reshape((1, self._num_bases)) + + EQb = base.matrix(1.0, (1,1)) + # float64 required for cvxopt + HA = base.matrix(np.float64(np.dot(self.W.T, self.W))) + INQa = base.matrix(-np.eye(self._num_bases)) + INQb = base.matrix(0.0, (self._num_bases,1)) + EQa = base.matrix(1.0, (1, self._num_bases)) + + for i in xrange(self._num_samples): + update_single_h(i) + + def update_w(self): + """ alternating least squares step, update W under the convexity + constraint """ + def update_single_w(i): + """ compute single W[:,i] """ + # optimize beta using qp solver from cvxopt + FB = base.matrix(np.float64(np.dot(-self.data.T, W_hat[:,i]))) + be = solvers.qp(HB, FB, INQa, INQb, EQa, EQb) + self.beta[i,:] = np.array(be['x']).reshape((1, self._num_samples)) + + # float64 required for cvxopt + HB = base.matrix(np.float64(np.dot(self.data[:,:].T, self.data[:,:]))) + EQb = base.matrix(1.0, (1, 1)) + W_hat = np.dot(self.data, pinv(self.H)) + INQa = base.matrix(-np.eye(self._num_samples)) + INQb = base.matrix(0.0, (self._num_samples, 1)) + EQa = base.matrix(1.0, (1, self._num_samples)) + + for i in xrange(self._num_bases): + update_single_w(i) + + self.W = np.dot(self.beta, self.data.T).T + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/bnmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,127 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Binary Matrix Factorization [1] + + BNMF(NMF) : Class for binary matrix factorization + +[1]Z. Zhang, T. Li, C. H. Q. Ding, X. Zhang: Binary Matrix Factorization with +Applications. ICDM 2007 +""" + + +import numpy as np +from nmf import NMF + +__all__ = ["BNMF"] + +class BNMF(NMF): + """ + BNMF(data, data, num_bases=4) + Binary Matrix Factorization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H and W are restricted to binary values. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying BNMF to some rather stupid data set: + + >>> import numpy as np + >>> from bnmf import BNMF + >>> data = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0]]) + + Use 2 basis vectors -> W shape(data_dimension, 2). + + >>> bnmf_mdl = BNMF(data, num_bases=2) + + Set number of iterations to 5 and start computing the factorization. + + >>> bnmf_mdl.factorize(niter=5) + + The basis vectors are now stored in bnmf_mdl.W, the coefficients in bnmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to bnmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[0.0], [1.0]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> bnmf_mdl = BNMF(data, num_bases=2) + >>> bnmf_mdl.W = W + >>> bnmf_mdl.factorize(niter=10, compute_w=False) + + The result is a set of coefficients bnmf_mdl.H, s.t. data = W * bnmf_mdl.H. + """ + + # controls how fast lambda should increase: + # this influence convergence to binary values during the update. A value + # <1 will result in non-binary decompositions as the update rule effectively + # is a conventional nmf update rule. Values >1 give more weight to making the + # factorization binary with increasing iterations. + # setting either W or H to 0 results make the resulting matrix non binary. + _LAMB_INCREASE_W = 1.1 + _LAMB_INCREASE_H = 1.1 + + def update_h(self): + H1 = np.dot(self.W.T, self.data[:,:]) + 3.0*self._lamb_H*(self.H**2) + H2 = np.dot(np.dot(self.W.T,self.W), self.H) + 2*self._lamb_H*(self.H**3) + self._lamb_H*self.H + 10**-9 + self.H *= H1/H2 + + self._lamb_W = self._LAMB_INCREASE_W * self._lamb_W + self._lamb_H = self._LAMB_INCREASE_H * self._lamb_H + + def update_w(self): + W1 = np.dot(self.data[:,:], self.H.T) + 3.0*self._lamb_W*(self.W**2) + W2 = np.dot(self.W, np.dot(self.H, self.H.T)) + 2.0*self._lamb_W*(self.W**3) + self._lamb_W*self.W + 10**-9 + self.W *= W1/W2 + + def factorize(self, niter=10, compute_w=True, compute_h=True, + show_progress=False, compute_err=True): + """ Factorize s.t. WH = data + + Parameters + ---------- + niter : int + number of iterations. + show_progress : bool + print some extra information to stdout. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. 
+ + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH| for each iteration. + """ + + # init some learning parameters + self._lamb_W = 1.0/niter + self._lamb_H = 1.0/niter + + NMF.factorize(self, niter=niter, compute_w=compute_w, + compute_h=compute_h, show_progress=show_progress, + compute_err=compute_err) + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/chnmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,223 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Convex Hull Non-negative Matrix Factorization [1] + + CHNMF(NMF) : Class for Convex-hull NMF + quickhull : Function for finding the convex hull in 2D + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Convex Non-Negative Matrix +Factorization in the Wild. ICDM 2009. +""" + + +import numpy as np + +from itertools import combinations +from dist import vq +from pca import PCA +from aa import AA + +__all__ = ["CHNMF"] + + +def quickhull(sample): + """ Find data points on the convex hull of a supplied data set + + Args: + sample: data points as column vectors n x d + n - number samples + d - data dimension (should be two) + + Returns: + a k x d matrix containint the convex hull data points + """ + + link = lambda a, b: np.concatenate((a, b[1:])) + edge = lambda a, b: np.concatenate(([a], [b])) + + def dome(sample, base): + h, t = base + dists = np.dot(sample - h, np.dot(((0, -1), (1, 0)), (t - h))) + outer = np.repeat(sample, dists > 0, axis=0) + + if len(outer): + pivot = sample[np.argmax(dists)] + return link(dome(outer, edge(h, pivot)), + dome(outer, edge(pivot, t))) + else: + return base + + if len(sample) > 2: + axis = sample[:, 0] + base = np.take(sample, [np.argmin(axis), np.argmax(axis)], axis=0) + return link(dome(sample, base), + dome(sample, base[::-1])) + else: + return sample + +class CHNMF(AA): + """ + CHNMF(data, num_bases=4) + + Convex Hull Non-negative Matrix Factorization. Factorize a data matrix into + two matrices s.t. F = | data - W*H | is minimal. H is restricted to convexity + (H >=0, sum(H, axis=1) = [1 .. 1]) and W resides on actual data points. + Factorization is solved via an alternating least squares optimization using + the quadratic programming solver from cvxopt. The results are usually + equivalent to Archetypal Analysis (pymf.AA) but CHNMF also works for very + large datasets. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + base_sel: int, + Number of pairwise basis vector projections. Set to a value< rank(data). + Computation time scale exponentially with this value, usually rather low + values are sufficient (3-10). + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying CHNMF to some rather stupid data set: + + >>> import numpy as np + >>> from chnmf import CHNMF + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + + Use 2 basis vectors -> W shape(data_dimension, 2). + + >>> chnmf_mdl = CHNMF(data, num_bases=2) + + And start computing the factorization. + + >>> chnmf_mdl.factorize() + + The basis vectors are now stored in chnmf_mdl.W, the coefficients in + chnmf_mdl.H. To compute coefficients for an existing set of basis vectors + simply copy W to chnmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 2.0], [1.2, 1.8]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> chnmf_mdl = CHNMF(data, num_bases=2) + >>> chnmf_mdl.W = W + >>> chnmf_mdl.factorize(compute_w=False) + + The result is a set of coefficients chnmf_mdl.H, s.t. 
data = W * chnmf_mdl.H. + """ + + def __init__(self, data, num_bases=4, base_sel=3): + + # call inherited method + AA.__init__(self, data, num_bases=num_bases) + + # base sel should never be larger than the actual data dimension + self._base_sel = base_sel + if base_sel > self.data.shape[0]: + self._base_sel = self.data.shape[0] + + def init_h(self): + self.H = np.zeros((self._num_bases, self._num_samples)) + + def init_w(self): + self.W = np.zeros((self._data_dimension, self._num_bases)) + + def _map_w_to_data(self): + """ Return data points that are most similar to basis vectors W + """ + + # assign W to the next best data sample + self._Wmapped_index = vq(self.data, self.W) + self.Wmapped = np.zeros(self.W.shape) + + # do not directly assign, i.e. Wdist = self.data[:,sel] + # as self might be unsorted (in non ascending order) + # -> sorting sel would screw the matching to W if + # self.data is stored as a hdf5 table (see h5py) + for i, s in enumerate(self._Wmapped_index): + self.Wmapped[:,i] = self.data[:,s] + + def update_w(self): + """ compute new W """ + def select_hull_points(data, n=3): + """ select data points for pairwise projections of the first n + dimensions """ + + # iterate over all projections and select data points + idx = np.array([]) + + # iterate over some pairwise combinations of dimensions + for i in combinations(range(n), 2): + # sample convex hull points in 2D projection + convex_hull_d = quickhull(data[i, :].T) + + # get indices for convex hull data points + idx = np.append(idx, vq(data[i, :], convex_hull_d.T)) + idx = np.unique(idx) + + return np.int32(idx) + + # determine convex hull data points using either PCA or random + # projections + method = 'randomprojection' + if method == 'pca': + pcamodel = PCA(self.data) + pcamodel.factorize(show_progress=False) + proj = pcamodel.H + else: + R = np.random.randn(self._base_sel, self._data_dimension) + proj = np.dot(R, self.data) + + self._hull_idx = select_hull_points(proj, n=self._base_sel) + aa_mdl = AA(self.data[:, self._hull_idx], num_bases=self._num_bases) + + # determine W + aa_mdl.factorize(niter=50, compute_h=True, compute_w=True, + compute_err=True, show_progress=False) + + self.W = aa_mdl.W + self._map_w_to_data() + + def factorize(self, show_progress=False, compute_w=True, compute_h=True, + compute_err=True, niter=1): + """ Factorize s.t. WH = data + + Parameters + ---------- + show_progress : bool + print some extra information to stdout. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH|. + """ + + AA.factorize(self, niter=1, show_progress=show_progress, + compute_w=compute_w, compute_h=compute_h, + compute_err=compute_err) + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/cmd.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,94 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Compact Matrix Decomposition [1] + + CMD(CUR): Class for Compact Matrix Decomposition + +[1] Sun, J., Xie, Y., Zhang, H. and Faloutsos, C. (2007), Less is More: Compact Matrix Decomposition for Large +Sparse Graphs, in Proc. SIAM Int. Conf. on Data Mining. +""" + + +import numpy as np +from cur import CUR + +__all__ = ["CMD"] + +class CMD(CUR): + """ + CMD(data, rrank=0, crank=0) + + + Compact Matrix Decomposition. Factorize a data matrix into three matrices s.t. + F = | data - USV| is minimal. CMD randomly selects rows and columns from + data for building U and V, respectively. + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + rrank: int, optional + Number of rows to sample from data. Double entries are eliminiated s.t. + the resulting rank might be lower. + 4 (default) + crank: int, optional + Number of columns to sample from data. Double entries are eliminiated s.t. + the resulting rank might be lower. + 4 (default) + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV + + Example + ------- + >>> import numpy as np + >>> from cmd import CMD + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cmd_mdl = CMD(data, show_progress=False, rrank=1, crank=2) + >>> cmd_mdl.factorize() + """ + + def _cmdinit(self): + nrids = np.unique(self._rid) + ncids = np.unique(self._cid) + + self._rcnt = np.zeros(len(nrids)) + self._ccnt = np.zeros(len(ncids)) + + for i,idx in enumerate(nrids): + self._rcnt[i] = len(np.where(self._rid == idx)[0]) + + for i,idx in enumerate(ncids): + self._ccnt[i] = len(np.where(self._cid == idx)[0]) + + self._rid = np.int32(list(nrids)) + self._cid = np.int32(list(ncids)) + + def factorize(self): + """ Factorize s.t. CUR = data + + Updated Values + -------------- + .C : updated values for C. + .U : updated values for U. + .R : updated values for R. + """ + + [prow, pcol] = self.sample_probability() + + self._rid = self.sample(self._rrank, prow) + self._cid = self.sample(self._crank, pcol) + + self._cmdinit() + + self.computeUCR() + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/cmeans.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,86 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF K-means clustering (unary-convex matrix factorization). +Copyright (C) Christian Thurau, 2010. GNU General Public License (GPL). +""" + + + +import numpy as np + +import dist +from nmf import NMF + +__all__ = ["Cmeans"] + +class Cmeans(NMF): + """ + cmeans(data, num_bases=4) + + + Fuzzy c-means soft clustering. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity (columns + sum to 1) W is simply the weighted mean over the corresponding samples in + data. Note that the objective function is based on distances (?), hence the + Frobenius norm is probably not a good quality measure. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying C-means to some rather stupid data set: + + >>> import numpy as np + >>> from cmeans import Cmeans + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cmeans_mdl = Cmeans(data, num_bases=2, niter=10) + >>> cmeans_mdl.initialization() + >>> cmeans_mdl.factorize() + + The basis vectors are now stored in cmeans_mdl.W, the coefficients in cmeans_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to cmeans_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = [[1.0, 0.0], [0.0, 1.0]] + >>> cmeans_mdl = Cmeans(data, num_bases=2) + >>> cmeans_mdl.initialization() + >>> cmeans_mdl.W = W + >>> cmeans_mdl.factorize(compute_w=False, niter=50) + + The result is a set of coefficients kmeans_mdl.H, s.t. data = W * kmeans_mdl.H. + """ + + def update_h(self): + # assign samples to best matching centres ... + m = 1.75 + tmp_dist = dist.pdist(self.W, self.data, metric='l2') + self._EPS + self.H[:,:] = 0.0 + + for i in range(self._num_bases): + for k in range(self._num_bases): + self.H[i,:] += (tmp_dist[i,:]/tmp_dist[k,:])**(2.0/(m-1)) + + self.H = np.where(self.H>0, 1.0/self.H, 0) + + def update_w(self): + for i in range(self._num_bases): + tmp = (self.H[i:i+1,:] * self.data).sum(axis=1) + self.W[:,i] = tmp/(self.H[i,:].sum() + self._EPS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/cnmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,191 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Convex Matrix Factorization [1] + + CNMF(NMF) : Class for convex matrix factorization + +[1] Ding, C., Li, T. and Jordan, M.. Convex and Semi-Nonnegative Matrix Factorizations. +IEEE Trans. on Pattern Analysis and Machine Intelligence 32(1), 45-55. +""" + + +import numpy as np +import logging +from nmf import NMF +from kmeans import Kmeans + + +__all__ = ["CNMF"] + +class CNMF(NMF): + """ + CNMF(data, num_bases=4) + + + Convex NMF. Factorize a data matrix into two matrices s.t. + F = | data - W*H | = | data - data*beta*H| is minimal. H and beta + are restricted to convexity (beta >=0, sum(beta, axis=1) = [1 .. 1]). + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying CNMF to some rather stupid data set: + + >>> import numpy as np + >>> from cnmf import CNMF + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cnmf_mdl = CNMF(data, num_bases=2) + >>> cnmf_mdl.factorize(niter=10) + + The basis vectors are now stored in cnmf_mdl.W, the coefficients in cnmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to cnmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = [[1.0, 0.0], [0.0, 1.0]] + >>> cnmf_mdl = CNMF(data, num_bases=2) + >>> cnmf_mdl.W = W + >>> cnmf_mdl.factorize(compute_w=False, niter=1) + + The result is a set of coefficients acnmf_mdl.H, s.t. data = W * cnmf_mdl.H. + """ + + # see .factorize() for the update of W and H + # -> proper decoupling of W/H not possible ... + def update_w(self): + pass + + def update_h(self): + pass + + def init_h(self): + if not hasattr(self, 'H'): + # init basic matrices + self.H = np.zeros((self._num_bases, self._num_samples)) + + # initialize using k-means + km = Kmeans(self.data[:,:], num_bases=self._num_bases) + km.factorize(niter=10) + assign = km.assigned + + num_i = np.zeros(self._num_bases) + for i in range(self._num_bases): + num_i[i] = len(np.where(assign == i)[0]) + + self.H.T[range(len(assign)), assign] = 1.0 + self.H += 0.2*np.ones((self._num_bases, self._num_samples)) + + if not hasattr(self, 'G'): + self.G = np.zeros((self._num_samples, self._num_bases)) + + self.G[range(len(assign)), assign] = 1.0 + self.G += 0.01 + self.G /= np.tile(np.reshape(num_i[assign],(-1,1)), self.G.shape[1]) + + if not hasattr(self,'W'): + self.W = np.dot(self.data[:,:], self.G) + + def init_w(self): + pass + + def factorize(self, niter=10, compute_w=True, compute_h=True, + compute_err=True, show_progress=False): + """ Factorize s.t. WH = data + + Parameters + ---------- + niter : int + number of iterations. + show_progress : bool + print some extra information to stdout. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. 
+ + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH| for each iteration. + """ + + if not hasattr(self,'W'): + self.init_w() + + if not hasattr(self,'H'): + self.init_h() + + def separate_positive(m): + return (np.abs(m) + m)/2.0 + + def separate_negative(m): + return (np.abs(m) - m)/2.0 + + if show_progress: + self._logger.setLevel(logging.INFO) + else: + self._logger.setLevel(logging.ERROR) + + XtX = np.dot(self.data[:,:].T, self.data[:,:]) + XtX_pos = separate_positive(XtX) + XtX_neg = separate_negative(XtX) + + self.ferr = np.zeros(niter) + # iterate over W and H + + for i in xrange(niter): + # update H + XtX_neg_x_W = np.dot(XtX_neg, self.G) + XtX_pos_x_W = np.dot(XtX_pos, self.G) + + if compute_h: + H_x_WT = np.dot(self.H.T, self.G.T) + ha = XtX_pos_x_W + np.dot(H_x_WT, XtX_neg_x_W) + hb = XtX_neg_x_W + np.dot(H_x_WT, XtX_pos_x_W) + 10**-9 + self.H = (self.H.T*np.sqrt(ha/hb)).T + + # update W + if compute_w: + HT_x_H = np.dot(self.H, self.H.T) + wa = np.dot(XtX_pos, self.H.T) + np.dot(XtX_neg_x_W, HT_x_H) + wb = np.dot(XtX_neg, self.H.T) + np.dot(XtX_pos_x_W, HT_x_H) + 10**-9 + + self.G *= np.sqrt(wa/wb) + self.W = np.dot(self.data[:,:], self.G) + + if compute_err: + self.ferr[i] = self.frobenius_norm() + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter) + + ' FN:' + str(self.ferr[i])) + else: + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter)) + + if i > 1 and compute_err: + if self.converged(i): + self.ferr = self.ferr[:i] + break + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/cur.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,143 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF CUR Decomposition [1] + + CUR(SVD) : Class for CUR Decomposition + +[1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III: Computing +a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206. +""" + + +import numpy as np +import scipy.sparse + +from svd import pinv, SVD + + +__all__ = ["CUR"] + +class CUR(SVD): + """ + CUR(data, data, k=-1, rrank=0, crank=0) + + CUR Decomposition. Factorize a data matrix into three matrices s.t. + F = | data - USV| is minimal. CUR randomly selects rows and columns from + data for building U and V, respectively. + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + rrank: int, optional + Number of rows to sample from data. + 4 (default) + crank: int, optional + Number of columns to sample from data. + 4 (default) + show_progress: bool, optional + Print some extra information + False (default) + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV + + Example + ------- + >>> import numpy as np + >>> from cur import CUR + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cur_mdl = CUR(data, show_progress=False, rrank=1, crank=2) + >>> cur_mdl.factorize() + """ + + def __init__(self, data, k=-1, rrank=0, crank=0): + SVD.__init__(self, data,k=k,rrank=rrank, crank=rrank) + + # select all data samples for computing the error: + # note that this might take very long, adjust self._rset and self._cset + # for faster computations. + self._rset = range(self._rows) + self._cset = range(self._cols) + + + def sample(self, s, probs): + prob_rows = np.cumsum(probs.flatten()) + temp_ind = np.zeros(s, np.int32) + + for i in range(s): + v = np.random.rand() + + try: + tempI = np.where(prob_rows >= v)[0] + temp_ind[i] = tempI[0] + except: + temp_ind[i] = len(prob_rows) + + return np.sort(temp_ind) + + def sample_probability(self): + + if scipy.sparse.issparse(self.data): + dsquare = self.data.multiply(self.data) + else: + dsquare = self.data[:,:]**2 + + prow = np.array(dsquare.sum(axis=1), np.float64) + pcol = np.array(dsquare.sum(axis=0), np.float64) + + prow /= prow.sum() + pcol /= pcol.sum() + + return (prow.reshape(-1,1), pcol.reshape(-1,1)) + + def computeUCR(self): + # the next lines do NOT work with h5py if CUR is used -> double indices in self.cid or self.rid + # can occur and are not supported by h5py. When using h5py data, always use CMD which ignores + # reoccuring row/column selections. + + if scipy.sparse.issparse(self.data): + self._C = self.data[:, self._cid] * scipy.sparse.csc_matrix(np.diag(self._ccnt**(1/2))) + self._R = scipy.sparse.csc_matrix(np.diag(self._rcnt**(1/2))) * self.data[self._rid,:] + + self._U = pinv(self._C, self._k) * self.data[:,:] * pinv(self._R, self._k) + + else: + self._C = np.dot(self.data[:, self._cid].reshape((self._rows, len(self._cid))), np.diag(self._ccnt**(1/2))) + self._R = np.dot(np.diag(self._rcnt**(1/2)), self.data[self._rid,:].reshape((len(self._rid), self._cols))) + + self._U = np.dot(np.dot(pinv(self._C, self._k), self.data[:,:]), + pinv(self._R, self._k)) + + # set some standard (with respect to SVD) variable names + self.U = self._C + self.S = self._U + self.V = self._R + + def factorize(self): + """ Factorize s.t. 
CUR = data + + Updated Values + -------------- + .C : updated values for C. + .U : updated values for U. + .R : updated values for R. + """ + [prow, pcol] = self.sample_probability() + self._rid = self.sample(self._rrank, prow) + self._cid = self.sample(self._crank, pcol) + + self._rcnt = np.ones(len(self._rid)) + self._ccnt = np.ones(len(self._cid)) + + self.computeUCR() + + +if __name__ == "__main__": + import doctest + doctest.testmod()
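A minimal usage sketch for the CUR class above, assuming the pymf directory is on the Python path so its flat imports (from cur import CUR) resolve, and run under Python 2 (which the modules' use of xrange implies). The toy matrix and the rrank/crank values are arbitrary; the sketch shows how the factors are exposed under the SVD-style names U, S, V and how to check the reconstruction error:

import numpy as np
from cur import CUR

# toy matrix: 3 feature dimensions x 8 samples (columns are samples)
data = np.random.rand(3, 8)

cur_mdl = CUR(data, rrank=2, crank=2)
cur_mdl.factorize()

# computeUCR() maps the factors to SVD-style names:
# U <- sampled columns (C), S <- linking matrix (U), V <- sampled rows (R)
approx = np.dot(np.dot(cur_mdl.U, cur_mdl.S), cur_mdl.V)
print(np.sqrt(np.sum((data - approx) ** 2)))   # Frobenius reconstruction error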
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/cursl.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,91 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +#$Id$ +""" +PyMF CUR Decomposition [1] + + CURSL(SVD) : Class for CUR Decomposition (uses statistical leverage based sampling) + +[1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III: Computing +a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206. +""" + + +import numpy as np +import scipy.sparse + +from svd import pinv, SVD +from cmd import CMD + +__all__ = ["CURSL"] + +class CURSL(CMD): + """ + CURSL(data, data, rrank=0, crank=0) + + CUR/CMD Decomposition. Factorize a data matrix into three matrices s.t. + F = | data - USV| is minimal. CURSL randomly selects rows and columns from + data for building U and V, respectively. The importance sampling is based + on a statistical leverage score from the top-k singular vectors (k is + currently set to 4/5*rrank and 4/5*crank). + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + rrank: int, optional + Number of rows to sample from data. + 4 (default) + crank: int, optional + Number of columns to sample from data. + 4 (default) + show_progress: bool, optional + Print some extra information + False (default) + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV (or _C _U _R) + + Example + ------- + >>> import numpy as np + >>> from cur import CUR + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cur_mdl = CURSL(data, show_progress=False, rrank=1, crank=2) + >>> cur_mdl.factorize() + """ + + def __init__(self, data, k=-1, rrank=0, crank=0): + SVD.__init__(self, data, k=k, rrank=rrank, crank=rrank) + + def sample_probability(self): + def comp_prob(d, k): + # compute statistical leverage score + c = np.round(k - k/5.0) + + svd_mdl = SVD(d, k=c) + svd_mdl.factorize() + + if scipy.sparse.issparse(self.data): + A = svd_mdl.V.multiply(svd_mdl.V) + ## Rule 1 + pcol = np.array(A.sum(axis=0)/k) + else: + A = svd_mdl.V[:k,:]**2.0 + ## Rule 1 + pcol = A.sum(axis=0)/k + + #c = k * np.log(k/ (self._eps**2.0)) + #pcol = c * pcol.reshape((-1,1)) + pcol /= np.sum(pcol) + return pcol + + pcol = comp_prob(self.data, self._rrank) + prow = comp_prob(self.data.transpose(), self._crank) + + + return (prow.reshape(-1,1), pcol.reshape(-1,1))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/dist.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,130 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF several distance functions + + kl_divergence(): KL Divergence + l1_distance(): L1 distance + l2_distance(): L2 distance + cosine_distance(): Cosine distance + pdist(): Pairwise distance computation + vq(): Vector quantization + +""" + + +import numpy as np +import scipy.sparse + +__all__ = ["abs_cosine_distance", "kl_divergence", "l1_distance", "l2_distance", + "weighted_abs_cosine_distance","cosine_distance","vq", "pdist"] + +def kl_divergence(d, vec): + b = vec*(1/d) + b = np.where(b>0, np.log(b),0) + b = vec * b + b = np.sum(b - vec + d, axis=0).reshape((-1)) + return b + +def l1_distance(d, vec): + ret_val = np.sum(np.abs(d - vec), axis=0) + ret_val = ret_val.reshape((-1)) + return ret_val + +def sparse_l2_distance(d, vec): + # compute the norm of d + nd = (d.multiply(d)).sum(axis=0) + nv = (vec.multiply(vec)).sum(axis=0) + ret_val = nd + nv - 2.0*(d.T * vec).T + return np.sqrt(ret_val) + +def approx_l2_distance(d, vec): + # Use random projections to approximate the conventional l2 distance + k = np.round(np.log(d.shape[0])) + #k = d.shape[0] + R = np.random.randn(k, d.shape[0]) + R = R / np.sqrt((R**2).sum(axis=0)) + A = np.dot(R,d) + B = np.dot(R, vec) + ret_val = np.sum( (A - B)**2, axis=0) + ret_val = np.sqrt(R.shape[1]/R.shape[0]) * np.sqrt(ret_val) + ret_val = ret_val.reshape((-1)) + return ret_val + +def l2_distance(d, vec): + if scipy.sparse.issparse(d): + ret_val = sparse_l2_distance(d, vec) + else: + ret_val = np.sqrt(((d[:,:] - vec)**2).sum(axis=0)) + + return ret_val.reshape((-1)) + +def l2_distance_new(d,vec): + # compute the norm of d + nd = (d**2).sum(axis=0) + nv = (vec**2).sum(axis=0) + ret_val = nd + nv - 2.0*np.dot(d.T,vec.reshape((-1,1))).T + + return np.sqrt(ret_val) + +def cosine_distance(d, vec): + tmp = np.dot(np.transpose(d), vec) + a = np.sqrt(np.sum(d**2, axis=0)) + b = np.sqrt(np.sum(vec**2)) + k = (a*b).reshape(-1) + (10**-9) + + # compute distance + ret_val = 1.0 - tmp/k + + return ret_val.reshape((-1)) + +def abs_cosine_distance(d, vec, weighted=False): + if scipy.sparse.issparse(d): + tmp = np.array((d.T * vec).todense(), dtype=np.float32).reshape(-1) + a = np.sqrt(np.array(d.multiply(d).sum(axis=0), dtype=np.float32).reshape(-1)) + b = np.sqrt(np.array(vec.multiply(vec).sum(axis=0), dtype=np.float32).reshape(-1)) + else: + tmp = np.dot(np.transpose(d), vec).reshape(-1) + a = np.sqrt(np.sum(d**2, axis=0)).reshape(-1) + b = np.sqrt(np.sum(vec**2)).reshape(-1) + + k = (a*b).reshape(-1) + 10**-9 + + # compute distance + ret_val = 1.0 - np.abs(tmp/k) + + if weighted: + ret_val = ret_val * a + return ret_val.reshape((-1)) + +def weighted_abs_cosine_distance(d, vec): + ret_val = abs_cosine_distance(d, vec, weighted=True) + return ret_val + +def pdist(A, B, metric='l2' ): + # compute pairwise distance between a data matrix A (d x n) and B (d x m). + # Returns a distance matrix d (n x m). 
+ d = np.zeros((A.shape[1], B.shape[1])) + if A.shape[1] <= B.shape[1]: + for aidx in xrange(A.shape[1]): + if metric == 'l2': + d[aidx:aidx+1,:] = l2_distance(B[:,:], A[:,aidx:aidx+1]).reshape((1,-1)) + if metric == 'l1': + d[aidx:aidx+1,:] = l1_distance(B[:,:], A[:,aidx:aidx+1]).reshape((1,-1)) + else: + for bidx in xrange(B.shape[1]): + if metric == 'l2': + d[:, bidx:bidx+1] = l2_distance(A[:,:], B[:,bidx:bidx+1]).reshape((-1,1)) + if metric == 'l1': + d[:, bidx:bidx+1] = l1_distance(A[:,:], B[:,bidx:bidx+1]).reshape((-1,1)) + + return d + +def vq(A, B, metric='l2'): + # assigns data samples in B to cluster centers A and + # returns an index list [assume n column vectors, d x n] + assigned = np.argmin(pdist(A,B, metric=metric), axis=0) + return assigned
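pdist() and vq() above carry no doctest; the short sketch below illustrates the column-vector convention they use (each column is one sample), assuming only numpy and the flat import style of this package:

import numpy as np
from dist import pdist, vq

centres = np.array([[0.0, 5.0],
                    [0.0, 5.0]])             # two 2-D centres, one per column
samples = np.array([[0.1, 4.8, 0.3, 5.2],
                    [0.2, 5.1, 0.1, 4.9]])   # four 2-D samples, one per column

D = pdist(centres, samples, metric='l2')     # (2, 4) centre-to-sample distances
labels = vq(centres, samples)                # index of the closest centre per sample
print(labels)                                # -> [0 1 0 1]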
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/gmap.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,219 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Geometric-Map + + GMAP: Class for Geometric-Map +""" + + +import scipy.sparse +import numpy as np + +from dist import * +from aa import AA +from kmeans import Kmeans + +__all__ = ["GMAP"] + +class GMAP(AA): + """ + GMAP(data, num_bases=4, dist_measure='l2') + + + Geometric-Map. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. G-MAP can emulate/approximate several + standard methods including PCA, NMF, and AA. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + method : one of 'pca' ,'nmf', 'aa', default is 'pca' which emulates + Principal Component Analysis using the geometric map method ('nmf' + emulates Non-negative Matrix Factorization, 'aa' emulates Archetypal + Analysis). + robust_map : bool, optional + use robust_map or the standard max-val selection + [see "On FastMap and the Convex Hull of Multivariate Data: Toward + Fast and Robust Dimension Reduction", Ostrouchov and Samatova, PAMI + 2005] + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying GMAP to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> gmap_mdl = GMAP(data, num_bases=2) + >>> gmap_mdl.factorize() + + The basis vectors are now stored in gmap_mdl.W, the coefficients in gmap_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to gmap_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> gmap_mdl = GMAP(data, num_bases=2) + >>> gmap_mdl.W = W + >>> gmap_mdl.factorize(compute_w=False) + + The result is a set of coefficients gmap_mdl.H, s.t. data = W * gmap_mdl.H. + """ + + # always overwrite the default number of iterations + # -> any value other does not make sense. + _NITER = 1 + + def __init__(self, data, num_bases=4, method='pca', robust_map=True): + + AA.__init__(self, data, num_bases=num_bases) + self.sub = [] + self._robust_map = robust_map + self._method = method + + + def init_h(self): + self.H = np.zeros((self._num_bases, self._num_samples)) + + def init_w(self): + self.W = np.zeros((self._data_dimension, self._num_bases)) + + def update_w(self): + """ compute new W """ + + def select_next(iterval): + """ select the next best data sample using robust map + or simply the max iterval ... 
""" + + if self._robust_map: + k = np.argsort(iterval)[::-1] + d_sub = self.data[:,k[:self._robust_nselect]] + self.sub.extend(k[:self._robust_nselect]) + + # cluster d_sub + kmeans_mdl = Kmeans(d_sub, num_bases=self._robust_cluster) + kmeans_mdl.factorize(niter=10) + + # get largest cluster + h = np.histogram(kmeans_mdl.assigned, range(self._robust_cluster+1))[0] + largest_cluster = np.argmax(h) + sel = pdist(kmeans_mdl.W[:, largest_cluster:largest_cluster+1], d_sub) + sel = k[np.argmin(sel)] + else: + sel = np.argmax(iterval) + + return sel + + EPS = 10**-8 + + if scipy.sparse.issparse(self.data): + norm_data = np.sqrt(self.data.multiply(self.data).sum(axis=0)) + norm_data = np.array(norm_data).reshape((-1)) + else: + norm_data = np.sqrt(np.sum(self.data**2, axis=0)) + + + self.select = [] + + if self._method == 'pca' or self._method == 'aa': + iterval = norm_data.copy() + + if self._method == 'nmf': + iterval = np.sum(self.data, axis=0)/(np.sqrt(self.data.shape[0])*norm_data) + iterval = 1.0 - iterval + + self.select.append(select_next(iterval)) + + + for l in range(1, self._num_bases): + + if scipy.sparse.issparse(self.data): + c = self.data[:, self.select[-1]:self.select[-1]+1].T * self.data + c = np.array(c.todense()) + else: + c = np.dot(self.data[:,self.select[-1]], self.data) + + c = c/(norm_data * norm_data[self.select[-1]]) + + if self._method == 'pca': + c = 1.0 - np.abs(c) + c = c * norm_data + + elif self._method == 'aa': + c = (c*-1.0 + 1.0)/2.0 + c = c * norm_data + + elif self._method == 'nmf': + c = 1.0 - np.abs(c) + + ### update the estimated volume + iterval = c * iterval + + # detect the next best data point + self.select.append(select_next(iterval)) + + self._logger.info('cur_nodes: ' + str(self.select)) + + # sort indices, otherwise h5py won't work + self.W = self.data[:, np.sort(self.select)] + + # "unsort" it again to keep the correct order + self.W = self.W[:, np.argsort(np.argsort(self.select))] + + def factorize(self, show_progress=False, compute_w=True, compute_h=True, + compute_err=True, robust_cluster=3, niter=1, robust_nselect=-1): + """ Factorize s.t. WH = data + + Parameters + ---------- + show_progress : bool + print some extra information to stdout. + False, default + compute_h : bool + iteratively update values for H. + True, default + compute_w : bool + iteratively update values for W. + default, True + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + robust_cluster : int, optional + set the number of clusters for robust map selection. + 3, default + robust_nselect : int, optional + set the number of samples to consider for robust map + selection. + -1, default (automatically determine suitable number) + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH|. + """ + self._robust_cluster = robust_cluster + self._robust_nselect = robust_nselect + + if self._robust_nselect == -1: + self._robust_nselect = np.round(np.log(self.data.shape[1])*2) + + AA.factorize(self, niter=1, show_progress=show_progress, + compute_w=compute_w, compute_h=compute_h, + compute_err=compute_err) + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/greedy.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,174 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +#$Id$ +""" +PyMF GREEDY[1] + + GREEDY: class for a deterministic SVD based greedy matrix reconstruction [1]. + + +[1] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix +Reconstruction via Greedy Approximation of SVD. ISAAC'2008. +""" + + +import time +import scipy.sparse +import numpy as np +from svd import * +from nmf import NMF + +__all__ = ["GREEDY"] + +class GREEDY(NMF): + """ + GREEDYVOL(data, num_bases=4, niter=100, show_progress=True, compW=True) + + + Deterministic Sparse Column Based Matrix Reconstruction via Greedy + Approximation of SVD. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. W is iteratively selected as columns + of data. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + k : number of singular vectors for the SVD step of the algorithm + num_bases (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying GREEDY to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> greedy_mdl = GREEDY(data, num_bases=2, niter=10) + >>> greedy_mdl.factorize() + + The basis vectors are now stored in greedy_mdl.W, the coefficients in + greedy_mdl.H. To compute coefficients for an existing set of basis + vectors simply copy W to greedy_mdl.W, and set compW to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> greedy_mdl = GREEDY(data, num_bases=2) + >>> greedy_mdl.W = W + >>> greedy_mdl.factorize(compute_w=False) + + The result is a set of coefficients greedy_mdl.H, s.t. data = W * greedy_mdl.H. + """ + + + def __init__(self, data, k=-1, num_bases=4): + # call inherited method + NMF.__init__(self, data, num_bases=num_bases) + self._k = k + if self._k == -1: + self._k = num_bases + + def update_h(self): + if scipy.sparse.issparse(self.data): + self.H = pinv(self.W) * self.data + else: + self.H = np.dot(pinv(self.W), self.data) + + def update_w(self): + def normalize_matrix(K): + """ Normalize a matrix K s.t. 
columns have Euclidean-norm |1| + """ + if scipy.sparse.issparse(K): + L = np.sqrt(np.array(K.multiply(K).sum(axis=0)))[0,:] + s = np.where(L > 0.0)[0] + L[s] = L[s]**-1 + KN = scipy.sparse.spdiags(L,0,len(L),len(L),format='csc') + K = K*KN + else: + L = np.sqrt((K**2).sum(axis=0)) + s = np.where(L > 0.0)[0] + L[s] = L[s]**-1 + K = K*L + return K + + self._t = np.zeros((self._num_bases)) + t0 = time.time() + self.select = [] + + # normalize data + A = self.data.copy() + + svd_mdl = SVD(A, k=self._k) + svd_mdl.factorize() + + if scipy.sparse.issparse(self.data): + B = svd_mdl.U * svd_mdl.S + B = B.tocsc() + else: + B = np.dot(svd_mdl.U, svd_mdl.S) + B = B[:, :self._num_bases] + + for i in range(self._num_bases): + A = normalize_matrix(A) + + if scipy.sparse.issparse(self.data): + T = B.transpose() * A + T = np.array(T.multiply(T).sum(axis=0))[0,:] + + # next selected column index + T[self.select] = 0.0 + idx = np.argmax(T) + Aidx = A[:, idx].copy() + self.select.append(idx) + + # update B + BC = Aidx.transpose() * B + B = B - (Aidx*BC) + + # update A + AC = Aidx.transpose() * A + A = A - (Aidx*AC) + + else: + T = np.dot(B.transpose(), A) + T = np.sum(T**2.0, axis=0) + + # next selected column index + T[self.select] = 0.0 + idx = np.argmax(T) + self.select.append(idx) + + # update B + BC = np.dot(B.transpose(),A[:,idx]) + B -= np.dot(A[:,idx].reshape(-1,1), BC.reshape(1,-1)) + + # and A + AC = np.dot(A.transpose(),A[:,idx]) + A -= np.dot(A[:,idx].reshape(-1,1), AC.reshape(1,-1)) + + + # detect the next best data point + self._logger.info('searching for next best column ...') + self._logger.info('cur_columns: ' + str(self.select)) + self._t[i] = time.time() - t0 + + # sort indices, otherwise h5py won't work + self.W = self.data[:, np.sort(self.select)] + + # "unsort" it again to keep the correct order + self.W = self.W[:, np.argsort(np.argsort(self.select))] + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/greedycur.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,83 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +#$Id$ +""" +PyMF CUR-like Sparse Column Based Matrix Reconstruction via Greedy Approximation[1] + + GREEDYCUR: class for CUR-like decompositions using the GREEDY[2] algorithm. + +[1] Drineas, P., Kannan, R. and Mahoney, M. (2006), 'Fast Monte Carlo Algorithms III: +Computing a Compressed Approixmate Matrix Decomposition', SIAM J. Computing 36(1), 184-206. +[2] Ali Civril, Malik Magdon-Ismail. Deterministic Sparse Column Based Matrix +Reconstruction via Greedy Approximation of SVD. ISAAC'2008. +""" + + +import numpy as np +from greedy import GREEDY +from cur import CUR + +__all__ = ["GREEDYCUR"] + +class GREEDYCUR(CUR): + ''' + GREEDYCUR(data, data, k=-1, rrank=0, crank=0) + + GREEDY-CUR Decomposition. Factorize a data matrix into three matrices s.t. + F = | data - USV| is minimal. Unlike CUR, GREEDYCUR selects the rows + and columns using GREEDY, i.e. it tries to find rows/columns that are close + to SVD-based solutions. + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + rrank: int, optional + Number of rows to sample from data. + 4 (default) + crank: int, optional + Number of columns to sample from data. + 4 (default) + show_progress: bool, optional + Print some extra information + False (default) + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV + + Example + ------- + >>> import numpy as np + >>> from greedycur import GREEDYCUR + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> cur_mdl = GREEDYCUR(data, show_progress=False, rrank=1, crank=2) + >>> cur_mdl.factorize() + """ + ''' + + def sample(self, A, c): + # set k to a value lower than the number of bases, usually + # gives better results. + k = np.round(c - c/5.0) + greedy_mdl = GREEDY(A, k=k, num_bases=c) + greedy_mdl.factorize(compute_h=False, compute_err=False, niter=1) + return greedy_mdl.select + + + def factorize(self): + # sample row and column indices that maximize the volume of the submatrix + self._rid = self.sample(self.data.transpose(), self._rrank) + self._cid = self.sample(self.data, self._crank) + self._rcnt = np.ones(len(self._rid)) + self._ccnt = np.ones(len(self._cid)) + + self.computeUCR() + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/kmeans.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,87 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF K-means clustering (unary-convex matrix factorization). +""" + + +import numpy as np +import random + +import dist +from nmf import NMF + +__all__ = ["Kmeans"] + +class Kmeans(NMF): + """ + Kmeans(data, num_bases=4) + + K-means clustering. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to unary vectors, W + is simply the mean over the corresponding samples in "data". + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying K-means to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> kmeans_mdl = Kmeans(data, num_bases=2) + >>> kmeans_mdl.factorize(niter=10) + + The basis vectors are now stored in kmeans_mdl.W, the coefficients in kmeans_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to kmeans_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = [[1.0, 0.0], [0.0, 1.0]] + >>> kmeans_mdl = Kmeans(data, num_bases=2) + >>> kmeans_mdl.W = W + >>> kmeans_mdl.factorize(niter=1, compute_w=False) + + The result is a set of coefficients kmeans_mdl.H, s.t. data = W * kmeans_mdl.H. + """ + def init_h(self): + # W has to be present for H to be initialized + self.H = np.zeros((self._num_bases, self._num_samples)) + self.update_h() + + def init_w(self): + # set W to some random data samples + sel = random.sample(xrange(self._num_samples), self._num_bases) + + # sort indices, otherwise h5py won't work + self.W = self.data[:, np.sort(sel)] + + + def update_h(self): + # and assign samples to the best matching centers + self.assigned = dist.vq(self.W, self.data) + self.H = np.zeros(self.H.shape) + self.H[self.assigned, range(self._num_samples)] = 1.0 + + + def update_w(self): + for i in range(self._num_bases): + idx = np.where(self.assigned==i)[0] + n = len(idx) + if n > 1: + self.W[:,i] = np.sum(self.data[:,idx], axis=1)/n
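In the segmentation setting the useful output of Kmeans is the per-sample label vector .assigned; a hedged sketch on random stand-in features (one column per frame), where label changes mark candidate section boundaries:

import numpy as np
from kmeans import Kmeans

# stand-in feature matrix: 12 descriptors x 200 frames (columns are frames)
features = np.random.rand(12, 200)

km = Kmeans(features, num_bases=4)
km.factorize(niter=20)

# km.W holds the 4 cluster means, km.assigned the per-frame cluster label
labels = km.assigned
boundaries = np.where(np.diff(labels) != 0)[0] + 1
print(len(boundaries))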
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/laesa.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,87 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF LAESA +""" + + +import scipy.sparse +import numpy as np + +from dist import * +from sivm import SIVM + +__all__ = ["LAESA"] + +class LAESA(SIVM): + """ + LAESA(data, num_bases=4) + + + Simplex Volume Maximization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively + found by maximizing the volume of the resulting simplex (see [1]). + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying LAESA to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> laesa_mdl = LAESA(data, num_bases=2) + >>> laesa_mdl.factorize() + + The basis vectors are now stored in laesa_mdl.W, the coefficients in laesa_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to laesa_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> laesa_mdl = LAESA(data, num_bases=2) + >>> laesa_mdl.W = W + >>> laesa_mdl.factorize(niter=1, compute_w=False) + + The result is a set of coefficients laesa_mdl.H, s.t. data = W * laesa_mdl.H. + """ + def update_w(self): + # initialize some of the recursively updated distance measures + self.init_sivm() + distiter = self._distance(self.select[-1]) + + for l in range(self._num_bases-1): + d = self._distance(self.select[-1]) + + # replace distances in distiter + distiter = np.where(d<distiter, d, distiter) + + # detect the next best data point + self.select.append(np.argmax(distiter)) + self._logger.info('cur_nodes: ' + str(self.select)) + + # sort indices, otherwise h5py won't work + self.W = self.data[:, np.sort(self.select)] + + # but "unsort" it again to keep the correct order + self.W = self.W[:, np.argsort(np.argsort(self.select))] + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/nmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,206 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Non-negative Matrix Factorization. + + NMF: Class for Non-negative Matrix Factorization + +[1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative +Matrix Factorization, Nature 401(6755), 788-799. +""" + + +import numpy as np +import logging +import logging.config +import scipy.sparse + +__all__ = ["NMF"] + +class NMF(): + """ + NMF(data, num_bases=4) + + + Non-negative Matrix Factorization. Factorize a data matrix into two matrices + s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative + data. Uses the classicial multiplicative update rule. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NMF to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2, niter=10) + >>> nmf_mdl.factorize() + + The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to nmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2) + >>> nmf_mdl.W = W + >>> nmf_mdl.factorize(niter=20, compute_w=False) + + The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H. 
+ """ + + # some small value + _EPS = 10**-8 + + def __init__(self, data, num_bases=4): + + def setup_logging(): + # create logger + self._logger = logging.getLogger("pymf") + + # add ch to logger + if len(self._logger.handlers) < 1: + # create console handler and set level to debug + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + # create formatter + formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") + + # add formatter to ch + ch.setFormatter(formatter) + + self._logger.addHandler(ch) + + setup_logging() + + # set variables + self.data = data + self._num_bases = num_bases + + # initialize H and W to random values + (self._data_dimension, self._num_samples) = self.data.shape + + + def frobenius_norm(self): + """ Frobenius norm (||data - WH||) of a data matrix and a low rank + approximation given by WH + + Returns: + frobenius norm: F = ||data - WH|| + """ + + # check if W and H exist + if hasattr(self,'H') and hasattr(self,'W') and not scipy.sparse.issparse(self.data): + err = np.sqrt( np.sum((self.data[:,:] - np.dot(self.W, self.H))**2 )) + else: + err = -123456 + + return err + + def init_w(self): + self.W = np.random.random((self._data_dimension, self._num_bases)) + + def init_h(self): + self.H = np.random.random((self._num_bases, self._num_samples)) + + def update_h(self): + # pre init H1, and H2 (necessary for storing matrices on disk) + H2 = np.dot(np.dot(self.W.T, self.W), self.H) + 10**-9 + self.H *= np.dot(self.W.T, self.data[:,:]) + self.H /= H2 + + def update_w(self): + # pre init W1, and W2 (necessary for storing matrices on disk) + W2 = np.dot(np.dot(self.W, self.H), self.H.T) + 10**-9 + self.W *= np.dot(self.data[:,:], self.H.T) + self.W /= W2 + + def converged(self, i): + derr = np.abs(self.ferr[i] - self.ferr[i-1])/self._num_samples + if derr < self._EPS: + return True + else: + return False + + def factorize(self, niter=1, show_progress=False, + compute_w=True, compute_h=True, compute_err=True): + """ Factorize s.t. WH = data + + Parameters + ---------- + niter : int + number of iterations. + show_progress : bool + print some extra information to stdout. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH| for each iteration. + """ + + if show_progress: + self._logger.setLevel(logging.INFO) + else: + self._logger.setLevel(logging.ERROR) + + # create W and H if they don't already exist + # -> any custom initialization to W,H should be done before + if not hasattr(self,'W'): + self.init_w() + + if not hasattr(self,'H'): + self.init_h() + + if compute_err: + self.ferr = np.zeros(niter) + + for i in xrange(niter): + if compute_w: + self.update_w() + + if compute_h: + self.update_h() + + if compute_err: + self.ferr[i] = self.frobenius_norm() + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter) + + ' FN:' + str(self.ferr[i])) + else: + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter)) + + + # check if the err is not changing anymore + if i > 1 and compute_err: + if self.converged(i): + # adjust the error measure + self.ferr = self.ferr[:i] + break + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/nmfals.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,97 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Non-negative Matrix Factorization. + + NMFALS: Class for Non-negative Matrix Factorization using alternating least + squares optimization (requires cvxopt) + +[1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative +Matrix Factorization, Nature 401(6755), 788-799. +""" + + + +import numpy as np +from cvxopt import solvers, base +from nmf import NMF + +__all__ = ["NMFALS"] + +class NMFALS(NMF): + """ + NMF(data, num_bases=4) + + + Non-negative Matrix Factorization. Factorize a data matrix into two matrices + s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative + data. Uses the an alternating least squares procedure (quite slow for larger + data sets) + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NMF to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nmf_mdl = NMFALS(data, num_bases=2) + >>> nmf_mdl.factorize(niter=10) + + The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to nmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMFALS(data, num_bases=2) + >>> nmf_mdl.W = W + >>> nmf_mdl.factorize(niter=1, compute_w=False) + + The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H. + """ + + def update_h(self): + def updatesingleH(i): + # optimize alpha using qp solver from cvxopt + FA = base.matrix(np.float64(np.dot(-self.W.T, self.data[:,i]))) + al = solvers.qp(HA, FA, INQa, INQb) + self.H[:,i] = np.array(al['x']).reshape((1,-1)) + + # float64 required for cvxopt + HA = base.matrix(np.float64(np.dot(self.W.T, self.W))) + INQa = base.matrix(-np.eye(self._num_bases)) + INQb = base.matrix(0.0, (self._num_bases,1)) + + map(updatesingleH, xrange(self._num_samples)) + + + def update_w(self): + def updatesingleW(i): + # optimize alpha using qp solver from cvxopt + FA = base.matrix(np.float64(np.dot(-self.H, self.data[i,:].T))) + al = solvers.qp(HA, FA, INQa, INQb) + self.W[i,:] = np.array(al['x']).reshape((1,-1)) + + # float64 required for cvxopt + HA = base.matrix(np.float64(np.dot(self.H, self.H.T))) + INQa = base.matrix(-np.eye(self._num_bases)) + INQb = base.matrix(0.0, (self._num_bases,1)) + + map(updatesingleW, xrange(self._data_dimension))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/nmfnnls.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,80 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Non-negative Matrix Factorization. + + NMFALS: Class for Non-negative Matrix Factorization using non negative + least squares optimization (requires scipy.optimize) + +[1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative +Matrix Factorization, Nature 401(6755), 788-799. +""" + + + +import scipy.optimize +from nmf import NMF + +__all__ = ["NMFNNLS"] + +class NMFNNLS(NMF): + """ + NMFNNLS(data, num_bases=4) + + + Non-negative Matrix Factorization. Factorize a data matrix into two matrices + s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative + data. Uses the Lawsons and Hanson's algorithm for non negative constrained + least squares (-> also see scipy.optimize.nnls) + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NMF to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nmf_mdl = NMFALS(data, num_bases=2) + >>> nmf_mdl.factorize(niter=10) + + The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to nmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMFALS(data, num_bases=2) + >>> nmf_mdl.W = W + >>> nmf_mdl.factorize(niter=1, compute_w=False) + + The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H. + """ + + def update_h(self): + def updatesingleH(i): + self.H[:,i] = scipy.optimize.nnls(self.W, self.data[:,i])[0] + + map(updatesingleH, xrange(self._num_samples)) + + + def update_w(self): + def updatesingleW(i): + self.W[i,:] = scipy.optimize.nnls(self.H.T, self.data[i,:].T)[0] + + map(updatesingleW, xrange(self._data_dimension))
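A minimal sketch for the NNLS variant itself (the doctest above constructs NMFALS); it needs only numpy and scipy.optimize.nnls, with random stand-in data:

import numpy as np
from nmfnnls import NMFNNLS

data = np.random.rand(5, 60)

mdl = NMFNNLS(data, num_bases=3)
mdl.factorize(niter=10)

approx = np.dot(mdl.W, mdl.H)
print(np.sqrt(np.sum((data - approx) ** 2)))   # Frobenius reconstruction error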
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/nndsvd.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,119 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +#$Id$ +""" +PyMF Non-negative Double Singular Value Decompositions. + + NNDSVD: Class for Non-negative Double Singular Value Decompositions [1] + +[1] C. Boutsidis and E. Gallopoulos (2008), SVD based initialization: A head +start for nonnegative matrix factorization, Pattern Recognition, 41, 1350-1362 +""" + + +import numpy as np + +from nmf import NMF +from svd import SVD + +__all__ = ["NNDSVD"] + +class NNDSVD(NMF): + """ + NNDSVD(data, num_bases=4) + + + Non-negative Double Singular Value Decompositions. Factorize a data + matrix into two matrices s.t. F = | data - W*H | = | is minimal. H, and + W are restricted to non-negative data. NNDSVD is primarily used for + initializing NMF. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NNDSVD to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nndsvd_mdl = NNDSVD(data, num_bases=2) + >>> nndsvd_mdl.factorize() + + The basis vectors are now stored in nndsvd_mdl.W, the coefficients in + nndsvd_mdl.H. To initialize NMF with nndsvd_mdl.W, nndsvd_mdl.H + simply copy W to nmf_mdl.W and H to nmf_mdl.H: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2) + >>> nmf_mdl.W = nndsvd_mdl.W + >>> nmf_mdl.H = nndsvd_mdl.H + >>> nmf_mdl.factorize(niter=20) + + The result is a set of (more optimal) coefficients nmf_mdl.H, nmf_mdl.W. + """ + def init_w(self): + self.W = np.zeros((self._data_dimension, self._num_bases)) + + def init_h(self): + self.H = np.zeros((self._num_bases, self._num_samples)) + + def update_h(self): + pass + + def update_w(self): + svd_mdl = SVD(self.data) + svd_mdl.factorize() + + U, S, V = svd_mdl.U, svd_mdl.S, svd_mdl.V + + # The first left singular vector is nonnegative + # (abs is only used as values could be all negative) + self.W[:,0] = np.sqrt(S[0,0]) * np.abs(U[:,0]) + + #The first right singular vector is nonnegative + self.H[0,:] = np.sqrt(S[0,0]) * np.abs(V[0,:].T) + + for i in range(1,self._num_bases): + # Form the rank one factor + Tmp = np.dot(U[:,i:i+1]*S[i,i], V[i:i+1,:]) + + # zero out the negative elements + Tmp = np.where(Tmp < 0, 0.0, Tmp) + + # Apply 2nd SVD + svd_mdl_2 = SVD(Tmp) + svd_mdl_2.factorize() + u, s, v = svd_mdl_2.U, svd_mdl_2.S, svd_mdl_2.V + + # The first left singular vector is nonnegative + self.W[:,i] = np.sqrt(s[0,0]) * np.abs(u[:,0]) + + #The first right singular vector is nonnegative + self.H[i,:] = np.sqrt(s[0,0]) * np.abs(v[0,:].T) + + def factorize(self, niter=1, show_progress=False, + compute_w=True, compute_h=True, compute_err=True): + + # enforce certain default values, otherwise it won't work + NMF.factorize(self, niter=1, show_progress=show_progress, + compute_w=True, compute_h=True, compute_err=compute_err) + +if __name__ == "__main__": + import doctest + doctest.testmod()
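An end-to-end sketch of NNDSVD as an NMF initializer on a single matrix, assuming the vendored svd.py is importable; the matrix and base count are arbitrary:

import numpy as np
from nndsvd import NNDSVD
from nmf import NMF

data = np.random.rand(8, 120)

init = NNDSVD(data, num_bases=4)
init.factorize()                  # one pass; fills init.W and init.H

nmf_mdl = NMF(data, num_bases=4)
nmf_mdl.W = init.W.copy()
nmf_mdl.H = init.H.copy()
nmf_mdl.factorize(niter=30)       # refines the NNDSVD starting point

print(nmf_mdl.ferr[-1])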
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/pca.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,140 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Principal Component Analysis. + + PCA: Class for Principal Component Analysis +""" + + + +import numpy as np + +from nmf import NMF +from svd import SVD + + +__all__ = ["PCA"] + +class PCA(NMF): + """ + PCA(data, num_bases=4, center_mean=True) + + + Archetypal Analysis. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. W is set to the eigenvectors of the + data covariance. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + center_mean: bool, True + Make sure that the data is centred around the mean. + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying PCA to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> pca_mdl = PCA(data, num_bases=2) + >>> pca_mdl.factorize() + + The basis vectors are now stored in pca_mdl.W, the coefficients in pca_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to pca_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> pca_mdl = PCA(data, num_bases=2) + >>> pca_mdl.W = W + >>> pca_mdl.factorize(compute_w=False) + + The result is a set of coefficients pca_mdl.H, s.t. data = W * pca_mdl.H. + """ + + def __init__(self, data, num_bases=0, center_mean=True): + + NMF.__init__(self, data, num_bases=num_bases) + + # center the data around the mean first + self._center_mean = center_mean + + if self._center_mean: + # copy the data before centering it + self._data_orig = data + self._meanv = self._data_orig[:,:].mean(axis=1).reshape(data.shape[0],-1) + self.data = self._data_orig - self._meanv + else: + self.data = data + + def init_h(self): + pass + + def init_w(self): + pass + + def update_h(self): + self.H = np.dot(self.W.T, self.data[:,:]) + + def update_w(self): + # compute eigenvectors and eigenvalues using SVD + svd_mdl = SVD(self.data) + svd_mdl.factorize() + + # argsort sorts in ascending order -> do reverese indexing + # for accesing values in descending order + S = np.diag(svd_mdl.S) + order = np.argsort(S)[::-1] + + # select only a few eigenvectors ... + if self._num_bases >0: + order = order[:self._num_bases] + + self.W = svd_mdl.U[:,order] + self.eigenvalues = S[order] + + def factorize(self, show_progress=False, compute_w=True, compute_h=True, + compute_err=True, niter=1): + """ Factorize s.t. WH = data + + Parameters + ---------- + show_progress : bool + print some extra information to stdout. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH|. 
+ """ + + NMF.factorize(self, niter=1, show_progress=show_progress, + compute_w=compute_w, compute_h=compute_h, + compute_err=compute_err) + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/rnmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,120 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Non-negative Matrix Factorization. + + NMF: Class for Non-negative Matrix Factorization + +[1] Lee, D. D. and Seung, H. S. (1999), Learning the Parts of Objects by Non-negative +Matrix Factorization, Nature 401(6755), 788-799. +""" + + +import numpy as np +import logging +import logging.config +import scipy.sparse + +from nmf import NMF + +__all__ = ["RNMF"] + +class RNMF(NMF): + """ + RNMF(data, num_bases=4) + + + Non-negative Matrix Factorization. Factorize a data matrix into two matrices + s.t. F = | data - W*H | = | is minimal. H, and W are restricted to non-negative + data. Uses the classicial multiplicative update rule. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying NMF to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2, niter=10) + >>> nmf_mdl.factorize() + + The basis vectors are now stored in nmf_mdl.W, the coefficients in nmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to nmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> nmf_mdl = NMF(data, num_bases=2) + >>> nmf_mdl.W = W + >>> nmf_mdl.factorize(niter=20, compute_w=False) + + The result is a set of coefficients nmf_mdl.H, s.t. data = W * nmf_mdl.H. + """ + + def __init__(self, data, num_bases=4, lamb=2.0): + # call inherited method + NMF.__init__(self, data, num_bases=num_bases) + self._lamb = lamb + + def soft_thresholding(self, X, lamb): + X = np.where(np.abs(X) <= lamb, 0.0, X) + X = np.where(X > lamb, X - lamb, X) + X = np.where(X < -1.0*lamb, X + lamb, X) + return X + + def init_w(self): + self.W = np.random.random((self._data_dimension, self._num_bases)) + + def init_h(self): + self.H = np.random.random((self._num_bases, self._num_samples)) + self.H[:,:] = 1.0 + # normalized bases + Wnorm = np.sqrt(np.sum(self.W**2.0, axis=0)) + self.W /= Wnorm + + for i in range(self.H.shape[0]): + self.H[i,:] *= Wnorm[i] + + self.update_s() + + def update_s(self): + self.S = self.data - np.dot(self.W, self.H) + self.S = self.soft_thresholding(self.S, self._lamb) + + def update_h(self): + # pre init H1, and H2 (necessary for storing matrices on disk) + H1 = np.dot(self.W.T, self.S - self.data) + H1 = np.abs(H1) - H1 + H1 /= (2.0* np.dot(self.W.T, np.dot(self.W, self.H))) + self.H *= H1 + + # adapt S + self.update_s() + + def update_w(self): + # pre init W1, and W2 (necessary for storing matrices on disk) + W1 = np.dot(self.S - self.data, self.H.T) + #W1 = np.dot(self.data - self.S, self.H.T) + W1 = np.abs(W1) - W1 + W1 /= (2.0 * (np.dot(self.W, np.dot(self.H, self.H.T)))) + self.W *= W1 + +if __name__ == "__main__": + import doctest + doctest.testmod()
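RNMF splits the input into a low-rank part W*H plus a sparse residual S (soft-thresholded by lamb); the sketch below plants one large spike and inspects how much of it lands in S rather than in the bases. Data and parameter values are arbitrary:

import numpy as np
from rnmf import RNMF

data = np.random.rand(6, 100)
data[2, 50] += 10.0               # plant an outlier

mdl = RNMF(data, num_bases=3, lamb=2.0)
mdl.factorize(niter=50)

# S should pick up most of the spike while W*H stays low-rank
residual = data - (np.dot(mdl.W, mdl.H) + mdl.S)
print('max |S| = %.3f, residual = %.3e'
      % (np.abs(mdl.S).max(), np.sqrt(np.sum(residual ** 2))))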
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sivm.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,232 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Simplex Volume Maximization [1] + + SIVM: class for SiVM + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Yes We Can - Simplex Volume +Maximization for Descriptive Web-Scale Matrix Factorization. In Proc. Int. +Conf. on Information and Knowledge Management. ACM. 2010. +""" + + +import scipy.sparse +import numpy as np + +from dist import * +from aa import AA + +__all__ = ["SIVM"] + +class SIVM(AA): + """ + SIVM(data, num_bases=4, dist_measure='l2') + + + Simplex Volume Maximization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively + found by maximizing the volume of the resulting simplex (see [1]). + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + dist_measure : one of 'l2' ,'cosine', 'l1', 'kl' + Standard is 'l2' which maximizes the volume of the simplex. In contrast, + 'cosine' maximizes the volume of a cone (see [1] for details). + init : string (default: 'fastmap') + 'fastmap' or 'origin'. Sets the method used for finding the very first + basis vector. 'Origin' assumes the zero vector, 'Fastmap' picks one of + the two vectors that have the largest pairwise distance. + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying SIVM to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> sivm_mdl = SIVM(data, num_bases=2) + >>> sivm_mdl.factorize() + + The basis vectors are now stored in sivm_mdl.W, the coefficients in sivm_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to sivm_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> sivm_mdl = SIVM(data, num_bases=2) + >>> sivm_mdl.W = W + >>> sivm_mdl.factorize(compute_w=False) + + The result is a set of coefficients sivm_mdl.H, s.t. data = W * sivm_mdl.H. + """ + + # always overwrite the default number of iterations + # -> any value other does not make sense. 
+ _NITER = 1 + + def __init__(self, data, num_bases=4, dist_measure='l2', init='fastmap'): + + AA.__init__(self, data, num_bases=num_bases) + + self._dist_measure = dist_measure + self._init = init + + # assign the correct distance function + if self._dist_measure == 'l1': + self._distfunc = l1_distance + + elif self._dist_measure == 'l2': + self._distfunc = l2_distance + + elif self._dist_measure == 'cosine': + self._distfunc = cosine_distance + + elif self._dist_measure == 'abs_cosine': + self._distfunc = abs_cosine_distance + + elif self._dist_measure == 'weighted_abs_cosine': + self._distfunc = weighted_abs_cosine_distance + + elif self._dist_measure == 'kl': + self._distfunc = kl_divergence + + + def _distance(self, idx): + """ compute distances of a specific data point to all other samples""" + + if scipy.sparse.issparse(self.data): + step = self.data.shape[1] + else: + step = 50000 + + d = np.zeros((self.data.shape[1])) + if idx == -1: + # set vec to origin if idx=-1 + vec = np.zeros((self.data.shape[0], 1)) + if scipy.sparse.issparse(self.data): + vec = scipy.sparse.csc_matrix(vec) + else: + vec = self.data[:, idx:idx+1] + + self._logger.info('compute distance to node ' + str(idx)) + + # slice data into smaller chunks + for idx_start in range(0, self.data.shape[1], step): + if idx_start + step > self.data.shape[1]: + idx_end = self.data.shape[1] + else: + idx_end = idx_start + step + + d[idx_start:idx_end] = self._distfunc( + self.data[:,idx_start:idx_end], vec) + self._logger.info('completed:' + + str(idx_end/(self.data.shape[1]/100.0)) + "%") + return d + + def init_h(self): + self.H = np.zeros((self._num_bases, self._num_samples)) + + def init_w(self): + self.W = np.zeros((self._data_dimension, self._num_bases)) + + def init_sivm(self): + self.select = [] + if self._init == 'fastmap': + # Fastmap like initialization + # set the starting index for fastmap initialization + cur_p = 0 + + # after 3 iterations the first "real" index is found + for i in range(3): + d = self._distance(cur_p) + cur_p = np.argmax(d) + + # store maximal found distance -> later used for "a" (->update_w) + self._maxd = np.max(d) + self.select.append(cur_p) + + elif self._init == 'origin': + # set first vertex to origin + cur_p = -1 + d = self._distance(cur_p) + self._maxd = np.max(d) + self.select.append(cur_p) + + def update_w(self): + """ compute new W """ + EPS = 10**-8 + self.init_sivm() + + # initialize some of the recursively updated distance measures .... + d_square = np.zeros((self.data.shape[1])) + d_sum = np.zeros((self.data.shape[1])) + d_i_times_d_j = np.zeros((self.data.shape[1])) + distiter = np.zeros((self.data.shape[1])) + a = np.log(self._maxd) + a_inc = a.copy() + + for l in range(1, self._num_bases): + d = self._distance(self.select[l-1]) + + # take the log of d (sually more stable that d) + d = np.log(d + EPS) + + d_i_times_d_j += d * d_sum + d_sum += d + d_square += d**2 + distiter = d_i_times_d_j + a*d_sum - (l/2.0) * d_square + + # detect the next best data point + self.select.append(np.argmax(distiter)) + + self._logger.info('cur_nodes: ' + str(self.select)) + + # sort indices, otherwise h5py won't work + self.W = self.data[:, np.sort(self.select)] + + # "unsort" it again to keep the correct order + self.W = self.W[:, np.argsort(np.argsort(self.select))] + + def factorize(self, show_progress=False, compute_w=True, compute_h=True, + compute_err=True, niter=1): + """ Factorize s.t. WH = data + + Parameters + ---------- + show_progress : bool + print some extra information to stdout. 
+ compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH|. + """ + + AA.factorize(self, niter=1, show_progress=show_progress, + compute_w=compute_w, compute_h=compute_h, + compute_err=compute_err) + +if __name__ == "__main__": + import doctest + doctest.testmod()
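The basis vectors found by SiVM are actual data columns, and .select records which ones were picked; a sketch on random stand-in frames that skips the convex coefficient step (compute_h=False) so it does not depend on the AA solver:

import numpy as np
from sivm import SIVM

features = np.random.rand(10, 300)         # one column per frame

sivm_mdl = SIVM(features, num_bases=5, dist_measure='l2')
sivm_mdl.factorize(compute_h=False, compute_err=False)

print(sivm_mdl.select)                     # indices of the selected extreme frames
print(sivm_mdl.W.shape)                    # (10, 5): those columns form W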
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sivm_cur.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,97 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Simplex Volume Maximization for CUR [1] + + SIVMCUR: class for SiVM-CUR + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Yes We Can - Simplex Volume +Maximization for Descriptive Web-Scale Matrix Factorization. In Proc. Int. +Conf. on Information and Knowledge Management. ACM. 2010. +""" + + +import numpy as np +import scipy +from sivm import SIVM +from cur import CUR + +__all__ = ["SIVM_CUR"] + +class SIVM_CUR(CUR): + ''' + SIVM_CUR(data, num_bases=4, dist_measure='l2') + + Simplex Volume based CUR Decomposition. Factorize a data matrix into three + matrices s.t. F = | data - USV| is minimal. Unlike CUR, SIVMCUR selects the + rows and columns using SIVM, i.e. it tries to maximize the volume of the + enclosed simplex. + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + rrank: int, optional + Number of rows to sample from data. + 4 (default)crank + crank: int, optional + Number of columns to sample from data. + 4 (default) + dist_measure: string, optional + The distance measure for finding the next best candidate that + maximizes the simplex volume ['l2','l1','cosine','sparse_graph_l2'] + 'l2' (default) + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV + + Example + ------- + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> sivmcur_mdl = SIVM_CUR(data, show_progress=False, rrank=1, crank=2) + >>> sivmcur_mdl.factorize() + ''' + + def __init__(self, data, k=-1, rrank=0, crank=0, dist_measure='l2', init='origin'): + CUR.__init__(self, data, k=k, rrank=rrank, crank=rrank) + self._dist_measure = dist_measure + self.init = init + + def sample(self, A, c): + # for optimizing the volume of the submatrix, set init to 'origin' (otherwise the volume of + # the ordinary simplex would be optimized) + sivm_mdl = SIVM(A, num_bases=c, dist_measure=self._dist_measure, + init=self.init) + sivm_mdl.factorize(show_progress=False, compute_w=True, niter=1, + compute_h=False, compute_err=False) + + return sivm_mdl.select + + + def factorize(self): + """ Factorize s.t. CUR = data + + Updated Values + -------------- + .C : updated values for C. + .U : updated values for U. + .R : updated values for R. + """ + # sample row and column indices that maximize the volume of the submatrix + self._rid = self.sample(self.data.transpose(), self._rrank) + self._cid = self.sample(self.data, self._crank) + + self._rcnt = np.ones(len(self._rid)) + self._ccnt = np.ones(len(self._cid)) + + self.computeUCR() + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sivm_gsat.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,185 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Simplex Volume Maximization [1] + + SIVM_GSAT: class for gsat-SiVM + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Yes We Can - Simplex Volume +Maximization for Descriptive Web-Scale Matrix Factorization. In Proc. Int. +Conf. on Information and Knowledge Management. ACM. 2010. +""" + + +import logging +import numpy as np +from dist import * +from vol import cmdet +from sivm import SIVM + +__all__ = ["SIVM_GSAT"] + +class SIVM_GSAT(SIVM): + """ + SIVM(data, num_bases=4, dist_measure='l2') + + + Simplex Volume Maximization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively + found by maximizing the volume of the resulting simplex (see [1]). Can be + applied to data streams using the .online_update_w(vec) function which decides + on adding data sample "vec" to the already selected basis vectors. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + dist_measure : one of 'l2' ,'cosine', 'l1', 'kl' + Standard is 'l2' which maximizes the volume of the simplex. In contrast, + 'cosine' maximizes the volume of a cone (see [1] for details). + init : string (default: 'fastmap') + 'fastmap' or 'origin'. Sets the method used for finding the very first + basis vector. 'Origin' assumes the zero vector, 'Fastmap' picks one of + the two vectors that have the largest pairwise distance. + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying SIVM to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> sivm_mdl = SIVM_GSAT(data, num_bases=2) + >>> sivm_mdl.factorize() + + The basis vectors are now stored in sivm_mdl.W, the coefficients in sivm_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to sivm_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> sivm_mdl = SIVM_GSAT(data, num_bases=2) + >>> sivm_mdl.W = W + >>> sivm_mdl.factorize(compute_w=False) + + The result is a set of coefficients sivm_mdl.H, s.t. data = W * sivm_mdl.H. + """ + + def init_w(self): + self.select = range(self._num_bases) + self.W = self.data[:, self.select] + + def online_update_w(self, vec): + # update D if it does not exist + k = self._num_bases + if not hasattr(self, 'D'): + self.D = np.zeros((k + 1, k + 1)) + self.D[:k, :k] = pdist(self.W, self.W) + self.V = cmdet(self.D[:k, :k]) + + tmp_d = self._distfunc(self.W, vec.reshape((-1,1))) + self.D[k, :-1] = tmp_d + self.D[:-1, k] = tmp_d + + v = np.zeros((self._num_bases + 1)) + + for i in range(self._num_bases): + # compute volume for each combination... 
+ s = np.setdiff1d(range(self._num_bases + 1), [i]) + v[i] = cmdet((self.D[s,:])[:,s]) + + # select index that maximizes the volume + v[-1] = self.V + s = np.argmax(v) + + if s < self._num_bases: + self.W[:,s] = vec + self.D[:self._num_bases, :self._num_bases] = pdist(self.W, self.W) + + if not hasattr(self, '_v'): + self._v = [self.V] + self.V = v[s] + self._v.append(v[s]) + + self._logger.info('Volume increased:' + str(self.V)) + return True, s + + return False,-1 + + def update_w(self): + n = np.int(np.floor(np.random.random() * self._num_samples)) + if n not in self.select: + updated, s = self.online_update_w(self.data[:,n]) + if updated: + self.select[s] = n + self._logger.info('Current selection:' + str(self.select)) + + + def factorize(self, show_progress=False, compute_w=True, compute_h=True, + compute_err=True, niter=1): + """ Factorize s.t. WH = data + + Parameters + ---------- + show_progress : bool + print some extra information to stdout. + niter : int + number of iterations. + compute_h : bool + iteratively update values for H. + compute_w : bool + iteratively update values for W. + compute_err : bool + compute Frobenius norm |data-WH| after each update and store + it to .ferr[k]. + + Updated Values + -------------- + .W : updated values for W. + .H : updated values for H. + .ferr : Frobenius norm |data-WH|. + """ + if show_progress: + self._logger.setLevel(logging.INFO) + else: + self._logger.setLevel(logging.ERROR) + + # create W and H if they don't already exist + # -> any custom initialization to W,H should be done before + if not hasattr(self,'W'): + self.init_w() + + if not hasattr(self,'H'): + self.init_h() + + if compute_err: + self.ferr = np.zeros(niter) + + for i in xrange(niter): + if compute_w: + self.update_w() + + if compute_h: + self.update_h() + + if compute_err: + self.ferr[i] = self.frobenius_norm() + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter) + + ' FN:' + str(self.ferr[i])) + else: + self._logger.info('Iteration ' + str(i+1) + '/' + str(niter)) + + +if __name__ == "__main__": + import doctest + doctest.testmod()
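The docstring above mentions streaming use via online_update_w but does not show it. A minimal sketch follows, assuming the pymf modules in this directory are importable and that factorize behaves as defined above; H is skipped because only the basis W matters for the streaming decision.

import numpy as np
from sivm_gsat import SIVM_GSAT   # assumes this pymf directory is on sys.path

data = np.random.rand(3, 50)
mdl = SIVM_GSAT(data, num_bases=4)
mdl.factorize(niter=20, compute_h=False)   # pick 4 columns spanning a large simplex

# Stream further samples; a vector is only kept if it enlarges the simplex volume.
for vec in np.random.rand(3, 10).T:
    updated, swapped = mdl.online_update_w(vec)
    if updated:
        print(swapped)   # index of the basis column that was replaced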
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sivm_search.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,165 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Simplex Volume Maximization [1] + + SIVM_SEARCH: class for search-SiVM + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Yes We Can - Simplex Volume +Maximization for Descriptive Web-Scale Matrix Factorization. In Proc. Int. +Conf. on Information and Knowledge Management. ACM. 2010. +""" + + +import scipy.sparse +import numpy as np +from scipy import inf +try: + from scipy.misc.common import factorial +except: + from scipy.misc import factorial + +from dist import * +from vol import * +from sivm import SIVM + +__all__ = ["SIVM_SEARCH"] + +class SIVM_SEARCH(SIVM): + """ + SIVM_SEARCH(data, num_bases=4, dist_measure='l2') + + + Simplex Volume Maximization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively + found by maximizing the volume of the resulting simplex (see [1]). A solution + is found by employing a simple A-star like search strategy. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + dist_measure : one of 'l2' ,'cosine', 'l1', 'kl' + Standard is 'l2' which maximizes the volume of the simplex. In contrast, + 'cosine' maximizes the volume of a cone (see [1] for details). + init : string (default: 'fastmap') + 'fastmap' or 'origin'. Sets the method used for finding the very first + basis vector. 'Origin' assumes the zero vector, 'Fastmap' picks one of + the two vectors that have the largest pairwise distance. + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying SIVM to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> sivm_mdl = SIVM_SEARCH(data, num_bases=2) + >>> sivm_mdl.factorize() + + The basis vectors are now stored in sivm_mdl.W, the coefficients in sivm_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to sivm_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> sivm_mdl = SIVM_SEARCH(data, num_bases=2) + >>> sivm_mdl.W = W + >>> sivm_mdl.factorize(compute_w=False) + + The result is a set of coefficients sivm_mdl.H, s.t. data = W * sivm_mdl.H. 
+ """ + + def update_w(self): + def h(sel,D,k): + # compute the volume for a selection of sel columns + # and a k-1 simplex (-> k columns have to be selected) + mv = np.max(D) + + # fill the remaining distance by the maximal overall found distance + d = np.zeros((k,k)) + mv + for i in range(k): + d[i,i] = 0.0 + + for idx_i,i in enumerate(sel): + for idx_j,j in enumerate(sel): + d[idx_i,idx_j] = D[i,j] + + return d + + # compute distance matrix -> required for the volume + D = pdist(self.data, self.data) + Openset = {} + + for i in range(self._num_samples): + # compute volume for temp selection + d = h([i],D,self._num_bases) + Vtmp = cmdet(d) + Openset[tuple([i])] = Vtmp + + Closedset = {} + finished = False + self._v = [] + self.init_sivm() + next_sel = np.array([self.select[0]]) + iter = 0 + + while not finished: + # add the current selection to closedset + Closedset[(tuple(next_sel))] = [] + + for i in range(D.shape[0]): + # create a temp selection + tmp_sel = np.array(next_sel).flatten() + tmp_sel = np.concatenate((tmp_sel, [i]),axis=0) + tmp_sel = np.unique(tmp_sel) + tmp_sel = list(tmp_sel) + hkey = tuple(tmp_sel) + + if len(tmp_sel) > len(next_sel) and ( + not Closedset.has_key(hkey)) and ( + not Openset.has_key(hkey)): + + # compute volume for temp selection + d = h(tmp_sel, D, self._num_bases) + Vtmp = cmdet(d) + + # add to openset + Openset[hkey] = Vtmp + + # get next best tuple + vmax = 0.0 + for (k,v) in Openset.iteritems(): + if v > vmax: + next_sel = k + vmax = v + + self._logger.info('Iter:' + str(iter)) + self._logger.info('Current selection:' + str(next_sel)) + self._logger.info('Current volume:' + str(vmax)) + self._v.append(vmax) + + # remove next_sel from openset + Openset.pop(next_sel) + + if len(list(next_sel)) == self._num_bases: + finished = True + iter += 1 + + # update some values ... + self.select = list(next_sel) + self.W = self.data[:, self.select] + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sivm_sgreedy.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,139 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Simplex Volume Maximization [1] + + SIVM_SGREEDY: class for greedy-search SiVM + +[1] C. Thurau, K. Kersting, and C. Bauckhage. Yes We Can - Simplex Volume +Maximization for Descriptive Web-Scale Matrix Factorization. In Proc. Int. +Conf. on Information and Knowledge Management. ACM. 2010. +""" + + +import numpy as np +import time + +from dist import * +from vol import * +from sivm_search import SIVM_SEARCH + +__all__ = ["SIVM_SGREEDY"] + +class SIVM_SGREEDY(SIVM_SEARCH): + """ + SIVM(data, num_bases=4, niter=100, show_progress=True, compW=True) + + + Simplex Volume Maximization. Factorize a data matrix into two matrices s.t. + F = | data - W*H | is minimal. H is restricted to convexity. W is iteratively + found by maximizing the volume of the resulting simplex (see [1]). A solution + is found by employing a simple greedy max-vol strategy. + + Parameters + ---------- + data : array_like + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + niter: int, optional + Number of iterations of the alternating optimization. + 100 (default) + show_progress: bool, optional + Print some extra information + False (default) + compW: bool, optional + Compute W (True) or only H (False). Useful for using basis vectors + from another convexity constrained matrix factorization function + (e.g. svmnmf) (if set to "True" niter can be set to "1") + compH: bool, optional + Compute H (True) or only H (False). Useful for using precomputed + basis vectors. + dist_measure: string, optional + The distance measure for finding the next best candidate that + maximizes the simplex volume ['l2','l1','cosine','sparse_graph_l2'] + 'l2' (default) + optimize_lower_bound: bool, optional + Use the alternative selection criterion that optimizes the lower + bound (see [1]) + False (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + + ferr : frobenius norm (after applying .factoriz()) + + Example + ------- + Applying SIVM to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> sivm_mdl = SIVM_SGREEDY(data, num_bases=2, niter=10) + >>> sivm_mdl.initialization() + >>> sivm_mdl.factorize() + + The basis vectors are now stored in sivm_mdl.W, the coefficients in sivm_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to sivm_mdl.W, and set compW to False: + + >>> data = np.array([[1.5, 1.3], [1.2, 0.3]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> sivm_mdl = SIVM_SGREEDY(data, num_bases=2, niter=1, compW=False) + >>> sivm_mdl.initialization() + >>> sivm_mdl.W = W + >>> sivm_mdl.factorize() + + The result is a set of coefficients sivm_mdl.H, s.t. data = W * sivm_mdl.H. 
+ """ + + def update_w(self): + # compute distance matrix -> requiresd for the volume + self.init_sivm() + next_sel = list([self.select[0]]) + self.select = [] + + self._v = [] + self._t = [] + stime = time.time() + + for iter in range(self._num_bases-1): + # add new selections to openset + next_sel = list(np.sort(next_sel)) + D = pdist(self.data[:, next_sel], self.data[:, next_sel]) + V = np.zeros(self.data.shape[1]) + d = np.zeros((D.shape[0]+1,D.shape[1]+1)) + d[:D.shape[0], :D.shape[1]] = D[:,:] + + for i in range(self.data.shape[1]): + # create a temp selection + dtmp = l2_distance(self.data[:,next_sel], self.data[:,i:i+1]) + d[:-1,-1] = dtmp + d[-1,:-1] = dtmp + # compute volume for temp selection + V[i] = cmdet(d) + + next_index = np.argmax(V) + next_sel.append(next_index) + self._v.append(np.max(V)) + + self._logger.info('Iter:' + str(iter)) + self._logger.info('Current selection:' + str(next_sel)) + self._logger.info('Current volume:' + str(self._v[-1])) + self._t.append(time.time() - stime) + + # update some values ... + self.select = list(next_sel) + self.W = self.data[:, self.select] + + + +if __name__ == "__main__": + import doctest + doctest.testmod()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/snmf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,95 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Semi Non-negative Matrix Factorization. + + SNMF(NMF) : Class for semi non-negative matrix factorization + +[1] Ding, C., Li, T. and Jordan, M.. Convex and Semi-Nonnegative Matrix Factorizations. +IEEE Trans. on Pattern Analysis and Machine Intelligence 32(1), 45-55. +""" + + + +import numpy as np + +from nmf import NMF + +__all__ = ["SNMF"] + +class SNMF(NMF): + """ + SNMF(data, num_bases=4) + + Semi Non-negative Matrix Factorization. Factorize a data matrix into two + matrices s.t. F = | data - W*H | is minimal. + + Parameters + ---------- + data : array_like, shape (_data_dimension, _num_samples) + the input data + num_bases: int, optional + Number of bases to compute (column rank of W and row rank of H). + 4 (default) + + Attributes + ---------- + W : "data_dimension x num_bases" matrix of basis vectors + H : "num bases x num_samples" matrix of coefficients + ferr : frobenius norm (after calling .factorize()) + + Example + ------- + Applying Semi-NMF to some rather stupid data set: + + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> snmf_mdl = SNMF(data, num_bases=2) + >>> snmf_mdl.factorize(niter=10) + + The basis vectors are now stored in snmf_mdl.W, the coefficients in snmf_mdl.H. + To compute coefficients for an existing set of basis vectors simply copy W + to snmf_mdl.W, and set compute_w to False: + + >>> data = np.array([[1.5], [1.2]]) + >>> W = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> snmf_mdl = SNMF(data, num_bases=2) + >>> snmf_mdl.W = W + >>> snmf_mdl.factorize(niter=1, compute_w=False) + + The result is a set of coefficients snmf_mdl.H, s.t. data = W * snmf_mdl.H. + """ + + + def update_w(self): + W1 = np.dot(self.data[:,:], self.H.T) + W2 = np.dot(self.H, self.H.T) + self.W = np.dot(W1, np.linalg.inv(W2)) + + def update_h(self): + def separate_positive(m): + return (np.abs(m) + m)/2.0 + + def separate_negative(m): + return (np.abs(m) - m)/2.0 + + XW = np.dot(self.data[:,:].T, self.W) + + WW = np.dot(self.W.T, self.W) + WW_pos = separate_positive(WW) + WW_neg = separate_negative(WW) + + XW_pos = separate_positive(XW) + H1 = (XW_pos + np.dot(self.H.T, WW_neg)).T + + XW_neg = separate_negative(XW) + H2 = (XW_neg + np.dot(self.H.T,WW_pos)).T + 10**-9 + + self.H *= np.sqrt(H1/H2) + +if __name__ == "__main__": + import doctest + doctest.testmod()
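The update_h rule above works by splitting matrices into positive and negative parts, M = M+ - M- with both parts non-negative, as in Ding et al.; a quick standalone numpy check of that identity:

import numpy as np

M = np.array([[ 1.5, -0.3],
              [-2.0,  0.7]])

M_pos = (np.abs(M) + M) / 2.0   # keeps the positive entries, zeroes the rest
M_neg = (np.abs(M) - M) / 2.0   # keeps the magnitudes of the negative entries

print(np.allclose(M, M_pos - M_neg))                 # True: M = M+ - M-
print((M_pos >= 0).all() and (M_neg >= 0).all())     # True: both parts are non-negative

Applying this split to X.T*W and W.T*W is what lets the multiplicative update keep H non-negative even though the data and W may contain negative entries.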
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/sub.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,227 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Matrix sampling methods + + SUB: apply one of the matrix factorization methods of PyMF + on sampled data for computing W, then compute H. + +Copyright (C) Christian Thurau, 2010. GNU General Public License (GPL). +""" + + + +import numpy as np +import random +#from itertools import combinations +from chnmf import combinations + +import dist +from chnmf import quickhull +from nmf import NMF +from pca import PCA +from kmeans import Kmeans +from laesa import LAESA +from sivm import SIVM + +__all__ = ["SUB"] + +class SUB(NMF): + """ + SUB(data, mfmethod, sstrategy='rand', nsub=20, show_progress=True, mapW=False, + base_sel=2, num_bases=3 , niterH=1, niter=100, compute_h=True, compute_w=True, ) + + Evaluate a matrix factorization method "mfmethod" for a certain sampling + strategy "sstrategy". This is particular useful for very large datasets. + + Parameters + ---------- + todo ... + + Attributes + ---------- + todo .... + """ + + def __init__(self, data, mfmethod, nsub=20, show_progress=True, mapW=False, base_sel=2, + num_bases=3 , niterH=1, compute_h=True, compute_w=True, sstrategy='rand'): + NMF.__init__(self, data, num_bases=num_bases, compute_h=compute_h, show_progress=show_progress, compute_w=compute_w) + + self._niterH = niterH + self._nsub = nsub + self.data = data + self._mfmethod = mfmethod + self._mapW = mapW + self._sstrategy = sstrategy + self._base_sel = base_sel + + # assign the correct distance function + if self._sstrategy == 'cur': + self._subfunc = self.curselect + + elif self._sstrategy == 'kmeans': + self._subfunc = self.kmeansselect + + elif self._sstrategy == 'hull': + self._subfunc = self.hullselect + + elif self._sstrategy == 'laesa': + self._subfunc = self.laesaselect + + elif self._sstrategy == 'sivm': + self._subfunc = self.sivmselect + + else: + self._subfunc = self.randselect + + def hullselect(self): + + def selectHullPoints(data, n=20): + """ select data points for pairwise projections of the first n + dimensions """ + + # iterate over all projections and select data points + idx = np.array([]) + + # iterate over some pairwise combinations of dimensions + for i in combinations(range(n), 2): + + # sample convex hull points in 2D projection + convex_hull_d = quickhull(data[i, :].T) + + # get indices for convex hull data points + idx = np.append(idx, dist.vq(data[i, :], convex_hull_d.T)) + idx = np.unique(idx) + + return np.int32(idx) + + + # determine convex hull data points only if the total + # amount of available data is >50 + #if self.data.shape[1] > 50: + pcamodel = PCA(self.data, show_progress=self._show_progress) + pcamodel.factorize() + + idx = selectHullPoints(pcamodel.H, n=self._base_sel) + + # set the number of subsampled data + self.nsub = len(idx) + + return idx + + def kmeansselect(self): + kmeans_mdl = Kmeans(self.data, num_bases=self._nsub) + kmeans_mdl.initialization() + kmeans_mdl.factorize() + + # pick data samples closest to the centres + idx = dist.vq(kmeans_mdl.data, kmeans_mdl.W) + return idx + + def curselect(self): + def sample_probability(): + dsquare = self.data[:,:]**2 + + pcol = np.array(dsquare.sum(axis=0)) + pcol /= pcol.sum() + + return (pcol.reshape(-1,1)) + + probs = sample_probability() + prob_cols = np.cumsum(probs.flatten()) #.flatten() + temp_ind = 
np.zeros(self._nsub, np.int32) + + for i in range(self._nsub): + tempI = np.where(prob_cols >= np.random.rand())[0] + temp_ind[i] = tempI[0] + + return np.sort(temp_ind) + + def sivmselect(self): + sivmmdl = SIVM(self.data, num_bases=self._nsub, compute_w=True, compute_h=False, dist_measure='cosine') + + sivmmdl.initialization() + sivmmdl.factorize() + idx = sivmmdl.select + return idx + + def laesaselect(self): + laesamdl = LAESA(self.data, num_bases=self._nsub, compute_w=True, compute_h=False, dist_measure='cosine') + laesamdl.initialization() + laesamdl.factorize() + idx = laesamdl.select + return idx + + + def randselect(self): + idx = random.sample(xrange(self._num_samples), self._nsub) + return np.sort(np.int32(idx)) + + def update_w(self): + + idx = self._subfunc() + idx = np.sort(np.int32(idx)) + + + mdl_small = self._mfmethod(self.data[:, idx], + num_bases=self._num_bases, + show_progress=self._show_progress, + compute_w=True) + + # initialize W, H, and beta + mdl_small.initialization() + + # determine W + mdl_small.factorize() + + + self.mdl = self._mfmethod(self.data[:, :], + num_bases=self._num_bases , + show_progress=self._show_progress, + compute_w=False) + + + self.mdl.initialization() + + if self._mapW: + # compute pairwise distances + #distance = vq(self.data, self.W) + _Wmapped_index = dist.vq(self.mdl.data, mdl_small.W) + + # do not directly assign, i.e. Wdist = self.data[:,sel] + # as self might be unsorted (in non ascending order) + # -> sorting sel would screw the matching to W if + # self.data is stored as a hdf5 table (see h5py) + for i,s in enumerate(_Wmapped_index): + self.mdl.W[:,i] = self.mdl.data[:,s] + else: + self.mdl.W = np.copy(mdl_small.W) + + def update_h(self): + self.mdl.factorize() + + def factorize(self): + """Do factorization s.t. data = dot(dot(data,beta),H), under the convexity constraint + beta >=0, sum(beta)=1, H >=0, sum(H)=1 + """ + # compute new coefficients for reconstructing data points + self.update_w() + + # for CHNMF it is sometimes useful to only compute + # the basis vectors + if self._compute_h: + self.update_h() + + self.W = self.mdl.W + self.H = self.mdl.H + + self.ferr = np.zeros(1) + self.ferr[0] = self.mdl.frobenius_norm() + self._print_cur_status(' Fro:' + str(self.ferr[0])) + +if __name__ == "__main__": + import doctest + doctest.testmod()
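The curselect strategy above samples columns with probability proportional to their squared norms, via inverse-transform sampling on the cumulative distribution. A standalone numpy sketch of just that sampling step (illustrative, not the class itself):

import numpy as np

data = np.random.rand(5, 200)
nsub = 20

# Probability of picking each column is proportional to its squared Euclidean norm.
pcol = (data ** 2).sum(axis=0)
pcol = pcol / pcol.sum()

# Inverse-transform sampling on the cumulative distribution, as in curselect above.
cum = np.cumsum(pcol)
idx = np.array([np.where(cum >= np.random.rand())[0][0] for _ in range(nsub)])
print(np.sort(idx))   # sampled column indices, duplicates possible as in the original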
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/svd.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,242 @@ +#!/usr/bin/python +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF Singular Value Decomposition. + + SVD : Class for Singular Value Decomposition + pinv() : Compute the pseudoinverse of a Matrix + +""" + + + +from numpy.linalg import eigh +import scipy.sparse + +try: + import scipy.sparse.linalg.eigen.arpack as linalg +except (ImportError, AttributeError): + import scipy.sparse.linalg as linalg + + +import numpy as np + +def pinv(A, k=-1, eps=10**-8): + # Compute Pseudoinverse of a matrix + # calculate SVD + svd_mdl = SVD(A, k=k) + svd_mdl.factorize() + + S = svd_mdl.S + Sdiag = S.diagonal() + Sdiag = np.where(Sdiag >eps, 1.0/Sdiag, 0.0) + + for i in range(S.shape[0]): + S[i,i] = Sdiag[i] + + if scipy.sparse.issparse(A): + A_p = svd_mdl.V.T * (S * svd_mdl.U.T) + else: + A_p = np.dot(svd_mdl.V.T, np.core.multiply(np.diag(S)[:,np.newaxis], svd_mdl.U.T)) + + return A_p + + +class SVD(): + """ + SVD(data, show_progress=False) + + + Singular Value Decomposition. Factorize a data matrix into three matrices s.t. + F = | data - USV| is minimal. U and V correspond to eigenvectors of the matrices + data*data.T and data.T*data. + + Parameters + ---------- + data : array_like [data_dimension x num_samples] + the input data + + Attributes + ---------- + U,S,V : submatrices s.t. data = USV + + Example + ------- + >>> import numpy as np + >>> data = np.array([[1.0, 0.0, 2.0], [0.0, 1.0, 1.0]]) + >>> svd_mdl = SVD(data, show_progress=False) + >>> svd_mdl.factorize() + """ + + _EPS=10**-8 + + def __init__(self, data, k=-1, rrank=0, crank=0): + self.data = data + (self._rows, self._cols) = self.data.shape + if rrank > 0: + self._rrank = rrank + else: + self._rrank = self._rows + + if crank > 0: + self._crank = crank + else: + self._crank = self._cols + + # set the rank to either rrank or crank + self._k = k + + def frobenius_norm(self): + """ Frobenius norm (||data - USV||) for a data matrix and a low rank + approximation given by SVH using rank k for U and V + + Returns: + frobenius norm: F = ||data - USV|| + """ + if scipy.sparse.issparse(self.data): + err = self.data - self.U*self.S*self.V + err = err.multiply(err) + err = np.sqrt(err.sum()) + else: + err = self.data[:,:] - np.dot(np.dot(self.U, self.S), self.V) + err = np.sqrt(np.sum(err**2)) + + return err + + + def factorize(self): + def _right_svd(): + AA = np.dot(self.data[:,:], self.data[:,:].T) + values, u_vectors = eigh(AA) + + # get rid of too low eigenvalues + u_vectors = u_vectors[:, values > self._EPS] + values = values[values > self._EPS] + + # sort eigenvectors according to largest value + idx = np.argsort(values) + values = values[idx[::-1]] + + # argsort sorts in ascending order -> access is backwards + self.U = u_vectors[:,idx[::-1]] + + # compute S + self.S = np.diag(np.sqrt(values)) + + # and the inverse of it + S_inv = np.diag(np.sqrt(values)**-1) + + # compute V from it + self.V = np.dot(S_inv, np.dot(self.U[:,:].T, self.data[:,:])) + + + def _left_svd(): + AA = np.dot(self.data[:,:].T, self.data[:,:]) + values, v_vectors = eigh(AA) + + # get rid of too low eigenvalues + v_vectors = v_vectors[:, values > self._EPS] + values = values[values > self._EPS] + + # sort eigenvectors according to largest value + # argsort sorts in ascending order -> access is backwards + idx = np.argsort(values)[::-1] + values = values[idx] + + # 
compute S + self.S= np.diag(np.sqrt(values)) + + # and the inverse of it + S_inv = np.diag(1.0/np.sqrt(values)) + + Vtmp = v_vectors[:,idx] + + self.U = np.dot(np.dot(self.data[:,:], Vtmp), S_inv) + self.V = Vtmp.T + + def _sparse_right_svd(): + ## for some reasons arpack does not allow computation of rank(A) eigenvectors (??) # + AA = self.data*self.data.transpose() + if self.data.shape[0] > 1: + # do not compute full rank if desired + if self._k > 0 and self._k < self.data.shape[0]-1: + k = self._k + else: + k = self.data.shape[0]-1 + + values, u_vectors = linalg.eigen_symmetric(AA,k=k) + else: + values, u_vectors = eigh(AA.todense()) + + # get rid of too low eigenvalues + u_vectors = u_vectors[:, values > self._EPS] + values = values[values > self._EPS] + + # sort eigenvectors according to largest value + idx = np.argsort(values) + values = values[idx[::-1]] + + # argsort sorts in ascending order -> access is backwards + self.U = scipy.sparse.csc_matrix(u_vectors[:,idx[::-1]]) + + # compute S + self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values))) + + # and the inverse of it + S_inv = scipy.sparse.csc_matrix(np.diag(1.0/np.sqrt(values))) + + # compute V from it + self.V = self.U.transpose() * self.data + self.V = S_inv * self.V + + def _sparse_left_svd(): + # for some reasons arpack does not allow computation of rank(A) eigenvectors (??) + AA = self.data.transpose()*self.data + + if self.data.shape[1] > 1: + # do not compute full rank if desired + if self._k > 0 and self._k < self.data.shape[1]-1: + k = self._k + else: + k = self.data.shape[1]-1 + values, v_vectors = linalg.eigen_symmetric(AA,k=k) + else: + values, v_vectors = eigh(AA.todense()) + # get rid of too low eigenvalues + v_vectors = v_vectors[:, values > self._EPS] + values = values[values > self._EPS] + + # sort eigenvectors according to largest value + idx = np.argsort(values) + values = values[idx[::-1]] + + # argsort sorts in ascending order -> access is backwards + self.V = scipy.sparse.csc_matrix(v_vectors[:,idx[::-1]]) + + # compute S + self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values))) + + # and the inverse of it + S_inv = scipy.sparse.csc_matrix(np.diag(1.0/np.sqrt(values))) + + self.U = self.data * self.V * S_inv + self.V = self.V.transpose() + + + if self._rows > self._cols: + if scipy.sparse.issparse(self.data): + _sparse_left_svd() + else: + _left_svd() + else: + if scipy.sparse.issparse(self.data): + _sparse_right_svd() + else: + _right_svd() + +if __name__ == "__main__": + import doctest + doctest.testmod()
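The dense branches of factorize recover the SVD from an eigendecomposition of the smaller Gram matrix. A numpy-only sketch mirroring _left_svd, with a reconstruction check (illustrative only):

import numpy as np
from numpy.linalg import eigh

A = np.random.rand(6, 4)            # more rows than columns -> the "left" variant

values, V = eigh(A.T.dot(A))        # eigenpairs of the smaller Gram matrix A.T * A
keep = values > 1e-8                # drop numerically-zero eigenvalues
values, V = values[keep], V[:, keep]

order = np.argsort(values)[::-1]    # largest eigenvalue first
values, V = values[order], V[:, order]

S = np.diag(np.sqrt(values))
U = A.dot(V).dot(np.diag(1.0 / np.sqrt(values)))

print(np.allclose(A, U.dot(S).dot(V.T)))   # True up to numerical precision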
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pymf/vol.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,40 @@ +#!/usr/bin/python2.6 +# +# Copyright (C) Christian Thurau, 2010. +# Licensed under the GNU General Public License (GPL). +# http://www.gnu.org/licenses/gpl.txt +""" +PyMF functions for computing matrix/simplex volumes + + cmdet(): Cayley-Menger Determinant + simplex_volume(): Ordinary simplex volume + +""" + + +import numpy as np +try: + from scipy.misc.common import factorial +except: + from scipy.misc import factorial + +__all__ = ["cmdet", "simplex"] + +def cmdet(d): + # compute the CMD determinant of the euclidean distance matrix d + # -> d should not be squared! + D = np.ones((d.shape[0]+1,d.shape[0]+1)) + D[0,0] = 0.0 + D[1:,1:] = d**2 + j = np.float32(D.shape[0]-2) + f1 = (-1.0)**(j+1) / ( (2**j) * ((factorial(j))**2)) + cmd = f1 * np.linalg.det(D) + # sometimes, for very small values "cmd" might be negative ... + return np.sqrt(np.abs(cmd)) + +def simplex(d): + # compute the simplex volume using coordinates + D = np.ones((d.shape[0]+1, d.shape[1])) + D[1:,:] = d + vol = np.abs(np.linalg.det(D)) / factorial(d.shape[1] - 1) + return vol
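A quick sanity check on these formulas, assuming this pymf directory is on the path and the factorial import above resolves under the installed SciPy: the unit right triangle should come out with area 0.5 by both routes.

import numpy as np
from scipy.spatial.distance import cdist
from vol import cmdet, simplex   # assumes this pymf directory is on sys.path

# Unit right triangle with legs of length 1.
pts = np.array([[0.0, 0.0],
                [1.0, 0.0],
                [0.0, 1.0]])

d = cdist(pts, pts)     # plain (not squared) pairwise distances
print(cmdet(d))         # ~0.5, the triangle's area from the Cayley-Menger determinant

# simplex() works from coordinates instead, one point per column.
print(simplex(pts.T))   # ~0.5 as well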
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sf.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,157 @@ +#!/usr/bin/env python +# coding: utf-8 +""" +This script identifies the boundaries of a given track using the Structural +Features method: + +Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised +Detection of Music Boundaries by Time Series Structure Features. +In Proc. of the 26th AAAI Conference on Artificial Intelligence +(pp. 1613–1619). + +Toronto, Canada. +""" + +__author__ = "Oriol Nieto" +__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" +__license__ = "GPL" +__version__ = "1.0" +__email__ = "oriol@nyu.edu" + + +import numpy as np +from scipy.spatial import distance +from scipy import signal +from scipy.ndimage import filters +import pylab as plt + +# Local stuff +from utils import SegUtil + + +def median_filter(X, M=8): + """Median filter along the first axis of the feature matrix X.""" + for i in xrange(X.shape[1]): + X[:, i] = filters.median_filter(X[:, i], size=M) + return X + + +def gaussian_filter(X, M=8, axis=0): + """Gaussian filter along the first axis of the feature matrix X.""" + for i in xrange(X.shape[axis]): + if axis == 1: + X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.) + elif axis == 0: + X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.) + return X + + +def compute_gaussian_krnl(M): + """Creates a gaussian kernel following Serra's paper.""" + g = signal.gaussian(M, M / 3., sym=True) + G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) + G[M / 2:, :M / 2] = -G[M / 2:, :M / 2] + G[:M / 2, M / 1:] = -G[:M / 2, M / 1:] + return G + + +def compute_nc(X): + """Computes the novelty curve from the structural features.""" + N = X.shape[0] + # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's + + nc = np.zeros(N) + for i in xrange(N - 1): + nc[i] = distance.euclidean(X[i, :], X[i + 1, :]) + + # Normalize + nc += np.abs(nc.min()) + nc /= nc.max() + return nc + + +def pick_peaks(nc, L=16, offset_denom=0.1): + """Obtain peaks from a novelty curve using an adaptive threshold.""" + offset = nc.mean() * float(offset_denom) + th = filters.median_filter(nc, size=L) + offset + peaks = [] + for i in xrange(1, nc.shape[0] - 1): + # is it a peak? + if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: + # is it above the threshold? + if nc[i] > th[i]: + peaks.append(i) + #plt.plot(nc) + #plt.plot(th) + #for peak in peaks: + #plt.axvline(peak, color="m") + #plt.show() + return peaks + + +def circular_shift(X): + """Shifts circularly the X squre matrix in order to get a + time-lag matrix.""" + N = X.shape[0] + L = np.zeros(X.shape) + for i in xrange(N): + L[i, :] = np.asarray([X[(i + j) % N, j] for j in xrange(N)]) + return L + + +def embedded_space(X, m, tau=1): + """Time-delay embedding with m dimensions and tau delays.""" + N = X.shape[0] - int(np.ceil(m)) + Y = np.zeros((N, int(np.ceil(X.shape[1] * m)))) + for i in xrange(N): + rem = int((m % 1) * X.shape[1]) # Reminder for float m + Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(), + X[i + int(m), :rem])) + return Y + + +def segmentation(F): + """Main process.""" + + # Structural Features params + Mp = 32 # Size of the adaptive threshold for peak picking + od = 0.1 # Offset coefficient for adaptive thresholding + + M = 16 # Size of gaussian kernel in beats + m = 3 # Number of embedded dimensions + k = 0.06 # k*N-nearest neighbors for the recurrence plot + + # Emedding the feature space (i.e. 
shingle)
+    E = embedded_space(F, m)
+
+    # Recurrence matrix (SegUtil is assumed to provide the recurrence_matrix
+    # wrapper; the bare name `utils` is not bound by the import above)
+    R = SegUtil.recurrence_matrix(E.T,
+                                  k=k * int(F.shape[0]),
+                                  width=0,  # zeros from the diagonal
+                                  metric="seuclidean",
+                                  sym=True).astype(np.float32)
+
+    # Check size in case the track is too short
+    if R.shape[0] > 0:
+        # Circular shift
+        L = circular_shift(R)
+
+        # Obtain structural features by filtering the lag matrix along both axes
+        SF = gaussian_filter(L.T, M=M, axis=1)
+        SF = gaussian_filter(SF, M=1, axis=0)
+        # plt.imshow(SF.T, interpolation="nearest", aspect="auto"); plt.show()
+
+        # Compute the novelty curve
+        nc = compute_nc(SF)
+
+        # Find peaks in the novelty curve
+        est_bounds = pick_peaks(nc, L=Mp, offset_denom=od)
+
+        # Re-align embedded space
+        est_bound_idxs = np.asarray(est_bounds) + int(np.ceil(m / 2.))
+    else:
+        est_bound_idxs = []
+
+    if len(est_bound_idxs) == 0:
+        est_bound_idxs = np.asarray([0])  # Return first one
+
+    return est_bound_idxs
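The adaptive peak picker at the heart of this pipeline can be exercised on a synthetic novelty curve. A minimal sketch, assuming this file is importable as sf and runs under the same Python 2 environment as the rest of the repository:

import numpy as np
import sf   # assumes this module and its utils/pylab imports resolve

t = np.arange(200)
nc = 0.01 * np.random.rand(200)              # low-level noise floor
nc += np.exp(-0.5 * ((t - 60) / 3.0) ** 2)   # bump near frame 60
nc += np.exp(-0.5 * ((t - 140) / 3.0) ** 2)  # bump near frame 140
nc /= nc.max()

print(sf.pick_peaks(nc, L=16, offset_denom=0.1))   # expected: two indices, near 60 and 140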
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/ComputationCache.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +ComputationCache.py + +Created by George Fazekas on 2014-09-21. +Copyright (c) 2014 . All rights reserved. + +These methods can be used to decorate any function with a caching mechanism to avoid repeating long computations during experimentation. + +Example: + +m = Meta() +m.cache = True +m.cache_file_base = "filname-with-unique-parameters.txt" + +@with_cache(meta) +def some_heavy_function(): + ... lots of computation here... + return numpy_array + + +Once decorated, some_heavy_function() will only execute once, then the results will be loaded from a file as long as the parametrs +encoded in cache_file_base don't change. The parameters and function name are appended to the cache file name so the decorator +is safe to use for multiple computationally intense functions. + +The decorator assumes the heavy function returns a singke numpy array or matrix. + +""" + +import cPickle +import numpy as np +from os.path import join, isdir, dirname + +class Meta(object): + __slots__ = ["cache","cache_file_base","cache_location"] + + +def makedir(output_folder) : + '''Create a directory tree and set privileges to allow acces to multiple users.''' + if output_folder and not isdir(output_folder) : + try: + from os import makedirs + makedirs(output_folder,0o777) + except : + print "Failed to create directory: %s" %output_folder + import sys + sys.exit(-1) + pass + + +'''Generic decorator that caches function execution results.''' +def with_cache(meta): + def wrap(func): + def file_cache(*args, **kwargs): + if meta.cache : + file = "Cache-" + meta.cache_file_base + "-f_%s.txt" %func.__name__ + file = join(getattr(meta,"cache_location",""),file) + # print func, meta.cache, file + try : + print "Loading data from file <%s>" %file + return np.loadtxt(file) + except : + print "Loading from <%s> failed. Computing new results." %file + makedir(dirname(file)) + result = func(*args, **kwargs) + np.savetxt(file,result) + return result + else : + return func(*args, **kwargs) + return file_cache + return wrap + +def with_pickle_dump(meta): + def wrap(func): + def file_cache(*args, **kwargs): + if meta.cache : + file = "Cache-" + meta.cache_file_base + "-f_%s-pickle.txt" %func.__name__ + file = join(getattr(meta,"cache_location",""),file) + file = file.replace(" ","-") + # print func, meta.cache, file + try : + print "Loading data from file <%s>" %file + with open(file, 'r') as fh: + return cPickle.load(fh) + except : + print "Loading from <%s> failed. Computing new results." %file + makedir(dirname(file)) + result = func(*args, **kwargs) + with open(file, 'w') as f: + f.write(cPickle.dumps(result)) + return result + else : + return func(*args, **kwargs) + return file_cache + return wrap + +
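A concrete use of the decorator follows; the Meta settings and the decorated function are hypothetical, and the repo root is assumed to be on sys.path. Note that cache_file_base should encode every parameter the result depends on, since only the file name distinguishes cached results.

import numpy as np
from utils.ComputationCache import Meta, with_cache   # assumes the repo root is on sys.path

meta = Meta()
meta.cache = True
meta.cache_location = '/tmp/seg-cache'        # hypothetical cache directory
meta.cache_file_base = 'ssm-win2048-hop1024'  # encode the parameters that matter

@with_cache(meta)
def heavy_ssm(n):
    # Stands in for an expensive self-similarity computation.
    X = np.random.rand(n, 12)
    return np.dot(X, X.T)

ssm = heavy_ssm(500)   # computed and written to the cache file on the first call
ssm = heavy_ssm(500)   # loaded from '/tmp/seg-cache/Cache-ssm-win2048-hop1024-f_heavy_ssm.txt'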
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/GmmMetrics.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,417 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +GmmFeature.py + +Created by George Fazekas on 2014-05-30. +Copyright (c) 2014 . All rights reserved. +""" + +import sys, os, math, wave, struct, cPickle +from pprint import pprint +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext +from itertools import * +import matplotlib.pyplot as plt +from numpy import sum,isnan, log, power, pi, exp, transpose +from numpy.random import rand +import numpy as np +from sklearn.mixture import GMM +from sklearn.metrics.pairwise import pairwise_distances +from scipy.linalg import * +from scipy.io.wavfile import read +# from wavebender import * +from gmmdist import skl_models + +outFile = '/Users/mitian/Documents/hg/seg/test/distance.txt' +audioDir = '/Users/mitian/Documents/hg/seg/audio' + + +class GaussianComponent(object): + '''GMM representations of data.''' + __slots__ = ['weight','means','covars'] + + def __getstate__(self): + d = {} + for obj in GaussianComponent.__slots__ : + d.update({obj:getattr(self,obj)}) + return d + + def __setstate__(self,d): + for k,v in d.iteritems() : + setattr(self,k,v) + +class AudioObj(object): + '''A class to store generated audio samples.''' + __slots__ = ['name', 'data'] + +class FeatureObj(object): + '''A class to store extracted features for audio samples.''' + __slots__ = ['audio', 'mean', 'std', 'sc', 'sl'] + +class DistanceObj(object): + '''A class to store calculated GMM distances.''' + __slots__ = ['audio1', 'audio2', 'nComponents', 'skl_diag', 'skl_full', 'c2', 'euclidean', 'bhattacharyya'] + + def save(self, filename=None): + '''Save distance objects.''' + if filename == None: + filenmae = outFile + f = open(filenmae,'a+') + f.write(cPickle.dumps(self.__dict__)) + f.close() + +class GmmDistance(object): + '''Calculating distances between two pdfs using different distance metrics.''' + + def __init__(self, feature_array, components = 1, save_data=False): + self.n_components = components + self.gmm = GMM(n_components = components, covariance_type='full') + feature_array = np.nan_to_num(feature_array) + feature_array[np.isinf(feature_array)] = 0.0 + feature_array[np.isnan(feature_array)] = 0.0 + self.model = self.gmm.fit(feature_array) + if save_data : + self.data = feature_array + else : + self.data = None + if not self.model.converged_ : + print "Warning: model fitting did not converge." 
+ self.means = self.model.means_ + self.covars = self.model.covars_ + self.weights = self.model.weights_ + self.label = -1 # used only for clustering + self.components = [] + for c in range(self.n_components) : + g = GaussianComponent() + g.weight = self.weights[c] + g.means = self.means[c] + g.covars = self.covars[c] + self.components.append(g) + + def update(self): + for c in range(self.n_components) : + self.weights[c] = self.components[c].weight + self.means[c] = self.components[c].means + self.covars[c] = self.components[c].covars + + def __str__(self): + self.update() + return "GMM object:\nmeans:%(means)s\ncovariances:%(covars)s\nweights%(weights)s" %vars(self) + + def kl_div(self, p1, p2): + '''Compute KL divergence between p1 and p2 with diagonal covariances.''' + if p1.means is None or p2.means is None: + return np.finfo(np.float64).max + # d = ((0.5 * log(p2.covars**2/p1.covars**2)) - 0.5 + p1.covars**2/(2*p2.covars**2) + (abs(p2.means - p1.means)**2) / (2*p2.covars**2)) + d = -2.0 * p1.means.shape[0] + d += sum( (p1.covars / p2.covars) + (p2.covars / p1.covars) + ((p1.means - p2.means) * ((1.0 / p1.covars) + (1.0 / p2.covars)) * (p1.means - p2.means)), dtype = np.float64 ) + if isnan(d) : + return np.finfo(np.float64).max + return d + + def skl_distance_diag(self,other): + '''Estimate the symmetrised Kullback–Leibler divergence between this and an other model.''' + # TODO: the components are sorted so this works but still theoretically not quite correct to do this... + assert (self.n_components == other.n_components), "Number of components must be equal." + kl12 = kl21 = 0.0 + for i in range(self.n_components) : + kl12 += self.weights[i] * self.kl_div(self.components[i],other.components[i]) + kl21 += other.weights[i] * self.kl_div(other.components[i],self.components[i]) + return (kl12 + kl21 ) / 2.0 + + def skl_distance_full(self,other): + return skl_models(self,other) + + def skl_distance_full_orig(self,other): + '''Estimate the symmetrised Kullback–Leibler divergence between this and an other model.''' + n = len(self.components) # number of components + d = len(self.components[0].means) # number of dimensions + + ixm = np.ones((n,1),dtype=int).T # selector of mean matrix components + ixd = range(0,d*d,d+1) # indices of diagonal elements of DxD matrix + t1 = self.covars.swapaxes(1,2).reshape(d,n*d) # concatenate gmm1 covariance matrices + t2 = other.covars.swapaxes(1,2).reshape(d,n*d) # concatenate gmm2 covariance matrices + loopn = xrange(n) + + logdet1 = np.zeros((n,1)) + kl11 = np.zeros((n,n)) + for i in loopn : + # step 1) precompute log(determinant()) of covariance matrices of gmm1 + logdet1[i] = log(det(self.covars.swapaxes(0,2)[:,:,i])) + + # step 2) compute reference kldiv between individual components of gmm1 + inv1 = inv(self.covars.swapaxes(0,2)[:,:,i]) + mm1 = self.means - self.means[i*ixm,:][0] + b1 = np.dot(inv1,t1).swapaxes(0,1).reshape(n,power(d,2)).T + kl11[:,i] = 0.5 * ( (logdet1[i]-d-logdet1)[:,0] + sum(b1[ixd,:],0).T + sum(np.dot(mm1,inv1) * mm1,1)) + # print kl11 + + logdet2 = np.zeros((n,1)) + kl22 = np.zeros((n,n)) + for i in loopn : + # step 3) precompute log(determinant()) of covariance matrices of gmm2 + logdet2[i] = log(det(other.covars.swapaxes(0,2)[:,:,i])) + inv2 = inv(other.covars.swapaxes(0,2)[:,:,i]) + mm2 = other.means - other.means[i*ixm,:][0] + b2 = np.dot(inv2,t2).swapaxes(0,1).reshape(n,power(d,2)).T + kl22[:,i] = 0.5 * ( (logdet2[i]-d-logdet2)[:,0] + sum(b2[ixd,:],0).T + sum(np.dot(mm2,inv2) * mm2,1)) + + # step 4) compute pair-wise kldiv 
between components of gmm1 and gmm2 + kl12 = np.zeros((n,n)) + kl21 = np.zeros((n,n)) + for i in loopn : + inv1 = inv(self.covars.swapaxes(0,2)[:,:,i]) + inv2 = inv(other.covars.swapaxes(0,2)[:,:,i]) + m12 = self.means - other.means[i*ixm,:][0] + m21 = other.means - self.means[i*ixm,:][0] + b1 = np.dot(inv1,t1).swapaxes(0,1).reshape(n,power(d,2)).T + b2 = np.dot(inv2,t2).swapaxes(0,1).reshape(n,power(d,2)).T + kl12[:,i] = 0.5 * ( (logdet2[i]-d-logdet1)[:,0] + sum(b2[ixd,:],0).T + sum(np.dot(m12,inv2) * m12,1)) + kl21[:,i] = 0.5 * ( (logdet1[i]-d-logdet2)[:,0] + sum(b1[ixd,:],0).T + sum(np.dot(m21,inv1) * m21,1)) + + # step 5) compute the final variational distance between gmm1 and gmm2 + kl_full_12 = np.dot(self.weights.T, (log(sum(exp(-kl11)*self.weights,1))) - log(sum(exp(-kl12)*other.weights,1))) + kl_full_21 = np.dot(other.weights.T, (log(sum(exp(-kl22)*other.weights,1))) - log(sum(exp(-kl21)*self.weights,1))) + return (kl_full_12 + kl_full_21 ) / 2.0 + + def emd(self, p1, p2): + '''Compute earth movers distance between component p1 and p2''' + + pass + + def c2(self, p1, p2): + d = power(det(p1.covars), -0.5) * power(det(p2.covars), -0.5) + return d + + def c2_distance(self, other): + '''Compute the c2 pdf distance metric + G. Sfikas etc. An analytic distance metric for Gaussian Mixture Models with application in image retrieval. ICANN 2005. + ''' + d12 = d11 = d22 = 0.0 + for i in xrange(self.n_components) : + for j in xrange(self.n_components) : + V = inv(inv(self.covars[i]) + inv(other.covars[j])) + K = self.means[i].T.dot(inv(self.covars[i])) * (self.means[i] - other.means[j]) + other.means[j].T.dot(inv(other.covars[j])) * (other.means[j] - self.means[i]) + + d12 += sum(self.weights[i] * other.weights[j] * power((det(V) / (exp(K) * self.c2(self.components[i], other.components[j]))), 0.5)) + d11 += sum(self.weights[i] * other.weights[j] * power((det(V) / (exp(K) * self.c2(self.components[i], self.components[j]))), 0.5)) + d22 += sum(self.weights[i] * other.weights[j] * power((det(V) / (exp(K) * self.c2(other.components[i], other.components[j]))), 0.5)) + + dist = -log(2 * d12 / (d11 + d22)) + + if isnan(dist) : + return np.finfo(np.float64).max + return dist + + def euclidean(self, p1, p2): + '''Compute euclidean distance between p1 and p2''' + if p1.means is None or p2.means is None: + return np.finfo(np.float64).max + d = sum(power(2 * pi * (power(p1.covars, 2) + power(p2.covars, 2)), -0.5) * exp(-0.5 * power(p1.means - p2.means, 2) / (power(p1.covars, 2) + power(p2.covars, 2)))) + + if isnan(d) : + return np.finfo(np.float64).max + return d + + def euclidean_distance(self, other): + '''Compute the pdf distance metric''' + e11 = e22 = e12 = 0.0 + for i in range(self.n_components) : + e11 += self.weights[i] * self.weights[i] * self.euclidean(self.components[i],self.components[i]) + e22 += other.weights[i] * other.weights[i] * self.euclidean(other.components[i],other.components[i]) + e12 += self.weights[i] * other.weights[i] * self.euclidean(self.components[i],other.components[i]) + + dist = e11 + e22 - 2 * e12 + + if isnan(dist) : + return np.finfo(np.float64).max + return dist + + def bhattacharyya(self, p1, p2): + '''Compute the Bhattacharyya based distance following: + K. Fukunaga. Introduction to statistical pattern recognition. 
Academic Press 1990 + ''' + B_dev = 0.125 * ((p1.means - p2.means).T.dot( inv((p1.covars + p2.covars) / 2))).dot(p1.means - p2.means)\ + + 0.5 * log( abs(inv((p1.covars + p2.covars) / 2)) / power(abs(inv(p1.covars) * inv(p2.covars)), 0.5) ) + + d = sum(B_dev) + if isnan(d) : + return np.finfo(np.float64).max + return d + + def bhattacharyya_distance(self, other): + '''Compute the pdf distance metric''' + dist = 0.0 + for i in xrange(self.n_components) : + dist += self.weights[i] * other.weights[i] *self.bhattacharyya(self.components[i], other.components[i]) + return dist + + +class AudioGenerator(object): + '''Generate simple audio data (sinusoidal etc. and noise and their combinations).''' + + def sine(self, freq=440.0, framerate=44100, amplitude=0.5, length=0.02): + n = int(length * framerate) + return np.array([amplitude * math.sin(2.0*math.pi * float(freq) * (float(i)/float(framerate))) for i in xrange(n)]) + + def noise(self, framerate=44100, amplitude=0.1, length=0.02): + n = int(length * framerate) + return np.array([float(amplitude) * np.random.uniform(-1, 1) for i in xrange(n)]) + + def square(self, freq=440.0, framerate=44100, amplitude=0.5, length=0.02): + for s in self.sine(freq, framerate, amplitude, length): + if s > 0: + yield amplitude + elif s < 0: + yield -amplitude + else: + yield 0.0 + + def damped(self, freq=440.0, framerate=44100, amplitude=0.5, length=0.02): + n = int(length*framerate) + return (exp(-(float(i%n)/float(framerate))) * s for i, s in enumerate(self.sine(frequency, framerate, amplitude, length))) + + def write_wav_file(self, samples,filename) : + '''Create a wav file and write it to disk.''' + nchannels, sampwidth, framerate, nframes = 1, 2, 44100, len(samples) + max_amplitude = 32767.0 + w = wave.open(filename, 'w') + w.setparams((nchannels, sampwidth, framerate, nframes, 'NONE', 'not compressed')) + frames = str().join(struct.pack('h', int(max_amplitude * s)) for s in samples) + w.writeframesraw(frames) + w.close() + print "wav file: %s written." 
%filename + +class FeatureExtractor(object): + '''Extract low level features of input audio samples for compare distance computation.''' +def main(): + + # ag = AudioGenerator() + # fe = FeatureExtractor() + # + # # Create a "test1.wav" file which is 10s length and contains two sinusoids + noise + # length = 10.0 # unit = sG + # samples = ag.sine(440.0,length=length) + ag.sine(240.0,length=length) + ag.noise(length=length) + # samples = samples / (max(samples) + 0.05) + # ag.write_wav_file(samples, "audio/test1.wav") + # # Create a file indentical to 'test1.wav + # ag.write_wav_file(samples,"audio/test1a.wav") + # # Create a file with the same componentes as "test1.wav" but the freq of one sinusouid different + # samples = ag.sine(440.0,length=length) + ag.sine(480.0,length=length) + ag.noise(length=length) + # samples = samples / (max(samples) + 0.05) + # ag.write_wav_file(samples, "audio/test1b.wav") + # # Create a file with the same componentes as "test1.wav" but the freq of both sinusouids different + # samples = ag.sine(880.0,length=length) + ag.sine(480.0,length=length) + ag.noise(length=length) + # samples = samples / (max(samples) + 0.05) + # ag.write_wav_file(samples, "audio/test1c.wav") + # + # # Create a file with one more sinusouid componentes as "test1.wav" + # samples = ag.sine(440.0,length=length) + ag.sine(240.0,length=length) + ag.sine(880.0,length=length) + ag.noise(length=length) + # samples = samples / (max(samples)+0.05) + # ag.write_wav_file(samples, "audio/test2a.wav") + # # Create a file with changed freq and one more sinusouid componentes as "test1.wav" + # samples = ag.sine(440.0,length=length) + ag.sine(240.0,length=length) + ag.sine(1320.0,length=length) + ag.noise(length=length) + # samples = samples / (max(samples)+0.05) + # ag.write_wav_file(samples, "audio/test2b.wav") + # + # # Create a file with longer length than "test1.wav" + # samples = ag.sine(440.0,length=15) + ag.sine(240.0,length=15) + ag.noise(length=15) + # samples = samples / (max(samples)+0.05) + # ag.write_wav_file(samples, "audio/test3a.wav") + # # Create a file with longer length and one more sinusoid than "test1.wav" + # samples = ag.sine(440.0,length=15) + ag.sine(240.0,length=15) + ag.sine(880.0,length=15) + ag.noise(length=15) + # samples = samples / (max(samples)+0.05) + # ag.write_wav_file(samples, "audio/test3b.wav") + + + # plt.plot(samples[:1000]) + # plt.show() + + + # print "Testing Gaussian Feature: Generating random features." 
+ feature_array_1 = np.zeros((20,100),dtype=np.float64) + feature_array_2 = np.zeros((20,100),dtype=np.float64) + + r = rand(2) + + for x in xrange(100) : + feature_array_1[:,x] = rand(20) + r[0] + feature_array_2[:,x] = rand(20) + r[1] + + f1 = GmmDistance(feature_array_1) + f2 = GmmDistance(feature_array_2) + + print f1,f2,"\n" + + print "KL2 distance between f1-f2 using diag covariance:", f1.skl_distance_diag(f2) + print "KL2 distance between f1-f1 using diag covariance:", f1.skl_distance_diag(f1) + print "KL2 distance between f2-f2 using diag covariance:", f2.skl_distance_diag(f2) + print "KL2 distance between f2-f1 using diag covariance:", f2.skl_distance_diag(f1) + print '\n' + print "KL2 distance between f1-f2 using full covariance:", f1.skl_distance_full(f2) + print "KL2 distance between f1-f1 using full covariance:", f1.skl_distance_full(f1) + print "KL2 distance between f2-f2 using full covariance:", f2.skl_distance_full(f2) + print "KL2 distance between f2-f1 using full covariance:", f2.skl_distance_full(f1) + print '\n' + print "c2 distance between f1-f2:", f1.c2_distance(f2) + print "c2 distance between f1-f1:", f1.c2_distance(f1) + print "c2 distance between f2-f2:", f2.c2_distance(f2) + print "c2 distance between f2-f1:", f2.c2_distance(f1) + print '\n' + print "euclidean distance between f1-f2:", f1.euclidean_distance(f2) + print "euclidean distance between f1-f1:", f1.euclidean_distance(f1) + print "euclidean distance between f2-f2:", f2.euclidean_distance(f2) + print "euclidean distance between f2-f1:", f2.euclidean_distance(f1) + print '\n' + print "bhattacharyya distance between f1-f2:", f1.bhattacharyya_distance(f2) + print "bhattacharyya distance between f1-f1:", f1.bhattacharyya_distance(f1) + print "bhattacharyya distance between f2-f2:", f2.bhattacharyya_distance(f2) + print "bhattacharyya distance between f2-f1:", f2.bhattacharyya_distance(f1) + + # ag = AudioGenerator() + # sound = ag.synthesisor(nChannels = 2, framerate=44100, amplitude=0.5, length=0.02) + # ag.write_wav_file(samples=sound, nframes=None, length=0.02, filename='audio/test.wav') + # + # l = dir(__builtins__) + # d = __builtins__.__dict__ + # pprint(l) + # pprint(d) + + # # Load audio data + # audio_files = [i for i in os.listdir(audioDir) if i.endswith('wav') and not i.startswith('.')] + # audio_list = [] + # for i in audio_files: + # ao = AudioObj() + # ao.name = splitext(i)[0] + # ao.data = np.array(read(join(audioDir, i))[1], dtype=float) + # audio_list.append(ao) + # + # # Calculate pairwise ditances between audio data using listed metrics + # res_list = [] + # for a1, a2 in combinations(audio_list, 2): + # # basic info + # res = DistanceObj() + # f1 = GmmDistance(a1.data) + # f2 = GmmDistance(a2.data) + # res.audio1 = a1.name + # res.audio2 = a2.name + # res.nComponents = len(f1.components) + # + # # distances between two samples should be symmetric + # res.skl_full = f1.skl_distance_full(f2) + # res.skl_diag = f1.skl_distance_diag(f2) + # res.c2 = f1.c2_distance(f2) + # res.euclidean = f1.euclidean_distance(f2) + # res.bhattacharyya = f1.bhattacharyya_distance(f2) + # + # res.save() + # res_list.append(res) + + + +if __name__ == '__main__': + main() + + + +
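In the segmentation setting these metrics are typically used to build a block-level self-distance matrix. A minimal sketch with a hypothetical block length and feature layout, assuming the old sklearn GMM API used above is available and the repo root is on sys.path:

import numpy as np
from utils.GmmMetrics import GmmDistance   # assumes the repo root is on sys.path

features = np.random.rand(1000, 20)        # frames x dimensions, e.g. MFCCs
block = 100                                # hypothetical block length in frames

models = [GmmDistance(features[i:i + block]) for i in range(0, len(features), block)]

n = len(models)
D = np.zeros((n, n))
for i in range(n):
    for j in range(i + 1, n):
        D[i, j] = D[j, i] = models[i].skl_distance_diag(models[j])

print(D.shape)   # (10, 10) block-level distance matrix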
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/MutualInfo.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,249 @@ +import sys +import scipy.spatial as ss +from scipy.special import digamma,gamma +from math import log,pi +import numpy.random as nr +import numpy as np +import random +from sklearn.metrics.pairwise import pairwise_distances +from scipy.stats import ttest_ind, ttest_rel, pearsonr, norm +from scipy.linalg import eigh, cholesky + +def mi(x,y,k=3,base=2): + """ Mutual information of x and y + x,y should be a list of vectors, e.g. x = [[1.3],[3.7],[5.1],[2.4]] + if x is a one-dimensional scalar and we have four samples + """ + assert len(x)==len(y), "Lists should have same length" + assert k <= len(x) - 1, "Set k smaller than num. samples - 1" + intens = 1e-10 #small noise to break degeneracy, see doc. + + x = [list(p + intens*nr.rand(len(x[0]))) for p in x] + y = [list(p + intens*nr.rand(len(y[0]))) for p in y] + points = zip2(x,y) + #Find nearest neighbors in joint space, p=inf means max-norm + tree = ss.cKDTree(points) + dvec = [tree.query(point,k+1,p=float('inf'))[0][k] for point in points] + + a,b,c,d = avgdigamma(x,dvec), avgdigamma(y,dvec), digamma(k), digamma(len(x)) + + return (-a-b+c+d)/log(base) + +def mi2(x,y,k=3,base=2): + """ Mutual information of x and y + x,y should be a list of vectors, e.g. x = [[1.3],[3.7],[5.1],[2.4]] + if x is a one-dimensional scalar and we have four samples + """ + assert len(x)==len(y), "Lists should have same length" + assert k <= len(x) - 1, "Set k smaller than num. samples - 1" + intens = 1e-10 #small noise to break degeneracy, see doc. + + x += intens * nr.rand(len(x)) + y += intens * nr.rand(len(y)) + points = np.array([x,y]).T + + #Find nearest neighbors in joint space, p=inf means max-norm + tree = ss.cKDTree(points) + dvec = [tree.query(point,k+1,p=float('inf'))[0][k] for point in points] + a,b,c,d = avgdigamma(x[np.newaxis,:].T, dvec), avgdigamma(y[np.newaxis,:].T, dvec), digamma(k), digamma(len(x)) + + mi = (-a-b+c+d)/log(base) + if mi < 0: + return 0.0 + return mi + +def mi3(x,y,k=3,base=2): + """ Mutual information of x and y + x,y should be a list of vectors, e.g. x = [[1.3],[3.7],[5.1],[2.4]] + if x is a one-dimensional scalar and we have four samples + """ + if len(x) < 1000: + return mi2(x,y,k,base) + + intens = 1e-10 #small noise to break degeneracy, see doc. 
+ + sampleSize = 500 + c = digamma(k) + d = digamma(sampleSize) + num_iter = 1 + int(len(x)/1000) + + mi_mean = np.zeros(num_iter,dtype=np.float64) + for i in xrange(num_iter): + ix = np.random.randint(low=0, high=len(x), size=sampleSize) + + xs = x[ix] + ys = y[ix] + xs += intens * nr.rand(len(xs)) + ys += intens * nr.rand(len(ys)) + points = np.array([xs,ys]).T + + #Find nearest neighbors in joint space, p=inf means max-norm + tree = ss.cKDTree(points) + dvec = [tree.query(point,k+1,p=float('inf'))[0][k] for point in points] + a,b = avgdigamma(xs[np.newaxis,:].T, dvec), avgdigamma(ys[np.newaxis,:].T, dvec) + + mi_mean[i] = (-a-b+c+d)/log(base) + + mi = np.mean(mi_mean) + if mi < 0: + return 0.0 + return mi + + +def mic(xs,ys,intens,s,k): + xs += intens * nr.rand(s) + ys += intens * nr.rand(s) + points = np.array([xs,ys]).T + tree = ss.cKDTree(points) + dvec = [tree.query(point,k+1,p=float('inf'))[0][k] for point in points] + return avgdigamma(xs[np.newaxis,:].T, dvec), avgdigamma(ys[np.newaxis,:].T, dvec) + + +def dmi(x,y,k=3,base=2): + ''' Mutual information distance between x and y.''' + + if np.array_equal(x, y): + return 0.0 + intens = 1e-10 #small noise to break degeneracy + c = digamma(k) + s = len(x) + lb = 1.0/log(base) + + # for small samples calculate mi directly + if s < 1000: + a,b = mic(x,y,intens,s,k) + d = digamma(s) + mx = (-c+d)*lb + nmi = (-a-b+c+d)*lb / mx + if nmi > 1 : nmi = 1.0 # handle the case when mi of correlated samples is overestimated + if nmi < 0 : nmi = 0.0 # handle estimation error resulting in small negative values + return 1.0-nmi + + sampleSize = 500 + num_iter = 1 + int(s/1000) + d = digamma(sampleSize) + + mi_mean = np.zeros(num_iter,dtype=np.float64) + for i in xrange(num_iter): + ix = np.random.randint(low = 0, high = s, size=sampleSize) + a,b = mic(x[ix],y[ix],intens,sampleSize,k) + mi_mean[i] = (-a-b+c+d)*lb + + mmi = np.mean(mi_mean) + mx = (-c+d)*lb + nmi = mmi / mx + print mmi,mx,nmi + + if nmi > 1 : nmi = 1.0 # handle the case when mi of correlated samples is overestimated + if nmi < 0 : nmi = 0.0 # handle estimation error resulting in small negative values + return 1.0-nmi + + +def avgdigamma(points,dvec): + #This part finds number of neighbors in some radius in the marginal space + #returns expectation value of <psi(nx)> + N = len(points) + tree = ss.cKDTree(points) + avg = 0. + for i in range(N): + dist = dvec[i] + #subtlety, we don't include the boundary point, + #but we are implicitly adding 1 to kraskov def bc center point is included + num_points = len(tree.query_ball_point(points[i],dist-1e-15,p=float('inf'))) + avg += digamma(num_points)/N + return avg + +def zip2(*args): + #zip2(x,y) takes the lists of vectors and makes it a list of vectors in a joint space + #E.g. zip2([[1],[2],[3]],[[4],[5],[6]]) = [[1,4],[2,5],[3,6]] + return [sum(sublist,[]) for sublist in zip(*args)] + + +def test_mi(num_samples=9000): + ''' + Generate correlated multivariate random variables: + ''' + + # num_samples = 9000 + + # Generate samples from three independent normally distributed random + # variables (with mean 0 and std. dev. 1). + X = norm.rvs(size=(3, num_samples)) + + # The desired covariance matrix. + r = np.array([ + [ 3.40, -2.75, -2.00], + [ -2.75, 5.50, 1.50], + [ -2.00, 1.50, 1.25] + ]) + + # Choice of cholesky or eigenvector method. + method = 'cholesky' + #method = 'eigenvectors' + + if method == 'cholesky': + # Compute the Cholesky decomposition. + c = cholesky(r, lower=True) + else: + # Compute the eigenvalues and eigenvectors. 
+ evals, evecs = eigh(r) + # Construct c, so c*c^T = random. + c = np.dot(evecs, np.diag(np.sqrt(evals))) + + # Convert the data to correlated random variables. + Y1 = np.dot(c, X)[2,:] + Y2 = norm.rvs(size=(3, num_samples))[0,:] + X = X[0,:] + + xx = mi2(X, X) + xy1 = mi2(X, Y1) + xy2 = mi2(X, Y2) + print 'identical', xx + print 'correlated', xy1 + print 'uncorrelated', xy2 + + xx = mi3(X, X) + xy1 = mi3(X, Y1) + xy2 = mi3(X, Y2) + print 'identical', xx + print 'correlated', xy1 + print 'uncorrelated', xy2 + + xx = dmi(X, X) + xy1 = dmi(X, Y1) + xy2 = dmi(X, Y2) + print 'identical', xx + print 'correlated', xy1 + print 'uncorrelated', xy2 + + +def print_progress(counter="", message=""): + sys.stdout.write("%(counter)s: %(message)s" %vars()) + sys.stdout.flush() + sys.stdout.write("\r\r") + +def test_direct(num_samples): + X = norm.rvs(size=(1, num_samples))[0,:] + return mi2(X, X) + +def main(): + test_mi() + raise SystemExit + + import matplotlib.pyplot as plt + figure = plt.figure() + axis = figure.add_subplot(111) + series = np.linspace(100,25000,20) + # series = np.linspace(10,250,20) + + # result = [test_direct(int(x)) for x in series] + result = [] + for i,x in enumerate(series) : + print_progress(i) + result.append(test_direct(int(x))) + axis.plot(series,result) + plt.show() + # test_direct(1500) + +if __name__ == '__main__': + main()
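The module above implements the Kraskov k-nearest-neighbour mutual information estimator (mi/mi2, with mi3 subsampling long inputs) and a normalised MI distance dmi in [0, 1]. The following is a minimal usage sketch, not part of the changeset: it assumes the module is importable as utils.MutualInfo, that mi2/dmi receive 1-D float arrays (as the noise-adding code expects), and that it runs under the repo's Python 2 interpreter. The analytic value -0.5*log2(1 - rho^2) is included only as a sanity check for jointly Gaussian data.

    import numpy as np
    from utils.MutualInfo import mi2, dmi

    # Jointly Gaussian samples with known correlation, so the kNN estimate can be
    # compared against the closed-form MI of a bivariate Gaussian.
    rho, n = 0.8, 2000
    cov = np.array([[1.0, rho], [rho, 1.0]])
    x, y = np.random.multivariate_normal([0.0, 0.0], cov, size=n).T

    est = mi2(x, y, k=3, base=2)                       # kNN (Kraskov) estimate, in bits
    ref = -0.5 * np.log(1.0 - rho ** 2) / np.log(2.0)  # analytic MI, in bits
    print('estimated MI: %.3f bits, analytic MI: %.3f bits' % (est, ref))
    print('MI distance dmi(x, y): %.3f' % dmi(x, y))   # 0 for identical inputs, near 1 for independent
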
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/OnsetDetectBase.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,322 @@ +from vampy import * +from numpy import zeros,float64, float32, array +import sys +from scipy.signal import butter +import numpy as np + +''' +Define a common base class where we define the methods common to plugins without +a fusion process involved. +The base class implements adaptive whitening and onset location backtracking. +This also makes individual plugins easier to change / manage / overview. +''' + +class OnsetDetectBase(object): + # WARNING: Apparently vampy doesn't handle errors in super classes (bases) as gracefully as they are handeled + # in the single class scenario with no inheritnace. This is to be fixed later in vampy itself. + # For now syntax errors, missing imports, etc... are likely to cause segfault, without printing a detailed python traceback. + # However, the source of the error is still printed in debug mode, so at least we know which function to fix. + + def __init__(self,inputSampleRate): + self.vampy_flags = vf_DEBUG + + # basic common parameters + self.preferredStepSecs = 0.01161 + # self.preferredStepSecs = 0.02322 + self.inputSampleRate = inputSampleRate + self.stepSize = 0 + self.blockSize = 0 + self.channels = 0 + + # user configurable parameters + self.threshold = 50 + self.delta_threshold = 0.0 + self.backtracking_threshold = 1.9 + self.polyfitting_on = True + self.medfilter_on = True + self.LPfilter_on = True + self.whitening_on = False + self.simplePick = False + + # whitening + self.whitenRelaxCoeff = 0.9997 + self.whitenFloor = 0.01 + self.magPeaks = None + self.medianWin = 7 + self.aCoeffs = [1.0000, -0.5949, 0.2348] + self.bCoeffs = [0.1600, 0.3200, 0.1600] + self.cutoff = 0.34 + + + def initialise(self,channels,stepSize,blockSize): + self.channels = channels + self.stepSize = stepSize + self.blockSize = blockSize + self.half_length = self.blockSize * 0.5 + 1.0 + self.magPeaks = zeros(self.half_length, dtype = float64) + return True + + def reset(self): + self.magPeaks = None + return None + + def getMaker(self): + return 'Mi Tian, Testing' + + def getIdentifier(self): + return 'vampy-base' + + def getPreferredBlockSize(self): + '''Preferred window size is twice the preferred step size''' + # return 2048 + return int(self.getPreferredStepSize() * 2) + + def getPreferredStepSize(self): + '''Preferred block size is set to 256 in the QM Vamp plugin in case SR is 22.5kHz''' + step = int(self.inputSampleRate * self.preferredStepSecs + 0.0001) + if step < 1 : return 1 + return step + # return 1024 + + def getMaxChannelCount(self): + return 1 + + def getInputDomain(self): + return FrequencyDomain + + def getParameterDescriptors(self): + '''Define all common parameters of the plugins.''' + + threshold = ParameterDescriptor() + threshold.identifier ='threshold' + threshold.name ='Detection Sensitivity' + threshold.description = 'Detection Sensitivity' + threshold.unit = '%' + threshold.minValue = 0 + threshold.maxValue = 100 + threshold.defaultValue = 50 + threshold.isQuantized = False + + delta_thd = ParameterDescriptor() + delta_thd.identifier ='dthreshold' + delta_thd.name ='Delta Threshold' + delta_thd.description = 'Delta threshold used for adaptive theresholding using the median of the detection function' + delta_thd.unit = '' + delta_thd.minValue = -1.0 + delta_thd.maxValue = 1.0 + delta_thd.defaultValue = 0.0 + delta_thd.isQuantized = False + + # NOTE: GF: Not sure this should really be called a threshold. 
'Tolerance' may be better. + bt_thd = ParameterDescriptor() + bt_thd.identifier ='bt-threshold' + bt_thd.name ='Backtracking Threshold' + bt_thd.description = 'Backtracking threshold used determine the stopping condition for backtracking the onset location' + bt_thd.unit = '' + bt_thd.minValue = -1.0 + bt_thd.maxValue = 3.0 + bt_thd.defaultValue = 1.9 + bt_thd.isQuantized = False + + cutoff = ParameterDescriptor() + cutoff.identifier ='cut-off' + cutoff.name ='cut off value' + cutoff.description = 'low pass filter cut off value' + cutoff.unit = '' + cutoff.minValue = 0.1 + cutoff.maxValue = 0.6 + cutoff.defaultValue = 0.34 + cutoff.isQuantized = False + + med_thd = ParameterDescriptor() + med_thd.identifier ='med-threshold' + med_thd.name ='Median filter window' + med_thd.description = 'Median filter window size' + med_thd.unit = '' + med_thd.minValue = 3.0 + med_thd.maxValue = 12.0 + med_thd.defaultValue = 7.0 + med_thd.isQuantized = True + med_thd.quantizeStep = 1 + + # save some typing by defining a descriptor type + boolDescriptor = ParameterDescriptor() + boolDescriptor.isQuantized = True + boolDescriptor.minValue= 0 + boolDescriptor.maxValue= 1 + boolDescriptor.quantizeStep = 1 + + polyfit = ParameterDescriptor(boolDescriptor) + polyfit.identifier='polyfit' + polyfit.name='polynomial fitting' + polyfit.description='Use polynomial fitting to evaluate detection function peaks.' + + medfilt = ParameterDescriptor(boolDescriptor) + medfilt.identifier='medfilt' + medfilt.name='median filtering' + medfilt.description='Use median filtering' + + filtfilt = ParameterDescriptor(boolDescriptor) + filtfilt.identifier='filtfilt' + filtfilt.name='low-pass filtering' + filtfilt.description='Use zero-phase foward-backward low-pass filtering' + + whitening = ParameterDescriptor(boolDescriptor) + whitening.identifier='whitening' + whitening.name='Adaptive whitening' + whitening.description='Turn adaptive whitening on or off' + whitening.defaultValue = False + + return ParameterList(threshold, delta_thd, bt_thd, cutoff, med_thd, whitening, polyfit, medfilt, filtfilt) + + + def setParameter(self,paramid,newval): + if paramid == 'threshold' : + self.threshold = newval + print >> sys.stderr, "sensitivity threshold: ", newval + if paramid == 'dthreshold' : + self.delta_threshold = newval + print >> sys.stderr, "delta threshold: ", newval + if paramid == 'bt-threshold' : + self.backtracking_threshold = newval + print >> sys.stderr, "backtracking threshold: ", newval + if paramid == 'cut-off' : + self.cutoff = newval + self.bCoeffs, self.aCoeffs = butter(2, self.cutoff) + print >> sys.stderr, "low pass filter cut off value: ", newval + if paramid == 'med-threshold' : + self.medianWin = newval + print >> sys.stderr, "meidan filter windown: ", newval + if paramid == 'medfilt' : + self.medfilter_on = newval == 1.0 + print >> sys.stderr, "median filering: ", self.medfilter_on, newval + if paramid == 'filtfilt' : + self.LPfilter_on = newval == 1.0 + print >> sys.stderr, "foward-backward filering: ", self.LPfilter_on, newval + if paramid == 'polyfit' : + self.polyfitting_on = newval == 1.0 + print >> sys.stderr, "polynomial fitting: ", self.polyfitting_on, newval + if paramid == 'whitening' : + self.whitening_on = newval == 1.0 + print >> sys.stderr, "whitening: ", self.whitening_on, newval + + return None + + + def getParameter(self,paramid): + if paramid == 'whitening' : + if self.whitening_on : + return 1.0 + else : + return 0.0 + if paramid == 'medfilt' : + if self.medfilter_on : + return 1.0 + else : + return 
0.0 + if paramid == 'filtfilt' : + if self.LPfilter_on : + return 1.0 + else : + return 0.0 + if paramid == 'polyfit' : + if self.polyfitting_on : + return 1.0 + else : + return 0.0 + if paramid == 'threshold' : + return self.threshold + if paramid == 'dthreshold' : + return self.delta_threshold + if paramid == 'bt-threshold' : + return self.backtracking_threshold + # if paramid == 'tol-threshold' : + if paramid == 'med-threshold' : + return self.medianWin + if paramid == 'cut-off' : + return self.cutoff + return 0.0 + + + + def getGenericOutputDescriptors(self): + '''Define 3 outputs ike in the QM plugin. First is the raw detecion function, second is the smoothed one, + third is the actual note onset outputs. Note: in QM-Vamp the onsets are the first output.''' + # We call this getGenericOutputDescriptors as we don't want the base to have real outputs. + # Identifiers shoudl be defined in the sub-classes therefore they are ommitted here. + + DF_Descriptor = OutputDescriptor() + DF_Descriptor.hasFixedBinCount=True + DF_Descriptor.binCount=1 + DF_Descriptor.hasKnownExtents=False + DF_Descriptor.isQuantized=False + DF_Descriptor.sampleType = OneSamplePerStep + DF_Descriptor.unit = '' + DF_Descriptor.name = 'Onset Detection Function' + DF_Descriptor.description ='Onset Detection Function' + + # NOTE: Just change what we need, all oter parameters are inherited from DF_Descriptor + SDF_Descriptor = OutputDescriptor(DF_Descriptor) + SDF_Descriptor.name = 'Smoothed Onset Detection Function' + SDF_Descriptor.description ='Smoothed Onset Detection Function' + SDF_Descriptor.sampleType = VariableSampleRate + SDF_Descriptor.sampleRate = 1.0 / self.preferredStepSecs + + Onset_Descriptor = OutputDescriptor() + Onset_Descriptor.name = 'Onsets' + Onset_Descriptor.description ='Onsets using spectral difference' + Onset_Descriptor.hasFixedBinCount=True + Onset_Descriptor.binCount=0 + Onset_Descriptor.hasKnownExtents=False + Onset_Descriptor.isQuantized=False + Onset_Descriptor.sampleType = VariableSampleRate + Onset_Descriptor.unit = '' + + return DF_Descriptor, SDF_Descriptor, Onset_Descriptor + + + def backtrack(self, onset_index, smoothed_df): + '''Backtrack the onsets to an earlier 'perceived' location from the actually detected peak... + This is based on the rationale that the perceived onset tends to be a few frames before the detected peak. + This tracks the position in the detection function back to where the peak is startng to build up. + Notice the "out of the blue" parameter: 0.9. 
(Ideally, this should be tested, evaluated and reported...)''' + prevDiff = 0.0 + while (onset_index > 1) : + diff = smoothed_df[onset_index] - smoothed_df[onset_index-1] + if diff < prevDiff * self.backtracking_threshold : break + prevDiff = diff + onset_index -= 1 + return onset_index + + def trackDF(self, onset1_index, df2): + '''In the second round of detection, remove the known onsets from the DF by tracking from the peak given by the first round + to a valley to deminish the recognised peaks on top of which to start new detection.''' + for idx in xrange(len(onset1_index)) : + remove = True + for i in xrange(onset1_index[idx], 1, -1) : + if remove : + if df2[i] >= df2[i-1] : + df2[i] == 0.0 + else: + remove = False + return df2 + + def whiten(self,magnitudeSpectrum): + '''This function reproduces adaptive whitening as described in Dan Stowell's paper.''' + if self.magPeaks is None : + self.magPeaks = zeros(self.half_length, dtype = float32) + m = array(magnitudeSpectrum, dtype=float32) + idx = m < self.magPeaks + # print " m", m[idx] + + m[idx] += (self.magPeaks[idx] - m[idx]) * self.whitenRelaxCoeff + m[m < self.whitenFloor] = self.whitenFloor + self.magPeaks = m + + magnitudeSpectrum /= m + + return magnitudeSpectrum + + + +
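For reference, the whiten() method above follows an adaptive whitening recursion: each frequency bin keeps a slowly decaying peak memory, and the current magnitude is divided by that memory. Below is a standalone sketch of the same recursion applied to a whole magnitude spectrogram, using the class defaults (relax coefficient 0.9997, floor 0.01); it is an illustration for reading the code, not part of the changeset and not tied to the Vamp host.

    import numpy as np

    def adaptive_whiten(frames, relax_coeff=0.9997, floor=0.01):
        '''frames: magnitude spectra, shape (n_frames, n_bins).'''
        peaks = None
        out = np.empty_like(frames, dtype=np.float32)
        for t, mag in enumerate(frames.astype(np.float32)):
            if peaks is None:
                peaks = np.zeros_like(mag)
            m = mag.copy()
            decay = m < peaks                                    # bins that fell below the running peak
            m[decay] += (peaks[decay] - m[decay]) * relax_coeff  # let the peak memory decay slowly
            m[m < floor] = floor                                 # keep the divisor away from zero
            peaks = m
            out[t] = mag / m                                     # normalise each bin by its peak memory
        return out

    # toy example: whiten a random 100-frame "spectrogram"
    whitened = adaptive_whiten(np.abs(np.random.randn(100, 1025)))
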
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/OnsetPlotProc.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,95 @@ +import numpy as np +from numpy import * +import sys, resource +# import matplotlib.pyplot as plt +import subprocess as sp +import cPickle as pickle + +# set this True or False to enable/disable plotting +plot_on = False +# plot_on = True + +if plot_on : + import matplotlib.pyplot as plt + +class OnsetPlot(object): + '''This class allows creating a series of plots easily.''' + + def __init__(self): + self.signal_plots = [] + self.marker_plots = [] + self.subplots = {} + + def reset(self): + self.__init__() + + def update(self,obj): + self.signal_plots = obj.signal_plots + self.marker_plots = obj.marker_plots + self.subplots = obj.subplots + + def add_plot(self,signal,subplot=-1,title=""): + self.signal_plots.append([signal,subplot,title]) + + def plot_signal(self,signal,subplot,title): + axes = self.figure.add_subplot(subplot) + axes.plot(signal) + if title : axes.set_title(title) + plt.axis('tight') + self.subplots.update({subplot:(axes,signal)}) + + def plot_markers(self,markers,symbol,subplot): + values = zeros(len(markers)) + 1 + axes,signal = self.subplots[subplot] + axes.plot(markers,np.array(signal)[markers],symbol) + + def add_markers(self,markers,symbol="g^",subplot=-1): + self.marker_plots.append([markers,symbol,subplot]) + + def show_in_subprocess(self): + '''Marshal the object into a separate subprocess to fix OS/X issues with threading.''' + cmd = "python %s" %__file__ + print "executing:", cmd + proc = sp.Popen(cmd, stdin = sp.PIPE, stdout = sp.PIPE, shell=True) + pickle.dump(self,proc.stdin) + result = proc.stdout.readlines() + for item in result: + print str(item).strip() + proc.wait() + return result + + def show(self): + self.figure = plt.figure(figsize=(15,6)) + num_plots = len(self.signal_plots) + for i,s in enumerate(self.signal_plots) : + j = i+1 + if s[1] < 1 : + s[1] = int("%(num_plots)s1%(j)s" %locals()) + else : + k = s[1] + s[1] = int("%(num_plots)s1%(k)s" %locals()) + print "plotting:",s[2] + s = tuple(s) + self.plot_signal(*s) + # plot marketrs on the last subplot + for m in self.marker_plots : + if m[2] < 1 or m[2] > num_plots: + m[2] = int("%(num_plots)s1%(num_plots)s" %locals()) + else : + k = m[2] + m[2] = int("%(num_plots)s1%(k)s" %locals()) + m = tuple(m) + self.plot_markers(*m) + plt.show(block=True) + +# create a module-level instance that can be reused across several files +onset_plot = OnsetPlot() + +if __name__ == '__main__': + obj = pickle.load(sys.stdin) + onset_plot = OnsetPlot() + onset_plot.update(obj) + onset_plot.show() + + +
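A minimal usage sketch of the plotting helper above, not part of the changeset: it assumes the module-level plot_on flag has been switched to True (it is False as committed, so matplotlib is never imported) and that the module is importable as utils.OnsetPlotProc.

    import numpy as np
    from utils.OnsetPlotProc import OnsetPlot    # requires plot_on = True in that module

    df = np.abs(np.random.randn(500))            # a mock detection function
    smoothed = np.convolve(df, np.ones(9) / 9.0, mode='same')

    plot = OnsetPlot()
    plot.add_plot(df, title='raw detection function')
    plot.add_plot(smoothed, title='smoothed detection function')
    plot.add_markers([50, 120, 310], symbol='g^')   # markers land on the last subplot by default
    plot.show()                                     # or plot.show_in_subprocess() to dodge OS X threading issues
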
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/PathTracker.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,528 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +TempoPathTrackerUtil.py + +Created by George Fazekas on 2014-04-06. +Copyright (c) 2014 . All rights reserved. + +This program implements max path tracker combining ideas from dynamic programming and the Hough line transform. +It may be used to track tempo tracks in tempograms, partials in STFT spectrograms, or similar tasks. + +""" + +import os, sys, itertools +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext +from scipy import ndimage +from scipy.ndimage.filters import maximum_filter, minimum_filter, median_filter, uniform_filter +from math import ceil, floor +from numpy import linspace +from numpy.linalg import norm +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import scipy.spatial as ss +from math import sqrt +from copy import deepcopy +from skimage.feature import peak_local_max + +SSM_PATH = '/Users/mitian/Documents/experiments/mit/segmentation/combined/iso/ssm_data_combined' +GT_PATH = '/Users/mitian/Documents/audio/annotation/isophonics' +TRACK_PATH = '/Users/mitian/Documents/experiments/mit/segmentation/combined/iso/tracks' +# SSM_PATH = '/Users/mitian/Documents/experiments/mit/segmentation/combined/qupujicheng/ssm_data1' +# GT_PATH = '/Users/mitian/Documents/experiments/mit/annotation/qupujicheng1/lowercase' +# TRACK_PATH = '/Users/mitian/Documents/experiments/mit/segmentation/combined/qupujicheng/tracks' + +class Track(object): + '''A track object representing a single fixed length path in the data.''' + + track_ID = 0 + + def __init__(self,start): + self.node_array = [] + self.pair_array = [] + self.start = start + self.id = Track.track_ID + Track.track_ID += 1 + self.sorted = False + self.end = self.get_end() + + def __eq__(self,other): + return self.id == other.id + + def add_point(self,point): + '''Add a node/point to the trace.''' + self.node_array.append(point) + + def add_pairs(self,point,pair): + '''Add neighbouring points to aid puruning double traces.''' + self.pair_array.append((point,pair)) + + @property + def length(self): + '''Calculate track length on the time axis.''' + nodes = np.array(self.node_array) + if len(nodes) : + return max(nodes[:,0]) - min(nodes[:,0]) + return 0 + + @property + def mean(self): + nodes = np.array(self.node_array) + return nodes.mean()[1] + + @property + def start_x(self): + return self.start[0] + + '''Replacing the property in the original implementation with a func to avoid the AttributeError: can't set attribute''' + # @property + # def end(self): + # if not self.node_array : + # return self.start + # if not self.sorted : + # self.node_array = sorted(self.node_array) + # self.start = self.node_array[0] + # return self.node_array[-1] + + def get_end(self): + if not self.node_array : + return self.start + if not self.sorted : + self.node_array = sorted(self.node_array) + self.start = self.node_array[0] + return self.node_array[-1] + + def join(self, other): + '''Join self with other by absorbing the nodes of other.''' + if not len(other.node_array): + # print "Warning: Empty track encountered." 
+ return None + self.node_array.extend(other.node_array) + self.node_array = list(set(self.node_array)) + if other.end[0] < self.start[0] : + print "Info: Starting point moved from ", self.start[0], " to " ,other.end[0] + self.start = other.end + + def concatenate(self, other): + if (not len(other.node_array)) or (not len(self.node_array)) : + # print "Warning: Empty track encountered." + return None + self.end = other.end + self.node_array.extend(other.node_array) + self.node_array = list(set(self.node_array)) + self.node_array.sort() + +class PathTracker(object): + '''The main tracker object ''' + + def __init__(self): + self.track_list = [] + self.ssm = None + self.max_index = None + self.kd_tree = None + self.group_num = 0 + self.group = None + + def get_local_maxima(self,ssm, threshold = 0.7, neighborhood_size = 4): + '''Find local maxima in the ssm using a minfilt/maxfilt approach.''' + + # # uniform filter to smooth out discontinuities in the tracks + # ssm = uniform_filter(ssm, size = neighborhood_size) + # + # # basic noise reduction + # ssm[ssm < threshold] = 0.0 + # ssm[ssm > threshold] = 1.0 + # + # # maxfilt/minfilt local maxima detection + # data_max = maximum_filter(ssm, size = neighborhood_size) + # maxima = (ssm == data_max) + # data_min = minimum_filter(ssm, size = neighborhood_size) + # diff = ((data_max - data_min) > 0.00001) + # maxima[diff == 0] = 0 + + maxima = (ssm>threshold) + # create a list of tuples indexing the nonzero elements of maxima + iy,ix = maxima.nonzero() + indices = zip(ix,iy) + return indices,maxima + + def get_peak_local(self, ssm, thresh=0.8, min_distance=10, threshold_rel=0.8): + '''Final local maxima using skimage built-in funcs and return them as coordinates or a boolean array''' + + reduced_ssm = deepcopy(ssm) + reduced_ssm[reduced_ssm<thresh] = 0.0 # a hard thresholding for finding maxima + ssm[ssm<0.6] = 0.0 + np.fill_diagonal(reduced_ssm, 0) # zero fill dignonal in case it will be picked as the only maxima in the neighborhood + indices = peak_local_max(reduced_ssm, min_distance=min_distance, threshold_rel=threshold_rel, indices=True) + maxima = peak_local_max(reduced_ssm, min_distance=min_distance, threshold_rel=threshold_rel, indices=False) + return reduced_ssm, indices, maxima + + def prune_duplicates(self, maxima, size): + track_list = deepcopy(self.track_list) + # print "len track_list 1", len(track_list) + for track in track_list: + if not track.node_array: + self.track_list.remove(track) + # print "len track_list 2", len(self.track_list) + + + track_list = deepcopy(self.track_list) + print "self.track_list start", len(self.track_list) + for track1, track2 in itertools.combinations(track_list, 2): + points1 = track1.node_array + points2 = track2.node_array + if abs(track1.end[1] - track2.end[1]) > 10 : + continue + if abs(track1.start[1] - track2.start[1]) > 10 : + continue + if abs(track1.start[0] - track2.start[0]) > 10 : + continue + # print track1.id, track2.id + dist = [((i[0]-j[0])**2 + (i[1]-j[1])**2) for i in points1 for j in points2] + # if dist and sum(i < size for i in dist) > 1: + # print min(dist) + if dist and min(dist) < size : + # print min(dist) + # Nearby track found. If starts from distant positions, concatenate the two, + # otherwise discard the one with shorter lengh. 
+ if len(points1) < len(points2): + duplicate = track1 + else: + duplicate = track2 + # duplicate = sorted([points1, points2], key=len)[0] + if duplicate in self.track_list: + self.track_list.remove(duplicate) + # print "removing ", duplicate.id + print "self.track_list pruned", len(self.track_list) + + def count_groups(self): + '''Cluster the tracks within the same horizontal area for later to calcute distance''' + self.track_list.sort(key=lambda x: x.start_x) + start_points = [track.start for track in self.track_list] + # start_points.sort(key=lambda tup: tup[0]) + for i in xrange(1, len(start_points)): + if start_points[i][0] - start_points[i-1][0] > 10.0: + self.group_num += 1 + + self.groups = [[] for n in xrange(self.group_num)] + for track in self.track_list: + for group_idx in xrange(self.group_num): + self.groups[group_idx].append(track) + + print 'self.groups', len(self.groups) + pass + + def histogram(self): + '''Compare pairwise distance for tracks within the same x-axis location and group by histograming the distance''' + for group in self.groups: + group_track = np.array(group) + + pass + + def process(self, ssm, thresh=0.8, min_local_dist=20, slice_size = 2, step_thresh=0.25, track_min_len=50, track_gap=50): + '''Track path in the ssm and mask values using the set of discrete path found.''' + + self.ssm = ssm + print "ssm.shape",ssm.shape + + # max_index,maxima = self.get_local_maxima(ssm, threshold=0.95, neighborhood_size =3) + reduced_ssm,max_index,maxima = self.get_peak_local(ssm, min_distance=min_local_dist, threshold_rel=0.5) + + # build a spatial binary search tree to aid removing maxima already passed by a trace + self.max_index = np.array(max_index) + if not len(self.max_index): + print 'No maxima found.' + return np.zeros_like(ssm) + self.kd_tree = ss.cKDTree(self.max_index) + + discard_maxima = set() + + # trace forwards + for ix,iy in self.max_index : + point = (ix,iy) + if point in discard_maxima : + continue + start = point + track = Track(start) + self.track_list.append(track) + while True : + slice = self.get_neighbourhood(point, size = slice_size) + x,y = self.step(point, slice, threshold = step_thresh, direction = "forward") + if x == None : break + point = (x,y) + remove = self.get_nearest_maxima(point) + if remove and remove != start: + discard_maxima.add(remove) + maxima[y,x] = True + track.add_point(point) + print "discarded maxima: ",len(discard_maxima) + + self.max_index = [(x,y) for x,y in self.max_index if (x,y) not in discard_maxima] + + # trace back + print "Tracing back..." + for ix,iy in self.max_index : + point = (ix,iy) + track = Track(point) + self.track_list.append(track) + while True : + slice = self.get_neighbourhood(point, size = slice_size) + x,y = self.step(point, slice, threshold = step_thresh, direction = "backward") + if x == None : break + point = (x,y) + track.add_point(point) + maxima[y,x] = True + + print "tracing done." + + print 'tracks after tracing:', len(self.track_list) + # join forward and back traces with the same staring point + self.join_tracks() + + # concatenate nearby tracks on the same diagonal direction + self.concatenate_tracks(size=track_gap) + + # prune duplicated tracks in local neighbourhood + # self.prune_duplicates(maxima, size = 10) + maxima = maximum_filter(maxima, size=2) + # TODO: smooth paths, experiment with segmentation of individual tracks... 
+ self.count_groups() + + # empty mask for visualisation / further processing + tracks = np.zeros_like(maxima) + ssm_len = tracks.shape[0] + # assess tracks individually, skip short ones and add the rest of the tracks to the mask + for track in self.track_list : + if track.length < track_min_len : continue + track.node_array.sort() + # for point in track.node_array : + # tracks[point[1],point[0]] = 1.0 + xs, xe = track.node_array[0][1], track.node_array[-1][1] + ys, ye = track.node_array[0][0], track.node_array[-1][0] + track_len = xe - xs + for i in xrange(track_len): + if max(xs+i, ys+i) < ssm_len: + tracks[xs+i, ys+i] = 1.0 + print 'number of final tracks', len(self.track_list) + # tracks = uniform_filter(tracks.astype(np.float32), size = 2) + # tracks[tracks<0.2] = 0.0 + # tracks[tracks>=0.2] = 1.0 + + return reduced_ssm, self.max_index, tracks + + + def join_tracks(self): + '''Join tracks which share a common starting point. + This function is essentially trying to join forward traces and back traces.''' + + # collect the set of unique starting points + start_points = set() + [start_points.add(track.start) for track in self.track_list] + print "Initial Traces before joining:", len(self.track_list) + print "Unique start points:", len(start_points) + + # join tracks starting from the same point and remove the residual + for start in start_points: + shared_tracks = [x for x in self.track_list if x.start == start] + if len(shared_tracks) == 2 : + shared_tracks[1].join(shared_tracks[0]) + self.track_list.remove(shared_tracks[0]) + print "Final tracklist after joining", len(self.track_list) + return self.track_list + + def concatenate_tracks(self, size=3): + '''Concatenate the end point and start point of two sequential tracks.''' + + start_points = set() + [start_points.add(track.start) for track in self.track_list] + end_points = set() + [end_points.add(track.end) for track in self.track_list] + print "Traces before concatenation:", len(self.track_list), len(start_points), len(end_points) + for end in end_points: + xe, ye = end + if not [x for x in self.track_list if (x.end == end and x.length >1)]: continue + track = [x for x in self.track_list if x.end == end][0] + for i in xrange(1, size): + xs, ys = xe+i, ye+i + if (xs, ys) in start_points: + succeeding_track_list = [x for x in self.track_list if x.start == (xs,ys)] + if not succeeding_track_list: continue + succeeding_track = [x for x in self.track_list if x.start == (xs,ys)][0] + track.concatenate(succeeding_track) + self.track_list.remove(succeeding_track) + print "Traces after concatenation:", len(self.track_list) + return self.track_list + + def get_nearest_maxima(self,point,threshold = 5.0): + '''Find the nearest maxima to a given point using NN serach in the array of known maxima. 
+ NN serach is done usinf a KD-Tree approach because pairwise comparison is way too slow.''' + + # query tree parameters: k is the number of nearest neighbours to return, d is the distance type used (2: Euclidean), + # distance_upper_bound specifies search realm + d,i = self.kd_tree.query(point, k=1, p=2, distance_upper_bound= threshold) + if d != np.inf : + return tuple(self.max_index[i,:]) + return None + + + def get_neighbourhood(self,point,size=1): + '''Return a square matrix centered around a given point + with zero padding if point is close to the edges of the data array.''' + + # calculate boundaries + xs = point[0]-size + xe = point[0]+size+1 + ys = point[1]-size + ye = point[1]+size+1 + + # extract slice from the array cropped at edges + y,x = self.ssm.shape + slice = self.ssm[max(0,ys):min(ye,y),max(0,xs):min(xe,x)] + + # left/right padding + if xs < 0 : + leftpad = np.zeros((slice.shape[0],abs(xs))) + slice = np.hstack([leftpad,slice]) + + if xe > x : + rightpad = np.zeros((slice.shape[0],xe-x)) + slice = np.hstack([slice,rightpad]) + + # top/bottom padding + if ys < 0 : + bottompad = np.zeros((abs(ys),slice.shape[1])) + slice = np.vstack([bottompad,slice]) + + if ye > y : + toppad = np.zeros((ye-y,slice.shape[1])) + slice = np.vstack([slice,toppad]) + + return slice + + + def step(self, point, slice, threshold = 0.3, direction = "forward"): + '''Choose a step from the given point and retun the coordinate of the selected point. + + inputs: + point (x,y) is the starting coordinate in the data matrix, + slice is a square matrix centered around the given point, + threshold helps to decide where to terminate a track, + direction {forwards | backwards} describes which way to track along the X axis. + + output: + The output is always a tuple. + (None,None) in case the track is terminated or reached the boundary of the data matrix. + (x,y) for the next valid step forwards or backwards. + + Note: The algorithm never steps straight up or down, i.e. the next coordinate relates to + either the next or the previous point on the x axis. + + Note2: The intuition of this algorithm relates to both classical dynamic programming search + and that of the Hough line transform. At each step a weighted line segment is considered + corresponding to the slice of the data slice around the considered point. The line segment + is rotated around the center point and the most higlhly weighted is choosen which prescribes + the step direction of the algorithm. 
+ ''' + + backward = False + if direction == 'backward': + backward = True + x,y = point + + # create direction specific weight vector + w = np.linspace(0.0, 1.0, slice.shape[0]) + if backward : w = w[::-1] + + # calcualte weighted sums of main diagonal + a = sum(slice.diagonal() * w) + segment_weight = a.max() / sum(w) + + # adjust steps for desired direction + direction = 1 + xstep = 1 + if backward : + xstep = -1 + direction *= -1 + + xs,ys = x+xstep, y+direction + yd,xd = self.ssm.shape + + # Terminate tracking if the weighted mean of the segment is below a threshold + if segment_weight < threshold : + # print "Terminating due to thd" + return None,None + + # Terminate tracking if data matrix bounds are reached + if xs < 0 or xs >= xd or ys < 0 or ys >= yd : + # print "Terminating due to bound" + return None,None + + return xs,ys + + +def main(): + + plot = "-p" in sys.argv + plot = True + + tracker = PathTracker() + + # ssm = np.loadtxt('/Users/mitian/Documents/hg/py-features/data/ssm.txt', delimiter=',') + # gt = np.genfromtxt('/Users/mitian/Documents/audio/annotation/isophonics/06YellowSubmarine.txt',usecols=0) + # ssm = np.loadtxt('/Users/mitian/Documents/experiments/mit/segmentation/combined/iso/ssm_data/1-12ShesOutOfMyLife-otsu.txt', delimiter=',') + # gt = np.genfromtxt('/Users/mitian/Documents/audio/annotation/isophonics/1-12ShesOutOfMyLife.txt',usecols=0) + + ssm_files = [x for x in os.listdir(SSM_PATH) if not x.startswith('.')] + ssm_files = [join(SSM_PATH, x) for x in ssm_files] + ssm_files.sort() + gt_files = [x for x in os.listdir(GT_PATH) if not x.startswith('.')] + gt_files = [join(GT_PATH, x) for x in gt_files] + gt_files.sort() + + for i, x in enumerate(ssm_files): + ssm = np.genfromtxt(x, delimiter=',') + gt = np.genfromtxt(gt_files[i], usecols=0) + # gt = np.genfromtxt(gt_files[i], delimiter=',', usecols=0) + audio_name = splitext(basename(gt_files[i]))[0] + if isfile(join(TRACK_PATH, audio_name+'.txt')): continue + print 'Processing:', audio_name + + reduced_ssm, maxima, tracks = tracker.process(ssm, thresh=0.5, min_local_dist=20, slice_size=20, step_thresh=0.4, track_min_len=50, track_gap=50) + np.savetxt(join(TRACK_PATH, audio_name+'.txt'), tracks, delimiter=',') + + track_df = np.sum(tracks, axis=-1) + # track_df = np.zeros(len(tracks)) + # print len(tracker.track_list) + # for track in tracker.track_list: + # start, end = track.start[0], track.end[0] + # # if (track.length != len(tracks)-1 and start < end): + # # track_df[start:end] += 1 + # track_df[start] += 1 + # track_df[end] += 1 + + if plot : + ax1 = plt.subplot(131) + ax1.imshow(ssm, cmap='Greys') + ax1.vlines(gt / gt[-1] * len(track_df), 0, len(track_df), colors='r') + + ax2 = plt.subplot(132) + ax2.imshow(reduced_ssm, cmap='Greys') + ax2.scatter(zip(*maxima)[0], zip(*maxima)[1], s=5, c='y') + ax2.set_xlim([0, len(tracks)]) + ax2.set_ylim([len(tracks), 0]) + + ax3 = plt.subplot(133) + ax3.imshow(tracks, cmap='Greys') + # ax2.plot(np.arange(0, len(tracks)), track_df*10) + ax3.vlines(gt / gt[-1] * len(track_df), 0, len(track_df), colors='r') + ax3.set_xlim([0, len(tracks)]) + ax3.set_ylim([len(tracks), 0]) + # plt.show() + plt.savefig(join(TRACK_PATH, audio_name+'.pdf'), fomat='pdf') + plt.close() + # smoothing funcs + + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/PeakPickerUtil.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,225 @@ +import numpy as np +from numpy import * +import sys +from scipy.signal import medfilt, filtfilt, butter + +from OnsetPlotProc import onset_plot, plot_on + +class PeakPicker(object): + '''Separate Peak Picker implementation that can be used in all plugins identically.''' + + def __init__(self): + '''Initialise the PeakPicker, but for now, we just define a class for the parameters.''' + class Params(object): + '''Just create a small efficient object for storing the parameters.''' + __slots__ = ['alpha','delta','QuadThresh_a','QuadThresh_b','QuadThresh_c','aCoeffs','bCoeffs',\ + 'preWin','postWin','LP_on','Medfilt_on','Polyfit_on','isMedianPositive','rawSensitivity'] + self.params = Params() + + + def process(self, onset_df, calcu_env=False, prune = False): + '''Smooth and peak pick the detection function.''' + + smoothed_df = self.smooth_df(onset_df) + # min_old = min(smoothed_df) + # range_old = max(smoothed_df) - min_old + # smoothed_df = [((n - min_old) / range_old ) for n in smoothed_df] + + onsets = self.quadEval(smoothed_df, prune) + return smoothed_df, onsets + + def envFollower(self, sig): + '''Peak position of the signal envelope.''' + env = [] + if not len(sig): return env + i = 1 + while i < len(sig): + pos = 1 + while (i+pos) < len(sig): + if sig[i+pos] < sig[i+pos-1]: break + pos += 1 + if sig[i+pos-1] > sig[i+pos-2]: env.append(i+pos-1) + i += pos + + env = list(sort(env)) + + if not len(env): return env + + if env[-1] == len(sig): + env = env[:-1] + return env + + def quadEval(self, smoothed_df, prune=False): + '''Assess onset locations using the paramaters of a quadratic fucntion (or simple thresholding).''' + onsets = [] + x_array = np.array(xrange(-2,3),dtype=np.float32) + max_index = [] + + # if prune: + # smoothed_df = [abs(i) for i in smoothed_df] + # find maxima in the smoothed function, NOTE: could do this later with scipy.signal.argrelmax (using ver > .0.11) + for i in xrange(2,len(smoothed_df)-2) : + if smoothed_df[i] > smoothed_df[i-1] and smoothed_df[i] > smoothed_df[i+1] and smoothed_df[i] > 0 : + max_index.append(i) + # in case the last local maxima with an incomplete peak shape is missed + last = len(smoothed_df)-1 + if smoothed_df[last] >= max(smoothed_df[i] for i in xrange(last-2, last-1)): + max_index.append(last) + + # if len(max_index) == 0 : + # return onsets + if plot_on : onset_plot.add_markers(max_index,symbol="r1",subplot=4) + + # if the polynomial fitting is not on, just return the detected peaks above a threshold + # calculated using 100-rawSensitivity value considering the smallest and largest peaks + if not self.params.Polyfit_on : + if not max_index : + return onsets + max_index = np.array(max_index) + smoothed_df = np.array(smoothed_df) + smoothed_df_peaks = smoothed_df[max_index] + min_df, max_df = smoothed_df_peaks.min(), smoothed_df_peaks.max() + range_df = max_df-min_df + sensitivity = (100-self.params.rawSensitivity) / 100.0 + threshold = min_df + sensitivity * range_df + return max_index[smoothed_df[max_index]>=threshold] + + # NOTE: GF: The order of the polynomial coefficients is reversed in the C++ implementation! + # But it is numerically equivalent and accurate (checked by printing the results from the C++ code). 
+ for j in xrange(len(max_index)) : + if max_index[j] + 2 > len(smoothed_df) : + onsets.append(max_index[j]) + else : + y_array = list() + for k in xrange(-2,3) : + selMax = smoothed_df[max_index[j] + k] + y_array.append(selMax) + coeffs = polyfit(x_array,np.array(y_array),2) + # print coeffs + + if coeffs[0] < -self.params.QuadThresh_a or coeffs[2] > self.params.QuadThresh_c : + onsets.append(max_index[j]) + # print max_index[j] + + # If the arg prune is on, remove onset candidates that have spurious peaks on its both side neighbourhood (1.5-2s) + if prune : + remove = [] + step = 50 + onsets.sort() + for idx in xrange(1, len(onsets) - 1): + if onsets[idx+1] - onsets[idx] < step and onsets[idx] - onsets[idx-1] < step : + remove.append(onsets[idx]) + onsets = [i for i in onsets if not i in remove] + print 'remove', remove, onsets + return onsets + + + def smooth_df(self, onset_df): + '''Smooth the detection function by 1) removing DC and normalising, 2) zero-phase low-pass filtering, + and 3) adaptive thresholding using a moving median filter with separately configurable pre/post window sizes. + ''' + + if plot_on : onset_plot.add_plot(onset_df, title="raw detection function") + + out_df = self.removeDCNormalize(onset_df) + + if self.params.LP_on : + # Now we get the exact same filtered function produced by the QM-Vamp plugin: + out_df = filtfilt(self.params.bCoeffs, self.params.aCoeffs, out_df) + onset_plot.add_plot(out_df, title = "lowpass filtered detection function") + + if self.params.Medfilt_on : + out_df = self.movingMedianFilter(out_df) + + return out_df + + + + def movingMedianFilter(self, onset_df): + '''Adaptive thresholding implementation using a moving median filter with configurable pre/post windows. ''' + # TODO: Simplify and vectorise this where appropriate, may replace While loops with For if faster, + # perhaps use C extension module, Theano or similar... 
+ length = len(onset_df) + isMedianPositive = self.params.isMedianPositive + preWin = int(self.params.preWin) + postWin = int(self.params.postWin) + index = 0 + delta = self.params.delta / 10.0 + y = np.zeros(postWin+preWin+1, dtype=np.float64) + scratch = np.zeros(length, dtype=np.float64) + output = np.zeros(length, dtype=np.float64) + + for i in xrange(preWin) : + if index >= length : break + k = i + postWin + 1; + for j in xrange(k) : + if j < length: y[j] = onset_df[j] + scratch[index] = np.median(y[:k]) + index += 1 + + i = 0 + while True : + if i+preWin+postWin >= length : break + if index >= length : break + l = 0 + j = i + while True: + if j >= ( i+preWin+postWin+1) : break + y[l] = onset_df[j] + l += 1 + j += 1 + i += 1 + scratch[index] = np.median(y[:preWin+postWin+1]) + index += 1 + + i = max(length-postWin, 1) + while True : + if i >= length : break + if index >= length : break + k = max(i-preWin, 1) + l = 0 + j = k + while True : + if j >= length : break + y[l] = onset_df[j] + j += 1 + l += 1 + scratch[index] = np.median(y[:l]) + index += 1 + i += 1 + + # onset_plot.add_plot(scratch,title = "median filter output", subplot = 1) + onset_plot.add_plot(scratch,title = "median filter output") + + for i in xrange(length) : + value = onset_df[i] - scratch[i] - delta + output[i] = value + if isMedianPositive and value < 0.0 : + output[i] = 0.0 + + if plot_on : onset_plot.add_plot(output,title = "detection function with adaptive thresholding") + + return output.tolist() + + def removeDCNormalize(self,onset_df): + '''Remove constant offset (DC) and regularise the scale of the detection function.''' + DFmin,DFmax = self.getFrameMinMax(onset_df) + DFAlphaNorm = self.getAlphaNorm(onset_df,self.params.alpha) + for i,v in enumerate(onset_df) : + onset_df[i] = (v - DFmin) / DFAlphaNorm + return onset_df + + def getAlphaNorm(self, onset_df, alpha): + '''Calculate the alpha norm of the detecion function''' + # TODO: Simplify or vectorise this. + # a = 0.0 + # for i in onset_df : + # a += pow(fabs(i), alpha) + a = sum( np.power(fabs(onset_df), alpha) ) + a /= len(onset_df) + a = pow(a, (1.0 / alpha)) + return a + + def getFrameMinMax(self, onset_df): + '''Just return the min/max of the detecion function''' + return min(onset_df),max(onset_df)
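A minimal usage sketch of the peak picker above, not part of the changeset: the picker itself sets no defaults, so every params field must be filled in by the caller. The values below mirror OnsetDetectBase where they overlap (the order-2 Butterworth low-pass with cutoff 0.34) and are otherwise illustrative guesses, since the real values are configured by the calling plugin or SegEval.

    import numpy as np
    from scipy.signal import butter
    from utils.PeakPickerUtil import PeakPicker

    picker = PeakPicker()
    p = picker.params
    p.alpha = 9.0                              # order of the alpha-norm used when normalising the DF (assumed)
    p.delta = 0.0                              # adaptive-threshold offset (divided by 10 internally)
    p.QuadThresh_a, p.QuadThresh_b, p.QuadThresh_c = 0.005, 0.0, 0.05
    p.bCoeffs, p.aCoeffs = butter(2, 0.34)     # zero-phase low-pass, as in OnsetDetectBase
    p.preWin, p.postWin = 13, 7                # moving-median window sizes in frames (assumed)
    p.LP_on, p.Medfilt_on, p.Polyfit_on = True, True, False
    p.isMedianPositive = True
    p.rawSensitivity = 50

    df = 0.1 * np.random.rand(1000)
    df[[100, 400, 750]] += 1.0                 # planted onsets
    smoothed_df, onsets = picker.process(list(df))
    print('picked onset frames: %s' % list(onsets))
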
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/RankClustering.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,302 @@ +import sys,os,optparse +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext +import numpy as np +from numpy import log, power, pi, exp, transpose, zeros, log, ones, dot, argmin, inf, zeros_like, argsort, finfo +from sklearn.mixture import GMM +from sklearn.metrics.pairwise import pairwise_distances +from copy import copy + +from MutualInfo import print_progress +from ComputationCache import Meta, with_cache, with_pickle_dump +from gmmdist import skl_full, skl_gmm +from GmmMetrics import GmmDistance + +class Node(object): + + def __init__(self, model, distances): + self.model = model + self.distances = distances + self.neighborhood = [] + + def __str__(self): + # return 'Node(%i):%s' %(self.sorted_distances[0], self.neighborhood) + return 'Node(%i):%i, %3.2f' %(self.sorted_distances[0], self.neighborhood_size, self.average_div) + + def rankDistances(self): + '''Return the index of the sorted array.''' + self.sorted_distances = np.argsort(self.distances) + + def findNeighborhood(self, eps): + '''''' + d = self.distances[self.sorted_distances][1] + b = eps * d + if np.isinf(b) : return None + + for i in self.sorted_distances: + if self.distances[i] <= b : + self.neighborhood.append(i) + + # Brute force hack; we exclude nodes whose neighbourhood is larger than 80% of all frames + # Proper solution: use the delta assymmetric KL divergence to identify near singularities + if len(self.neighborhood) > 0.7 * len(self.sorted_distances) : + self.neighborhood = [] + + def getStatistics(self): + + self.neighborhood_size = len(self.neighborhood) + self.average_div = np.mean(self.distances[self.neighborhood[1:]]) + +class rClustering(object): + + # meta = Meta() + # + # @classmethod + # def set_cache_filename(cls, filename, cache = True, cache_location=""): + # cls.meta.cache = cache + # cls.meta.cache_file_base = filename + # cls.meta.cache_location = cache_location + + def __init__(self, eps, thresh=15, k=5, rank='max_neighbors', centering=True): + self.eps = eps + self.thresh = thresh + self.rank = rank + self.node_list = [] + self.methods = {'max_neighbors':'neighborhood_size', 'min_avg_div':'average_div'} + self.classification = [] + self.centering = centering + self.k = k + + def fit(self, data): + ''' + Arguments: + data: Input list of GMMs.''' + D = self.getPairwiseDistances(data) + if self.centering: + D[D>=finfo(np.float32).max] = inf + print "hubness before centering:", self.hubness_measure(D) + D = self.center_divergence_matrix(D) + print "hubness after centering:", self.hubness_measure(D) + for i, model in enumerate(data): + self.node_list.append(Node(model, D[i])) + for node in self.node_list: + node.rankDistances() + node.findNeighborhood(self.eps) + node.getStatistics() + + print "finding centroids" + + hub_candidates = self.findCentroids() + print "\n\n hubs:",len(hub_candidates) + if len(hub_candidates) > self.k: + hub_candidates = hub_candidates[:self.k] + self.classification = [] + + for i, node in enumerate(self.node_list): + dists = zeros(len(hub_candidates)) + for k,hub in enumerate(hub_candidates): + j = self.node_list.index(hub) + dists[k] = D[i,j] + di = argmin(dists) + sel_hub = hub_candidates[di] + self.classification.append(self.node_list.index(sel_hub)) + + return self + + def classify(self): + return self.classification + + def findCentroids(self): + # sorted_nodelist = sorted(self.node_list, key = lambda x: 
x.neighborhood_size) + sorted_nodelist = sorted(self.node_list, key = lambda x: x.__getattribute__(self.methods[self.rank]), reverse=True) + hub_candidates = [] + while True: + hub = sorted_nodelist.pop(0) + hub_candidates.append(hub) + [sorted_nodelist.remove(self.node_list[n]) for n in hub.neighborhood if self.node_list[n] in sorted_nodelist] + # print "hub.neighborhood",len(hub.neighborhood) + # for n in hub.neighborhood : + # if self.node_list[n] in sorted_nodelist : + # sorted_nodelist.remove(self.node_list[n]) + # print "removed", n, "from node list" + # print len(hub_candidates),len(sorted_nodelist) + if not sorted_nodelist: break + # print 'hub_candidates', hub_candidates + return hub_candidates + + + def getNodeRank(self): + average_div = [] + neighborhood_size = [] + node_rank = [] + sorted_nodelist = sorted(self.node_list, key = lambda x: x.__getattribute__(self.methods[self.rank])) + + for node in self.node_list: + neighborhood_size.append(node.neighborhood_size) + average_div.append(node.average_div) + node_rank.append(sorted_nodelist.index(node)) + + return neighborhood_size, average_div, node_rank + + def test(self): + for n in self.node_list: + print n + + # @with_cache(meta) + def getPairwiseDistances(self, gmm_list): + '''Calculate the pairwise distances of the input GMM data''' + n_GMMs = len(gmm_list) + distance_matrix = np.zeros((n_GMMs, n_GMMs)) + print "Creating divergence matrix." + for i in xrange(n_GMMs): + print_progress(i,"gmm node processed") + for j in xrange(i, n_GMMs): + distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j]) + distance_matrix[j][i] = distance_matrix[i][j] + np.fill_diagonal(distance_matrix, 0.0) + distance_matrix[np.isinf(distance_matrix)] = np.finfo(np.float64).max + + return distance_matrix + + def hubness_measure(self, D,r=0.45): + n = D.shape[0] + Dmean = zeros(n) + Dstd = zeros(n) + for i in xrange(n) : + Dmean[i] = np.mean(D[i][ [D[i]>0] and [D[i]!=inf] ]) + Dstd[i] = np.std(D[i][ [D[i]>0] and [D[i]!=inf] ]) + + m = Dmean.mean() + r = r*m + #print m,r + #print Dmean.min(), Dmean.max() + + N = zeros(n) + for i in xrange(n) : + s = [D[i] > 0] and [D[i] < r] + N[i] = len(D[i][s]) + sortindex = argsort(N) + hubs_mean = np.mean(Dmean[sortindex][-5:][::-1,...]) + anti_hubs_mean = np.mean(Dmean[sortindex][:5]) + return (anti_hubs_mean - hubs_mean) / Dmean.mean() + + def center_divergence_matrix(self, D) : + n = D.shape[0] + Dmean = zeros(n) + Dstd = zeros(n) + for i in xrange(D.shape[0]) : + Dmean[i] = np.mean(D[i][ [D[i]>0] and [D[i]!=inf] ]) + Dstd[i] = np.std(D[i][ [D[i]>0] and [D[i]!=inf] ]) + B = zeros_like(D) + for i in xrange(n) : + for j in xrange(n) : + d = D[i,j] + B[i,j] = B[j,i] = 0.5 * ((d - Dmean[i]) / Dstd[i] + (d - Dmean[j]) / Dstd[j]) + B += abs(np.min(B)) + np.fill_diagonal(B,0.0) + return B + +class FeatureObj() : + __slots__ = ['key','audio','timestamps','features'] + +def getGMMs(feature, gmmWindow=10, stepsize=1): + gmm_list = [] + steps = int((feature.shape[0] - gmmWindow + stepsize) / stepsize) + for i in xrange(steps): + gmm_list.append(GmmDistance(feature[i*stepsize:(i*stepsize+gmmWindow), :])) + return gmm_list + +def pairwiseSKL(gmm_list): + '''Compute pairwise symmetrised KL divergence of a list of GMMs.''' + n_GMMs = len(gmm_list) + distance_matrix = np.zeros((n_GMMs, n_GMMs)) + for i in xrange(n_GMMs): + for j in xrange(i, n_GMMs): + distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j]) + distance_matrix[j][i] = distance_matrix[i][j] + # X = np.array(gmm_list) + # distance_matrix = 
pairwise_distances(X, metric = lambda x, y: x.skl_distance_full(y) ) + distance_matrix[np.isnan(distance_matrix)] = 10.0 + distance_matrix[np.isinf(distance_matrix)] = 10.0 + + +# def parse_args(): +# # define parser +# op = optparse.OptionParser() +# # IO options +# op.add_option('-i', '--input', action="store", dest="INPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/features', type="str", help="Loading features from..." ) +# op.add_option('-o', '--out', action="store", dest="OUTPUT", default='test/clustering_resutls', type="str", help="Writing clustering results to... ") +# +# return op.parse_args() +# +# options, args = parse_args() +# +# def main(): +# +# feature_list = [i for i in os.listdir(options.INPUT) if not i.startswith('.')] +# feature_list.sort() +# fobj_list = [] +# +# for feature in feature_list: +# data = np.genfromtxt(join(options.INPUT, feature), delimiter=',',filling_values=0.0) +# dim = data.shape[1] - 1 +# if dim == 1 : +# fo = FeatureObj() +# fo.audio = feature[:feature.find('_vamp')] +# fo.key = splitext(feature.strip(fo.audio + '_'))[0] +# fo.timestamps = data[:, 0] # the first column is the timestamps +# fo.features = data[:, 1] +# fobj_list.append(fo) +# +# else : +# for col in xrange(dim): +# fo = FeatureObj() +# fo.audio = feature[:feature.find('_vamp')] +# fo.key = splitext(feature.strip(fo.audio + '_'))[0] + '_' + '%d' %col +# fo.timestamps = data[:, 0] # the first column records the timestamps +# fo.features = data[:, col+1][:,np.newaxis] +# fobj_list.append(fo) +# +# timestamps = fobj_list[0].timestamps +# features = map(lambda x: "%i:%s" %(x[0],x[1].key), enumerate(fobj_list)) +# print "Loading %d features:\n", len(fobj_list) +# +# # find the feature with the fewer number of frames +# n_frames = np.array(map(lambda x: x.features.shape[0], fobj_list)).min() +# n_features = len(fobj_list) +# +# feature_matrix = np.zeros((n_frames, n_features)) +# print 'feature_matrix', feature_matrix.shape +# +# # take the arrays from the feature objects and add them to a matrix +# for i,f in enumerate(fobj_list) : +# feature_matrix[:,i] = f.features[:n_frames,0] +# +# # normalise the feature matrix, get rid of negative features, ensure numerical stability by adding a small constant +# feature_matrix = abs(feature_matrix) / (abs(feature_matrix.max(0))+0.0005) +# feature_matrix[np.isnan(feature_matrix)] = 0.0 +# +# winlength = 5 +# stepsize = 2 +# +# gmm_list = getGMMs(feature_matrix, gmmWindow=winlength, stepsize=stepsize) +# print 'number of GMMs:', len(gmm_list) +# skl_matrix = pairwiseSKL(gmm_list) +# +# rc = rClustering(eps=3, rank='min_avg_div').fit(gmm_list) +# rc.test() +# classification = rc.classification +# neighborhood_size, average_div, node_rank = rc.getNodeRank() +# +# f1 = np.array(zip(timestamps[:len(classification)], labelclassifications)) +# f2 = np.array(zip(timestamps[:len(neighborhood_size)], neighborhood_size)) +# f3 = np.array(zip(timestamps[:len(average_div)], average_div)) +# f4 = np.array(zip(timestamps[:len(node_rank)], node_rank)) +# +# np.savetxt(join(options.OUTPUT, 'classification')+'.csv', f1, delimiter=',') +# np.savetxt(join(options.OUTPUT, 'neighborhood_size')+'.csv', f2, delimiter=',') +# np.savetxt(join(options.OUTPUT, 'average_div')+'.csv', f3, delimiter=',') +# np.savetxt(join(options.OUTPUT, 'node_rank')+'.csv', f4, delimiter=',') +# +# if __name__ == '__main__': +# main() + \ No newline at end of file
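A minimal usage sketch of the rank clustering above on toy features, not part of the changeset: window length, eps and k are illustrative, and it assumes utils.GmmMetrics (GmmDistance) and the rest of the repo are importable under the same Python 2 interpreter.

    import numpy as np
    from utils.RankClustering import rClustering, getGMMs

    # two artificial "sections" with clearly different statistics
    a = 0.3 * np.random.randn(120, 6)
    b = 0.3 * np.random.randn(120, 6) + 2.0
    features = np.vstack([a, b])

    gmms = getGMMs(features, gmmWindow=10, stepsize=5)   # one GmmDistance model per window
    rc = rClustering(eps=3, k=2, rank='min_avg_div').fit(gmms)
    labels = rc.classify()                               # each window gets the index of its hub
    print('windows: %d, hubs used: %s' % (len(labels), sorted(set(labels))))
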
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/SegProperties.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +SegProperties.py + +Created by mi tian on 2015-04-02. +Copyright (c) 2015 __MyCompanyName__. All rights reserved. +""" + +import sys +import os + +class FeatureGMM(object): + '''Represent segment candidates using single GMMs and compute pairwise distances.''' + def getGaussianParams(self, length, featureRate, timeWindow): + + win_len = round(timeWindow * featureRate) + win_len = win_len + (win_len % 2) - 1 + + # a 50% overlap between windows + stepsize = ceil(win_len * 0.5) + num_win = int(floor( (length) / stepsize)) + gaussian_rate = featureRate / stepsize + + return stepsize, num_win, win_len, gaussian_rate + + def GaussianDistance(self, feature, featureRate, timeWindow): + + stepsize, num_win, win_len, gr = self.getGaussianParams(feature.shape[0], featureRate, timeWindow) + print 'stepsize, num_win, feature', stepsize, num_win, feature.shape, featureRate, timeWindow + gaussian_list = [] + gaussian_timestamps = [] + tsi = 0 + + # f = open('/Users/mitian/Documents/experiments/features.txt','w') + # print 'divergence computing..' + for num in xrange(num_win): + # print num, num * stepsize , (num * stepsize) + win_len + gf=GaussianFeature(feature[int(num * stepsize) : int((num * stepsize) + win_len), :],2) + # f.write("\n%s" %str(gf)) + gaussian_list.append(gf) + tsi = int(floor( num * stepsize + 1)) + gaussian_timestamps.append(self.timestamp[tsi]) + + # f.close() + + # print 'gaussian_list', len(gaussian_list), len(gaussian_timestamps) + dm = np.zeros((len(gaussian_list), len(gaussian_list))) + + for v1, v2 in combinations(gaussian_list, 2): + i, j = gaussian_list.index(v1), gaussian_list.index(v2) + dm[i, j] = v1.distance(v2) + dm[j, i] = v2.distance(v1) + # print 'dm[i,j]',dm[i,j] + # sio.savemat("/Users/mitian/Documents/experiments/dm-from-segmenter.mat",{"dm":dm}) + return dm, gaussian_timestamps + + def getGMMs(self, feature, segment_boundaries): + '''Return GMMs for located segments''' + gmm_list = [] + gmm_list.append(GmmDistance(feature[: segment_boundaries[0], :], components = 1)) + for i in xrange(1, len(segment_boundaries)): + gmm_list.append(GmmDistance(feature[segment_boundaries[i-1] : segment_boundaries[i], :], components = 1)) + return gmm_list + + +class FusedPeakSelection(object): + '''Peak selection from fusion of individual results.''' + def getFusedPeaks(self, combined_thresh, individual_thresh, individual_tol, combined_tol, w1=None, w2=None, w3=None, w4=None): + '''Return a list a peak position and the corresponding confidence.''' + confidence_array = np.zeros_like(w1) + conf1 = np.zeros_like(w1) + len_arr = len(w1) + + # keep peaks retrieved by single feature if its confidence is above individual_thresh + w1_keep = np.where(w1>=individual_thresh)[0] + w2_keep = np.where(w2>=individual_thresh)[0] + w3_keep = np.where(w3>=individual_thresh)[0] + w4_keep = np.where(w4>=individual_thresh)[0] + confidence_array[w1_keep] += w1[w1_keep] + confidence_array[w2_keep] += w2[w2_keep] + confidence_array[w3_keep] += w3[w3_keep] + confidence_array[w4_keep] += w4[w4_keep] + + confidence_array[confidence_array>1] = 1 + + # deal with peaks picked individual features with high confidence first + i=0 + while i < len_arr: + if confidence_array[i] > 0: + temp = [confidence_array[i]] + pos = [i] + i += 1 + + # start searching neighborhood for local maximum + while (i+individual_tol < len_arr and 
np.max(confidence_array[i:i+individual_tol]) > 0): + temp += [confidence_array[i+delta] for delta in xrange(individual_tol) if confidence_array[i+delta]>0] + pos += [i+delta for delta in xrange(individual_tol) if confidence_array[i+delta]>0] + i += individual_tol + + if len(temp) == 1: + conf1[pos[0]] = temp[0] + else: + # p = int(np.rint(np.sum(np.multiply(pos,temp))/ np.sum(temp))) + # conf1[p] = 1 + p = int(np.mean(pos)) + conf1[p] = np.mean(temp) + else: + i += 1 + conf1[conf1>1] = 1 + + # Process peaks with low confidence but located by multiple features in the same neighborhood + # conf2 = copy(conf1) + conf2 = np.zeros_like(conf1) + weight1, weight2, weight3, weight4 = copy(w1), copy(w2), copy(w3), copy(w4) + weight1[weight1>individual_thresh] = 0.0 + weight2[weight2>individual_thresh] = 0.0 + weight3[weight3>individual_thresh] = 0.0 + weight4[weight4>individual_thresh] = 0.0 + combined = weight1 + weight2 + weight3 + weight4 + combined = (combined - np.min(combined)) / (np.max(combined) - np.min(combined)) + if combined[0]>0.3: combined[0] = 0.8 + + i = 0 + while i < len_arr: + if combined[i] > 0: + temp = [combined[i]] + pos = [i] + i += 1 + + # start searching neighborhood for local maximum + while (i+combined_tol < len_arr and np.max(combined[i:i+combined_tol]) > 0): + temp += [combined[i+delta] for delta in xrange(combined_tol) if combined[i+delta]>0] + pos += [i+delta for delta in xrange(combined_tol) if combined[i+delta]>0] + i += combined_tol + + if len(temp) == 1: + conf2[pos[0]] += temp[0] + else: + p = int(np.rint(np.sum(np.multiply(pos,temp))/ np.sum(temp))) + conf2[p] += np.sum(np.multiply(pos,temp)) / p + else: + i += 1 + + conf2[conf2<combined_thresh] = 0 + conf2[conf2>1] = 1 + + combined_conf = conf1 + conf2 + combined_conf[combined_conf>1] = 1 + conf = np.zeros_like(combined_conf) + # Combine selections from the obove two steps + i=0 + while i < len_arr: + if combined_conf[i] > 0.3: + temp = [combined_conf[i]] + pos = [i] + i += 1 + + # start searching neighborhood for local maximum + while (i+individual_tol < len_arr and np.max(combined_conf[i:i+individual_tol]) > 0.5): + temp += [combined_conf[i+delta] for delta in xrange(individual_tol) if combined_conf[i+delta]>0.5] + pos += [i+delta for delta in xrange(individual_tol) if combined_conf[i+delta]>0.5] + i += individual_tol + + if len(temp) == 1: + conf[pos[0]] = combined_conf[pos[0]] + elif (np.max(temp)== 1 and np.sort(temp)[-2] < combined_thresh): + p = pos[np.argmax(temp)] + conf[p] = np.max(temp) + else: + p = int(np.rint(np.sum(np.multiply(pos,temp))/ np.sum(temp))) + conf[p] = np.mean(np.multiply(pos,temp)) / p + else: + i += 1 + + peaks = list(np.where(conf>combined_thresh)[0]) + return peaks, conf1, conf2, conf + + def getPeakWeights(self, sdf, peak_list): + '''Compute peak confidence. 
+ Return: array with confidence values at peak positions and zeros otherwise''' + mask = np.zeros_like(sdf) + mask[peak_list] = 1.0 + return sdf * mask + + def selectPeak(self, peak_candidates, featureset, winlen=5): + dist_list = [] + feature_types = len(featureset) + gt_dist, hm_dist, tb_dist, tp_dist = [], [], [], [] + + for idx, x in enumerate(peak_candidates): + prev_features = tuple([featureset[i][x-winlen:x, :] for i in xrange(feature_types)]) + post_features = tuple([featureset[i][x:x+winlen, :] for i in xrange(feature_types)]) + gt_dist.append(np.sum(pairwise_distances(prev_features[0], post_features[0]))) + hm_dist.append(np.sum(pairwise_distances(prev_features[1], post_features[1]))) + tb_dist.append(np.sum(pairwise_distances(prev_features[2], post_features[2]))) + tp_dist.append(np.sum(pairwise_distances(prev_features[3], post_features[3]))) + + return peak_candidates[np.argmax(gt_dist)], peak_candidates[np.argmax(hm_dist)], peak_candidates[np.argmax(tb_dist)], peak_candidates[np.argmax(tp_dist)] + + def getPeakFeatures(self, peak_candidates, featureset, winlen): + ''' + args: winlen: length of feature window before and after an investigated peak + featureset: A list of audio features for measuring the dissimilarity. + + return: peak_features + A list of tuples of features for windows before and after each peak. + ''' + prev_features = [] + post_features = [] + feature_types = len(featureset) + + # print peak_candidates[-1], winlen, featureset[0].shape + # if peak_candidates[-1] + winlen > featureset[0].shape[0]: + # peak_candidates = peak_candidates[:-1] + # for x in peak_candidates: + # prev_features.append(tuple([featureset[i][x-winlen:x, :] for i in xrange(feature_types)])) + # post_features.append(tuple([featureset[i][x:x+winlen, :] for i in xrange(feature_types)])) + prev_features.append(tuple([featureset[i][:peak_candidates[0], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[0]:peak_candidates[1], :] for i in xrange(feature_types)])) + for idx in xrange(1, len(peak_candidates)-1): + prev_features.append(tuple([featureset[i][peak_candidates[idx-1]:peak_candidates[idx], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[idx]:peak_candidates[idx+1], :] for i in xrange(feature_types)])) + prev_features.append(tuple([featureset[i][peak_candidates[-2]:peak_candidates[-1], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[-1]:, :] for i in xrange(feature_types)])) + return prev_features, post_features + + def segStats(self, feature_array, boundary_list): + '''Return some basic stats of features associated with two boundaries.''' + feature_stats = [] + for i in xrange(1, len(boundary_list)): + feature_stats.append(np.std(feature_array[boundary_list[i-1]:boundary_list[i]], axis=0)) + return feature_stats + + def segmentDev(self, prev_features, post_features, metric='kl'): + '''Deviations are measured for each given feature type. + peak_candidates: peaks from the 1st round detection + peak_features: Features for measuring the dissimilarity for parts before and after each peak. + dtype: tuple. 
+ ''' + dev_list = [] + n_peaks = len(prev_features) + n_features = len(prev_features[0]) + # print 'n_peaks, n_features', n_peaks, n_features + if metric == 'kl': + for x in xrange(n_peaks): + f1, f2 = prev_features[x], post_features[x] + dev_list.append(tuple([GmmDistance(f1[i], components=1).skl_distance_full(GmmDistance(f2[i], components=1)) for i in xrange(n_features)])) + elif metric == 'euclidean': + for x in xrange(n_peaks): + f1, f2 = prev_features[x], post_features[x] + dev_list.append(tuple([pairwise_distances(f1[i], f2[i]) for i in xrange(n_features)])) + return dev_list + +def main(): + pass + + +if __name__ == '__main__': + main() +
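A minimal usage sketch for the fusion step above, assuming the module-level imports it relies on (numpy, itertools.combinations and the GMM utilities) are available; the file as committed only imports sys and os. The confidence curves, thresholds and tolerances below are placeholders, not the project's tuned values.

import numpy as np
from utils.SegProperties import FusedPeakSelection

fps = FusedPeakSelection()
rng = np.random.RandomState(0)
# one confidence curve per feature type (gammatone, chroma, timbre, tempo), values in [0, 1]
w1, w2, w3, w4 = [rng.rand(200) for _ in range(4)]
peaks, conf1, conf2, conf = fps.getFusedPeaks(
    combined_thresh=0.3,    # minimum fused confidence for accepting a boundary (placeholder)
    individual_thresh=0.7,  # confidence at which a single feature decides on its own (placeholder)
    individual_tol=4,       # merge detections within 4 frames of each other
    combined_tol=8,
    w1=w1, w2=w2, w3=w3, w4=w4)
print 'fused boundary frames:', peaks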
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/SegUtil.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,395 @@ +""" +Useful functions that are quite common for music segmentation +""" +''' +Modified and more funcs added. +Mi Tian, April 2015. +''' + +__author__ = "Oriol Nieto" +__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" +__license__ = "GPL" +__version__ = "1.0" +__email__ = "oriol@nyu.edu" + +import copy +import numpy as np +import os +import scipy +from scipy.spatial import distance +from scipy.ndimage import filters, zoom +from scipy import signal +import pylab as plt +from scipy.spatial.distance import squareform, pdist + + +def lognormalize_chroma(C): + """Log-normalizes chroma such that each vector is between -80 to 0.""" + C += np.abs(C.min()) + 0.1 + C = C/C.max(axis=0) + C = 80 * np.log10(C) # Normalize from -80 to 0 + return C + + +def normalize_matrix(X): + """Nomalizes a matrix such that it's maximum value is 1 and minimum is 0.""" + X += np.abs(X.min()) + X /= X.max() + return X + + +def ensure_dir(directory): + """Makes sure that the given directory exists.""" + if not os.path.exists(directory): + os.makedirs(directory) + + +def median_filter(X, M=8): + """Median filter along the first axis of the feature matrix X.""" + for i in xrange(X.shape[1]): + X[:, i] = filters.median_filter(X[:, i], size=M) + return X + + +def compute_gaussian_krnl(M): + """Creates a gaussian kernel following Foote's paper.""" + g = signal.gaussian(M, M / 3., sym=True) + G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) + G[M / 2:, :M / 2] = -G[M / 2:, :M / 2] + G[:M / 2, M / 2:] = -G[:M / 2, M / 2:] + return G + + +def compute_ssm(X, metric="seuclidean"): + """Computes the self-similarity matrix of X.""" + D = distance.pdist(X, metric=metric) + D = distance.squareform(D) + D /= D.max() + return 1 - D + + +def compute_nc(X, G): + """Computes the novelty curve from the self-similarity matrix X and + the gaussian kernel G.""" + N = X.shape[0] + M = G.shape[0] + nc = np.zeros(N) + + for i in xrange(M / 2, N - M / 2 + 1): + nc[i] = np.sum(X[i - M / 2:i + M / 2, i - M / 2:i + M / 2] * G) + + # Normalize + nc += nc.min() + nc /= nc.max() + return nc + + +def resample_mx(X, incolpos, outcolpos): + """ + Method from Librosa + Y = resample_mx(X, incolpos, outcolpos) + X is taken as a set of columns, each starting at 'time' + colpos, and continuing until the start of the next column. + Y is a similar matrix, with time boundaries defined by + outcolpos. Each column of Y is a duration-weighted average of + the overlapping columns of X. 
+ 2010-04-14 Dan Ellis dpwe@ee.columbia.edu based on samplemx/beatavg + -> python: TBM, 2011-11-05, TESTED + """ + noutcols = len(outcolpos) + Y = np.zeros((X.shape[0], noutcols)) + # assign 'end times' to final columns + if outcolpos.max() > incolpos.max(): + incolpos = np.concatenate([incolpos,[outcolpos.max()]]) + X = np.concatenate([X, X[:,-1].reshape(X.shape[0],1)], axis=1) + outcolpos = np.concatenate([outcolpos, [outcolpos[-1]]]) + # durations (default weights) of input columns) + incoldurs = np.concatenate([np.diff(incolpos), [1]]) + + for c in range(noutcols): + firstincol = np.where(incolpos <= outcolpos[c])[0][-1] + firstincolnext = np.where(incolpos < outcolpos[c+1])[0][-1] + lastincol = max(firstincol,firstincolnext) + # default weights + wts = copy.deepcopy(incoldurs[firstincol:lastincol+1]) + # now fix up by partial overlap at ends + if len(wts) > 1: + wts[0] = wts[0] - (outcolpos[c] - incolpos[firstincol]) + wts[-1] = wts[-1] - (incolpos[lastincol+1] - outcolpos[c+1]) + wts = wts * 1. /sum(wts) + Y[:,c] = np.dot(X[:,firstincol:lastincol+1], wts) + # done + return Y + + +def chroma_to_tonnetz(C): + """Transforms chromagram to Tonnetz (Harte, Sandler, 2006).""" + N = C.shape[0] + T = np.zeros((N, 6)) + + r1 = 1 # Fifths + r2 = 1 # Minor + r3 = 0.5 # Major + + # Generate Transformation matrix + phi = np.zeros((6, 12)) + for i in range(6): + for j in range(12): + if i % 2 == 0: + fun = np.sin + else: + fun = np.cos + + if i < 2: + phi[i, j] = r1 * fun(j * 7 * np.pi / 6.) + elif i >= 2 and i < 4: + phi[i, j] = r2 * fun(j * 3 * np.pi / 2.) + else: + phi[i, j] = r3 * fun(j * 2 * np.pi / 3.) + + # Do the transform to tonnetz + for i in range(N): + for d in range(6): + denom = float(C[i, :].sum()) + if denom == 0: + T[i, d] = 0 + else: + T[i, d] = 1 / denom * (phi[d, :] * C[i, :]).sum() + + return T + + +def most_frequent(x): + """Returns the most frequent value in x.""" + return np.argmax(np.bincount(x)) + + +def pick_peaks(nc, L=16, plot=False): + """Obtain peaks from a novelty curve using an adaptive threshold.""" + offset = nc.mean() / 3 + th = filters.median_filter(nc, size=L) + offset + peaks = [] + for i in xrange(1, nc.shape[0] - 1): + # is it a peak? + if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: + # is it above the threshold? + if nc[i] > th[i]: + peaks.append(i) + if plot: + plt.plot(nc) + plt.plot(th) + for peak in peaks: + plt.axvline(peak, color="m") + plt.show() + return peaks + + +def recurrence_matrix(data, k=None, width=1, metric='sqeuclidean', sym=False): + ''' + Note: Copied from librosa + + Compute the binary recurrence matrix from a time-series. 
+ + ``rec[i,j] == True`` <=> (``data[:,i]``, ``data[:,j]``) are + k-nearest-neighbors and ``|i-j| >= width`` + + :usage: + >>> mfcc = librosa.feature.mfcc(y=y, sr=sr) + >>> R = librosa.segment.recurrence_matrix(mfcc) + + >>> # Or fix the number of nearest neighbors to 5 + >>> R = librosa.segment.recurrence_matrix(mfcc, k=5) + + >>> # Suppress neighbors within +- 7 samples + >>> R = librosa.segment.recurrence_matrix(mfcc, width=7) + + >>> # Use cosine similarity instead of Euclidean distance + >>> R = librosa.segment.recurrence_matrix(mfcc, metric='cosine') + + >>> # Require mutual nearest neighbors + >>> R = librosa.segment.recurrence_matrix(mfcc, sym=True) + + :parameters: + - data : np.ndarray + feature matrix (d-by-t) + + - k : int > 0 or None + the number of nearest-neighbors for each sample + + Default: ``k = 2 * ceil(sqrt(t - 2 * width + 1))``, + or ``k = 2`` if ``t <= 2 * width + 1`` + + - width : int > 0 + only link neighbors ``(data[:, i], data[:, j])`` + if ``|i-j| >= width`` + + - metric : str + Distance metric to use for nearest-neighbor calculation. + + See ``scipy.spatial.distance.cdist()`` for details. + + - sym : bool + set ``sym=True`` to only link mutual nearest-neighbors + + :returns: + - rec : np.ndarray, shape=(t,t), dtype=bool + Binary recurrence matrix + ''' + + t = data.shape[1] + + if k is None: + if t > 2 * width + 1: + k = 2 * np.ceil(np.sqrt(t - 2 * width + 1)) + else: + k = 2 + + k = int(k) + + def _band_infinite(): + '''Suppress the diagonal+- of a distance matrix''' + + band = np.empty((t, t)) + band.fill(np.inf) + band[np.triu_indices_from(band, width)] = 0 + band[np.tril_indices_from(band, -width)] = 0 + + return band + + # Build the distance matrix + D = scipy.spatial.distance.cdist(data.T, data.T, metric=metric) + + # Max out the diagonal band + D = D + _band_infinite() + + # build the recurrence plot + rec = np.zeros((t, t), dtype=bool) + + # get the k nearest neighbors for each point + for i in range(t): + for j in np.argsort(D[i])[:k]: + rec[i, j] = True + + # symmetrize + if sym: + rec = rec * rec.T + + return rec + + +def getMean(feature, winlen, stepsize): + means = [] + steps = int((feature.shape[0] - winlen + stepsize) / stepsize) + for i in xrange(steps): + means.append(np.mean(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) + return np.array(means) + + +def getStd(feature, winlen, stepsize): + std = [] + steps = int((feature.shape[0] - winlen + stepsize) / stepsize) + for i in xrange(steps): + std.append(np.std(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) + return np.array(std) + + +def getDelta(feature): + delta_feature = np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0))) + return delta_feature + + +def getSSM(feature_array, metric='cosine', norm='simple', reduce=False): + '''Compute SSM given input feature array. 
+ args: norm: ['simple', 'remove_noise'] + ''' + dm = pairwise_distances(feature_array, metric=metric) + dm = np.nan_to_num(dm) + if norm == 'simple': + ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm)) + if reduce: + ssm = reduceSSM(ssm) + return ssm + + +def reduceSSM(ssm, maxfilter_size = 2, remove_size=50): + reduced_ssm = ssm + reduced_ssm[reduced_ssm<0.75] = 0 + # # reduced_ssm = maximum_filter(reduced_ssm,size=maxfilter_size) + # # reduced_ssm = morphology.remove_small_objects(reduced_ssm.astype(bool), min_size=remove_size) + local_otsu = otsu(reduced_ssm, disk(5)) + local_otsu = (local_otsu.astype(float) - np.min(local_otsu)) / (np.max(local_otsu) - np.min(local_otsu)) + reduced_ssm = reduced_ssm - 0.6*local_otsu + return reduced_ssm + + +def upSample(feature_array, step): + '''Resample downsized tempogram features, tempoWindo should be in accordance with input features''' + # print feature_array.shape + sampleRate = 44100 + stepSize = 1024.0 + # step = np.ceil(sampleRate/stepSize/5.0) + feature_array = zoom(feature_array, (step,1)) + # print 'resampled', feature_array.shape + return feature_array + + +def normaliseFeature(feature_array): + '''Normalise features column wisely.''' + feature_array[np.isnan(feature_array)] = 0.0 + feature_array[np.isinf(feature_array)] = 0.0 + feature_array = (feature_array - np.min(feature_array, axis=-1)[:,np.newaxis]) / (np.max(feature_array, axis=-1) - np.min(feature_array, axis=-1))[:,np.newaxis] + feature_array[np.isnan(feature_array)] = 0.0 + feature_array[np.isinf(feature_array)] = 0.0 + + return feature_array + + +def verifyPeaks(peak_canditates, dev_list): + '''Verify peaks from the 1st round detection by applying adaptive thresholding to the deviation list.''' + + final_peaks = copy(peak_canditates) + dev_list = np.array([np.mean(x) for x in dev_list]) # get average of devs of different features + med_dev = median_filter(dev_list, size=5) + # print dev_list, np.min(dev_list), np.median(dev_list), np.mean(dev_list), np.std(dev_list) + dev = dev_list - np.percentile(dev_list, 50) + # print dev + for i, x in enumerate(dev): + if x < 0: + final_peaks.remove(peak_canditates[i]) + return final_peaks + + +def envelopeFollower(xc, AT, RT, prevG, scaler=1): + '''Follows the amplitude envelope of input signal xc.''' + + g = np.zeros_like(xc) + length = len(xc) + + for i in xrange(length): + xSquared = xc[i] ** 2 + # if input is less than the previous output use attack, otherwise use the release + if xSquared < prevG: + coeff = AT + else: + coeff = RT + g[i] = (xSquared - prevG)*coeff + prevG + g[i] *= scaler + prevG = g[i] + + return g + + +def getEnvPeaks(sig, sig_env, size=1): + '''Finds peaks in the signal envelope. + args: sig (1d array): orignal input signal + sig_env (list): position of the signal envelope. + size: ranges to locate local maxima in the envelope as peaks. + ''' + envelope = sig[sig_env] + peaks = [] + if len(envelope) > 1 and envelope[0] > envelope[1]: + peaks.append(sig_env[0]) + for i in xrange(size, len(envelope)-size-1): + if envelope[i] > np.max(envelope[i-size:i]) and envelope[i] > np.max(envelope[i+1:i+size+1]): + peaks.append(sig_env[i]) + return peaks \ No newline at end of file
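A minimal sketch of how the Foote-style helpers above chain together; the feature matrix, kernel size and peak-picking window are placeholders rather than the project's settings.

import numpy as np
from utils.SegUtil import compute_ssm, compute_gaussian_krnl, compute_nc, pick_peaks

X = np.random.rand(500, 12)               # placeholder feature matrix (frames x dimensions), e.g. a chromagram
S = compute_ssm(X, metric='seuclidean')   # self-similarity matrix scaled to [0, 1]
G = compute_gaussian_krnl(32)             # Gaussian-tapered checkerboard kernel over a 32-frame context
nc = compute_nc(S, G)                     # novelty curve along the main diagonal
boundaries = pick_peaks(nc, L=16)         # adaptive-threshold peak picking
print 'estimated boundary frames:', boundaries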
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/__init__.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# encoding: utf-8 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/acorr.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +acorr.py + +This function provides similar fuctionality to Matlab's xcorr() but for computing autocorrelation only. + +It gives equivalent results given the following Matlab test script: + +%==== MATLAB ==== +a=[1,2,3,4,5,1,2,3,4,5,2,3,4,5,6,1,2,3,4,5]; +[xcr,lag] = xcorr(a,a,20,'unbiased'); +disp(xcr); +plot(lag,xcr); +%==== MATLAB ==== + +Created by George Fazekas on 2014-02-26. +Copyright (c) 2014 . All rights reserved. +""" + +import sys,os +import numpy as np +from scipy.fftpack import fft, ifft + +class ACORR(object): + + def nextpow2(self, n): + '''Return the next power of 2 such as 2^p >= n''' + if np.any(n < 0): + raise ValueError("n should be > 0") + if np.isscalar(n): + f, p = np.frexp(n) + if f == 0.5: + return p-1 + elif np.isfinite(f): + return p + else: + return f + else: + f, p = np.frexp(n) + res = f + bet = np.isfinite(f) + exa = (f == 0.5) + res[bet] = p[bet] + res[exa] = p[exa] - 1 + return res + + def acorr_ifft_fft(self, x, nfft, lag, onesided=False, scale='none'): + '''Compute the actual autocorrelation via IFFT/FFT ''' + ra = np.real(ifft(np.abs(fft(x, n=nfft) ** 2))) + if onesided: + b = ra[..., :lag] + else: + b = np.concatenate([ra[..., nfft-lag+1:nfft], ra[..., :lag]], axis=-1) + #print b, ra[..., 0][..., np.newaxis], b / ra[..., 0][..., np.newaxis] + if scale == 'coeff': + return b / ra[..., 0][..., np.newaxis] + elif scale == 'unbiased': + '''scale = len(x)-abs(lags)''' + lags = np.array(range(-lag+1,lag)) + # print lags,len(x) + scale = len(x) - abs(lags) + scale[scale<=0] = 1.0 + # print scale + return b/scale + else: + return b + + def acorr(self, x, axis=-1, onesided=False, lag=None, scale='none'): + '''Compute autocorrelation of x along given axis. + x : array-like + signal to correlate. + axis : int + axis along which autocorrelation is computed. + onesided: bool, optional + if True, only returns the right side of the autocorrelation. + scale: {'none', 'coeff'} scaling mode. + If 'coeff', the correlation is normalised such as the 0-lag is equal to 1. + if 'unbiased' the correlation is normalised using len(x) - abs(lags) ''' + + if not scale in ['none', 'coeff','unbiased']: + raise ValueError("scale mode %s not understood" % scale) + if not lag : + lag = x.shape[axis] + lag += 1 + nfft = 2 ** self.nextpow2(2 * lag - 1) + lags = np.array(range(-lag+1, lag)) + if axis != -1: + x = np.swapaxes(x, -1, axis) + a = self.acorr_ifft_fft(x, nfft, lag, onesided, scale) + if axis != -1: + a = np.swapaxes(a, -1, axis) + return a,lags +
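A small usage sketch mirroring the MATLAB comparison given in the docstring above; the test sequence is the one from that script.

import numpy as np
from utils.acorr import ACORR

a = np.array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5], dtype=float)
xcr, lags = ACORR().acorr(a, lag=20, scale='unbiased')
print lags         # lags -20 .. 20, as in xcorr(a, a, 20, 'unbiased')
print xcr.shape    # (41,)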
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/animate.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +animate.py + +Created by mi tian on 2015-01-21. +Copyright (c) 2015 __MyCompanyName__. All rights reserved. +""" + +''' +Animate feature from stored data. +''' +import sys, os +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.animation as animation + +data = np.genfromtxt('/Users/mitian/Documents/workspace/data/ph.txt',delimiter=',') + +fs, stepSize, blockSize = 44100, 2048, 1024 +# feature_time = 6 +# feature_window = int(fs / stepSize * feature_time) +# feature_step = int(0.5 * fs / stepSize * feature_time) +# nWindow = int((len(data) - feature_window) / feature_step + 1) +feature_step = 48 +nWindow = int(len(data) / feature_step) + +def animate(i): + plt.imshow(data[i*feature_step:(i+1)*feature_step, :].T, cmap='hot', origin='lower') + plt.title('%d-%d(s)' %(3*i, 3*i+6)) + +def main(): + + print nWindow + + fig, ax = plt.subplots() + ani = animation.FuncAnimation(fig, animate, np.arange(nWindow), interval= 3000) + plt.show() + +if __name__ == '__main__': + main() +
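If the animation should be written to disk rather than shown interactively, matplotlib's writer interface can be used in place of plt.show(); this sketch assumes an ffmpeg writer is installed, and the output filename is arbitrary.

    # inside main(), after creating the FuncAnimation:
    ani.save('ph_animation.mp4', writer='ffmpeg')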
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/filters.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,241 @@ +# Copyright 2014 Jason Heeris, jason.heeris@gmail.com +# +# This file is part of the gammatone toolkit, and is licensed under the 3-clause +# BSD license: https://github.com/detly/gammatone/blob/master/COPYING +""" +This module contains functions for constructing sets of equivalent rectangular +bandwidth gammatone filters. +""" +from __future__ import division +from collections import namedtuple + +import numpy as np +import scipy as sp +from scipy import signal as sgn + +DEFAULT_FILTER_NUM = 100 +DEFAULT_LOW_FREQ = 100 +DEFAULT_HIGH_FREQ = 44100/4 + + +def erb_point(low_freq, high_freq, fraction): + """ + Calculates a single point on an ERB scale between ``low_freq`` and + ``high_freq``, determined by ``fraction``. When ``fraction`` is ``1``, + ``low_freq`` will be returned. When ``fraction`` is ``0``, ``high_freq`` + will be returned. + + ``fraction`` can actually be outside the range ``[0, 1]``, which in general + isn't very meaningful, but might be useful when ``fraction`` is rounded a + little above or below ``[0, 1]`` (eg. for plot axis labels). + """ + # Change the following three parameters if you wish to use a different ERB + # scale. Must change in MakeERBCoeffs too. + # TODO: Factor these parameters out + ear_q = 9.26449 # Glasberg and Moore Parameters + min_bw = 24.7 + order = 1 + + # All of the following expressions are derived in Apple TR #35, "An + # Efficient Implementation of the Patterson-Holdsworth Cochlear Filter + # Bank." See pages 33-34. + erb_point = ( + -ear_q*min_bw + + np.exp( + fraction * ( + -np.log(high_freq + ear_q*min_bw) + + np.log(low_freq + ear_q*min_bw) + ) + ) * + (high_freq + ear_q*min_bw) + ) + + return erb_point + + +def erb_space( + low_freq=DEFAULT_LOW_FREQ, + high_freq=DEFAULT_HIGH_FREQ, + num=DEFAULT_FILTER_NUM): + """ + This function computes an array of ``num`` frequencies uniformly spaced + between ``high_freq`` and ``low_freq`` on an ERB scale. + + For a definition of ERB, see Moore, B. C. J., and Glasberg, B. R. (1983). + "Suggested formulae for calculating auditory-filter bandwidths and + excitation patterns," J. Acoust. Soc. Am. 74, 750-753. + """ + return erb_point( + low_freq, + high_freq, + np.arange(1, num+1)/num + ) + + +def centre_freqs(high_freq, num_freqs, cutoff): + """ + Calculates an array of centre frequencies (for :func:`make_erb_filters`) + from a sampling frequency, lower cutoff frequency and the desired number of + filters. + + :param fs: sampling rate + :param num_freqs: number of centre frequencies to calculate + :type num_freqs: int + :param cutoff: lower cutoff frequency + :return: same as :func:`erb_space` + """ + # print 'center freq', erb_space(cutoff, fs/2, num_freqs) + return erb_space(cutoff, high_freq, num_freqs) + + +def make_erb_filters(fs, centre_freqs, width=1.0): + """ + This function computes the filter coefficients for a bank of + Gammatone filters. These filters were defined by Patterson and Holdworth for + simulating the cochlea. + + The result is returned as a :class:`ERBCoeffArray`. Each row of the + filter arrays contains the coefficients for four second order filters. The + transfer function for these four filters share the same denominator (poles) + but have different numerators (zeros). All of these coefficients are + assembled into one vector that the ERBFilterBank can take apart to implement + the filter. 
+ + The filter bank contains "numChannels" channels that extend from + half the sampling rate (fs) to "lowFreq". Alternatively, if the numChannels + input argument is a vector, then the values of this vector are taken to be + the center frequency of each desired filter. (The lowFreq argument is + ignored in this case.) + + Note this implementation fixes a problem in the original code by + computing four separate second order filters. This avoids a big problem with + round off errors in cases of very small cfs (100Hz) and large sample rates + (44kHz). The problem is caused by roundoff error when a number of poles are + combined, all very close to the unit circle. Small errors in the eigth order + coefficient, are multiplied when the eigth root is taken to give the pole + location. These small errors lead to poles outside the unit circle and + instability. Thanks to Julius Smith for leading me to the proper + explanation. + + Execute the following code to evaluate the frequency response of a 10 + channel filterbank:: + + fcoefs = MakeERBFilters(16000,10,100); + y = ERBFilterBank([1 zeros(1,511)], fcoefs); + resp = 20*log10(abs(fft(y'))); + freqScale = (0:511)/512*16000; + semilogx(freqScale(1:255),resp(1:255,:)); + axis([100 16000 -60 0]) + xlabel('Frequency (Hz)'); ylabel('Filter Response (dB)'); + + | Rewritten by Malcolm Slaney@Interval. June 11, 1998. + | (c) 1998 Interval Research Corporation + | + | (c) 2012 Jason Heeris (Python implementation) + """ + T = 1/fs + # Change the followFreqing three parameters if you wish to use a different + # ERB scale. Must change in ERBSpace too. + # TODO: factor these out + ear_q = 9.26449 # Glasberg and Moore Parameters + min_bw = 24.7 + order = 1 + + erb = width*((centre_freqs/ear_q)**order + min_bw**order)**(1/order) + B = 1.019*2*np.pi*erb + + arg = 2*centre_freqs*np.pi*T + vec = np.exp(2j*arg) + + A0 = T + A2 = 0 + B0 = 1 + B1 = -2*np.cos(arg)/np.exp(B*T) + B2 = np.exp(-2*B*T) + + rt_pos = np.sqrt(3 + 2**1.5) + rt_neg = np.sqrt(3 - 2**1.5) + + common = -T * np.exp(-(B * T)) + + # TODO: This could be simplified to a matrix calculation involving the + # constant first term and the alternating rt_pos/rt_neg and +/-1 second + # terms + k11 = np.cos(arg) + rt_pos * np.sin(arg) + k12 = np.cos(arg) - rt_pos * np.sin(arg) + k13 = np.cos(arg) + rt_neg * np.sin(arg) + k14 = np.cos(arg) - rt_neg * np.sin(arg) + + A11 = common * k11 + A12 = common * k12 + A13 = common * k13 + A14 = common * k14 + + gain_arg = np.exp(1j * arg - B * T) + + gain = np.abs( + (vec - gain_arg * k11) + * (vec - gain_arg * k12) + * (vec - gain_arg * k13) + * (vec - gain_arg * k14) + * ( T * np.exp(B*T) + / (-1 / np.exp(B*T) + 1 + vec * (1 - np.exp(B*T))) + )**4 + ) + + allfilts = np.ones_like(centre_freqs) + + fcoefs = np.column_stack([ + A0*allfilts, A11, A12, A13, A14, A2*allfilts, + B0*allfilts, B1, B2, + gain + ]) + + return fcoefs + + +def erb_filterbank(wave, coefs): + """ + :param wave: input data (one dimensional sequence) + :param coefs: gammatone filter coefficients + + Process an input waveform with a gammatone filter bank. This function takes + a single sound vector, and returns an array of filter outputs, one channel + per row. + + The fcoefs parameter, which completely specifies the Gammatone filterbank, + should be designed with the :func:`make_erb_filters` function. + + | Malcolm Slaney @ Interval, June 11, 1998. + | (c) 1998 Interval Research Corporation + | Thanks to Alain de Cheveigne' for his suggestions and improvements. 
+ | + | (c) 2013 Jason Heeris (Python implementation) + """ + output = np.zeros((coefs[:,9].shape[0], wave.shape[0])) + + gain = coefs[:, 9] + # A0, A11, A2 + As1 = coefs[:, (0, 1, 5)] + # A0, A12, A2 + As2 = coefs[:, (0, 2, 5)] + # A0, A13, A2 + As3 = coefs[:, (0, 3, 5)] + # A0, A14, A2 + As4 = coefs[:, (0, 4, 5)] + # B0, B1, B2 + Bs = coefs[:, 6:9] + + # Loop over channels + for idx in range(0, coefs.shape[0]): + # These seem to be reversed (in the sense of A/B order), but that's what + # the original code did... + # Replacing these with polynomial multiplications reduces both accuracy + # and speed. + y1 = sgn.lfilter(As1[idx], Bs[idx], wave) + y2 = sgn.lfilter(As2[idx], Bs[idx], y1) + y3 = sgn.lfilter(As3[idx], Bs[idx], y2) + y4 = sgn.lfilter(As4[idx], Bs[idx], y3) + output[idx, :] = y4/gain[idx] + + return output
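A minimal sketch running the filterbank functions above end to end; the 1 kHz tone and the 20-channel, 100 Hz to fs/4 range are illustrative choices (fs/4 matches DEFAULT_HIGH_FREQ in this module).

import numpy as np
from utils.filters import centre_freqs, make_erb_filters, erb_filterbank

fs = 44100
t = np.arange(fs) / float(fs)           # one second
wave = np.sin(2 * np.pi * 1000 * t)     # 1 kHz test tone
cfs = centre_freqs(fs / 4, 20, 100)     # 20 centre frequencies between 100 Hz and fs/4
fcoefs = make_erb_filters(fs, cfs)
bands = erb_filterbank(wave, fcoefs)    # one row of filtered output per channel
print bands.shape                       # (20, 44100)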
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/gmmdist.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +gmmdist.py + +Created by George Fazekas on 2014-07-03. +Copyright (c) 2014 . All rights reserved. +""" + +import sys,os + +from numpy import sum,isnan,isinf,vstack +from numpy.random import rand +import numpy as np +from numpy import log, power, pi, exp, transpose, zeros, log, ones, dot +from sklearn.mixture import GMM +from sklearn.metrics.pairwise import pairwise_distances +from scipy.linalg import * +from pylab import plt +from scipy.stats import norm +#from gmmplot import plot_gmm +from scipy.io import savemat,loadmat +from numpy import trace +from numpy.linalg import det, inv, eigvals + +FLOAT_MAX = np.finfo(np.float64).max + +def is_pos_def(x): + '''Check if matrix is positive definite.''' + return np.all(eigvals(x) > 0) + +def skl_models(g1,g2): + '''Wrapper function with error checking and adaptation to the GmmDistance/GaussianComponent classes. + This function compares two Gaussian mixture models with and equal number of components and full covariance matrices. + Covariance matrices must be positive definite. + ''' + m1,m2 = g1.means,g2.means + v1,v2 = g1.covars.swapaxes(0,2), g2.covars.swapaxes(0,2) + w1,w2 = g1.weights[:,np.newaxis],g2.weights[:,np.newaxis] + assert m1.shape[1] > 1, "The minimum number of features is 2." + assert w1.shape == w2.shape, "Models must have the same number of components" + # print 'v1, v2', v1.shape, v2.shape + # assert (is_pos_def(v1) and is_pos_def(v2)) == True, "Covariance matrices must be positive definite." + d = skl_gmm(m1,m2,v1,v2,w1,w2) + if isnan(d): #or isinf(d): + return FLOAT_MAX + return d + +# def kldiv_full(m0,m1,s0,s1): +# '''Naive (unoptimised) implementation of the KL divergence between two single Gaussians with fully defined covariances (s).''' +# return 0.5*(np.trace(s0/s1)+np.trace(s1/s0)+ np.dot( np.dot((m0-m1).T, np.linalg.inv(s0+s1)), (m0-m1)))-m0.shape[0] +# +# def skl_full(p0,p1): +# '''Symmetrised KL divergence computed from 2 KL divergences using mean( KL(p||q), KL(q||p) )''' +# d = (kldiv_full(p0.means,p0.covars,p1.means,p1.covars) + kldiv_full(p1.means,p1.covars,p0.means,p0.covars)) * 0.5 +# d = sum(d) +# if isnan(d) : +# return np.finfo(np.float64).max +# return d + +def kldiv_full(m1,m2,s1,s2): + m1,m2 = m1[:,None],m2[:,None] + logdet1, logdet2 = log(det(s1)), log(det(s2)) + inv1, inv2 = inv(s1), inv(s2) + m = m1-m2 + d = m.shape[0] # number of dimensions + return 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m.T, inv1), m) - d)[0][0] + +def _skl_full(m1,m2,s1,s2): + m1,m2 = m1[:,None],m2[:,None] + logdet1, logdet2 = log(det(s1)), log(det(s2)) + inv1, inv2 = inv(s1), inv(s2) + m12 = m1-m2 + m21 = m2-m1 + d = m12.shape[0] # number of dimensions + kl12 = 0.5 * ((logdet1-logdet2) + trace(dot(inv1,s2)) + dot(dot(m12.T, inv1), m12) - d)[0][0] + kl21 = 0.5 * ((logdet2-logdet1) + trace(dot(inv2,s1)) + dot(dot(m21.T, inv2), m21) - d)[0][0] + return 0.5 * (kl12+kl21) + +def skl_full(p1,p2): + m1,m2 = p1.means,p2.means + s1,s2 = p1.covars,p2.covars + return _skl_full(m1,m2,s1,s2) + + + +def skl_gmm(m1,m2,v1,v2,w1,w2): + '''Take the mean of KL(g1mm||gmm2) & KL(gmm2||gmm1) to symmetrise the divergence.''' + return (abs(kldiv_gmm(m1,m2,v1,v2,w1,w2)) + abs(kldiv_gmm(m2,m1,v2,v1,w2,w1))) * 0.5 + +def kldiv_gmm(m1,m2,v1,v2,w1,w2): + '''Low level implementation of variational approximation of KL divergence between Gaussian Mixture Models. + See first: J. R. 
Hershey and P. A. Olsen. "Approximating the Kullback-Leibler Divergence Between Gaussian Mixture Models." + IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Volume 4, pp. 317320, April, 2007. + Further theory and refinement: J. L. Durrieu, J. P. Thiran, F. Kelly. "Lower and Upper Bounds for Approximation + of the Kullback-Leibler Divergence Between Gaussian Mixture Models", ICASSP, 2012. + This implementation is by George Fazekas, Centre for Digital Music, QMUL, London, UK. + + Inputs: + m(x) : mean vector of gmm(x) + v(x) : covariance matrices of gmm(x) + w(x) : weight vector of gmm(x) + + Ouptut: + kl_full_12 : Kullback-Leibler divergence of the PDFs approximated by two Gaussian Mixture Models. + + This implementation is using a variational approximation rather than the conventional (and expensive) + Monte Carlo simulation based approach. See cited paper for details. The divergence is not symmetrised + and may be negative (unlike the closed form) or inf. In case the output is complex, the coavriance matrices + do not fulfill required criteria, i.e. somehow badly formed, sigular, not positive definite etc... + ''' + # TODO: consider dieling better with inf/-inf outcomes in the final distance computation + # - note: the max of the rows of kl12 are not always zero like that of kl11 + # - eliminate the need for swapaxes of the covariances + + n = m1.shape[0] # number of components + d = m1.shape[1] # number of dimensions + + ixm = ones((n,1),dtype=int).T # selector of mean matrix components + ixd = range(0,d*d,d+1) # indices of diagonal elements of DxD matrix + t1 = v1.swapaxes(1,2).reshape(d,n*d) # concatenate gmm1 covariance matrices + loopn = xrange(n) + + # step 1) precompute log(determinant()) of covariance matrices of gmm1 + logdet1 = zeros((n,1)) + for i in loopn : + logdet1[i] = log(det(v1[:,:,i])) + + # step 2) compute reference kldiv between individual components of gmm1 + kl11 = zeros((n,n)) + for i in loopn : + inv1 = inv(v1[:,:,i]) + mm1 = m1 - m1[i*ixm,:][0] + b1 = dot(inv1,t1).swapaxes(0,1).reshape(n,power(d,2)).T + kl11[:,i] = 0.5 * ( (logdet1[i]-d-logdet1)[:,0] + sum(b1[ixd,:],0).T + sum(dot(mm1,inv1) * mm1,1)) + # print kl11 + + # step 3) precompute log(determinant()) of covariance matrices of gmm2 + logdet2 = zeros((n,1)) + for i in loopn : + logdet2[i] = log(det(v2[:,:,i])) + + # step 4) compute pair-wise kldiv between components of gmm1 and gmm2 + kl12 = zeros((n,n)) + for i in loopn : + inv2 = inv(v2[:,:,i]) + m12 = m1 - m2[i*ixm,:][0] + b2 = dot(inv2,t1).swapaxes(0,1).reshape(n,power(d,2)).T + kl12[:,i] = 0.5 * ( (logdet2[i]-d-logdet1)[:,0] + sum(b2[ixd,:],0).T + sum(dot(m12,inv2) * m12,1)) + # print kl12 + + # step 5) compute the final variational distance between gmm1 and gmm2 + kl_full_12 = dot(w1.T, (log(sum(exp(-kl11)*w1,1))) - log(sum(exp(-kl12)*w2,1)))[0] + # print "KL divergence between gmm1 || gmm2:", kl_full_12 + return kl_full_12 + + +# models = loadmat("gmms.mat") +# # print models.keys() +# +# X = models['X'] +# Y = models['Y'] +# +# print "Data shape:" +# print X.shape +# print Y.shape +# +# # # plot the fitted model +# # gmm1 = GMM(n_components = 3, covariance_type='full') +# # model1 = gmm1.fit(X) +# # plot_gmm(gmm1,X) +# # +# # # plot the fitted model +# # gmm2 = GMM(n_components = 3, covariance_type='full') +# # model2 = gmm2.fit(Y) +# # plot_gmm(gmm2,Y) +# +# # print "KL=",kldiv_full(gmm1.means_[0],gmm1.means_[1],gmm1.covars_[0],gmm1.covars_[1]) +# # +# # print "gmm1_covars:\n", gmm1.covars_, gmm1.covars_.shape +# +# 
+# # m1 = gmm1.means_ +# # v1 = gmm1.covars_.swapaxes(0,2) +# # w1 = gmm1.weights_ +# # +# # m2 = gmm2.means_ +# # v2 = gmm2.covars_.swapaxes(0,2) +# # w2 = gmm2.weights_ +# +# m1 = models['gmm1_means'] +# v1 = models['gmm1_covars'] +# w1 = models['gmm1_weights'] +# +# m2 = models['gmm2_means'] +# v2 = models['gmm2_covars'] +# w2 = models['gmm2_weights'] +# +# print "KL divergence between gmm1 || gmm2:", kldiv_gmm(m1,m2,v1,v2,w1,w2) +# print "KL divergence between gmm2 || gmm1:", kldiv_gmm(m2,m1,v2,v1,w2,w1) +# print "Symmetrised KL distance between gmm1 || gmm2:", skl_gmm(m1,m2,v1,v2,w1,w2) + +def main(): + pass + + +if __name__ == '__main__': + main() +
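The commented-out block above compares models loaded from gmms.mat; the sketch below runs the same comparison on freshly fitted models, assuming the old sklearn GMM class imported at the top of this module and purely synthetic data.

import numpy as np
from sklearn.mixture import GMM
from utils.gmmdist import skl_gmm

rng = np.random.RandomState(0)
X = rng.randn(500, 4)
Y = rng.randn(500, 4) + 1.0

gmm1 = GMM(n_components=3, covariance_type='full').fit(X)
gmm2 = GMM(n_components=3, covariance_type='full').fit(Y)

# means: (n, d); covariances swapped to (d, d, n); weights as column vectors,
# following the conventions used by skl_models/kldiv_gmm above
d = skl_gmm(gmm1.means_, gmm2.means_,
            gmm1.covars_.swapaxes(0, 2), gmm2.covars_.swapaxes(0, 2),
            gmm1.weights_[:, np.newaxis], gmm2.weights_[:, np.newaxis])
print 'symmetrised KL distance between the two GMMs:', d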
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/gtgram.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,74 @@ +# Copyright 2014 Jason Heeris, jason.heeris@gmail.com +# +# This file is part of the gammatone toolkit, and is licensed under the 3-clause +# BSD license: https://github.com/detly/gammatone/blob/master/COPYING +from __future__ import division +import numpy as np + +from .filters import make_erb_filters, centre_freqs, erb_filterbank + +""" +This module contains functions for rendering "spectrograms" which use gammatone +filterbanks instead of Fourier transforms. +""" +''' +Modified from the original toolbox. Jan 2015. --Mi + +''' + +def round_half_away_from_zero(num): + """ Implement the round-half-away-from-zero rule, where fractional parts of + 0.5 result in rounding up to the nearest positive integer for positive + numbers, and down to the nearest negative number for negative integers. + """ + return np.sign(num) * np.floor(np.abs(num) + 0.5) + + +def gtgram_strides(fs, gammatoneLen, step_rate, filterbank_cols): + """ + Calculates the window size for a gammatonegram. + + @return a tuple of (window_size, hop_samples, output_columns) + """ + step_samples = int(gammatoneLen * step_rate) + columns = (1 + int(np.floor((filterbank_cols - gammatoneLen)/ step_samples))) + + return (step_samples, columns) + + +def gtgram_xe(wave, fs, f_max, channels, f_min): + """ Calculate the intermediate ERB filterbank processed matrix """ + cfs = centre_freqs(f_max, channels, f_min) + fcoefs = np.flipud(make_erb_filters(fs, cfs)) + xf = erb_filterbank(wave, fcoefs) + return xf + # xe = np.power(xf, 2) + # return xe + + +def gtgram(wave, fs, gammatoneLen, step_rate, channels, f_max, f_min): + """ + Calculate a spectrogram-like time frequency magnitude array based on + gammatone subband filters. The waveform ``wave`` (at sample rate ``fs``) is + passed through an multi-channel gammatone auditory model filterbank, with + lowest frequency ``f_min`` and highest frequency ``f_max``. The outputs of + each band then have their energy integrated over windows of ``window_time`` + seconds, advancing by ``hop_time`` secs for successive columns. These + magnitudes are returned as a nonnegative real matrix with ``channels`` rows. + + | 2009-02-23 Dan Ellis dpwe@ee.columbia.edu + | + | (c) 2013 Jason Heeris (Python implementation) + """ + xe = gtgram_xe(wave, fs, f_max, channels, f_min) + # print 'xe', xe.shape + step_samples, ncols = gtgram_strides(fs, gammatoneLen, step_rate, xe.shape[1]) + # print gammatoneLen, step_samples, channels, ncols + y = np.zeros((channels, ncols)) + + for cnum in range(ncols): + segment = xe[:, cnum * step_samples + np.arange(gammatoneLen)] + segment = np.power(segment, 2) + y[:, cnum] = np.sqrt(segment.mean(1)) + + return y.T
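A minimal usage sketch for gtgram(); the 2048-sample gammatone window and 20 channels match values used elsewhere in this changeset, while the 50% step rate and the noise input are arbitrary assumptions.

import numpy as np
from utils.gtgram import gtgram

fs = 44100
wave = np.random.randn(fs)              # one second of noise as a stand-in signal
G = gtgram(wave, fs, gammatoneLen=2048, step_rate=0.5,
           channels=20, f_max=fs / 4, f_min=100)
print G.shape                           # (frames, channels); the matrix is returned transposed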
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/kmeans.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,341 @@ +import sys, os, optparse +import numpy as np +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext + +from gmmdist import skl_full, skl_gmm +from GmmMetrics import GmmDistance +from sklearn.mixture import GMM +from sklearn.metrics.pairwise import pairwise_distances +from copy import copy + +class Model(): + def __init__(self,m,c): + self.mean = m + self.cov = c + +class Kmeans(object): + + def __init__(self, data, K=10, initial_centroids=None): + ''' + K: Pre-regulated number of clusters. + data: Input data. A list of GMMs. + self.centroids: A K-length list of cluster centroids. + ''' + + self.data = data + self.w = 0.8 + self.bias = 500 + rand_points = np.random.choice(len(self.data), K) + self.centroids = [self.data[i] for i in rand_points] + # used precomputed centroids if initialised externally + if initial_centroids: + self.centroids = [self.data[i] for i in initial_centroids] + self.labels = np.empty(len(self.data)) + self.labels.fill(np.nan) + self.n_clusters = K + self.minPts = 10 + self.maxStd = 1 + + def fit(self): + ''' + Cluster the input observations into K clusters. + The algorithm stops when no prominent difference is found in the average distance between each observation gmm to the cluster + it is assigned to. + + Returns + ------- + labels : 1d np array + Index of clusters each sample in the input observations is assigned to. + If it is not assigned to any cluster, index will be NaN. + ''' + + prev_overall_distance = 0.0 + prev_diff = np.inf + stop_condition = False + # delta_stop = 1e-4 + delta_stop = 1 + max_iter = 10 + iter_count = max_iter + while iter_count > 0: + # assign each GMM to closest centroid + for i, m in enumerate(self.data): + cc = self.find_closest_centroid(self.centroids, m) + if cc is None : + print "Warning (fit): No matching cluster can be found for data point %i" %i + continue + self.labels[i] = self.centroids.index(cc) + self.data[i].label = self.centroids.index(cc) + # centroid re-estimation + self.update_centroid(cc, m) + distance_array, within_cluster_std = self.evaluate() + overall_distance = distance_array.mean() + min_distance = distance_array.min() + # Terminate the loop when 1. the change in the mean distances of each sample with respective centroid (overall_distance) + # goes below this threshold; 2. number of data in each cluster exceeds given threshold (minPts) + diff = prev_overall_distance - overall_distance + unique_labels = np.unique(self.labels) + data_count = [(self.labels==element).sum() for element in unique_labels] + print iter_count, overall_distance, diff + if 0 < diff < delta_stop: + # if ((diff < delta_stop) and not np.isnan(prev_overall_distance)): + # if diff < delta_stop and : + stop_condition = True + break + prev_overall_distance = overall_distance + # if np.isinf(prev_overall_distance): + # prev_overall_distance = 0.0 # Otherwise whenever the next distance value is not nan, condition is met + iter_count -= 1 + + if iter_count == 0: + print 'Stop loop after max number of iteration reached, distance:', overall_distance + + # Splitting and merging formed clusters based on mean distances between each centroid and its member data points (within_cluster_distances). + # Also to be handled iteratively. 
+ new_label = self.n_clusters + label_list = np.array([self.data[i].label for i in xrange(len(self.data))]) + + for cluster_id, count_std in enumerate(within_cluster_std): + print cluster_id, count_std + if count_std[1] > self.maxStd and count_std[0] >= 2 * self.minPts: + print 'Splitting the %ith cluster into the %ith and the %ith.' %(cluster_id, new_label, new_label+1) + self.split_cluster(label_list, cluster_id, new_label) + new_label += 2 + + # Label singular data points as noise + label_list = np.array([self.data[i].label for i in xrange(len(self.data))]) + labels = copy(label_list) + + for i, value in enumerate(label_list): + if (label_list==value).sum() == 1: + labels[i] = -1 + print 'Label singular data as noise.\n', labels + + return labels + + def find_closest_centroid(self, centroids, x, prn=False): + '''Find the best matching model in k model clusters.''' + dist = [] + for c in centroids: + dist.append(c.skl_distance_full(x)) + # if prn and any([np.isinf(x) for x in dist]) : + # print "inf encountered in find best match. argmin returns:", np.argmin(dist) + # print dist + if all([np.isinf(x) for x in dist]) : + # print "All distances are inf..." + return None + c_ix = np.argmin(dist) + return centroids[c_ix] + + def update_centroid(self, c, x): + '''Update cluster models given a GMM from the observation sequence.''' + # print "c pre-update:", c + C = len(x.components) + + dist_matrix = np.zeros((C, C)) + for i in xrange(C): + for j in xrange(i, C): + dist_matrix[i,j] = skl_full(x.components[i], c.components[j]) + # dist_matrix[i,j] = skl_gmm(x.components[i].means, c.components[j].means, x.components[i].covars, c.components[j].covars, 1, 1) + dist_matrix[j,i] = dist_matrix[i,j] + # print dist_matrix + min_idx = np.argmin(dist_matrix, axis=0) + # print min_idx + # sum_weights = sum(c.weights) + sum_weights = 0.0 + for i in xrange(C): + pi0 = c.components[i].weight + pi1 = x.components[min_idx[i]].weight + if pi1 < 0.001 : + print "Warning: observed component weight very low. skipping update for this component." + continue + try : + n0 = int(self.w * self.bias * pi0 / (pi0 + pi1)) + n1 = int((1 - self.w) * self.bias * pi1 / (pi0 + pi1)) + except : + print "Warning: could not estimaate sample size" + continue + if n0 < 2 or n1 < 2 : + print "Warning: sample size estimates too small.. ", n0, n1 + c.components[i].means, c.components[i].covars = self.update_component(c.components[i], x.components[min_idx[i]], n0, n1) + # weights update + w = (c.components[i].weight * n0 + x.components[min_idx[i]].weight * n1) / (n0 + n1) + c.components[i].weight = w + sum_weights += w + for i in xrange(C): + c.components[i].weight = c.components[i].weight / sum_weights + c.update() + # print "c post-update:", c + return c + + def update_component(self, c1,c2,n1,n2) : + '''Returning the updated mean / covariance''' + nz = float(n1+n2) + m1,m2 = c1.means, c2.means + mz_est_b = (n1 * m1 + n2 * m2) / nz + + k1 = (n1-1.0)/float(n1) + k2 = (n2-1.0)/float(n2) + kz = (nz-1.0)/float(nz) + + m1p = k1*c1.covars + m1[:,np.newaxis]*m1[:,np.newaxis].T + m2p = k2*c2.covars + m2[:,np.newaxis]*m2[:,np.newaxis].T + + mzp_est = (n1 * m1p + n2 * m2p) / (nz-1.0) + + # 2) Calculate the variate mean estimates: + mz_est = (n1 * m1 + n2 * m2) / (nz-1.0) + + cov_z_est = mzp_est - (mz_est[:,np.newaxis]*mz_est[:,np.newaxis].T) * kz + + return (mz_est_b,cov_z_est) + + def evaluate(self): + '''Find stopping criteria using the Bregman loss function. 
+ Stopping condition is safisfied when the ''' + matches = 0 + distances = np.zeros(self.n_clusters) + within_cluster_std = [] + distance_list = [[] for i in xrange(self.n_clusters)] + for j,m in enumerate(self.data): + cc = self.find_closest_centroid(self.centroids, m, prn=True) + if cc is None : + print "Warning (eval): No matching cluster can be found for data point %i" %j + continue + ix = self.centroids.index(cc) + d = cc.skl_distance_full(m) + if np.isinf(d) : + print j, m + print "centroid index, distance:",ix,d + # print 'c-m', d + distances[ix] += d + distance_list[ix].append(d) + if d < 0.001 : + matches += 1 + print matches + for i in xrange(self.n_clusters): + within_cluster_std.append((len(distance_list[i]), np.std(distance_list[i]))) + distances = np.array(distances) / self.n_clusters + print 'within_cluster_std', within_cluster_std + return distances, within_cluster_std + + def split_cluster(self, label_list, cluster_id, new_label): + '''Split clusters to form new clusters.''' + old_members = np.where(label_list==cluster_id)[0] + data = [self.data[i] for i in old_members] + # Init new centroids + new_centroids = [self.data[i] for i in np.random.choice(old_members, 2)] + new_label_list = [new_label, new_label+1] + + iter_count = 1 + while iter_count > 0: + for i, m in enumerate(data): + cc = self.find_closest_centroid(new_centroids, m) + if cc is None : + print "Warning (fit): No matching cluster can be found for data point %i" %i + continue + # assign new label to the original gmm data + pos = new_centroids.index(cc) + self.data[old_members[data.index(m)]].label = new_label_list[pos] + # centroid re-estimation + cc = self.update_centroid(cc, m) + new_centroids[pos] = cc + iter_count -= 1 + + +class FeatureObj() : + __slots__ = ['key','audio','timestamps','features'] + +def getGMMs(feature, gmmWindow=10, stepsize=1): + gmm_list = [] + steps = int((feature.shape[0] - gmmWindow + stepsize) / stepsize) + for i in xrange(steps): + gmm_list.append(GmmDistance(feature[i*stepsize:(i*stepsize+gmmWindow), :])) + return gmm_list + +def pairwiseSKL(gmm_list): + '''Compute pairwise symmetrised KL divergence of a list of GMMs.''' + n_GMMs = len(gmm_list) + distance_matrix = np.zeros((n_GMMs, n_GMMs)) + for i in xrange(n_GMMs): + for j in xrange(i, n_GMMs): + distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j]) + distance_matrix[j][i] = distance_matrix[i][j] + # X = np.array(gmm_list) + # distance_matrix = pairwise_distances(X, metric = lambda x, y: x.skl_distance_full(y) ) + distance_matrix[np.isnan(distance_matrix)] = 10.0 + distance_matrix[np.isinf(distance_matrix)] = 10.0 + + +# def parse_args(): +# # define parser +# op = optparse.OptionParser() +# # IO options +# op.add_option('-i', '--input', action="store", dest="INPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/features', type="str", help="Loading features from..." ) +# op.add_option('-o', '--out', action="store", dest="OUTPUT", default='test/clustering_resutls', type="str", help="Writing clustering results to... 
") +# +# return op.parse_args() +# +# options, args = parse_args() +# +# def main(): +# +# feature_list = [i for i in os.listdir(options.INPUT) if not i.startswith('.')] +# feature_list.sort() +# fobj_list = [] +# +# for feature in feature_list: +# data = np.genfromtxt(join(options.INPUT, feature), delimiter=',',filling_values=0.0) +# dim = data.shape[1] - 1 +# if dim == 1 : +# fo = FeatureObj() +# fo.audio = feature[:feature.find('_vamp')] +# fo.key = splitext(feature.strip(fo.audio + '_'))[0] +# fo.timestamps = data[:, 0] # the first column is the timestamps +# fo.features = data[:, 1] +# fobj_list.append(fo) +# +# else : +# for col in xrange(dim): +# fo = FeatureObj() +# fo.audio = feature[:feature.find('_vamp')] +# fo.key = splitext(feature.strip(fo.audio + '_'))[0] + '_' + '%d' %col +# fo.timestamps = data[:, 0] # the first column records the timestamps +# fo.features = data[:, col+1][:,np.newaxis] +# fobj_list.append(fo) +# +# timestamps = fobj_list[0].timestamps +# features = map(lambda x: "%i:%s" %(x[0],x[1].key), enumerate(fobj_list)) +# print "Loading %d features:\n", len(fobj_list) +# +# # find the feature with the fewer number of frames +# n_frames = np.array(map(lambda x: x.features.shape[0], fobj_list)).min() +# n_features = len(fobj_list) +# +# feature_matrix = np.zeros((n_frames, n_features)) +# print 'feature_matrix', feature_matrix.shape +# +# # take the arrays from the feature objects and add them to a matrix +# for i,f in enumerate(fobj_list) : +# feature_matrix[:,i] = f.features[:n_frames,0] +# +# # normalise the feature matrix, get rid of negative features, ensure numerical stability by adding a small constant +# feature_matrix = abs(feature_matrix) / (abs(feature_matrix.max(0))+0.0005) +# feature_matrix[np.isnan(feature_matrix)] = 0.0 +# +# winlength = 5 +# stepsize = 2 +# +# gmm_list = getGMMs(feature_matrix, gmmWindow=winlength, stepsize=stepsize) +# print 'number of GMMs:', len(gmm_list) +# skl_matrix = pairwiseSKL(gmm_list) +# +# # k-means clustering of GMMs +# KmeansClustering = Kmeans(gmm_list, K=5) +# labels = KmeansClustering.fit() +# +# f1 = np.array(zip(timestamps[:len(labels)], labels)) +# +# np.savetxt(join(options.OUTPUT, 'kmeans')+'.csv', f1, delimiter=',') +# +# if __name__ == '__main__': +# main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/plotSSM.py	Thu Apr 02 18:09:27 2015 +0100
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+plotSSM.py
+
+A helper util to plot SSMs from different features.
+"""
+
+import sys
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class FeatureObj() :
+    __slots__ = ['key', 'audio', 'timestamps', 'features']
+
+class SSMObj():
+    __slots__ = ['ssm', 'novelty', 'gt']
+
+class pltSSM(object):
+
+    def __init__(self, ssm0, ssm1, ssm2, ssm0_cleaned, ssm1_cleaned, ssm2_cleaned, \
+        novelty0, novelty1, novelty2, novelty0_smoothed, novelty1_smoothed, novelty2_smoothed, gt):
+        self.gt = gt
+        self.ssm0 = ssm0
+        self.ssm0_cleaned = ssm0_cleaned
+        self.novelty0 = novelty0
+        self.novelty0_smoothed = novelty0_smoothed
+        self.ssm1 = ssm1
+        self.ssm1_cleaned = ssm1_cleaned
+        self.novelty1 = novelty1
+        self.novelty1_smoothed = novelty1_smoothed
+        self.ssm2 = ssm2
+        self.ssm2_cleaned = ssm2_cleaned
+        self.novelty2 = novelty2
+        self.novelty2_smoothed = novelty2_smoothed
+
+    def img_show(self, ax, img, gt, color):
+        '''Show an SSM with ground truth boundaries marked as vertical lines.'''
+        ax.imshow(img)
+        ax.vlines(gt, 0, img.shape[0], colors=color)
+
+    def func_show(self, ax, func, gt, c1, c2):
+        '''Plot a novelty curve with ground truth boundaries marked as vertical lines.'''
+        ax.vlines(gt, 0, len(func), colors=c1)
+        ax.plot(np.linspace(0, len(func)-1, len(func)), func, color=c2)
+
+    def plt_subplots(self, filename):
+        plt.figure(figsize=(20, 30))
+
+        ax0 = plt.subplot2grid((6,3), (0,0), rowspan=2)
+        ax1 = plt.subplot2grid((6,3), (0,1), rowspan=2)
+        ax2 = plt.subplot2grid((6,3), (0,2))
+        ax3 = plt.subplot2grid((6,3), (1,2))
+        ax4 = plt.subplot2grid((6,3), (2,0), rowspan=2)
+        ax5 = plt.subplot2grid((6,3), (2,1), rowspan=2)
+        ax6 = plt.subplot2grid((6,3), (2,2))
+        ax7 = plt.subplot2grid((6,3), (3,2))
+        ax8 = plt.subplot2grid((6,3), (4,0), rowspan=2)
+        ax9 = plt.subplot2grid((6,3), (4,1), rowspan=2)
+        ax10 = plt.subplot2grid((6,3), (4,2))
+        ax11 = plt.subplot2grid((6,3), (5,2))
+
+        self.img_show(ax0, self.ssm0, self.gt, color='k')
+        self.img_show(ax1, self.ssm0_cleaned, self.gt, color='k')
+        self.func_show(ax2, self.novelty0, self.gt, c1='g', c2='r')
+        self.func_show(ax3, self.novelty0_smoothed, self.gt, c1='g', c2='r')
+        self.img_show(ax4, self.ssm1, self.gt, color='k')
+        self.img_show(ax5, self.ssm1_cleaned, self.gt, color='k')
+        self.func_show(ax6, self.novelty1, self.gt, c1='g', c2='r')
+        self.func_show(ax7, self.novelty1_smoothed, self.gt, c1='g', c2='r')
+        self.img_show(ax8, self.ssm2, self.gt, color='k')
+        self.img_show(ax9, self.ssm2_cleaned, self.gt, color='k')
+        self.func_show(ax10, self.novelty2, self.gt, c1='g', c2='r')
+        self.func_show(ax11, self.novelty2_smoothed, self.gt, c1='g', c2='r')
+
+        plt.tight_layout()
+        plt.show()
+        # plt.savefig(filename)
+
+def main():
+    pass
+
+
+if __name__ == '__main__':
+    main()
+
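A brief sketch of driving the plotting helper with the argument order given above; all arrays are random placeholders and the output filename is arbitrary.

import numpy as np
from utils.plotSSM import pltSSM

ssm = np.random.rand(100, 100)
nov = np.random.rand(100)
gt = [20, 55, 80]                  # placeholder ground-truth boundary frames
plotter = pltSSM(ssm, ssm, ssm,    # raw SSMs for the three feature types
                 ssm, ssm, ssm,    # their cleaned counterparts
                 nov, nov, nov,    # novelty curves
                 nov, nov, nov,    # smoothed novelty curves
                 gt)
plotter.plt_subplots('ssm_overview.png')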
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/som_seg.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,1221 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +feature_combine_seg.py + +A script to evaluation script for the segmentation results using combinations of different features. +""" + +import matplotlib +# matplotlib.use('Agg') +import sys, os, optparse, csv +from itertools import combinations +from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext +from copy import copy +from mvpa2.suite import * + +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import numpy as np +from scipy.signal import correlate2d, convolve2d, filtfilt, resample +from scipy.stats import mode +from scipy.ndimage import zoom +from scipy.ndimage.morphology import binary_fill_holes +from scipy.ndimage.filters import * +from scipy.spatial.distance import squareform, pdist +from sklearn.decomposition import PCA +from sklearn.mixture import GMM +from sklearn.preprocessing import normalize +from sklearn.metrics.pairwise import pairwise_distances +from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line +from skimage.filter import canny, sobel +from skimage import data, measure, segmentation, morphology + +from PeakPickerUtil import PeakPicker +from gmmdist import * +from GmmMetrics import GmmDistance +from RankClustering import rClustering +from kmeans import Kmeans + +def parse_args(): + # define parser + op = optparse.OptionParser() + # IO options + op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading features from.." ) + op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading features from.." ) + op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading features from.." ) + op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. 
") + op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ") + op.add_option('-p', '--plot-novelty', action="store_true", dest="PLOT", default=False, help="Save novelty curev plot") + op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode") + op.add_option('-v', '--verbose-output', action="store_true", dest="VERBOSE", default=False, help="Exported raw detections.") + + return op.parse_args() +options, args = parse_args() + +class FeatureObj() : + __slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'gammatone_ssm', 'tempo_ssm', 'timbre_features', 'harmonic_ssm', 'ssm_timestamps'] + +class AudioObj(): + __slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',\ + 'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps'] + +class EvalObj(): + __slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA'] + +class SSMseg(object): + '''The main segmentation object''' + def __init__(self): + self.SampleRate = 44100 + self.NqHz = self.SampleRate/2 + self.timestamp = [] + self.previousSample = 0.0 + self.featureWindow = 6.0 + self.featureStep = 3.0 + self.kernel_size = 80 # Adjust this param according to the feature resolution. + self.blockSize = 4094 + self.stepSize = 2048 + + '''NOTE: Match the following params with those used for feature extraction!''' + + '''NOTE: Unlike spectrogram ones, Gammatone features are extracted without an FFT or any overlap. The windowing is done under the purpose of chunking + the audio to facilitate the gammatone filtering. Despite of the overlap in the time domain, only the first half after the filtering is returned, + resulting in no overlapping effect in the extracted features. 
To obtain features for overlapped audio input, make the gammatoneLen equal to blockSize + and return the whole filter output.''' + self.gammatoneLen = 2048 + self.gammatoneBandGroups = [0, 16, 34, 50, 64] + self.nGammatoneBands = 20 + self.histRes = 40 + self.lowFreq = 100 + self.highFreq = self.SampleRate / 4 + + '''Settings for extracting tempogram features.''' + self.tempoWindow = 6.0 + self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600] + + '''Peak picking settings''' + self.threshold = 30 + self.delta_threshold = 0.5 + self.backtracking_threshold = 2.4 + self.polyfitting_on = True + self.medfilter_on = True + self.LPfilter_on = True + self.whitening_on = False + self.aCoeffs = [1.0000, -0.5949, 0.2348] + self.bCoeffs = [0.1600, 0.3200, 0.1600] + self.cutoff = 0.5 + self.medianWin = 5 + + def getGaussianParams(self, length, featureRate, timeWindow): + + win_len = round(timeWindow * featureRate) + win_len = win_len + (win_len % 2) - 1 + + # a 50% overlap between windows + stepsize = ceil(win_len * 0.5) + num_win = int(floor( (length) / stepsize)) + gaussian_rate = featureRate / stepsize + + return stepsize, num_win, win_len, gaussian_rate + + def GaussianDistance(self, feature, featureRate, timeWindow): + + stepsize, num_win, win_len, gr = self.getGaussianParams(feature.shape[0], featureRate, timeWindow) + print 'stepsize, num_win, feature', stepsize, num_win, feature.shape, featureRate, timeWindow + gaussian_list = [] + gaussian_timestamps = [] + tsi = 0 + + # f = open('/Users/mitian/Documents/experiments/features.txt','w') + # print 'divergence computing..' + for num in xrange(num_win): + # print num, num * stepsize , (num * stepsize) + win_len + gf=GaussianFeature(feature[int(num * stepsize) : int((num * stepsize) + win_len), :],2) + # f.write("\n%s" %str(gf)) + gaussian_list.append(gf) + tsi = int(floor( num * stepsize + 1)) + gaussian_timestamps.append(self.timestamp[tsi]) + + # f.close() + + # print 'gaussian_list', len(gaussian_list), len(gaussian_timestamps) + dm = np.zeros((len(gaussian_list), len(gaussian_list))) + + for v1, v2 in combinations(gaussian_list, 2): + i, j = gaussian_list.index(v1), gaussian_list.index(v2) + dm[i, j] = v1.distance(v2) + dm[j, i] = v2.distance(v1) + # print 'dm[i,j]',dm[i,j] + # sio.savemat("/Users/mitian/Documents/experiments/dm-from-segmenter.mat",{"dm":dm}) + return dm, gaussian_timestamps + + def gaussian_kernel(self, size): + '''Create a gaussian tapered 45 degrees rotated checkerboard kernel. + TODO: Unit testing: Should produce this with kernel size 3: + 0.1353 -0.3679 0.1353 + 0.3679 1.0000 0.3679 + 0.1353 -0.3679 0.1353 + ''' + n = float(np.ceil(size / 2.0)) + kernel = np.zeros((size,size)) + for i in xrange(1,size+1) : + for j in xrange(1,size+1) : + gauss = np.exp( -4.0 * (np.square( (i-n)/n ) + np.square( (j-n)/n )) ) + # gauss = 1 + if np.logical_xor( j - n > np.floor((i-n) / 2.0), j - n > np.floor((n-i) / 2.0) ) : + kernel[i-1,j-1] = -gauss + else: + kernel[i-1,j-1] = gauss + return kernel + + def getDiagonalSlice(self, ssm, width): + ''' Return a diagonal slice of the ssm given its width, with 45 degrees rotation. 
+ Note: requres 45 degrees rotated kernel also.''' + w = int(np.floor(width/2.0)) + length = len(np.diagonal(ssm)) + slice = np.zeros((2*w+1,length)) + # print 'diagonal', length, w, slice.shape + for i in xrange(-w, w+1) : + slice[w+i,:] = np.hstack(( np.zeros(int(np.floor(abs(i)/2.0))), np.diagonal(ssm,i), np.zeros(int(np.ceil(abs(i)/2.0))) )) + return slice + + def getNoveltyCurve(self,dm, kernel_size): + '''Return novelty score from distance matrix.''' + + kernel_size = int(np.floor(kernel_size/2.0)+1) + slice = self.getDiagonalSlice(dm, kernel_size) + kernel = self.gaussian_kernel(kernel_size) + xc = convolve2d(slice,kernel,mode='same') + xc[abs(xc)>1e+10]=0.00001 + # print 'xc', xc.shape, xc + return xc[int(np.floor(xc.shape[0]/2.0)),:] + + def mergeBlocks(self, SSM, thresh=0.9, size=5): + '''Merge consequtive small blocks along the diagonal.''' + # found = False + # start = 0 + # i = 0 + # while i < len(SSM): + # j = i + 1 + # if found: start = i + # while(j < len(SSM) and SSM[i, j]): + # if (j-i) > size: + # found = True + # i = j + # # print 'start,end', start, i + # start = i + # else: + # found = False + # j += 1 + # if not found: + # print 'start,end', start, i + # SSM[start:i, start:i] = 0.9 + # i = j + idx = 1 + while idx < len(SSM): + i = 0 + # if ((idx-1-i) > 0 and (idx+1+i) < len(SSM)): + while ((idx-1-i) > 0 and (idx+1+i) < len(SSM) and SSM[idx-1-i, idx] > 0 and SSM[idx+1+i, idx] > 0): + i += 1 + if i > size/2: + SSM[idx-1-i:min(idx+i,len(SSM)), idx-1-i:min(idx+i,len(SSM))] = 1.0 + idx += max(1, i) + return SSM + + def getGMMs(self, feature, segment_boundaries): + '''Return GMMs for located segments''' + gmm_list = [] + gmm_list.append(GmmDistance(feature[: segment_boundaries[0], :], components = 1)) + for i in xrange(1, len(segment_boundaries)): + gmm_list.append(GmmDistance(feature[segment_boundaries[i-1] : segment_boundaries[i], :], components = 1)) + return gmm_list + + def trackValley(self, onset_index, smoothed_df): + '''Back track to the valley location of detected peaks''' + prevDiff = oldDiff = 0.0 + while (onset_index > 1) : + diff = smoothed_df[onset_index] - smoothed_df[onset_index-1] + # if (diff < 0.0 and 0 <= prevDiff < oldDiff * self.backtracking_threshold) : break + if (diff < 0 and prevDiff >= 0.1 * smoothed_df[onset_index]) : break + prevDiff = diff + oldDiff = prevDiff + onset_index -= 1 + return onset_index + + def normaliseFeature(self, feature_array): + + feature_array = np.array(feature_array) + feature_array[np.isnan(feature_array)] = 0.0 + feature_array[np.isinf(feature_array)] = 0.0 + + if len(feature_array.shape) == 1: + feature_array = (feature_array - feature_array.min()) / (feature_array.max() - feature_array.min()) + else: + mins = feature_array.min(axis=1) + maxs = feature_array.max(axis=1) + feature_array = (feature_array - mins[:, np.newaxis]) / (maxs - mins)[:, np.newaxis] + feature_array[np.isnan(feature_array)] = 0.0 + return feature_array + + def upSample(self, feature_array, step): + '''Resample downsized tempogram features, tempoWindo should be in accordance with input features''' + # print feature_array.shape + sampleRate = 44100 + stepSize = 1024.0 + # step = np.ceil(sampleRate/stepSize/5.0) + feature_array = zoom(feature_array, (step,1)) + # print 'resampled', feature_array.shape + return feature_array + + def stripeDistance(self, feature_array, feature_len, step, metric='cosine'): + '''Return distance matrix calculated for 2d time invariant features.''' + size = feature_array.shape[0] / feature_len + dm = np.zeros((size, size)) 
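+        # The loop below measures distances block-wise: block i covers frames
+        # [i*step, (i+1)*step) of the feature array, and each pair of blocks is
+        # compared by summing the frame-to-frame pairwise distances (cosine by
+        # default). Only the upper triangle is computed and then mirrored, since
+        # the matrix is symmetric; the result is finally rescaled into an SSM.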
+ + for i in xrange(size): + for j in xrange(i, size): + dm[i, j] = np.sum(pairwise_distances(feature_array[i*step:(i+1)*step, :], feature_array[j*step:(j+1)*step, :], metric)) + dm[j, i] = dm[i, j] + # print 'np.nanmax(dm)', np.nanmax(dm) + dm[np.isnan(dm)] = np.nanmax(dm) + ssm = 1 - (dm - dm.min()) / (dm.max() - dm.min()) + np.fill_diagonal(ssm, 1) + return ssm + + + def getMean(self, feature, winlen, stepsize): + means = [] + steps = int((feature.shape[0] - winlen + stepsize) / stepsize) + for i in xrange(steps): + means.append(np.mean(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) + return np.array(means) + + def getStd(self, feature, winlen, stepsize): + std = [] + steps = int((feature.shape[0] - winlen + stepsize) / stepsize) + for i in xrange(steps): + std.append(np.std(feature[i*stepsize:(i*stepsize+winlen), :], axis=0)) + return np.array(std) + + def getDelta(self, feature): + delta_feature = np.vstack((np.zeros((1, feature.shape[1])), np.diff(feature, axis=0))) + return delta_feature + + def backtrack(self, onset_index, smoothed_df): + '''Backtrack the onsets to an earlier 'perceived' location from the actually detected peak... + This is based on the rationale that the perceived onset tends to be a few frames before the detected peak. + This tracks the position in the detection function back to where the peak is startng to build up. + Notice the "out of the blue" parameter: 0.9. (Ideally, this should be tested, evaluated and reported...)''' + prevDiff = 0.0 + while (onset_index > 1) : + diff = smoothed_df[onset_index] - smoothed_df[onset_index-1] + if diff < prevDiff * self.backtracking_threshold : break + prevDiff = diff + onset_index -= 1 + return onset_index + + def trackDF(self, onset1_index, df2): + '''In the second round of detection, remove the known onsets from the DF by tracking from the peak given by the first round + to a valley to deminish the recognised peaks on top of which to start new detection.''' + for idx in xrange(len(onset1_index)) : + remove = True + for i in xrange(onset1_index[idx], 1, -1) : + if remove : + if df2[i] >= df2[i-1] : + df2[i] == 0.0 + else: + remove = False + return df2 + + def getSSM(self, feature_array, metric='cosine', norm='simple'): + '''Compute SSM given input feature array. + args: norm: ['simple', 'remove_noise'] + ''' + dm = pairwise_distances(feature_array, metric=metric) + dm = np.nan_to_num(dm) + if norm == 'simple': + ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm)) + return ssm + + def reduceSSM(self, ssm, maxfilter_size = 2, remove_size=50): + ssm[ssm<0.8] = 0 + ssm = maximum_filter(ssm,size=maxfilter_size) + ssm = morphology.remove_small_objects(ssm.astype(bool), min_size=remove_size) + return ssm + + def getPeakFeatures(self, peak_candidates, featureset, winlen): + ''' + args: winlen: length of feature window before and after an investigated peak + featureset: A list of audio features for measuring the dissimilarity. + + return: peak_features + A list of tuples of features for windows before and after each peak. 
+ ''' + prev_features = [] + post_features = [] + feature_types = len(featureset) + # print peak_candidates[-1], winlen, featureset[0].shape + # if peak_candidates[-1] + winlen > featureset[0].shape[0]: + # peak_candidates = peak_candidates[:-1] + # for x in peak_candidates: + # prev_features.append(tuple([featureset[i][x-winlen:x, :] for i in xrange(feature_types)])) + # post_features.append(tuple([featureset[i][x:x+winlen, :] for i in xrange(feature_types)])) + prev_features.append(tuple([featureset[i][:peak_candidates[0], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[0]:peak_candidates[1], :] for i in xrange(feature_types)])) + for idx in xrange(1, len(peak_candidates)-1): + prev_features.append(tuple([featureset[i][peak_candidates[idx-1]:peak_candidates[idx], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[idx]:peak_candidates[idx+1], :] for i in xrange(feature_types)])) + prev_features.append(tuple([featureset[i][peak_candidates[-2]:peak_candidates[-1], :] for i in xrange(feature_types)])) + post_features.append(tuple([featureset[i][peak_candidates[-1]:, :] for i in xrange(feature_types)])) + return prev_features, post_features + + def segmentDev(self, prev_features, post_features): + '''Deviations are measured for each given feature type. + peak_candidates: peaks from the 1st round detection + peak_features: Features for measuring the dissimilarity for parts before and after each peak. + dtype: tuple. + ''' + dev_list = [] + n_peaks = len(prev_features) + n_features = len(prev_features[0]) + # print 'n_peaks, n_features', n_peaks, n_features + for x in xrange(n_peaks): + f1, f2 = prev_features[x], post_features[x] + dev_list.append(tuple([GmmDistance(f1[i], components=1).skl_distance_full(GmmDistance(f2[i], components=1)) for i in xrange(n_features)])) + return dev_list + + def verifyPeaks(self, peak_canditates, dev_list): + '''Verify peaks from the 1st round detection by applying adaptive thresholding to the deviation list.''' + + final_peaks = copy(peak_canditates) + dev_list = np.array([np.mean(x) for x in dev_list]) # get average of devs of different features + med_dev = median_filter(dev_list, size=5) + # print dev_list, np.min(dev_list), np.median(dev_list), np.mean(dev_list), np.std(dev_list) + dev = dev_list - med_dev + # print dev + for i, x in enumerate(dev): + if x < 0: + final_peaks.remove(peak_canditates[i]) + return final_peaks + + def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0): + '''Pairwise F measure evaluation of detection rates.''' + + # print 'detection', detection + res = EvalObj() + res.TP = 0 # Total number of matched ground truth and experimental data points + gt = len(annotation) # Total number of ground truth data points + dt = len(detection) # Total number of experimental data points + foundIdx = [] + D_AD = np.zeros(gt) + D_DA = np.zeros(dt) + + for dtIdx in xrange(dt): + # print detection[dtIdx], abs(detection[dtIdx] - annotation) + D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation)) + # D_DA[dtIdx] = min([abs(annot - detection[dtIdx]) for annot in annotation]) + for gtIdx in xrange(gt): + D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection)) + # D_AD[gtIdx] = min([abs(det - annotation[gtIdx]) for det in detection]) + for dtIdx in xrange(dt): + if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0): + res.TP = res.TP + 1.0 + foundIdx.append(gtIdx) + foundIdx = 
list(set(foundIdx)) + res.TP = len(foundIdx) + res.FP = dt - res.TP + res.FN = gt - res.TP + + res.AD = np.mean(D_AD) + res.DA = np.mean(D_DA) + + res.P, res.R, res.F = 0.0, 0.0, 0.0 + + if res.TP == 0: + return res + + res.P = res.TP / float(dt) + res.R = res.TP / float(gt) + # res.F = 2 * res.P * res.R / (res.P + res.F) + res.F = 2.0 / (1.0/res.P + 1.0/res.R) + # return TP3, FP3, FN3, pairwisePrecision3, pairwiseRecall3, pairwiseFValue3, TP05, FP05, FN05, pairwisePrecision05, pairwiseRecall05, pairwiseFValue05 + return res + + def plotDetection(self, ssm, novelty, smoothed_novelty, gt, det, filename): + '''Plot performance curve. + x axis: distance threshold for feature selection; y axis: f measure''' + + plt.figure(figsize=(10,16)) + gt_plot = gt / gt[-1] * len(novelty) + det_plot = det / gt[-1] * len(novelty) + + gs = gridspec.GridSpec(2, 1, height_ratios=[3,1]) + ax0 = plt.subplot(gs[0]) + ax1 = plt.subplot(gs[1], sharex=ax0) + + ax0.imshow(ssm) + ax0.vlines(gt_plot, 0, len(ssm), colors ='w', linestyles='solid') + ax0.vlines(det_plot, 0, len(ssm), colors='k', linestyles='dashed') + ax1.plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty, 'g', np.linspace(0, len(novelty)-1, len(novelty)), smoothed_novelty,'b') + y_min, y_max = min([min(novelty), min(smoothed_novelty)]), max([max(novelty), max(smoothed_novelty)]) + ax1.vlines(gt_plot, y_min, y_max, colors ='r', linestyles='solid') + ax1.vlines(det_plot, y_min, y_max, colors='k', linestyles='dashed') + + # f, ax = plt.subplots(2, sharex=True) + # ax[0].imshow(ssm) + # ax[1].plot(np.linspace(0, len(novelty)-1, len(novelty)), novelty) + # ax[1].vlines(gt_plot, 0, len(novelty), colors ='r', linestyles='solid') + # ax[1].vlines(det_plot, 0, len(novelty), colors='b', linestyles='dashed') + # + # plt.show() + plt.savefig(filename) + + return None + + def process(self): + '''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending. + In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection + threshold - segmentation f measure curve. 
+ ''' + ssom = SimpleSOMMapper((30,30), 800, learning_rate=0.001) + + peak_picker = PeakPicker() + peak_picker.params.alpha = 9.0 # Alpha norm + peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta + peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0 + peak_picker.params.QuadThresh_b = 0.0 + peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0 + peak_picker.params.rawSensitivity = 20 + peak_picker.params.aCoeffs = self.aCoeffs + peak_picker.params.bCoeffs = self.bCoeffs + peak_picker.params.preWin = self.medianWin + peak_picker.params.postWin = self.medianWin + 1 + peak_picker.params.LP_on = self.LPfilter_on + peak_picker.params.Medfilt_on = self.medfilter_on + peak_picker.params.Polyfit_on = self.polyfitting_on + peak_picker.params.isMedianPositive = False + + # Settings used for feature extraction + feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow) + feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep) + aggregation_window, aggregation_step = 100, 50 + featureRate = float(self.SampleRate) / self.stepSize + + audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] + # audio_files = audio_files[:2] + audio_files.sort() + audio_list = [] + + gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')] + gammatone_feature_list = ['rolloff', 'contrast'] + tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')] + # tempo_feature_list = ['intensity_bpm_renamed', 'loudness_bpm_renamed'] + timbre_feature_list = ['mfcc'] + harmonic_feature_list = ['nnls'] + + gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list] + timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list] + tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list] + harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list] + + fobj_list = [] + + # For each audio file, load specific features + for audio in audio_files: + ao = AudioObj() + ao.name = splitext(audio)[0] + # print 'audio:', ao.name + # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami + # ao.gt = np.genfromtxt(annotation_file, usecols=0) + # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) + annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng + ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') + ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) + + gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], [] + for feature in gammatone_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(gammatone_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in gammatone_featureset]) + gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset] + ao.gammatone_features = np.hstack((gammatone_featureset)) + else: + ao.gammatone_features = gammatone_featureset[0] + + for feature in timbre_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(timbre_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in timbre_featureset]) + timbre_featureset = [x[:n_frame,:] for x in timbre_featureset] + 
ao.timbre_features = np.hstack((timbre_featureset)) + else: + ao.timbre_features = timbre_featureset[0] + for feature in tempo_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:]) + ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0] + break + if len(tempo_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in tempo_featureset]) + tempo_featureset = [x[:n_frame,:] for x in tempo_featureset] + ao.tempo_features = np.hstack((tempo_featureset)) + else: + ao.tempo_features = tempo_featureset[0] + for feature in harmonic_feature_list: + for f in os.listdir(feature): + if f[:f.find('_vamp')]==ao.name: + harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:]) + break + if len(harmonic_feature_list) > 1: + n_frame = np.min([x.shape[0] for x in harmonic_featureset]) + harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset] + ao.harmonic_features = np.hstack((harmonic_featureset)) + else: + ao.harmonic_features = harmonic_featureset[0] + + # # Reshaping features to keep identical dimension + # n_frames = np.array([ao.gammatone_features.shape[0], ao.harmonic_features.shape[0], ao.timbre_features.shape[0]]).min() + # step = n_frames / float(ao.tempo_features.shape[0]) + # # ao.tempo_features = self.upSample(ao.tempo_features, step) + # ao.gammatone_features = ao.gammatone_features[:n_frames, :] + # ao.harmonic_features = ao.harmonic_features[:n_frames, :] + # ao.timbre_features = ao.timbre_features[:n_frames, :] + # print ao.gammatone_features.shape, ao.harmonic_features.shape, ao.tempo_features.shape, ao.timbre_features.shape + + # Reshape features (downsample) to match tempogram ones + step = ao.tempo_features.shape[0] + # aggregation_step = (n_frames / (step+1.0)) + # Get aggregated features for computing ssm + aggregation_window, aggregation_step = 1,1 + featureRate = float(self.SampleRate) /self.stepSize + pca = PCA(n_components=5) + + ao.gammatone_features = resample(ao.gammatone_features, step) + ao.gammatone_features = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis] + ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0 + ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0 + ao.timbre_features = resample(ao.timbre_features, step) + ao.timbre_features = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis] + ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0 + ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0 + ao.harmonic_features = resample(ao.harmonic_features, step) + ao.harmonic_features = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis] + ao.harmonic_features[np.isnan(ao.harmonic_features)] = 0.0 + ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0 + ao.tempo_features = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis] + ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0 + ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0 + # print 'resampled', 
ao.gammatone_features.shape, ao.timbre_features.shape, ao.harmonic_features.shape + # gt_feature_matrix = (ao.gammatone_features - np.min(ao.gammatone_features, axis=-1)[:,np.newaxis]) / (np.max(ao.gammatone_features, axis=-1) - np.min(ao.gammatone_features, axis=-1))[:,np.newaxis] + # gt_feature_matrix[np.isnan(gt_feature_matrix)] = 0.0 + # mean_gt_feature = self.getMean(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # std_gt_feature = self.getStd(gt_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # delta_gt_feature = self.getDelta(gt_feature_matrix) + # mean_dgt_feature = self.getMean(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step) + # std_dgt_feature = self.getStd(delta_gt_feature, winlen=aggregation_window, stepsize=aggregation_step) + # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature)) + # aggregated_gt_feature = np.hstack((mean_gt_feature, std_gt_feature, mean_dgt_feature, std_dgt_feature)) + # aggregated_gt_feature = ao.gammatone_features + aggregated_gt_feature = self.getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step) + pca.fit(aggregated_gt_feature) + aggregated_gt_feature = pca.transform(aggregated_gt_feature) + distance_gt_matrix = pairwise_distances(aggregated_gt_feature, metric = 'cosine') + distance_gt_matrix = np.nan_to_num(distance_gt_matrix) + ao.gammatone_ssm = 1 - (distance_gt_matrix - distance_gt_matrix.min()) / (distance_gt_matrix.max() - distance_gt_matrix.min()) + + # tempo_feature_matrix = (ao.tempo_features - np.min(ao.tempo_features, axis=-1)[:,np.newaxis]) / (np.max(ao.tempo_features, axis=-1) - np.min(ao.tempo_features, axis=-1))[:,np.newaxis] + # tempo_feature_matrix[np.isnan(tempo_feature_matrix)] = 0.0 + # mean_tempo_feature = self.getMean(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # std_tempo_feature = self.getStd(tempo_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # delta_tempo_feature = self.getDelta(tempo_feature_matrix) + # mean_dtempo_feature = self.getMean(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step) + # std_dtempo_feature = self.getStd(delta_tempo_feature, winlen=aggregation_window, stepsize=aggregation_step) + # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature)) + # aggregated_tempo_feature = np.hstack((mean_tempo_feature, std_tempo_feature, mean_dtempo_feature, std_dtempo_feature)) + # aggregated_tempo_feature = ao.tempo_features + aggregated_tempo_feature = self.getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step) + pca.fit(aggregated_tempo_feature) + aggregated_tempo_feature = pca.transform(aggregated_tempo_feature) + distance_tempo_matrix = pairwise_distances(aggregated_tempo_feature, metric = 'cosine') + distance_tempo_matrix = np.nan_to_num(distance_tempo_matrix) + ao.tempo_ssm = 1 - (distance_tempo_matrix - distance_tempo_matrix.min()) / (distance_tempo_matrix.max() - distance_tempo_matrix.min()) + + # timbre_feature_matrix = (ao.timbre_features - np.min(ao.timbre_features, axis=-1)[:,np.newaxis]) / (np.max(ao.timbre_features, axis=-1) - np.min(ao.timbre_features, axis=-1))[:,np.newaxis] + # timbre_feature_matrix[np.isnan(timbre_feature_matrix)] = 0.0 + # mean_timbre_feature = self.getMean(timbre_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # std_timbre_feature = self.getStd(timbre_feature_matrix, winlen=aggregation_window, 
stepsize=aggregation_step) + # delta_timbre_feature = self.getDelta(timbre_feature_matrix) + # mean_dtimbre_feature = self.getMean(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step) + # std_dtimbre_feature = self.getStd(delta_timbre_feature, winlen=aggregation_window, stepsize=aggregation_step) + # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature) + # aggregated_timbre_feature = np.hstack((mean_timbre_feature, std_timbre_feature, mean_dtimbre_feature, std_dtimbre_feature)) + # aggregated_timbre_feature = ao.timbre_features + aggregated_timbre_feature = self.getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step) + pca.fit(aggregated_timbre_feature) + aggregated_timbre_feature = pca.transform(aggregated_timbre_feature) + distance_timbre_matrix = pairwise_distances(aggregated_timbre_feature, metric = 'cosine') + distance_timbre_matrix = np.nan_to_num(distance_timbre_matrix) + ao.timbre_ssm = 1 - (distance_timbre_matrix - distance_timbre_matrix.min()) / (distance_timbre_matrix.max() - distance_timbre_matrix.min()) + + # harmonic_feature_matrix = (ao.harmonic_features - np.min(ao.harmonic_features, axis=-1)[:,np.newaxis]) / (np.max(ao.harmonic_features, axis=-1) - np.min(ao.harmonic_features, axis=-1))[:,np.newaxis] + # harmonic_feature_matrix[np.isnan(harmonic_feature_matrix)] = 0.0 + # mean_harmonic_feature = self.getMean(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # std_harmonic_feature = self.getStd(harmonic_feature_matrix, winlen=aggregation_window, stepsize=aggregation_step) + # delta_harmonic_feature = self.getDelta(harmonic_feature_matrix) + # mean_dharmonic_feature = self.getMean(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step) + # std_dharmonic_feature = self.getStd(delta_harmonic_feature, winlen=aggregation_window, stepsize=aggregation_step) + # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature)) + # aggregated_harmonic_feature = np.hstack((mean_harmonic_feature, std_harmonic_feature, mean_dharmonic_feature, std_dharmonic_feature)) + aggregated_harmonic_feature = ao.harmonic_features + aggregated_harmonic_feature = self.getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step) + pca.fit(aggregated_harmonic_feature) + aggregated_harmonic_feature = pca.transform(aggregated_harmonic_feature) + distance_harmonic_matrix = pairwise_distances(aggregated_harmonic_feature, metric = 'cosine') + distance_harmonic_matrix = np.nan_to_num(distance_harmonic_matrix) + ao.harmonic_ssm = 1 - (distance_harmonic_matrix - distance_harmonic_matrix.min()) / (distance_harmonic_matrix.max() - distance_harmonic_matrix.min()) + + ao.combined_features = np.hstack((aggregated_gt_feature, aggregated_harmonic_feature, aggregated_timbre_feature, aggregated_tempo_feature)) + pca.fit(ao.combined_features) + ao.combined_features = pca.transform(ao.combined_features) + distance_combined_matrix = pairwise_distances(ao.combined_features, metric = 'cosine') + distance_combined_matrix = np.nan_to_num(distance_combined_matrix) + ao.combined_ssm = 1 - (distance_combined_matrix - distance_combined_matrix.min()) / (distance_combined_matrix.max() - distance_combined_matrix.min()) + + # Resample timestamps + # ao.ssm_timestamps = np.array(map(lambda step: step * aggregation_step / featureRate, np.arange(0.0, aggregated_gt_feature.shape[0]))) + ao.ssm_timestamps = np.array(map(lambda x: 
ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) + # print ao.gammatone_ssm.shape, ao.tempo_ssm.shape, ao.timbre_ssm.shape, ao.harmonic_ssm.shape, len(ao.ssm_timestamps) + + # # Save SSMs. + # gammatone_ssm = copy(ao.gammatone_ssm) + # gammatone_ssm[gammatone_ssm<0.8]=0.0 + # plt.figure(figsize=(10, 10)) + # plt.vlines(ao.gt / ao.gt[-1] * gammatone_ssm.shape[0], 0, gammatone_ssm.shape[0], colors='r') + # plt.imshow(gammatone_ssm, cmap='Greys') + # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-gammatone.pdf'),format='pdf') + # + # tempo_ssm = copy(ao.tempo_ssm) + # tempo_ssm[tempo_ssm<0.8]=0.0 + # plt.figure(figsize=(10, 10)) + # plt.vlines(ao.gt / ao.gt[-1] * tempo_ssm.shape[0], 0, tempo_ssm.shape[0], colors='r') + # plt.imshow(tempo_ssm, cmap='Greys') + # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-tempo.pdf'),format='pdf') + # + # timbre_ssm = copy(ao.timbre_ssm) + # timbre_ssm[timbre_ssm<0.8]=0.0 + # plt.figure(figsize=(10, 10)) + # plt.vlines(ao.gt / ao.gt[-1] * timbre_ssm.shape[0], 0, timbre_ssm.shape[0], colors='r') + # plt.imshow(timbre_ssm, cmap='Greys') + # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-timbre.pdf'),format='pdf') + # + # harmonic_ssm = copy(ao.harmonic_ssm) + # harmonic_ssm[harmonic_ssm<0.8]=0.0 + # plt.figure(figsize=(10, 10)) + # plt.vlines(ao.gt / ao.gt[-1] * harmonic_ssm.shape[0], 0, harmonic_ssm.shape[0], colors='r') + # plt.imshow(harmonic_ssm, cmap='Greys') + # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-harmonic.pdf'),format='pdf') + # + # ssm_cleaned = copy(ao.combined_ssm) + # ssm_cleaned[ssm_cleaned<0.8] = 0 + # plt.figure(figsize=(10, 10)) + # plt.vlines(ao.gt / ao.gt[-1] * ssm_cleaned.shape[0], 0, ssm_cleaned.shape[0], colors='r') + # plt.imshow(ssm_cleaned, cmap='Greys') + # plt.savefig(join(options.OUTPUT, 'ssm', ao.name+'-combined.pdf'),format='pdf') + + audio_list.append(ao) + + # Evaluate individual segmentation results. 
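+        # pairwiseF (defined above) counts an annotated boundary as matched when some
+        # detection lies within +/- tolerance/2 seconds of it; P = TP / #detections,
+        # R = TP / #annotations, and F is their harmonic mean. A made-up toy example
+        # (hypothetical values, not from any dataset) of how it behaves:
+        #     demo_gt  = np.array([10.0, 45.0, 92.0])   # hypothetical annotated boundaries (s)
+        #     demo_det = np.array([11.0, 60.0, 91.0])   # hypothetical detections (s)
+        #     demo_res = self.pairwiseF(demo_gt, demo_det, tolerance=3.0)
+        #     # 10<->11 and 92<->91 match within 1.5 s, 60 matches nothing:
+        #     # demo_res.TP == 2, demo_res.P == demo_res.R == demo_res.F == 2/3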
+ outfile1 = join(options.OUTPUT, 'individualSOM.csv') + with open(outfile1, 'a') as f: + csvwriter = csv.writer(f, delimiter=',') + csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \ + 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \ + 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \ + 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \ + 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \ + 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3']) + + # outfile4 = join(options.OUTPUT, 'individualResDF.csv') + # with open(outfile4, 'a') as f: + # csvwriter = csv.writer(f, delimiter=',') + # csvwriter.writerow(['audio', 'gammatone_tp_05', 'gammatone_fp_05', 'gammatone_fn_05', 'gammatone_P_05', 'gammatone_R_05', 'gammatone_F_05', 'gammatone_AD_05', 'gammatone_DA_05', 'gammatone_tp_3', \ + # 'gammatone_fp_3', 'gammatone_fn_3', 'gammatone_P_3', 'gammatone_R_3', 'gammatone_F_3', 'gammatone_AD_3', 'gammatone_DA_3', 'harmonic_tp_05', 'harmonic_fp_05', 'harmonic_fn_05', 'harmonic_P_05', \ + # 'harmonic_R_05', 'harmonic_F_05', 'harmonic_AD_05', 'harmonic_DA_05', 'harmonic_tp_3', 'harmonic_fp_3', 'harmonic_fn_3', 'harmonic_P_3', 'harmonic_R_3', 'harmonic_F_3', 'harmonic_AD_3', 'harmonic_DA_3', \ + # 'timbre_tp_05', 'timbre_fp_05', 'timbre_fn_05', 'timbre_P_05', 'timbre_R_05', 'timbre_F_05', 'timbre_AD_05', 'timbre_DA_05', 'timbre_tp_3', 'timbre_fp_3', 'timbre_fn_3', 'timbre_P_3', 'timbre_R_3', \ + # 'timbre_F_3', 'timbre_AD_3', 'timbre_DA_3', 'tempo_tp_05', 'tempo_fp_05', 'tempo_fn_05', 'tempo_P_05', 'tempo_R_05', 'tempo_F_05', 'tempo_AD_05', 'tempo_DA_05', \ + # 'tempo_tp_3', 'tempo_fp_3', 'tempo_fn_3', 'tempo_P_3', 'tempo_R_3', 'tempo_F_3', 'tempo_AD_3', 'tempo_DA_3']) + + # Fuse novelty curves from individual segmentation results. 
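+        # For each feature combination below the fusion is done at the feature level:
+        # the per-frame features are concatenated with np.hstack, projected onto the
+        # self-organising map (ssom.train followed by ssom(...)), turned into a cosine
+        # SSM with getSSM, denoised with reduceSSM, and collapsed into a novelty curve
+        # with getNoveltyCurve. Peaks of the curve are mapped to ssm_timestamps and
+        # scored against the annotations with pairwiseF at 0.5 s and 3 s tolerances.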
+ outfile2 = join(options.OUTPUT, 'individualFuseSOM.csv') + with open(outfile2, 'a') as f: + csvwriter = csv.writer(f, delimiter=',') + csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\ + 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \ + 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \ + 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \ + 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3']) + + + for i,ao in enumerate(audio_list): + + print 'processing self organizing maps for %s' %ao.name + + # 1.Novelty based segmentation. + # Correlate an Gaussian on the diagonal to contruct the novelty curve + # print 'ssm', ao.gammatone_ssm.shape, ao.timbre_ssm.shape, ao.tempo_ssm.shape, ao.harmonic_ssm.shape + ssom.train(ao.gammatone_features) + gammatone_som = ssom(ao.gammatone_features) + ssom.train(ao.timbre_features) + timbre_som = ssom(ao.timbre_features) + ssom.train(ao.tempo_features) + tempo_som = ssom(ao.tempo_features) + ssom.train(ao.harmonic_features) + harmonic_som = ssom(ao.harmonic_features) + + gammatone_harmonic_features = np.hstack((ao.gammatone_features, ao.harmonic_features)) + gammatone_timbre_features = np.hstack((ao.gammatone_features, ao.timbre_features)) + gammatone_tempo_features = np.hstack((ao.gammatone_features, ao.tempo_features)) + harmonic_timbre_features = np.hstack((ao.harmonic_features, ao.timbre_features)) + harmonic_tempo_features = np.hstack((ao.harmonic_features, ao.tempo_features)) + timbre_tempo_features = np.hstack((ao.timbre_features, ao.tempo_features)) + + gammatone_harmonic_timbre_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features)) + gammatone_harmonic_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.tempo_features)) + gammatone_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.timbre_features, ao.tempo_features)) + harmonic_timbre_tempo_features = np.hstack((ao.harmonic_features, ao.timbre_features, ao.tempo_features)) + + gammatone_harmonic_timbre_tempo_features = np.hstack((ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features)) + + ssom.train(gammatone_harmonic_features) + gammatone_harmonic_som = ssom(gammatone_harmonic_features) + ssom.train(gammatone_timbre_features) + gammatone_timbre_som = ssom(gammatone_timbre_features) + ssom.train(gammatone_tempo_features) + gammatone_tempo_som = ssom(gammatone_tempo_features) + ssom.train(harmonic_timbre_features) + harmonic_timbre_som = ssom(harmonic_timbre_features) + ssom.train(harmonic_timbre_features) + harmonic_timbre_som = ssom(harmonic_timbre_features) + ssom.train(harmonic_tempo_features) + harmonic_tempo_som = ssom(harmonic_tempo_features) + ssom.train(timbre_tempo_features) + timbre_tempo_som 
= ssom(timbre_tempo_features) + + ssom.train(gammatone_harmonic_timbre_features) + gammatone_harmonic_timbre_som = ssom(gammatone_harmonic_timbre_features) + ssom.train(gammatone_harmonic_tempo_features) + gammatone_harmonic_tempo_som = ssom(gammatone_harmonic_tempo_features) + ssom.train(gammatone_timbre_tempo_features) + gammatone_timbre_tempo_som = ssom(gammatone_timbre_tempo_features) + ssom.train(harmonic_timbre_tempo_features) + harmonic_timbre_tempo_som = ssom(harmonic_timbre_tempo_features) + + ssom.train(gammatone_harmonic_timbre_tempo_features) + gammatone_harmonic_timbre_tempo_som = ssom(gammatone_harmonic_timbre_tempo_features) + + gammatone_ssm = self.getSSM(gammatone_som) + harmonic_ssm = self.getSSM(harmonic_som) + timbre_ssm = self.getSSM(timbre_som) + tempo_ssm = self.getSSM(tempo_som) + gammatone_harmonic_ssm = self.getSSM(gammatone_harmonic_som) + gammatone_timbre_ssm = self.getSSM(gammatone_timbre_som) + gammatone_tempo_ssm = self.getSSM(gammatone_tempo_som) + harmonic_timbre_ssm = self.getSSM(harmonic_timbre_som) + harmonic_tempo_ssm = self.getSSM(harmonic_tempo_som) + timbre_tempo_ssm = self.getSSM(timbre_tempo_som) + gammatone_harmonic_timbre_ssm = self.getSSM(gammatone_harmonic_timbre_som) + gammatone_harmonic_tempo_ssm = self.getSSM(gammatone_harmonic_tempo_som) + gammatone_timbre_tempo_ssm = self.getSSM(gammatone_timbre_tempo_som) + harmonic_timbre_tempo_ssm = self.getSSM(harmonic_timbre_tempo_som) + gammatone_harmonic_timbre_tempo_ssm = self.getSSM(gammatone_harmonic_timbre_tempo_som) + + + # Noise removal in ssm + reduced_gammatone_ssm = self.reduceSSM(gammatone_ssm) + reduced_timbre_ssm = self.reduceSSM(timbre_ssm) + reduced_tempo_ssm = self.reduceSSM(ao.tempo_ssm) + reduced_harmonic_ssm = self.reduceSSM(ao.harmonic_ssm) + reduced_gammatone_harmonic_ssm = self.reduceSSM(gammatone_harmonic_ssm) + reduced_gammatone_timbre_ssm = self.reduceSSM(gammatone_timbre_ssm) + reduced_gammatone_tempo_ssm = self.reduceSSM(gammatone_tempo_ssm) + reduced_harmonic_timbre_ssm = self.reduceSSM(harmonic_timbre_ssm) + reduced_harmonic_tempo_ssm = self.reduceSSM(harmonic_tempo_ssm) + reduced_timbre_tempo_ssm = self.reduceSSM(timbre_tempo_ssm) + reduced_gammatone_harmonic_timbre_ssm = self.reduceSSM(gammatone_harmonic_timbre_ssm) + reduced_gammatone_harmonic_tempo_ssm = self.reduceSSM(gammatone_harmonic_tempo_ssm) + reduced_gammatone_timbre_tempo_ssm = self.reduceSSM(gammatone_timbre_tempo_ssm) + reduced_harmonic_timbre_tempo_ssm = self.reduceSSM(harmonic_timbre_tempo_ssm) + reduced_gammatone_harmonic_timbre_tempo_ssm = self.reduceSSM(gammatone_harmonic_timbre_tempo_ssm) + + + gammatone_novelty = self.getNoveltyCurve(reduced_gammatone_ssm, self.kernel_size) + gammatone_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gammatone_novelty] + timbre_novelty = self.getNoveltyCurve(reduced_timbre_ssm, self.kernel_size) + timbre_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in timbre_novelty] + tempo_novelty = self.getNoveltyCurve(reduced_tempo_ssm, self.kernel_size) + tempo_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tempo_novelty] + harmonic_novelty = self.getNoveltyCurve(reduced_harmonic_ssm, self.kernel_size) + harmonic_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in harmonic_novelty] + + # Peak picking from the novelty curve + smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty) + gammatone_detection = [ao.ssm_timestamps[int(i)] 
for i in gammatone_novelty_peaks] + [ao.gt[-1]] + smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty) + timbre_detection = [ao.ssm_timestamps[int(i)] for i in timbre_novelty_peaks] + [ao.gt[-1]] + smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty) + harmonic_detection = [ao.ssm_timestamps[int(i)] for i in harmonic_novelty_peaks] + [ao.gt[-1]] + smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty) + tempo_detection = [ao.ssm_timestamps[int(i)] for i in tempo_novelty_peaks] + [ao.gt[-1]] + + gt_res_05 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=0.5, combine=1.0) + gt_res_3 = self.pairwiseF(ao.gt, gammatone_detection, tolerance=3, combine=1.0) + harmonic_res_05 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=0.5, combine=1.0) + harmonic_res_3 = self.pairwiseF(ao.gt, harmonic_detection, tolerance=3, combine=1.0) + tempo_res_05 = self.pairwiseF(ao.gt, tempo_detection, tolerance=0.5, combine=1.0) + tempo_res_3 = self.pairwiseF(ao.gt, tempo_detection, tolerance=3, combine=1.0) + timbre_res_05 = self.pairwiseF(ao.gt, timbre_detection, tolerance=0.5, combine=1.0) + timbre_res_3 = self.pairwiseF(ao.gt, timbre_detection, tolerance=3, combine=1.0) + + with open(outfile1, 'a') as f: + csvwriter = csv.writer(f, delimiter=',') + csvwriter.writerow([ao.name, gt_res_05.TP, gt_res_05.FP, gt_res_05.FN, gt_res_05.P, gt_res_05.R, gt_res_05.F, gt_res_05.AD, gt_res_05.DA, gt_res_3.TP, gt_res_3.FP, gt_res_3.FN, gt_res_3.P, \ + gt_res_3.R, gt_res_3.F, gt_res_3.AD, gt_res_3.DA, harmonic_res_05.TP, harmonic_res_05.FP, harmonic_res_05.FN, harmonic_res_05.P, harmonic_res_05.R, harmonic_res_05.F, harmonic_res_05.AD, harmonic_res_05.DA, \ + harmonic_res_3.TP, harmonic_res_3.FP, harmonic_res_3.FN, harmonic_res_3.P, harmonic_res_3.R, harmonic_res_3.F, harmonic_res_3.AD, harmonic_res_3.DA, timbre_res_05.TP, timbre_res_05.FP, \ + timbre_res_05.FN, timbre_res_05.P, timbre_res_05.R, timbre_res_05.F, timbre_res_05.AD, timbre_res_05.DA, timbre_res_3.TP, timbre_res_3.FP, timbre_res_3.FN, timbre_res_3.P, timbre_res_3.R, timbre_res_3.F, \ + timbre_res_3.AD, timbre_res_3.DA, tempo_res_05.TP, tempo_res_05.FP, tempo_res_05.FN, tempo_res_05.P, tempo_res_05.R, tempo_res_05.F, tempo_res_05.AD, tempo_res_05.DA, tempo_res_3.TP, tempo_res_3.FP, \ + tempo_res_3.FN, tempo_res_3.P, tempo_res_3.R, tempo_res_3.F, tempo_res_3.AD, tempo_res_3.DA]) + + gt_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_ssm, self.kernel_size) + gt_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_novelty] + gt_tb_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_ssm, self.kernel_size) + gt_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_novelty] + gt_tp_novelty = self.getNoveltyCurve(reduced_gammatone_tempo_ssm, self.kernel_size) + gt_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_novelty] + hm_tb_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_ssm, self.kernel_size) + hm_tb_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_novelty] + hm_tp_novelty = self.getNoveltyCurve(reduced_harmonic_tempo_ssm, self.kernel_size) + hm_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tp_novelty] + tb_tp_novelty = self.getNoveltyCurve(reduced_timbre_tempo_ssm, self.kernel_size) + tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in 
tb_tp_novelty] + + smoothed_gt_tb_novelty, gt_tb_novelty_peaks = peak_picker.process(gt_tb_novelty) + gt_tb_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_novelty_peaks] + [ao.gt[-1]] + smoothed_gt_tp_novelty, gt_tp_novelty_peaks = peak_picker.process(gt_tp_novelty) + gt_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_novelty_peaks] + [ao.gt[-1]] + smoothed_gt_hm_novelty, gt_hm_novelty_peaks = peak_picker.process(gt_hm_novelty) + gt_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_novelty_peaks] + [ao.gt[-1]] + smoothed_tb_tp_novelty, tb_tp_novelty_peaks = peak_picker.process(tb_tp_novelty) + tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_novelty_peaks] + [ao.gt[-1]] + smoothed_tb_hm_novelty, tb_hm_novelty_peaks = peak_picker.process(tb_hm_novelty) + tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_novelty_peaks] + [ao.gt[-1]] + smoothed_tp_hm_novelty, tp_hm_novelty_peaks = peak_picker.process(tp_hm_novelty) + tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_novelty_peaks] + [ao.gt[-1]] + + gt_tb_tp_novelty = self.getNoveltyCurve(reduced_gammatone_timbre_tempo_ssm, self.kernel_size) + gt_tb_tp_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_novelty] + gt_tb_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_ssm, self.kernel_size) + gt_tb_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_hm_novelty] + gt_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_tempo_ssm, self.kernel_size) + gt_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_hm_novelty] + tb_tp_hm_novelty = self.getNoveltyCurve(reduced_harmonic_timbre_tempo_ssm, self.kernel_size) + tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_hm_novelty] + gt_tb_tp_hm_novelty = self.getNoveltyCurve(reduced_gammatone_harmonic_timbre_tempo_ssm, self.kernel_size) + gt_tb_tp_hm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_hm_novelty] + + smoothed_gt_tb_tp_novelty, gt_tb_tp_novelty_peaks = peak_picker.process(gt_tb_tp_novelty) + gt_tb_tp_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_novelty_peaks] + [ao.gt[-1]] + smoothed_gt_tb_hm_novelty, gt_tb_hm_novelty_peaks = peak_picker.process(gt_tb_hm_novelty) + gt_tb_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_hm_novelty_peaks] + [ao.gt[-1]] + smoothed_gt_tp_hm_novelty, gt_tp_hm_novelty_peaks = peak_picker.process(gt_tp_hm_novelty) + gt_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_hm_novelty_peaks] + [ao.gt[-1]] + smoothed_tb_tp_hm_novelty, tb_tp_hm_novelty_peaks = peak_picker.process(tb_tp_hm_novelty) + tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_hm_novelty_peaks] + [ao.gt[-1]] + smoothed_gt_tb_tp_hm_novelty, gt_tb_tp_hm_novelty_peaks = peak_picker.process(gt_tb_tp_hm_novelty) + gt_tb_tp_hm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_hm_novelty_peaks] + [ao.gt[-1]] + + # novelty_peaks = gt_tb_tp_hm_novelty_peaks + # novelty_detection = [ao.ssm_timestamps[int(i)] for i in novelty_peaks] + [ao.gt[-1]] + + if options.PLOT: + self.plotDetection(ao.ssm, novelty, smoothed_novelty, ao.gt, detection, filename=join(options.OUTPUT+ ao.name)+'.pdf') + + gt_tb_res_05 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=0.5, combine=1.0) + gt_tb_res_3 = self.pairwiseF(ao.gt, gt_tb_detection, tolerance=3, combine=1.0) + gt_tp_res_05 = self.pairwiseF(ao.gt, 
gt_tp_detection, tolerance=0.5, combine=1.0) + gt_tp_res_3 = self.pairwiseF(ao.gt, gt_tp_detection, tolerance=3, combine=1.0) + gt_hm_res_05 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=0.5, combine=1.0) + gt_hm_res_3 = self.pairwiseF(ao.gt, gt_hm_detection, tolerance=3, combine=1.0) + tb_tp_res_05 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=0.5, combine=1.0) + tb_tp_res_3 = self.pairwiseF(ao.gt, tb_tp_detection, tolerance=3, combine=1.0) + tb_hm_res_05 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=0.5, combine=1.0) + tb_hm_res_3 = self.pairwiseF(ao.gt, tb_hm_detection, tolerance=3, combine=1.0) + tp_hm_res_05 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=0.5, combine=1.0) + tp_hm_res_3 = self.pairwiseF(ao.gt, tp_hm_detection, tolerance=3, combine=1.0) + + gt_tb_tp_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=0.5, combine=1.0) + gt_tb_tp_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_detection, tolerance=3, combine=1.0) + gt_tb_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=0.5, combine=1.0) + gt_tb_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_hm_detection, tolerance=3, combine=1.0) + gt_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=0.5, combine=1.0) + gt_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tp_hm_detection, tolerance=3, combine=1.0) + tb_tp_hm_res_05 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=0.5, combine=1.0) + tb_tp_hm_res_3 = self.pairwiseF(ao.gt, tb_tp_hm_detection, tolerance=3, combine=1.0) + + gt_tb_tp_hm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=0.5, combine=1.0) + gt_tb_tp_hm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_hm_detection, tolerance=3, combine=1.0) + + + # Output detected segment locations. + if options.VERBOSE: + outdir = join(options.OUTPUT, 'detection', ao.name) + if not isdir(outdir): + os.mkdir(outdir) + np.savetxt(join(outdir, 'gammatone.csv'), gammatone_detection) + np.savetxt(join(outdir, 'timbre.csv'), timbre_detection) + np.savetxt(join(outdir, 'tempo.csv'), tempo_detection) + np.savetxt(join(outdir, 'harmonic.csv'), harmonic_detection) + + np.savetxt(join(outdir, 'gammatone_timbre_novelty.csv'), gt_tb_detection) + np.savetxt(join(outdir, 'gammatone_tempo_novelty.csv'), gt_tp_detection) + np.savetxt(join(outdir, 'gammatone_harmonic_novelty.csv'), gt_hm_detection) + np.savetxt(join(outdir, 'timbre_tempo_novelty.csv'), tb_tp_detection) + np.savetxt(join(outdir, 'timbre_harmonic_novelty.csv'), tb_hm_detection) + np.savetxt(join(outdir, 'tempo_harmonic_novelty.csv'), tp_hm_detection) + + np.savetxt(join(outdir, 'gammatone_timbre_tempo_novelty.csv'), gt_tb_tp_detection) + np.savetxt(join(outdir, 'gammatone_timbre_harmonic_novelty.csv'), gt_tb_hm_detection) + np.savetxt(join(outdir, 'gammatone_tempo_harmonic_novelty.csv'), gt_tp_hm_detection) + np.savetxt(join(outdir, 'timbre_tempo_harmonic_novelty.csv'), tb_tp_hm_detection) + np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_novelty.csv'), gt_tb_tp_hm_detection) + + # with open(outfile4, 'a') as f: + # csvwriter = csv.writer(f, delimiter=',') + # csvwriter.writerow([ao.name, gt_df_05.TP, gt_df_05.FP, gt_df_05.FN, gt_df_05.P, gt_df_05.R, gt_df_05.F, gt_df_05.AD, gt_df_05.DA, gt_df_3.TP, gt_df_3.FP, gt_df_3.FN, gt_df_3.P, \ + # gt_df_3.R, gt_df_3.F, gt_df_3.AD, gt_df_3.DA, harmonic_df_05.TP, harmonic_df_05.FP, harmonic_df_05.FN, harmonic_df_05.P, harmonic_df_05.R, harmonic_df_05.F, harmonic_df_05.AD, harmonic_df_05.DA, \ + # harmonic_df_3.TP, harmonic_df_3.FP, harmonic_df_3.FN, harmonic_df_3.P, harmonic_df_3.R, 
harmonic_df_3.F, harmonic_df_3.AD, harmonic_df_3.DA, timbre_df_05.TP, timbre_df_05.FP, \ + # timbre_df_05.FN, timbre_df_05.P, timbre_df_05.R, timbre_df_05.F, timbre_df_05.AD, timbre_df_05.DA, timbre_df_3.TP, timbre_df_3.FP, timbre_df_3.FN, timbre_df_3.P, timbre_df_3.R, timbre_df_3.F, \ + # timbre_df_3.AD, timbre_df_3.DA, tempo_df_05.TP, tempo_df_05.FP, tempo_df_05.FN, tempo_df_05.P, tempo_df_05.R, tempo_df_05.F, tempo_df_05.AD, tempo_df_05.DA, tempo_df_3.TP, tempo_df_3.FP, \ + # tempo_df_3.FN, tempo_df_3.P, tempo_df_3.R, tempo_df_3.F, tempo_df_3.AD, tempo_df_3.DA]) + + with open(outfile2, 'a') as f: + csvwriter = csv.writer(f, delimiter=',') + csvwriter.writerow([ao.name, gt_tb_res_05.P, gt_tb_res_05.R, gt_tb_res_05.F, gt_tb_res_3.P, gt_tb_res_3.R, gt_tb_res_3.F, gt_tp_res_05.P, gt_tp_res_05.R, gt_tp_res_05.F, gt_tp_res_3.P, gt_tp_res_3.R, gt_tp_res_3.F, \ + gt_hm_res_05.P, gt_hm_res_05.R, gt_hm_res_05.F, gt_hm_res_3.P, gt_hm_res_3.R, gt_hm_res_3.F, tb_tp_res_05.P, tb_tp_res_05.R, tb_tp_res_05.F, tb_tp_res_3.P, tb_tp_res_3.R, tb_tp_res_3.F, \ + tb_hm_res_05.P, tb_hm_res_05.R, tb_hm_res_05.F, tb_hm_res_3.P, tb_hm_res_3.R, tb_hm_res_3.F, tp_hm_res_05.P, tp_hm_res_05.R, tp_hm_res_05.F, tp_hm_res_3.P, tp_hm_res_3.R, tp_hm_res_3.F, \ + gt_tb_tp_res_05.P, gt_tb_tp_res_05.R, gt_tb_tp_res_05.F, gt_tb_tp_res_3.P, gt_tb_tp_res_3.R, gt_tb_tp_res_3.F, gt_tb_hm_res_05.P, gt_tb_hm_res_05.R, gt_tb_hm_res_05.F, gt_tb_hm_res_3.P, gt_tb_hm_res_3.R, gt_tb_hm_res_3.F, \ + gt_tp_hm_res_05.P, gt_tp_hm_res_05.R, gt_tp_hm_res_05.F, gt_tp_hm_res_3.P, gt_tp_hm_res_3.R, gt_tp_hm_res_3.F, tb_tp_hm_res_05.P, tb_tp_hm_res_05.R, tb_tp_hm_res_05.F, tb_tp_hm_res_3.P, tb_tp_hm_res_3.R, tb_tp_hm_res_3.F, \ + gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F]) + + + # Verification of detected boundaries by novelty fusion from the first round + # ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features] + # winlen = 1.5 * self.SampleRate / self.stepSize + # prev_features, post_features = self.getPeakFeatures(gt_tb_tp_hm_novelty_peaks, ao_featureset, winlen=10) + # dev_list = self.segmentDev(prev_features, post_features) + # gt_tb_tp_hm_novelty_peaks = gt_tb_tp_hm_novelty_peaks[:len(dev_list)] + # # print 'len(dev_list)', len(dev_list), len(gt_tb_tp_hm_novelty_peaks) + # # print gt_tb_tp_hm_novelty_peaks, dev_list + # dev_mean = [np.mean(x) for x in dev_list] + # np.savetxt(join(options.OUTPUT, 'dev', ao.name+'.csv'), np.vstack((gt_tb_tp_hm_detection[:len(dev_list)], dev_mean)).T, delimiter=',') + # peak_verified = self.verifyPeaks(gt_tb_tp_hm_novelty_peaks, dev_list) + # + # verified_detection = [ao.ssm_timestamps[int(i)] for i in peak_verified] + [ao.gt[-1]] + # verified_detection_05 = self.pairwiseF(ao.gt, verified_detection, tolerance=0.5, combine=1.0) + # verified_detection_3 = self.pairwiseF(ao.gt, verified_detection, tolerance=3, combine=1.0) + # + # print gt_tb_tp_hm_res_05.TP, gt_tb_tp_hm_res_05.FP, gt_tb_tp_hm_res_05.FN, gt_tb_tp_hm_res_05.P, gt_tb_tp_hm_res_05.R, gt_tb_tp_hm_res_05.F + # print gt_tb_tp_hm_res_3.TP, gt_tb_tp_hm_res_3.FP, gt_tb_tp_hm_res_3.FN, gt_tb_tp_hm_res_3.P, gt_tb_tp_hm_res_3.R, gt_tb_tp_hm_res_3.F + # + # print verified_detection_05.TP, verified_detection_05.FP, verified_detection_05.FN, verified_detection_05.P, verified_detection_05.R, verified_detection_05.F + # print verified_detection_3.TP, verified_detection_3.FP, verified_detection_3.FN, verified_detection_3.P, 
verified_detection_3.R, verified_detection_3.F + + # if len(novelty_peaks): + # ao.gammatone_gmm = self.getGMMs(ao.gammatone_features, novelty_peaks) + # ao.harmonic_gmm = self.getGMMs(ao.harmonic_features, novelty_peaks) + # ao.tempo_gmm = self.getGMMs(ao.tempo_features, novelty_peaks) + # ao.timbre_gmm = self.getGMMs(ao.timbre_features, novelty_peaks) + # + # rc = rClustering(eps=1., k=8, rank='max_neighbors') + # rc.fit(ao.gammatone_gmm) + # gammatone_clf = rc.classification + # gammatone_neighborhood_size, gammatone_average_div, gammatone_node_rank = rc.getNodeRank() + # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_clf)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_neighborhood_size)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_average_div)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-gammatone.csv'), np.vstack((novelty_detection[:-1], gammatone_node_rank)).T, delimiter=',') + # + # rc = rClustering(eps=1., k=8, rank='max_neighbors') + # rc.fit(ao.harmonic_gmm) + # harmonic_clf = rc.classification + # harmonic_neighborhood_size, harmonic_average_div, harmonic_node_rank = rc.getNodeRank() + # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_clf)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_neighborhood_size)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_average_div)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-harmonic.csv'), np.vstack((novelty_detection[:-1], harmonic_node_rank)).T, delimiter=',') + # + # rc = rClustering(eps=1., k=8, rank='max_neighbors') + # rc.fit(ao.tempo_gmm) + # tempo_clf = rc.classification + # tempo_neighborhood_size, tempo_average_div, tempo_node_rank = rc.getNodeRank() + # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_clf)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_neighborhood_size)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_average_div)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'average_div', ao.name+'-tempo.csv'), np.vstack((novelty_detection[:-1], tempo_node_rank)).T, delimiter=',') + # + # rc = rClustering(eps=1., k=8, rank='max_neighbors') + # rc.fit(ao.timbre_gmm) + # timbre_clf = rc.classification + # timbre_neighborhood_size, timbre_average_div, timbre_node_rank = rc.getNodeRank() + # np.savetxt(join(options.OUTPUT, 'classification', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_clf)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'neighborhood_size', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_neighborhood_size)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'node_rank', ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_average_div)).T, delimiter=',') + # np.savetxt(join(options.OUTPUT, 'average_div', 
ao.name+'-timbre.csv'), np.vstack((novelty_detection[:-1], timbre_node_rank)).T, delimiter=',') + + + # # Evaluate segmentation results using combined SSMs. + # outfile3 = join(options.OUTPUT, 'combinedSSMRes.csv') + # with open(outfile3, 'a') as f: + # csvwriter = csv.writer(f, delimiter=',') + # csvwriter.writerow(['audio', 'gt_tb_P_0.5', 'gt_tb_R_0.5', 'gt_tb_F_0.5', 'gt_tb_P_3', 'gt_tb_R_3', 'gt_tb_F_3', 'gt_tp_P_0.5', 'gt_tp_R_0.5', 'gt_tp_F_0.5', 'gt_tp_P_3', 'gt_tp_R_3', 'gt_tp_F_3',\ + # 'gt_hm_P_0.5', 'gt_hm_R_0.5', 'gt_hm_F_0.5', 'gt_hm_P_3', 'gt_hm_R_3', 'gt_hm_F_3', 'tb_tp_P_0.5', 'tb_tp_R_0.5', 'tb_tp_F_0.5', 'tb_tp_P_3', 'tb_tp_R_3', 'tb_tp_F_3', 'tb_hm_P_0.5', 'tb_hm_R_0.5', 'tb_hm_F_0.5', \ + # 'tb_hm_P_3', 'tb_hm_R_3', 'tb_hm_F_3', 'tp_hm_P_0.5', 'tp_hm_R_0.5', 'tp_hm_F_0.5', 'tp_hm_P_3', 'tp_hm_R_3', 'tp_hm_F_3', 'gt_tb_tp_P_0.5', 'gt_tb_tp_R_0.5', 'gt_tb_tp_F_0.5', 'gt_tb_tp_P_3', 'gt_tb_tp_R_3', \ + # 'gt_tb_tp_F_3', 'gt_tb_hm_P_0.5', 'gt_tb_hm_R_0.5', 'gt_tb_hm_F_0.5', 'gt_tb_hm_P_3', 'gt_tb_hm_R_3', 'gt_tb_hm_F_3', 'gt_tp_hm_P_0.5', 'gt_tp_hm_R_0.5', 'gt_tp_hm_F_0.5', 'gt_tp_hm_P_3', 'gt_tp_hm_R_3', 'gt_tp_hm_F_3', \ + # 'tb_tp_hm_P_0.5', 'tb_tp_hm_R_0.5', 'tb_tp_hm_F_0.5', 'tb_tp_hm_P_3', 'tb_tp_hm_R_3', 'tb_tp_hm_F_3', 'gt_tb_tp_hm_P_0.5', 'gt_tb_tp_hm_R_0.5', 'gt_tb_tp_hm_F_0.5', 'gt_tb_tp_hm_P_3', 'gt_tb_tp_hm_R_3', 'gt_tb_tp_hm_F_3']) + # + # for i,ao in enumerate(audio_list): + # # Combine SSMs computed from different features + # gt_hm_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm) + # gt_tb_ssm = np.multiply(ao.gammatone_ssm, ao.timbre_ssm) + # gt_tp_ssm = np.multiply(ao.gammatone_ssm, ao.tempo_ssm) + # tb_tp_ssm = np.multiply(ao.timbre_ssm, ao.tempo_ssm) + # tb_hm_ssm = np.multiply(ao.timbre_ssm, ao.harmonic_ssm) + # tp_hm_ssm = np.multiply(ao.tempo_ssm, ao.harmonic_ssm) + # + # gt_hm_tb_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm, ao.timbre_ssm) + # gt_hm_tp_ssm = np.multiply(ao.gammatone_ssm, ao.harmonic_ssm, ao.tempo_ssm) + # gt_tb_tp_ssm = np.multiply(ao.gammatone_ssm, ao.timbre_ssm, ao.tempo_ssm) + # hm_tb_tp_ssm = np.multiply(ao.harmonic_ssm, ao.timbre_ssm, ao.tempo_ssm) + # + # gt_hm_tb_tp_ssm = np.multiply(np.multiply(ao.gammatone_ssm, ao.harmonic_ssm), np.multiply(ao.timbre_ssm, ao.tempo_ssm)) + # + # gt_hm_ssm_novelty = self.getNoveltyCurve(gt_hm_ssm, self.kernel_size) + # gt_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_ssm_novelty] + # gt_tb_ssm_novelty = self.getNoveltyCurve(gt_tb_ssm, self.kernel_size) + # gt_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_ssm_novelty] + # gt_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_ssm, self.kernel_size) + # gt_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tp_ssm_novelty] + # tb_tp_ssm_novelty = self.getNoveltyCurve(tb_tp_ssm, self.kernel_size) + # tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_tp_ssm_novelty] + # tb_hm_ssm_novelty = self.getNoveltyCurve(tb_hm_ssm, self.kernel_size) + # tb_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tb_hm_ssm_novelty] + # tp_hm_ssm_novelty = self.getNoveltyCurve(tp_hm_ssm, self.kernel_size) + # tp_hm_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in tp_hm_ssm_novelty] + # + # gt_hm_tb_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_ssm, self.kernel_size) + # gt_hm_tb_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 
1e+100) else x for x in gt_hm_tb_ssm_novelty] + # gt_hm_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tp_ssm, self.kernel_size) + # gt_hm_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tp_ssm_novelty] + # gt_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_tb_tp_ssm, self.kernel_size) + # gt_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_tb_tp_ssm_novelty] + # hm_tb_tp_ssm_novelty = self.getNoveltyCurve(hm_tb_tp_ssm, self.kernel_size) + # hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in hm_tb_tp_ssm_novelty] + # + # gt_hm_tb_tp_ssm_novelty = self.getNoveltyCurve(gt_hm_tb_tp_ssm, self.kernel_size) + # gt_hm_tb_tp_ssm_novelty = [0.0 if (np.isnan(x) or np.isinf(x) or x > 1e+100) else x for x in gt_hm_tb_tp_ssm_novelty] + # + # smoothed_gt_hm_ssm_novelty, gt_hm_ssm_novelty_peaks = peak_picker.process(gt_hm_ssm_novelty) + # gt_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_gt_tb_ssm_novelty, gt_tb_ssm_novelty_peaks = peak_picker.process(gt_tb_ssm_novelty) + # gt_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_gt_tp_ssm_novelty, gt_tp_ssm_novelty_peaks = peak_picker.process(gt_tp_ssm_novelty) + # gt_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_tb_tp_ssm_novelty, tb_tp_ssm_novelty_peaks = peak_picker.process(tb_tp_ssm_novelty) + # tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_tb_hm_ssm_novelty, tb_hm_ssm_novelty_peaks = peak_picker.process(tb_hm_ssm_novelty) + # tb_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tb_hm_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_tp_hm_ssm_novelty, tp_hm_ssm_novelty_peaks = peak_picker.process(tp_hm_ssm_novelty) + # tp_hm_ssm_detection = [ao.ssm_timestamps[int(i)] for i in tp_hm_ssm_novelty_peaks] + [ao.gt[-1]] + # + # smoothed_gt_hm_tb_ssm_novelty, gt_hm_tb_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_ssm_novelty) + # gt_hm_tb_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_gt_hm_tp_ssm_novelty, gt_hm_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tp_ssm_novelty) + # gt_hm_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_gt_tb_tp_ssm_novelty, gt_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_tb_tp_ssm_novelty) + # gt_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # smoothed_hm_tb_tp_ssm_novelty, hm_tb_tp_ssm_novelty_peaks = peak_picker.process(hm_tb_tp_ssm_novelty) + # hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # + # smoothed_gt_hm_tb_tp_ssm_novelty, gt_hm_tb_tp_ssm_novelty_peaks = peak_picker.process(gt_hm_tb_tp_ssm_novelty) + # gt_hm_tb_tp_ssm_detection = [ao.ssm_timestamps[int(i)] for i in gt_hm_tb_tp_ssm_novelty_peaks] + [ao.gt[-1]] + # + # # Output detected segment locations. 
+ # if options.VERBOSE: + # outdir = join(options.OUTPUT, 'detection', ao.name) + # if not isdir(outdir): + # os.mkdir(outdir) + # + # np.savetxt(join(outdir, 'gammatone_timbre_ssm.csv'), gt_tb_ssm_detection) + # np.savetxt(join(outdir, 'gammatone_tempo_ssm.csv'), gt_tp_ssm_detection) + # np.savetxt(join(outdir, 'gammatone_harmonic_ssm.csv'), gt_hm_ssm_detection) + # np.savetxt(join(outdir, 'timbre_tempo_ssm.csv'), tb_tp_ssm_detection) + # np.savetxt(join(outdir, 'timbre_harmonic_ssm.csv'), tb_hm_ssm_detection) + # np.savetxt(join(outdir, 'tempo_harmonic_ssm.csv'), tp_hm_ssm_detection) + # + # np.savetxt(join(outdir, 'gammatone_timbre_tempo_ssm.csv'), gt_tb_tp_ssm_detection) + # np.savetxt(join(outdir, 'gammatone_timbre_harmonic_ssm.csv'), gt_hm_tb_ssm_detection) + # np.savetxt(join(outdir, 'gammatone_tempo_harmonic_ssm.csv'), gt_hm_tp_ssm_detection) + # np.savetxt(join(outdir, 'timbre_tempo_harmonic_ssm.csv'), hm_tb_tp_ssm_detection) + # np.savetxt(join(outdir, 'gammatone_timbre_tempo_harmonic_ssm.csv'), gt_hm_tb_tp_ssm_detection) + # + # gt_hm_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=0.5, combine=1.0) + # gt_hm_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_ssm_detection, tolerance=3, combine=1.0) + # gt_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=0.5, combine=1.0) + # gt_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_ssm_detection, tolerance=3, combine=1.0) + # gt_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=0.5, combine=1.0) + # gt_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tp_ssm_detection, tolerance=3, combine=1.0) + # tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=0.5, combine=1.0) + # tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, tb_tp_ssm_detection, tolerance=3, combine=1.0) + # tb_hm_ssm_res_05 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=0.5, combine=1.0) + # tb_hm_ssm_res_3 = self.pairwiseF(ao.gt, tb_hm_ssm_detection, tolerance=3, combine=1.0) + # tp_hm_ssm_res_05 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=0.5, combine=1.0) + # tp_hm_ssm_res_3 = self.pairwiseF(ao.gt, tp_hm_ssm_detection, tolerance=3, combine=1.0) + # + # gt_hm_tb_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=0.5, combine=1.0) + # gt_hm_tb_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_ssm_detection, tolerance=3, combine=1.0) + # gt_hm_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=0.5, combine=1.0) + # gt_hm_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tp_ssm_detection, tolerance=3, combine=1.0) + # gt_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) + # gt_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_tb_tp_ssm_detection, tolerance=3, combine=1.0) + # hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) + # hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, hm_tb_tp_ssm_detection, tolerance=3, combine=1.0) + # + # gt_hm_tb_tp_ssm_res_05 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=0.5, combine=1.0) + # gt_hm_tb_tp_ssm_res_3 = self.pairwiseF(ao.gt, gt_hm_tb_tp_ssm_detection, tolerance=3, combine=1.0) + # + # with open(outfile3, 'a') as f: + # csvwriter = csv.writer(f, delimiter=',') + # csvwriter.writerow([ao.name, gt_tb_ssm_res_05.P, gt_tb_ssm_res_05.R, gt_tb_ssm_res_05.F, gt_tb_ssm_res_3.P, gt_tb_ssm_res_3.R, gt_tb_ssm_res_3.F, gt_tp_ssm_res_05.P, gt_tp_ssm_res_05.R, gt_tp_ssm_res_05.F, \ + # gt_tp_ssm_res_3.P, gt_tp_ssm_res_3.R, gt_tp_ssm_res_3.F, gt_hm_ssm_res_05.P, 
gt_hm_ssm_res_05.R, gt_hm_ssm_res_05.F, gt_hm_ssm_res_3.P, gt_hm_ssm_res_3.R, gt_hm_ssm_res_3.F, \ + # tb_tp_ssm_res_05.P, tb_tp_ssm_res_05.R, tb_tp_ssm_res_05.F, tb_tp_ssm_res_3.P, tb_tp_ssm_res_3.R, tb_tp_ssm_res_3.F, tb_hm_ssm_res_05.P, tb_hm_ssm_res_05.R, tb_hm_ssm_res_05.F, \ + # tb_hm_ssm_res_3.P, tb_hm_ssm_res_3.R, tb_hm_ssm_res_3.F, tp_hm_ssm_res_05.P, tp_hm_ssm_res_05.R, tp_hm_ssm_res_05.F, tp_hm_ssm_res_3.P, tp_hm_ssm_res_3.R, tp_hm_ssm_res_3.F, \ + # gt_tb_tp_ssm_res_05.P, gt_tb_tp_ssm_res_05.R, gt_tb_tp_ssm_res_05.F, gt_tb_tp_ssm_res_3.P, gt_tb_tp_ssm_res_3.R, gt_tb_tp_ssm_res_3.F, gt_hm_tb_ssm_res_05.P, gt_hm_tb_ssm_res_05.R, gt_hm_tb_ssm_res_05.F, \ + # gt_hm_tb_ssm_res_3.P, gt_hm_tb_ssm_res_3.R, gt_hm_tb_ssm_res_3.F, gt_hm_tp_ssm_res_05.P, gt_hm_tp_ssm_res_05.R, gt_hm_tp_ssm_res_05.F, gt_hm_tp_ssm_res_3.P, gt_hm_tp_ssm_res_3.R, gt_hm_tp_ssm_res_3.F, \ + # hm_tb_tp_ssm_res_05.P, hm_tb_tp_ssm_res_05.R, hm_tb_tp_ssm_res_05.F, hm_tb_tp_ssm_res_3.P, hm_tb_tp_ssm_res_3.R, hm_tb_tp_ssm_res_3.F, gt_hm_tb_tp_ssm_res_05.P, gt_hm_tb_tp_ssm_res_05.R, gt_hm_tb_tp_ssm_res_05.F, \ + # gt_hm_tb_tp_ssm_res_3.P, gt_hm_tb_tp_ssm_res_3.R, gt_hm_tb_tp_ssm_res_3.F]) + + +def main(): + segmenter = SSMseg() + segmenter.process() + + +if __name__ == '__main__': + main() +
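+
+# ----------------------------------------------------------------------------
+# Minimal sketch (not part of the original pipeline) of the combined-SSM idea
+# in the commented-out block above: multiply two self-similarity matrices
+# elementwise, slide a Foote-style checkerboard kernel along the main
+# diagonal to obtain a novelty curve, and pick well-separated local maxima as
+# candidate boundary frames.  The helper names (checkerboard_kernel,
+# novelty_from_ssm, simple_peak_pick) are illustrative only and are not the
+# getNoveltyCurve / PeakPicker utilities used by the class above.
+# ----------------------------------------------------------------------------
+import numpy as np  # numpy is assumed available; imported so the sketch stands alone
+
+def checkerboard_kernel(size):
+    """Gaussian-tapered checkerboard kernel of width 2 * size."""
+    g = np.exp(-0.5 * (np.arange(-size, size) / (0.5 * size)) ** 2)
+    sign = np.sign(np.arange(-size, size) + 0.5)
+    return np.outer(g, g) * np.outer(sign, sign)
+
+def novelty_from_ssm(ssm, kernel_size=32):
+    """Correlate the checkerboard kernel along the diagonal of an SSM."""
+    kernel = checkerboard_kernel(kernel_size)
+    n = ssm.shape[0]
+    padded = np.pad(ssm, kernel_size, mode='constant')
+    novelty = np.zeros(n)
+    for i in range(n):
+        patch = padded[i:i + 2 * kernel_size, i:i + 2 * kernel_size]
+        novelty[i] = np.sum(patch * kernel)
+    return np.maximum(novelty, 0.0)
+
+def simple_peak_pick(curve, min_gap=16):
+    """Indices of local maxima separated by at least min_gap frames."""
+    candidates = [i for i in range(1, len(curve) - 1)
+                  if curve[i] > curve[i - 1] and curve[i] >= curve[i + 1]]
+    picked = []
+    for p in sorted(candidates, key=lambda i: curve[i], reverse=True):
+        if all(abs(p - q) >= min_gap for q in picked):
+            picked.append(p)
+    return sorted(picked)
+
+# Example use on a combined SSM (frame indices, to be mapped to timestamps):
+#   combined = np.multiply(gammatone_ssm, tempo_ssm)
+#   boundary_frames = simple_peak_pick(novelty_from_ssm(combined))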
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/utils_2dfmc.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,130 @@ +""" +Set of util functions for the section similarity project. +""" + +import copy +import numpy as np +import json +import scipy.fftpack +import pylab as plt + +def resample_mx(X, incolpos, outcolpos): + """ + Y = resample_mx(X, incolpos, outcolpos) + X is taken as a set of columns, each starting at 'time' + colpos, and continuing until the start of the next column. + Y is a similar matrix, with time boundaries defined by + outcolpos. Each column of Y is a duration-weighted average of + the overlapping columns of X. + 2010-04-14 Dan Ellis dpwe@ee.columbia.edu based on samplemx/beatavg + -> python: TBM, 2011-11-05, TESTED + """ + noutcols = len(outcolpos) + Y = np.zeros((X.shape[0], noutcols)) + # assign 'end times' to final columns + if outcolpos.max() > incolpos.max(): + incolpos = np.concatenate([incolpos,[outcolpos.max()]]) + X = np.concatenate([X, X[:,-1].reshape(X.shape[0],1)], axis=1) + outcolpos = np.concatenate([outcolpos, [outcolpos[-1]]]) + # durations (default weights) of input columns) + incoldurs = np.concatenate([np.diff(incolpos), [1]]) + + for c in range(noutcols): + firstincol = np.where(incolpos <= outcolpos[c])[0][-1] + firstincolnext = np.where(incolpos < outcolpos[c+1])[0][-1] + lastincol = max(firstincol,firstincolnext) + # default weights + wts = copy.deepcopy(incoldurs[firstincol:lastincol+1]) + # now fix up by partial overlap at ends + if len(wts) > 1: + wts[0] = wts[0] - (outcolpos[c] - incolpos[firstincol]) + wts[-1] = wts[-1] - (incolpos[lastincol+1] - outcolpos[c+1]) + wts = wts * 1. /sum(wts) + Y[:,c] = np.dot(X[:,firstincol:lastincol+1], wts) + # done + return Y + +def magnitude(X): + """Magnitude of a complex matrix.""" + r = np.real(X) + i = np.imag(X) + return np.sqrt(r * r + i * i); + +def json_to_bounds(segments_json): + """Extracts the boundaries from a json file and puts them into + an np array.""" + f = open(segments_json) + segments = json.load(f)["segments"] + bounds = [] + for segment in segments: + bounds.append(segment["start"]) + bounds.append(bounds[-1] + segments[-1]["duration"]) # Add last boundary + f.close() + return np.asarray(bounds) + +def json_bounds_to_bounds(bounds_json): + """Extracts the boundaries from a bounds json file and puts them into + an np array.""" + f = open(bounds_json) + segments = json.load(f)["bounds"] + bounds = [] + for segment in segments: + bounds.append(segment["start"]) + f.close() + return np.asarray(bounds) + +def json_to_labels(segments_json): + """Extracts the labels from a json file and puts them into + an np array.""" + f = open(segments_json) + segments = json.load(f)["segments"] + labels = [] + str_labels = [] + for segment in segments: + if not segment["label"] in str_labels: + str_labels.append(segment["label"]) + labels.append(len(str_labels)-1) + else: + label_idx = np.where(np.asarray(str_labels) == segment["label"])[0][0] + labels.append(label_idx) + f.close() + return np.asarray(labels) + +def json_to_beats(beats_json_file): + """Extracts the beats from the beats_json_file and puts them into + an np array.""" + f = open(beats_json_file, "r") + beats_json = json.load(f) + beats = [] + for beat in beats_json["beats"]: + beats.append(beat["start"]) + f.close() + return np.asarray(beats) + +def analyze_results(file): + f = open(file, "r") + lines = f.readlines() + F = [] + for line in lines: + F.append(float(line.split("\t")[0])) + f.close() + print np.mean(F) + +def 
compute_ffmc2d(X): + """Computes the 2D-Fourier Magnitude Coefficients of a feature patch.""" + # 2d-fft + fft2 = scipy.fftpack.fft2(X) + + # Magnitude + fft2m = magnitude(fft2) + + # FFTshift and flatten + fftshift = scipy.fftpack.fftshift(fft2m).flatten() + + #cmap = plt.cm.get_cmap('hot') + #plt.imshow(np.log1p(scipy.fftpack.fftshift(fft2m)).T, interpolation="nearest", + # aspect="auto", cmap=cmap) + #plt.show() + + # Take out redundant components: X is real, so its magnitude spectrum is + # symmetric. Integer division keeps the slice index an int under both + # Python 2 and 3. + return fftshift[:fftshift.shape[0] // 2 + 1]
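+
+# ----------------------------------------------------------------------------
+# Usage sketch (illustrative, not part of the original module).  Because the
+# 2D-FFT magnitude is invariant to circular shifts of the input patch, two
+# occurrences of the same material yield near-identical coefficient vectors
+# even when they are not aligned in time.  The synthetic chroma patch sizes
+# and the Euclidean comparison below are assumptions made for this example.
+# ----------------------------------------------------------------------------
+if __name__ == '__main__':
+    np.random.seed(0)
+    patch = np.random.rand(12, 32)        # e.g. 12 chroma bins x 32 frames
+    shifted = np.roll(patch, 8, axis=1)   # same content, circularly shifted
+    other = np.random.rand(12, 32)        # unrelated material
+
+    d_shift = np.linalg.norm(compute_ffmc2d(patch) - compute_ffmc2d(shifted))
+    d_other = np.linalg.norm(compute_ffmc2d(patch) - compute_ffmc2d(other))
+    print(d_shift)   # ~0: the magnitude coefficients ignore the shift
+    print(d_other)   # clearly larger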
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/xmeans.py Thu Apr 02 18:09:27 2015 +0100 @@ -0,0 +1,227 @@ +#!/usr/bin/env python +"""Class that implements X-means.""" + +import argparse +import numpy as np +import logging +import time +import pylab as plt +import scipy.cluster.vq as vq +from scipy.spatial import distance + + +class XMeans: + def __init__(self, X, init_K=2, plot=False): + self.X = X + self.init_K = init_K + self.plot = plot + + def estimate_K_xmeans(self, th=0.2, maxK = 10): + """Estimates K running X-means algorithm (Pelleg & Moore, 2000).""" + + # Run initial K-means + means, labels = self.run_kmeans(self.X, self.init_K) + + # Run X-means algorithm + stop = False + curr_K = self.init_K + while not stop: + stop = True + final_means = [] + for k in xrange(curr_K): + # Find the data that corresponds to the k-th cluster + D = self.get_clustered_data(self.X, labels, k) + if len(D) == 0 or D.shape[0] == 1: + continue + + # Whiten and find whitened mean + stdD = np.std(D, axis=0) + #D = vq.whiten(D) + D /= stdD # Same as line above + mean = D.mean(axis=0) + + # Cluster this subspace by half (K=2) + half_means, half_labels = self.run_kmeans(D, K=2) + + # Compute BICs + bic1 = self.compute_bic(D, [mean], K=1, + labels=np.zeros(D.shape[0]), + R=D.shape[0]) + bic2 = self.compute_bic(D, half_means, K=2, + labels=half_labels, R=D.shape[0]) + + # Split or not + max_bic = np.max([np.abs(bic1), np.abs(bic2)]) + norm_bic1 = bic1 / max_bic + norm_bic2 = bic2 / max_bic + diff_bic = np.abs(norm_bic1 - norm_bic2) + + # Split! + print "diff_bic", diff_bic + if diff_bic > th: + final_means.append(half_means[0] * stdD) + final_means.append(half_means[1] * stdD) + curr_K += 1 + stop = False + # Don't split + else: + final_means.append(mean * stdD) + + final_means = np.asarray(final_means) + + print "Estimated K: ", curr_K + if self.plot: + plt.scatter(self.X[:, 0], self.X[:, 1]) + plt.scatter(final_means[:, 0], final_means[:, 1], color="y") + plt.show() + + if curr_K >= maxK or self.X.shape[-1] != final_means.shape[-1]: + stop = True + else: + labels, dist = vq.vq(self.X, final_means) + + return curr_K + + def estimate_K_knee(self, th=.015, maxK=12): + """Estimates the K using K-means and BIC, by sweeping various K and + choosing the optimal BIC.""" + # Sweep K-means + if self.X.shape[0] < maxK: + maxK = self.X.shape[0] + if maxK < 2: + maxK = 2 + K = np.arange(1, maxK) + bics = [] + for k in K: + means, labels = self.run_kmeans(self.X, k) + bic = self.compute_bic(self.X, means, labels, K=k, + R=self.X.shape[0]) + bics.append(bic) + diff_bics = np.diff(bics) + finalK = K[-1] + + if len(bics) == 1: + finalK = 2 + else: + # Normalize + bics = np.asarray(bics) + bics -= bics.min() + #bics /= bics.max() + diff_bics -= diff_bics.min() + #diff_bics /= diff_bics.max() + + #print bics, diff_bics + + # Find optimum K + for i in xrange(len(K[:-1])): + #if bics[i] > diff_bics[i]: + if diff_bics[i] < th and K[i] != 1: + finalK = K[i] + break + + logging.info("Estimated Unique Number of Segments: %d" % finalK) + if self.plot: + plt.subplot(2, 1, 1) + plt.plot(K, bics, label="BIC") + plt.plot(K[:-1], diff_bics, label="BIC diff") + plt.legend(loc=2) + plt.subplot(2, 1, 2) + plt.scatter(self.X[:, 0], self.X[:, 1]) + plt.show() + + return finalK + + def get_clustered_data(self, X, labels, label_index): + """Returns the data with a specific label_index, using the previously + learned labels.""" + D = X[np.argwhere(labels == label_index)] + return D.reshape((D.shape[0], D.shape[-1])) + + def 
run_kmeans(self, X, K): + """Runs k-means and returns the labels assigned to the data.""" + wX = vq.whiten(X) + means, dist = vq.kmeans(wX, K, iter=100) + labels, dist = vq.vq(wX, means) + return means, labels + + def compute_bic(self, D, means, labels, K, R): + """Computes the Bayesian Information Criterion.""" + D = vq.whiten(D) + Rn = D.shape[0] + M = D.shape[1] + + if R == K: + return 1 + + # Maximum likelihood estimate (MLE) + mle_var = 0 + for k in xrange(len(means)): + X = D[np.argwhere(labels == k)] + X = X.reshape((X.shape[0], X.shape[-1])) + for x in X: + mle_var += distance.euclidean(x, means[k]) + #print x, means[k], mle_var + mle_var /= (float(R - K)) + + # Log-likelihood of the data + l_D = - Rn/2. * np.log(2*np.pi) - (Rn * M)/2. * np.log(mle_var) - \ + (Rn - K) / 2. + Rn * np.log(Rn) - Rn * np.log(R) + + # Params of BIC + p = (K-1) + M * K + mle_var + + #print "BIC:", l_D, p, R, K + + # Return the bic + return l_D - p/2. * np.log(R) + + @classmethod + def generate_2d_data(self, N=100, K=5): + """Generates N*K 2D data points with K means and N data points + for each mean.""" + # Seed the random + np.random.seed(seed=int(time.time())) + + # Amount of spread of the centroids + spread = 30 + + # Generate random data + X = np.empty((0, 2)) + for i in xrange(K): + mean = np.array([np.random.random()*spread, + np.random.random()*spread]) + x = np.random.normal(0.0, scale=1.0, size=(N, 2)) + mean + X = np.append(X, x, axis=0) + + return X + + +def test_kmeans(K=5): + """Test k-means with the synthetic data.""" + X = XMeans.generate_2d_data(K=4) + wX = vq.whiten(X) + dic, dist = vq.kmeans(wX, K, iter=100) + + plt.scatter(wX[:, 0], wX[:, 1]) + plt.scatter(dic[:, 0], dic[:, 1], color="m") + plt.show() + + +def main(args): + #test_kmeans(6) + X = XMeans.generate_2d_data(K=args.k) + xmeans = XMeans(X, init_K=2, plot=args.plot) + est_K = xmeans.estimate_K_xmeans() + est_K_knee = xmeans.estimate_K_knee() + print "Estimated x-means K:", est_K + print "Estimated Knee Point Detection K:", est_K_knee + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Runs x-means") + parser.add_argument("k", + metavar="k", type=int, + help="Number of clusters to estimate.") + parser.add_argument("-p", action="store_true", default=False, + dest="plot", help="Plot the results") + main(parser.parse_args())
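+
+# ----------------------------------------------------------------------------
+# Sketch of how this class could be used from the segmentation side
+# (illustrative only; the function name and the synthetic data are not taken
+# from SegEval.py): stack one feature vector per detected segment and let
+# XMeans estimate how many distinct section types the piece contains.
+# ----------------------------------------------------------------------------
+def estimate_num_section_types(segment_features, max_types=8):
+    """segment_features: (n_segments, n_dims) array, one row per segment."""
+    xm = XMeans(segment_features, init_K=2, plot=False)
+    return xm.estimate_K_knee(maxK=max_types)
+
+# Example with synthetic data (three well-separated clusters):
+#   feats = XMeans.generate_2d_data(N=20, K=3)
+#   print(estimate_num_section_types(feats))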