comparison cnmf.py @ 17:c01fcb752221

new annotations
author mitian
date Fri, 21 Aug 2015 10:15:29 +0100
parents 915c849b17ea
children b4bf37f94e92
comparison
equal deleted inserted replaced
16:8b814fe5781d 17:c01fcb752221
17 17
18 # Local stuff 18 # Local stuff
19 from utils import SegUtil 19 from utils import SegUtil
20 20
# Algorithm params
# NOTE: segmentation() spells out h, R and rank again as literal defaults in
# its own signature instead of reading these names.
h = 8               # Size of median filter for features in C-NMF
R = 15              # Size of the median filter for the activation matrix C-NMF
rank = 4            # Rank of decomposition for the boundaries
rank_labels = 6     # Rank of decomposition for the labels
R_labels = 6        # Size of the median filter for the labels
27 27
def cnmf(S, rank, niter=500):
    """Factorize S with (Convex) Non-Negative Matrix Factorization.

    Parameters
    ----------
    S: np.array(p, N)
        Features matrix. p row features and N column observations.
    rank: int
        Rank of decomposition (number of bases handed to pymf.CNMF)
    niter: int
        Number of iterations to be used

    Returns
    -------
    F: np.array
        Cluster matrix (decomposed matrix), taken from the model's W
    G: np.array
        Activation matrix (decomposed matrix), taken from the model's H
        (s.t. S ~= F * G)
    """
    model = pymf.CNMF(S, num_bases=rank)
    model.factorize(niter=niter)
    # Hand back plain arrays regardless of what the model stores internally.
    return np.asarray(model.W), np.asarray(model.H)
53 53
def nmf(S, rank, nither=500, niter=None):
    """Non-Negative Matrix Factorization.

    Parameters
    ----------
    S: np.array
        Features matrix, handed unchanged to pymf.NMF
        (presumably laid out like the input of cnmf -- TODO confirm)
    rank: int
        Rank of decomposition (number of bases)
    nither: int
        Number of iterations. Historical, misspelled name; kept so that
        existing positional and keyword callers keep working.
    niter: int or None
        Correctly spelled alias for ``nither``. When not None it takes
        precedence, so ``nmf(S, rank, niter=300)`` works like the
        equivalent cnmf call.

    Returns
    -------
    F: np.array
        Cluster matrix (the model's W)
    G: np.array
        Activation matrix (the model's H)
    """
    if niter is not None:
        nither = niter

    # NOTE(review): the iteration count goes to the constructor here, while
    # cnmf() passes it to factorize(); confirm which one the installed pymf
    # version actually honours.
    nmf_mdl = pymf.NMF(S, num_bases=rank, niter=nither)
    nmf_mdl.factorize()
    F = np.asarray(nmf_mdl.W)
    G = np.asarray(nmf_mdl.H)
    return F, G
60 60
61 61
def most_frequent(x):
    """Return the value occurring most often in x.

    x must hold non-negative integers (it is counted with np.bincount).
    On a tie the smallest of the tied values is returned.
    """
    occurrences = np.bincount(x)
    return occurrences.argmax()
65 65
66 66
def compute_labels(X, rank, R, bound_idxs, niter=300):
    """Computes one label per segment using the bounds.

    Parameters
    ----------
    X: np.array
        Features matrix; it is transposed before being handed to cnmf
        (presumably one row per frame -- TODO confirm against callers).
    rank: int
        Rank of decomposition for the labels
    R: int
        Size of the median filter applied to the activation labels
    bound_idxs: sequence of int
        Boundary frame indices; each consecutive pair delimits a segment.
    niter: int
        Number of iterations for the factorization

    Returns
    -------
    labels: list
        One label per consecutive pair in ``bound_idxs``. ``[1]`` when the
        factorization fails.
    """
    try:
        _, G = cnmf(X.T, rank, niter=niter)
    except Exception:
        # Best effort: a failed factorization yields a single dummy label.
        # Only Exception is caught so Ctrl-C / SystemExit still propagate.
        return [1]

    label_frames = np.asarray(filter_activation_matrix(G.T, R), dtype=int)

    # Get labels from the label frames
    labels = []
    for start, end in zip(bound_idxs[:-1], bound_idxs[1:]):
        if end - start <= 0:
            # Empty segment: give it a label one above every label in use.
            labels.append(np.max(label_frames) + 1)
        else:
            labels.append(most_frequent(label_frames[start:end]))

    return labels
90 90
91 91
def filter_activation_matrix(G, R):
    """Filters the activation matrix G, and returns a flattened copy.

    Every row of G is reduced to (index of its largest entry) + 1, and the
    resulting sequence is smoothed with SegUtil.median_filter using window
    size R. G itself is not modified.

    Parameters
    ----------
    G: np.array
        2-D activation matrix; the arg-max is taken along axis 1.
    R: int
        Size of the median filter

    Returns
    -------
    np.array
        Flattened result of the median filter (one value per row of G).
    """
    G = np.asarray(G)
    # Same values the in-place "zero everything, write idx + 1 at the row
    # maximum, sum the row" sequence produced, without clobbering the input.
    row_labels = (np.argmax(G, axis=1) + 1).astype(G.dtype)
    filtered = SegUtil.median_filter(row_labels[:, np.newaxis], R)
    return filtered.flatten()
102 102
103 103
def segmentation(X, rank=4, R=15, h=8, niter=300, CNMF=True):
    """
    Finds the segment boundaries from the factorization's activation matrix.

    The factorization is repeated with ``rank`` increased by one for as long
    as it yields no more than two distinct boundaries.

    Parameters
    ----------
    X: np.array()
        Features matrix (e.g. chromagram)
    rank: int
        Initial rank of decomposition
    R: int
        Size of the median filter for activation matrix
    h: int
        Size of the median filter applied to X before factorizing
    niter: int
        Number of iterations for the factorization
    CNMF : bool
        If True, use CNMF; otherwise use NMF

    Returns
    -------
    G: np.array
        Filtered, flattened activation labels
        (output of filter_activation_matrix)
    bound_idxs: np.array
        Bound indices found: positions where consecutive values of G differ

    If the factorization raises, ``(np.empty(0), [1])`` is returned instead.
    """

    # Filter
    X = SegUtil.median_filter(X, M=h).T

    factorize = cnmf if CNMF else nmf

    while True:
        try:
            # niter is passed positionally because cnmf and nmf do not spell
            # the name of their third parameter the same way.
            _, G = factorize(X, rank, niter)
        except Exception:
            # Best effort on a failed factorization; Ctrl-C / SystemExit
            # are deliberately not swallowed.
            return np.empty(0), [1]

        # Filter G
        G = filter_activation_matrix(G.T, R)
        bound_idxs = np.where(np.diff(G) != 0)[0] + 1

        if len(np.unique(bound_idxs)) > 2:
            break

        # Too few boundaries: retry with a higher rank. The loop only ends
        # once enough boundaries appear or the factorization raises.
        rank += 1

    return G, bound_idxs