Mercurial > hg > multiomr
comparison Process/PipelineAlignment.py @ 2:46fb79167a61 tip
Main Code
author | Victor Padilla <victor.padilla.mc@gmail.com> |
---|---|
date | Mon, 04 May 2015 22:56:18 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:0f7f611deca4 | 2:46fb79167a61 |
---|---|
1 ''' | |
2 Created on 10/11/2014 | |
3 | |
4 @organization: Lancaster University & University of Leeds | |
5 @version: 1.0 | |
6 Created on 11/12/2014 | |
7 | |
8 @author: Victor Padilla | |
9 @contact: v.padilla@lancaster.ac.uk | |
10 | |
11 Functions related to the alignment | |
12 and voting process | |
13 ''' | |
14 from Alignment import FastAlignmentArrays | |
15 from SymbolConversion import SymbolConversion | |
16 from Functions import FilesFunctions | |
17 import numpy as np | |
18 from Clustering import Clustering | |
19 import math | |
20 from Alignment import NWunsch | |
21 | |
22 | |
23 class PipelineAlignment: | |
24 def alignGround(self,OMRs,part): | |
25 ''' | |
26 Returns one part and the ground aligned. The first array value of OMRs | |
27 should be the ground and the second the omr to align | |
28 ''' | |
29 sc=SymbolConversion() | |
30 OMRs_symbols=[] | |
31 omr_symbolsAlign=[] | |
32 for omr in OMRs: | |
33 omr_symbols=sc.filterOMR(omr,part)[1] | |
34 OMRs_symbols.append(omr_symbols) | |
35 omr_symbolsAlign.append([]) | |
36 | |
37 faa=FastAlignmentArrays() | |
38 out=faa.needleman_wunsch(OMRs_symbols[0], OMRs_symbols[1])[0] | |
39 | |
40 | |
41 return out | |
42 | |
43 def getDistances(self,OMRs_symbols): | |
44 ''' | |
45 Returns the distance matrix from several omr | |
46 in symbols, using the first symbol only | |
47 [u'N:E-4_0.25', 0.25, '', 2.75, 3.0, None] y | |
48 [u'N:E-4_0.25', 0.33, '', 2.50, 2.75, None] | |
49 | |
50 are equals | |
51 | |
52 Returns a triangular matrix | |
53 [[ 0. 0.17647058 0.19141912] | |
54 [ 0. 0. 0.17647058] | |
55 [ 0. 0. 0. ]] | |
56 | |
57 Uses the algorithm implemented in C for increasing the speed | |
58 Alignment/C_Libraries/NWunsch | |
59 ''' | |
60 ls=len(OMRs_symbols) | |
61 dimension= (ls,ls) | |
62 distances=np.zeros(dimension) | |
63 for i in range(len(OMRs_symbols)): | |
64 for j in range(i+1,len(OMRs_symbols)): | |
65 print i,j | |
66 align1=[] | |
67 align2=[] | |
68 for s in OMRs_symbols[i]: | |
69 align1.append(s[0]) | |
70 for s in OMRs_symbols[j]: | |
71 align2.append(s[0]) | |
72 #Algorithm implemented in C | |
73 if len(align1)==0 or len(align2)==0: | |
74 score=0 | |
75 else: | |
76 print"-------------------------" | |
77 | |
78 score=NWunsch.NWunsch_getSimilarity(align1,align2) | |
79 print"-------------------------" | |
80 if math.isnan(score): | |
81 score=0 | |
82 distances[i][j]=1-score | |
83 return distances | |
84 | |
85 def getDistanceLength(self,OMRs_symbols): | |
86 ''' | |
87 Similar to getDistance, but based on the length | |
88 of the omrs. Testing purposes | |
89 ''' | |
90 ls=len(OMRs_symbols) | |
91 dimension= (ls,ls) | |
92 distances=np.zeros(dimension) | |
93 for i in range(len(OMRs_symbols)): | |
94 for j in range(i+1,len(OMRs_symbols)): | |
95 print i,j | |
96 len_i=len(OMRs_symbols[i]) | |
97 len_j=len(OMRs_symbols[j]) | |
98 maxLen=len_j | |
99 if len_i>=len_j: | |
100 maxLen=len_i | |
101 | |
102 score=(len_i-len_j)*1.0/maxLen | |
103 if score<0: | |
104 score=score*-1 | |
105 distances[i][j]=score | |
106 return distances | |
107 | |
108 def __getMinimum(self,distance): | |
109 ''' | |
110 Returns the minimum value and the x,y position | |
111 in the distance matrix | |
112 ''' | |
113 minim=1000 | |
114 iMin=0 | |
115 jMin=0 | |
116 for i in range(len(distance[0])): | |
117 for j in range(i+1,len(distance[0])): | |
118 dist=distance[i][j] | |
119 if isinstance(dist,list): | |
120 dist=dist[0] | |
121 if dist<minim: | |
122 minim=dist | |
123 iMin=i | |
124 jMin=j | |
125 | |
126 return minim,iMin,jMin | |
127 | |
128 | |
129 def __recalculeDistances(self,distance,iMin,jMin): | |
130 ''' | |
131 Removes the rows and the column in the distance matrix | |
132 and calculates the new matrix | |
133 ''' | |
134 for i in range(len(distance[0])): | |
135 for j in range(i+1,len(distance[0])): | |
136 if j==iMin: | |
137 dist=distance[i][j] | |
138 dist2=distance[i][jMin] | |
139 distance[i][j]=(dist+dist2)/2 | |
140 distance=np.delete(distance, jMin, 0) | |
141 distance=np.delete(distance, jMin, 1) | |
142 return distance | |
143 | |
144 | |
145 def __getPairingReal(self,iMin,jMin,removedArray): | |
146 ''' | |
147 Returns the real omr position in the original matrix | |
148 based on the actual position and the elements removed | |
149 | |
150 usage: | |
151 self._getPairingReal(0,1,[1]) | |
152 returns | |
153 0,2 | |
154 ''' | |
155 iMinReal=iMin | |
156 jMinReal=jMin | |
157 removedArray.sort() | |
158 for removedItem in removedArray: | |
159 if iMinReal>=removedItem: | |
160 iMinReal+=1 | |
161 if jMinReal>=removedItem: | |
162 jMinReal+=1 | |
163 return iMinReal,jMinReal | |
164 | |
165 def selectBetterOMRs(self,OMRs_symbols): | |
166 ''' | |
167 Based on Philogenetic trees, this function | |
168 takes the best omrs based on the distances between them | |
169 ''' | |
170 distanceSimple=self.getDistances(OMRs_symbols) | |
171 clustering=Clustering() | |
172 distances=clustering.getCompleteMatrix(distanceSimple) | |
173 species = [] | |
174 for i in range(len(OMRs_symbols)): | |
175 species.append(i) | |
176 clu = clustering.make_clusters(species) | |
177 tree = clustering.regroup(clu, distances) | |
178 | |
179 #at least 3 leafs in the tree | |
180 maintree=tree | |
181 for i in range(3,len(OMRs_symbols)): | |
182 maintree=clustering.getBetterTree(tree,i) | |
183 if len(clustering.getLeafs(maintree))>=3: | |
184 break | |
185 | |
186 | |
187 betterOmrIds= clustering.getLeafs(maintree) | |
188 | |
189 #Graphic representation | |
190 strTree=clustering.getStringTree(tree,tree.height,"") | |
191 print strTree | |
192 clustering.showTree(strTree) | |
193 strMainTree=clustering.getStringTree(maintree,maintree.height,"") | |
194 print strMainTree | |
195 clustering.showTree(strMainTree) | |
196 | |
197 newOMRs=[] | |
198 for i in betterOmrIds: | |
199 newOMRs.append(OMRs_symbols[i]) | |
200 | |
201 return newOMRs,betterOmrIds | |
202 | |
203 # def alignNJ(self,idPart,fsOMRs,partOMRs): | |
204 # ''' | |
205 # Main function for aligning the different OMRs | |
206 # | |
207 # Returns: | |
208 # omr_symbolsAligned. OMR array of symbols aligned (only the best) | |
209 # betterOmrIds. Id array of better OMRs (for writing the log file) | |
210 # | |
211 # usage: | |
212 # pa=PipelineAlignment() | |
213 # omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) | |
214 # ''' | |
215 # | |
216 # | |
217 # OMRs_symbols=[] | |
218 # sc=SymbolConversion() | |
219 # print "2---converting to symbols---" | |
220 # for omr in OMRs: | |
221 # if omr!=[]: | |
222 # omr_symbols=sc.filterOMR(omr,idPart)[1] | |
223 # OMRs_symbols.append(omr_symbols) | |
224 # else: | |
225 # OMRs_symbols.append([]) | |
226 # | |
227 # print "3---removing worst OMR---" | |
228 # # betterOmrIds=[] | |
229 # OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) | |
230 # | |
231 # print "4---calculating distances---" | |
232 # distances=self.getDistances(OMRs_symbols) | |
233 # | |
234 # print "5---aligning symbols---" | |
235 # | |
236 # omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) | |
237 # | |
238 # return omr_symbolsAligned,betterOmrIds | |
239 | |
240 def alignNJ_files(self,idPart,fsOMRs_files,partOMRs_files): | |
241 ''' | |
242 Main function for aligning the different OMRs | |
243 | |
244 Returns: | |
245 omr_symbolsAligned. OMR array of symbols aligned (only the best) | |
246 betterOmrIds. Id array of better OMRs (for writing the log file) | |
247 | |
248 usage: | |
249 pa=PipelineAlignment() | |
250 omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) | |
251 ''' | |
252 | |
253 | |
254 ff=FilesFunctions() | |
255 OMRs_files=partOMRs_files+fsOMRs_files | |
256 OMRs_symbols=[] | |
257 sc=SymbolConversion() | |
258 print "2---converting to symbols---" | |
259 for omr_file in OMRs_files: | |
260 omr=ff.getOMR(omr_file) | |
261 if omr!=[]: | |
262 omr_symbols=sc.filterOMR(omr,idPart)[1] | |
263 OMRs_symbols.append(omr_symbols) | |
264 else: | |
265 OMRs_symbols.append([]) | |
266 | |
267 print "3---removing worst OMR---" | |
268 # betterOmrIds=[] | |
269 | |
270 | |
271 OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) | |
272 | |
273 print "4---calculating distances---" | |
274 distances=self.getDistances(OMRs_symbols) | |
275 | |
276 print "5---aligning symbols---" | |
277 | |
278 omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) | |
279 | |
280 return omr_symbolsAligned,betterOmrIds | |
281 def __setVoidArray(self,length): | |
282 ''' | |
283 private function to create a void array | |
284 ''' | |
285 arrayOut=[] | |
286 for _ in range(length): | |
287 arrayOut.append([]) | |
288 | |
289 return arrayOut | |
290 | |
291 def setOMRSymbolsAligned(self,OMRs_symbols,distances): | |
292 ''' | |
293 returns the OMRs symbols aligned from OMR symbols and | |
294 the distances matrix | |
295 ''' | |
296 pairings=[] | |
297 pairingsReal=[] | |
298 gapArray=[] | |
299 removedArray=[] | |
300 omr_symbolsAlign=self.__setVoidArray(len(OMRs_symbols)) | |
301 faa=FastAlignmentArrays() | |
302 while len(distances[0])>1: | |
303 minim,iMin,jMin=self.__getMinimum(distances) | |
304 print distances,minim,iMin,jMin | |
305 pairings.append([iMin,jMin]) | |
306 iMinReal,jMinReal=self.__getPairingReal(iMin,jMin,removedArray) | |
307 | |
308 pairingsReal.append([iMinReal,jMinReal]) | |
309 if len(omr_symbolsAlign[iMinReal])==0: | |
310 omr_symbolsAlign[iMinReal]=OMRs_symbols[iMin] | |
311 if len(omr_symbolsAlign[jMinReal])==0: | |
312 omr_symbolsAlign[jMinReal]=OMRs_symbols[jMin] | |
313 | |
314 out=faa.needleman_wunsch(omr_symbolsAlign[iMinReal], omr_symbolsAlign[jMinReal])[0] | |
315 omr_symbolsAlign[iMinReal]=out[0] | |
316 omr_symbolsAlign[jMinReal]=out[1] | |
317 gap1=out[2] | |
318 gap2=out[3] | |
319 gapArray.append([gap1,gap2]) | |
320 | |
321 | |
322 OMRs_symbols.pop(jMin) | |
323 removedArray.append(jMinReal) | |
324 | |
325 distances=self.__recalculeDistances(distances,iMin,jMin) | |
326 | |
327 | |
328 omr_symbolsAlign=self.__fillingGaps(omr_symbolsAlign,gapArray,pairingsReal) | |
329 | |
330 | |
331 return omr_symbolsAlign | |
332 | |
333 def __fillingGaps(self,omr_symbolsAlign,gapArray,pairingsReal): | |
334 ''' | |
335 private function to complete gaps based on the gap matrix | |
336 and the pairs stablished | |
337 ''' | |
338 for p in range(len(pairingsReal)-1,0,-1): | |
339 for i in range(2): | |
340 for j in range(2): | |
341 for t in range(1,p+1): | |
342 if(pairingsReal[p][i]==pairingsReal[p-t][j]): | |
343 if j==0: | |
344 s=1 | |
345 if j==1: | |
346 s=0 | |
347 omrIndex=pairingsReal[p-t][s] | |
348 newGap=[] | |
349 for gap in gapArray[p][i]: | |
350 omr_symbolsAlign[omrIndex].insert(gap,"*") | |
351 newGap.append(gap) | |
352 gapArray[p-t][s]=gapArray[p-t][s]+newGap | |
353 | |
354 | |
355 return omr_symbolsAlign | |
356 | |
357 | |
358 | |
359 | |
360 | |
361 | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | |
368 | |
369 | |
370 | |
371 | |
372 |