Mercurial > hg > multiomr
view Process/PipelineAlignment.py @ 2:46fb79167a61 tip
Main Code
author | Victor Padilla <victor.padilla.mc@gmail.com> |
---|---|
date | Mon, 04 May 2015 22:56:18 +0200 |
parents | |
children |
line wrap: on
line source
''' Created on 10/11/2014 @organization: Lancaster University & University of Leeds @version: 1.0 Created on 11/12/2014 @author: Victor Padilla @contact: v.padilla@lancaster.ac.uk Functions related to the alignment and voting process ''' from Alignment import FastAlignmentArrays from SymbolConversion import SymbolConversion from Functions import FilesFunctions import numpy as np from Clustering import Clustering import math from Alignment import NWunsch class PipelineAlignment: def alignGround(self,OMRs,part): ''' Returns one part and the ground aligned. The first array value of OMRs should be the ground and the second the omr to align ''' sc=SymbolConversion() OMRs_symbols=[] omr_symbolsAlign=[] for omr in OMRs: omr_symbols=sc.filterOMR(omr,part)[1] OMRs_symbols.append(omr_symbols) omr_symbolsAlign.append([]) faa=FastAlignmentArrays() out=faa.needleman_wunsch(OMRs_symbols[0], OMRs_symbols[1])[0] return out def getDistances(self,OMRs_symbols): ''' Returns the distance matrix from several omr in symbols, using the first symbol only [u'N:E-4_0.25', 0.25, '', 2.75, 3.0, None] y [u'N:E-4_0.25', 0.33, '', 2.50, 2.75, None] are equals Returns a triangular matrix [[ 0. 0.17647058 0.19141912] [ 0. 0. 0.17647058] [ 0. 0. 0. ]] Uses the algorithm implemented in C for increasing the speed Alignment/C_Libraries/NWunsch ''' ls=len(OMRs_symbols) dimension= (ls,ls) distances=np.zeros(dimension) for i in range(len(OMRs_symbols)): for j in range(i+1,len(OMRs_symbols)): print i,j align1=[] align2=[] for s in OMRs_symbols[i]: align1.append(s[0]) for s in OMRs_symbols[j]: align2.append(s[0]) #Algorithm implemented in C if len(align1)==0 or len(align2)==0: score=0 else: print"-------------------------" score=NWunsch.NWunsch_getSimilarity(align1,align2) print"-------------------------" if math.isnan(score): score=0 distances[i][j]=1-score return distances def getDistanceLength(self,OMRs_symbols): ''' Similar to getDistance, but based on the length of the omrs. Testing purposes ''' ls=len(OMRs_symbols) dimension= (ls,ls) distances=np.zeros(dimension) for i in range(len(OMRs_symbols)): for j in range(i+1,len(OMRs_symbols)): print i,j len_i=len(OMRs_symbols[i]) len_j=len(OMRs_symbols[j]) maxLen=len_j if len_i>=len_j: maxLen=len_i score=(len_i-len_j)*1.0/maxLen if score<0: score=score*-1 distances[i][j]=score return distances def __getMinimum(self,distance): ''' Returns the minimum value and the x,y position in the distance matrix ''' minim=1000 iMin=0 jMin=0 for i in range(len(distance[0])): for j in range(i+1,len(distance[0])): dist=distance[i][j] if isinstance(dist,list): dist=dist[0] if dist<minim: minim=dist iMin=i jMin=j return minim,iMin,jMin def __recalculeDistances(self,distance,iMin,jMin): ''' Removes the rows and the column in the distance matrix and calculates the new matrix ''' for i in range(len(distance[0])): for j in range(i+1,len(distance[0])): if j==iMin: dist=distance[i][j] dist2=distance[i][jMin] distance[i][j]=(dist+dist2)/2 distance=np.delete(distance, jMin, 0) distance=np.delete(distance, jMin, 1) return distance def __getPairingReal(self,iMin,jMin,removedArray): ''' Returns the real omr position in the original matrix based on the actual position and the elements removed usage: self._getPairingReal(0,1,[1]) returns 0,2 ''' iMinReal=iMin jMinReal=jMin removedArray.sort() for removedItem in removedArray: if iMinReal>=removedItem: iMinReal+=1 if jMinReal>=removedItem: jMinReal+=1 return iMinReal,jMinReal def selectBetterOMRs(self,OMRs_symbols): ''' Based on Philogenetic trees, this function takes the best omrs based on the distances between them ''' distanceSimple=self.getDistances(OMRs_symbols) clustering=Clustering() distances=clustering.getCompleteMatrix(distanceSimple) species = [] for i in range(len(OMRs_symbols)): species.append(i) clu = clustering.make_clusters(species) tree = clustering.regroup(clu, distances) #at least 3 leafs in the tree maintree=tree for i in range(3,len(OMRs_symbols)): maintree=clustering.getBetterTree(tree,i) if len(clustering.getLeafs(maintree))>=3: break betterOmrIds= clustering.getLeafs(maintree) #Graphic representation strTree=clustering.getStringTree(tree,tree.height,"") print strTree clustering.showTree(strTree) strMainTree=clustering.getStringTree(maintree,maintree.height,"") print strMainTree clustering.showTree(strMainTree) newOMRs=[] for i in betterOmrIds: newOMRs.append(OMRs_symbols[i]) return newOMRs,betterOmrIds # def alignNJ(self,idPart,fsOMRs,partOMRs): # ''' # Main function for aligning the different OMRs # # Returns: # omr_symbolsAligned. OMR array of symbols aligned (only the best) # betterOmrIds. Id array of better OMRs (for writing the log file) # # usage: # pa=PipelineAlignment() # omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) # ''' # # # OMRs_symbols=[] # sc=SymbolConversion() # print "2---converting to symbols---" # for omr in OMRs: # if omr!=[]: # omr_symbols=sc.filterOMR(omr,idPart)[1] # OMRs_symbols.append(omr_symbols) # else: # OMRs_symbols.append([]) # # print "3---removing worst OMR---" # # betterOmrIds=[] # OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) # # print "4---calculating distances---" # distances=self.getDistances(OMRs_symbols) # # print "5---aligning symbols---" # # omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) # # return omr_symbolsAligned,betterOmrIds def alignNJ_files(self,idPart,fsOMRs_files,partOMRs_files): ''' Main function for aligning the different OMRs Returns: omr_symbolsAligned. OMR array of symbols aligned (only the best) betterOmrIds. Id array of better OMRs (for writing the log file) usage: pa=PipelineAlignment() omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) ''' ff=FilesFunctions() OMRs_files=partOMRs_files+fsOMRs_files OMRs_symbols=[] sc=SymbolConversion() print "2---converting to symbols---" for omr_file in OMRs_files: omr=ff.getOMR(omr_file) if omr!=[]: omr_symbols=sc.filterOMR(omr,idPart)[1] OMRs_symbols.append(omr_symbols) else: OMRs_symbols.append([]) print "3---removing worst OMR---" # betterOmrIds=[] OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) print "4---calculating distances---" distances=self.getDistances(OMRs_symbols) print "5---aligning symbols---" omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) return omr_symbolsAligned,betterOmrIds def __setVoidArray(self,length): ''' private function to create a void array ''' arrayOut=[] for _ in range(length): arrayOut.append([]) return arrayOut def setOMRSymbolsAligned(self,OMRs_symbols,distances): ''' returns the OMRs symbols aligned from OMR symbols and the distances matrix ''' pairings=[] pairingsReal=[] gapArray=[] removedArray=[] omr_symbolsAlign=self.__setVoidArray(len(OMRs_symbols)) faa=FastAlignmentArrays() while len(distances[0])>1: minim,iMin,jMin=self.__getMinimum(distances) print distances,minim,iMin,jMin pairings.append([iMin,jMin]) iMinReal,jMinReal=self.__getPairingReal(iMin,jMin,removedArray) pairingsReal.append([iMinReal,jMinReal]) if len(omr_symbolsAlign[iMinReal])==0: omr_symbolsAlign[iMinReal]=OMRs_symbols[iMin] if len(omr_symbolsAlign[jMinReal])==0: omr_symbolsAlign[jMinReal]=OMRs_symbols[jMin] out=faa.needleman_wunsch(omr_symbolsAlign[iMinReal], omr_symbolsAlign[jMinReal])[0] omr_symbolsAlign[iMinReal]=out[0] omr_symbolsAlign[jMinReal]=out[1] gap1=out[2] gap2=out[3] gapArray.append([gap1,gap2]) OMRs_symbols.pop(jMin) removedArray.append(jMinReal) distances=self.__recalculeDistances(distances,iMin,jMin) omr_symbolsAlign=self.__fillingGaps(omr_symbolsAlign,gapArray,pairingsReal) return omr_symbolsAlign def __fillingGaps(self,omr_symbolsAlign,gapArray,pairingsReal): ''' private function to complete gaps based on the gap matrix and the pairs stablished ''' for p in range(len(pairingsReal)-1,0,-1): for i in range(2): for j in range(2): for t in range(1,p+1): if(pairingsReal[p][i]==pairingsReal[p-t][j]): if j==0: s=1 if j==1: s=0 omrIndex=pairingsReal[p-t][s] newGap=[] for gap in gapArray[p][i]: omr_symbolsAlign[omrIndex].insert(gap,"*") newGap.append(gap) gapArray[p-t][s]=gapArray[p-t][s]+newGap return omr_symbolsAlign