victor@2: ''' victor@2: Created on 10/11/2014 victor@2: victor@2: @organization: Lancaster University & University of Leeds victor@2: @version: 1.0 victor@2: Created on 11/12/2014 victor@2: victor@2: @author: Victor Padilla victor@2: @contact: v.padilla@lancaster.ac.uk victor@2: victor@2: Functions related to the alignment victor@2: and voting process victor@2: ''' victor@2: from Alignment import FastAlignmentArrays victor@2: from SymbolConversion import SymbolConversion victor@2: from Functions import FilesFunctions victor@2: import numpy as np victor@2: from Clustering import Clustering victor@2: import math victor@2: from Alignment import NWunsch victor@2: victor@2: victor@2: class PipelineAlignment: victor@2: def alignGround(self,OMRs,part): victor@2: ''' victor@2: Returns one part and the ground aligned. The first array value of OMRs victor@2: should be the ground and the second the omr to align victor@2: ''' victor@2: sc=SymbolConversion() victor@2: OMRs_symbols=[] victor@2: omr_symbolsAlign=[] victor@2: for omr in OMRs: victor@2: omr_symbols=sc.filterOMR(omr,part)[1] victor@2: OMRs_symbols.append(omr_symbols) victor@2: omr_symbolsAlign.append([]) victor@2: victor@2: faa=FastAlignmentArrays() victor@2: out=faa.needleman_wunsch(OMRs_symbols[0], OMRs_symbols[1])[0] victor@2: victor@2: victor@2: return out victor@2: victor@2: def getDistances(self,OMRs_symbols): victor@2: ''' victor@2: Returns the distance matrix from several omr victor@2: in symbols, using the first symbol only victor@2: [u'N:E-4_0.25', 0.25, '', 2.75, 3.0, None] y victor@2: [u'N:E-4_0.25', 0.33, '', 2.50, 2.75, None] victor@2: victor@2: are equals victor@2: victor@2: Returns a triangular matrix victor@2: [[ 0. 0.17647058 0.19141912] victor@2: [ 0. 0. 0.17647058] victor@2: [ 0. 0. 0. ]] victor@2: victor@2: Uses the algorithm implemented in C for increasing the speed victor@2: Alignment/C_Libraries/NWunsch victor@2: ''' victor@2: ls=len(OMRs_symbols) victor@2: dimension= (ls,ls) victor@2: distances=np.zeros(dimension) victor@2: for i in range(len(OMRs_symbols)): victor@2: for j in range(i+1,len(OMRs_symbols)): victor@2: print i,j victor@2: align1=[] victor@2: align2=[] victor@2: for s in OMRs_symbols[i]: victor@2: align1.append(s[0]) victor@2: for s in OMRs_symbols[j]: victor@2: align2.append(s[0]) victor@2: #Algorithm implemented in C victor@2: if len(align1)==0 or len(align2)==0: victor@2: score=0 victor@2: else: victor@2: print"-------------------------" victor@2: victor@2: score=NWunsch.NWunsch_getSimilarity(align1,align2) victor@2: print"-------------------------" victor@2: if math.isnan(score): victor@2: score=0 victor@2: distances[i][j]=1-score victor@2: return distances victor@2: victor@2: def getDistanceLength(self,OMRs_symbols): victor@2: ''' victor@2: Similar to getDistance, but based on the length victor@2: of the omrs. Testing purposes victor@2: ''' victor@2: ls=len(OMRs_symbols) victor@2: dimension= (ls,ls) victor@2: distances=np.zeros(dimension) victor@2: for i in range(len(OMRs_symbols)): victor@2: for j in range(i+1,len(OMRs_symbols)): victor@2: print i,j victor@2: len_i=len(OMRs_symbols[i]) victor@2: len_j=len(OMRs_symbols[j]) victor@2: maxLen=len_j victor@2: if len_i>=len_j: victor@2: maxLen=len_i victor@2: victor@2: score=(len_i-len_j)*1.0/maxLen victor@2: if score<0: victor@2: score=score*-1 victor@2: distances[i][j]=score victor@2: return distances victor@2: victor@2: def __getMinimum(self,distance): victor@2: ''' victor@2: Returns the minimum value and the x,y position victor@2: in the distance matrix victor@2: ''' victor@2: minim=1000 victor@2: iMin=0 victor@2: jMin=0 victor@2: for i in range(len(distance[0])): victor@2: for j in range(i+1,len(distance[0])): victor@2: dist=distance[i][j] victor@2: if isinstance(dist,list): victor@2: dist=dist[0] victor@2: if dist=removedItem: victor@2: iMinReal+=1 victor@2: if jMinReal>=removedItem: victor@2: jMinReal+=1 victor@2: return iMinReal,jMinReal victor@2: victor@2: def selectBetterOMRs(self,OMRs_symbols): victor@2: ''' victor@2: Based on Philogenetic trees, this function victor@2: takes the best omrs based on the distances between them victor@2: ''' victor@2: distanceSimple=self.getDistances(OMRs_symbols) victor@2: clustering=Clustering() victor@2: distances=clustering.getCompleteMatrix(distanceSimple) victor@2: species = [] victor@2: for i in range(len(OMRs_symbols)): victor@2: species.append(i) victor@2: clu = clustering.make_clusters(species) victor@2: tree = clustering.regroup(clu, distances) victor@2: victor@2: #at least 3 leafs in the tree victor@2: maintree=tree victor@2: for i in range(3,len(OMRs_symbols)): victor@2: maintree=clustering.getBetterTree(tree,i) victor@2: if len(clustering.getLeafs(maintree))>=3: victor@2: break victor@2: victor@2: victor@2: betterOmrIds= clustering.getLeafs(maintree) victor@2: victor@2: #Graphic representation victor@2: strTree=clustering.getStringTree(tree,tree.height,"") victor@2: print strTree victor@2: clustering.showTree(strTree) victor@2: strMainTree=clustering.getStringTree(maintree,maintree.height,"") victor@2: print strMainTree victor@2: clustering.showTree(strMainTree) victor@2: victor@2: newOMRs=[] victor@2: for i in betterOmrIds: victor@2: newOMRs.append(OMRs_symbols[i]) victor@2: victor@2: return newOMRs,betterOmrIds victor@2: victor@2: # def alignNJ(self,idPart,fsOMRs,partOMRs): victor@2: # ''' victor@2: # Main function for aligning the different OMRs victor@2: # victor@2: # Returns: victor@2: # omr_symbolsAligned. OMR array of symbols aligned (only the best) victor@2: # betterOmrIds. Id array of better OMRs (for writing the log file) victor@2: # victor@2: # usage: victor@2: # pa=PipelineAlignment() victor@2: # omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) victor@2: # ''' victor@2: # victor@2: # victor@2: # OMRs_symbols=[] victor@2: # sc=SymbolConversion() victor@2: # print "2---converting to symbols---" victor@2: # for omr in OMRs: victor@2: # if omr!=[]: victor@2: # omr_symbols=sc.filterOMR(omr,idPart)[1] victor@2: # OMRs_symbols.append(omr_symbols) victor@2: # else: victor@2: # OMRs_symbols.append([]) victor@2: # victor@2: # print "3---removing worst OMR---" victor@2: # # betterOmrIds=[] victor@2: # OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) victor@2: # victor@2: # print "4---calculating distances---" victor@2: # distances=self.getDistances(OMRs_symbols) victor@2: # victor@2: # print "5---aligning symbols---" victor@2: # victor@2: # omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) victor@2: # victor@2: # return omr_symbolsAligned,betterOmrIds victor@2: victor@2: def alignNJ_files(self,idPart,fsOMRs_files,partOMRs_files): victor@2: ''' victor@2: Main function for aligning the different OMRs victor@2: victor@2: Returns: victor@2: omr_symbolsAligned. OMR array of symbols aligned (only the best) victor@2: betterOmrIds. Id array of better OMRs (for writing the log file) victor@2: victor@2: usage: victor@2: pa=PipelineAlignment() victor@2: omr_symbolsAligned,betterOmrIds=pa.alignNJ(idPart,fsOMRs,partOMRs) victor@2: ''' victor@2: victor@2: victor@2: ff=FilesFunctions() victor@2: OMRs_files=partOMRs_files+fsOMRs_files victor@2: OMRs_symbols=[] victor@2: sc=SymbolConversion() victor@2: print "2---converting to symbols---" victor@2: for omr_file in OMRs_files: victor@2: omr=ff.getOMR(omr_file) victor@2: if omr!=[]: victor@2: omr_symbols=sc.filterOMR(omr,idPart)[1] victor@2: OMRs_symbols.append(omr_symbols) victor@2: else: victor@2: OMRs_symbols.append([]) victor@2: victor@2: print "3---removing worst OMR---" victor@2: # betterOmrIds=[] victor@2: victor@2: victor@2: OMRs_symbols,betterOmrIds=self.selectBetterOMRs(OMRs_symbols) victor@2: victor@2: print "4---calculating distances---" victor@2: distances=self.getDistances(OMRs_symbols) victor@2: victor@2: print "5---aligning symbols---" victor@2: victor@2: omr_symbolsAligned=self.setOMRSymbolsAligned(OMRs_symbols,distances) victor@2: victor@2: return omr_symbolsAligned,betterOmrIds victor@2: def __setVoidArray(self,length): victor@2: ''' victor@2: private function to create a void array victor@2: ''' victor@2: arrayOut=[] victor@2: for _ in range(length): victor@2: arrayOut.append([]) victor@2: victor@2: return arrayOut victor@2: victor@2: def setOMRSymbolsAligned(self,OMRs_symbols,distances): victor@2: ''' victor@2: returns the OMRs symbols aligned from OMR symbols and victor@2: the distances matrix victor@2: ''' victor@2: pairings=[] victor@2: pairingsReal=[] victor@2: gapArray=[] victor@2: removedArray=[] victor@2: omr_symbolsAlign=self.__setVoidArray(len(OMRs_symbols)) victor@2: faa=FastAlignmentArrays() victor@2: while len(distances[0])>1: victor@2: minim,iMin,jMin=self.__getMinimum(distances) victor@2: print distances,minim,iMin,jMin victor@2: pairings.append([iMin,jMin]) victor@2: iMinReal,jMinReal=self.__getPairingReal(iMin,jMin,removedArray) victor@2: victor@2: pairingsReal.append([iMinReal,jMinReal]) victor@2: if len(omr_symbolsAlign[iMinReal])==0: victor@2: omr_symbolsAlign[iMinReal]=OMRs_symbols[iMin] victor@2: if len(omr_symbolsAlign[jMinReal])==0: victor@2: omr_symbolsAlign[jMinReal]=OMRs_symbols[jMin] victor@2: victor@2: out=faa.needleman_wunsch(omr_symbolsAlign[iMinReal], omr_symbolsAlign[jMinReal])[0] victor@2: omr_symbolsAlign[iMinReal]=out[0] victor@2: omr_symbolsAlign[jMinReal]=out[1] victor@2: gap1=out[2] victor@2: gap2=out[3] victor@2: gapArray.append([gap1,gap2]) victor@2: victor@2: victor@2: OMRs_symbols.pop(jMin) victor@2: removedArray.append(jMinReal) victor@2: victor@2: distances=self.__recalculeDistances(distances,iMin,jMin) victor@2: victor@2: victor@2: omr_symbolsAlign=self.__fillingGaps(omr_symbolsAlign,gapArray,pairingsReal) victor@2: victor@2: victor@2: return omr_symbolsAlign victor@2: victor@2: def __fillingGaps(self,omr_symbolsAlign,gapArray,pairingsReal): victor@2: ''' victor@2: private function to complete gaps based on the gap matrix victor@2: and the pairs stablished victor@2: ''' victor@2: for p in range(len(pairingsReal)-1,0,-1): victor@2: for i in range(2): victor@2: for j in range(2): victor@2: for t in range(1,p+1): victor@2: if(pairingsReal[p][i]==pairingsReal[p-t][j]): victor@2: if j==0: victor@2: s=1 victor@2: if j==1: victor@2: s=0 victor@2: omrIndex=pairingsReal[p-t][s] victor@2: newGap=[] victor@2: for gap in gapArray[p][i]: victor@2: omr_symbolsAlign[omrIndex].insert(gap,"*") victor@2: newGap.append(gap) victor@2: gapArray[p-t][s]=gapArray[p-t][s]+newGap victor@2: victor@2: victor@2: return omr_symbolsAlign victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: victor@2: