victor@0
|
1 #include <stdio.h>
|
victor@0
|
2 #include <stdlib.h>
|
victor@0
|
3
|
victor@0
|
4 #include <Python.h>
|
victor@0
|
5
|
victor@0
|
6
|
victor@0
|
7 float needleman_wunsch_matrix(char* seq1[],char* seq2[],int n,int m)
|
victor@0
|
8 {
|
victor@0
|
9
|
victor@0
|
10 //int DIAG = 0;
|
victor@0
|
11 int UP = 1;
|
victor@0
|
12 int LEFT = 2;
|
victor@0
|
13 int score=0;
|
victor@0
|
14
|
victor@0
|
15 int indel=-1;
|
victor@0
|
16
|
victor@0
|
17
|
victor@0
|
18 //ZERO MATRIX
|
victor@0
|
19 int i,j;
|
victor@0
|
20
|
victor@0
|
21 int maxArray=n;
|
victor@0
|
22 if (m>n)
|
victor@0
|
23 {
|
victor@0
|
24 maxArray=m;
|
victor@0
|
25 }
|
victor@0
|
26 int memorySpace=2*maxArray+100;
|
victor@0
|
27 int **s;
|
victor@0
|
28 int **ptr;
|
victor@0
|
29 s =calloc(memorySpace , sizeof(int*));
|
victor@0
|
30 for (i = 0; i < memorySpace; i++)
|
victor@0
|
31 s[i] = calloc(memorySpace,sizeof(int));
|
victor@0
|
32
|
victor@0
|
33 ptr = (int**) calloc(memorySpace , sizeof(int));
|
victor@0
|
34 for (i = 0; i < memorySpace; i++)
|
victor@0
|
35 ptr[i] = calloc(memorySpace,sizeof(int));
|
victor@0
|
36
|
victor@0
|
37
|
victor@0
|
38
|
victor@0
|
39 //INITIALIZE SCORING MATRIX
|
victor@0
|
40
|
victor@0
|
41 for (i = 1; i <n+1; i++)
|
victor@0
|
42 {
|
victor@0
|
43 s[i][0]=indel*i;
|
victor@0
|
44 }
|
victor@0
|
45 for (j = 1; j <m+1; j++)
|
victor@0
|
46 {
|
victor@0
|
47 s[0][j]=indel*j;
|
victor@0
|
48 }
|
victor@0
|
49 //INITIALIZE TRACE BACK MATRIX
|
victor@0
|
50 for (i = 1; i <n+1; i++)
|
victor@0
|
51 {
|
victor@0
|
52 ptr[0][i]=LEFT;
|
victor@0
|
53 }
|
victor@0
|
54 for (j = 1; j <m+1; j++)
|
victor@0
|
55 {
|
victor@0
|
56 ptr[j][0]=UP;
|
victor@0
|
57 }
|
victor@0
|
58 //#####################################
|
victor@0
|
59
|
victor@0
|
60 int p;
|
victor@0
|
61 int q;
|
victor@0
|
62 for (i = 1; i <n+1; i++)
|
victor@0
|
63 {
|
victor@0
|
64 p=i-350;
|
victor@0
|
65 q=i+350;
|
victor@0
|
66 if(p<1){
|
victor@0
|
67 p=1;
|
victor@0
|
68 }
|
victor@0
|
69 if(q>m+1)
|
victor@0
|
70 {
|
victor@0
|
71 q=m+1;
|
victor@0
|
72 }
|
victor@0
|
73 for(j=p;j<q;j++)
|
victor@0
|
74 {
|
victor@0
|
75 char* myseq1=seq1[i-1];
|
victor@0
|
76 char* myseq2=seq2[j-1];
|
victor@0
|
77
|
victor@0
|
78 if(strcmp(myseq1,myseq2)==0)
|
victor@0
|
79 {
|
victor@0
|
80 score=1;
|
victor@0
|
81 }else{
|
victor@0
|
82 score=-1;
|
victor@0
|
83 }
|
victor@0
|
84
|
victor@0
|
85 s[i][j]=s[i-1][j-1]+score;
|
victor@0
|
86
|
victor@0
|
87 if(s[i-1][j]+indel>s[i][j])
|
victor@0
|
88 {
|
victor@0
|
89 s[i][j]=s[i-1][j]+indel;
|
victor@0
|
90 ptr[i][j]=UP;
|
victor@0
|
91 }
|
victor@0
|
92 if(s[i][j-1]+indel>s[i][j])
|
victor@0
|
93 {
|
victor@0
|
94 s[i][j]=s[i][j-1]+indel;
|
victor@0
|
95 ptr[i][j]=LEFT;
|
victor@0
|
96
|
victor@0
|
97 }
|
victor@0
|
98
|
victor@0
|
99 }
|
victor@0
|
100
|
victor@0
|
101 }
|
victor@0
|
102
|
victor@0
|
103 float finalScore;
|
victor@0
|
104 finalScore=(float)s[n][m]/(float)maxArray;
|
victor@0
|
105
|
victor@0
|
106 for (i = 0; i < memorySpace; i++){
|
victor@0
|
107 free(s[i]);
|
victor@0
|
108 }
|
victor@0
|
109 free(s);
|
victor@0
|
110 for (i = 0; i < memorySpace; i++){
|
victor@0
|
111 free(ptr[i]);
|
victor@0
|
112 }
|
victor@0
|
113 free(ptr);
|
victor@0
|
114 return finalScore;
|
victor@0
|
115
|
victor@0
|
116 }
|
victor@0
|
117
|
victor@0
|
118
|
victor@0
|
119 static PyObject *
|
victor@0
|
120 NWunsch_getSimilarity(PyObject* self, PyObject* args)
|
victor@0
|
121 {
|
victor@0
|
122
|
victor@0
|
123 PyObject *t1,*t2;
|
victor@0
|
124 int t_seqlen1=0;
|
victor@0
|
125 int t_seqlen2=0;
|
victor@0
|
126 int i;
|
victor@0
|
127 PyObject *x_obj,*y_obj;
|
victor@0
|
128
|
victor@0
|
129
|
victor@0
|
130
|
victor@0
|
131 if (!PyArg_ParseTuple(args, "OO", &x_obj, &y_obj))
|
victor@0
|
132 return NULL;
|
victor@0
|
133
|
victor@0
|
134 //Sequence 1
|
victor@0
|
135 t1 = PySequence_Fast(x_obj, "argument must be iterable");
|
victor@0
|
136 t_seqlen1 = PySequence_Fast_GET_SIZE(t1);
|
victor@0
|
137
|
victor@0
|
138 int width=1;
|
victor@0
|
139 char **align1 =(char **)malloc(t_seqlen1 * sizeof(char*));
|
victor@0
|
140 *align1 = malloc(t_seqlen1 * width * sizeof(char));
|
victor@0
|
141 for (i = 0; i < t_seqlen1; i++)
|
victor@0
|
142 {
|
victor@0
|
143 align1[i] = *align1 + i*width;
|
victor@0
|
144 align1[i]=PyString_AsString(PySequence_Fast_GET_ITEM(t1, i));;
|
victor@0
|
145 }
|
victor@0
|
146
|
victor@0
|
147
|
victor@0
|
148 //Sequence 2
|
victor@0
|
149 t2 = PySequence_Fast(y_obj, "argument must be iterable");
|
victor@0
|
150 t_seqlen2 = PySequence_Fast_GET_SIZE(t2);
|
victor@0
|
151
|
victor@0
|
152
|
victor@0
|
153 char **align2 =(char **)malloc(t_seqlen2 * sizeof(char*));
|
victor@0
|
154 *align2 = malloc(t_seqlen2 * width * sizeof(char));
|
victor@0
|
155 for (i = 0; i < t_seqlen2; i++)
|
victor@0
|
156 {
|
victor@0
|
157 align2[i] = *align2 + i*width;
|
victor@0
|
158 align2[i]=PyString_AsString(PySequence_Fast_GET_ITEM(t2, i));
|
victor@0
|
159 }
|
victor@0
|
160
|
victor@0
|
161
|
victor@0
|
162
|
victor@0
|
163 float score;
|
victor@0
|
164 score=needleman_wunsch_matrix(align1,align2,t_seqlen1,t_seqlen2);
|
victor@0
|
165
|
victor@0
|
166 free(*align1);
|
victor@0
|
167 free(align1);
|
victor@0
|
168
|
victor@0
|
169 free(*align2);
|
victor@0
|
170 free(align2);
|
victor@0
|
171
|
victor@0
|
172 Py_XDECREF(t1);
|
victor@0
|
173 Py_XDECREF(t2);
|
victor@0
|
174
|
victor@0
|
175
|
victor@0
|
176
|
victor@0
|
177
|
victor@0
|
178 return Py_BuildValue("f", score);
|
victor@0
|
179 }
|
victor@0
|
180
|
victor@0
|
181
|
victor@0
|
182 static PyMethodDef NWunschMethods[] =
|
victor@0
|
183 {
|
victor@0
|
184 {"NWunsch_getSimilarity", NWunsch_getSimilarity, METH_VARARGS, "NWunsch_getSimilarity"},
|
victor@0
|
185 {NULL, NULL, 0, NULL}
|
victor@0
|
186 };
|
victor@0
|
187
|
victor@0
|
188 PyMODINIT_FUNC
|
victor@0
|
189 initNWunsch(void)
|
victor@0
|
190 {
|
victor@0
|
191 (void) Py_InitModule("NWunsch", NWunschMethods);
|
victor@0
|
192 }
|
victor@0
|
193
|
victor@0
|
194
|