Chris@49
|
1 // Copyright (C) 2008-2013 NICTA (www.nicta.com.au)
|
Chris@49
|
2 // Copyright (C) 2008-2013 Conrad Sanderson
|
Chris@49
|
3 // Copyright (C) 2012 Ryan Curtin
|
Chris@49
|
4 //
|
Chris@49
|
5 // This Source Code Form is subject to the terms of the Mozilla Public
|
Chris@49
|
6 // License, v. 2.0. If a copy of the MPL was not distributed with this
|
Chris@49
|
7 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
Chris@49
|
8
|
Chris@49
|
9
|
Chris@49
|
10 //! \addtogroup op_strans
|
Chris@49
|
11 //! @{
|
Chris@49
|
12
|
Chris@49
|
13
|
Chris@49
|
14
|
Chris@49
|
15 //! for tiny square matrices (size <= 4x4)
|
Chris@49
|
16 template<typename eT, typename TA>
|
Chris@49
|
17 arma_hot
|
Chris@49
|
18 inline
|
Chris@49
|
19 void
|
Chris@49
|
20 op_strans::apply_noalias_tinysq(Mat<eT>& out, const TA& A)
|
Chris@49
|
21 {
|
Chris@49
|
22 const eT* Am = A.memptr();
|
Chris@49
|
23 eT* outm = out.memptr();
|
Chris@49
|
24
|
Chris@49
|
25 switch(A.n_rows)
|
Chris@49
|
26 {
|
Chris@49
|
27 case 1:
|
Chris@49
|
28 {
|
Chris@49
|
29 outm[0] = Am[0];
|
Chris@49
|
30 }
|
Chris@49
|
31 break;
|
Chris@49
|
32
|
Chris@49
|
33 case 2:
|
Chris@49
|
34 {
|
Chris@49
|
35 outm[pos<false,0,0>::n2] = Am[pos<true,0,0>::n2];
|
Chris@49
|
36 outm[pos<false,1,0>::n2] = Am[pos<true,1,0>::n2];
|
Chris@49
|
37
|
Chris@49
|
38 outm[pos<false,0,1>::n2] = Am[pos<true,0,1>::n2];
|
Chris@49
|
39 outm[pos<false,1,1>::n2] = Am[pos<true,1,1>::n2];
|
Chris@49
|
40 }
|
Chris@49
|
41 break;
|
Chris@49
|
42
|
Chris@49
|
43 case 3:
|
Chris@49
|
44 {
|
Chris@49
|
45 outm[pos<false,0,0>::n3] = Am[pos<true,0,0>::n3];
|
Chris@49
|
46 outm[pos<false,1,0>::n3] = Am[pos<true,1,0>::n3];
|
Chris@49
|
47 outm[pos<false,2,0>::n3] = Am[pos<true,2,0>::n3];
|
Chris@49
|
48
|
Chris@49
|
49 outm[pos<false,0,1>::n3] = Am[pos<true,0,1>::n3];
|
Chris@49
|
50 outm[pos<false,1,1>::n3] = Am[pos<true,1,1>::n3];
|
Chris@49
|
51 outm[pos<false,2,1>::n3] = Am[pos<true,2,1>::n3];
|
Chris@49
|
52
|
Chris@49
|
53 outm[pos<false,0,2>::n3] = Am[pos<true,0,2>::n3];
|
Chris@49
|
54 outm[pos<false,1,2>::n3] = Am[pos<true,1,2>::n3];
|
Chris@49
|
55 outm[pos<false,2,2>::n3] = Am[pos<true,2,2>::n3];
|
Chris@49
|
56 }
|
Chris@49
|
57 break;
|
Chris@49
|
58
|
Chris@49
|
59 case 4:
|
Chris@49
|
60 {
|
Chris@49
|
61 outm[pos<false,0,0>::n4] = Am[pos<true,0,0>::n4];
|
Chris@49
|
62 outm[pos<false,1,0>::n4] = Am[pos<true,1,0>::n4];
|
Chris@49
|
63 outm[pos<false,2,0>::n4] = Am[pos<true,2,0>::n4];
|
Chris@49
|
64 outm[pos<false,3,0>::n4] = Am[pos<true,3,0>::n4];
|
Chris@49
|
65
|
Chris@49
|
66 outm[pos<false,0,1>::n4] = Am[pos<true,0,1>::n4];
|
Chris@49
|
67 outm[pos<false,1,1>::n4] = Am[pos<true,1,1>::n4];
|
Chris@49
|
68 outm[pos<false,2,1>::n4] = Am[pos<true,2,1>::n4];
|
Chris@49
|
69 outm[pos<false,3,1>::n4] = Am[pos<true,3,1>::n4];
|
Chris@49
|
70
|
Chris@49
|
71 outm[pos<false,0,2>::n4] = Am[pos<true,0,2>::n4];
|
Chris@49
|
72 outm[pos<false,1,2>::n4] = Am[pos<true,1,2>::n4];
|
Chris@49
|
73 outm[pos<false,2,2>::n4] = Am[pos<true,2,2>::n4];
|
Chris@49
|
74 outm[pos<false,3,2>::n4] = Am[pos<true,3,2>::n4];
|
Chris@49
|
75
|
Chris@49
|
76 outm[pos<false,0,3>::n4] = Am[pos<true,0,3>::n4];
|
Chris@49
|
77 outm[pos<false,1,3>::n4] = Am[pos<true,1,3>::n4];
|
Chris@49
|
78 outm[pos<false,2,3>::n4] = Am[pos<true,2,3>::n4];
|
Chris@49
|
79 outm[pos<false,3,3>::n4] = Am[pos<true,3,3>::n4];
|
Chris@49
|
80 }
|
Chris@49
|
81 break;
|
Chris@49
|
82
|
Chris@49
|
83 default:
|
Chris@49
|
84 ;
|
Chris@49
|
85 }
|
Chris@49
|
86
|
Chris@49
|
87 }
|
Chris@49
|
88
|
Chris@49
|
89
|
Chris@49
|
90
|
Chris@49
|
91 //! Immediate transpose of a dense matrix
|
Chris@49
|
92 template<typename eT, typename TA>
|
Chris@49
|
93 arma_hot
|
Chris@49
|
94 inline
|
Chris@49
|
95 void
|
Chris@49
|
96 op_strans::apply_noalias(Mat<eT>& out, const TA& A)
|
Chris@49
|
97 {
|
Chris@49
|
98 arma_extra_debug_sigprint();
|
Chris@49
|
99
|
Chris@49
|
100 const uword A_n_cols = A.n_cols;
|
Chris@49
|
101 const uword A_n_rows = A.n_rows;
|
Chris@49
|
102
|
Chris@49
|
103 out.set_size(A_n_cols, A_n_rows);
|
Chris@49
|
104
|
Chris@49
|
105 if( (TA::is_row) || (TA::is_col) || (A_n_cols == 1) || (A_n_rows == 1) )
|
Chris@49
|
106 {
|
Chris@49
|
107 arrayops::copy( out.memptr(), A.memptr(), A.n_elem );
|
Chris@49
|
108 }
|
Chris@49
|
109 else
|
Chris@49
|
110 {
|
Chris@49
|
111 if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
|
Chris@49
|
112 {
|
Chris@49
|
113 op_strans::apply_noalias_tinysq(out, A);
|
Chris@49
|
114 }
|
Chris@49
|
115 else
|
Chris@49
|
116 {
|
Chris@49
|
117 for(uword k=0; k < A_n_cols; ++k)
|
Chris@49
|
118 {
|
Chris@49
|
119 uword i, j;
|
Chris@49
|
120
|
Chris@49
|
121 const eT* colptr = A.colptr(k);
|
Chris@49
|
122
|
Chris@49
|
123 for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
|
Chris@49
|
124 {
|
Chris@49
|
125 const eT tmp_i = colptr[i];
|
Chris@49
|
126 const eT tmp_j = colptr[j];
|
Chris@49
|
127
|
Chris@49
|
128 out.at(k, i) = tmp_i;
|
Chris@49
|
129 out.at(k, j) = tmp_j;
|
Chris@49
|
130 }
|
Chris@49
|
131
|
Chris@49
|
132 if(i < A_n_rows)
|
Chris@49
|
133 {
|
Chris@49
|
134 out.at(k, i) = colptr[i];
|
Chris@49
|
135 }
|
Chris@49
|
136 }
|
Chris@49
|
137 }
|
Chris@49
|
138 }
|
Chris@49
|
139 }
|
Chris@49
|
140
|
Chris@49
|
141
|
Chris@49
|
142
|
Chris@49
|
143 template<typename eT, typename TA>
|
Chris@49
|
144 arma_hot
|
Chris@49
|
145 inline
|
Chris@49
|
146 void
|
Chris@49
|
147 op_strans::apply(Mat<eT>& out, const TA& A)
|
Chris@49
|
148 {
|
Chris@49
|
149 arma_extra_debug_sigprint();
|
Chris@49
|
150
|
Chris@49
|
151 if(&out != &A)
|
Chris@49
|
152 {
|
Chris@49
|
153 op_strans::apply_noalias(out, A);
|
Chris@49
|
154 }
|
Chris@49
|
155 else
|
Chris@49
|
156 {
|
Chris@49
|
157 const uword n_rows = A.n_rows;
|
Chris@49
|
158 const uword n_cols = A.n_cols;
|
Chris@49
|
159
|
Chris@49
|
160 if(n_rows == n_cols)
|
Chris@49
|
161 {
|
Chris@49
|
162 arma_extra_debug_print("op_strans::apply(): doing in-place transpose of a square matrix");
|
Chris@49
|
163
|
Chris@49
|
164 const uword N = n_rows;
|
Chris@49
|
165
|
Chris@49
|
166 for(uword k=0; k < N; ++k)
|
Chris@49
|
167 {
|
Chris@49
|
168 eT* colptr = out.colptr(k);
|
Chris@49
|
169
|
Chris@49
|
170 uword i,j;
|
Chris@49
|
171
|
Chris@49
|
172 for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
|
Chris@49
|
173 {
|
Chris@49
|
174 std::swap(out.at(k,i), colptr[i]);
|
Chris@49
|
175 std::swap(out.at(k,j), colptr[j]);
|
Chris@49
|
176 }
|
Chris@49
|
177
|
Chris@49
|
178 if(i < N)
|
Chris@49
|
179 {
|
Chris@49
|
180 std::swap(out.at(k,i), colptr[i]);
|
Chris@49
|
181 }
|
Chris@49
|
182 }
|
Chris@49
|
183 }
|
Chris@49
|
184 else
|
Chris@49
|
185 {
|
Chris@49
|
186 Mat<eT> tmp;
|
Chris@49
|
187 op_strans::apply_noalias(tmp, A);
|
Chris@49
|
188
|
Chris@49
|
189 out.steal_mem(tmp);
|
Chris@49
|
190 }
|
Chris@49
|
191 }
|
Chris@49
|
192 }
|
Chris@49
|
193
|
Chris@49
|
194
|
Chris@49
|
195
|
Chris@49
|
196 template<typename T1>
|
Chris@49
|
197 arma_hot
|
Chris@49
|
198 inline
|
Chris@49
|
199 void
|
Chris@49
|
200 op_strans::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X)
|
Chris@49
|
201 {
|
Chris@49
|
202 arma_extra_debug_sigprint();
|
Chris@49
|
203
|
Chris@49
|
204 typedef typename T1::elem_type eT;
|
Chris@49
|
205
|
Chris@49
|
206 const Proxy<T1> P(X);
|
Chris@49
|
207
|
Chris@49
|
208 // allow detection of in-place transpose
|
Chris@49
|
209 if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
|
Chris@49
|
210 {
|
Chris@49
|
211 const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
|
Chris@49
|
212
|
Chris@49
|
213 op_strans::apply(out, tmp.M);
|
Chris@49
|
214 }
|
Chris@49
|
215 else
|
Chris@49
|
216 {
|
Chris@49
|
217 const uword n_rows = P.get_n_rows();
|
Chris@49
|
218 const uword n_cols = P.get_n_cols();
|
Chris@49
|
219
|
Chris@49
|
220 const bool is_alias = P.is_alias(out);
|
Chris@49
|
221
|
Chris@49
|
222 if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
|
Chris@49
|
223 {
|
Chris@49
|
224 if(is_alias == false)
|
Chris@49
|
225 {
|
Chris@49
|
226 out.set_size(n_cols, n_rows);
|
Chris@49
|
227
|
Chris@49
|
228 eT* out_mem = out.memptr();
|
Chris@49
|
229
|
Chris@49
|
230 const uword n_elem = P.get_n_elem();
|
Chris@49
|
231
|
Chris@49
|
232 typename Proxy<T1>::ea_type Pea = P.get_ea();
|
Chris@49
|
233
|
Chris@49
|
234 uword i,j;
|
Chris@49
|
235 for(i=0, j=1; j < n_elem; i+=2, j+=2)
|
Chris@49
|
236 {
|
Chris@49
|
237 const eT tmp_i = Pea[i];
|
Chris@49
|
238 const eT tmp_j = Pea[j];
|
Chris@49
|
239
|
Chris@49
|
240 out_mem[i] = tmp_i;
|
Chris@49
|
241 out_mem[j] = tmp_j;
|
Chris@49
|
242 }
|
Chris@49
|
243
|
Chris@49
|
244 if(i < n_elem)
|
Chris@49
|
245 {
|
Chris@49
|
246 out_mem[i] = Pea[i];
|
Chris@49
|
247 }
|
Chris@49
|
248 }
|
Chris@49
|
249 else // aliasing
|
Chris@49
|
250 {
|
Chris@49
|
251 Mat<eT> out2(n_cols, n_rows);
|
Chris@49
|
252
|
Chris@49
|
253 eT* out_mem = out2.memptr();
|
Chris@49
|
254
|
Chris@49
|
255 const uword n_elem = P.get_n_elem();
|
Chris@49
|
256
|
Chris@49
|
257 typename Proxy<T1>::ea_type Pea = P.get_ea();
|
Chris@49
|
258
|
Chris@49
|
259 uword i,j;
|
Chris@49
|
260 for(i=0, j=1; j < n_elem; i+=2, j+=2)
|
Chris@49
|
261 {
|
Chris@49
|
262 const eT tmp_i = Pea[i];
|
Chris@49
|
263 const eT tmp_j = Pea[j];
|
Chris@49
|
264
|
Chris@49
|
265 out_mem[i] = tmp_i;
|
Chris@49
|
266 out_mem[j] = tmp_j;
|
Chris@49
|
267 }
|
Chris@49
|
268
|
Chris@49
|
269 if(i < n_elem)
|
Chris@49
|
270 {
|
Chris@49
|
271 out_mem[i] = Pea[i];
|
Chris@49
|
272 }
|
Chris@49
|
273
|
Chris@49
|
274 out.steal_mem(out2);
|
Chris@49
|
275 }
|
Chris@49
|
276 }
|
Chris@49
|
277 else // general matrix transpose
|
Chris@49
|
278 {
|
Chris@49
|
279 if(is_alias == false)
|
Chris@49
|
280 {
|
Chris@49
|
281 out.set_size(n_cols, n_rows);
|
Chris@49
|
282
|
Chris@49
|
283 for(uword k=0; k < n_cols; ++k)
|
Chris@49
|
284 {
|
Chris@49
|
285 uword i, j;
|
Chris@49
|
286
|
Chris@49
|
287 for(i=0, j=1; j < n_rows; i+=2, j+=2)
|
Chris@49
|
288 {
|
Chris@49
|
289 const eT tmp_i = P.at(i,k);
|
Chris@49
|
290 const eT tmp_j = P.at(j,k);
|
Chris@49
|
291
|
Chris@49
|
292 out.at(k,i) = tmp_i;
|
Chris@49
|
293 out.at(k,j) = tmp_j;
|
Chris@49
|
294 }
|
Chris@49
|
295
|
Chris@49
|
296 if(i < n_rows)
|
Chris@49
|
297 {
|
Chris@49
|
298 out.at(k,i) = P.at(i,k);
|
Chris@49
|
299 }
|
Chris@49
|
300 }
|
Chris@49
|
301 }
|
Chris@49
|
302 else // aliasing
|
Chris@49
|
303 {
|
Chris@49
|
304 Mat<eT> out2(n_cols, n_rows);
|
Chris@49
|
305
|
Chris@49
|
306 for(uword k=0; k < n_cols; ++k)
|
Chris@49
|
307 {
|
Chris@49
|
308 uword i, j;
|
Chris@49
|
309
|
Chris@49
|
310 for(i=0, j=1; j < n_rows; i+=2, j+=2)
|
Chris@49
|
311 {
|
Chris@49
|
312 const eT tmp_i = P.at(i,k);
|
Chris@49
|
313 const eT tmp_j = P.at(j,k);
|
Chris@49
|
314
|
Chris@49
|
315 out2.at(k,i) = tmp_i;
|
Chris@49
|
316 out2.at(k,j) = tmp_j;
|
Chris@49
|
317 }
|
Chris@49
|
318
|
Chris@49
|
319 if(i < n_rows)
|
Chris@49
|
320 {
|
Chris@49
|
321 out2.at(k,i) = P.at(i,k);
|
Chris@49
|
322 }
|
Chris@49
|
323 }
|
Chris@49
|
324
|
Chris@49
|
325 out.steal_mem(out2);
|
Chris@49
|
326 }
|
Chris@49
|
327 }
|
Chris@49
|
328 }
|
Chris@49
|
329 }
|
Chris@49
|
330
|
Chris@49
|
331
|
Chris@49
|
332
|
Chris@49
|
333 template<typename T1>
|
Chris@49
|
334 arma_hot
|
Chris@49
|
335 inline
|
Chris@49
|
336 void
|
Chris@49
|
337 op_strans::apply(Mat<typename T1::elem_type>& out, const Op<T1,op_strans>& in)
|
Chris@49
|
338 {
|
Chris@49
|
339 arma_extra_debug_sigprint();
|
Chris@49
|
340
|
Chris@49
|
341 op_strans::apply_proxy(out, in.m);
|
Chris@49
|
342 }
|
Chris@49
|
343
|
Chris@49
|
344
|
Chris@49
|
345
|
Chris@49
|
346 //
|
Chris@49
|
347 // op_strans2
|
Chris@49
|
348
|
Chris@49
|
349
|
Chris@49
|
350
|
Chris@49
|
351 //! for tiny square matrices (size <= 4x4)
|
Chris@49
|
352 template<typename eT, typename TA>
|
Chris@49
|
353 arma_hot
|
Chris@49
|
354 inline
|
Chris@49
|
355 void
|
Chris@49
|
356 op_strans2::apply_noalias_tinysq(Mat<eT>& out, const TA& A, const eT val)
|
Chris@49
|
357 {
|
Chris@49
|
358 const eT* Am = A.memptr();
|
Chris@49
|
359 eT* outm = out.memptr();
|
Chris@49
|
360
|
Chris@49
|
361 switch(A.n_rows)
|
Chris@49
|
362 {
|
Chris@49
|
363 case 1:
|
Chris@49
|
364 {
|
Chris@49
|
365 outm[0] = val * Am[0];
|
Chris@49
|
366 }
|
Chris@49
|
367 break;
|
Chris@49
|
368
|
Chris@49
|
369 case 2:
|
Chris@49
|
370 {
|
Chris@49
|
371 outm[pos<false,0,0>::n2] = val * Am[pos<true,0,0>::n2];
|
Chris@49
|
372 outm[pos<false,1,0>::n2] = val * Am[pos<true,1,0>::n2];
|
Chris@49
|
373
|
Chris@49
|
374 outm[pos<false,0,1>::n2] = val * Am[pos<true,0,1>::n2];
|
Chris@49
|
375 outm[pos<false,1,1>::n2] = val * Am[pos<true,1,1>::n2];
|
Chris@49
|
376 }
|
Chris@49
|
377 break;
|
Chris@49
|
378
|
Chris@49
|
379 case 3:
|
Chris@49
|
380 {
|
Chris@49
|
381 outm[pos<false,0,0>::n3] = val * Am[pos<true,0,0>::n3];
|
Chris@49
|
382 outm[pos<false,1,0>::n3] = val * Am[pos<true,1,0>::n3];
|
Chris@49
|
383 outm[pos<false,2,0>::n3] = val * Am[pos<true,2,0>::n3];
|
Chris@49
|
384
|
Chris@49
|
385 outm[pos<false,0,1>::n3] = val * Am[pos<true,0,1>::n3];
|
Chris@49
|
386 outm[pos<false,1,1>::n3] = val * Am[pos<true,1,1>::n3];
|
Chris@49
|
387 outm[pos<false,2,1>::n3] = val * Am[pos<true,2,1>::n3];
|
Chris@49
|
388
|
Chris@49
|
389 outm[pos<false,0,2>::n3] = val * Am[pos<true,0,2>::n3];
|
Chris@49
|
390 outm[pos<false,1,2>::n3] = val * Am[pos<true,1,2>::n3];
|
Chris@49
|
391 outm[pos<false,2,2>::n3] = val * Am[pos<true,2,2>::n3];
|
Chris@49
|
392 }
|
Chris@49
|
393 break;
|
Chris@49
|
394
|
Chris@49
|
395 case 4:
|
Chris@49
|
396 {
|
Chris@49
|
397 outm[pos<false,0,0>::n4] = val * Am[pos<true,0,0>::n4];
|
Chris@49
|
398 outm[pos<false,1,0>::n4] = val * Am[pos<true,1,0>::n4];
|
Chris@49
|
399 outm[pos<false,2,0>::n4] = val * Am[pos<true,2,0>::n4];
|
Chris@49
|
400 outm[pos<false,3,0>::n4] = val * Am[pos<true,3,0>::n4];
|
Chris@49
|
401
|
Chris@49
|
402 outm[pos<false,0,1>::n4] = val * Am[pos<true,0,1>::n4];
|
Chris@49
|
403 outm[pos<false,1,1>::n4] = val * Am[pos<true,1,1>::n4];
|
Chris@49
|
404 outm[pos<false,2,1>::n4] = val * Am[pos<true,2,1>::n4];
|
Chris@49
|
405 outm[pos<false,3,1>::n4] = val * Am[pos<true,3,1>::n4];
|
Chris@49
|
406
|
Chris@49
|
407 outm[pos<false,0,2>::n4] = val * Am[pos<true,0,2>::n4];
|
Chris@49
|
408 outm[pos<false,1,2>::n4] = val * Am[pos<true,1,2>::n4];
|
Chris@49
|
409 outm[pos<false,2,2>::n4] = val * Am[pos<true,2,2>::n4];
|
Chris@49
|
410 outm[pos<false,3,2>::n4] = val * Am[pos<true,3,2>::n4];
|
Chris@49
|
411
|
Chris@49
|
412 outm[pos<false,0,3>::n4] = val * Am[pos<true,0,3>::n4];
|
Chris@49
|
413 outm[pos<false,1,3>::n4] = val * Am[pos<true,1,3>::n4];
|
Chris@49
|
414 outm[pos<false,2,3>::n4] = val * Am[pos<true,2,3>::n4];
|
Chris@49
|
415 outm[pos<false,3,3>::n4] = val * Am[pos<true,3,3>::n4];
|
Chris@49
|
416 }
|
Chris@49
|
417 break;
|
Chris@49
|
418
|
Chris@49
|
419 default:
|
Chris@49
|
420 ;
|
Chris@49
|
421 }
|
Chris@49
|
422
|
Chris@49
|
423 }
|
Chris@49
|
424
|
Chris@49
|
425
|
Chris@49
|
426
|
Chris@49
|
427 template<typename eT, typename TA>
|
Chris@49
|
428 arma_hot
|
Chris@49
|
429 inline
|
Chris@49
|
430 void
|
Chris@49
|
431 op_strans2::apply_noalias(Mat<eT>& out, const TA& A, const eT val)
|
Chris@49
|
432 {
|
Chris@49
|
433 arma_extra_debug_sigprint();
|
Chris@49
|
434
|
Chris@49
|
435 const uword A_n_cols = A.n_cols;
|
Chris@49
|
436 const uword A_n_rows = A.n_rows;
|
Chris@49
|
437
|
Chris@49
|
438 out.set_size(A_n_cols, A_n_rows);
|
Chris@49
|
439
|
Chris@49
|
440 if( (TA::is_col) || (TA::is_row) || (A_n_cols == 1) || (A_n_rows == 1) )
|
Chris@49
|
441 {
|
Chris@49
|
442 const uword N = A.n_elem;
|
Chris@49
|
443
|
Chris@49
|
444 const eT* A_mem = A.memptr();
|
Chris@49
|
445 eT* out_mem = out.memptr();
|
Chris@49
|
446
|
Chris@49
|
447 uword i,j;
|
Chris@49
|
448 for(i=0, j=1; j < N; i+=2, j+=2)
|
Chris@49
|
449 {
|
Chris@49
|
450 const eT tmp_i = A_mem[i];
|
Chris@49
|
451 const eT tmp_j = A_mem[j];
|
Chris@49
|
452
|
Chris@49
|
453 out_mem[i] = val * tmp_i;
|
Chris@49
|
454 out_mem[j] = val * tmp_j;
|
Chris@49
|
455 }
|
Chris@49
|
456
|
Chris@49
|
457 if(i < N)
|
Chris@49
|
458 {
|
Chris@49
|
459 out_mem[i] = val * A_mem[i];
|
Chris@49
|
460 }
|
Chris@49
|
461 }
|
Chris@49
|
462 else
|
Chris@49
|
463 {
|
Chris@49
|
464 if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
|
Chris@49
|
465 {
|
Chris@49
|
466 op_strans2::apply_noalias_tinysq(out, A, val);
|
Chris@49
|
467 }
|
Chris@49
|
468 else
|
Chris@49
|
469 {
|
Chris@49
|
470 for(uword k=0; k < A_n_cols; ++k)
|
Chris@49
|
471 {
|
Chris@49
|
472 uword i, j;
|
Chris@49
|
473
|
Chris@49
|
474 const eT* colptr = A.colptr(k);
|
Chris@49
|
475
|
Chris@49
|
476 for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
|
Chris@49
|
477 {
|
Chris@49
|
478 const eT tmp_i = colptr[i];
|
Chris@49
|
479 const eT tmp_j = colptr[j];
|
Chris@49
|
480
|
Chris@49
|
481 out.at(k, i) = val * tmp_i;
|
Chris@49
|
482 out.at(k, j) = val * tmp_j;
|
Chris@49
|
483 }
|
Chris@49
|
484
|
Chris@49
|
485 if(i < A_n_rows)
|
Chris@49
|
486 {
|
Chris@49
|
487 out.at(k, i) = val * colptr[i];
|
Chris@49
|
488 }
|
Chris@49
|
489 }
|
Chris@49
|
490 }
|
Chris@49
|
491 }
|
Chris@49
|
492 }
|
Chris@49
|
493
|
Chris@49
|
494
|
Chris@49
|
495
|
Chris@49
|
496 template<typename eT, typename TA>
|
Chris@49
|
497 arma_hot
|
Chris@49
|
498 inline
|
Chris@49
|
499 void
|
Chris@49
|
500 op_strans2::apply(Mat<eT>& out, const TA& A, const eT val)
|
Chris@49
|
501 {
|
Chris@49
|
502 arma_extra_debug_sigprint();
|
Chris@49
|
503
|
Chris@49
|
504 if(&out != &A)
|
Chris@49
|
505 {
|
Chris@49
|
506 op_strans2::apply_noalias(out, A, val);
|
Chris@49
|
507 }
|
Chris@49
|
508 else
|
Chris@49
|
509 {
|
Chris@49
|
510 const uword n_rows = out.n_rows;
|
Chris@49
|
511 const uword n_cols = out.n_cols;
|
Chris@49
|
512
|
Chris@49
|
513 if(n_rows == n_cols)
|
Chris@49
|
514 {
|
Chris@49
|
515 arma_extra_debug_print("op_strans2::apply(): doing in-place transpose of a square matrix");
|
Chris@49
|
516
|
Chris@49
|
517 const uword N = n_rows;
|
Chris@49
|
518
|
Chris@49
|
519 // TODO: do multiplication while swapping
|
Chris@49
|
520
|
Chris@49
|
521 for(uword k=0; k < N; ++k)
|
Chris@49
|
522 {
|
Chris@49
|
523 eT* colptr = out.colptr(k);
|
Chris@49
|
524
|
Chris@49
|
525 uword i,j;
|
Chris@49
|
526
|
Chris@49
|
527 for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
|
Chris@49
|
528 {
|
Chris@49
|
529 std::swap(out.at(k,i), colptr[i]);
|
Chris@49
|
530 std::swap(out.at(k,j), colptr[j]);
|
Chris@49
|
531 }
|
Chris@49
|
532
|
Chris@49
|
533 if(i < N)
|
Chris@49
|
534 {
|
Chris@49
|
535 std::swap(out.at(k,i), colptr[i]);
|
Chris@49
|
536 }
|
Chris@49
|
537 }
|
Chris@49
|
538
|
Chris@49
|
539 arrayops::inplace_mul( out.memptr(), val, out.n_elem );
|
Chris@49
|
540 }
|
Chris@49
|
541 else
|
Chris@49
|
542 {
|
Chris@49
|
543 Mat<eT> tmp;
|
Chris@49
|
544 op_strans2::apply_noalias(tmp, A, val);
|
Chris@49
|
545
|
Chris@49
|
546 out.steal_mem(tmp);
|
Chris@49
|
547 }
|
Chris@49
|
548 }
|
Chris@49
|
549 }
|
Chris@49
|
550
|
Chris@49
|
551
|
Chris@49
|
552
|
Chris@49
|
553 template<typename T1>
|
Chris@49
|
554 arma_hot
|
Chris@49
|
555 inline
|
Chris@49
|
556 void
|
Chris@49
|
557 op_strans2::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X, const typename T1::elem_type val)
|
Chris@49
|
558 {
|
Chris@49
|
559 arma_extra_debug_sigprint();
|
Chris@49
|
560
|
Chris@49
|
561 typedef typename T1::elem_type eT;
|
Chris@49
|
562
|
Chris@49
|
563 const Proxy<T1> P(X);
|
Chris@49
|
564
|
Chris@49
|
565 // allow detection of in-place transpose
|
Chris@49
|
566 if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
|
Chris@49
|
567 {
|
Chris@49
|
568 const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
|
Chris@49
|
569
|
Chris@49
|
570 op_strans2::apply(out, tmp.M, val);
|
Chris@49
|
571 }
|
Chris@49
|
572 else
|
Chris@49
|
573 {
|
Chris@49
|
574 const uword n_rows = P.get_n_rows();
|
Chris@49
|
575 const uword n_cols = P.get_n_cols();
|
Chris@49
|
576
|
Chris@49
|
577 const bool is_alias = P.is_alias(out);
|
Chris@49
|
578
|
Chris@49
|
579 if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
|
Chris@49
|
580 {
|
Chris@49
|
581 if(is_alias == false)
|
Chris@49
|
582 {
|
Chris@49
|
583 out.set_size(n_cols, n_rows);
|
Chris@49
|
584
|
Chris@49
|
585 eT* out_mem = out.memptr();
|
Chris@49
|
586
|
Chris@49
|
587 const uword n_elem = P.get_n_elem();
|
Chris@49
|
588
|
Chris@49
|
589 typename Proxy<T1>::ea_type Pea = P.get_ea();
|
Chris@49
|
590
|
Chris@49
|
591 uword i,j;
|
Chris@49
|
592 for(i=0, j=1; j < n_elem; i+=2, j+=2)
|
Chris@49
|
593 {
|
Chris@49
|
594 const eT tmp_i = Pea[i];
|
Chris@49
|
595 const eT tmp_j = Pea[j];
|
Chris@49
|
596
|
Chris@49
|
597 out_mem[i] = val * tmp_i;
|
Chris@49
|
598 out_mem[j] = val * tmp_j;
|
Chris@49
|
599 }
|
Chris@49
|
600
|
Chris@49
|
601 if(i < n_elem)
|
Chris@49
|
602 {
|
Chris@49
|
603 out_mem[i] = val * Pea[i];
|
Chris@49
|
604 }
|
Chris@49
|
605 }
|
Chris@49
|
606 else // aliasing
|
Chris@49
|
607 {
|
Chris@49
|
608 Mat<eT> out2(n_cols, n_rows);
|
Chris@49
|
609
|
Chris@49
|
610 eT* out_mem = out2.memptr();
|
Chris@49
|
611
|
Chris@49
|
612 const uword n_elem = P.get_n_elem();
|
Chris@49
|
613
|
Chris@49
|
614 typename Proxy<T1>::ea_type Pea = P.get_ea();
|
Chris@49
|
615
|
Chris@49
|
616 uword i,j;
|
Chris@49
|
617 for(i=0, j=1; j < n_elem; i+=2, j+=2)
|
Chris@49
|
618 {
|
Chris@49
|
619 const eT tmp_i = Pea[i];
|
Chris@49
|
620 const eT tmp_j = Pea[j];
|
Chris@49
|
621
|
Chris@49
|
622 out_mem[i] = val * tmp_i;
|
Chris@49
|
623 out_mem[j] = val * tmp_j;
|
Chris@49
|
624 }
|
Chris@49
|
625
|
Chris@49
|
626 if(i < n_elem)
|
Chris@49
|
627 {
|
Chris@49
|
628 out_mem[i] = val * Pea[i];
|
Chris@49
|
629 }
|
Chris@49
|
630
|
Chris@49
|
631 out.steal_mem(out2);
|
Chris@49
|
632 }
|
Chris@49
|
633 }
|
Chris@49
|
634 else // general matrix transpose
|
Chris@49
|
635 {
|
Chris@49
|
636 if(is_alias == false)
|
Chris@49
|
637 {
|
Chris@49
|
638 out.set_size(n_cols, n_rows);
|
Chris@49
|
639
|
Chris@49
|
640 for(uword k=0; k < n_cols; ++k)
|
Chris@49
|
641 {
|
Chris@49
|
642 uword i, j;
|
Chris@49
|
643
|
Chris@49
|
644 for(i=0, j=1; j < n_rows; i+=2, j+=2)
|
Chris@49
|
645 {
|
Chris@49
|
646 const eT tmp_i = P.at(i,k);
|
Chris@49
|
647 const eT tmp_j = P.at(j,k);
|
Chris@49
|
648
|
Chris@49
|
649 out.at(k,i) = val * tmp_i;
|
Chris@49
|
650 out.at(k,j) = val * tmp_j;
|
Chris@49
|
651 }
|
Chris@49
|
652
|
Chris@49
|
653 if(i < n_rows)
|
Chris@49
|
654 {
|
Chris@49
|
655 out.at(k,i) = val * P.at(i,k);
|
Chris@49
|
656 }
|
Chris@49
|
657 }
|
Chris@49
|
658 }
|
Chris@49
|
659 else // aliasing
|
Chris@49
|
660 {
|
Chris@49
|
661 Mat<eT> out2(n_cols, n_rows);
|
Chris@49
|
662
|
Chris@49
|
663 for(uword k=0; k < n_cols; ++k)
|
Chris@49
|
664 {
|
Chris@49
|
665 uword i, j;
|
Chris@49
|
666
|
Chris@49
|
667 for(i=0, j=1; j < n_rows; i+=2, j+=2)
|
Chris@49
|
668 {
|
Chris@49
|
669 const eT tmp_i = P.at(i,k);
|
Chris@49
|
670 const eT tmp_j = P.at(j,k);
|
Chris@49
|
671
|
Chris@49
|
672 out2.at(k,i) = val * tmp_i;
|
Chris@49
|
673 out2.at(k,j) = val * tmp_j;
|
Chris@49
|
674 }
|
Chris@49
|
675
|
Chris@49
|
676 if(i < n_rows)
|
Chris@49
|
677 {
|
Chris@49
|
678 out2.at(k,i) = val * P.at(i,k);
|
Chris@49
|
679 }
|
Chris@49
|
680 }
|
Chris@49
|
681
|
Chris@49
|
682 out.steal_mem(out2);
|
Chris@49
|
683 }
|
Chris@49
|
684 }
|
Chris@49
|
685 }
|
Chris@49
|
686 }
|
Chris@49
|
687
|
Chris@49
|
688
|
Chris@49
|
689
|
Chris@49
|
690 //! @}
|