Chris@49
|
1 // Copyright (C) 2008-2013 NICTA (www.nicta.com.au)
|
Chris@49
|
2 // Copyright (C) 2008-2013 Conrad Sanderson
|
Chris@49
|
3 //
|
Chris@49
|
4 // This Source Code Form is subject to the terms of the Mozilla Public
|
Chris@49
|
5 // License, v. 2.0. If a copy of the MPL was not distributed with this
|
Chris@49
|
6 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
Chris@49
|
7
|
Chris@49
|
8
|
Chris@49
|
9 //! \addtogroup gemv
|
Chris@49
|
10 //! @{
|
Chris@49
|
11
|
Chris@49
|
12
|
Chris@49
|
13
|
Chris@49
|
14 //! for tiny square matrices, size <= 4x4
|
Chris@49
|
15 template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>
|
Chris@49
|
16 class gemv_emul_tinysq
|
Chris@49
|
17 {
|
Chris@49
|
18 public:
|
Chris@49
|
19
|
Chris@49
|
20
|
Chris@49
|
21 template<const uword row, const uword col>
|
Chris@49
|
22 struct pos
|
Chris@49
|
23 {
|
Chris@49
|
24 static const uword n2 = (do_trans_A == false) ? (row + col*2) : (col + row*2);
|
Chris@49
|
25 static const uword n3 = (do_trans_A == false) ? (row + col*3) : (col + row*3);
|
Chris@49
|
26 static const uword n4 = (do_trans_A == false) ? (row + col*4) : (col + row*4);
|
Chris@49
|
27 };
|
Chris@49
|
28
|
Chris@49
|
29
|
Chris@49
|
30
|
Chris@49
|
31 template<typename eT, const uword i>
|
Chris@49
|
32 arma_hot
|
Chris@49
|
33 arma_inline
|
Chris@49
|
34 static
|
Chris@49
|
35 void
|
Chris@49
|
36 assign(eT* y, const eT acc, const eT alpha, const eT beta)
|
Chris@49
|
37 {
|
Chris@49
|
38 if(use_beta == false)
|
Chris@49
|
39 {
|
Chris@49
|
40 y[i] = (use_alpha == false) ? acc : alpha*acc;
|
Chris@49
|
41 }
|
Chris@49
|
42 else
|
Chris@49
|
43 {
|
Chris@49
|
44 const eT tmp = y[i];
|
Chris@49
|
45
|
Chris@49
|
46 y[i] = beta*tmp + ( (use_alpha == false) ? acc : alpha*acc );
|
Chris@49
|
47 }
|
Chris@49
|
48 }
|
Chris@49
|
49
|
Chris@49
|
50
|
Chris@49
|
51
|
Chris@49
|
52 template<typename eT, typename TA>
|
Chris@49
|
53 arma_hot
|
Chris@49
|
54 inline
|
Chris@49
|
55 static
|
Chris@49
|
56 void
|
Chris@49
|
57 apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )
|
Chris@49
|
58 {
|
Chris@49
|
59 arma_extra_debug_sigprint();
|
Chris@49
|
60
|
Chris@49
|
61 const eT* Am = A.memptr();
|
Chris@49
|
62
|
Chris@49
|
63 switch(A.n_rows)
|
Chris@49
|
64 {
|
Chris@49
|
65 case 1:
|
Chris@49
|
66 {
|
Chris@49
|
67 const eT acc = Am[0] * x[0];
|
Chris@49
|
68
|
Chris@49
|
69 assign<eT, 0>(y, acc, alpha, beta);
|
Chris@49
|
70 }
|
Chris@49
|
71 break;
|
Chris@49
|
72
|
Chris@49
|
73
|
Chris@49
|
74 case 2:
|
Chris@49
|
75 {
|
Chris@49
|
76 const eT x0 = x[0];
|
Chris@49
|
77 const eT x1 = x[1];
|
Chris@49
|
78
|
Chris@49
|
79 const eT acc0 = Am[pos<0,0>::n2]*x0 + Am[pos<0,1>::n2]*x1;
|
Chris@49
|
80 const eT acc1 = Am[pos<1,0>::n2]*x0 + Am[pos<1,1>::n2]*x1;
|
Chris@49
|
81
|
Chris@49
|
82 assign<eT, 0>(y, acc0, alpha, beta);
|
Chris@49
|
83 assign<eT, 1>(y, acc1, alpha, beta);
|
Chris@49
|
84 }
|
Chris@49
|
85 break;
|
Chris@49
|
86
|
Chris@49
|
87
|
Chris@49
|
88 case 3:
|
Chris@49
|
89 {
|
Chris@49
|
90 const eT x0 = x[0];
|
Chris@49
|
91 const eT x1 = x[1];
|
Chris@49
|
92 const eT x2 = x[2];
|
Chris@49
|
93
|
Chris@49
|
94 const eT acc0 = Am[pos<0,0>::n3]*x0 + Am[pos<0,1>::n3]*x1 + Am[pos<0,2>::n3]*x2;
|
Chris@49
|
95 const eT acc1 = Am[pos<1,0>::n3]*x0 + Am[pos<1,1>::n3]*x1 + Am[pos<1,2>::n3]*x2;
|
Chris@49
|
96 const eT acc2 = Am[pos<2,0>::n3]*x0 + Am[pos<2,1>::n3]*x1 + Am[pos<2,2>::n3]*x2;
|
Chris@49
|
97
|
Chris@49
|
98 assign<eT, 0>(y, acc0, alpha, beta);
|
Chris@49
|
99 assign<eT, 1>(y, acc1, alpha, beta);
|
Chris@49
|
100 assign<eT, 2>(y, acc2, alpha, beta);
|
Chris@49
|
101 }
|
Chris@49
|
102 break;
|
Chris@49
|
103
|
Chris@49
|
104
|
Chris@49
|
105 case 4:
|
Chris@49
|
106 {
|
Chris@49
|
107 const eT x0 = x[0];
|
Chris@49
|
108 const eT x1 = x[1];
|
Chris@49
|
109 const eT x2 = x[2];
|
Chris@49
|
110 const eT x3 = x[3];
|
Chris@49
|
111
|
Chris@49
|
112 const eT acc0 = Am[pos<0,0>::n4]*x0 + Am[pos<0,1>::n4]*x1 + Am[pos<0,2>::n4]*x2 + Am[pos<0,3>::n4]*x3;
|
Chris@49
|
113 const eT acc1 = Am[pos<1,0>::n4]*x0 + Am[pos<1,1>::n4]*x1 + Am[pos<1,2>::n4]*x2 + Am[pos<1,3>::n4]*x3;
|
Chris@49
|
114 const eT acc2 = Am[pos<2,0>::n4]*x0 + Am[pos<2,1>::n4]*x1 + Am[pos<2,2>::n4]*x2 + Am[pos<2,3>::n4]*x3;
|
Chris@49
|
115 const eT acc3 = Am[pos<3,0>::n4]*x0 + Am[pos<3,1>::n4]*x1 + Am[pos<3,2>::n4]*x2 + Am[pos<3,3>::n4]*x3;
|
Chris@49
|
116
|
Chris@49
|
117 assign<eT, 0>(y, acc0, alpha, beta);
|
Chris@49
|
118 assign<eT, 1>(y, acc1, alpha, beta);
|
Chris@49
|
119 assign<eT, 2>(y, acc2, alpha, beta);
|
Chris@49
|
120 assign<eT, 3>(y, acc3, alpha, beta);
|
Chris@49
|
121 }
|
Chris@49
|
122 break;
|
Chris@49
|
123
|
Chris@49
|
124
|
Chris@49
|
125 default:
|
Chris@49
|
126 ;
|
Chris@49
|
127 }
|
Chris@49
|
128 }
|
Chris@49
|
129
|
Chris@49
|
130 };
|
Chris@49
|
131
|
Chris@49
|
132
|
Chris@49
|
133
|
Chris@49
|
134 class gemv_emul_large_helper
|
Chris@49
|
135 {
|
Chris@49
|
136 public:
|
Chris@49
|
137
|
Chris@49
|
138 template<typename eT, typename TA>
|
Chris@49
|
139 arma_hot
|
Chris@49
|
140 inline
|
Chris@49
|
141 static
|
Chris@49
|
142 typename arma_not_cx<eT>::result
|
Chris@49
|
143 dot_row_col( const TA& A, const eT* x, const uword row, const uword N )
|
Chris@49
|
144 {
|
Chris@49
|
145 eT acc1 = eT(0);
|
Chris@49
|
146 eT acc2 = eT(0);
|
Chris@49
|
147
|
Chris@49
|
148 uword i,j;
|
Chris@49
|
149 for(i=0, j=1; j < N; i+=2, j+=2)
|
Chris@49
|
150 {
|
Chris@49
|
151 const eT xi = x[i];
|
Chris@49
|
152 const eT xj = x[j];
|
Chris@49
|
153
|
Chris@49
|
154 acc1 += A.at(row,i) * xi;
|
Chris@49
|
155 acc2 += A.at(row,j) * xj;
|
Chris@49
|
156 }
|
Chris@49
|
157
|
Chris@49
|
158 if(i < N)
|
Chris@49
|
159 {
|
Chris@49
|
160 acc1 += A.at(row,i) * x[i];
|
Chris@49
|
161 }
|
Chris@49
|
162
|
Chris@49
|
163 return (acc1 + acc2);
|
Chris@49
|
164 }
|
Chris@49
|
165
|
Chris@49
|
166
|
Chris@49
|
167
|
Chris@49
|
168 template<typename eT, typename TA>
|
Chris@49
|
169 arma_hot
|
Chris@49
|
170 inline
|
Chris@49
|
171 static
|
Chris@49
|
172 typename arma_cx_only<eT>::result
|
Chris@49
|
173 dot_row_col( const TA& A, const eT* x, const uword row, const uword N )
|
Chris@49
|
174 {
|
Chris@49
|
175 typedef typename get_pod_type<eT>::result T;
|
Chris@49
|
176
|
Chris@49
|
177 T val_real = T(0);
|
Chris@49
|
178 T val_imag = T(0);
|
Chris@49
|
179
|
Chris@49
|
180 for(uword i=0; i<N; ++i)
|
Chris@49
|
181 {
|
Chris@49
|
182 const std::complex<T>& Ai = A.at(row,i);
|
Chris@49
|
183 const std::complex<T>& xi = x[i];
|
Chris@49
|
184
|
Chris@49
|
185 const T a = Ai.real();
|
Chris@49
|
186 const T b = Ai.imag();
|
Chris@49
|
187
|
Chris@49
|
188 const T c = xi.real();
|
Chris@49
|
189 const T d = xi.imag();
|
Chris@49
|
190
|
Chris@49
|
191 val_real += (a*c) - (b*d);
|
Chris@49
|
192 val_imag += (a*d) + (b*c);
|
Chris@49
|
193 }
|
Chris@49
|
194
|
Chris@49
|
195 return std::complex<T>(val_real, val_imag);
|
Chris@49
|
196 }
|
Chris@49
|
197
|
Chris@49
|
198 };
|
Chris@49
|
199
|
Chris@49
|
200
|
Chris@49
|
201
|
Chris@49
|
202 //! \brief
|
Chris@49
|
203 //! Partial emulation of ATLAS/BLAS gemv().
|
Chris@49
|
204 //! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose)
|
Chris@49
|
205
|
Chris@49
|
206 template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>
|
Chris@49
|
207 class gemv_emul_large
|
Chris@49
|
208 {
|
Chris@49
|
209 public:
|
Chris@49
|
210
|
Chris@49
|
211 template<typename eT, typename TA>
|
Chris@49
|
212 arma_hot
|
Chris@49
|
213 inline
|
Chris@49
|
214 static
|
Chris@49
|
215 void
|
Chris@49
|
216 apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )
|
Chris@49
|
217 {
|
Chris@49
|
218 arma_extra_debug_sigprint();
|
Chris@49
|
219
|
Chris@49
|
220 const uword A_n_rows = A.n_rows;
|
Chris@49
|
221 const uword A_n_cols = A.n_cols;
|
Chris@49
|
222
|
Chris@49
|
223 if(do_trans_A == false)
|
Chris@49
|
224 {
|
Chris@49
|
225 if(A_n_rows == 1)
|
Chris@49
|
226 {
|
Chris@49
|
227 const eT acc = op_dot::direct_dot_arma(A_n_cols, A.memptr(), x);
|
Chris@49
|
228
|
Chris@49
|
229 if( (use_alpha == false) && (use_beta == false) )
|
Chris@49
|
230 {
|
Chris@49
|
231 y[0] = acc;
|
Chris@49
|
232 }
|
Chris@49
|
233 else
|
Chris@49
|
234 if( (use_alpha == true) && (use_beta == false) )
|
Chris@49
|
235 {
|
Chris@49
|
236 y[0] = alpha * acc;
|
Chris@49
|
237 }
|
Chris@49
|
238 else
|
Chris@49
|
239 if( (use_alpha == false) && (use_beta == true) )
|
Chris@49
|
240 {
|
Chris@49
|
241 y[0] = acc + beta*y[0];
|
Chris@49
|
242 }
|
Chris@49
|
243 else
|
Chris@49
|
244 if( (use_alpha == true) && (use_beta == true) )
|
Chris@49
|
245 {
|
Chris@49
|
246 y[0] = alpha*acc + beta*y[0];
|
Chris@49
|
247 }
|
Chris@49
|
248 }
|
Chris@49
|
249 else
|
Chris@49
|
250 for(uword row=0; row < A_n_rows; ++row)
|
Chris@49
|
251 {
|
Chris@49
|
252 const eT acc = gemv_emul_large_helper::dot_row_col(A, x, row, A_n_cols);
|
Chris@49
|
253
|
Chris@49
|
254 if( (use_alpha == false) && (use_beta == false) )
|
Chris@49
|
255 {
|
Chris@49
|
256 y[row] = acc;
|
Chris@49
|
257 }
|
Chris@49
|
258 else
|
Chris@49
|
259 if( (use_alpha == true) && (use_beta == false) )
|
Chris@49
|
260 {
|
Chris@49
|
261 y[row] = alpha * acc;
|
Chris@49
|
262 }
|
Chris@49
|
263 else
|
Chris@49
|
264 if( (use_alpha == false) && (use_beta == true) )
|
Chris@49
|
265 {
|
Chris@49
|
266 y[row] = acc + beta*y[row];
|
Chris@49
|
267 }
|
Chris@49
|
268 else
|
Chris@49
|
269 if( (use_alpha == true) && (use_beta == true) )
|
Chris@49
|
270 {
|
Chris@49
|
271 y[row] = alpha*acc + beta*y[row];
|
Chris@49
|
272 }
|
Chris@49
|
273 }
|
Chris@49
|
274 }
|
Chris@49
|
275 else
|
Chris@49
|
276 if(do_trans_A == true)
|
Chris@49
|
277 {
|
Chris@49
|
278 for(uword col=0; col < A_n_cols; ++col)
|
Chris@49
|
279 {
|
Chris@49
|
280 // col is interpreted as row when storing the results in 'y'
|
Chris@49
|
281
|
Chris@49
|
282
|
Chris@49
|
283 // const eT* A_coldata = A.colptr(col);
|
Chris@49
|
284 //
|
Chris@49
|
285 // eT acc = eT(0);
|
Chris@49
|
286 // for(uword row=0; row < A_n_rows; ++row)
|
Chris@49
|
287 // {
|
Chris@49
|
288 // acc += A_coldata[row] * x[row];
|
Chris@49
|
289 // }
|
Chris@49
|
290
|
Chris@49
|
291 const eT acc = op_dot::direct_dot_arma(A_n_rows, A.colptr(col), x);
|
Chris@49
|
292
|
Chris@49
|
293 if( (use_alpha == false) && (use_beta == false) )
|
Chris@49
|
294 {
|
Chris@49
|
295 y[col] = acc;
|
Chris@49
|
296 }
|
Chris@49
|
297 else
|
Chris@49
|
298 if( (use_alpha == true) && (use_beta == false) )
|
Chris@49
|
299 {
|
Chris@49
|
300 y[col] = alpha * acc;
|
Chris@49
|
301 }
|
Chris@49
|
302 else
|
Chris@49
|
303 if( (use_alpha == false) && (use_beta == true) )
|
Chris@49
|
304 {
|
Chris@49
|
305 y[col] = acc + beta*y[col];
|
Chris@49
|
306 }
|
Chris@49
|
307 else
|
Chris@49
|
308 if( (use_alpha == true) && (use_beta == true) )
|
Chris@49
|
309 {
|
Chris@49
|
310 y[col] = alpha*acc + beta*y[col];
|
Chris@49
|
311 }
|
Chris@49
|
312
|
Chris@49
|
313 }
|
Chris@49
|
314 }
|
Chris@49
|
315 }
|
Chris@49
|
316
|
Chris@49
|
317 };
|
Chris@49
|
318
|
Chris@49
|
319
|
Chris@49
|
320
|
Chris@49
|
321 template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>
|
Chris@49
|
322 class gemv_emul
|
Chris@49
|
323 {
|
Chris@49
|
324 public:
|
Chris@49
|
325
|
Chris@49
|
326 template<typename eT, typename TA>
|
Chris@49
|
327 arma_hot
|
Chris@49
|
328 inline
|
Chris@49
|
329 static
|
Chris@49
|
330 void
|
Chris@49
|
331 apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0), const typename arma_not_cx<eT>::result* junk = 0 )
|
Chris@49
|
332 {
|
Chris@49
|
333 arma_extra_debug_sigprint();
|
Chris@49
|
334 arma_ignore(junk);
|
Chris@49
|
335
|
Chris@49
|
336 const uword A_n_rows = A.n_rows;
|
Chris@49
|
337 const uword A_n_cols = A.n_cols;
|
Chris@49
|
338
|
Chris@49
|
339 if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
|
Chris@49
|
340 {
|
Chris@49
|
341 gemv_emul_tinysq<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alpha, beta);
|
Chris@49
|
342 }
|
Chris@49
|
343 else
|
Chris@49
|
344 {
|
Chris@49
|
345 gemv_emul_large<do_trans_A, use_alpha, use_beta>::apply(y, A, x, alpha, beta);
|
Chris@49
|
346 }
|
Chris@49
|
347 }
|
Chris@49
|
348
|
Chris@49
|
349
|
Chris@49
|
350
|
Chris@49
|
351 template<typename eT>
|
Chris@49
|
352 arma_hot
|
Chris@49
|
353 inline
|
Chris@49
|
354 static
|
Chris@49
|
355 void
|
Chris@49
|
356 apply( eT* y, const Mat<eT>& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0), const typename arma_cx_only<eT>::result* junk = 0 )
|
Chris@49
|
357 {
|
Chris@49
|
358 arma_extra_debug_sigprint();
|
Chris@49
|
359 arma_ignore(junk);
|
Chris@49
|
360
|
Chris@49
|
361 Mat<eT> tmp_A;
|
Chris@49
|
362
|
Chris@49
|
363 if(do_trans_A)
|
Chris@49
|
364 {
|
Chris@49
|
365 op_htrans::apply_noalias(tmp_A, A);
|
Chris@49
|
366 }
|
Chris@49
|
367
|
Chris@49
|
368 const Mat<eT>& AA = (do_trans_A == false) ? A : tmp_A;
|
Chris@49
|
369
|
Chris@49
|
370 const uword AA_n_rows = AA.n_rows;
|
Chris@49
|
371 const uword AA_n_cols = AA.n_cols;
|
Chris@49
|
372
|
Chris@49
|
373 if( (AA_n_rows <= 4) && (AA_n_rows == AA_n_cols) )
|
Chris@49
|
374 {
|
Chris@49
|
375 gemv_emul_tinysq<false, use_alpha, use_beta>::apply(y, AA, x, alpha, beta);
|
Chris@49
|
376 }
|
Chris@49
|
377 else
|
Chris@49
|
378 {
|
Chris@49
|
379 gemv_emul_large<false, use_alpha, use_beta>::apply(y, AA, x, alpha, beta);
|
Chris@49
|
380 }
|
Chris@49
|
381 }
|
Chris@49
|
382 };
|
Chris@49
|
383
|
Chris@49
|
384
|
Chris@49
|
385
|
Chris@49
|
386 //! \brief
|
Chris@49
|
387 //! Wrapper for ATLAS/BLAS gemv function, using template arguments to control the arguments passed to gemv.
|
Chris@49
|
388 //! 'y' is assumed to have been set to the correct size (i.e. taking into account the transpose)
|
Chris@49
|
389
|
Chris@49
|
390 template<const bool do_trans_A=false, const bool use_alpha=false, const bool use_beta=false>
|
Chris@49
|
391 class gemv
|
Chris@49
|
392 {
|
Chris@49
|
393 public:
|
Chris@49
|
394
|
Chris@49
|
395 template<typename eT, typename TA>
|
Chris@49
|
396 inline
|
Chris@49
|
397 static
|
Chris@49
|
398 void
|
Chris@49
|
399 apply_blas_type( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )
|
Chris@49
|
400 {
|
Chris@49
|
401 arma_extra_debug_sigprint();
|
Chris@49
|
402
|
Chris@49
|
403 //const uword threshold = (is_complex<eT>::value == true) ? 16u : 64u;
|
Chris@49
|
404 const uword threshold = (is_complex<eT>::value == true) ? 64u : 100u;
|
Chris@49
|
405
|
Chris@49
|
406 if(A.n_elem <= threshold)
|
Chris@49
|
407 {
|
Chris@49
|
408 gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);
|
Chris@49
|
409 }
|
Chris@49
|
410 else
|
Chris@49
|
411 {
|
Chris@49
|
412 #if defined(ARMA_USE_ATLAS)
|
Chris@49
|
413 {
|
Chris@49
|
414 if(is_complex<eT>::value == false)
|
Chris@49
|
415 {
|
Chris@49
|
416 // use gemm() instead of gemv() to work around a speed issue in Atlas 3.8.4
|
Chris@49
|
417
|
Chris@49
|
418 arma_extra_debug_print("atlas::cblas_gemm()");
|
Chris@49
|
419
|
Chris@49
|
420 atlas::cblas_gemm<eT>
|
Chris@49
|
421 (
|
Chris@49
|
422 atlas::CblasColMajor,
|
Chris@49
|
423 (do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans,
|
Chris@49
|
424 atlas::CblasNoTrans,
|
Chris@49
|
425 (do_trans_A) ? A.n_cols : A.n_rows,
|
Chris@49
|
426 1,
|
Chris@49
|
427 (do_trans_A) ? A.n_rows : A.n_cols,
|
Chris@49
|
428 (use_alpha) ? alpha : eT(1),
|
Chris@49
|
429 A.mem,
|
Chris@49
|
430 A.n_rows,
|
Chris@49
|
431 x,
|
Chris@49
|
432 (do_trans_A) ? A.n_rows : A.n_cols,
|
Chris@49
|
433 (use_beta) ? beta : eT(0),
|
Chris@49
|
434 y,
|
Chris@49
|
435 (do_trans_A) ? A.n_cols : A.n_rows
|
Chris@49
|
436 );
|
Chris@49
|
437 }
|
Chris@49
|
438 else
|
Chris@49
|
439 {
|
Chris@49
|
440 arma_extra_debug_print("atlas::cblas_gemv()");
|
Chris@49
|
441
|
Chris@49
|
442 atlas::cblas_gemv<eT>
|
Chris@49
|
443 (
|
Chris@49
|
444 atlas::CblasColMajor,
|
Chris@49
|
445 (do_trans_A) ? ( is_complex<eT>::value ? CblasConjTrans : atlas::CblasTrans ) : atlas::CblasNoTrans,
|
Chris@49
|
446 A.n_rows,
|
Chris@49
|
447 A.n_cols,
|
Chris@49
|
448 (use_alpha) ? alpha : eT(1),
|
Chris@49
|
449 A.mem,
|
Chris@49
|
450 A.n_rows,
|
Chris@49
|
451 x,
|
Chris@49
|
452 1,
|
Chris@49
|
453 (use_beta) ? beta : eT(0),
|
Chris@49
|
454 y,
|
Chris@49
|
455 1
|
Chris@49
|
456 );
|
Chris@49
|
457 }
|
Chris@49
|
458 }
|
Chris@49
|
459 #elif defined(ARMA_USE_BLAS)
|
Chris@49
|
460 {
|
Chris@49
|
461 arma_extra_debug_print("blas::gemv()");
|
Chris@49
|
462
|
Chris@49
|
463 const char trans_A = (do_trans_A) ? ( is_complex<eT>::value ? 'C' : 'T' ) : 'N';
|
Chris@49
|
464 const blas_int m = A.n_rows;
|
Chris@49
|
465 const blas_int n = A.n_cols;
|
Chris@49
|
466 const eT local_alpha = (use_alpha) ? alpha : eT(1);
|
Chris@49
|
467 //const blas_int lda = A.n_rows;
|
Chris@49
|
468 const blas_int inc = 1;
|
Chris@49
|
469 const eT local_beta = (use_beta) ? beta : eT(0);
|
Chris@49
|
470
|
Chris@49
|
471 arma_extra_debug_print( arma_boost::format("blas::gemv(): trans_A = %c") % trans_A );
|
Chris@49
|
472
|
Chris@49
|
473 blas::gemv<eT>
|
Chris@49
|
474 (
|
Chris@49
|
475 &trans_A,
|
Chris@49
|
476 &m,
|
Chris@49
|
477 &n,
|
Chris@49
|
478 &local_alpha,
|
Chris@49
|
479 A.mem,
|
Chris@49
|
480 &m, // lda
|
Chris@49
|
481 x,
|
Chris@49
|
482 &inc,
|
Chris@49
|
483 &local_beta,
|
Chris@49
|
484 y,
|
Chris@49
|
485 &inc
|
Chris@49
|
486 );
|
Chris@49
|
487 }
|
Chris@49
|
488 #else
|
Chris@49
|
489 {
|
Chris@49
|
490 gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);
|
Chris@49
|
491 }
|
Chris@49
|
492 #endif
|
Chris@49
|
493 }
|
Chris@49
|
494
|
Chris@49
|
495 }
|
Chris@49
|
496
|
Chris@49
|
497
|
Chris@49
|
498
|
Chris@49
|
499 template<typename eT, typename TA>
|
Chris@49
|
500 arma_inline
|
Chris@49
|
501 static
|
Chris@49
|
502 void
|
Chris@49
|
503 apply( eT* y, const TA& A, const eT* x, const eT alpha = eT(1), const eT beta = eT(0) )
|
Chris@49
|
504 {
|
Chris@49
|
505 gemv_emul<do_trans_A, use_alpha, use_beta>::apply(y,A,x,alpha,beta);
|
Chris@49
|
506 }
|
Chris@49
|
507
|
Chris@49
|
508
|
Chris@49
|
509
|
Chris@49
|
510 template<typename TA>
|
Chris@49
|
511 arma_inline
|
Chris@49
|
512 static
|
Chris@49
|
513 void
|
Chris@49
|
514 apply
|
Chris@49
|
515 (
|
Chris@49
|
516 float* y,
|
Chris@49
|
517 const TA& A,
|
Chris@49
|
518 const float* x,
|
Chris@49
|
519 const float alpha = float(1),
|
Chris@49
|
520 const float beta = float(0)
|
Chris@49
|
521 )
|
Chris@49
|
522 {
|
Chris@49
|
523 gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);
|
Chris@49
|
524 }
|
Chris@49
|
525
|
Chris@49
|
526
|
Chris@49
|
527
|
Chris@49
|
528 template<typename TA>
|
Chris@49
|
529 arma_inline
|
Chris@49
|
530 static
|
Chris@49
|
531 void
|
Chris@49
|
532 apply
|
Chris@49
|
533 (
|
Chris@49
|
534 double* y,
|
Chris@49
|
535 const TA& A,
|
Chris@49
|
536 const double* x,
|
Chris@49
|
537 const double alpha = double(1),
|
Chris@49
|
538 const double beta = double(0)
|
Chris@49
|
539 )
|
Chris@49
|
540 {
|
Chris@49
|
541 gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);
|
Chris@49
|
542 }
|
Chris@49
|
543
|
Chris@49
|
544
|
Chris@49
|
545
|
Chris@49
|
546 template<typename TA>
|
Chris@49
|
547 arma_inline
|
Chris@49
|
548 static
|
Chris@49
|
549 void
|
Chris@49
|
550 apply
|
Chris@49
|
551 (
|
Chris@49
|
552 std::complex<float>* y,
|
Chris@49
|
553 const TA& A,
|
Chris@49
|
554 const std::complex<float>* x,
|
Chris@49
|
555 const std::complex<float> alpha = std::complex<float>(1),
|
Chris@49
|
556 const std::complex<float> beta = std::complex<float>(0)
|
Chris@49
|
557 )
|
Chris@49
|
558 {
|
Chris@49
|
559 gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);
|
Chris@49
|
560 }
|
Chris@49
|
561
|
Chris@49
|
562
|
Chris@49
|
563
|
Chris@49
|
564 template<typename TA>
|
Chris@49
|
565 arma_inline
|
Chris@49
|
566 static
|
Chris@49
|
567 void
|
Chris@49
|
568 apply
|
Chris@49
|
569 (
|
Chris@49
|
570 std::complex<double>* y,
|
Chris@49
|
571 const TA& A,
|
Chris@49
|
572 const std::complex<double>* x,
|
Chris@49
|
573 const std::complex<double> alpha = std::complex<double>(1),
|
Chris@49
|
574 const std::complex<double> beta = std::complex<double>(0)
|
Chris@49
|
575 )
|
Chris@49
|
576 {
|
Chris@49
|
577 gemv<do_trans_A, use_alpha, use_beta>::apply_blas_type(y,A,x,alpha,beta);
|
Chris@49
|
578 }
|
Chris@49
|
579
|
Chris@49
|
580
|
Chris@49
|
581
|
Chris@49
|
582 };
|
Chris@49
|
583
|
Chris@49
|
584
|
Chris@49
|
585 //! @}
|