Chris@49
|
1 // Copyright (C) 2012-2013 NICTA (www.nicta.com.au)
|
Chris@49
|
2 // Copyright (C) 2012-2013 Conrad Sanderson
|
Chris@49
|
3 //
|
Chris@49
|
4 // This Source Code Form is subject to the terms of the Mozilla Public
|
Chris@49
|
5 // License, v. 2.0. If a copy of the MPL was not distributed with this
|
Chris@49
|
6 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
Chris@49
|
7
|
Chris@49
|
8
|
Chris@49
|
9 //! \addtogroup memory
|
Chris@49
|
10 //! @{
|
Chris@49
|
11
|
Chris@49
|
12
|
Chris@49
|
13 class memory
|
Chris@49
|
14 {
|
Chris@49
|
15 public:
|
Chris@49
|
16
|
Chris@49
|
17 arma_inline static uword enlarge_to_mult_of_chunksize(const uword n_elem);
|
Chris@49
|
18
|
Chris@49
|
19 template<typename eT> arma_inline arma_malloc static eT* acquire(const uword n_elem);
|
Chris@49
|
20
|
Chris@49
|
21 template<typename eT> arma_inline arma_malloc static eT* acquire_chunked(const uword n_elem);
|
Chris@49
|
22
|
Chris@49
|
23 template<typename eT> arma_inline static void release(eT* mem);
|
Chris@49
|
24
|
Chris@49
|
25
|
Chris@49
|
26 template<typename eT> arma_inline static bool is_aligned(const eT* mem);
|
Chris@49
|
27 template<typename eT> arma_inline static void mark_as_aligned( eT*& mem);
|
Chris@49
|
28 template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem);
|
Chris@49
|
29 };
|
Chris@49
|
30
|
Chris@49
|
31
|
Chris@49
|
32
|
Chris@49
|
33 arma_inline
|
Chris@49
|
34 uword
|
Chris@49
|
35 memory::enlarge_to_mult_of_chunksize(const uword n_elem)
|
Chris@49
|
36 {
|
Chris@49
|
37 const uword chunksize = arma_config::spmat_chunksize;
|
Chris@49
|
38
|
Chris@49
|
39 // this relies on integer division
|
Chris@49
|
40 const uword n_elem_mod = ((n_elem % chunksize) != 0) ? ((n_elem / chunksize) + 1) * chunksize : n_elem;
|
Chris@49
|
41
|
Chris@49
|
42 return n_elem_mod;
|
Chris@49
|
43 }
|
Chris@49
|
44
|
Chris@49
|
45
|
Chris@49
|
46
|
Chris@49
|
47 template<typename eT>
|
Chris@49
|
48 arma_inline
|
Chris@49
|
49 arma_malloc
|
Chris@49
|
50 eT*
|
Chris@49
|
51 memory::acquire(const uword n_elem)
|
Chris@49
|
52 {
|
Chris@49
|
53 #if defined(ARMA_USE_TBB_ALLOC)
|
Chris@49
|
54 {
|
Chris@49
|
55 return ( (eT *) scalable_malloc(sizeof(eT)*n_elem) );
|
Chris@49
|
56 }
|
Chris@49
|
57 #elif defined(ARMA_USE_MKL_ALLOC)
|
Chris@49
|
58 {
|
Chris@49
|
59 return ( (eT *) mkl_malloc( sizeof(eT)*n_elem, 128 ) );
|
Chris@49
|
60 }
|
Chris@49
|
61 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
|
Chris@49
|
62 {
|
Chris@49
|
63 eT* memptr;
|
Chris@49
|
64
|
Chris@49
|
65 const size_t alignment = 16; // change the 16 to 64 if you wish to align to the cache line
|
Chris@49
|
66
|
Chris@49
|
67 int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), sizeof(eT)*n_elem);
|
Chris@49
|
68
|
Chris@49
|
69 return (status == 0) ? memptr : NULL;
|
Chris@49
|
70 }
|
Chris@49
|
71 #elif defined(_MSC_VER)
|
Chris@49
|
72 {
|
Chris@49
|
73 return ( (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ) ); // lives in malloc.h
|
Chris@49
|
74 }
|
Chris@49
|
75 #else
|
Chris@49
|
76 {
|
Chris@49
|
77 //return ( new(std::nothrow) eT[n_elem] );
|
Chris@49
|
78 return ( (eT *) malloc(sizeof(eT)*n_elem) );
|
Chris@49
|
79 }
|
Chris@49
|
80 #endif
|
Chris@49
|
81
|
Chris@49
|
82 // TODO: for mingw, use __mingw_aligned_malloc
|
Chris@49
|
83 }
|
Chris@49
|
84
|
Chris@49
|
85
|
Chris@49
|
86
|
Chris@49
|
87 //! get memory in multiples of chunks, holding at least n_elem
|
Chris@49
|
88 template<typename eT>
|
Chris@49
|
89 arma_inline
|
Chris@49
|
90 arma_malloc
|
Chris@49
|
91 eT*
|
Chris@49
|
92 memory::acquire_chunked(const uword n_elem)
|
Chris@49
|
93 {
|
Chris@49
|
94 const uword n_elem_mod = memory::enlarge_to_mult_of_chunksize(n_elem);
|
Chris@49
|
95
|
Chris@49
|
96 return memory::acquire<eT>(n_elem_mod);
|
Chris@49
|
97 }
|
Chris@49
|
98
|
Chris@49
|
99
|
Chris@49
|
100
|
Chris@49
|
101 template<typename eT>
|
Chris@49
|
102 arma_inline
|
Chris@49
|
103 void
|
Chris@49
|
104 memory::release(eT* mem)
|
Chris@49
|
105 {
|
Chris@49
|
106 #if defined(ARMA_USE_TBB_ALLOC)
|
Chris@49
|
107 {
|
Chris@49
|
108 scalable_free( (void *)(mem) );
|
Chris@49
|
109 }
|
Chris@49
|
110 #elif defined(ARMA_USE_MKL_ALLOC)
|
Chris@49
|
111 {
|
Chris@49
|
112 mkl_free( (void *)(mem) );
|
Chris@49
|
113 }
|
Chris@49
|
114 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
|
Chris@49
|
115 {
|
Chris@49
|
116 free( (void *)(mem) );
|
Chris@49
|
117 }
|
Chris@49
|
118 #elif defined(_MSC_VER)
|
Chris@49
|
119 {
|
Chris@49
|
120 _aligned_free( (void *)(mem) );
|
Chris@49
|
121 }
|
Chris@49
|
122 #else
|
Chris@49
|
123 {
|
Chris@49
|
124 //delete [] mem;
|
Chris@49
|
125 free( (void *)(mem) );
|
Chris@49
|
126 }
|
Chris@49
|
127 #endif
|
Chris@49
|
128
|
Chris@49
|
129 // TODO: for mingw, use __mingw_aligned_free
|
Chris@49
|
130 }
|
Chris@49
|
131
|
Chris@49
|
132
|
Chris@49
|
133
|
Chris@49
|
134 template<typename eT>
|
Chris@49
|
135 arma_inline
|
Chris@49
|
136 bool
|
Chris@49
|
137 memory::is_aligned(const eT* mem)
|
Chris@49
|
138 {
|
Chris@49
|
139 #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT)
|
Chris@49
|
140 {
|
Chris@49
|
141 return ((std::ptrdiff_t(mem) & 0x0F) == 0);
|
Chris@49
|
142 }
|
Chris@49
|
143 #else
|
Chris@49
|
144 {
|
Chris@49
|
145 arma_ignore(mem);
|
Chris@49
|
146
|
Chris@49
|
147 return false;
|
Chris@49
|
148 }
|
Chris@49
|
149 #endif
|
Chris@49
|
150 }
|
Chris@49
|
151
|
Chris@49
|
152
|
Chris@49
|
153
|
Chris@49
|
154 template<typename eT>
|
Chris@49
|
155 arma_inline
|
Chris@49
|
156 void
|
Chris@49
|
157 memory::mark_as_aligned(eT*& mem)
|
Chris@49
|
158 {
|
Chris@49
|
159 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
|
Chris@49
|
160 {
|
Chris@49
|
161 __assume_aligned(mem, 16);
|
Chris@49
|
162 }
|
Chris@49
|
163 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
|
Chris@49
|
164 {
|
Chris@49
|
165 mem = (eT*)__builtin_assume_aligned(mem, 16);
|
Chris@49
|
166 }
|
Chris@49
|
167 #else
|
Chris@49
|
168 {
|
Chris@49
|
169 arma_ignore(mem);
|
Chris@49
|
170 }
|
Chris@49
|
171 #endif
|
Chris@49
|
172
|
Chris@49
|
173 // TODO: MSVC? __assume( (mem & 0x0F) == 0 );
|
Chris@49
|
174 //
|
Chris@49
|
175 // http://comments.gmane.org/gmane.comp.gcc.patches/239430
|
Chris@49
|
176 // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned,
|
Chris@49
|
177 // so for lvalue first argument ICC's __assume_aligned can be emulated using
|
Chris@49
|
178 // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align)
|
Chris@49
|
179 //
|
Chris@49
|
180 // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf
|
Chris@49
|
181 // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm
|
Chris@49
|
182 // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf
|
Chris@49
|
183 }
|
Chris@49
|
184
|
Chris@49
|
185
|
Chris@49
|
186
|
Chris@49
|
187 template<typename eT>
|
Chris@49
|
188 arma_inline
|
Chris@49
|
189 void
|
Chris@49
|
190 memory::mark_as_aligned(const eT*& mem)
|
Chris@49
|
191 {
|
Chris@49
|
192 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
|
Chris@49
|
193 {
|
Chris@49
|
194 __assume_aligned(mem, 16);
|
Chris@49
|
195 }
|
Chris@49
|
196 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
|
Chris@49
|
197 {
|
Chris@49
|
198 mem = (const eT*)__builtin_assume_aligned(mem, 16);
|
Chris@49
|
199 }
|
Chris@49
|
200 #else
|
Chris@49
|
201 {
|
Chris@49
|
202 arma_ignore(mem);
|
Chris@49
|
203 }
|
Chris@49
|
204 #endif
|
Chris@49
|
205 }
|
Chris@49
|
206
|
Chris@49
|
207
|
Chris@49
|
208
|
Chris@49
|
209 //! @}
|