annotate armadillo-3.900.4/include/armadillo_bits/memory.hpp @ 84:55a047986812 tip

Update library URI so as not to be document-local
author Chris Cannam
date Wed, 22 Apr 2020 14:21:57 +0100
parents 1ec0e2823891
children
rev   line source
Chris@49 1 // Copyright (C) 2012-2013 NICTA (www.nicta.com.au)
Chris@49 2 // Copyright (C) 2012-2013 Conrad Sanderson
Chris@49 3 //
Chris@49 4 // This Source Code Form is subject to the terms of the Mozilla Public
Chris@49 5 // License, v. 2.0. If a copy of the MPL was not distributed with this
Chris@49 6 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
Chris@49 7
Chris@49 8
Chris@49 9 //! \addtogroup memory
Chris@49 10 //! @{
Chris@49 11
Chris@49 12
Chris@49 13 class memory
Chris@49 14 {
Chris@49 15 public:
Chris@49 16
Chris@49 17 arma_inline static uword enlarge_to_mult_of_chunksize(const uword n_elem);
Chris@49 18
Chris@49 19 template<typename eT> arma_inline arma_malloc static eT* acquire(const uword n_elem);
Chris@49 20
Chris@49 21 template<typename eT> arma_inline arma_malloc static eT* acquire_chunked(const uword n_elem);
Chris@49 22
Chris@49 23 template<typename eT> arma_inline static void release(eT* mem);
Chris@49 24
Chris@49 25
Chris@49 26 template<typename eT> arma_inline static bool is_aligned(const eT* mem);
Chris@49 27 template<typename eT> arma_inline static void mark_as_aligned( eT*& mem);
Chris@49 28 template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem);
Chris@49 29 };
Chris@49 30
Chris@49 31
Chris@49 32
Chris@49 33 arma_inline
Chris@49 34 uword
Chris@49 35 memory::enlarge_to_mult_of_chunksize(const uword n_elem)
Chris@49 36 {
Chris@49 37 const uword chunksize = arma_config::spmat_chunksize;
Chris@49 38
Chris@49 39 // this relies on integer division
Chris@49 40 const uword n_elem_mod = ((n_elem % chunksize) != 0) ? ((n_elem / chunksize) + 1) * chunksize : n_elem;
Chris@49 41
Chris@49 42 return n_elem_mod;
Chris@49 43 }
Chris@49 44
Chris@49 45
Chris@49 46
Chris@49 47 template<typename eT>
Chris@49 48 arma_inline
Chris@49 49 arma_malloc
Chris@49 50 eT*
Chris@49 51 memory::acquire(const uword n_elem)
Chris@49 52 {
Chris@49 53 #if defined(ARMA_USE_TBB_ALLOC)
Chris@49 54 {
Chris@49 55 return ( (eT *) scalable_malloc(sizeof(eT)*n_elem) );
Chris@49 56 }
Chris@49 57 #elif defined(ARMA_USE_MKL_ALLOC)
Chris@49 58 {
Chris@49 59 return ( (eT *) mkl_malloc( sizeof(eT)*n_elem, 128 ) );
Chris@49 60 }
Chris@49 61 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
Chris@49 62 {
Chris@49 63 eT* memptr;
Chris@49 64
Chris@49 65 const size_t alignment = 16; // change the 16 to 64 if you wish to align to the cache line
Chris@49 66
Chris@49 67 int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), sizeof(eT)*n_elem);
Chris@49 68
Chris@49 69 return (status == 0) ? memptr : NULL;
Chris@49 70 }
Chris@49 71 #elif defined(_MSC_VER)
Chris@49 72 {
Chris@49 73 return ( (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ) ); // lives in malloc.h
Chris@49 74 }
Chris@49 75 #else
Chris@49 76 {
Chris@49 77 //return ( new(std::nothrow) eT[n_elem] );
Chris@49 78 return ( (eT *) malloc(sizeof(eT)*n_elem) );
Chris@49 79 }
Chris@49 80 #endif
Chris@49 81
Chris@49 82 // TODO: for mingw, use __mingw_aligned_malloc
Chris@49 83 }
Chris@49 84
Chris@49 85
Chris@49 86
Chris@49 87 //! get memory in multiples of chunks, holding at least n_elem
Chris@49 88 template<typename eT>
Chris@49 89 arma_inline
Chris@49 90 arma_malloc
Chris@49 91 eT*
Chris@49 92 memory::acquire_chunked(const uword n_elem)
Chris@49 93 {
Chris@49 94 const uword n_elem_mod = memory::enlarge_to_mult_of_chunksize(n_elem);
Chris@49 95
Chris@49 96 return memory::acquire<eT>(n_elem_mod);
Chris@49 97 }
Chris@49 98
Chris@49 99
Chris@49 100
Chris@49 101 template<typename eT>
Chris@49 102 arma_inline
Chris@49 103 void
Chris@49 104 memory::release(eT* mem)
Chris@49 105 {
Chris@49 106 #if defined(ARMA_USE_TBB_ALLOC)
Chris@49 107 {
Chris@49 108 scalable_free( (void *)(mem) );
Chris@49 109 }
Chris@49 110 #elif defined(ARMA_USE_MKL_ALLOC)
Chris@49 111 {
Chris@49 112 mkl_free( (void *)(mem) );
Chris@49 113 }
Chris@49 114 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
Chris@49 115 {
Chris@49 116 free( (void *)(mem) );
Chris@49 117 }
Chris@49 118 #elif defined(_MSC_VER)
Chris@49 119 {
Chris@49 120 _aligned_free( (void *)(mem) );
Chris@49 121 }
Chris@49 122 #else
Chris@49 123 {
Chris@49 124 //delete [] mem;
Chris@49 125 free( (void *)(mem) );
Chris@49 126 }
Chris@49 127 #endif
Chris@49 128
Chris@49 129 // TODO: for mingw, use __mingw_aligned_free
Chris@49 130 }
Chris@49 131
Chris@49 132
Chris@49 133
Chris@49 134 template<typename eT>
Chris@49 135 arma_inline
Chris@49 136 bool
Chris@49 137 memory::is_aligned(const eT* mem)
Chris@49 138 {
Chris@49 139 #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT)
Chris@49 140 {
Chris@49 141 return ((std::ptrdiff_t(mem) & 0x0F) == 0);
Chris@49 142 }
Chris@49 143 #else
Chris@49 144 {
Chris@49 145 arma_ignore(mem);
Chris@49 146
Chris@49 147 return false;
Chris@49 148 }
Chris@49 149 #endif
Chris@49 150 }
Chris@49 151
Chris@49 152
Chris@49 153
Chris@49 154 template<typename eT>
Chris@49 155 arma_inline
Chris@49 156 void
Chris@49 157 memory::mark_as_aligned(eT*& mem)
Chris@49 158 {
Chris@49 159 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
Chris@49 160 {
Chris@49 161 __assume_aligned(mem, 16);
Chris@49 162 }
Chris@49 163 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
Chris@49 164 {
Chris@49 165 mem = (eT*)__builtin_assume_aligned(mem, 16);
Chris@49 166 }
Chris@49 167 #else
Chris@49 168 {
Chris@49 169 arma_ignore(mem);
Chris@49 170 }
Chris@49 171 #endif
Chris@49 172
Chris@49 173 // TODO: MSVC? __assume( (mem & 0x0F) == 0 );
Chris@49 174 //
Chris@49 175 // http://comments.gmane.org/gmane.comp.gcc.patches/239430
Chris@49 176 // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned,
Chris@49 177 // so for lvalue first argument ICC's __assume_aligned can be emulated using
Chris@49 178 // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align)
Chris@49 179 //
Chris@49 180 // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf
Chris@49 181 // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm
Chris@49 182 // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf
Chris@49 183 }
Chris@49 184
Chris@49 185
Chris@49 186
Chris@49 187 template<typename eT>
Chris@49 188 arma_inline
Chris@49 189 void
Chris@49 190 memory::mark_as_aligned(const eT*& mem)
Chris@49 191 {
Chris@49 192 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
Chris@49 193 {
Chris@49 194 __assume_aligned(mem, 16);
Chris@49 195 }
Chris@49 196 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
Chris@49 197 {
Chris@49 198 mem = (const eT*)__builtin_assume_aligned(mem, 16);
Chris@49 199 }
Chris@49 200 #else
Chris@49 201 {
Chris@49 202 arma_ignore(mem);
Chris@49 203 }
Chris@49 204 #endif
Chris@49 205 }
Chris@49 206
Chris@49 207
Chris@49 208
Chris@49 209 //! @}