Chris@49: // Copyright (C) 2012-2013 NICTA (www.nicta.com.au) Chris@49: // Copyright (C) 2012-2013 Conrad Sanderson Chris@49: // Chris@49: // This Source Code Form is subject to the terms of the Mozilla Public Chris@49: // License, v. 2.0. If a copy of the MPL was not distributed with this Chris@49: // file, You can obtain one at http://mozilla.org/MPL/2.0/. Chris@49: Chris@49: Chris@49: //! \addtogroup memory Chris@49: //! @{ Chris@49: Chris@49: Chris@49: class memory Chris@49: { Chris@49: public: Chris@49: Chris@49: arma_inline static uword enlarge_to_mult_of_chunksize(const uword n_elem); Chris@49: Chris@49: template arma_inline arma_malloc static eT* acquire(const uword n_elem); Chris@49: Chris@49: template arma_inline arma_malloc static eT* acquire_chunked(const uword n_elem); Chris@49: Chris@49: template arma_inline static void release(eT* mem); Chris@49: Chris@49: Chris@49: template arma_inline static bool is_aligned(const eT* mem); Chris@49: template arma_inline static void mark_as_aligned( eT*& mem); Chris@49: template arma_inline static void mark_as_aligned(const eT*& mem); Chris@49: }; Chris@49: Chris@49: Chris@49: Chris@49: arma_inline Chris@49: uword Chris@49: memory::enlarge_to_mult_of_chunksize(const uword n_elem) Chris@49: { Chris@49: const uword chunksize = arma_config::spmat_chunksize; Chris@49: Chris@49: // this relies on integer division Chris@49: const uword n_elem_mod = ((n_elem % chunksize) != 0) ? ((n_elem / chunksize) + 1) * chunksize : n_elem; Chris@49: Chris@49: return n_elem_mod; Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_inline Chris@49: arma_malloc Chris@49: eT* Chris@49: memory::acquire(const uword n_elem) Chris@49: { Chris@49: #if defined(ARMA_USE_TBB_ALLOC) Chris@49: { Chris@49: return ( (eT *) scalable_malloc(sizeof(eT)*n_elem) ); Chris@49: } Chris@49: #elif defined(ARMA_USE_MKL_ALLOC) Chris@49: { Chris@49: return ( (eT *) mkl_malloc( sizeof(eT)*n_elem, 128 ) ); Chris@49: } Chris@49: #elif defined(ARMA_HAVE_POSIX_MEMALIGN) Chris@49: { Chris@49: eT* memptr; Chris@49: Chris@49: const size_t alignment = 16; // change the 16 to 64 if you wish to align to the cache line Chris@49: Chris@49: int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), sizeof(eT)*n_elem); Chris@49: Chris@49: return (status == 0) ? memptr : NULL; Chris@49: } Chris@49: #elif defined(_MSC_VER) Chris@49: { Chris@49: return ( (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ) ); // lives in malloc.h Chris@49: } Chris@49: #else Chris@49: { Chris@49: //return ( new(std::nothrow) eT[n_elem] ); Chris@49: return ( (eT *) malloc(sizeof(eT)*n_elem) ); Chris@49: } Chris@49: #endif Chris@49: Chris@49: // TODO: for mingw, use __mingw_aligned_malloc Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: //! get memory in multiples of chunks, holding at least n_elem Chris@49: template Chris@49: arma_inline Chris@49: arma_malloc Chris@49: eT* Chris@49: memory::acquire_chunked(const uword n_elem) Chris@49: { Chris@49: const uword n_elem_mod = memory::enlarge_to_mult_of_chunksize(n_elem); Chris@49: Chris@49: return memory::acquire(n_elem_mod); Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_inline Chris@49: void Chris@49: memory::release(eT* mem) Chris@49: { Chris@49: #if defined(ARMA_USE_TBB_ALLOC) Chris@49: { Chris@49: scalable_free( (void *)(mem) ); Chris@49: } Chris@49: #elif defined(ARMA_USE_MKL_ALLOC) Chris@49: { Chris@49: mkl_free( (void *)(mem) ); Chris@49: } Chris@49: #elif defined(ARMA_HAVE_POSIX_MEMALIGN) Chris@49: { Chris@49: free( (void *)(mem) ); Chris@49: } Chris@49: #elif defined(_MSC_VER) Chris@49: { Chris@49: _aligned_free( (void *)(mem) ); Chris@49: } Chris@49: #else Chris@49: { Chris@49: //delete [] mem; Chris@49: free( (void *)(mem) ); Chris@49: } Chris@49: #endif Chris@49: Chris@49: // TODO: for mingw, use __mingw_aligned_free Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_inline Chris@49: bool Chris@49: memory::is_aligned(const eT* mem) Chris@49: { Chris@49: #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT) Chris@49: { Chris@49: return ((std::ptrdiff_t(mem) & 0x0F) == 0); Chris@49: } Chris@49: #else Chris@49: { Chris@49: arma_ignore(mem); Chris@49: Chris@49: return false; Chris@49: } Chris@49: #endif Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_inline Chris@49: void Chris@49: memory::mark_as_aligned(eT*& mem) Chris@49: { Chris@49: #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) Chris@49: { Chris@49: __assume_aligned(mem, 16); Chris@49: } Chris@49: #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED) Chris@49: { Chris@49: mem = (eT*)__builtin_assume_aligned(mem, 16); Chris@49: } Chris@49: #else Chris@49: { Chris@49: arma_ignore(mem); Chris@49: } Chris@49: #endif Chris@49: Chris@49: // TODO: MSVC? __assume( (mem & 0x0F) == 0 ); Chris@49: // Chris@49: // http://comments.gmane.org/gmane.comp.gcc.patches/239430 Chris@49: // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned, Chris@49: // so for lvalue first argument ICC's __assume_aligned can be emulated using Chris@49: // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align) Chris@49: // Chris@49: // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf Chris@49: // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm Chris@49: // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_inline Chris@49: void Chris@49: memory::mark_as_aligned(const eT*& mem) Chris@49: { Chris@49: #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) Chris@49: { Chris@49: __assume_aligned(mem, 16); Chris@49: } Chris@49: #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED) Chris@49: { Chris@49: mem = (const eT*)__builtin_assume_aligned(mem, 16); Chris@49: } Chris@49: #else Chris@49: { Chris@49: arma_ignore(mem); Chris@49: } Chris@49: #endif Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: //! @}