lin*_*ina 11 c++ pointers memory-alignment unions
提前道歉,这可能是一个蹩脚的第一篇文章.虽然这个主题有很多材料,但对我来说很少是明确的和/或可理解的.
我有一个AlignedArray模板类,可以在堆上动态分配任意对齐的内存(我需要为AVX程序集例程进行32字节对齐).这需要一些丑陋的指针操作.
Agner Fog在cppexamples.zip中提供了一个样本类,它滥用了一个联合(http://www.agner.org/optimize/optimization_manuals.zip).但是,我知道写入联合的一个成员然后从另一个成员读取会导致未定义行为(UB).
据我所知(AFAICT),可以安全地将任何指针类型别名为 char *,但只能在一个方向上.这是我理解变得模糊的地方.这是我的 AlignedArray
类的精简版(基本上是对Agner版本的重写,以帮助我理解):
/**
 * Heap-allocated array of T whose first element lies on an
 * `alignment`-byte boundary.
 *
 * Storage is obtained as raw bytes via `new char[]`, over-allocated by
 * `alignment - 1` bytes, and the first suitably aligned address inside that
 * buffer is used as the start of the array.
 *
 * Elements are never constructed or destroyed, and resizing moves them with
 * memcpy, so this container is only appropriate for trivially copyable T
 * (e.g. float/double/int buffers for SIMD routines).
 *
 * Copying is disabled: the object owns a raw buffer, and a member-wise copy
 * would make two objects delete the same allocation.
 *
 * @tparam T          element type
 * @tparam alignment  required alignment in bytes; must be a power of two
 */
template <typename T, size_t alignment = 32>
class AlignedArray
{
    // The rounding mask `~(alignment - 1)` is only correct for powers of two.
    static_assert(alignment != 0 && (alignment & (alignment - 1)) == 0,
                  "alignment must be a non-zero power of two");

    size_t m_size;       // number of T elements currently held
    char * m_unaligned;  // pointer returned by new[]; the one given to delete[]
    T *    m_aligned;    // first aligned address inside the m_unaligned buffer

public:
    /// Creates an array holding `size` elements (uninitialised).
    AlignedArray (size_t const size)
        : m_size(0)
        , m_unaligned(nullptr)
        , m_aligned(nullptr)
    {
        this->size(size);
    }

    /// Releases the underlying buffer.
    ~AlignedArray ()
    {
        this->size(0);
    }

    // Owning raw storage: forbid copies to avoid a double delete[].
    AlignedArray (AlignedArray const &) = delete;
    AlignedArray & operator = (AlignedArray const &) = delete;

    /// Unchecked element access.
    T const & operator [] (size_t const i) const { return m_aligned[i]; }
    T & operator [] (size_t const i) { return m_aligned[i]; }

    /// @return the number of elements currently held.
    size_t size () const { return m_size; }

    /**
     * Resizes the array to `size` elements.
     *
     * - `size == 0` frees the buffer.
     * - `size == current size` does nothing.
     * - otherwise a new buffer is allocated, the first
     *   `min(size, old size)` elements are copied over, and the old buffer
     *   is freed. Elements beyond the old size are left uninitialised.
     *
     * The new buffer is allocated before the old one is touched, so if
     * `new[]` throws the array keeps its previous contents.
     */
    void size (size_t const size)
    {
        if (size == 0)
        {
            delete [] m_unaligned;  // deleting a null pointer is a no-op
            m_size = 0;
            m_unaligned = nullptr;
            m_aligned = nullptr;
            return;
        }

        if (size == m_size)
            return;

        // new[] reports failure by throwing, so no null check is needed.
        char * const unaligned = new char [size * sizeof(T) + alignment - 1];

        // Round the raw address up to the next multiple of `alignment`.
        // The address is only converted char* -> integer -> T*; no object is
        // read through a pointer of the wrong type.
        T * const aligned = reinterpret_cast<T *>(
            (reinterpret_cast<size_t>(unaligned) + alignment - 1) & ~(alignment - 1));

        if (m_unaligned)
        {
            // memcpy counts bytes, so scale the element count by sizeof(T).
            memcpy(aligned, m_aligned, std::min(size, m_size) * sizeof(T));
            delete [] m_unaligned;
        }

        m_size = size;
        m_unaligned = unaligned;
        m_aligned = aligned;
    }
};
Run Code Online (Sandbox Code Playgroud)
那么哪种方法是(更)安全的?
我有实现(替换)new和delete运算符的代码,适用于 SIMD(即 SSE / AVX)。它使用以下您可能会觉得有用的函数:
/**
 * Allocates at least `size` bytes aligned to G0_SIMD_ALIGN.
 *
 * The request is padded by one full alignment unit. The returned pointer is
 * the raw malloc address rounded up to the next strictly greater multiple of
 * the alignment, and the raw malloc pointer is stored in the pointer-sized
 * slot immediately below the returned address so that G0__SIMD_free can
 * recover it.
 *
 * @param size  number of usable bytes requested
 * @return aligned pointer, or nullptr if the padded size would overflow or
 *         malloc fails
 */
static inline void *G0__SIMD_malloc (size_t size)
{
    constexpr size_t align = G0_SIMD_ALIGN;

    static_assert(G0_SIMD_ALIGN >= sizeof(void *),
        "insufficient alignment for pointer storage");
    static_assert((G0_SIMD_ALIGN & (G0_SIMD_ALIGN - 1)) == 0,
        "G0_SIMD_ALIGN value must be a power of (2)");

    // Guard the padding addition: if it wrapped around, malloc would be
    // asked for a tiny block and the caller would overrun it.
    if (size > static_cast<size_t>(-1) - align)
        return nullptr;
    size += align; // raw pointer storage with alignment padding.

    void * const uptr = malloc(size);
    if (uptr == nullptr)
        return nullptr;

    // Adding a full `align` before masking always moves strictly past the
    // raw address, leaving a gap below the result for the back-pointer.
    // NOTE(review): assumes malloc's result is at least pointer-aligned so
    // that the gap is no smaller than sizeof(void *) - confirm for the target.
    uintptr_t const addr = reinterpret_cast<uintptr_t>(uptr);
    void * const ptr = reinterpret_cast<void *>((addr + align) & ~(align - 1));

    *(reinterpret_cast<void **>(ptr) - 1) = uptr; // (raw ptr)
    return ptr;
}
/**
 * Releases a block whose raw allocation pointer is stored in the
 * pointer-sized slot immediately below `ptr`.
 *
 * Passing nullptr is a no-op.
 *
 * @param ptr  aligned pointer with the raw pointer stored just below it,
 *             or nullptr
 */
static inline void G0__SIMD_free (void *ptr)
{
    if (ptr == nullptr)
        return;

    // Read back the raw pointer from the slot just below the aligned block.
    void ** const slots = reinterpret_cast<void **>(ptr);
    void * const raw = slots[-1]; // (raw ptr)
    free(raw);
}
Run Code Online (Sandbox Code Playgroud)
这应该很容易适应。显然,您将替换 malloc 和 free,因为您正在使用全局 new 和 delete 进行原始(字符)存储。它假设 size_t 对于地址算术来说足够宽——实际上是正确的,但来自 <cstdint> 的 uintptr_t 会更正确。