Skip to content

Commit

Permalink
Use unaligned memory accesses unconditionally
Browse files Browse the repository at this point in the history
CPL_CPU_REQUIRES_ALIGNED_ACCESS is undefined for x86 which can cause
-fsanitize=alignment failures.

Let's just use CPL_CPU_REQUIRES_ALIGNED_ACCESS unconditionally. Modern
compilers are able to optimize memcpy.

It seems that GDALCopyWords64 performs more conditions and
FileGDBIndexIterator::GetNextRow() has less efficient code with this
change, but the difference unlikely matters in practice.
  • Loading branch information
MaskRay committed Aug 17, 2023
1 parent b87a3a8 commit 80cbb00
Show file tree
Hide file tree
Showing 6 changed files with 1 addition and 60 deletions.
8 changes: 0 additions & 8 deletions alg/gdalwarpkernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2962,13 +2962,9 @@ static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,

static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
unsigned int i;
memcpy(&i, ptr, 4);
__m128i xmm_i = _mm_cvtsi32_si128(s);
#else
__m128i xmm_i = _mm_cvtsi32_si128(*(unsigned int *)(ptr));
#endif
// Zero extend 4 packed unsigned 8-bit integers in a to packed
// 32-bit integers.
#if __SSE4_1__
Expand All @@ -2982,13 +2978,9 @@ static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)

static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
GUInt64 i;
memcpy(&i, ptr, 8);
__m128i xmm_i = _mm_cvtsi64_si128(s);
#else
__m128i xmm_i = _mm_cvtsi64_si128(*(GUInt64 *)(ptr));
#endif
// Zero extend 4 packed unsigned 16-bit integers in a to packed
// 32-bit integers.
#if __SSE4_1__
Expand Down
11 changes: 0 additions & 11 deletions gcore/gdal_priv_templates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,24 +491,13 @@ inline void GDALCopy8Words(const Tin *pValueIn, Tout *const pValueOut)

static inline void GDALCopyXMMToInt32(const __m128i xmm, void *pDest)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
int n32 = _mm_cvtsi128_si32(xmm); // Extract lower 32 bit word
memcpy(pDest, &n32, sizeof(n32));
#else
*static_cast<int *>(pDest) = _mm_cvtsi128_si32(xmm);
#endif
}

static inline void GDALCopyXMMToInt64(const __m128i xmm, void *pDest)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
GInt64 n64 = _mm_cvtsi128_si64(xmm); // Extract lower 64 bit word
memcpy(pDest, &n64, sizeof(n64));
#elif defined(__i386__) || defined(_M_IX86)
_mm_storel_epi64(reinterpret_cast<__m128i *>(pDest), xmm);
#else
*static_cast<GInt64 *>(pDest) = _mm_cvtsi128_si64(xmm);
#endif
}

#if __SSSE3__
Expand Down
17 changes: 1 addition & 16 deletions gcore/gdalsse_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,48 +51,33 @@

static inline __m128i GDALCopyInt16ToXMM(const void *ptr)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
unsigned short s;
memcpy(&s, ptr, 2);
return _mm_cvtsi32_si128(s);
#else
return _mm_cvtsi32_si128(*static_cast<const unsigned short *>(ptr));
#endif
}

static inline __m128i GDALCopyInt32ToXMM(const void *ptr)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
GInt32 i;
memcpy(&i, ptr, 4);
return _mm_cvtsi32_si128(i);
#else
return _mm_cvtsi32_si128(*static_cast<const GInt32 *>(ptr));
#endif
}

static inline __m128i GDALCopyInt64ToXMM(const void *ptr)
{
#if defined(__i386__) || defined(_M_IX86)
return _mm_loadl_epi64(static_cast<const __m128i *>(ptr));
#elif defined(CPL_CPU_REQUIRES_ALIGNED_ACCESS)
#else
GInt64 i;
memcpy(&i, ptr, 8);
return _mm_cvtsi64_si128(i);
#else
return _mm_cvtsi64_si128(*static_cast<const GInt64 *>(ptr));
#endif
}

static inline void GDALCopyXMMToInt16(const __m128i xmm, void *pDest)
{
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
GInt16 i = static_cast<GInt16>(_mm_extract_epi16(xmm, 0));
memcpy(pDest, &i, 2);
#else
*static_cast<GInt16 *>(pDest) =
static_cast<GInt16>(_mm_extract_epi16(xmm, 0));
#endif
}

class XMMReg2Double
Expand Down
2 changes: 0 additions & 2 deletions gcore/rasterio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3312,7 +3312,6 @@ void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
{
// On platforms where alignment matters, be careful
const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
((reinterpret_cast<GPtrDiff_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
Expand Down Expand Up @@ -3354,7 +3353,6 @@ void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
}
return;
}
#endif

// Deal with the case where we're replicating a single word into the
// provided buffer
Expand Down
15 changes: 0 additions & 15 deletions ogr/ogrsf_frmts/openfilegdb/filegdbindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,6 @@ bool FileGDBIndexIterator::FindPages(int iLevel, int nPage)
case FGFT_STRING:
{
GUInt16 *pasMax;
#if defined(CPL_MSB) || defined(CPL_CPU_REQUIRES_ALIGNED_ACCESS)
GUInt16 asMax[MAX_CAR_COUNT_INDEXED_STR];
pasMax = asMax;
memcpy(asMax,
Expand All @@ -1280,11 +1279,6 @@ bool FileGDBIndexIterator::FindPages(int iLevel, int nPage)
nStrLen * sizeof(GUInt16));
for (int j = 0; j < nStrLen; j++)
CPL_LSBPTR16(&asMax[j]);
#else
pasMax = reinterpret_cast<GUInt16 *>(
abyPage[iLevel] + nOffsetFirstValInPage +
nStrLen * sizeof(GUInt16) * i);
#endif
#ifdef DEBUG_INDEX_CONSISTENCY
returnErrorIf(i > 0 && FileGDBUTF16StrCompare(pasMax, asLastMax,
nStrLen) < 0);
Expand Down Expand Up @@ -1629,7 +1623,6 @@ int FileGDBIndexIterator::GetNextRow()

case FGFT_STRING:
{
#if defined(CPL_MSB) || defined(CPL_CPU_REQUIRES_ALIGNED_ACCESS)
GUInt16 asVal[MAX_CAR_COUNT_INDEXED_STR];
memcpy(asVal,
abyPageFeature + nOffsetFirstValInPage +
Expand All @@ -1638,14 +1631,6 @@ int FileGDBIndexIterator::GetNextRow()
for (int j = 0; j < nStrLen; j++)
CPL_LSBPTR16(&asVal[j]);
nComp = FileGDBUTF16StrCompare(asUTF16Str, asVal, nStrLen);
#else
nComp = FileGDBUTF16StrCompare(
asUTF16Str,
reinterpret_cast<GUInt16 *>(
abyPageFeature + nOffsetFirstValInPage +
nStrLen * 2 * iCurFeatureInPage),
nStrLen);
#endif
break;
}

Expand Down
8 changes: 0 additions & 8 deletions port/cpl_port.h
Original file line number Diff line number Diff line change
Expand Up @@ -1077,14 +1077,6 @@ int sprintf(char *str, const char *fmt, ...) CPL_PRINT_FUNC_FORMAT(2, 3)
CPL_C_END
#endif /* !defined(_MSC_VER) && !defined(__APPLE__) */

#if defined(MAKE_SANITIZE_HAPPY) || \
!(defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
defined(_M_X64))
/*! @cond Doxygen_Suppress */
#define CPL_CPU_REQUIRES_ALIGNED_ACCESS
/*! @endcond */
#endif

#if defined(__cplusplus)
#ifndef CPPCHECK
/** Returns the size of C style arrays. */
Expand Down

0 comments on commit 80cbb00

Please sign in to comment.