diff --git a/include/freerdp/codec/progressive.h b/include/freerdp/codec/progressive.h index 5aff2a22a..c1d4597ea 100644 --- a/include/freerdp/codec/progressive.h +++ b/include/freerdp/codec/progressive.h @@ -80,7 +80,8 @@ extern "C" * @return \b TRUE in case of success, \b FALSE for any error */ FREERDP_API BOOL progressive_rfx_write_message_progressive_simple( - PROGRESSIVE_CONTEXT* progressive, wStream* s, const RFX_MESSAGE* msg); + PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s, + const RFX_MESSAGE* WINPR_RESTRICT msg); #ifdef __cplusplus } diff --git a/include/freerdp/codec/rfx.h b/include/freerdp/codec/rfx.h index 5968bb572..81d00d55f 100644 --- a/include/freerdp/codec/rfx.h +++ b/include/freerdp/codec/rfx.h @@ -75,43 +75,51 @@ extern "C" typedef struct S_RFX_MESSAGE RFX_MESSAGE; typedef struct S_RFX_CONTEXT RFX_CONTEXT; - FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* context, const BYTE* data, UINT32 length, - UINT32 left, UINT32 top, BYTE* dst, UINT32 dstFormat, - UINT32 dstStride, UINT32 dstHeight, - REGION16* invalidRegion); + FREERDP_API BOOL rfx_process_message(RFX_CONTEXT* WINPR_RESTRICT context, + const BYTE* WINPR_RESTRICT data, UINT32 length, + UINT32 left, UINT32 top, BYTE* WINPR_RESTRICT dst, + UINT32 dstFormat, UINT32 dstStride, UINT32 dstHeight, + REGION16* WINPR_RESTRICT invalidRegion); - FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* message); - FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* message, - UINT16* numQuantVals); + FREERDP_API UINT32 rfx_message_get_frame_idx(const RFX_MESSAGE* WINPR_RESTRICT message); + FREERDP_API const UINT32* rfx_message_get_quants(const RFX_MESSAGE* WINPR_RESTRICT message, + UINT16* WINPR_RESTRICT numQuantVals); - FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* message, - UINT16* numTiles); - FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* message); + FREERDP_API const RFX_TILE** rfx_message_get_tiles(const RFX_MESSAGE* WINPR_RESTRICT message, + UINT16* WINPR_RESTRICT numTiles); + FREERDP_API UINT16 rfx_message_get_tile_count(const RFX_MESSAGE* WINPR_RESTRICT message); - FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* message, UINT16* numRects); - FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* message); + FREERDP_API const RFX_RECT* rfx_message_get_rects(const RFX_MESSAGE* WINPR_RESTRICT message, + UINT16* WINPR_RESTRICT numRects); + FREERDP_API UINT16 rfx_message_get_rect_count(const RFX_MESSAGE* WINPR_RESTRICT message); - FREERDP_API void rfx_message_free(RFX_CONTEXT* context, RFX_MESSAGE* message); + FREERDP_API void rfx_message_free(RFX_CONTEXT* WINPR_RESTRICT context, + RFX_MESSAGE* WINPR_RESTRICT message); - FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* context, wStream* s, const RFX_RECT* rects, - size_t num_rects, const BYTE* image_data, UINT32 width, + FREERDP_API BOOL rfx_compose_message(RFX_CONTEXT* WINPR_RESTRICT context, + wStream* WINPR_RESTRICT s, + const RFX_RECT* WINPR_RESTRICT rects, size_t num_rects, + const BYTE* WINPR_RESTRICT image_data, UINT32 width, UINT32 height, UINT32 rowstride); - FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects, - size_t numRects, const BYTE* data, UINT32 width, - UINT32 height, size_t scanline); + FREERDP_API RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* WINPR_RESTRICT context, + const RFX_RECT* WINPR_RESTRICT rects, + size_t numRects, const BYTE* WINPR_RESTRICT data, + UINT32 width, UINT32 height, size_t scanline); - FREERDP_API RFX_MESSAGE_LIST* rfx_encode_messages(RFX_CONTEXT* context, const RFX_RECT* rects, - size_t numRects, const BYTE* data, - UINT32 width, UINT32 height, UINT32 scanline, - size_t* numMessages, size_t maxDataSize); + FREERDP_API RFX_MESSAGE_LIST* + rfx_encode_messages(RFX_CONTEXT* WINPR_RESTRICT context, const RFX_RECT* WINPR_RESTRICT rects, + size_t numRects, const BYTE* WINPR_RESTRICT data, UINT32 width, + UINT32 height, UINT32 scanline, size_t* WINPR_RESTRICT numMessages, + size_t maxDataSize); FREERDP_API void rfx_message_list_free(RFX_MESSAGE_LIST* messages); - FREERDP_API const RFX_MESSAGE* rfx_message_list_get(const RFX_MESSAGE_LIST* messages, - size_t idx); + FREERDP_API const RFX_MESSAGE* + rfx_message_list_get(const RFX_MESSAGE_LIST* WINPR_RESTRICT messages, size_t idx); - FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* context, wStream* s, - const RFX_MESSAGE* message); + FREERDP_API BOOL rfx_write_message(RFX_CONTEXT* WINPR_RESTRICT context, + wStream* WINPR_RESTRICT s, + const RFX_MESSAGE* WINPR_RESTRICT message); FREERDP_API void rfx_context_free(RFX_CONTEXT* context); @@ -124,7 +132,7 @@ extern "C" FREERDP_API BOOL rfx_context_reset(RFX_CONTEXT* WINPR_RESTRICT context, UINT32 width, UINT32 height); - FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* context, RLGR_MODE mode); + FREERDP_API BOOL rfx_context_set_mode(RFX_CONTEXT* WINPR_RESTRICT context, RLGR_MODE mode); /** Getter for RFX mode * @param context The RFX context to query @@ -170,8 +178,9 @@ extern "C" * @since version 3.0.0 * @return \b TRUE in case of success, \b FALSE for any error */ - FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* rfx, wStream* s, - const RFX_MESSAGE* msg); + FREERDP_API BOOL rfx_write_message_progressive_simple(RFX_CONTEXT* WINPR_RESTRICT rfx, + wStream* WINPR_RESTRICT s, + const RFX_MESSAGE* WINPR_RESTRICT msg); #ifdef __cplusplus } diff --git a/include/freerdp/codec/zgfx.h b/include/freerdp/codec/zgfx.h index 3a57e7e9a..0a0d43613 100644 --- a/include/freerdp/codec/zgfx.h +++ b/include/freerdp/codec/zgfx.h @@ -45,7 +45,7 @@ extern "C" UINT32* WINPR_RESTRICT pDstSize, UINT32 flags); FREERDP_API int zgfx_compress(ZGFX_CONTEXT* WINPR_RESTRICT zgfx, const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize, - BYTE** WINPR_RESTRICT ppDstData, UINT32* pDstSize, + BYTE** WINPR_RESTRICT ppDstData, UINT32* WINPR_RESTRICT pDstSize, UINT32* WINPR_RESTRICT pFlags); FREERDP_API int zgfx_compress_to_stream(ZGFX_CONTEXT* WINPR_RESTRICT zgfx, wStream* WINPR_RESTRICT sDst, diff --git a/libfreerdp/codec/clear.c b/libfreerdp/codec/clear.c index b9b86d102..421ae4ec8 100644 --- a/libfreerdp/codec/clear.c +++ b/libfreerdp/codec/clear.c @@ -1207,7 +1207,7 @@ error_nsc: return NULL; } -void clear_context_free(CLEAR_CONTEXT* clear) +void clear_context_free(CLEAR_CONTEXT* WINPR_RESTRICT clear) { if (!clear) return; diff --git a/libfreerdp/codec/color.c b/libfreerdp/codec/color.c index 99f9d9602..a2fe53296 100644 --- a/libfreerdp/codec/color.c +++ b/libfreerdp/codec/color.c @@ -1127,8 +1127,9 @@ BOOL freerdp_image_fill(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 n return TRUE; } -BOOL freerdp_image_fill_ex(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, - UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color, UINT32 flags) +BOOL freerdp_image_fill_ex(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, + UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, UINT32 color, + UINT32 flags) { if (FreeRDPColorHasAlpha(DstFormat) && ((flags & FREERDP_IMAGE_FILL_IGNORE_ALPHA) != 0)) { diff --git a/libfreerdp/codec/progressive.c b/libfreerdp/codec/progressive.c index 86f62332e..842134cfc 100644 --- a/libfreerdp/codec/progressive.c +++ b/libfreerdp/codec/progressive.c @@ -2379,8 +2379,9 @@ fail: return rc; } -BOOL progressive_rfx_write_message_progressive_simple(PROGRESSIVE_CONTEXT* progressive, wStream* s, - const RFX_MESSAGE* msg) +BOOL progressive_rfx_write_message_progressive_simple( + PROGRESSIVE_CONTEXT* WINPR_RESTRICT progressive, wStream* WINPR_RESTRICT s, + const RFX_MESSAGE* WINPR_RESTRICT msg) { RFX_CONTEXT* context = NULL; diff --git a/libfreerdp/codec/sse/nsc_sse2.c b/libfreerdp/codec/sse/nsc_sse2.c index 784fe4211..f792680c3 100644 --- a/libfreerdp/codec/sse/nsc_sse2.c +++ b/libfreerdp/codec/sse/nsc_sse2.c @@ -377,7 +377,8 @@ static void nsc_encode_subsampling_sse2(NSC_CONTEXT* context) } } -static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanline) +static BOOL nsc_encode_sse2(NSC_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT data, + UINT32 scanline) { if (!nsc_encode_argb_to_aycocg_sse2(context, data, scanline)) return FALSE; diff --git a/libfreerdp/codec/xcrush.h b/libfreerdp/codec/xcrush.h index 5997c21b4..d05a836b2 100644 --- a/libfreerdp/codec/xcrush.h +++ b/libfreerdp/codec/xcrush.h @@ -32,14 +32,18 @@ extern "C" { #endif - FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData, UINT32 SrcSize, - BYTE* pDstBuffer, const BYTE** ppDstData, UINT32* pDstSize, - UINT32* pFlags); - FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* xcrush, const BYTE* pSrcData, - UINT32 SrcSize, const BYTE** ppDstData, UINT32* pDstSize, - UINT32 flags); + FREERDP_LOCAL int xcrush_compress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush, + const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize, + BYTE* WINPR_RESTRICT pDstBuffer, + const BYTE** WINPR_RESTRICT ppDstData, + UINT32* WINPR_RESTRICT pDstSize, + UINT32* WINPR_RESTRICT pFlags); + FREERDP_LOCAL int xcrush_decompress(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush, + const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize, + const BYTE** WINPR_RESTRICT ppDstData, + UINT32* WINPR_RESTRICT pDstSize, UINT32 flags); - FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* xcrush, BOOL flush); + FREERDP_LOCAL void xcrush_context_reset(XCRUSH_CONTEXT* WINPR_RESTRICT xcrush, BOOL flush); FREERDP_LOCAL XCRUSH_CONTEXT* xcrush_context_new(BOOL Compressor); FREERDP_LOCAL void xcrush_context_free(XCRUSH_CONTEXT* xcrush); diff --git a/libfreerdp/primitives/prim_YCoCg.c b/libfreerdp/primitives/prim_YCoCg.c index 5fabb7d0d..de1346c6b 100644 --- a/libfreerdp/primitives/prim_YCoCg.c +++ b/libfreerdp/primitives/prim_YCoCg.c @@ -34,9 +34,10 @@ static INT16 convert(UINT8 raw, int shift) } /* ------------------------------------------------------------------------- */ -static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, - UINT32 DstFormat, INT32 dstStep, UINT32 width, - UINT32 height, UINT8 shift, BOOL withAlpha) +static pstatus_t general_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, + BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, + INT32 dstStep, UINT32 width, UINT32 height, UINT8 shift, + BOOL withAlpha) { const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, TRUE); diff --git a/libfreerdp/primitives/prim_alphaComp.c b/libfreerdp/primitives/prim_alphaComp.c index e2aaaf5ef..544d8e862 100644 --- a/libfreerdp/primitives/prim_alphaComp.c +++ b/libfreerdp/primitives/prim_alphaComp.c @@ -30,8 +30,9 @@ #define ALPHA(_k_) (((_k_)&0xFF000000U) >> 24) /* ------------------------------------------------------------------------- */ -static pstatus_t general_alphaComp_argb(const BYTE* pSrc1, UINT32 src1Step, const BYTE* pSrc2, - UINT32 src2Step, BYTE* pDst, UINT32 dstStep, UINT32 width, +static pstatus_t general_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step, + const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step, + BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width, UINT32 height) { for (size_t y = 0; y < height; y++) diff --git a/libfreerdp/primitives/prim_andor.c b/libfreerdp/primitives/prim_andor.c index 993c2e2c4..23d1ca9ce 100644 --- a/libfreerdp/primitives/prim_andor.c +++ b/libfreerdp/primitives/prim_andor.c @@ -24,7 +24,8 @@ /* ---------------------------------------------------------------------------- * 32-bit AND with a constant. */ -static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len) +static pstatus_t general_andC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, + UINT32* WINPR_RESTRICT pDst, INT32 len) { if (val == 0) return PRIMITIVES_SUCCESS; @@ -38,7 +39,8 @@ static pstatus_t general_andC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, /* ---------------------------------------------------------------------------- * 32-bit OR with a constant. */ -static pstatus_t general_orC_32u(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len) +static pstatus_t general_orC_32u(const UINT32* WINPR_RESTRICT pSrc, UINT32 val, + UINT32* WINPR_RESTRICT pDst, INT32 len) { if (val == 0) return PRIMITIVES_SUCCESS; diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c index d5a26b698..5089245a2 100644 --- a/libfreerdp/primitives/prim_copy.c +++ b/libfreerdp/primitives/prim_copy.c @@ -84,7 +84,8 @@ static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, co } /* ------------------------------------------------------------------------- */ -static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len) +static pstatus_t general_copy_8u(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst, + INT32 len) { if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len)) { @@ -103,8 +104,9 @@ static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len) * The addresses are assumed to have been already offset to the upper-left * corners of the source and destination region of interest. */ -static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep, - INT32 width, INT32 height) +static pstatus_t general_copy_8u_AC4r(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, + BYTE* WINPR_RESTRICT pDst, INT32 dstStep, INT32 width, + INT32 height) { const BYTE* src = pSrc; BYTE* dst = pDst; @@ -420,7 +422,7 @@ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims) prims->copy_no_overlap = generic_image_copy_no_overlap; } -void primitives_init_copy_opt(primitives_t* prims) +void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims) { primitives_init_copy_sse41(prims); #if defined(WITH_AVX2) diff --git a/libfreerdp/primitives/prim_set.c b/libfreerdp/primitives/prim_set.c index 3fe5ce44d..d261f77bb 100644 --- a/libfreerdp/primitives/prim_set.c +++ b/libfreerdp/primitives/prim_set.c @@ -25,21 +25,21 @@ #include "prim_set.h" /* ========================================================================= */ -static pstatus_t general_set_8u(BYTE val, BYTE* pDst, UINT32 len) +static pstatus_t general_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len) { memset((void*)pDst, (int)val, (size_t)len); return PRIMITIVES_SUCCESS; } /* ------------------------------------------------------------------------- */ -static pstatus_t general_zero(void* pDst, size_t len) +static pstatus_t general_zero(void* WINPR_RESTRICT pDst, size_t len) { memset(pDst, 0, len); return PRIMITIVES_SUCCESS; } /* ========================================================================= */ -static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len) +static pstatus_t general_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len) { INT32* dptr = pDst; size_t span = 0; @@ -78,7 +78,7 @@ static pstatus_t general_set_32s(INT32 val, INT32* pDst, UINT32 len) } /* ------------------------------------------------------------------------- */ -static pstatus_t general_set_32u(UINT32 val, UINT32* pDst, UINT32 len) +static pstatus_t general_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len) { UINT32* dptr = pDst; size_t span = 0; diff --git a/libfreerdp/primitives/sse/prim_sign_ssse3.c b/libfreerdp/primitives/sse/prim_sign_ssse3.c index 57646f290..8b761ce58 100644 --- a/libfreerdp/primitives/sse/prim_sign_ssse3.c +++ b/libfreerdp/primitives/sse/prim_sign_ssse3.c @@ -31,8 +31,7 @@ static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ -static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pDst, - UINT32 len) +static pstatus_t ssse3_sign_16s(const INT16* pSrc, INT16* pDst, UINT32 len) { const INT16* sptr = pSrc; INT16* dptr = pDst; diff --git a/libfreerdp/primitives/sse/prim_templates.h b/libfreerdp/primitives/sse/prim_templates.h index 9d2703dd3..a91f60bb3 100644 --- a/libfreerdp/primitives/sse/prim_templates.h +++ b/libfreerdp/primitives/sse/prim_templates.h @@ -134,7 +134,8 @@ * PRE = preload xmm0 with the constant. */ #define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ - static pstatus_t _name_(const _type_* pSrc, _type_ val, _type_* pDst, INT32 ilen) \ + static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, _type_ val, \ + _type_* WINPR_RESTRICT pDst, INT32 ilen) \ { \ size_t len = WINPR_ASSERTING_INT_CAST(size_t, ilen); \ int shifts = 0; \ @@ -232,78 +233,80 @@ /* ---------------------------------------------------------------------------- * SSD = Source1, Source2, Destination */ -#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ - static pstatus_t _name_(const _type_* pSrc1, const _type_* pSrc2, _type_* pDst, UINT32 len) \ - { \ - int shifts = 0; \ - const _type_* sptr1 = pSrc1; \ - const _type_* sptr2 = pSrc2; \ - _type_* dptr = pDst; \ - size_t count; \ - if (len < 16) /* pointless if too small */ \ - { \ - return _fallback_(pSrc1, pSrc2, pDst, len); \ - } \ - if (sizeof(_type_) == 1) \ - shifts = 1; \ - else if (sizeof(_type_) == 2) \ - shifts = 2; \ - else if (sizeof(_type_) == 4) \ - shifts = 3; \ - else if (sizeof(_type_) == 8) \ - shifts = 4; \ - /* Use 4 128-bit SSE registers. */ \ - count = len >> (7 - shifts); \ - len -= count << (7 - shifts); \ - /* Aligned loads */ \ - while (count--) \ - { \ - __m128i xmm0 = LOAD_SI128(sptr1); \ - sptr1 += (16 / sizeof(_type_)); \ - __m128i xmm1 = LOAD_SI128(sptr1); \ - sptr1 += (16 / sizeof(_type_)); \ - __m128i xmm2 = LOAD_SI128(sptr1); \ - sptr1 += (16 / sizeof(_type_)); \ - __m128i xmm3 = LOAD_SI128(sptr1); \ - sptr1 += (16 / sizeof(_type_)); \ - __m128i xmm4 = LOAD_SI128(sptr2); \ - sptr2 += (16 / sizeof(_type_)); \ - __m128i xmm5 = LOAD_SI128(sptr2); \ - sptr2 += (16 / sizeof(_type_)); \ - __m128i xmm6 = LOAD_SI128(sptr2); \ - sptr2 += (16 / sizeof(_type_)); \ - __m128i xmm7 = LOAD_SI128(sptr2); \ - sptr2 += (16 / sizeof(_type_)); \ - xmm0 = _op_(xmm0, xmm4); \ - xmm1 = _op_(xmm1, xmm5); \ - xmm2 = _op_(xmm2, xmm6); \ - xmm3 = _op_(xmm3, xmm7); \ - STORE_SI128(dptr, xmm0); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm1); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm2); \ - dptr += (16 / sizeof(_type_)); \ - STORE_SI128(dptr, xmm3); \ - dptr += (16 / sizeof(_type_)); \ - } \ - /* Use a single 128-bit SSE register. */ \ - count = len >> (5 - shifts); \ - len -= count << (5 - shifts); \ - while (count--) \ - { \ - __m128i xmm0 = LOAD_SI128(sptr1); \ - sptr1 += (16 / sizeof(_type_)); \ - __m128i xmm1 = LOAD_SI128(sptr2); \ - sptr2 += (16 / sizeof(_type_)); \ - xmm0 = _op_(xmm0, xmm1); \ - STORE_SI128(dptr, xmm0); \ - dptr += (16 / sizeof(_type_)); \ - } \ - /* Finish off the remainder. */ \ - while (len--) \ - { \ - _slowWay_; \ - } \ - return PRIMITIVES_SUCCESS; \ +#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \ + static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc1, \ + const _type_* WINPR_RESTRICT pSrc2, _type_* WINPR_RESTRICT pDst, \ + UINT32 len) \ + { \ + int shifts = 0; \ + const _type_* sptr1 = pSrc1; \ + const _type_* sptr2 = pSrc2; \ + _type_* dptr = pDst; \ + size_t count; \ + if (len < 16) /* pointless if too small */ \ + { \ + return _fallback_(pSrc1, pSrc2, pDst, len); \ + } \ + if (sizeof(_type_) == 1) \ + shifts = 1; \ + else if (sizeof(_type_) == 2) \ + shifts = 2; \ + else if (sizeof(_type_) == 4) \ + shifts = 3; \ + else if (sizeof(_type_) == 8) \ + shifts = 4; \ + /* Use 4 128-bit SSE registers. */ \ + count = len >> (7 - shifts); \ + len -= count << (7 - shifts); \ + /* Aligned loads */ \ + while (count--) \ + { \ + __m128i xmm0 = LOAD_SI128(sptr1); \ + sptr1 += (16 / sizeof(_type_)); \ + __m128i xmm1 = LOAD_SI128(sptr1); \ + sptr1 += (16 / sizeof(_type_)); \ + __m128i xmm2 = LOAD_SI128(sptr1); \ + sptr1 += (16 / sizeof(_type_)); \ + __m128i xmm3 = LOAD_SI128(sptr1); \ + sptr1 += (16 / sizeof(_type_)); \ + __m128i xmm4 = LOAD_SI128(sptr2); \ + sptr2 += (16 / sizeof(_type_)); \ + __m128i xmm5 = LOAD_SI128(sptr2); \ + sptr2 += (16 / sizeof(_type_)); \ + __m128i xmm6 = LOAD_SI128(sptr2); \ + sptr2 += (16 / sizeof(_type_)); \ + __m128i xmm7 = LOAD_SI128(sptr2); \ + sptr2 += (16 / sizeof(_type_)); \ + xmm0 = _op_(xmm0, xmm4); \ + xmm1 = _op_(xmm1, xmm5); \ + xmm2 = _op_(xmm2, xmm6); \ + xmm3 = _op_(xmm3, xmm7); \ + STORE_SI128(dptr, xmm0); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm1); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm2); \ + dptr += (16 / sizeof(_type_)); \ + STORE_SI128(dptr, xmm3); \ + dptr += (16 / sizeof(_type_)); \ + } \ + /* Use a single 128-bit SSE register. */ \ + count = len >> (5 - shifts); \ + len -= count << (5 - shifts); \ + while (count--) \ + { \ + __m128i xmm0 = LOAD_SI128(sptr1); \ + sptr1 += (16 / sizeof(_type_)); \ + __m128i xmm1 = LOAD_SI128(sptr2); \ + sptr2 += (16 / sizeof(_type_)); \ + xmm0 = _op_(xmm0, xmm1); \ + STORE_SI128(dptr, xmm0); \ + dptr += (16 / sizeof(_type_)); \ + } \ + /* Finish off the remainder. */ \ + while (len--) \ + { \ + _slowWay_; \ + } \ + return PRIMITIVES_SUCCESS; \ }