diff --git a/libfreerdp/codec/progressive.c b/libfreerdp/codec/progressive.c index 5f8598c55..2e86094ee 100644 --- a/libfreerdp/codec/progressive.c +++ b/libfreerdp/codec/progressive.c @@ -797,13 +797,14 @@ static INLINE size_t progressive_rfx_get_band_h_count(size_t level) return (64 + (1 << (level - 1))) >> level; } -static INLINE void progressive_rfx_dwt_2d_decode_block(INT16* buffer, INT16* temp, size_t level) +static INLINE void progressive_rfx_dwt_2d_decode_block(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT temp, size_t level) { size_t nDstStepX; size_t nDstStepY; - INT16 *HL, *LH; - INT16 *HH, *LL; - INT16 *L, *H, *LLx; + INT16 *WINPR_RESTRICT HL, *WINPR_RESTRICT LH; + INT16 *WINPR_RESTRICT HH, *WINPR_RESTRICT LL; + INT16 *WINPR_RESTRICT L, *WINPR_RESTRICT H, *WINPR_RESTRICT LLx; const size_t nBandL = progressive_rfx_get_band_l_count(level); const size_t nBandH = progressive_rfx_get_band_h_count(level); @@ -835,7 +836,7 @@ static INLINE void progressive_rfx_dwt_2d_decode_block(INT16* buffer, INT16* tem nBandL + nBandH); } -void rfx_dwt_2d_extrapolate_decode(INT16* buffer, INT16* temp) +void rfx_dwt_2d_extrapolate_decode(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT temp) { WINPR_ASSERT(buffer); WINPR_ASSERT(temp); diff --git a/libfreerdp/codec/rfx_decode.c b/libfreerdp/codec/rfx_decode.c index c77e86dcb..a61a8ab5a 100644 --- a/libfreerdp/codec/rfx_decode.c +++ b/libfreerdp/codec/rfx_decode.c @@ -35,8 +35,10 @@ #include "rfx_decode.h" -void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantization_values, const BYTE* data, - int size, INT16* buffer) +void rfx_decode_component(RFX_CONTEXT* WINPR_RESTRICT context, + const UINT32* WINPR_RESTRICT quantization_values, + const BYTE* WINPR_RESTRICT data, size_t size, + INT16* WINPR_RESTRICT buffer) { INT16* dwt_buffer; dwt_buffer = BufferPool_Take(context->priv->BufferPool, -1); /* dwt_buffer */ diff --git a/libfreerdp/codec/rfx_decode.h b/libfreerdp/codec/rfx_decode.h index f973fd511..978b1929c 100644 --- a/libfreerdp/codec/rfx_decode.h +++ b/libfreerdp/codec/rfx_decode.h @@ -20,12 +20,17 @@ #ifndef FREERDP_LIB_CODEC_RFX_DECODE_H #define FREERDP_LIB_CODEC_RFX_DECODE_H +#include + #include #include /* stride is bytes between rows in the output buffer. */ -FREERDP_LOCAL BOOL rfx_decode_rgb(RFX_CONTEXT* context, const RFX_TILE* tile, BYTE* rgb_buffer, - UINT32 stride); -FREERDP_LOCAL void rfx_decode_component(RFX_CONTEXT* context, const UINT32* quantization_values, - const BYTE* data, int size, INT16* buffer); +FREERDP_LOCAL BOOL rfx_decode_rgb(RFX_CONTEXT* WINPR_RESTRICT context, + const RFX_TILE* WINPR_RESTRICT tile, + BYTE* WINPR_RESTRICT rgb_buffer, UINT32 stride); +FREERDP_LOCAL void rfx_decode_component(RFX_CONTEXT* WINPR_RESTRICT context, + const UINT32* WINPR_RESTRICT quantization_values, + const BYTE* WINPR_RESTRICT data, size_t size, + INT16* WINPR_RESTRICT buffer); #endif /* FREERDP_LIB_CODEC_RFX_DECODE_H */ diff --git a/libfreerdp/codec/rfx_dwt.c b/libfreerdp/codec/rfx_dwt.c index d27d9864e..0164cee24 100644 --- a/libfreerdp/codec/rfx_dwt.c +++ b/libfreerdp/codec/rfx_dwt.c @@ -25,11 +25,12 @@ #include "rfx_dwt.h" -static void rfx_dwt_2d_decode_block(INT16* buffer, INT16* idwt, int subband_width) +static void rfx_dwt_2d_decode_block(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT idwt, + int subband_width) { - INT16 *dst, *l, *h; - INT16 *l_dst, *h_dst; - INT16 *hl, *lh, *hh, *ll; + INT16 *WINPR_RESTRICT dst, *WINPR_RESTRICT l, *WINPR_RESTRICT h; + INT16 *WINPR_RESTRICT l_dst, *WINPR_RESTRICT h_dst; + INT16 *WINPR_RESTRICT hl, *WINPR_RESTRICT lh, *WINPR_RESTRICT hh, *WINPR_RESTRICT ll; int total_width; int x, y; int n; @@ -109,30 +110,31 @@ static void rfx_dwt_2d_decode_block(INT16* buffer, INT16* idwt, int subband_widt } } -void rfx_dwt_2d_decode(INT16* buffer, INT16* dwt_buffer) +void rfx_dwt_2d_decode(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(dwt_buffer); + rfx_dwt_2d_decode_block(&buffer[3840], dwt_buffer, 8); rfx_dwt_2d_decode_block(&buffer[3072], dwt_buffer, 16); rfx_dwt_2d_decode_block(&buffer[0], dwt_buffer, 32); } -static void rfx_dwt_2d_encode_block(INT16* buffer, INT16* dwt, int subband_width) +static void rfx_dwt_2d_encode_block(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt, + UINT32 subband_width) { INT16 *src, *l, *h; INT16 *l_src, *h_src; INT16 *hl, *lh, *hh, *ll; - int total_width; - int x, y; - int n; - total_width = subband_width << 1; + const UINT32 total_width = subband_width << 1; /* DWT in vertical direction, results in 2 sub-bands in L, H order in tmp buffer dwt. */ - for (x = 0; x < total_width; x++) + for (UINT32 x = 0; x < total_width; x++) { - for (n = 0; n < subband_width; n++) + for (UINT32 n = 0; n < subband_width; n++) { - y = n << 1; + UINT32 y = n << 1; l = dwt + n * total_width + x; h = l + subband_width * total_width; src = buffer + y * total_width + x; @@ -160,12 +162,12 @@ static void rfx_dwt_2d_encode_block(INT16* buffer, INT16* dwt, int subband_width hh = buffer + subband_width * subband_width * 2; h_src = dwt + subband_width * subband_width * 2; - for (y = 0; y < subband_width; y++) + for (UINT32 y = 0; y < subband_width; y++) { /* L */ - for (n = 0; n < subband_width; n++) + for (UINT32 n = 0; n < subband_width; n++) { - x = n << 1; + UINT32 x = n << 1; /* HL */ hl[n] = @@ -175,9 +177,9 @@ static void rfx_dwt_2d_encode_block(INT16* buffer, INT16* dwt, int subband_width } /* H */ - for (n = 0; n < subband_width; n++) + for (UINT32 n = 0; n < subband_width; n++) { - x = n << 1; + UINT32 x = n << 1; /* HH */ hh[n] = @@ -196,8 +198,11 @@ static void rfx_dwt_2d_encode_block(INT16* buffer, INT16* dwt, int subband_width } } -void rfx_dwt_2d_encode(INT16* buffer, INT16* dwt_buffer) +void rfx_dwt_2d_encode(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(dwt_buffer); + rfx_dwt_2d_encode_block(&buffer[0], dwt_buffer, 32); rfx_dwt_2d_encode_block(&buffer[3072], dwt_buffer, 16); rfx_dwt_2d_encode_block(&buffer[3840], dwt_buffer, 8); diff --git a/libfreerdp/codec/rfx_dwt.h b/libfreerdp/codec/rfx_dwt.h index cca23347c..445b1a1be 100644 --- a/libfreerdp/codec/rfx_dwt.h +++ b/libfreerdp/codec/rfx_dwt.h @@ -20,11 +20,15 @@ #ifndef FREERDP_LIB_CODEC_RFX_DWT_H #define FREERDP_LIB_CODEC_RFX_DWT_H +#include #include #include -FREERDP_LOCAL void rfx_dwt_2d_decode(INT16* buffer, INT16* dwt_buffer); -FREERDP_LOCAL void rfx_dwt_2d_encode(INT16* buffer, INT16* dwt_buffer); -FREERDP_LOCAL void rfx_dwt_2d_extrapolate_decode(INT16* buffer, INT16* dwt_buffer); +FREERDP_LOCAL void rfx_dwt_2d_decode(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT dwt_buffer); +FREERDP_LOCAL void rfx_dwt_2d_encode(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT dwt_buffer); +FREERDP_LOCAL void rfx_dwt_2d_extrapolate_decode(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT dwt_buffer); #endif /* FREERDP_LIB_CODEC_RFX_DWT_H */ diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index 451de022b..7f04beb49 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -33,7 +33,8 @@ /* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_quantization_decode_block_NEON(INT16* buffer, const int buffer_size, const UINT32 factor) +rfx_quantization_decode_block_NEON(INT16* WINPR_RESTRICT buffer, const int buffer_size, + const UINT32 factor) { int16x8_t quantFactors = vdupq_n_s16(factor); int16x8_t* buf = (int16x8_t*)buffer; @@ -48,8 +49,12 @@ rfx_quantization_decode_block_NEON(INT16* buffer, const int buffer_size, const U } while (buf < buf_end); } -static void rfx_quantization_decode_NEON(INT16* buffer, const UINT32* quantVals) +static void rfx_quantization_decode_NEON(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantVals) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(quantVals); + rfx_quantization_decode_block_NEON(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */ rfx_quantization_decode_block_NEON(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ rfx_quantization_decode_block_NEON(&buffer[2048], 1024, quantVals[9] - 1); /* HH1 */ @@ -63,7 +68,8 @@ static void rfx_quantization_decode_NEON(INT16* buffer, const UINT32* quantVals) } static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_horiz_NEON(INT16* l, INT16* h, INT16* dst, int subband_width) +rfx_dwt_2d_decode_block_horiz_NEON(INT16* WINPR_RESTRICT l, INT16* WINPR_RESTRICT h, + INT16* WINPR_RESTRICT dst, int subband_width) { int y, n; INT16* l_ptr = l; @@ -126,7 +132,8 @@ rfx_dwt_2d_decode_block_horiz_NEON(INT16* l, INT16* h, INT16* dst, int subband_w } static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_vert_NEON(INT16* l, INT16* h, INT16* dst, int subband_width) +rfx_dwt_2d_decode_block_vert_NEON(INT16* WINPR_RESTRICT l, INT16* WINPR_RESTRICT h, + INT16* WINPR_RESTRICT dst, int subband_width) { int x, n; INT16* l_ptr = l; @@ -197,7 +204,8 @@ rfx_dwt_2d_decode_block_vert_NEON(INT16* l, INT16* h, INT16* dst, int subband_wi } static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_NEON(INT16* buffer, INT16* idwt, int subband_width) +rfx_dwt_2d_decode_block_NEON(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT idwt, + int subband_width) { INT16 *hl, *lh, *hh, *ll; INT16 *l_dst, *h_dst; @@ -218,7 +226,7 @@ rfx_dwt_2d_decode_block_NEON(INT16* buffer, INT16* idwt, int subband_width) rfx_dwt_2d_decode_block_vert_NEON(l_dst, h_dst, buffer, subband_width); } -static void rfx_dwt_2d_decode_NEON(INT16* buffer, INT16* dwt_buffer) +static void rfx_dwt_2d_decode_NEON(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer) { rfx_dwt_2d_decode_block_NEON(buffer + 3840, dwt_buffer, 8); rfx_dwt_2d_decode_block_NEON(buffer + 3072, dwt_buffer, 16); @@ -462,7 +470,8 @@ static INLINE size_t prfx_get_band_h_count(size_t level) return (64 + (1 << (level - 1))) >> level; } -static INLINE void rfx_dwt_2d_decode_extrapolate_block_neon(INT16* buffer, INT16* temp, +static INLINE void rfx_dwt_2d_decode_extrapolate_block_neon(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT temp, size_t level) { size_t nDstStepX; @@ -504,7 +513,8 @@ static INLINE void rfx_dwt_2d_decode_extrapolate_block_neon(INT16* buffer, INT16 nBandL + nBandH); } -static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp) +static void rfx_dwt_2d_extrapolate_decode_neon(INT16* WINPR_RESTRICT buffer, + INT16* WINPR_RESTRICT temp) { WINPR_ASSERT(buffer); WINPR_ASSERT(temp); diff --git a/libfreerdp/codec/rfx_quantization.c b/libfreerdp/codec/rfx_quantization.c index 69fa67444..be75d3f45 100644 --- a/libfreerdp/codec/rfx_quantization.c +++ b/libfreerdp/codec/rfx_quantization.c @@ -41,7 +41,8 @@ * LL3 4032 8x8 64 */ -static void rfx_quantization_decode_block(const primitives_t* prims, INT16* buffer, int buffer_size, +static void rfx_quantization_decode_block(const primitives_t* WINPR_RESTRICT prims, + INT16* WINPR_RESTRICT buffer, UINT32 buffer_size, UINT32 factor) { if (factor == 0) @@ -50,9 +51,11 @@ static void rfx_quantization_decode_block(const primitives_t* prims, INT16* buff prims->lShiftC_16s(buffer, factor, buffer, buffer_size); } -void rfx_quantization_decode(INT16* buffer, const UINT32* quantVals) +void rfx_quantization_decode(INT16* WINPR_RESTRICT buffer, const UINT32* WINPR_RESTRICT quantVals) { const primitives_t* prims = primitives_get(); + WINPR_ASSERT(buffer); + WINPR_ASSERT(quantVals); rfx_quantization_decode_block(prims, &buffer[0], 1024, quantVals[8] - 1); /* HL1 */ rfx_quantization_decode_block(prims, &buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ @@ -66,7 +69,8 @@ void rfx_quantization_decode(INT16* buffer, const UINT32* quantVals) rfx_quantization_decode_block(prims, &buffer[4032], 64, quantVals[0] - 1); /* LL3 */ } -static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 factor) +static void rfx_quantization_encode_block(INT16* WINPR_RESTRICT buffer, size_t buffer_size, + UINT32 factor) { INT16* dst; INT16 half; @@ -82,8 +86,12 @@ static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 } } -void rfx_quantization_encode(INT16* buffer, const UINT32* quantization_values) +void rfx_quantization_encode(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantization_values) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(quantization_values); + rfx_quantization_encode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */ rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ diff --git a/libfreerdp/codec/rfx_rlgr.c b/libfreerdp/codec/rfx_rlgr.c index 133287256..be75a5720 100644 --- a/libfreerdp/codec/rfx_rlgr.c +++ b/libfreerdp/codec/rfx_rlgr.c @@ -135,8 +135,8 @@ static INLINE UINT32 lzcnt_s(UINT32 x) return __lzcnt(x); } -int rfx_rlgr_decode(RLGR_MODE mode, const BYTE* pSrcData, UINT32 SrcSize, INT16* pDstData, - UINT32 rDstSize) +int rfx_rlgr_decode(RLGR_MODE mode, const BYTE* WINPR_RESTRICT pSrcData, UINT32 SrcSize, + INT16* WINPR_RESTRICT pDstData, UINT32 rDstSize) { int vk = 0; size_t run = 0; @@ -629,8 +629,8 @@ static void rfx_rlgr_code_gr(RFX_BITSTREAM* bs, int* krp, UINT32 val) } } -int rfx_rlgr_encode(RLGR_MODE mode, const INT16* data, UINT32 data_size, BYTE* buffer, - UINT32 buffer_size) +int rfx_rlgr_encode(RLGR_MODE mode, const INT16* WINPR_RESTRICT data, UINT32 data_size, + BYTE* WINPR_RESTRICT buffer, UINT32 buffer_size) { int k; int kp; diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/rfx_sse2.c index 694daed48..ac6d1b37b 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/rfx_sse2.c @@ -82,8 +82,12 @@ rfx_quantization_decode_block_sse2(INT16* buffer, const int buffer_size, const U } while (ptr < buf_end); } -static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantVals) +static void rfx_quantization_decode_sse2(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantVals) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(quantVals); + _mm_prefetch_buffer((char*)buffer, 4096 * sizeof(INT16)); rfx_quantization_decode_block_sse2(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */ rfx_quantization_decode_block_sse2(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ @@ -120,8 +124,12 @@ rfx_quantization_encode_block_sse2(INT16* buffer, const int buffer_size, const U } while (ptr < buf_end); } -static void rfx_quantization_encode_sse2(INT16* buffer, const UINT32* quantization_values) +static void rfx_quantization_encode_sse2(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantization_values) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(quantization_values); + _mm_prefetch_buffer((char*)buffer, 4096 * sizeof(INT16)); rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */ rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ @@ -312,8 +320,11 @@ rfx_dwt_2d_decode_block_sse2(INT16* buffer, INT16* idwt, int subband_width) rfx_dwt_2d_decode_block_vert_sse2(l_dst, h_dst, buffer, subband_width); } -static void rfx_dwt_2d_decode_sse2(INT16* buffer, INT16* dwt_buffer) +static void rfx_dwt_2d_decode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(dwt_buffer); + _mm_prefetch_buffer((char*)buffer, 4096 * sizeof(INT16)); rfx_dwt_2d_decode_block_sse2(&buffer[3840], dwt_buffer, 8); rfx_dwt_2d_decode_block_sse2(&buffer[3072], dwt_buffer, 16); @@ -445,8 +456,11 @@ rfx_dwt_2d_encode_block_sse2(INT16* buffer, INT16* dwt, int subband_width) rfx_dwt_2d_encode_block_horiz_sse2(h_src, lh, hh, subband_width); } -static void rfx_dwt_2d_encode_sse2(INT16* buffer, INT16* dwt_buffer) +static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer) { + WINPR_ASSERT(buffer); + WINPR_ASSERT(dwt_buffer); + _mm_prefetch_buffer((char*)buffer, 4096 * sizeof(INT16)); rfx_dwt_2d_encode_block_sse2(buffer, dwt_buffer, 32); rfx_dwt_2d_encode_block_sse2(buffer + 3072, dwt_buffer, 16); diff --git a/libfreerdp/codec/rfx_types.h b/libfreerdp/codec/rfx_types.h index 707efbe67..a9cd314da 100644 --- a/libfreerdp/codec/rfx_types.h +++ b/libfreerdp/codec/rfx_types.h @@ -165,15 +165,17 @@ struct S_RFX_CONTEXT struct S_RFX_MESSAGE currentMessage; /* routines */ - void (*quantization_decode)(INT16* buffer, const UINT32* quantization_values); - void (*quantization_encode)(INT16* buffer, const UINT32* quantization_values); - void (*dwt_2d_decode)(INT16* buffer, INT16* dwt_buffer); - void (*dwt_2d_extrapolate_decode)(INT16* src, INT16* temp); - void (*dwt_2d_encode)(INT16* buffer, INT16* dwt_buffer); - int (*rlgr_decode)(RLGR_MODE mode, const BYTE* data, UINT32 data_size, INT16* buffer, - UINT32 buffer_size); - int (*rlgr_encode)(RLGR_MODE mode, const INT16* data, UINT32 data_size, BYTE* buffer, - UINT32 buffer_size); + void (*quantization_decode)(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantization_values); + void (*quantization_encode)(INT16* WINPR_RESTRICT buffer, + const UINT32* WINPR_RESTRICT quantization_values); + void (*dwt_2d_decode)(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer); + void (*dwt_2d_extrapolate_decode)(INT16* WINPR_RESTRICT src, INT16* WINPR_RESTRICT temp); + void (*dwt_2d_encode)(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RESTRICT dwt_buffer); + int (*rlgr_decode)(RLGR_MODE mode, const BYTE* WINPR_RESTRICT data, UINT32 data_size, + INT16* WINPR_RESTRICT buffer, UINT32 buffer_size); + int (*rlgr_encode)(RLGR_MODE mode, const INT16* WINPR_RESTRICT data, UINT32 data_size, + BYTE* WINPR_RESTRICT buffer, UINT32 buffer_size); /* private definitions */ RFX_CONTEXT_PRIV* priv;