[primitives] fix warnings and restricted use

This commit is contained in:
akallabeth
2025-03-10 21:47:44 +01:00
parent a59a3e25a2
commit 2b53807256
5 changed files with 126 additions and 109 deletions

View File

@@ -140,13 +140,20 @@ typedef pstatus_t (*__copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD Ds
UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
UINT32 flags);
/* Function-pointer types for the 16-bit shift/sign primitive entry points.
 * Every type returns a pstatus_t. Apart from the in-place variant (a single
 * pSrcDst pointer) each takes a const input pointer `pSrc`, an output pointer
 * (spelled `pSrcDst` in these declarations) and a `len` value; the shift
 * variants additionally take `val` (UINT32 for lShiftC and rShiftC, INT32 for shiftC).
 * NOTE(review): this text is a rendered diff. Each typedef name below is
 * declared twice, once without and once with WINPR_RESTRICT on the pointer
 * parameters; these look like the before/after sides of the change rather
 * than two declarations meant to coexist - confirm against the real header.
 * NOTE(review): WINPR_RESTRICT presumably expands to the C `restrict`
 * qualifier, in which case callers must not pass overlapping pSrc/pSrcDst
 * buffers - confirm against the macro definition. */
typedef pstatus_t (*__lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, UINT32 len);
typedef pstatus_t (*__lShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__rShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__rShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__shiftC_16s_t)(const INT16* pSrc, INT32 val, INT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__shiftC_16u_t)(const UINT16* pSrc, INT32 val, UINT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__sign_16s_t)(const INT16* pSrc, INT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__lShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val,
INT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val,
UINT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__rShiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, UINT32 val,
INT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__rShiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, UINT32 val,
UINT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__shiftC_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT32 val,
INT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__shiftC_16u_t)(const UINT16* WINPR_RESTRICT pSrc, INT32 val,
UINT16* WINPR_RESTRICT pSrcDst, UINT32 len);
typedef pstatus_t (*__sign_16s_t)(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pSrcDst,
UINT32 len);
typedef pstatus_t (*__yCbCrToRGB_16s8u_P3AC4R_t)(const INT16* WINPR_RESTRICT pSrc[3],
UINT32 srcStep, BYTE* WINPR_RESTRICT pDst,
UINT32 dstStep, UINT32 DstFormat,

View File

@@ -44,7 +44,8 @@ static INLINE pstatus_t general_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDs
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t general_lShiftC_16s(const INT16* pSrc, UINT32 val, INT16* pDst, UINT32 len)
static INLINE pstatus_t general_lShiftC_16s(const INT16* WINPR_RESTRICT pSrc, UINT32 val,
INT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -58,7 +59,8 @@ static INLINE pstatus_t general_lShiftC_16s(const INT16* pSrc, UINT32 val, INT16
}
/* ------------------------------------------------------------------------- */
static INLINE pstatus_t general_rShiftC_16s(const INT16* pSrc, UINT32 val, INT16* pDst, UINT32 len)
static INLINE pstatus_t general_rShiftC_16s(const INT16* WINPR_RESTRICT pSrc, UINT32 val,
INT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -72,8 +74,8 @@ static INLINE pstatus_t general_rShiftC_16s(const INT16* pSrc, UINT32 val, INT16
}
/* ------------------------------------------------------------------------- */
static INLINE pstatus_t general_lShiftC_16u(const UINT16* pSrc, UINT32 val, UINT16* pDst,
UINT32 len)
static INLINE pstatus_t general_lShiftC_16u(const UINT16* WINPR_RESTRICT pSrc, UINT32 val,
UINT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -87,8 +89,8 @@ static INLINE pstatus_t general_lShiftC_16u(const UINT16* pSrc, UINT32 val, UINT
}
/* ------------------------------------------------------------------------- */
static INLINE pstatus_t general_rShiftC_16u(const UINT16* pSrc, UINT32 val, UINT16* pDst,
UINT32 len)
static INLINE pstatus_t general_rShiftC_16u(const UINT16* WINPR_RESTRICT pSrc, UINT32 val,
UINT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -102,7 +104,8 @@ static INLINE pstatus_t general_rShiftC_16u(const UINT16* pSrc, UINT32 val, UINT
}
/* ------------------------------------------------------------------------- */
static INLINE pstatus_t general_shiftC_16s(const INT16* pSrc, INT32 val, INT16* pDst, UINT32 len)
static INLINE pstatus_t general_shiftC_16s(const INT16* WINPR_RESTRICT pSrc, INT32 val,
INT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;
@@ -114,7 +117,8 @@ static INLINE pstatus_t general_shiftC_16s(const INT16* pSrc, INT32 val, INT16*
}
/* ------------------------------------------------------------------------- */
static INLINE pstatus_t general_shiftC_16u(const UINT16* pSrc, INT32 val, UINT16* pDst, UINT32 len)
static INLINE pstatus_t general_shiftC_16u(const UINT16* WINPR_RESTRICT pSrc, INT32 val,
UINT16* WINPR_RESTRICT pDst, UINT32 len)
{
if (val == 0)
return PRIMITIVES_SUCCESS;

View File

@@ -31,15 +31,16 @@
static primitives_t* generic = NULL;
static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len)
static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 ulen)
{
size_t len = ulen;
BYTE byte = 0;
BYTE* dptr = NULL;
__m128i xmm0;
size_t count = 0;
if (len < 16)
return generic->set_8u(val, pDst, len);
return generic->set_8u(val, pDst, ulen);
byte = val;
dptr = pDst;
@@ -114,8 +115,9 @@ static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len)
}
/* ------------------------------------------------------------------------- */
static pstatus_t sse2_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len)
static pstatus_t sse2_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 ulen)
{
size_t len = ulen;
const primitives_t* prim = primitives_get_generic();
UINT32* dptr = pDst;
__m128i xmm0;
@@ -133,7 +135,7 @@ static pstatus_t sse2_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 le
/* Assure we can reach 16-byte alignment. */
if (((ULONG_PTR)dptr & 0x03) != 0)
{
return prim->set_32u(val, pDst, len);
return prim->set_32u(val, pDst, ulen);
}
/* Seek 16-byte alignment. */

View File

@@ -31,21 +31,23 @@
static primitives_t* generic = NULL;
/* ------------------------------------------------------------------------- */
static pstatus_t ssse3_sign_16s(const INT16* pSrc, INT16* pDst, UINT32 len)
static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pDst,
UINT32 ulen)
{
size_t len = ulen;
const INT16* sptr = pSrc;
INT16* dptr = pDst;
size_t count = 0;
if (len < 16)
{
return generic->sign_16s(pSrc, pDst, len);
return generic->sign_16s(pSrc, pDst, ulen);
}
/* Check for 16-byte alignment (eventually). */
if ((ULONG_PTR)pDst & 0x01)
{
return generic->sign_16s(pSrc, pDst, len);
return generic->sign_16s(pSrc, pDst, ulen);
}
/* Seek 16-byte alignment. */

View File

@@ -40,93 +40,95 @@
/* ----------------------------------------------------------------------------
* SCD = Source, Constant, Destination
*/
#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _op_type_, _slowWay_) \
static pstatus_t _name_(const _type_* pSrc, UINT32 val, _type_* pDst, UINT32 len) \
{ \
INT32 shifts = 0; \
const _type_* sptr = pSrc; \
_type_* dptr = pDst; \
if (val == 0) \
return PRIMITIVES_SUCCESS; \
if (val >= 16) \
return -1; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc, val, pDst, len); \
} \
if (sizeof(_type_) == 1) \
shifts = 1; \
else if (sizeof(_type_) == 2) \
shifts = 2; \
else if (sizeof(_type_) == 4) \
shifts = 3; \
else if (sizeof(_type_) == 8) \
shifts = 4; \
/* Use 8 128-bit SSE registers. */ \
size_t count = len >> (8 - shifts); \
len -= count << (8 - shifts); \
\
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm2 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm3 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm4 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm5 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm6 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm7 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, (_op_type_)val); \
xmm1 = _op_(xmm1, (_op_type_)val); \
xmm2 = _op_(xmm2, (_op_type_)val); \
xmm3 = _op_(xmm3, (_op_type_)val); \
xmm4 = _op_(xmm4, (_op_type_)val); \
xmm5 = _op_(xmm5, (_op_type_)val); \
xmm6 = _op_(xmm6, (_op_type_)val); \
xmm7 = _op_(xmm7, (_op_type_)val); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm1); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm2); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm3); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm4); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm5); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm6); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm7); \
dptr += (16 / sizeof(_type_)); \
} \
\
/* Use a single 128-bit SSE register. */ \
count = len >> (5 - shifts); \
len -= count << (5 - shifts); \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, (_op_type_)val); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) \
{ \
_slowWay_; \
} \
return PRIMITIVES_SUCCESS; \
/* SSE3_SCD_ROUTINE expands to a static function
 *   pstatus_t _name_(const _type_* pSrc, UINT32 val, _type_* pDst, UINT32 ulen)
 * that applies `_op_(reg, (_op_type_)val)` to the input 128 bits at a time.
 *
 * Macro parameters:
 *   _name_     name of the generated function
 *   _type_     element type of pSrc/pDst; its sizeof selects the block sizes
 *   _fallback_ routine called as _fallback_(pSrc, val, pDst, ulen) for short input
 *   _op_       operation applied to each loaded __m128i together with val
 *   _op_type_  type val is cast to before being handed to _op_
 *   _slowWay_  statement executed once per leftover element after the
 *              vector loops
 *
 * Behaviour of the generated function:
 *   - val == 0  returns PRIMITIVES_SUCCESS immediately; nothing is written
 *               to pDst on this path.
 *   - val >= 16 returns -1.
 *   - ulen < 16 delegates to _fallback_ with the original UINT32 length.
 *   - otherwise three loops run in order: 8 registers per iteration, then
 *     1 register per iteration, then _slowWay_ for the remaining `len`.
 *
 * `ulen` is copied into a size_t `len` so the block arithmetic is done in
 * size_t. `shifts` is derived from sizeof(_type_) such that
 * `len >> (8 - shifts)` counts 128-byte blocks (8 registers) and
 * `len >> (5 - shifts)` counts 16-byte blocks (1 register).
 *
 * NOTE(review): both pointers are WINPR_RESTRICT-qualified, so overlapping
 * pSrc/pDst is presumably not supported - confirm against the macro's
 * definition.
 * NOTE(review): LOAD_SI128/STORE_SI128 are defined elsewhere; no alignment
 * check is visible in this macro, so they presumably tolerate unaligned
 * pointers - verify.
 * NOTE(review): _slowWay_ is presumably expected to advance sptr/dptr itself;
 * the remainder loop here only decrements len - verify at the expansion sites.
 */
#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _op_type_, _slowWay_) \
static pstatus_t _name_(const _type_* WINPR_RESTRICT pSrc, UINT32 val, \
_type_* WINPR_RESTRICT pDst, UINT32 ulen) \
{ \
size_t len = ulen; \
INT32 shifts = 0; \
const _type_* sptr = pSrc; \
_type_* dptr = pDst; \
if (val == 0) \
return PRIMITIVES_SUCCESS; \
if (val >= 16) \
return -1; \
if (len < 16) /* pointless if too small */ \
{ \
return _fallback_(pSrc, val, pDst, ulen); \
} \
if (sizeof(_type_) == 1) \
shifts = 1; \
else if (sizeof(_type_) == 2) \
shifts = 2; \
else if (sizeof(_type_) == 4) \
shifts = 3; \
else if (sizeof(_type_) == 8) \
shifts = 4; \
/* Use 8 128-bit SSE registers. */ \
size_t count = len >> (8 - shifts); \
len -= count << (8 - shifts); \
\
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm1 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm2 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm3 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm4 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm5 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm6 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
__m128i xmm7 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, (_op_type_)val); \
xmm1 = _op_(xmm1, (_op_type_)val); \
xmm2 = _op_(xmm2, (_op_type_)val); \
xmm3 = _op_(xmm3, (_op_type_)val); \
xmm4 = _op_(xmm4, (_op_type_)val); \
xmm5 = _op_(xmm5, (_op_type_)val); \
xmm6 = _op_(xmm6, (_op_type_)val); \
xmm7 = _op_(xmm7, (_op_type_)val); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm1); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm2); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm3); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm4); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm5); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm6); \
dptr += (16 / sizeof(_type_)); \
STORE_SI128(dptr, xmm7); \
dptr += (16 / sizeof(_type_)); \
} \
\
/* Use a single 128-bit SSE register. */ \
count = len >> (5 - shifts); \
len -= count << (5 - shifts); \
while (count--) \
{ \
__m128i xmm0 = LOAD_SI128(sptr); \
sptr += (16 / sizeof(_type_)); \
xmm0 = _op_(xmm0, (_op_type_)val); \
STORE_SI128(dptr, xmm0); \
dptr += (16 / sizeof(_type_)); \
} \
/* Finish off the remainder. */ \
while (len--) \
{ \
_slowWay_; \
} \
return PRIMITIVES_SUCCESS; \
}
/* ----------------------------------------------------------------------------