primitives: updated tests, cleanup and build fixes

This commit is contained in:
Bernhard Miklautz
2013-02-28 10:49:39 +01:00
parent 589d32dc56
commit 081be8b01f
11 changed files with 118 additions and 26 deletions

View File

@@ -31,11 +31,28 @@ set(PRIMITIVE_TEST_CFILES
test_set.c
test_shift.c
test_sign.c
../prim_add.c
../prim_andor.c
../prim_alphaComp.c
../prim_colors.c
../prim_copy.c
../prim_set.c
../prim_shift.c
../prim_sign.c
../prim_add_opt.c
../prim_alphaComp_opt.c
../prim_andor_opt.c
../prim_colors_opt.c
../prim_set_opt.c
../prim_shift_opt.c
../prim_sign_opt.c
../primitives.c
)
set(PRIMITIVE_TEST_HEADERS
measure.h
prim_test.h
../prim_internal.h
)
set(PRIMITIVE_TEST_SRCS
@@ -121,7 +138,7 @@ endif()
set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo)
target_link_libraries(prim_test rt winpr-sysinfo)
if(NOT TESTING_OUTPUT_DIRECTORY)
set(TESTING_OUTPUT_DIRECTORY .)
endif()

View File

@@ -44,8 +44,8 @@ typedef struct
} flagpair_t;
static const flagpair_t flags[] =
#ifdef _M_IX86_AMD64
{
#ifdef _M_IX86_AMD64
{ PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" },
{ PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" },
{ PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" },

View File

@@ -101,7 +101,7 @@ extern int test_or_32u_speed(void);
/* Since so much of this code is repeated, define a macro to build
* functions to do speed tests.
*/
#ifdef armel
#ifdef _M_ARM
#define SIMD_TYPE "Neon"
#else
#define SIMD_TYPE "SSE"
@@ -122,7 +122,7 @@ extern int test_or_32u_speed(void);
} \
} while (0)
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON))
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(_M_ARM) && defined(WITH_NEON))
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
@@ -160,6 +160,7 @@ extern int test_or_32u_speed(void);
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_)
#endif
/* Empty statement used as a placeholder argument: STD_SPEED_TEST invocations
 * pass it in the optimized-function position (together with a FALSE enable
 * flag) when no optimized implementation is available to call. */
#define PRIM_NOP do {} while (0)
/* ------------------------------------------------------------------------- */
#define STD_SPEED_TEST( \
_name_, _srctype_, _dsttype_, _prework_, \

View File

@@ -34,7 +34,9 @@ int test_add16s_func(void)
INT16 ALIGN(src1[FUNC_TEST_SIZE+3]), ALIGN(src2[FUNC_TEST_SIZE+3]),
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]);
int failed = 0;
#if defined(WITH_SSE2) || defined(WITH_IPP)
int i;
#endif
char testStr[256];
testStr[0] = '\0';
@@ -43,7 +45,7 @@ int test_add16s_func(void)
memset(d1, 0, sizeof(d1));
memset(d2, 0, sizeof(d2));
general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
@@ -70,7 +72,7 @@ int test_add16s_func(void)
}
}
}
#endif /* i386 */
#endif
#ifdef WITH_IPP
strcat(testStr, " IPP");
ippsAdd_16s(src1+1, src2+1, d2+1, FUNC_TEST_SIZE);
@@ -91,7 +93,11 @@ int test_add16s_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for add_16s (general vs. SSE3 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C, so they must not appear between the parentheses.
 * Without WITH_SSE2 the optimized-function arguments are FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
    TRUE, general_add_16s(src1, src2, dst, size),
    TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsAdd_16s(src1, src2, dst, size));
#else
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
    TRUE, general_add_16s(src1, src2, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsAdd_16s(src1, src2, dst, size));
#endif
int test_add16s_speed(void)

View File

@@ -132,7 +132,7 @@ int test_alphaComp_func(void)
general_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
(const BYTE *) src2, 4*SRC2_WIDTH,
(BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
@@ -165,7 +165,7 @@ int test_alphaComp_func(void)
x, y, s1, s2, c0, c1);
error = 1;
}
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
@@ -206,8 +206,13 @@ int test_alphaComp_func(void)
/* Speed-test instantiation for alphaComp_argb (general vs. SSE2 vs. IPP).
 * The prework argument derives the byte stride from the pixel size.
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
    TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
        size, size),
    TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
        size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
        size, size));
#else
STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
    TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
        size, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
        size, size));
#endif

View File

@@ -55,7 +55,7 @@ int test_and_32u_func(void)
++failed;
}
}
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
@@ -92,7 +92,11 @@ int test_and_32u_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for andC_32u (general vs. SSE3 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, general_andC_32u(src1, constant, dst, size),
    TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsAndC_32u(src1, constant, dst, size))
#else
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, general_andC_32u(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsAndC_32u(src1, constant, dst, size))
#endif
int test_and_32u_speed(void)
@@ -127,7 +131,7 @@ int test_or_32u_func(void)
++failed;
}
}
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
@@ -164,7 +168,11 @@ int test_or_32u_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for orC_32u (general vs. SSE3 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, general_orC_32u(src1, constant, dst, size),
    TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsOrC_32u(src1, constant, dst, size))
#else
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, general_orC_32u(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsOrC_32u(src1, constant, dst, size))
#endif
int test_or_32u_speed(void)

View File

@@ -31,12 +31,17 @@ extern pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
extern pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
extern pstatus_t neon_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
/* ------------------------------------------------------------------------- */
int test_RGBToRGB_16s8u_P3AC4R_func(void)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
UINT32 ALIGN(out1[4096]);
#ifdef WITH_SSE2
UINT32 ALIGN(out2[4096]);
#endif
int i;
int failed = 0;
char testStr[256];
@@ -61,7 +66,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out1, 64*4, &roi);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
@@ -88,9 +93,13 @@ STD_SPEED_TEST(
rgb_to_argb_speed, INT16*, UINT32, dst=dst,
TRUE, general_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
#ifdef WITH_SSE2
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
FALSE, dst=dst);
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
@@ -165,7 +174,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
out2[2] = b2;
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
@@ -190,8 +199,15 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
/* Speed-test instantiation for yCbCrToRGB_16s16s_P3P3 on a 64x64 region.
 * The optimized implementation is SSE2 when WITH_SSE2 is defined, otherwise
 * NEON when WITH_NEON is defined, otherwise none (FALSE/PRIM_NOP). The final
 * FALSE, dst=dst pair is passed in the last (IPP-position) arguments.
 *
 * The conditionals wrap the whole invocation: preprocessing directives inside
 * the argument list of a function-like macro are undefined behaviour in
 * ISO C. */
#ifdef WITH_SSE2
STD_SPEED_TEST(
    ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
    TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
    TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
        PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    FALSE, dst=dst);
#elif defined(WITH_NEON)
STD_SPEED_TEST(
    ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
    TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
    TRUE, neon_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
        PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, FALSE,
    FALSE, dst=dst);
#else
STD_SPEED_TEST(
    ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
    TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
    FALSE, PRIM_NOP, 0, FALSE,
    FALSE, dst=dst);
#endif
int test_yCbCrToRGB_16s16s_P3P3_speed(void)

View File

@@ -71,7 +71,7 @@ int test_copy8u_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for 8-bit copy: memcpy vs. ippsCopy_8u.
 * No optimized routine is supplied, so the optimized-function arguments are
 * given exactly once as FALSE/PRIM_NOP (an empty statement); a second copy of
 * that argument group would shift every following argument. */
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
    TRUE, memcpy(dst, src1, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsCopy_8u(src1, dst, size));
int test_copy8u_speed(void)

View File

@@ -37,13 +37,15 @@ static const int set_sizes[] = { 1, 4, 16, 32, 64, 256, 1024, 4096 };
/* ------------------------------------------------------------------------- */
int test_set8u_func(void)
{
#if defined(WITH_SSE2) || defined(WITH_IPP)
BYTE ALIGN(dest[48]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
@@ -101,7 +103,7 @@ int test_set8u_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for 8-bit set: memset vs. ippsSet_8u.
 * No optimized routine is supplied, so the optimized-function arguments are
 * given exactly once as FALSE/PRIM_NOP (an empty statement); a second copy of
 * that argument group would shift every following argument. */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
    TRUE, memset(dst, constant, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsSet_8u(constant, dst, size));
int test_set8u_speed(void)
@@ -115,13 +117,15 @@ int test_set8u_speed(void)
/* ------------------------------------------------------------------------- */
int test_set32s_func(void)
{
#if defined(WITH_SSE2) || defined(WITH_IPP)
INT32 ALIGN(dest[512]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
@@ -176,13 +180,15 @@ int test_set32s_func(void)
/* ------------------------------------------------------------------------- */
int test_set32u_func(void)
{
#if defined(WITH_SSE2) || defined(WITH_IPP)
UINT32 ALIGN(dest[512]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
@@ -246,7 +252,11 @@ static inline void memset32u_naive(
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for set_32u (naive loop vs. SSE2 vs. IPP wrapper).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, memset32u_naive(constant, dst, size),
    TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ipp_wrapper_set_32u(constant, dst, size));
#else
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
    TRUE, memset32u_naive(constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ipp_wrapper_set_32u(constant, dst, size));
#endif
int test_set32u_speed(void)
@@ -274,7 +284,11 @@ static inline void memset32s_naive(
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for set_32s (naive loop vs. SSE2 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function position gets
 * PRIM_NOP (an empty statement), matching the other speed tests, rather than
 * the NULL pointer constant. */
#ifdef WITH_SSE2
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
    TRUE, memset32s_naive(constant, dst, size),
    TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsSet_32s(constant, dst, size));
#else
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
    TRUE, memset32s_naive(constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsSet_32s(constant, dst, size));
#endif
int test_set32s_speed(void)

View File

@@ -48,7 +48,7 @@ extern pstatus_t sse2_rShiftC_16u(
extern pstatus_t sse2_shiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
int _name_(void) \
{ \
@@ -109,19 +109,35 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
/* ========================================================================= */
/* Speed-test instantiation for lShiftC_16s (general vs. SSE2 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
    TRUE, general_lShiftC_16s(src1, constant, dst, size),
    TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsLShiftC_16s(src1, constant, dst, size));
#else
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
    TRUE, general_lShiftC_16s(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsLShiftC_16s(src1, constant, dst, size));
#endif
/* Speed-test instantiation for lShiftC_16u (general vs. SSE2 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
    TRUE, general_lShiftC_16u(src1, constant, dst, size),
    TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsLShiftC_16u(src1, constant, dst, size));
#else
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
    TRUE, general_lShiftC_16u(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsLShiftC_16u(src1, constant, dst, size));
#endif
/* Speed-test instantiation for rShiftC_16s (general vs. SSE2 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
    TRUE, general_rShiftC_16s(src1, constant, dst, size),
    TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsRShiftC_16s(src1, constant, dst, size));
#else
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
    TRUE, general_rShiftC_16s(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsRShiftC_16s(src1, constant, dst, size));
#endif
/* Speed-test instantiation for rShiftC_16u (general vs. SSE2 vs. IPP).
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
    TRUE, general_rShiftC_16u(src1, constant, dst, size),
    TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
    TRUE, ippsRShiftC_16u(src1, constant, dst, size));
#else
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
    TRUE, general_rShiftC_16u(src1, constant, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    TRUE, ippsRShiftC_16u(src1, constant, dst, size));
#endif
/* ------------------------------------------------------------------------- */

View File

@@ -23,21 +23,26 @@ static const int SIGN_PRETEST_ITERATIONS = 100000;
static const float TEST_TIME = 1.0;
extern pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
#ifdef WITH_SSE2
extern pstatus_t ssse3_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
#endif
/* ------------------------------------------------------------------------- */
int test_sign16s_func(void)
{
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
int failed = 0;
INT16 ALIGN(src[65535]), ALIGN(d1[65535]);
#ifdef WITH_SSE2
INT16 ALIGN(d2[65535]);
int i;
#endif
int failed = 0;
char testStr[256];
/* Test when we can reach 16-byte alignment */
testStr[0] = '\0';
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+1, 65535);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
strcat(testStr, " SSSE3");
@@ -57,7 +62,7 @@ int test_sign16s_func(void)
/* Test when we cannot reach 16-byte alignment */
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+2, 65535);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
ssse3_sign_16s(src+1, d2+2, 65535);
@@ -79,7 +84,11 @@ int test_sign16s_func(void)
/* ------------------------------------------------------------------------- */
/* Speed-test instantiation for sign_16s (general vs. SSSE3). The final
 * FALSE, dst=dst pair is passed in the last (IPP-position) arguments.
 *
 * The WITH_SSE2 conditional wraps the whole invocation: preprocessing
 * directives inside the argument list of a function-like macro are undefined
 * behaviour in ISO C. Without WITH_SSE2 the optimized-function arguments are
 * FALSE/PRIM_NOP. */
#ifdef WITH_SSE2
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
    TRUE, general_sign_16s(src1, dst, size),
    TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
    FALSE, dst=dst);
#else
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
    TRUE, general_sign_16s(src1, dst, size),
    FALSE, PRIM_NOP, 0, FALSE,
    FALSE, dst=dst);
#endif
int test_sign16s_speed(void)