mirror of
https://github.com/morgan9e/FreeRDP
synced 2026-04-15 00:44:19 +09:00
opencl: inline the openCL program in the source code
This commit is contained in:
@@ -144,7 +144,6 @@ typedef pstatus_t (*__andC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst,
|
||||
typedef pstatus_t (*__orC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len);
|
||||
typedef pstatus_t (*primitives_uninit_t)(void);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* Memory-to-memory copy routines */
|
||||
@@ -208,11 +207,10 @@ extern "C"
|
||||
FREERDP_API void primitives_set_hints(primitive_hints hints);
|
||||
FREERDP_API primitive_hints primitives_get_hints(void);
|
||||
FREERDP_API primitives_t* primitives_get_generic(void);
|
||||
FREERDP_API DWORD primitives_flags(primitives_t *p);
|
||||
FREERDP_API BOOL primitives_init(primitives_t *p, primitive_hints hints);
|
||||
FREERDP_API DWORD primitives_flags(primitives_t* p);
|
||||
FREERDP_API BOOL primitives_init(primitives_t* p, primitive_hints hints);
|
||||
FREERDP_API void primitives_uninit();
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -293,13 +293,11 @@ if (WITH_NEON)
|
||||
endif()
|
||||
|
||||
if (WITH_OPENCL)
|
||||
freerdp_definition_add(-DOPENCL_SOURCE_PATH="${CMAKE_INSTALL_PREFIX}/${FREERDP_PLUGIN_PATH}")
|
||||
set(PRIMITIVES_OPENCL_SRCS primitives/prim_YUV_opencl.c)
|
||||
|
||||
freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS})
|
||||
freerdp_library_add(OpenCL::OpenCL)
|
||||
|
||||
install(FILES primitives/primitives.cl DESTINATION ${FREERDP_PLUGIN_PATH})
|
||||
endif()
|
||||
|
||||
set(PRIMITIVES_OPT_SRCS
|
||||
|
||||
@@ -567,8 +567,8 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_TITLE)
|
||||
DUMP_APPEND(buffer, bufferSize, " title");
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_CLIENT_AREA_OFFSET)
|
||||
DUMP_APPEND(buffer, bufferSize, " clientOffset=(%"PRId32",%"PRId32")",
|
||||
state->clientOffsetX, state->clientOffsetY);
|
||||
DUMP_APPEND(buffer, bufferSize, " clientOffset=(%" PRId32 ",%" PRId32 ")",
|
||||
state->clientOffsetX, state->clientOffsetY);
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_CLIENT_AREA_SIZE)
|
||||
DUMP_APPEND(buffer, bufferSize, " clientAreaWidth=%" PRIu32 " clientAreaHeight=%" PRIu32 "",
|
||||
state->clientAreaWidth, state->clientAreaHeight);
|
||||
@@ -585,10 +585,11 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_ROOT_PARENT)
|
||||
DUMP_APPEND(buffer, bufferSize, " rootParent=0x%" PRIx32 "", state->rootParentHandle);
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_OFFSET)
|
||||
DUMP_APPEND(buffer, bufferSize, " windowOffset=(%"PRId32",%"PRId32")", state->windowOffsetX, state->windowOffsetY);
|
||||
DUMP_APPEND(buffer, bufferSize, " windowOffset=(%" PRId32 ",%" PRId32 ")",
|
||||
state->windowOffsetX, state->windowOffsetY);
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_CLIENT_DELTA)
|
||||
DUMP_APPEND(buffer, bufferSize, " windowClientDelta=(%"PRId32",%"PRId32")",
|
||||
state->windowClientDeltaX, state->windowClientDeltaY);
|
||||
DUMP_APPEND(buffer, bufferSize, " windowClientDelta=(%" PRId32 ",%" PRId32 ")",
|
||||
state->windowClientDeltaX, state->windowClientDeltaY);
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_WND_SIZE)
|
||||
DUMP_APPEND(buffer, bufferSize, " windowWidth=%" PRIu32 " windowHeight=%" PRIu32 "",
|
||||
state->windowWidth, state->windowHeight);
|
||||
@@ -607,8 +608,8 @@ static void dump_window_state_order(wLog* log, const char* msg, const WINDOW_ORD
|
||||
}
|
||||
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_VIS_OFFSET)
|
||||
DUMP_APPEND(buffer, bufferSize, " visibleOffset=(%"PRId32",%"PRId32")", state->visibleOffsetX,
|
||||
state->visibleOffsetY);
|
||||
DUMP_APPEND(buffer, bufferSize, " visibleOffset=(%" PRId32 ",%" PRId32 ")",
|
||||
state->visibleOffsetX, state->visibleOffsetY);
|
||||
|
||||
if (order->fieldFlags & WINDOW_ORDER_FIELD_VISIBILITY)
|
||||
{
|
||||
|
||||
@@ -54,12 +54,13 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
{
|
||||
cl_int ret;
|
||||
int i;
|
||||
cl_mem objs[3] = {NULL, NULL, NULL};
|
||||
cl_mem objs[3] = { NULL, NULL, NULL };
|
||||
cl_mem destObj;
|
||||
cl_kernel kernel;
|
||||
cl_event events[3];
|
||||
size_t indexes[2];
|
||||
const char *sourceNames[] = {"Y", "U", "V"};
|
||||
primitives_opencl_context *cl = primitives_get_opencl_context();
|
||||
const char* sourceNames[] = { "Y", "U", "V" };
|
||||
primitives_opencl_context* cl = primitives_get_opencl_context();
|
||||
|
||||
kernel = clCreateKernel(cl->program, kernelName, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
@@ -70,15 +71,16 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY, srcStep[i] * roi->height, NULL, &ret);
|
||||
objs[i] =
|
||||
clCreateBuffer(cl->context, CL_MEM_READ_ONLY, srcStep[i] * roi->height, NULL, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
|
||||
goto error_objs;
|
||||
}
|
||||
|
||||
ret = clEnqueueWriteBuffer(cl->commandQueue, objs[i], CL_TRUE, 0, srcStep[i] * roi->height,
|
||||
pSrc[i], 0, NULL, NULL);
|
||||
ret = clEnqueueWriteBuffer(cl->commandQueue, objs[i], CL_FALSE, 0, srcStep[i] * roi->height,
|
||||
pSrc[i], 0, NULL, &events[i]);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to enqueue write command for %sobj", sourceNames[i]);
|
||||
@@ -96,14 +98,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
/* push source + stride arguments*/
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void *)&objs[i]);
|
||||
ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void*)&objs[i]);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
|
||||
goto error_set_args;
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void *)&srcStep[i]);
|
||||
ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void*)&srcStep[i]);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
|
||||
@@ -111,14 +113,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
}
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&destObj);
|
||||
ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&destObj);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg destObj");
|
||||
goto error_set_args;
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&dstStep);
|
||||
ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void*)&dstStep);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg dstStep");
|
||||
@@ -127,8 +129,7 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
|
||||
indexes[0] = roi->width;
|
||||
indexes[1] = roi->height;
|
||||
ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL,
|
||||
0, NULL, NULL);
|
||||
ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 3, events, NULL);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to enqueue call kernel");
|
||||
@@ -136,7 +137,8 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
|
||||
}
|
||||
|
||||
/* Transfer result to host */
|
||||
ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0, NULL, NULL);
|
||||
ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0,
|
||||
NULL, NULL);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to read back buffer");
|
||||
@@ -184,18 +186,19 @@ pstatus_t primitives_uninit_opencl(void)
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
static const char* openclProgram =
|
||||
#include "primitives.cl"
|
||||
;
|
||||
|
||||
BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
{
|
||||
cl_platform_id* platform_ids = NULL;
|
||||
cl_uint ndevices, nplatforms, i;
|
||||
cl_kernel kernel;
|
||||
cl_int ret;
|
||||
char sourcePath[1000];
|
||||
|
||||
BOOL gotGPU = FALSE;
|
||||
FILE* f;
|
||||
size_t programLen;
|
||||
char* programSource;
|
||||
|
||||
ret = clGetPlatformIDs(0, NULL, &nplatforms);
|
||||
if (ret != CL_SUCCESS || nplatforms < 1)
|
||||
@@ -270,45 +273,14 @@ BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
snprintf(sourcePath, sizeof(sourcePath), "%s/primitives.cl", OPENCL_SOURCE_PATH);
|
||||
|
||||
f = fopen(sourcePath, "r");
|
||||
if (!f)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to open source file %s", sourcePath);
|
||||
goto error_source_file;
|
||||
}
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
programLen = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
|
||||
programSource = malloc(programLen);
|
||||
if (!programSource)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to allocate memory(%d bytes) for source file %s", programLen,
|
||||
sourcePath);
|
||||
fclose(f);
|
||||
goto error_source_file;
|
||||
}
|
||||
|
||||
if (fread(programSource, programLen, 1, f) <= 0)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to read openCL program in %s", sourcePath);
|
||||
free(programSource);
|
||||
fclose(f);
|
||||
goto error_source_file;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
programLen = strlen(openclProgram);
|
||||
cl->program =
|
||||
clCreateProgramWithSource(cl->context, 1, (const char**)&programSource, &programLen, &ret);
|
||||
clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to create command queue");
|
||||
WLog_ERR(TAG, "openCL: unable to create program");
|
||||
goto out_program_create;
|
||||
}
|
||||
free(programSource);
|
||||
|
||||
ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
|
||||
if (ret != CL_SUCCESS)
|
||||
@@ -343,7 +315,6 @@ BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
|
||||
out_program_build:
|
||||
clReleaseProgram(cl->program);
|
||||
error_source_file:
|
||||
out_program_create:
|
||||
clReleaseCommandQueue(cl->commandQueue);
|
||||
clReleaseContext(cl->context);
|
||||
@@ -363,11 +334,12 @@ BOOL primitives_init_opencl(primitives_t* prims)
|
||||
}
|
||||
|
||||
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3],
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi)
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
{
|
||||
const char *kernel_name;
|
||||
const char* kernel_name;
|
||||
|
||||
switch(DstFormat)
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
@@ -394,7 +366,4 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32
|
||||
void primitives_init_YUV_opencl(primitives_t* prims)
|
||||
{
|
||||
prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1448,7 +1448,6 @@ static pstatus_t ssse3_YUV420CombineToYUV444(avc444_frame_type type, const BYTE*
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void primitives_init_YUV_opt(primitives_t* prims)
|
||||
{
|
||||
generic = primitives_get_generic();
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#define HAVE_CPU_OPTIMIZED_PRIMITIVES 1
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(WITH_SSE2)
|
||||
/* Use lddqu for unaligned; load for 16-byte aligned. */
|
||||
#define LOAD_SI128(_ptr_) \
|
||||
|
||||
@@ -46,7 +46,6 @@ primitive_hints primitives_get_hints(void)
|
||||
return primitivesHints;
|
||||
}
|
||||
|
||||
|
||||
/* Singleton pointer used throughout the program when requested. */
|
||||
static primitives_t pPrimitivesGeneric = { 0 };
|
||||
static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
||||
@@ -62,14 +61,12 @@ static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_OPTIMIZED_PRIMITIVES)
|
||||
static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
||||
#endif
|
||||
|
||||
static primitives_t pPrimitives = { 0 };
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static BOOL primitives_init_generic(primitives_t *prims)
|
||||
static BOOL primitives_init_generic(primitives_t* prims)
|
||||
{
|
||||
primitives_init_add(prims);
|
||||
primitives_init_andor(prims);
|
||||
@@ -93,7 +90,7 @@ static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PV
|
||||
return primitives_init_generic(&pPrimitivesGeneric);
|
||||
}
|
||||
|
||||
static BOOL primitives_init_optimized(primitives_t *prims)
|
||||
static BOOL primitives_init_optimized(primitives_t* prims)
|
||||
{
|
||||
primitives_init_generic(prims);
|
||||
|
||||
@@ -113,11 +110,12 @@ static BOOL primitives_init_optimized(primitives_t *prims)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
BYTE *channels[3];
|
||||
typedef struct
|
||||
{
|
||||
BYTE* channels[3];
|
||||
UINT32 steps[3];
|
||||
prim_size_t roi;
|
||||
BYTE *outputBuffer;
|
||||
BYTE* outputBuffer;
|
||||
UINT32 outputStride;
|
||||
UINT32 testedFormat;
|
||||
} primitives_YUV_benchmark;
|
||||
@@ -155,7 +153,7 @@ static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_be
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
BYTE *buf = ret->channels[i] = malloc(roi->width * roi->height);
|
||||
BYTE* buf = ret->channels[i] = malloc(roi->width * roi->height);
|
||||
if (!buf)
|
||||
goto fail;
|
||||
|
||||
@@ -170,11 +168,11 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primitives_t *prims,
|
||||
UINT64 runTime, UINT32 *computations)
|
||||
static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
|
||||
UINT64 runTime, UINT32* computations)
|
||||
{
|
||||
ULONGLONG dueDate = GetTickCount64() + runTime;
|
||||
const BYTE *channels[3];
|
||||
ULONGLONG dueDate;
|
||||
const BYTE* channels[3];
|
||||
int i;
|
||||
|
||||
*computations = 0;
|
||||
@@ -182,10 +180,20 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primit
|
||||
for (i = 0; i < 3; i++)
|
||||
channels[i] = bench->channels[i];
|
||||
|
||||
/* do a first dry run to initialize cache and such */
|
||||
pstatus_t status =
|
||||
prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
|
||||
bench->outputStride, bench->testedFormat, &bench->roi);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* let's run the benchmark */
|
||||
dueDate = GetTickCount64() + runTime;
|
||||
while (GetTickCount64() < dueDate)
|
||||
{
|
||||
pstatus_t status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
|
||||
bench->outputStride, bench->testedFormat, &bench->roi);
|
||||
pstatus_t status =
|
||||
prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
|
||||
bench->outputStride, bench->testedFormat, &bench->roi);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
*computations = *computations + 1;
|
||||
@@ -193,10 +201,10 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark *bench, primit
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL primitives_autodetect_best(primitives_t *prims)
|
||||
static BOOL primitives_autodetect_best(primitives_t* prims)
|
||||
{
|
||||
BOOL ret = FALSE;
|
||||
UINT64 benchDuration = 150; // 100 ms
|
||||
UINT64 benchDuration = 150; /* 150 ms */
|
||||
UINT32 genericCount = 0;
|
||||
UINT32 bestCount;
|
||||
primitives_t* genericPrims = primitives_get_generic();
|
||||
@@ -258,25 +266,14 @@ static BOOL primitives_autodetect_best(primitives_t *prims)
|
||||
}
|
||||
#endif
|
||||
|
||||
WLog_DBG(TAG,
|
||||
"benchmark result: generic=%" PRIu32
|
||||
WLog_DBG(TAG, "primitives benchmark result:");
|
||||
WLog_DBG(TAG, " * generic=%" PRIu32, genericCount);
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
" optimized=%" PRIu32
|
||||
WLog_DBG(TAG, " * optimized=%" PRIu32, optimizedCount);
|
||||
#endif
|
||||
#if defined(WITH_OPENCL)
|
||||
" openCL=%" PRIu32
|
||||
WLog_DBG(TAG, " * openCL=%" PRIu32, openclCount);
|
||||
#endif
|
||||
,
|
||||
genericCount
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
,
|
||||
optimizedCount
|
||||
#endif
|
||||
#if defined(WITH_OPENCL)
|
||||
,
|
||||
openclCount
|
||||
#endif
|
||||
);
|
||||
WLog_INFO(TAG, "primitives autodetect, using %s", primName);
|
||||
ret = TRUE;
|
||||
out:
|
||||
@@ -291,6 +288,9 @@ static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID*
|
||||
WINPR_UNUSED(param);
|
||||
WINPR_UNUSED(context);
|
||||
|
||||
if (!primitives_init_optimized(&pPrimitivesGpu))
|
||||
return FALSE;
|
||||
|
||||
if (!primitives_init_opencl(&pPrimitivesGpu))
|
||||
return FALSE;
|
||||
|
||||
@@ -305,10 +305,8 @@ static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID*
|
||||
WINPR_UNUSED(param);
|
||||
WINPR_UNUSED(context);
|
||||
|
||||
if (!primitives_init_optimized(&pPrimitivesCpu))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
return primitives_init_optimized(&pPrimitivesCpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -323,22 +321,26 @@ static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID
|
||||
|
||||
BOOL primitives_init(primitives_t* p, primitive_hints hints)
|
||||
{
|
||||
switch(hints)
|
||||
switch (hints)
|
||||
{
|
||||
case PRIMITIVES_AUTODETECT:
|
||||
return primitives_autodetect_best(p);
|
||||
case PRIMITIVES_PURE_SOFT:
|
||||
*p = pPrimitivesGeneric;
|
||||
return TRUE;
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
case PRIMITIVES_ONLY_CPU:
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
*p = pPrimitivesCpu;
|
||||
return TRUE;
|
||||
#else
|
||||
*p = pPrimitivesGeneric;
|
||||
#endif
|
||||
#if defined(WITH_OPENCL)
|
||||
return TRUE;
|
||||
case PRIMITIVES_ONLY_GPU:
|
||||
#if defined(WITH_OPENCL)
|
||||
*p = pPrimitivesGpu;
|
||||
return TRUE;
|
||||
#else
|
||||
return FALSE;
|
||||
#endif
|
||||
default:
|
||||
WLog_ERR(TAG, "unknown hint %d", hints);
|
||||
@@ -367,7 +369,7 @@ static void setup(void)
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
|
||||
#endif
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
#if defined(WITH_OPENCL)
|
||||
InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
|
||||
#endif
|
||||
InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
|
||||
@@ -388,18 +390,23 @@ primitives_t* primitives_get_generic(void)
|
||||
primitives_t* primitives_get_by_type(DWORD type)
|
||||
{
|
||||
InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
#if defined(WITH_OPENCL)
|
||||
case PRIMITIVES_ONLY_GPU:
|
||||
InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
|
||||
return &pPrimitivesGpu;
|
||||
#if defined(WITH_OPENCL)
|
||||
if (InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
|
||||
return &pPrimitivesGpu;
|
||||
#endif
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
return NULL;
|
||||
|
||||
case PRIMITIVES_ONLY_CPU:
|
||||
InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
|
||||
return &pPrimitivesCpu;
|
||||
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
||||
if (InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
|
||||
return &pPrimitivesCpu;
|
||||
#endif
|
||||
return NULL;
|
||||
|
||||
case PRIMITIVES_PURE_SOFT:
|
||||
default:
|
||||
return &pPrimitivesGeneric;
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
/**
|
||||
* FreeRDP: A Remote Desktop Protocol Implementation
|
||||
* Optimized operations using openCL
|
||||
* vi:ts=4 sw=4
|
||||
*
|
||||
* Copyright 2019 David Fort <contact@hardening-consulting.com>
|
||||
* Copyright 2019 Rangee Gmbh
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License. You may obtain
|
||||
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*/
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
|
||||
STRINGIFY(
|
||||
unsigned char clamp_uc(int v, int l, int h)
|
||||
{
|
||||
if (v > h)
|
||||
@@ -79,4 +81,4 @@ __kernel void yuv420_to_bgra_1b(
|
||||
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user