mirror of
https://github.com/morgan9e/FreeRDP
synced 2026-04-15 00:44:19 +09:00
[primitives,opencl] fix alpha handling
This commit is contained in:
@@ -45,114 +45,191 @@ typedef struct
|
||||
cl_program program;
|
||||
} primitives_opencl_context;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
primitives_opencl_context* cl;
|
||||
cl_kernel kernel;
|
||||
cl_mem srcObjs[3];
|
||||
cl_mem dstObj;
|
||||
prim_size_t roi;
|
||||
size_t dstStep;
|
||||
} primitives_cl_kernel;
|
||||
|
||||
static primitives_opencl_context* primitives_get_opencl_context(void);
|
||||
|
||||
static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const pSrc[3],
|
||||
const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep,
|
||||
const prim_size_t* roi)
|
||||
static void cl_kernel_free(primitives_cl_kernel* kernel)
|
||||
{
|
||||
cl_int ret = 0;
|
||||
cl_mem objs[3] = { NULL, NULL, NULL };
|
||||
cl_mem destObj = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
size_t indexes[2] = { 0 };
|
||||
const char* sourceNames[] = { "Y", "U", "V" };
|
||||
primitives_opencl_context* cl = primitives_get_opencl_context();
|
||||
if (!kernel)
|
||||
return;
|
||||
|
||||
kernel = clCreateKernel(cl->program, kernelName, &ret);
|
||||
if (kernel->dstObj)
|
||||
clReleaseMemObject(kernel->dstObj);
|
||||
|
||||
for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
|
||||
{
|
||||
cl_mem obj = kernel->srcObjs[i];
|
||||
kernel->srcObjs[i] = NULL;
|
||||
if (obj)
|
||||
clReleaseMemObject(obj);
|
||||
}
|
||||
|
||||
if (kernel->kernel)
|
||||
clReleaseKernel(kernel->kernel);
|
||||
|
||||
free(kernel);
|
||||
}
|
||||
|
||||
static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi)
|
||||
{
|
||||
WINPR_ASSERT(kernelName);
|
||||
WINPR_ASSERT(roi);
|
||||
|
||||
primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel));
|
||||
if (!kernel)
|
||||
goto fail;
|
||||
|
||||
kernel->roi = *roi;
|
||||
kernel->cl = primitives_get_opencl_context();
|
||||
if (!kernel->cl)
|
||||
goto fail;
|
||||
|
||||
cl_int ret = CL_INVALID_VALUE;
|
||||
kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
|
||||
return -1;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (cl_uint i = 0; i < 3; i++)
|
||||
return kernel;
|
||||
fail:
|
||||
cl_kernel_free(kernel);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx,
|
||||
const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3])
|
||||
{
|
||||
const char* sourceNames[] = { "Y", "U", "V" };
|
||||
|
||||
WINPR_ASSERT(ctx);
|
||||
WINPR_ASSERT(pSrc);
|
||||
WINPR_ASSERT(srcStep);
|
||||
|
||||
for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
|
||||
{
|
||||
objs[i] = clCreateBuffer(cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
srcStep[i] * roi->height, (char*)pSrc[i], &ret);
|
||||
cl_int ret = CL_INVALID_VALUE;
|
||||
ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
1ull * srcStep[i] * ctx->roi.height, pSrc[i], &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
|
||||
goto error_objs;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
destObj = clCreateBuffer(cl->context, CL_MEM_WRITE_ONLY, dstStep * roi->height, NULL, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to create dest obj");
|
||||
goto error_objs;
|
||||
}
|
||||
|
||||
/* push source + stride arguments*/
|
||||
for (cl_uint i = 0; i < 3; i++)
|
||||
{
|
||||
ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]);
|
||||
ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]);
|
||||
ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
|
||||
{
|
||||
|
||||
WINPR_ASSERT(ctx);
|
||||
|
||||
ctx->dstStep = dstStep;
|
||||
cl_int ret = CL_INVALID_VALUE;
|
||||
ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
|
||||
1ull * dstStep * ctx->roi.height, NULL, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to create dest obj");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg destObj");
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep);
|
||||
ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to set arg dstStep");
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
indexes[0] = roi->width;
|
||||
indexes[1] = roi->height;
|
||||
ret = clEnqueueNDRangeKernel(cl->commandQueue, kernel, 2, NULL, indexes, NULL, 0, NULL, NULL);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
|
||||
{
|
||||
WINPR_ASSERT(ctx);
|
||||
WINPR_ASSERT(pDst);
|
||||
|
||||
size_t indexes[2] = { 0 };
|
||||
indexes[0] = ctx->roi.width;
|
||||
indexes[1] = ctx->roi.height;
|
||||
|
||||
cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
|
||||
0, NULL, NULL);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to enqueue call kernel");
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Transfer result to host */
|
||||
ret = clEnqueueReadBuffer(cl->commandQueue, destObj, CL_TRUE, 0, roi->height * dstStep, pDst, 0,
|
||||
NULL, NULL);
|
||||
ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
|
||||
ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "unable to read back buffer");
|
||||
goto error_set_args;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* cleanup things */
|
||||
clReleaseMemObject(destObj);
|
||||
for (cl_uint i = 0; i < 3; i++)
|
||||
if (objs[i])
|
||||
clReleaseMemObject(objs[i]);
|
||||
clReleaseKernel(kernel);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* const WINPR_RESTRICT pSrc[3],
|
||||
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
|
||||
const prim_size_t* WINPR_RESTRICT roi)
|
||||
{
|
||||
pstatus_t res = -1;
|
||||
|
||||
error_set_args:
|
||||
clReleaseMemObject(destObj);
|
||||
error_objs:
|
||||
for (cl_uint i = 0; i < 3; i++)
|
||||
{
|
||||
if (objs[i])
|
||||
clReleaseMemObject(objs[i]);
|
||||
}
|
||||
clReleaseKernel(kernel);
|
||||
return -1;
|
||||
primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
|
||||
if (!ctx)
|
||||
goto fail;
|
||||
|
||||
if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
|
||||
goto fail;
|
||||
|
||||
if (!cl_kernel_set_destination(ctx, dstStep))
|
||||
goto fail;
|
||||
|
||||
if (!cl_kernel_process(ctx, pDst))
|
||||
goto fail;
|
||||
|
||||
res = PRIMITIVES_SUCCESS;
|
||||
|
||||
fail:
|
||||
cl_kernel_free(ctx);
|
||||
return res;
|
||||
}
|
||||
|
||||
static primitives_opencl_context openclContext = { 0 };
|
||||
@@ -162,20 +239,27 @@ static primitives_opencl_context* primitives_get_opencl_context(void)
|
||||
return &openclContext;
|
||||
}
|
||||
|
||||
static void cl_context_free(primitives_opencl_context* ctx)
|
||||
{
|
||||
if (!ctx)
|
||||
return;
|
||||
clReleaseProgram(ctx->program);
|
||||
clReleaseCommandQueue(ctx->commandQueue);
|
||||
clReleaseContext(ctx->context);
|
||||
clReleaseDevice(ctx->deviceId);
|
||||
ctx->support = FALSE;
|
||||
}
|
||||
|
||||
static pstatus_t primitives_uninit_opencl(void)
|
||||
{
|
||||
if (!openclContext.support)
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
||||
clReleaseProgram(openclContext.program);
|
||||
clReleaseCommandQueue(openclContext.commandQueue);
|
||||
clReleaseContext(openclContext.context);
|
||||
clReleaseDevice(openclContext.deviceId);
|
||||
openclContext.support = FALSE;
|
||||
cl_context_free(&openclContext);
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
static const char* openclProgram =
|
||||
static const char openclProgram[] =
|
||||
#include "primitives.cl"
|
||||
;
|
||||
|
||||
@@ -209,8 +293,8 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
{
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
char platformName[1000];
|
||||
char deviceName[1000];
|
||||
char platformName[1000] = { 0 };
|
||||
char deviceName[1000] = { 0 };
|
||||
|
||||
ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
|
||||
platformName, NULL);
|
||||
@@ -263,13 +347,13 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
programLen = strlen(openclProgram);
|
||||
cl->program =
|
||||
clCreateProgramWithSource(cl->context, 1, (const char**)&openclProgram, &programLen, &ret);
|
||||
programLen = strnlen(openclProgram, sizeof(openclProgram));
|
||||
const char* ptr = openclProgram;
|
||||
cl->program = clCreateProgramWithSource(cl->context, 1, &ptr, &programLen, &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to create program");
|
||||
goto out_program_create;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = clBuildProgram(cl->program, 1, &cl->deviceId, NULL, NULL, NULL);
|
||||
@@ -289,42 +373,55 @@ static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
|
||||
}
|
||||
goto out_program_build;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
kernel = clCreateKernel(cl->program, "yuv420_to_bgra_1b", &ret);
|
||||
if (ret != CL_SUCCESS)
|
||||
{
|
||||
WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
|
||||
goto out_program_build;
|
||||
goto fail;
|
||||
}
|
||||
clReleaseKernel(kernel);
|
||||
|
||||
cl->support = TRUE;
|
||||
return TRUE;
|
||||
|
||||
out_program_build:
|
||||
clReleaseProgram(cl->program);
|
||||
out_program_create:
|
||||
clReleaseCommandQueue(cl->commandQueue);
|
||||
clReleaseContext(cl->context);
|
||||
clReleaseDevice(cl->deviceId);
|
||||
fail:
|
||||
cl_context_free(cl);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
|
||||
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
|
||||
UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* WINPR_RESTRICT roi)
|
||||
{
|
||||
const char* kernel_name = NULL;
|
||||
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_ABGR32:
|
||||
kernel_name = "yuv420_to_abgr_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_XBGR32:
|
||||
kernel_name = "yuv420_to_xbgr_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_RGBX32:
|
||||
kernel_name = "yuv420_to_rgba_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_RGBA32:
|
||||
kernel_name = "yuv420_to_rgbx_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
kernel_name = "yuv420_to_bgra_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
kernel_name = "yuv420_to_bgrx_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_XRGB32:
|
||||
kernel_name = "yuv420_to_xrgb_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_ARGB32:
|
||||
kernel_name = "yuv420_to_argb_1b";
|
||||
break;
|
||||
@@ -340,19 +437,36 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const U
|
||||
return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
|
||||
}
|
||||
|
||||
static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const pSrc[3], const UINT32 srcStep[3],
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3],
|
||||
const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
|
||||
UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* WINPR_RESTRICT roi)
|
||||
{
|
||||
const char* kernel_name = NULL;
|
||||
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_ABGR32:
|
||||
kernel_name = "yuv444_to_abgr_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_XBGR32:
|
||||
kernel_name = "yuv444_to_xbgr_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_RGBX32:
|
||||
kernel_name = "yuv444_to_rgba_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_RGBA32:
|
||||
kernel_name = "yuv444_to_rgbx_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
kernel_name = "yuv444_to_bgra_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
kernel_name = "yuv444_to_bgrx_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_XRGB32:
|
||||
kernel_name = "yuv444_to_xrgb_1b";
|
||||
break;
|
||||
case PIXEL_FORMAT_ARGB32:
|
||||
kernel_name = "yuv444_to_argb_1b";
|
||||
break;
|
||||
|
||||
@@ -28,11 +28,11 @@ uchar clamp_uc(int v, short l, short h)
|
||||
return (uchar)v;
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_argb_1b(
|
||||
__global const uchar *bufY, int strideY,
|
||||
__global const uchar *bufU, int strideU,
|
||||
__global const uchar *bufV, int strideV,
|
||||
__global uchar *dest, int strideDest)
|
||||
__kernel void yuv420_to_rgba_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
@@ -49,17 +49,234 @@ __kernel void yuv420_to_argb_1b(
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = 0xff; /* A */
|
||||
destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
|
||||
destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
|
||||
/* A */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_abgr_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
/* A */
|
||||
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_abgr_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
/* A */
|
||||
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_rgba_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
/* A */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_rgbx_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
|
||||
destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_xbgr_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = 0xff; /* A */
|
||||
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_xbgr_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = 0xff; /* A */
|
||||
destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_rgbx_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
}
|
||||
|
||||
|
||||
__kernel void yuv420_to_argb_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
/* A */
|
||||
destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
|
||||
destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_bgra_1b(
|
||||
__global const uchar *bufY, int strideY,
|
||||
__global const uchar *bufU, int strideU,
|
||||
__global const uchar *bufV, int strideV,
|
||||
__global uchar *dest, int strideDest)
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
@@ -79,14 +296,14 @@ __kernel void yuv420_to_bgra_1b(
|
||||
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
/* A */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_bgra_1b(
|
||||
__global const uchar *bufY, int strideY,
|
||||
__global const uchar *bufU, int strideU,
|
||||
__global const uchar *bufV, int strideV,
|
||||
__global uchar *dest, int strideDest)
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
@@ -106,14 +323,14 @@ __kernel void yuv444_to_bgra_1b(
|
||||
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
/* A */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_argb_1b(
|
||||
__global const uchar *bufY, int strideY,
|
||||
__global const uchar *bufU, int strideU,
|
||||
__global const uchar *bufV, int strideV,
|
||||
__global uchar *dest, int strideDest)
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
@@ -133,6 +350,114 @@ __kernel void yuv444_to_argb_1b(
|
||||
destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[0] = 0xff; /* A */
|
||||
/* A */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_xrgb_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = 0xff; /* A */
|
||||
destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */
|
||||
destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */
|
||||
destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */
|
||||
}
|
||||
|
||||
__kernel void yuv420_to_bgrx_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
|
||||
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_bgrx_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[3] = 0xff; /* A */
|
||||
}
|
||||
|
||||
__kernel void yuv444_to_xrgb_1b(
|
||||
__global const uchar *bufY, unsigned strideY,
|
||||
__global const uchar *bufU, unsigned strideU,
|
||||
__global const uchar *bufV, unsigned strideV,
|
||||
__global uchar *dest, unsigned strideDest)
|
||||
{
|
||||
unsigned int x = get_global_id(0);
|
||||
unsigned int y = get_global_id(1);
|
||||
|
||||
short Y = bufY[y * strideY + x];
|
||||
short U = bufU[y * strideU + x] - 128;
|
||||
short V = bufV[y * strideV + x] - 128;
|
||||
|
||||
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
|
||||
|
||||
/**
|
||||
* | R | ( | 256 0 403 | | Y | )
|
||||
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
|
||||
* | B | ( | 256 475 0 | | V - 128 | )
|
||||
*/
|
||||
int y256 = 256 * Y;
|
||||
destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
|
||||
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
|
||||
destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
|
||||
destPtr[0] = 0xff; /* A */
|
||||
}
|
||||
)
|
||||
|
||||
@@ -45,7 +45,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for
|
||||
|
||||
for (x = 0; x < size; x++)
|
||||
{
|
||||
const double maxDiff = 4.0;
|
||||
const LONG maxDiff = 4;
|
||||
UINT32 sColor = 0;
|
||||
UINT32 dColor = 0;
|
||||
BYTE sR = 0;
|
||||
@@ -63,7 +63,7 @@ static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 for
|
||||
FreeRDPSplitColor(sColor, format, &sR, &sG, &sB, &sA, NULL);
|
||||
FreeRDPSplitColor(dColor, format, &dR, &dG, &dB, &dA, NULL);
|
||||
|
||||
if ((abs(sR - dR) > maxDiff) || (abs(sG - dG) > maxDiff) || (abs(sB - dB) > maxDiff))
|
||||
if ((labs(sR - dR) > maxDiff) || (labs(sG - dG) > maxDiff) || (labs(sB - dB) > maxDiff))
|
||||
{
|
||||
fprintf(
|
||||
stderr,
|
||||
@@ -477,7 +477,7 @@ static BOOL TestPrimitiveYUV(primitives_t* prims, prim_size_t roi, BOOL use444)
|
||||
yuv_step[1] = uvwidth;
|
||||
yuv_step[2] = uvwidth;
|
||||
|
||||
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
|
||||
for (x = 0; x < ARRAYSIZE(formats); x++)
|
||||
{
|
||||
pstatus_t rc = 0;
|
||||
const UINT32 DstFormat = formats[x];
|
||||
|
||||
Reference in New Issue
Block a user