From 7b0489e0ced80f118bcca5c96d01ffdd4b79a108 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Fri, 10 Jan 2025 12:08:38 +0100 Subject: [PATCH] [primitives,yuv] fix opencl conversion routines --- libfreerdp/primitives/opencl/primitives.cl | 583 +++++++++++---------- 1 file changed, 299 insertions(+), 284 deletions(-) diff --git a/libfreerdp/primitives/opencl/primitives.cl b/libfreerdp/primitives/opencl/primitives.cl index 796d59ae4..d02b255c4 100644 --- a/libfreerdp/primitives/opencl/primitives.cl +++ b/libfreerdp/primitives/opencl/primitives.cl @@ -16,20 +16,35 @@ * permissions and limitations under the License. */ -uchar clamp_uc(int v, short l, short h) +static uchar clamp_uc(int v, short l, short h) { - if (v > h) - v = h; - if (v < l) - v = l; - return (uchar)v; + if (v > h) + v = h; + if (v < l) + v = l; + return (uchar)v; } -__kernel void yuv420_to_rgba_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +static short avgUV(const uchar* buf, unsinged stride, unsigned x, unsigned y) +{ + const short U00 = buf[y * stride]; + if ((x != 0) || (y != 0)) + return U00; + const short U01 = buf[y * stride + 1]; + const short U10 = buf[(y + 1) * stride]; + const short U11 = buf[(y + 1) * stride + 1]; + const short avg = U00 * 4 - U01 - U10 - U11; + const short avgU = clamp_uc(avg, 0, 255); + const short diff = abs(U00 - avgU); + if (diff < 30) + return U00; + return avgU; +} + +__kernel void yuv420_to_rgba_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -38,7 +53,7 @@ __kernel void yuv420_to_rgba_1b( short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -46,17 +61,16 @@ __kernel void yuv420_to_rgba_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ - destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ - /* A */ + destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + /* A */ } -__kernel void yuv420_to_abgr_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv420_to_abgr_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -65,7 +79,7 @@ __kernel void yuv420_to_abgr_1b( short U = bufU[(y / 2) * strideU + (x / 2)] - 128; short V = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -74,25 +88,26 @@ __kernel void yuv420_to_abgr_1b( */ int y256 = 256 * Y; /* A */ - destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * U) - (120 * V)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ } -__kernel void yuv444_to_abgr_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_abgr_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -101,25 +116,26 @@ __kernel void yuv444_to_abgr_1b( */ int y256 = 256 * Y; /* A */ - destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ } -__kernel void yuv444_to_rgba_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_rgba_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -127,126 +143,16 @@ __kernel void yuv444_to_rgba_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - /* A */ + destPtr[0] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + /* A */ } -__kernel void yuv420_to_rgbx_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) -{ - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); - - short Y = bufY[y * strideY + x]; - short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; - short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; - - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); - - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ - destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ - destPtr[3] = 0xff; /* A */ -} - -__kernel void yuv420_to_xbgr_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) -{ - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); - - short Y = bufY[y * strideY + x]; - short U = bufU[(y / 2) * strideU + (x / 2)] - 128; - short V = bufV[(y / 2) * strideV + (x / 2)] - 128; - - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); - - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = 0xff; /* A */ - destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ -} - -__kernel void yuv444_to_xbgr_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) -{ - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); - - short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; - - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); - - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = 0xff; /* A */ - destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ -} - -__kernel void yuv444_to_rgbx_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) -{ - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); - - short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; - - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); - - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[3] = 0xff; /* A */ -} - - -__kernel void yuv420_to_argb_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv420_to_rgbx_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -255,7 +161,7 @@ __kernel void yuv420_to_argb_1b( short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -263,17 +169,16 @@ __kernel void yuv420_to_argb_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - /* A */ - destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ - destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + destPtr[0] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + destPtr[3] = 0xff; /* A */ } -__kernel void yuv420_to_bgra_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv420_to_xbgr_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); @@ -282,7 +187,7 @@ __kernel void yuv420_to_bgra_1b( short U = bufU[(y / 2) * strideU + (x / 2)] - 128; short V = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -290,26 +195,27 @@ __kernel void yuv420_to_bgra_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - /* A */ + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * U) - (120 * V)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ } -__kernel void yuv444_to_bgra_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_xbgr_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -317,26 +223,53 @@ __kernel void yuv444_to_bgra_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - /* A */ + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ } -__kernel void yuv444_to_argb_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_rgbx_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { unsigned int x = get_global_id(0); unsigned int y = get_global_id(1); short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + destPtr[1] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv420_to_argb_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; + short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); /** * | R | ( | 256 0 403 | | Y | ) @@ -344,116 +277,198 @@ __kernel void yuv444_to_argb_1b( * | B | ( | 256 475 0 | | V - 128 | ) */ int y256 = 256 * Y; - destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ /* A */ + destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ } -__kernel void yuv420_to_xrgb_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv420_to_bgra_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); - short Y = bufY[y * strideY + x]; - short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; - short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; + short Y = bufY[y * strideY + x]; + short U = bufU[(y / 2) * strideU + (x / 2)] - 128; + short V = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = 0xff; /* A */ - destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ - destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8 , 0, 255); /* G */ - destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - (48 * U) - (120 * V)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + /* A */ } -__kernel void yuv420_to_bgrx_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_bgra_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); - short Y = bufY[y * strideY + x]; - short U = bufU[(y / 2) * strideU + (x / 2)] - 128; - short V = bufV[(y / 2) * strideV + (x / 2)] - 128; + short Y = bufY[y * strideY + x]; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[3] = 0xff; /* A */ + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + /* A */ } -__kernel void yuv444_to_bgrx_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv444_to_argb_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); - short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short Y = bufY[y * strideY + x]; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[3] = 0xff; /* A */ + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[3] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[1] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + /* A */ } -__kernel void yuv444_to_xrgb_1b( - __global const uchar *bufY, unsigned strideY, - __global const uchar *bufU, unsigned strideU, - __global const uchar *bufV, unsigned strideV, - __global uchar *dest, unsigned strideDest) +__kernel void yuv420_to_xrgb_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) { - unsigned int x = get_global_id(0); - unsigned int y = get_global_id(1); + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); - short Y = bufY[y * strideY + x]; - short U = bufU[y * strideU + x] - 128; - short V = bufV[y * strideV + x] - 128; + short Y = bufY[y * strideY + x]; + short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128; + short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128; - __global uchar *destPtr = dest + (strideDest * y) + (x * 4); + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); - /** - * | R | ( | 256 0 403 | | Y | ) - * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 - * | B | ( | 256 475 0 | | V - 128 | ) - */ - int y256 = 256 * Y; - destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ - destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */ - destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ - destPtr[0] = 0xff; /* A */ + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = 0xff; /* A */ + destPtr[1] = clamp_uc((y256 + (403 * Vdim)) >> 8, 0, 255); /* R */ + destPtr[2] = clamp_uc((y256 - (48 * Udim) - (120 * Vdim)) >> 8, 0, 255); /* G */ + destPtr[3] = clamp_uc((y256 + (475 * Udim)) >> 8, 0, 255); /* B */ +} + +__kernel void yuv420_to_bgrx_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = bufU[(y / 2) * strideU + (x / 2)] - 128; + short V = bufV[(y / 2) * strideV + (x / 2)] - 128; + + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - (48 * U) - (120 * V)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv444_to_bgrx_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; + + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[0] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[1] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[2] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + destPtr[3] = 0xff; /* A */ +} + +__kernel void yuv444_to_xrgb_1b(__global const uchar* bufY, unsigned strideY, + __global const uchar* bufU, unsigned strideU, + __global const uchar* bufV, unsigned strideV, __global uchar* dest, + unsigned strideDest) +{ + unsigned int x = get_global_id(0); + unsigned int y = get_global_id(1); + + short Y = bufY[y * strideY + x]; + short U = avgUV(bufU, strideU, x, y); + short V = avgUV(bufV, strideV, x, y); + short D = U - 128; + short E = V - 128; + + __global uchar* destPtr = dest + (strideDest * y) + (x * 4); + + /** + * | R | ( | 256 0 403 | | Y | ) + * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ + int y256 = 256 * Y; + destPtr[3] = clamp_uc((y256 + (475 * D)) >> 8, 0, 255); /* B */ + destPtr[2] = clamp_uc((y256 - (48 * D) - (120 * E)) >> 8, 0, 255); /* G */ + destPtr[1] = clamp_uc((y256 + (403 * E)) >> 8, 0, 255); /* R */ + destPtr[0] = 0xff; /* A */ }