Files
grd/shaders/grd-avc-dual-view.comp
2026-02-13 13:06:50 +09:00

355 lines
10 KiB
Plaintext

/*
* Copyright (C) 2024 Pascal Nowack
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#version 460
/* Needed for the empty "= {}" initializers of the shared variables */
#extension GL_EXT_null_initializer : require
/* Needed for the explicitly sized int32_t and uint32_t types */
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
/*
 * A workgroup consists of 16x16 invocations. main() interprets the global
 * invocation id as the position of a 2x2 pixel block, so one workgroup
 * covers an area of 32x32 pixels.
 */
layout (local_size_x = 16, local_size_y = 16) in;
/*
 * Specialization constants. The values assigned here are only the defaults.
 *
 * SOURCE_WIDTH, SOURCE_HEIGHT:
 *   Pixels at positions at or beyond these limits are not sampled by main().
 * TARGET_WIDTH, TARGET_HEIGHT:
 *   Invocations, whose 2x2 block position is not below half of these values,
 *   do not write to any image or state buffer.
 * PERFORM_DMG_DETECTION:
 *   When 0, every invocation inside the target area reports damage,
 *   regardless of the comparison between the new and the old frame.
 * STATE_BUFFER_STRIDE:
 *   Number of entries per row of 64x64 pixel blocks in both state buffers
 *   (entry index = y_64x64 * STATE_BUFFER_STRIDE + x_64x64).
 */
layout (constant_id = 0) const uint32_t SOURCE_WIDTH = 256;
layout (constant_id = 1) const uint32_t SOURCE_HEIGHT = 256;
layout (constant_id = 2) const uint32_t TARGET_WIDTH = 256;
layout (constant_id = 3) const uint32_t TARGET_HEIGHT = 256;
layout (constant_id = 4) const uint32_t PERFORM_DMG_DETECTION = 0;
layout (constant_id = 5) const uint32_t STATE_BUFFER_STRIDE = 0;
/* Set 0: Luma (one channel) and chroma (two channels) layer of the main view */
layout (set = 0, binding = 0, r8) uniform writeonly image2D main_y_layer;
layout (set = 0, binding = 1, rg8) uniform writeonly image2D main_uv_layer;
/* Set 1: Luma and chroma layer of the auxiliary view */
layout (set = 1, binding = 0, r8) uniform writeonly image2D aux_y_layer;
layout (set = 1, binding = 1, rg8) uniform writeonly image2D aux_uv_layer;
/*
 * Set 2: State buffers with one entry per 64x64 pixel block.
 * main() only ever writes the value 1 to these buffers.
 * NOTE(review): Presumably the host clears both buffers before each
 * dispatch - confirm.
 */
/* Entry is set to 1, when the respective block has damage */
layout (set = 2, binding = 0) buffer writeonly DamageBuffer
{
uint32_t data[];
} needs_update;
/*
 * Entry is set to 1, when the respective block has damage and its chroma
 * values deviate too much from the filtered chroma values of the main view
 */
layout (set = 2, binding = 1) buffer writeonly AuxiliaryViewInfoBuffer
{
uint32_t data[];
} needs_auxiliary_view;
/*
 * Set 3: The new frame, which is converted, and the old frame, which is only
 * used as comparison base for the damage detection.
 * NOTE(review): main() passes integer pixel positions to texture().
 * Presumably the samplers use unnormalized coordinates - confirm.
 */
layout (set = 3, binding = 0) uniform sampler2D src_new_sampler;
layout (set = 3, binding = 1) uniform sampler2D src_old_sampler;
/*
 * Calculates the luma (Y) value of an RGB triple using integer arithmetic
 * with coefficients scaled by 256.
 *
 * r, g, b: The colour components of the pixel
 *
 * Returns the luma value as float.
 */
float
rgb_to_y (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = 54 * r + 183 * g + 18 * b;

  /* Drop the scaling factor of the coefficients again */
  return float (weighted_sum >> 8);
}
/*
 * Calculates the chroma U value of an RGB triple using integer arithmetic
 * with coefficients scaled by 256.
 *
 * r, g, b: The colour components of the pixel
 *
 * Returns the chroma U value, offset by 128, as float.
 */
float
rgb_to_u (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = -29 * r - 99 * g + 128 * b;
  /* The weighted sum may be negative, the shift is applied before the offset */
  const int32_t unscaled = weighted_sum >> 8;

  return float (unscaled + 128);
}
/*
 * Calculates the chroma V value of an RGB triple using integer arithmetic
 * with coefficients scaled by 256.
 *
 * r, g, b: The colour components of the pixel
 *
 * Returns the chroma V value, offset by 128, as float.
 */
float
rgb_to_v (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = 128 * r - 116 * g - 12 * b;
  /* The weighted sum may be negative, the shift is applied before the offset */
  const int32_t unscaled = weighted_sum >> 8;

  return float (unscaled + 128);
}
/*
 * State shared by all invocations of a workgroup. main() writes these
 * variables before its barrier() call and reads them afterwards.
 */
/*
 * Set to 1 by any invocation inside the target area, when damage detection
 * is disabled or when one of its pixels differs between both frames
 */
shared uint32_t have_block_damage = {};
/*
 * Set to 1 by any invocation, where a U or V value of its 2x2 block deviates
 * by more than 30 from the averaged U or V value of that block
 */
shared uint32_t have_chroma_offset = {};
/*
 * The u2 and v2 values (bottom left pixel of the 2x2 block) of every
 * invocation, indexed with [local_x][local_y]. They allow an invocation to
 * read the values of its horizontal neighbour, when writing the chroma layer
 * of the auxiliary view.
 */
shared float block_u2[16][16] = {};
shared float block_v2[16][16] = {};
/*
 * Samples the pixel at pos from the new and the old source frame and
 * converts the pixel of the new frame to its Y, U, and V value.
 *
 * pos:     Position of the pixel in the source frames
 * y, u, v: Receive the converted values of the pixel of the new frame
 *
 * Returns 1, when the BGR components of the new and the old pixel differ,
 * otherwise 0.
 */
uint32_t
convert_pixel (ivec2     pos,
               out float y,
               out float u,
               out float v)
{
  const vec4 pixel_new = texture (src_new_sampler, pos);
  const vec4 pixel_old = texture (src_old_sampler, pos);
  const int32_t blue = int32_t (pixel_new.b * 255.0f);
  const int32_t green = int32_t (pixel_new.g * 255.0f);
  const int32_t red = int32_t (pixel_new.r * 255.0f);
  uint32_t damaged;

  y = rgb_to_y (red, green, blue);
  u = rgb_to_u (red, green, blue);
  v = rgb_to_v (red, green, blue);

  damaged = pixel_new.bgr != pixel_old.bgr ? 1 : 0;

  return damaged;
}

/*
 * Converts one 2x2 block of BGRX pixels per invocation into the main and
 * the auxiliary view and records per 64x64 pixel block, whether the block
 * has damage and whether the auxiliary view is needed for it.
 */
void
main ()
{
  const uint32_t x_2x2 = gl_GlobalInvocationID.x;
  const uint32_t y_2x2 = gl_GlobalInvocationID.y;
  const uint32_t local_x = gl_LocalInvocationID.x;
  const uint32_t local_y = gl_LocalInvocationID.y;
  /* Position of the top left pixel of the 2x2 block */
  const uint32_t x_1x1 = x_2x2 << 1;
  const uint32_t y_1x1 = y_2x2 << 1;
  /* Horizontal position of the pair of 2x2 blocks sharing one chroma texel */
  const uint32_t x_4x4 = x_2x2 >> 1;
  /* Position of the state buffer entry */
  const uint32_t x_64x64 = x_2x2 >> 5;
  const uint32_t y_64x64 = y_2x2 >> 5;
  const uint32_t tw_half = TARGET_WIDTH >> 1;
  const uint32_t th_half = TARGET_HEIGHT >> 1;
  const bool in_target = x_2x2 < tw_half && y_2x2 < th_half;
  const bool has_left_column = x_1x1 < SOURCE_WIDTH;
  const bool has_right_column = x_1x1 + 1 < SOURCE_WIDTH;
  const bool has_top_row = y_1x1 < SOURCE_HEIGHT;
  const bool has_bottom_row = y_1x1 + 1 < SOURCE_HEIGHT;
  uint32_t dmg_p0 = 0;
  uint32_t dmg_p1 = 0;
  uint32_t dmg_p2 = 0;
  uint32_t dmg_p3 = 0;
  float y0, y1, y2, y3;
  float u0, u1, u2, u3;
  float v0, v1, v2, v3;
  float u_filtered, v_filtered;
  vec4 u_offsets, v_offsets;

  /*
   * Layout of the 2x2 block in the YUV444 representation:
   *
   *       Y             U             V
   *  -----------   -----------   -----------
   *  | y0 | y1 |   | u0 | u1 |   | v0 | v1 |
   *  -----------   -----------   -----------
   *  | y2 | y3 |   | u2 | u3 |   | v2 | v3 |
   *  -----------   -----------   -----------
   *
   * Pixels outside of the source frame are not sampled. The top left pixel
   * falls back to black, all other pixels fall back to a neighbour.
   */

  /* Top left pixel */
  if (has_left_column && has_top_row)
    {
      dmg_p0 = convert_pixel (ivec2 (x_1x1, y_1x1), y0, u0, v0);
    }
  else
    {
      y0 = 0.0f;
      u0 = 128.0f;
      v0 = 128.0f;
    }

  /* Top right pixel */
  if (has_right_column && has_top_row)
    {
      dmg_p1 = convert_pixel (ivec2 (x_1x1 + 1, y_1x1), y1, u1, v1);
    }
  else
    {
      y1 = y0;
      u1 = u0;
      v1 = v0;
    }

  /* Bottom row */
  if (has_left_column && has_bottom_row)
    {
      dmg_p2 = convert_pixel (ivec2 (x_1x1, y_1x1 + 1), y2, u2, v2);

      if (has_right_column)
        {
          dmg_p3 = convert_pixel (ivec2 (x_1x1 + 1, y_1x1 + 1), y3, u3, v3);
        }
      else
        {
          y3 = y2;
          u3 = u2;
          v3 = v2;
        }
    }
  else
    {
      /* Repeat the top row */
      y2 = y0;
      u2 = u0;
      v2 = v0;

      y3 = y1;
      u3 = u1;
      v3 = v1;
    }

  /* Make u2 and v2 available to the horizontal neighbour */
  block_u2[local_x][local_y] = u2;
  block_v2[local_x][local_y] = v2;

  u_filtered = (u0 + u1 + u2 + u3) / 4;
  v_filtered = (v0 + v1 + v2 + v3) / 4;

  /*
   * 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode ([MS-RDPEGFX])
   * considers the value 30 as threshold: When the client decodes and
   * reassembles the original frame, it should prefer the filtered value
   * over the reversed value, if both values differ by less than that
   * threshold, since the quantization process of the AVC encoding may
   * produce artifacts.
   * The server side can make use of the same threshold to determine, whether
   * an auxiliary view is actually needed.
   */
  u_offsets = abs (vec4 (u0, u1, u2, u3) - u_filtered);
  v_offsets = abs (vec4 (v0, v1, v2, v3) - v_filtered);

  if (any (greaterThan (u_offsets, vec4 (30.0f))) ||
      any (greaterThan (v_offsets, vec4 (30.0f))))
    have_chroma_offset = 1;

  /*
   * Invocations outside of the target area must not return before the
   * barrier() call, they just don't contribute any damage.
   */
  if (in_target &&
      (PERFORM_DMG_DETECTION == 0 ||
       (dmg_p0 | dmg_p1 | dmg_p2 | dmg_p3) != 0))
    have_block_damage = 1;

  barrier ();

  if (!in_target)
    return;

  /*
   * The first two invocations of the first row report the state of the
   * workgroup. Without damage, there is nothing to report.
   */
  if (local_y == 0 && local_x <= 1 && have_block_damage == 1)
    {
      const uint32_t state_pos = y_64x64 * STATE_BUFFER_STRIDE + x_64x64;

      if (local_x == 0)
        needs_update.data[state_pos] = 1;
      else if (have_chroma_offset == 1)
        needs_auxiliary_view.data[state_pos] = 1;
    }

  /*
   * The construction of the main and the auxiliary view is described in
   * 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode ([MS-RDPEGFX]).
   */

  /*
   * Main view: Same construction as in the AVC420 case. Every 2x2 BGRX block
   * results in 4 Luma values (4Y) and 2 Chroma values (1U, 1V), where the
   * chroma values are the averaged U/V values.
   */
  imageStore (main_y_layer, ivec2 (x_1x1, y_1x1), vec4 (y0 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1 + 1, y_1x1), vec4 (y1 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1, y_1x1 + 1), vec4 (y2 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1 + 1, y_1x1 + 1), vec4 (y3 / 255.0f));

  imageStore (main_uv_layer, ivec2 (x_2x2, y_2x2),
              vec4 (u_filtered / 255.0f, v_filtered / 255.0f, 0, 0));

  /*
   * Auxiliary view (simplified):
   *
   * Luma:
   *  -----------------------------------------------
   *  | u1 | u1 | u1 | u1 | ... | v1 | v1 | v1 | v1 |
   *  -----------------------------------------------
   *  | u3 | u3 | u3 | u3 | ... | v3 | v3 | v3 | v3 |
   *  -----------------------------------------------
   *  | u1 | u1 | u1 | u1 | ... | v1 | v1 | v1 | v1 |
   *  -----------------------------------------------
   *  | u3 | u3 | u3 | u3 | ... | v3 | v3 | v3 | v3 |
   *  -----------------------------------------------
   *  ...
   *
   * Chroma U:
   *  -----------------------------------------------
   *  | u2 | u2 | u2 | u2 | ... | v2 | v2 | v2 | v2 |
   *  -----------------------------------------------
   *  ...
   *
   * Chroma V:
   *  -----------------------------------------------
   *  | u2 | u2 | u2 | u2 | ... | v2 | v2 | v2 | v2 |
   *  -----------------------------------------------
   *  ...
   *
   * The u2 and v2 values are written to U, if x_1x1 MOD 4 == 0.
   * Otherwise, they are written to V.
   */
  imageStore (aux_y_layer, ivec2 (x_2x2, y_1x1), vec4 (u1 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2 + tw_half, y_1x1), vec4 (v1 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2, y_1x1 + 1), vec4 (u3 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2 + tw_half, y_1x1 + 1), vec4 (v3 / 255.0f));

  if ((x_4x4 << 1) == x_2x2)
    {
      /* Left 2x2 block of the pair: Write the u2 values of both blocks */
      const float u2_left = block_u2[local_x][local_y];
      const float u2_right = block_u2[local_x + 1][local_y];

      imageStore (aux_uv_layer, ivec2 (x_4x4, y_2x2),
                  vec4 (u2_left / 255.0f, u2_right / 255.0f, 0, 0));
    }
  else
    {
      /* Right 2x2 block of the pair: Write the v2 values of both blocks */
      const float v2_left = block_v2[local_x - 1][local_y];
      const float v2_right = block_v2[local_x][local_y];

      imageStore (aux_uv_layer, ivec2 (x_4x4 + (tw_half >> 1), y_2x2),
                  vec4 (v2_left / 255.0f, v2_right / 255.0f, 0, 0));
    }
}