mirror of
https://github.com/morgan9e/grd
synced 2026-04-15 00:44:04 +09:00
.
This commit is contained in:
354
shaders/grd-avc-dual-view.comp
Normal file
354
shaders/grd-avc-dual-view.comp
Normal file
@@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Pascal Nowack
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
||||
* 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#version 460
|
||||
#extension GL_EXT_null_initializer : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
|
||||
/* Every workgroup consists of 16x16 invocations. */
layout (local_size_x = 16, local_size_y = 16) in;
|
||||
|
||||
/*
 * Specialization constants (the values below are only the defaults).
 *
 * Size of the source frame. main() does not sample positions outside of
 * SOURCE_WIDTH x SOURCE_HEIGHT.
 */
layout (constant_id = 0) const uint32_t SOURCE_WIDTH = 256;
|
||||
layout (constant_id = 1) const uint32_t SOURCE_HEIGHT = 256;
|
||||
|
||||
/*
 * Size of the target views. Invocations, whose 2x2 block lies outside of
 * (TARGET_WIDTH / 2) x (TARGET_HEIGHT / 2), do not write any output.
 */
layout (constant_id = 2) const uint32_t TARGET_WIDTH = 256;
|
||||
layout (constant_id = 3) const uint32_t TARGET_HEIGHT = 256;
|
||||
|
||||
/*
 * When set to 0, main() treats every 2x2 block inside of the target area as
 * damaged, regardless of the comparison between the new and the old frame.
 */
layout (constant_id = 4) const uint32_t PERFORM_DMG_DETECTION = 0;
|
||||
/*
 * Row stride (in elements) used to index the two state buffers by their
 * 64x64 block position.
 */
layout (constant_id = 5) const uint32_t STATE_BUFFER_STRIDE = 0;
|
||||
|
||||
/*
 * Main view: main() stores the four luma values of each 2x2 block in
 * main_y_layer and the averaged U/V pair of the block in main_uv_layer.
 */
layout (set = 0, binding = 0, r8) uniform writeonly image2D main_y_layer;
|
||||
layout (set = 0, binding = 1, rg8) uniform writeonly image2D main_uv_layer;
|
||||
|
||||
/*
 * Auxiliary view: main() stores u1/u3 and v1/v3 in aux_y_layer and pairs of
 * u2 or v2 values in aux_uv_layer.
 */
layout (set = 1, binding = 0, r8) uniform writeonly image2D aux_y_layer;
|
||||
layout (set = 1, binding = 1, rg8) uniform writeonly image2D aux_uv_layer;
|
||||
|
||||
/*
 * One element per 64x64 block. main() writes 1 to an element, when damage
 * was found in the workgroup of the writing invocation. This shader never
 * writes any other value.
 */
layout (set = 2, binding = 0) buffer writeonly DamageBuffer
|
||||
{
|
||||
uint32_t data[];
|
||||
} needs_update;
|
||||
|
||||
/*
 * One element per 64x64 block. main() writes 1 to an element, when the
 * workgroup of the writing invocation has both damage and a chroma value
 * deviating by more than the threshold from its filtered value.
 */
layout (set = 2, binding = 1) buffer writeonly AuxiliaryViewInfoBuffer
|
||||
{
|
||||
uint32_t data[];
|
||||
} needs_auxiliary_view;
|
||||
|
||||
/*
 * Source frames. The new frame is converted to YUV, the old frame is only
 * used for the damage comparison.
 */
layout (set = 3, binding = 0) uniform sampler2D src_new_sampler;
|
||||
layout (set = 3, binding = 1) uniform sampler2D src_old_sampler;
|
||||
|
||||
/*
 * Calculates the luma value of an RGB colour using integer arithmetic.
 *
 * The components are weighted with 54/256 (red), 183/256 (green) and
 * 18/256 (blue). The weights add up to 255/256, so the result for
 * r = g = b = 255 is 254.
 */
float
rgb_to_y (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = 54 * r + 183 * g + 18 * b;

  return float (weighted_sum >> 8);
}
|
||||
|
||||
/*
 * Calculates the U chroma value of an RGB colour using integer arithmetic.
 *
 * The components are weighted with -29/256 (red), -99/256 (green) and
 * 128/256 (blue). The result is biased by 128, so a colour without any
 * chroma offset yields 128.
 */
float
rgb_to_u (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = -29 * r - 99 * g + 128 * b;
  const int32_t unbiased = weighted_sum >> 8;

  return float (unbiased + 128);
}
|
||||
|
||||
/*
 * Calculates the V chroma value of an RGB colour using integer arithmetic.
 *
 * The components are weighted with 128/256 (red), -116/256 (green) and
 * -12/256 (blue). The result is biased by 128, so a colour without any
 * chroma offset yields 128.
 */
float
rgb_to_v (int32_t r,
          int32_t g,
          int32_t b)
{
  const int32_t weighted_sum = 128 * r - 116 * g - 12 * b;
  const int32_t unbiased = weighted_sum >> 8;

  return float (unbiased + 128);
}
|
||||
|
||||
/*
 * Workgroup-wide flags. Both start out as 0 (null initializer) and are only
 * ever set to 1 by main(), before the barrier() call. They are read after
 * the barrier() call.
 *
 * have_block_damage: Set, when any 2x2 block of the workgroup inside of the
 * target area is damaged (or when damage detection is disabled).
 */
shared uint32_t have_block_damage = {};
|
||||
/*
 * have_chroma_offset: Set, when any chroma value of any 2x2 block of the
 * workgroup deviates by more than the threshold from its filtered value.
 */
shared uint32_t have_chroma_offset = {};
|
||||
/*
 * The u2 and v2 values of every invocation of the workgroup, indexed by
 * [local_x][local_y]. After the barrier() call, an invocation reads the
 * value of its horizontal neighbour to write two values per texel to the
 * chroma layer of the auxiliary view.
 */
shared float block_u2[16][16] = {};
|
||||
shared float block_v2[16][16] = {};
|
||||
|
||||
/*
 * Fetches the pixel at (pos_x, pos_y) from the new and the old source frame.
 *
 * The colour of the new frame is converted to YUV and handed back via luma,
 * chroma_u and chroma_v. damaged is set to 1, when the BGR components of the
 * new and the old frame differ, otherwise it is set to 0.
 *
 * The caller is responsible for ensuring that the position lies inside of
 * the source frame.
 */
void
sample_source_pixel (uint32_t     pos_x,
                     uint32_t     pos_y,
                     out float    luma,
                     out float    chroma_u,
                     out float    chroma_v,
                     out uint32_t damaged)
{
  vec4 bgrx_new;
  vec4 bgrx_old;
  int32_t b, g, r;

  /*
   * NOTE(review): The lookups use integer pixel positions, presumably the
   * samplers are created with unnormalized coordinates - confirm against the
   * host side.
   */
  bgrx_new = texture (src_new_sampler, ivec2 (pos_x, pos_y));
  bgrx_old = texture (src_old_sampler, ivec2 (pos_x, pos_y));

  damaged = bgrx_new.bgr != bgrx_old.bgr ? 1 : 0;

  b = int32_t (bgrx_new.b * 255.0f);
  g = int32_t (bgrx_new.g * 255.0f);
  r = int32_t (bgrx_new.r * 255.0f);

  luma = rgb_to_y (r, g, b);
  chroma_u = rgb_to_u (r, g, b);
  chroma_v = rgb_to_v (r, g, b);
}

/*
 * Entry point. Every invocation handles one 2x2 block of source pixels:
 *
 * 1. Convert the (up to) four pixels of the block to YUV and compare them
 *    with the old frame.
 * 2. Publish the damage and chroma offset state of the block to the
 *    workgroup and, after the barrier, to the state buffers.
 * 3. Write the main view and the auxiliary view.
 */
void
main ()
{
  /* See the comment above the threshold check below */
  const float chroma_offset_threshold = 30.0f;

  const uint32_t x_2x2 = gl_GlobalInvocationID.x;
  const uint32_t y_2x2 = gl_GlobalInvocationID.y;
  const uint32_t local_x = gl_LocalInvocationID.x;
  const uint32_t local_y = gl_LocalInvocationID.y;
  const uint32_t tw_half = TARGET_WIDTH >> 1;
  const uint32_t th_half = TARGET_HEIGHT >> 1;
  const uint32_t x_1x1 = x_2x2 << 1;
  const uint32_t y_1x1 = y_2x2 << 1;
  const uint32_t x_4x4 = x_2x2 >> 1;
  const uint32_t x_64x64 = x_2x2 >> 5;
  const uint32_t y_64x64 = y_2x2 >> 5;
  const bool is_inside_target = x_2x2 < tw_half && y_2x2 < th_half;
  float y0, y1, y2, y3;
  float u0, u1, u2, u3;
  float v0, v1, v2, v3;
  float u_filtered, v_filtered;
  uint32_t dmg_p0, dmg_p1, dmg_p2, dmg_p3;
  vec4 uv_filtered;

  /* Pixels outside of the source frame never count as damaged */
  dmg_p0 = dmg_p1 = dmg_p2 = dmg_p3 = 0;

  /*
   * YUV444:
   *
   *       Y             U             V
   * -----------   -----------   -----------
   * | y0 | y1 |   | u0 | u1 |   | v0 | v1 |
   * -----------   -----------   -----------
   * | y2 | y3 |   | u2 | u3 |   | v2 | v3 |
   * -----------   -----------   -----------
   */
  if (x_1x1 < SOURCE_WIDTH && y_1x1 < SOURCE_HEIGHT)
    {
      sample_source_pixel (x_1x1, y_1x1, y0, u0, v0, dmg_p0);
    }
  else
    {
      /* The whole block is outside of the source frame */
      y0 = 0.0f;
      u0 = 128.0f;
      v0 = 128.0f;
    }

  if (x_1x1 + 1 < SOURCE_WIDTH && y_1x1 < SOURCE_HEIGHT)
    {
      sample_source_pixel (x_1x1 + 1, y_1x1, y1, u1, v1, dmg_p1);
    }
  else
    {
      /* Replicate the left pixel */
      y1 = y0;
      u1 = u0;
      v1 = v0;
    }

  if (x_1x1 < SOURCE_WIDTH && y_1x1 + 1 < SOURCE_HEIGHT)
    {
      sample_source_pixel (x_1x1, y_1x1 + 1, y2, u2, v2, dmg_p2);

      if (x_1x1 + 1 < SOURCE_WIDTH)
        {
          sample_source_pixel (x_1x1 + 1, y_1x1 + 1, y3, u3, v3, dmg_p3);
        }
      else
        {
          /* Replicate the left pixel */
          y3 = y2;
          u3 = u2;
          v3 = v2;
        }
    }
  else
    {
      /* Replicate the upper row */
      y2 = y0;
      u2 = u0;
      v2 = v0;
      y3 = y1;
      u3 = u1;
      v3 = v1;
    }

  /* Shared with the horizontal neighbour for the auxiliary chroma layer */
  block_u2[local_x][local_y] = u2;
  block_v2[local_x][local_y] = v2;

  u_filtered = (u0 + u1 + u2 + u3) / 4;
  v_filtered = (v0 + v1 + v2 + v3) / 4;

  /*
   * When decoding and reassembling the original frame, the client side should
   * use the filtered value over the reversed value, when their value
   * difference is lower than a specific threshold, due to potential artifacts
   * stemming from the quantization process of the AVC encoding.
   * As a threshold 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode
   * ([MS-RDPEGFX]) considers the value 30.
   * This threshold can also serve the server side here by checking whether an
   * auxiliary view is actually needed.
   */
  if (abs (u_filtered - u0) > chroma_offset_threshold ||
      abs (u_filtered - u1) > chroma_offset_threshold ||
      abs (u_filtered - u2) > chroma_offset_threshold ||
      abs (u_filtered - u3) > chroma_offset_threshold ||
      abs (v_filtered - v0) > chroma_offset_threshold ||
      abs (v_filtered - v1) > chroma_offset_threshold ||
      abs (v_filtered - v2) > chroma_offset_threshold ||
      abs (v_filtered - v3) > chroma_offset_threshold)
    have_chroma_offset = 1;

  /* We cannot bail out early here due to the barrier() call */
  if (is_inside_target)
    {
      if (PERFORM_DMG_DETECTION == 0 ||
          dmg_p0 != 0 || dmg_p1 != 0 || dmg_p2 != 0 || dmg_p3 != 0)
        have_block_damage = 1;
    }

  barrier ();
  if (!is_inside_target)
    return;

  /*
   * Only two invocations per workgroup write the state: (0, 0) writes the
   * damage state, (1, 0) writes the auxiliary view state.
   */
  if (local_y == 0 &&
      (local_x == 0 || local_x == 1))
    {
      uint32_t state_pos;

      state_pos = y_64x64 * STATE_BUFFER_STRIDE + x_64x64;

      if (local_x == 0 && have_block_damage == 1)
        needs_update.data[state_pos] = 1;
      if (local_x == 1 && have_chroma_offset == 1 && have_block_damage == 1)
        needs_auxiliary_view.data[state_pos] = 1;
    }

  /*
   * See also 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode
   * ([MS-RDPEGFX]) for the construction of the main and auxiliary view.
   */

  /*
   * The main view is constructed the same way as in the AVC420 case
   * (4 Luma values (4Y), 2 Chroma values (1U, 1V) per 2x2 BGRX block).
   *
   * The chroma values are calculated from the average U/V values.
   */

  imageStore (main_y_layer, ivec2 (x_1x1, y_1x1), vec4 (y0 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1 + 1, y_1x1), vec4 (y1 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1, y_1x1 + 1), vec4 (y2 / 255.0f));
  imageStore (main_y_layer, ivec2 (x_1x1 + 1, y_1x1 + 1), vec4 (y3 / 255.0f));

  uv_filtered = vec4 (u_filtered / 255.0f,
                      v_filtered / 255.0f,
                      0,
                      0);
  imageStore (main_uv_layer, ivec2 (x_2x2, y_2x2), uv_filtered);

  /*
   * The auxiliary view is constructed as follows (simplified):
   *
   * Luma:
   * -----------------------------------------------
   * | u1 | u1 | u1 | u1 | ... | v1 | v1 | v1 | v1 |
   * -----------------------------------------------
   * | u3 | u3 | u3 | u3 | ... | v3 | v3 | v3 | v3 |
   * -----------------------------------------------
   * | u1 | u1 | u1 | u1 | ... | v1 | v1 | v1 | v1 |
   * -----------------------------------------------
   * | u3 | u3 | u3 | u3 | ... | v3 | v3 | v3 | v3 |
   * -----------------------------------------------
   * ...
   *
   * Chroma U:
   * -----------------------------------------------
   * | u2 | u2 | u2 | u2 | ... | v2 | v2 | v2 | v2 |
   * -----------------------------------------------
   * ...
   *
   * Chroma V:
   * -----------------------------------------------
   * | u2 | u2 | u2 | u2 | ... | v2 | v2 | v2 | v2 |
   * -----------------------------------------------
   * ...
   *
   * If x_1x1 MOD 4 == 0, then u2 and v2 are written to U,
   * otherwise they will be written to V.
   */

  imageStore (aux_y_layer, ivec2 (x_2x2, y_1x1), vec4 (u1 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2 + tw_half, y_1x1), vec4 (v1 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2, y_1x1 + 1), vec4 (u3 / 255.0f));
  imageStore (aux_y_layer, ivec2 (x_2x2 + tw_half, y_1x1 + 1), vec4 (v3 / 255.0f));

  if (x_2x2 == x_4x4 << 1)
    {
      /* Even x_2x2: Write the own u2 and the one of the right neighbour */
      vec4 u2s;

      u2s = vec4 (block_u2[local_x][local_y] / 255.0f,
                  block_u2[local_x + 1][local_y] / 255.0f,
                  0,
                  0);
      imageStore (aux_uv_layer, ivec2 (x_4x4, y_2x2), u2s);
    }
  else
    {
      /* Odd x_2x2: Write the v2 of the left neighbour and the own one */
      vec4 v2s;

      v2s = vec4 (block_v2[local_x - 1][local_y] / 255.0f,
                  block_v2[local_x][local_y] / 255.0f,
                  0,
                  0);
      imageStore (aux_uv_layer, ivec2 (x_4x4 + (tw_half >> 1), y_2x2), v2s);
    }
}
|
||||
17
shaders/meson.build
Normal file
17
shaders/meson.build
Normal file
@@ -0,0 +1,17 @@
|
||||
# The shaders are only built and installed, when have_rdp is set.
if have_rdp
|
||||
# Base names of the compute shaders (without the '.comp' suffix)
shaders = [
|
||||
'grd-avc-dual-view',
|
||||
]
|
||||
|
||||
# Collects the file names of the optimized SPIR-V binaries to install
spirv_sources = []
|
||||
|
||||
# NOTE(review): run_command() is executed while configuring the project, not
# as a build step, so the '.spv' files are presumably only regenerated on
# reconfiguration - confirm that this is intended.
foreach shader : shaders
|
||||
# <shader>.comp -> <shader>.spv
run_command(glslc, '-o', shader + '.spv', shader + '.comp', check: true)
|
||||
# <shader>.spv -> <shader>_opt.spv (run with '-O')
run_command(spirv_opt, '-o', shader + '_opt.spv', '-O', shader + '.spv', check: true)
|
||||
spirv_sources += [shader + '_opt.spv']
|
||||
endforeach
|
||||
|
||||
# Only the optimized binaries are installed
install_data(spirv_sources,
|
||||
install_dir: grd_shaderdir
|
||||
)
|
||||
endif
|
||||
Reference in New Issue
Block a user