From e8fa3b49503a54c5bca9a83e65f23f953d9abd49 Mon Sep 17 00:00:00 2001
From: Patrick Lerda
Date: Fri, 29 Nov 2024 12:44:28 +0100
Subject: [PATCH] r600: make vertex r10g10b10a2_snorm conformant on palm and beyond

When the r10g10b10a2_snorm format is processed as a vertex format on palm, the hardware does not follow the current standard: the .w component (2 bits) is not calculated as expected. The table below describes the situation.

This change fixes the issue by adding three GPU instructions at the vertex fetch shader stage. An equivalent C representation and a GPU asm dump of the generated sequence are given below.

.w (2 bits)    expected    palm
0               0.0        0.000000
1               1.0        0.333333
2              -1.0        0.666667
3              -1.0        1.000000

w_out = (4.*w_in > 1. ? 1. : 4.*w_in) - (w_in > 0.5 ? 2. : 0.);

0002 00000008 A0080000  ALU 3 @16
   0016 00000C02 A0000CC0     1 y:     MOV*4_sat     __.y,  R2.w
   0018 801F8C02 600004A0       w:     SETGT*2       __.w,  R2.w, 0.5
   0020 839FC4FE 60400010     2 w:     ADD           R2.w,  PV.y, -PV.w

Note: The rv770 and cypress don't need this correction. This is definitely a hardware change between these GPUs.

This change was tested on palm, barts and cayman.
Here are the tests fixed: spec/arb_vertex_type_2_10_10_10_rev/arb_vertex_type_2_10_10_10_rev-array_types: fail pass deqp-gles3/functional/draw/random/124: fail pass deqp-gles3/functional/vertex_arrays/single_attribute/normalize/int2_10_10_10/components4_quads1: fail pass deqp-gles3/functional/vertex_arrays/single_attribute/normalize/int2_10_10_10/components4_quads256: fail pass khr-gl43/vertex_attrib_binding/basic-input-case5: fail pass khr-gl44/vertex_attrib_binding/basic-input-case5: fail pass khr-gl45/vertex_attrib_binding/basic-input-case5: fail pass Cc: mesa-stable Signed-off-by: Patrick Lerda Reviewed-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/r600_shader.c | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2d829cb5f2a..f48612788e1 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -435,6 +435,56 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, else shader->width_correction[elements[i].vertex_buffer_index] = 8 - 6; } + + if (unlikely(rctx->b.family >= CHIP_PALM && + format == FMT_2_10_10_10 && + !num_format && format_comp && + desc->swizzle[3] >= PIPE_SWIZZLE_X && + desc->swizzle[3] <= PIPE_SWIZZLE_W)) { + struct r600_bytecode_alu alu; + const unsigned sel_main = i + 1; + + bc.force_add_cf = 1; + + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = sel_main; + alu.src[0].chan = desc->swizzle[3]; + alu.dst.chan = 1; + alu.omod = 2; + alu.dst.clamp = 1; + + if (unlikely(r = r600_bytecode_add_alu(&bc, &alu))) + goto fail; + + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_SETGT; + alu.src[0].sel = sel_main; + alu.src[0].chan = desc->swizzle[3]; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x3f000000; + alu.dst.chan = 3; + alu.omod = 1; + alu.last = 1; + + if (unlikely(r = r600_bytecode_add_alu(&bc, &alu))) + goto fail; + + 
memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_ADD; + alu.src[0].sel = V_SQ_ALU_SRC_PV; + alu.src[0].chan = 1; + alu.src[1].sel = V_SQ_ALU_SRC_PV; + alu.src[1].chan = 3; + alu.src[1].neg = 1; + alu.dst.sel = sel_main; + alu.dst.chan = desc->swizzle[3]; + alu.dst.write = 1; + alu.last = 1; + + if (unlikely(r = r600_bytecode_add_alu(&bc, &alu))) + goto fail; + } } r600_bytecode_add_cfinst(&bc, CF_OP_RET);