mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-07 08:00:36 +01:00
r600: make vertex r10g10b10a2_sscaled conformant on palm and beyond
This is a gl4.3 issue very similar to e8fa3b4950.
The mode r10g10b10a2_sscaled processed as vertex on palm at the
hardware level doesn't follow the current standard. Indeed, the .w
component (2-bits) is not calculated as expected. The table below
describes the situation.
This change fixes this issue by adding two gpu instructions at
the vertex fetch shader stage. An equivalent C representation and
a gpu asm dump of the generated sequence are available below.
.w(2-bits) expected palm cypress
0 0 0 0
1 1 1 1
2 -2 2 -2
3 -1 3 -1
w_out = w_in - (w_in > 1. ? 4. : 0.);
0002 00000024 A0040000 ALU 2 @72
0072 801F2C0A 600004C0 1 w: SETGT*4 __.w, R10.w, 1.0
0074 839FCC0A 61400010 2 w: ADD R10.w, R10.w, -PV.w
Note: cypress returns the expected value, and does not need
this correction.
This change was tested on palm, barts and cayman. Here are the tests fixed:
khr-gl4[3-6]/vertex_attrib_binding/basic-input-case6: fail pass
khr-gles31/core/vertex_attrib_binding/basic-input-case6: fail pass
Cc: mesa-stable
Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38849>
This commit is contained in:
parent
da1108dcc4
commit
2ed761021f
1 changed files with 46 additions and 18 deletions
|
|
@ -338,6 +338,12 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
|
|||
int i, j, r, fs_size;
|
||||
uint32_t buffer_mask = 0;
|
||||
struct r600_fetch_shader *shader;
|
||||
unsigned post_fix_count = 0;
|
||||
struct post_fix {
|
||||
uint16_t gpr;
|
||||
uint16_t swizzle3;
|
||||
uint16_t num_format;
|
||||
} post_fix[VERT_ATTRIB_MAX];
|
||||
|
||||
assert(count < 32);
|
||||
|
||||
|
|
@ -434,33 +440,50 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
|
|||
|
||||
if (unlikely(rctx->b.family >= CHIP_PALM &&
|
||||
format == FMT_2_10_10_10 &&
|
||||
!num_format && format_comp &&
|
||||
(!num_format || num_format == 2) &&
|
||||
format_comp &&
|
||||
desc->swizzle[3] >= PIPE_SWIZZLE_X &&
|
||||
desc->swizzle[3] <= PIPE_SWIZZLE_W)) {
|
||||
post_fix[post_fix_count].gpr = i + 1;
|
||||
post_fix[post_fix_count].swizzle3 = desc->swizzle[3];
|
||||
post_fix[post_fix_count].num_format = num_format;
|
||||
post_fix_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(post_fix_count)) {
|
||||
bc.force_add_cf = 1;
|
||||
|
||||
for (i = 0; i < post_fix_count; i++) {
|
||||
struct r600_bytecode_alu alu;
|
||||
const unsigned sel_main = i + 1;
|
||||
const uint16_t sel_main = post_fix[i].gpr;
|
||||
const uint16_t swizzle3 = post_fix[i].swizzle3;
|
||||
const uint16_t local_num_format = post_fix[i].num_format;
|
||||
|
||||
bc.force_add_cf = 1;
|
||||
if (!local_num_format) {
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.src[0].sel = sel_main;
|
||||
alu.src[0].chan = swizzle3;
|
||||
alu.dst.chan = 1;
|
||||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.src[0].sel = sel_main;
|
||||
alu.src[0].chan = desc->swizzle[3];
|
||||
alu.dst.chan = 1;
|
||||
alu.omod = 2;
|
||||
alu.dst.clamp = 1;
|
||||
alu.omod = 2;
|
||||
alu.dst.clamp = 1;
|
||||
|
||||
if (unlikely(r = r600_bytecode_add_alu(&bc, &alu)))
|
||||
goto fail;
|
||||
if (unlikely(r = r600_bytecode_add_alu(&bc, &alu)))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = ALU_OP2_SETGT;
|
||||
alu.src[0].sel = sel_main;
|
||||
alu.src[0].chan = desc->swizzle[3];
|
||||
alu.src[0].chan = swizzle3;
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[1].value = 0x3f000000;
|
||||
alu.src[1].value = !local_num_format ?
|
||||
0x3f000000 :
|
||||
0x3f800000;
|
||||
alu.dst.chan = 3;
|
||||
alu.omod = 1;
|
||||
alu.omod = !local_num_format ? 1 : 2;
|
||||
alu.last = 1;
|
||||
|
||||
if (unlikely(r = r600_bytecode_add_alu(&bc, &alu)))
|
||||
|
|
@ -468,13 +491,18 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
|
|||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = ALU_OP2_ADD;
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_PV;
|
||||
alu.src[0].chan = 1;
|
||||
if (!local_num_format) {
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_PV;
|
||||
alu.src[0].chan = 1;
|
||||
} else {
|
||||
alu.src[0].sel = sel_main;
|
||||
alu.src[0].chan = swizzle3;
|
||||
}
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_PV;
|
||||
alu.src[1].chan = 3;
|
||||
alu.src[1].neg = 1;
|
||||
alu.dst.sel = sel_main;
|
||||
alu.dst.chan = desc->swizzle[3];
|
||||
alu.dst.chan = swizzle3;
|
||||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue