mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-17 11:20:18 +01:00
nir/alu_to_scalar: Use ssa_for_alu_src in hand-rolled expansions
The ssa_for_alu_src helper will correctly handle swizzles and other source modifiers for you. The expansions for unpack_half_2x16, pack_uvec2_to_uint, and pack_uvec4_to_uint were all broken with regard to swizzles. The brokenness of unpack_half_2x16 was causing rendering errors in Rise of the Tomb Raider on Intel ever since c11833ab24, which added an extra copy propagation to the optimization pipeline and caused us to start seeing swizzles where we hadn't seen any before.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107926
Fixes: 9ce901058f "nir: Add lowering of nir_op_unpack_half_2x16."
Fixes: 9b8786eba9 "nir: Add lowering support for packing opcodes."
Tested-by: Alex Smith <asmith@feralinteractive.com>
Tested-by: Józef Kucia <joseph.kucia@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
(cherry picked from commit dd553bc67f)
This commit is contained in:
parent
43079480da
commit
2006c70812
1 changed file with 18 additions and 15 deletions
|
|
@@ -107,11 +107,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
if (!b->shader->options->lower_pack_half_2x16)
|
||||
return false;
|
||||
|
||||
nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, instr, 0);
|
||||
|
||||
nir_ssa_def *val =
|
||||
nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
|
||||
instr->src[0].swizzle[0]),
|
||||
nir_channel(b, instr->src[0].src.ssa,
|
||||
instr->src[0].swizzle[1]));
|
||||
nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
|
||||
nir_channel(b, src_vec2, 1));
|
||||
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
|
||||
nir_instr_remove(&instr->instr);
|
||||
|
|
@@ -130,9 +130,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
if (!b->shader->options->lower_unpack_half_2x16)
|
||||
return false;
|
||||
|
||||
nir_ssa_def *packed = nir_ssa_for_alu_src(b, instr, 0);
|
||||
|
||||
nir_ssa_def *comps[2];
|
||||
comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
|
||||
comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
|
||||
comps[0] = nir_unpack_half_2x16_split_x(b, packed);
|
||||
comps[1] = nir_unpack_half_2x16_split_y(b, packed);
|
||||
nir_ssa_def *vec = nir_vec(b, comps, 2);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
|
||||
|
|
@@ -144,8 +146,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
assert(b->shader->options->lower_pack_snorm_2x16 ||
|
||||
b->shader->options->lower_pack_unorm_2x16);
|
||||
|
||||
nir_ssa_def *word =
|
||||
nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *word = nir_extract_u16(b, nir_ssa_for_alu_src(b, instr, 0),
|
||||
nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
|
||||
nir_channel(b, word, 0));
|
||||
|
|
@@ -159,8 +161,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
assert(b->shader->options->lower_pack_snorm_4x8 ||
|
||||
b->shader->options->lower_pack_unorm_4x8);
|
||||
|
||||
nir_ssa_def *byte =
|
||||
nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *byte = nir_extract_u8(b, nir_ssa_for_alu_src(b, instr, 0),
|
||||
nir_imm_int(b, 0));
|
||||
nir_ssa_def *val =
|
||||
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
|
||||
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
|
||||
|
|
@@ -173,14 +175,15 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
|||
}
|
||||
|
||||
case nir_op_fdph: {
|
||||
nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, instr, 0);
|
||||
nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, instr, 1);
|
||||
|
||||
nir_ssa_def *sum[4];
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
|
||||
instr->src[0].swizzle[i]),
|
||||
nir_channel(b, instr->src[1].src.ssa,
|
||||
instr->src[1].swizzle[i]));
|
||||
sum[i] = nir_fmul(b, nir_channel(b, src0_vec, i),
|
||||
nir_channel(b, src1_vec, i));
|
||||
}
|
||||
sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
|
||||
sum[3] = nir_channel(b, src1_vec, 3);
|
||||
|
||||
nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
|
||||
nir_fadd(b, sum[2], sum[3]));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue