pan/bi: Lower removed instructions in algebraic on v11+
This lowers all instructions that were removed on v11 to equivalents in algebraic, and asserts in BIR emission to ensure they are never rematerialized.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33608>
parent be011e8675
commit d79a31bf81
3 changed files with 77 additions and 5 deletions
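Each entry in the algebraic list changed below is a nir_algebraic (search, replace, condition) tuple; the new 'gpu_arch >= 11' strings are conditions evaluated against the gpu_arch parameter that the generated pass now takes (see the header and params changes further down). A minimal standalone sketch, in plain Python outside the real bifrost_nir_algebraic.py, of how one such lowering reads:

# Illustrative only: mirrors the shape of the rules added in this commit.
a = 'a'  # pattern variable, as in the real rule list

example_rules = [
    # v11+ has no direct int-to-f16 conversion, so widen to a 32-bit
    # integer, convert to f32, then narrow to f16.
    (('i2f16', a), ('f2f16', ('i2f32', ('i2i32', a))), 'gpu_arch >= 11'),
]

for search, replace, condition in example_rules:
    print(search, '->', replace, 'when', condition)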
@@ -2748,6 +2748,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      if (!(src_sz == 32 && comps == 2))
         break;

      /* Starting with v11, we don't have V2XXX_TO_V2F16, this should have been
       * lowered before if there is more than one component */
      assert(b->shader->arch < 11);

      nir_alu_src *src = &instr->src[0];
      bi_index idx = bi_src_index(&src->src);
      bi_index s0 = bi_extract(b, idx, src->swizzle[0]);

@@ -3027,6 +3031,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
   }

   case nir_op_f2i32:
      /* v11 removed F16_TO_S32 */
      assert(src_sz == 32 || (b->shader->arch < 11 && src_sz == 16));

      if (src_sz == 32)
         bi_f32_to_s32_to(b, dst, s0);
      else

@@ -3035,6 +3042,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)

   /* Note 32-bit sources => no vectorization, so 32-bit works */
   case nir_op_f2u16:
      /* v11 removed V2F16_TO_V2U16 */
      assert(src_sz == 32 || (b->shader->arch < 11 && src_sz == 16));

      if (src_sz == 32)
         bi_f32_to_u32_to(b, dst, s0);
      else

@@ -3042,6 +3052,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_f2i16:
      /* v11 removed V2F16_TO_V2S16 */
      assert(src_sz == 32 || (b->shader->arch < 11 && src_sz == 16));

      if (src_sz == 32)
         bi_f32_to_s32_to(b, dst, s0);
      else

@@ -3049,6 +3062,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_f2u32:
      /* v11 removed F16_TO_U32 */
      assert(src_sz == 32 || (b->shader->arch < 11 && src_sz == 16));

      if (src_sz == 32)
         bi_f32_to_u32_to(b, dst, s0);
      else

@@ -3056,6 +3072,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_u2f16:
      /* Starting with v11, we don't have V2XXX_TO_V2F16, this should have been
       * lowered before by algebraic. */
      assert(b->shader->arch < 11);

      if (src_sz == 32)
         bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false));
      else if (src_sz == 16)

@@ -3065,6 +3085,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_u2f32:
      /* v11 removed U16_TO_F32 and U8_TO_F32 */
      assert(src_sz == 32 || (b->shader->arch < 11 && (src_sz == 16 || src_sz == 8)));

      if (src_sz == 32)
         bi_u32_to_f32_to(b, dst, s0);
      else if (src_sz == 16)

@@ -3074,6 +3097,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_i2f16:
      /* Starting with v11, we don't have V2XXX_TO_V2F16, this should have been
       * lowered before by algebraic. */
      assert(b->shader->arch < 11);

      if (src_sz == 32)
         bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false));
      else if (src_sz == 16)

@@ -3083,7 +3110,8 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
      break;

   case nir_op_i2f32:
-     assert(src_sz == 32 || src_sz == 16 || src_sz == 8);
      /* v11 removed S16_TO_F32 and S8_TO_F32 */
      assert(src_sz == 32 || (b->shader->arch < 11 && (src_sz == 16 || src_sz == 8)));

      if (src_sz == 32)
         bi_s32_to_f32_to(b, dst, s0);

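For reference, the conversion opcodes that the new asserts above treat as unavailable on v11+, collected from the comments in those hunks. This is a throwaway Python summary for reading the diff, not driver code, and the table is only as precise as those comments:

# NIR op and source size -> Bifrost conversion opcode removed on v11+.
removed_on_v11 = {
    ('f2i32', 16): 'F16_TO_S32',
    ('f2u32', 16): 'F16_TO_U32',
    ('f2i16', 16): 'V2F16_TO_V2S16',
    ('f2u16', 16): 'V2F16_TO_V2U16',
    ('i2f32', 16): 'S16_TO_F32',
    ('i2f32', 8): 'S8_TO_F32',
    ('u2f32', 16): 'U16_TO_F32',
    ('u2f32', 8): 'U8_TO_F32',
    ('i2f16', 16): 'V2XXX_TO_V2F16 family',
    ('u2f16', 16): 'V2XXX_TO_V2F16 family',
}

for (nir_op, src_sz), bi_op in removed_on_v11.items():
    print(f'{nir_op} with {src_sz}-bit sources needs pre-lowering: {bi_op} is gone')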
@@ -4883,6 +4911,8 @@ bi_vectorize_filter(const nir_instr *instr, const void *data)
   case nir_op_f2f16:
   case nir_op_f2f16_rtz:
   case nir_op_f2f16_rtne:
   case nir_op_u2f16:
   case nir_op_i2f16:
      if (pan_arch(gpu_id) >= 11)
         return 1;

@@ -5116,7 +5146,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)

   /* Prepass to simplify instruction selection */
   late_algebraic = false;
-  NIR_PASS(late_algebraic, nir, bifrost_nir_lower_algebraic_late);
   NIR_PASS(late_algebraic, nir, bifrost_nir_lower_algebraic_late, pan_arch(gpu_id));

   while (late_algebraic) {
      late_algebraic = false;

@@ -25,7 +25,7 @@
#include "nir.h"
#include "nir_builder.h"

-bool bifrost_nir_lower_algebraic_late(nir_shader *shader);
bool bifrost_nir_lower_algebraic_late(nir_shader *shader, unsigned gpu_arch);
bool bifrost_nir_lower_xfb(nir_shader *shader);
bool bifrost_nir_opt_boolean_bitwise(nir_shader *shader);
bool bifrost_nir_lower_load_output(nir_shader *nir);

@@ -75,8 +75,48 @@ algebraic_late = [
    # XXX: Duplicate of nir_lower_pack
    (('unpack_64_2x32', a), ('vec2', ('unpack_64_2x32_split_x', a),
                                     ('unpack_64_2x32_split_y', a))),

    # On v11+, all non-integer variants converting to F32 are gone except for S32_TO_F32.
    (('i2f32', 'a@8'), ('i2f32', ('i2i32', a)), 'gpu_arch >= 11'),
    (('i2f32', 'a@16'), ('i2f32', ('i2i32', a)), 'gpu_arch >= 11'),
    (('u2f32', 'a@8'), ('u2f32', ('u2u32', a)), 'gpu_arch >= 11'),
    (('u2f32', 'a@16'), ('u2f32', ('u2u32', a)), 'gpu_arch >= 11'),

    # On v11+, all non-integer variants converting to F16 are gone except for S32_TO_F32.
    (('i2f16', 'a'), ('f2f16', ('i2f32', ('i2i32', a))), 'gpu_arch >= 11'),
    (('u2f16', 'a'), ('f2f16', ('u2f32', ('u2u32', a))), 'gpu_arch >= 11'),

    # On v11+, V2F16_TO_V2S16 / V2F16_TO_V2U16 are gone
    (('f2i16', 'a@16'), ('f2i16', ('f2f32', a)), 'gpu_arch >= 11'),
    (('f2u16', 'a@16'), ('f2u16', ('f2f32', a)), 'gpu_arch >= 11'),

    # On v11+, F16_TO_S32/F16_TO_U32 are gone but we still have F32_TO_S32/F32_TO_U32
    (('f2i32', 'a@16'), ('f2i32', ('f2f32', a)), 'gpu_arch >= 11'),
    (('f2u32', 'a@16'), ('f2u32', ('f2f32', a)), 'gpu_arch >= 11'),

    # On v11+, IABS.v4s8 is gone
    (('iabs', 'a@8'), ('i2i8', ('iabs', ('i2i16', a))), 'gpu_arch >= 11'),

    # On v11+, ISUB.v4s8 is gone
    (('ineg', 'a@8'), ('i2i8', ('ineg', ('i2i16', a))), 'gpu_arch >= 11'),
    (('isub', 'a@8', 'b@8'), ('i2i8', ('isub', ('i2i16', a), ('i2i16', b))), 'gpu_arch >= 11'),
    (('isub_sat', 'a@8', 'b@8'), ('i2i8', ('isub_sat', ('i2i16', a), ('i2i16', b))), 'gpu_arch >= 11'),
    (('usub_sat', 'a@8', 'b@8'), ('u2u8', ('usub_sat', ('u2u16', a), ('u2u16', b))), 'gpu_arch >= 11'),
]

# On v11+, ICMP_OR.v4u8 was removed
for cond in ['ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
    convert_8bit = 'u2u8'
    convert_16bit = 'u2u16'

    if cond[0] == 'i':
        convert_8bit = 'i2i8'
        convert_16bit = 'i2i16'

    algebraic_late += [
        ((f'{cond}8', a, b), (convert_8bit, (f'{cond}16', (convert_16bit, a), (convert_16bit, b))), 'gpu_arch >= 11'),
    ]

# Handling all combinations of boolean and float sizes for b2f is nontrivial.
# bcsel has the same problem in more generality; lower b2f to bcsel in NIR to
# reuse the efficient implementations of bcsel. This includes special handling

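To see what the comparison loop above expands to, a standalone sketch that rebuilds the same tuples outside the build system and prints them; the real loop appends them to algebraic_late exactly as shown in the hunk:

# Each 8-bit integer comparison becomes a 16-bit comparison wrapped in size
# casts, since ICMP_OR.v4u8 is gone on v11+.
a, b = 'a', 'b'

rules = []
for cond in ['ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
    signed = cond[0] == 'i'
    convert_8bit = 'i2i8' if signed else 'u2u8'
    convert_16bit = 'i2i16' if signed else 'u2u16'
    rules.append(((f'{cond}8', a, b),
                  (convert_8bit,
                   (f'{cond}16', (convert_16bit, a), (convert_16bit, b))),
                  'gpu_arch >= 11'))

for search, replace, condition in rules:
    print(search, '->', replace)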
@@ -108,8 +148,10 @@ def run():
    print(nir_algebraic.AlgebraicPass("bifrost_nir_opt_boolean_bitwise",
                                      opt_bool_bitwise).render())
    print(nir_algebraic.AlgebraicPass("bifrost_nir_lower_algebraic_late",
-                                     algebraic_late).render())
                                      algebraic_late,
                                      [
                                          ("unsigned ", "gpu_arch")
                                      ]).render())

if __name__ == '__main__':
    main()