nir: optimize open-coded quadVote* directly to new nir_quad intrinsics

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/218>
This commit is contained in:
Daniel Schürmann 2023-11-01 16:37:46 +01:00 committed by Marge Bot
parent 0d186d356c
commit 88afbbba11
3 changed files with 8 additions and 11 deletions

View file

@ -7888,7 +7888,7 @@ emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr)
Temp thread_count =
bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
set_wqm(ctx, nir_intrinsic_include_helpers(instr));
set_wqm(ctx);
emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
} else {
@ -8606,8 +8606,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0;
cluster_size = util_next_power_of_two(
MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
bool create_helpers =
instr->intrinsic == nir_intrinsic_reduce && nir_intrinsic_include_helpers(instr);
if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
instr->def.bit_size != 1) {
@ -8667,7 +8665,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
else
emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size, Definition(dst), src);
}
set_wqm(ctx, create_helpers);
set_wqm(ctx);
break;
}
case nir_intrinsic_quad_broadcast:

View file

@ -172,9 +172,6 @@ index("unsigned", "reduction_op")
# Cluster size for reduction operations
index("unsigned", "cluster_size")
# Requires that the operation creates and includes helper invocations
index("bool", "include_helpers")
# Parameter index for a load_param intrinsic
index("unsigned", "param_idx")
@ -510,7 +507,7 @@ intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[REDUCTION_OP, CLUSTER_SIZE, INCLUDE_HELPERS], flags=[CAN_ELIMINATE])
indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,

View file

@ -206,9 +206,11 @@ try_opt_quad_vote(nir_builder *b, nir_alu_instr *alu, bool block_has_discard)
if (lanes_read != 0xffff)
return NULL;
/* Create reduction. */
return nir_reduce(b, quad_broadcasts[0]->src[0].ssa, .reduction_op = alu->op, .cluster_size = 4,
.include_helpers = true);
/* Create quad vote. */
if (alu->op == nir_op_iand)
return nir_quad_vote_all(b, 1, quad_broadcasts[0]->src[0].ssa);
else
return nir_quad_vote_any(b, 1, quad_broadcasts[0]->src[0].ssa);
}
static bool