nir: change signature of nir_src_is_divergent()

Now, it takes nir_src * instead of nir_src.
Also move the implementation to nir_divergence_analysis.c.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30787>
This commit is contained in:
Daniel Schürmann 2024-09-10 12:31:27 +02:00 committed by Marge Bot
parent 421b42637d
commit c8348139fd
16 changed files with 44 additions and 42 deletions

View file

@ -432,7 +432,7 @@ move_coords_from_divergent_cf(struct move_tex_coords_state *state, nir_function_
*divergent_discard = true;
break;
case nir_intrinsic_terminate_if:
if (divergent_cf || nir_src_is_divergent(intrin->src[0]))
if (divergent_cf || nir_src_is_divergent(&intrin->src[0]))
*divergent_discard = true;
break;
case nir_intrinsic_ddx:
@ -458,7 +458,7 @@ move_coords_from_divergent_cf(struct move_tex_coords_state *state, nir_function_
nir_if *nif = nir_cf_node_as_if(cf_node);
bool divergent_discard_then = *divergent_discard;
bool divergent_discard_else = *divergent_discard;
bool then_else_divergent = divergent_cf || nir_src_is_divergent(nif->condition);
bool then_else_divergent = divergent_cf || nir_src_is_divergent(&nif->condition);
progress |= move_coords_from_divergent_cf(state, impl, &nif->then_list,
&divergent_discard_then, then_else_divergent);
progress |= move_coords_from_divergent_cf(state, impl, &nif->else_list,

View file

@ -1105,7 +1105,7 @@ emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
assert(els.regClass() == bld.lm);
}
if (!nir_src_is_divergent(instr->src[0].src)) { /* uniform condition and values in sgpr */
if (!nir_src_is_divergent(&instr->src[0].src)) { /* uniform condition and values in sgpr */
if (dst.regClass() == s1 || dst.regClass() == s2) {
assert((then.regClass() == s1 || then.regClass() == s2) &&
els.regClass() == then.regClass());
@ -8585,7 +8585,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned opsel_hi = 0b11;
Temp tl = src;
if (nir_src_is_divergent(instr->src[0]))
if (nir_src_is_divergent(&instr->src[0]))
tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
Builder::Result sub =
@ -8593,7 +8593,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
sub->valu().neg_lo[1] = true;
sub->valu().neg_hi[1] = true;
if (nir_src_is_divergent(instr->src[0]) && dpp_ctrl2 != dpp_quad_perm(0, 1, 2, 3))
if (nir_src_is_divergent(&instr->src[0]) && dpp_ctrl2 != dpp_quad_perm(0, 1, 2, 3))
bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), sub, dpp_ctrl2);
else
bld.copy(Definition(dst), sub);
@ -8603,7 +8603,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
instr->def.bit_size == 16 ? aco_opcode::v_subrev_f16 : aco_opcode::v_subrev_f32;
bool use_interp = dpp_ctrl1 == dpp_quad_perm(0, 0, 0, 0) && instr->def.bit_size == 32 &&
ctx->program->gfx_level >= GFX11_5;
if (!nir_src_is_divergent(instr->src[0])) {
if (!nir_src_is_divergent(&instr->src[0])) {
bld.vop2(subrev, Definition(dst), src, src);
} else if (use_interp && dpp_ctrl2 == dpp_quad_perm(1, 1, 1, 1)) {
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src,
@ -8685,12 +8685,12 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_read_invocation: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
assert(instr->def.bit_size != 1);
if (!nir_src_is_divergent(instr->src[0])) {
if (!nir_src_is_divergent(&instr->src[0])) {
emit_uniform_subgroup(ctx, instr, src);
} else {
Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
if (instr->intrinsic == nir_intrinsic_read_invocation ||
!nir_src_is_divergent(instr->src[1]))
!nir_src_is_divergent(&instr->src[1]))
tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->def);
@ -8727,7 +8727,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->def);
assert(instr->def.bit_size > 1 && instr->def.bit_size <= 32);
if (!nir_src_is_divergent(instr->src[0])) {
if (!nir_src_is_divergent(&instr->src[0])) {
emit_uniform_subgroup(ctx, instr, src);
break;
}
@ -8906,7 +8906,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
const unsigned bit_size = instr->src[0].ssa->bit_size;
assert(bit_size != 1);
if (!nir_src_is_divergent(instr->src[0])) {
if (!nir_src_is_divergent(&instr->src[0])) {
/* We use divergence analysis to assign the regclass, so check if it's
* working as expected */
ASSERTED bool expected_divergent = instr->intrinsic == nir_intrinsic_exclusive_scan;
@ -9190,7 +9190,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
cond =
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
ctx->cf_info.had_divergent_discard |= nir_src_is_divergent(instr->src[0]);
ctx->cf_info.had_divergent_discard |= nir_src_is_divergent(&instr->src[0]);
}
bld.pseudo(aco_opcode::p_discard_if, cond);
@ -10932,7 +10932,7 @@ visit_if(isel_context* ctx, nir_if* if_stmt)
aco_ptr<Instruction> branch;
if_context ic;
if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
if (!nir_src_is_divergent(&if_stmt->condition)) { /* uniform condition */
/**
* Uniform conditionals are represented in the following way*) :
*

View file

@ -194,16 +194,16 @@ apply_nuw_to_offsets(isel_context* ctx, nir_function_impl* impl)
case nir_intrinsic_load_constant:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_push_constant:
if (!nir_src_is_divergent(intrin->src[0]))
if (!nir_src_is_divergent(&intrin->src[0]))
apply_nuw_to_ssa(ctx, intrin->src[0].ssa);
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
if (!nir_src_is_divergent(intrin->src[1]))
if (!nir_src_is_divergent(&intrin->src[1]))
apply_nuw_to_ssa(ctx, intrin->src[1].ssa);
break;
case nir_intrinsic_store_ssbo:
if (!nir_src_is_divergent(intrin->src[2]))
if (!nir_src_is_divergent(&intrin->src[2]))
apply_nuw_to_ssa(ctx, intrin->src[2].ssa);
break;
case nir_intrinsic_load_scratch: apply_nuw_to_ssa(ctx, intrin->src[0].ssa); break;
@ -682,7 +682,7 @@ init_context(isel_context* ctx, nir_shader* shader)
if (nir_cf_node_prev(&block->cf_node) &&
nir_cf_node_prev(&block->cf_node)->type == nir_cf_node_if) {
nir_if* nif = nir_cf_node_as_if(nir_cf_node_prev(&block->cf_node));
divergent_merge = nir_src_is_divergent(nif->condition);
divergent_merge = nir_src_is_divergent(&nif->condition);
}
/* In case of uniform phis after divergent merges, ensure that the dst is an

View file

@ -2900,7 +2900,7 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
*/
bool is_uniform_offset =
!vir_in_nonuniform_control_flow(c) &&
!nir_src_is_divergent(instr->src[1]);
!nir_src_is_divergent(&instr->src[1]);
vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset);
if (vir_in_nonuniform_control_flow(c)) {
@ -2928,7 +2928,7 @@ emit_store_output_vs(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_uniform_ui(c, base));
bool is_uniform_offset =
!vir_in_nonuniform_control_flow(c) &&
!nir_src_is_divergent(instr->src[1]);
!nir_src_is_divergent(&instr->src[1]);
vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset);
}
}
@ -3154,7 +3154,7 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
/* We can only use unifa if the offset is uniform */
nir_src offset = is_uniform ? instr->src[0] : instr->src[1];
if (nir_src_is_divergent(offset))
if (nir_src_is_divergent(&offset))
return false;
/* Emitting loads from unifa may not be safe under non-uniform control
@ -4370,7 +4370,7 @@ ntq_emit_if(struct v3d_compile *c, nir_if *nif)
bool was_in_control_flow = c->in_control_flow;
c->in_control_flow = true;
if (!vir_in_nonuniform_control_flow(c) &&
!nir_src_is_divergent(nif->condition)) {
!nir_src_is_divergent(&nif->condition)) {
ntq_emit_uniform_if(c, nif);
} else {
ntq_emit_nonuniform_if(c, nif);

View file

@ -1295,7 +1295,7 @@ v3d_instr_delay_cb(nir_instr *instr, void *data)
case nir_intrinsic_image_load:
return 3;
case nir_intrinsic_load_ubo:
if (nir_src_is_divergent(intr->src[1]))
if (nir_src_is_divergent(&intr->src[1]))
return 3;
FALLTHROUGH;
default:
@ -1386,7 +1386,7 @@ v3d_nir_sort_constant_ubo_load(nir_block *block, nir_intrinsic_instr *ref)
continue;
/* We only produce unifa sequences for non-divergent loads */
if (nir_src_is_divergent(intr->src[1]))
if (nir_src_is_divergent(&intr->src[1]))
continue;
/* If there are any UBO loads that are not constant or that
@ -1453,7 +1453,7 @@ v3d_nir_sort_constant_ubo_load(nir_block *block, nir_intrinsic_instr *ref)
if (tmp_intr->intrinsic != nir_intrinsic_load_ubo)
continue;
if (nir_src_is_divergent(tmp_intr->src[1]))
if (nir_src_is_divergent(&tmp_intr->src[1]))
continue;
/* Stop if we find a unifa UBO load that breaks the

View file

@ -1213,11 +1213,7 @@ nir_src_is_undef(nir_src src)
return src.ssa->parent_instr->type == nir_instr_type_undef;
}
static inline bool
nir_src_is_divergent(nir_src src)
{
return src.ssa->divergent;
}
bool nir_src_is_divergent(nir_src *src);
/* Are all components the same, ie. .xxxx */
static inline bool

View file

@ -69,6 +69,12 @@ struct divergence_state {
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
/* Returns whether the SSA def feeding this source was marked divergent
 * by divergence analysis (i.e. may hold different values across
 * invocations in a subgroup). Out-of-line so callers need not see the
 * nir_def layout; takes nir_src * rather than nir_src by value.
 */
bool
nir_src_is_divergent(nir_src *src)
{
return src->ssa->divergent;
}
static bool
visit_alu(nir_alu_instr *instr, struct divergence_state *state)
{

View file

@ -403,7 +403,7 @@ isolate_phi_nodes_block(nir_shader *shader, nir_block *block, void *dead_ctx)
entry->dest_is_reg = false;
nir_def_init(&pcopy->instr, &entry->dest.def,
phi->def.num_components, phi->def.bit_size);
entry->dest.def.divergent = nir_src_is_divergent(src->src);
entry->dest.def.divergent = nir_src_is_divergent(&src->src);
/* We're adding a source to a live instruction so we need to use
* nir_instr_init_src()

View file

@ -298,9 +298,9 @@ opt_uniform_atomics(nir_function_impl *impl, bool fs_atomics_predicated)
nir_num_opcodes)
continue;
if (nir_src_is_divergent(intrin->src[offset_src]))
if (nir_src_is_divergent(&intrin->src[offset_src]))
continue;
if (nir_src_is_divergent(intrin->src[offset2_src]))
if (nir_src_is_divergent(&intrin->src[offset2_src]))
continue;
if (is_atomic_already_optimized(b.shader, intrin))

View file

@ -31,12 +31,12 @@ opt_uniform_subgroup_filter(const nir_instr *instr, const void *_state)
case nir_intrinsic_masked_swizzle_amd:
case nir_intrinsic_vote_all:
case nir_intrinsic_vote_any:
return !nir_src_is_divergent(intrin->src[0]);
return !nir_src_is_divergent(&intrin->src[0]);
case nir_intrinsic_reduce:
case nir_intrinsic_exclusive_scan:
case nir_intrinsic_inclusive_scan: {
if (nir_src_is_divergent(intrin->src[0]))
if (nir_src_is_divergent(&intrin->src[0]))
return false;
const nir_op reduction_op = (nir_op) nir_intrinsic_reduction_op(intrin);

View file

@ -4355,7 +4355,7 @@ nif_can_be_predicated(nir_if *nif)
/* For non-divergent branches, predication is more expensive than a branch
* because the latter can potentially skip all instructions.
*/
if (!nir_src_is_divergent(nif->condition))
if (!nir_src_is_divergent(&nif->condition))
return false;
/* Although it could potentially be possible to allow a limited form of

View file

@ -50,7 +50,7 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b,
if (devinfo->ver < 9)
return false;
if (nir_src_is_divergent(intrin->src[1]))
if (nir_src_is_divergent(&intrin->src[1]))
return false;
if (intrin->def.bit_size != 32)
@ -73,7 +73,7 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b,
if (devinfo->ver < 11)
return false;
if (nir_src_is_divergent(intrin->src[0]))
if (nir_src_is_divergent(&intrin->src[0]))
return false;
if (intrin->def.bit_size != 32)
@ -91,7 +91,7 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b,
return true;
case nir_intrinsic_load_global_constant:
if (nir_src_is_divergent(intrin->src[0]))
if (nir_src_is_divergent(&intrin->src[0]))
return false;
if (intrin->def.bit_size != 32)

View file

@ -45,7 +45,7 @@ intel_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
return false;
if (nir_src_is_always_uniform(intrin->src[0]) ||
!nir_src_is_divergent(intrin->src[0]))
!nir_src_is_divergent(&intrin->src[0]))
return false;
if (intrin->def.parent_instr->pass_flags != 0)
@ -93,7 +93,7 @@ intel_nir_lower_non_uniform_interpolated_input_instr(nir_builder *b,
return false;
if (nir_src_is_always_uniform(bary->src[0]) ||
!nir_src_is_divergent(bary->src[0]))
!nir_src_is_divergent(&bary->src[0]))
return false;
nir_def *sample_id = bary->src[0].ssa;

View file

@ -44,7 +44,7 @@ lower_ubo_load_instr(nir_builder *b, nir_intrinsic_instr *load,
unsigned byte_size = bit_size / 8;
nir_def *val;
if (!nir_src_is_divergent(load->src[0]) && nir_src_is_const(load->src[1])) {
if (!nir_src_is_divergent(&load->src[0]) && nir_src_is_const(load->src[1])) {
uint32_t offset = nir_src_as_uint(load->src[1]);
/* Things should be component-aligned. */

View file

@ -44,7 +44,7 @@ lower_ubo_load_instr(nir_builder *b, nir_intrinsic_instr *load,
unsigned byte_size = bit_size / 8;
nir_def *val;
if (!nir_src_is_divergent(load->src[0]) && nir_src_is_const(load->src[1])) {
if (!nir_src_is_divergent(&load->src[0]) && nir_src_is_const(load->src[1])) {
uint32_t offset = nir_src_as_uint(load->src[1]);
/* Things should be component-aligned. */

View file

@ -70,7 +70,7 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_intrinsic_instr *intr,
return false;
}
if (!nir_src_is_divergent(*offset))
if (!nir_src_is_divergent(offset))
return false;
/* This indirect does need it */