diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 5814175202c..aa71b1a00d2 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -401,6 +401,9 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state) nir_imul_imm(b, load_subgroup_id_lowered(s, b), s->wave_size)); } break; + case nir_intrinsic_load_subgroup_invocation: + replacement = nir_mbcnt_amd(b, nir_imm_intN_t(b, ~0ull, s->wave_size), nir_imm_int(b, 0)); + break; default: return false; } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 56d55e16a00..e4a18d24227 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8139,10 +8139,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } - case nir_intrinsic_load_subgroup_invocation: { - emit_mbcnt(ctx, get_ssa_temp(ctx, &instr->def)); - break; - } case nir_intrinsic_ballot_relaxed: case nir_intrinsic_ballot: { Temp src = get_ssa_temp(ctx, instr->src[0].ssa); diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 14eb1448ac4..9ded11da086 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -550,7 +550,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: case nir_intrinsic_load_interpolated_input: - case nir_intrinsic_load_subgroup_invocation: case nir_intrinsic_load_tess_coord: case nir_intrinsic_write_invocation_amd: case nir_intrinsic_mbcnt_amd: diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index f8b8361e813..c6514a01fee 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -1671,9 +1671,9 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa) //>> BB6 //! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, */ //>> s1: %sgpr0 = p_parallelcopy 42 - //>> v1: %vgpr0 = v_mbcnt_hi_u32_b32_e64 -1, %_ + //>> v1: %vgpr0 = v_mbcnt_hi_u32_b32_e64 %_, %_ val_sgpr = nir_imm_int(nb, 42); - val_vgpr = nir_load_subgroup_invocation(nb); + val_vgpr = nir_mbcnt_amd(nb, nir_imm_intN_t(nb, UINT64_MAX, 64), nir_imm_int(nb, 0)); //>> BB7 //! /* logical preds: BB6, / linear preds: BB6, / kind: uniform, break, */ diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 570025121b9..e58183a1609 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2774,9 +2774,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_as_uniform: result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL); break; - case nir_intrinsic_load_subgroup_invocation: - result = ac_get_thread_id(&ctx->ac); - break; case nir_intrinsic_load_workgroup_id: { LLVMValueRef values[3] = {ctx->ac.i32_0, ctx->ac.i32_0, ctx->ac.i32_0};