From daaa8ddb8ee1fd2a6ed70a98f8bfc04bcd31179d Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Thu, 11 Aug 2022 10:19:47 +0800 Subject: [PATCH] ac/llvm,radeonsi: lower nir primitive counter add intrinsics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 15 ---------- src/amd/llvm/ac_shader_abi.h | 9 ------ .../drivers/radeonsi/gfx10_shader_ngg.c | 30 ------------------- .../drivers/radeonsi/si_nir_lower_abi.c | 20 +++++++++++++ .../drivers/radeonsi/si_shader_internal.h | 2 -- src/gallium/drivers/radeonsi/si_shader_llvm.c | 1 - 6 files changed, 20 insertions(+), 57 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index c092aa1f523..3f8aec622ca 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4465,21 +4465,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components); break; } - case nir_intrinsic_atomic_add_gs_emit_prim_count_amd: - ctx->abi->atomic_add_prim_count(ctx->abi, ~0U, get_src(ctx, instr->src[0]), - ac_prim_count_gs_emit); - break; - case nir_intrinsic_atomic_add_gen_prim_count_amd: - case nir_intrinsic_atomic_add_xfb_prim_count_amd: { - LLVMValueRef prim_count = get_src(ctx, instr->src[0]); - unsigned stream = nir_intrinsic_stream_id(instr); - enum ac_prim_count count_type = - instr->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ? - ac_prim_count_gen : ac_prim_count_xfb; - - ctx->abi->atomic_add_prim_count(ctx->abi, stream, prim_count, count_type); - break; - } default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 36519b995f0..83aadfba544 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -34,12 +34,6 @@ #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) -enum ac_prim_count { - ac_prim_count_gs_emit, - ac_prim_count_gen, - ac_prim_count_xfb, -}; - /* Document the shader ABI during compilation. This is what allows radeonsi and * radv to share a compiler backend. */ @@ -73,9 +67,6 @@ struct ac_shader_abi { void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx, LLVMValueRef *addrs); - void (*atomic_add_prim_count)(struct ac_shader_abi *abi, unsigned stream, - LLVMValueRef prim_count, enum ac_prim_count count_type); - LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned driver_location, unsigned component, unsigned num_components, unsigned vertex_index, diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index d38c9453f7c..17d0bea7441 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -124,36 +124,6 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi) si_llvm_build_vs_exports(ctx, outputs, num_outputs); } -void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, - LLVMValueRef prim_count, enum ac_prim_count count_type) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - unsigned offset; - LLVMValueRef query_buf; - if (count_type == ac_prim_count_gs_emit) { - offset = si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4; - query_buf = ngg_get_emulated_counters_buf(ctx); - } else { - offset = count_type == ac_prim_count_gen ? - offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) : - offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives); - - query_buf = ngg_get_query_buf(ctx); - } - - LLVMValueRef args[] = { - prim_count, - query_buf, - LLVMConstInt(ctx->ac.i32, offset, false), - ctx->ac.i32_0, /* soffset */ - ctx->ac.i32_0, /* cachepolicy */ - }; - - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", - ctx->ac.i32, args, 5, 0); -} - void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx) { LLVMBuilderRef builder = ctx->ac.builder; diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 4725c3a015e..4576317879f 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -233,6 +233,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s replacement = load_internal_binding(b, args, slot); break; } + case nir_intrinsic_atomic_add_gs_emit_prim_count_amd: + case nir_intrinsic_atomic_add_gen_prim_count_amd: + case nir_intrinsic_atomic_add_xfb_prim_count_amd: { + unsigned offset; + nir_ssa_def *buf; + if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) { + buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF); + offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4; + } else { + unsigned stream = nir_intrinsic_stream_id(intrin); + buf = load_internal_binding(b, args, SI_GS_QUERY_BUF); + offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ? + offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) : + offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives); + } + + nir_ssa_def *prim_count = intrin->src[0].ssa; + nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset); + break; + } default: return false; } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 494937e1cde..93e3925ce25 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -183,8 +183,6 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx); unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader); bool gfx10_ngg_export_prim_early(struct si_shader *shader); void gfx10_ngg_export_vertex(struct ac_shader_abi *abi); -void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, - LLVMValueRef prim_count, enum ac_prim_count count_type); void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx); unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader); bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index c1d91d4198a..fc9ea32c161 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -778,7 +778,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad ctx->abi.intrinsic_load = si_llvm_load_intrinsic; ctx->abi.export_vertex = gfx10_ngg_export_vertex; - ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count; si_llvm_init_resource_callbacks(ctx); si_llvm_create_main_func(ctx);