nir,radv,radeonsi: add nir_atomic_add_gs_invocation_count_amd

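For shader query emulation. The new intrinsic atomically adds the current
wave's GS invocation count to the query result. radeonsi lowers it to a
buffer atomic add on the emulated GS query counters buffer; the radv
lowering is left as a TODO for now.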

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20156>
This commit is contained in:
Qiang Yu 2022-11-30 18:12:39 +08:00 committed by Marge Bot
parent 17c18a492a
commit fd240f759f
3 changed files with 25 additions and 12 deletions

View file

@ -370,6 +370,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_gs_invocation_count_amd:
/* TODO: add gs invocation query emulation. */
break;
case nir_intrinsic_load_streamout_config_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_config);

View file

@ -1538,6 +1538,10 @@ intrinsic("atomic_add_gs_emit_prim_count_amd", [1])
intrinsic("atomic_add_gen_prim_count_amd", [1], indices=[STREAM_ID])
intrinsic("atomic_add_xfb_prim_count_amd", [1], indices=[STREAM_ID])
# Atomically add the current wave's GS invocation count to the query result.
# src[] = { invocation_count }.
intrinsic("atomic_add_gs_invocation_count_amd", [1])
# LDS offset for scratch section in NGG shader
system_value("lds_ngg_scratch_base_amd", 1)
# LDS offset for NGG GS shader vertex emit

View file

@ -262,20 +262,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
break;
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
case nir_intrinsic_atomic_add_gs_invocation_count_amd: {
nir_ssa_def *buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF);
enum pipe_statistics_query_index index =
intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ?
PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS;
unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;
nir_ssa_def *count = intrin->src[0].ssa;
nir_buffer_atomic_add_amd(b, 32, buf, count, .base = offset);
break;
}
case nir_intrinsic_atomic_add_gen_prim_count_amd:
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
unsigned offset;
nir_ssa_def *buf;
if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF);
offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
} else {
unsigned stream = nir_intrinsic_stream_id(intrin);
buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
}
nir_ssa_def *buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
unsigned stream = nir_intrinsic_stream_id(intrin);
unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
nir_ssa_def *prim_count = intrin->src[0].ssa;
nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);