aco: use GDS registers for the ordered xfb counter add

The GDS register path is currently only used by radeonsi.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23094>
This commit is contained in:
Qiang Yu 2023-05-16 16:54:59 +08:00 committed by Marge Bot
parent 438dcf6d0f
commit 8d5cc23c18
2 changed files with 12 additions and 3 deletions

View file

@@ -9035,14 +9035,22 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
unsigned write_mask = nir_intrinsic_write_mask(instr);
bool use_gds_registers =
ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
for (unsigned i = 0; i < instr->num_components; i++) {
if (write_mask & (1 << i)) {
Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
if (use_gds_registers) {
ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1),
Operand(), chan_counter, i * 4, 0u, true);
} else {
m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1),
gds_base, chan_counter, m, i * 4, 0u, true);
ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1),
gds_base, chan_counter, m, i * 4, 0u, true);
}
ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
vec->operands[i] = Operand(ds_instr->definitions[0].getTemp());

View file

@@ -134,6 +134,7 @@ struct aco_compiler_options {
bool optimisations_disabled;
uint8_t enable_mrt_output_nan_fixup;
bool wgp_mode;
bool is_opengl;
enum radeon_family family;
enum amd_gfx_level gfx_level;
uint32_t address32_hi;