ac/nir/lower_ngg: use voffset in global_atomic_add for xfb
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510>
parent b002564633
commit 1ea96a47cd
1 changed file with 10 additions and 15 deletions
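In short, the per-lane address math moves out of the 64-bit pointer and into the atomic's voffset operand, with the extra +4 expressed as a constant .base. Below is a minimal, self-contained C sketch of just the address arithmetic involved; the 0x1000 base, the four-lane loop and the local variable names are illustrative stand-ins, not Mesa code.

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
   /* Stand-in for the value loaded by nir_load_xfb_state_address_gfx12_amd(). */
   const uint64_t xfb_state_address = 0x1000;

   for (uint32_t tid_in_tg = 0; tid_in_tg < 4; tid_in_tg++) {
      /* xfb_voffset = nir_imul_imm(b, tid_in_tg, 8): one 8-byte slot per lane. */
      uint32_t xfb_voffset = tid_in_tg * 8;

      /* Before: the per-lane offset was folded into the 64-bit address and the
       * atomic was given an offset of 0 (nir_imm_int(b, 0)). */
      uint64_t ordered_add_before = (xfb_state_address + xfb_voffset) + 0;
      uint64_t overflow_before    = (xfb_state_address + xfb_voffset + 4) + 0;

      /* After: the 64-bit address stays uniform across lanes; the per-lane part
       * goes through the voffset operand, and the extra +4 becomes .base = 4. */
      uint64_t ordered_add_after = xfb_state_address + xfb_voffset;
      uint64_t overflow_after    = xfb_state_address + xfb_voffset + 4;

      assert(ordered_add_before == ordered_add_after);
      assert(overflow_before == overflow_after);
      printf("lane %" PRIu32 ": ordered add at 0x%" PRIx64 ", overflow counter at 0x%" PRIx64 "\n",
             tid_in_tg, ordered_add_after, overflow_after);
   }
   return 0;
}

The resulting addresses are identical either way; what changes is where the per-lane part is expressed, which presumably lets the backend keep the 64-bit base uniform and pass only the small 32-bit per-lane offset to the atomic.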
@@ -1881,7 +1881,9 @@ ngg_build_streamout_buffer_info(nir_builder *b,
          workgroup_buffer_sizes[buffer] = undef;
       }
 
-      nir_def *buffer_offsets = NULL, *xfb_state_address = NULL;
+      nir_def *buffer_offsets = NULL;
+      nir_def *xfb_state_address = nir_load_xfb_state_address_gfx12_amd(b);
+      nir_def *xfb_voffset = nir_imul_imm(b, tid_in_tg, 8);
 
       /* Get current global offset of buffer and increase by amount of
        * workgroup buffer size. This is an ordered operation sorted by
@@ -1929,9 +1931,6 @@ ngg_build_streamout_buffer_info(nir_builder *b,
           * - The whole structure should be entirely within one 64B block of memory
           *   for performance. (the address bits above 64B should not differ between lanes)
           */
-         nir_def *voffset = nir_imul_imm(b, tid_in_tg, 8);
-         xfb_state_address = nir_iadd(b, nir_load_xfb_state_address_gfx12_amd(b),
-                                      nir_u2u64(b, voffset));
          nir_def *buffer_offset_per_lane;
 
          /* The gfx12 intrinsic inserts hand-written assembly producing better code than current
@@ -1939,8 +1938,8 @@ ngg_build_streamout_buffer_info(nir_builder *b,
           */
          if (use_gfx12_xfb_intrinsic) {
             buffer_offset_per_lane =
-               nir_ordered_xfb_counter_add_gfx12_amd(b, nir_load_xfb_state_address_gfx12_amd(b),
-                                                     voffset, ordered_id, atomic_src);
+               nir_ordered_xfb_counter_add_gfx12_amd(b, xfb_state_address, xfb_voffset, ordered_id,
+                                                     atomic_src);
          } else {
             /* The NIR version of the above using nir_atomic_op_ordered_add_gfx12_amd. */
             enum { NUM_ATOMICS_IN_FLIGHT = 6 };
@@ -1962,7 +1961,7 @@ ngg_build_streamout_buffer_info(nir_builder *b,
              */
             for (unsigned i = 0; i < NUM_ATOMICS_IN_FLIGHT - 1; i++) {
                nir_store_var(b, result_ring[i],
-                             nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, nir_imm_int(b, 0),
+                             nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, xfb_voffset,
                                                    .atomic_op = nir_atomic_op_ordered_add_gfx12_amd), 0x1);
             }
 
@@ -1977,7 +1976,7 @@ ngg_build_streamout_buffer_info(nir_builder *b,
 
                /* Issue (or repeat) the atomic. */
                nir_store_var(b, result_ring[issue_index],
-                             nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, nir_imm_int(b, 0),
+                             nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, xfb_voffset,
                                                    .atomic_op = nir_atomic_op_ordered_add_gfx12_amd), 0x1);
 
                /* Break if the oldest atomic succeeded in incrementing the offsets. */
@@ -1987,7 +1986,7 @@ ngg_build_streamout_buffer_info(nir_builder *b,
 
                /* Debug: Write the vec4 into a shader log ring buffer. */
 #if 0
-               ac_nir_store_debug_log_amd(b, nir_vec4(b, nir_u2u32(b, nir_load_xfb_state_address_gfx12_amd(b)),
+               ac_nir_store_debug_log_amd(b, nir_vec4(b, nir_u2u32(b, xfb_state_address),
                                                       ordered_id, loaded_ordered_id,
                                                       loaded_dwords_written));
 #endif
@@ -2027,8 +2026,6 @@ ngg_build_streamout_buffer_info(nir_builder *b,
             buffer_offsets = nir_vec(b, offset, 4);
          }
          nir_pop_if(b, if_4lanes);
-
-         xfb_state_address = nir_if_phi(b, xfb_state_address, nir_undef(b, 1, 64));
          buffer_offsets = nir_if_phi(b, buffer_offsets, nir_undef(b, 4, 32));
 
          if_invocation_0 = nir_push_if(b, nir_ieq_imm(b, tid_in_tg, 0));
@@ -2101,10 +2098,8 @@ ngg_build_streamout_buffer_info(nir_builder *b,
          nir_def *overflow_amount_per_lane =
            write_values_to_lanes(b, overflow_amount, info->buffers_written);
 
-         nir_def *address_per_lane = nir_iadd(b, xfb_state_address,
-                                              nir_imm_intN_t(b, 4, 64));
-         nir_global_atomic_amd(b, 32, address_per_lane, nir_ineg(b, overflow_amount_per_lane),
-                               nir_imm_int(b, 0), .atomic_op = nir_atomic_op_iadd);
+         nir_global_atomic_amd(b, 32, xfb_state_address, nir_ineg(b, overflow_amount_per_lane),
+                               xfb_voffset, .base = 4, .atomic_op = nir_atomic_op_iadd);
       }
       nir_pop_if(b, if_any_overflow_4_lanes);
 
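Condensed from the hunks above, and ignoring the surrounding control flow, the atomics-in-flight ring and the nir_store_var wrappers, the call-site pattern changes roughly as follows. This is a schematic view of the diff, not standalone compilable code.

/* Before: per-lane offset baked into the 64-bit pointer, atomic offset = 0. */
nir_def *voffset = nir_imul_imm(b, tid_in_tg, 8);
xfb_state_address = nir_iadd(b, nir_load_xfb_state_address_gfx12_amd(b),
                             nir_u2u64(b, voffset));
nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, nir_imm_int(b, 0),
                      .atomic_op = nir_atomic_op_ordered_add_gfx12_amd);
nir_global_atomic_amd(b, 32, nir_iadd(b, xfb_state_address, nir_imm_intN_t(b, 4, 64)),
                      nir_ineg(b, overflow_amount_per_lane),
                      nir_imm_int(b, 0), .atomic_op = nir_atomic_op_iadd);

/* After: uniform 64-bit base, per-lane voffset operand, constant .base where a +4 is needed. */
nir_def *xfb_state_address = nir_load_xfb_state_address_gfx12_amd(b);
nir_def *xfb_voffset = nir_imul_imm(b, tid_in_tg, 8);
nir_global_atomic_amd(b, 64, xfb_state_address, atomic_src, xfb_voffset,
                      .atomic_op = nir_atomic_op_ordered_add_gfx12_amd);
nir_global_atomic_amd(b, 32, xfb_state_address, nir_ineg(b, overflow_amount_per_lane),
                      xfb_voffset, .base = 4, .atomic_op = nir_atomic_op_iadd);

A side effect visible in the diff: since xfb_state_address is now computed unconditionally before the 4-lane branch, it no longer needs to be carried out of that branch with nir_if_phi.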