mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
aco: Fix 1D->2D dispatch conversion on <gfx9
out_args->scratch_offset and in_wg_id_x will alias on <gfx9.
To avoid the conversion code reading a garbage WG ID, move the
scratch/ring offset writing to the very end.
Fixes: 1e354172 ("radv,aco: Convert 1D ray launches to 2D")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30707>
This commit is contained in:
parent
563ec4754a
commit
bd525f4282
1 changed files with 14 additions and 9 deletions
|
|
@ -12598,6 +12598,11 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
assert(in_stack_base == out_launch_size_z);
|
||||
assert(in_local_ids[0] == out_launch_ids[0]);
|
||||
|
||||
/* <gfx9 reads in_scratch_offset at the end of the prolog to write out the scratch_offset
|
||||
* arg. Make sure no other outputs have overwritten it by then.
|
||||
*/
|
||||
assert(in_scratch_offset.reg() >= out_args->num_sgprs_used);
|
||||
|
||||
/* load raygen sbt */
|
||||
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2),
|
||||
Operand::c32(0u));
|
||||
|
|
@ -12649,15 +12654,6 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
bld.vop3(aco_opcode::v_mad_u32_u24, Definition(out_launch_ids[0], v1), Operand(in_wg_id_x, s1),
|
||||
Operand::c32(8), Operand(in_local_ids[0], v1));
|
||||
|
||||
if (options->gfx_level < GFX9) {
|
||||
/* write scratch/ring offsets to outputs, if needed */
|
||||
bld.sop1(aco_opcode::s_mov_b32,
|
||||
Definition(get_arg_reg(out_args, out_args->scratch_offset), s1),
|
||||
Operand(in_scratch_offset, s1));
|
||||
bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2),
|
||||
Operand(tmp_ring_offsets, s2));
|
||||
}
|
||||
|
||||
/* calculate shader record ptr: SBT + RADV_RT_HANDLE_SIZE */
|
||||
if (options->gfx_level < GFX9) {
|
||||
bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2),
|
||||
|
|
@ -12699,6 +12695,15 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
|
||||
Operand(out_launch_ids[1], v1), Operand(vcc, bld.lm));
|
||||
|
||||
if (options->gfx_level < GFX9) {
|
||||
/* write scratch/ring offsets to outputs, if needed */
|
||||
bld.sop1(aco_opcode::s_mov_b32,
|
||||
Definition(get_arg_reg(out_args, out_args->scratch_offset), s1),
|
||||
Operand(in_scratch_offset, s1));
|
||||
bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2),
|
||||
Operand(tmp_ring_offsets, s2));
|
||||
}
|
||||
|
||||
/* jump to raygen */
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand(out_uniform_shader_addr, s2));
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue