From ea66a8d1c5727a68972e10911bbeccfbd3258f33 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Thu, 27 Mar 2025 18:45:45 +0100 Subject: [PATCH] aco,nir: Add support for GFX12 ds_bvh_stack_push8_pop1_rtn_b32 instruction Part-of: --- src/amd/compiler/aco_ir.cpp | 4 +++- src/amd/compiler/aco_opcodes.py | 2 ++ src/amd/compiler/aco_optimizer.cpp | 4 +++- .../aco_select_nir_intrinsics.cpp | 18 ++++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index f706cd8822d..19c49610c60 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1445,7 +1445,9 @@ get_tied_defs(Instruction* instr) ops.push_back(2); } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32 || instr->opcode == aco_opcode::s_cmovk_i32 || - instr->opcode == aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32) { + instr->opcode == aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32 || + instr->opcode == aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32 || + instr->opcode == aco_opcode::ds_bvh_stack_push8_pop2_rtn_b64) { ops.push_back(0); } else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) { ops.push_back(3); diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 765c1e223a5..25b3e7aede8 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -1650,6 +1650,8 @@ DS = { ("ds_pk_add_bf16", op(gfx12=0x9b)), ("ds_pk_add_rtn_bf16", op(gfx12=0xab)), ("ds_bvh_stack_push4_pop1_rtn_b32", op(gfx11=0xad, gfx12=0xe0)), #ds_bvh_stack_rtn in GFX11 + ("ds_bvh_stack_push8_pop1_rtn_b32", op(gfx12=0xe1)), + ("ds_bvh_stack_push8_pop2_rtn_b64", op(gfx12=0xe2)), } for (name, num) in DS: insn(name, num, Format.DS, InstrClass.DS) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 48a5740754a..ec706d19ad8 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ 
b/src/amd/compiler/aco_optimizer.cpp @@ -1519,7 +1519,9 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) parse_base_offset(ctx, instr.get(), i, &base, &offset, false) && base.regClass() == instr->operands[i].regClass() && instr->opcode != aco_opcode::ds_swizzle_b32 && - instr->opcode != aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32) { + instr->opcode != aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32 && + instr->opcode != aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32 && + instr->opcode != aco_opcode::ds_bvh_stack_push8_pop2_rtn_b64) { if (instr->opcode == aco_opcode::ds_write2_b32 || instr->opcode == aco_opcode::ds_read2_b32 || instr->opcode == aco_opcode::ds_write2_b64 || diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 6e65c81b023..45006d0619f 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -4052,6 +4052,23 @@ emit_ds_bvh_stack_push4_pop1_rtn(isel_context* ctx, nir_intrinsic_instr* instr, Operand(dst_node_pointer)); } +void +emit_ds_bvh_stack_push8_pop1_rtn(isel_context* ctx, nir_intrinsic_instr* instr, Builder& bld) +{ + Temp dst = get_ssa_temp(ctx, &instr->def); + Temp stack_addr = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); + Temp last_node = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)); + Temp intersection_result = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)); + + Temp dst_stack_addr = bld.tmp(v1); + Temp dst_node_pointer = bld.tmp(v1); + bld.ds(aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32, Definition(dst_stack_addr), + Definition(dst_node_pointer), Operand(stack_addr), Operand(last_node), + Operand(intersection_result), nir_intrinsic_stack_size(instr), 0); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(dst_stack_addr), + Operand(dst_node_pointer)); +} + } // namespace void @@ -5093,6 +5110,7 @@ 
visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_bvh_stack_rtn_amd: { switch (instr->num_components) { case 4: emit_ds_bvh_stack_push4_pop1_rtn(ctx, instr, bld); break; + case 8: emit_ds_bvh_stack_push8_pop1_rtn(ctx, instr, bld); break; default: unreachable("Invalid BVH stack component count!"); } break;