aco,nir: Add support for GFX12 ds_bvh_stack_push8_pop1_rtn_b32 instruction

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35269>
This commit is contained in:
Natalie Vock 2025-03-27 18:45:45 +01:00 committed by Marge Bot
parent f0aa383e09
commit ea66a8d1c5
4 changed files with 26 additions and 2 deletions

View file

@ -1445,7 +1445,9 @@ get_tied_defs(Instruction* instr)
ops.push_back(2);
} else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32 ||
instr->opcode == aco_opcode::s_cmovk_i32 ||
instr->opcode == aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32) {
instr->opcode == aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32 ||
instr->opcode == aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32 ||
instr->opcode == aco_opcode::ds_bvh_stack_push8_pop2_rtn_b64) {
ops.push_back(0);
} else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) {
ops.push_back(3);

View file

@ -1650,6 +1650,8 @@ DS = {
("ds_pk_add_bf16", op(gfx12=0x9b)),
("ds_pk_add_rtn_bf16", op(gfx12=0xab)),
("ds_bvh_stack_push4_pop1_rtn_b32", op(gfx11=0xad, gfx12=0xe0)), #ds_bvh_stack_rtn in GFX11
("ds_bvh_stack_push8_pop1_rtn_b32", op(gfx12=0xe1)),
("ds_bvh_stack_push8_pop2_rtn_b64", op(gfx12=0xe2)),
}
for (name, num) in DS:
insn(name, num, Format.DS, InstrClass.DS)

View file

@ -1519,7 +1519,9 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
base.regClass() == instr->operands[i].regClass() &&
instr->opcode != aco_opcode::ds_swizzle_b32 &&
instr->opcode != aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32) {
instr->opcode != aco_opcode::ds_bvh_stack_push4_pop1_rtn_b32 &&
instr->opcode != aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32 &&
instr->opcode != aco_opcode::ds_bvh_stack_push8_pop2_rtn_b64) {
if (instr->opcode == aco_opcode::ds_write2_b32 ||
instr->opcode == aco_opcode::ds_read2_b32 ||
instr->opcode == aco_opcode::ds_write2_b64 ||

View file

@ -4052,6 +4052,23 @@ emit_ds_bvh_stack_push4_pop1_rtn(isel_context* ctx, nir_intrinsic_instr* instr,
Operand(dst_node_pointer));
}
/* Instruction selection for the 8-component bvh_stack_rtn_amd case: emits a
 * ds_bvh_stack_push8_pop1_rtn_b32.
 *
 * The intrinsic's three sources are fetched in order and each one is passed
 * through as_vgpr(). The DS instruction gets two fresh v1 definitions, which
 * are afterwards packed into the intrinsic's destination with
 * p_create_vector (stack address first, node pointer second).
 */
void
emit_ds_bvh_stack_push8_pop1_rtn(isel_context* ctx, nir_intrinsic_instr* instr, Builder& bld)
{
   Temp result = get_ssa_temp(ctx, &instr->def);

   /* Sources, in intrinsic order: src[0], src[1], src[2]. */
   Temp addr_in = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
   Temp prev_node = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
   Temp isect = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));

   /* The instruction has two separate dword results. */
   Temp addr_out = bld.tmp(v1);
   Temp node_out = bld.tmp(v1);

   /* The stack_size index and a trailing 0 are forwarded as the last two
    * ds() arguments (presumably the DS offset fields -- confirm against the
    * Builder::ds signature).
    */
   const auto stack_size = nir_intrinsic_stack_size(instr);
   bld.ds(aco_opcode::ds_bvh_stack_push8_pop1_rtn_b32,
          Definition(addr_out), Definition(node_out),
          Operand(addr_in), Operand(prev_node), Operand(isect),
          stack_size, 0);

   /* Combine both results into the single NIR destination. */
   bld.pseudo(aco_opcode::p_create_vector, Definition(result),
              Operand(addr_out), Operand(node_out));
}
} // namespace
void
@ -5093,6 +5110,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_bvh_stack_rtn_amd: {
switch (instr->num_components) {
case 4: emit_ds_bvh_stack_push4_pop1_rtn(ctx, instr, bld); break;
case 8: emit_ds_bvh_stack_push8_pop1_rtn(ctx, instr, bld); break;
default: unreachable("Invalid BVH stack component count!");
}
break;