aco,nir: Add support for new GFX12 ray tracing instructions

Adds image_bvh_dual_intersect_ray and image_bvh8_intersect_ray, which can
handle the new BVH format. Both instructions write up to 10 VGPRs, so
they need to use a vec16 definition in NIR.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34273>
This commit is contained in:
Konstantin Seurer 2025-03-12 22:43:57 +01:00 committed by Marge Bot
parent ee0f784858
commit 978e9b670e
12 changed files with 80 additions and 6 deletions

View file

@ -838,7 +838,7 @@ emit_mimg_instruction_gfx12(asm_context& ctx, std::vector<uint32_t>& out, const
encoding = 0; encoding = 0;
if (!instr->definitions.empty()) if (!instr->definitions.empty())
encoding |= reg(ctx, instr->definitions[0], 8); /* VDATA */ encoding |= reg(ctx, instr->definitions.back(), 8); /* VDATA */
else if (!instr->operands[2].isUndefined()) else if (!instr->operands[2].isUndefined())
encoding |= reg(ctx, instr->operands[2], 8); /* VDATA */ encoding |= reg(ctx, instr->operands[2], 8); /* VDATA */
encoding |= reg(ctx, instr->operands[0]) << 9; /* T# (resource) */ encoding |= reg(ctx, instr->operands[0]) << 9; /* T# (resource) */

View file

@ -53,7 +53,9 @@ get_type(Program* program, aco_ptr<Instruction>& instr)
if (instr->isMIMG()) { if (instr->isMIMG()) {
switch (instr->opcode) { switch (instr->opcode) {
case aco_opcode::image_bvh_intersect_ray: case aco_opcode::image_bvh_intersect_ray:
case aco_opcode::image_bvh64_intersect_ray: return clause_bvh; case aco_opcode::image_bvh64_intersect_ray:
case aco_opcode::image_bvh_dual_intersect_ray:
case aco_opcode::image_bvh8_intersect_ray: return clause_bvh;
case aco_opcode::image_atomic_swap: case aco_opcode::image_atomic_swap:
case aco_opcode::image_atomic_cmpswap: case aco_opcode::image_atomic_cmpswap:
case aco_opcode::image_atomic_add: case aco_opcode::image_atomic_add:

View file

@ -650,8 +650,8 @@ gen(Instruction* instr, wait_ctx& ctx)
update_counters(ctx, ev, get_sync_info(instr)); update_counters(ctx, ev, get_sync_info(instr));
if (!instr->definitions.empty()) for (auto& definition : instr->definitions)
insert_wait_entry(ctx, instr->definitions[0], ev, type); insert_wait_entry(ctx, definition, ev, type);
if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) { if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
update_counters(ctx, event_vmem_gpr_lock); update_counters(ctx, event_vmem_gpr_lock);

View file

@ -5962,6 +5962,38 @@ visit_bvh64_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
emit_split_vector(ctx, dst, instr->def.num_components); emit_split_vector(ctx, dst, instr->def.num_components);
} }
/**
 * Selects a single image_bvh8_intersect_ray MIMG instruction for
 * nir_intrinsic_bvh8_intersect_ray_amd.
 *
 * Intrinsic sources as consumed here:
 *   src[0] - resource descriptor, passed to emit_mimg() unchanged
 *   src[1] - BVH base
 *   src[2] - cull mask
 *   src[3] - tmax
 *   src[4] - ray origin
 *   src[5] - ray direction
 *   src[6] - node id
 * Sources 1..6 are moved to VGPRs before use.
 *
 * The instruction gets three definitions (origin v3, direction v3, result
 * v10); the intrinsic destination is then built as
 * { result, origin, direction }.
 */
void
visit_bvh8_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
{
   Builder bld(ctx->program, ctx->block);

   Temp dst = get_ssa_temp(ctx, &instr->def);
   Temp rsrc = get_ssa_temp(ctx, instr->src[0].ssa);

   /* All address components have to live in VGPRs. */
   Temp base = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
   Temp ray_cull_mask = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
   Temp ray_tmax = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
   Temp ray_origin = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[4].ssa));
   Temp ray_dir = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[5].ssa));
   Temp node = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[6].ssa));

   /* Temporaries for the three values the instruction defines. */
   Temp hit_data = bld.tmp(v10);
   Temp origin_out = bld.tmp(v3);
   Temp dir_out = bld.tmp(v3);

   /* tmax and the cull mask are handed over as one v2 address component. */
   Temp tmax_and_mask =
      bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), ray_tmax, ray_cull_mask);
   std::vector<Temp> vaddr = {base, tmax_and_mask, ray_origin, ray_dir, node};

   MIMG_instruction* bvh8 = emit_mimg(bld, aco_opcode::image_bvh8_intersect_ray,
                                      {origin_out, dir_out, hit_data}, rsrc, Operand(s4), vaddr);
   bvh8->dim = ac_image_1d;
   bvh8->dmask = 0xf;
   bvh8->unrm = true;
   bvh8->r128 = true;

   /* Destination layout: result first, then origin and direction. */
   bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(hit_data),
              Operand(origin_out), Operand(dir_out));
}
static std::vector<Temp> static std::vector<Temp>
get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr) get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
{ {
@ -8787,6 +8819,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
break; break;
} }
case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break; case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break;
case nir_intrinsic_bvh8_intersect_ray_amd: visit_bvh8_intersect_ray_amd(ctx, instr); break;
case nir_intrinsic_load_resume_shader_address_amd: { case nir_intrinsic_load_resume_shader_address_amd: {
bld.pseudo(aco_opcode::p_resume_shader_address, Definition(get_ssa_temp(ctx, &instr->def)), bld.pseudo(aco_opcode::p_resume_shader_address, Definition(get_ssa_temp(ctx, &instr->def)),
bld.def(s1, scc), Operand::c32(nir_intrinsic_call_idx(instr))); bld.def(s1, scc), Operand::c32(nir_intrinsic_call_idx(instr)));

View file

@ -562,6 +562,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_load_initial_edgeflags_amd: case nir_intrinsic_load_initial_edgeflags_amd:
case nir_intrinsic_gds_atomic_add_amd: case nir_intrinsic_gds_atomic_add_amd:
case nir_intrinsic_bvh64_intersect_ray_amd: case nir_intrinsic_bvh64_intersect_ray_amd:
case nir_intrinsic_bvh8_intersect_ray_amd:
case nir_intrinsic_load_vector_arg_amd: case nir_intrinsic_load_vector_arg_amd:
case nir_intrinsic_ordered_xfb_counter_add_gfx11_amd: case nir_intrinsic_ordered_xfb_counter_add_gfx11_amd:
case nir_intrinsic_cmat_muladd_amd: case nir_intrinsic_cmat_muladd_amd:

View file

@ -1414,7 +1414,6 @@ aco::small_vec<uint32_t, 2>
get_ops_fixed_to_def(Instruction* instr) get_ops_fixed_to_def(Instruction* instr)
{ {
aco::small_vec<uint32_t, 2> ops; aco::small_vec<uint32_t, 2> ops;
if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 || if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 ||
instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
instr->opcode == aco_opcode::v_fmac_f16 || instr->opcode == aco_opcode::v_mac_legacy_f32 || instr->opcode == aco_opcode::v_fmac_f16 || instr->opcode == aco_opcode::v_mac_legacy_f32 ||
@ -1432,6 +1431,10 @@ get_ops_fixed_to_def(Instruction* instr)
} else if (instr->isMIMG() && instr->definitions.size() == 1 && } else if (instr->isMIMG() && instr->definitions.size() == 1 &&
!instr->operands[2].isUndefined()) { !instr->operands[2].isUndefined()) {
ops.push_back(2); ops.push_back(2);
} else if (instr->opcode == aco_opcode::image_bvh8_intersect_ray) {
/* VADDR starts at 3. */
ops.push_back(3 + 2);
ops.push_back(3 + 3);
} }
return ops; return ops;
} }
@ -1439,7 +1442,8 @@ get_ops_fixed_to_def(Instruction* instr)
uint8_t uint8_t
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr) get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
{ {
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray) if (instr->opcode == aco_opcode::image_bvh64_intersect_ray ||
instr->opcode == aco_opcode::image_bvh8_intersect_ray)
return vmem_bvh; return vmem_bvh;
else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load) else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
return vmem_sampler; return vmem_sampler;

View file

@ -292,6 +292,7 @@ struct RegClass {
v6 = 6 | (1 << 5), v6 = 6 | (1 << 5),
v7 = 7 | (1 << 5), v7 = 7 | (1 << 5),
v8 = 8 | (1 << 5), v8 = 8 | (1 << 5),
v10 = 10 | (1 << 5),
/* byte-sized register class */ /* byte-sized register class */
v1b = v1 | (1 << 7), v1b = v1 | (1 << 7),
v2b = v2 | (1 << 7), v2b = v2 | (1 << 7),
@ -360,6 +361,7 @@ static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6}; static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7}; static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8}; static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v10{RegClass::v10};
static constexpr RegClass v1b{RegClass::v1b}; static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b}; static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b}; static constexpr RegClass v3b{RegClass::v3b};

View file

@ -1867,6 +1867,8 @@ MIMG = {
("image_gather4_c_lz_o", op(0x5f, gfx11=0x37)), ("image_gather4_c_lz_o", op(0x5f, gfx11=0x37)),
("image_bvh_intersect_ray", op(gfx10=0xe6, gfx11=0x19)), ("image_bvh_intersect_ray", op(gfx10=0xe6, gfx11=0x19)),
("image_bvh64_intersect_ray", op(gfx10=0xe7, gfx11=0x1a)), ("image_bvh64_intersect_ray", op(gfx10=0xe7, gfx11=0x1a)),
("image_bvh_dual_intersect_ray", op(gfx12=0x80)),
("image_bvh8_intersect_ray", op(gfx12=0x81)),
} }
for (name, num) in MIMG: for (name, num) in MIMG:
insn(name, num, Format.MIMG, InstrClass.VMem, is_atomic = "atomic" in name) insn(name, num, Format.MIMG, InstrClass.VMem, is_atomic = "atomic" in name)

View file

@ -887,6 +887,8 @@ validate_ir(Program* program)
program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4; program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4;
if (instr->opcode != aco_opcode::image_bvh_intersect_ray && if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
instr->opcode != aco_opcode::image_bvh64_intersect_ray && instr->opcode != aco_opcode::image_bvh64_intersect_ray &&
instr->opcode != aco_opcode::image_bvh_dual_intersect_ray &&
instr->opcode != aco_opcode::image_bvh8_intersect_ray &&
i < 3 + num_scalar) { i < 3 + num_scalar) {
check(instr->operands[i].regClass() == v1, check(instr->operands[i].regClass() == v1,
"first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get()); "first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());

View file

@ -314,6 +314,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
gather_intrinsic_store_output_info(nir, instr, info, consider_force_vrs); gather_intrinsic_store_output_info(nir, instr, info, consider_force_vrs);
break; break;
case nir_intrinsic_bvh64_intersect_ray_amd: case nir_intrinsic_bvh64_intersect_ray_amd:
case nir_intrinsic_bvh8_intersect_ray_amd:
info->cs.uses_rt = true; info->cs.uses_rt = true;
break; break;
case nir_intrinsic_load_poly_line_smooth_enabled: case nir_intrinsic_load_poly_line_smooth_enabled:

View file

@ -693,6 +693,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_is_sparse_resident_zink: case nir_intrinsic_is_sparse_resident_zink:
case nir_intrinsic_sparse_residency_code_and: case nir_intrinsic_sparse_residency_code_and:
case nir_intrinsic_bvh64_intersect_ray_amd: case nir_intrinsic_bvh64_intersect_ray_amd:
case nir_intrinsic_bvh8_intersect_ray_amd:
case nir_intrinsic_image_deref_load_param_intel: case nir_intrinsic_image_deref_load_param_intel:
case nir_intrinsic_image_load_raw_intel: case nir_intrinsic_image_load_raw_intel:
case nir_intrinsic_get_ubo_size: case nir_intrinsic_get_ubo_size:

View file

@ -1777,6 +1777,32 @@ system_value("sbt_base_amd", 1, bit_sizes=[64])
# 6. inverse ray direction (componentwise 1.0/ray direction) # 6. inverse ray direction (componentwise 1.0/ray direction)
intrinsic("bvh64_intersect_ray_amd", [4, 2, 1, 3, 3, 3], 4, flags=[CAN_ELIMINATE, CAN_REORDER]) intrinsic("bvh64_intersect_ray_amd", [4, 2, 1, 3, 3, 3], 4, flags=[CAN_ELIMINATE, CAN_REORDER])
# 1. HW descriptor
# 2. BVH base
# 3. instance cull mask
# 4. ray extent
# 5. ray origin
# 6. ray direction
# 7. node ID
#
# dst:
# | component | box node | instance node | triangle node | procedural node |
# |-----------|-------------|----------------------|-----------------------------------|-----------------------------------|
# | 0 | child_id[0] | | t[0] | |
# | 1 | child_id[1] | | u[0] | |
# | 2 | child_id[2] | blas_addr_lo | v[0] | |
# | 3 | child_id[3] | blas_addr_hi | primitive_index_hit_kind[0] | primitive_index |
# | 4 | child_id[4] | | t[1] | |
# | 5 | child_id[5] | | u[1] | |
# | 6 | child_id[6] | user_data | v[1] | |
# | 7 | child_id[7] | next_node_ids | primitive_index_hit_kind[1] | |
# | 8 | | | geometry_index_navigation_bits[0] | geometry_index_navigation_bits[0] |
# | 9 | | | geometry_index_navigation_bits[1] | geometry_index_navigation_bits[1] |
# | [10,12] | | object_ray_origin | | |
# | [13,15] | | object_ray_direction | | |
#
intrinsic("bvh8_intersect_ray_amd", [4, 2, 1, 1, 3, 3, 1], 16, flags=[CAN_ELIMINATE, CAN_REORDER])
# Return of a callable in raytracing pipelines # Return of a callable in raytracing pipelines
intrinsic("rt_return_amd") intrinsic("rt_return_amd")