mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 18:10:11 +01:00
aco,nir: Add support for new GFX12 ray tracing instructions
Adds image_bvh_dual_intersect_ray and image_bvh8_intersect_ray which can handle the new BVH format. Both instructions write up to 10 VGPRs so they need to use a vec16 definition in nir.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34273>
This commit is contained in:
parent
ee0f784858
commit
978e9b670e
12 changed files with 80 additions and 6 deletions
|
|
@ -838,7 +838,7 @@ emit_mimg_instruction_gfx12(asm_context& ctx, std::vector<uint32_t>& out, const
|
|||
|
||||
encoding = 0;
|
||||
if (!instr->definitions.empty())
|
||||
encoding |= reg(ctx, instr->definitions[0], 8); /* VDATA */
|
||||
encoding |= reg(ctx, instr->definitions.back(), 8); /* VDATA */
|
||||
else if (!instr->operands[2].isUndefined())
|
||||
encoding |= reg(ctx, instr->operands[2], 8); /* VDATA */
|
||||
encoding |= reg(ctx, instr->operands[0]) << 9; /* T# (resource) */
|
||||
|
|
|
|||
|
|
@ -53,7 +53,9 @@ get_type(Program* program, aco_ptr<Instruction>& instr)
|
|||
if (instr->isMIMG()) {
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::image_bvh_intersect_ray:
|
||||
case aco_opcode::image_bvh64_intersect_ray: return clause_bvh;
|
||||
case aco_opcode::image_bvh64_intersect_ray:
|
||||
case aco_opcode::image_bvh_dual_intersect_ray:
|
||||
case aco_opcode::image_bvh8_intersect_ray: return clause_bvh;
|
||||
case aco_opcode::image_atomic_swap:
|
||||
case aco_opcode::image_atomic_cmpswap:
|
||||
case aco_opcode::image_atomic_add:
|
||||
|
|
|
|||
|
|
@ -650,8 +650,8 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
|
||||
update_counters(ctx, ev, get_sync_info(instr));
|
||||
|
||||
if (!instr->definitions.empty())
|
||||
insert_wait_entry(ctx, instr->definitions[0], ev, type);
|
||||
for (auto& definition : instr->definitions)
|
||||
insert_wait_entry(ctx, definition, ev, type);
|
||||
|
||||
if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
|
||||
update_counters(ctx, event_vmem_gpr_lock);
|
||||
|
|
|
|||
|
|
@ -5962,6 +5962,38 @@ visit_bvh64_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
emit_split_vector(ctx, dst, instr->def.num_components);
|
||||
}
|
||||
|
||||
void
|
||||
visit_bvh8_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
Temp resource = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp bvh_base = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
|
||||
Temp cull_mask = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
|
||||
Temp tmax = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
|
||||
Temp origin = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[4].ssa));
|
||||
Temp dir = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[5].ssa));
|
||||
Temp node_id = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[6].ssa));
|
||||
|
||||
Temp result = bld.tmp(v10);
|
||||
Temp new_origin = bld.tmp(v3);
|
||||
Temp new_dir = bld.tmp(v3);
|
||||
|
||||
std::vector<Temp> args = {bvh_base,
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), tmax, cull_mask),
|
||||
origin, dir, node_id};
|
||||
|
||||
MIMG_instruction* mimg = emit_mimg(bld, aco_opcode::image_bvh8_intersect_ray,
|
||||
{new_origin, new_dir, result}, resource, Operand(s4), args);
|
||||
mimg->dim = ac_image_1d;
|
||||
mimg->dmask = 0xf;
|
||||
mimg->unrm = true;
|
||||
mimg->r128 = true;
|
||||
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(result), Operand(new_origin),
|
||||
Operand(new_dir));
|
||||
}
|
||||
|
||||
static std::vector<Temp>
|
||||
get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
|
||||
{
|
||||
|
|
@ -8787,6 +8819,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break;
|
||||
case nir_intrinsic_bvh8_intersect_ray_amd: visit_bvh8_intersect_ray_amd(ctx, instr); break;
|
||||
case nir_intrinsic_load_resume_shader_address_amd: {
|
||||
bld.pseudo(aco_opcode::p_resume_shader_address, Definition(get_ssa_temp(ctx, &instr->def)),
|
||||
bld.def(s1, scc), Operand::c32(nir_intrinsic_call_idx(instr)));
|
||||
|
|
|
|||
|
|
@ -562,6 +562,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_intrinsic_load_initial_edgeflags_amd:
|
||||
case nir_intrinsic_gds_atomic_add_amd:
|
||||
case nir_intrinsic_bvh64_intersect_ray_amd:
|
||||
case nir_intrinsic_bvh8_intersect_ray_amd:
|
||||
case nir_intrinsic_load_vector_arg_amd:
|
||||
case nir_intrinsic_ordered_xfb_counter_add_gfx11_amd:
|
||||
case nir_intrinsic_cmat_muladd_amd:
|
||||
|
|
|
|||
|
|
@ -1414,7 +1414,6 @@ aco::small_vec<uint32_t, 2>
|
|||
get_ops_fixed_to_def(Instruction* instr)
|
||||
{
|
||||
aco::small_vec<uint32_t, 2> ops;
|
||||
|
||||
if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 ||
|
||||
instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
|
||||
instr->opcode == aco_opcode::v_fmac_f16 || instr->opcode == aco_opcode::v_mac_legacy_f32 ||
|
||||
|
|
@ -1432,6 +1431,10 @@ get_ops_fixed_to_def(Instruction* instr)
|
|||
} else if (instr->isMIMG() && instr->definitions.size() == 1 &&
|
||||
!instr->operands[2].isUndefined()) {
|
||||
ops.push_back(2);
|
||||
} else if (instr->opcode == aco_opcode::image_bvh8_intersect_ray) {
|
||||
/* VADDR starts at 3. */
|
||||
ops.push_back(3 + 2);
|
||||
ops.push_back(3 + 3);
|
||||
}
|
||||
return ops;
|
||||
}
|
||||
|
|
@ -1439,7 +1442,8 @@ get_ops_fixed_to_def(Instruction* instr)
|
|||
uint8_t
|
||||
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray)
|
||||
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray ||
|
||||
instr->opcode == aco_opcode::image_bvh8_intersect_ray)
|
||||
return vmem_bvh;
|
||||
else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
|
||||
return vmem_sampler;
|
||||
|
|
|
|||
|
|
@ -292,6 +292,7 @@ struct RegClass {
|
|||
v6 = 6 | (1 << 5),
|
||||
v7 = 7 | (1 << 5),
|
||||
v8 = 8 | (1 << 5),
|
||||
v10 = 10 | (1 << 5),
|
||||
/* byte-sized register class */
|
||||
v1b = v1 | (1 << 7),
|
||||
v2b = v2 | (1 << 7),
|
||||
|
|
@ -360,6 +361,7 @@ static constexpr RegClass v5{RegClass::v5};
|
|||
static constexpr RegClass v6{RegClass::v6};
|
||||
static constexpr RegClass v7{RegClass::v7};
|
||||
static constexpr RegClass v8{RegClass::v8};
|
||||
static constexpr RegClass v10{RegClass::v10};
|
||||
static constexpr RegClass v1b{RegClass::v1b};
|
||||
static constexpr RegClass v2b{RegClass::v2b};
|
||||
static constexpr RegClass v3b{RegClass::v3b};
|
||||
|
|
|
|||
|
|
@ -1867,6 +1867,8 @@ MIMG = {
|
|||
("image_gather4_c_lz_o", op(0x5f, gfx11=0x37)),
|
||||
("image_bvh_intersect_ray", op(gfx10=0xe6, gfx11=0x19)),
|
||||
("image_bvh64_intersect_ray", op(gfx10=0xe7, gfx11=0x1a)),
|
||||
("image_bvh_dual_intersect_ray", op(gfx12=0x80)),
|
||||
("image_bvh8_intersect_ray", op(gfx12=0x81)),
|
||||
}
|
||||
for (name, num) in MIMG:
|
||||
insn(name, num, Format.MIMG, InstrClass.VMem, is_atomic = "atomic" in name)
|
||||
|
|
|
|||
|
|
@ -887,6 +887,8 @@ validate_ir(Program* program)
|
|||
program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4;
|
||||
if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh64_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh_dual_intersect_ray &&
|
||||
instr->opcode != aco_opcode::image_bvh8_intersect_ray &&
|
||||
i < 3 + num_scalar) {
|
||||
check(instr->operands[i].regClass() == v1,
|
||||
"first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());
|
||||
|
|
|
|||
|
|
@ -314,6 +314,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
|
|||
gather_intrinsic_store_output_info(nir, instr, info, consider_force_vrs);
|
||||
break;
|
||||
case nir_intrinsic_bvh64_intersect_ray_amd:
|
||||
case nir_intrinsic_bvh8_intersect_ray_amd:
|
||||
info->cs.uses_rt = true;
|
||||
break;
|
||||
case nir_intrinsic_load_poly_line_smooth_enabled:
|
||||
|
|
|
|||
|
|
@ -693,6 +693,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_is_sparse_resident_zink:
|
||||
case nir_intrinsic_sparse_residency_code_and:
|
||||
case nir_intrinsic_bvh64_intersect_ray_amd:
|
||||
case nir_intrinsic_bvh8_intersect_ray_amd:
|
||||
case nir_intrinsic_image_deref_load_param_intel:
|
||||
case nir_intrinsic_image_load_raw_intel:
|
||||
case nir_intrinsic_get_ubo_size:
|
||||
|
|
|
|||
|
|
@ -1777,6 +1777,32 @@ system_value("sbt_base_amd", 1, bit_sizes=[64])
|
|||
# 6. inverse ray direction (componentwise 1.0/ray direction)
|
||||
intrinsic("bvh64_intersect_ray_amd", [4, 2, 1, 3, 3, 3], 4, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# 1. HW descriptor
|
||||
# 2. BVH base
|
||||
# 3. instance cull mask
|
||||
# 4. ray extent
|
||||
# 5. ray origin
|
||||
# 6. ray direction
|
||||
# 7. node ID
|
||||
#
|
||||
# dst:
|
||||
# | component | box node | instance node | triangle node | procedural node |
|
||||
# |-----------|-------------|----------------------|-----------------------------------|-----------------------------------|
|
||||
# | 0 | child_id[0] | | t[0] | |
|
||||
# | 1 | child_id[1] | | u[0] | |
|
||||
# | 2 | child_id[2] | blas_addr_lo | v[0] | |
|
||||
# | 3 | child_id[3] | blas_addr_hi | primitive_index_hit_kind[0] | primitive_index |
|
||||
# | 4 | child_id[4] | | t[1] | |
|
||||
# | 5 | child_id[5] | | u[1] | |
|
||||
# | 6 | child_id[6] | user_data | v[1] | |
|
||||
# | 7 | child_id[7] | next_node_ids | primitive_index_hit_kind[1] | |
|
||||
# | 8 | | | geometry_index_navigation_bits[0] | geometry_index_navigation_bits[0] |
|
||||
# | 9 | | | geometry_index_navigation_bits[1] | geometry_index_navigation_bits[1] |
|
||||
# | [10,12] | | object_ray_origin | | |
|
||||
# | [13,15] | | object_ray_direction | | |
|
||||
#
|
||||
intrinsic("bvh8_intersect_ray_amd", [4, 2, 1, 1, 3, 3, 1], 16, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Return of a callable in raytracing pipelines
|
||||
intrinsic("rt_return_amd")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue