brw: Move ray payload bitfield generation to NIR

This will save us the trouble of faking constant folding for the BVH level and
trace ray control values when we lower this intrinsic in the new backends.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42006>
This commit is contained in:
Calder Young 2026-05-28 11:17:23 -07:00 committed by Marge Bot
parent 2a1588c09e
commit 9f2c6fdca4
8 changed files with 50 additions and 54 deletions

View file

@ -2866,8 +2866,8 @@ intrinsic("btd_stack_push_intel", indices=[STACK_SIZE])
intrinsic("btd_retire_intel")
# Intel-specific ray-tracing intrinsic
# src[] = { globals, level, operation } SYNCHRONOUS=synchronous
intrinsic("trace_ray_intel", src_comp=[1, 1, 1], indices=[SYNCHRONOUS])
# src[] = { globals, payload } SYNCHRONOUS=synchronous
intrinsic("trace_ray_intel", src_comp=[1, 1], indices=[SYNCHRONOUS])
# System values used for ray-tracing on Intel
system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64])

View file

@ -679,10 +679,8 @@ enum memory_flags {
enum rt_logical_srcs {
/** Address of the globals */
RT_LOGICAL_SRC_GLOBALS,
/** Level at which the tracing should start */
RT_LOGICAL_SRC_BVH_LEVEL,
/** Type of tracing operation */
RT_LOGICAL_SRC_TRACE_RAY_CONTROL,
/** Trace ray payloads */
RT_LOGICAL_SRC_PAYLOADS,
/** Synchronous tracing (ray query) */
RT_LOGICAL_SRC_SYNCHRONOUS,

View file

@ -5746,8 +5746,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
brw_reg globals = get_nir_src(ntb, instr->src[0], -1);
srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals);
srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(ntb, instr->src[1], 0);
srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(ntb, instr->src[2], 0);
srcs[RT_LOGICAL_SRC_PAYLOADS] = get_nir_src(ntb, instr->src[1], 0);
srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);
/* Bspec 57508, 47937: Structure_SIMD16TraceRayMessage:: RayQuery Enable

View file

@ -2044,22 +2044,9 @@ static void
lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
{
const intel_device_info *devinfo = bld.shader->devinfo;
/* The emit_uniformize() in brw_from_nir.cpp will generate a horizontal
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
* types on Gfx12.5, we need to tweak the stride with a value of 1 dword
* so that the MOV operates on 2 components rather than twice the same
* component.
*/
const brw_reg bvh_level =
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == IMM ?
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
const brw_reg trace_ray_control =
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == IMM ?
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
const brw_reg payload =
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_PAYLOADS],
inst->components_read(RT_LOGICAL_SRC_PAYLOADS));
const brw_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
assert(synchronous_src.file == IMM);
const bool synchronous = synchronous_src.ud;
@ -2075,6 +2062,12 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
const brw_reg globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
if (globals_addr.file != UNIFORM) {
/* The emit_uniformize() in brw_from_nir.cpp will generate a horizontal
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
* types on Gfx12.5, we need to tweak the stride with a value of 1 dword
* so that the MOV operates on 2 components rather than twice the same
* component.
*/
brw_reg addr_ud = retype(globals_addr, BRW_TYPE_UD);
addr_ud.stride = 1;
ubld.group(2, 0).MOV(header, addr_ud);
@ -2105,16 +2098,6 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
const unsigned ex_mlen = inst->exec_size / 8;
brw_reg payload = bld.vgrf(BRW_TYPE_UD);
if (bvh_level.file == IMM &&
trace_ray_control.file == IMM) {
uint32_t high = devinfo->ver >= 20 ? 10 : 9;
bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, high, 8) |
(bvh_level.ud & 0x7)));
} else {
bld.SHL(payload, trace_ray_control, brw_imm_ud(8));
bld.OR(payload, payload, bvh_level);
}
/* When doing synchronous traversal, the HW implicitly computes the
* stack_id using the following formula :

View file

@ -319,8 +319,8 @@ lower_ray_query_intrinsic(nir_builder *b,
/* Do not use state->rq_globals, we want a uniform value for the
* tracing call.
*/
nir_trace_ray_intel(b, nir_load_ray_query_global_intel(b),
level, ctrl, .synchronous = true);
brw_nir_trace_ray(b, nir_load_ray_query_global_intel(b),
level, ctrl, true);
struct brw_nir_rt_mem_hit_defs hit_in = {};
brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false,

View file

@ -232,11 +232,11 @@ lower_shader_trace_ray(nir_builder *b, nir_intrinsic_instr *call, void *data)
brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD, devinfo);
nir_trace_ray_intel(b,
nir_load_btd_global_arg_addr_intel(b),
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL),
.synchronous = false);
brw_nir_trace_ray(b,
nir_load_btd_global_arg_addr_intel(b),
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL),
false);
return true;
}
@ -359,8 +359,8 @@ brw_nir_create_null_ahs_shader(const struct brw_compiler *compiler,
brw_nir_rt_load_mem_hit(b, &hit_in, false, compiler->devinfo);
nir_def *ray_level = hit_in.bvh_level;
nir_def *ray_op = nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT);
nir_trace_ray_intel(b,
nir_load_btd_global_arg_addr_intel(b),
ray_level, ray_op);
brw_nir_trace_ray(b,
nir_load_btd_global_arg_addr_intel(b),
ray_level, ray_op, false);
return nir;
}

View file

@ -271,11 +271,11 @@ lower_ray_walk_intrinsics(nir_shader *shader,
* optimization passes.
*/
nir_push_if(&b, nir_imm_true(&b));
nir_trace_ray_intel(&b,
nir_load_btd_global_arg_addr_intel(&b),
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
.synchronous = false);
brw_nir_trace_ray(&b,
nir_load_btd_global_arg_addr_intel(&b),
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
false);
nir_jump(&b, nir_jump_halt);
nir_pop_if(&b, NULL);
progress = true;
@ -293,11 +293,11 @@ lower_ray_walk_intrinsics(nir_shader *shader,
}
nir_push_else(&b, NULL);
{
nir_trace_ray_intel(&b,
nir_load_btd_global_arg_addr_intel(&b),
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
.synchronous = false);
brw_nir_trace_ray(&b,
nir_load_btd_global_arg_addr_intel(&b),
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
false);
nir_jump(&b, nir_jump_halt);
}
nir_pop_if(&b, NULL);

View file

@ -126,6 +126,22 @@ brw_nir_btd_return(struct nir_builder *b)
brw_nir_btd_spawn(b, resume_addr);
}
/**
 * Emit a trace_ray_intel intrinsic, building its payload source in NIR.
 *
 * The payload is produced with nir_bfi(): trace_ray_control is inserted
 * into the bit-field selected by INTEL_MASK(10, 8), using bvh_level as the
 * base value. Both inputs are converted to 32 bits with nir_u2u32() first.
 *
 * \param b                  NIR builder used to emit the instructions
 * \param globals            first source of the intrinsic (globals)
 * \param bvh_level          base value of the payload
 * \param trace_ray_control  value inserted into the masked payload field
 * \param synchronous        forwarded as the intrinsic's SYNCHRONOUS index
 */
static inline void
brw_nir_trace_ray(nir_builder *b,
                  nir_def *globals,
                  nir_def *bvh_level,
                  nir_def *trace_ray_control,
                  bool synchronous)
{
   /* Mask describing where the trace ray control value lives. */
   nir_def *ctrl_mask = nir_imm_int(b, INTEL_MASK(10, 8));

   /* Normalize both inputs to 32-bit before combining them. */
   nir_def *ctrl32 = nir_u2u32(b, trace_ray_control);
   nir_def *level32 = nir_u2u32(b, bvh_level);

   /* Insert the control value into the masked field on top of the level. */
   nir_def *payload = nir_bfi(b, ctrl_mask, ctrl32, level32);

   nir_trace_ray_intel(b, globals, payload, .synchronous = synchronous);
}
static inline void
assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size)
{