mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-10 05:38:18 +02:00
brw: Move ray payload bitfield generation to NIR
This will save us the trouble of faking constant folding for the BVH level and trace ray control values when we lower this intrinsic in the new backends. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42006>
This commit is contained in:
parent
2a1588c09e
commit
9f2c6fdca4
8 changed files with 50 additions and 54 deletions
|
|
@ -2866,8 +2866,8 @@ intrinsic("btd_stack_push_intel", indices=[STACK_SIZE])
|
|||
intrinsic("btd_retire_intel")
|
||||
|
||||
# Intel-specific ray-tracing intrinsic
|
||||
# src[] = { globals, level, operation } SYNCHRONOUS=synchronous
|
||||
intrinsic("trace_ray_intel", src_comp=[1, 1, 1], indices=[SYNCHRONOUS])
|
||||
# src[] = { globals, payload } SYNCHRONOUS=synchronous
|
||||
intrinsic("trace_ray_intel", src_comp=[1, 1], indices=[SYNCHRONOUS])
|
||||
|
||||
# System values used for ray-tracing on Intel
|
||||
system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64])
|
||||
|
|
|
|||
|
|
@ -679,10 +679,8 @@ enum memory_flags {
|
|||
enum rt_logical_srcs {
|
||||
/** Address of the globals */
|
||||
RT_LOGICAL_SRC_GLOBALS,
|
||||
/** Level at which the tracing should start */
|
||||
RT_LOGICAL_SRC_BVH_LEVEL,
|
||||
/** Type of tracing operation */
|
||||
RT_LOGICAL_SRC_TRACE_RAY_CONTROL,
|
||||
/** Trace ray payloads */
|
||||
RT_LOGICAL_SRC_PAYLOADS,
|
||||
/** Synchronous tracing (ray query) */
|
||||
RT_LOGICAL_SRC_SYNCHRONOUS,
|
||||
|
||||
|
|
|
|||
|
|
@ -5746,8 +5746,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
|
||||
brw_reg globals = get_nir_src(ntb, instr->src[0], -1);
|
||||
srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals);
|
||||
srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(ntb, instr->src[1], 0);
|
||||
srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(ntb, instr->src[2], 0);
|
||||
srcs[RT_LOGICAL_SRC_PAYLOADS] = get_nir_src(ntb, instr->src[1], 0);
|
||||
srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);
|
||||
|
||||
/* Bspec 57508, 47937: Structure_SIMD16TraceRayMessage:: RayQuery Enable
|
||||
|
|
|
|||
|
|
@ -2044,22 +2044,9 @@ static void
|
|||
lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
/* The emit_uniformize() in brw_from_nir.cpp will generate a horizontal
|
||||
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
|
||||
* types on Gfx12.5, we need to tweak the stride with a value of 1 dword
|
||||
* so that the MOV operates on 2 components rather than twice the same
|
||||
* component.
|
||||
*/
|
||||
const brw_reg bvh_level =
|
||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == IMM ?
|
||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
|
||||
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL],
|
||||
inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL));
|
||||
const brw_reg trace_ray_control =
|
||||
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == IMM ?
|
||||
inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] :
|
||||
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL],
|
||||
inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL));
|
||||
const brw_reg payload =
|
||||
bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_PAYLOADS],
|
||||
inst->components_read(RT_LOGICAL_SRC_PAYLOADS));
|
||||
const brw_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS];
|
||||
assert(synchronous_src.file == IMM);
|
||||
const bool synchronous = synchronous_src.ud;
|
||||
|
|
@ -2075,6 +2062,12 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
const brw_reg globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
|
||||
if (globals_addr.file != UNIFORM) {
|
||||
/* The emit_uniformize() in brw_from_nir.cpp will generate a horizontal
|
||||
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
|
||||
* types on Gfx12.5, we need to tweak the stride with a value of 1 dword
|
||||
* so that the MOV operates on 2 components rather than twice the same
|
||||
* component.
|
||||
*/
|
||||
brw_reg addr_ud = retype(globals_addr, BRW_TYPE_UD);
|
||||
addr_ud.stride = 1;
|
||||
ubld.group(2, 0).MOV(header, addr_ud);
|
||||
|
|
@ -2105,16 +2098,6 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
|
||||
|
||||
const unsigned ex_mlen = inst->exec_size / 8;
|
||||
brw_reg payload = bld.vgrf(BRW_TYPE_UD);
|
||||
if (bvh_level.file == IMM &&
|
||||
trace_ray_control.file == IMM) {
|
||||
uint32_t high = devinfo->ver >= 20 ? 10 : 9;
|
||||
bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, high, 8) |
|
||||
(bvh_level.ud & 0x7)));
|
||||
} else {
|
||||
bld.SHL(payload, trace_ray_control, brw_imm_ud(8));
|
||||
bld.OR(payload, payload, bvh_level);
|
||||
}
|
||||
|
||||
/* When doing synchronous traversal, the HW implicitly computes the
|
||||
* stack_id using the following formula :
|
||||
|
|
|
|||
|
|
@ -319,8 +319,8 @@ lower_ray_query_intrinsic(nir_builder *b,
|
|||
/* Do not use state->rq_globals, we want a uniform value for the
|
||||
* tracing call.
|
||||
*/
|
||||
nir_trace_ray_intel(b, nir_load_ray_query_global_intel(b),
|
||||
level, ctrl, .synchronous = true);
|
||||
brw_nir_trace_ray(b, nir_load_ray_query_global_intel(b),
|
||||
level, ctrl, true);
|
||||
|
||||
struct brw_nir_rt_mem_hit_defs hit_in = {};
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false,
|
||||
|
|
|
|||
|
|
@ -232,11 +232,11 @@ lower_shader_trace_ray(nir_builder *b, nir_intrinsic_instr *call, void *data)
|
|||
|
||||
brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD, devinfo);
|
||||
|
||||
nir_trace_ray_intel(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
|
||||
nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL),
|
||||
.synchronous = false);
|
||||
brw_nir_trace_ray(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
|
||||
nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL),
|
||||
false);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -359,8 +359,8 @@ brw_nir_create_null_ahs_shader(const struct brw_compiler *compiler,
|
|||
brw_nir_rt_load_mem_hit(b, &hit_in, false, compiler->devinfo);
|
||||
nir_def *ray_level = hit_in.bvh_level;
|
||||
nir_def *ray_op = nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT);
|
||||
nir_trace_ray_intel(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
ray_level, ray_op);
|
||||
brw_nir_trace_ray(b,
|
||||
nir_load_btd_global_arg_addr_intel(b),
|
||||
ray_level, ray_op, false);
|
||||
return nir;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -271,11 +271,11 @@ lower_ray_walk_intrinsics(nir_shader *shader,
|
|||
* optimization passes.
|
||||
*/
|
||||
nir_push_if(&b, nir_imm_true(&b));
|
||||
nir_trace_ray_intel(&b,
|
||||
nir_load_btd_global_arg_addr_intel(&b),
|
||||
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
|
||||
nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
|
||||
.synchronous = false);
|
||||
brw_nir_trace_ray(&b,
|
||||
nir_load_btd_global_arg_addr_intel(&b),
|
||||
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
|
||||
nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE),
|
||||
false);
|
||||
nir_jump(&b, nir_jump_halt);
|
||||
nir_pop_if(&b, NULL);
|
||||
progress = true;
|
||||
|
|
@ -293,11 +293,11 @@ lower_ray_walk_intrinsics(nir_shader *shader,
|
|||
}
|
||||
nir_push_else(&b, NULL);
|
||||
{
|
||||
nir_trace_ray_intel(&b,
|
||||
nir_load_btd_global_arg_addr_intel(&b),
|
||||
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
|
||||
nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
|
||||
.synchronous = false);
|
||||
brw_nir_trace_ray(&b,
|
||||
nir_load_btd_global_arg_addr_intel(&b),
|
||||
nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT),
|
||||
nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT),
|
||||
false);
|
||||
nir_jump(&b, nir_jump_halt);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
|
|
|||
|
|
@ -126,6 +126,22 @@ brw_nir_btd_return(struct nir_builder *b)
|
|||
brw_nir_btd_spawn(b, resume_addr);
|
||||
}
|
||||
|
||||
/**
 * Emit a trace_ray_intel intrinsic whose payload source is built in NIR.
 *
 * The payload is a single 32-bit value produced by nir_bfi() from
 * trace_ray_control and bvh_level (both first converted with nir_u2u32()),
 * using INTEL_MASK(10, 8) as the bitfield mask.
 *
 * \param b                  NIR builder used to emit the instructions
 * \param globals            first source of trace_ray_intel (the globals)
 * \param bvh_level          value passed as the last nir_bfi() operand
 * \param trace_ray_control  value passed as the middle nir_bfi() operand
 * \param synchronous        forwarded as the intrinsic's .synchronous index
 *
 * NOTE(review): presumably nir_bfi(mask, insert, base) places
 * trace_ray_control in bits 10:8 on top of bvh_level - confirm against the
 * NIR opcode definition.
 *
 * NOTE(review): bvh_level is not explicitly masked here and the mask is the
 * same for every hardware version; callers are assumed to pass values that
 * fit their respective fields - TODO confirm.
 */
static inline void
|
||||
brw_nir_trace_ray(nir_builder *b,
|
||||
nir_def *globals,
|
||||
nir_def *bvh_level,
|
||||
nir_def *trace_ray_control,
|
||||
bool synchronous)
|
||||
{
|
||||
nir_trace_ray_intel(b,
|
||||
globals,
|
||||
nir_bfi(b,
|
||||
nir_imm_int(b, INTEL_MASK(10, 8)),
|
||||
nir_u2u32(b, trace_ray_control),
|
||||
nir_u2u32(b, bvh_level)),
|
||||
.synchronous = synchronous);
|
||||
}
|
||||
|
||||
static inline void
|
||||
assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue