diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 6a1ff384db9..eb7ef7fc009 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2866,8 +2866,8 @@ intrinsic("btd_stack_push_intel", indices=[STACK_SIZE]) intrinsic("btd_retire_intel") # Intel-specific ray-tracing intrinsic -# src[] = { globals, level, operation } SYNCHRONOUS=synchronous -intrinsic("trace_ray_intel", src_comp=[1, 1, 1], indices=[SYNCHRONOUS]) +# src[] = { globals, payload } SYNCHRONOUS=synchronous +intrinsic("trace_ray_intel", src_comp=[1, 1], indices=[SYNCHRONOUS]) # System values used for ray-tracing on Intel system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64]) diff --git a/src/intel/compiler/brw/brw_eu_defines.h b/src/intel/compiler/brw/brw_eu_defines.h index d2581a2ae81..d26ba1b1c76 100644 --- a/src/intel/compiler/brw/brw_eu_defines.h +++ b/src/intel/compiler/brw/brw_eu_defines.h @@ -679,10 +679,8 @@ enum memory_flags { enum rt_logical_srcs { /** Address of the globals */ RT_LOGICAL_SRC_GLOBALS, - /** Level at which the tracing should start */ - RT_LOGICAL_SRC_BVH_LEVEL, - /** Type of tracing operation */ - RT_LOGICAL_SRC_TRACE_RAY_CONTROL, + /** Trace ray payloads */ + RT_LOGICAL_SRC_PAYLOADS, /** Synchronous tracing (ray query) */ RT_LOGICAL_SRC_SYNCHRONOUS, diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 683519745b9..2a01556ef30 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -5746,8 +5746,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, brw_reg globals = get_nir_src(ntb, instr->src[0], -1); srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals); - srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(ntb, instr->src[1], 0); - srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(ntb, instr->src[2], 0); + srcs[RT_LOGICAL_SRC_PAYLOADS] = get_nir_src(ntb, instr->src[1], 0); 
srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous); /* Bspec 57508, 47937: Structure_SIMD16TraceRayMessage:: RayQuery Enable diff --git a/src/intel/compiler/brw/brw_lower_logical_sends.cpp b/src/intel/compiler/brw/brw_lower_logical_sends.cpp index 9444028a090..8e3ae239d1c 100644 --- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp @@ -2044,22 +2044,9 @@ static void lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst) { const intel_device_info *devinfo = bld.shader->devinfo; - /* The emit_uniformize() in brw_from_nir.cpp will generate an horizontal - * stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q - * types in on Gfx12.5, we need to tweak the stride with a value of 1 dword - * so that the MOV operates on 2 components rather than twice the same - * component. - */ - const brw_reg bvh_level = - inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == IMM ? - inst->src[RT_LOGICAL_SRC_BVH_LEVEL] : - bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_BVH_LEVEL], - inst->components_read(RT_LOGICAL_SRC_BVH_LEVEL)); - const brw_reg trace_ray_control = - inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL].file == IMM ? - inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] : - bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_TRACE_RAY_CONTROL], - inst->components_read(RT_LOGICAL_SRC_TRACE_RAY_CONTROL)); + const brw_reg payload = + bld.move_to_vgrf(inst->src[RT_LOGICAL_SRC_PAYLOADS], + inst->components_read(RT_LOGICAL_SRC_PAYLOADS)); const brw_reg synchronous_src = inst->src[RT_LOGICAL_SRC_SYNCHRONOUS]; assert(synchronous_src.file == IMM); const bool synchronous = synchronous_src.ud; @@ -2075,6 +2062,12 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst) const brw_reg globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS]; if (globals_addr.file != UNIFORM) { + /* The emit_uniformize() in brw_from_nir.cpp will generate an horizontal + * stride of 0. Below we're doing a MOV() in SIMD2. 
Since we can't use UQ/Q + * types on Gfx12.5, we need to tweak the stride with a value of 1 dword + * so that the MOV operates on 2 components rather than twice the same + * component. + */ brw_reg addr_ud = retype(globals_addr, BRW_TYPE_UD); addr_ud.stride = 1; ubld.group(2, 0).MOV(header, addr_ud); @@ -2105,16 +2098,6 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst) ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous)); const unsigned ex_mlen = inst->exec_size / 8; - brw_reg payload = bld.vgrf(BRW_TYPE_UD); - if (bvh_level.file == IMM && - trace_ray_control.file == IMM) { - uint32_t high = devinfo->ver >= 20 ? 10 : 9; - bld.MOV(payload, brw_imm_ud(SET_BITS(trace_ray_control.ud, high, 8) | - (bvh_level.ud & 0x7))); - } else { - bld.SHL(payload, trace_ray_control, brw_imm_ud(8)); - bld.OR(payload, payload, bvh_level); - } /* When doing synchronous traversal, the HW implicitly computes the * stack_id using the following formula : diff --git a/src/intel/compiler/brw/brw_nir_lower_ray_queries.c b/src/intel/compiler/brw/brw_nir_lower_ray_queries.c index 11faecd593e..495868a9c8a 100644 --- a/src/intel/compiler/brw/brw_nir_lower_ray_queries.c +++ b/src/intel/compiler/brw/brw_nir_lower_ray_queries.c @@ -319,8 +319,8 @@ lower_ray_query_intrinsic(nir_builder *b, /* Do not use state->rq_globals, we want a uniform value for the * tracing call. 
*/ - nir_trace_ray_intel(b, nir_load_ray_query_global_intel(b), - level, ctrl, .synchronous = true); + brw_nir_trace_ray(b, nir_load_ray_query_global_intel(b), + level, ctrl, true); struct brw_nir_rt_mem_hit_defs hit_in = {}; brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false, diff --git a/src/intel/compiler/brw/brw_nir_lower_shader_calls.c b/src/intel/compiler/brw/brw_nir_lower_shader_calls.c index 99ea3f67393..26a52d88c83 100644 --- a/src/intel/compiler/brw/brw_nir_lower_shader_calls.c +++ b/src/intel/compiler/brw/brw_nir_lower_shader_calls.c @@ -232,11 +232,11 @@ lower_shader_trace_ray(nir_builder *b, nir_intrinsic_instr *call, void *data) brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD, devinfo); - nir_trace_ray_intel(b, - nir_load_btd_global_arg_addr_intel(b), - nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD), - nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL), - .synchronous = false); + brw_nir_trace_ray(b, + nir_load_btd_global_arg_addr_intel(b), + nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD), + nir_imm_int(b, GEN_RT_TRACE_RAY_INITIAL), + false); return true; } @@ -359,8 +359,8 @@ brw_nir_create_null_ahs_shader(const struct brw_compiler *compiler, brw_nir_rt_load_mem_hit(b, &hit_in, false, compiler->devinfo); nir_def *ray_level = hit_in.bvh_level; nir_def *ray_op = nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT); - nir_trace_ray_intel(b, - nir_load_btd_global_arg_addr_intel(b), - ray_level, ray_op); + brw_nir_trace_ray(b, + nir_load_btd_global_arg_addr_intel(b), + ray_level, ray_op, false); return nir; } diff --git a/src/intel/compiler/brw/brw_nir_rt.c b/src/intel/compiler/brw/brw_nir_rt.c index 1c94b65ad5b..415d5601fec 100644 --- a/src/intel/compiler/brw/brw_nir_rt.c +++ b/src/intel/compiler/brw/brw_nir_rt.c @@ -271,11 +271,11 @@ lower_ray_walk_intrinsics(nir_shader *shader, * optimization passes. 
*/ nir_push_if(&b, nir_imm_true(&b)); - nir_trace_ray_intel(&b, - nir_load_btd_global_arg_addr_intel(&b), - nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT), - nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE), - .synchronous = false); + brw_nir_trace_ray(&b, + nir_load_btd_global_arg_addr_intel(&b), + nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT), + nir_imm_int(&b, GEN_RT_TRACE_RAY_CONTINUE), + false); nir_jump(&b, nir_jump_halt); nir_pop_if(&b, NULL); progress = true; @@ -293,11 +293,11 @@ lower_ray_walk_intrinsics(nir_shader *shader, } nir_push_else(&b, NULL); { - nir_trace_ray_intel(&b, - nir_load_btd_global_arg_addr_intel(&b), - nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT), - nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT), - .synchronous = false); + brw_nir_trace_ray(&b, + nir_load_btd_global_arg_addr_intel(&b), + nir_imm_int(&b, BRW_RT_BVH_LEVEL_OBJECT), + nir_imm_int(&b, GEN_RT_TRACE_RAY_COMMIT), + false); nir_jump(&b, nir_jump_halt); } nir_pop_if(&b, NULL); diff --git a/src/intel/compiler/brw/brw_nir_rt_builder.h b/src/intel/compiler/brw/brw_nir_rt_builder.h index e4521e99f1b..45436614e2a 100644 --- a/src/intel/compiler/brw/brw_nir_rt_builder.h +++ b/src/intel/compiler/brw/brw_nir_rt_builder.h @@ -126,6 +126,22 @@ brw_nir_btd_return(struct nir_builder *b) brw_nir_btd_spawn(b, resume_addr); } +static inline void +brw_nir_trace_ray(nir_builder *b, + nir_def *globals, + nir_def *bvh_level, + nir_def *trace_ray_control, + bool synchronous) +{ + nir_trace_ray_intel(b, + globals, + nir_bfi(b, + nir_imm_int(b, INTEL_MASK(10, 8)), + nir_u2u32(b, trace_ray_control), + nir_u2u32(b, bvh_level)), + .synchronous = synchronous); +} + static inline void assert_def_size(nir_def *def, unsigned num_components, unsigned bit_size) {