diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index beaeed46907..acd8bf8a8c3 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6859,7 +6859,14 @@ static void lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) { const intel_device_info *devinfo = bld.shader->devinfo; - const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS]; + /* The emit_uniformize() in brw_fs_nir.cpp will generate a horizontal + * stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q + * types on Gfx12.5, we need to tweak the stride with a value of 1 dword + * so that the MOV operates on 2 components rather than on the same + * component twice. + */ + fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_REGISTER_TYPE_UD); + globals_addr.stride = 1; const fs_reg &bvh_level = inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ? inst->src[RT_LOGICAL_SRC_BVH_LEVEL] : @@ -6878,7 +6885,7 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) const fs_builder ubld = bld.exec_all().group(8, 0); fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD); ubld.MOV(header, brw_imm_ud(0)); - ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD)); + ubld.group(2, 0).MOV(header, globals_addr); if (synchronous) ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 5da550aacea..efbd0196b8f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5948,7 +5948,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr emit_rt_lsc_fence(bld, LSC_FENCE_LOCAL, LSC_FLUSH_TYPE_NONE); fs_reg srcs[RT_LOGICAL_NUM_SRCS]; - srcs[RT_LOGICAL_SRC_GLOBALS] = get_nir_src(instr->src[0]); + + fs_reg globals = get_nir_src(instr->src[0]); + srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals); 
srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(instr->src[1]); srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(instr->src[2]); srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);