diff --git a/.pick_status.json b/.pick_status.json index bcfbabe2d6e..67b3132db4a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1732,7 +1732,7 @@ "description": "intel/fs: ray query fix for global address", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "bb40e999d114db361539cdc4ddb65ace37d76562" }, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a5af1b098ee..fea67288ba7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6707,7 +6707,14 @@ static void lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) { const intel_device_info *devinfo = bld.shader->devinfo; - const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS]; + /* The emit_uniformize() in brw_fs_nir.cpp will generate a horizontal + * stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q + * types on Gfx12.5, we need to tweak the stride with a value of 1 dword + * so that the MOV operates on 2 components rather than twice the same + * component. + */ + fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_REGISTER_TYPE_UD); + globals_addr.stride = 1; const fs_reg &bvh_level = inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ? 
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] : @@ -6726,7 +6733,7 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) const fs_builder ubld = bld.exec_all().group(8, 0); fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD); ubld.MOV(header, brw_imm_ud(0)); - ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD)); + ubld.group(2, 0).MOV(header, globals_addr); if (synchronous) ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 81fb50624a3..0cf782c5018 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5884,7 +5884,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (synchronous) emit_rt_lsc_fence(bld, LSC_FLUSH_TYPE_EVICT); fs_reg srcs[RT_LOGICAL_NUM_SRCS]; - srcs[RT_LOGICAL_SRC_GLOBALS] = get_nir_src(instr->src[0]); + + fs_reg globals = get_nir_src(instr->src[0]); + srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals); srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(instr->src[1]); srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(instr->src[2]); srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);