From 0869e0aac5f206dca9d41edd7667c258b9d8a10f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 23 Jun 2022 14:15:51 +0300 Subject: [PATCH] intel/fs: ray query fix for global address With stages dispatching with a mask, we can run into situations where we don't have the global address in all lanes. The existing code always assumed we had the address in at least lane0. Signed-off-by: Lionel Landwerlin Fixes: bb40e999d114 ("intel/nir: use a single intel intrinsic to deal with ray traversal") Reviewed-by: Caio Oliveira Part-of: (cherry picked from commit 9680e0e4a2d1f7f5490ea27e4c5250baa2f21d48) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs.cpp | 11 +++++++++-- src/intel/compiler/brw_fs_nir.cpp | 4 +++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index bcfbabe2d6e..67b3132db4a 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1732,7 +1732,7 @@ "description": "intel/fs: ray query fix for global address", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "bb40e999d114db361539cdc4ddb65ace37d76562" }, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a5af1b098ee..fea67288ba7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6707,7 +6707,14 @@ static void lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) { const intel_device_info *devinfo = bld.shader->devinfo; - const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS]; + /* The emit_uniformize() in brw_fs_nir.cpp will generate a horizontal + * stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q + * types on Gfx12.5, we need to tweak the stride with a value of 1 dword + * so that the MOV operates on 2 components rather than twice the same + * component. 
+ */ + fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_REGISTER_TYPE_UD); + globals_addr.stride = 1; const fs_reg &bvh_level = inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ? inst->src[RT_LOGICAL_SRC_BVH_LEVEL] : @@ -6726,7 +6733,7 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) const fs_builder ubld = bld.exec_all().group(8, 0); fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD); ubld.MOV(header, brw_imm_ud(0)); - ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD)); + ubld.group(2, 0).MOV(header, globals_addr); if (synchronous) ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 81fb50624a3..0cf782c5018 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5884,7 +5884,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (synchronous) emit_rt_lsc_fence(bld, LSC_FLUSH_TYPE_EVICT); fs_reg srcs[RT_LOGICAL_NUM_SRCS]; - srcs[RT_LOGICAL_SRC_GLOBALS] = get_nir_src(instr->src[0]); + + fs_reg globals = get_nir_src(instr->src[0]); + srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals); srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(instr->src[1]); srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(instr->src[2]); srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);