From 385f9a96622f56fc6dbfae49f5541875b507bea6 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 8 Feb 2023 20:48:25 +0200 Subject: [PATCH] intel/fs: bound subgroup invocation read to dispatch size This is to avoid out-of-bounds register accesses (potentially leading to hangs) when the dispatch size is smaller than what is reported in the NIR subgroup_size. v2: Implement bounding with a mask (since workgroup sizes are powers of 2) (Faith) Signed-off-by: Lionel Landwerlin Fixes: 530de844ef4d ("intel,anv,iris,crocus: Drop subgroup size from the shader key") Reviewed-by: Faith Ekstrand Reviewed-by: Kenneth Graunke Part-of: (cherry picked from commit 9ac192d79dbef726983d704c3e965e3b058769f6) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_nir.cpp | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d9a969c0f90..74968058978 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3316,7 +3316,7 @@ "description": "intel/fs: bound subgroup invocation read to dispatch size", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "530de844ef4d6f8d64276ad4558dd8a3d787e390" }, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 0007f0ee80e..9065fd39db7 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5452,10 +5452,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_read_invocation: { const fs_reg value = get_nir_src(instr->src[0]); const fs_reg invocation = get_nir_src(instr->src[1]); + fs_reg tmp = bld.vgrf(value.type); + /* When for some reason the subgroup_size picked by NIR is larger than + * the dispatch size picked by the backend (this could happen in RT, + * FS), bound the invocation to the dispatch size. 
+ */ + fs_reg bound_invocation; + if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) { + bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1)); + } else { + bound_invocation = invocation; + } bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value, - bld.emit_uniformize(invocation)); + bld.emit_uniformize(bound_invocation)); bld.MOV(retype(dest, value.type), fs_reg(component(tmp, 0))); break;