intel/fs/gfx12: Don't consider multipolygon PS to have packed dispatch.

This fixes a number of regressions and hangs in multipolygon fragment
shaders that contain FIND_LIVE_CHANNEL sequences.  When such shaders
were treated as having packed dispatch, those sequences could end up
accessing a dead channel.  Note that the failures don't seem to be
reproducible in simulation.

Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26585>
This commit is contained in:
Francisco Jerez 2023-12-07 19:38:02 -08:00 committed by Marge Bot
parent 8f92baa5d3
commit 5e0760a993
3 changed files with 14 additions and 9 deletions

View file

@ -2190,7 +2190,7 @@ brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
*/
static inline bool
brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
gl_shader_stage stage,
gl_shader_stage stage, unsigned max_polygons,
const struct brw_stage_prog_data *prog_data)
{
/* The code below makes assumptions about the hardware's thread dispatch
@ -2214,7 +2214,8 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
(const struct brw_wm_prog_data *)prog_data;
return devinfo->verx10 < 125 &&
!wm_prog_data->persample_dispatch &&
wm_prog_data->uses_vmask;
wm_prog_data->uses_vmask &&
max_polygons < 2;
}
case MESA_SHADER_COMPUTE:
/* Compute shaders will be spawned with either a fully enabled dispatch

View file

@ -3293,7 +3293,8 @@ fs_visitor::eliminate_find_live_channel()
bool progress = false;
unsigned depth = 0;
if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
if (!brw_stage_has_packed_dispatch(devinfo, stage, max_polygons,
stage_prog_data)) {
/* The optimization below assumes that channel zero is live on thread
* dispatch, which may not be the case if the fixed function dispatches
* threads sparsely.
@ -5625,7 +5626,8 @@ fs_visitor::lower_find_live_channel()
return false;
bool packed_dispatch =
brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data);
brw_stage_has_packed_dispatch(devinfo, stage, max_polygons,
stage_prog_data);
bool vmask =
stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_data(stage_prog_data)->uses_vmask;
@ -8232,13 +8234,15 @@ brw_compile_bs(const struct brw_compiler *compiler,
static UNUSED void
brw_fs_test_dispatch_packing(const fs_builder &bld)
{
const gl_shader_stage stage = bld.shader->stage;
const fs_visitor *shader = static_cast<const fs_visitor *>(bld.shader);
const gl_shader_stage stage = shader->stage;
const bool uses_vmask =
stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_data(bld.shader->stage_prog_data)->uses_vmask;
brw_wm_prog_data(shader->stage_prog_data)->uses_vmask;
if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage,
bld.shader->stage_prog_data)) {
if (brw_stage_has_packed_dispatch(shader->devinfo, stage,
shader->max_polygons,
shader->stage_prog_data)) {
const fs_builder ubld = bld.exec_all().group(1, 0);
const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
const fs_reg mask = uses_vmask ? brw_vmask_reg() : brw_dmask_reg();

View file

@ -1208,7 +1208,7 @@ vec4_visitor::eliminate_find_live_channel()
bool progress = false;
unsigned depth = 0;
if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
if (!brw_stage_has_packed_dispatch(devinfo, stage, 0, stage_prog_data)) {
/* The optimization below assumes that channel zero is live on thread
* dispatch, which may not be the case if the fixed function dispatches
* threads sparsely.