diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 5a8f4d5e9e9..924d3a8e930 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -2190,7 +2190,7 @@ brw_cs_get_dispatch_info(const struct intel_device_info *devinfo, */ static inline bool brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo, - gl_shader_stage stage, + gl_shader_stage stage, unsigned max_polygons, const struct brw_stage_prog_data *prog_data) { /* The code below makes assumptions about the hardware's thread dispatch @@ -2214,7 +2214,8 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo, (const struct brw_wm_prog_data *)prog_data; return devinfo->verx10 < 125 && !wm_prog_data->persample_dispatch && - wm_prog_data->uses_vmask; + wm_prog_data->uses_vmask && + max_polygons < 2; } case MESA_SHADER_COMPUTE: /* Compute shaders will be spawned with either a fully enabled dispatch diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 90d9f939620..46c51318165 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3293,7 +3293,8 @@ fs_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; - if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) { + if (!brw_stage_has_packed_dispatch(devinfo, stage, max_polygons, + stage_prog_data)) { /* The optimization below assumes that channel zero is live on thread * dispatch, which may not be the case if the fixed function dispatches * threads sparsely. 
@@ -5625,7 +5626,8 @@ fs_visitor::lower_find_live_channel() return false; bool packed_dispatch = - brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data); + brw_stage_has_packed_dispatch(devinfo, stage, max_polygons, + stage_prog_data); bool vmask = stage == MESA_SHADER_FRAGMENT && brw_wm_prog_data(stage_prog_data)->uses_vmask; @@ -8232,13 +8234,15 @@ brw_compile_bs(const struct brw_compiler *compiler, static UNUSED void brw_fs_test_dispatch_packing(const fs_builder &bld) { - const gl_shader_stage stage = bld.shader->stage; + const fs_visitor *shader = static_cast<const fs_visitor *>(bld.shader); + const gl_shader_stage stage = shader->stage; const bool uses_vmask = stage == MESA_SHADER_FRAGMENT && - brw_wm_prog_data(bld.shader->stage_prog_data)->uses_vmask; + brw_wm_prog_data(shader->stage_prog_data)->uses_vmask; - if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage, - bld.shader->stage_prog_data)) { + if (brw_stage_has_packed_dispatch(shader->devinfo, stage, + shader->max_polygons, + shader->stage_prog_data)) { const fs_builder ubld = bld.exec_all().group(1, 0); const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0); const fs_reg mask = uses_vmask ? brw_vmask_reg() : brw_dmask_reg(); diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 631050f31e3..634af11e137 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1208,7 +1208,7 @@ vec4_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; - if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) { + if (!brw_stage_has_packed_dispatch(devinfo, stage, 0, stage_prog_data)) { /* The optimization below assumes that channel zero is live on thread * dispatch, which may not be the case if the fixed function dispatches * threads sparsely.