diff --git a/.pick_status.json b/.pick_status.json index 7a8f708070a..5876cb2bc5d 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -49,7 +49,7 @@ "description": "intel/compiler: make uses_pos_offset a tri-state", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "d8dfd153c50f24ea50578202832b3eccfb61edf8" }, diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 28fa9e3fd65..0bc05a2d7de 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -6505,7 +6505,9 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, * look useful at the moment. We might need this in future. */ ps.PositionXYOffsetSelect = - wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; + brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, + 0 /* msaa_flags */) ? + POSOFFSET_SAMPLE : POSOFFSET_NONE; if (wm_prog_data->base.total_scratch) { struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT); @@ -7272,10 +7274,10 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, * We only require XY sample offsets. So, this recommendation doesn't * look useful at the moment. We might need this in future. */ - if (wm_prog_data->uses_pos_offset) - wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; - else - wm.PositionXYOffsetSelect = POSOFFSET_NONE; + wm.PositionXYOffsetSelect = + brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, + 0 /* msaa_flags */) ? + POSOFFSET_SAMPLE : POSOFFSET_NONE; #endif wm.LineStippleEnable = cso->cso.line_stipple_enable; wm.PolygonStippleEnable = cso->cso.poly_stipple_enable; diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 3717ed0ef54..c888b7690e7 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4936,7 +4936,9 @@ iris_store_fs_state(const struct intel_device_info *devinfo, * look useful at the moment. We might need this in future. */ ps.PositionXYOffsetSelect = - wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; + brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, + 0 /* msaa_flags */) ? + POSOFFSET_SAMPLE : POSOFFSET_NONE; if (prog_data->total_scratch) { INIT_THREAD_SCRATCH_SIZE(ps); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index f456acc35ae..7acffd62742 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -923,7 +923,8 @@ struct brw_wm_prog_data { bool dispatch_16; bool dispatch_32; bool dual_src_blend; - bool uses_pos_offset; + enum brw_sometimes uses_pos_offset; + bool read_pos_offset_input; bool uses_omask; bool uses_kill; bool uses_src_depth; @@ -1186,6 +1187,25 @@ brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data, return prog_data->coarse_pixel_dispatch; } +static inline bool +brw_wm_prog_data_uses_position_xy_offset(const struct brw_wm_prog_data *prog_data, + enum brw_wm_msaa_flags pushed_msaa_flags) +{ + bool per_sample; + if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) { + per_sample = (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) != 0; + } else { + assert(prog_data->persample_dispatch == BRW_ALWAYS || + prog_data->persample_dispatch == BRW_NEVER); + per_sample = prog_data->persample_dispatch == BRW_ALWAYS; + } + + if (!per_sample) + return false; + + return prog_data->read_pos_offset_input; +} + struct brw_push_const_block { unsigned dwords; /* Dword count, not reg aligned */ unsigned regs; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 59637d36fb2..0a0ed57fd9a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7365,12 +7365,16 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader, * per-sample dispatch. If we need gl_SamplePosition and we don't have * persample dispatch, we hard-code it to 0.5. */ - prog_data->uses_pos_offset = - prog_data->persample_dispatch != BRW_NEVER && - (BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS) || - BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS_OR_CENTER)); + prog_data->read_pos_offset_input = + BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS) || + BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS_OR_CENTER); + + if (prog_data->read_pos_offset_input) + prog_data->uses_pos_offset = prog_data->persample_dispatch; + else + prog_data->uses_pos_offset = BRW_NEVER; } prog_data->has_render_target_reads = shader->info.outputs_read != 0ull; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 3ae9d6b1f41..eae7026f79a 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1516,7 +1516,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, ps.BindingTableEntryCount = fs_bin->bind_map.surface_count; ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || wm_prog_data->base.ubo_ranges[0].length; - ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + ps.PositionXYOffsetSelect = + brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, + 0 /* msaa_flags */) ? POSOFFSET_SAMPLE: POSOFFSET_NONE; ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1; diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index bd13d520686..9a8483af687 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -1695,8 +1695,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, ps.BindingTableEntryCount = fs_bin->bind_map.surface_count; ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || wm_prog_data->base.ubo_ranges[0].length; - ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE; + ps.PositionXYOffsetSelect = + brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, + 0 /* msaa_flags */) ? + POSOFFSET_SAMPLE : POSOFFSET_NONE; #if GFX_VER < 8 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;