anv: move 3DSTATE_PS to partial packing

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27803>
This commit is contained in:
Lionel Landwerlin 2024-04-02 14:11:29 +03:00 committed by Marge Bot
parent 3a336a98e9
commit 815d2e3e8b
4 changed files with 111 additions and 35 deletions

View file

@ -566,7 +566,6 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
diff_fix_state(VS, final.vs);
diff_fix_state(HS, final.hs);
diff_fix_state(DS, final.ds);
diff_fix_state(PS, final.ps);
diff_fix_state(CLIP, partial.clip);
diff_fix_state(SF, partial.sf);
@ -576,6 +575,7 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
diff_fix_state(GS, partial.gs);
diff_fix_state(TE, partial.te);
diff_fix_state(VFG, partial.vfg);
diff_fix_state(PS, partial.ps);
diff_fix_state(PS_EXTRA, partial.ps_extra);
if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {

View file

@ -1553,6 +1553,31 @@ struct anv_gfx_dynamic_state {
uint32_t LineStippleRepeatCount;
} ls;
/* 3DSTATE_PS
 *
 * Fields of 3DSTATE_PS that are tracked as dynamic state rather than being
 * baked into the pipeline. They are recomputed when the gfx runtime state is
 * flushed (on ANV_CMD_DIRTY_PIPELINE or ANV_CMD_DIRTY_FS_MSAA_FLAGS) and are
 * merged with the pipeline's partially packed instruction (partial.ps) when
 * ANV_GFX_STATE_PS is dirty at emission time.
 */
struct {
/* One of POSOFFSET_NONE, POSOFFSET_SAMPLE or POSOFFSET_CENTROID, chosen
 * from the shader's use of position offsets and whether it runs per-sample.
 */
uint32_t PositionXYOffsetSelect;
/* Fragment shader kernel offset plus the per-kernel program offset.
 * KernelStartPointer2 is only written and emitted when GFX_VER < 20.
 */
uint32_t KernelStartPointer0;
uint32_t KernelStartPointer1;
uint32_t KernelStartPointer2;
/* Per-kernel dispatch GRF start register. The "2" variant is only written
 * and emitted when GFX_VER < 20.
 */
uint32_t DispatchGRFStartRegisterForConstantSetupData0;
uint32_t DispatchGRFStartRegisterForConstantSetupData1;
uint32_t DispatchGRFStartRegisterForConstantSetupData2;
/* Pre-Gfx20 only
 *
 * Copied from the values computed by intel_set_ps_dispatch_state(); all
 * forced to false when there is no fragment shader program data.
 */
bool _8PixelDispatchEnable;
bool _16PixelDispatchEnable;
bool _32PixelDispatchEnable;
/* Gfx20+ only
 *
 * Copied from the values computed by intel_set_ps_dispatch_state(); the two
 * kernel enables are forced to false when there is no fragment shader
 * program data.
 */
bool Kernel0Enable;
bool Kernel1Enable;
uint32_t Kernel0SIMDWidth;
uint32_t Kernel1SIMDWidth;
uint32_t Kernel0PolyPackingPolicy;
} ps;
/* 3DSTATE_PS_EXTRA */
struct {
bool PixelShaderIsPerSample;
@ -4635,7 +4660,6 @@ struct anv_graphics_pipeline {
struct anv_gfx_state_ptr vs;
struct anv_gfx_state_ptr hs;
struct anv_gfx_state_ptr ds;
struct anv_gfx_state_ptr ps;
struct anv_gfx_state_ptr task_control;
struct anv_gfx_state_ptr task_shader;
@ -4659,6 +4683,7 @@ struct anv_graphics_pipeline {
struct anv_gfx_state_ptr so;
struct anv_gfx_state_ptr gs;
struct anv_gfx_state_ptr te;
struct anv_gfx_state_ptr ps;
struct anv_gfx_state_ptr vfg;
} partial;
};

View file

@ -31,6 +31,7 @@
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_genX_state_brw.h"
#include "common/intel_guardband.h"
#include "common/intel_tiled_render.h"
#include "compiler/brw_prim.h"
@ -580,6 +581,52 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_FS_MSAA_FLAGS)) {
if (wm_prog_data) {
const struct anv_shader_bin *fs_bin =
pipeline->base.shaders[MESA_SHADER_FRAGMENT];
struct GENX(3DSTATE_PS) ps = {};
intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
MAX2(dyn->ms.rasterization_samples, 1),
gfx->fs_msaa_flags);
SET(PS, ps.KernelStartPointer0,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
SET(PS, ps.KernelStartPointer1,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
SET(PS, ps.KernelStartPointer2,
fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
#if GFX_VER < 20
SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
#endif
#if GFX_VER < 20
SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable);
SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
#else
SET(PS, ps.Kernel0Enable, ps.Kernel0Enable);
SET(PS, ps.Kernel1Enable, ps.Kernel1Enable);
SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth);
SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth);
SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
#endif
SET(PS, ps.PositionXYOffsetSelect,
!wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
brw_wm_prog_data_is_persample(wm_prog_data, gfx->fs_msaa_flags) ?
POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
brw_wm_prog_data_is_persample(wm_prog_data, gfx->fs_msaa_flags));
#if GFX_VER >= 11
@ -595,6 +642,15 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
#endif
SET(WM, wm.BarycentricInterpolationMode,
wm_prog_data_barycentric_modes(wm_prog_data, gfx->fs_msaa_flags));
} else {
#if GFX_VER < 20
SET(PS, ps._8PixelDispatchEnable, false);
SET(PS, ps._16PixelDispatchEnable, false);
SET(PS, ps._32PixelDispatchEnable, false);
#else
SET(PS, ps.Kernel0Enable, false);
SET(PS, ps.Kernel1Enable, false);
#endif
}
}
@ -1609,9 +1665,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
#endif
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps);
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_control);
@ -1654,6 +1707,32 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
/* Now the potentially dynamic instructions */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS),
pipeline, partial.ps, ps) {
SET(ps, ps, KernelStartPointer0);
SET(ps, ps, KernelStartPointer1);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);
#if GFX_VER < 20
SET(ps, ps, KernelStartPointer2);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);
SET(ps, ps, _8PixelDispatchEnable);
SET(ps, ps, _16PixelDispatchEnable);
SET(ps, ps, _32PixelDispatchEnable);
#else
SET(ps, ps, Kernel0Enable);
SET(ps, ps, Kernel1Enable);
SET(ps, ps, Kernel0SIMDWidth);
SET(ps, ps, Kernel1SIMDWidth);
SET(ps, ps, Kernel0PolyPackingPolicy);
#endif
SET(ps, ps, PositionXYOffsetSelect);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
pipeline, partial.ps_extra, pse) {

View file

@ -1578,20 +1578,13 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
pipeline->base.shaders[MESA_SHADER_FRAGMENT];
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_pipeline_emit(pipeline, final.ps, GENX(3DSTATE_PS), ps);
anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps);
return;
}
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_pipeline_emit(pipeline, final.ps, GENX(3DSTATE_PS), ps) {
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
ms != NULL ? ms->rasterization_samples : 1,
pipeline->fs_msaa_flags);
const bool persample =
brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 12
assert(wm_prog_data->dispatch_multi == 0 ||
(wm_prog_data->dispatch_multi == 16 && wm_prog_data->max_polygons == 2));
@ -1604,15 +1597,6 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.OverlappingSubspansEnable = false;
#endif
ps.KernelStartPointer0 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.KernelStartPointer1 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.KernelStartPointer2 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
ps.SingleProgramFlow = false;
ps.VectorMaskEnable = wm_prog_data->uses_vmask;
/* Wa_1606682166 */
@ -1622,21 +1606,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
#endif
ps.PositionXYOffsetSelect =
!wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif
#if GFX_VERx10 >= 125
ps.ScratchSpaceBuffer =
get_scratch_surf(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin);