anv, blorp, iris: Update 3DSTATE_PS programming for xe2

Rework:
 * Jordan: Move code into intel_update_ps_state()

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26600>
This commit is contained in:
Jordan Justen 2022-08-02 17:30:16 -07:00 committed by Marge Bot
parent 80d9294d2d
commit f170995e66
7 changed files with 159 additions and 1 deletions

View file

@ -5134,7 +5134,9 @@ iris_store_fs_state(const struct intel_device_info *devinfo,
ps.MaximumNumberofThreadsPerPSD =
devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#if GFX_VER < 20
ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0;
#endif
/* From the documentation for this packet:
* "If the PS kernel does not need the Position XY Offsets to
@ -7167,15 +7169,19 @@ iris_upload_dirty_render_state(struct iris_context *ice,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif
ps.KernelStartPointer0 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.KernelStartPointer1 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.KernelStartPointer2 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
#if GFX_VERx10 >= 125
ps.ScratchSpaceBuffer = scratch_addr >> 4;

View file

@ -947,15 +947,19 @@ blorp_emit_ps_config(struct blorp_batch *batch,
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
#if GFX_VER < 20
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
#endif
ps.KernelStartPointer0 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
#if GFX_VER < 20
ps.KernelStartPointer2 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
#endif
}
}

View file

@ -119,9 +119,17 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
assert(enable_8 || enable_16 || enable_32);
#if GFX_VER >= 20
/* GFX_VER >= 20 has no SIMD8 pixel dispatch: 3DSTATE_PS exposes two kernel
 * slots, each with an enable bit and a 1-bit SIMD width selector whose
 * values are declared with the "PS" prefix in the genxml description.
 * Use those named values rather than raw numbers or an unprefixed SIMD32,
 * which is not the enum of this field.
 */
assert(!enable_8);
/* Slot 0 is used whenever any kernel is enabled; it holds the SIMD16
 * kernel when there is one, otherwise the SIMD32 kernel.
 */
ps->Kernel0Enable = enable_16 || enable_32;
ps->Kernel0SIMDWidth = enable_16 ? PS_SIMD16 : PS_SIMD32;
/* Slot 1 is only enabled when both widths are enabled, in which case it
 * always holds the SIMD32 kernel.
 */
ps->Kernel1Enable = enable_16 && enable_32;
ps->Kernel1SIMDWidth = PS_SIMD32;
#else
/* Older generations have one dispatch enable bit per SIMD width. */
ps->_8PixelDispatchEnable = enable_8;
ps->_16PixelDispatchEnable = enable_16;
ps->_32PixelDispatchEnable = enable_32;
#endif
}
#endif

View file

@ -1062,6 +1062,39 @@ struct brw_wm_prog_data {
uint8_t urb_setup_attribs_count;
};
#ifdef GFX_VERx10
#if GFX_VERx10 >= 200
/** Returns the SIMD width dispatched through a given KSP slot
 *
 * In this variant every kernel slot carries its own enable bit and width
 * selector, so the answer depends only on the state of the slot being
 * queried:
 *
 *  - 0 when the slot is not enabled,
 *  - 16 when the slot is enabled and the width selector is 0,
 *  - 32 when the slot is enabled and the width selector is non-zero.
 *
 * Only KSP indices 0 and 1 are valid; any other index is treated as
 * unreachable.
 */
static inline unsigned
brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool enabled, unsigned width_sel)
{
   if (ksp_idx >= 2)
      unreachable("Invalid KSP index");

   if (!enabled)
      return 0;

   return width_sel == 0 ? 16 : 32;
}
/* Looks up the SIMD width of a KSP slot from a packet state that has one
 * enable/width pair per kernel (Kernel0* and Kernel1*): index 0 selects the
 * Kernel0 pair, any other index the Kernel1 pair.
 *
 * ksp_idx is parenthesized so that an expression argument (for example a
 * conditional) keeps its meaning inside the comparison.  It is expanded
 * more than once, so it must not have side effects.
 */
#define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx)              \
   ((ksp_idx) == 0 ?                                                    \
    brw_fs_simd_width_for_ksp((ksp_idx), (wm_state).Kernel0Enable,      \
                              (wm_state).Kernel0SIMDWidth) :            \
    brw_fs_simd_width_for_ksp((ksp_idx), (wm_state).Kernel1Enable,      \
                              (wm_state).Kernel1SIMDWidth))
#else
/** Returns the SIMD width corresponding to a given KSP index
*
* The "Variable Pixel Dispatch" table in the PRM (which can be found, for
@ -1091,11 +1124,15 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled,
}
}
/* Looks up the SIMD width of a KSP slot by forwarding the KSP index and the
 * three per-width dispatch enables of the packet state to
 * brw_fs_simd_width_for_ksp().
 */
#define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx) \
brw_fs_simd_width_for_ksp((ksp_idx), (wm_state)._8PixelDispatchEnable, \
(wm_state)._16PixelDispatchEnable, \
(wm_state)._32PixelDispatchEnable)
#endif
#endif
/* Evaluates to non-zero when the given KSP slot is in use, i.e. when
 * brw_wm_state_simd_width_for_ksp() reports a non-zero SIMD width for it.
 */
#define brw_wm_state_has_ksp(wm_state, ksp_idx)                         \
   (brw_wm_state_simd_width_for_ksp((wm_state), (ksp_idx)) != 0)

View file

@ -188,6 +188,97 @@
<field name="LOD" start="240" end="243" type="uint" />
<field name="Render Target View Extent" start="245" end="255" type="uint" />
</instruction>
<!-- 3DSTATE_PS, 12 dwords.  This layout describes two kernel slots
     (0 and 1).  Each slot has its own enable bit, a 1-bit SIMD width
     selector (SIMD16 = 0, SIMD32 = 1, declared with prefix "PS"), a kernel
     start pointer and a dispatch GRF start register.  It declares no
     per-width pixel dispatch enables, no third kernel slot and no push
     constant enable. -->
<instruction name="3DSTATE_PS" bias="2" length="12" engine="render">
<field name="DWord Length" start="0" end="7" type="uint" default="10" />
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="32" />
<field name="3D Command Opcode" start="24" end="26" type="uint" default="0" />
<field name="Command SubType" start="27" end="28" type="uint" default="3" />
<field name="Command Type" start="29" end="31" type="uint" default="3" />
<field name="Kernel 0 Enable" start="32" end="32" type="bool" />
<field name="Kernel[0] : SIMD Width" start="33" end="33" type="uint" prefix="PS">
<value name="SIMD16" value="0" />
<value name="SIMD32" value="1" />
</field>
<field name="Kernel Start Pointer 0" start="38" end="95" type="offset" />
<field name="Software Exception Enable" start="103" end="103" type="bool" />
<field name="Mask Stack Exception Enable" start="107" end="107" type="bool" />
<field name="Illegal Opcode Exception Enable" start="109" end="109" type="bool" />
<field name="Rounding Mode" start="110" end="111" type="uint">
<value name="RTNE" value="0" />
<value name="RU" value="1" />
<value name="RD" value="2" />
<value name="RTZ" value="3" />
</field>
<field name="Floating Point Mode" start="112" end="112" type="uint">
<value name="IEEE-754" value="0" />
<value name="Alternate" value="1" />
</field>
<field name="Thread Dispatch Priority" start="113" end="113" type="uint">
<value name="Normal" value="0" />
<value name="High" value="1" />
</field>
<field name="Binding Table Entry Count" start="114" end="121" type="uint" />
<field name="Single Precision Denormal Mode" start="122" end="122" type="uint">
<value name="Flushed to Zero" value="0" />
<value name="Retained" value="1" />
</field>
<field name="Sampler Count" start="123" end="125" type="uint">
<value name="No Samplers" value="0" />
<value name="1-4 Samplers" value="1" />
<value name="5-8 Samplers" value="2" />
<value name="9-12 Samplers" value="3" />
<value name="13-16 Samplers" value="4" />
</field>
<field name="Vector Mask Enable" start="126" end="126" type="uint">
<value name="Dmask" value="0" />
<value name="Vmask" value="1" />
</field>
<field name="Single Program Flow" start="127" end="127" type="uint">
<value name="Multiple" value="0" />
<value name="Single" value="1" />
</field>
<field name="Scratch Space Buffer" start="138" end="159" type="uint" />
<field name="Position XY Offset Select" start="195" end="196" type="uint">
<value name="POSOFFSET_NONE" value="0" />
<value name="POSOFFSET_CENTROID" value="2" />
<value name="POSOFFSET_SAMPLE" value="3" />
</field>
<field name="Render Target Resolve Type" start="198" end="199" type="uint">
<value name="RESOLVE_DISABLED" value="0" />
<value name="RESOLVE_PARTIAL" value="1" />
<value name="FAST_CLEAR_0" value="2" />
<value name="RESOLVE_FULL" value="3" />
</field>
<field name="Render Target Fast Clear Enable" start="200" end="200" type="bool" />
<field name="Overlapping Subspans Enable" start="201" end="201" type="bool" />
<field name="3D Scoreboard Address Size select" start="202" end="202" type="uint">
<value name="2x2" value="0" />
<value name="4x2" value="1" />
</field>
<field name="Clear/Resolve BTI for Render Target" start="204" end="211" type="uint" />
<field name="Pixel Scoreboard Disable" start="213" end="213" type="bool" />
<field name="Maximum Number of Threads Per PSD" start="215" end="223" type="uint" />
<field name="Dispatch GRF Start Register For Constant/Setup Data 1" start="232" end="239" type="uint" />
<field name="Dispatch GRF Start Register For Constant/Setup Data 0" start="240" end="247" type="uint" />
<field name="Kernel[0] : Poly Packing Policy" start="248" end="250" type="uint">
<value name="POLY_PACK4_DYNAMIC" value="0" />
<value name="POLY_PACK8_DYNAMIC" value="1" />
<value name="POLY_PACK16_DYNAMIC" value="2" />
<value name="POLY_PACK16_FIXED" value="3" />
<value name="POLY_PACK8_FIXED" value="4" />
</field>
<field name="Kernel[0] : Maximum Polys per Thread" start="251" end="253" type="uint" />
<field name="Kernel 1 Selection Priority" start="255" end="255" type="uint">
<value name="PRI_SIMD" value="0" />
<value name="PRI_POLY" value="1" />
</field>
<field name="Kernel 1 Enable" start="256" end="256" type="bool" />
<field name="Kernel[1] : SIMD Width" start="257" end="257" type="uint" prefix="PS">
<value name="SIMD16" value="0" />
<value name="SIMD32" value="1" />
</field>
<field name="Kernel Start Pointer 1" start="262" end="319" type="offset" />
</instruction>
<instruction name="3DSTATE_STENCIL_BUFFER" bias="2" length="8" engine="render">
<field name="DWord Length" start="0" end="7" type="uint" default="6" />
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="6" />

View file

@ -1575,16 +1575,20 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.KernelStartPointer1 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.KernelStartPointer2 = fs_bin->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
ps.SingleProgramFlow = false;
ps.VectorMaskEnable = wm_prog_data->uses_vmask;
/* Wa_1606682166 */
ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(fs_bin);
ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
#if GFX_VER < 20
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
#endif
ps.PositionXYOffsetSelect =
!wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
@ -1595,8 +1599,10 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif
#if GFX_VERx10 >= 125
ps.ScratchSpaceBuffer =

View file

@ -184,22 +184,28 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
ps.VectorMaskEnable = prog_data->uses_vmask;
ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
#endif
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
#if GFX_VER < 20
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
#endif
ps.KernelStartPointer0 = state->kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 0);
ps.KernelStartPointer1 = state->kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 1);
#if GFX_VER < 20
ps.KernelStartPointer2 = state->kernel->kernel.offset +
brw_wm_prog_data_prog_offset(prog_data, ps, 2);
#endif
ps.MaximumNumberofThreadsPerPSD = device->info->max_threads_per_psd - 1;
}