mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 18:50:10 +01:00
intel/blorp: Remove Gfx9+ references in elk code
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
This commit is contained in:
parent
0e582f0dfd
commit
80cfc3d712
2 changed files with 25 additions and 687 deletions
|
|
@ -24,6 +24,10 @@
|
|||
#ifndef BLORP_GENX_EXEC_ELK_H
|
||||
#define BLORP_GENX_EXEC_ELK_H
|
||||
|
||||
#if GFX_VER > 8
|
||||
#error "ELK doesn't support Gfx > 8."
|
||||
#endif
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
|
|
@ -106,7 +110,7 @@ static uint64_t
|
|||
blorp_get_surface_address(struct blorp_batch *batch,
|
||||
struct blorp_address address);
|
||||
|
||||
#if GFX_VER >= 7 && GFX_VER < 10
|
||||
#if GFX_VER >= 7
|
||||
static struct blorp_address
|
||||
blorp_get_surface_base_address(struct blorp_batch *batch);
|
||||
#endif
|
||||
|
|
@ -300,13 +304,6 @@ emit_urb_config(struct blorp_batch *batch,
|
|||
}
|
||||
}
|
||||
|
||||
if (batch->blorp->config.use_mesh_shading) {
|
||||
#if GFX_VERx10 >= 125
|
||||
blorp_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
|
||||
blorp_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
|
||||
#endif
|
||||
}
|
||||
|
||||
#else /* GFX_VER < 7 */
|
||||
struct intel_urb_config urb_cfg = {
|
||||
.size = { vs_entry_size, 0, 0, 0, sf_entry_size, },
|
||||
|
|
@ -400,8 +397,7 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
|
|||
struct blorp_address clear_color_input_addr = *addr;
|
||||
clear_color_input_addr.offset += 16;
|
||||
|
||||
const unsigned clear_color_size =
|
||||
GFX_VER < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;
|
||||
const unsigned clear_color_size = batch->blorp->isl_dev->ss.clear_value_size;
|
||||
blorp_emit_memcpy(batch, clear_color_input_addr,
|
||||
params->dst.clear_color_addr,
|
||||
clear_color_size);
|
||||
|
|
@ -439,10 +435,6 @@ blorp_fill_vertex_buffer_state(struct GENX(VERTEX_BUFFER_STATE) *vb,
|
|||
vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;
|
||||
vb[idx].MaxIndex = stride > 0 ? size / stride : 0;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
vb[idx].L3BypassDisable = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -638,10 +630,6 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
|
|||
sgvs.InstanceIDElementOffset = 0;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 11
|
||||
blorp_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
|
||||
#endif
|
||||
|
||||
for (unsigned i = 0; i < num_elements; i++) {
|
||||
blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
|
||||
vf.VertexElementIndex = i;
|
||||
|
|
@ -741,8 +729,6 @@ blorp_emit_vs_config(struct blorp_batch *batch,
|
|||
const struct blorp_params *params)
|
||||
{
|
||||
struct elk_vs_prog_data *vs_prog_data = params->vs_prog_data;
|
||||
assert(!vs_prog_data || GFX_VER < 11 ||
|
||||
vs_prog_data->base.dispatch_mode == INTEL_DISPATCH_MODE_SIMD8);
|
||||
|
||||
blorp_emit(batch, GENX(3DSTATE_VS), vs) {
|
||||
if (vs_prog_data) {
|
||||
|
|
@ -797,9 +783,6 @@ blorp_emit_sf_config(struct blorp_batch *batch,
|
|||
#if GFX_VER >= 8
|
||||
|
||||
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
|
||||
#if GFX_VER >= 12
|
||||
sf.DerefBlockSize = urb_deref_block_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
|
||||
|
|
@ -818,11 +801,6 @@ blorp_emit_sf_config(struct blorp_batch *batch,
|
|||
}
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
|
||||
#if GFX_VER >= 9
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
||||
#endif
|
||||
}
|
||||
|
||||
#elif GFX_VER >= 7
|
||||
|
|
@ -919,24 +897,9 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
switch (params->fast_clear_op) {
|
||||
case ISL_AUX_OP_NONE:
|
||||
break;
|
||||
#if GFX_VER >= 10
|
||||
case ISL_AUX_OP_AMBIGUATE:
|
||||
ps.RenderTargetFastClearEnable = true;
|
||||
ps.RenderTargetResolveType = FAST_CLEAR_0;
|
||||
break;
|
||||
#endif
|
||||
#if GFX_VER >= 9
|
||||
case ISL_AUX_OP_PARTIAL_RESOLVE:
|
||||
ps.RenderTargetResolveType = RESOLVE_PARTIAL;
|
||||
break;
|
||||
case ISL_AUX_OP_FULL_RESOLVE:
|
||||
ps.RenderTargetResolveType = RESOLVE_FULL;
|
||||
break;
|
||||
#else
|
||||
case ISL_AUX_OP_FULL_RESOLVE:
|
||||
ps.RenderTargetResolveEnable = true;
|
||||
break;
|
||||
#endif
|
||||
case ISL_AUX_OP_FAST_CLEAR:
|
||||
ps.RenderTargetFastClearEnable = true;
|
||||
break;
|
||||
|
|
@ -978,33 +941,24 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
elk_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
||||
elk_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
|
||||
#if GFX_VER < 20
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
||||
elk_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
|
||||
#endif
|
||||
|
||||
ps.KernelStartPointer0 = params->wm_prog_kernel +
|
||||
elk_wm_prog_data_prog_offset(prog_data, ps, 0);
|
||||
ps.KernelStartPointer1 = params->wm_prog_kernel +
|
||||
elk_wm_prog_data_prog_offset(prog_data, ps, 1);
|
||||
#if GFX_VER < 20
|
||||
ps.KernelStartPointer2 = params->wm_prog_kernel +
|
||||
elk_wm_prog_data_prog_offset(prog_data, ps, 2);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
|
||||
if (prog_data) {
|
||||
psx.PixelShaderValid = true;
|
||||
#if GFX_VER < 20
|
||||
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
|
||||
#endif
|
||||
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
|
||||
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
|
||||
#if GFX_VER >= 9
|
||||
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (params->src.enabled)
|
||||
|
|
@ -1285,9 +1239,6 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch,
|
|||
ds.StencilPassDepthPassOp = STENCILOP_REPLACE;
|
||||
|
||||
ds.StencilWriteMask = params->stencil_mask;
|
||||
#if GFX_VER >= 9
|
||||
ds.StencilReferenceValue = params->stencil_ref;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
|
|
@ -1313,14 +1264,6 @@ blorp_emit_depth_stencil_state(struct blorp_batch *batch,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
blorp_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
|
||||
db.DepthBoundsTestEnable = false;
|
||||
db.DepthBoundsTestMinValue = 0.0;
|
||||
db.DepthBoundsTestMaxValue = 1.0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
|
@ -1399,16 +1342,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
|
|||
|
||||
UNUSED uint32_t mocs = isl_mocs(batch->blorp->isl_dev, 0, false);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
blorp_emit(batch, GENX(3DSTATE_CONSTANT_ALL), pc) {
|
||||
/* Update empty push constants for all stages (bitmask = 11111b) */
|
||||
pc.ShaderUpdateEnable = 0x1f;
|
||||
pc.MOCS = mocs;
|
||||
}
|
||||
#else
|
||||
#if GFX_VER >= 9
|
||||
#define CONSTANT_MOCS xs.MOCS = mocs
|
||||
#elif GFX_VER == 7
|
||||
#if GFX_VER == 7
|
||||
#define CONSTANT_MOCS xs.ConstantBody.MOCS = mocs
|
||||
#else
|
||||
#define CONSTANT_MOCS
|
||||
|
|
@ -1420,7 +1354,6 @@ blorp_emit_pipeline(struct blorp_batch *batch,
|
|||
#endif
|
||||
blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), xs) { CONSTANT_MOCS; }
|
||||
blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), xs) { CONSTANT_MOCS; }
|
||||
#endif
|
||||
#undef CONSTANT_MOCS
|
||||
|
||||
if (params->src.enabled)
|
||||
|
|
@ -1459,18 +1392,6 @@ blorp_emit_pipeline(struct blorp_batch *batch,
|
|||
blorp_emit_ps_config(batch, params);
|
||||
|
||||
blorp_emit_cc_viewport(batch);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
/* Disable Primitive Replication. */
|
||||
blorp_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
||||
#endif
|
||||
|
||||
if (batch->blorp->config.use_mesh_shading) {
|
||||
#if GFX_VERx10 >= 125
|
||||
blorp_emit(batch, GENX(3DSTATE_MESH_CONTROL), zero);
|
||||
blorp_emit(batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/******** This is the end of the pipeline setup code ********/
|
||||
|
|
@ -1555,9 +1476,6 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
|
||||
}
|
||||
|
||||
const bool use_clear_address =
|
||||
GFX_VER >= 10 && (surface->clear_color_addr.buffer != NULL);
|
||||
|
||||
isl_surf_fill_state(batch->blorp->isl_dev, state,
|
||||
.surf = &surf, .view = &surface->view,
|
||||
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
|
||||
|
|
@ -1565,12 +1483,8 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
blorp_get_surface_address(batch, surface->addr),
|
||||
.aux_address = !use_aux_address ? 0 :
|
||||
blorp_get_surface_address(batch, surface->aux_addr),
|
||||
.clear_address = !use_clear_address ? 0 :
|
||||
blorp_get_surface_address(batch,
|
||||
surface->clear_color_addr),
|
||||
.mocs = surface->addr.mocs,
|
||||
.clear_color = surface->clear_color,
|
||||
.use_clear_address = use_clear_address,
|
||||
.write_disables = write_disable_mask);
|
||||
|
||||
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
|
||||
|
|
@ -1588,13 +1502,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {
|
||||
#if GFX_VER >= 10
|
||||
assert((surface->clear_color_addr.offset & 0x3f) == 0);
|
||||
uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
|
||||
blorp_surface_reloc(batch, state_offset +
|
||||
isl_dev->ss.clear_color_state_offset,
|
||||
surface->clear_color_addr, *clear_addr);
|
||||
#elif GFX_VER >= 7
|
||||
#if GFX_VER >= 7
|
||||
/* Fast clears just whack the AUX surface and don't actually use the
|
||||
* clear color for anything. We can avoid the MI memcpy on that case.
|
||||
*/
|
||||
|
|
@ -1635,9 +1543,7 @@ blorp_emit_null_surface_state(struct blorp_batch *batch,
|
|||
.SurfaceArray = surface->surf.dim != ISL_SURF_DIM_3D,
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
.TileMode = TILE4,
|
||||
#elif GFX_VER >= 8
|
||||
#if GFX_VER >= 8
|
||||
.TileMode = YMAJOR,
|
||||
#else
|
||||
.TiledSurface = true,
|
||||
|
|
@ -1798,22 +1704,6 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
isl_emit_depth_stencil_hiz_s(isl_dev, dw, &info);
|
||||
|
||||
#if GFX_VER >= 11
|
||||
/* Wa_1408224581
|
||||
*
|
||||
* Workaround: Gfx12LP Astep only An additional pipe control with
|
||||
* post-sync = store dword operation would be required.( w/a is to
|
||||
* have an additional pipe control after the stencil state whenever
|
||||
* the surface state bits of this state is changing).
|
||||
*
|
||||
* This also seems sufficient to handle Wa_14014097488.
|
||||
*/
|
||||
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.PostSyncOperation = WriteImmediateData;
|
||||
pc.Address = blorp_get_workaround_address(batch);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
|
|
@ -1891,9 +1781,6 @@ blorp_emit_gfx8_hiz_op(struct blorp_batch *batch,
|
|||
hzp.DepthBufferClearEnable = params->depth.enabled;
|
||||
hzp.StencilClearValue = params->stencil_ref;
|
||||
hzp.FullSurfaceDepthandStencilClear = params->full_surface_hiz_op;
|
||||
#if GFX_VER >= 20
|
||||
hzp.DepthClearValue = params->depth.clear_color.f32[0];
|
||||
#endif
|
||||
break;
|
||||
case ISL_AUX_OP_FULL_RESOLVE:
|
||||
assert(params->full_surface_hiz_op);
|
||||
|
|
@ -1942,84 +1829,7 @@ blorp_update_clear_color(UNUSED struct blorp_batch *batch,
|
|||
const struct blorp_surface_info *info)
|
||||
{
|
||||
assert(info->clear_color_addr.buffer != NULL);
|
||||
#if GFX_VER == 11
|
||||
/* 2 QWORDS */
|
||||
const unsigned inlinedata_dw = 2 * 2;
|
||||
const unsigned num_dwords = GENX(MI_ATOMIC_length) + inlinedata_dw;
|
||||
|
||||
struct blorp_address clear_addr = info->clear_color_addr;
|
||||
uint32_t *dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,
|
||||
.DataSize = MI_ATOMIC_QWORD,
|
||||
.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,
|
||||
.InlineData = true,
|
||||
.MemoryAddress = clear_addr);
|
||||
/* dw starts at dword 1, but we need to fill dwords 3 and 5 */
|
||||
dw[2] = info->clear_color.u32[0];
|
||||
dw[3] = 0;
|
||||
dw[4] = info->clear_color.u32[1];
|
||||
dw[5] = 0;
|
||||
|
||||
clear_addr.offset += 8;
|
||||
dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,
|
||||
.DataSize = MI_ATOMIC_QWORD,
|
||||
.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,
|
||||
.CSSTALL = true,
|
||||
.ReturnDataControl = true,
|
||||
.InlineData = true,
|
||||
.MemoryAddress = clear_addr);
|
||||
/* dw starts at dword 1, but we need to fill dwords 3 and 5 */
|
||||
dw[2] = info->clear_color.u32[2];
|
||||
dw[3] = 0;
|
||||
dw[4] = info->clear_color.u32[3];
|
||||
dw[5] = 0;
|
||||
|
||||
#elif GFX_VER >= 9
|
||||
|
||||
/* According to Wa_2201730850, in the Clear Color Programming Note under
|
||||
* the Red channel, "Software shall write the converted Depth Clear to this
|
||||
* dword." The only depth formats listed under the red channel are IEEE_FP
|
||||
* and UNORM24_X8. These two requirements are incompatible with the UNORM16
|
||||
* depth format, so just ignore that case and simply perform the conversion
|
||||
* for all depth formats.
|
||||
*/
|
||||
union isl_color_value fixed_color = info->clear_color;
|
||||
if (GFX_VER == 12 && isl_surf_usage_is_depth(info->surf.usage)) {
|
||||
isl_color_value_pack(&info->clear_color, info->surf.format,
|
||||
fixed_color.u32);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||
sdi.Address = info->clear_color_addr;
|
||||
sdi.Address.offset += i * 4;
|
||||
sdi.ImmediateData = fixed_color.u32[i];
|
||||
#if GFX_VER >= 12
|
||||
if (i == 3)
|
||||
sdi.ForceWriteCompletionCheck = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* The RENDER_SURFACE_STATE::ClearColor field states that software should
|
||||
* write the converted depth value 16B after the clear address:
|
||||
*
|
||||
* 3D Sampler will always fetch clear depth from the location 16-bytes
|
||||
* above this address, where the clear depth, converted to native
|
||||
* surface format by software, will be stored.
|
||||
*
|
||||
*/
|
||||
#if GFX_VER >= 12
|
||||
if (isl_surf_usage_is_depth(info->surf.usage)) {
|
||||
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||
sdi.Address = info->clear_color_addr;
|
||||
sdi.Address.offset += 4 * 4;
|
||||
sdi.ImmediateData = fixed_color.u32[0];
|
||||
sdi.ForceWriteCompletionCheck = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#elif GFX_VER >= 7
|
||||
#if GFX_VER >= 7
|
||||
blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||
sdi.Address = info->clear_color_addr;
|
||||
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
|
||||
|
|
@ -2098,9 +1908,6 @@ blorp_exec_3d(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
||||
#if GFX_VER >= 7
|
||||
prim.PredicateEnable = batch->flags & BLORP_BATCH_PREDICATE_ENABLE;
|
||||
#endif
|
||||
#if GFX_VERx10 >= 125
|
||||
prim.TBIMREnable = use_tbimr;
|
||||
#endif
|
||||
prim.VertexCountPerInstance = 3;
|
||||
prim.InstanceCount = params->num_layers;
|
||||
|
|
@ -2131,9 +1938,6 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
|
|||
|
||||
uint32_t push_const_offset;
|
||||
uint32_t *push_const =
|
||||
GFX_VERx10 >= 125 ?
|
||||
blorp_alloc_general_state(batch, push_const_size, 64,
|
||||
&push_const_offset) :
|
||||
blorp_alloc_dynamic_state(batch, push_const_size, 64,
|
||||
&push_const_offset);
|
||||
memset(push_const, 0x0, push_const_size);
|
||||
|
|
@ -2147,8 +1951,6 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
|
|||
src += cs_prog_data->push.cross_thread.size;
|
||||
}
|
||||
|
||||
assert(GFX_VERx10 < 125 || cs_prog_data->push.per_thread.size == 0);
|
||||
#if GFX_VERx10 < 125
|
||||
if (cs_prog_data->push.per_thread.size > 0) {
|
||||
for (unsigned t = 0; t < threads; t++) {
|
||||
memcpy(dst, src, (cs_prog_data->push.per_thread.dwords - 1) * 4);
|
||||
|
|
@ -2159,7 +1961,6 @@ blorp_get_compute_push_const(struct blorp_batch *batch,
|
|||
dst += cs_prog_data->push.per_thread.size;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*state_offset = push_const_offset;
|
||||
*state_size = push_const_size;
|
||||
|
|
@ -2194,57 +1995,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
|
||||
#endif /* GFX_VER >= 7 */
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
assert(cs_prog_data->push.per_thread.regs == 0);
|
||||
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
|
||||
cw.SIMDSize = dispatch.simd_size / 16;
|
||||
cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
|
||||
cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
|
||||
cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
|
||||
cw.ThreadGroupIDStartingX = group_x0;
|
||||
cw.ThreadGroupIDStartingY = group_y0;
|
||||
cw.ThreadGroupIDStartingZ = group_z0;
|
||||
cw.ThreadGroupIDXDimension = group_x1;
|
||||
cw.ThreadGroupIDYDimension = group_y1;
|
||||
cw.ThreadGroupIDZDimension = group_z1;
|
||||
cw.ExecutionMask = 0xffffffff;
|
||||
cw.PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false);
|
||||
|
||||
uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
|
||||
|
||||
uint32_t samplers_offset =
|
||||
params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
|
||||
|
||||
uint32_t push_const_offset;
|
||||
unsigned push_const_size;
|
||||
blorp_get_compute_push_const(batch, params, dispatch.threads,
|
||||
&push_const_offset, &push_const_size);
|
||||
cw.IndirectDataStartAddress = push_const_offset;
|
||||
cw.IndirectDataLength = push_const_size;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
cw.GenerateLocalID = cs_prog_data->generate_local_id != 0;
|
||||
cw.EmitLocal = cs_prog_data->generate_local_id;
|
||||
cw.WalkOrder = cs_prog_data->walk_order;
|
||||
cw.TileLayout = cs_prog_data->walk_order == ELK_WALK_ORDER_YXZ ?
|
||||
TileY32bpe : Linear;
|
||||
#endif
|
||||
|
||||
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
||||
.KernelStartPointer = params->cs_prog_kernel,
|
||||
.SamplerStatePointer = samplers_offset,
|
||||
.SamplerCount = params->src.enabled ? 1 : 0,
|
||||
.BindingTableEntryCount = params->src.enabled ? 2 : 1,
|
||||
.BindingTablePointer = surfaces_offset,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, prog_data->total_shared),
|
||||
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
|
||||
.NumberOfBarriers = cs_prog_data->uses_barrier,
|
||||
};
|
||||
}
|
||||
|
||||
#elif GFX_VER >= 7
|
||||
#if GFX_VER >= 7
|
||||
|
||||
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:
|
||||
*
|
||||
|
|
@ -2266,13 +2017,9 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.NumberofURBEntries = GFX_VER >= 8 ? 2 : 0;
|
||||
#if GFX_VER < 11
|
||||
vfe.ResetGatewayTimer =
|
||||
Resettingrelativetimerandlatchingtheglobaltimestamp;
|
||||
#endif
|
||||
#if GFX_VER < 9
|
||||
vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
|
||||
#endif
|
||||
#if GFX_VER == 7
|
||||
vfe.GPGPUMode = true;
|
||||
#endif
|
||||
|
|
@ -2307,8 +2054,8 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
.BindingTablePointer = surfaces_offset,
|
||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.SharedLocalMemorySize = elk_encode_slm_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
#if GFX_VER >= 8 || GFX_VERx10 == 75
|
||||
.CrossThreadConstantDataReadLength =
|
||||
|
|
@ -2354,337 +2101,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
blorp_measure_end(batch, params);
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
* -- BLORP on blitter
|
||||
* -----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "isl/isl_genX_helpers.h"
|
||||
|
||||
#if GFX_VER >= 12
|
||||
static uint32_t
|
||||
xy_bcb_tiling(const struct isl_surf *surf)
|
||||
{
|
||||
switch (surf->tiling) {
|
||||
case ISL_TILING_LINEAR:
|
||||
return XY_TILE_LINEAR;
|
||||
#if GFX_VERx10 >= 125
|
||||
case ISL_TILING_X:
|
||||
return XY_TILE_X;
|
||||
case ISL_TILING_4:
|
||||
return XY_TILE_4;
|
||||
case ISL_TILING_64:
|
||||
case ISL_TILING_64_XE2:
|
||||
return XY_TILE_64;
|
||||
#else
|
||||
case ISL_TILING_Y0:
|
||||
return XY_TILE_Y;
|
||||
#endif
|
||||
default:
|
||||
unreachable("Invalid tiling for XY_BLOCK_COPY_BLT");
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
xy_color_depth(const struct isl_format_layout *fmtl)
|
||||
{
|
||||
switch (fmtl->bpb) {
|
||||
case 128: return XY_BPP_128_BIT;
|
||||
case 96: return XY_BPP_96_BIT;
|
||||
case 64: return XY_BPP_64_BIT;
|
||||
case 32: return XY_BPP_32_BIT;
|
||||
case 16: return XY_BPP_16_BIT;
|
||||
case 8: return XY_BPP_8_BIT;
|
||||
default:
|
||||
unreachable("Invalid bpp");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static uint32_t
|
||||
xy_bcb_surf_dim(const struct isl_surf *surf)
|
||||
{
|
||||
switch (surf->dim) {
|
||||
case ISL_SURF_DIM_1D:
|
||||
return XY_SURFTYPE_1D;
|
||||
case ISL_SURF_DIM_2D:
|
||||
return XY_SURFTYPE_2D;
|
||||
case ISL_SURF_DIM_3D:
|
||||
return XY_SURFTYPE_3D;
|
||||
default:
|
||||
unreachable("Invalid dimensionality for XY_BLOCK_COPY_BLT");
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
xy_bcb_surf_depth(const struct isl_surf *surf)
|
||||
{
|
||||
return surf->dim == ISL_SURF_DIM_3D ? surf->logical_level0_px.depth
|
||||
: surf->logical_level0_px.array_len;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
xy_aux_mode(const struct blorp_surface_info *info)
|
||||
{
|
||||
switch (info->aux_usage) {
|
||||
case ISL_AUX_USAGE_CCS_E:
|
||||
case ISL_AUX_USAGE_FCV_CCS_E:
|
||||
case ISL_AUX_USAGE_STC_CCS:
|
||||
return XY_CCS_E;
|
||||
case ISL_AUX_USAGE_NONE:
|
||||
return XY_NONE;
|
||||
default:
|
||||
unreachable("Unsupported aux mode");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
UNUSED static void
|
||||
blorp_xy_block_copy_blt(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
#if GFX_VER < 12
|
||||
unreachable("Blitter is only supported on Gfx12+");
|
||||
#else
|
||||
UNUSED const struct isl_device *isl_dev = batch->blorp->isl_dev;
|
||||
|
||||
assert(batch->flags & BLORP_BATCH_USE_BLITTER);
|
||||
assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
|
||||
assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
|
||||
assert(params->hiz_op == ISL_AUX_OP_NONE);
|
||||
|
||||
assert(params->num_layers == 1);
|
||||
assert(params->dst.view.levels == 1);
|
||||
assert(params->src.view.levels == 1);
|
||||
|
||||
#if GFX_VERx10 < 125
|
||||
assert(params->dst.view.base_array_layer == 0);
|
||||
assert(params->dst.z_offset == 0);
|
||||
#endif
|
||||
|
||||
unsigned dst_x0 = params->x0;
|
||||
unsigned dst_x1 = params->x1;
|
||||
unsigned src_x0 =
|
||||
dst_x0 - params->wm_inputs.coord_transform[0].offset;
|
||||
ASSERTED unsigned src_x1 =
|
||||
dst_x1 - params->wm_inputs.coord_transform[0].offset;
|
||||
unsigned dst_y0 = params->y0;
|
||||
unsigned dst_y1 = params->y1;
|
||||
unsigned src_y0 =
|
||||
dst_y0 - params->wm_inputs.coord_transform[1].offset;
|
||||
ASSERTED unsigned src_y1 =
|
||||
dst_y1 - params->wm_inputs.coord_transform[1].offset;
|
||||
|
||||
assert(src_x1 - src_x0 == dst_x1 - dst_x0);
|
||||
assert(src_y1 - src_y0 == dst_y1 - dst_y0);
|
||||
|
||||
const struct isl_surf *src_surf = &params->src.surf;
|
||||
const struct isl_surf *dst_surf = &params->dst.surf;
|
||||
|
||||
const struct isl_format_layout *fmtl =
|
||||
isl_format_get_layout(params->dst.view.format);
|
||||
|
||||
if (fmtl->bpb == 96) {
|
||||
assert(src_surf->tiling == ISL_TILING_LINEAR &&
|
||||
dst_surf->tiling == ISL_TILING_LINEAR);
|
||||
}
|
||||
|
||||
assert(src_surf->samples == 1);
|
||||
assert(dst_surf->samples == 1);
|
||||
|
||||
unsigned dst_pitch_unit = dst_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
|
||||
unsigned src_pitch_unit = src_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
struct isl_extent3d src_align = isl_get_image_alignment(src_surf);
|
||||
struct isl_extent3d dst_align = isl_get_image_alignment(dst_surf);
|
||||
#endif
|
||||
|
||||
blorp_emit(batch, GENX(XY_BLOCK_COPY_BLT), blt) {
|
||||
blt.ColorDepth = xy_color_depth(fmtl);
|
||||
|
||||
blt.DestinationPitch = (dst_surf->row_pitch_B / dst_pitch_unit) - 1;
|
||||
blt.DestinationMOCS = params->dst.addr.mocs;
|
||||
blt.DestinationTiling = xy_bcb_tiling(dst_surf);
|
||||
blt.DestinationX1 = dst_x0;
|
||||
blt.DestinationY1 = dst_y0;
|
||||
blt.DestinationX2 = dst_x1;
|
||||
blt.DestinationY2 = dst_y1;
|
||||
blt.DestinationBaseAddress = params->dst.addr;
|
||||
blt.DestinationXOffset = params->dst.tile_x_sa;
|
||||
blt.DestinationYOffset = params->dst.tile_y_sa;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
blt.DestinationSurfaceType = xy_bcb_surf_dim(dst_surf);
|
||||
blt.DestinationSurfaceWidth = dst_surf->logical_level0_px.w - 1;
|
||||
blt.DestinationSurfaceHeight = dst_surf->logical_level0_px.h - 1;
|
||||
blt.DestinationSurfaceDepth = xy_bcb_surf_depth(dst_surf) - 1;
|
||||
blt.DestinationArrayIndex =
|
||||
params->dst.view.base_array_layer + params->dst.z_offset;
|
||||
blt.DestinationSurfaceQPitch = isl_get_qpitch(dst_surf) >> 2;
|
||||
blt.DestinationLOD = params->dst.view.base_level;
|
||||
blt.DestinationMipTailStartLOD = dst_surf->miptail_start_level;
|
||||
blt.DestinationHorizontalAlign = isl_encode_halign(dst_align.width);
|
||||
blt.DestinationVerticalAlign = isl_encode_valign(dst_align.height);
|
||||
/* XY_BLOCK_COPY_BLT only supports AUX_CCS. */
|
||||
blt.DestinationDepthStencilResource =
|
||||
params->dst.aux_usage == ISL_AUX_USAGE_STC_CCS;
|
||||
blt.DestinationTargetMemory =
|
||||
params->dst.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
|
||||
|
||||
if (params->dst.aux_usage != ISL_AUX_USAGE_NONE) {
|
||||
blt.DestinationAuxiliarySurfaceMode = xy_aux_mode(&params->dst);
|
||||
blt.DestinationCompressionEnable = true;
|
||||
blt.DestinationCompressionFormat =
|
||||
isl_get_render_compression_format(dst_surf->format);
|
||||
blt.DestinationClearValueEnable = !!params->dst.clear_color_addr.buffer;
|
||||
blt.DestinationClearAddress = params->dst.clear_color_addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
blt.SourceX1 = src_x0;
|
||||
blt.SourceY1 = src_y0;
|
||||
blt.SourcePitch = (src_surf->row_pitch_B / src_pitch_unit) - 1;
|
||||
blt.SourceMOCS = params->src.addr.mocs;
|
||||
blt.SourceTiling = xy_bcb_tiling(src_surf);
|
||||
blt.SourceBaseAddress = params->src.addr;
|
||||
blt.SourceXOffset = params->src.tile_x_sa;
|
||||
blt.SourceYOffset = params->src.tile_y_sa;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
blt.SourceSurfaceType = xy_bcb_surf_dim(src_surf);
|
||||
blt.SourceSurfaceWidth = src_surf->logical_level0_px.w - 1;
|
||||
blt.SourceSurfaceHeight = src_surf->logical_level0_px.h - 1;
|
||||
blt.SourceSurfaceDepth = xy_bcb_surf_depth(src_surf) - 1;
|
||||
blt.SourceArrayIndex =
|
||||
params->src.view.base_array_layer + params->src.z_offset;
|
||||
blt.SourceSurfaceQPitch = isl_get_qpitch(src_surf) >> 2;
|
||||
blt.SourceLOD = params->src.view.base_level;
|
||||
blt.SourceMipTailStartLOD = src_surf->miptail_start_level;
|
||||
blt.SourceHorizontalAlign = isl_encode_halign(src_align.width);
|
||||
blt.SourceVerticalAlign = isl_encode_valign(src_align.height);
|
||||
/* XY_BLOCK_COPY_BLT only supports AUX_CCS. */
|
||||
blt.SourceDepthStencilResource =
|
||||
params->src.aux_usage == ISL_AUX_USAGE_STC_CCS;
|
||||
blt.SourceTargetMemory =
|
||||
params->src.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
|
||||
|
||||
if (params->src.aux_usage != ISL_AUX_USAGE_NONE) {
|
||||
blt.SourceAuxiliarySurfaceMode = xy_aux_mode(&params->src);
|
||||
blt.SourceCompressionEnable = true;
|
||||
blt.SourceCompressionFormat =
|
||||
isl_get_render_compression_format(src_surf->format);
|
||||
blt.SourceClearValueEnable = !!params->src.clear_color_addr.buffer;
|
||||
blt.SourceClearAddress = params->src.clear_color_addr;
|
||||
}
|
||||
|
||||
/* XeHP needs special MOCS values for the blitter */
|
||||
blt.DestinationMOCS = isl_dev->mocs.blitter_dst;
|
||||
blt.SourceMOCS = isl_dev->mocs.blitter_src;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
UNUSED static void
|
||||
blorp_xy_fast_color_blit(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
#if GFX_VER < 12
|
||||
unreachable("Blitter is only supported on Gfx12+");
|
||||
#else
|
||||
UNUSED const struct isl_device *isl_dev = batch->blorp->isl_dev;
|
||||
const struct isl_surf *dst_surf = &params->dst.surf;
|
||||
const struct isl_format_layout *fmtl =
|
||||
isl_format_get_layout(params->dst.view.format);
|
||||
|
||||
assert(batch->flags & BLORP_BATCH_USE_BLITTER);
|
||||
assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
|
||||
assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
|
||||
assert(params->hiz_op == ISL_AUX_OP_NONE);
|
||||
|
||||
assert(params->num_layers == 1);
|
||||
assert(params->dst.view.levels == 1);
|
||||
assert(dst_surf->samples == 1);
|
||||
assert(fmtl->bpb != 96 || dst_surf->tiling == ISL_TILING_LINEAR);
|
||||
|
||||
#if GFX_VERx10 < 125
|
||||
assert(params->dst.view.base_array_layer == 0);
|
||||
assert(params->dst.z_offset == 0);
|
||||
#endif
|
||||
|
||||
unsigned dst_pitch_unit = dst_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
struct isl_extent3d dst_align = isl_get_image_alignment(dst_surf);
|
||||
#endif
|
||||
|
||||
blorp_emit(batch, GENX(XY_FAST_COLOR_BLT), blt) {
|
||||
blt.ColorDepth = xy_color_depth(fmtl);
|
||||
|
||||
blt.DestinationPitch = (dst_surf->row_pitch_B / dst_pitch_unit) - 1;
|
||||
blt.DestinationTiling = xy_bcb_tiling(dst_surf);
|
||||
blt.DestinationX1 = params->x0;
|
||||
blt.DestinationY1 = params->y0;
|
||||
blt.DestinationX2 = params->x1;
|
||||
blt.DestinationY2 = params->y1;
|
||||
blt.DestinationBaseAddress = params->dst.addr;
|
||||
blt.DestinationXOffset = params->dst.tile_x_sa;
|
||||
blt.DestinationYOffset = params->dst.tile_y_sa;
|
||||
|
||||
isl_color_value_pack((union isl_color_value *)
|
||||
params->wm_inputs.clear_color,
|
||||
params->dst.view.format, blt.FillColor);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
blt.DestinationSurfaceType = xy_bcb_surf_dim(dst_surf);
|
||||
blt.DestinationSurfaceWidth = dst_surf->logical_level0_px.w - 1;
|
||||
blt.DestinationSurfaceHeight = dst_surf->logical_level0_px.h - 1;
|
||||
blt.DestinationSurfaceDepth = xy_bcb_surf_depth(dst_surf) - 1;
|
||||
blt.DestinationArrayIndex =
|
||||
params->dst.view.base_array_layer + params->dst.z_offset;
|
||||
blt.DestinationSurfaceQPitch = isl_get_qpitch(dst_surf) >> 2;
|
||||
blt.DestinationLOD = params->dst.view.base_level;
|
||||
blt.DestinationMipTailStartLOD = dst_surf->miptail_start_level;
|
||||
blt.DestinationHorizontalAlign = isl_encode_halign(dst_align.width);
|
||||
blt.DestinationVerticalAlign = isl_encode_valign(dst_align.height);
|
||||
/* XY_FAST_COLOR_BLT only supports AUX_CCS. */
|
||||
blt.DestinationDepthStencilResource =
|
||||
params->dst.aux_usage == ISL_AUX_USAGE_STC_CCS;
|
||||
blt.DestinationTargetMemory =
|
||||
params->dst.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
|
||||
|
||||
if (params->dst.aux_usage != ISL_AUX_USAGE_NONE) {
|
||||
blt.DestinationAuxiliarySurfaceMode = xy_aux_mode(&params->dst);
|
||||
blt.DestinationCompressionEnable = true;
|
||||
blt.DestinationCompressionFormat =
|
||||
isl_get_render_compression_format(dst_surf->format);
|
||||
blt.DestinationClearValueEnable = !!params->dst.clear_color_addr.buffer;
|
||||
blt.DestinationClearAddress = params->dst.clear_color_addr;
|
||||
}
|
||||
|
||||
/* XeHP needs special MOCS values for the blitter */
|
||||
blt.DestinationMOCS = isl_dev->mocs.blitter_dst;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
blorp_exec_blitter(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
blorp_measure_start(batch, params);
|
||||
|
||||
if (params->src.enabled)
|
||||
blorp_xy_block_copy_blt(batch, params);
|
||||
else
|
||||
blorp_xy_fast_color_blit(batch, params);
|
||||
|
||||
blorp_measure_end(batch, params);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Execute a blit or render pass operation.
|
||||
*
|
||||
|
|
@ -2697,9 +2113,10 @@ blorp_exec_blitter(struct blorp_batch *batch,
|
|||
static void
|
||||
blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
{
|
||||
if (batch->flags & BLORP_BATCH_USE_BLITTER) {
|
||||
blorp_exec_blitter(batch, params);
|
||||
} else if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
|
||||
/* Not supported in Gfx versions that use Elk. */
|
||||
assert((batch->flags & BLORP_BATCH_USE_BLITTER) == 0);
|
||||
|
||||
if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
|
||||
blorp_exec_compute(batch, params);
|
||||
} else {
|
||||
blorp_exec_3d(batch, params);
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@
|
|||
#error This file should only be included by genX files.
|
||||
#endif
|
||||
|
||||
#if GFX_VER > 8
|
||||
#error "ELK doesn't support Gfx > 8."
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "dev/intel_device_info.h"
|
||||
|
|
@ -52,7 +56,6 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||
bool enable_16 = prog_data->dispatch_16;
|
||||
bool enable_32 = prog_data->dispatch_32;
|
||||
|
||||
#if GFX_VER >= 9
|
||||
/* SKL PRMs, Volume 2a: Command Reference: Instructions:
|
||||
* 3DSTATE_PS_BODY::8 Pixel Dispatch Enable:
|
||||
*
|
||||
|
|
@ -60,11 +63,7 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||
* Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit must be
|
||||
* DISABLED."
|
||||
*/
|
||||
if (ps->RenderTargetFastClearEnable ||
|
||||
ps->RenderTargetResolveType == RESOLVE_PARTIAL ||
|
||||
ps->RenderTargetResolveType == RESOLVE_FULL)
|
||||
enable_8 = false;
|
||||
#elif GFX_VER >= 8
|
||||
#if GFX_VER >= 8
|
||||
/* BDW has the same wording as SKL, except some of the fields mentioned
|
||||
* don't exist...
|
||||
*/
|
||||
|
|
@ -77,103 +76,25 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||
elk_wm_prog_data_is_persample(prog_data, msaa_flags);
|
||||
|
||||
if (is_persample_dispatch) {
|
||||
/* TGL PRMs, Volume 2d: Command Reference: Structures:
|
||||
* 3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
|
||||
*
|
||||
* "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1."
|
||||
*/
|
||||
if (GFX_VER >= 12 && rasterization_samples > 1)
|
||||
enable_32 = false;
|
||||
|
||||
/* Starting with SandyBridge (where we first get MSAA), the different
|
||||
* pixel dispatch combinations are grouped into classifications A
|
||||
* through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware
|
||||
* generations, the only configurations supporting persample dispatch
|
||||
* are those in which only one dispatch width is enabled.
|
||||
*
|
||||
* The Gfx12 hardware spec has a similar dispatch grouping table, but
|
||||
* the following conflicting restriction applies (from the page on
|
||||
* "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
|
||||
*
|
||||
* "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
|
||||
* enabled."
|
||||
*/
|
||||
if (enable_32 || enable_16)
|
||||
enable_8 = false;
|
||||
if (GFX_VER < 12 && enable_32)
|
||||
if (enable_32)
|
||||
enable_16 = false;
|
||||
}
|
||||
|
||||
/* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
|
||||
*
|
||||
* "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
|
||||
* SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
|
||||
* mode."
|
||||
*
|
||||
* 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
|
||||
*/
|
||||
if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) {
|
||||
assert(enable_8 || enable_16);
|
||||
enable_32 = false;
|
||||
}
|
||||
assert(enable_8 || enable_16 || enable_32);
|
||||
assert(!prog_data->dispatch_multi);
|
||||
|
||||
assert(enable_8 || enable_16 || enable_32 ||
|
||||
(GFX_VER >= 12 && prog_data->dispatch_multi));
|
||||
assert(!prog_data->dispatch_multi ||
|
||||
(GFX_VER >= 12 && !enable_8));
|
||||
|
||||
#if GFX_VER >= 20
|
||||
if (prog_data->dispatch_multi) {
|
||||
ps->Kernel0Enable = true;
|
||||
ps->Kernel0SIMDWidth = (prog_data->dispatch_multi == 32 ?
|
||||
PS_SIMD32 : PS_SIMD16);
|
||||
ps->Kernel0MaximumPolysperThread =
|
||||
prog_data->max_polygons - 1;
|
||||
switch (prog_data->dispatch_multi /
|
||||
prog_data->max_polygons) {
|
||||
case 8:
|
||||
ps->Kernel0PolyPackingPolicy = POLY_PACK8_FIXED;
|
||||
break;
|
||||
case 16:
|
||||
ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid polygon width");
|
||||
}
|
||||
|
||||
} else if (enable_16) {
|
||||
ps->Kernel0Enable = true;
|
||||
ps->Kernel0SIMDWidth = PS_SIMD16;
|
||||
ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
|
||||
}
|
||||
|
||||
if (enable_32) {
|
||||
ps->Kernel1Enable = true;
|
||||
ps->Kernel1SIMDWidth = PS_SIMD32;
|
||||
|
||||
} else if (enable_16 && prog_data->dispatch_multi == 16) {
|
||||
ps->Kernel1Enable = true;
|
||||
ps->Kernel1SIMDWidth = PS_SIMD16;
|
||||
}
|
||||
#else
|
||||
ps->_8PixelDispatchEnable = enable_8 ||
|
||||
(GFX_VER == 12 && prog_data->dispatch_multi);
|
||||
ps->_16PixelDispatchEnable = enable_16;
|
||||
ps->_32PixelDispatchEnable = enable_32;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
|
||||
UNUSED static int
|
||||
preferred_slm_allocation_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
if (devinfo->platform == INTEL_PLATFORM_LNL && devinfo->revision == 0)
|
||||
return SLM_ENCODES_128K;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue