hasvk/state: Rip out SKL+

v2: Fix incorrectly removed l3cr.SLMEnable setting (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19852>
This commit is contained in:
Jason Ekstrand 2022-09-02 22:41:01 -05:00 committed by Marge Bot
parent ce57cc4397
commit 6d80ce1283
3 changed files with 1 additions and 541 deletions

View file

@ -3146,28 +3146,6 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_workaround_bo;
if (device->info->ver >= 12 &&
device->vk.enabled_extensions.KHR_fragment_shading_rate) {
uint32_t n_cps_states = 3 * 3; /* All combinaisons of X by Y CP sizes (1, 2, 4) */
if (device->info->has_coarse_pixel_primitive_and_cb)
n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
n_cps_states += 1; /* Disable CPS */
/* Each of the combinaison must be replicated on all viewports */
n_cps_states *= MAX_VIEWPORTS;
device->cps_states =
anv_state_pool_alloc(&device->dynamic_state_pool,
n_cps_states * CPS_STATE_length(device->info) * 4,
32);
if (device->cps_states.map == NULL)
goto fail_trivial_batch;
anv_genX(device->info, init_cps_device_state)(device);
}
/* Allocate a null surface state at surface state offset 0. This makes
* NULL descriptor handling trivial because we can just memset structures
* to zero and they have a valid descriptor.
@ -3223,7 +3201,6 @@ VkResult anv_CreateDevice(
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
fail_trivial_batch:
anv_device_release_bo(device, device->trivial_batch_bo);
fail_workaround_bo:
anv_device_release_bo(device, device->workaround_bo);

View file

@ -56,8 +56,6 @@ void genX(init_physical_device_state)(struct anv_physical_device *device);
VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
@ -128,10 +126,6 @@ void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct vk_sample_locations_state *sl);
void genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
const struct vk_fragment_shading_rate_state *fsr);
void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);

View file

@ -38,198 +38,6 @@
#include "vk_standard_sample_locations.h"
#include "vk_util.h"
static void
genX(emit_slice_hashing_state)(struct anv_device *device,
struct anv_batch *batch)
{
#if GFX_VER == 11
/* Gfx11 hardware has two pixel pipes at most. */
for (unsigned i = 2; i < ARRAY_SIZE(device->info->ppipe_subslices); i++)
assert(device->info->ppipe_subslices[i] == 0);
if (device->info->ppipe_subslices[0] == device->info->ppipe_subslices[1])
return;
if (!device->slice_hash.alloc_size) {
unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
device->slice_hash =
anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
const bool flip = device->info->ppipe_subslices[0] <
device->info->ppipe_subslices[1];
struct GENX(SLICE_HASH_TABLE) table;
intel_compute_pixel_hash_table_3way(16, 16, 3, 3, flip, table.Entry[0]);
GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, &table);
}
anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
ptr.SliceHashStatePointerValid = true;
ptr.SliceHashTableStatePointer = device->slice_hash.offset;
}
anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
mode.SliceHashingTableEnable = true;
}
#elif GFX_VERx10 == 120
/* For each n calculate ppipes_of[n], equal to the number of pixel pipes
* present with n active dual subslices.
*/
unsigned ppipes_of[3] = {};
for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) {
for (unsigned p = 0; p < 3; p++)
ppipes_of[n] += (device->info->ppipe_subslices[p] == n);
}
/* Gfx12 has three pixel pipes. */
for (unsigned p = 3; p < ARRAY_SIZE(device->info->ppipe_subslices); p++)
assert(device->info->ppipe_subslices[p] == 0);
if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
/* All three pixel pipes have the maximum number of active dual
* subslices, or there is only one active pixel pipe: Nothing to do.
*/
return;
}
anv_batch_emit(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) {
p.SliceHashControl[0] = TABLE_0;
if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
intel_compute_pixel_hash_table_3way(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]);
else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
intel_compute_pixel_hash_table_3way(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]);
if (ppipes_of[2] == 2 && ppipes_of[1] == 1)
intel_compute_pixel_hash_table_3way(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]);
else if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
intel_compute_pixel_hash_table_3way(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]);
else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
intel_compute_pixel_hash_table_3way(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]);
else
unreachable("Illegal fusing.");
}
anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), p) {
p.SubsliceHashingTableEnable = true;
p.SubsliceHashingTableEnableMask = true;
}
#elif GFX_VERx10 == 125
uint32_t ppipe_mask = 0;
for (unsigned p = 0; p < ARRAY_SIZE(device->info->ppipe_subslices); p++) {
if (device->info->ppipe_subslices[p])
ppipe_mask |= (1u << p);
}
assert(ppipe_mask);
if (!device->slice_hash.alloc_size) {
unsigned size = GENX(SLICE_HASH_TABLE_length) * 4;
device->slice_hash =
anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
struct GENX(SLICE_HASH_TABLE) table;
/* Note that the hardware expects an array with 7 tables, each
* table is intended to specify the pixel pipe hashing behavior
* for every possible slice count between 2 and 8, however that
* doesn't actually work, among other reasons due to hardware
* bugs that will cause the GPU to erroneously access the table
* at the wrong index in some cases, so in practice all 7 tables
* need to be initialized to the same value.
*/
for (unsigned i = 0; i < 7; i++)
intel_compute_pixel_hash_table_nway(16, 16, ppipe_mask, table.Entry[i][0]);
GENX(SLICE_HASH_TABLE_pack)(NULL, device->slice_hash.map, &table);
}
anv_batch_emit(batch, GENX(3DSTATE_SLICE_TABLE_STATE_POINTERS), ptr) {
ptr.SliceHashStatePointerValid = true;
ptr.SliceHashTableStatePointer = device->slice_hash.offset;
}
anv_batch_emit(batch, GENX(3DSTATE_3D_MODE), mode) {
mode.SliceHashingTableEnable = true;
mode.SliceHashingTableEnableMask = true;
mode.CrossSliceHashingMode = (util_bitcount(ppipe_mask) > 1 ?
hashing32x32 : NormalMode);
mode.CrossSliceHashingModeMask = -1;
}
#endif
}
static void
init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
{
UNUSED struct anv_device *device = queue->device;
#if GFX_VER >= 11
/* Starting with GFX version 11, SLM is no longer part of the L3$ config
* so it never changes throughout the lifetime of the VkDevice.
*/
const struct intel_l3_config *cfg = intel_get_default_l3_config(device->info);
genX(emit_l3_config)(batch, device, cfg);
device->l3_config = cfg;
#endif
#if GFX_VERx10 >= 125
/* GEN:BUG:1607854226:
*
* Non-pipelined state has issues with not applying in MEDIA/GPGPU mode.
* Fortunately, we always start the context off in 3D mode.
*/
uint32_t mocs = device->isl_dev.mocs.internal;
anv_batch_emit(batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateBufferSize = 0xfffff;
sba.GeneralStateMOCS = mocs;
sba.GeneralStateBaseAddressModifyEnable = true;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.StatelessDataPortAccessMOCS = mocs;
sba.SurfaceStateBaseAddress =
(struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS };
sba.SurfaceStateMOCS = mocs;
sba.SurfaceStateBaseAddressModifyEnable = true;
sba.DynamicStateBaseAddress =
(struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS };
sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 };
sba.IndirectObjectBufferSize = 0xfffff;
sba.IndirectObjectMOCS = mocs;
sba.IndirectObjectBaseAddressModifyEnable = true;
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.InstructionBaseAddress =
(struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS };
sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
sba.InstructionMOCS = mocs;
sba.InstructionBaseAddressModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
sba.BindlessSurfaceStateBaseAddress =
(struct anv_address) { .offset = SURFACE_STATE_POOL_MIN_ADDRESS };
sba.BindlessSurfaceStateSize = (1 << 20) - 1;
sba.BindlessSurfaceStateMOCS = mocs;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.BindlessSamplerStateMOCS = mocs;
sba.BindlessSamplerStateBaseAddressModifyEnable = true;
sba.BindlessSamplerStateBufferSize = 0;
sba.L1CacheControl = L1CC_WB;
}
#endif
}
static VkResult
init_render_queue_state(struct anv_queue *queue)
{
@ -242,24 +50,9 @@ init_render_queue_state(struct anv_queue *queue)
};
anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) {
#if GFX_VER >= 9
ps.MaskBits = GFX_VER >= 12 ? 0x13 : 3;
ps.MediaSamplerDOPClockGateEnable = GFX_VER >= 12;
#endif
ps.PipelineSelection = _3D;
}
#if GFX_VER == 9
anv_batch_write_reg(&batch, GENX(CACHE_MODE_1), cm1) {
cm1.FloatBlendOptimizationEnable = true;
cm1.FloatBlendOptimizationEnableMask = true;
cm1.MSCRAWHazardAvoidanceBit = true;
cm1.MSCRAWHazardAvoidanceBitMask = true;
cm1.PartialResolveDisableInVC = true;
cm1.PartialResolveDisableInVCMask = true;
}
#endif
anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);
anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
@ -287,113 +80,13 @@ init_render_queue_state(struct anv_queue *queue)
anv_batch_emit(&batch, GENX(3DSTATE_WM_HZ_OP), hzp);
#endif
#if GFX_VER == 11
/* The default behavior of bit 5 "Headerless Message for Pre-emptable
* Contexts" in SAMPLER MODE register is set to 0, which means
* headerless sampler messages are not allowed for pre-emptable
* contexts. Set the bit 5 to 1 to allow them.
*/
anv_batch_write_reg(&batch, GENX(SAMPLER_MODE), sm) {
sm.HeaderlessMessageforPreemptableContexts = true;
sm.HeaderlessMessageforPreemptableContextsMask = true;
}
/* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
* HALF_SLICE_CHICKEN7 register.
*/
anv_batch_write_reg(&batch, GENX(HALF_SLICE_CHICKEN7), hsc7) {
hsc7.EnabledTexelOffsetPrecisionFix = true;
hsc7.EnabledTexelOffsetPrecisionFixMask = true;
}
anv_batch_write_reg(&batch, GENX(TCCNTLREG), tcc) {
tcc.L3DataPartialWriteMergingEnable = true;
tcc.ColorZPartialWriteMergingEnable = true;
tcc.URBPartialWriteMergingEnable = true;
tcc.TCDisable = true;
}
#endif
genX(emit_slice_hashing_state)(device, &batch);
#if GFX_VER >= 11
/* hardware specification recommends disabling repacking for
* the compatibility with decompression mechanism in display controller.
*/
if (device->info->disable_ccs_repack) {
anv_batch_write_reg(&batch, GENX(CACHE_MODE_0), cm0) {
cm0.DisableRepackingforCompression = true;
cm0.DisableRepackingforCompressionMask = true;
}
}
/* an unknown issue is causing vs push constants to become
* corrupted during object-level preemption. For now, restrict
* to command buffer level preemption to avoid rendering
* corruption.
*/
anv_batch_write_reg(&batch, GENX(CS_CHICKEN1), cc1) {
cc1.ReplayMode = MidcmdbufferPreemption;
cc1.ReplayModeMask = true;
#if GFX_VERx10 == 120
cc1.DisablePreemptionandHighPriorityPausingdueto3DPRIMITIVECommand = true;
cc1.DisablePreemptionandHighPriorityPausingdueto3DPRIMITIVECommandMask = true;
#endif
}
#if GFX_VERx10 == 120
/* Wa_1806527549 says to disable the following HiZ optimization when the
* depth buffer is D16_UNORM. We've found the WA to help with more depth
* buffer configurations however, so we always disable it just to be safe.
*/
anv_batch_write_reg(&batch, GENX(HIZ_CHICKEN), reg) {
reg.HZDepthTestLEGEOptimizationDisable = true;
reg.HZDepthTestLEGEOptimizationDisableMask = true;
}
#endif
#if GFX_VERx10 < 125
#define AA_LINE_QUALITY_REG GENX(3D_CHICKEN3)
#else
#define AA_LINE_QUALITY_REG GENX(CHICKEN_RASTER_1)
#endif
/* Enable the new line drawing algorithm that produces higher quality
* lines.
*/
anv_batch_write_reg(&batch, AA_LINE_QUALITY_REG, c3) {
c3.AALineQualityFix = true;
c3.AALineQualityFixMask = true;
}
#endif
#if GFX_VER == 12
if (device->info->has_aux_map) {
uint64_t aux_base_addr = intel_aux_map_get_base(device->aux_map_ctx);
assert(aux_base_addr % (32 * 1024) == 0);
anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num);
lri.DataDWord = aux_base_addr & 0xffffffff;
}
anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4;
lri.DataDWord = aux_base_addr >> 32;
}
}
#endif
/* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
* 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
*
* This is only safe on kernels with context isolation support.
*/
if (GFX_VER >= 8 && device->physical->info.has_context_isolation) {
#if GFX_VER >= 9
anv_batch_write_reg(&batch, GENX(CS_DEBUG_MODE2), csdm2) {
csdm2.CONSTANT_BUFFERAddressOffsetDisable = true;
csdm2.CONSTANT_BUFFERAddressOffsetDisableMask = true;
}
#elif GFX_VER == 8
#if GFX_VER == 8
anv_batch_write_reg(&batch, GENX(INSTPM), instpm) {
instpm.CONSTANT_BUFFERAddressOffsetDisable = true;
instpm.CONSTANT_BUFFERAddressOffsetDisableMask = true;
@ -401,8 +94,6 @@ init_render_queue_state(struct anv_queue *queue)
#endif
}
init_common_queue_state(queue, &batch);
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
assert(batch.next <= batch.end);
@ -439,111 +130,6 @@ genX(init_device_state)(struct anv_device *device)
return res;
}
#if GFX_VERx10 >= 125
#define maybe_for_each_shading_rate_op(name) \
for (VkFragmentShadingRateCombinerOpKHR name = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; \
name <= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR; \
name++)
#elif GFX_VER >= 12
#define maybe_for_each_shading_rate_op(name)
#endif
/* Rather than reemitting the CPS_STATE structure everything those changes and
* for as many viewports as needed, we can just prepare all possible cases and
* just pick the right offset from the prepacked states when needed.
*/
void
genX(init_cps_device_state)(struct anv_device *device)
{
#if GFX_VER >= 12
void *cps_state_ptr = device->cps_states.map;
/* Disabled CPS mode */
for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) {
struct GENX(CPS_STATE) cps_state = {
.CoarsePixelShadingMode = CPS_MODE_CONSTANT,
.MinCPSizeX = 1,
.MinCPSizeY = 1,
#if GFX_VERx10 >= 125
.Combiner0OpcodeforCPsize = PASSTHROUGH,
.Combiner1OpcodeforCPsize = PASSTHROUGH,
#endif /* GFX_VERx10 >= 125 */
};
GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state);
cps_state_ptr += GENX(CPS_STATE_length) * 4;
}
maybe_for_each_shading_rate_op(op0) {
maybe_for_each_shading_rate_op(op1) {
for (uint32_t x = 1; x <= 4; x *= 2) {
for (uint32_t y = 1; y <= 4; y *= 2) {
struct GENX(CPS_STATE) cps_state = {
.CoarsePixelShadingMode = CPS_MODE_CONSTANT,
.MinCPSizeX = x,
.MinCPSizeY = y,
};
#if GFX_VERx10 >= 125
static const uint32_t combiner_ops[] = {
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = PASSTHROUGH,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = OVERRIDE,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = HIGH_QUALITY,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = LOW_QUALITY,
[VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = RELATIVE,
};
cps_state.Combiner0OpcodeforCPsize = combiner_ops[op0];
cps_state.Combiner1OpcodeforCPsize = combiner_ops[op1];
#endif /* GFX_VERx10 >= 125 */
for (uint32_t __v = 0; __v < MAX_VIEWPORTS; __v++) {
GENX(CPS_STATE_pack)(NULL, cps_state_ptr, &cps_state);
cps_state_ptr += GENX(CPS_STATE_length) * 4;
}
}
}
}
}
#endif /* GFX_VER >= 12 */
}
#if GFX_VER >= 12
static uint32_t
get_cps_state_offset(struct anv_device *device, bool cps_enabled,
const struct vk_fragment_shading_rate_state *fsr)
{
if (!cps_enabled)
return device->cps_states.offset;
uint32_t offset;
static const uint32_t size_index[] = {
[1] = 0,
[2] = 1,
[4] = 2,
};
#if GFX_VERx10 >= 125
offset =
1 + /* skip disabled */
fsr->combiner_ops[0] * 5 * 3 * 3 +
fsr->combiner_ops[1] * 3 * 3 +
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#else
offset =
1 + /* skip disabled */
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#endif
offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 */
void
genX(emit_l3_config)(struct anv_batch *batch,
const struct anv_device *device,
@ -553,33 +139,14 @@ genX(emit_l3_config)(struct anv_batch *batch,
#if GFX_VER >= 8
#if GFX_VER >= 12
#define L3_ALLOCATION_REG GENX(L3ALLOC)
#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num)
#else
#define L3_ALLOCATION_REG GENX(L3CNTLREG)
#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num)
#endif
anv_batch_write_reg(batch, L3_ALLOCATION_REG, l3cr) {
if (cfg == NULL) {
#if GFX_VER >= 12
l3cr.L3FullWayAllocationEnable = true;
#else
unreachable("Invalid L3$ config");
#endif
} else {
#if GFX_VER < 11
l3cr.SLMEnable = cfg->n[INTEL_L3P_SLM];
#endif
#if GFX_VER == 11
/* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be
* set in L3CNTLREG register. The default setting of the bit is not
* the desirable behavior.
*/
l3cr.ErrorDetectionBehaviorControl = true;
l3cr.UseFullWays = true;
#endif /* GFX_VER == 11 */
assert(cfg->n[INTEL_L3P_IS] == 0);
assert(cfg->n[INTEL_L3P_C] == 0);
assert(cfg->n[INTEL_L3P_T] == 0);
@ -786,52 +353,6 @@ genX(emit_sample_pattern)(struct anv_batch *batch,
}
#endif
#if GFX_VER >= 11
void
genX(emit_shading_rate)(struct anv_batch *batch,
const struct anv_graphics_pipeline *pipeline,
const struct vk_fragment_shading_rate_state *fsr)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
const bool cps_enable = wm_prog_data && wm_prog_data->per_coarse_pixel_dispatch;
#if GFX_VER == 11
anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) {
cps.CoarsePixelShadingMode = cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE;
if (cps_enable) {
cps.MinCPSizeX = fsr->fragment_size.width;
cps.MinCPSizeY = fsr->fragment_size.height;
}
}
#elif GFX_VER >= 12
/* TODO: we can optimize this flush in the following cases:
*
* In the case where the last geometry shader emits a value that is not
* constant, we can avoid this stall because we can synchronize the
* pixel shader internally with
* 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
*
* If we know that the previous pipeline and the current one are using
* the same fragment shading rate.
*/
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
pc.PSSStallSyncEnable = true;
#else
pc.PSDSyncEnable = true;
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) {
struct anv_device *device = pipeline->base.device;
cps.CoarsePixelShadingStateArrayPointer =
get_cps_state_offset(device, cps_enable, fsr);
}
#endif
}
#endif /* GFX_VER >= 11 */
static uint32_t
vk_to_intel_tex_filter(VkFilter filter, bool anisotropyEnable)
{
@ -886,14 +407,6 @@ static const uint32_t vk_to_intel_shadow_compare_op[] = {
[VK_COMPARE_OP_ALWAYS] = PREFILTEROP_NEVER,
};
#if GFX_VER >= 9
static const uint32_t vk_to_intel_sampler_reduction_mode[] = {
[VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE] = STD_FILTER,
[VK_SAMPLER_REDUCTION_MODE_MIN] = MINIMUM,
[VK_SAMPLER_REDUCTION_MODE_MAX] = MAXIMUM,
};
#endif
VkResult genX(CreateSampler)(
VkDevice _device,
const VkSamplerCreateInfo* pCreateInfo,
@ -926,11 +439,6 @@ VkResult genX(CreateSampler)(
border_color_offset = sampler->custom_border_color.offset;
}
#if GFX_VER >= 9
unsigned sampler_reduction_mode = STD_FILTER;
bool enable_sampler_reduction = false;
#endif
vk_foreach_struct_const(ext, pCreateInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: {
@ -951,16 +459,6 @@ VkResult genX(CreateSampler)(
sampler->conversion = conversion;
break;
}
#if GFX_VER >= 9
case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO: {
VkSamplerReductionModeCreateInfo *sampler_reduction =
(VkSamplerReductionModeCreateInfo *) ext;
sampler_reduction_mode =
vk_to_intel_sampler_reduction_mode[sampler_reduction->reductionMode];
enable_sampler_reduction = true;
break;
}
#endif
case VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT: {
VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
(VkSamplerCustomBorderColorCreateInfoEXT *) ext;
@ -1041,10 +539,6 @@ VkResult genX(CreateSampler)(
.SamplerDisable = false,
.TextureBorderColorMode = DX10OGL,
#if GFX_VER >= 11
.CPSLODCompensationEnable = true,
#endif
#if GFX_VER >= 8
.LODPreClampMode = CLAMP_MODE_OGL,
#else
@ -1088,11 +582,6 @@ VkResult genX(CreateSampler)(
.TCXAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeU],
.TCYAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeV],
.TCZAddressControlMode = vk_to_intel_tex_address[pCreateInfo->addressModeW],
#if GFX_VER >= 9
.ReductionType = sampler_reduction_mode,
.ReductionTypeEnable = enable_sampler_reduction,
#endif
};
GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state);