anv: implement Wa_16014912113

When the URB state for the DS (tessellation evaluation) stage changes, we
need to emit URB setup for VS with 256 handles and 0 for the rest, then
commit this using an HDC flush before setting the real values.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26920>
This commit is contained in:
Tapani Pälli 2024-01-10 14:20:36 +02:00 committed by Marge Bot
parent 263f693760
commit 1693d0b857
9 changed files with 101 additions and 21 deletions

View file

@ -37,6 +37,7 @@
#endif
struct intel_sample_positions;
struct intel_urb_config;
extern const uint32_t genX(vk_to_intel_cullmode)[];
@ -88,6 +89,9 @@ void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
unsigned width, unsigned height,
unsigned scale);
void genX(urb_workaround)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_urb_config *urb_cfg);
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
void genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline,
@ -172,7 +176,8 @@ void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out,
enum intel_urb_deref_block_size *deref_block_size);
void genX(emit_sample_pattern)(struct anv_batch *batch,

View file

@ -3333,6 +3333,8 @@ struct anv_simple_shader {
struct anv_shader_bin *kernel;
/* L3 config used by the shader */
const struct intel_l3_config *l3_config;
/* Current URB config */
const struct intel_urb_config *urb_cfg;
/* Managed by the simpler shader helper*/
struct anv_state bt_state;
@ -3443,6 +3445,8 @@ struct anv_cmd_graphics_state {
*/
bool viewport_set;
struct intel_urb_config urb_cfg;
uint32_t n_occlusion_queries;
struct anv_gfx_dynamic_state dyn_state;
@ -4275,6 +4279,9 @@ struct anv_graphics_pipeline {
*/
uint32_t batch_data[416];
/* Urb setup utilized by this pipeline. */
struct intel_urb_config urb_cfg;
/* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */
struct {
struct anv_gfx_state_ptr urb;

View file

@ -2956,6 +2956,7 @@ genX(CmdExecuteCommands)(
container->state.current_hash_scale = 0;
container->state.gfx.push_constant_stages = 0;
container->state.gfx.ds_write_state = false;
memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config));
memcpy(container->state.gfx.dyn_state.dirty,
device->gfx_dirty_state,
sizeof(container->state.gfx.dyn_state.dirty));
@ -5513,6 +5514,30 @@ genX(batch_emit_fast_color_dummy_blit)(struct anv_batch *batch,
#endif
}
/* Implements Wa_16014912113: when the URB allocation for the
 * tessellation-evaluation (DS) stage is about to change, first re-emit the
 * URB state for VS..GS using the *previous* start/size values with 256
 * entries for VS and 0 for every other stage, then commit that dummy
 * programming with an HDC pipeline flush before the caller emits the real
 * URB configuration.
 */
void
genX(urb_workaround)(struct anv_cmd_buffer *cmd_buffer,
const struct intel_urb_config *urb_cfg)
{
#if INTEL_NEEDS_WA_16014912113
/* URB configuration last emitted into this command buffer. */
const struct intel_urb_config *current =
&cmd_buffer->state.gfx.urb_cfg;
/* size[0] == 0 means no URB state has been emitted yet in this command
 * buffer, so there is no previous configuration to transition from. */
if (intel_urb_setup_changed(urb_cfg, current, MESA_SHADER_TESS_EVAL) &&
current->size[0] != 0) {
/* 3DSTATE_URB_{VS,HS,DS,GS} have consecutive sub-opcodes, so one template
 * (3DSTATE_URB_VS) is reused with an adjusted sub-opcode per stage. */
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = current->start[i];
urb.VSURBEntryAllocationSize = current->size[i] - 1;
/* Workaround value: 256 handles for VS (i == 0), 0 for the rest. */
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
}
}
/* Commit the dummy URB programming before the real values are set. */
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.HDCPipelineFlushEnable = true;
}
}
#endif
}
struct anv_state
genX(cmd_buffer_begin_companion_rcs_syncpoint)(
struct anv_cmd_buffer *cmd_buffer)

View file

@ -151,6 +151,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
.kernel = device->internal_kernels[
ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(state);
@ -478,6 +479,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
.kernel = device->internal_kernels[
ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(&simple_state);

View file

@ -1413,9 +1413,15 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB))
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
sizeof(struct intel_urb_config));
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ms);

View file

@ -53,6 +53,8 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
static void
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out,
const struct intel_l3_config *l3_config)
{
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
@ -102,9 +104,11 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
* store the data that VF is going to pass to SOL.
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
memcpy(urb_cfg_out->size, &entry_size, sizeof(entry_size));
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, urb_cfg_out,
NULL);
#if GFX_VER >= 12
/* Disable Primitive Replication. */
@ -258,7 +262,10 @@ genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
genX(emit_l3_config)(batch, device, cfg);
genX(emit_pipeline_select)(batch, _3D, device);
emit_common_so_memcpy(batch, device, cfg);
struct intel_urb_config urb_cfg_in = { 0 };
struct intel_urb_config urb_cfg = { 0 };
emit_common_so_memcpy(batch, device, &urb_cfg_in, &urb_cfg, cfg);
}
void
@ -325,7 +332,11 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
genX(flush_pipeline_select_3d)(cmd_buffer);
struct intel_urb_config urb_cfg;
emit_common_so_memcpy(&cmd_buffer->batch, cmd_buffer->device,
&cmd_buffer->state.gfx.urb_cfg,
&urb_cfg,
cmd_buffer->state.current_l3_config);
emit_so_memcpy(&cmd_buffer->batch, cmd_buffer->device, dst, src, size);
@ -334,6 +345,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
1ull << 32);
#endif
/* Update urb config after memcpy. */
memcpy(&cmd_buffer->state.gfx.urb_cfg, &urb_cfg,
sizeof(struct intel_urb_config));
/* Flag all the instructions emitted by the memcpy. */
struct anv_gfx_dynamic_state *hw_state =
&cmd_buffer->state.gfx.dyn_state;

View file

@ -371,29 +371,42 @@ void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out,
enum intel_urb_deref_block_size *deref_block_size)
{
const struct intel_device_info *devinfo = device->info;
struct intel_urb_config urb_cfg = {
.size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
};
bool constrained;
intel_get_urb_config(devinfo, l3_config,
active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
&urb_cfg, deref_block_size,
urb_cfg_out, deref_block_size,
&constrained);
#if INTEL_NEEDS_WA_16014912113
if (intel_urb_setup_changed(urb_cfg_in, urb_cfg_out,
MESA_SHADER_TESS_EVAL) && urb_cfg_in->size[0] != 0) {
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg_in->start[i];
urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1;
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
}
}
genx_batch_emit_pipe_control(batch, device->info, _3D,
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
}
#endif
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg.start[i];
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg.entries[i];
urb.VSURBStartingAddress = urb_cfg_out->start[i];
urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg_out->entries[i];
}
}
#if GFX_VERx10 >= 125
@ -460,13 +473,12 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
return;
}
#endif
struct intel_urb_config urb_cfg;
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
const struct brw_vue_prog_data *prog_data =
!anv_pipeline_has_stage(pipeline, i) ? NULL :
(const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data;
urb_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
pipeline->urb_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
}
struct anv_device *device = pipeline->base.base.device;
@ -480,17 +492,18 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
pipeline->base.base.active_stages &
VK_SHADER_STAGE_GEOMETRY_BIT,
&urb_cfg, deref_block_size,
&pipeline->urb_cfg, deref_block_size,
&constrained);
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg.start[i];
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg.entries[i];
urb.VSURBStartingAddress = pipeline->urb_cfg.start[i];
urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1;
urb.VSNumberofURBEntries = pipeline->urb_cfg.entries[i];
}
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_extensions.EXT_mesh_shader) {
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), zero);

View file

@ -1744,6 +1744,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE :
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT],
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(&state);

View file

@ -103,7 +103,9 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
struct intel_urb_config urb_cfg_out = {
.size = { DIV_ROUND_UP(32, 64), 1, 1, 1 },
};
genX(emit_l3_config)(batch, device, state->l3_config);
@ -112,7 +114,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
enum intel_urb_deref_block_size deref_block_size;
genX(emit_urb_setup)(device, batch, state->l3_config,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
entry_size, &deref_block_size);
state->urb_cfg, &urb_cfg_out, &deref_block_size);
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
ps_blend.HasWriteableRT = true;
@ -344,6 +346,10 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
}
/* Update urb config after simple shader. */
memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg_out,
sizeof(struct intel_urb_config));
state->cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
ANV_CMD_DIRTY_XFB_ENABLE);