mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
anv: implement Wa_16014912113
When the URB state for DS changes, we need to emit URB setup for VS with 256 handles and 0 for the rest, then commit this using an HDC flush before setting the real values. Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26920>
This commit is contained in:
parent
263f693760
commit
1693d0b857
9 changed files with 101 additions and 21 deletions
|
|
@ -37,6 +37,7 @@
|
|||
#endif
|
||||
|
||||
struct intel_sample_positions;
|
||||
struct intel_urb_config;
|
||||
|
||||
extern const uint32_t genX(vk_to_intel_cullmode)[];
|
||||
|
||||
|
|
@ -88,6 +89,9 @@ void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
|
|||
unsigned width, unsigned height,
|
||||
unsigned scale);
|
||||
|
||||
void genX(urb_workaround)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct intel_urb_config *urb_cfg);
|
||||
|
||||
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(emit_pipeline_select)(struct anv_batch *batch, uint32_t pipeline,
|
||||
|
|
@ -172,7 +176,8 @@ void
|
|||
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||
const struct intel_l3_config *l3_config,
|
||||
VkShaderStageFlags active_stages,
|
||||
const unsigned entry_size[4],
|
||||
const struct intel_urb_config *urb_cfg_in,
|
||||
struct intel_urb_config *urb_cfg_out,
|
||||
enum intel_urb_deref_block_size *deref_block_size);
|
||||
|
||||
void genX(emit_sample_pattern)(struct anv_batch *batch,
|
||||
|
|
|
|||
|
|
@ -3333,6 +3333,8 @@ struct anv_simple_shader {
|
|||
struct anv_shader_bin *kernel;
|
||||
/* L3 config used by the shader */
|
||||
const struct intel_l3_config *l3_config;
|
||||
/* Current URB config */
|
||||
const struct intel_urb_config *urb_cfg;
|
||||
|
||||
/* Managed by the simpler shader helper*/
|
||||
struct anv_state bt_state;
|
||||
|
|
@ -3443,6 +3445,8 @@ struct anv_cmd_graphics_state {
|
|||
*/
|
||||
bool viewport_set;
|
||||
|
||||
struct intel_urb_config urb_cfg;
|
||||
|
||||
uint32_t n_occlusion_queries;
|
||||
|
||||
struct anv_gfx_dynamic_state dyn_state;
|
||||
|
|
@ -4275,6 +4279,9 @@ struct anv_graphics_pipeline {
|
|||
*/
|
||||
uint32_t batch_data[416];
|
||||
|
||||
/* Urb setup utilized by this pipeline. */
|
||||
struct intel_urb_config urb_cfg;
|
||||
|
||||
/* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */
|
||||
struct {
|
||||
struct anv_gfx_state_ptr urb;
|
||||
|
|
|
|||
|
|
@ -2956,6 +2956,7 @@ genX(CmdExecuteCommands)(
|
|||
container->state.current_hash_scale = 0;
|
||||
container->state.gfx.push_constant_stages = 0;
|
||||
container->state.gfx.ds_write_state = false;
|
||||
memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config));
|
||||
memcpy(container->state.gfx.dyn_state.dirty,
|
||||
device->gfx_dirty_state,
|
||||
sizeof(container->state.gfx.dyn_state.dirty));
|
||||
|
|
@ -5513,6 +5514,30 @@ genX(batch_emit_fast_color_dummy_blit)(struct anv_batch *batch,
|
|||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
genX(urb_workaround)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct intel_urb_config *urb_cfg)
|
||||
{
|
||||
#if INTEL_NEEDS_WA_16014912113
|
||||
const struct intel_urb_config *current =
|
||||
&cmd_buffer->state.gfx.urb_cfg;
|
||||
if (intel_urb_setup_changed(urb_cfg, current, MESA_SHADER_TESS_EVAL) &&
|
||||
current->size[0] != 0) {
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
urb.VSURBStartingAddress = current->start[i];
|
||||
urb.VSURBEntryAllocationSize = current->size[i] - 1;
|
||||
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
|
||||
}
|
||||
}
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.HDCPipelineFlushEnable = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
struct anv_state
|
||||
genX(cmd_buffer_begin_companion_rcs_syncpoint)(
|
||||
struct anv_cmd_buffer *cmd_buffer)
|
||||
|
|
|
|||
|
|
@ -151,6 +151,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
|
|||
.kernel = device->internal_kernels[
|
||||
ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
|
||||
.l3_config = device->internal_kernels_l3_config,
|
||||
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
|
||||
};
|
||||
|
||||
genX(emit_simple_shader_init)(state);
|
||||
|
|
@ -478,6 +479,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
|||
.kernel = device->internal_kernels[
|
||||
ANV_INTERNAL_KERNEL_GENERATED_DRAWS],
|
||||
.l3_config = device->internal_kernels_l3_config,
|
||||
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
|
||||
};
|
||||
genX(emit_simple_shader_init)(&simple_state);
|
||||
|
||||
|
|
|
|||
|
|
@ -1413,9 +1413,15 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
|||
&cmd_buffer->vk.dynamic_graphics_state;
|
||||
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
|
||||
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB))
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
|
||||
genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);
|
||||
|
||||
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
|
||||
|
||||
memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
|
||||
sizeof(struct intel_urb_config));
|
||||
}
|
||||
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE))
|
||||
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ms);
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,8 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
|
|||
|
||||
static void
|
||||
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
||||
const struct intel_urb_config *urb_cfg_in,
|
||||
struct intel_urb_config *urb_cfg_out,
|
||||
const struct intel_l3_config *l3_config)
|
||||
{
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||
|
|
@ -102,9 +104,11 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
* store the data that VF is going to pass to SOL.
|
||||
*/
|
||||
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
|
||||
memcpy(urb_cfg_out->size, &entry_size, sizeof(entry_size));
|
||||
|
||||
genX(emit_urb_setup)(device, batch, l3_config,
|
||||
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
|
||||
VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, urb_cfg_out,
|
||||
NULL);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
/* Disable Primitive Replication. */
|
||||
|
|
@ -258,7 +262,10 @@ genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
|
|||
genX(emit_l3_config)(batch, device, cfg);
|
||||
genX(emit_pipeline_select)(batch, _3D, device);
|
||||
|
||||
emit_common_so_memcpy(batch, device, cfg);
|
||||
struct intel_urb_config urb_cfg_in = { 0 };
|
||||
struct intel_urb_config urb_cfg = { 0 };
|
||||
|
||||
emit_common_so_memcpy(batch, device, &urb_cfg_in, &urb_cfg, cfg);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -325,7 +332,11 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
genX(flush_pipeline_select_3d)(cmd_buffer);
|
||||
|
||||
struct intel_urb_config urb_cfg;
|
||||
|
||||
emit_common_so_memcpy(&cmd_buffer->batch, cmd_buffer->device,
|
||||
&cmd_buffer->state.gfx.urb_cfg,
|
||||
&urb_cfg,
|
||||
cmd_buffer->state.current_l3_config);
|
||||
emit_so_memcpy(&cmd_buffer->batch, cmd_buffer->device, dst, src, size);
|
||||
|
||||
|
|
@ -334,6 +345,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|||
1ull << 32);
|
||||
#endif
|
||||
|
||||
/* Update urb config after memcpy. */
|
||||
memcpy(&cmd_buffer->state.gfx.urb_cfg, &urb_cfg,
|
||||
sizeof(struct intel_urb_config));
|
||||
|
||||
/* Flag all the instructions emitted by the memcpy. */
|
||||
struct anv_gfx_dynamic_state *hw_state =
|
||||
&cmd_buffer->state.gfx.dyn_state;
|
||||
|
|
|
|||
|
|
@ -371,29 +371,42 @@ void
|
|||
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
||||
const struct intel_l3_config *l3_config,
|
||||
VkShaderStageFlags active_stages,
|
||||
const unsigned entry_size[4],
|
||||
const struct intel_urb_config *urb_cfg_in,
|
||||
struct intel_urb_config *urb_cfg_out,
|
||||
enum intel_urb_deref_block_size *deref_block_size)
|
||||
{
|
||||
const struct intel_device_info *devinfo = device->info;
|
||||
|
||||
struct intel_urb_config urb_cfg = {
|
||||
.size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
|
||||
};
|
||||
|
||||
bool constrained;
|
||||
intel_get_urb_config(devinfo, l3_config,
|
||||
active_stages &
|
||||
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
|
||||
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
|
||||
&urb_cfg, deref_block_size,
|
||||
urb_cfg_out, deref_block_size,
|
||||
&constrained);
|
||||
|
||||
#if INTEL_NEEDS_WA_16014912113
|
||||
if (intel_urb_setup_changed(urb_cfg_in, urb_cfg_out,
|
||||
MESA_SHADER_TESS_EVAL) && urb_cfg_in->size[0] != 0) {
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
urb.VSURBStartingAddress = urb_cfg_in->start[i];
|
||||
urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1;
|
||||
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
|
||||
}
|
||||
}
|
||||
genx_batch_emit_pipe_control(batch, device->info, _3D,
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
urb.VSURBStartingAddress = urb_cfg.start[i];
|
||||
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
|
||||
urb.VSNumberofURBEntries = urb_cfg.entries[i];
|
||||
urb.VSURBStartingAddress = urb_cfg_out->start[i];
|
||||
urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1;
|
||||
urb.VSNumberofURBEntries = urb_cfg_out->entries[i];
|
||||
}
|
||||
}
|
||||
#if GFX_VERx10 >= 125
|
||||
|
|
@ -460,13 +473,12 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
struct intel_urb_config urb_cfg;
|
||||
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
const struct brw_vue_prog_data *prog_data =
|
||||
!anv_pipeline_has_stage(pipeline, i) ? NULL :
|
||||
(const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data;
|
||||
|
||||
urb_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
|
||||
pipeline->urb_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
|
||||
}
|
||||
|
||||
struct anv_device *device = pipeline->base.base.device;
|
||||
|
|
@ -480,17 +492,18 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
|
|||
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
|
||||
pipeline->base.base.active_stages &
|
||||
VK_SHADER_STAGE_GEOMETRY_BIT,
|
||||
&urb_cfg, deref_block_size,
|
||||
&pipeline->urb_cfg, deref_block_size,
|
||||
&constrained);
|
||||
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
urb.VSURBStartingAddress = urb_cfg.start[i];
|
||||
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
|
||||
urb.VSNumberofURBEntries = urb_cfg.entries[i];
|
||||
urb.VSURBStartingAddress = pipeline->urb_cfg.start[i];
|
||||
urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1;
|
||||
urb.VSNumberofURBEntries = pipeline->urb_cfg.entries[i];
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if (device->vk.enabled_extensions.EXT_mesh_shader) {
|
||||
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), zero);
|
||||
|
|
|
|||
|
|
@ -1744,6 +1744,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
|
|||
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE :
|
||||
ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT],
|
||||
.l3_config = device->internal_kernels_l3_config,
|
||||
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
|
||||
};
|
||||
genX(emit_simple_shader_init)(&state);
|
||||
|
||||
|
|
|
|||
|
|
@ -103,7 +103,9 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
|
|||
* allocate space for the VS. Even though one isn't run, we need VUEs to
|
||||
* store the data that VF is going to pass to SOL.
|
||||
*/
|
||||
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
|
||||
struct intel_urb_config urb_cfg_out = {
|
||||
.size = { DIV_ROUND_UP(32, 64), 1, 1, 1 },
|
||||
};
|
||||
|
||||
genX(emit_l3_config)(batch, device, state->l3_config);
|
||||
|
||||
|
|
@ -112,7 +114,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
|
|||
enum intel_urb_deref_block_size deref_block_size;
|
||||
genX(emit_urb_setup)(device, batch, state->l3_config,
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
entry_size, &deref_block_size);
|
||||
state->urb_cfg, &urb_cfg_out, &deref_block_size);
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
|
||||
ps_blend.HasWriteableRT = true;
|
||||
|
|
@ -344,6 +346,10 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
|
|||
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
|
||||
}
|
||||
|
||||
/* Update urb config after simple shader. */
|
||||
memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg_out,
|
||||
sizeof(struct intel_urb_config));
|
||||
|
||||
state->cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
|
||||
state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |
|
||||
ANV_CMD_DIRTY_XFB_ENABLE);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue