From f28af4e7f27d01e60a2ea3e608e63c0e677322e8 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 27 Mar 2025 18:13:02 +0200 Subject: [PATCH] anv: move URB programming to dynamic emission path With the pipeline object going away, we have nowhere to store this. Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- src/intel/vulkan/anv_cmd_buffer.c | 1 - src/intel/vulkan/anv_genX.h | 8 +- src/intel/vulkan/anv_private.h | 27 ++- src/intel/vulkan/anv_util.c | 1 + src/intel/vulkan/anv_utrace.c | 1 - src/intel/vulkan/genX_blorp_exec.c | 8 +- .../vulkan/genX_cmd_draw_generated_indirect.h | 4 - src/intel/vulkan/genX_gfx_state.c | 131 +++++++++++-- src/intel/vulkan/genX_gpu_memcpy.c | 13 +- src/intel/vulkan/genX_pipeline.c | 184 ------------------ src/intel/vulkan/genX_query.c | 3 - src/intel/vulkan/genX_simple_shader.c | 25 ++- 12 files changed, 174 insertions(+), 232 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 132f8b805f5..96c36fac6e4 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -554,7 +554,6 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer, * We avoid comparing protected packets as all the fields but the scratch * surface are identical. we just need to select the right one at emission. 
*/ - diff_fix_state(URB, final.urb); diff_fix_state(VF_SGVS, final.vf_sgvs); if (cmd_buffer->device->info->ver >= 11) diff_fix_state(VF_SGVS_2, final.vf_sgvs_2); diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index c9b33ba9f76..1c39f3e7fc2 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -232,11 +232,9 @@ void genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer, uint32_t total_scratch); void -genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, - const struct intel_l3_config *l3_config, - VkShaderStageFlags active_stages, - const struct intel_urb_config *urb_cfg_in, - struct intel_urb_config *urb_cfg_out); +genX(emit_urb_setup)(struct anv_batch *batch, + const struct anv_device *device, + const struct intel_urb_config *urb_cfg); void genX(emit_sample_pattern)(struct anv_batch *batch, const struct vk_sample_locations_state *sl); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 675cae12a1e..23158da0d1c 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1588,6 +1588,27 @@ enum anv_coarse_pixel_state { * be reemitted outside of the VkPipeline object are tracked here. 
*/ struct anv_gfx_dynamic_state { + /* 3DSTATE_URB_* */ + struct intel_urb_config urb_cfg; + + /* 3DSTATE_URB_ALLOC_TASK */ + struct { + uint32_t TASKURBEntryAllocationSize; + uint32_t TASKNumberofURBEntriesSlice0; + uint32_t TASKNumberofURBEntriesSliceN; + uint32_t TASKURBStartingAddressSlice0; + uint32_t TASKURBStartingAddressSliceN; + } urb_task; + + /* 3DSTATE_URB_ALLOC_MESH */ + struct { + uint32_t MESHURBEntryAllocationSize; + uint32_t MESHNumberofURBEntriesSlice0; + uint32_t MESHNumberofURBEntriesSliceN; + uint32_t MESHURBStartingAddressSlice0; + uint32_t MESHURBStartingAddressSliceN; + } urb_mesh; + /* 3DSTATE_BLEND_STATE_POINTERS */ struct { bool AlphaToCoverageEnable; @@ -1765,6 +1786,7 @@ struct anv_gfx_dynamic_state { /* 3DSTATE_SF */ struct { + uint32_t DerefBlockSize; float LineWidth; uint32_t TriangleStripListProvokingVertexSelect; uint32_t LineStripListProvokingVertexSelect; @@ -3985,10 +4007,6 @@ struct anv_simple_shader { struct anv_batch *batch; /* Shader to use */ struct anv_shader_bin *kernel; - /* L3 config used by the shader */ - const struct intel_l3_config *l3_config; - /* Current URB config */ - const struct intel_urb_config *urb_cfg; /* Managed by the simpler shader helper*/ struct anv_state bt_state; @@ -5074,7 +5092,6 @@ struct anv_graphics_pipeline { /* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */ struct { - struct anv_gfx_state_ptr urb; struct anv_gfx_state_ptr vf_sgvs; struct anv_gfx_state_ptr vf_sgvs_2; struct anv_gfx_state_ptr vf_sgvs_instancing; diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index f40725a1c2f..4483a3a6b79 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -186,6 +186,7 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state) NAME(WM_DEPTH_STENCIL); NAME(PMA_FIX); NAME(WA_18019816803); + NAME(WA_14018283232); NAME(TBIMR_TILE_PASS_INFO); NAME(FS_MSAA_FLAGS); NAME(TCS_INPUT_VERTICES); diff --git a/src/intel/vulkan/anv_utrace.c
b/src/intel/vulkan/anv_utrace.c index 30e38901546..96ed8748ad5 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -241,7 +241,6 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, .general_state_stream = &submit->general_state_stream, .batch = batch, .kernel = copy_kernel, - .l3_config = device->internal_kernels_l3_config, }; anv_genX(device->info, emit_simple_shader_init)(&submit->simple_state); diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index dbaaf8ecb11..d10100fe956 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -266,15 +266,17 @@ static void blorp_pre_emit_urb_config(struct blorp_batch *blorp_batch, struct intel_urb_config *urb_cfg) { +#if INTEL_NEEDS_WA_16014912113 struct anv_cmd_buffer *cmd_buffer = blorp_batch->driver_batch; - if (genX(need_wa_16014912113)(&cmd_buffer->state.gfx.urb_cfg, urb_cfg)) { + if (genX(need_wa_16014912113)( + &cmd_buffer->state.gfx.urb_cfg, urb_cfg)) { genX(batch_emit_wa_16014912113)(&cmd_buffer->batch, &cmd_buffer->state.gfx.urb_cfg); } /* Update urb config. 
*/ - memcpy(&cmd_buffer->state.gfx.urb_cfg, urb_cfg, - sizeof(struct intel_urb_config)); + memcpy(&cmd_buffer->state.gfx.urb_cfg, urb_cfg, sizeof(*urb_cfg)); +#endif } static const struct intel_l3_config * diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index 4bd7ac19992..1a2d76818da 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -200,8 +200,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b .general_state_stream = &cmd_buffer->general_state_stream, .batch = &cmd_buffer->generation.batch, .kernel = gen_kernel, - .l3_config = device->internal_kernels_l3_config, - .urb_cfg = &cmd_buffer->state.gfx.urb_cfg, }; genX(emit_simple_shader_init)(state); @@ -540,8 +538,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd .general_state_stream = &cmd_buffer->general_state_stream, .batch = &cmd_buffer->batch, .kernel = gen_kernel, - .l3_config = device->internal_kernels_l3_config, - .urb_cfg = &cmd_buffer->state.gfx.urb_cfg, }; genX(emit_simple_shader_init)(&simple_state); diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 002baede0ba..02f2a87bdfa 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -770,6 +770,52 @@ calculate_tile_dimensions(const struct anv_device *device, UNREACHABLE("Invalid provoking vertex mode"); \ } \ +ALWAYS_INLINE static void +update_urb_config(struct anv_gfx_dynamic_state *hw_state, + const struct anv_graphics_pipeline *pipeline, + const struct anv_device *device) +{ + struct intel_urb_config new_cfg = { 0 }; + +#if GFX_VERx10 >= 125 + if (anv_pipeline_is_mesh(pipeline)) { + const struct brw_task_prog_data *task_prog_data = + get_task_prog_data(pipeline); + const struct brw_mesh_prog_data *mesh_prog_data = + get_mesh_prog_data(pipeline); + 
intel_get_mesh_urb_config(device->info, device->l3_config, + task_prog_data ? task_prog_data->map.size_dw : 0, + mesh_prog_data->map.size / 4, &new_cfg); + } else +#endif + { + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + const struct brw_vue_prog_data *prog_data = + !anv_pipeline_has_stage(pipeline, i) ? NULL : + (const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data; + + new_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1; + } + + UNUSED bool constrained; + intel_get_urb_config(device->info, device->l3_config, + pipeline->base.base.active_stages & + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + pipeline->base.base.active_stages & VK_SHADER_STAGE_GEOMETRY_BIT, + &new_cfg, &constrained); + } + +#if GFX_VER >= 12 + SET(SF, sf.DerefBlockSize, new_cfg.deref_block_size); +#endif + + for (int s = 0; s <= MESA_SHADER_MESH; s++) { + SET(URB, urb_cfg.size[s], new_cfg.size[s]); + SET(URB, urb_cfg.start[s], new_cfg.start[s]); + SET(URB, urb_cfg.entries[s], new_cfg.entries[s]); + } +} + ALWAYS_INLINE static void update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state, const struct vk_dynamic_graphics_state *dyn, @@ -1922,6 +1968,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) update_fs_msaa_flags(hw_state, dyn, pipeline); + if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS) + update_urb_config(hw_state, pipeline, device); + if ((gfx->dirty & ANV_CMD_DIRTY_PS) || BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { update_ps(hw_state, device, dyn, pipeline); @@ -2245,6 +2294,55 @@ genX(batch_emit_wa_14018283232)(struct anv_batch *batch) } #endif +void +genX(emit_urb_setup)(struct anv_batch *batch, + const struct anv_device *device, + const struct intel_urb_config *urb_cfg) +{ + for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { +#if GFX_VER >= 12 + anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) { + urb._3DCommandSubOpcode += i; + if 
(urb_cfg->size[i] > 0) + urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1; + urb.VSURBStartingAddressSlice0 = urb_cfg->start[i]; + urb.VSURBStartingAddressSliceN = urb_cfg->start[i]; + urb.VSNumberofURBEntriesSlice0 = urb_cfg->entries[i]; + urb.VSNumberofURBEntriesSliceN = urb_cfg->entries[i]; + } +#else + anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) { + urb._3DCommandSubOpcode += i; + if (urb_cfg->size[i] > 0) + urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1; + urb.VSURBStartingAddress = urb_cfg->start[i]; + urb.VSNumberofURBEntries = urb_cfg->entries[i]; + } +#endif + } + +#if GFX_VERx10 >= 125 + if (device->vk.enabled_extensions.EXT_mesh_shader) { + anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) { + if (urb_cfg->size[MESA_SHADER_TASK] > 0) + urb.TASKURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_TASK] - 1; + urb.TASKNumberofURBEntriesSlice0 = urb_cfg->entries[MESA_SHADER_TASK]; + urb.TASKNumberofURBEntriesSliceN = urb_cfg->entries[MESA_SHADER_TASK]; + urb.TASKURBStartingAddressSlice0 = urb_cfg->start[MESA_SHADER_TASK]; + urb.TASKURBStartingAddressSliceN = urb_cfg->start[MESA_SHADER_TASK]; + } + anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) { + if (urb_cfg->size[MESA_SHADER_MESH] > 0) + urb.MESHURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_MESH] - 1; + urb.MESHNumberofURBEntriesSlice0 = urb_cfg->entries[MESA_SHADER_MESH]; + urb.MESHNumberofURBEntriesSliceN = urb_cfg->entries[MESA_SHADER_MESH]; + urb.MESHURBStartingAddressSlice0 = urb_cfg->start[MESA_SHADER_MESH]; + urb.MESHURBStartingAddressSliceN = urb_cfg->start[MESA_SHADER_MESH]; + } + } +#endif +} + /** * This function handles dirty state emission to the batch buffer. 
*/ @@ -2324,15 +2422,24 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) gfx->base.push_constants_data_dirty = true; } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) { - if (genX(need_wa_16014912113)(&gfx->urb_cfg, &pipeline->urb_cfg)) { - genX(batch_emit_wa_16014912113)(&cmd_buffer->batch, - &gfx->urb_cfg); - } - anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb); +#define INIT(category, name) \ + .name = hw_state->category.name +#define SET(s, category, name) \ + s.name = hw_state->category.name - memcpy(&gfx->urb_cfg, &pipeline->urb_cfg, - sizeof(struct intel_urb_config)); + if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) { +#if INTEL_NEEDS_WA_16014912113 + if (genX(need_wa_16014912113)( + &cmd_buffer->state.gfx.urb_cfg, &hw_state->urb_cfg)) { + genX(batch_emit_wa_16014912113)(&cmd_buffer->batch, + &cmd_buffer->state.gfx.urb_cfg); + } + /* Update urb config. */ + memcpy(&cmd_buffer->state.gfx.urb_cfg, &hw_state->urb_cfg, + sizeof(hw_state->urb_cfg)); +#endif + + genX(emit_urb_setup)(&cmd_buffer->batch, device, &hw_state->urb_cfg); } if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION)) @@ -2453,11 +2560,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH)); } -#define INIT(category, name) \ - .name = hw_state->category.name -#define SET(s, category, name) \ - s.name = hw_state->category.name - /* Now the potentially dynamic instructions */ if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) { @@ -2704,6 +2806,9 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) { anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF), pipeline, partial.sf, sf) { +#if GFX_VER >= 12 + SET(sf, sf, DerefBlockSize); +#endif SET(sf, sf, LineWidth); SET(sf, sf, TriangleStripListProvokingVertexSelect); SET(sf, sf, LineStripListProvokingVertexSelect); diff --git 
a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index de6243ec03a..b665c511d4f 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -125,11 +125,16 @@ emit_common_so_memcpy(struct anv_memcpy_state *state, * allocate space for the VS. Even though one isn't run, we need VUEs to * store the data that VF is going to pass to SOL. */ - const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 }; - memcpy(state->urb_cfg.size, &entry_size, sizeof(entry_size)); + state->urb_cfg = (struct intel_urb_config) { + .size = { DIV_ROUND_UP(32, 64), 1, 1, 1 }, + }; + UNUSED bool constrained; + intel_get_urb_config(device->info, l3_config, false, false, + &state->urb_cfg, &constrained); - genX(emit_urb_setup)(device, batch, l3_config, - VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, &state->urb_cfg); + if (genX(need_wa_16014912113)(urb_cfg_in, &state->urb_cfg)) + genX(batch_emit_wa_16014912113)(batch, urb_cfg_in); + genX(emit_urb_setup)(batch, device, &state->urb_cfg); #if GFX_VER >= 12 /* Disable Primitive Replication. 
*/ diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 717ab0da3ed..7bc83a2e5fb 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -445,184 +445,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline, } } -void -genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, - const struct intel_l3_config *l3_config, - VkShaderStageFlags active_stages, - const struct intel_urb_config *urb_cfg_in, - struct intel_urb_config *urb_cfg_out) -{ - const struct intel_device_info *devinfo = device->info; - - bool constrained; - intel_get_urb_config(devinfo, l3_config, - active_stages & - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, - active_stages & VK_SHADER_STAGE_GEOMETRY_BIT, - urb_cfg_out, &constrained); - -#if INTEL_NEEDS_WA_16014912113 - if (genX(need_wa_16014912113)(urb_cfg_in, urb_cfg_out)) { - for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { -#if GFX_VER >= 12 - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1; - urb.VSURBStartingAddressSlice0 = urb_cfg_in->start[i]; - urb.VSURBStartingAddressSliceN = urb_cfg_in->start[i]; - urb.VSNumberofURBEntriesSlice0 = i == 0 ? 256 : 0; - urb.VSNumberofURBEntriesSliceN = i == 0 ? 256 : 0; - } -#else - anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBStartingAddress = urb_cfg_in->start[i]; - urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1; - urb.VSNumberofURBEntries = i == 0 ? 
256 : 0; - } -#endif - } - genx_batch_emit_pipe_control(batch, device->info, _3D, - ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); - } -#endif - - for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { -#if GFX_VER >= 12 - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1; - urb.VSURBStartingAddressSlice0 = urb_cfg_out->start[i]; - urb.VSURBStartingAddressSliceN = urb_cfg_out->start[i]; - urb.VSNumberofURBEntriesSlice0 = urb_cfg_out->entries[i]; - urb.VSNumberofURBEntriesSliceN = urb_cfg_out->entries[i]; - } -#else - anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBStartingAddress = urb_cfg_out->start[i]; - urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1; - urb.VSNumberofURBEntries = urb_cfg_out->entries[i]; - } -#endif - } - -#if GFX_VERx10 >= 125 - if (device->vk.enabled_extensions.EXT_mesh_shader) { - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero); - anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero); - } -#endif -} - -#if GFX_VERx10 >= 125 -static void -emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline) -{ - const struct intel_device_info *devinfo = pipeline->base.base.device->info; - - const struct brw_task_prog_data *task_prog_data = - anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ? - get_task_prog_data(pipeline) : NULL; - const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - - intel_get_mesh_urb_config(devinfo, pipeline->base.base.device->l3_config, - task_prog_data ? task_prog_data->map.size_dw : 0, - mesh_prog_data->map.size / 4, - &pipeline->urb_cfg); - - /* Zero out the primitive pipeline URB allocations. 
*/ - for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { -#if GFX_VER >= 12 - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_VS), urb) { - urb._3DCommandSubOpcode += i; - } -#else - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) { - urb._3DCommandSubOpcode += i; - } -#endif - } - - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), urb) { - if (task_prog_data) { - urb.TASKURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_TASK] - 1; - urb.TASKNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_TASK]; - urb.TASKNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_TASK]; - urb.TASKURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_TASK]; - urb.TASKURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_TASK]; - } - } - - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_MESH), urb) { - urb.MESHURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_MESH] - 1; - urb.MESHNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_MESH]; - urb.MESHNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_MESH]; - urb.MESHURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_MESH]; - urb.MESHURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_MESH]; - } -} -#endif - -static void -emit_urb_setup(struct anv_graphics_pipeline *pipeline) -{ -#if GFX_VERx10 >= 125 - if (anv_pipeline_is_mesh(pipeline)) { - emit_urb_setup_mesh(pipeline); - return; - } -#endif - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { - const struct brw_vue_prog_data *prog_data = - !anv_pipeline_has_stage(pipeline, i) ? NULL : - (const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data; - - pipeline->urb_cfg.size[i] = prog_data ? 
prog_data->urb_entry_size : 1; - } - - struct anv_device *device = pipeline->base.base.device; - const struct intel_device_info *devinfo = device->info; - - - bool constrained; - intel_get_urb_config(devinfo, - pipeline->base.base.device->l3_config, - pipeline->base.base.active_stages & - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, - pipeline->base.base.active_stages & - VK_SHADER_STAGE_GEOMETRY_BIT, - &pipeline->urb_cfg, &constrained); - - for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { -#if GFX_VER >= 12 - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1; - urb.VSURBStartingAddressSlice0 = pipeline->urb_cfg.start[i]; - urb.VSURBStartingAddressSliceN = pipeline->urb_cfg.start[i]; - urb.VSNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[i]; - urb.VSNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[i]; - } -#else - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) { - urb._3DCommandSubOpcode += i; - urb.VSURBStartingAddress = pipeline->urb_cfg.start[i]; - urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1; - urb.VSNumberofURBEntries = pipeline->urb_cfg.entries[i]; - } -#endif - } - -#if GFX_VERx10 >= 125 - if (device->vk.enabled_extensions.EXT_mesh_shader) { - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), zero); - anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_MESH), zero); - } -#endif - -} - static bool sbe_primitive_id_override(struct anv_graphics_pipeline *pipeline) { @@ -797,10 +619,6 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline) sf.VertexSubPixelPrecisionSelect = _8Bit; sf.AALineDistanceMode = true; -#if GFX_VER >= 12 - sf.DerefBlockSize = pipeline->urb_cfg.deref_block_size; -#endif - bool point_from_shader; if (anv_pipeline_is_primitive(pipeline)) { const struct brw_vue_prog_data *last_vue_prog_data = @@ -1909,8 +1727,6 @@ void 
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { - emit_urb_setup(pipeline); - emit_rs_state(pipeline); compute_kill_pixel(pipeline, state->ms, state); diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 11fd3c5d3b3..7f2f7ccb70c 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -1836,7 +1836,6 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer, uint32_t query_count, VkQueryResultFlags flags) { - struct anv_device *device = cmd_buffer->device; enum anv_pipe_bits needed_flushes = 0; trace_intel_begin_query_copy_shader(&cmd_buffer->trace); @@ -1921,8 +1920,6 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer, .general_state_stream = &cmd_buffer->general_state_stream, .batch = &cmd_buffer->batch, .kernel = copy_kernel, - .l3_config = device->internal_kernels_l3_config, - .urb_cfg = &cmd_buffer->state.gfx.urb_cfg, }; genX(emit_simple_shader_init)(&state); diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 856908a9af0..3c3f95af6f4 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -116,16 +116,23 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) * allocate space for the VS. Even though one isn't run, we need VUEs to * store the data that VF is going to pass to SOL. 
*/ - struct intel_urb_config urb_cfg_out = { + struct intel_urb_config urb_cfg = { .size = { DIV_ROUND_UP(32, 64), 1, 1, 1 }, }; - genX(emit_l3_config)(batch, device, state->l3_config); - state->cmd_buffer->state.current_l3_config = state->l3_config; + genX(emit_l3_config)(batch, device, device->l3_config); + state->cmd_buffer->state.current_l3_config = device->l3_config; - genX(emit_urb_setup)(device, batch, state->l3_config, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, - state->urb_cfg, &urb_cfg_out); + bool constrained; + intel_get_urb_config(device->info, device->l3_config, false, false, + &urb_cfg, &constrained); + + if (genX(need_wa_16014912113)(&state->cmd_buffer->state.gfx.urb_cfg, + &urb_cfg)) { + genX(batch_emit_wa_16014912113)( + batch, &state->cmd_buffer->state.gfx.urb_cfg); + } + genX(emit_urb_setup)(batch, device, &urb_cfg); anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { ps_blend.HasWriteableRT = true; @@ -168,7 +175,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) anv_batch_emit(batch, GENX(3DSTATE_SF), sf) { #if GFX_VER >= 12 - sf.DerefBlockSize = urb_cfg_out.deref_block_size; + sf.DerefBlockSize = urb_cfg.deref_block_size; #endif } @@ -376,8 +383,8 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) } /* Update urb config after simple shader. */ - memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg_out, - sizeof(struct intel_urb_config)); + memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg, + sizeof(urb_cfg)); state->cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0); state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |