anv: move URB programming to dynamic emission path

With the pipeline object going away, we have nowhere to store the pre-packed URB state.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36512>
Authored by Lionel Landwerlin on 2025-03-27 18:13:02 +02:00, committed by Marge Bot
commit f28af4e7f2, parent 8d5cb999f9
12 changed files with 174 additions and 232 deletions
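In outline: instead of packing the URB packets into the pipeline's final.urb at pipeline-creation time, the command buffer now computes an intel_urb_config whenever the pre-rasterization shaders change and emits it along with the rest of the dirty dynamic state. A condensed sketch of the new flow, using only names that appear in the diff below:

/* Sketch of the new dynamic path (names from this commit):
 *
 * cmd_buffer_flush_gfx_runtime_state():
 *    if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
 *       update_urb_config(hw_state, pipeline, device);  // fills hw_state->urb_cfg,
 *                                                       // marks ANV_GFX_STATE_URB dirty
 *
 * cmd_buffer_gfx_state_emission():
 *    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB))
 *       genX(emit_urb_setup)(&cmd_buffer->batch, device, &hw_state->urb_cfg);
 */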


@@ -554,7 +554,6 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
* We avoid comparing protected packets as all the fields but the scratch
* surface are identical. We just need to select the right one at emission.
*/
diff_fix_state(URB, final.urb);
diff_fix_state(VF_SGVS, final.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);


@@ -232,11 +232,9 @@ void genX(cmd_buffer_ensure_cfe_state)(struct anv_cmd_buffer *cmd_buffer,
uint32_t total_scratch);
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out);
genX(emit_urb_setup)(struct anv_batch *batch,
const struct anv_device *device,
const struct intel_urb_config *urb_cfg);
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct vk_sample_locations_state *sl);
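The new prototype drops the l3_config, active_stages, and in/out config pair: callers now compute an intel_urb_config themselves and hand it over for emission only. A minimal, hypothetical call site under that assumption (batch and device in scope):

   struct intel_urb_config urb_cfg = {
      .size = { 1, 1, 1, 1 },  /* per-stage VUE entry sizes */
   };
   UNUSED bool constrained;
   intel_get_urb_config(device->info, device->l3_config,
                        false /* tess */, false /* gs */,
                        &urb_cfg, &constrained);
   genX(emit_urb_setup)(batch, device, &urb_cfg);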


@@ -1588,6 +1588,27 @@ enum anv_coarse_pixel_state {
* be reemitted outside of the VkPipeline object are tracked here.
*/
struct anv_gfx_dynamic_state {
/* 3DSTATE_URB_* */
struct intel_urb_config urb_cfg;
/* 3DSTATE_URB_ALLOC_TASK */
struct {
uint32_t TASKURBEntryAllocationSize;
uint32_t TASKNumberofURBEntriesSlice0;
uint32_t TASKNumberofURBEntriesSliceN;
uint32_t TASKURBStartingAddressSlice0;
uint32_t TASKURBStartingAddressSliceN;
} urb_task;
/* 3DSTATE_URB_ALLOC_MESH */
struct {
uint32_t MESHURBEntryAllocationSize;
uint32_t MESHNumberofURBEntriesSlice0;
uint32_t MESHNumberofURBEntriesSliceN;
uint32_t MESHURBStartingAddressSlice0;
uint32_t MESHURBStartingAddressSliceN;
} urb_mesh;
/* 3DSTATE_BLEND_STATE_POINTERS */
struct {
bool AlphaToCoverageEnable;
@@ -1765,6 +1786,7 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_SF */
struct {
uint32_t DerefBlockSize;
float LineWidth;
uint32_t TriangleStripListProvokingVertexSelect;
uint32_t LineStripListProvokingVertexSelect;
@@ -3985,10 +4007,6 @@ struct anv_simple_shader {
struct anv_batch *batch;
/* Shader to use */
struct anv_shader_bin *kernel;
/* L3 config used by the shader */
const struct intel_l3_config *l3_config;
/* Current URB config */
const struct intel_urb_config *urb_cfg;
/* Managed by the simpler shader helper */
struct anv_state bt_state;
@@ -5074,7 +5092,6 @@ struct anv_graphics_pipeline {
/* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */
struct {
struct anv_gfx_state_ptr urb;
struct anv_gfx_state_ptr vf_sgvs;
struct anv_gfx_state_ptr vf_sgvs_2;
struct anv_gfx_state_ptr vf_sgvs_instancing;


@@ -186,6 +186,7 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
NAME(WM_DEPTH_STENCIL);
NAME(PMA_FIX);
NAME(WA_18019816803);
NAME(WA_14018283232);
NAME(TBIMR_TILE_PASS_INFO);
NAME(FS_MSAA_FLAGS);
NAME(TCS_INPUT_VERTICES);


@@ -241,7 +241,6 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
.general_state_stream = &submit->general_state_stream,
.batch = batch,
.kernel = copy_kernel,
.l3_config = device->internal_kernels_l3_config,
};
anv_genX(device->info, emit_simple_shader_init)(&submit->simple_state);


@@ -266,15 +266,17 @@ static void
blorp_pre_emit_urb_config(struct blorp_batch *blorp_batch,
struct intel_urb_config *urb_cfg)
{
#if INTEL_NEEDS_WA_16014912113
struct anv_cmd_buffer *cmd_buffer = blorp_batch->driver_batch;
if (genX(need_wa_16014912113)(&cmd_buffer->state.gfx.urb_cfg, urb_cfg)) {
if (genX(need_wa_16014912113)(
&cmd_buffer->state.gfx.urb_cfg, urb_cfg)) {
genX(batch_emit_wa_16014912113)(&cmd_buffer->batch,
&cmd_buffer->state.gfx.urb_cfg);
}
/* Update urb config. */
memcpy(&cmd_buffer->state.gfx.urb_cfg, urb_cfg,
sizeof(struct intel_urb_config));
memcpy(&cmd_buffer->state.gfx.urb_cfg, urb_cfg, sizeof(*urb_cfg));
#endif
}
static const struct intel_l3_config *
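The compare, emit-workaround, cache sequence above now recurs in blorp, the generated-draws path, the simple-shader helper, and the dynamic state emission. A hypothetical helper capturing that pattern (not something this commit adds, name invented for illustration):

static void
update_cached_urb_cfg(struct anv_cmd_buffer *cmd_buffer,
                      struct anv_batch *batch,
                      const struct intel_urb_config *new_cfg)
{
#if INTEL_NEEDS_WA_16014912113
   /* Emit the workaround against the previously programmed config... */
   if (genX(need_wa_16014912113)(&cmd_buffer->state.gfx.urb_cfg, new_cfg))
      genX(batch_emit_wa_16014912113)(batch, &cmd_buffer->state.gfx.urb_cfg);
   /* ...then remember the new one for the next comparison. */
   memcpy(&cmd_buffer->state.gfx.urb_cfg, new_cfg, sizeof(*new_cfg));
#endif
}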


@@ -200,8 +200,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
.general_state_stream = &cmd_buffer->general_state_stream,
.batch = &cmd_buffer->generation.batch,
.kernel = gen_kernel,
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(state);
@@ -540,8 +538,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
.general_state_stream = &cmd_buffer->general_state_stream,
.batch = &cmd_buffer->batch,
.kernel = gen_kernel,
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(&simple_state);


@@ -770,6 +770,52 @@ calculate_tile_dimensions(const struct anv_device *device,
UNREACHABLE("Invalid provoking vertex mode"); \
} \
ALWAYS_INLINE static void
update_urb_config(struct anv_gfx_dynamic_state *hw_state,
const struct anv_graphics_pipeline *pipeline,
const struct anv_device *device)
{
struct intel_urb_config new_cfg = { 0 };
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
const struct brw_task_prog_data *task_prog_data =
get_task_prog_data(pipeline);
const struct brw_mesh_prog_data *mesh_prog_data =
get_mesh_prog_data(pipeline);
intel_get_mesh_urb_config(device->info, device->l3_config,
task_prog_data ? task_prog_data->map.size_dw : 0,
mesh_prog_data->map.size / 4, &new_cfg);
} else
#endif
{
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
const struct brw_vue_prog_data *prog_data =
!anv_pipeline_has_stage(pipeline, i) ? NULL :
(const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data;
new_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
}
UNUSED bool constrained;
intel_get_urb_config(device->info, device->l3_config,
pipeline->base.base.active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
pipeline->base.base.active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
&new_cfg, &constrained);
}
#if GFX_VER >= 12
SET(SF, sf.DerefBlockSize, new_cfg.deref_block_size);
#endif
for (int s = 0; s <= MESA_SHADER_MESH; s++) {
SET(URB, urb_cfg.size[s], new_cfg.size[s]);
SET(URB, urb_cfg.start[s], new_cfg.start[s]);
SET(URB, urb_cfg.entries[s], new_cfg.entries[s]);
}
}
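update_urb_config() relies on the flush-side SET(state_bit, hw_state_field, value) helper, which is distinct from the emission-side SET(s, category, name) defined further down. Its definition is not part of this hunk; from the usage here it plausibly writes the value into hw_state and flags the ANV_GFX_STATE_* bit only when the value actually changed, roughly like this sketch (assumption, not from this diff):

#define SET(bit, field, value)                                  \
   do {                                                         \
      __typeof(hw_state->field) __v = (value);                  \
      if (hw_state->field != __v) {                             \
         hw_state->field = __v;                                 \
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);      \
      }                                                         \
   } while (0)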
ALWAYS_INLINE static void
update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
@@ -1922,6 +1968,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
update_fs_msaa_flags(hw_state, dyn, pipeline);
if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
update_urb_config(hw_state, pipeline, device);
if ((gfx->dirty & ANV_CMD_DIRTY_PS) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
update_ps(hw_state, device, dyn, pipeline);
@@ -2245,6 +2294,55 @@ genX(batch_emit_wa_14018283232)(struct anv_batch *batch)
}
#endif
void
genX(emit_urb_setup)(struct anv_batch *batch,
const struct anv_device *device,
const struct intel_urb_config *urb_cfg)
{
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
if (urb_cfg->size[i] > 0)
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg->start[i];
urb.VSURBStartingAddressSliceN = urb_cfg->start[i];
urb.VSNumberofURBEntriesSlice0 = urb_cfg->entries[i];
urb.VSNumberofURBEntriesSliceN = urb_cfg->entries[i];
}
#else
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
if (urb_cfg->size[i] > 0)
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSURBStartingAddress = urb_cfg->start[i];
urb.VSNumberofURBEntries = urb_cfg->entries[i];
}
#endif
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_extensions.EXT_mesh_shader) {
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
if (urb_cfg->size[MESA_SHADER_TASK] > 0)
urb.TASKURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_TASK] - 1;
urb.TASKNumberofURBEntriesSlice0 = urb_cfg->entries[MESA_SHADER_TASK];
urb.TASKNumberofURBEntriesSliceN = urb_cfg->entries[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSlice0 = urb_cfg->start[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSliceN = urb_cfg->start[MESA_SHADER_TASK];
}
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
if (urb_cfg->size[MESA_SHADER_MESH] > 0)
urb.MESHURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_MESH] - 1;
urb.MESHNumberofURBEntriesSlice0 = urb_cfg->entries[MESA_SHADER_MESH];
urb.MESHNumberofURBEntriesSliceN = urb_cfg->entries[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSlice0 = urb_cfg->start[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSliceN = urb_cfg->start[MESA_SHADER_MESH];
}
}
#endif
}
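The per-stage loop leans on an existing anv idiom: 3DSTATE_URB_VS/HS/DS/GS (and the Gfx12+ 3DSTATE_URB_ALLOC_* variants) share one layout with consecutive sub-opcodes, so the VS template is packed once and urb._3DCommandSubOpcode += i retargets it per stage:

/* Stage index to packet mapping assumed by `_3DCommandSubOpcode += i`
 * (MESA_SHADER_VERTEX is 0, so VS is the base encoding):
 *   i == MESA_SHADER_VERTEX    -> 3DSTATE_URB_VS (base + 0)
 *   i == MESA_SHADER_TESS_CTRL -> 3DSTATE_URB_HS (base + 1)
 *   i == MESA_SHADER_TESS_EVAL -> 3DSTATE_URB_DS (base + 2)
 *   i == MESA_SHADER_GEOMETRY  -> 3DSTATE_URB_GS (base + 3)
 */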
/**
* This function handles dirty state emission to the batch buffer.
*/
@@ -2324,15 +2422,24 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
gfx->base.push_constants_data_dirty = true;
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
if (genX(need_wa_16014912113)(&gfx->urb_cfg, &pipeline->urb_cfg)) {
genX(batch_emit_wa_16014912113)(&cmd_buffer->batch,
&gfx->urb_cfg);
}
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
#define INIT(category, name) \
.name = hw_state->category.name
#define SET(s, category, name) \
s.name = hw_state->category.name
memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
sizeof(struct intel_urb_config));
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
#if INTEL_NEEDS_WA_16014912113
if (genX(need_wa_16014912113)(
&cmd_buffer->state.gfx.urb_cfg, &hw_state->urb_cfg)) {
genX(batch_emit_wa_16014912113)(&cmd_buffer->batch,
&cmd_buffer->state.gfx.urb_cfg);
}
/* Update urb config. */
memcpy(&cmd_buffer->state.gfx.urb_cfg, &hw_state->urb_cfg,
sizeof(hw_state->urb_cfg));
#endif
genX(emit_urb_setup)(&cmd_buffer->batch, device, &hw_state->urb_cfg);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
@@ -2453,11 +2560,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
}
#define INIT(category, name) \
.name = hw_state->category.name
#define SET(s, category, name) \
s.name = hw_state->category.name
/* Now the potentially dynamic instructions */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
@@ -2704,6 +2806,9 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
pipeline, partial.sf, sf) {
#if GFX_VER >= 12
SET(sf, sf, DerefBlockSize);
#endif
SET(sf, sf, LineWidth);
SET(sf, sf, TriangleStripListProvokingVertexSelect);
SET(sf, sf, LineStripListProvokingVertexSelect);


@@ -125,11 +125,16 @@ emit_common_so_memcpy(struct anv_memcpy_state *state,
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
*/
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
memcpy(state->urb_cfg.size, &entry_size, sizeof(entry_size));
state->urb_cfg = (struct intel_urb_config) {
.size = { DIV_ROUND_UP(32, 64), 1, 1, 1 },
};
UNUSED bool constrained;
intel_get_urb_config(device->info, l3_config, false, false,
&state->urb_cfg, &constrained);
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, &state->urb_cfg);
if (genX(need_wa_16014912113)(urb_cfg_in, &state->urb_cfg))
genX(batch_emit_wa_16014912113)(batch, urb_cfg_in);
genX(emit_urb_setup)(batch, device, &state->urb_cfg);
#if GFX_VER >= 12
/* Disable Primitive Replication. */


@@ -445,184 +445,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
}
}
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out)
{
const struct intel_device_info *devinfo = device->info;
bool constrained;
intel_get_urb_config(devinfo, l3_config,
active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
urb_cfg_out, &constrained);
#if INTEL_NEEDS_WA_16014912113
if (genX(need_wa_16014912113)(urb_cfg_in, urb_cfg_out)) {
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg_in->start[i];
urb.VSURBStartingAddressSliceN = urb_cfg_in->start[i];
urb.VSNumberofURBEntriesSlice0 = i == 0 ? 256 : 0;
urb.VSNumberofURBEntriesSliceN = i == 0 ? 256 : 0;
}
#else
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg_in->start[i];
urb.VSURBEntryAllocationSize = urb_cfg_in->size[i] - 1;
urb.VSNumberofURBEntries = i == 0 ? 256 : 0;
}
#endif
}
genx_batch_emit_pipe_control(batch, device->info, _3D,
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
}
#endif
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg_out->start[i];
urb.VSURBStartingAddressSliceN = urb_cfg_out->start[i];
urb.VSNumberofURBEntriesSlice0 = urb_cfg_out->entries[i];
urb.VSNumberofURBEntriesSliceN = urb_cfg_out->entries[i];
}
#else
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg_out->start[i];
urb.VSURBEntryAllocationSize = urb_cfg_out->size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg_out->entries[i];
}
#endif
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_extensions.EXT_mesh_shader) {
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
}
#endif
}
#if GFX_VERx10 >= 125
static void
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline)
{
const struct intel_device_info *devinfo = pipeline->base.base.device->info;
const struct brw_task_prog_data *task_prog_data =
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
get_task_prog_data(pipeline) : NULL;
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
intel_get_mesh_urb_config(devinfo, pipeline->base.base.device->l3_config,
task_prog_data ? task_prog_data->map.size_dw : 0,
mesh_prog_data->map.size / 4,
&pipeline->urb_cfg);
/* Zero out the primitive pipeline URB allocations. */
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
}
#else
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
}
#endif
}
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
if (task_prog_data) {
urb.TASKURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_TASK] - 1;
urb.TASKNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_TASK];
urb.TASKNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_TASK];
}
}
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
urb.MESHURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_MESH] - 1;
urb.MESHNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_MESH];
urb.MESHNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_MESH];
}
}
#endif
static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline)
{
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
emit_urb_setup_mesh(pipeline);
return;
}
#endif
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
const struct brw_vue_prog_data *prog_data =
!anv_pipeline_has_stage(pipeline, i) ? NULL :
(const struct brw_vue_prog_data *) pipeline->base.shaders[i]->prog_data;
pipeline->urb_cfg.size[i] = prog_data ? prog_data->urb_entry_size : 1;
}
struct anv_device *device = pipeline->base.base.device;
const struct intel_device_info *devinfo = device->info;
bool constrained;
intel_get_urb_config(devinfo,
pipeline->base.base.device->l3_config,
pipeline->base.base.active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
pipeline->base.base.active_stages &
VK_SHADER_STAGE_GEOMETRY_BIT,
&pipeline->urb_cfg, &constrained);
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1;
urb.VSURBStartingAddressSlice0 = pipeline->urb_cfg.start[i];
urb.VSURBStartingAddressSliceN = pipeline->urb_cfg.start[i];
urb.VSNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[i];
urb.VSNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[i];
}
#else
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = pipeline->urb_cfg.start[i];
urb.VSURBEntryAllocationSize = pipeline->urb_cfg.size[i] - 1;
urb.VSNumberofURBEntries = pipeline->urb_cfg.entries[i];
}
#endif
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_extensions.EXT_mesh_shader) {
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), zero);
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_MESH), zero);
}
#endif
}
static bool
sbe_primitive_id_override(struct anv_graphics_pipeline *pipeline)
{
@@ -797,10 +619,6 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline)
sf.VertexSubPixelPrecisionSelect = _8Bit;
sf.AALineDistanceMode = true;
#if GFX_VER >= 12
sf.DerefBlockSize = pipeline->urb_cfg.deref_block_size;
#endif
bool point_from_shader;
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_prog_data *last_vue_prog_data =
@@ -1909,8 +1727,6 @@ void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
emit_urb_setup(pipeline);
emit_rs_state(pipeline);
compute_kill_pixel(pipeline, state->ms, state);


@@ -1836,7 +1836,6 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
uint32_t query_count,
VkQueryResultFlags flags)
{
struct anv_device *device = cmd_buffer->device;
enum anv_pipe_bits needed_flushes = 0;
trace_intel_begin_query_copy_shader(&cmd_buffer->trace);
@@ -1921,8 +1920,6 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
.general_state_stream = &cmd_buffer->general_state_stream,
.batch = &cmd_buffer->batch,
.kernel = copy_kernel,
.l3_config = device->internal_kernels_l3_config,
.urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
};
genX(emit_simple_shader_init)(&state);


@@ -116,16 +116,23 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
* allocate space for the VS. Even though one isn't run, we need VUEs to
* store the data that VF is going to pass to SOL.
*/
struct intel_urb_config urb_cfg_out = {
struct intel_urb_config urb_cfg = {
.size = { DIV_ROUND_UP(32, 64), 1, 1, 1 },
};
genX(emit_l3_config)(batch, device, state->l3_config);
state->cmd_buffer->state.current_l3_config = state->l3_config;
genX(emit_l3_config)(batch, device, device->l3_config);
state->cmd_buffer->state.current_l3_config = device->l3_config;
genX(emit_urb_setup)(device, batch, state->l3_config,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
state->urb_cfg, &urb_cfg_out);
bool constrained;
intel_get_urb_config(device->info, device->l3_config, false, false,
&urb_cfg, &constrained);
if (genX(need_wa_16014912113)(&state->cmd_buffer->state.gfx.urb_cfg,
&urb_cfg)) {
genX(batch_emit_wa_16014912113)(
batch, &state->cmd_buffer->state.gfx.urb_cfg);
}
genX(emit_urb_setup)(batch, device, &urb_cfg);
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
ps_blend.HasWriteableRT = true;
@@ -168,7 +175,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
anv_batch_emit(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
sf.DerefBlockSize = urb_cfg_out.deref_block_size;
sf.DerefBlockSize = urb_cfg.deref_block_size;
#endif
}
@@ -376,8 +383,8 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
}
/* Update urb config after simple shader. */
memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg_out,
sizeof(struct intel_urb_config));
memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg,
sizeof(urb_cfg));
state->cmd_buffer->state.gfx.vb_dirty = BITFIELD_BIT(0);
state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_INDEX_BUFFER |