intel: move deref_block_size to intel_urb_config

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36512>
This commit is contained in:
Lionel Landwerlin 2025-03-27 17:16:04 +02:00 committed by Marge Bot
parent 99016a893a
commit be16985c82
16 changed files with 85 additions and 112 deletions

View file

@@ -6084,7 +6084,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
batch->screen->l3_config_3d,
tess_present,
gs_present,
&urb_cfg, NULL, &constrained);
&urb_cfg, &constrained);
#if GFX_VER == 7
if (devinfo->platform == INTEL_PLATFORM_IVB)

View file

@@ -172,7 +172,7 @@ blorp_emit_pipeline(struct blorp_batch *blorp_batch,
{
struct crocus_batch *batch = blorp_batch->driver_batch;
emit_urb_config(blorp_batch, params, NULL);
emit_urb_config(blorp_batch, params);
blorp_emit(blorp_batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
pp.PointertoVSState = blorp_emit_vs_state(blorp_batch);

View file

@@ -1051,8 +1051,6 @@ struct iris_context {
/** Aux usage of the fb's depth buffer (which may or may not exist). */
enum isl_aux_usage hiz_usage;
enum intel_urb_deref_block_size urb_deref_block_size;
/** Are depth writes enabled? (Depth buffer may or may not exist.) */
bool depth_writes_enabled;

View file

@@ -248,7 +248,7 @@ emit_indirect_generate_draw(struct iris_batch *batch,
iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
sf.DerefBlockSize = ice->state.urb_deref_block_size;
sf.DerefBlockSize = ice->shaders.urb.cfg.deref_block_size;
#endif
}

View file

@@ -897,7 +897,6 @@ genX(emit_urb_config)(struct iris_batch *batch,
has_tess_eval,
has_geometry,
&ice->shaders.urb.cfg,
&ice->state.urb_deref_block_size,
&ice->shaders.urb.constrained);
genX(urb_workaround)(batch, &ice->shaders.urb.cfg);
@@ -7788,7 +7787,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
sf.ViewportTransformEnable = !ice->state.window_space_position;
#if GFX_VER >= 12
sf.DerefBlockSize = ice->state.urb_deref_block_size;
sf.DerefBlockSize = ice->shaders.urb.cfg.deref_block_size;
#endif
}
iris_emit_merge(batch, cso->sf, dynamic_sf,

View file

@@ -252,7 +252,7 @@ _blorp_combine_address(struct blorp_batch *batch, void *location,
static void
emit_urb_config(struct blorp_batch *batch,
const struct blorp_params *params,
UNUSED enum intel_urb_deref_block_size *deref_block_size)
struct intel_urb_config *urb_cfg)
{
/* Once vertex fetcher has written full VUE entries with complete
* header the space requirement is as follows per vertex (in bytes):
@@ -272,35 +272,34 @@ emit_urb_config(struct blorp_batch *batch,
/* The URB size is expressed in units of 64 bytes (512 bits) */
const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);
struct intel_urb_config urb_cfg = {
*urb_cfg = (struct intel_urb_config) {
.size = { vs_entry_size, 1, 1, 1 },
};
bool constrained;
intel_get_urb_config(batch->blorp->compiler->brw->devinfo,
blorp_get_l3_config(batch),
false, false, &urb_cfg,
deref_block_size, &constrained);
false, false, urb_cfg, &constrained);
/* Tell drivers about the config. */
blorp_pre_emit_urb_config(batch, &urb_cfg);
blorp_pre_emit_urb_config(batch, urb_cfg);
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
blorp_emit(batch, GENX(3DSTATE_URB_ALLOC_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg.start[i];
urb.VSURBStartingAddressSliceN = urb_cfg.start[i];
urb.VSNumberofURBEntriesSlice0 = urb_cfg.entries[i];
urb.VSNumberofURBEntriesSliceN = urb_cfg.entries[i];
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSURBStartingAddressSlice0 = urb_cfg->start[i];
urb.VSURBStartingAddressSliceN = urb_cfg->start[i];
urb.VSNumberofURBEntriesSlice0 = urb_cfg->entries[i];
urb.VSNumberofURBEntriesSliceN = urb_cfg->entries[i];
}
#else
blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = urb_cfg.start[i];
urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg.entries[i];
urb.VSURBStartingAddress = urb_cfg->start[i];
urb.VSURBEntryAllocationSize = urb_cfg->size[i] - 1;
urb.VSNumberofURBEntries = urb_cfg->entries[i];
}
#endif
}
@@ -720,7 +719,7 @@ blorp_emit_vs_config(struct blorp_batch *batch,
static void
blorp_emit_sf_config(struct blorp_batch *batch,
const struct blorp_params *params,
UNUSED enum intel_urb_deref_block_size urb_deref_block_size)
const struct intel_urb_config *urb_cfg)
{
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
@@ -745,7 +744,7 @@ blorp_emit_sf_config(struct blorp_batch *batch,
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
sf.DerefBlockSize = urb_deref_block_size;
sf.DerefBlockSize = urb_cfg->deref_block_size;
#endif
}
@@ -1099,8 +1098,8 @@ static void
blorp_emit_pipeline(struct blorp_batch *batch,
const struct blorp_params *params)
{
enum intel_urb_deref_block_size urb_deref_block_size;
emit_urb_config(batch, params, &urb_deref_block_size);
struct intel_urb_config urb_cfg;
emit_urb_config(batch, params, &urb_cfg);
if (params->wm_prog_data) {
blorp_emit_blend_state(batch, params);
@@ -1154,7 +1153,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
clip.PerspectiveDivideDisable = true;
}
blorp_emit_sf_config(batch, params, urb_deref_block_size);
blorp_emit_sf_config(batch, params, &urb_cfg);
blorp_emit_ps_config(batch, params);
blorp_emit_cc_viewport(batch);

View file

@@ -239,8 +239,7 @@ _blorp_combine_address(struct blorp_batch *batch, void *location,
*/
static void
emit_urb_config(struct blorp_batch *batch,
const struct blorp_params *params,
UNUSED enum intel_urb_deref_block_size *deref_block_size)
const struct blorp_params *params)
{
/* Once vertex fetcher has written full VUE entries with complete
* header the space requirement is as follows per vertex (in bytes):
@@ -274,8 +273,7 @@ emit_urb_config(struct blorp_batch *batch,
bool constrained;
intel_get_urb_config(batch->blorp->compiler->elk->devinfo,
blorp_get_l3_config(batch),
false, false, &urb_cfg,
deref_block_size, &constrained);
false, false, &urb_cfg, &constrained);
/* Tell drivers about the config. */
blorp_pre_emit_urb_config(batch, &urb_cfg);
@@ -761,8 +759,7 @@ blorp_emit_vs_config(struct blorp_batch *batch,
static void
blorp_emit_sf_config(struct blorp_batch *batch,
const struct blorp_params *params,
UNUSED enum intel_urb_deref_block_size urb_deref_block_size)
const struct blorp_params *params)
{
const struct elk_wm_prog_data *prog_data = params->wm_prog_data;
@@ -1312,8 +1309,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
uint32_t color_calc_state_offset;
uint32_t depth_stencil_state_offset;
enum intel_urb_deref_block_size urb_deref_block_size;
emit_urb_config(batch, params, &urb_deref_block_size);
emit_urb_config(batch, params);
if (params->wm_prog_data) {
blend_state_offset = blorp_emit_blend_state(batch, params);
@@ -1395,7 +1391,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
clip.PerspectiveDivideDisable = true;
}
blorp_emit_sf_config(batch, params, urb_deref_block_size);
blorp_emit_sf_config(batch, params);
blorp_emit_ps_config(batch, params);
blorp_emit_cc_viewport(batch);

View file

@@ -110,13 +110,14 @@ struct intel_urb_config {
unsigned size[5];
unsigned entries[5];
unsigned start[5];
enum intel_urb_deref_block_size deref_block_size;
};
void intel_get_urb_config(const struct intel_device_info *devinfo,
const struct intel_l3_config *l3_cfg,
bool tess_present, bool gs_present,
struct intel_urb_config *urb_cfg,
enum intel_urb_deref_block_size *deref_block_size,
bool *constrained);
/* Returns if URB changed for given shader stage. */

View file

@@ -65,7 +65,6 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
const struct intel_l3_config *l3_cfg,
bool tess_present, bool gs_present,
struct intel_urb_config *urb_cfg,
enum intel_urb_deref_block_size *deref_block_size,
bool *constrained)
{
unsigned urb_size_kB = intel_get_l3_config_urb_size(devinfo, l3_cfg);
@@ -248,43 +247,41 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
}
}
if (deref_block_size) {
if (devinfo->ver >= 12) {
/* From the Gfx12 BSpec:
*
* "Deref Block size depends on the last enabled shader and number
* of handles programmed for that shader
*
* 1) For GS last shader enabled cases, the deref block is
* always set to a per poly(within hardware)
*
* If the last enabled shader is VS or DS.
*
* 1) If DS is last enabled shader then if the number of DS
* handles is less than 324, need to set per poly deref.
*
* 2) If VS is last enabled shader then if the number of VS
* handles is less than 192, need to set per poly deref"
*
* The default is 32 so we assume that's the right choice if we're
* not in one of the explicit cases listed above.
*/
if (gs_present) {
*deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
} else if (tess_present) {
if (urb_cfg->entries[MESA_SHADER_TESS_EVAL] < 324)
*deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
else
*deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_32;
} else {
if (urb_cfg->entries[MESA_SHADER_VERTEX] < 192)
*deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
else
*deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_32;
}
if (devinfo->ver >= 12) {
/* From the Gfx12 BSpec:
*
* "Deref Block size depends on the last enabled shader and number
* of handles programmed for that shader
*
* 1) For GS last shader enabled cases, the deref block is
* always set to a per poly(within hardware)
*
* If the last enabled shader is VS or DS.
*
* 1) If DS is last enabled shader then if the number of DS
* handles is less than 324, need to set per poly deref.
*
* 2) If VS is last enabled shader then if the number of VS
* handles is less than 192, need to set per poly deref"
*
* The default is 32 so we assume that's the right choice if we're not
* in one of the explicit cases listed above.
*/
if (gs_present) {
urb_cfg->deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
} else if (tess_present) {
if (urb_cfg->entries[MESA_SHADER_TESS_EVAL] < 324)
urb_cfg->deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
else
urb_cfg->deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_32;
} else {
*deref_block_size = 0;
if (urb_cfg->entries[MESA_SHADER_VERTEX] < 192)
urb_cfg->deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
else
urb_cfg->deref_block_size = INTEL_URB_DEREF_BLOCK_SIZE_32;
}
} else {
urb_cfg->deref_block_size = 0;
}
}

View file

@@ -236,8 +236,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out,
enum intel_urb_deref_block_size *deref_block_size);
struct intel_urb_config *urb_cfg_out);
void genX(emit_sample_pattern)(struct anv_batch *batch,
const struct vk_sample_locations_state *sl);

View file

@@ -129,8 +129,7 @@ emit_common_so_memcpy(struct anv_memcpy_state *state,
memcpy(state->urb_cfg.size, &entry_size, sizeof(entry_size));
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, &state->urb_cfg,
NULL);
VK_SHADER_STAGE_VERTEX_BIT, urb_cfg_in, &state->urb_cfg);
#if GFX_VER >= 12
/* Disable Primitive Replication. */

View file

@@ -450,8 +450,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const struct intel_urb_config *urb_cfg_in,
struct intel_urb_config *urb_cfg_out,
enum intel_urb_deref_block_size *deref_block_size)
struct intel_urb_config *urb_cfg_out)
{
const struct intel_device_info *devinfo = device->info;
@@ -460,8 +459,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
urb_cfg_out, deref_block_size,
&constrained);
urb_cfg_out, &constrained);
#if INTEL_NEEDS_WA_16014912113
if (genX(need_wa_16014912113)(urb_cfg_in, urb_cfg_out)) {
@@ -519,8 +517,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
#if GFX_VERx10 >= 125
static void
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size *deref_block_size)
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline)
{
const struct intel_device_info *devinfo = pipeline->base.base.device->info;
@@ -565,17 +562,16 @@ emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
}
*deref_block_size = alloc.deref_block_size;
pipeline->urb_cfg.deref_block_size = alloc.deref_block_size;
}
#endif
static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size *deref_block_size)
emit_urb_setup(struct anv_graphics_pipeline *pipeline)
{
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
emit_urb_setup_mesh(pipeline, deref_block_size);
emit_urb_setup_mesh(pipeline);
return;
}
#endif
@@ -598,8 +594,7 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
pipeline->base.base.active_stages &
VK_SHADER_STAGE_GEOMETRY_BIT,
&pipeline->urb_cfg, deref_block_size,
&constrained);
&pipeline->urb_cfg, &constrained);
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
#if GFX_VER >= 12
@@ -796,8 +791,7 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
}
static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size urb_deref_block_size)
emit_rs_state(struct anv_graphics_pipeline *pipeline)
{
anv_pipeline_emit(pipeline, partial.sf, GENX(3DSTATE_SF), sf) {
sf.ViewportTransformEnable = true;
@@ -806,7 +800,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
sf.AALineDistanceMode = true;
#if GFX_VER >= 12
sf.DerefBlockSize = urb_deref_block_size;
sf.DerefBlockSize = pipeline->urb_cfg.deref_block_size;
#endif
bool point_from_shader;
@@ -1917,10 +1911,9 @@ void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
enum intel_urb_deref_block_size urb_deref_block_size;
emit_urb_setup(pipeline, &urb_deref_block_size);
emit_urb_setup(pipeline);
emit_rs_state(pipeline, urb_deref_block_size);
emit_rs_state(pipeline);
compute_kill_pixel(pipeline, state->ms, state);
emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);

View file

@@ -123,10 +123,9 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
genX(emit_l3_config)(batch, device, state->l3_config);
state->cmd_buffer->state.current_l3_config = state->l3_config;
enum intel_urb_deref_block_size deref_block_size;
genX(emit_urb_setup)(device, batch, state->l3_config,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
state->urb_cfg, &urb_cfg_out, &deref_block_size);
state->urb_cfg, &urb_cfg_out);
anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
ps_blend.HasWriteableRT = true;
@@ -169,7 +168,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
anv_batch_emit(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
sf.DerefBlockSize = deref_block_size;
sf.DerefBlockSize = urb_cfg_out.deref_block_size;
#endif
}

View file

@@ -113,8 +113,7 @@ void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
enum intel_urb_deref_block_size *deref_block_size);
const unsigned entry_size[4]);
void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples,
const struct vk_sample_locations_state *sl);

View file

@@ -88,7 +88,7 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
const unsigned entry_size[4] = { DIV_ROUND_UP(32, 64), 1, 1, 1 };
genX(emit_urb_setup)(device, batch, l3_config,
VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);
VK_SHADER_STAGE_VERTEX_BIT, entry_size);
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {

View file

@@ -273,8 +273,7 @@ void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct intel_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4],
enum intel_urb_deref_block_size *deref_block_size)
const unsigned entry_size[4])
{
const struct intel_device_info *devinfo = device->info;
struct intel_urb_config urb_cfg = {
@@ -286,7 +285,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
&urb_cfg, deref_block_size, &constrained);
&urb_cfg, &constrained);
#if GFX_VERx10 == 70
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
@@ -315,8 +314,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
}
static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
enum intel_urb_deref_block_size *deref_block_size)
emit_urb_setup(struct anv_graphics_pipeline *pipeline)
{
unsigned entry_size[4];
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
@@ -329,8 +327,7 @@ emit_urb_setup(struct anv_graphics_pipeline *pipeline,
genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
pipeline->base.l3_config,
pipeline->active_stages, entry_size,
deref_block_size);
pipeline->active_stages, entry_size);
}
static void
@@ -610,8 +607,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
const struct vk_input_assembly_state *ia,
const struct vk_rasterization_state *rs,
const struct vk_multisample_state *ms,
const struct vk_render_pass_state *rp,
enum intel_urb_deref_block_size urb_deref_block_size)
const struct vk_render_pass_state *rp)
{
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
@@ -1829,12 +1825,10 @@ void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
enum intel_urb_deref_block_size urb_deref_block_size;
emit_urb_setup(pipeline, &urb_deref_block_size);
emit_urb_setup(pipeline);
assert(state->rs != NULL);
emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
urb_deref_block_size);
emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp);
emit_ms_state(pipeline, state->ms);
emit_cb_state(pipeline, state->cb, state->ms, state->rp);
compute_kill_pixel(pipeline, state->ms, state);