intel: reuse intel_urb_config for mesh

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36512>
This commit is contained in:
Lionel Landwerlin 2025-03-27 18:12:18 +02:00 committed by Marge Bot
parent be16985c82
commit e52c1c3ba4
3 changed files with 66 additions and 58 deletions

View file

@@ -107,9 +107,9 @@ enum intel_urb_deref_block_size {
};
struct intel_urb_config {
unsigned size[5];
unsigned entries[5];
unsigned start[5];
unsigned size[8];
unsigned entries[8];
unsigned start[8];
enum intel_urb_deref_block_size deref_block_size;
};
@@ -134,21 +134,9 @@ intel_urb_setup_changed(const struct intel_urb_config *a,
return false;
}
struct intel_mesh_urb_allocation {
unsigned task_entries;
unsigned task_entry_size_64b;
unsigned task_starting_address_8kb;
unsigned mesh_entries;
unsigned mesh_entry_size_64b;
unsigned mesh_starting_address_8kb;
enum intel_urb_deref_block_size deref_block_size;
};
struct intel_mesh_urb_allocation
intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
const struct intel_l3_config *l3_cfg,
unsigned tue_size_dw, unsigned mue_size_dw);
void intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
const struct intel_l3_config *l3_cfg,
unsigned tue_size_dw, unsigned mue_size_dw,
struct intel_urb_config *urb_cfg);
#endif /* INTEL_L3_CONFIG_H */

View file

@@ -216,6 +216,8 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
*/
assert(urb_cfg->entries[i] >= min_entries[i]);
}
urb_cfg->entries[MESA_SHADER_MESH] = 0;
urb_cfg->entries[MESA_SHADER_TASK] = 0;
/* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */
int first_urb = push_constant_chunks;
@@ -246,6 +248,8 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
urb_cfg->start[i] = first_urb;
}
}
urb_cfg->start[MESA_SHADER_MESH] = 0;
urb_cfg->start[MESA_SHADER_TASK] = 0;
if (devinfo->ver >= 12) {
/* From the Gfx12 BSpec:
@@ -285,19 +289,24 @@ intel_get_urb_config(const struct intel_device_info *devinfo,
}
}
struct intel_mesh_urb_allocation
void
intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
const struct intel_l3_config *l3_cfg,
unsigned tue_size_dw, unsigned mue_size_dw)
unsigned tue_size_dw, unsigned mue_size_dw,
struct intel_urb_config *urb_cfg)
{
struct intel_mesh_urb_allocation r = {0};
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
urb_cfg->start[i] = 0;
urb_cfg->size[i] = 0;
urb_cfg->entries[i] = 0;
}
/* Allocation Size must be aligned to 64B. */
r.task_entry_size_64b = DIV_ROUND_UP(tue_size_dw * 4, 64);
r.mesh_entry_size_64b = DIV_ROUND_UP(mue_size_dw * 4, 64);
urb_cfg->size[MESA_SHADER_TASK] = DIV_ROUND_UP(tue_size_dw * 4, 64);
urb_cfg->size[MESA_SHADER_MESH] = DIV_ROUND_UP(mue_size_dw * 4, 64);
assert(r.task_entry_size_64b <= 1024);
assert(r.mesh_entry_size_64b <= 1024);
assert(urb_cfg->size[MESA_SHADER_TASK] <= 1024);
assert(urb_cfg->size[MESA_SHADER_MESH] <= 1024);
/* Per-slice URB size. */
unsigned total_urb_kb = intel_get_l3_config_urb_size(devinfo, l3_cfg);
@@ -322,7 +331,7 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* the max? */
float task_urb_share = 0.0f;
if (r.task_entry_size_64b > 0) {
if (urb_cfg->size[MESA_SHADER_TASK] > 0) {
/* By default, split memory between TASK and MESH proportionally to
* their entry sizes. Environment variable allow us to tweak it.
*
@@ -338,7 +347,9 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
if (task_urb_share_percentage >= 0) {
task_urb_share = task_urb_share_percentage / 100.0f;
} else {
task_urb_share = (float)r.task_entry_size_64b / (r.task_entry_size_64b + r.mesh_entry_size_64b);
task_urb_share = (float)urb_cfg->size[MESA_SHADER_TASK] /
(urb_cfg->size[MESA_SHADER_TASK] +
urb_cfg->size[MESA_SHADER_MESH]);
}
}
@@ -347,10 +358,12 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
* MESH Number of URB Entries must be divisible by 8 if the MESH/TASK URB
* Entry Allocation Size is less than 9 512-bit URB entries.
*/
const unsigned min_mesh_entries = r.mesh_entry_size_64b < 9 ? 8 : 1;
const unsigned min_task_entries = r.task_entry_size_64b < 9 ? 8 : 1;
const unsigned min_mesh_urb_kb = ALIGN(r.mesh_entry_size_64b * min_mesh_entries * 64, 1024) / 1024;
const unsigned min_task_urb_kb = ALIGN(r.task_entry_size_64b * min_task_entries * 64, 1024) / 1024;
const unsigned min_mesh_entries = urb_cfg->size[MESA_SHADER_MESH] < 9 ? 8 : 1;
const unsigned min_task_entries = urb_cfg->size[MESA_SHADER_TASK] < 9 ? 8 : 1;
const unsigned min_mesh_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_MESH] *
min_mesh_entries * 64, 1024) / 1024;
const unsigned min_task_urb_kb = ALIGN(urb_cfg->size[MESA_SHADER_TASK] *
min_task_entries * 64, 1024) / 1024;
total_urb_kb -= (min_mesh_urb_kb + min_task_urb_kb);
@@ -379,27 +392,36 @@ intel_get_mesh_urb_config(const struct intel_device_info *devinfo,
unsigned next_address_8kb = push_constant_kb / 8;
assert(push_constant_kb % 8 == 0);
r.mesh_starting_address_8kb = next_address_8kb;
r.mesh_entries = MIN2((mesh_urb_kb * 16) / r.mesh_entry_size_64b, 1548);
r.mesh_entries = r.mesh_entry_size_64b < 9 ? ROUND_DOWN_TO(r.mesh_entries, 8) : r.mesh_entries;
urb_cfg->start[MESA_SHADER_MESH] = next_address_8kb;
urb_cfg->entries[MESA_SHADER_MESH] =
MIN2((mesh_urb_kb * 16) / urb_cfg->size[MESA_SHADER_MESH], 1548);
urb_cfg->entries[MESA_SHADER_MESH] =
urb_cfg->size[MESA_SHADER_MESH] < 9 ?
ROUND_DOWN_TO(urb_cfg->entries[MESA_SHADER_MESH], 8) :
urb_cfg->entries[MESA_SHADER_MESH];
next_address_8kb += mesh_urb_kb / 8;
assert(mesh_urb_kb % 8 == 0);
r.task_starting_address_8kb = next_address_8kb;
urb_cfg->start[MESA_SHADER_TASK] = next_address_8kb;
task_urb_kb = total_urb_avail_mesh_task_kb - mesh_urb_kb;
if (r.task_entry_size_64b > 0) {
r.task_entries = MIN2((task_urb_kb * 16) / r.task_entry_size_64b, 1548);
r.task_entries = r.task_entry_size_64b < 9 ? ROUND_DOWN_TO(r.task_entries, 8) : r.task_entries;
if (urb_cfg->size[MESA_SHADER_TASK] > 0) {
urb_cfg->entries[MESA_SHADER_TASK] =
MIN2((task_urb_kb * 16) / urb_cfg->size[MESA_SHADER_TASK], 1548);
urb_cfg->entries[MESA_SHADER_TASK] =
urb_cfg->size[MESA_SHADER_TASK] < 9 ?
ROUND_DOWN_TO(urb_cfg->entries[MESA_SHADER_TASK], 8) :
urb_cfg->entries[MESA_SHADER_TASK];
} else {
urb_cfg->entries[MESA_SHADER_TASK] = 0;
}
r.deref_block_size = r.mesh_entries > 32 ?
urb_cfg->deref_block_size =
urb_cfg->entries[MESA_SHADER_MESH] > 32 ?
INTEL_URB_DEREF_BLOCK_SIZE_MESH :
INTEL_URB_DEREF_BLOCK_SIZE_PER_POLY;
assert(mesh_urb_kb + task_urb_kb <= total_urb_avail_mesh_task_kb);
assert(mesh_urb_kb >= min_mesh_urb_kb);
assert(task_urb_kb >= min_task_urb_kb);
return r;
}

View file

@@ -526,10 +526,10 @@ emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline)
get_task_prog_data(pipeline) : NULL;
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
const struct intel_mesh_urb_allocation alloc =
intel_get_mesh_urb_config(devinfo, pipeline->base.base.device->l3_config,
task_prog_data ? task_prog_data->map.size_dw : 0,
mesh_prog_data->map.size / 4);
intel_get_mesh_urb_config(devinfo, pipeline->base.base.device->l3_config,
task_prog_data ? task_prog_data->map.size_dw : 0,
mesh_prog_data->map.size / 4,
&pipeline->urb_cfg);
/* Zero out the primitive pipeline URB allocations. */
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
@@ -546,23 +546,21 @@ emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline)
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
if (task_prog_data) {
urb.TASKURBEntryAllocationSize = alloc.task_entry_size_64b - 1;
urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
urb.TASKURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_TASK] - 1;
urb.TASKNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_TASK];
urb.TASKNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_TASK];
urb.TASKURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_TASK];
}
}
anv_pipeline_emit(pipeline, final.urb, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
urb.MESHURBEntryAllocationSize = alloc.mesh_entry_size_64b - 1;
urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
urb.MESHURBEntryAllocationSize = pipeline->urb_cfg.size[MESA_SHADER_MESH] - 1;
urb.MESHNumberofURBEntriesSlice0 = pipeline->urb_cfg.entries[MESA_SHADER_MESH];
urb.MESHNumberofURBEntriesSliceN = pipeline->urb_cfg.entries[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSlice0 = pipeline->urb_cfg.start[MESA_SHADER_MESH];
urb.MESHURBStartingAddressSliceN = pipeline->urb_cfg.start[MESA_SHADER_MESH];
}
pipeline->urb_cfg.deref_block_size = alloc.deref_block_size;
}
#endif