anv: use A64 messages for push constants loads on Gfx12.5+

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32895>
This commit is contained in:
Lionel Landwerlin 2024-12-28 15:37:18 +02:00 committed by Marge Bot
parent 5c17299084
commit a8b84e1898
6 changed files with 49 additions and 31 deletions

View file

@ -1208,7 +1208,6 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *devinfo = cmd_buffer->device->info;
const struct anv_push_constants *data =
&cmd_buffer->state.gfx.base.push_constants;
@ -1222,10 +1221,6 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
* the 64-byte aligned address of the indirect data."
*/
struct anv_state state =
devinfo->verx10 >= 125 ?
anv_cmd_buffer_alloc_general_state(cmd_buffer,
align(sizeof(struct anv_push_constants), 64),
64) :
anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
sizeof(struct anv_push_constants),
32 /* bottom 5 bits MBZ */);

View file

@ -124,9 +124,13 @@ anv_nir_compute_push_layout(nir_shader *nir,
push_end = anv_drv_const_offset(cs.subgroup_id);
}
/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) or 64B (for
* 3DSTATE_(MESH|TASK)_SHADER_DATA) boundary and make it no larger than
* push_end (no push constants is indicated by push_start = UINT_MAX).
/* Align push_start down to a 32B boundary (for 3DSTATE_CONSTANT) and make
* it no larger than push_end (no push constants is indicated by
* push_start = UINT_MAX).
*
* If we were to use
* 3DSTATE_(MESH|TASK)_SHADER_DATA::IndirectDataStartAddress we would need
* to align things to 64B.
*
* SKL PRMs, Volume 2d: Command Reference: Structures,
* 3DSTATE_CONSTANT::Constant Buffer 0 Read Length:
@ -146,12 +150,8 @@ anv_nir_compute_push_layout(nir_shader *nir,
* (unlike all Gfx stages) and so we can bound+align the allocation there
* (see anv_cmd_buffer_cs_push_constants).
*/
const unsigned push_alignment =
devinfo->verx10 >= 125 && (nir->info.stage == MESA_SHADER_TASK ||
nir->info.stage == MESA_SHADER_MESH) ?
64 : 32;
push_start = MIN2(push_start, push_end);
push_start = ROUND_DOWN_TO(push_start, push_alignment);
push_start = ROUND_DOWN_TO(push_start, 32);
/* For scalar, push data size needs to be aligned to a DWORD. */
const unsigned alignment = 4;

View file

@ -424,6 +424,8 @@ populate_task_prog_key(struct anv_pipeline_stage *stage,
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
stage->key.base.uses_inline_push_addr = true;
}
static void
@ -436,6 +438,7 @@ populate_mesh_prog_key(struct anv_pipeline_stage *stage,
populate_base_prog_key(stage, device);
stage->key.mesh.compact_mue = compact_mue;
stage->key.base.uses_inline_push_addr = true;
}
static uint32_t
@ -561,6 +564,8 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage,
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125;
}
static void

View file

@ -223,6 +223,9 @@ struct intel_perf_query_result;
#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
/* Defines where various values are located in the inline parameter register.
*/
#define ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET (0)
#define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8)
/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
@ -4508,8 +4511,6 @@ anv_cmd_buffer_gfx_push_constants_state_address(struct anv_cmd_buffer *cmd_buffe
struct anv_state state)
{
return anv_state_pool_state_address(
cmd_buffer->device->info->verx10 >= 125 ?
&cmd_buffer->device->general_state_pool :
&cmd_buffer->device->dynamic_state_pool, state);
}

View file

@ -391,6 +391,10 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
uint64_t push_addr64 = anv_address_physical(
anv_state_pool_state_address(&cmd_buffer->device->general_state_pool,
comp_state->base.push_constants_state));
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch_size,
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
@ -405,8 +409,6 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
TG_BATCH_1,
#endif
.MessageSIMD = dispatch_size,
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
.GenerateLocalID = prog_data->generate_local_id != 0,
.EmitLocal = prog_data->generate_local_id,
.WalkOrder = prog_data->walk_order,
@ -422,6 +424,8 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
&dispatch),
.EmitInlineParameter = prog_data->uses_inline_data,
.InlineData = {
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX,
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = indirect_addr64 & 0xffffffff,
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32,
@ -466,11 +470,13 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
num_workgroup_data[2] = groupCountZ;
}
uint64_t push_addr64 = anv_address_physical(
anv_state_pool_state_address(&cmd_buffer->device->general_state_pool,
comp_state->base.push_constants_state));
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
.GenerateLocalID = prog_data->generate_local_id != 0,
.EmitLocal = prog_data->generate_local_id,
.WalkOrder = prog_data->walk_order,
@ -491,6 +497,8 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
prog_data, &dispatch),
.EmitInlineParameter = prog_data->uses_inline_data,
.InlineData = {
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],

View file

@ -562,18 +562,27 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
}
#if GFX_VERx10 >= 125
static inline uint32_t
get_mesh_task_push_offset(struct anv_cmd_buffer *cmd_buffer,
const struct anv_push_range *range)
static inline uint64_t
get_mesh_task_push_addr64(struct anv_cmd_buffer *cmd_buffer,
const struct anv_graphics_pipeline *pipeline,
gl_shader_stage stage)
{
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
const struct anv_shader_bin *shader = pipeline->base.shaders[stage];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
if (bind_map->push_ranges[0].length == 0)
return 0;
if (gfx_state->base.push_constants_state.alloc_size == 0) {
gfx_state->base.push_constants_state =
anv_cmd_buffer_gfx_push_constants(cmd_buffer);
}
return gfx_state->base.push_constants_state.offset + range->start * 32;
return anv_address_physical(
anv_address_add(
anv_cmd_buffer_gfx_push_constants_state_address(cmd_buffer,
gfx_state->base.push_constants_state),
bind_map->push_ranges[0].start * 32));
}
static void
@ -586,23 +595,23 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_EXT &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_TASK];
const struct anv_push_range *range = &shader->bind_map.push_ranges[0];
uint64_t push_addr64 =
get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_TASK);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) {
if (range->length > 0)
data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range);
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff;
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32;
}
}
if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_EXT &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_MESH];
const struct anv_push_range *range = &shader->bind_map.push_ranges[0];
uint64_t push_addr64 =
get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_MESH);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) {
if (range->length > 0)
data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range);
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff;
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32;
}
}