mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 05:50:11 +01:00
anv: use A64 messages for push constants loads on Gfx12.5+
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32895>
This commit is contained in:
parent
5c17299084
commit
a8b84e1898
6 changed files with 49 additions and 31 deletions
|
|
@ -1208,7 +1208,6 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
|
|||
struct anv_state
|
||||
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct intel_device_info *devinfo = cmd_buffer->device->info;
|
||||
const struct anv_push_constants *data =
|
||||
&cmd_buffer->state.gfx.base.push_constants;
|
||||
|
||||
|
|
@ -1222,10 +1221,6 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
|||
* the 64-byte aligned address of the indirect data."
|
||||
*/
|
||||
struct anv_state state =
|
||||
devinfo->verx10 >= 125 ?
|
||||
anv_cmd_buffer_alloc_general_state(cmd_buffer,
|
||||
align(sizeof(struct anv_push_constants), 64),
|
||||
64) :
|
||||
anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
|
||||
sizeof(struct anv_push_constants),
|
||||
32 /* bottom 5 bits MBZ */);
|
||||
|
|
|
|||
|
|
@ -124,9 +124,13 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
push_end = anv_drv_const_offset(cs.subgroup_id);
|
||||
}
|
||||
|
||||
/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) or 64B (for
|
||||
* 3DSTATE_(MESH|TASK)_SHADER_DATA) boundary and make it no larger than
|
||||
* push_end (no push constants is indicated by push_start = UINT_MAX).
|
||||
/* Align push_start down to a 32B boundary (for 3DSTATE_CONSTANT) and make it no
|
||||
* larger than push_end (no push constants is indicated by push_start =
|
||||
* UINT_MAX).
|
||||
*
|
||||
* If we were to use
|
||||
* 3DSTATE_(MESH|TASK)_SHADER_DATA::IndirectDataStartAddress we would need
|
||||
* to align things to 64B.
|
||||
*
|
||||
* SKL PRMs, Volume 2d: Command Reference: Structures,
|
||||
* 3DSTATE_CONSTANT::Constant Buffer 0 Read Length:
|
||||
|
|
@ -146,12 +150,8 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
* (unlike all Gfx stages) and so we can bound+align the allocation there
|
||||
* (see anv_cmd_buffer_cs_push_constants).
|
||||
*/
|
||||
const unsigned push_alignment =
|
||||
devinfo->verx10 >= 125 && (nir->info.stage == MESA_SHADER_TASK ||
|
||||
nir->info.stage == MESA_SHADER_MESH) ?
|
||||
64 : 32;
|
||||
push_start = MIN2(push_start, push_end);
|
||||
push_start = ROUND_DOWN_TO(push_start, push_alignment);
|
||||
push_start = ROUND_DOWN_TO(push_start, 32);
|
||||
|
||||
/* For scalar, push data size needs to be aligned to a DWORD. */
|
||||
const unsigned alignment = 4;
|
||||
|
|
|
|||
|
|
@ -424,6 +424,8 @@ populate_task_prog_key(struct anv_pipeline_stage *stage,
|
|||
memset(&stage->key, 0, sizeof(stage->key));
|
||||
|
||||
populate_base_prog_key(stage, device);
|
||||
|
||||
stage->key.base.uses_inline_push_addr = true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -436,6 +438,7 @@ populate_mesh_prog_key(struct anv_pipeline_stage *stage,
|
|||
populate_base_prog_key(stage, device);
|
||||
|
||||
stage->key.mesh.compact_mue = compact_mue;
|
||||
stage->key.base.uses_inline_push_addr = true;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -561,6 +564,8 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage,
|
|||
memset(&stage->key, 0, sizeof(stage->key));
|
||||
|
||||
populate_base_prog_key(stage, device);
|
||||
|
||||
stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -223,6 +223,9 @@ struct intel_perf_query_result;
|
|||
|
||||
#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
|
||||
|
||||
/* Defines where various values are located in the inline parameter register.
|
||||
*/
|
||||
#define ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET (0)
|
||||
#define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8)
|
||||
|
||||
/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
|
||||
|
|
@ -4508,8 +4511,6 @@ anv_cmd_buffer_gfx_push_constants_state_address(struct anv_cmd_buffer *cmd_buffe
|
|||
struct anv_state state)
|
||||
{
|
||||
return anv_state_pool_state_address(
|
||||
cmd_buffer->device->info->verx10 >= 125 ?
|
||||
&cmd_buffer->device->general_state_pool :
|
||||
&cmd_buffer->device->dynamic_state_pool, state);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -391,6 +391,10 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
uint64_t indirect_addr64 = anv_address_physical(indirect_addr);
|
||||
|
||||
uint64_t push_addr64 = anv_address_physical(
|
||||
anv_state_pool_state_address(&cmd_buffer->device->general_state_pool,
|
||||
comp_state->base.push_constants_state));
|
||||
|
||||
struct GENX(COMPUTE_WALKER_BODY) body = {
|
||||
.SIMDSize = dispatch_size,
|
||||
/* HSD 14016252163: Use of Morton walk order (and batching using a batch
|
||||
|
|
@ -405,8 +409,6 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
TG_BATCH_1,
|
||||
#endif
|
||||
.MessageSIMD = dispatch_size,
|
||||
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
|
||||
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
|
||||
.GenerateLocalID = prog_data->generate_local_id != 0,
|
||||
.EmitLocal = prog_data->generate_local_id,
|
||||
.WalkOrder = prog_data->walk_order,
|
||||
|
|
@ -422,6 +424,8 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
&dispatch),
|
||||
.EmitInlineParameter = prog_data->uses_inline_data,
|
||||
.InlineData = {
|
||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX,
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = indirect_addr64 & 0xffffffff,
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32,
|
||||
|
|
@ -466,11 +470,13 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
num_workgroup_data[2] = groupCountZ;
|
||||
}
|
||||
|
||||
uint64_t push_addr64 = anv_address_physical(
|
||||
anv_state_pool_state_address(&cmd_buffer->device->general_state_pool,
|
||||
comp_state->base.push_constants_state));
|
||||
|
||||
struct GENX(COMPUTE_WALKER_BODY) body = {
|
||||
.SIMDSize = dispatch.simd_size / 16,
|
||||
.MessageSIMD = dispatch.simd_size / 16,
|
||||
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
|
||||
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
|
||||
.GenerateLocalID = prog_data->generate_local_id != 0,
|
||||
.EmitLocal = prog_data->generate_local_id,
|
||||
.WalkOrder = prog_data->walk_order,
|
||||
|
|
@ -491,6 +497,8 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
prog_data, &dispatch),
|
||||
.EmitInlineParameter = prog_data->uses_inline_data,
|
||||
.InlineData = {
|
||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff,
|
||||
[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32,
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
|
||||
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
|
||||
|
|
|
|||
|
|
@ -562,18 +562,27 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
static inline uint32_t
|
||||
get_mesh_task_push_offset(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_push_range *range)
|
||||
static inline uint64_t
|
||||
get_mesh_task_push_addr64(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_graphics_pipeline *pipeline,
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
|
||||
const struct anv_shader_bin *shader = pipeline->base.shaders[stage];
|
||||
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
|
||||
if (bind_map->push_ranges[0].length == 0)
|
||||
return 0;
|
||||
|
||||
if (gfx_state->base.push_constants_state.alloc_size == 0) {
|
||||
gfx_state->base.push_constants_state =
|
||||
anv_cmd_buffer_gfx_push_constants(cmd_buffer);
|
||||
}
|
||||
|
||||
return gfx_state->base.push_constants_state.offset + range->start * 32;
|
||||
return anv_address_physical(
|
||||
anv_address_add(
|
||||
anv_cmd_buffer_gfx_push_constants_state_address(cmd_buffer,
|
||||
gfx_state->base.push_constants_state),
|
||||
bind_map->push_ranges[0].start * 32));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -586,23 +595,23 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_EXT &&
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
|
||||
const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_TASK];
|
||||
const struct anv_push_range *range = &shader->bind_map.push_ranges[0];
|
||||
uint64_t push_addr64 =
|
||||
get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_TASK);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) {
|
||||
if (range->length > 0)
|
||||
data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range);
|
||||
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff;
|
||||
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32;
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_EXT &&
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
|
||||
const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_MESH];
|
||||
const struct anv_push_range *range = &shader->bind_map.push_ranges[0];
|
||||
uint64_t push_addr64 =
|
||||
get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_MESH);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) {
|
||||
if (range->length > 0)
|
||||
data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range);
|
||||
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff;
|
||||
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue