diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index e18609f23db..1cdd2dc6341 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -1208,7 +1208,6 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, struct anv_state anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer) { - const struct intel_device_info *devinfo = cmd_buffer->device->info; const struct anv_push_constants *data = &cmd_buffer->state.gfx.base.push_constants; @@ -1222,10 +1221,6 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer) * the 64-byte aligned address of the indirect data." */ struct anv_state state = - devinfo->verx10 >= 125 ? - anv_cmd_buffer_alloc_general_state(cmd_buffer, - align(sizeof(struct anv_push_constants), 64), - 64) : anv_cmd_buffer_alloc_temporary_state(cmd_buffer, sizeof(struct anv_push_constants), 32 /* bottom 5 bits MBZ */); diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 61b1786d824..68b1b2a76c7 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -124,9 +124,13 @@ anv_nir_compute_push_layout(nir_shader *nir, push_end = anv_drv_const_offset(cs.subgroup_id); } - /* Align push_start down to a 32B (for 3DSTATE_CONSTANT) or 64B (for - * 3DSTATE_(MESH|TASK)_SHADER_DATA) boundary and make it no larger than - * push_end (no push constants is indicated by push_start = UINT_MAX). + /* Align push_start down to a 32B boundary (for 3DSTATE_CONSTANT) and make + * it no larger than push_end (no push constants is indicated by + * push_start = UINT_MAX). + * + * If we were to use + * 3DSTATE_(MESH|TASK)_SHADER_DATA::IndirectDataStartAddress we would need + * to align things to 64B. 
* * SKL PRMs, Volume 2d: Command Reference: Structures, * 3DSTATE_CONSTANT::Constant Buffer 0 Read Length: @@ -146,12 +150,8 @@ anv_nir_compute_push_layout(nir_shader *nir, * (unlike all Gfx stages) and so we can bound+align the allocation there * (see anv_cmd_buffer_cs_push_constants). */ - const unsigned push_alignment = - devinfo->verx10 >= 125 && (nir->info.stage == MESA_SHADER_TASK || - nir->info.stage == MESA_SHADER_MESH) ? - 64 : 32; push_start = MIN2(push_start, push_end); - push_start = ROUND_DOWN_TO(push_start, push_alignment); + push_start = ROUND_DOWN_TO(push_start, 32); /* For scalar, push data size needs to be aligned to a DWORD. */ const unsigned alignment = 4; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 22275252d4c..c00510cd4d5 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -424,6 +424,8 @@ populate_task_prog_key(struct anv_pipeline_stage *stage, memset(&stage->key, 0, sizeof(stage->key)); populate_base_prog_key(stage, device); + + stage->key.base.uses_inline_push_addr = true; } static void @@ -436,6 +438,7 @@ populate_mesh_prog_key(struct anv_pipeline_stage *stage, populate_base_prog_key(stage, device); stage->key.mesh.compact_mue = compact_mue; + stage->key.base.uses_inline_push_addr = true; } static uint32_t @@ -561,6 +564,8 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage, memset(&stage->key, 0, sizeof(stage->key)); populate_base_prog_key(stage, device); + + stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125; } static void diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 7ac64495691..8587a76d004 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -223,6 +223,9 @@ struct intel_perf_query_result; #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1) +/* Defines where various values are defined in the inline parameter register. 
+ */ +#define ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET (0) #define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8) /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64 @@ -4508,8 +4511,6 @@ anv_cmd_buffer_gfx_push_constants_state_address(struct anv_cmd_buffer *cmd_buffe struct anv_state state) { return anv_state_pool_state_address( - cmd_buffer->device->info->verx10 >= 125 ? - &cmd_buffer->device->general_state_pool : &cmd_buffer->device->dynamic_state_pool, state); } diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index d267658a146..9e5e299e645 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -391,6 +391,10 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer, uint64_t indirect_addr64 = anv_address_physical(indirect_addr); + uint64_t push_addr64 = anv_address_physical( + anv_state_pool_state_address(&cmd_buffer->device->general_state_pool, + comp_state->base.push_constants_state)); + struct GENX(COMPUTE_WALKER_BODY) body = { .SIMDSize = dispatch_size, /* HSD 14016252163: Use of Morton walk order (and batching using a batch @@ -405,8 +409,6 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer, TG_BATCH_1, #endif .MessageSIMD = dispatch_size, - .IndirectDataStartAddress = comp_state->base.push_constants_state.offset, - .IndirectDataLength = comp_state->base.push_constants_state.alloc_size, .GenerateLocalID = prog_data->generate_local_id != 0, .EmitLocal = prog_data->generate_local_id, .WalkOrder = prog_data->walk_order, @@ -422,6 +424,8 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer, &dispatch), .EmitInlineParameter = prog_data->uses_inline_data, .InlineData = { + [ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff, + [ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32, [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = UINT32_MAX, [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = 
indirect_addr64 & 0xffffffff, [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = indirect_addr64 >> 32, @@ -466,11 +470,13 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer, num_workgroup_data[2] = groupCountZ; } + uint64_t push_addr64 = anv_address_physical( + anv_state_pool_state_address(&cmd_buffer->device->general_state_pool, + comp_state->base.push_constants_state)); + struct GENX(COMPUTE_WALKER_BODY) body = { .SIMDSize = dispatch.simd_size / 16, .MessageSIMD = dispatch.simd_size / 16, - .IndirectDataStartAddress = comp_state->base.push_constants_state.offset, - .IndirectDataLength = comp_state->base.push_constants_state.alloc_size, .GenerateLocalID = prog_data->generate_local_id != 0, .EmitLocal = prog_data->generate_local_id, .WalkOrder = prog_data->walk_order, @@ -491,6 +497,8 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer, prog_data, &dispatch), .EmitInlineParameter = prog_data->uses_inline_data, .InlineData = { + [ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff, + [ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32, [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0], [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1], [ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2], diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index fa8a8986593..0bbcbbe9c3e 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -562,18 +562,27 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer, } #if GFX_VERx10 >= 125 -static inline uint32_t -get_mesh_task_push_offset(struct anv_cmd_buffer *cmd_buffer, - const struct anv_push_range *range) +static inline uint64_t +get_mesh_task_push_addr64(struct anv_cmd_buffer *cmd_buffer, + const struct anv_graphics_pipeline *pipeline, + gl_shader_stage stage) { struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; + const 
struct anv_shader_bin *shader = pipeline->base.shaders[stage]; + const struct anv_pipeline_bind_map *bind_map = &shader->bind_map; + if (bind_map->push_ranges[0].length == 0) + return 0; if (gfx_state->base.push_constants_state.alloc_size == 0) { gfx_state->base.push_constants_state = anv_cmd_buffer_gfx_push_constants(cmd_buffer); } - return gfx_state->base.push_constants_state.offset + range->start * 32; + return anv_address_physical( + anv_address_add( + anv_cmd_buffer_gfx_push_constants_state_address(cmd_buffer, + gfx_state->base.push_constants_state), + bind_map->push_ranges[0].start * 32)); } static void @@ -586,23 +595,23 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, if (dirty_stages & VK_SHADER_STAGE_TASK_BIT_EXT && anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { - const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_TASK]; - const struct anv_push_range *range = &shader->bind_map.push_ranges[0]; + uint64_t push_addr64 = + get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_TASK); anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TASK_SHADER_DATA), data) { - if (range->length > 0) - data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range); + data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff; + data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32; } } if (dirty_stages & VK_SHADER_STAGE_MESH_BIT_EXT && anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { - const struct anv_shader_bin *shader = pipeline->base.shaders[MESA_SHADER_MESH]; - const struct anv_push_range *range = &shader->bind_map.push_ranges[0]; + uint64_t push_addr64 = + get_mesh_task_push_addr64(cmd_buffer, pipeline, MESA_SHADER_MESH); anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MESH_SHADER_DATA), data) { - if (range->length > 0) - data.IndirectDataStartAddress = get_mesh_task_push_offset(cmd_buffer, range); + 
data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 0] = push_addr64 & 0xffffffff; + data.InlineData[ANV_INLINE_PARAM_PUSH_ADDRESS_OFFSET / 4 + 1] = push_addr64 >> 32; } }