From 1ebc14bcb9c74f8404d3948fd86f398a933c3460 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Mon, 27 Apr 2026 21:27:31 -0700 Subject: [PATCH] brw: Stop tracking inline parameter usage in prog_key/prog_data Since inline parameter is the last field of the thread payload, the backend can always assume they may exist. They won't affect the position of other payload fields and the register allocator will reuse any unused space. In Anv, also update EmitInlineParameter for Task/Mesh/CS to reflect previous changes in inline parameter setup. Remove/Update some stale comments since we are here. Finally, remove the prog_key/prog_data bits that tracked whether inline data or a push address was needed. Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw/brw_compile_cs.cpp | 3 --- src/intel/compiler/brw/brw_compile_mesh.cpp | 6 ----- src/intel/compiler/brw/brw_compiler.h | 9 +------- src/intel/compiler/brw/brw_from_nir.cpp | 6 +++-- src/intel/compiler/brw/brw_nir.c | 21 ------------------ src/intel/compiler/brw/brw_nir.h | 2 -- src/intel/compiler/brw/brw_thread_payload.cpp | 22 ++++++------------- src/intel/compiler/jay/jay_from_nir.c | 1 - src/intel/compiler/jay/jay_prog_data.c | 3 --- src/intel/vulkan/anv_shader_compile.c | 9 -------- src/intel/vulkan/genX_shader.c | 16 +++----------- 11 files changed, 15 insertions(+), 83 deletions(-) diff --git a/src/intel/compiler/brw/brw_compile_cs.cpp b/src/intel/compiler/brw/brw_compile_cs.cpp index 1e48151807d..4763a9019c4 100644 --- a/src/intel/compiler/brw/brw_compile_cs.cpp +++ b/src/intel/compiler/brw/brw_compile_cs.cpp @@ -131,9 +131,6 @@ brw_compile_cs(const struct brw_compiler *compiler, params->base.source_hash); brw_prog_data_init(&prog_data->base, ¶ms->base); - prog_data->uses_inline_data = brw_nir_uses_inline_data(nir) || - key->base.uses_inline_push_addr; - assert(compiler->devinfo->verx10 >= 125 || !prog_data->uses_inline_data); if (!nir->info.workgroup_size_variable) { prog_data->local_size[0] = nir->info.workgroup_size[0]; diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index 3d2077bb710..d5aff0e0c33 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -319,9 +319,6 @@ brw_compile_task(const struct brw_compiler *compiler, prog_data->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); - prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || - key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(pt); brw_simd_selection_state simd_state{ @@ -1036,9 +1033,6 @@ brw_compile_mesh(const struct brw_compiler *compiler, prog_data->autostrip_enable = brw_mesh_autostrip_enable(compiler, nir, &prog_data->map); - prog_data->base.uses_inline_data = brw_nir_uses_inline_data(nir) || - key->base.uses_inline_push_addr; - brw_postprocess_nir_opts(pt); const struct brw_lower_urb_cb_data cb_data = { diff --git a/src/intel/compiler/brw/brw_compiler.h b/src/intel/compiler/brw/brw_compiler.h index 99a40e08c8a..98b4097d5a0 100644 --- a/src/intel/compiler/brw/brw_compiler.h +++ b/src/intel/compiler/brw/brw_compiler.h @@ -220,8 +220,6 @@ struct brw_base_prog_key { enum brw_robustness_flags robust_flags:2; - bool uses_inline_push_addr:1; - enum intel_vue_layout vue_layout:2; /** @@ -233,7 +231,7 @@ struct brw_base_prog_key { enum brw_divergent_atomics_flags divergent_atomics_flags:2; - uint32_t padding:24; + uint32_t padding:25; }; /** @@ -852,11 +850,6 @@ struct brw_cs_prog_data { unsigned prog_spilled; bool uses_barrier; - bool uses_inline_data; - /** Whether inline push data is used to provide a 64bit pointer to push - * constants - */ - bool uses_inline_push_addr; bool uses_btd_stack_ids; bool uses_systolic; uint8_t generate_local_id; diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 8882c1a3e9a..40f18a0517b 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -5235,8 +5235,10 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_push_data_intel: - case nir_intrinsic_load_inline_data_intel: { + case nir_intrinsic_load_inline_data_intel: + assert(brw_shader_stage_has_inline_data(ntb.devinfo, ntb.s.stage)); + FALLTHROUGH; + case nir_intrinsic_load_push_data_intel: { /* Offsets are in bytes but they should always aligned to * the type size */ diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index d89383f1a7e..1168f9b66c0 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -3568,27 +3568,6 @@ brw_nir_get_var_type(const struct nir_shader *nir, nir_variable *var) return type; } -bool -brw_nir_uses_inline_data(nir_shader *shader) -{ - nir_foreach_function_impl(impl, shader) { - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_inline_data_intel) - continue; - - return true; - } - } - } - - return false; -} - /** * Move load_interpolated_input with simple (payload-based) barycentric modes * to the top of the program so we don't emit multiple PLNs for the same input. diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index e57a875d308..6cefd4be113 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -370,8 +370,6 @@ brw_nir_no_indirect_mask(mesa_shader_stage stage) return indirect_mask; } -bool brw_nir_uses_inline_data(nir_shader *shader); - nir_variable * brw_nir_find_complete_variable_with_location(nir_shader *shader, nir_variable_mode mode, diff --git a/src/intel/compiler/brw/brw_thread_payload.cpp b/src/intel/compiler/brw/brw_thread_payload.cpp index f7c908a11f0..ab06bc9845f 100644 --- a/src/intel/compiler/brw/brw_thread_payload.cpp +++ b/src/intel/compiler/brw/brw_thread_payload.cpp @@ -311,8 +311,6 @@ brw_cs_thread_payload::brw_cs_thread_payload(const brw_shader &v) unsigned r = reg_unit(v.devinfo); - prog_data->uses_inline_push_addr = v.key->uses_inline_push_addr; - /* See nir_setup_uniforms for subgroup_id in earlier versions. */ if (v.devinfo->verx10 >= 125) { subgroup_id_ = brw_ud1_grf(0, 2); @@ -332,14 +330,13 @@ brw_cs_thread_payload::brw_cs_thread_payload(const brw_shader &v) if (prog_data->uses_btd_stack_ids) r += reg_unit(v.devinfo); - if (v.stage == MESA_SHADER_COMPUTE && - (prog_data->uses_inline_data || - prog_data->uses_inline_push_addr)) { + if (v.stage == MESA_SHADER_COMPUTE) { + /* Since it is the last field of the thread payload, always expect + * inline parameters. Register allocator will reuse any unused space. + */ inline_parameter = brw_ud1_grf(r, 0); r += reg_unit(v.devinfo); } - } else { - assert(!prog_data->uses_inline_push_addr); } num_regs = r; @@ -371,9 +368,6 @@ brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v) * R3: Inline Parameter * * Local_ID.X values are 16 bits. - * - * Inline parameter is optional but always present since we use it to pass - * the address to descriptors. */ const brw_builder bld = brw_builder(&v); @@ -409,11 +403,9 @@ brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v) if (v.devinfo->ver < 20 && v.dispatch_width == 32) r += reg_unit(v.devinfo); - struct brw_cs_prog_data *prog_data = brw_cs_prog_data(v.prog_data); - if (prog_data->uses_inline_data || prog_data->uses_inline_push_addr) { - inline_parameter = brw_ud1_grf(r, 0); - r += reg_unit(v.devinfo); - } + /* See comment on inline parameters in the CS handling. */ + inline_parameter = brw_ud1_grf(r, 0); + r += reg_unit(v.devinfo); num_regs = r; } diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index df5b90b7b05..e9989236c60 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -1475,7 +1475,6 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr) case nir_intrinsic_load_inline_data_intel: { assert(cs && f->is_entrypoint && "todo: this needs ABI"); - b->shader->prog_data->cs.uses_inline_data = true; unsigned offset = nir_intrinsic_base(intr) / 4; unsigned nr = jay_num_values(dst); diff --git a/src/intel/compiler/jay/jay_prog_data.c b/src/intel/compiler/jay/jay_prog_data.c index bc56c13dae6..75b6f249253 100644 --- a/src/intel/compiler/jay/jay_prog_data.c +++ b/src/intel/compiler/jay/jay_prog_data.c @@ -556,9 +556,6 @@ jay_populate_prog_data(const struct intel_device_info *devinfo, populate_fs_prog_data(nir, devinfo, &key->fs, &prog_data->fs, NULL /* TODO: mue_map */, per_primitive_offsets); - } else if (mesa_shader_stage_is_compute(nir->info.stage)) { - prog_data->cs.uses_inline_push_addr = key->base.uses_inline_push_addr; - prog_data->cs.uses_inline_data |= key->base.uses_inline_push_addr; } if (nir->info.stage == MESA_SHADER_VERTEX || diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c index f8c27ac217e..a4347a9c7d3 100644 --- a/src/intel/vulkan/anv_shader_compile.c +++ b/src/intel/vulkan/anv_shader_compile.c @@ -385,8 +385,6 @@ populate_task_prog_key(struct brw_task_prog_key *key, VkShaderStageFlags link_stages) { populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages); - - key->base.uses_inline_push_addr = true; } static void @@ -397,8 +395,6 @@ populate_mesh_prog_key(struct brw_mesh_prog_key *key, VkShaderStageFlags link_stages) { populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages); - - key->base.uses_inline_push_addr = true; } static bool @@ -575,12 +571,7 @@ populate_cs_prog_key(struct brw_cs_prog_key *key, const struct vk_physical_device *device, const struct vk_pipeline_robustness_state *rs) { - const struct anv_physical_device *pdevice = - container_of(device, const struct anv_physical_device, vk); - populate_base_prog_key(&key->base, device, rs); - - key->base.uses_inline_push_addr = pdevice->info.verx10 >= 125; } static void diff --git a/src/intel/vulkan/genX_shader.c b/src/intel/vulkan/genX_shader.c index 5f16826709b..6cb46bc7acf 100644 --- a/src/intel/vulkan/genX_shader.c +++ b/src/intel/vulkan/genX_shader.c @@ -914,12 +914,7 @@ emit_task_shader(struct anv_batch *batch, task_dispatch.group_size, task_dispatch.simd_size); - /* - * 3DSTATE_TASK_SHADER_DATA.InlineData[0:1] will be used for an address - * of a buffer with push constants and descriptor set table and - * InlineData[2:7] will be used for first few push constants. - */ - task.EmitInlineParameter = true; + task.EmitInlineParameter = shader->bind_map.inline_dwords_count > 0; task.IndirectDataLength = align(shader->bind_map.push_ranges[0].length * 32, 64); task.XP0Required = task_prog_data->uses_drawid; @@ -1018,12 +1013,7 @@ emit_mesh_shader(struct anv_batch *batch, mesh_dispatch.group_size, mesh_dispatch.simd_size); - /* - * 3DSTATE_MESH_SHADER_DATA.InlineData[0:1] will be used for an address - * of a buffer with push constants and descriptor set table and - * InlineData[2:7] will be used for first few push constants. - */ - mesh.EmitInlineParameter = true; + mesh.EmitInlineParameter = shader->bind_map.inline_dwords_count > 0; mesh.IndirectDataLength = align(shader->bind_map.push_ranges[0].length * 32, 64); mesh.XP0Required = mesh_prog_data->uses_drawid; @@ -1209,7 +1199,7 @@ emit_cs_shader(struct anv_batch *batch, .RegistersPerThread = ptl_register_blocks(cs_prog_data->base.grf_used), #endif }, - .EmitInlineParameter = cs_prog_data->uses_inline_push_addr, + .EmitInlineParameter = shader->bind_map.inline_dwords_count > 0, }; assert(ARRAY_SIZE(shader->cs.gfx125.compute_walker_body) >=