mesa/src/intel/vulkan/anv_nir_lower_driver_values.c
Lionel Landwerlin 487586fefa anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.

Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.

The driver first tries to promote all push constant data (app + driver
values). If it can't, it tries to promote only the driver values
(usually a shader will only use a few driver values). If even the
driver values won't fit, it gives up and doesn't use the inline
parameter at all.

LNL internal fossil-db:

Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-02-25 10:44:09 +00:00

141 lines
4.4 KiB
C

/* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "anv_private.h"
#include "anv_nir.h"
#include "nir/nir_builder.h"
/* Replace a load_constant intrinsic with a global constant load.
 *
 * The 64-bit address is built from the two constant-data relocation values,
 * with the (clamped) byte offset added to the low half. The offset is clamped
 * so that the whole load stays within the shader's constant data.
 *
 * Always returns true.
 */
static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin)
{
   const unsigned num_comps = intrin->def.num_components;
   const unsigned bit_size = intrin->def.bit_size;

   b->cursor = nir_before_instr(&intrin->instr);

   /* Constant folding is expected to have already eliminated every
    * load_constant whose offset is a compile-time constant.
    */
   assert(!nir_src_is_const(intrin->src[0]));

   nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
                                  nir_intrinsic_base(intrin));

   /* Size in bytes of the value being loaded. */
   const unsigned load_size = num_comps * bit_size / 8;

   /* NOTE(review): the strict comparison also rejects a load that spans the
    * entire constant data; confirm whether `<=` was intended.
    */
   assert(load_size < b->shader->constant_data_size);

   /* Largest offset at which the load still fits in the constant data. */
   const unsigned max_offset = b->shader->constant_data_size - load_size;
   nir_def *max_offset_def = nir_imm_int(b, max_offset);
   nir_def *clamped_offset = nir_umin(b, offset, max_offset_def);

   /* Each intermediate value gets its own statement so the builder calls
    * run in a fixed order.
    */
   nir_def *addr_low =
      nir_load_reloc_const_intel(b, INTEL_SHADER_RELOC_CONST_DATA_ADDR_LOW);
   nir_def *addr_low_plus_offset = nir_iadd(b, addr_low, clamped_offset);
   nir_def *addr_high =
      nir_load_reloc_const_intel(b, INTEL_SHADER_RELOC_CONST_DATA_ADDR_HIGH);
   nir_def *const_data_addr =
      nir_pack_64_2x32_split(b, addr_low_plus_offset, addr_high);

   nir_def *loaded =
      nir_load_global_constant(b, num_comps, bit_size, const_data_addr);

   nir_def_replace(&intrin->def, loaded);

   return true;
}
/* Replace the intrinsic's result with a 3-component driver uniform read
 * starting at cs.base_workgroup[0]. Always returns true.
 */
static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_def_replace(&intrin->def,
                   anv_load_driver_uniform(b, 3, cs.base_workgroup[0]));

   return true;
}
/* Replace the intrinsic's result with the cs.subgroup_id driver uniform.
 *
 * Only done when pdevice->info.verx10 is below 125; on anything newer the
 * intrinsic is left untouched and false is returned (presumably the value is
 * obtained by other means there -- confirm against the backend).
 */
static bool
lower_subgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
                  const struct anv_physical_device *pdevice)
{
   if (pdevice->info.verx10 < 125) {
      b->cursor = nir_before_instr(&intrin->instr);

      nir_def *subgroup_id = anv_load_driver_uniform(b, 1, cs.subgroup_id);
      nir_def_replace(&intrin->def, subgroup_id);

      return true;
   }

   return false;
}
/* Replace the intrinsic's result with the single-component
 * ray_query_globals driver uniform. Always returns true.
 */
static bool
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_def_replace(&intrin->def,
                   anv_load_driver_uniform(b, 1, ray_query_globals));

   return true;
}
/* Per-intrinsic callback: dispatch each handled intrinsic to its dedicated
 * lowering helper and report whether the instruction was rewritten.
 *
 * `data` is forwarded as the physical device pointer to the subgroup id
 * lowering. Intrinsics not listed here are left alone.
 */
static bool
lower_driver_values(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   if (intrin->intrinsic == nir_intrinsic_load_constant)
      return lower_load_constant(b, intrin);

   if (intrin->intrinsic == nir_intrinsic_load_base_workgroup_id)
      return lower_base_workgroup_id(b, intrin);

   if (intrin->intrinsic == nir_intrinsic_load_subgroup_id)
      return lower_subgroup_id(b, intrin, data);

   if (intrin->intrinsic == nir_intrinsic_load_ray_query_global_intel)
      return lower_ray_query_globals(b, intrin);

   return false;
}
/* Per-intrinsic callback lowering load_num_workgroups.
 *
 * A 3-component driver uniform is read starting at cs.num_workgroups[0].
 * When its first component equals UINT32_MAX, components 1 and 2 are packed
 * into a 64-bit address and three 32-bit values are loaded from it instead.
 * A phi picks between the loaded values and the driver uniform.
 *
 * Returns false for any other intrinsic and for mesh stages; `data` is
 * unused.
 */
static bool
lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   /* For mesh stages, HW will generate values through payload registers. */
   if (intrin->intrinsic != nir_intrinsic_load_num_workgroups ||
       mesa_shader_stage_is_mesh(b->shader->info.stage))
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *direct = anv_load_driver_uniform(b, 3, cs.num_workgroups[0]);

   /* UINT32_MAX in the first component selects the load-from-address path. */
   nir_def *first = nir_channel(b, direct, 0);
   nir_def *use_indirect = nir_ieq_imm(b, first, UINT32_MAX);

   nir_def *indirect;
   nir_push_if(b, use_indirect);
   {
      nir_def *addr_low = nir_channel(b, direct, 1);
      nir_def *addr_high = nir_channel(b, direct, 2);
      nir_def *addr = nir_pack_64_2x32_split(b, addr_low, addr_high);

      indirect = nir_load_global_constant(b, 3, 32, addr);
   }
   nir_pop_if(b, NULL);

   nir_def *result = nir_if_phi(b, indirect, direct);
   nir_def_replace(&intrin->def, result);

   return true;
}
/* Run both lowering passes over `shader` and report whether either one
 * changed anything.
 *
 * The first pass (lower_driver_values) declares control-flow metadata as
 * preserved. The second (lower_num_workgroups) inserts an if and therefore
 * preserves none. Both passes always run, in this order, and both receive
 * `pdevice` as their callback data.
 */
bool
anv_nir_lower_driver_values(nir_shader *shader,
                            const struct anv_physical_device *pdevice)
{
   void *pass_data = (void *)pdevice;

   const bool lowered_values =
      nir_shader_intrinsics_pass(shader, lower_driver_values,
                                 nir_metadata_control_flow, pass_data);

   const bool lowered_num_workgroups =
      nir_shader_intrinsics_pass(shader, lower_num_workgroups,
                                 nir_metadata_none, pass_data);

   return lowered_values || lowered_num_workgroups;
}