mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-18 09:28:06 +02:00
Push constants on bindless stages of Gfx12.5+ don't get the data delivered in the registers automatically. Instead, the shader needs to load the data with SEND messages. Those stages do get a single 32B InlineParameter block of data delivered into the EU. We can use that to promote some of the push constant data that would otherwise have to be pulled.

The driver will try to promote all push constant data (app + driver values) if it can; if it can't, it'll try to promote only the driver values (usually a shader will only use a few driver values). If even the driver values won't fit, give up and don't use the inline parameter at all.

LNL internal fossil-db:

Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
141 lines
4.4 KiB
C
141 lines
4.4 KiB
C
/* Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
#include "anv_private.h"
|
|
#include "anv_nir.h"
|
|
#include "nir/nir_builder.h"
|
|
|
|
static bool
|
|
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin)
|
|
{
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
|
|
|
/* Any constant-offset load_constant instructions should have been removed
|
|
* by constant folding.
|
|
*/
|
|
assert(!nir_src_is_const(intrin->src[0]));
|
|
nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
|
|
nir_intrinsic_base(intrin));
|
|
|
|
unsigned load_size = intrin->def.num_components *
|
|
intrin->def.bit_size / 8;
|
|
|
|
assert(load_size < b->shader->constant_data_size);
|
|
unsigned max_offset = b->shader->constant_data_size - load_size;
|
|
offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
|
|
|
|
nir_def *const_data_addr = nir_pack_64_2x32_split(b,
|
|
nir_iadd(b,
|
|
nir_load_reloc_const_intel(b, INTEL_SHADER_RELOC_CONST_DATA_ADDR_LOW),
|
|
offset),
|
|
nir_load_reloc_const_intel(b, INTEL_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
|
|
|
|
nir_def *data =
|
|
nir_load_global_constant(b, intrin->def.num_components,
|
|
intrin->def.bit_size,
|
|
const_data_addr);
|
|
|
|
nir_def_replace(&intrin->def, data);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin)
|
|
{
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
|
|
|
nir_def *base_workgroup_id =
|
|
anv_load_driver_uniform(b, 3, cs.base_workgroup[0]);
|
|
nir_def_replace(&intrin->def, base_workgroup_id);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_subgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
const struct anv_physical_device *pdevice)
|
|
{
|
|
if (pdevice->info.verx10 >= 125)
|
|
return false;
|
|
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
|
nir_def_replace(&intrin->def,
|
|
anv_load_driver_uniform(b, 1, cs.subgroup_id));
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin)
|
|
{
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
|
|
|
nir_def *rq_globals = anv_load_driver_uniform(b, 1, ray_query_globals);
|
|
nir_def_replace(&intrin->def, rq_globals);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
lower_driver_values(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|
{
|
|
switch (intrin->intrinsic) {
|
|
case nir_intrinsic_load_constant:
|
|
return lower_load_constant(b, intrin);
|
|
case nir_intrinsic_load_base_workgroup_id:
|
|
return lower_base_workgroup_id(b, intrin);
|
|
case nir_intrinsic_load_subgroup_id:
|
|
return lower_subgroup_id(b, intrin, data);
|
|
case nir_intrinsic_load_ray_query_global_intel:
|
|
return lower_ray_query_globals(b, intrin);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
|
{
|
|
if (intrin->intrinsic != nir_intrinsic_load_num_workgroups)
|
|
return false;
|
|
|
|
/* For those stages, HW will generate values through payload registers. */
|
|
if (mesa_shader_stage_is_mesh(b->shader->info.stage))
|
|
return false;
|
|
|
|
b->cursor = nir_before_instr(&intrin->instr);
|
|
nir_def *num_workgroups =
|
|
anv_load_driver_uniform(b, 3, cs.num_workgroups[0]);
|
|
|
|
nir_def *num_workgroups_indirect;
|
|
nir_push_if(b, nir_ieq_imm(b, nir_channel(b, num_workgroups, 0), UINT32_MAX));
|
|
{
|
|
nir_def *addr = nir_pack_64_2x32_split(b,
|
|
nir_channel(b, num_workgroups, 1),
|
|
nir_channel(b, num_workgroups, 2));
|
|
num_workgroups_indirect = nir_load_global_constant(b, 3, 32, addr);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
|
|
num_workgroups = nir_if_phi(b, num_workgroups_indirect, num_workgroups);
|
|
nir_def_replace(&intrin->def, num_workgroups);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
anv_nir_lower_driver_values(nir_shader *shader,
|
|
const struct anv_physical_device *pdevice)
|
|
{
|
|
bool progress = nir_shader_intrinsics_pass(shader,
|
|
lower_driver_values,
|
|
nir_metadata_control_flow,
|
|
(void *)pdevice);
|
|
progress |= nir_shader_intrinsics_pass(shader,
|
|
lower_num_workgroups,
|
|
nir_metadata_none,
|
|
(void *)pdevice);
|
|
return progress;
|
|
}
|