2015-09-11 15:53:53 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2015 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2016-10-11 18:26:24 +01:00
|
|
|
#ifndef ANV_NIR_H
|
|
|
|
|
#define ANV_NIR_H
|
2015-09-11 15:53:53 -07:00
|
|
|
|
2016-02-05 15:03:04 -08:00
|
|
|
#include "nir/nir.h"
|
2015-09-11 15:53:53 -07:00
|
|
|
#include "anv_private.h"
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
2024-08-05 20:33:38 +03:00
|
|
|
struct vk_pipeline_robustness_state;
|
|
|
|
|
|
2024-05-12 15:21:36 +03:00
|
|
|
/* Byte offset of `field` inside struct anv_push_constants.
 *
 * `field` is a member designator (it may name a nested member or an array
 * element) and therefore must not be parenthesized.
 */
#define anv_drv_const_offset(field) \
   (offsetof(struct anv_push_constants, field))
|
anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.
Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.
The driver will try to promote all push constant data (app + driver
values) if it can, if it can't it'll try to promote only the driver
values (usually a shader will only use a few driver values). If even
the drivers values won't fit, give up and don't use the inline
parameter at all.
LNL internal fossil-db:
Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-01-16 17:00:26 +02:00
|
|
|
/* Offset of `field` inside struct anv_push_constants expressed in 32-bit
 * dwords (byte offset divided by 4, rounded down).
 */
#define anv_drv_const_dword(field) \
   (offsetof(struct anv_push_constants, field) / 4)
|
2024-05-12 15:21:36 +03:00
|
|
|
/* Size in bytes of `field` inside struct anv_push_constants.
 *
 * The null pointer is never dereferenced: the operand of sizeof is not
 * evaluated, only its type is used.
 */
#define anv_drv_const_size(field) \
   (sizeof(((struct anv_push_constants *)0)->field))
|
anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.
Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.
The driver will try to promote all push constant data (app + driver
values) if it can, if it can't it'll try to promote only the driver
values (usually a shader will only use a few driver values). If even
the drivers values won't fit, give up and don't use the inline
parameter at all.
LNL internal fossil-db:
Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-01-16 17:00:26 +02:00
|
|
|
/* True when the byte `offset` falls inside `field` of struct
 * anv_push_constants, i.e. in the half-open range
 * [offset_of(field), offset_of(field) + size_of(field)).
 *
 * `offset` is evaluated twice; do not pass an expression with side effects.
 */
#define anv_drv_const_includes_offset(field, offset)                    \
   ((offset) >= anv_drv_const_offset(field) &&                          \
    (offset) < (anv_drv_const_offset(field) + anv_drv_const_size(field)))
|
2024-05-12 15:21:36 +03:00
|
|
|
|
|
|
|
|
/* Emit a nir_load_push_data_intel for the driver constant `field` of struct
 * anv_push_constants.
 *
 *  - b:          NIR builder passed through to the emitted instructions
 *  - components: number of components to load
 *  - field:      member designator inside struct anv_push_constants (must not
 *                be parenthesized)
 *
 * The bit size is the size of `field` in bits, the offset source is the
 * immediate 0, .base is the byte offset of `field` and .range covers
 * `components` elements of that size.  `components` is parenthesized in the
 * .range product so that an expression argument (e.g. `n + 1`) keeps its
 * intended precedence.
 */
#define anv_load_driver_uniform(b, components, field)                   \
   nir_load_push_data_intel(b, components,                              \
                            anv_drv_const_size(field) * 8,              \
                            nir_imm_int(b, 0),                          \
                            .base = anv_drv_const_offset(field),        \
                            .range = (components) *                     \
                                     anv_drv_const_size(field))
|
|
|
|
|
/* Emit a nir_load_push_data_intel for element `idx` of the array driver
 * constant `field` of struct anv_push_constants.
 *
 * The byte offset source is idx * sizeof(field[0]), the bit size is the size
 * of one element in bits, .base is the byte offset of the array and .range
 * is the size of the whole array.
 *
 * Use ACCESS_NON_UNIFORM for indexed values since load_push_constant requires
 * that the offset source is dynamically uniform in the subgroup which we
 * cannot guarantee.
 *
 * NOTE(review): the sentence above names load_push_constant while this macro
 * emits nir_load_push_data_intel; confirm the same uniformity requirement
 * applies to that intrinsic.
 */
#define anv_load_driver_uniform_indexed(b, components, field, idx)      \
   nir_load_push_data_intel(b, components,                              \
                            anv_drv_const_size(field[0]) * 8,           \
                            nir_imul_imm(b, idx,                        \
                                         anv_drv_const_size(field[0])), \
                            .base = anv_drv_const_offset(field),        \
                            .range = anv_drv_const_size(field),         \
                            .access = ACCESS_NON_UNIFORM)
|
2024-05-12 15:21:36 +03:00
|
|
|
|
2023-02-24 20:02:57 +02:00
|
|
|
/* This map represents a mapping where the key is the NIR
 * nir_intrinsic_resource_intel::block index. It allows mapping bindless UBO
 * accesses to descriptor entries.
 *
 * This map only lives temporarily, between the
 * anv_nir_apply_pipeline_layout() and anv_nir_compute_push_layout() passes.
 */
struct anv_pipeline_push_map {
   /* Number of entries in block_to_descriptor */
   uint32_t                     block_count;
   /* Array indexed by block index, giving the matching descriptor binding */
   struct anv_pipeline_binding *block_to_descriptor;
};
|
|
|
|
|
|
2022-07-18 11:06:41 -05:00
|
|
|
bool anv_check_for_primitive_replication(struct anv_device *device,
|
|
|
|
|
VkShaderStageFlags stages,
|
|
|
|
|
nir_shader **shaders,
|
|
|
|
|
uint32_t view_mask);
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 10:10:34 -07:00
|
|
|
|
2022-07-18 11:06:41 -05:00
|
|
|
/* Multiview lowering pass for `shader` with the given `view_mask`;
 * `use_primitive_replication` selects the primitive-replication flavour of
 * the lowering.
 *
 * NOTE(review): presumably returns true when the shader was modified (usual
 * NIR pass convention) - confirm against the implementation.
 */
bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
                             bool use_primitive_replication);
|
2017-03-22 15:37:17 -07:00
|
|
|
|
2019-04-18 12:08:57 -05:00
|
|
|
static inline nir_address_format
|
|
|
|
|
anv_nir_ssbo_addr_format(const struct anv_physical_device *pdevice,
|
2022-06-21 18:06:04 -07:00
|
|
|
enum brw_robustness_flags robust_flags)
|
2019-04-18 12:08:57 -05:00
|
|
|
{
|
2022-06-21 18:06:04 -07:00
|
|
|
if (robust_flags & BRW_ROBUSTNESS_SSBO)
|
2022-08-30 19:01:33 -07:00
|
|
|
return nir_address_format_64bit_bounded_global;
|
|
|
|
|
else
|
|
|
|
|
return nir_address_format_64bit_global_32bit_offset;
|
2019-04-18 12:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
2021-01-15 16:44:44 -06:00
|
|
|
static inline nir_address_format
|
|
|
|
|
anv_nir_ubo_addr_format(const struct anv_physical_device *pdevice,
|
2022-06-21 18:06:04 -07:00
|
|
|
enum brw_robustness_flags robust_flags)
|
2021-01-15 16:44:44 -06:00
|
|
|
{
|
2022-06-21 18:06:04 -07:00
|
|
|
if (robust_flags & BRW_ROBUSTNESS_UBO)
|
2022-08-30 19:01:33 -07:00
|
|
|
return nir_address_format_64bit_bounded_global;
|
|
|
|
|
else
|
|
|
|
|
return nir_address_format_64bit_global_32bit_offset;
|
2021-01-15 16:44:44 -06:00
|
|
|
}
|
|
|
|
|
|
2021-01-15 14:59:42 -06:00
|
|
|
/* NIR pass lowering UBO loads in `shader`.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_lower_ubo_loads(nir_shader *shader);
|
2025-08-14 19:31:56 +03:00
|
|
|
|
|
|
|
|
/* NIR pass lowering driver-provided values in `shader` for the given
 * physical device.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_lower_driver_values(nir_shader *shader,
                                 const struct anv_physical_device *pdevice);
|
2021-01-15 14:59:42 -06:00
|
|
|
|
2025-06-24 16:17:55 +00:00
|
|
|
/* Apply the descriptor set layouts to `shader`.
 *
 *  - set_layouts / set_count: array of descriptor set layouts and its length
 *  - dynamic_offset_start:    per-set start of dynamic offsets
 *                             (NOTE(review): presumably one entry per set)
 *  - map:                     pipeline bind map updated by the pass
 *  - push_map:                temporary block-index to descriptor map, later
 *                             consumed by anv_nir_compute_push_layout()
 *  - push_map_mem_ctx:        NOTE(review): presumably the allocation context
 *                             for push_map's storage - confirm
 *
 * NOTE(review): presumably returns true when the shader was modified.
 */
bool anv_nir_apply_pipeline_layout(nir_shader *shader,
                                   const struct anv_physical_device *pdevice,
                                   enum brw_robustness_flags robust_flags,
                                   struct anv_descriptor_set_layout * const *set_layouts,
                                   uint32_t set_count,
                                   const uint32_t *dynamic_offset_start,
                                   struct anv_pipeline_bind_map *map,
                                   struct anv_pipeline_push_map *push_map,
                                   void *push_map_mem_ctx);
|
2015-09-11 15:53:53 -07:00
|
|
|
|
2025-04-22 18:42:35 +03:00
|
|
|
/* Input flags for anv_nir_compute_push_layout().
 *
 * NOTE(review): the precise meaning of each flag is defined by the
 * implementation of that pass, which is not visible in this header.
 */
struct anv_nir_push_layout_info {
   bool separate_tessellation;
   bool fragment_dynamic;
   bool mesh_dynamic;
};
|
|
|
|
|
|
2026-02-13 11:47:12 +02:00
|
|
|
/* NIR pass shrinking the push constant ranges used by `nir`.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_shrink_push_constant_ranges(nir_shader *nir);
|
|
|
|
|
|
2025-06-24 16:17:55 +00:00
|
|
|
/* Compute the push constant layout of `nir`.
 *
 *  - info:      input flags, read-only
 *  - prog_key:  program key (non-const, may be updated by the pass)
 *  - prog_data: stage program data (non-const, may be updated by the pass)
 *  - map:       pipeline bind map (non-const, may be updated by the pass)
 *  - push_map:  read-only block-index to descriptor map produced by
 *               anv_nir_apply_pipeline_layout()
 *
 * NOTE(review): presumably returns true when the shader was modified.
 */
bool anv_nir_compute_push_layout(nir_shader *nir,
                                 const struct anv_physical_device *pdevice,
                                 enum brw_robustness_flags robust_flags,
                                 const struct anv_nir_push_layout_info *info,
                                 struct brw_base_prog_key *prog_key,
                                 struct brw_stage_prog_data *prog_data,
                                 struct anv_pipeline_bind_map *map,
                                 const struct anv_pipeline_push_map *push_map);
|
2019-11-07 17:16:14 -06:00
|
|
|
|
2023-01-04 12:55:10 -08:00
|
|
|
/* Validate the push layout described by `prog_data` and `map` for the given
 * physical device.
 * NOTE(review): what happens on a validation failure (assert, abort, log) is
 * not visible from this header.
 */
void anv_nir_validate_push_layout(const struct anv_physical_device *pdevice,
                                  struct brw_stage_prog_data *prog_data,
                                  struct anv_pipeline_bind_map *map);
|
2019-11-08 09:42:30 -06:00
|
|
|
|
2022-12-22 20:44:07 +02:00
|
|
|
/* NIR pass updating the block index of resource_intel intrinsics in `shader`.
 * NOTE(review): description inferred from the name; presumably returns true
 * when the shader was modified - confirm against the implementation.
 */
bool anv_nir_update_resource_intel_block(nir_shader *shader);
|
|
|
|
|
|
2025-05-22 15:43:47 +03:00
|
|
|
/* NIR pass lowering descriptor addresses in `shader` using the (read-only)
 * pipeline bind map.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_lower_desc_address(nir_shader *shader,
                                const struct anv_pipeline_bind_map *map);
|
|
|
|
|
|
2025-07-24 15:55:02 -07:00
|
|
|
/* NIR pass lowering unaligned dispatches in `shader`.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_lower_unaligned_dispatch(nir_shader *shader);
|
|
|
|
|
|
2022-12-22 20:44:07 +02:00
|
|
|
/* NIR pass lowering resource_intel intrinsics in `shader` for the given
 * device and descriptor set layout type.
 * NOTE(review): presumably returns true when the shader was modified -
 * confirm against the implementation.
 */
bool anv_nir_lower_resource_intel(nir_shader *shader,
                                  const struct anv_physical_device *device,
                                  enum anv_descriptor_set_layout_type desc_type);
|
|
|
|
|
|
2019-11-07 17:16:14 -06:00
|
|
|
/* NIR pass adding the base workgroup ID in `shader`.
 * NOTE(review): description inferred from the name; presumably returns true
 * when the shader was modified - confirm against the implementation.
 */
bool anv_nir_add_base_work_group_id(nir_shader *shader);
|
2017-10-03 15:23:07 -07:00
|
|
|
|
2022-10-12 02:00:41 +03:00
|
|
|
/* Compute which push descriptors are used by `shader`, given `set_count`
 * descriptor set layouts.
 * NOTE(review): the encoding of the returned uint32_t (presumably a bitmask)
 * is not visible from this header - confirm against the implementation.
 */
uint32_t anv_nir_compute_used_push_descriptors(nir_shader *shader,
                                               struct anv_descriptor_set_layout * const *set_layouts,
                                               uint32_t set_count);
|
2022-10-12 02:00:41 +03:00
|
|
|
|
2025-02-13 12:38:59 +02:00
|
|
|
/* Report push descriptor buffer loads performed by `nir`, given `set_count`
 * descriptor set layouts and the (read-only) pipeline bind map.
 * NOTE(review): the encoding of the returned uint8_t (presumably a bitmask)
 * is not visible from this header - confirm against the implementation.
 */
uint8_t anv_nir_loads_push_desc_buffer(nir_shader *nir,
                                       struct anv_descriptor_set_layout * const *set_layouts,
                                       uint32_t set_count,
                                       const struct anv_pipeline_bind_map *bind_map);
|
2022-10-12 02:00:41 +03:00
|
|
|
|
|
|
|
|
/* Report which push descriptor UBOs of `nir` are fully promoted, given
 * `set_count` descriptor set layouts and the (read-only) pipeline bind map.
 * NOTE(review): the encoding of the returned uint32_t (presumably a bitmask)
 * is not visible from this header - confirm against the implementation.
 */
uint32_t anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
                                              struct anv_descriptor_set_layout * const *set_layouts,
                                              uint32_t set_count,
                                              const struct anv_pipeline_bind_map *bind_map);
|
|
|
|
|
|
2025-05-16 10:32:23 +03:00
|
|
|
/* Apply the per-primitive attribute workaround to a mesh shader / fragment
 * shader pair.
 * NOTE(review): description inferred from the name and parameter names; the
 * conditions under which the workaround applies are not visible here.
 */
void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
                                struct nir_shader *fs_nir,
                                struct anv_device *device);
|
|
|
|
|
|
2025-05-22 15:43:47 +03:00
|
|
|
static inline bool
|
|
|
|
|
anv_nir_is_promotable_ubo_binding(nir_src src)
|
|
|
|
|
{
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
|
|
|
|
|
|
|
|
|
return intrin && intrin->intrinsic == nir_intrinsic_resource_intel &&
|
|
|
|
|
(nir_intrinsic_resource_access_intel(intrin) &
|
|
|
|
|
nir_resource_intel_pushable);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
|
anv_nir_is_internal_ubo(nir_src src)
|
|
|
|
|
{
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
|
|
|
|
|
|
|
|
|
return intrin && intrin->intrinsic == nir_intrinsic_resource_intel &&
|
|
|
|
|
(nir_intrinsic_resource_access_intel(intrin) &
|
|
|
|
|
nir_resource_intel_internal);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline unsigned
|
|
|
|
|
anv_nir_get_ubo_binding_push_block(nir_src src)
|
|
|
|
|
{
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
|
|
|
|
|
assert(intrin && intrin->intrinsic == nir_intrinsic_resource_intel);
|
|
|
|
|
|
|
|
|
|
return nir_intrinsic_resource_block_intel(intrin);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void anv_nir_analyze_push_constants_ranges(nir_shader *nir,
|
|
|
|
|
const struct intel_device_info *devinfo,
|
|
|
|
|
const struct anv_pipeline_push_map *push_map,
|
|
|
|
|
struct anv_push_range out_ranges[4]);
|
|
|
|
|
|
2015-09-11 15:53:53 -07:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2016-10-11 18:26:24 +01:00
|
|
|
|
|
|
|
|
#endif /* ANV_NIR_H */
|