2024-08-07 23:32:23 +03:00
|
|
|
/* Copyright © 2024 Intel Corporation
|
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "anv_private.h"
|
|
|
|
|
#include "anv_nir.h"
|
|
|
|
|
#include "anv_shader.h"
|
|
|
|
|
|
|
|
|
|
#include "vk_nir_convert_ycbcr.h"
|
|
|
|
|
#include "vk_pipeline.h"
|
|
|
|
|
|
|
|
|
|
#include "common/intel_compute_slm.h"
|
|
|
|
|
#include "common/intel_l3_config.h"
|
|
|
|
|
|
2025-10-06 14:25:26 -07:00
|
|
|
#include "compiler/brw/brw_nir.h"
|
|
|
|
|
#include "compiler/brw/brw_nir_rt.h"
|
2024-08-07 23:32:23 +03:00
|
|
|
#include "compiler/intel_nir.h"
|
2025-11-27 17:57:18 -05:00
|
|
|
#include "compiler/jay/jay.h"
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2024-05-10 13:46:40 -07:00
|
|
|
#include "git_sha1.h"
|
|
|
|
|
|
2025-08-15 03:06:24 +00:00
|
|
|
typedef void (*game_wa_callback)(nir_shader *nir);
|
|
|
|
|
|
|
|
|
|
/* Structure to hold a game-specific workaround entry */
|
|
|
|
|
struct game_wa_entry {
|
|
|
|
|
game_wa_callback cb;
|
|
|
|
|
uint32_t shader_blake3s[16][BLAKE3_OUT_LEN32];
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Workaround for a shader in Horizon Forbidden West that causes
|
|
|
|
|
* visual corruption. The shader writes the result of fsqrt to
|
|
|
|
|
* storage images with a 16-bit image format and misrendering
|
|
|
|
|
* occurs when those values are denormal for an unknown reason.
|
|
|
|
|
*
|
|
|
|
|
* This clamps the image writes to the smallest fp16 normalized
|
|
|
|
|
* value. (Pattern matching against fsqrt is easy to do in a one
|
|
|
|
|
* line algebraic pass, while matching image stores is harder.)
|
|
|
|
|
*
|
|
|
|
|
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/12555
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
wa_forbidden_west(nir_shader *nir)
|
|
|
|
|
{
|
|
|
|
|
NIR_PASS(_, nir, brw_nir_apply_sqrt_workarounds);
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-28 09:42:47 +02:00
|
|
|
/* Try to detect shaders relying on 32-wide subgroups. Usually they have a
|
|
|
|
|
* pattern like this:
|
|
|
|
|
*
|
|
|
|
|
* div 32 %1096 = @load_subgroup_invocation
|
|
|
|
|
* div 32 %1245 = iand %1096, %1228 (0x1f)
|
|
|
|
|
* div 32 %1246 = ixor %1245, %13 (0x1)
|
|
|
|
|
* div 32 %1247 = @shuffle (%1244, %1246)
|
|
|
|
|
*/
|
|
|
|
|
/* Returns true when `alu` is an iand with a constant 0x1f operand.
 * A NULL `alu` is accepted and yields false.
 */
static bool is_alu1_iand_0x1f(nir_alu_instr *alu)
{
   if (alu == NULL)
      return false;
   if (alu->op != nir_op_iand)
      return false;

   /* The mask may sit in either of the two operands. */
   for (uint32_t operand = 0; operand < 2; operand++) {
      if (!nir_src_is_const(alu->src[operand].src))
         continue;

      if (nir_alu_src_as_uint(alu->src[operand]) == 0x1f)
         return true;
   }

   return false;
}
|
|
|
|
|
|
2026-05-12 12:21:42 +03:00
|
|
|
/* Returns true when the second source of the intrinsic is produced by an
 * "iand ..., 0x1f", either directly or through one more ALU instruction.
 */
static bool is_simd32_shuffle(nir_intrinsic_instr *intrin)
{
   nir_alu_instr *index_alu = nir_src_as_alu(intrin->src[1]);
   if (!index_alu)
      return false;

   /* Either the index computation itself is the masking iand... */
   bool masked = is_alu1_iand_0x1f(index_alu);

   /* ...or one of its direct inputs is. */
   const uint32_t num_inputs = nir_op_infos[index_alu->op].num_inputs;
   for (uint32_t i = 0; !masked && i < num_inputs; i++)
      masked = is_alu1_iand_0x1f(nir_src_as_alu(index_alu->src[i].src));

   return masked;
}
|
|
|
|
|
|
2026-05-12 12:21:42 +03:00
|
|
|
/* Try to detect shaders testing with a sequence like this :
|
|
|
|
|
*
|
|
|
|
|
* 32x3 %49 = @load_local_invocation_id
|
|
|
|
|
* 32 %1673 = load_const (0xffffffe0 = -32 = 4294967264)
|
|
|
|
|
* 32 %1674 = iand %49.x, %1673 (0xffffffe0)
|
|
|
|
|
* 32 %1675 = @load_subgroup_size
|
|
|
|
|
* 32 %1676 = umod %1674, %1675
|
|
|
|
|
*
|
|
|
|
|
* This sequence appears to be targeted at subgroup sizes larger than 32. The
|
|
|
|
|
* problem in this sequence is that subgroup size is expected to be >= 32 to
|
|
|
|
|
* match the masking of local_invocation_id above. If inferior, the umod
|
|
|
|
|
* operation returns the same value as if the subgroup was 32.
|
|
|
|
|
*/
|
|
|
|
|
/* Returns true when the result of `in_alu` feeds a umod/imod whose other
 * operand is a load_subgroup_size intrinsic.
 */
static bool is_alu_used_for_umod_subgroup_size(nir_alu_instr *in_alu)
{
   nir_foreach_use(use, &in_alu->def) {
      nir_instr *user = nir_src_use_instr(use);
      if (user->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *mod = nir_instr_as_alu(user);
      const bool is_mod = mod->op == nir_op_umod || mod->op == nir_op_imod;
      if (!is_mod)
         continue;

      for (uint32_t i = 0; i < 2; i++) {
         nir_src *other = &mod->src[i].src;

         /* Skip the operand through which we reached this instruction. */
         if (other == use)
            continue;

         if (nir_src_is_intrinsic(*other) &&
             nir_src_as_intrinsic(*other)->intrinsic ==
             nir_intrinsic_load_subgroup_size)
            return true;
      }
   }

   return false;
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
is_local_invoc_id_used_with_simd32_assumption(nir_intrinsic_instr *subgroup_inv)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_use(src, &subgroup_inv->def) {
|
|
|
|
|
nir_instr *instr = nir_src_use_instr(src);
|
|
|
|
|
if (instr->type != nir_instr_type_alu)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
|
|
|
|
if (alu->op != nir_op_iand)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* nir_print_instr(&alu->instr, stderr); */
|
|
|
|
|
/* fprintf(stderr, "\n"); */
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
|
|
|
if (&alu->src[i].src == src)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (!nir_src_is_const(alu->src[i].src))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (nir_src_as_uint(alu->src[i].src) != 0xffffffe0)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (is_alu_used_for_umod_subgroup_size(alu))
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
detect_simd32_requirement(nir_builder *b,
|
|
|
|
|
nir_intrinsic_instr *intrin,
|
|
|
|
|
void *data)
|
|
|
|
|
{
|
|
|
|
|
switch (intrin->intrinsic) {
|
|
|
|
|
case nir_intrinsic_shuffle:
|
|
|
|
|
return is_simd32_shuffle(intrin);
|
|
|
|
|
|
|
|
|
|
case nir_intrinsic_load_local_invocation_id:
|
|
|
|
|
return is_local_invoc_id_used_with_simd32_assumption(intrin);
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-15 03:06:24 +00:00
|
|
|
/* List of game-specific workarounds identified by BLAKE3 hash of the shader.
|
|
|
|
|
* Add new workarounds here as needed.
|
|
|
|
|
*/
|
|
|
|
|
static const struct game_wa_entry game_was[] = {
|
|
|
|
|
{
|
|
|
|
|
.cb = wa_forbidden_west,
|
|
|
|
|
.shader_blake3s = {
|
|
|
|
|
{0x51683151, 0xe044f0ce, 0xc210a762, 0xb12b2da4, 0x4e69ddc0, 0x237b1cc1, 0xc84bcf09, 0x31cfe883},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Apply game-specific workarounds based on the shader's BLAKE3 hash */
|
|
|
|
|
static void
|
|
|
|
|
anv_nir_apply_shader_workarounds(nir_shader *nir)
|
|
|
|
|
{
|
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(game_was); i++) {
|
|
|
|
|
const struct game_wa_entry *wa = &game_was[i];
|
|
|
|
|
for (unsigned j = 0; j < ARRAY_SIZE(wa->shader_blake3s); j++) {
|
|
|
|
|
if (_mesa_printed_blake3_equal(nir->info.source_blake3, wa->shader_blake3s[j])) {
|
|
|
|
|
wa->cb(nir);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-07 14:20:05 +03:00
|
|
|
static enum brw_robustness_flags
|
|
|
|
|
anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
|
|
|
|
|
{
|
|
|
|
|
return
|
|
|
|
|
((rstate->storage_buffers !=
|
|
|
|
|
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
|
|
|
|
|
BRW_ROBUSTNESS_SSBO : 0) |
|
|
|
|
|
((rstate->uniform_buffers !=
|
|
|
|
|
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
|
|
|
|
|
BRW_ROBUSTNESS_UBO : 0);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
static enum anv_descriptor_set_layout_type
|
|
|
|
|
set_layouts_get_layout_type(struct anv_descriptor_set_layout * const *set_layouts,
|
|
|
|
|
uint32_t set_layout_count)
|
|
|
|
|
{
|
|
|
|
|
for (uint32_t s = 0; s < set_layout_count; s++) {
|
|
|
|
|
if (set_layouts[s]) {
|
|
|
|
|
return set_layouts[s]->type;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
anv_shader_init_uuid(struct anv_physical_device *device)
|
|
|
|
|
{
|
|
|
|
|
/* We should include any parameter here that will change the compiler's
|
|
|
|
|
* output. Mostly it's workarounds, but there is also settings for using
|
|
|
|
|
* indirect descriptors (a different binding model).
|
|
|
|
|
*
|
|
|
|
|
* The fp64 workaround is skipped because although it changes the
|
|
|
|
|
* compiler's output, not having that workaroung enabled with an app
|
|
|
|
|
* expecting fp64 support will just crash in the backend.
|
|
|
|
|
*/
|
2026-03-12 20:34:57 -04:00
|
|
|
blake3_hasher ctx;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_init(&ctx);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool indirect_descriptors = device->indirect_descriptors;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &indirect_descriptors, sizeof(indirect_descriptors));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const int spilling_rate = device->compiler->spilling_rate;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &spilling_rate, sizeof(spilling_rate));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const uint8_t afs = device->instance->assume_full_subgroups;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &afs, sizeof(afs));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool afswb = device->instance->assume_full_subgroups_with_barrier;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &afswb, sizeof(afswb));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool afs_shm = device->instance->assume_full_subgroups_with_shared_memory;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &afs_shm, sizeof(afs_shm));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool erwf = device->instance->emulate_read_without_format;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &erwf, sizeof(erwf));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool lttd = device->instance->lower_terminate_to_discard;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, <td, sizeof(lttd));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const bool large_wg_wa =
|
|
|
|
|
device->instance->large_workgroup_non_coherent_image_workaround;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, &large_wg_wa, sizeof(large_wg_wa));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-12-23 23:05:47 +00:00
|
|
|
const bool lto_disable = device->instance->disable_lto;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, <o_disable, sizeof(lto_disable));
|
2025-12-23 23:05:47 +00:00
|
|
|
|
2020-11-05 14:38:26 -08:00
|
|
|
const bool btp_bti_rcc = device->rt_change_needs_flush;
|
|
|
|
|
_mesa_blake3_update(&ctx, &btp_bti_rcc, sizeof(btp_bti_rcc));
|
|
|
|
|
|
2026-04-16 23:15:21 +03:00
|
|
|
const bool cbv_push_buffer = device->instance->promote_cbv_to_push_buffers;
|
|
|
|
|
_mesa_blake3_update(&ctx, &cbv_push_buffer, sizeof(cbv_push_buffer));
|
|
|
|
|
|
2026-03-12 22:26:24 -04:00
|
|
|
uint8_t blake3[BLAKE3_KEY_LEN];
|
|
|
|
|
_mesa_blake3_final(&ctx, blake3);
|
|
|
|
|
memcpy(device->shader_binary_uuid, blake3, sizeof(device->shader_binary_uuid));
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const struct nir_shader_compiler_options *
|
|
|
|
|
anv_shader_get_nir_options(struct vk_physical_device *device,
|
|
|
|
|
mesa_shader_stage stage,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, struct anv_physical_device, vk);
|
|
|
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
|
|
|
|
|
2025-09-17 11:20:50 -04:00
|
|
|
return &compiler->nir_options[stage];
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct spirv_to_nir_options
|
|
|
|
|
anv_shader_get_spirv_options(struct vk_physical_device *device,
|
|
|
|
|
mesa_shader_stage stage,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
|
|
|
{
|
|
|
|
|
struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, struct anv_physical_device, vk);
|
|
|
|
|
enum brw_robustness_flags robust_flags = anv_get_robust_flags(rs);
|
|
|
|
|
|
|
|
|
|
return (struct spirv_to_nir_options) {
|
|
|
|
|
.ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
|
|
|
|
|
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
|
|
|
|
|
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
|
|
|
|
.push_const_addr_format = nir_address_format_logical,
|
|
|
|
|
|
2026-04-06 22:22:29 +03:00
|
|
|
.printf = ANV_DEBUG(SHADER_PRINT),
|
2025-11-24 11:36:42 +02:00
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
/* TODO: Consider changing this to an address format that has the NULL
|
|
|
|
|
* pointer equals to 0. That might be a better format to play nice
|
|
|
|
|
* with certain code / code generators.
|
|
|
|
|
*/
|
|
|
|
|
.shared_addr_format = nir_address_format_32bit_offset,
|
|
|
|
|
|
|
|
|
|
.min_ubo_alignment = ANV_UBO_ALIGNMENT,
|
|
|
|
|
.min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
|
|
|
|
|
|
|
|
|
|
.workarounds = {
|
|
|
|
|
.lower_terminate_to_discard = pdevice->instance->lower_terminate_to_discard,
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_preprocess_nir(struct vk_physical_device *device,
|
|
|
|
|
nir_shader *nir,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
|
|
|
{
|
|
|
|
|
struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, struct anv_physical_device, vk);
|
|
|
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
|
2025-11-03 09:39:31 +01:00
|
|
|
nir_shader_get_entrypoint(nir), nir_var_shader_out);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
|
|
|
|
|
.point_coord = true,
|
2026-03-19 12:10:19 -04:00
|
|
|
.primitive_id = nir->info.stage == MESA_SHADER_FRAGMENT,
|
2024-08-07 23:32:23 +03:00
|
|
|
};
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
|
|
|
|
|
|
2025-10-07 10:22:17 +03:00
|
|
|
NIR_PASS(_, nir, nir_update_image_intrinsic_from_var);
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
const nir_opt_access_options opt_access_options = {
|
|
|
|
|
.is_vulkan = true,
|
|
|
|
|
};
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
|
|
|
|
|
|
|
|
|
|
struct brw_nir_compiler_opts opts = {
|
|
|
|
|
.robust_image_access = rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS ||
|
|
|
|
|
rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
|
|
|
|
|
};
|
|
|
|
|
brw_preprocess_nir(compiler, nir, &opts);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_barrier_modes);
|
2025-09-10 09:50:19 +03:00
|
|
|
NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_QUEUE_FAMILY);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2026-04-06 22:22:29 +03:00
|
|
|
if (ANV_DEBUG(SHADER_PRINT)) {
|
2025-11-24 11:36:42 +02:00
|
|
|
const nir_lower_printf_options printf_opts = {
|
|
|
|
|
.ptr_bit_size = 64,
|
|
|
|
|
.hash_format_strings = true,
|
|
|
|
|
};
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_printf, &printf_opts);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_base_prog_key(struct brw_base_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
|
|
|
|
|
|
/* We can avoid including this for hashing because the runtime already
|
|
|
|
|
* hashes that information. We just put it here for at compile time.
|
|
|
|
|
*/
|
|
|
|
|
if (rs != NULL)
|
|
|
|
|
key->robust_flags = anv_get_robust_flags(rs);
|
2026-04-02 13:30:34 +00:00
|
|
|
key->divergent_atomics_flags = pdevice->instance->enable_opt_divergent_atomics;
|
2024-08-07 23:32:23 +03:00
|
|
|
key->limit_trig_input_range = pdevice->instance->limit_trig_input_range;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_base_gfx_prog_key(struct brw_base_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *gfx_state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
|
|
|
|
|
|
populate_base_prog_key(key, device, rs);
|
|
|
|
|
|
2026-02-17 17:36:24 +02:00
|
|
|
key->view_mask = (gfx_state && gfx_state->mv) ? gfx_state->mv->view_mask : 0;
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
key->vue_layout =
|
|
|
|
|
(util_bitcount(link_stages) > 1 && (link_stages & VK_SHADER_STAGE_FRAGMENT_BIT)) ?
|
|
|
|
|
INTEL_VUE_LAYOUT_FIXED :
|
|
|
|
|
pdevice->info.verx10 >= 125 ? INTEL_VUE_LAYOUT_SEPARATE_MESH :
|
|
|
|
|
INTEL_VUE_LAYOUT_SEPARATE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_vs_prog_key(struct brw_vs_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
|
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
|
|
|
|
|
key->vf_component_packing = pdevice->instance->vf_component_packing;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_tcs_prog_key(struct brw_tcs_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
|
|
|
|
|
if (state && state->ts &&
|
|
|
|
|
!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))
|
|
|
|
|
key->input_vertices = state->ts->patch_control_points;
|
|
|
|
|
|
|
|
|
|
key->separate_tess_vue_layout =
|
|
|
|
|
!(link_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_tes_prog_key(struct brw_tes_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
|
|
|
|
|
key->separate_tess_vue_layout =
|
|
|
|
|
!(link_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_gs_prog_key(struct brw_gs_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_task_prog_key(struct brw_task_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_mesh_prog_key(struct brw_mesh_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pipeline_has_coarse_pixel(const struct vk_graphics_pipeline_state *state)
|
|
|
|
|
{
|
|
|
|
|
if (state == NULL)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/* The Vulkan 1.2.199 spec says:
|
|
|
|
|
*
|
|
|
|
|
* "If any of the following conditions are met, Cxy' must be set to
|
|
|
|
|
* {1,1}:
|
|
|
|
|
*
|
|
|
|
|
* * If Sample Shading is enabled.
|
|
|
|
|
* * [...]"
|
|
|
|
|
*
|
|
|
|
|
* And "sample shading" is defined as follows:
|
|
|
|
|
*
|
|
|
|
|
* "Sample shading is enabled for a graphics pipeline:
|
|
|
|
|
*
|
|
|
|
|
* * If the interface of the fragment shader entry point of the
|
|
|
|
|
* graphics pipeline includes an input variable decorated with
|
|
|
|
|
* SampleId or SamplePosition. In this case minSampleShadingFactor
|
|
|
|
|
* takes the value 1.0.
|
|
|
|
|
*
|
|
|
|
|
* * Else if the sampleShadingEnable member of the
|
|
|
|
|
* VkPipelineMultisampleStateCreateInfo structure specified when
|
|
|
|
|
* creating the graphics pipeline is set to VK_TRUE. In this case
|
|
|
|
|
* minSampleShadingFactor takes the value of
|
|
|
|
|
* VkPipelineMultisampleStateCreateInfo::minSampleShading.
|
|
|
|
|
*
|
|
|
|
|
* Otherwise, sample shading is considered disabled."
|
|
|
|
|
*
|
|
|
|
|
* The first bullet above is handled by the back-end compiler because those
|
|
|
|
|
* inputs both force per-sample dispatch. The second bullet is handled
|
|
|
|
|
* here. Note that this sample shading being enabled has nothing to do
|
|
|
|
|
* with minSampleShading.
|
|
|
|
|
*/
|
|
|
|
|
if (state->ms != NULL && state->ms->sample_shading_enable)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* Not dynamic & pipeline has a 1x1 fragment shading rate with no
|
|
|
|
|
* possibility for element of the pipeline to change the value or fragment
|
|
|
|
|
* shading rate not specified at all.
|
|
|
|
|
*/
|
|
|
|
|
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_FSR) &&
|
|
|
|
|
(state->fsr == NULL ||
|
|
|
|
|
(state->fsr->fragment_size.width <= 1 &&
|
|
|
|
|
state->fsr->fragment_size.height <= 1 &&
|
|
|
|
|
state->fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
|
|
|
|
|
state->fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
|
rp_color_mask(const struct vk_graphics_pipeline_state *state)
|
|
|
|
|
{
|
|
|
|
|
if (state == NULL || state->rp == NULL ||
|
|
|
|
|
!vk_render_pass_state_has_attachment_info(state->rp))
|
|
|
|
|
return ((1u << MAX_RTS) - 1);
|
|
|
|
|
|
|
|
|
|
assert(state->rp->color_attachment_count <= MAX_RTS);
|
|
|
|
|
|
|
|
|
|
uint32_t color_mask = 0;
|
|
|
|
|
for (uint32_t i = 0; i < state->rp->color_attachment_count; i++) {
|
2025-09-22 17:06:36 -07:00
|
|
|
if (state->rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
|
|
|
|
|
if (state->cal) {
|
|
|
|
|
if (state->cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
|
|
|
|
|
color_mask |= BITFIELD_BIT(state->cal->color_map[i]);
|
|
|
|
|
} else {
|
|
|
|
|
color_mask |= BITFIELD_BIT(i);
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return color_mask;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2026-02-06 15:17:52 -08:00
|
|
|
populate_fs_prog_key(struct brw_fs_prog_key *key,
|
2024-08-07 23:32:23 +03:00
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
VkShaderStageFlags link_stages)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_physical_device *pdevice =
|
|
|
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
|
|
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
|
|
|
|
|
|
/* Consider all inputs as valid until look at the NIR variables. */
|
2026-01-12 14:07:57 +02:00
|
|
|
key->nr_color_regions = util_last_bit(rp_color_mask(state));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
/* To reduce possible shader recompilations we would need to know if
|
|
|
|
|
* there is a SampleMask output variable to compute if we should emit
|
|
|
|
|
* code to workaround the issue that hardware disables alpha to coverage
|
|
|
|
|
* when there is SampleMask output.
|
|
|
|
|
*
|
|
|
|
|
* If the pipeline we compile the fragment shader in includes the output
|
|
|
|
|
* interface, then we can be sure whether alpha_coverage is enabled or not.
|
|
|
|
|
* If we don't have that output interface, then we have to compile the
|
|
|
|
|
* shader with some conditionals.
|
|
|
|
|
*/
|
|
|
|
|
if (state != NULL && state->ms != NULL) {
|
|
|
|
|
/* VUID-VkGraphicsPipelineCreateInfo-rasterizerDiscardEnable-00751:
|
|
|
|
|
*
|
|
|
|
|
* "If the pipeline is being created with fragment shader state,
|
|
|
|
|
* pMultisampleState must be a valid pointer to a valid
|
|
|
|
|
* VkPipelineMultisampleStateCreateInfo structure"
|
|
|
|
|
*
|
|
|
|
|
* It's also required for the fragment output interface.
|
|
|
|
|
*/
|
|
|
|
|
key->multisample_fbo =
|
|
|
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
|
|
|
|
|
INTEL_SOMETIMES :
|
|
|
|
|
state->ms->rasterization_samples > 1 ? INTEL_ALWAYS : INTEL_NEVER;
|
|
|
|
|
key->persample_interp =
|
|
|
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
|
|
|
|
|
INTEL_SOMETIMES :
|
|
|
|
|
(state->ms->sample_shading_enable &&
|
|
|
|
|
(state->ms->min_sample_shading * state->ms->rasterization_samples) > 1) ?
|
|
|
|
|
INTEL_ALWAYS : INTEL_NEVER;
|
|
|
|
|
key->alpha_to_coverage =
|
|
|
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ?
|
|
|
|
|
INTEL_SOMETIMES :
|
|
|
|
|
(state->ms->alpha_to_coverage_enable ? INTEL_ALWAYS : INTEL_NEVER);
|
|
|
|
|
|
|
|
|
|
/* TODO: We should make this dynamic */
|
|
|
|
|
if (pdevice->instance->sample_mask_out_opengl_behaviour)
|
|
|
|
|
key->ignore_sample_mask_out = !key->multisample_fbo;
|
|
|
|
|
} else {
|
|
|
|
|
/* Consider all inputs as valid until we look at the NIR variables. */
|
|
|
|
|
key->nr_color_regions = MAX_RTS;
|
|
|
|
|
|
|
|
|
|
key->alpha_to_coverage = INTEL_SOMETIMES;
|
|
|
|
|
key->multisample_fbo = INTEL_SOMETIMES;
|
|
|
|
|
key->persample_interp = INTEL_SOMETIMES;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pdevice->info.verx10 >= 200) {
|
|
|
|
|
if (state != NULL && state->rs != NULL) {
|
|
|
|
|
key->provoking_vertex_last =
|
|
|
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) ?
|
|
|
|
|
INTEL_SOMETIMES :
|
|
|
|
|
state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT ?
|
|
|
|
|
INTEL_ALWAYS : INTEL_NEVER;
|
|
|
|
|
} else {
|
|
|
|
|
key->provoking_vertex_last = INTEL_SOMETIMES;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* Pre-Xe2 we don't care about this at all, make sure it's always set to
|
|
|
|
|
* NEVER to avoid it influencing the push constant.
|
|
|
|
|
*/
|
|
|
|
|
key->provoking_vertex_last = INTEL_NEVER;
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-09 17:12:25 -08:00
|
|
|
if (state != NULL && state->rs != NULL) {
|
|
|
|
|
key->conservative_raster =
|
|
|
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE) ?
|
|
|
|
|
INTEL_SOMETIMES :
|
|
|
|
|
state->rs->conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT ?
|
|
|
|
|
INTEL_NEVER : INTEL_ALWAYS;
|
|
|
|
|
} else {
|
|
|
|
|
key->conservative_raster = INTEL_SOMETIMES;
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
key->mesh_input =
|
|
|
|
|
(link_stages & VK_SHADER_STAGE_VERTEX_BIT) ? INTEL_NEVER :
|
|
|
|
|
(link_stages & VK_SHADER_STAGE_MESH_BIT_EXT) ? INTEL_ALWAYS :
|
2026-04-12 00:51:35 -07:00
|
|
|
pdevice->info.has_mesh_shading ? INTEL_SOMETIMES : INTEL_NEVER;
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
if (state && state->ms) {
|
|
|
|
|
key->min_sample_shading = state->ms->min_sample_shading;
|
|
|
|
|
key->api_sample_shading = state->ms->sample_shading_enable;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
key->coarse_pixel = pipeline_has_coarse_pixel(state);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_cs_prog_key(struct brw_cs_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
2025-12-05 23:21:23 +02:00
|
|
|
const struct vk_pipeline_robustness_state *rs)
|
2024-08-07 23:32:23 +03:00
|
|
|
{
|
|
|
|
|
populate_base_prog_key(&key->base, device, rs);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
populate_bs_prog_key(struct brw_bs_prog_key *key,
|
|
|
|
|
const struct vk_physical_device *device,
|
|
|
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
|
|
|
VkPipelineCreateFlags2KHR flags)
|
|
|
|
|
{
|
|
|
|
|
populate_base_prog_key(&key->base, device, rs);
|
|
|
|
|
|
|
|
|
|
uint32_t ray_flags = 0;
|
|
|
|
|
const bool rt_skip_triangles =
|
|
|
|
|
flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR;
|
|
|
|
|
const bool rt_skip_aabbs =
|
|
|
|
|
flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR;
|
|
|
|
|
assert(!(rt_skip_triangles && rt_skip_aabbs));
|
|
|
|
|
if (rt_skip_triangles)
|
|
|
|
|
ray_flags |= BRW_RT_RAY_FLAG_SKIP_TRIANGLES;
|
|
|
|
|
else if (rt_skip_aabbs)
|
|
|
|
|
ray_flags |= BRW_RT_RAY_FLAG_SKIP_AABBS;
|
|
|
|
|
|
|
|
|
|
key->pipeline_ray_flags = ray_flags;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_hash_state(struct vk_physical_device *device,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
const struct vk_features *enabled_features,
|
|
|
|
|
VkShaderStageFlags stages,
|
|
|
|
|
blake3_hash blake3_out)
|
|
|
|
|
{
|
|
|
|
|
struct mesa_blake3 blake3_ctx;
|
|
|
|
|
_mesa_blake3_init(&blake3_ctx);
|
|
|
|
|
|
|
|
|
|
anv_foreach_vk_stage(stage, stages) {
|
|
|
|
|
union brw_any_prog_key key;
|
|
|
|
|
memset(&key, 0, sizeof(key));
|
|
|
|
|
|
|
|
|
|
switch (stage) {
|
|
|
|
|
case VK_SHADER_STAGE_VERTEX_BIT:
|
|
|
|
|
populate_vs_prog_key(&key.vs, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.vs, sizeof(key.vs));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
|
|
|
|
|
populate_tcs_prog_key(&key.tcs, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.tcs, sizeof(key.tcs));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
|
|
|
|
|
populate_tes_prog_key(&key.tes, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.tes, sizeof(key.tes));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_GEOMETRY_BIT:
|
|
|
|
|
populate_gs_prog_key(&key.gs, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.gs, sizeof(key.gs));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_TASK_BIT_EXT:
|
|
|
|
|
populate_task_prog_key(&key.task, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.task, sizeof(key.task));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_MESH_BIT_EXT:
|
|
|
|
|
populate_mesh_prog_key(&key.mesh, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.mesh, sizeof(key.mesh));
|
|
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_FRAGMENT_BIT:
|
2026-02-06 15:17:52 -08:00
|
|
|
populate_fs_prog_key(&key.fs, device, NULL, state, stages);
|
|
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.fs, sizeof(key.fs));
|
2024-08-07 23:32:23 +03:00
|
|
|
break;
|
|
|
|
|
case VK_SHADER_STAGE_COMPUTE_BIT:
|
2025-12-05 23:21:23 +02:00
|
|
|
populate_cs_prog_key(&key.cs, device, NULL);
|
2024-08-07 23:32:23 +03:00
|
|
|
_mesa_blake3_update(&blake3_ctx, &key.cs, sizeof(key.cs));
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid stage");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_mesa_blake3_final(&blake3_ctx, blake3_out);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
merge_tess_info(struct shader_info *tes_info,
|
|
|
|
|
const struct shader_info *tcs_info)
|
|
|
|
|
{
|
|
|
|
|
/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
|
|
|
|
|
*
|
|
|
|
|
* "PointMode. Controls generation of points rather than triangles
|
|
|
|
|
* or lines. This functionality defaults to disabled, and is
|
|
|
|
|
* enabled if either shader stage includes the execution mode.
|
|
|
|
|
*
|
|
|
|
|
* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
|
|
|
|
|
* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
|
|
|
|
|
* and OutputVertices, it says:
|
|
|
|
|
*
|
|
|
|
|
* "One mode must be set in at least one of the tessellation
|
|
|
|
|
* shader stages."
|
|
|
|
|
*
|
|
|
|
|
* So, the fields can be set in either the TCS or TES, but they must
|
|
|
|
|
* agree if set in both. Our backend looks at TES, so bitwise-or in
|
|
|
|
|
* the values from the TCS.
|
|
|
|
|
*/
|
|
|
|
|
assert(tcs_info->tess.tcs_vertices_out == 0 ||
|
|
|
|
|
tes_info->tess.tcs_vertices_out == 0 ||
|
|
|
|
|
tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
|
|
|
|
|
tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
|
|
|
|
|
|
|
|
|
|
assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
|
|
|
|
|
tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
|
|
|
|
|
tcs_info->tess.spacing == tes_info->tess.spacing);
|
|
|
|
|
tes_info->tess.spacing |= tcs_info->tess.spacing;
|
|
|
|
|
|
|
|
|
|
assert(tcs_info->tess._primitive_mode == 0 ||
|
|
|
|
|
tes_info->tess._primitive_mode == 0 ||
|
|
|
|
|
tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
|
|
|
|
|
tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
|
|
|
|
|
tes_info->tess.ccw |= tcs_info->tess.ccw;
|
|
|
|
|
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_link_tcs(const struct brw_compiler *compiler,
|
|
|
|
|
struct brw_tcs_prog_key *key,
|
|
|
|
|
struct vk_shader_compile_info *tcs_stage,
|
|
|
|
|
struct vk_shader_compile_info *tes_stage)
|
|
|
|
|
{
|
|
|
|
|
assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
|
|
|
|
|
|
|
|
|
|
brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
|
|
|
|
|
|
|
|
|
|
nir_lower_patch_vertices(tes_stage->nir,
|
|
|
|
|
tcs_stage->nir->info.tess.tcs_vertices_out,
|
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
|
|
/* Copy TCS info into the TES info */
|
|
|
|
|
merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
|
|
|
|
|
|
|
|
|
|
/* Whacking the key after cache lookup is a bit sketchy, but all of
|
|
|
|
|
* this comes from the SPIR-V, which is part of the hash used for the
|
|
|
|
|
* pipeline cache. So it should be safe.
|
|
|
|
|
*/
|
|
|
|
|
key->_tes_primitive_mode = tes_stage->nir->info.tess._primitive_mode;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_link(const struct brw_compiler *compiler,
|
|
|
|
|
struct vk_shader_compile_info *prev_stage,
|
|
|
|
|
struct vk_shader_compile_info *next_stage)
|
|
|
|
|
{
|
|
|
|
|
brw_nir_link_shaders(compiler, prev_stage->nir, next_stage->nir);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const struct vk_ycbcr_conversion_state *
|
|
|
|
|
lookup_ycbcr_conversion(const void *_stage, uint32_t set,
|
|
|
|
|
uint32_t binding, uint32_t array_index)
|
|
|
|
|
{
|
|
|
|
|
const struct vk_shader_compile_info *stage = _stage;
|
|
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
if (set == VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS) {
|
|
|
|
|
assert(binding < stage->embedded_sampler_count);
|
|
|
|
|
return &stage->embedded_samplers[binding].ycbcr_conversion;
|
|
|
|
|
} else {
|
|
|
|
|
assert(set < MAX_SETS);
|
|
|
|
|
const struct anv_descriptor_set_layout *set_layout =
|
|
|
|
|
container_of(stage->set_layouts[set],
|
|
|
|
|
struct anv_descriptor_set_layout, vk);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
assert(binding < set_layout->binding_count);
|
|
|
|
|
const struct anv_descriptor_set_binding_layout *bind_layout =
|
|
|
|
|
&set_layout->binding[binding];
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
if (bind_layout->samplers == NULL)
|
|
|
|
|
return NULL;
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
array_index = MIN2(array_index, bind_layout->array_size - 1);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
const struct anv_descriptor_set_layout_sampler *sampler =
|
|
|
|
|
&bind_layout->samplers[array_index];
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
return sampler->has_ycbcr_conversion ?
|
|
|
|
|
&sampler->ycbcr_conversion_state : NULL;
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
2025-11-28 09:42:47 +02:00
|
|
|
anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader)
|
2024-08-07 23:32:23 +03:00
|
|
|
{
|
2025-11-28 09:42:47 +02:00
|
|
|
struct shader_info *info = &shader->info;
|
2025-10-31 10:47:39 +02:00
|
|
|
const struct anv_instance *instance = device->physical->instance;
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
if (!mesa_shader_stage_uses_workgroup(info->stage))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
unsigned local_size = info->workgroup_size[0] *
|
|
|
|
|
info->workgroup_size[1] *
|
|
|
|
|
info->workgroup_size[2];
|
|
|
|
|
|
|
|
|
|
/* Games don't always request full subgroups when they should,
|
|
|
|
|
* which can cause bugs, as they may expect bigger size of the
|
|
|
|
|
* subgroup than we choose for the execution.
|
|
|
|
|
*/
|
|
|
|
|
if (instance->assume_full_subgroups &&
|
|
|
|
|
info->uses_wide_subgroup_intrinsics &&
|
2025-09-09 18:24:08 +02:00
|
|
|
info->api_subgroup_size == BRW_SUBGROUP_SIZE &&
|
2024-08-07 23:32:23 +03:00
|
|
|
local_size &&
|
2025-09-09 18:24:08 +02:00
|
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
|
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-10-31 10:47:39 +02:00
|
|
|
if (instance->assume_full_subgroups_with_barrier &&
|
|
|
|
|
info->stage == MESA_SHADER_COMPUTE &&
|
|
|
|
|
device->info->verx10 <= 125 &&
|
|
|
|
|
info->uses_control_barrier &&
|
|
|
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
|
|
|
local_size &&
|
|
|
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
|
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Similarly, sometimes games rely on the implicit synchronization of
|
|
|
|
|
* the shared memory accesses, and choosing smaller subgroups than the game
|
|
|
|
|
* expects will cause bugs. */
|
|
|
|
|
if (instance->assume_full_subgroups_with_shared_memory &&
|
|
|
|
|
info->shared_size > 0 &&
|
|
|
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
|
|
|
local_size &&
|
|
|
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
|
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
/* Cooperative matrix extension requires that all invocations in a subgroup
|
|
|
|
|
* be active. As a result, when the application does not request a specific
|
|
|
|
|
* subgroup size, we must use SIMD32.
|
|
|
|
|
*/
|
|
|
|
|
if (info->stage == MESA_SHADER_COMPUTE && info->cs.has_cooperative_matrix &&
|
2025-09-09 18:24:08 +02:00
|
|
|
info->max_subgroup_size > info->min_subgroup_size) {
|
|
|
|
|
info->api_subgroup_size = info->max_subgroup_size;
|
|
|
|
|
info->min_subgroup_size = info->max_subgroup_size;
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
2025-11-28 09:42:47 +02:00
|
|
|
|
|
|
|
|
/* Only promote to SIMD32 if the max allows it. */
|
|
|
|
|
if (info->max_subgroup_size >= BRW_SUBGROUP_SIZE &&
|
|
|
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
|
|
|
info->uses_wide_subgroup_intrinsics &&
|
|
|
|
|
nir_shader_intrinsics_pass(shader,
|
2026-05-12 12:21:42 +03:00
|
|
|
detect_simd32_requirement,
|
2025-11-28 09:42:47 +02:00
|
|
|
nir_metadata_all,
|
|
|
|
|
NULL)) {
|
|
|
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_vs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
2025-11-27 17:57:18 -05:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_vs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.vs,
|
|
|
|
|
.prog_data = &shader_data->prog_data.vs,
|
|
|
|
|
};
|
|
|
|
|
|
2025-11-27 17:57:18 -05:00
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
|
|
|
struct jay_shader_bin *bin =
|
|
|
|
|
jay_compile(devinfo, mem_ctx, nir,
|
|
|
|
|
(union brw_any_prog_data *) params.prog_data,
|
|
|
|
|
(union brw_any_prog_key *) params.key);
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *) bin->kernel;
|
|
|
|
|
} else {
|
|
|
|
|
shader_data->code = (void *) brw_compile_vs(compiler, ¶ms);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_tcs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
shader_data->key.tcs.outputs_written = nir->info.outputs_written;
|
|
|
|
|
shader_data->key.tcs.patch_outputs_written = nir->info.patch_outputs_written;
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_tcs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.tcs,
|
|
|
|
|
.prog_data = &shader_data->prog_data.tcs,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *)brw_compile_tcs(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_tes(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *tes_shader_data,
|
|
|
|
|
struct anv_shader_data *tcs_shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = tes_shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
if (tcs_shader_data) {
|
|
|
|
|
tes_shader_data->key.tes.inputs_read =
|
|
|
|
|
tcs_shader_data->info->nir->info.outputs_written;
|
|
|
|
|
tes_shader_data->key.tes.patch_inputs_read =
|
|
|
|
|
tcs_shader_data->info->nir->info.patch_outputs_written;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tes_shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_tes_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = tes_shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = tes_shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = tes_shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &tes_shader_data->key.tes,
|
|
|
|
|
.prog_data = &tes_shader_data->prog_data.tes,
|
|
|
|
|
.input_vue_map = tcs_shader_data ?
|
|
|
|
|
&tcs_shader_data->prog_data.tcs.base.vue_map : NULL,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
tes_shader_data->code = (void *)brw_compile_tes(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_gs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_gs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.gs,
|
|
|
|
|
.prog_data = &shader_data->prog_data.gs,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *)brw_compile_gs(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_task(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_task_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.task,
|
|
|
|
|
.prog_data = &shader_data->prog_data.task,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *)brw_compile_task(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
2026-03-31 11:27:49 +03:00
|
|
|
wa_18019110168_load_provoking_vertex(nir_builder *b, void *data)
|
2024-08-07 23:32:23 +03:00
|
|
|
{
|
anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.
Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.
The driver will try to promote all push constant data (app + driver
values) if it can, if it can't it'll try to promote only the driver
values (usually a shader will only use a few driver values). If even
the drivers values won't fit, give up and don't use the inline
parameter at all.
LNL internal fossil-db:
Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-01-16 17:00:26 +02:00
|
|
|
const struct anv_pipeline_bind_map *bind_map = data;
|
2026-03-31 11:27:49 +03:00
|
|
|
nir_def *val = NULL;
|
anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.
Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.
The driver will try to promote all push constant data (app + driver
values) if it can, if it can't it'll try to promote only the driver
values (usually a shader will only use a few driver values). If even
the drivers values won't fit, give up and don't use the inline
parameter at all.
LNL internal fossil-db:
Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-01-16 17:00:26 +02:00
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < bind_map->inline_dwords_count; i++) {
|
2026-03-31 11:27:49 +03:00
|
|
|
if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.wa_18019110168)) {
|
|
|
|
|
val = nir_load_inline_data_intel(
|
|
|
|
|
b, 1, 32, nir_imm_int(b, 0),
|
|
|
|
|
.base = i * 4);
|
|
|
|
|
break;
|
anv: implement inline parameter promotion from push constants
Push constants on bindless stages of Gfx12.5+ don't get the data
delivered in the registers automatically. Instead the shader needs to
load the data with SEND messages.
Those stages do get a single InlineParameter 32B block of data
delivered into the EU. We can use that to promote some of the push
constant data that has to be pulled otherwise.
The driver will try to promote all push constant data (app + driver
values) if it can, if it can't it'll try to promote only the driver
values (usually a shader will only use a few driver values). If even
the drivers values won't fit, give up and don't use the inline
parameter at all.
LNL internal fossil-db:
Totals from 315738 (20.08% of 1572649) affected shaders:
Instrs: 155053691 -> 154920901 (-0.09%); split: -0.09%, +0.00%
CodeSize: 2578204272 -> 2574991568 (-0.12%); split: -0.15%, +0.02%
Send messages: 8235628 -> 8184485 (-0.62%); split: -0.62%, +0.00%
Cycle count: 43911938816 -> 43901857748 (-0.02%); split: -0.05%, +0.03%
Spill count: 481329 -> 473185 (-1.69%); split: -1.82%, +0.13%
Fill count: 405617 -> 399243 (-1.57%); split: -1.86%, +0.28%
Max live registers: 34309395 -> 34309300 (-0.00%); split: -0.00%, +0.00%
Max dispatch width: 8298224 -> 8299168 (+0.01%)
Non SSA regs after NIR: 18492887 -> 17631285 (-4.66%); split: -4.73%, +0.08%
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
2026-01-16 17:00:26 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-31 11:27:49 +03:00
|
|
|
if (val == NULL) {
|
|
|
|
|
val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
|
|
|
|
|
.base = anv_drv_const_offset(gfx.wa_18019110168) -
|
|
|
|
|
bind_map->push_ranges[0].start * 32,
|
|
|
|
|
.range = anv_drv_const_size(gfx.wa_18019110168));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nir_iand_imm(b, val, ANV_WA_18019110168_PROVOKING_VERTEX_MASK);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
wa_18019110168_load_per_primitive_remap_table(nir_builder *b, void *data)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_pipeline_bind_map *bind_map = data;
|
|
|
|
|
nir_def *val = NULL;
|
|
|
|
|
|
|
|
|
|
val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
|
|
|
|
|
.base = anv_drv_const_offset(gfx.wa_18019110168) -
|
|
|
|
|
bind_map->push_ranges[0].start * 32,
|
|
|
|
|
.range = anv_drv_const_size(gfx.wa_18019110168));
|
|
|
|
|
|
|
|
|
|
return nir_iand_imm(b, val, ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK);
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_mesh(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *mesh_shader_data,
|
|
|
|
|
struct anv_shader_data *task_shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = mesh_shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
mesh_shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_mesh_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = mesh_shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = mesh_shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = mesh_shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &mesh_shader_data->key.mesh,
|
|
|
|
|
.prog_data = &mesh_shader_data->prog_data.mesh,
|
|
|
|
|
.tue_map = task_shader_data ?
|
|
|
|
|
&task_shader_data->prog_data.task.map :
|
|
|
|
|
NULL,
|
2026-03-31 11:27:49 +03:00
|
|
|
.wa_18019110168_load_provoking_vertex =
|
|
|
|
|
wa_18019110168_load_provoking_vertex,
|
|
|
|
|
.wa_18019110168_data = (void *)&mesh_shader_data->bind_map,
|
2024-08-07 23:32:23 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
mesh_shader_data->code = (void *)brw_compile_mesh(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_fs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
2025-11-27 17:57:18 -05:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
/* When using Primitive Replication for multiview, each view gets its own
|
|
|
|
|
* position slot.
|
|
|
|
|
*/
|
|
|
|
|
uint32_t pos_slots = shader_data->use_primitive_replication ?
|
|
|
|
|
MAX2(1, util_bitcount(shader_data->key.base.view_mask)) : 1;
|
|
|
|
|
|
|
|
|
|
struct intel_vue_map prev_vue_map;
|
|
|
|
|
brw_compute_vue_map(compiler->devinfo,
|
|
|
|
|
&prev_vue_map,
|
|
|
|
|
nir->info.inputs_read,
|
|
|
|
|
nir->info.separate_shader,
|
|
|
|
|
pos_slots);
|
|
|
|
|
|
|
|
|
|
struct brw_compile_fs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
2026-02-06 15:17:52 -08:00
|
|
|
.key = &shader_data->key.fs,
|
2026-02-06 15:17:52 -08:00
|
|
|
.prog_data = &shader_data->prog_data.fs,
|
2024-08-07 23:32:23 +03:00
|
|
|
.mue_map = shader_data->mue_map,
|
|
|
|
|
|
|
|
|
|
.allow_spilling = true,
|
|
|
|
|
.max_polygons = UCHAR_MAX,
|
2026-03-31 11:27:49 +03:00
|
|
|
|
|
|
|
|
.wa_18019110168_load_per_primitive_remap_table_offset =
|
|
|
|
|
wa_18019110168_load_per_primitive_remap_table,
|
|
|
|
|
.wa_18019110168_data = (void *)&shader_data->bind_map,
|
2024-08-07 23:32:23 +03:00
|
|
|
};
|
|
|
|
|
|
2025-11-27 17:57:18 -05:00
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
|
|
|
struct jay_shader_bin *bin =
|
|
|
|
|
jay_compile(devinfo, mem_ctx, nir,
|
|
|
|
|
(union brw_any_prog_data *) params.prog_data,
|
|
|
|
|
(union brw_any_prog_key *) params.key);
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *) bin->kernel;
|
|
|
|
|
} else {
|
|
|
|
|
shader_data->code = (void *) brw_compile_fs(compiler, ¶ms);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
|
2026-02-06 15:17:52 -08:00
|
|
|
shader_data->num_stats = (uint32_t)!!shader_data->prog_data.fs.dispatch_multi +
|
|
|
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_8 +
|
|
|
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_16 +
|
|
|
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_32;
|
2024-08-07 23:32:23 +03:00
|
|
|
assert(shader_data->num_stats <= ARRAY_SIZE(shader_data->stats));
|
|
|
|
|
|
|
|
|
|
/* Update the push constant padding range now that we know the amount of
|
|
|
|
|
* per-primitive data delivered in the payload.
|
|
|
|
|
*/
|
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(shader_data->bind_map.push_ranges); i++) {
|
|
|
|
|
if (shader_data->bind_map.push_ranges[i].set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING) {
|
|
|
|
|
shader_data->bind_map.push_ranges[i].length = MAX2(
|
2026-02-06 15:17:52 -08:00
|
|
|
shader_data->prog_data.fs.num_per_primitive_inputs / 2,
|
2024-08-07 23:32:23 +03:00
|
|
|
shader_data->bind_map.push_ranges[i].length);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_cs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
2025-11-27 17:57:18 -05:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_cs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.cs,
|
|
|
|
|
.prog_data = &shader_data->prog_data.cs,
|
|
|
|
|
};
|
|
|
|
|
|
2025-11-27 17:57:18 -05:00
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
|
|
|
struct jay_shader_bin *bin = jay_compile(devinfo, mem_ctx, nir,
|
|
|
|
|
(union brw_any_prog_data*)params.prog_data,
|
|
|
|
|
(union brw_any_prog_key*)params.key);
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void*)bin->kernel;
|
|
|
|
|
shader_data->stats[0] = bin->stats;
|
|
|
|
|
|
|
|
|
|
params.prog_data->local_size[0] = nir->info.workgroup_size[0];
|
|
|
|
|
params.prog_data->local_size[1] = nir->info.workgroup_size[1];
|
|
|
|
|
params.prog_data->local_size[2] = nir->info.workgroup_size[2];
|
|
|
|
|
} else {
|
|
|
|
|
shader_data->code = (void*)brw_compile_cs(compiler, ¶ms);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
should_remat_cb(nir_instr *instr, void *data)
|
|
|
|
|
{
|
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shader_compile_bs(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
struct anv_shader_data *shader_data,
|
|
|
|
|
char **error_str)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
|
|
|
|
|
|
|
|
struct brw_nir_lower_shader_calls_state lowering_state = {
|
|
|
|
|
.devinfo = devinfo,
|
|
|
|
|
.key = &shader_data->key.bs,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
nir_shader **resume_shaders = NULL;
|
|
|
|
|
uint32_t num_resume_shaders = 0;
|
|
|
|
|
if (nir->info.stage != MESA_SHADER_COMPUTE) {
|
2026-03-29 22:03:29 -07:00
|
|
|
struct brw_nir_vectorize_mem_cb_data vectorize_cb_data = {
|
|
|
|
|
.devinfo = devinfo,
|
|
|
|
|
};
|
2024-08-07 23:32:23 +03:00
|
|
|
const nir_lower_shader_calls_options opts = {
|
|
|
|
|
.address_format = nir_address_format_64bit_global,
|
|
|
|
|
.stack_alignment = BRW_BTD_STACK_ALIGN,
|
|
|
|
|
.localized_loads = true,
|
|
|
|
|
.vectorizer_callback = brw_nir_should_vectorize_mem,
|
2026-03-29 22:03:29 -07:00
|
|
|
.vectorizer_data = &vectorize_cb_data,
|
2024-08-07 23:32:23 +03:00
|
|
|
.should_remat_callback = should_remat_cb,
|
|
|
|
|
};
|
|
|
|
|
|
2025-10-23 22:38:26 -07:00
|
|
|
NIR_PASS(_, nir, brw_nir_lower_rt_intrinsics_pre_trace);
|
2024-08-07 23:32:23 +03:00
|
|
|
NIR_PASS(_, nir, nir_lower_shader_calls, &opts,
|
|
|
|
|
&resume_shaders, &num_resume_shaders, mem_ctx);
|
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_shader_calls, &lowering_state);
|
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_rt_intrinsics,
|
|
|
|
|
&shader_data->key.base, devinfo);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < num_resume_shaders; i++) {
|
|
|
|
|
NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls,
|
|
|
|
|
&lowering_state);
|
|
|
|
|
NIR_PASS(_, resume_shaders[i], brw_nir_lower_rt_intrinsics,
|
|
|
|
|
&shader_data->key.base, devinfo);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
|
|
|
|
|
|
struct brw_compile_bs_params params = {
|
|
|
|
|
.base = {
|
|
|
|
|
.nir = nir,
|
|
|
|
|
.stats = shader_data->stats,
|
|
|
|
|
.log_data = device,
|
|
|
|
|
.mem_ctx = mem_ctx,
|
|
|
|
|
.source_hash = shader_data->source_hash,
|
2024-05-10 13:46:40 -07:00
|
|
|
.archiver = shader_data->archiver,
|
2024-08-07 23:32:23 +03:00
|
|
|
},
|
|
|
|
|
.key = &shader_data->key.bs,
|
|
|
|
|
.prog_data = &shader_data->prog_data.bs,
|
|
|
|
|
.num_resume_shaders = num_resume_shaders,
|
|
|
|
|
.resume_shaders = resume_shaders,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
shader_data->code = (void *)brw_compile_bs(compiler, ¶ms);
|
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Size/alignment callback for vector or scalar types.
 *
 * Booleans are counted as 4 bytes per component; every other type uses its
 * bit size. A 3-component vector is aligned like a 4-component one.
 *
 * \param type   vector or scalar GLSL type (asserted)
 * \param size   receives the size in bytes
 * \param align  receives the alignment in bytes
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
|
|
|
|
|
|
|
|
|
|
static void
|
2026-02-10 09:20:15 -08:00
|
|
|
anv_shader_compute_fragment_rts(const struct intel_device_info *devinfo,
|
2024-08-07 23:32:23 +03:00
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
struct anv_shader_data *shader_data)
|
|
|
|
|
{
|
|
|
|
|
assert(shader_data->bind_map.surface_count == 0);
|
|
|
|
|
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
2026-04-23 15:39:59 -07:00
|
|
|
const uint64_t rt_mask =
|
|
|
|
|
(nir->info.outputs_written &
|
|
|
|
|
~BITFIELD_BIT(FRAG_RESULT_DUAL_SRC_BLEND)) >> FRAG_RESULT_DATA0;
|
2024-08-07 23:32:23 +03:00
|
|
|
const unsigned num_rts = util_last_bit64(rt_mask);
|
|
|
|
|
struct anv_pipeline_binding rt_bindings[MAX_RTS];
|
|
|
|
|
|
2026-02-06 15:17:52 -08:00
|
|
|
shader_data->key.fs.nr_color_regions =
|
2026-01-12 14:07:57 +02:00
|
|
|
util_last_bit(rt_mask & rp_color_mask(state));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
if (num_rts > 0) {
|
|
|
|
|
for (unsigned rt = 0; rt < num_rts; rt++) {
|
|
|
|
|
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0 + rt)) {
|
|
|
|
|
rt_bindings[rt] = (struct anv_pipeline_binding) {
|
|
|
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
|
|
|
.index = rt,
|
|
|
|
|
.binding = UINT32_MAX,
|
|
|
|
|
};
|
|
|
|
|
} else {
|
|
|
|
|
/* Setup a null render target */
|
|
|
|
|
rt_bindings[rt] = (struct anv_pipeline_binding) {
|
|
|
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
|
|
|
.index = ANV_COLOR_OUTPUT_UNUSED,
|
|
|
|
|
.binding = UINT32_MAX,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
shader_data->bind_map.surface_count = num_rts;
|
2026-02-10 09:20:15 -08:00
|
|
|
} else if (brw_nir_fs_needs_null_rt(devinfo, nir,
|
2026-02-06 15:17:52 -08:00
|
|
|
shader_data->key.fs.alpha_to_coverage != INTEL_NEVER)) {
|
2024-08-07 23:32:23 +03:00
|
|
|
/* Setup a null render target */
|
|
|
|
|
rt_bindings[0] = (struct anv_pipeline_binding) {
|
|
|
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
|
|
|
.index = ANV_COLOR_OUTPUT_DISABLED,
|
|
|
|
|
.binding = UINT32_MAX,
|
|
|
|
|
};
|
|
|
|
|
shader_data->bind_map.surface_count = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
typed_memcpy(shader_data->bind_map.surface_to_descriptor,
|
|
|
|
|
rt_bindings, shader_data->bind_map.surface_count);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data)
|
|
|
|
|
{
|
|
|
|
|
return (intrin->intrinsic == nir_intrinsic_image_atomic ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_image_atomic_swap ||
|
2025-08-14 12:47:51 +03:00
|
|
|
intrin->intrinsic == nir_intrinsic_image_heap_atomic ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_image_heap_atomic_swap ||
|
2024-08-07 23:32:23 +03:00
|
|
|
intrin->intrinsic == nir_intrinsic_image_deref_atomic ||
|
|
|
|
|
intrin->intrinsic == nir_intrinsic_image_deref_atomic_swap) &&
|
|
|
|
|
intrin->def.bit_size == 64;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
lower_non_tg4_non_uniform_offsets(const nir_tex_instr *tex,
|
|
|
|
|
unsigned index, void *data)
|
|
|
|
|
{
|
|
|
|
|
/* HW cannot deal with divergent surfaces/samplers */
|
|
|
|
|
if (tex->src[index].src_type == nir_tex_src_texture_offset ||
|
|
|
|
|
tex->src[index].src_type == nir_tex_src_texture_handle ||
|
2025-08-14 12:47:51 +03:00
|
|
|
tex->src[index].src_type == nir_tex_src_texture_heap_offset ||
|
2024-08-07 23:32:23 +03:00
|
|
|
tex->src[index].src_type == nir_tex_src_sampler_offset ||
|
2025-08-14 12:47:51 +03:00
|
|
|
tex->src[index].src_type == nir_tex_src_sampler_handle ||
|
|
|
|
|
tex->src[index].src_type == nir_tex_src_sampler_heap_offset)
|
2024-08-07 23:32:23 +03:00
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (tex->src[index].src_type == nir_tex_src_offset) {
|
|
|
|
|
/* HW can deal with TG4 divergent offsets only */
|
|
|
|
|
return tex->op != nir_texop_tg4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
fixup_large_workgroup_image_coherency(nir_shader *nir)
|
|
|
|
|
{
|
|
|
|
|
nir_foreach_function_impl(impl, nir) {
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
|
|
|
|
if (intr->intrinsic != nir_intrinsic_image_deref_store ||
|
|
|
|
|
nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_3D)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* We have found image store access to 3D. */
|
|
|
|
|
nir_deref_instr *array_deref = nir_src_as_deref(intr->src[0]);
|
|
|
|
|
if (array_deref->deref_type != nir_deref_type_array)
|
|
|
|
|
continue;
|
|
|
|
|
|
2025-11-07 21:38:36 +08:00
|
|
|
nir_alu_instr *alu = nir_src_as_alu(intr->src[1]);
|
2024-08-07 23:32:23 +03:00
|
|
|
if (!alu || !nir_op_is_vec(alu->op))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Check if any src is from @load_local_invocation_id. */
|
|
|
|
|
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
2025-11-07 21:38:36 +08:00
|
|
|
nir_instr *parent = nir_def_instr(alu->src[i].src.ssa);
|
2024-08-07 23:32:23 +03:00
|
|
|
if (parent->type != nir_instr_type_intrinsic)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
nir_intrinsic_instr *parent_intr = nir_instr_as_intrinsic(parent);
|
|
|
|
|
if (parent_intr->intrinsic !=
|
|
|
|
|
nir_intrinsic_load_local_invocation_id)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* Found a match, change image access qualifier coherent. */
|
|
|
|
|
nir_deref_instr *parent_deref =
|
|
|
|
|
nir_src_as_deref(array_deref->parent);
|
|
|
|
|
parent_deref->var->data.access = ACCESS_COHERENT;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
} /* instr */
|
|
|
|
|
} /* block */
|
|
|
|
|
} /* func */
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-01 12:49:39 +03:00
|
|
|
static void
|
|
|
|
|
cleanup_nir(nir_shader *nir)
|
|
|
|
|
{
|
|
|
|
|
/* First run copy-prop to get rid of all of the vec() that address
|
|
|
|
|
* calculations often create and then constant-fold so that, when we get to
|
|
|
|
|
* anv_nir_lower_ubo_loads, we can detect constant offsets.
|
|
|
|
|
*/
|
|
|
|
|
bool progress;
|
|
|
|
|
do {
|
|
|
|
|
progress = false;
|
|
|
|
|
NIR_PASS(progress, nir, nir_opt_algebraic);
|
|
|
|
|
NIR_PASS(progress, nir, nir_opt_copy_prop);
|
|
|
|
|
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
|
|
|
|
NIR_PASS(progress, nir, nir_opt_dce);
|
|
|
|
|
} while (progress);
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
static void
|
|
|
|
|
anv_shader_lower_nir(struct anv_device *device,
|
|
|
|
|
void *mem_ctx,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
struct anv_shader_data *shader_data)
|
|
|
|
|
{
|
|
|
|
|
const struct anv_physical_device *pdevice = device->physical;
|
|
|
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
2026-02-10 09:18:48 -08:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-08-07 23:32:23 +03:00
|
|
|
struct anv_descriptor_set_layout * const *set_layouts =
|
|
|
|
|
(struct anv_descriptor_set_layout * const *) shader_data->info->set_layouts;
|
|
|
|
|
const uint32_t set_layout_count = shader_data->info->set_layout_count;
|
|
|
|
|
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
/* Workaround for apps that need fp64 support */
|
|
|
|
|
if (device->fp64_nir) {
|
2026-01-16 09:48:45 +02:00
|
|
|
nir_shader *fp64_nir = anv_ensure_fp64_shader(device);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_doubles, fp64_nir,
|
2024-08-07 23:32:23 +03:00
|
|
|
nir->options->lower_doubles_options);
|
|
|
|
|
|
|
|
|
|
bool fp_conv = false;
|
|
|
|
|
NIR_PASS(fp_conv, nir, nir_lower_int64_float_conversions);
|
|
|
|
|
if (fp_conv) {
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_algebraic);
|
2026-01-16 09:48:45 +02:00
|
|
|
NIR_PASS(_, nir, nir_lower_doubles, fp64_nir,
|
2024-08-07 23:32:23 +03:00
|
|
|
nir->options->lower_doubles_options);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
|
|
|
|
pdevice->instance->large_workgroup_non_coherent_image_workaround) {
|
|
|
|
|
const unsigned local_size = nir->info.workgroup_size[0] *
|
|
|
|
|
nir->info.workgroup_size[1] *
|
|
|
|
|
nir->info.workgroup_size[2];
|
|
|
|
|
if (local_size == 64)
|
|
|
|
|
fixup_large_workgroup_image_coherency(nir);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_wpos_center);
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_input_attachments,
|
2025-11-20 17:26:01 -05:00
|
|
|
&(nir_input_attachment_options) { });
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
2025-12-05 23:21:23 +02:00
|
|
|
(shader_data->info->flags & VK_SHADER_CREATE_UNALIGNED_DISPATCH_BIT_MESA)) {
|
2024-08-07 23:32:23 +03:00
|
|
|
NIR_PASS(_, nir, anv_nir_lower_unaligned_dispatch);
|
|
|
|
|
/* anv_nir_lower_unaligned_dispatch pass uses nir_jump_return that we
|
|
|
|
|
* need to lower it.
|
|
|
|
|
*/
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_returns);
|
|
|
|
|
/* Lower load_base_workgroup_id inserted by unaligned_dispatch */
|
|
|
|
|
nir_lower_compute_system_values_options lower_csv_options = {
|
|
|
|
|
.has_base_workgroup_id = true,
|
|
|
|
|
};
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_compute_system_values, &lower_csv_options);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mesa_shader_stage_is_mesh(nir->info.stage)) {
|
|
|
|
|
nir_lower_compute_system_values_options options = {
|
|
|
|
|
.lower_workgroup_id_to_index = true,
|
|
|
|
|
/* nir_lower_idiv generates expensive code */
|
2026-02-10 09:18:48 -08:00
|
|
|
.shortcut_1d_workgroup_id = devinfo->verx10 >= 125,
|
2024-08-07 23:32:23 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, shader_data->info);
|
|
|
|
|
|
|
|
|
|
if (nir->info.stage <= MESA_SHADER_FRAGMENT) {
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_multiview,
|
|
|
|
|
shader_data->key.base.view_mask,
|
|
|
|
|
shader_data->use_primitive_replication);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
|
|
|
|
nir->info.cs.has_cooperative_matrix) {
|
2025-11-28 09:42:47 +02:00
|
|
|
anv_fixup_subgroup_size(device, nir);
|
2025-09-09 18:24:08 +02:00
|
|
|
NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.api_subgroup_size);
|
2026-02-13 20:56:57 -08:00
|
|
|
|
|
|
|
|
/* Lowering of nir_instr_type_cmat_call will produce new
|
|
|
|
|
* nir_instr_type_call instructions that need to be inlined.
|
|
|
|
|
*/
|
|
|
|
|
bool inlined = false;
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
|
|
|
NIR_PASS(inlined, nir, nir_inline_functions);
|
|
|
|
|
nir_remove_non_entrypoints(nir);
|
2026-04-13 15:49:57 -07:00
|
|
|
|
2026-02-13 20:56:57 -08:00
|
|
|
if (inlined) {
|
2026-04-13 15:49:57 -07:00
|
|
|
/* Some shader_temp vars may have remained multi-function before
|
|
|
|
|
* cmat lowering/inlining. Now that everything was inlined,
|
|
|
|
|
* they may be lowered to locals.
|
|
|
|
|
*/
|
|
|
|
|
bool lowered_globals = false;
|
|
|
|
|
NIR_PASS(lowered_globals, nir, nir_lower_global_vars_to_local);
|
|
|
|
|
if (lowered_globals)
|
|
|
|
|
NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
|
2026-02-13 20:56:57 -08:00
|
|
|
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_copy_prop);
|
|
|
|
|
}
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_deref);
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
|
|
|
|
2025-11-16 11:55:21 -05:00
|
|
|
NIR_PASS(_, nir, nir_lower_indirect_derefs_to_if_else_trees,
|
|
|
|
|
nir_var_function_temp, 16);
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
|
|
|
|
|
2026-03-13 16:15:01 -07:00
|
|
|
NIR_PASS(_, nir, nir_lower_memory_model);
|
|
|
|
|
|
2025-11-12 16:04:16 +02:00
|
|
|
/* Apply lowering for 64bit atomics pre-Xe2 */
|
2026-02-10 09:18:48 -08:00
|
|
|
const bool lower_64bit_atomics = devinfo->ver < 20;
|
2025-11-12 16:04:16 +02:00
|
|
|
|
|
|
|
|
if (lower_64bit_atomics) {
|
|
|
|
|
/* Ensure robustness, do this before brw_nir_lower_storage_image so that
|
|
|
|
|
* added image size intrinsics for bounds checkings are properly lowered
|
|
|
|
|
* for cube images.
|
|
|
|
|
*/
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_robust_access,
|
|
|
|
|
accept_64bit_atomic_cb, NULL);
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler,
|
|
|
|
|
&(struct brw_nir_lower_storage_image_opts) {
|
|
|
|
|
/* Anv only supports Gfx9+ which has better defined typed read
|
|
|
|
|
* behavior. It allows us to only have to care about lowering
|
|
|
|
|
* loads.
|
|
|
|
|
*/
|
|
|
|
|
.lower_loads = true,
|
|
|
|
|
.lower_stores_64bit = true,
|
|
|
|
|
.lower_loads_without_formats =
|
|
|
|
|
pdevice->instance->emulate_read_without_format,
|
|
|
|
|
});
|
|
|
|
|
|
2025-11-12 16:04:16 +02:00
|
|
|
if (lower_64bit_atomics) {
|
|
|
|
|
/* Switch from image to global */
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_image_atomics_to_global,
|
|
|
|
|
accept_64bit_atomic_cb, NULL);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-11-12 16:04:16 +02:00
|
|
|
/* Detile for global */
|
2026-02-10 09:18:48 -08:00
|
|
|
NIR_PASS(_, nir, brw_nir_lower_texel_address, devinfo,
|
2025-11-12 16:04:16 +02:00
|
|
|
pdevice->isl_dev.shader_tiling);
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2026-04-16 23:15:21 +03:00
|
|
|
/* Lower push constants variables prior to global realignment for CBV
|
|
|
|
|
* resources, it makes identifying a 64bit pointer from the push constants
|
|
|
|
|
* easier.
|
|
|
|
|
*/
|
2024-08-07 23:32:23 +03:00
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
|
|
|
|
|
nir_address_format_32bit_offset);
|
|
|
|
|
|
2026-04-16 23:15:21 +03:00
|
|
|
/* Realign pointers to CBV on stages that can promote to push buffers. */
|
|
|
|
|
if (pdevice->instance->promote_cbv_to_push_buffers &&
|
|
|
|
|
nir->info.stage <= MESA_SHADER_FRAGMENT) {
|
|
|
|
|
/* Cleanup for the analysis, we don't want any ALU */
|
|
|
|
|
cleanup_nir(nir);
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_realign_cbv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
|
|
|
|
|
nir_address_format_64bit_global);
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
|
|
|
|
|
|
|
|
|
|
shader_data->push_desc_info.used_descriptors =
|
|
|
|
|
anv_nir_compute_used_push_descriptors(
|
|
|
|
|
nir, set_layouts, set_layout_count);
|
|
|
|
|
|
|
|
|
|
/* Need to have render targets placed first in the bind_map */
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
2026-02-10 09:20:15 -08:00
|
|
|
anv_shader_compute_fragment_rts(devinfo, state, shader_data);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
2025-11-26 16:30:38 +02:00
|
|
|
uint32_t dynamic_descriptors_offset = 0;
|
|
|
|
|
uint32_t dynamic_descriptors_offsets[MAX_SETS] = {};
|
|
|
|
|
for (uint32_t i = 0; i < set_layout_count; i++) {
|
|
|
|
|
dynamic_descriptors_offsets[i] = dynamic_descriptors_offset;
|
2025-12-10 22:13:25 +02:00
|
|
|
if (set_layouts[i] != NULL) {
|
|
|
|
|
shader_data->bind_map.binding_mask |= ANV_PIPELINE_BIND_MASK_SET(i);
|
|
|
|
|
const uint32_t dyn_desc_count =
|
|
|
|
|
set_layouts[i]->vk.dynamic_descriptor_count;
|
|
|
|
|
shader_data->bind_map.dynamic_descriptors[i] = dyn_desc_count;
|
|
|
|
|
dynamic_descriptors_offset += dyn_desc_count;
|
|
|
|
|
}
|
2025-11-26 16:30:38 +02:00
|
|
|
}
|
|
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
/* Apply the actual layout to UBOs, SSBOs, and textures */
|
|
|
|
|
if (shader_data->info->flags & VK_SHADER_CREATE_DESCRIPTOR_HEAP_BIT_EXT) {
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_descriptor_heap, device,
|
|
|
|
|
shader_data->info->embedded_sampler_count,
|
|
|
|
|
shader_data->info->embedded_samplers,
|
|
|
|
|
&shader_data->bind_map);
|
|
|
|
|
} else {
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_apply_pipeline_layout,
|
2024-08-07 23:32:23 +03:00
|
|
|
pdevice, shader_data->key.base.robust_flags,
|
2025-11-26 16:30:38 +02:00
|
|
|
set_layouts, set_layout_count,
|
|
|
|
|
(shader_data->info->flags &
|
2026-02-19 12:09:54 +01:00
|
|
|
VK_SHADER_CREATE_INDEPENDENT_SETS_BIT_KHR) ? NULL :
|
2025-11-26 16:30:38 +02:00
|
|
|
dynamic_descriptors_offsets,
|
2024-06-05 15:48:55 +03:00
|
|
|
shader_data->info->flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
|
2024-08-07 23:32:23 +03:00
|
|
|
&shader_data->bind_map, &shader_data->push_map, mem_ctx);
|
2025-08-14 12:47:51 +03:00
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
|
|
|
|
|
anv_nir_ubo_addr_format(pdevice, shader_data->key.base.robust_flags));
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
|
|
|
|
|
anv_nir_ssbo_addr_format(pdevice, shader_data->key.base.robust_flags));
|
|
|
|
|
|
2026-04-01 12:49:39 +03:00
|
|
|
|
|
|
|
|
cleanup_nir(nir);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
|
|
|
|
|
|
|
|
|
cleanup_nir(nir);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
/* Required for nir_divergence_analysis() which is needed for
|
|
|
|
|
* anv_nir_lower_ubo_loads.
|
|
|
|
|
*/
|
|
|
|
|
NIR_PASS(_, nir, nir_convert_to_lcssa, true, true);
|
|
|
|
|
nir_divergence_analysis(nir);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_remove_phis);
|
|
|
|
|
|
2025-10-16 17:16:27 +03:00
|
|
|
const bool lower_non_uniform_texture_offsets = device->info->ver < 20;
|
|
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
const enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_lower_non_uniform_texture_access |
|
2026-02-06 14:01:06 -05:00
|
|
|
nir_lower_non_uniform_texture_query |
|
2024-08-07 23:32:23 +03:00
|
|
|
nir_lower_non_uniform_image_access |
|
2026-02-06 14:01:06 -05:00
|
|
|
nir_lower_non_uniform_image_query |
|
2025-10-16 17:16:27 +03:00
|
|
|
nir_lower_non_uniform_get_ssbo_size |
|
|
|
|
|
(lower_non_uniform_texture_offsets ?
|
|
|
|
|
nir_lower_non_uniform_texture_offset_access : 0);
|
|
|
|
|
|
|
|
|
|
/* Pre-Xe2 platforms don't have native support for dynamic programmable
|
|
|
|
|
* offsets. Since support includes non-uniform programmable offsets, we
|
|
|
|
|
* need to lower those texture messages in the same way we lower
|
|
|
|
|
* non-uniform texture/sampler handles.
|
|
|
|
|
*/
|
2025-08-14 12:47:51 +03:00
|
|
|
if (lower_non_uniform_texture_offsets)
|
|
|
|
|
nir_divergence_analysis(nir);
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
/* In practice, most shaders do not have non-uniform-qualified
|
|
|
|
|
* accesses (see
|
|
|
|
|
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
|
|
|
|
|
* thus a cheaper and likely to fail check is run first.
|
|
|
|
|
*/
|
|
|
|
|
if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_non_uniform_access);
|
|
|
|
|
|
|
|
|
|
/* We don't support non-uniform UBOs and non-uniform SSBO access is
|
|
|
|
|
* handled naturally by falling back to A64 messages.
|
|
|
|
|
*/
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_non_uniform_access,
|
|
|
|
|
&(nir_lower_non_uniform_access_options) {
|
|
|
|
|
.types = lower_non_uniform_access_types,
|
|
|
|
|
.tex_src_callback = lower_non_tg4_non_uniform_offsets,
|
|
|
|
|
.callback = NULL,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, intel_nir_lower_non_uniform_resource_intel);
|
|
|
|
|
NIR_PASS(_, nir, intel_nir_cleanup_resource_intel);
|
|
|
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mesa_shader_stage_uses_workgroup(nir->info.stage)) {
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
|
|
|
|
|
nir_var_mem_shared, shared_type_info);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io,
|
|
|
|
|
nir_var_mem_shared, nir_address_format_32bit_offset);
|
|
|
|
|
|
|
|
|
|
if (nir->info.zero_initialize_shared_memory &&
|
|
|
|
|
nir->info.shared_size > 0) {
|
|
|
|
|
/* The effective Shared Local Memory size is at least 1024 bytes and
|
|
|
|
|
* is always rounded to a power of two, so it is OK to align the size
|
|
|
|
|
* used by the shader to chunk_size -- which does simplify the logic.
|
|
|
|
|
*/
|
|
|
|
|
const unsigned chunk_size = 16;
|
2025-11-11 15:33:54 +08:00
|
|
|
const unsigned shared_size = align(nir->info.shared_size, chunk_size);
|
2024-08-07 23:32:23 +03:00
|
|
|
assert(shared_size <=
|
2026-02-10 09:18:48 -08:00
|
|
|
intel_compute_slm_calculate_size(devinfo->ver,
|
2024-08-07 23:32:23 +03:00
|
|
|
nir->info.shared_size));
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
|
|
|
|
|
shared_size, chunk_size);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-12 13:18:41 -08:00
|
|
|
if (mesa_shader_stage_is_compute(nir->info.stage)) {
|
2026-02-10 09:18:48 -08:00
|
|
|
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, devinfo,
|
2024-08-07 23:32:23 +03:00
|
|
|
&shader_data->prog_data.cs);
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-17 11:37:22 +02:00
|
|
|
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
|
|
|
|
|
|
2026-02-13 11:47:12 +02:00
|
|
|
NIR_PASS(_, nir, anv_nir_shrink_push_constant_ranges);
|
|
|
|
|
|
2025-12-17 11:37:22 +02:00
|
|
|
NIR_PASS(_, nir, anv_nir_compute_push_layout,
|
|
|
|
|
pdevice, shader_data->key.base.robust_flags,
|
|
|
|
|
&(struct anv_nir_push_layout_info) {
|
|
|
|
|
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
|
|
|
|
shader_data->key.tcs.separate_tess_vue_layout) ||
|
|
|
|
|
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
|
|
|
|
shader_data->key.tes.separate_tess_vue_layout),
|
|
|
|
|
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
2026-02-06 15:17:52 -08:00
|
|
|
brw_fs_prog_key_is_dynamic(&shader_data->key.fs),
|
2025-12-17 11:37:22 +02:00
|
|
|
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
2026-02-06 15:17:52 -08:00
|
|
|
shader_data->key.fs.mesh_input == INTEL_SOMETIMES,
|
2025-12-17 11:37:22 +02:00
|
|
|
},
|
|
|
|
|
&shader_data->key.base,
|
|
|
|
|
&shader_data->prog_data.base,
|
2026-02-09 11:17:22 +02:00
|
|
|
&shader_data->bind_map, &shader_data->push_map);
|
2025-12-17 11:37:22 +02:00
|
|
|
|
2025-08-14 12:47:51 +03:00
|
|
|
if (!(shader_data->info->flags & VK_SHADER_CREATE_DESCRIPTOR_HEAP_BIT_EXT)) {
|
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
|
|
|
|
|
shader_data->bind_map.layout_type);
|
|
|
|
|
}
|
2025-12-17 11:37:22 +02:00
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
shader_data->push_desc_info.push_set_buffer =
|
|
|
|
|
anv_nir_loads_push_desc_buffer(
|
|
|
|
|
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
|
|
|
|
shader_data->push_desc_info.fully_promoted_ubo_descriptors =
|
|
|
|
|
anv_nir_push_desc_ubo_fully_promoted(
|
|
|
|
|
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
2026-03-03 10:54:13 +02:00
|
|
|
|
|
|
|
|
/* Only detected clearing compute shaders, these are the only problematic
|
|
|
|
|
* cases we're aware of.
|
|
|
|
|
*/
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE)
|
|
|
|
|
shader_data->bind_map.inferred_behavior = anv_nir_clear_shader_analysis(nir);
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
|
sets_layout_embedded_sampler_count(const struct vk_shader_compile_info *info)
|
|
|
|
|
{
|
|
|
|
|
uint32_t count = 0;
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < info->set_layout_count; s++) {
|
|
|
|
|
if (info->set_layouts[s] == NULL)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const struct anv_descriptor_set_layout *layout =
|
|
|
|
|
(const struct anv_descriptor_set_layout *) info->set_layouts[s];
|
|
|
|
|
count += layout->embedded_sampler_count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return count;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shaders_pre_lower_gfx(struct anv_device *device,
|
|
|
|
|
struct anv_shader_data *shaders_data,
|
|
|
|
|
uint32_t shader_count,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
void *mem_ctx)
|
|
|
|
|
{
|
|
|
|
|
const struct intel_device_info *devinfo = device->info;
|
|
|
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
|
|
|
|
|
|
/* Walk backwards to link */
|
|
|
|
|
struct anv_shader_data *next_stage = NULL;
|
|
|
|
|
for (int s = shader_count - 1; s >= 0; s--) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
|
|
|
|
|
|
if (next_stage == NULL) {
|
|
|
|
|
next_stage = shader_data;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (info->stage) {
|
|
|
|
|
case MESA_SHADER_VERTEX:
|
|
|
|
|
case MESA_SHADER_TESS_EVAL:
|
|
|
|
|
case MESA_SHADER_TASK:
|
|
|
|
|
case MESA_SHADER_GEOMETRY:
|
|
|
|
|
anv_shader_link(compiler, info, next_stage->info);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TESS_CTRL:
|
|
|
|
|
anv_shader_link_tcs(compiler,
|
|
|
|
|
&shader_data->key.tcs,
|
|
|
|
|
info, next_stage->info);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_MESH:
|
|
|
|
|
anv_shader_link(compiler, info, next_stage->info);
|
|
|
|
|
next_stage->mue_map = &shader_data->prog_data.mesh.map;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid graphics shader stage");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
next_stage = shader_data;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool use_primitive_replication = false;
|
|
|
|
|
if (devinfo->ver >= 12 && shaders_data[0].key.base.view_mask != 0) {
|
|
|
|
|
/* For some pipelines HW Primitive Replication can be used instead of
|
|
|
|
|
* instancing to implement Multiview. This depend on how viewIndex is
|
|
|
|
|
* used in all the active shaders, so this check can't be done per
|
|
|
|
|
* individual shaders.
|
|
|
|
|
*/
|
|
|
|
|
nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
|
|
|
|
|
VkShaderStageFlags vk_stages = 0;
|
|
|
|
|
for (unsigned s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
shaders[shader_data->info->stage] = shader_data->info->nir;
|
|
|
|
|
vk_stages |= mesa_to_vk_shader_stage(shader_data->info->stage);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
use_primitive_replication =
|
|
|
|
|
anv_check_for_primitive_replication(device, vk_stages, shaders,
|
|
|
|
|
shaders_data[0].key.base.view_mask);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
shader_data->use_primitive_replication = use_primitive_replication;
|
|
|
|
|
shader_data->instance_multiplier =
|
|
|
|
|
(shader_data->key.base.view_mask && !use_primitive_replication) ?
|
|
|
|
|
util_bitcount(shader_data->key.base.view_mask) : 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shaders_post_lower_gfx(struct anv_device *device,
|
|
|
|
|
struct anv_shader_data *shaders_data,
|
|
|
|
|
uint32_t shader_count,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state)
|
|
|
|
|
{
|
|
|
|
|
struct vk_shader_compile_info *prev_stage = NULL;
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
|
|
|
|
|
|
struct shader_info *cur_info = &shader_data->info->nir->info;
|
|
|
|
|
|
2025-09-17 11:35:38 -04:00
|
|
|
if (prev_stage && info->stage < MESA_SHADER_FRAGMENT) {
|
2024-08-07 23:32:23 +03:00
|
|
|
struct shader_info *prev_info = &prev_stage->nir->info;
|
|
|
|
|
|
|
|
|
|
prev_info->outputs_written |= cur_info->inputs_read &
|
|
|
|
|
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
|
|
|
|
cur_info->inputs_read |= prev_info->outputs_written &
|
|
|
|
|
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
|
|
|
|
prev_info->patch_outputs_written |= cur_info->patch_inputs_read;
|
|
|
|
|
cur_info->patch_inputs_read |= prev_info->patch_outputs_written;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
prev_stage = info;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_shaders_post_lower_rt(struct anv_device *device,
|
|
|
|
|
struct anv_shader_data *shaders_data,
|
|
|
|
|
uint32_t shader_count)
|
|
|
|
|
{
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
|
|
|
|
|
|
switch (nir->info.stage) {
|
|
|
|
|
case MESA_SHADER_RAYGEN:
|
|
|
|
|
brw_nir_lower_raygen(nir, device->info);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MESA_SHADER_ANY_HIT:
|
|
|
|
|
brw_nir_lower_any_hit(nir, device->info);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
|
|
|
brw_nir_lower_closest_hit(nir, device->info);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MESA_SHADER_MISS:
|
|
|
|
|
brw_nir_lower_miss(nir, device->info);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MESA_SHADER_CALLABLE:
|
|
|
|
|
brw_nir_lower_callable(nir, device->info);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MESA_SHADER_INTERSECTION:
|
|
|
|
|
/* Nothing to do, we merge this into ANY_HIT */
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("invalid stage");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static VkShaderStageFlags
|
|
|
|
|
anv_shader_get_rt_group_linking(struct vk_physical_device *device,
|
|
|
|
|
VkShaderStageFlags stages)
|
|
|
|
|
{
|
|
|
|
|
const VkShaderStageFlags any_hit_intersection =
|
|
|
|
|
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
|
|
|
|
|
VK_SHADER_STAGE_INTERSECTION_BIT_KHR;
|
|
|
|
|
|
|
|
|
|
return (stages & any_hit_intersection) == any_hit_intersection ?
|
|
|
|
|
any_hit_intersection : 0;
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-10 13:46:40 -07:00
|
|
|
static void
|
|
|
|
|
anv_debug_archiver_init(void *mem_ctx, struct anv_shader_data *shaders_data,
|
|
|
|
|
uint32_t shader_count)
|
|
|
|
|
{
|
|
|
|
|
/* A hash is used to identify the per stage archive file. Just using the
|
|
|
|
|
* single stage hash/key is sufficient if it is not linked. If shaders
|
|
|
|
|
* are linked together, also include a combined hash of all stages to
|
|
|
|
|
* distinguish from the not linked case.
|
|
|
|
|
*/
|
2026-03-12 20:08:17 -04:00
|
|
|
unsigned char linked_hash[BLAKE3_KEY_LEN];
|
2024-05-10 13:46:40 -07:00
|
|
|
if (shader_count > 1) {
|
2026-03-12 20:34:57 -04:00
|
|
|
blake3_hasher ctx;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_init(&ctx);
|
2024-05-10 13:46:40 -07:00
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, info->nir->info.source_blake3, BLAKE3_OUT_LEN);
|
|
|
|
|
_mesa_blake3_update(&ctx, &shader_data->key, shader_data->key_size);
|
2024-05-10 13:46:40 -07:00
|
|
|
}
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_final(&ctx, linked_hash);
|
2024-05-10 13:46:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
|
|
2026-03-12 20:08:55 -04:00
|
|
|
char name[BLAKE3_HEX_LEN + 4] = {};
|
2024-05-10 13:46:40 -07:00
|
|
|
{
|
2026-03-12 20:34:57 -04:00
|
|
|
blake3_hasher ctx;
|
2026-03-12 20:08:17 -04:00
|
|
|
unsigned char hash[BLAKE3_KEY_LEN];
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_init(&ctx);
|
|
|
|
|
_mesa_blake3_update(&ctx, info->nir->info.source_blake3, BLAKE3_OUT_LEN);
|
|
|
|
|
_mesa_blake3_update(&ctx, &shader_data->key, shader_data->key_size);
|
2024-05-10 13:46:40 -07:00
|
|
|
if (shader_count > 1)
|
2026-03-12 21:31:29 -04:00
|
|
|
_mesa_blake3_update(&ctx, linked_hash, BLAKE3_KEY_LEN);
|
|
|
|
|
_mesa_blake3_final(&ctx, hash);
|
2024-05-10 13:46:40 -07:00
|
|
|
|
2026-03-12 21:45:16 -04:00
|
|
|
_mesa_blake3_format(name, hash);
|
2024-05-10 13:46:40 -07:00
|
|
|
}
|
2026-03-12 20:08:55 -04:00
|
|
|
memcpy(&name[BLAKE3_HEX_LEN - 1], ".anv", 4);
|
2024-05-10 13:46:40 -07:00
|
|
|
|
|
|
|
|
shader_data->archiver =
|
|
|
|
|
debug_archiver_open(mem_ctx, name, PACKAGE_VERSION MESA_GIT_SHA1);
|
|
|
|
|
|
2025-09-19 15:19:08 -07:00
|
|
|
if (shader_data->archiver) {
|
|
|
|
|
debug_archiver_set_prefix(shader_data->archiver,
|
2024-05-10 13:46:40 -07:00
|
|
|
_mesa_shader_stage_to_abbrev(info->stage));
|
2025-09-19 15:19:08 -07:00
|
|
|
}
|
2024-05-10 13:46:40 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
anv_debug_archiver_finish(struct anv_shader_data *shaders_data,
|
|
|
|
|
uint32_t shader_count)
|
|
|
|
|
{
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
debug_archiver_close(shader_data->archiver);
|
|
|
|
|
shader_data->archiver = NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
static VkResult
|
|
|
|
|
anv_shader_compile(struct vk_device *vk_device,
|
|
|
|
|
uint32_t shader_count,
|
|
|
|
|
struct vk_shader_compile_info *infos,
|
|
|
|
|
const struct vk_graphics_pipeline_state *state,
|
|
|
|
|
const struct vk_features *enabled_features,
|
|
|
|
|
const VkAllocationCallbacks* pAllocator,
|
|
|
|
|
struct vk_shader **shaders_out)
|
|
|
|
|
{
|
|
|
|
|
struct anv_device *device =
|
|
|
|
|
container_of(vk_device, struct anv_device, vk);
|
|
|
|
|
VkResult result = VK_SUCCESS;
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < shader_count; i++)
|
|
|
|
|
shaders_out[i] = NULL;
|
|
|
|
|
|
|
|
|
|
void *mem_ctx = ralloc_context(NULL);
|
|
|
|
|
|
|
|
|
|
struct anv_shader_data *shaders_data =
|
|
|
|
|
rzalloc_array(mem_ctx, struct anv_shader_data, shader_count);
|
|
|
|
|
assert(shader_count < MAX2(ANV_GRAPHICS_SHADER_STAGE_COUNT,
|
|
|
|
|
ANV_RT_SHADER_STAGE_COUNT));
|
|
|
|
|
|
|
|
|
|
/* Order the stages (no guarantee from the runtime) */
|
|
|
|
|
struct vk_shader_compile_info *ordered_infos[MESA_SHADER_KERNEL] = { 0 };
|
|
|
|
|
struct vk_shader **ordered_shaders_out[MESA_SHADER_KERNEL] = { 0 };
|
|
|
|
|
VkShaderStageFlags stages = 0;
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
ordered_infos[infos[s].stage] = &infos[s];
|
|
|
|
|
ordered_shaders_out[infos[s].stage] = &shaders_out[s];
|
|
|
|
|
|
|
|
|
|
/* The runtime transfers the ownership of the NIR to us, so we need to
|
|
|
|
|
* free it after compile.
|
|
|
|
|
*/
|
|
|
|
|
ralloc_steal(mem_ctx, infos[s].nir);
|
|
|
|
|
stages |= mesa_to_vk_shader_stage(infos[s].stage);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
#define ADD_SHADER(_name) \
|
|
|
|
|
do { \
|
|
|
|
|
if (ordered_infos[MESA_SHADER_##_name]) { \
|
|
|
|
|
shaders_data[remapped_index].info = \
|
|
|
|
|
ordered_infos[MESA_SHADER_##_name]; \
|
|
|
|
|
shaders_data[remapped_index].shader_out = \
|
|
|
|
|
ordered_shaders_out[MESA_SHADER_##_name]; \
|
|
|
|
|
remapped_index++; \
|
|
|
|
|
} \
|
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
|
uint32_t remapped_index = 0;
|
|
|
|
|
ADD_SHADER(COMPUTE);
|
|
|
|
|
ADD_SHADER(VERTEX);
|
|
|
|
|
ADD_SHADER(TESS_CTRL);
|
|
|
|
|
ADD_SHADER(TESS_EVAL);
|
|
|
|
|
ADD_SHADER(GEOMETRY);
|
|
|
|
|
ADD_SHADER(TASK);
|
|
|
|
|
ADD_SHADER(MESH);
|
|
|
|
|
ADD_SHADER(FRAGMENT);
|
|
|
|
|
ADD_SHADER(RAYGEN);
|
|
|
|
|
ADD_SHADER(CLOSEST_HIT);
|
|
|
|
|
ADD_SHADER(INTERSECTION);
|
|
|
|
|
ADD_SHADER(ANY_HIT);
|
|
|
|
|
ADD_SHADER(MISS);
|
|
|
|
|
ADD_SHADER(CALLABLE);
|
|
|
|
|
|
|
|
|
|
#undef ADD_SHADER
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* From now on, don't use infos[] anymore. */
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
|
|
|
|
|
|
shader_data->source_hash = ((uint32_t*)info->nir->info.source_blake3)[0];
|
|
|
|
|
|
2025-11-26 15:30:02 +02:00
|
|
|
for (uint32_t i = 0; i < info->set_layout_count; i++) {
|
|
|
|
|
shader_data->dynamic_descriptors[i] =
|
|
|
|
|
info->set_layouts[i] != NULL ?
|
|
|
|
|
info->set_layouts[i]->dynamic_descriptor_count : 0;
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
shader_data->bind_map.layout_type =
|
|
|
|
|
set_layouts_get_layout_type((struct anv_descriptor_set_layout * const *)info->set_layouts,
|
|
|
|
|
info->set_layout_count);
|
|
|
|
|
shader_data->bind_map.surface_to_descriptor =
|
|
|
|
|
brw_shader_stage_requires_bindless_resources(info->stage) ? NULL :
|
|
|
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
|
|
|
|
|
shader_data->bind_map.sampler_to_descriptor =
|
|
|
|
|
brw_shader_stage_requires_bindless_resources(info->stage) ? NULL :
|
|
|
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
|
|
|
|
|
shader_data->bind_map.embedded_sampler_to_binding =
|
|
|
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_embedded_sampler_binding,
|
2025-08-14 12:47:51 +03:00
|
|
|
MAX2(sets_layout_embedded_sampler_count(info),
|
|
|
|
|
info->embedded_sampler_count));
|
2024-08-07 23:32:23 +03:00
|
|
|
|
|
|
|
|
shader_data->prog_data.base.stage = info->stage;
|
|
|
|
|
|
|
|
|
|
switch (info->stage) {
|
|
|
|
|
case MESA_SHADER_VERTEX:
|
|
|
|
|
populate_vs_prog_key(&shader_data->key.vs, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TESS_CTRL:
|
|
|
|
|
populate_tcs_prog_key(&shader_data->key.tcs, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TESS_EVAL:
|
|
|
|
|
populate_tes_prog_key(&shader_data->key.tes, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_GEOMETRY:
|
|
|
|
|
populate_gs_prog_key(&shader_data->key.gs, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TASK:
|
|
|
|
|
populate_task_prog_key(&shader_data->key.task, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_MESH:
|
|
|
|
|
populate_mesh_prog_key(&shader_data->key.mesh, vk_device->physical,
|
|
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_FRAGMENT:
|
2026-02-06 15:17:52 -08:00
|
|
|
populate_fs_prog_key(&shader_data->key.fs, vk_device->physical,
|
2024-08-07 23:32:23 +03:00
|
|
|
info->robustness, state, stages);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_COMPUTE:
|
|
|
|
|
populate_cs_prog_key(&shader_data->key.cs, vk_device->physical,
|
2025-12-05 23:21:23 +02:00
|
|
|
info->robustness);
|
2024-08-07 23:32:23 +03:00
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_RAYGEN:
|
|
|
|
|
case MESA_SHADER_ANY_HIT:
|
|
|
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
|
|
|
case MESA_SHADER_MISS:
|
|
|
|
|
case MESA_SHADER_INTERSECTION:
|
|
|
|
|
case MESA_SHADER_CALLABLE:
|
|
|
|
|
populate_bs_prog_key(&shader_data->key.bs, vk_device->physical,
|
|
|
|
|
info->robustness, info->rt_flags);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid stage");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-10 13:46:40 -07:00
|
|
|
if (INTEL_DEBUG(DEBUG_MDA))
|
|
|
|
|
anv_debug_archiver_init(mem_ctx, shaders_data, shader_count);
|
|
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
{
|
|
|
|
|
/* We're going to do cross stage link if we have a fragment shader with
|
|
|
|
|
* any other stage (that would include all the associated
|
|
|
|
|
* pre-rasterization stages of the pipeline).
|
|
|
|
|
*/
|
|
|
|
|
const bool separate_shaders =
|
|
|
|
|
!(shader_count > 1 && ordered_infos[MESA_SHADER_FRAGMENT] != NULL);
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++)
|
|
|
|
|
shaders_data[s].info->nir->info.separate_shader = separate_shaders;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mesa_shader_stage_is_graphics(shaders_data[0].info->stage)) {
|
|
|
|
|
anv_shaders_pre_lower_gfx(device, shaders_data, shader_count,
|
|
|
|
|
state, mem_ctx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
|
|
|
|
|
anv_shader_lower_nir(device, mem_ctx, state, shader_data);
|
|
|
|
|
|
2025-11-28 09:42:47 +02:00
|
|
|
anv_fixup_subgroup_size(device, shader_data->info->nir);
|
2025-08-15 03:06:24 +00:00
|
|
|
|
|
|
|
|
anv_nir_apply_shader_workarounds(shader_data->info->nir);
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Combine intersection & any-hit before lowering */
|
|
|
|
|
if (ordered_infos[MESA_SHADER_INTERSECTION] != NULL) {
|
|
|
|
|
brw_nir_lower_combined_intersection_any_hit(
|
|
|
|
|
ordered_infos[MESA_SHADER_INTERSECTION]->nir,
|
|
|
|
|
ordered_infos[MESA_SHADER_ANY_HIT] != NULL ?
|
|
|
|
|
ordered_infos[MESA_SHADER_ANY_HIT]->nir : NULL,
|
|
|
|
|
device->info);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (mesa_shader_stage_is_graphics(shaders_data[0].info->stage))
|
|
|
|
|
anv_shaders_post_lower_gfx(device, shaders_data, shader_count, state);
|
|
|
|
|
else if (mesa_shader_stage_is_rt(shaders_data[0].info->stage))
|
|
|
|
|
anv_shaders_post_lower_rt(device, shaders_data, shader_count);
|
|
|
|
|
|
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
|
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
|
struct anv_shader_data *prev_shader_data =
|
|
|
|
|
s > 0 ? &shaders_data[s - 1] : NULL;
|
|
|
|
|
|
|
|
|
|
char *error_str = NULL;
|
|
|
|
|
switch (shader_data->info->stage) {
|
|
|
|
|
case MESA_SHADER_VERTEX:
|
|
|
|
|
anv_shader_compile_vs(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TESS_CTRL:
|
|
|
|
|
anv_shader_compile_tcs(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TESS_EVAL:
|
|
|
|
|
anv_shader_compile_tes(device, mem_ctx,
|
|
|
|
|
&shaders_data[s], prev_shader_data,
|
|
|
|
|
&error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_GEOMETRY:
|
|
|
|
|
anv_shader_compile_gs(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_TASK:
|
|
|
|
|
anv_shader_compile_task(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_MESH:
|
|
|
|
|
anv_shader_compile_mesh(device, mem_ctx,
|
|
|
|
|
&shaders_data[s], prev_shader_data,
|
|
|
|
|
&error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_FRAGMENT:
|
|
|
|
|
anv_shader_compile_fs(device, mem_ctx, shader_data, state, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_COMPUTE:
|
|
|
|
|
anv_shader_compile_cs(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
case MESA_SHADER_RAYGEN:
|
|
|
|
|
case MESA_SHADER_ANY_HIT:
|
|
|
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
|
|
|
case MESA_SHADER_MISS:
|
|
|
|
|
case MESA_SHADER_INTERSECTION:
|
|
|
|
|
case MESA_SHADER_CALLABLE:
|
|
|
|
|
anv_shader_compile_bs(device, mem_ctx, shader_data, &error_str);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid graphics shader stage");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (shader_data->code == NULL) {
|
|
|
|
|
if (error_str)
|
|
|
|
|
result = vk_errorf(device, VK_ERROR_UNKNOWN, "%s", error_str);
|
|
|
|
|
else
|
|
|
|
|
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
2025-09-05 13:48:46 -07:00
|
|
|
goto end;
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
anv_nir_validate_push_layout(device->physical,
|
|
|
|
|
&shader_data->prog_data.base,
|
|
|
|
|
&shader_data->bind_map);
|
|
|
|
|
|
|
|
|
|
shader_data->xfb_info = shader_data->info->nir->xfb_info;
|
|
|
|
|
|
|
|
|
|
result = anv_shader_create(device, shader_data->info->stage,
|
2025-04-07 09:48:57 +03:00
|
|
|
mem_ctx, shader_data, pAllocator,
|
2024-08-07 23:32:23 +03:00
|
|
|
shader_data->shader_out);
|
|
|
|
|
if (result != VK_SUCCESS)
|
2025-09-05 13:48:46 -07:00
|
|
|
goto end;
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
2025-09-05 13:48:46 -07:00
|
|
|
end:
|
2024-05-10 13:46:40 -07:00
|
|
|
if (INTEL_DEBUG(DEBUG_MDA))
|
|
|
|
|
anv_debug_archiver_finish(shaders_data, shader_count);
|
2025-09-05 13:48:46 -07:00
|
|
|
|
2024-08-07 23:32:23 +03:00
|
|
|
ralloc_free(mem_ctx);
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
/* TODO: Write the feedback index into the pipeline */
|
|
|
|
|
for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
|
|
|
|
|
anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-09-05 13:48:46 -07:00
|
|
|
if (result != VK_SUCCESS) {
|
|
|
|
|
for (unsigned s = 0; s < shader_count; s++) {
|
|
|
|
|
if (shaders_out[s] != NULL)
|
|
|
|
|
vk_shader_free(vk_device, &vk_device->alloc, shaders_out[s]);
|
|
|
|
|
}
|
2024-08-07 23:32:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct vk_device_shader_ops anv_device_shader_ops = {
|
|
|
|
|
.get_nir_options = anv_shader_get_nir_options,
|
|
|
|
|
.get_spirv_options = anv_shader_get_spirv_options,
|
|
|
|
|
.preprocess_nir = anv_shader_preprocess_nir,
|
|
|
|
|
.get_rt_group_linking = anv_shader_get_rt_group_linking,
|
|
|
|
|
.hash_state = anv_shader_hash_state,
|
|
|
|
|
.compile = anv_shader_compile,
|
|
|
|
|
.deserialize = anv_shader_deserialize,
|
2026-01-23 10:24:33 +02:00
|
|
|
.replay_rt_shader_group = anv_replay_rt_shader_group,
|
2024-08-11 00:05:15 +03:00
|
|
|
.write_rt_shader_group = anv_write_rt_shader_group,
|
|
|
|
|
.write_rt_shader_group_replay_handle = anv_write_rt_shader_group_replay_handle,
|
2024-08-08 14:42:07 +03:00
|
|
|
.cmd_bind_shaders = anv_cmd_buffer_bind_shaders,
|
2024-08-07 23:32:23 +03:00
|
|
|
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
|
2024-08-08 14:42:07 +03:00
|
|
|
.cmd_set_rt_state = anv_cmd_buffer_set_rt_state,
|
|
|
|
|
.cmd_set_stack_size = anv_cmd_buffer_set_stack_size,
|
2024-08-07 23:32:23 +03:00
|
|
|
};
|