mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-27 18:48:14 +02:00
A few compute shaders are doing BC3 image generation on the device and then generate incorrect data if running at SIMD16. That data is then sampled in a vertex shader that generates incorrect geometry. See https://github.com/ValveSoftware/Proton/issues/7595#issuecomment-4343662131 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41501>
2383 lines
86 KiB
C
2383 lines
86 KiB
C
/* Copyright © 2024 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "anv_private.h"
|
|
#include "anv_nir.h"
|
|
#include "anv_shader.h"
|
|
|
|
#include "vk_nir_convert_ycbcr.h"
|
|
#include "vk_pipeline.h"
|
|
|
|
#include "common/intel_compute_slm.h"
|
|
#include "common/intel_l3_config.h"
|
|
|
|
#include "compiler/brw/brw_nir.h"
|
|
#include "compiler/brw/brw_nir_rt.h"
|
|
#include "compiler/intel_nir.h"
|
|
#include "compiler/jay/jay.h"
|
|
|
|
#include "git_sha1.h"
|
|
|
|
typedef void (*game_wa_callback)(nir_shader *nir);
|
|
|
|
/* Structure to hold a game-specific workaround entry */
|
|
struct game_wa_entry {
|
|
game_wa_callback cb;
|
|
uint32_t shader_blake3s[16][BLAKE3_OUT_LEN32];
|
|
};
|
|
|
|
/* Workaround for a shader in Horizon Forbidden West that causes
|
|
* visual corruption. The shader writes the result of fsqrt to
|
|
* storage images with a 16-bit image format and misrendering
|
|
* occurs when those values are denormal for an unknown reason.
|
|
*
|
|
* This clamps the image writes to the smallest fp16 normalized
|
|
* value. (Pattern matching against fsqrt is easy to do in a one
|
|
* line algebraic pass, while matching image stores is harder.)
|
|
*
|
|
* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/12555
|
|
*/
|
|
static void
|
|
wa_forbidden_west(nir_shader *nir)
|
|
{
|
|
NIR_PASS(_, nir, brw_nir_apply_sqrt_workarounds);
|
|
}
|
|
|
|
/* Try to detect shaders relying on 32-wide subgroups. Usually they have a
|
|
* pattern like this:
|
|
*
|
|
* div 32 %1096 = @load_subgroup_invocation
|
|
* div 32 %1245 = iand %1096, %1228 (0x1f)
|
|
* div 32 %1246 = ixor %1245, %13 (0x1)
|
|
* div 32 %1247 = @shuffle (%1244, %1246)
|
|
*/
|
|
static bool is_alu1_iand_0x1f(nir_alu_instr *alu)
|
|
{
|
|
if (!alu || alu->op != nir_op_iand)
|
|
return false;
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
if (nir_src_is_const(alu->src[i].src) &&
|
|
nir_alu_src_as_uint(alu->src[i]) == 0x1f)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool is_simd32_shuffle(nir_intrinsic_instr *intrin)
|
|
{
|
|
nir_alu_instr *alu1 = nir_src_as_alu(intrin->src[1]);
|
|
if (alu1 == NULL)
|
|
return false;
|
|
|
|
if (is_alu1_iand_0x1f(alu1))
|
|
return true;
|
|
|
|
for (uint32_t i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
|
|
if (is_alu1_iand_0x1f(nir_src_as_alu(alu1->src[i].src)))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Try to detect shaders testing with a sequence like this :
|
|
*
|
|
* 32x3 %49 = @load_local_invocation_id
|
|
* 32 %1673 = load_const (0xffffffe0 = -32 = 4294967264)
|
|
* 32 %1674 = iand %49.x, %1673 (0xffffffe0)
|
|
* 32 %1675 = @load_subgroup_size
|
|
* 32 %1676 = umod %1674, %1675
|
|
*
|
|
* This sequence appears to be targetted at subgroup sizes larger than 32. The
|
|
* problem in this sequence is that subgroup size is expected to be >= 32 to
|
|
* match the masking of local_invocation_id above. If inferior, the umod
|
|
* operation returns the same value as if the subgroup was 32.
|
|
*/
|
|
static bool is_alu_used_for_umod_subgroup_size(nir_alu_instr *in_alu)
|
|
{
|
|
nir_foreach_use(src, &in_alu->def) {
|
|
nir_instr *instr = nir_src_use_instr(src);
|
|
if (instr->type != nir_instr_type_alu)
|
|
continue;
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
|
if (alu->op != nir_op_umod &&
|
|
alu->op != nir_op_imod)
|
|
continue;
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
if (&alu->src[i].src == src)
|
|
continue;
|
|
|
|
if (!nir_src_is_intrinsic(alu->src[i].src) ||
|
|
nir_src_as_intrinsic(alu->src[i].src)->intrinsic != nir_intrinsic_load_subgroup_size)
|
|
continue;
|
|
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
is_local_invoc_id_used_with_simd32_assumption(nir_intrinsic_instr *subgroup_inv)
|
|
{
|
|
nir_foreach_use(src, &subgroup_inv->def) {
|
|
nir_instr *instr = nir_src_use_instr(src);
|
|
if (instr->type != nir_instr_type_alu)
|
|
continue;
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
|
if (alu->op != nir_op_iand)
|
|
continue;
|
|
|
|
/* nir_print_instr(&alu->instr, stderr); */
|
|
/* fprintf(stderr, "\n"); */
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
if (&alu->src[i].src == src)
|
|
continue;
|
|
|
|
if (!nir_src_is_const(alu->src[i].src))
|
|
continue;
|
|
|
|
if (nir_src_as_uint(alu->src[i].src) != 0xffffffe0)
|
|
continue;
|
|
|
|
if (is_alu_used_for_umod_subgroup_size(alu))
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
detect_simd32_requirement(nir_builder *b,
|
|
nir_intrinsic_instr *intrin,
|
|
void *data)
|
|
{
|
|
switch (intrin->intrinsic) {
|
|
case nir_intrinsic_shuffle:
|
|
return is_simd32_shuffle(intrin);
|
|
|
|
case nir_intrinsic_load_local_invocation_id:
|
|
return is_local_invoc_id_used_with_simd32_assumption(intrin);
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/* List of game-specific workarounds identified by BLAKE3 hash of the shader.
|
|
* Add new workarounds here as needed.
|
|
*/
|
|
static const struct game_wa_entry game_was[] = {
|
|
{
|
|
.cb = wa_forbidden_west,
|
|
.shader_blake3s = {
|
|
{0x51683151, 0xe044f0ce, 0xc210a762, 0xb12b2da4, 0x4e69ddc0, 0x237b1cc1, 0xc84bcf09, 0x31cfe883},
|
|
},
|
|
},
|
|
};
|
|
|
|
/* Apply game-specific workarounds based on the shader's BLAKE3 hash */
|
|
static void
|
|
anv_nir_apply_shader_workarounds(nir_shader *nir)
|
|
{
|
|
for (unsigned i = 0; i < ARRAY_SIZE(game_was); i++) {
|
|
const struct game_wa_entry *wa = &game_was[i];
|
|
for (unsigned j = 0; j < ARRAY_SIZE(wa->shader_blake3s); j++) {
|
|
if (_mesa_printed_blake3_equal(nir->info.source_blake3, wa->shader_blake3s[j])) {
|
|
wa->cb(nir);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static enum brw_robustness_flags
|
|
anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
|
|
{
|
|
return
|
|
((rstate->storage_buffers !=
|
|
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
|
|
BRW_ROBUSTNESS_SSBO : 0) |
|
|
((rstate->uniform_buffers !=
|
|
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
|
|
BRW_ROBUSTNESS_UBO : 0);
|
|
}
|
|
|
|
static enum anv_descriptor_set_layout_type
|
|
set_layouts_get_layout_type(struct anv_descriptor_set_layout * const *set_layouts,
|
|
uint32_t set_layout_count)
|
|
{
|
|
for (uint32_t s = 0; s < set_layout_count; s++) {
|
|
if (set_layouts[s]) {
|
|
return set_layouts[s]->type;
|
|
}
|
|
}
|
|
|
|
return ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN;
|
|
}
|
|
|
|
void
|
|
anv_shader_init_uuid(struct anv_physical_device *device)
|
|
{
|
|
/* We should include any parameter here that will change the compiler's
|
|
* output. Mostly it's workarounds, but there is also settings for using
|
|
* indirect descriptors (a different binding model).
|
|
*
|
|
* The fp64 workaround is skipped because although it changes the
|
|
* compiler's output, not having that workaroung enabled with an app
|
|
* expecting fp64 support will just crash in the backend.
|
|
*/
|
|
blake3_hasher ctx;
|
|
_mesa_blake3_init(&ctx);
|
|
|
|
const bool indirect_descriptors = device->indirect_descriptors;
|
|
_mesa_blake3_update(&ctx, &indirect_descriptors, sizeof(indirect_descriptors));
|
|
|
|
const int spilling_rate = device->compiler->spilling_rate;
|
|
_mesa_blake3_update(&ctx, &spilling_rate, sizeof(spilling_rate));
|
|
|
|
const uint8_t afs = device->instance->assume_full_subgroups;
|
|
_mesa_blake3_update(&ctx, &afs, sizeof(afs));
|
|
|
|
const bool afswb = device->instance->assume_full_subgroups_with_barrier;
|
|
_mesa_blake3_update(&ctx, &afswb, sizeof(afswb));
|
|
|
|
const bool afs_shm = device->instance->assume_full_subgroups_with_shared_memory;
|
|
_mesa_blake3_update(&ctx, &afs_shm, sizeof(afs_shm));
|
|
|
|
const bool erwf = device->instance->emulate_read_without_format;
|
|
_mesa_blake3_update(&ctx, &erwf, sizeof(erwf));
|
|
|
|
const bool lttd = device->instance->lower_terminate_to_discard;
|
|
_mesa_blake3_update(&ctx, <td, sizeof(lttd));
|
|
|
|
const bool large_wg_wa =
|
|
device->instance->large_workgroup_non_coherent_image_workaround;
|
|
_mesa_blake3_update(&ctx, &large_wg_wa, sizeof(large_wg_wa));
|
|
|
|
const bool lto_disable = device->instance->disable_lto;
|
|
_mesa_blake3_update(&ctx, <o_disable, sizeof(lto_disable));
|
|
|
|
const bool btp_bti_rcc = device->rt_change_needs_flush;
|
|
_mesa_blake3_update(&ctx, &btp_bti_rcc, sizeof(btp_bti_rcc));
|
|
|
|
const bool cbv_push_buffer = device->instance->promote_cbv_to_push_buffers;
|
|
_mesa_blake3_update(&ctx, &cbv_push_buffer, sizeof(cbv_push_buffer));
|
|
|
|
uint8_t blake3[BLAKE3_KEY_LEN];
|
|
_mesa_blake3_final(&ctx, blake3);
|
|
memcpy(device->shader_binary_uuid, blake3, sizeof(device->shader_binary_uuid));
|
|
}
|
|
|
|
static const struct nir_shader_compiler_options *
|
|
anv_shader_get_nir_options(struct vk_physical_device *device,
|
|
mesa_shader_stage stage,
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
|
|
{
|
|
struct anv_physical_device *pdevice =
|
|
container_of(device, struct anv_physical_device, vk);
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
|
|
|
return &compiler->nir_options[stage];
|
|
}
|
|
|
|
static struct spirv_to_nir_options
|
|
anv_shader_get_spirv_options(struct vk_physical_device *device,
|
|
mesa_shader_stage stage,
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
{
|
|
struct anv_physical_device *pdevice =
|
|
container_of(device, struct anv_physical_device, vk);
|
|
enum brw_robustness_flags robust_flags = anv_get_robust_flags(rs);
|
|
|
|
return (struct spirv_to_nir_options) {
|
|
.ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
|
|
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
|
|
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
|
.push_const_addr_format = nir_address_format_logical,
|
|
|
|
.printf = ANV_DEBUG(SHADER_PRINT),
|
|
|
|
/* TODO: Consider changing this to an address format that has the NULL
|
|
* pointer equals to 0. That might be a better format to play nice
|
|
* with certain code / code generators.
|
|
*/
|
|
.shared_addr_format = nir_address_format_32bit_offset,
|
|
|
|
.min_ubo_alignment = ANV_UBO_ALIGNMENT,
|
|
.min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
|
|
|
|
.workarounds = {
|
|
.lower_terminate_to_discard = pdevice->instance->lower_terminate_to_discard,
|
|
},
|
|
};
|
|
}
|
|
|
|
static void
|
|
anv_shader_preprocess_nir(struct vk_physical_device *device,
|
|
nir_shader *nir,
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
{
|
|
struct anv_physical_device *pdevice =
|
|
container_of(device, struct anv_physical_device, vk);
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
|
|
|
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
|
|
nir_shader_get_entrypoint(nir), nir_var_shader_out);
|
|
|
|
const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
|
|
.point_coord = true,
|
|
.primitive_id = nir->info.stage == MESA_SHADER_FRAGMENT,
|
|
};
|
|
NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
|
|
|
|
NIR_PASS(_, nir, nir_update_image_intrinsic_from_var);
|
|
|
|
const nir_opt_access_options opt_access_options = {
|
|
.is_vulkan = true,
|
|
};
|
|
NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
|
|
|
|
struct brw_nir_compiler_opts opts = {
|
|
.robust_image_access = rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS ||
|
|
rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
|
|
};
|
|
brw_preprocess_nir(compiler, nir, &opts);
|
|
|
|
NIR_PASS(_, nir, nir_opt_barrier_modes);
|
|
NIR_PASS(_, nir, nir_opt_acquire_release_barriers, SCOPE_QUEUE_FAMILY);
|
|
|
|
if (ANV_DEBUG(SHADER_PRINT)) {
|
|
const nir_lower_printf_options printf_opts = {
|
|
.ptr_bit_size = 64,
|
|
.hash_format_strings = true,
|
|
};
|
|
NIR_PASS(_, nir, nir_lower_printf, &printf_opts);
|
|
}
|
|
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
|
}
|
|
|
|
static void
|
|
populate_base_prog_key(struct brw_base_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
{
|
|
const struct anv_physical_device *pdevice =
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
/* We can avoid including this for hashing because the runtime already
|
|
* hashes that information. We just put it here for at compile time.
|
|
*/
|
|
if (rs != NULL)
|
|
key->robust_flags = anv_get_robust_flags(rs);
|
|
key->divergent_atomics_flags = pdevice->instance->enable_opt_divergent_atomics;
|
|
key->limit_trig_input_range = pdevice->instance->limit_trig_input_range;
|
|
}
|
|
|
|
static void
|
|
populate_base_gfx_prog_key(struct brw_base_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *gfx_state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
const struct anv_physical_device *pdevice =
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
populate_base_prog_key(key, device, rs);
|
|
|
|
key->view_mask = (gfx_state && gfx_state->mv) ? gfx_state->mv->view_mask : 0;
|
|
|
|
key->vue_layout =
|
|
(util_bitcount(link_stages) > 1 && (link_stages & VK_SHADER_STAGE_FRAGMENT_BIT)) ?
|
|
INTEL_VUE_LAYOUT_FIXED :
|
|
pdevice->info.verx10 >= 125 ? INTEL_VUE_LAYOUT_SEPARATE_MESH :
|
|
INTEL_VUE_LAYOUT_SEPARATE;
|
|
}
|
|
|
|
static void
|
|
populate_vs_prog_key(struct brw_vs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
const struct anv_physical_device *pdevice =
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
key->vf_component_packing = pdevice->instance->vf_component_packing;
|
|
}
|
|
|
|
static void
|
|
populate_tcs_prog_key(struct brw_tcs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
if (state && state->ts &&
|
|
!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))
|
|
key->input_vertices = state->ts->patch_control_points;
|
|
|
|
key->separate_tess_vue_layout =
|
|
!(link_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
|
|
}
|
|
|
|
static void
|
|
populate_tes_prog_key(struct brw_tes_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
key->separate_tess_vue_layout =
|
|
!(link_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
|
|
}
|
|
|
|
static void
|
|
populate_gs_prog_key(struct brw_gs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
}
|
|
|
|
static void
|
|
populate_task_prog_key(struct brw_task_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
}
|
|
|
|
static void
|
|
populate_mesh_prog_key(struct brw_mesh_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
}
|
|
|
|
static bool
|
|
pipeline_has_coarse_pixel(const struct vk_graphics_pipeline_state *state)
|
|
{
|
|
if (state == NULL)
|
|
return true;
|
|
|
|
/* The Vulkan 1.2.199 spec says:
|
|
*
|
|
* "If any of the following conditions are met, Cxy' must be set to
|
|
* {1,1}:
|
|
*
|
|
* * If Sample Shading is enabled.
|
|
* * [...]"
|
|
*
|
|
* And "sample shading" is defined as follows:
|
|
*
|
|
* "Sample shading is enabled for a graphics pipeline:
|
|
*
|
|
* * If the interface of the fragment shader entry point of the
|
|
* graphics pipeline includes an input variable decorated with
|
|
* SampleId or SamplePosition. In this case minSampleShadingFactor
|
|
* takes the value 1.0.
|
|
*
|
|
* * Else if the sampleShadingEnable member of the
|
|
* VkPipelineMultisampleStateCreateInfo structure specified when
|
|
* creating the graphics pipeline is set to VK_TRUE. In this case
|
|
* minSampleShadingFactor takes the value of
|
|
* VkPipelineMultisampleStateCreateInfo::minSampleShading.
|
|
*
|
|
* Otherwise, sample shading is considered disabled."
|
|
*
|
|
* The first bullet above is handled by the back-end compiler because those
|
|
* inputs both force per-sample dispatch. The second bullet is handled
|
|
* here. Note that this sample shading being enabled has nothing to do
|
|
* with minSampleShading.
|
|
*/
|
|
if (state->ms != NULL && state->ms->sample_shading_enable)
|
|
return false;
|
|
|
|
/* Not dynamic & pipeline has a 1x1 fragment shading rate with no
|
|
* possibility for element of the pipeline to change the value or fragment
|
|
* shading rate not specified at all.
|
|
*/
|
|
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_FSR) &&
|
|
(state->fsr == NULL ||
|
|
(state->fsr->fragment_size.width <= 1 &&
|
|
state->fsr->fragment_size.height <= 1 &&
|
|
state->fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
|
|
state->fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static uint32_t
|
|
rp_color_mask(const struct vk_graphics_pipeline_state *state)
|
|
{
|
|
if (state == NULL || state->rp == NULL ||
|
|
!vk_render_pass_state_has_attachment_info(state->rp))
|
|
return ((1u << MAX_RTS) - 1);
|
|
|
|
assert(state->rp->color_attachment_count <= MAX_RTS);
|
|
|
|
uint32_t color_mask = 0;
|
|
for (uint32_t i = 0; i < state->rp->color_attachment_count; i++) {
|
|
if (state->rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
|
|
if (state->cal) {
|
|
if (state->cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
|
|
color_mask |= BITFIELD_BIT(state->cal->color_map[i]);
|
|
} else {
|
|
color_mask |= BITFIELD_BIT(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
return color_mask;
|
|
}
|
|
|
|
static void
|
|
populate_fs_prog_key(struct brw_fs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
VkShaderStageFlags link_stages)
|
|
{
|
|
const struct anv_physical_device *pdevice =
|
|
container_of(device, const struct anv_physical_device, vk);
|
|
|
|
populate_base_gfx_prog_key(&key->base, device, rs, state, link_stages);
|
|
|
|
/* Consider all inputs as valid until look at the NIR variables. */
|
|
key->nr_color_regions = util_last_bit(rp_color_mask(state));
|
|
|
|
/* To reduce possible shader recompilations we would need to know if
|
|
* there is a SampleMask output variable to compute if we should emit
|
|
* code to workaround the issue that hardware disables alpha to coverage
|
|
* when there is SampleMask output.
|
|
*
|
|
* If the pipeline we compile the fragment shader in includes the output
|
|
* interface, then we can be sure whether alpha_coverage is enabled or not.
|
|
* If we don't have that output interface, then we have to compile the
|
|
* shader with some conditionals.
|
|
*/
|
|
if (state != NULL && state->ms != NULL) {
|
|
/* VUID-VkGraphicsPipelineCreateInfo-rasterizerDiscardEnable-00751:
|
|
*
|
|
* "If the pipeline is being created with fragment shader state,
|
|
* pMultisampleState must be a valid pointer to a valid
|
|
* VkPipelineMultisampleStateCreateInfo structure"
|
|
*
|
|
* It's also required for the fragment output interface.
|
|
*/
|
|
key->multisample_fbo =
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
|
|
INTEL_SOMETIMES :
|
|
state->ms->rasterization_samples > 1 ? INTEL_ALWAYS : INTEL_NEVER;
|
|
key->persample_interp =
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
|
|
INTEL_SOMETIMES :
|
|
(state->ms->sample_shading_enable &&
|
|
(state->ms->min_sample_shading * state->ms->rasterization_samples) > 1) ?
|
|
INTEL_ALWAYS : INTEL_NEVER;
|
|
key->alpha_to_coverage =
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ?
|
|
INTEL_SOMETIMES :
|
|
(state->ms->alpha_to_coverage_enable ? INTEL_ALWAYS : INTEL_NEVER);
|
|
|
|
/* TODO: We should make this dynamic */
|
|
if (pdevice->instance->sample_mask_out_opengl_behaviour)
|
|
key->ignore_sample_mask_out = !key->multisample_fbo;
|
|
} else {
|
|
/* Consider all inputs as valid until we look at the NIR variables. */
|
|
key->nr_color_regions = MAX_RTS;
|
|
|
|
key->alpha_to_coverage = INTEL_SOMETIMES;
|
|
key->multisample_fbo = INTEL_SOMETIMES;
|
|
key->persample_interp = INTEL_SOMETIMES;
|
|
}
|
|
|
|
if (pdevice->info.verx10 >= 200) {
|
|
if (state != NULL && state->rs != NULL) {
|
|
key->provoking_vertex_last =
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) ?
|
|
INTEL_SOMETIMES :
|
|
state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT ?
|
|
INTEL_ALWAYS : INTEL_NEVER;
|
|
} else {
|
|
key->provoking_vertex_last = INTEL_SOMETIMES;
|
|
}
|
|
} else {
|
|
/* Pre-Xe2 we don't care about this at all, make sure it's always set to
|
|
* NEVER to avoid it influencing the push constant.
|
|
*/
|
|
key->provoking_vertex_last = INTEL_NEVER;
|
|
}
|
|
|
|
if (state != NULL && state->rs != NULL) {
|
|
key->conservative_raster =
|
|
BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE) ?
|
|
INTEL_SOMETIMES :
|
|
state->rs->conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT ?
|
|
INTEL_NEVER : INTEL_ALWAYS;
|
|
} else {
|
|
key->conservative_raster = INTEL_SOMETIMES;
|
|
}
|
|
|
|
key->mesh_input =
|
|
(link_stages & VK_SHADER_STAGE_VERTEX_BIT) ? INTEL_NEVER :
|
|
(link_stages & VK_SHADER_STAGE_MESH_BIT_EXT) ? INTEL_ALWAYS :
|
|
pdevice->info.has_mesh_shading ? INTEL_SOMETIMES : INTEL_NEVER;
|
|
|
|
if (state && state->ms) {
|
|
key->min_sample_shading = state->ms->min_sample_shading;
|
|
key->api_sample_shading = state->ms->sample_shading_enable;
|
|
}
|
|
|
|
key->coarse_pixel = pipeline_has_coarse_pixel(state);
|
|
}
|
|
|
|
static void
|
|
populate_cs_prog_key(struct brw_cs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs)
|
|
{
|
|
populate_base_prog_key(&key->base, device, rs);
|
|
}
|
|
|
|
static void
|
|
populate_bs_prog_key(struct brw_bs_prog_key *key,
|
|
const struct vk_physical_device *device,
|
|
const struct vk_pipeline_robustness_state *rs,
|
|
VkPipelineCreateFlags2KHR flags)
|
|
{
|
|
populate_base_prog_key(&key->base, device, rs);
|
|
|
|
uint32_t ray_flags = 0;
|
|
const bool rt_skip_triangles =
|
|
flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR;
|
|
const bool rt_skip_aabbs =
|
|
flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR;
|
|
assert(!(rt_skip_triangles && rt_skip_aabbs));
|
|
if (rt_skip_triangles)
|
|
ray_flags |= BRW_RT_RAY_FLAG_SKIP_TRIANGLES;
|
|
else if (rt_skip_aabbs)
|
|
ray_flags |= BRW_RT_RAY_FLAG_SKIP_AABBS;
|
|
|
|
key->pipeline_ray_flags = ray_flags;
|
|
}
|
|
|
|
static void
|
|
anv_shader_hash_state(struct vk_physical_device *device,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
const struct vk_features *enabled_features,
|
|
VkShaderStageFlags stages,
|
|
blake3_hash blake3_out)
|
|
{
|
|
struct mesa_blake3 blake3_ctx;
|
|
_mesa_blake3_init(&blake3_ctx);
|
|
|
|
anv_foreach_vk_stage(stage, stages) {
|
|
union brw_any_prog_key key;
|
|
memset(&key, 0, sizeof(key));
|
|
|
|
switch (stage) {
|
|
case VK_SHADER_STAGE_VERTEX_BIT:
|
|
populate_vs_prog_key(&key.vs, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.vs, sizeof(key.vs));
|
|
break;
|
|
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
|
|
populate_tcs_prog_key(&key.tcs, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.tcs, sizeof(key.tcs));
|
|
break;
|
|
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
|
|
populate_tes_prog_key(&key.tes, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.tes, sizeof(key.tes));
|
|
break;
|
|
case VK_SHADER_STAGE_GEOMETRY_BIT:
|
|
populate_gs_prog_key(&key.gs, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.gs, sizeof(key.gs));
|
|
break;
|
|
case VK_SHADER_STAGE_TASK_BIT_EXT:
|
|
populate_task_prog_key(&key.task, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.task, sizeof(key.task));
|
|
break;
|
|
case VK_SHADER_STAGE_MESH_BIT_EXT:
|
|
populate_mesh_prog_key(&key.mesh, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.mesh, sizeof(key.mesh));
|
|
break;
|
|
case VK_SHADER_STAGE_FRAGMENT_BIT:
|
|
populate_fs_prog_key(&key.fs, device, NULL, state, stages);
|
|
_mesa_blake3_update(&blake3_ctx, &key.fs, sizeof(key.fs));
|
|
break;
|
|
case VK_SHADER_STAGE_COMPUTE_BIT:
|
|
populate_cs_prog_key(&key.cs, device, NULL);
|
|
_mesa_blake3_update(&blake3_ctx, &key.cs, sizeof(key.cs));
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid stage");
|
|
}
|
|
}
|
|
|
|
_mesa_blake3_final(&blake3_ctx, blake3_out);
|
|
}
|
|
|
|
static void
|
|
merge_tess_info(struct shader_info *tes_info,
|
|
const struct shader_info *tcs_info)
|
|
{
|
|
/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
|
|
*
|
|
* "PointMode. Controls generation of points rather than triangles
|
|
* or lines. This functionality defaults to disabled, and is
|
|
* enabled if either shader stage includes the execution mode.
|
|
*
|
|
* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
|
|
* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
|
|
* and OutputVertices, it says:
|
|
*
|
|
* "One mode must be set in at least one of the tessellation
|
|
* shader stages."
|
|
*
|
|
* So, the fields can be set in either the TCS or TES, but they must
|
|
* agree if set in both. Our backend looks at TES, so bitwise-or in
|
|
* the values from the TCS.
|
|
*/
|
|
assert(tcs_info->tess.tcs_vertices_out == 0 ||
|
|
tes_info->tess.tcs_vertices_out == 0 ||
|
|
tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
|
|
tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
|
|
|
|
assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
|
|
tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
|
|
tcs_info->tess.spacing == tes_info->tess.spacing);
|
|
tes_info->tess.spacing |= tcs_info->tess.spacing;
|
|
|
|
assert(tcs_info->tess._primitive_mode == 0 ||
|
|
tes_info->tess._primitive_mode == 0 ||
|
|
tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
|
|
tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
|
|
tes_info->tess.ccw |= tcs_info->tess.ccw;
|
|
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
|
|
}
|
|
|
|
static void
|
|
anv_shader_link_tcs(const struct brw_compiler *compiler,
|
|
struct brw_tcs_prog_key *key,
|
|
struct vk_shader_compile_info *tcs_stage,
|
|
struct vk_shader_compile_info *tes_stage)
|
|
{
|
|
assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
|
|
|
|
brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
|
|
|
|
nir_lower_patch_vertices(tes_stage->nir,
|
|
tcs_stage->nir->info.tess.tcs_vertices_out,
|
|
NULL);
|
|
|
|
/* Copy TCS info into the TES info */
|
|
merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
|
|
|
|
/* Whacking the key after cache lookup is a bit sketchy, but all of
|
|
* this comes from the SPIR-V, which is part of the hash used for the
|
|
* pipeline cache. So it should be safe.
|
|
*/
|
|
key->_tes_primitive_mode = tes_stage->nir->info.tess._primitive_mode;
|
|
}
|
|
|
|
static void
|
|
anv_shader_link(const struct brw_compiler *compiler,
|
|
struct vk_shader_compile_info *prev_stage,
|
|
struct vk_shader_compile_info *next_stage)
|
|
{
|
|
brw_nir_link_shaders(compiler, prev_stage->nir, next_stage->nir);
|
|
}
|
|
|
|
static const struct vk_ycbcr_conversion_state *
|
|
lookup_ycbcr_conversion(const void *_stage, uint32_t set,
|
|
uint32_t binding, uint32_t array_index)
|
|
{
|
|
const struct vk_shader_compile_info *stage = _stage;
|
|
|
|
if (set == VK_NIR_YCBCR_SET_IMMUTABLE_SAMPLERS) {
|
|
assert(binding < stage->embedded_sampler_count);
|
|
return &stage->embedded_samplers[binding].ycbcr_conversion;
|
|
} else {
|
|
assert(set < MAX_SETS);
|
|
const struct anv_descriptor_set_layout *set_layout =
|
|
container_of(stage->set_layouts[set],
|
|
struct anv_descriptor_set_layout, vk);
|
|
|
|
assert(binding < set_layout->binding_count);
|
|
const struct anv_descriptor_set_binding_layout *bind_layout =
|
|
&set_layout->binding[binding];
|
|
|
|
if (bind_layout->samplers == NULL)
|
|
return NULL;
|
|
|
|
array_index = MIN2(array_index, bind_layout->array_size - 1);
|
|
|
|
const struct anv_descriptor_set_layout_sampler *sampler =
|
|
&bind_layout->samplers[array_index];
|
|
|
|
return sampler->has_ycbcr_conversion ?
|
|
&sampler->ycbcr_conversion_state : NULL;
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_fixup_subgroup_size(struct anv_device *device, nir_shader *shader)
|
|
{
|
|
struct shader_info *info = &shader->info;
|
|
const struct anv_instance *instance = device->physical->instance;
|
|
|
|
if (!mesa_shader_stage_uses_workgroup(info->stage))
|
|
return;
|
|
|
|
unsigned local_size = info->workgroup_size[0] *
|
|
info->workgroup_size[1] *
|
|
info->workgroup_size[2];
|
|
|
|
/* Games don't always request full subgroups when they should,
|
|
* which can cause bugs, as they may expect bigger size of the
|
|
* subgroup than we choose for the execution.
|
|
*/
|
|
if (instance->assume_full_subgroups &&
|
|
info->uses_wide_subgroup_intrinsics &&
|
|
info->api_subgroup_size == BRW_SUBGROUP_SIZE &&
|
|
local_size &&
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
}
|
|
|
|
if (instance->assume_full_subgroups_with_barrier &&
|
|
info->stage == MESA_SHADER_COMPUTE &&
|
|
device->info->verx10 <= 125 &&
|
|
info->uses_control_barrier &&
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
local_size &&
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
}
|
|
|
|
/* Similarly, sometimes games rely on the implicit synchronization of
|
|
* the shared memory accesses, and choosing smaller subgroups than the game
|
|
* expects will cause bugs. */
|
|
if (instance->assume_full_subgroups_with_shared_memory &&
|
|
info->shared_size > 0 &&
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
local_size &&
|
|
local_size % BRW_SUBGROUP_SIZE == 0) {
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
}
|
|
|
|
/* Cooperative matrix extension requires that all invocations in a subgroup
|
|
* be active. As a result, when the application does not request a specific
|
|
* subgroup size, we must use SIMD32.
|
|
*/
|
|
if (info->stage == MESA_SHADER_COMPUTE && info->cs.has_cooperative_matrix &&
|
|
info->max_subgroup_size > info->min_subgroup_size) {
|
|
info->api_subgroup_size = info->max_subgroup_size;
|
|
info->min_subgroup_size = info->max_subgroup_size;
|
|
}
|
|
|
|
/* Only promote to SIMD32 if the max allows it. */
|
|
if (info->max_subgroup_size >= BRW_SUBGROUP_SIZE &&
|
|
info->min_subgroup_size != info->max_subgroup_size &&
|
|
info->uses_wide_subgroup_intrinsics &&
|
|
nir_shader_intrinsics_pass(shader,
|
|
detect_simd32_requirement,
|
|
nir_metadata_all,
|
|
NULL)) {
|
|
info->max_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
info->min_subgroup_size = BRW_SUBGROUP_SIZE;
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_vs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_vs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.vs,
|
|
.prog_data = &shader_data->prog_data.vs,
|
|
};
|
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
struct jay_shader_bin *bin =
|
|
jay_compile(devinfo, mem_ctx, nir,
|
|
(union brw_any_prog_data *) params.prog_data,
|
|
(union brw_any_prog_key *) params.key);
|
|
|
|
shader_data->code = (void *) bin->kernel;
|
|
} else {
|
|
shader_data->code = (void *) brw_compile_vs(compiler, ¶ms);
|
|
}
|
|
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_tcs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
shader_data->key.tcs.outputs_written = nir->info.outputs_written;
|
|
shader_data->key.tcs.patch_outputs_written = nir->info.patch_outputs_written;
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_tcs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.tcs,
|
|
.prog_data = &shader_data->prog_data.tcs,
|
|
};
|
|
|
|
shader_data->code = (void *)brw_compile_tcs(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_tes(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *tes_shader_data,
|
|
struct anv_shader_data *tcs_shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = tes_shader_data->info->nir;
|
|
|
|
if (tcs_shader_data) {
|
|
tes_shader_data->key.tes.inputs_read =
|
|
tcs_shader_data->info->nir->info.outputs_written;
|
|
tes_shader_data->key.tes.patch_inputs_read =
|
|
tcs_shader_data->info->nir->info.patch_outputs_written;
|
|
}
|
|
|
|
tes_shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_tes_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = tes_shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = tes_shader_data->source_hash,
|
|
.archiver = tes_shader_data->archiver,
|
|
},
|
|
.key = &tes_shader_data->key.tes,
|
|
.prog_data = &tes_shader_data->prog_data.tes,
|
|
.input_vue_map = tcs_shader_data ?
|
|
&tcs_shader_data->prog_data.tcs.base.vue_map : NULL,
|
|
};
|
|
|
|
tes_shader_data->code = (void *)brw_compile_tes(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_gs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_gs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.gs,
|
|
.prog_data = &shader_data->prog_data.gs,
|
|
};
|
|
|
|
shader_data->code = (void *)brw_compile_gs(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_task(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_task_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.task,
|
|
.prog_data = &shader_data->prog_data.task,
|
|
};
|
|
|
|
shader_data->code = (void *)brw_compile_task(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static nir_def *
|
|
wa_18019110168_load_provoking_vertex(nir_builder *b, void *data)
|
|
{
|
|
const struct anv_pipeline_bind_map *bind_map = data;
|
|
nir_def *val = NULL;
|
|
|
|
for (uint32_t i = 0; i < bind_map->inline_dwords_count; i++) {
|
|
if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.wa_18019110168)) {
|
|
val = nir_load_inline_data_intel(
|
|
b, 1, 32, nir_imm_int(b, 0),
|
|
.base = i * 4);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (val == NULL) {
|
|
val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
|
|
.base = anv_drv_const_offset(gfx.wa_18019110168) -
|
|
bind_map->push_ranges[0].start * 32,
|
|
.range = anv_drv_const_size(gfx.wa_18019110168));
|
|
}
|
|
|
|
return nir_iand_imm(b, val, ANV_WA_18019110168_PROVOKING_VERTEX_MASK);
|
|
}
|
|
|
|
static nir_def *
|
|
wa_18019110168_load_per_primitive_remap_table(nir_builder *b, void *data)
|
|
{
|
|
const struct anv_pipeline_bind_map *bind_map = data;
|
|
nir_def *val = NULL;
|
|
|
|
val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
|
|
.base = anv_drv_const_offset(gfx.wa_18019110168) -
|
|
bind_map->push_ranges[0].start * 32,
|
|
.range = anv_drv_const_size(gfx.wa_18019110168));
|
|
|
|
return nir_iand_imm(b, val, ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK);
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_mesh(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *mesh_shader_data,
|
|
struct anv_shader_data *task_shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = mesh_shader_data->info->nir;
|
|
|
|
mesh_shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_mesh_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = mesh_shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = mesh_shader_data->source_hash,
|
|
.archiver = mesh_shader_data->archiver,
|
|
},
|
|
.key = &mesh_shader_data->key.mesh,
|
|
.prog_data = &mesh_shader_data->prog_data.mesh,
|
|
.tue_map = task_shader_data ?
|
|
&task_shader_data->prog_data.task.map :
|
|
NULL,
|
|
.wa_18019110168_load_provoking_vertex =
|
|
wa_18019110168_load_provoking_vertex,
|
|
.wa_18019110168_data = (void *)&mesh_shader_data->bind_map,
|
|
};
|
|
|
|
mesh_shader_data->code = (void *)brw_compile_mesh(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_fs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
/* When using Primitive Replication for multiview, each view gets its own
|
|
* position slot.
|
|
*/
|
|
uint32_t pos_slots = shader_data->use_primitive_replication ?
|
|
MAX2(1, util_bitcount(shader_data->key.base.view_mask)) : 1;
|
|
|
|
struct intel_vue_map prev_vue_map;
|
|
brw_compute_vue_map(compiler->devinfo,
|
|
&prev_vue_map,
|
|
nir->info.inputs_read,
|
|
nir->info.separate_shader,
|
|
pos_slots);
|
|
|
|
struct brw_compile_fs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.fs,
|
|
.prog_data = &shader_data->prog_data.fs,
|
|
.mue_map = shader_data->mue_map,
|
|
|
|
.allow_spilling = true,
|
|
.max_polygons = UCHAR_MAX,
|
|
|
|
.wa_18019110168_load_per_primitive_remap_table_offset =
|
|
wa_18019110168_load_per_primitive_remap_table,
|
|
.wa_18019110168_data = (void *)&shader_data->bind_map,
|
|
};
|
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
struct jay_shader_bin *bin =
|
|
jay_compile(devinfo, mem_ctx, nir,
|
|
(union brw_any_prog_data *) params.prog_data,
|
|
(union brw_any_prog_key *) params.key);
|
|
|
|
shader_data->code = (void *) bin->kernel;
|
|
} else {
|
|
shader_data->code = (void *) brw_compile_fs(compiler, ¶ms);
|
|
}
|
|
|
|
*error_str = params.base.error_str;
|
|
|
|
shader_data->num_stats = (uint32_t)!!shader_data->prog_data.fs.dispatch_multi +
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_8 +
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_16 +
|
|
(uint32_t)shader_data->prog_data.fs.dispatch_32;
|
|
assert(shader_data->num_stats <= ARRAY_SIZE(shader_data->stats));
|
|
|
|
/* Update the push constant padding range now that we know the amount of
|
|
* per-primitive data delivered in the payload.
|
|
*/
|
|
for (unsigned i = 0; i < ARRAY_SIZE(shader_data->bind_map.push_ranges); i++) {
|
|
if (shader_data->bind_map.push_ranges[i].set == ANV_DESCRIPTOR_SET_PER_PRIM_PADDING) {
|
|
shader_data->bind_map.push_ranges[i].length = MAX2(
|
|
shader_data->prog_data.fs.num_per_primitive_inputs / 2,
|
|
shader_data->bind_map.push_ranges[i].length);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_cs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_cs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.cs,
|
|
.prog_data = &shader_data->prog_data.cs,
|
|
};
|
|
|
|
if (intel_use_jay(devinfo, nir->info.stage)) {
|
|
struct jay_shader_bin *bin = jay_compile(devinfo, mem_ctx, nir,
|
|
(union brw_any_prog_data*)params.prog_data,
|
|
(union brw_any_prog_key*)params.key);
|
|
|
|
shader_data->code = (void*)bin->kernel;
|
|
shader_data->stats[0] = bin->stats;
|
|
|
|
params.prog_data->local_size[0] = nir->info.workgroup_size[0];
|
|
params.prog_data->local_size[1] = nir->info.workgroup_size[1];
|
|
params.prog_data->local_size[2] = nir->info.workgroup_size[2];
|
|
} else {
|
|
shader_data->code = (void*)brw_compile_cs(compiler, ¶ms);
|
|
}
|
|
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static bool
|
|
should_remat_cb(nir_instr *instr, void *data)
|
|
{
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
return false;
|
|
|
|
return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
|
|
}
|
|
|
|
static void
|
|
anv_shader_compile_bs(struct anv_device *device,
|
|
void *mem_ctx,
|
|
struct anv_shader_data *shader_data,
|
|
char **error_str)
|
|
{
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
nir_shader *nir = shader_data->info->nir;
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
|
|
struct brw_nir_lower_shader_calls_state lowering_state = {
|
|
.devinfo = devinfo,
|
|
.key = &shader_data->key.bs,
|
|
};
|
|
|
|
nir_shader **resume_shaders = NULL;
|
|
uint32_t num_resume_shaders = 0;
|
|
if (nir->info.stage != MESA_SHADER_COMPUTE) {
|
|
struct brw_nir_vectorize_mem_cb_data vectorize_cb_data = {
|
|
.devinfo = devinfo,
|
|
};
|
|
const nir_lower_shader_calls_options opts = {
|
|
.address_format = nir_address_format_64bit_global,
|
|
.stack_alignment = BRW_BTD_STACK_ALIGN,
|
|
.localized_loads = true,
|
|
.vectorizer_callback = brw_nir_should_vectorize_mem,
|
|
.vectorizer_data = &vectorize_cb_data,
|
|
.should_remat_callback = should_remat_cb,
|
|
};
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_rt_intrinsics_pre_trace);
|
|
NIR_PASS(_, nir, nir_lower_shader_calls, &opts,
|
|
&resume_shaders, &num_resume_shaders, mem_ctx);
|
|
NIR_PASS(_, nir, brw_nir_lower_shader_calls, &lowering_state);
|
|
NIR_PASS(_, nir, brw_nir_lower_rt_intrinsics,
|
|
&shader_data->key.base, devinfo);
|
|
}
|
|
|
|
for (unsigned i = 0; i < num_resume_shaders; i++) {
|
|
NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls,
|
|
&lowering_state);
|
|
NIR_PASS(_, resume_shaders[i], brw_nir_lower_rt_intrinsics,
|
|
&shader_data->key.base, devinfo);
|
|
}
|
|
|
|
shader_data->num_stats = 1;
|
|
|
|
struct brw_compile_bs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = shader_data->stats,
|
|
.log_data = device,
|
|
.mem_ctx = mem_ctx,
|
|
.source_hash = shader_data->source_hash,
|
|
.archiver = shader_data->archiver,
|
|
},
|
|
.key = &shader_data->key.bs,
|
|
.prog_data = &shader_data->prog_data.bs,
|
|
.num_resume_shaders = num_resume_shaders,
|
|
.resume_shaders = resume_shaders,
|
|
};
|
|
|
|
shader_data->code = (void *)brw_compile_bs(compiler, ¶ms);
|
|
*error_str = params.base.error_str;
|
|
}
|
|
|
|
static void
|
|
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
|
|
{
|
|
assert(glsl_type_is_vector_or_scalar(type));
|
|
|
|
uint32_t comp_size = glsl_type_is_boolean(type)
|
|
? 4 : glsl_get_bit_size(type) / 8;
|
|
unsigned length = glsl_get_vector_elements(type);
|
|
*size = comp_size * length,
|
|
*align = comp_size * (length == 3 ? 4 : length);
|
|
}
|
|
|
|
static void
|
|
anv_shader_compute_fragment_rts(const struct intel_device_info *devinfo,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
struct anv_shader_data *shader_data)
|
|
{
|
|
assert(shader_data->bind_map.surface_count == 0);
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
const uint64_t rt_mask =
|
|
(nir->info.outputs_written &
|
|
~BITFIELD_BIT(FRAG_RESULT_DUAL_SRC_BLEND)) >> FRAG_RESULT_DATA0;
|
|
const unsigned num_rts = util_last_bit64(rt_mask);
|
|
struct anv_pipeline_binding rt_bindings[MAX_RTS];
|
|
|
|
shader_data->key.fs.nr_color_regions =
|
|
util_last_bit(rt_mask & rp_color_mask(state));
|
|
|
|
if (num_rts > 0) {
|
|
for (unsigned rt = 0; rt < num_rts; rt++) {
|
|
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0 + rt)) {
|
|
rt_bindings[rt] = (struct anv_pipeline_binding) {
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
.index = rt,
|
|
.binding = UINT32_MAX,
|
|
};
|
|
} else {
|
|
/* Setup a null render target */
|
|
rt_bindings[rt] = (struct anv_pipeline_binding) {
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
.index = ANV_COLOR_OUTPUT_UNUSED,
|
|
.binding = UINT32_MAX,
|
|
};
|
|
}
|
|
}
|
|
shader_data->bind_map.surface_count = num_rts;
|
|
} else if (brw_nir_fs_needs_null_rt(devinfo, nir,
|
|
shader_data->key.fs.alpha_to_coverage != INTEL_NEVER)) {
|
|
/* Setup a null render target */
|
|
rt_bindings[0] = (struct anv_pipeline_binding) {
|
|
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
|
|
.index = ANV_COLOR_OUTPUT_DISABLED,
|
|
.binding = UINT32_MAX,
|
|
};
|
|
shader_data->bind_map.surface_count = 1;
|
|
}
|
|
|
|
typed_memcpy(shader_data->bind_map.surface_to_descriptor,
|
|
rt_bindings, shader_data->bind_map.surface_count);
|
|
}
|
|
|
|
static bool
|
|
accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data)
|
|
{
|
|
return (intrin->intrinsic == nir_intrinsic_image_atomic ||
|
|
intrin->intrinsic == nir_intrinsic_image_atomic_swap ||
|
|
intrin->intrinsic == nir_intrinsic_image_heap_atomic ||
|
|
intrin->intrinsic == nir_intrinsic_image_heap_atomic_swap ||
|
|
intrin->intrinsic == nir_intrinsic_image_deref_atomic ||
|
|
intrin->intrinsic == nir_intrinsic_image_deref_atomic_swap) &&
|
|
intrin->def.bit_size == 64;
|
|
}
|
|
|
|
static bool
|
|
lower_non_tg4_non_uniform_offsets(const nir_tex_instr *tex,
|
|
unsigned index, void *data)
|
|
{
|
|
/* HW cannot deal with divergent surfaces/samplers */
|
|
if (tex->src[index].src_type == nir_tex_src_texture_offset ||
|
|
tex->src[index].src_type == nir_tex_src_texture_handle ||
|
|
tex->src[index].src_type == nir_tex_src_texture_heap_offset ||
|
|
tex->src[index].src_type == nir_tex_src_sampler_offset ||
|
|
tex->src[index].src_type == nir_tex_src_sampler_handle ||
|
|
tex->src[index].src_type == nir_tex_src_sampler_heap_offset)
|
|
return true;
|
|
|
|
if (tex->src[index].src_type == nir_tex_src_offset) {
|
|
/* HW can deal with TG4 divergent offsets only */
|
|
return tex->op != nir_texop_tg4;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void
|
|
fixup_large_workgroup_image_coherency(nir_shader *nir)
|
|
{
|
|
nir_foreach_function_impl(impl, nir) {
|
|
nir_foreach_block(block, impl) {
|
|
nir_foreach_instr(instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
if (intr->intrinsic != nir_intrinsic_image_deref_store ||
|
|
nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_3D)
|
|
continue;
|
|
|
|
/* We have found image store access to 3D. */
|
|
nir_deref_instr *array_deref = nir_src_as_deref(intr->src[0]);
|
|
if (array_deref->deref_type != nir_deref_type_array)
|
|
continue;
|
|
|
|
nir_alu_instr *alu = nir_src_as_alu(intr->src[1]);
|
|
if (!alu || !nir_op_is_vec(alu->op))
|
|
return;
|
|
|
|
/* Check if any src is from @load_local_invocation_id. */
|
|
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
|
nir_instr *parent = nir_def_instr(alu->src[i].src.ssa);
|
|
if (parent->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *parent_intr = nir_instr_as_intrinsic(parent);
|
|
if (parent_intr->intrinsic !=
|
|
nir_intrinsic_load_local_invocation_id)
|
|
continue;
|
|
|
|
/* Found a match, change image access qualifier coherent. */
|
|
nir_deref_instr *parent_deref =
|
|
nir_src_as_deref(array_deref->parent);
|
|
parent_deref->var->data.access = ACCESS_COHERENT;
|
|
return;
|
|
}
|
|
} /* instr */
|
|
} /* block */
|
|
} /* func */
|
|
}
|
|
|
|
static void
|
|
cleanup_nir(nir_shader *nir)
|
|
{
|
|
/* First run copy-prop to get rid of all of the vec() that address
|
|
* calculations often create and then constant-fold so that, when we get to
|
|
* anv_nir_lower_ubo_loads, we can detect constant offsets.
|
|
*/
|
|
bool progress;
|
|
do {
|
|
progress = false;
|
|
NIR_PASS(progress, nir, nir_opt_algebraic);
|
|
NIR_PASS(progress, nir, nir_opt_copy_prop);
|
|
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
|
NIR_PASS(progress, nir, nir_opt_dce);
|
|
} while (progress);
|
|
}
|
|
|
|
static void
|
|
anv_shader_lower_nir(struct anv_device *device,
|
|
void *mem_ctx,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
struct anv_shader_data *shader_data)
|
|
{
|
|
const struct anv_physical_device *pdevice = device->physical;
|
|
const struct brw_compiler *compiler = pdevice->compiler;
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
struct anv_descriptor_set_layout * const *set_layouts =
|
|
(struct anv_descriptor_set_layout * const *) shader_data->info->set_layouts;
|
|
const uint32_t set_layout_count = shader_data->info->set_layout_count;
|
|
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
/* Workaround for apps that need fp64 support */
|
|
if (device->fp64_nir) {
|
|
nir_shader *fp64_nir = anv_ensure_fp64_shader(device);
|
|
|
|
NIR_PASS(_, nir, nir_lower_doubles, fp64_nir,
|
|
nir->options->lower_doubles_options);
|
|
|
|
bool fp_conv = false;
|
|
NIR_PASS(fp_conv, nir, nir_lower_int64_float_conversions);
|
|
if (fp_conv) {
|
|
NIR_PASS(_, nir, nir_opt_algebraic);
|
|
NIR_PASS(_, nir, nir_lower_doubles, fp64_nir,
|
|
nir->options->lower_doubles_options);
|
|
}
|
|
}
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
|
pdevice->instance->large_workgroup_non_coherent_image_workaround) {
|
|
const unsigned local_size = nir->info.workgroup_size[0] *
|
|
nir->info.workgroup_size[1] *
|
|
nir->info.workgroup_size[2];
|
|
if (local_size == 64)
|
|
fixup_large_workgroup_image_coherency(nir);
|
|
}
|
|
|
|
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
|
NIR_PASS(_, nir, nir_lower_wpos_center);
|
|
NIR_PASS(_, nir, nir_lower_input_attachments,
|
|
&(nir_input_attachment_options) { });
|
|
}
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
|
(shader_data->info->flags & VK_SHADER_CREATE_UNALIGNED_DISPATCH_BIT_MESA)) {
|
|
NIR_PASS(_, nir, anv_nir_lower_unaligned_dispatch);
|
|
/* anv_nir_lower_unaligned_dispatch pass uses nir_jump_return that we
|
|
* need to lower it.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_returns);
|
|
/* Lower load_base_workgroup_id inserted by unaligned_dispatch */
|
|
nir_lower_compute_system_values_options lower_csv_options = {
|
|
.has_base_workgroup_id = true,
|
|
};
|
|
NIR_PASS(_, nir, nir_lower_compute_system_values, &lower_csv_options);
|
|
}
|
|
|
|
if (mesa_shader_stage_is_mesh(nir->info.stage)) {
|
|
nir_lower_compute_system_values_options options = {
|
|
.lower_workgroup_id_to_index = true,
|
|
/* nir_lower_idiv generates expensive code */
|
|
.shortcut_1d_workgroup_id = devinfo->verx10 >= 125,
|
|
};
|
|
|
|
NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
|
|
}
|
|
|
|
NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, shader_data->info);
|
|
|
|
if (nir->info.stage <= MESA_SHADER_FRAGMENT) {
|
|
NIR_PASS(_, nir, anv_nir_lower_multiview,
|
|
shader_data->key.base.view_mask,
|
|
shader_data->use_primitive_replication);
|
|
}
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE &&
|
|
nir->info.cs.has_cooperative_matrix) {
|
|
anv_fixup_subgroup_size(device, nir);
|
|
NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.api_subgroup_size);
|
|
|
|
/* Lowering of nir_instr_type_cmat_call will produce new
|
|
* nir_instr_type_call instructions that need to be inlined.
|
|
*/
|
|
bool inlined = false;
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
NIR_PASS(inlined, nir, nir_inline_functions);
|
|
nir_remove_non_entrypoints(nir);
|
|
|
|
if (inlined) {
|
|
/* Some shader_temp vars may have remained multi-function before
|
|
* cmat lowering/inlining. Now that everything was inlined,
|
|
* they may be lowered to locals.
|
|
*/
|
|
bool lowered_globals = false;
|
|
NIR_PASS(lowered_globals, nir, nir_lower_global_vars_to_local);
|
|
if (lowered_globals)
|
|
NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
|
|
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
|
|
NIR_PASS(_, nir, nir_opt_copy_prop);
|
|
}
|
|
NIR_PASS(_, nir, nir_opt_deref);
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
|
|
NIR_PASS(_, nir, nir_lower_indirect_derefs_to_if_else_trees,
|
|
nir_var_function_temp, 16);
|
|
}
|
|
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
|
|
|
NIR_PASS(_, nir, nir_lower_memory_model);
|
|
|
|
/* Apply lowering for 64bit atomics pre-Xe2 */
|
|
const bool lower_64bit_atomics = devinfo->ver < 20;
|
|
|
|
if (lower_64bit_atomics) {
|
|
/* Ensure robustness, do this before brw_nir_lower_storage_image so that
|
|
* added image size intrinsics for bounds checkings are properly lowered
|
|
* for cube images.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_robust_access,
|
|
accept_64bit_atomic_cb, NULL);
|
|
}
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_storage_image, compiler,
|
|
&(struct brw_nir_lower_storage_image_opts) {
|
|
/* Anv only supports Gfx9+ which has better defined typed read
|
|
* behavior. It allows us to only have to care about lowering
|
|
* loads.
|
|
*/
|
|
.lower_loads = true,
|
|
.lower_stores_64bit = true,
|
|
.lower_loads_without_formats =
|
|
pdevice->instance->emulate_read_without_format,
|
|
});
|
|
|
|
if (lower_64bit_atomics) {
|
|
/* Switch from image to global */
|
|
NIR_PASS(_, nir, nir_lower_image_atomics_to_global,
|
|
accept_64bit_atomic_cb, NULL);
|
|
|
|
/* Detile for global */
|
|
NIR_PASS(_, nir, brw_nir_lower_texel_address, devinfo,
|
|
pdevice->isl_dev.shader_tiling);
|
|
}
|
|
|
|
/* Lower push constants variables prior to global realignment for CBV
|
|
* resources, it makes identifying a 64bit pointer from the push constants
|
|
* easier.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
|
|
nir_address_format_32bit_offset);
|
|
|
|
/* Realign pointers to CBV on stages that can promote to push buffers. */
|
|
if (pdevice->instance->promote_cbv_to_push_buffers &&
|
|
nir->info.stage <= MESA_SHADER_FRAGMENT) {
|
|
/* Cleanup for the analysis, we don't want any ALU */
|
|
cleanup_nir(nir);
|
|
NIR_PASS(_, nir, anv_nir_realign_cbv);
|
|
}
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
|
|
nir_address_format_64bit_global);
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
|
|
|
|
shader_data->push_desc_info.used_descriptors =
|
|
anv_nir_compute_used_push_descriptors(
|
|
nir, set_layouts, set_layout_count);
|
|
|
|
/* Need to have render targets placed first in the bind_map */
|
|
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
|
anv_shader_compute_fragment_rts(devinfo, state, shader_data);
|
|
|
|
uint32_t dynamic_descriptors_offset = 0;
|
|
uint32_t dynamic_descriptors_offsets[MAX_SETS] = {};
|
|
for (uint32_t i = 0; i < set_layout_count; i++) {
|
|
dynamic_descriptors_offsets[i] = dynamic_descriptors_offset;
|
|
if (set_layouts[i] != NULL) {
|
|
shader_data->bind_map.binding_mask |= ANV_PIPELINE_BIND_MASK_SET(i);
|
|
const uint32_t dyn_desc_count =
|
|
set_layouts[i]->vk.dynamic_descriptor_count;
|
|
shader_data->bind_map.dynamic_descriptors[i] = dyn_desc_count;
|
|
dynamic_descriptors_offset += dyn_desc_count;
|
|
}
|
|
}
|
|
|
|
/* Apply the actual layout to UBOs, SSBOs, and textures */
|
|
if (shader_data->info->flags & VK_SHADER_CREATE_DESCRIPTOR_HEAP_BIT_EXT) {
|
|
NIR_PASS(_, nir, anv_nir_lower_descriptor_heap, device,
|
|
shader_data->info->embedded_sampler_count,
|
|
shader_data->info->embedded_samplers,
|
|
&shader_data->bind_map);
|
|
} else {
|
|
NIR_PASS(_, nir, anv_nir_apply_pipeline_layout,
|
|
pdevice, shader_data->key.base.robust_flags,
|
|
set_layouts, set_layout_count,
|
|
(shader_data->info->flags &
|
|
VK_SHADER_CREATE_INDEPENDENT_SETS_BIT_KHR) ? NULL :
|
|
dynamic_descriptors_offsets,
|
|
shader_data->info->flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
|
|
&shader_data->bind_map, &shader_data->push_map, mem_ctx);
|
|
}
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
|
|
anv_nir_ubo_addr_format(pdevice, shader_data->key.base.robust_flags));
|
|
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
|
|
anv_nir_ssbo_addr_format(pdevice, shader_data->key.base.robust_flags));
|
|
|
|
|
|
cleanup_nir(nir);
|
|
|
|
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
|
|
|
cleanup_nir(nir);
|
|
|
|
/* Required for nir_divergence_analysis() which is needed for
|
|
* anv_nir_lower_ubo_loads.
|
|
*/
|
|
NIR_PASS(_, nir, nir_convert_to_lcssa, true, true);
|
|
nir_divergence_analysis(nir);
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
|
|
|
|
NIR_PASS(_, nir, nir_opt_remove_phis);
|
|
|
|
const bool lower_non_uniform_texture_offsets = device->info->ver < 20;
|
|
|
|
const enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
|
|
nir_lower_non_uniform_texture_access |
|
|
nir_lower_non_uniform_texture_query |
|
|
nir_lower_non_uniform_image_access |
|
|
nir_lower_non_uniform_image_query |
|
|
nir_lower_non_uniform_get_ssbo_size |
|
|
(lower_non_uniform_texture_offsets ?
|
|
nir_lower_non_uniform_texture_offset_access : 0);
|
|
|
|
/* Pre-Xe2 platforms don't have native support for dynamic programmable
|
|
* offsets. Since support includes non-uniform programmable offsets, we
|
|
* need to lower those texture messages in the same way we lower
|
|
* non-uniform texture/sampler handles.
|
|
*/
|
|
if (lower_non_uniform_texture_offsets)
|
|
nir_divergence_analysis(nir);
|
|
|
|
/* In practice, most shaders do not have non-uniform-qualified
|
|
* accesses (see
|
|
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
|
|
* thus a cheaper and likely to fail check is run first.
|
|
*/
|
|
if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
|
|
NIR_PASS(_, nir, nir_opt_non_uniform_access);
|
|
|
|
/* We don't support non-uniform UBOs and non-uniform SSBO access is
|
|
* handled naturally by falling back to A64 messages.
|
|
*/
|
|
NIR_PASS(_, nir, nir_lower_non_uniform_access,
|
|
&(nir_lower_non_uniform_access_options) {
|
|
.types = lower_non_uniform_access_types,
|
|
.tex_src_callback = lower_non_tg4_non_uniform_offsets,
|
|
.callback = NULL,
|
|
});
|
|
|
|
NIR_PASS(_, nir, intel_nir_lower_non_uniform_resource_intel);
|
|
NIR_PASS(_, nir, intel_nir_cleanup_resource_intel);
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
}
|
|
|
|
if (mesa_shader_stage_uses_workgroup(nir->info.stage)) {
|
|
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
|
|
nir_var_mem_shared, shared_type_info);
|
|
|
|
NIR_PASS(_, nir, nir_lower_explicit_io,
|
|
nir_var_mem_shared, nir_address_format_32bit_offset);
|
|
|
|
if (nir->info.zero_initialize_shared_memory &&
|
|
nir->info.shared_size > 0) {
|
|
/* The effective Shared Local Memory size is at least 1024 bytes and
|
|
* is always rounded to a power of two, so it is OK to align the size
|
|
* used by the shader to chunk_size -- which does simplify the logic.
|
|
*/
|
|
const unsigned chunk_size = 16;
|
|
const unsigned shared_size = align(nir->info.shared_size, chunk_size);
|
|
assert(shared_size <=
|
|
intel_compute_slm_calculate_size(devinfo->ver,
|
|
nir->info.shared_size));
|
|
|
|
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
|
|
shared_size, chunk_size);
|
|
}
|
|
}
|
|
|
|
if (mesa_shader_stage_is_compute(nir->info.stage)) {
|
|
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, devinfo,
|
|
&shader_data->prog_data.cs);
|
|
}
|
|
|
|
NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);
|
|
|
|
NIR_PASS(_, nir, anv_nir_update_resource_intel_block);
|
|
|
|
NIR_PASS(_, nir, anv_nir_shrink_push_constant_ranges);
|
|
|
|
NIR_PASS(_, nir, anv_nir_compute_push_layout,
|
|
pdevice, shader_data->key.base.robust_flags,
|
|
&(struct anv_nir_push_layout_info) {
|
|
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
|
|
shader_data->key.tcs.separate_tess_vue_layout) ||
|
|
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
|
|
shader_data->key.tes.separate_tess_vue_layout),
|
|
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
|
brw_fs_prog_key_is_dynamic(&shader_data->key.fs),
|
|
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
|
|
shader_data->key.fs.mesh_input == INTEL_SOMETIMES,
|
|
},
|
|
&shader_data->key.base,
|
|
&shader_data->prog_data.base,
|
|
&shader_data->bind_map, &shader_data->push_map);
|
|
|
|
if (!(shader_data->info->flags & VK_SHADER_CREATE_DESCRIPTOR_HEAP_BIT_EXT)) {
|
|
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
|
|
shader_data->bind_map.layout_type);
|
|
}
|
|
|
|
shader_data->push_desc_info.push_set_buffer =
|
|
anv_nir_loads_push_desc_buffer(
|
|
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
|
shader_data->push_desc_info.fully_promoted_ubo_descriptors =
|
|
anv_nir_push_desc_ubo_fully_promoted(
|
|
nir, set_layouts, set_layout_count, &shader_data->bind_map);
|
|
|
|
/* Only detected clearing compute shaders, these are the only problematic
|
|
* cases we're aware of.
|
|
*/
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE)
|
|
shader_data->bind_map.inferred_behavior = anv_nir_clear_shader_analysis(nir);
|
|
}
|
|
|
|
static uint32_t
|
|
sets_layout_embedded_sampler_count(const struct vk_shader_compile_info *info)
|
|
{
|
|
uint32_t count = 0;
|
|
|
|
for (uint32_t s = 0; s < info->set_layout_count; s++) {
|
|
if (info->set_layouts[s] == NULL)
|
|
continue;
|
|
|
|
const struct anv_descriptor_set_layout *layout =
|
|
(const struct anv_descriptor_set_layout *) info->set_layouts[s];
|
|
count += layout->embedded_sampler_count;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
static void
|
|
anv_shaders_pre_lower_gfx(struct anv_device *device,
|
|
struct anv_shader_data *shaders_data,
|
|
uint32_t shader_count,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
void *mem_ctx)
|
|
{
|
|
const struct intel_device_info *devinfo = device->info;
|
|
const struct brw_compiler *compiler = device->physical->compiler;
|
|
|
|
/* Walk backwards to link */
|
|
struct anv_shader_data *next_stage = NULL;
|
|
for (int s = shader_count - 1; s >= 0; s--) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
if (next_stage == NULL) {
|
|
next_stage = shader_data;
|
|
continue;
|
|
}
|
|
|
|
switch (info->stage) {
|
|
case MESA_SHADER_VERTEX:
|
|
case MESA_SHADER_TESS_EVAL:
|
|
case MESA_SHADER_TASK:
|
|
case MESA_SHADER_GEOMETRY:
|
|
anv_shader_link(compiler, info, next_stage->info);
|
|
break;
|
|
case MESA_SHADER_TESS_CTRL:
|
|
anv_shader_link_tcs(compiler,
|
|
&shader_data->key.tcs,
|
|
info, next_stage->info);
|
|
break;
|
|
case MESA_SHADER_MESH:
|
|
anv_shader_link(compiler, info, next_stage->info);
|
|
next_stage->mue_map = &shader_data->prog_data.mesh.map;
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid graphics shader stage");
|
|
}
|
|
|
|
next_stage = shader_data;
|
|
}
|
|
|
|
bool use_primitive_replication = false;
|
|
if (devinfo->ver >= 12 && shaders_data[0].key.base.view_mask != 0) {
|
|
/* For some pipelines HW Primitive Replication can be used instead of
|
|
* instancing to implement Multiview. This depend on how viewIndex is
|
|
* used in all the active shaders, so this check can't be done per
|
|
* individual shaders.
|
|
*/
|
|
nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
|
|
VkShaderStageFlags vk_stages = 0;
|
|
for (unsigned s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
shaders[shader_data->info->stage] = shader_data->info->nir;
|
|
vk_stages |= mesa_to_vk_shader_stage(shader_data->info->stage);
|
|
}
|
|
|
|
use_primitive_replication =
|
|
anv_check_for_primitive_replication(device, vk_stages, shaders,
|
|
shaders_data[0].key.base.view_mask);
|
|
}
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
shader_data->use_primitive_replication = use_primitive_replication;
|
|
shader_data->instance_multiplier =
|
|
(shader_data->key.base.view_mask && !use_primitive_replication) ?
|
|
util_bitcount(shader_data->key.base.view_mask) : 1;
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_shaders_post_lower_gfx(struct anv_device *device,
|
|
struct anv_shader_data *shaders_data,
|
|
uint32_t shader_count,
|
|
const struct vk_graphics_pipeline_state *state)
|
|
{
|
|
struct vk_shader_compile_info *prev_stage = NULL;
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
struct shader_info *cur_info = &shader_data->info->nir->info;
|
|
|
|
if (prev_stage && info->stage < MESA_SHADER_FRAGMENT) {
|
|
struct shader_info *prev_info = &prev_stage->nir->info;
|
|
|
|
prev_info->outputs_written |= cur_info->inputs_read &
|
|
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
|
cur_info->inputs_read |= prev_info->outputs_written &
|
|
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
|
prev_info->patch_outputs_written |= cur_info->patch_inputs_read;
|
|
cur_info->patch_inputs_read |= prev_info->patch_outputs_written;
|
|
}
|
|
|
|
prev_stage = info;
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_shaders_post_lower_rt(struct anv_device *device,
|
|
struct anv_shader_data *shaders_data,
|
|
uint32_t shader_count)
|
|
{
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
nir_shader *nir = shader_data->info->nir;
|
|
|
|
switch (nir->info.stage) {
|
|
case MESA_SHADER_RAYGEN:
|
|
brw_nir_lower_raygen(nir, device->info);
|
|
break;
|
|
|
|
case MESA_SHADER_ANY_HIT:
|
|
brw_nir_lower_any_hit(nir, device->info);
|
|
break;
|
|
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
brw_nir_lower_closest_hit(nir, device->info);
|
|
break;
|
|
|
|
case MESA_SHADER_MISS:
|
|
brw_nir_lower_miss(nir, device->info);
|
|
break;
|
|
|
|
case MESA_SHADER_CALLABLE:
|
|
brw_nir_lower_callable(nir, device->info);
|
|
break;
|
|
|
|
case MESA_SHADER_INTERSECTION:
|
|
/* Nothing to do, we merge this into ANY_HIT */
|
|
break;
|
|
|
|
default:
|
|
UNREACHABLE("invalid stage");
|
|
}
|
|
}
|
|
}
|
|
|
|
static VkShaderStageFlags
|
|
anv_shader_get_rt_group_linking(struct vk_physical_device *device,
|
|
VkShaderStageFlags stages)
|
|
{
|
|
const VkShaderStageFlags any_hit_intersection =
|
|
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
|
|
VK_SHADER_STAGE_INTERSECTION_BIT_KHR;
|
|
|
|
return (stages & any_hit_intersection) == any_hit_intersection ?
|
|
any_hit_intersection : 0;
|
|
}
|
|
|
|
static void
|
|
anv_debug_archiver_init(void *mem_ctx, struct anv_shader_data *shaders_data,
|
|
uint32_t shader_count)
|
|
{
|
|
/* A hash is used to identify the per stage archive file. Just using the
|
|
* single stage hash/key is sufficient if it is not linked. If shaders
|
|
* are linked together, also include a combined hash of all stages to
|
|
* distinguish from the not linked case.
|
|
*/
|
|
unsigned char linked_hash[BLAKE3_KEY_LEN];
|
|
if (shader_count > 1) {
|
|
blake3_hasher ctx;
|
|
_mesa_blake3_init(&ctx);
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
_mesa_blake3_update(&ctx, info->nir->info.source_blake3, BLAKE3_OUT_LEN);
|
|
_mesa_blake3_update(&ctx, &shader_data->key, shader_data->key_size);
|
|
}
|
|
_mesa_blake3_final(&ctx, linked_hash);
|
|
}
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
char name[BLAKE3_HEX_LEN + 4] = {};
|
|
{
|
|
blake3_hasher ctx;
|
|
unsigned char hash[BLAKE3_KEY_LEN];
|
|
_mesa_blake3_init(&ctx);
|
|
_mesa_blake3_update(&ctx, info->nir->info.source_blake3, BLAKE3_OUT_LEN);
|
|
_mesa_blake3_update(&ctx, &shader_data->key, shader_data->key_size);
|
|
if (shader_count > 1)
|
|
_mesa_blake3_update(&ctx, linked_hash, BLAKE3_KEY_LEN);
|
|
_mesa_blake3_final(&ctx, hash);
|
|
|
|
_mesa_blake3_format(name, hash);
|
|
}
|
|
memcpy(&name[BLAKE3_HEX_LEN - 1], ".anv", 4);
|
|
|
|
shader_data->archiver =
|
|
debug_archiver_open(mem_ctx, name, PACKAGE_VERSION MESA_GIT_SHA1);
|
|
|
|
if (shader_data->archiver) {
|
|
debug_archiver_set_prefix(shader_data->archiver,
|
|
_mesa_shader_stage_to_abbrev(info->stage));
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
anv_debug_archiver_finish(struct anv_shader_data *shaders_data,
|
|
uint32_t shader_count)
|
|
{
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
debug_archiver_close(shader_data->archiver);
|
|
shader_data->archiver = NULL;
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
anv_shader_compile(struct vk_device *vk_device,
|
|
uint32_t shader_count,
|
|
struct vk_shader_compile_info *infos,
|
|
const struct vk_graphics_pipeline_state *state,
|
|
const struct vk_features *enabled_features,
|
|
const VkAllocationCallbacks* pAllocator,
|
|
struct vk_shader **shaders_out)
|
|
{
|
|
struct anv_device *device =
|
|
container_of(vk_device, struct anv_device, vk);
|
|
VkResult result = VK_SUCCESS;
|
|
|
|
for (uint32_t i = 0; i < shader_count; i++)
|
|
shaders_out[i] = NULL;
|
|
|
|
void *mem_ctx = ralloc_context(NULL);
|
|
|
|
struct anv_shader_data *shaders_data =
|
|
rzalloc_array(mem_ctx, struct anv_shader_data, shader_count);
|
|
assert(shader_count < MAX2(ANV_GRAPHICS_SHADER_STAGE_COUNT,
|
|
ANV_RT_SHADER_STAGE_COUNT));
|
|
|
|
/* Order the stages (no guarantee from the runtime) */
|
|
struct vk_shader_compile_info *ordered_infos[MESA_SHADER_KERNEL] = { 0 };
|
|
struct vk_shader **ordered_shaders_out[MESA_SHADER_KERNEL] = { 0 };
|
|
VkShaderStageFlags stages = 0;
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
ordered_infos[infos[s].stage] = &infos[s];
|
|
ordered_shaders_out[infos[s].stage] = &shaders_out[s];
|
|
|
|
/* The runtime transfers the ownership of the NIR to us, so we need to
|
|
* free it after compile.
|
|
*/
|
|
ralloc_steal(mem_ctx, infos[s].nir);
|
|
stages |= mesa_to_vk_shader_stage(infos[s].stage);
|
|
}
|
|
|
|
{
|
|
#define ADD_SHADER(_name) \
|
|
do { \
|
|
if (ordered_infos[MESA_SHADER_##_name]) { \
|
|
shaders_data[remapped_index].info = \
|
|
ordered_infos[MESA_SHADER_##_name]; \
|
|
shaders_data[remapped_index].shader_out = \
|
|
ordered_shaders_out[MESA_SHADER_##_name]; \
|
|
remapped_index++; \
|
|
} \
|
|
} while (0)
|
|
|
|
uint32_t remapped_index = 0;
|
|
ADD_SHADER(COMPUTE);
|
|
ADD_SHADER(VERTEX);
|
|
ADD_SHADER(TESS_CTRL);
|
|
ADD_SHADER(TESS_EVAL);
|
|
ADD_SHADER(GEOMETRY);
|
|
ADD_SHADER(TASK);
|
|
ADD_SHADER(MESH);
|
|
ADD_SHADER(FRAGMENT);
|
|
ADD_SHADER(RAYGEN);
|
|
ADD_SHADER(CLOSEST_HIT);
|
|
ADD_SHADER(INTERSECTION);
|
|
ADD_SHADER(ANY_HIT);
|
|
ADD_SHADER(MISS);
|
|
ADD_SHADER(CALLABLE);
|
|
|
|
#undef ADD_SHADER
|
|
}
|
|
|
|
/* From now on, don't use infos[] anymore. */
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct vk_shader_compile_info *info = shader_data->info;
|
|
|
|
shader_data->source_hash = ((uint32_t*)info->nir->info.source_blake3)[0];
|
|
|
|
for (uint32_t i = 0; i < info->set_layout_count; i++) {
|
|
shader_data->dynamic_descriptors[i] =
|
|
info->set_layouts[i] != NULL ?
|
|
info->set_layouts[i]->dynamic_descriptor_count : 0;
|
|
}
|
|
|
|
shader_data->bind_map.layout_type =
|
|
set_layouts_get_layout_type((struct anv_descriptor_set_layout * const *)info->set_layouts,
|
|
info->set_layout_count);
|
|
shader_data->bind_map.surface_to_descriptor =
|
|
brw_shader_stage_requires_bindless_resources(info->stage) ? NULL :
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
|
|
shader_data->bind_map.sampler_to_descriptor =
|
|
brw_shader_stage_requires_bindless_resources(info->stage) ? NULL :
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
|
|
shader_data->bind_map.embedded_sampler_to_binding =
|
|
rzalloc_array(mem_ctx, struct anv_pipeline_embedded_sampler_binding,
|
|
MAX2(sets_layout_embedded_sampler_count(info),
|
|
info->embedded_sampler_count));
|
|
|
|
shader_data->prog_data.base.stage = info->stage;
|
|
|
|
switch (info->stage) {
|
|
case MESA_SHADER_VERTEX:
|
|
populate_vs_prog_key(&shader_data->key.vs, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_TESS_CTRL:
|
|
populate_tcs_prog_key(&shader_data->key.tcs, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_TESS_EVAL:
|
|
populate_tes_prog_key(&shader_data->key.tes, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_GEOMETRY:
|
|
populate_gs_prog_key(&shader_data->key.gs, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_TASK:
|
|
populate_task_prog_key(&shader_data->key.task, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_MESH:
|
|
populate_mesh_prog_key(&shader_data->key.mesh, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_FRAGMENT:
|
|
populate_fs_prog_key(&shader_data->key.fs, vk_device->physical,
|
|
info->robustness, state, stages);
|
|
break;
|
|
case MESA_SHADER_COMPUTE:
|
|
populate_cs_prog_key(&shader_data->key.cs, vk_device->physical,
|
|
info->robustness);
|
|
break;
|
|
case MESA_SHADER_RAYGEN:
|
|
case MESA_SHADER_ANY_HIT:
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
case MESA_SHADER_MISS:
|
|
case MESA_SHADER_INTERSECTION:
|
|
case MESA_SHADER_CALLABLE:
|
|
populate_bs_prog_key(&shader_data->key.bs, vk_device->physical,
|
|
info->robustness, info->rt_flags);
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid stage");
|
|
}
|
|
}
|
|
|
|
if (INTEL_DEBUG(DEBUG_MDA))
|
|
anv_debug_archiver_init(mem_ctx, shaders_data, shader_count);
|
|
|
|
{
|
|
/* We're going to do cross stage link if we have a fragment shader with
|
|
* any other stage (that would include all the associated
|
|
* pre-rasterization stages of the pipeline).
|
|
*/
|
|
const bool separate_shaders =
|
|
!(shader_count > 1 && ordered_infos[MESA_SHADER_FRAGMENT] != NULL);
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++)
|
|
shaders_data[s].info->nir->info.separate_shader = separate_shaders;
|
|
}
|
|
|
|
if (mesa_shader_stage_is_graphics(shaders_data[0].info->stage)) {
|
|
anv_shaders_pre_lower_gfx(device, shaders_data, shader_count,
|
|
state, mem_ctx);
|
|
}
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
|
|
anv_shader_lower_nir(device, mem_ctx, state, shader_data);
|
|
|
|
anv_fixup_subgroup_size(device, shader_data->info->nir);
|
|
|
|
anv_nir_apply_shader_workarounds(shader_data->info->nir);
|
|
}
|
|
|
|
/* Combine intersection & any-hit before lowering */
|
|
if (ordered_infos[MESA_SHADER_INTERSECTION] != NULL) {
|
|
brw_nir_lower_combined_intersection_any_hit(
|
|
ordered_infos[MESA_SHADER_INTERSECTION]->nir,
|
|
ordered_infos[MESA_SHADER_ANY_HIT] != NULL ?
|
|
ordered_infos[MESA_SHADER_ANY_HIT]->nir : NULL,
|
|
device->info);
|
|
}
|
|
|
|
if (mesa_shader_stage_is_graphics(shaders_data[0].info->stage))
|
|
anv_shaders_post_lower_gfx(device, shaders_data, shader_count, state);
|
|
else if (mesa_shader_stage_is_rt(shaders_data[0].info->stage))
|
|
anv_shaders_post_lower_rt(device, shaders_data, shader_count);
|
|
|
|
for (uint32_t s = 0; s < shader_count; s++) {
|
|
struct anv_shader_data *shader_data = &shaders_data[s];
|
|
struct anv_shader_data *prev_shader_data =
|
|
s > 0 ? &shaders_data[s - 1] : NULL;
|
|
|
|
char *error_str = NULL;
|
|
switch (shader_data->info->stage) {
|
|
case MESA_SHADER_VERTEX:
|
|
anv_shader_compile_vs(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
case MESA_SHADER_TESS_CTRL:
|
|
anv_shader_compile_tcs(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
case MESA_SHADER_TESS_EVAL:
|
|
anv_shader_compile_tes(device, mem_ctx,
|
|
&shaders_data[s], prev_shader_data,
|
|
&error_str);
|
|
break;
|
|
case MESA_SHADER_GEOMETRY:
|
|
anv_shader_compile_gs(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
case MESA_SHADER_TASK:
|
|
anv_shader_compile_task(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
case MESA_SHADER_MESH:
|
|
anv_shader_compile_mesh(device, mem_ctx,
|
|
&shaders_data[s], prev_shader_data,
|
|
&error_str);
|
|
break;
|
|
case MESA_SHADER_FRAGMENT:
|
|
anv_shader_compile_fs(device, mem_ctx, shader_data, state, &error_str);
|
|
break;
|
|
case MESA_SHADER_COMPUTE:
|
|
anv_shader_compile_cs(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
case MESA_SHADER_RAYGEN:
|
|
case MESA_SHADER_ANY_HIT:
|
|
case MESA_SHADER_CLOSEST_HIT:
|
|
case MESA_SHADER_MISS:
|
|
case MESA_SHADER_INTERSECTION:
|
|
case MESA_SHADER_CALLABLE:
|
|
anv_shader_compile_bs(device, mem_ctx, shader_data, &error_str);
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid graphics shader stage");
|
|
}
|
|
|
|
if (shader_data->code == NULL) {
|
|
if (error_str)
|
|
result = vk_errorf(device, VK_ERROR_UNKNOWN, "%s", error_str);
|
|
else
|
|
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
goto end;
|
|
}
|
|
|
|
anv_nir_validate_push_layout(device->physical,
|
|
&shader_data->prog_data.base,
|
|
&shader_data->bind_map);
|
|
|
|
shader_data->xfb_info = shader_data->info->nir->xfb_info;
|
|
|
|
result = anv_shader_create(device, shader_data->info->stage,
|
|
mem_ctx, shader_data, pAllocator,
|
|
shader_data->shader_out);
|
|
if (result != VK_SUCCESS)
|
|
goto end;
|
|
}
|
|
|
|
end:
|
|
if (INTEL_DEBUG(DEBUG_MDA))
|
|
anv_debug_archiver_finish(shaders_data, shader_count);
|
|
|
|
ralloc_free(mem_ctx);
|
|
|
|
#if 0
|
|
/* TODO: Write the feedback index into the pipeline */
|
|
for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
|
|
anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]);
|
|
}
|
|
#endif
|
|
|
|
if (result != VK_SUCCESS) {
|
|
for (unsigned s = 0; s < shader_count; s++) {
|
|
if (shaders_out[s] != NULL)
|
|
vk_shader_free(vk_device, &vk_device->alloc, shaders_out[s]);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
struct vk_device_shader_ops anv_device_shader_ops = {
|
|
.get_nir_options = anv_shader_get_nir_options,
|
|
.get_spirv_options = anv_shader_get_spirv_options,
|
|
.preprocess_nir = anv_shader_preprocess_nir,
|
|
.get_rt_group_linking = anv_shader_get_rt_group_linking,
|
|
.hash_state = anv_shader_hash_state,
|
|
.compile = anv_shader_compile,
|
|
.deserialize = anv_shader_deserialize,
|
|
.replay_rt_shader_group = anv_replay_rt_shader_group,
|
|
.write_rt_shader_group = anv_write_rt_shader_group,
|
|
.write_rt_shader_group_replay_handle = anv_write_rt_shader_group_replay_handle,
|
|
.cmd_bind_shaders = anv_cmd_buffer_bind_shaders,
|
|
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
|
|
.cmd_set_rt_state = anv_cmd_buffer_set_rt_state,
|
|
.cmd_set_stack_size = anv_cmd_buffer_set_stack_size,
|
|
};
|