panvk: Optimize input attachment loads when we can

When we know the input attachment is also an active color attachment,
we can load the value from the tile buffer instead of going back to
the texture.
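
Roughly, the lowered input attachment load becomes a branch on the
input attachment map sysval (illustrative pseudocode, not the literal
NIR emitted by the lowering pass; names are illustrative):

   if (target != ~0) {            /* aliases a live on-tile attachment */
      if (target < 8)             /* color attachment */
         val = load_converted_output(target, conversion);
      else                        /* depth/stencil attachment */
         val = load_converted_output(ZS_TARGET, zs_conversion);
   } else {
      val = image_load(coords);   /* fall back to the texture */
   }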

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
Boris Brezillon, 2025-01-24 12:06:31 +01:00, committed by Marge Bot
commit bfd5ddbf32 (parent 20275d6521)
7 changed files with 384 additions and 30 deletions

--- file 1/7 ---

@@ -1293,7 +1293,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static VkResult
-prepare_ds(struct panvk_cmd_buffer *cmdbuf)
+prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs)
 {
    bool dirty = dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
@@ -1307,7 +1307,9 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
-                fs_user_dirty(cmdbuf);
+                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
+                dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
+                fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ);
 
    if (!dirty)
       return VK_SUCCESS;
@@ -1356,8 +1358,11 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
       if (rs->depth_clamp_enable)
          cfg.depth_clamp_mode = MALI_DEPTH_CLAMP_MODE_BOUNDS;
 
-      if (fs)
+      if (fs) {
+         cfg.shader_read_only_z_s = earlyzs.shader_readonly_zs;
          cfg.depth_source = pan_depth_source(&fs->info);
+      }
+
       cfg.depth_write_enable = test_z && ds->depth.write_enable;
       cfg.depth_bias_enable = rs->depth_bias.enable;
       cfg.depth_function = test_z ? translate_compare_func(ds->depth.compare_op)
@@ -1454,7 +1459,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static void
-prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
+prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
+            struct pan_earlyzs_state *earlyzs)
 {
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1478,6 +1484,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
                      dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
                      /* line mode needs primitive topology */
                      dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
+                     dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
                      fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE) ||
                      gfx_state_dirty(cmdbuf, OQ);
    bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
@@ -1517,26 +1524,30 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
    struct mali_dcd_flags_0_packed dcd0;
    pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
       if (fs) {
-         uint8_t rt_written = color_attachment_written_mask(
-            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+         uint8_t rt_written = color_attachment_written_mask(
+            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+         uint8_t rt_read =
+            color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+         bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
          cfg.allow_forward_pixel_to_kill =
             fs->info.fs.can_fpk && !(rt_mask & ~rt_written) &&
-            !alpha_to_coverage && !cmdbuf->state.gfx.cb.info.any_dest_read;
+            !(rt_read & rt_written) && !alpha_to_coverage &&
+            !cmdbuf->state.gfx.cb.info.any_dest_read;
 
          bool writes_zs = writes_z || writes_s;
         bool zs_always_passes = ds_test_always_passes(cmdbuf);
         bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
                   MALI_OCCLUSION_MODE_DISABLED;
 
-         struct pan_earlyzs_state earlyzs =
+         *earlyzs =
             pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                            alpha_to_coverage, zs_always_passes, false);
+                            alpha_to_coverage, zs_always_passes, zs_read);
 
-         cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
-         cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
+         cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs->kill;
+         cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs->update;
 
          cfg.evaluate_per_sample = fs->info.fs.sample_shading &&
                                    (dyns->ms.rasterization_samples > 1);
@@ -1748,7 +1759,11 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);
 
-      result = prepare_ds(cmdbuf);
+      struct pan_earlyzs_state earlyzs = {0};
+
+      prepare_dcd(cmdbuf, &earlyzs);
+
+      result = prepare_ds(cmdbuf, earlyzs);
       if (result != VK_SUCCESS)
          return result;
@@ -1756,7 +1771,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       if (result != VK_SUCCESS)
         return result;
 
-      prepare_dcd(cmdbuf);
       prepare_vp(cmdbuf);
       prepare_tiler_primitive_size(cmdbuf);
    }

--- file 2/7 ---

@@ -282,13 +282,16 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                                           8));
       }
-      uint8_t rt_written = color_attachment_written_mask(
-         fs, &cmdbuf->vk.dynamic_graphics_state.cal);
       uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                         MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+      uint8_t rt_written = color_attachment_written_mask(
+         fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+      uint8_t rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+      bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
       cfg.properties.allow_forward_pixel_to_kill =
          fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
-         !alpha_to_coverage && !binfo->any_dest_read;
+         !(rt_read & rt_written) && !alpha_to_coverage &&
+         !binfo->any_dest_read;
 
       bool writes_zs = writes_z || writes_s;
       bool zs_always_passes = ds_test_always_passes(cmdbuf);
@@ -297,7 +300,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
 
       struct pan_earlyzs_state earlyzs =
          pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                         alpha_to_coverage, zs_always_passes, false);
+                         alpha_to_coverage, zs_always_passes, zs_read);
 
       cfg.properties.pixel_kill_operation = earlyzs.kill;
       cfg.properties.zs_update_operation = earlyzs.update;

--- file 3/7 ---

@@ -16,6 +16,7 @@
 #include "panvk_image.h"
 #include "panvk_image_view.h"
 #include "panvk_physical_device.h"
+#include "panvk_shader.h"
 
 #include "vk_command_buffer.h"
 #include "vk_format.h"
@@ -376,4 +377,47 @@ color_attachment_written_mask(
    return catt_written_mask;
 }
 
+static inline uint32_t
+color_attachment_read_mask(const struct panvk_shader *fs,
+                           const struct vk_input_attachment_location_state *ial,
+                           uint8_t color_attachment_mask)
+{
+   uint32_t color_attachment_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? util_last_bit(color_attachment_mask)
+         : ial->color_attachment_count;
+   uint32_t catt_read_mask = 0;
+
+   for (uint32_t i = 0; i < color_attachment_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+         continue;
+
+      uint32_t catt_idx = ial->color_map[i] + 1;
+
+      if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
+         assert(color_attachment_mask & BITFIELD_BIT(i));
+         catt_read_mask |= BITFIELD_BIT(i);
+      }
+   }
+
+   return catt_read_mask;
+}
+
+static inline bool
+zs_attachment_read(const struct panvk_shader *fs,
+                   const struct vk_input_attachment_location_state *ial)
+{
+   uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                            ? BITFIELD_BIT(0)
+                         : ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
+                            ? BITFIELD_BIT(ial->depth_att + 1)
+                            : 0;
+   uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                              ? BITFIELD_BIT(0)
+                           : ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
+                              ? BITFIELD_BIT(ial->stencil_att + 1)
+                              : 0;
+
+   return (depth_mask | stencil_mask) & fs->fs.input_attachment_read;
+}
+
 #endif

--- file 4/7 ---

@@ -52,6 +52,18 @@ enum panvk_desc_table_id {
 };
 #endif
 
+#define PANVK_COLOR_ATTACHMENT(x) (x)
+#define PANVK_ZS_ATTACHMENT 255
+
+struct panvk_input_attachment_info {
+   uint32_t target;
+   uint32_t conversion;
+};
+
+/* One attachment per color, one for depth, one for stencil, and the last one
+ * for the attachment without an InputAttachmentIndex attribute. */
+#define INPUT_ATTACHMENT_MAP_SIZE 11
+
 #define FAU_WORD_SIZE sizeof(uint64_t)
 
 #define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
@@ -79,6 +91,8 @@ struct panvk_graphics_sysvals {
    aligned_u64 push_consts;
    aligned_u64 printf_buffer_address;
 
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+
 #if PAN_ARCH <= 7
    /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
     * layer, and filter primitives at the VS level.
@@ -265,6 +279,7 @@ struct panvk_shader {
 
       struct {
          struct pan_earlyzs_lut earlyzs_lut;
+         uint32_t input_attachment_read;
       } fs;
    };

--- file 5/7 ---

@@ -539,6 +539,84 @@ panvk_per_arch(cmd_preload_render_area_border)(
    panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
 }
 
+static void
+prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
+{
+   const struct vk_input_attachment_location_state *ial =
+      &cmdbuf->vk.dynamic_graphics_state.ial;
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+   uint32_t catt_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? MAX_RTS
+         : ial->color_attachment_count;
+
+   memset(iam, ~0, sizeof(iam));
+
+   assert(catt_count <= MAX_RTS);
+   for (uint32_t i = 0; i < catt_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
+          !(cmdbuf->state.gfx.render.bound_attachments &
+            MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
+         continue;
+
+      VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+      uint32_t ia_idx = ial->color_map[i] + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+#if PAN_ARCH <= 7
+         cfg.register_format =
+            vk_format_is_uint(fmt)   ? MALI_REGISTER_FILE_FORMAT_U32
+            : vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
+                                     : MALI_REGISTER_FILE_FORMAT_F32;
+#endif
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+   }
+
+   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+
+#if PAN_ARCH <= 7
+      /* On v7, we need to pass the depth format around. If we use a conversion
+       * of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
+      VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+#endif
+   }
+
+   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->stencil_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
+      set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
+}
+
 /* This value has been selected to get
  * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
  */
@@ -647,6 +725,9 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
       }
    }
 
+   if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
+      prepare_iam_sysvals(cmdbuf, dirty_sysvals);
+
    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
 
 #if PAN_ARCH <= 7

--- file 6/7 ---

@@ -30,6 +30,7 @@
 #include "panvk_device.h"
 #include "panvk_shader.h"
 
+#include "vk_graphics_state.h"
 #include "vk_pipeline.h"
 #include "vk_pipeline_layout.h"
@@ -72,6 +73,7 @@ struct lower_desc_ctx {
    bool add_bounds_checks;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+   struct panvk_shader *shader;
 };
 
 static nir_address_format
@@ -827,6 +829,137 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
    }
 }
 
+static bool
+lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
+                            void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_image_deref_load &&
+       intr->intrinsic != nir_intrinsic_image_deref_sparse_load)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(deref->type);
+
+   if (image_dim != GLSL_SAMPLER_DIM_SUBPASS &&
+       image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
+      return false;
+
+   struct panvk_shader *shader = data;
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   assert(var);
+
+   const unsigned iam_idx =
+      var->data.index != NIR_VARIABLE_NO_INDEX ? var->data.index + 1 : 0;
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+
+   shader->fs.input_attachment_read |= BITFIELD_BIT(iam_idx);
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_def *target =
+      nir_load_input_attachment_target_pan(b, nir_imm_int(b, iam_idx));
+   nir_def *load_img, *load_output;
+
+   nir_push_if(b, nir_ine_imm(b, target, ~0));
+   {
+      nir_def *is_color_att = nir_ilt_imm(b, target, 8);
+      nir_def *load_color, *load_zs;
+      nir_io_semantics iosem = {0};
+
+      nir_push_if(b, is_color_att);
+      {
+         nir_def *conversion =
+            nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+
+         iosem.location = FRAG_RESULT_DATA0;
+         load_color = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+      }
+      nir_push_else(b, NULL);
+      {
+#if PAN_ARCH <= 7
+         /* On v7, we need to pass the depth format around. If we use a
+          * conversion of zero, like we do on v9+, the GPU reports an
+          * INVALID_INSTR_ENC. */
+         struct mali_internal_conversion_packed stencil_conv;
+         pan_pack(&stencil_conv, INTERNAL_CONVERSION, cfg) {
+            cfg.register_format = MALI_REGISTER_FILE_FORMAT_U32;
+            cfg.memory_format = GENX(panfrost_dithered_format_from_pipe_format)(
+               PIPE_FORMAT_S8_UINT, false);
+         }
+
+         nir_def *conversion =
+            dest_type == nir_type_uint32
+               ? nir_imm_int(b, stencil_conv.opaque[0])
+               : nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+#else
+         nir_def *conversion = nir_imm_int(b, 0);
+#endif
+
+         iosem.location = dest_type == nir_type_float32 ? FRAG_RESULT_DEPTH
+                                                        : FRAG_RESULT_STENCIL;
+         target = nir_imm_int(b, 0);
+         load_zs = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+
+         /* If we loaded the stencil value, the upper 24 bits might contain
+          * garbage, hence the masking done here. */
+         if (iosem.location == FRAG_RESULT_STENCIL)
+            load_zs = nir_iand_imm(b, load_zs, BITFIELD_MASK(8));
+      }
+      nir_pop_if(b, NULL);
+
+      load_output = nir_if_phi(b, load_color, load_zs);
+   }
+   nir_push_else(b, NULL);
+   {
+      load_img =
+         intr->intrinsic == nir_intrinsic_image_deref_sparse_load
+            ? nir_image_deref_sparse_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type)
+            : nir_image_deref_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type);
+   }
+   nir_pop_if(b, NULL);
+
+   nir_def_replace(&intr->def, nir_if_phi(b, load_output, load_img));
+   return true;
+}
+
+static bool
+lower_input_attachment_loads(nir_shader *nir, struct panvk_shader *shader)
+{
+   bool progress = false;
+
+   NIR_PASS(progress, nir, nir_shader_intrinsics_pass,
+            lower_input_attachment_load, nir_metadata_control_flow, shader);
+
+   /* Lower the remaining input attachment loads. */
+   struct nir_input_attachment_options lower_input_attach_opts = {
+      .use_fragcoord_sysval = true,
+      .use_layer_id_sysval = true,
+   };
+
+   NIR_PASS(progress, nir, nir_lower_input_attachments,
+            &lower_input_attach_opts);
+
+   return progress;
+}
+
 static bool
 lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                     struct lower_desc_ctx *ctx)
@@ -1231,11 +1364,13 @@ upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader,
 void
 panvk_per_arch(nir_lower_descriptors)(
    nir_shader *nir, struct panvk_device *dev,
-   const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
+   const struct vk_pipeline_robustness_state *rs,
+   uint32_t set_layout_count,
    struct vk_descriptor_set_layout *const *set_layouts,
    struct panvk_shader *shader)
 {
    struct lower_desc_ctx ctx = {
+      .shader = shader,
       .add_bounds_checks =
          rs->storage_buffers !=
            VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
@@ -1279,6 +1414,9 @@ panvk_per_arch(nir_lower_descriptors)(
    create_copy_table(nir, &ctx);
    upload_shader_desc_info(dev, shader, &ctx.desc_info);
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS(progress, nir, lower_input_attachment_loads, shader);
+
    NIR_PASS(progress, nir, nir_shader_instructions_pass,
             lower_descriptors_instr, nir_metadata_control_flow, &ctx);

--- file 7/7 ---

@@ -62,12 +62,18 @@
 #include "vk_shader.h"
 #include "vk_util.h"
 
+struct panvk_lower_sysvals_context {
+   struct panvk_shader *shader;
+   const struct vk_graphics_pipeline_state *state;
+};
+
 static bool
 panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
+   const struct panvk_lower_sysvals_context *ctx = data;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    unsigned bit_size = intr->def.bit_size;
    nir_def *val = NULL;
@@ -127,6 +133,52 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
       val = load_sysval(b, graphics, bit_size, printf_buffer_address);
       break;
 
+   case nir_intrinsic_load_input_attachment_target_pan: {
+      const struct vk_input_attachment_location_state *ial =
+         ctx->state ? ctx->state->ial : NULL;
+
+      if (ial) {
+         uint32_t index = nir_src_as_uint(intr->src[0]);
+         uint32_t depth_idx = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                 ? 0
+                                 : ial->depth_att + 1;
+         uint32_t stencil_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                   ? 0
+                                   : ial->stencil_att + 1;
+         uint32_t target = ~0;
+
+         if (depth_idx == index || stencil_idx == index) {
+            target = PANVK_ZS_ATTACHMENT;
+         } else {
+            for (unsigned i = 0; i < ial->color_attachment_count; i++) {
+               if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               if (ial->color_map[i] + 1 == index) {
+                  target = PANVK_COLOR_ATTACHMENT(i);
+                  break;
+               }
+            }
+         }
+
+         val = nir_imm_int(b, target);
+      } else {
+         nir_def *ia_info =
+            load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+         val = nir_channel(b, ia_info, 0);
+      }
+      break;
+   }
+
+   case nir_intrinsic_load_input_attachment_conv_pan: {
+      nir_def *ia_info =
+         load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+      val = nir_channel(b, ia_info, 1);
+      break;
+   }
+
    default:
       return false;
    }
@@ -326,15 +378,6 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev,
    NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
    NIR_PASS(_, nir, nir_opt_loop);
 
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      struct nir_input_attachment_options lower_input_attach_opts = {
-         .use_fragcoord_sysval = true,
-         .use_layer_id_sysval = true,
-      };
-
-      NIR_PASS(_, nir, nir_lower_input_attachments, &lower_input_attach_opts);
-   }
-
    /* Do texture lowering here. Yes, it's a duplication of the texture
     * lowering in bifrost_compile. However, we need to lower texture stuff
     * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
@@ -400,6 +443,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
       _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
                           sizeof(state->rp->view_mask));
 
+   if (state->ial)
+      _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial));
+
    _mesa_blake3_final(&blake3_ctx, blake3_out);
 }
@@ -677,6 +723,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
                 struct vk_descriptor_set_layout *const *set_layouts,
                 const struct vk_pipeline_robustness_state *rs,
                 uint32_t *noperspective_varyings,
+                const struct vk_graphics_pipeline_state *state,
                 const struct panfrost_compile_inputs *compile_input,
                 struct panvk_shader *shader)
 {
@@ -813,8 +860,13 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
       NIR_PASS(_, nir, pan_nir_lower_static_noperspective,
               *noperspective_varyings);
 
+   struct panvk_lower_sysvals_context lower_sysvals_ctx = {
+      .shader = shader,
+      .state = state,
+   };
+
    NIR_PASS(_, nir, nir_shader_instructions_pass, panvk_lower_sysvals,
-            nir_metadata_control_flow, NULL);
+            nir_metadata_control_flow, &lower_sysvals_ctx);
 
    lower_load_push_consts(nir, shader);
 }
@@ -1134,7 +1186,8 @@ panvk_compile_shader(struct panvk_device *dev,
       nir->info.fs.uses_sample_shading = true;
 
    panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
-                   info->robustness, noperspective_varyings, &inputs, shader);
+                   info->robustness, noperspective_varyings, state, &inputs,
+                   shader);
 
 #if PAN_ARCH >= 9
    if (info->stage == MESA_SHADER_FRAGMENT)
@@ -1319,7 +1372,6 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
    VkResult result;
 
    blob_copy_bytes(blob, &info, sizeof(info));
-
    if (blob->overrun)
       return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
@@ -1340,6 +1392,8 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
 
    case MESA_SHADER_FRAGMENT:
       shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
+      blob_copy_bytes(blob, &shader->fs.input_attachment_read,
+                      sizeof(shader->fs.input_attachment_read));
      break;
 
    default:
@@ -1441,6 +1495,11 @@ panvk_shader_serialize(struct vk_device *vk_dev,
                        sizeof(shader->cs.local_size));
       break;
 
+   case MESA_SHADER_FRAGMENT:
+      blob_write_bytes(blob, &shader->fs.input_attachment_read,
+                       sizeof(shader->fs.input_attachment_read));
+      break;
+
    default:
       break;
    }