panvk: Optimize input attachment loads when we can
When we know the input attachment is also an active color attachment,
we can load the value from the tile buffer instead of going back to the
texture.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
parent 20275d6521
commit bfd5ddbf32

7 changed files with 384 additions and 30 deletions
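In outline, the change works like this (a minimal sketch with illustrative
names; the real implementation emits NIR through the new
lower_input_attachment_load() pass shown further down, so none of these
helpers exist as-is):

   /* Per input-attachment load: the iam[] sysval table, filled at draw
    * time, says whether the input attachment aliases a bound color or
    * depth/stencil attachment. */
   uint32_t target = sysvals.iam[ia_idx].target;
   if (target != ~0u) {
      /* Aliased: fetch the pixel from the tile buffer, using the packed
       * conversion descriptor to interpret it. */
      result = load_converted_output(target, sysvals.iam[ia_idx].conversion);
   } else {
      /* Not aliased: keep the regular image load as a fallback. */
      result = image_load(coords);
   }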
@@ -1293,7 +1293,7 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static VkResult
-prepare_ds(struct panvk_cmd_buffer *cmdbuf)
+prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs)
 {
    bool dirty = dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
@@ -1307,7 +1307,9 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
                 dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
-                fs_user_dirty(cmdbuf);
+                dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
+                dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
+                fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ);
 
    if (!dirty)
       return VK_SUCCESS;
@@ -1356,8 +1358,11 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf)
       if (rs->depth_clamp_enable)
          cfg.depth_clamp_mode = MALI_DEPTH_CLAMP_MODE_BOUNDS;
 
-      if (fs)
+      if (fs) {
+         cfg.shader_read_only_z_s = earlyzs.shader_readonly_zs;
          cfg.depth_source = pan_depth_source(&fs->info);
+      }
 
       cfg.depth_write_enable = test_z && ds->depth.write_enable;
       cfg.depth_bias_enable = rs->depth_bias.enable;
       cfg.depth_function = test_z ? translate_compare_func(ds->depth.compare_op)
@@ -1454,7 +1459,8 @@ prepare_oq(struct panvk_cmd_buffer *cmdbuf)
 }
 
 static void
-prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
+prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
+            struct pan_earlyzs_state *earlyzs)
 {
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1478,6 +1484,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
                      dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
                      /* line mode needs primitive topology */
                      dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
+                     dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
                      fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE) ||
                      gfx_state_dirty(cmdbuf, OQ);
    bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
@@ -1517,26 +1524,30 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
       struct mali_dcd_flags_0_packed dcd0;
       pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
          if (fs) {
-            uint8_t rt_written = color_attachment_written_mask(
-               fs, &cmdbuf->vk.dynamic_graphics_state.cal);
             uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                               MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+            uint8_t rt_written = color_attachment_written_mask(
+               fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+            uint8_t rt_read =
+               color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+            bool zs_read = zs_attachment_read(fs, &dyns->ial);
 
             cfg.allow_forward_pixel_to_kill =
                fs->info.fs.can_fpk && !(rt_mask & ~rt_written) &&
-               !alpha_to_coverage && !cmdbuf->state.gfx.cb.info.any_dest_read;
+               !(rt_read & rt_written) && !alpha_to_coverage &&
+               !cmdbuf->state.gfx.cb.info.any_dest_read;
 
             bool writes_zs = writes_z || writes_s;
             bool zs_always_passes = ds_test_always_passes(cmdbuf);
             bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
                       MALI_OCCLUSION_MODE_DISABLED;
 
-            struct pan_earlyzs_state earlyzs =
+            *earlyzs =
                pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                               alpha_to_coverage, zs_always_passes, false);
+                               alpha_to_coverage, zs_always_passes, zs_read);
 
-            cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
-            cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
+            cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs->kill;
+            cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs->update;
             cfg.evaluate_per_sample = fs->info.fs.sample_shading &&
                                       (dyns->ms.rasterization_samples > 1);
 
@@ -1748,7 +1759,11 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
 
       cs_move32_to(b, cs_sr_reg32(b, IDVS, VARY_SIZE), varying_size);
 
-      result = prepare_ds(cmdbuf);
+      struct pan_earlyzs_state earlyzs = {0};
+
+      prepare_dcd(cmdbuf, &earlyzs);
+
+      result = prepare_ds(cmdbuf, earlyzs);
       if (result != VK_SUCCESS)
          return result;
 
@@ -1756,7 +1771,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       if (result != VK_SUCCESS)
         return result;
 
-      prepare_dcd(cmdbuf);
       prepare_vp(cmdbuf);
       prepare_tiler_primitive_size(cmdbuf);
    }
@@ -282,13 +282,16 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                8));
          }
 
-         uint8_t rt_written = color_attachment_written_mask(
-            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
+         uint8_t rt_written = color_attachment_written_mask(
+            fs, &cmdbuf->vk.dynamic_graphics_state.cal);
+         uint8_t rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
+         bool zs_read = zs_attachment_read(fs, &dyns->ial);
          cfg.properties.allow_forward_pixel_to_kill =
            fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
-            !alpha_to_coverage && !binfo->any_dest_read;
+            !(rt_read & rt_written) && !alpha_to_coverage &&
+            !binfo->any_dest_read;
 
          bool writes_zs = writes_z || writes_s;
          bool zs_always_passes = ds_test_always_passes(cmdbuf);
@@ -297,7 +300,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
 
          struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
-                            alpha_to_coverage, zs_always_passes, false);
+                            alpha_to_coverage, zs_always_passes, zs_read);
 
          cfg.properties.pixel_kill_operation = earlyzs.kill;
          cfg.properties.zs_update_operation = earlyzs.update;
@@ -16,6 +16,7 @@
 #include "panvk_image.h"
 #include "panvk_image_view.h"
 #include "panvk_physical_device.h"
+#include "panvk_shader.h"
 
 #include "vk_command_buffer.h"
 #include "vk_format.h"
@@ -376,4 +377,47 @@ color_attachment_written_mask(
    return catt_written_mask;
 }
 
+static inline uint32_t
+color_attachment_read_mask(const struct panvk_shader *fs,
+                           const struct vk_input_attachment_location_state *ial,
+                           uint8_t color_attachment_mask)
+{
+   uint32_t color_attachment_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? util_last_bit(color_attachment_mask)
+         : ial->color_attachment_count;
+   uint32_t catt_read_mask = 0;
+
+   for (uint32_t i = 0; i < color_attachment_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+         continue;
+
+      uint32_t catt_idx = ial->color_map[i] + 1;
+      if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
+         assert(color_attachment_mask & BITFIELD_BIT(i));
+         catt_read_mask |= BITFIELD_BIT(i);
+      }
+   }
+
+   return catt_read_mask;
+}
+
+static inline bool
+zs_attachment_read(const struct panvk_shader *fs,
+                   const struct vk_input_attachment_location_state *ial)
+{
+   uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                            ? BITFIELD_BIT(0)
+                         : ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
+                            ? BITFIELD_BIT(ial->depth_att + 1)
+                            : 0;
+   uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                              ? BITFIELD_BIT(0)
+                           : ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
+                              ? BITFIELD_BIT(ial->stencil_att + 1)
+                              : 0;
+
+   return (depth_mask | stencil_mask) & fs->fs.input_attachment_read;
+}
+
 #endif
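A note on the encoding consumed by color_attachment_read_mask() and
zs_attachment_read() above: fs->fs.input_attachment_read reserves bit 0 for
the input attachment declared without an InputAttachmentIndex (the one
Vulkan ties to the depth/stencil attachment), and bit (i + 1) for
InputAttachmentIndex i. A self-contained sketch of that mapping, with a
hypothetical helper that is not part of the commit:

   #include <stdint.h>

   #define BITFIELD_BIT(b) (1u << (b))

   /* Hypothetical helper mirroring the input_attachment_read bit layout:
    * bit 0 = attachment without an InputAttachmentIndex, bit (i + 1) =
    * InputAttachmentIndex i. */
   static inline uint32_t
   input_attachment_bit(int input_attachment_index /* -1 when absent */)
   {
      return input_attachment_index < 0
                ? BITFIELD_BIT(0)
                : BITFIELD_BIT((uint32_t)input_attachment_index + 1);
   }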
@@ -52,6 +52,18 @@ enum panvk_desc_table_id {
 };
 #endif
 
+#define PANVK_COLOR_ATTACHMENT(x) (x)
+#define PANVK_ZS_ATTACHMENT 255
+
+struct panvk_input_attachment_info {
+   uint32_t target;
+   uint32_t conversion;
+};
+
+/* One attachment per color, one for depth, one for stencil, and the last one
+ * for the attachment without an InputAttachmentIndex attribute. */
+#define INPUT_ATTACHMENT_MAP_SIZE 11
+
 #define FAU_WORD_SIZE sizeof(uint64_t)
 
 #define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
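For reference, the arithmetic behind INPUT_ATTACHMENT_MAP_SIZE, assuming
MAX_RTS is 8 (the slot assignments are inferred from the ia_idx
computations elsewhere in this commit):

   /* slot 0     -> input attachment without an InputAttachmentIndex
    * slot i + 1 -> InputAttachmentIndex i; colors can use indices 0..7,
    *               and depth/stencil can take two more distinct indices. */
   #define MAX_RTS 8
   #define INPUT_ATTACHMENT_MAP_SIZE (1 /* no-index slot */ + MAX_RTS + 2)
   _Static_assert(INPUT_ATTACHMENT_MAP_SIZE == 11, "matches the define above");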
@@ -79,6 +91,8 @@ struct panvk_graphics_sysvals {
    aligned_u64 push_consts;
    aligned_u64 printf_buffer_address;
 
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+
 #if PAN_ARCH <= 7
    /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
     * layer, and filter primitives at the VS level.
@@ -265,6 +279,7 @@ struct panvk_shader {
 
    struct {
       struct pan_earlyzs_lut earlyzs_lut;
+      uint32_t input_attachment_read;
    } fs;
 };
 
@@ -539,6 +539,84 @@ panvk_per_arch(cmd_preload_render_area_border)(
    panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
 }
 
+static void
+prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
+{
+   const struct vk_input_attachment_location_state *ial =
+      &cmdbuf->vk.dynamic_graphics_state.ial;
+   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
+   uint32_t catt_count =
+      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
+         ? MAX_RTS
+         : ial->color_attachment_count;
+
+   memset(iam, ~0, sizeof(iam));
+
+   assert(catt_count <= MAX_RTS);
+
+   for (uint32_t i = 0; i < catt_count; i++) {
+      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
+          !(cmdbuf->state.gfx.render.bound_attachments &
+            MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
+         continue;
+
+      VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+      uint32_t ia_idx = ial->color_map[i] + 1;
+      assert(ia_idx < ARRAY_SIZE(iam));
+
+      iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+#if PAN_ARCH <= 7
+         cfg.register_format =
+            vk_format_is_uint(fmt)   ? MALI_REGISTER_FILE_FORMAT_U32
+            : vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
+                                     : MALI_REGISTER_FILE_FORMAT_F32;
+#endif
+      }
+
+      iam[ia_idx].conversion = conv.opaque[0];
+   }
+
+   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx =
+         ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+
+#if PAN_ARCH <= 7
+      /* On v7, we need to pass the depth format around. If we use a conversion
+       * of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
+      VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
+      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
+      struct mali_internal_conversion_packed conv;
+
+      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
+         cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
+         cfg.memory_format =
+            GENX(panfrost_dithered_format_from_pipe_format)(pfmt, false);
+      }
+      iam[ia_idx].conversion = conv.opaque[0];
+#endif
+   }
+
+   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
+      uint32_t ia_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                           ? 0
+                           : ial->stencil_att + 1;
+
+      assert(ia_idx < ARRAY_SIZE(iam));
+      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
+      set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
+}
+
 /* This value has been selected to get
  * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
  */
@@ -647,6 +725,9 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
       }
    }
 
+   if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
+      prepare_iam_sysvals(cmdbuf, dirty_sysvals);
+
    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
 
 #if PAN_ARCH <= 7
@@ -30,6 +30,7 @@
 #include "panvk_device.h"
+#include "panvk_shader.h"
 
 #include "vk_graphics_state.h"
 #include "vk_pipeline.h"
 #include "vk_pipeline_layout.h"
 
@@ -72,6 +73,7 @@ struct lower_desc_ctx {
    bool add_bounds_checks;
    nir_address_format ubo_addr_format;
    nir_address_format ssbo_addr_format;
+   struct panvk_shader *shader;
 };
 
 static nir_address_format
@@ -827,6 +829,137 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
    }
 }
 
+static bool
+lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
+                            void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_image_deref_load &&
+       intr->intrinsic != nir_intrinsic_image_deref_sparse_load)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(deref->type);
+   if (image_dim != GLSL_SAMPLER_DIM_SUBPASS &&
+       image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
+      return false;
+
+   struct panvk_shader *shader = data;
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   assert(var);
+
+   const unsigned iam_idx =
+      var->data.index != NIR_VARIABLE_NO_INDEX ? var->data.index + 1 : 0;
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+
+   shader->fs.input_attachment_read |= BITFIELD_BIT(iam_idx);
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_def *target =
+      nir_load_input_attachment_target_pan(b, nir_imm_int(b, iam_idx));
+   nir_def *load_img, *load_output;
+
+   nir_push_if(b, nir_ine_imm(b, target, ~0));
+   {
+      nir_def *is_color_att = nir_ilt_imm(b, target, 8);
+      nir_def *load_color, *load_zs;
+      nir_io_semantics iosem = {0};
+
+      nir_push_if(b, is_color_att);
+      {
+         nir_def *conversion =
+            nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+
+         iosem.location = FRAG_RESULT_DATA0;
+         load_color = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+      }
+      nir_push_else(b, NULL);
+      {
+#if PAN_ARCH <= 7
+         /* On v7, we need to pass the depth format around. If we use a
+          * conversion of zero, like we do on v9+, the GPU reports an
+          * INVALID_INSTR_ENC. */
+         struct mali_internal_conversion_packed stencil_conv;
+
+         pan_pack(&stencil_conv, INTERNAL_CONVERSION, cfg) {
+            cfg.register_format = MALI_REGISTER_FILE_FORMAT_U32;
+            cfg.memory_format = GENX(panfrost_dithered_format_from_pipe_format)(
+               PIPE_FORMAT_S8_UINT, false);
+         }
+
+         nir_def *conversion =
+            dest_type == nir_type_uint32
+               ? nir_imm_int(b, stencil_conv.opaque[0])
+               : nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
+#else
+         nir_def *conversion = nir_imm_int(b, 0);
+#endif
+
+         iosem.location = dest_type == nir_type_float32 ? FRAG_RESULT_DEPTH
+                                                        : FRAG_RESULT_STENCIL;
+         target = nir_imm_int(b, 0);
+         load_zs = nir_load_converted_output_pan(
+            b, intr->def.num_components, intr->def.bit_size, target,
+            intr->src[2].ssa, conversion, .dest_type = dest_type,
+            .io_semantics = iosem);
+
+         /* If we loaded the stencil value, the upper 24 bits might contain
+          * garbage, hence the masking done here. */
+         if (iosem.location == FRAG_RESULT_STENCIL)
+            load_zs = nir_iand_imm(b, load_zs, BITFIELD_MASK(8));
+      }
+      nir_pop_if(b, NULL);
+
+      load_output = nir_if_phi(b, load_color, load_zs);
+   }
+   nir_push_else(b, NULL);
+   {
+      load_img =
+         intr->intrinsic == nir_intrinsic_image_deref_sparse_load
+            ? nir_image_deref_sparse_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type)
+            : nir_image_deref_load(
+                 b, intr->num_components, intr->def.bit_size, intr->src[0].ssa,
+                 intr->src[1].ssa, intr->src[2].ssa, intr->src[3].ssa,
+                 .image_dim = nir_intrinsic_image_dim(intr),
+                 .image_array = nir_intrinsic_image_array(intr),
+                 .format = nir_intrinsic_format(intr),
+                 .access = nir_intrinsic_access(intr), .dest_type = dest_type);
+   }
+   nir_pop_if(b, NULL);
+
+   nir_def_replace(&intr->def, nir_if_phi(b, load_output, load_img));
+
+   return true;
+}
+
+static bool
+lower_input_attachment_loads(nir_shader *nir, struct panvk_shader *shader)
+{
+   bool progress = false;
+
+   NIR_PASS(progress, nir, nir_shader_intrinsics_pass,
+            lower_input_attachment_load, nir_metadata_control_flow, shader);
+
+   /* Lower the remaining input attachment loads. */
+   struct nir_input_attachment_options lower_input_attach_opts = {
+      .use_fragcoord_sysval = true,
+      .use_layer_id_sysval = true,
+   };
+   NIR_PASS(progress, nir, nir_lower_input_attachments,
+            &lower_input_attach_opts);
+
+   return progress;
+}
+
 static bool
 lower_img_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                     struct lower_desc_ctx *ctx)
@@ -1231,11 +1364,13 @@ upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader,
 void
 panvk_per_arch(nir_lower_descriptors)(
    nir_shader *nir, struct panvk_device *dev,
-   const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
+   const struct vk_pipeline_robustness_state *rs,
+   uint32_t set_layout_count,
    struct vk_descriptor_set_layout *const *set_layouts,
    struct panvk_shader *shader)
 {
    struct lower_desc_ctx ctx = {
+      .shader = shader,
       .add_bounds_checks =
          rs->storage_buffers !=
            VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
@@ -1279,6 +1414,9 @@ panvk_per_arch(nir_lower_descriptors)(
    create_copy_table(nir, &ctx);
    upload_shader_desc_info(dev, shader, &ctx.desc_info);
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS(progress, nir, lower_input_attachment_loads, shader);
+
    NIR_PASS(progress, nir, nir_shader_instructions_pass,
             lower_descriptors_instr, nir_metadata_control_flow, &ctx);
 
@@ -62,12 +62,18 @@
 #include "vk_shader.h"
 #include "vk_util.h"
 
+struct panvk_lower_sysvals_context {
+   struct panvk_shader *shader;
+   const struct vk_graphics_pipeline_state *state;
+};
+
 static bool
 panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
+   const struct panvk_lower_sysvals_context *ctx = data;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    unsigned bit_size = intr->def.bit_size;
    nir_def *val = NULL;
@@ -127,6 +133,52 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
       val = load_sysval(b, graphics, bit_size, printf_buffer_address);
       break;
 
+   case nir_intrinsic_load_input_attachment_target_pan: {
+      const struct vk_input_attachment_location_state *ial =
+         ctx->state ? ctx->state->ial : NULL;
+
+      if (ial) {
+         uint32_t index = nir_src_as_uint(intr->src[0]);
+         uint32_t depth_idx = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                 ? 0
+                                 : ial->depth_att + 1;
+         uint32_t stencil_idx = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
+                                   ? 0
+                                   : ial->stencil_att + 1;
+         uint32_t target = ~0;
+
+         if (depth_idx == index || stencil_idx == index) {
+            target = PANVK_ZS_ATTACHMENT;
+         } else {
+            for (unsigned i = 0; i < ial->color_attachment_count; i++) {
+               if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
+                  continue;
+
+               if (ial->color_map[i] + 1 == index) {
+                  target = PANVK_COLOR_ATTACHMENT(i);
+                  break;
+               }
+            }
+         }
+
+         val = nir_imm_int(b, target);
+      } else {
+         nir_def *ia_info =
+            load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+         val = nir_channel(b, ia_info, 0);
+      }
+      break;
+   }
+
+   case nir_intrinsic_load_input_attachment_conv_pan: {
+      nir_def *ia_info =
+         load_sysval_entry(b, graphics, bit_size, iam, intr->src[0].ssa);
+
+      val = nir_channel(b, ia_info, 1);
+      break;
+   }
+
    default:
       return false;
    }
@@ -326,15 +378,6 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev,
    NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
    NIR_PASS(_, nir, nir_opt_loop);
 
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      struct nir_input_attachment_options lower_input_attach_opts = {
-         .use_fragcoord_sysval = true,
-         .use_layer_id_sysval = true,
-      };
-
-      NIR_PASS(_, nir, nir_lower_input_attachments, &lower_input_attach_opts);
-   }
-
    /* Do texture lowering here. Yes, it's a duplication of the texture
     * lowering in bifrost_compile. However, we need to lower texture stuff
     * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
@@ -400,6 +443,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
    _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
                        sizeof(state->rp->view_mask));
 
+   if (state->ial)
+      _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial));
+
    _mesa_blake3_final(&blake3_ctx, blake3_out);
 }
 
@@ -677,6 +723,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
                 struct vk_descriptor_set_layout *const *set_layouts,
                 const struct vk_pipeline_robustness_state *rs,
                 uint32_t *noperspective_varyings,
+                const struct vk_graphics_pipeline_state *state,
                 const struct panfrost_compile_inputs *compile_input,
                 struct panvk_shader *shader)
 {
@@ -813,8 +860,13 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
       NIR_PASS(_, nir, pan_nir_lower_static_noperspective,
                *noperspective_varyings);
 
+   struct panvk_lower_sysvals_context lower_sysvals_ctx = {
+      .shader = shader,
+      .state = state,
+   };
+
    NIR_PASS(_, nir, nir_shader_instructions_pass, panvk_lower_sysvals,
-            nir_metadata_control_flow, NULL);
+            nir_metadata_control_flow, &lower_sysvals_ctx);
 
    lower_load_push_consts(nir, shader);
 }
@@ -1134,7 +1186,8 @@ panvk_compile_shader(struct panvk_device *dev,
       nir->info.fs.uses_sample_shading = true;
 
    panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
-                   info->robustness, noperspective_varyings, &inputs, shader);
+                   info->robustness, noperspective_varyings, state, &inputs,
+                   shader);
 
 #if PAN_ARCH >= 9
    if (info->stage == MESA_SHADER_FRAGMENT)
@@ -1319,7 +1372,6 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
    VkResult result;
 
    blob_copy_bytes(blob, &info, sizeof(info));
-
    if (blob->overrun)
       return panvk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
 
@@ -1340,6 +1392,8 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
 
    case MESA_SHADER_FRAGMENT:
       shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
+      blob_copy_bytes(blob, &shader->fs.input_attachment_read,
+                      sizeof(shader->fs.input_attachment_read));
       break;
 
    default:
@@ -1441,6 +1495,11 @@ panvk_shader_serialize(struct vk_device *vk_dev,
                        sizeof(shader->cs.local_size));
       break;
 
+   case MESA_SHADER_FRAGMENT:
+      blob_write_bytes(blob, &shader->fs.input_attachment_read,
+                       sizeof(shader->fs.input_attachment_read));
+      break;
+
    default:
       break;
    }