mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
panvk/csf: Optimize read-only tile buffer access
When the color/input attachment map is known at compile time, we can determine the set of read-only render targets and replace .wait with .wait_resource flows, in order to avoid read-after-read serialization. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
This commit is contained in:
parent
4f4ac56145
commit
24b1aa6c28
5 changed files with 126 additions and 23 deletions
|
|
@ -1465,6 +1465,10 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|||
struct cs_builder *b =
|
||||
panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
|
||||
const struct panvk_shader *fs = get_fs(cmdbuf);
|
||||
bool dcd2_dirty =
|
||||
fs_user_dirty(cmdbuf) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, COLOR_ATTACHMENT_MAP);
|
||||
bool dcd0_dirty =
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_CULL_MODE) ||
|
||||
|
|
@ -1486,7 +1490,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|||
dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) ||
|
||||
fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE) ||
|
||||
gfx_state_dirty(cmdbuf, OQ);
|
||||
gfx_state_dirty(cmdbuf, OQ) || dcd2_dirty;
|
||||
bool dcd1_dirty = dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) ||
|
||||
/* line mode needs primitive topology */
|
||||
|
|
@ -1505,6 +1509,14 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|||
bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
|
||||
bool writes_z = writes_depth(cmdbuf);
|
||||
bool writes_s = writes_stencil(cmdbuf);
|
||||
uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
|
||||
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
|
||||
uint8_t rt_written = 0, rt_read = 0;
|
||||
|
||||
if (fs) {
|
||||
rt_written = color_attachment_written_mask(fs, &dyns->cal);
|
||||
rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
|
||||
}
|
||||
|
||||
bool msaa = dyns->ms.rasterization_samples > 1;
|
||||
if ((ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
|
||||
|
|
@ -1524,12 +1536,6 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|||
struct mali_dcd_flags_0_packed dcd0;
|
||||
pan_pack(&dcd0, DCD_FLAGS_0, cfg) {
|
||||
if (fs) {
|
||||
uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
|
||||
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
|
||||
uint8_t rt_written = color_attachment_written_mask(
|
||||
fs, &cmdbuf->vk.dynamic_graphics_state.cal);
|
||||
uint8_t rt_read =
|
||||
color_attachment_read_mask(fs, &dyns->ial, rt_mask);
|
||||
bool zs_read = zs_attachment_read(fs, &dyns->ial);
|
||||
|
||||
cfg.allow_forward_pixel_to_kill =
|
||||
|
|
@ -1594,6 +1600,17 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|||
cs_update_vt_ctx(b)
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD1), dcd1.opaque[0]);
|
||||
}
|
||||
|
||||
if (dcd2_dirty) {
|
||||
struct mali_dcd_flags_2_packed dcd2;
|
||||
pan_pack(&dcd2, DCD_FLAGS_2, cfg) {
|
||||
cfg.read_mask = rt_read;
|
||||
cfg.write_mask = rt_written;
|
||||
}
|
||||
|
||||
cs_update_vt_ctx(b)
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), dcd2.opaque[0]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1750,9 +1767,6 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
|||
uint32_t varying_size = get_varying_slots(cmdbuf) * 16;
|
||||
|
||||
cs_update_vt_ctx(b) {
|
||||
/* We don't use the resource dep system yet. */
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, DCD2), 0);
|
||||
|
||||
prepare_index_buffer(cmdbuf, draw);
|
||||
|
||||
set_tiler_idvs_flags(b, cmdbuf, draw);
|
||||
|
|
|
|||
|
|
@ -364,7 +364,7 @@ void panvk_per_arch(nir_lower_descriptors)(
|
|||
nir_shader *nir, struct panvk_device *dev,
|
||||
const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
|
||||
struct vk_descriptor_set_layout *const *set_layouts,
|
||||
struct panvk_shader *shader);
|
||||
const struct vk_graphics_pipeline_state *state, struct panvk_shader *shader);
|
||||
|
||||
/* This a stripped-down version of panvk_shader for internal shaders that
|
||||
* are managed by vk_meta (blend and preload shaders). Those don't need the
|
||||
|
|
|
|||
|
|
@ -561,6 +561,14 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
|
|||
|
||||
fill_textures(cmdbuf, fbinfo, key, descs.cpu + PANVK_DESCRIPTOR_SIZE);
|
||||
|
||||
uint32_t rt_written = 0;
|
||||
if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
for (unsigned i = 0; i < fbinfo->rt_count; i++) {
|
||||
if (fbinfo->rts[i].preload)
|
||||
rt_written |= BITFIELD_BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT)
|
||||
fill_bds(fbinfo, key, bds.cpu);
|
||||
|
||||
|
|
@ -646,6 +654,7 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
|
|||
cfg.shader.resources = res_table.gpu | 1;
|
||||
cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd);
|
||||
cfg.shader.thread_storage = cmdbuf->state.gfx.tsd;
|
||||
cfg.flags_2.write_mask = rt_written;
|
||||
}
|
||||
|
||||
if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
|
|
|
|||
|
|
@ -829,6 +829,11 @@ get_img_index(nir_builder *b, nir_deref_instr *deref,
|
|||
}
|
||||
}
|
||||
|
||||
struct panvk_lower_input_attachment_load_ctx {
|
||||
uint32_t ro_color_mask;
|
||||
struct panvk_shader *shader;
|
||||
};
|
||||
|
||||
static bool
|
||||
lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
void *data)
|
||||
|
|
@ -843,7 +848,8 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
|
||||
return false;
|
||||
|
||||
struct panvk_shader *shader = data;
|
||||
const struct panvk_lower_input_attachment_load_ctx *ctx = data;
|
||||
struct panvk_shader *shader = ctx->shader;
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
assert(var);
|
||||
|
||||
|
|
@ -869,12 +875,28 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
{
|
||||
nir_def *conversion =
|
||||
nir_load_input_attachment_conv_pan(b, nir_imm_int(b, iam_idx));
|
||||
nir_def *is_read_only =
|
||||
nir_i2b(b, nir_iand_imm(b, nir_ishl(b, nir_imm_int(b, 1), target),
|
||||
ctx->ro_color_mask));
|
||||
nir_def *load_ro_color, *load_rw_color;
|
||||
|
||||
iosem.location = FRAG_RESULT_DATA0;
|
||||
load_color = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
.io_semantics = iosem);
|
||||
nir_push_if(b, is_read_only);
|
||||
{
|
||||
load_ro_color = nir_load_readonly_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
.io_semantics = iosem);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
load_rw_color = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
.io_semantics = iosem);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
load_color = nir_if_phi(b, load_ro_color, load_rw_color);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
|
|
@ -942,12 +964,71 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_input_attachment_loads(nir_shader *nir, struct panvk_shader *shader)
|
||||
collect_frag_writes(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
|
||||
|
||||
if (deref->modes != nir_var_shader_out)
|
||||
return false;
|
||||
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
assert(var);
|
||||
|
||||
if (var->data.location < FRAG_RESULT_DATA0 ||
|
||||
var->data.location > FRAG_RESULT_DATA7)
|
||||
return false;
|
||||
|
||||
uint32_t *written_mask = data;
|
||||
|
||||
*written_mask |= BITFIELD_BIT(var->data.location - FRAG_RESULT_DATA0);
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
readonly_color_mask(nir_shader *nir,
|
||||
const struct vk_graphics_pipeline_state *state)
|
||||
{
|
||||
if (!state || !state->ial || !state->cal)
|
||||
return 0;
|
||||
|
||||
uint32_t in_mask = 0, out_mask = 0;
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->ial->color_map); i++) {
|
||||
if (i >= state->ial->color_attachment_count)
|
||||
break;
|
||||
|
||||
if (state->ial->color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
|
||||
in_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, collect_frag_writes,
|
||||
nir_metadata_all, &out_mask);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->cal->color_map); i++) {
|
||||
if (state->ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
out_mask &= ~BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
return in_mask & ~out_mask;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_input_attachment_loads(nir_shader *nir,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
struct panvk_shader *shader)
|
||||
{
|
||||
bool progress = false;
|
||||
struct panvk_lower_input_attachment_load_ctx ia_load_ctx = {
|
||||
.ro_color_mask = readonly_color_mask(nir, state),
|
||||
.shader = shader,
|
||||
};
|
||||
|
||||
NIR_PASS(progress, nir, nir_shader_intrinsics_pass,
|
||||
lower_input_attachment_load, nir_metadata_control_flow, shader);
|
||||
lower_input_attachment_load, nir_metadata_control_flow,
|
||||
&ia_load_ctx);
|
||||
|
||||
/* Lower the remaining input attachment loads. */
|
||||
struct nir_input_attachment_options lower_input_attach_opts = {
|
||||
|
|
@ -1364,10 +1445,9 @@ upload_shader_desc_info(struct panvk_device *dev, struct panvk_shader *shader,
|
|||
void
|
||||
panvk_per_arch(nir_lower_descriptors)(
|
||||
nir_shader *nir, struct panvk_device *dev,
|
||||
const struct vk_pipeline_robustness_state *rs,
|
||||
uint32_t set_layout_count,
|
||||
const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
|
||||
struct vk_descriptor_set_layout *const *set_layouts,
|
||||
struct panvk_shader *shader)
|
||||
const struct vk_graphics_pipeline_state *state, struct panvk_shader *shader)
|
||||
{
|
||||
struct lower_desc_ctx ctx = {
|
||||
.shader = shader,
|
||||
|
|
@ -1415,7 +1495,7 @@ panvk_per_arch(nir_lower_descriptors)(
|
|||
upload_shader_desc_info(dev, shader, &ctx.desc_info);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(progress, nir, lower_input_attachment_loads, shader);
|
||||
NIR_PASS(progress, nir, lower_input_attachment_loads, state, shader);
|
||||
|
||||
NIR_PASS(progress, nir, nir_shader_instructions_pass,
|
||||
lower_descriptors_instr, nir_metadata_control_flow, &ctx);
|
||||
|
|
|
|||
|
|
@ -769,7 +769,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
#endif
|
||||
|
||||
panvk_per_arch(nir_lower_descriptors)(nir, dev, rs, set_layout_count,
|
||||
set_layouts, shader);
|
||||
set_layouts, state, shader);
|
||||
|
||||
NIR_PASS(_, nir, nir_split_var_copies);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue