asahi,hk: implement remapping in epilog

for DRLR

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31532>
This commit is contained in:
Alyssa Rosenzweig 2024-10-02 17:14:23 -04:00 committed by Marge Bot
parent 03a81d79f8
commit 9845e01f0f
4 changed files with 73 additions and 34 deletions

View file

@ -117,23 +117,24 @@ struct agx_fs_epilog_link_info {
*/
uint8_t size_32;
/* Mask of render targets written by the main shader */
uint8_t rt_written;
/* Mask of locations written by the main shader */
uint8_t loc_written;
/* If set, the API fragment shader uses sample shading. This means the epilog
* will be invoked per-sample as well.
*/
unsigned sample_shading : 1;
/* If set, broadcast the render target #0 value to all render targets. This
* implements gl_FragColor semantics.
/* If set, broadcast location #0 value to all render targets. This
* implements gl_FragColor semantics. This tells the driver to set remap
* appropriately.
*/
unsigned broadcast_rt0 : 1;
/* If set, force render target 0's W channel to 1.0. This optimizes blending
/* If set, force location 0's W channel to 1.0. This optimizes blending
* calculations in some applications.
*/
unsigned rt0_w_1 : 1;
unsigned loc0_w_1 : 1;
/* If set, the API fragment shader wants to write depth/stencil respectively.
* This happens in the epilog for correctness when the epilog discards.
@ -162,6 +163,12 @@ struct agx_fs_epilog_key {
/* Blend state. Blending happens in the epilog. */
struct agx_blend_key blend;
/* Colour attachment remapping for Vulkan. Negative values indicate that an
* attachment is discarded. Non-negative values indicate the output location
* we want to store at the indexed colour attachment.
*/
int8_t remap[8];
/* Tilebuffer configuration */
enum pipe_format rt_formats[8];
uint8_t nr_samples;

View file

@ -5,6 +5,7 @@
*/
#include "gallium/include/pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "agx_abi.h"
#include "agx_linker.h"
#include "agx_nir_lower_gs.h"
@ -278,6 +279,26 @@ blend_uses_2src(struct agx_blend_rt_key rt)
return false;
}
/*
 * Emit the epilog code that forwards one colour from the main shader to a
 * fragment output.
 *
 * b:        builder the instructions are emitted with.
 * key:      epilog key; only the link info (size_32, loc0_w_1) is read here.
 * out_rt:   render target index, added to FRAG_RESULT_DATA0 to form the
 *           output location of the store.
 * in_loc:   ABI colour location the value is loaded from.
 * dual_src: forwarded verbatim as the store's dual_source_blend_index.
 */
static void
copy_colour(nir_builder *b, const struct agx_fs_epilog_key *key,
            unsigned out_rt, unsigned in_loc, bool dual_src)
{
   /* size_32 is a per-location mask: a set bit selects 32-bit, else 16-bit. */
   bool is_32 = (key->link.size_32 & BITFIELD_BIT(in_loc)) != 0;
   unsigned bits = is_32 ? 32 : 16;

   /* Fetch the vec4 exported at this location by the main shader. */
   nir_def *colour =
      nir_load_exported_agx(b, 4, bits, .base = AGX_ABI_FOUT_COLOUR(in_loc));

   /* With loc0_w_1 set, location 0's W channel (component 3) is forced to
    * 1.0 here instead of using the loaded component.
    */
   bool force_w_one = key->link.loc0_w_1 && in_loc == 0;
   if (force_w_one) {
      nir_def *one = nir_imm_floatN_t(b, 1.0, bits);
      colour = nir_vector_insert_imm(b, colour, one, 3);
   }

   /* Constant zero passed as the store's second source. */
   nir_def *zero = nir_imm_int(b, 0);

   nir_store_output(b, colour, zero,
                    .io_semantics.location = FRAG_RESULT_DATA0 + out_rt,
                    .io_semantics.dual_source_blend_index = dual_src,
                    .src_type = nir_type_float | bits);
}
void
agx_nir_fs_epilog(nir_builder *b, const void *key_)
{
@ -288,24 +309,26 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
/* First, construct a passthrough shader reading each colour and outputting
* the value.
*/
u_foreach_bit(rt, key->link.rt_written) {
bool dual_src = (rt == 1) && blend_uses_2src(key->blend.rt[0]);
unsigned read_rt = (key->link.broadcast_rt0 && !dual_src) ? 0 : rt;
unsigned size = (key->link.size_32 & BITFIELD_BIT(read_rt)) ? 32 : 16;
for (unsigned rt = 0; rt < ARRAY_SIZE(key->remap); ++rt) {
int location = key->remap[rt];
nir_def *value =
nir_load_exported_agx(b, 4, size, .base = AGX_ABI_FOUT_COLOUR(rt));
/* Negative remaps indicate the attachment isn't written. */
if (location >= 0 && key->link.loc_written & BITFIELD_BIT(location)) {
copy_colour(b, key, rt, location, false);
if (key->link.rt0_w_1 && read_rt == 0) {
value =
nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
/* If this render target uses dual source blending, also copy the dual
* source colour. While the copy_colour above is needed even for
* missing attachments to handle alpha-to-coverage, this copy is only
* for blending so should be suppressed for missing attachments to keep
* the assert from blowing up on OpenGL.
*/
if (blend_uses_2src(key->blend.rt[rt]) &&
key->rt_formats[rt] != PIPE_FORMAT_NONE) {
assert(location == 0);
copy_colour(b, key, rt, 1, true);
}
}
nir_store_output(
b, value, nir_imm_int(b, 0),
.io_semantics.location = FRAG_RESULT_DATA0 + (dual_src ? 0 : rt),
.io_semantics.dual_source_blend_index = dual_src,
.src_type = nir_type_float | size);
}
/* Grab registers early, this has to happen in the first block. */
@ -515,31 +538,30 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
if (sem.location == FRAG_RESULT_COLOR) {
sem.location = FRAG_RESULT_DATA0;
info->broadcast_rt0 = true;
info->rt_written = ~0;
}
/* We don't use the epilog for sample mask writes */
if (sem.location < FRAG_RESULT_DATA0)
return false;
/* Determine the render target index. Dual source blending aliases a second
/* Determine the ABI location. Dual source blending aliases a second
* render target, so get that out of the way now.
*/
unsigned rt = sem.location - FRAG_RESULT_DATA0;
rt += nir_src_as_uint(intr->src[1]);
unsigned loc = sem.location - FRAG_RESULT_DATA0;
loc += nir_src_as_uint(intr->src[1]);
if (sem.dual_source_blend_index) {
assert(rt == 0);
rt = 1;
assert(loc == 0);
loc = 1;
}
info->rt_written |= BITFIELD_BIT(rt);
b->cursor = nir_instr_remove(&intr->instr);
nir_def *vec = intr->src[0].ssa;
info->loc_written |= BITFIELD_BIT(loc);
if (vec->bit_size == 32)
info->size_32 |= BITFIELD_BIT(rt);
info->size_32 |= BITFIELD_BIT(loc);
else
assert(vec->bit_size == 16);
@ -548,15 +570,15 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
nir_scalar s = nir_scalar_resolved(vec, c);
if (rt == 0 && c == 3 && nir_scalar_is_const(s) &&
if (loc == 0 && c == 3 && nir_scalar_is_const(s) &&
nir_scalar_as_uint(s) == one_f) {
info->rt0_w_1 = true;
info->loc0_w_1 = true;
} else {
unsigned stride = vec->bit_size / 16;
nir_export_agx(b, nir_channel(b, vec, c),
.base = (2 * (4 + (4 * rt))) + (comp + c) * stride);
.base = AGX_ABI_FOUT_COLOUR(loc) + (comp + c) * stride);
}
}

View file

@ -189,6 +189,9 @@ hk_cmd_buffer_dirty_render_pass(struct hk_cmd_buffer *cmd)
/* This may depend on render targets for ESO */
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
/* This may depend on render targets */
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP);
}
void
@ -2832,6 +2835,11 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
: PIPE_LOGICOP_COPY,
};
for (unsigned rt = 0; rt < ARRAY_SIZE(dyn->cal.color_map); ++rt) {
int map = dyn->cal.color_map[rt];
key.epilog.remap[rt] = map == MESA_VK_ATTACHMENT_UNUSED ? -1 : map;
}
if (dyn->ms.alpha_to_one_enable || dyn->ms.alpha_to_coverage_enable ||
dyn->cb.logic_op_enable) {

View file

@ -2352,6 +2352,8 @@ agx_update_fs(struct agx_batch *batch)
struct pipe_surface *surf = batch->key.cbufs[i];
link_key.epilog.fs.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE;
link_key.epilog.fs.remap[i] =
link_key.epilog.fs.link.broadcast_rt0 ? 0 : i;
}
memcpy(&link_key.epilog.fs.blend, &ctx->blend->key,
@ -2362,7 +2364,7 @@ agx_update_fs(struct agx_batch *batch)
link_key.epilog.fs.link.rt_spill_base = 0;
/* Try to disable blending to get rid of some fsats */
if (link_key.epilog.fs.link.rt0_w_1) {
if (link_key.epilog.fs.link.loc0_w_1) {
struct agx_blend_rt_key *k = &link_key.epilog.fs.blend.rt[0];
k->rgb_src_factor = optimize_blend_factor_w_1(k->rgb_src_factor);