asahi: Implement color masks with masked stores

Blend states can require masking colour. Currently, this is handled by
nir_lower_blend, which lowers masks to a read-modify-write operation as required
on Mali hardware. However, our "tilebuffer store" instruction supports a write
mask, allowing us to write only a subset of channels to the tilebuffer. It's
more efficient to use that than to emit pointless tilebuffer loads.

Note that even without tilebuffer loads, non-opaque masks don't work with opaque
pass types. Here, we handle this with a translucent pass type, which gets HSR
to do the right thing and is consistent with the pass type used previously.
However, it's a bit heavy-handed -- Apple manages to use an opaque pass type
with masking but with some unknown HSR fields twiddled. IMO reverse-engineering
those details shouldn't block this because this gets us closer to optimal (just
not all the way there) and is strictly better than what we had before.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21431>
This commit is contained in:
Alyssa Rosenzweig 2023-02-17 18:24:38 -05:00 committed by Marge Bot
parent 3084e6e689
commit 029c686c6d
5 changed files with 94 additions and 12 deletions

View file

@ -19,7 +19,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
agx_preprocess_nir(shader);
if (tib)
agx_nir_lower_tilebuffer(shader, tib);
agx_nir_lower_tilebuffer(shader, tib, NULL, NULL);
struct agx_meta_shader *res = rzalloc(cache->ht, struct agx_meta_shader);
agx_compile_shader_nir(shader, key, NULL, &binary, &res->info);

View file

@ -3,6 +3,7 @@
* SPDX-License-Identifier: MIT
*/
#include "compiler/agx_internal_formats.h"
#include "agx_nir_format_helpers.h"
#include "agx_tilebuffer.h"
#include "nir.h"
@ -10,6 +11,12 @@
#define ALL_SAMPLES 0xFF
/* State handed to tib_impl through the opaque data pointer of
 * nir_shader_lower_instructions.
 */
struct ctx {
/* Tilebuffer layout the shader is lowered against */
struct agx_tilebuffer_layout *tib;
/* Optional colour masks indexed by render target. May be NULL, in which case
 * no blend-state mask is applied to stores.
 */
uint8_t *colormasks;
/* Output flag, set to true when a store does not cover every component of
 * its render target. Must be non-NULL whenever such a store can occur.
 */
bool *translucent;
};
static bool
tib_filter(const nir_instr *instr, UNUSED const void *_)
{
@ -29,7 +36,8 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
static nir_ssa_def *
tib_impl(nir_builder *b, nir_instr *instr, void *data)
{
struct agx_tilebuffer_layout *tib = data;
struct ctx *ctx = data;
struct agx_tilebuffer_layout *tib = ctx->tib;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
@ -41,10 +49,38 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
unsigned comps = util_format_get_nr_components(logical_format);
if (intr->intrinsic == nir_intrinsic_store_output) {
/* Only write components that actually exist */
uint16_t write_mask = BITFIELD_MASK(comps);
/* Delete stores to nonexistent render targets */
if (logical_format == PIPE_FORMAT_NONE)
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
/* Only write colours masked by the blend state */
if (ctx->colormasks)
write_mask &= ctx->colormasks[rt];
/* Masked stores require a translucent pass type */
if (write_mask != BITFIELD_MASK(comps)) {
assert(ctx->translucent != NULL &&
"colour masking requires translucency");
assert(agx_internal_format_supports_mask(format) &&
"write mask but format cannot be masked");
*(ctx->translucent) = true;
}
/* The NIR write mask is deliberately left out of the translucency decision
* above, since it's basically an optimization hint. Only apply it now.
*/
if (agx_internal_format_supports_mask(format))
write_mask &= nir_intrinsic_write_mask(intr);
/* Delete stores that are entirely masked out */
if (!write_mask)
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
nir_ssa_def *value = intr->src[0].ssa;
/* Trim to format as required by hardware */
@ -60,11 +96,9 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
value = nir_f2f32(b, value);
}
nir_store_local_pixel_agx(
b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
.base = tib->offset_B[rt],
.write_mask = nir_intrinsic_write_mask(intr) & BITFIELD_MASK(comps),
.format = format);
nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
.base = tib->offset_B[rt],
.write_mask = write_mask, .format = format);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
} else {
@ -101,8 +135,16 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
}
bool
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib)
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
uint8_t *colormasks, bool *translucent)
{
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
return nir_shader_lower_instructions(shader, tib_filter, tib_impl, tib);
struct ctx ctx = {
.tib = tib,
.colormasks = colormasks,
.translucent = translucent,
};
return nir_shader_lower_instructions(shader, tib_filter, tib_impl, &ctx);
}

View file

@ -5,6 +5,7 @@
#include "agx_tilebuffer.h"
#include <assert.h>
#include "compiler/agx_internal_formats.h"
#include "util/format/u_format.h"
#include "agx_formats.h"
#include "agx_usc.h"
@ -77,6 +78,13 @@ agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
return agx_pixel_format[tib->logical_format[rt]].internal;
}
bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
{
enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
return agx_internal_format_supports_mask((enum agx_internal_formats)fmt);
}
static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)
{

View file

@ -47,7 +47,8 @@ agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
uint8_t nr_samples);
bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
struct agx_tilebuffer_layout *tib);
struct agx_tilebuffer_layout *tib,
uint8_t *colormasks, bool *translucent);
void agx_usc_tilebuffer(struct agx_usc_builder *b,
struct agx_tilebuffer_layout *tib);
@ -57,6 +58,9 @@ uint32_t agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib);
enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt);
bool agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib,
unsigned rt);
#ifdef __cplusplus
} /* extern C */
#endif

View file

@ -1370,6 +1370,8 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
nir_shader *nir = nir_shader_clone(NULL, so->nir);
bool force_translucent = false;
if (nir->info.stage == MESA_SHADER_VERTEX) {
struct asahi_vs_shader_key *key = &key_->vs;
@ -1393,9 +1395,25 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
opts.format[i] = key->rt_formats[i];
memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
NIR_PASS_V(nir, nir_lower_blend, &opts);
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib);
/* It's more efficient to use masked stores (with
* agx_nir_lower_tilebuffer) than to emulate colour masking with
* nir_lower_blend.
*/
uint8_t colormasks[PIPE_MAX_COLOR_BUFS] = {0};
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
if (agx_tilebuffer_supports_mask(&tib, i)) {
colormasks[i] = key->blend.rt[i].colormask;
opts.rt[i].colormask = BITFIELD_MASK(4);
} else {
colormasks[i] = BITFIELD_MASK(4);
}
}
NIR_PASS_V(nir, nir_lower_blend, &opts);
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks,
&force_translucent);
if (key->sprite_coord_enable) {
NIR_PASS_V(nir, nir_lower_texcoord_replace_late,
@ -1415,6 +1433,16 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
/* reads_tib => Translucent pass type */
compiled->info.reads_tib |= force_translucent;
/* Could be optimized to use non-translucent pass types with the appropriate
* HSR configuration, but that mechanism is not yet understood. Warn that
* we're leaving perf on the table when used.
*/
if (force_translucent)
perf_debug(dev, "Translucency forced due to colour masking");
if (binary.size) {
compiled->bo = agx_bo_create(dev, binary.size,
AGX_BO_EXEC | AGX_BO_LOW_VA, "Executable");