mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 13:28:06 +02:00
asahi: Implement color masks with masked stores
Blend states can require masking colour. Currently, this is handled by nir_lower_blend, which lowers masks to a read-modify-write operation as required on Mali hardware. However, our "tilebuffer store" instruction supports a write mask, allowing us to write only a subset of channels to the tilebuffer. It's more efficient to use that than to emit pointless tilebuffer loads. Note that even without tilebuffer loads, non-opaque masks don't work with opaque pass types. Here, we handle this with a translucent pass type, which gets HSR to do the right thing and is consistent with the pass type used previously. However, it's a bit heavy-handed -- Apple manages to use an opaque pass type with masking but with some unknown HSR fields twiddled. IMO reverse-engineering those details shouldn't block this because this gets us closer to optimal (just not all the way there) and is strictly better than what we had before. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21431>
This commit is contained in:
parent
3084e6e689
commit
029c686c6d
5 changed files with 94 additions and 12 deletions
|
|
@ -19,7 +19,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
|
|||
|
||||
agx_preprocess_nir(shader);
|
||||
if (tib)
|
||||
agx_nir_lower_tilebuffer(shader, tib);
|
||||
agx_nir_lower_tilebuffer(shader, tib, NULL, NULL);
|
||||
|
||||
struct agx_meta_shader *res = rzalloc(cache->ht, struct agx_meta_shader);
|
||||
agx_compile_shader_nir(shader, key, NULL, &binary, &res->info);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/agx_internal_formats.h"
|
||||
#include "agx_nir_format_helpers.h"
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "nir.h"
|
||||
|
|
@ -10,6 +11,12 @@
|
|||
|
||||
#define ALL_SAMPLES 0xFF
|
||||
|
||||
/* State handed to tib_impl through the lowering callback's data pointer. */
struct ctx {
|
||||
/* Tilebuffer layout; tib_impl reads per-render-target store offsets from it */
struct agx_tilebuffer_layout *tib;
|
||||
/* Optional per-render-target colour masks. When non-NULL, tib_impl ANDs
 * entry [rt] into the write mask of the store it emits.
 */
uint8_t *colormasks;
|
||||
/* Out-flag: tib_impl sets it to true when a store ends up with a partial
 * write mask, and asserts that it is non-NULL in that case.
 */
bool *translucent;
|
||||
};
|
||||
|
||||
static bool
|
||||
tib_filter(const nir_instr *instr, UNUSED const void *_)
|
||||
{
|
||||
|
|
@ -29,7 +36,8 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
|
|||
static nir_ssa_def *
|
||||
tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
struct agx_tilebuffer_layout *tib = data;
|
||||
struct ctx *ctx = data;
|
||||
struct agx_tilebuffer_layout *tib = ctx->tib;
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
|
|
@ -41,10 +49,38 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
unsigned comps = util_format_get_nr_components(logical_format);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_output) {
|
||||
/* Only write components that actually exist */
|
||||
uint16_t write_mask = BITFIELD_MASK(comps);
|
||||
|
||||
/* Delete stores to nonexistent render targets */
|
||||
if (logical_format == PIPE_FORMAT_NONE)
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
/* Only write colours masked by the blend state */
|
||||
if (ctx->colormasks)
|
||||
write_mask &= ctx->colormasks[rt];
|
||||
|
||||
/* Masked stores require a translucent pass type */
|
||||
if (write_mask != BITFIELD_MASK(comps)) {
|
||||
assert(ctx->translucent != NULL &&
|
||||
"colour masking requires translucency");
|
||||
|
||||
assert(agx_internal_format_supports_mask(format) &&
|
||||
"write mask but format cannot be masked");
|
||||
|
||||
*(ctx->translucent) = true;
|
||||
}
|
||||
|
||||
/* But we ignore the NIR write mask for that, since it's basically an
|
||||
* optimization hint.
|
||||
*/
|
||||
if (agx_internal_format_supports_mask(format))
|
||||
write_mask &= nir_intrinsic_write_mask(intr);
|
||||
|
||||
/* Delete stores that are entirely masked out */
|
||||
if (!write_mask)
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
nir_ssa_def *value = intr->src[0].ssa;
|
||||
|
||||
/* Trim to format as required by hardware */
|
||||
|
|
@ -60,11 +96,9 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
value = nir_f2f32(b, value);
|
||||
}
|
||||
|
||||
nir_store_local_pixel_agx(
|
||||
b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
|
||||
.base = tib->offset_B[rt],
|
||||
.write_mask = nir_intrinsic_write_mask(intr) & BITFIELD_MASK(comps),
|
||||
.format = format);
|
||||
nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
|
||||
.base = tib->offset_B[rt],
|
||||
.write_mask = write_mask, .format = format);
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
} else {
|
||||
|
|
@ -101,8 +135,16 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib)
|
||||
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
|
||||
uint8_t *colormasks, bool *translucent)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
return nir_shader_lower_instructions(shader, tib_filter, tib_impl, tib);
|
||||
|
||||
struct ctx ctx = {
|
||||
.tib = tib,
|
||||
.colormasks = colormasks,
|
||||
.translucent = translucent,
|
||||
};
|
||||
|
||||
return nir_shader_lower_instructions(shader, tib_filter, tib_impl, &ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "agx_tilebuffer.h"
|
||||
#include <assert.h>
|
||||
#include "compiler/agx_internal_formats.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "agx_formats.h"
|
||||
#include "agx_usc.h"
|
||||
|
|
@ -77,6 +78,13 @@ agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
|
|||
return agx_pixel_format[tib->logical_format[rt]].internal;
|
||||
}
|
||||
|
||||
/*
 * Returns the result of agx_internal_format_supports_mask for the physical
 * format that agx_tilebuffer_physical_format reports for render target rt.
 */
bool
|
||||
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
|
||||
{
|
||||
enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
|
||||
/* The pipe_format value is reinterpreted as an agx_internal_formats value */
return agx_internal_format_supports_mask((enum agx_internal_formats)fmt);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
agx_shared_layout_from_tile_size(struct agx_tile_size t)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -47,7 +47,8 @@ agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
|
|||
uint8_t nr_samples);
|
||||
|
||||
bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
|
||||
struct agx_tilebuffer_layout *tib);
|
||||
struct agx_tilebuffer_layout *tib,
|
||||
uint8_t *colormasks, bool *translucent);
|
||||
|
||||
void agx_usc_tilebuffer(struct agx_usc_builder *b,
|
||||
struct agx_tilebuffer_layout *tib);
|
||||
|
|
@ -57,6 +58,9 @@ uint32_t agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib);
|
|||
enum pipe_format
|
||||
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt);
|
||||
|
||||
bool agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib,
|
||||
unsigned rt);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1370,6 +1370,8 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
|||
|
||||
nir_shader *nir = nir_shader_clone(NULL, so->nir);
|
||||
|
||||
bool force_translucent = false;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
struct asahi_vs_shader_key *key = &key_->vs;
|
||||
|
||||
|
|
@ -1393,9 +1395,25 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
|||
opts.format[i] = key->rt_formats[i];
|
||||
|
||||
memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
|
||||
NIR_PASS_V(nir, nir_lower_blend, &opts);
|
||||
|
||||
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib);
|
||||
/* It's more efficient to use masked stores (with
|
||||
* agx_nir_lower_tilebuffer) than to emulate colour masking with
|
||||
* nir_lower_blend.
|
||||
*/
|
||||
uint8_t colormasks[PIPE_MAX_COLOR_BUFS] = {0};
|
||||
|
||||
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
|
||||
if (agx_tilebuffer_supports_mask(&tib, i)) {
|
||||
colormasks[i] = key->blend.rt[i].colormask;
|
||||
opts.rt[i].colormask = BITFIELD_MASK(4);
|
||||
} else {
|
||||
colormasks[i] = BITFIELD_MASK(4);
|
||||
}
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_blend, &opts);
|
||||
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks,
|
||||
&force_translucent);
|
||||
|
||||
if (key->sprite_coord_enable) {
|
||||
NIR_PASS_V(nir, nir_lower_texcoord_replace_late,
|
||||
|
|
@ -1415,6 +1433,16 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
|||
|
||||
agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
|
||||
|
||||
/* reads_tib => Translucent pass type */
|
||||
compiled->info.reads_tib |= force_translucent;
|
||||
|
||||
/* Could be optimized to use non-translucent pass types with the appropriate
|
||||
* HSR configuration, but that mechanism is not yet understood. Warn that
|
||||
* we're leaving perf on the table when used.
|
||||
*/
|
||||
if (force_translucent)
|
||||
perf_debug(dev, "Translucency forced due to colour masking");
|
||||
|
||||
if (binary.size) {
|
||||
compiled->bo = agx_bo_create(dev, binary.size,
|
||||
AGX_BO_EXEC | AGX_BO_LOW_VA, "Executable");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue