mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
asahi,hk: implement remapping in epilog
for DRLR
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31532>
This commit is contained in:
parent
03a81d79f8
commit
9845e01f0f
4 changed files with 73 additions and 34 deletions
|
|
@ -117,23 +117,24 @@ struct agx_fs_epilog_link_info {
|
|||
*/
|
||||
uint8_t size_32;
|
||||
|
||||
/* Mask of render targets written by the main shader */
|
||||
uint8_t rt_written;
|
||||
/* Mask of locations written by the main shader */
|
||||
uint8_t loc_written;
|
||||
|
||||
/* If set, the API fragment shader uses sample shading. This means the epilog
|
||||
* will be invoked per-sample as well.
|
||||
*/
|
||||
unsigned sample_shading : 1;
|
||||
|
||||
/* If set, broadcast the render target #0 value to all render targets. This
|
||||
* implements gl_FragColor semantics.
|
||||
/* If set, broadcast location #0 value to all render targets. This
|
||||
* implements gl_FragColor semantics. This tells the driver to set remap
|
||||
* appropriately.
|
||||
*/
|
||||
unsigned broadcast_rt0 : 1;
|
||||
|
||||
/* If set, force render target 0's W channel to 1.0. This optimizes blending
|
||||
/* If set, force location 0's W channel to 1.0. This optimizes blending
|
||||
* calculations in some applications.
|
||||
*/
|
||||
unsigned rt0_w_1 : 1;
|
||||
unsigned loc0_w_1 : 1;
|
||||
|
||||
/* If set, the API fragment shader wants to write depth/stencil respectively.
|
||||
* This happens in the epilog for correctness when the epilog discards.
|
||||
|
|
@ -162,6 +163,12 @@ struct agx_fs_epilog_key {
|
|||
/* Blend state. Blending happens in the epilog. */
|
||||
struct agx_blend_key blend;
|
||||
|
||||
/* Colour attachment remapping for Vulkan. Negative values indicate that an
|
||||
* attachment is discarded. Non-negative values indicate the output location we
|
||||
* want to store at the indexed colour attachment.
|
||||
*/
|
||||
int8_t remap[8];
|
||||
|
||||
/* Tilebuffer configuration */
|
||||
enum pipe_format rt_formats[8];
|
||||
uint8_t nr_samples;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include "gallium/include/pipe/p_defines.h"
|
||||
#include "util/format/u_formats.h"
|
||||
#include "agx_abi.h"
|
||||
#include "agx_linker.h"
|
||||
#include "agx_nir_lower_gs.h"
|
||||
|
|
@ -278,6 +279,26 @@ blend_uses_2src(struct agx_blend_rt_key rt)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_colour(nir_builder *b, const struct agx_fs_epilog_key *key,
|
||||
unsigned out_rt, unsigned in_loc, bool dual_src)
|
||||
{
|
||||
unsigned size = (key->link.size_32 & BITFIELD_BIT(in_loc)) ? 32 : 16;
|
||||
|
||||
nir_def *value =
|
||||
nir_load_exported_agx(b, 4, size, .base = AGX_ABI_FOUT_COLOUR(in_loc));
|
||||
|
||||
if (key->link.loc0_w_1 && in_loc == 0) {
|
||||
value =
|
||||
nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
|
||||
}
|
||||
|
||||
nir_store_output(b, value, nir_imm_int(b, 0),
|
||||
.io_semantics.location = FRAG_RESULT_DATA0 + out_rt,
|
||||
.io_semantics.dual_source_blend_index = dual_src,
|
||||
.src_type = nir_type_float | size);
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
||||
{
|
||||
|
|
@ -288,24 +309,26 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
|||
/* First, construct a passthrough shader reading each colour and outputting
|
||||
* the value.
|
||||
*/
|
||||
u_foreach_bit(rt, key->link.rt_written) {
|
||||
bool dual_src = (rt == 1) && blend_uses_2src(key->blend.rt[0]);
|
||||
unsigned read_rt = (key->link.broadcast_rt0 && !dual_src) ? 0 : rt;
|
||||
unsigned size = (key->link.size_32 & BITFIELD_BIT(read_rt)) ? 32 : 16;
|
||||
for (unsigned rt = 0; rt < ARRAY_SIZE(key->remap); ++rt) {
|
||||
int location = key->remap[rt];
|
||||
|
||||
nir_def *value =
|
||||
nir_load_exported_agx(b, 4, size, .base = AGX_ABI_FOUT_COLOUR(rt));
|
||||
/* Negative remaps indicate the attachment isn't written. */
|
||||
if (location >= 0 && key->link.loc_written & BITFIELD_BIT(location)) {
|
||||
copy_colour(b, key, rt, location, false);
|
||||
|
||||
if (key->link.rt0_w_1 && read_rt == 0) {
|
||||
value =
|
||||
nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
|
||||
/* If this render target uses dual source blending, also copy the dual
|
||||
* source colour. While the copy_colour above is needed even for
|
||||
* missing attachments to handle alpha-to-coverage, this copy is only
|
||||
* for blending so should be suppressed for missing attachments to keep
|
||||
* the assert from blowing up on OpenGL.
|
||||
*/
|
||||
if (blend_uses_2src(key->blend.rt[rt]) &&
|
||||
key->rt_formats[rt] != PIPE_FORMAT_NONE) {
|
||||
|
||||
assert(location == 0);
|
||||
copy_colour(b, key, rt, 1, true);
|
||||
}
|
||||
}
|
||||
|
||||
nir_store_output(
|
||||
b, value, nir_imm_int(b, 0),
|
||||
.io_semantics.location = FRAG_RESULT_DATA0 + (dual_src ? 0 : rt),
|
||||
.io_semantics.dual_source_blend_index = dual_src,
|
||||
.src_type = nir_type_float | size);
|
||||
}
|
||||
|
||||
/* Grab registers early, this has to happen in the first block. */
|
||||
|
|
@ -515,31 +538,30 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
if (sem.location == FRAG_RESULT_COLOR) {
|
||||
sem.location = FRAG_RESULT_DATA0;
|
||||
info->broadcast_rt0 = true;
|
||||
info->rt_written = ~0;
|
||||
}
|
||||
|
||||
/* We don't use the epilog for sample mask writes */
|
||||
if (sem.location < FRAG_RESULT_DATA0)
|
||||
return false;
|
||||
|
||||
/* Determine the render target index. Dual source blending aliases a second
|
||||
/* Determine the ABI location. Dual source blending aliases a second
|
||||
* render target, so get that out of the way now.
|
||||
*/
|
||||
unsigned rt = sem.location - FRAG_RESULT_DATA0;
|
||||
rt += nir_src_as_uint(intr->src[1]);
|
||||
unsigned loc = sem.location - FRAG_RESULT_DATA0;
|
||||
loc += nir_src_as_uint(intr->src[1]);
|
||||
|
||||
if (sem.dual_source_blend_index) {
|
||||
assert(rt == 0);
|
||||
rt = 1;
|
||||
assert(loc == 0);
|
||||
loc = 1;
|
||||
}
|
||||
|
||||
info->rt_written |= BITFIELD_BIT(rt);
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_def *vec = intr->src[0].ssa;
|
||||
|
||||
info->loc_written |= BITFIELD_BIT(loc);
|
||||
|
||||
if (vec->bit_size == 32)
|
||||
info->size_32 |= BITFIELD_BIT(rt);
|
||||
info->size_32 |= BITFIELD_BIT(loc);
|
||||
else
|
||||
assert(vec->bit_size == 16);
|
||||
|
||||
|
|
@ -548,15 +570,15 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
|
||||
u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
|
||||
nir_scalar s = nir_scalar_resolved(vec, c);
|
||||
if (rt == 0 && c == 3 && nir_scalar_is_const(s) &&
|
||||
if (loc == 0 && c == 3 && nir_scalar_is_const(s) &&
|
||||
nir_scalar_as_uint(s) == one_f) {
|
||||
|
||||
info->rt0_w_1 = true;
|
||||
info->loc0_w_1 = true;
|
||||
} else {
|
||||
unsigned stride = vec->bit_size / 16;
|
||||
|
||||
nir_export_agx(b, nir_channel(b, vec, c),
|
||||
.base = (2 * (4 + (4 * rt))) + (comp + c) * stride);
|
||||
.base = AGX_ABI_FOUT_COLOUR(loc) + (comp + c) * stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -189,6 +189,9 @@ hk_cmd_buffer_dirty_render_pass(struct hk_cmd_buffer *cmd)
|
|||
|
||||
/* This may depend on render targets for ESO */
|
||||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
|
||||
|
||||
/* This may depend on render targets */
|
||||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2832,6 +2835,11 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
: PIPE_LOGICOP_COPY,
|
||||
};
|
||||
|
||||
for (unsigned rt = 0; rt < ARRAY_SIZE(dyn->cal.color_map); ++rt) {
|
||||
int map = dyn->cal.color_map[rt];
|
||||
key.epilog.remap[rt] = map == MESA_VK_ATTACHMENT_UNUSED ? -1 : map;
|
||||
}
|
||||
|
||||
if (dyn->ms.alpha_to_one_enable || dyn->ms.alpha_to_coverage_enable ||
|
||||
dyn->cb.logic_op_enable) {
|
||||
|
||||
|
|
|
|||
|
|
@ -2352,6 +2352,8 @@ agx_update_fs(struct agx_batch *batch)
|
|||
struct pipe_surface *surf = batch->key.cbufs[i];
|
||||
|
||||
link_key.epilog.fs.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE;
|
||||
link_key.epilog.fs.remap[i] =
|
||||
link_key.epilog.fs.link.broadcast_rt0 ? 0 : i;
|
||||
}
|
||||
|
||||
memcpy(&link_key.epilog.fs.blend, &ctx->blend->key,
|
||||
|
|
@ -2362,7 +2364,7 @@ agx_update_fs(struct agx_batch *batch)
|
|||
link_key.epilog.fs.link.rt_spill_base = 0;
|
||||
|
||||
/* Try to disable blending to get rid of some fsats */
|
||||
if (link_key.epilog.fs.link.rt0_w_1) {
|
||||
if (link_key.epilog.fs.link.loc0_w_1) {
|
||||
struct agx_blend_rt_key *k = &link_key.epilog.fs.blend.rt[0];
|
||||
|
||||
k->rgb_src_factor = optimize_blend_factor_w_1(k->rgb_src_factor);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue