mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-26 20:50:23 +01:00
panvk: Push our own blend descriptors
This fixes attachment remapping for KHR_dynamic_rendering_local_read. See the long comment in panvk_vX_blend.c for more details. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14484 Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39274>
This commit is contained in:
parent
f66b48d790
commit
aee14878cc
5 changed files with 105 additions and 38 deletions
|
|
@ -26,7 +26,6 @@
|
|||
#include "pan_props.h"
|
||||
|
||||
#define MAX_VBS 16
|
||||
#define MAX_RTS 8
|
||||
|
||||
struct panvk_cmd_buffer;
|
||||
|
||||
|
|
@ -139,6 +138,7 @@ struct panvk_cmd_graphics_state {
|
|||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
uint64_t blend_descs[MAX_RTS];
|
||||
uint64_t push_uniforms;
|
||||
bool required;
|
||||
#if PAN_ARCH < 9
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);
|
||||
|
||||
#define MAX_RTS 8
|
||||
#define MAX_VS_ATTRIBS 16
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
|
|
@ -155,6 +156,10 @@ struct panvk_graphics_sysvals {
|
|||
uint32_t noperspective_varyings;
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
aligned_u64 blend_descs[MAX_RTS];
|
||||
} fs;
|
||||
|
||||
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
|
|
|
|||
|
|
@ -114,16 +114,15 @@ out:
|
|||
}
|
||||
|
||||
static void
|
||||
emit_blend_desc(const struct pan_shader_info *fs_info, uint64_t fs_code,
|
||||
const struct pan_blend_state *state, unsigned blend_idx,
|
||||
unsigned rt_idx, uint64_t blend_shader, uint16_t constant,
|
||||
emit_blend_desc(const struct pan_blend_state *state, uint8_t rt_idx,
|
||||
const struct pan_shader_info *fs_info, uint8_t loc,
|
||||
uint64_t fs_code, uint64_t blend_shader, uint16_t constant,
|
||||
struct mali_blend_packed *bd)
|
||||
{
|
||||
const struct pan_blend_rt_state *rt =
|
||||
rt_idx != MESA_VK_ATTACHMENT_UNUSED ? &state->rts[rt_idx] : NULL;
|
||||
const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
|
||||
|
||||
pan_pack(bd, BLEND, cfg) {
|
||||
if (!state->rt_count || !rt || !rt->equation.color_mask) {
|
||||
if (loc == MESA_VK_ATTACHMENT_UNUSED || !rt->equation.color_mask) {
|
||||
cfg.enable = false;
|
||||
cfg.internal.mode = MALI_BLEND_MODE_OFF;
|
||||
continue;
|
||||
|
|
@ -146,7 +145,7 @@ emit_blend_desc(const struct pan_shader_info *fs_info, uint64_t fs_code,
|
|||
cfg.internal.shader.pc = (uint32_t)blend_shader;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
uint32_t ret_offset = fs_info->bifrost.blend[blend_idx].return_offset;
|
||||
uint32_t ret_offset = fs_info->bifrost.blend[loc].return_offset;
|
||||
|
||||
/* If ret_offset is zero, we assume the BLEND is a terminal
|
||||
* instruction and set return_value to zero, to let the
|
||||
|
|
@ -186,7 +185,7 @@ emit_blend_desc(const struct pan_shader_info *fs_info, uint64_t fs_code,
|
|||
cfg.internal.fixed_function.rt = rt_idx;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
nir_alu_type type = fs_info->bifrost.blend[blend_idx].type;
|
||||
nir_alu_type type = fs_info->bifrost.blend[loc].type;
|
||||
if (fs_info->fs.untyped_color_outputs) {
|
||||
cfg.internal.fixed_function.conversion.register_format =
|
||||
GENX(pan_fixup_blend_type)(type, rt->format);
|
||||
|
|
@ -285,7 +284,7 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf,
|
|||
.alpha_to_one = dyns->ms.alpha_to_one_enable,
|
||||
.logicop_enable = cb->logic_op_enable,
|
||||
.logicop_func = vk_logic_op_to_pipe(cb->logic_op),
|
||||
.rt_count = cb->attachment_count,
|
||||
.rt_count = cmdbuf->state.gfx.render.fb.info.rt_count,
|
||||
.constants =
|
||||
{
|
||||
cb->blend_constants[0],
|
||||
|
|
@ -297,31 +296,28 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf,
|
|||
uint64_t blend_shaders[8] = {};
|
||||
/* All bits set to one encodes unused fixed-function blend constant. */
|
||||
unsigned ff_blend_constant = ~0;
|
||||
uint8_t remap_catts[MAX_RTS] = {
|
||||
MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED,
|
||||
MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED,
|
||||
MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED,
|
||||
MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED,
|
||||
};
|
||||
uint32_t blend_count = MAX2(cmdbuf->state.gfx.render.fb.info.rt_count, 1);
|
||||
|
||||
static_assert(ARRAY_SIZE(remap_catts) <= ARRAY_SIZE(cal->color_map),
|
||||
"vk_color_attachment_location_state::color_map is too small");
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(remap_catts); i++) {
|
||||
if (cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED) {
|
||||
assert(cal->color_map[i] < MAX_RTS);
|
||||
remap_catts[cal->color_map[i]] = i;
|
||||
}
|
||||
}
|
||||
uint8_t loc_rt[MAX_RTS], rt_loc[MAX_RTS];
|
||||
memset(loc_rt, MESA_VK_ATTACHMENT_UNUSED, sizeof(loc_rt));
|
||||
memset(rt_loc, MESA_VK_ATTACHMENT_UNUSED, sizeof(rt_loc));
|
||||
|
||||
memset(blend_info, 0, sizeof(*blend_info));
|
||||
for (uint8_t i = 0; i < cb->attachment_count; i++) {
|
||||
for (uint8_t i = 0; i < bs.rt_count; i++) {
|
||||
struct pan_blend_rt_state *rt = &bs.rts[i];
|
||||
|
||||
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
uint8_t loc = cal->color_map[i];
|
||||
if (loc == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
if (!(fs_info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0 + loc)))
|
||||
continue;
|
||||
|
||||
/* At this point, we know it's mapped to a shader location. */
|
||||
assert(loc < MAX_RTS && loc_rt[loc] == MESA_VK_ATTACHMENT_UNUSED);
|
||||
rt_loc[i] = loc;
|
||||
loc_rt[loc] = i;
|
||||
|
||||
if (!(cb->color_write_enables & BITFIELD_BIT(i)))
|
||||
continue;
|
||||
|
||||
|
|
@ -365,7 +361,7 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf,
|
|||
blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation);
|
||||
|
||||
if (blend_needs_shader(&bs, i, &ff_blend_constant)) {
|
||||
nir_alu_type src0_type = fs_info->bifrost.blend[i].type;
|
||||
nir_alu_type src0_type = fs_info->bifrost.blend[loc].type;
|
||||
nir_alu_type src1_type = fs_info->bifrost.blend_src1_type;
|
||||
|
||||
VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type,
|
||||
|
|
@ -383,18 +379,66 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf,
|
|||
if (ff_blend_constant == ~0)
|
||||
ff_blend_constant = 0;
|
||||
|
||||
/* Now that we've collected all the information, we can emit. */
|
||||
for (uint8_t i = 0; i < blend_count; i++) {
|
||||
uint32_t catt_idx = remap_catts[i];
|
||||
uint64_t blend_shader =
|
||||
catt_idx != MESA_VK_ATTACHMENT_UNUSED ? blend_shaders[catt_idx] : 0;
|
||||
|
||||
emit_blend_desc(fs_info, fs_code, &bs, i, catt_idx,
|
||||
blend_shader, ff_blend_constant, &bds[i]);
|
||||
struct mali_blend_packed packed[MAX_RTS];
|
||||
for (uint8_t rt = 0; rt < blend_count; rt++) {
|
||||
emit_blend_desc(&bs, rt, fs_info, rt_loc[rt], fs_code,
|
||||
blend_shaders[rt], ff_blend_constant, &packed[rt]);
|
||||
}
|
||||
|
||||
if (blend_info->shader_loads_blend_const)
|
||||
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
||||
/* Copy into the GPU descriptor array */
|
||||
typed_memcpy(bds, packed, blend_count);
|
||||
|
||||
/* Re-order blend descriptors for the shader
|
||||
*
|
||||
* Blending on Bifrost+ is really annoying. In theory, we can order the
|
||||
* blend descriptors any way we want and, in theory, they all have an RT
|
||||
* index. However, that's not really the way blending works. If every
|
||||
* blend descriptor is either disabled or has the RT index that's the same
|
||||
* as the blend descriptor index, everything is fine. If not, we're in for
|
||||
* a bit of trouble.
|
||||
*
|
||||
* The RT index is only really consumed by the BLEND instruction to tell it
|
||||
* what RT to target. The FF blend hardware, however, assumes that blend
|
||||
* RTs map 1:1 to framebuffer RTs. If the BLEND instruction kicks off a
|
||||
* blend shader, everything is good because the blend shader has the
|
||||
* correct RT index baked into it. But if the BLEND fires off a message to
|
||||
* the blend unit, it then looks up the blend descriptor by RT index,
|
||||
* ignoring which blend descriptor FAU we read from the shader. Say, for
|
||||
* instance bds[3].rt == 5. If we BLEND using blend_descriptor_3, the
|
||||
* BLEND instruction will see an RT of 5 and kick off a blend message. The
|
||||
* blend unit will then fetch bds[5] and use that to do the actual blend.
|
||||
*
|
||||
* We could try to do something crazy where we divide blend descriptors
|
||||
* into a shader half and a HW half and only re-arrange the shader half so
|
||||
* that the hardware's little daisy-chain game works. However, there are a
|
||||
* few fields that are used by both, notably the conversion, so this is
|
||||
* dead in the water.
|
||||
*
|
||||
* The solution, then, is to push our own blend descriptors. Or at least
|
||||
* the shader half (everything in .internal). This lets us leave the HW
|
||||
* blend descriptors with a 1:1 mapping to framebuffer RTs and re-order
|
||||
* things from the shader PoV. The BLEND instructions in the shader will
|
||||
* use the ones we pushed manually and then, when a blend message gets sent
|
||||
* to the hardware, it'll pick up an idential blend descriptor in the RT
|
||||
* index location in the bds[] array.
|
||||
*/
|
||||
for (uint8_t loc = 0; loc < MAX_RTS; loc++) {
|
||||
uint8_t rt = loc_rt[loc];
|
||||
if (rt == MESA_VK_ATTACHMENT_UNUSED) {
|
||||
struct mali_blend_packed disabled;
|
||||
pan_pack(&disabled, BLEND, cfg) {
|
||||
cfg.enable = false;
|
||||
cfg.internal.mode = MALI_BLEND_MODE_OFF;
|
||||
}
|
||||
cmdbuf->state.gfx.fs.blend_descs[loc] =
|
||||
disabled.opaque[2] | (uint64_t)disabled.opaque[3] << 32;
|
||||
} else {
|
||||
cmdbuf->state.gfx.fs.blend_descs[loc] =
|
||||
packed[rt].opaque[2] | (uint64_t)packed[rt].opaque[3] << 32;
|
||||
}
|
||||
}
|
||||
|
||||
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -733,6 +733,11 @@ panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
|||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MAX_RTS; i++) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, fs.blend_descs[i],
|
||||
cmdbuf->state.gfx.fs.blend_descs[i]);
|
||||
}
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
|
||||
|
|
|
|||
|
|
@ -146,6 +146,12 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
|||
val = load_sysval(b, common, bit_size, printf_buffer_address);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_blend_descriptor_pan: {
|
||||
uint32_t loc = nir_intrinsic_base(intr);
|
||||
val = load_sysval(b, graphics, bit_size, fs.blend_descs[loc]);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_input_attachment_target_pan: {
|
||||
const struct vk_input_attachment_location_state *ial =
|
||||
ctx->state ? ctx->state->ial : NULL;
|
||||
|
|
@ -1400,6 +1406,13 @@ panvk_compile_shader(struct panvk_device *dev,
|
|||
nir_assign_io_var_locations(nir, nir_var_shader_out);
|
||||
panvk_lower_nir_io(nir);
|
||||
|
||||
/* Lower FS outputs now so that we can lower load_blend_descriptor_pan
|
||||
* to a driver-provided FAU instead of using the blend descriptors
|
||||
* uploaded by the hardware. See panvk_vX_blend.c for details.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, pan_nir_lower_fs_outputs, false);
|
||||
|
||||
variant->own_bin = true;
|
||||
|
||||
result = panvk_compile_nir(dev, nir, info->flags, &inputs, state,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue