pan/bi: Support native layer_id store/load on Valhall

On Valhall, we can store the layer index in PositionFIFO attributes and
have the primitives dispatched to the appropriate list in the tiler
context, which means we no longer have to issue N IDVS jobs when doing
layered rendering.

On the fragment shader side, we can pass the layer index through the
frame_argument field, which can be preloaded in r62-r63, so do that to
save a push uniform slot.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30695>
This commit is contained in:
Boris Brezillon 2024-07-03 17:13:59 +02:00 committed by Marge Bot
parent fa24b3f4d0
commit 6b49b1708e
2 changed files with 31 additions and 4 deletions

View file

@ -1022,6 +1022,7 @@ bi_should_remove_store(nir_intrinsic_instr *intr, enum bi_idvs_mode idvs)
switch (sem.location) {
case VARYING_SLOT_POS:
case VARYING_SLOT_PSIZ:
case VARYING_SLOT_LAYER:
return idvs == BI_IDVS_VARYING;
default:
return idvs == BI_IDVS_POSITION;
@ -1101,6 +1102,8 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
bool psiz =
(nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ);
bool layer =
(nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_LAYER);
bi_index a[4] = {bi_null()};
@ -1116,20 +1119,30 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
bi_imm_u32(format), regfmt, nr - 1);
} else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) {
bi_index index = bi_preload(b, 59);
unsigned pos_attr_offset = 0;
unsigned src_bit_sz = nir_src_bit_size(instr->src[0]);
if (psiz) {
assert(T_size == 16 && "should've been lowered");
if (psiz || layer)
index = bi_iadd_imm_i32(b, index, 4);
if (layer) {
assert(nr == 1 && src_bit_sz == 32);
src_bit_sz = 8;
pos_attr_offset = 2;
data = bi_byte(data, 0);
}
if (psiz)
assert(T_size == 16 && "should've been lowered");
bi_index address = bi_lea_buf_imm(b, index);
bi_emit_split_i32(b, a, address, 2);
bool varying = (b->shader->idvs == BI_IDVS_VARYING);
bi_store(b, nr * nir_src_bit_size(instr->src[0]), data, a[0], a[1],
bi_store(b, nr * src_bit_sz, data, a[0], a[1],
varying ? BI_SEG_VARY : BI_SEG_POS,
varying ? bi_varying_offset(b->shader, instr) : 0);
varying ? bi_varying_offset(b->shader, instr) : pos_attr_offset);
} else if (immediate) {
bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b), bi_instance_id(b),
regfmt, imm_index);
@ -1920,6 +1933,11 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_derivative(b, dst, instr, 2, true);
break;
case nir_intrinsic_load_layer_id:
assert(b->shader->arch >= 9);
bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0)));
break;
default:
fprintf(stderr, "Unhandled intrinsic %s\n",
nir_intrinsic_infos[instr->intrinsic].name);

View file

@ -815,6 +815,15 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
#if PAN_ARCH >= 9
cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
cfg.first_provoking_vertex = fb->first_provoking_vertex;
/* internal_layer_index is used to select the right primitive list in the
* tiler context, and frame_argument is the value that's passed to the
* fragment shader through r62-r63, which we use to pass gl_Layer. Since
* layer_idx only takes 8 bits, we might use the extra 56 bits we have in
* frame_argument to pass other information to the fragment shader at
* some point. */
cfg.internal_layer_index = layer_idx;
cfg.frame_argument = layer_idx;
#endif
}