From 6b49b1708ea849d45c1b817ea63a02872dfd0986 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Jul 2024 17:13:59 +0200 Subject: [PATCH] pan/bi: Support native layer_id store/load on Valhall On Valhall, we can store the layer index in PositionFIFO attributes and have the primitives dispatched to the appropriate list in the tiler context, which means we no longer have to issue N IDVS jobs when doing layered rendering. On the fragment shader side, we can pass the layer index through the frame_argument field, which can be preloaded in r62-r63, so do that to save a push uniform slot. Signed-off-by: Boris Brezillon Reviewed-by: Mary Guillemard Part-of: --- src/panfrost/compiler/bifrost_compile.c | 26 +++++++++++++++++++++---- src/panfrost/lib/pan_desc.c | 9 +++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index a9ffbae1fca..09b8c2cfd00 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1022,6 +1022,7 @@ bi_should_remove_store(nir_intrinsic_instr *intr, enum bi_idvs_mode idvs) switch (sem.location) { case VARYING_SLOT_POS: case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: return idvs == BI_IDVS_VARYING; default: return idvs == BI_IDVS_POSITION; @@ -1101,6 +1102,8 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) bool psiz = (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ); + bool layer = + (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_LAYER); bi_index a[4] = {bi_null()}; @@ -1116,20 +1119,30 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_imm_u32(format), regfmt, nr - 1); } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) { bi_index index = bi_preload(b, 59); + unsigned pos_attr_offset = 0; + unsigned src_bit_sz = nir_src_bit_size(instr->src[0]); - if (psiz) { - assert(T_size == 16 && "should've been lowered"); + if (psiz 
|| layer) index = bi_iadd_imm_i32(b, index, 4); + + if (layer) { + assert(nr == 1 && src_bit_sz == 32); + src_bit_sz = 8; + pos_attr_offset = 2; + data = bi_byte(data, 0); } + if (psiz) + assert(T_size == 16 && "should've been lowered"); + bi_index address = bi_lea_buf_imm(b, index); bi_emit_split_i32(b, a, address, 2); bool varying = (b->shader->idvs == BI_IDVS_VARYING); - bi_store(b, nr * nir_src_bit_size(instr->src[0]), data, a[0], a[1], + bi_store(b, nr * src_bit_sz, data, a[0], a[1], varying ? BI_SEG_VARY : BI_SEG_POS, - varying ? bi_varying_offset(b->shader, instr) : 0); + varying ? bi_varying_offset(b->shader, instr) : pos_attr_offset); } else if (immediate) { bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b), bi_instance_id(b), regfmt, imm_index); @@ -1920,6 +1933,11 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_derivative(b, dst, instr, 2, true); break; + case nir_intrinsic_load_layer_id: + assert(b->shader->arch >= 9); + bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0))); + break; + default: fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index c321c5cc231..d426c5817bc 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -815,6 +815,15 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, #if PAN_ARCH >= 9 cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin; cfg.first_provoking_vertex = fb->first_provoking_vertex; + + /* internal_layer_index is used to select the right primitive list in the + * tiler context, and frame_argument is the value that's passed to the fragment + * shader through r62-r63, which we use to pass gl_Layer. Since the + * layer_idx only takes 8 bits, we might use the extra 56 bits we have + * in frame_argument to pass other information to the fragment shader at + * some point. 
*/ + cfg.internal_layer_index = layer_idx; + cfg.frame_argument = layer_idx; #endif }