From 652e1c2e13946de1e6dc6ae94b8b1f4af6685732 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Fri, 16 Feb 2024 11:06:59 +0100 Subject: [PATCH] pan/bi: Rework indices for attributes on Valhall This also fixes the missing encoding of indices with a non-immediate index. Signed-off-by: Mary Guillemard Reviewed-by: Boris Brezillon Part-of: --- .../panfrost/pan_nir_lower_res_indices.c | 31 ++++++- src/panfrost/compiler/bifrost_compile.c | 93 +++++++++++++------ src/panfrost/lib/pan_blitter.c | 13 ++- src/panfrost/util/pan_collect_varyings.c | 2 +- 4 files changed, 107 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c index bbcab2494ab..dd7e6451119 100644 --- a/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c +++ b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c @@ -74,13 +74,38 @@ lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin) } static bool -lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) +lower_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + const struct panfrost_compile_inputs *inputs) +{ + /* We always use heap-based varying allocation when IDVS is used on Valhall. */ + bool malloc_idvs = !inputs->no_idvs; + + /* All vertex attributes come from the attribute table. + * Fragment inputs come from the attribute table too, unless they've + * been allocated on the heap. 
+ */ + if (b->shader->info.stage == MESA_SHADER_VERTEX || + (b->shader->info.stage == MESA_SHADER_FRAGMENT && !malloc_idvs)) { + nir_intrinsic_set_base( + intrin, + pan_res_handle(PAN_TABLE_ATTRIBUTE, nir_intrinsic_base(intrin))); + return true; + } + + return false; +} + +static bool +lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, + const struct panfrost_compile_inputs *inputs) { switch (intrin->intrinsic) { case nir_intrinsic_image_load: case nir_intrinsic_image_store: case nir_intrinsic_image_texel_address: return lower_image_intrin(b, intrin); + case nir_intrinsic_load_input: + return lower_input_intrin(b, intrin, inputs); default: return false; } @@ -89,11 +114,13 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) static bool lower_instr(nir_builder *b, nir_instr *instr, void *data) { + const struct panfrost_compile_inputs *inputs = data; + switch (instr->type) { case nir_instr_type_tex: return lower_tex(b, nir_instr_as_tex(instr)); case nir_instr_type_intrinsic: - return lower_intrinsic(b, nir_instr_as_intrinsic(instr)); + return lower_intrinsic(b, nir_instr_as_intrinsic(instr), inputs); default: return false; } diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 6eacf49433c..7ec98e701bb 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -390,6 +390,40 @@ bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate, return (*immediate) < max; } +static bool +bi_is_imm_desc_handle(bi_builder *b, nir_intrinsic_instr *instr, + uint32_t *immediate, unsigned max) +{ + nir_src *offset = nir_get_io_offset_src(instr); + + if (!nir_src_is_const(*offset)) + return false; + + if (b->shader->arch >= 9) { + uint32_t res_handle = + nir_intrinsic_base(instr) + nir_src_as_uint(*offset); + uint32_t table_index = pan_res_handle_get_table(res_handle); + uint32_t res_index = pan_res_handle_get_index(res_handle); + + if 
(!va_is_valid_const_table(table_index) || res_index >= max) + return false; + + *immediate = res_handle; + return true; + } + + return bi_is_intr_immediate(instr, immediate, max); +} + +static bool +bi_is_imm_var_desc_handle(bi_builder *b, nir_intrinsic_instr *instr, + uint32_t *immediate) +{ + unsigned max = b->shader->arch >= 9 ? 256 : 20; + + return bi_is_imm_desc_handle(b, instr, immediate, max); +} + static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src, unsigned *channel, unsigned count, unsigned bitsize); @@ -439,14 +473,17 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) unsigned imm_index = 0; unsigned base = nir_intrinsic_base(instr); bool constant = nir_src_is_const(*offset); - bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); + bool immediate = bi_is_imm_desc_handle(b, instr, &imm_index, 16); bi_index dest = (component == 0) ? bi_def_index(&instr->def) : bi_temp(b->shader); bi_instr *I; if (immediate) { I = bi_ld_attr_imm_to(b, dest, bi_vertex_id(b), bi_instance_id(b), regfmt, - vecsize, imm_index); + vecsize, pan_res_handle_get_index(imm_index)); + + if (b->shader->arch >= 9) + I->table = va_res_fold_table_idx(pan_res_handle_get_table(base)); } else { bi_index idx = bi_src_index(&instr->src[0]); @@ -459,9 +496,6 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) regfmt, vecsize); } - if (b->shader->arch >= 9) - I->table = PAN_TABLE_ATTRIBUTE; - bi_copy_component(b, instr, dest); } @@ -544,23 +578,38 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) nir_src *offset = nir_get_io_offset_src(instr); unsigned imm_index = 0; - bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); - bi_instr *I = NULL; + bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index); + unsigned base = nir_intrinsic_base(instr); + + /* On Valhall, ensure the table and index are valid for usage with immediate + * form when IDVS isn't used */ + if (b->shader->arch >= 9 && 
!b->shader->malloc_idvs) + immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) && + pan_res_handle_get_index(base) < 256; if (b->shader->malloc_idvs && immediate) { /* Immediate index given in bytes. */ bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format, update, vecsize, bi_varying_offset(b->shader, instr)); - } else if (immediate && smooth) { - I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize, - imm_index); - } else if (immediate && !smooth) { - I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize, - imm_index); + } else if (immediate) { + bi_instr *I; + + if (smooth) { + I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize, + pan_res_handle_get_index(imm_index)); + } else { + I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize, + pan_res_handle_get_index(imm_index)); + } + + /* Valhall usually uses machine-allocated IDVS. If this is disabled, + * use a simple Midgard-style ABI. + */ + if (b->shader->arch >= 9) + I->table = va_res_fold_table_idx(pan_res_handle_get_table(base)); } else { bi_index idx = bi_src_index(offset); - unsigned base = nir_intrinsic_base(instr); if (b->shader->malloc_idvs) { /* Index needs to be in bytes, but NIR gives the index @@ -574,25 +623,17 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample, source_format, update, vecsize); - } else if (smooth) { - if (base != 0) - idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - - I = bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize); } else { if (base != 0) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - I = bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize); + if (smooth) + bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize); + else + bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize); } } - /* Valhall usually uses machine-allocated IDVS. 
If this is disabled, use - * a simple Midgard-style ABI. - */ - if (b->shader->arch >= 9 && I != NULL) - I->table = PAN_TABLE_ATTRIBUTE; - bi_copy_component(b, instr, dest); } diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c index 956d3ea7393..98f93ada43b 100644 --- a/src/panfrost/lib/pan_blitter.c +++ b/src/panfrost/lib/pan_blitter.c @@ -414,6 +414,12 @@ tex_hw_index(uint32_t index) return PAN_ARCH >= 9 ? pan_res_handle(PAN_TABLE_TEXTURE, index) : index; } +static uint32_t +attr_hw_index(uint32_t index) +{ + return PAN_ARCH >= 9 ? pan_res_handle(PAN_TABLE_ATTRIBUTE, index) : index; +} + static const struct pan_blit_shader_data * pan_blitter_get_blit_shader(struct pan_blitter_cache *cache, const struct pan_blit_shader_key *key) @@ -484,12 +490,13 @@ pan_blitter_get_blit_shader(struct pan_blitter_cache *cache, nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(), "pan_blit(%s)", sig); + nir_def *barycentric = nir_load_barycentric( &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH); nir_def *coord = nir_load_interpolated_input( - &b, coord_comps, 32, barycentric, nir_imm_int(&b, 0), .base = 0, - .dest_type = nir_type_float32, .io_semantics.location = VARYING_SLOT_VAR0, - .io_semantics.num_slots = 1); + &b, coord_comps, 32, barycentric, nir_imm_int(&b, 0), + .base = attr_hw_index(0), .dest_type = nir_type_float32, + .io_semantics.location = VARYING_SLOT_VAR0, .io_semantics.num_slots = 1); unsigned active_count = 0; for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) { diff --git a/src/panfrost/util/pan_collect_varyings.c b/src/panfrost/util/pan_collect_varyings.c index b5cc72c51ab..0134ecfb67b 100644 --- a/src/panfrost/util/pan_collect_varyings.c +++ b/src/panfrost/util/pan_collect_varyings.c @@ -137,7 +137,7 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data) /* Consider each slot separately */ for (unsigned offset = 0; offset < sem.num_slots; 
++offset) { unsigned location = sem.location + offset; - unsigned index = nir_intrinsic_base(intr) + offset; + unsigned index = pan_res_handle_get_index(nir_intrinsic_base(intr)) + offset; if (slots[location].type) { assert(slots[location].type == type);