diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 34ebc10e84d..a8e903f6ba3 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -39,6 +39,7 @@ files_panfrost = files( 'pan_mempool.h', 'pan_nir_remove_fragcolor_stores.c', 'pan_nir_lower_sysvals.c', + 'pan_nir_lower_res_indices.c', ) panfrost_includes = [ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index b768bc878a7..d7028133e1d 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -436,6 +436,9 @@ bool panfrost_nir_remove_fragcolor_stores(nir_shader *s, unsigned nr_cbufs); bool panfrost_nir_lower_sysvals(nir_shader *s, struct panfrost_sysvals *sysvals); +bool panfrost_nir_lower_res_indices(nir_shader *shader, + struct panfrost_compile_inputs *inputs); + /** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer * Descriptor at draw-time on Midgard */ diff --git a/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c new file mode 100644 index 00000000000..591c3c6ce0e --- /dev/null +++ b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c @@ -0,0 +1,82 @@ +/* + * Copyright © 2024 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/nir/nir_builder.h" +#include "genxml/gen_macros.h" +#include "pan_context.h" +#include "pan_shader.h" + +static bool +lower_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + b->cursor = nir_before_instr(&tex->instr); + + nir_def *tex_offset = nir_steal_tex_src(tex, nir_tex_src_texture_offset); + nir_def *sampler_offset = nir_steal_tex_src(tex, nir_tex_src_sampler_offset); + + if (tex_offset != NULL) { + tex_offset = + nir_ior_imm(b, tex_offset, pan_res_handle(PAN_TABLE_TEXTURE, 0)); + nir_tex_instr_add_src(tex, nir_tex_src_texture_offset, tex_offset); + } else { + tex->texture_index = + pan_res_handle(PAN_TABLE_TEXTURE, tex->texture_index); + } + + /* By ABI with the compiler, we assume there is a valid sampler bound at + * index 0 for txf. 
+ */ + if (!nir_tex_instr_need_sampler(tex)) { + tex->sampler_index = pan_res_handle(PAN_TABLE_SAMPLER, 0); + } else if (sampler_offset != NULL) { + sampler_offset = + nir_ior_imm(b, sampler_offset, pan_res_handle(PAN_TABLE_SAMPLER, 0)); + nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset, sampler_offset); + } else { + tex->sampler_index = + pan_res_handle(PAN_TABLE_SAMPLER, tex->sampler_index); + } + + return true; +} + +bool +panfrost_nir_lower_res_indices(nir_shader *shader, + struct panfrost_compile_inputs *inputs) +{ + /** + * Starting with Valhall, we are required to encode table indices by the + * compiler ABI. + */ + if (pan_arch(inputs->gpu_id) < 9) + return false; + + return nir_shader_instructions_pass( + shader, lower_instr, nir_metadata_block_index | nir_metadata_dominance, + inputs); +} \ No newline at end of file diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 61a4eb12ded..31294d3b3d0 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -168,6 +168,9 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, NIR_PASS_V(s, panfrost_nir_lower_sysvals, &out->sysvals); + /* Lower resource indices */ + NIR_PASS_V(s, panfrost_nir_lower_res_indices, &inputs); + screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info); assert(req_local_mem >= out->info.wls_size); diff --git a/src/panfrost/compiler/ISA.xml b/src/panfrost/compiler/ISA.xml index f1e908331c2..7ddd9993e1a 100644 --- a/src/panfrost/compiler/ISA.xml +++ b/src/panfrost/compiler/ISA.xml @@ -8417,6 +8417,7 @@ + @@ -8466,6 +8467,7 @@ + @@ -8507,6 +8509,7 @@ + diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index c15dbbb4be9..d74a7c67729 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -362,6 +362,18 @@ bi_reg_fmt_for_nir(nir_alu_type T) } } +static bool 
+va_is_valid_const_narrow_index(bi_index idx) +{ + if (idx.type != BI_INDEX_CONSTANT) + return false; + + unsigned index = pan_res_handle_get_index(idx.value); + unsigned table_index = pan_res_handle_get_table(idx.value); + + return index < 1024 && va_is_valid_const_table(table_index); +} + /* Checks if the _IMM variant of an intrinsic can be used, returning in imm the * immediate to be used (which applies even if _IMM can't be used) */ @@ -3456,7 +3468,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) bi_index sregs[VALHALL_TEX_SREG_COUNT] = {}; bi_index sampler = bi_imm_u32(instr->sampler_index); bi_index texture = bi_imm_u32(instr->texture_index); - uint32_t tables = (PAN_TABLE_SAMPLER << 11) | (PAN_TABLE_TEXTURE << 27); for (unsigned i = 0; i < instr->num_srcs; ++i) { bi_index index = bi_src_index(&instr->src[i].src); @@ -3560,9 +3571,31 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) if (sr_count) bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32); - bi_index image_src = bi_imm_u32(tables); - image_src = bi_lshift_or_i32(b, sampler, image_src, bi_imm_u8(0)); - image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16)); + bool narrow_indices = va_is_valid_const_narrow_index(texture) && + va_is_valid_const_narrow_index(sampler); + + bi_index src0; + bi_index src1; + + if (narrow_indices) { + unsigned tex_set = + va_res_fold_table_idx(pan_res_handle_get_table(texture.value)); + unsigned sampler_set = + va_res_fold_table_idx(pan_res_handle_get_table(sampler.value)); + unsigned texture_index = pan_res_handle_get_index(texture.value); + unsigned sampler_index = pan_res_handle_get_index(sampler.value); + + unsigned packed_handle = (tex_set << 27) | (texture_index << 16) | + (sampler_set << 11) | sampler_index; + + src0 = bi_imm_u32(packed_handle); + + /* TODO: narrow offsetms */ + src1 = bi_zero(); + } else { + src0 = sampler; + src1 = texture; + } /* Only write the components that we actually read */ unsigned mask = 
nir_def_components_read(&instr->def); @@ -3577,19 +3610,19 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) case nir_texop_tex: case nir_texop_txl: case nir_texop_txb: - bi_tex_single_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, - regfmt, instr->is_shadow, explicit_offset, lod_mode, - mask, sr_count); + bi_tex_single_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt, + instr->is_shadow, explicit_offset, lod_mode, + !narrow_indices, mask, sr_count); break; case nir_texop_txf: case nir_texop_txf_ms: - bi_tex_fetch_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, - regfmt, explicit_offset, mask, sr_count); + bi_tex_fetch_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt, + explicit_offset, !narrow_indices, mask, sr_count); break; case nir_texop_tg4: - bi_tex_gather_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim, + bi_tex_gather_to(b, dest, idx, src0, src1, instr->is_array, dim, instr->component, false, regfmt, instr->is_shadow, - explicit_offset, mask, sr_count); + explicit_offset, !narrow_indices, mask, sr_count); break; default: unreachable("Unhandled Valhall texture op"); @@ -3704,8 +3737,11 @@ bi_emit_tex(bi_builder *b, nir_tex_instr *instr) * it for txf operations, since there may be no other valid samplers. This is * a workaround: txf does not require a sampler in NIR (so sampler_index is * undefined) but we need one in the hardware. This is ABI with the driver. + * + * On Valhall, as the descriptor table is encoded in the index, this should + * be handled by the driver. 
*/ - if (!nir_tex_instr_need_sampler(instr)) + if (!nir_tex_instr_need_sampler(instr) && b->shader->arch < 9) instr->sampler_index = 0; if (b->shader->arch >= 9) diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index 32c460c28e0..a4a3f53dad2 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -504,6 +504,7 @@ typedef struct { /* Used for valhall texturing */ bool shadow; + bool wide_indices; bool texel_offset; bool array_enable; bool integer_coordinates; diff --git a/src/panfrost/compiler/valhall/test/test-insert-flow.cpp b/src/panfrost/compiler/valhall/test/test-insert-flow.cpp index a9703c1c996..37921935b4a 100644 --- a/src/panfrost/compiler/valhall/test/test-insert-flow.cpp +++ b/src/panfrost/compiler/valhall/test/test-insert-flow.cpp @@ -198,10 +198,10 @@ TEST_F(InsertFlow, TextureImplicit) { CASE(FRAGMENT, { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), - bi_register(12), false, BI_DIMENSION_2D, - BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); + bi_tex_single_to( + b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), + false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false, + BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4); flow(DISCARD); flow(WAIT0); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); @@ -217,7 +217,7 @@ TEST_F(InsertFlow, TextureExplicit) bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4); + BI_VA_LOD_MODE_ZERO_LOD, false, BI_WRITE_MASK_RGBA, 4); flow(WAIT0); bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); flow(END); diff --git a/src/panfrost/compiler/valhall/test/test-merge-flow.cpp 
b/src/panfrost/compiler/valhall/test/test-merge-flow.cpp index 36e8c1c5064..920655465fe 100644 --- a/src/panfrost/compiler/valhall/test/test-merge-flow.cpp +++ b/src/panfrost/compiler/valhall/test/test-merge-flow.cpp @@ -292,10 +292,10 @@ TEST_F(MergeFlow, DeletePointlessDiscard) CASE( { bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); - bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), - bi_register(12), false, BI_DIMENSION_2D, - BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); + bi_tex_single_to( + b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), + false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false, + BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4); flow(DISCARD); flow(WAIT0); flow(WAIT0126); @@ -311,7 +311,7 @@ TEST_F(MergeFlow, DeletePointlessDiscard) I = bi_tex_single_to( b, bi_register(0), bi_register(4), bi_register(8), bi_register(12), false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false, - BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); + BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4); I->flow = VA_FLOW_WAIT0126; I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest); diff --git a/src/panfrost/compiler/valhall/va_pack.c b/src/panfrost/compiler/valhall/va_pack.c index f6381c6f5f0..34663d1c70e 100644 --- a/src/panfrost/compiler/valhall/va_pack.c +++ b/src/panfrost/compiler/valhall/va_pack.c @@ -913,6 +913,8 @@ va_pack_instr(const bi_instr *I) if (I->op == BI_OPCODE_TEX_FETCH && I->shadow) invalid_instruction(I, "TEX_FETCH does not support .shadow"); + if (I->wide_indices) + hex |= (1ull << 8); if (I->array_enable) hex |= (1ull << 10); if (I->texel_offset) diff --git a/src/panfrost/compiler/valhall/valhall.h b/src/panfrost/compiler/valhall/valhall.h index 14442946664..b66cc457f1b 100644 --- a/src/panfrost/compiler/valhall/valhall.h +++ b/src/panfrost/compiler/valhall/valhall.h @@ -161,6 +161,26 @@ 
va_flow_is_wait_or_none(enum va_flow flow) return (flow <= VA_FLOW_WAIT); } +static inline bool +va_is_valid_const_table(unsigned table) +{ + return (table >= 0 && table <= 11) || (table >= 60 && table <= 63); +} + +static inline uint32_t +va_res_fold_table_idx(uint32_t table) +{ + switch (table) { + case 0 ... 11: + return table; + case 60 ... 63: + return table + 12 - 60; + default: + assert(!"Can't pack table"); + return 0; + } +} + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c index b207dc11791..956d3ea7393 100644 --- a/src/panfrost/lib/pan_blitter.c +++ b/src/panfrost/lib/pan_blitter.c @@ -402,6 +402,18 @@ lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr, return true; } +static uint32_t +sampler_hw_index(uint32_t index) +{ + return PAN_ARCH >= 9 ? pan_res_handle(PAN_TABLE_SAMPLER, index) : index; +} + +static uint32_t +tex_hw_index(uint32_t index) +{ + return PAN_ARCH >= 9 ? pan_res_handle(PAN_TABLE_TEXTURE, index) : index; +} + static const struct pan_blit_shader_data * pan_blitter_get_blit_shader(struct pan_blitter_cache *cache, const struct pan_blit_shader_key *key) @@ -527,7 +539,8 @@ pan_blitter_get_blit_shader(struct pan_blitter_cache *cache, tex->op = nir_texop_txf_ms; tex->dest_type = key->surfaces[i].type; - tex->texture_index = active_count; + tex->texture_index = tex_hw_index(active_count); + tex->sampler_index = sampler_hw_index(0); tex->is_array = key->surfaces[i].array; tex->sampler_dim = sampler_dim; @@ -552,7 +565,8 @@ pan_blitter_get_blit_shader(struct pan_blitter_cache *cache, nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 
3 : 1); tex->dest_type = key->surfaces[i].type; - tex->texture_index = active_count; + tex->texture_index = tex_hw_index(active_count); + tex->sampler_index = sampler_hw_index(0); tex->is_array = key->surfaces[i].array; tex->sampler_dim = sampler_dim; diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 94b53ddac56..e3bf068cc55 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -426,4 +426,37 @@ pan_subgroup_size(unsigned arch) return 1; } +/* + * Helper extracting the table from a given handle of the Valhall descriptor model. + */ +static inline unsigned +pan_res_handle_get_table(unsigned handle) +{ + unsigned table = handle >> 24; + + assert(table < 64); + return table; +} + +/* + * Helper returning the index from a given handle of the Valhall descriptor model. + */ +static inline unsigned +pan_res_handle_get_index(unsigned handle) +{ + return handle & BITFIELD_MASK(24); +} + +/* + * Helper creating a handle for the Valhall descriptor model. + */ +static inline unsigned +pan_res_handle(unsigned table, unsigned index) +{ + assert(table < 64); + assert(index < (1u << 24)); + + return (table << 24) | index; +} + #endif