pan/bi: Rework indices for tex on Valhall

Lower the texture/sampler table into the indices on the panfrost side.

This also implements wide indices and changes the format of the texture and
sampler indices received by the compiler.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27846>
This commit is contained in:
Mary Guillemard 2024-02-02 10:13:02 +01:00 committed by Marge Bot
parent 181891495a
commit ce52b6d359
13 changed files with 222 additions and 24 deletions

View file

@ -39,6 +39,7 @@ files_panfrost = files(
'pan_mempool.h',
'pan_nir_remove_fragcolor_stores.c',
'pan_nir_lower_sysvals.c',
'pan_nir_lower_res_indices.c',
)
panfrost_includes = [

View file

@ -436,6 +436,9 @@ bool panfrost_nir_remove_fragcolor_stores(nir_shader *s, unsigned nr_cbufs);
bool panfrost_nir_lower_sysvals(nir_shader *s,
struct panfrost_sysvals *sysvals);
bool panfrost_nir_lower_res_indices(nir_shader *shader,
struct panfrost_compile_inputs *inputs);
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
* Descriptor at draw-time on Midgard
*/

View file

@ -0,0 +1,82 @@
/*
* Copyright © 2024 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "compiler/nir/nir_builder.h"
#include "genxml/gen_macros.h"
#include "pan_context.h"
#include "pan_shader.h"
/*
 * Per-instruction callback: rewrites the texture and sampler indices of a
 * texture instruction so that they carry a resource handle (table + index)
 * built with pan_res_handle().
 *
 * Returns false for non-texture instructions (left untouched) and true for
 * every texture instruction. The `data` argument is not used.
 */
static bool
lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);

   /* Any ALU emitted below must land in front of the texture instruction. */
   b->cursor = nir_before_instr(&tex->instr);

   /* Detach both dynamic offsets up front; each is NULL when absent. */
   nir_def *dyn_texture = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
   nir_def *dyn_sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);

   if (dyn_texture == NULL) {
      /* Constant index: fold the table directly into texture_index. */
      tex->texture_index =
         pan_res_handle(PAN_TABLE_TEXTURE, tex->texture_index);
   } else {
      /* Dynamic offset: OR the table bits in and re-attach the source. */
      nir_def *tex_handle =
         nir_ior_imm(b, dyn_texture, pan_res_handle(PAN_TABLE_TEXTURE, 0));
      nir_tex_instr_add_src(tex, nir_tex_src_texture_offset, tex_handle);
   }

   const bool wants_sampler = nir_tex_instr_need_sampler(tex);

   if (wants_sampler && dyn_sampler != NULL) {
      nir_def *sampler_handle =
         nir_ior_imm(b, dyn_sampler, pan_res_handle(PAN_TABLE_SAMPLER, 0));
      nir_tex_instr_add_src(tex, nir_tex_src_sampler_offset, sampler_handle);
   } else {
      /* By ABI with the compiler, a valid sampler is assumed to be bound at
       * index 0 for operations that do not need one (txf); such operations
       * are therefore pointed at sampler 0, and any stolen sampler offset is
       * dropped.
       */
      unsigned sampler_slot = wants_sampler ? tex->sampler_index : 0;
      tex->sampler_index = pan_res_handle(PAN_TABLE_SAMPLER, sampler_slot);
   }

   return true;
}
/*
 * Runs lower_instr over every instruction of `shader`, encoding the resource
 * table into texture/sampler indices.
 *
 * The pass is a no-op (returns false) when pan_arch(inputs->gpu_id) is below
 * 9: the compiler ABI only requires table-encoded indices starting with
 * Valhall. Otherwise the result of nir_shader_instructions_pass() is
 * returned.
 */
bool
panfrost_nir_lower_res_indices(nir_shader *shader,
                               struct panfrost_compile_inputs *inputs)
{
   if (pan_arch(inputs->gpu_id) >= 9) {
      return nir_shader_instructions_pass(
         shader, lower_instr,
         nir_metadata_block_index | nir_metadata_dominance, inputs);
   }

   return false;
}

View file

@ -168,6 +168,9 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
NIR_PASS_V(s, panfrost_nir_lower_sysvals, &out->sysvals);
/* Lower resource indices */
NIR_PASS_V(s, panfrost_nir_lower_res_indices, &inputs);
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
assert(req_local_mem >= out->info.wls_size);

View file

@ -8417,6 +8417,7 @@
<src start="0"/>
<src start="1"/>
<immediate name="sr_count" size="4" pseudo="true"/>
<mod name="wide_indices" start="8" size="1" opt="wide_indices"/>
<mod name="texel_offset" start="9" size="1" opt="texel_offset"/>
<mod name="skip" start="9" size="1" opt="skip"/>
<mod name="shadow" start="9" size="1" opt="shadow"/>
@ -8466,6 +8467,7 @@
<src start="0"/>
<src start="1"/>
<immediate name="sr_count" size="4" pseudo="true"/>
<mod name="wide_indices" start="8" size="1" opt="wide_indices"/>
<mod name="texel_offset" start="9" size="1" opt="texel_offset"/>
<mod name="skip" start="9" size="1" opt="skip"/>
<mod name="array_enable" start="9" size="1" opt="array_enable"/>
@ -8507,6 +8509,7 @@
<src start="0"/>
<src start="1"/>
<immediate name="sr_count" size="4" pseudo="true"/>
<mod name="wide_indices" start="8" size="1" opt="wide_indices"/>
<mod name="texel_offset" start="9" size="1" opt="texel_offset"/>
<mod name="skip" start="9" size="1" opt="skip"/>
<mod name="shadow" start="9" size="1" opt="shadow"/>

View file

@ -362,6 +362,18 @@ bi_reg_fmt_for_nir(nir_alu_type T)
}
}
/* Returns whether idx is a constant resource handle usable with the narrow
 * encoding: its index part must be below 1024 and its table part must be
 * accepted by va_is_valid_const_table(). Non-constant indices never qualify.
 */
static bool
va_is_valid_const_narrow_index(bi_index idx)
{
   if (idx.type == BI_INDEX_CONSTANT) {
      /* Split the handle into its (index, table) parts */
      unsigned res_index = pan_res_handle_get_index(idx.value);
      unsigned res_table = pan_res_handle_get_table(idx.value);

      if (res_index < 1024)
         return va_is_valid_const_table(res_table);
   }

   return false;
}
/* Checks if the _IMM variant of an intrinsic can be used, returning in imm the
* immediate to be used (which applies even if _IMM can't be used) */
@ -3456,7 +3468,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
bi_index sregs[VALHALL_TEX_SREG_COUNT] = {};
bi_index sampler = bi_imm_u32(instr->sampler_index);
bi_index texture = bi_imm_u32(instr->texture_index);
uint32_t tables = (PAN_TABLE_SAMPLER << 11) | (PAN_TABLE_TEXTURE << 27);
for (unsigned i = 0; i < instr->num_srcs; ++i) {
bi_index index = bi_src_index(&instr->src[i].src);
@ -3560,9 +3571,31 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
if (sr_count)
bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32);
bi_index image_src = bi_imm_u32(tables);
image_src = bi_lshift_or_i32(b, sampler, image_src, bi_imm_u8(0));
image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16));
bool narrow_indices = va_is_valid_const_narrow_index(texture) &&
va_is_valid_const_narrow_index(sampler);
bi_index src0;
bi_index src1;
if (narrow_indices) {
unsigned tex_set =
va_res_fold_table_idx(pan_res_handle_get_table(texture.value));
unsigned sampler_set =
va_res_fold_table_idx(pan_res_handle_get_table(sampler.value));
unsigned texture_index = pan_res_handle_get_index(texture.value);
unsigned sampler_index = pan_res_handle_get_index(sampler.value);
unsigned packed_handle = (tex_set << 27) | (texture_index << 16) |
(sampler_set << 11) | sampler_index;
src0 = bi_imm_u32(packed_handle);
/* TODO: narrow offsetms */
src1 = bi_zero();
} else {
src0 = sampler;
src1 = texture;
}
/* Only write the components that we actually read */
unsigned mask = nir_def_components_read(&instr->def);
@ -3577,19 +3610,19 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
case nir_texop_tex:
case nir_texop_txl:
case nir_texop_txb:
bi_tex_single_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim,
regfmt, instr->is_shadow, explicit_offset, lod_mode,
mask, sr_count);
bi_tex_single_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt,
instr->is_shadow, explicit_offset, lod_mode,
!narrow_indices, mask, sr_count);
break;
case nir_texop_txf:
case nir_texop_txf_ms:
bi_tex_fetch_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim,
regfmt, explicit_offset, mask, sr_count);
bi_tex_fetch_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt,
explicit_offset, !narrow_indices, mask, sr_count);
break;
case nir_texop_tg4:
bi_tex_gather_to(b, dest, idx, image_src, bi_zero(), instr->is_array, dim,
bi_tex_gather_to(b, dest, idx, src0, src1, instr->is_array, dim,
instr->component, false, regfmt, instr->is_shadow,
explicit_offset, mask, sr_count);
explicit_offset, !narrow_indices, mask, sr_count);
break;
default:
unreachable("Unhandled Valhall texture op");
@ -3704,8 +3737,11 @@ bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
* it for txf operations, since there may be no other valid samplers. This is
* a workaround: txf does not require a sampler in NIR (so sampler_index is
* undefined) but we need one in the hardware. This is ABI with the driver.
*
* On Valhall, as the descriptor table is encoded in the index, this should
* be handled by the driver.
*/
if (!nir_tex_instr_need_sampler(instr))
if (!nir_tex_instr_need_sampler(instr) && b->shader->arch < 9)
instr->sampler_index = 0;
if (b->shader->arch >= 9)

View file

@ -504,6 +504,7 @@ typedef struct {
/* Used for valhall texturing */
bool shadow;
bool wide_indices;
bool texel_offset;
bool array_enable;
bool integer_coordinates;

View file

@ -198,10 +198,10 @@ TEST_F(InsertFlow, TextureImplicit)
{
CASE(FRAGMENT, {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
bi_tex_single_to(
b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
@ -217,7 +217,7 @@ TEST_F(InsertFlow, TextureExplicit)
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
BI_VA_LOD_MODE_ZERO_LOD, false, BI_WRITE_MASK_RGBA, 4);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);

View file

@ -292,10 +292,10 @@ TEST_F(MergeFlow, DeletePointlessDiscard)
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
bi_tex_single_to(
b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
flow(WAIT0126);
@ -311,7 +311,7 @@ TEST_F(MergeFlow, DeletePointlessDiscard)
I = bi_tex_single_to(
b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
BI_VA_LOD_MODE_COMPUTED_LOD, false, BI_WRITE_MASK_RGBA, 4);
I->flow = VA_FLOW_WAIT0126;
I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
atest);

View file

@ -913,6 +913,8 @@ va_pack_instr(const bi_instr *I)
if (I->op == BI_OPCODE_TEX_FETCH && I->shadow)
invalid_instruction(I, "TEX_FETCH does not support .shadow");
if (I->wide_indices)
hex |= (1ull << 8);
if (I->array_enable)
hex |= (1ull << 10);
if (I->texel_offset)

View file

@ -161,6 +161,26 @@ va_flow_is_wait_or_none(enum va_flow flow)
return (flow <= VA_FLOW_WAIT);
}
/* Returns true when `table` lies in one of the two table ID ranges accepted
 * here: 0..11 or 60..63.
 */
static inline bool
va_is_valid_const_table(unsigned table)
{
   /* `table` is unsigned, so no lower-bound check against 0 is needed (it
    * would be always true and trips -Wtype-limits).
    */
   return table <= 11 || (table >= 60 && table <= 63);
}
/* Folds a table ID into a compact value: tables 0..11 map to themselves and
 * tables 60..63 map to 12..15. Any other table is a caller error: it asserts
 * in debug builds and yields 0 otherwise.
 *
 * Written with plain comparisons rather than GNU case ranges so the header
 * stays within standard C/C++.
 */
static inline uint32_t
va_res_fold_table_idx(uint32_t table)
{
   if (table <= 11)
      return table;

   if (table >= 60 && table <= 63)
      return table + 12 - 60;

   assert(!"Can't pack table");
   return 0;
}
#ifdef __cplusplus
} /* extern C */
#endif

View file

@ -402,6 +402,18 @@ lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr,
return true;
}
/* Sampler index in the form passed to the compiler: a PAN_TABLE_SAMPLER
 * resource handle when PAN_ARCH >= 9, the plain index otherwise.
 */
static uint32_t
sampler_hw_index(uint32_t index)
{
   if (PAN_ARCH >= 9)
      return pan_res_handle(PAN_TABLE_SAMPLER, index);

   return index;
}
/* Texture index in the form passed to the compiler: a PAN_TABLE_TEXTURE
 * resource handle when PAN_ARCH >= 9, the plain index otherwise.
 */
static uint32_t
tex_hw_index(uint32_t index)
{
   if (PAN_ARCH >= 9)
      return pan_res_handle(PAN_TABLE_TEXTURE, index);

   return index;
}
static const struct pan_blit_shader_data *
pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
const struct pan_blit_shader_key *key)
@ -527,7 +539,8 @@ pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
tex->op = nir_texop_txf_ms;
tex->dest_type = key->surfaces[i].type;
tex->texture_index = active_count;
tex->texture_index = tex_hw_index(active_count);
tex->sampler_index = sampler_hw_index(0);
tex->is_array = key->surfaces[i].array;
tex->sampler_dim = sampler_dim;
@ -552,7 +565,8 @@ pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 3 : 1);
tex->dest_type = key->surfaces[i].type;
tex->texture_index = active_count;
tex->texture_index = tex_hw_index(active_count);
tex->sampler_index = sampler_hw_index(0);
tex->is_array = key->surfaces[i].array;
tex->sampler_dim = sampler_dim;

View file

@ -426,4 +426,37 @@ pan_subgroup_size(unsigned arch)
return 1;
}
/*
 * Extract the table ID from a Valhall descriptor-model resource handle.
 * The table is stored in the bits from bit 24 upwards and must be below 64.
 */
static inline unsigned
pan_res_handle_get_table(unsigned handle)
{
   const unsigned table_id = handle >> 24;

   assert(table_id < 64);

   return table_id;
}
/*
 * Extract the index from a Valhall descriptor-model resource handle, i.e.
 * the handle masked with BITFIELD_MASK(24).
 */
static inline unsigned
pan_res_handle_get_index(unsigned handle)
{
   const unsigned index_bits = BITFIELD_MASK(24);

   return handle & index_bits;
}
/*
 * Build a Valhall descriptor-model resource handle from a table ID and an
 * index within that table. The table (below 64) is placed at bit 24 and up;
 * the index (below 2^24) fills the bits beneath it.
 */
static inline unsigned
pan_res_handle(unsigned table, unsigned index)
{
   assert(table < 64);
   assert(index < (1u << 24));

   const unsigned table_bits = table << 24;

   return table_bits | index;
}
#endif