mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
pan/bi: Lower FS input loads in NIR
Co-authored-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40391>
This commit is contained in:
parent
d2f430bea9
commit
8541dca8ed
4 changed files with 127 additions and 148 deletions
|
|
@ -580,151 +580,6 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bi_copy_component(b, instr, dest);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_load_fs_input(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
enum bi_sample sample = BI_SAMPLE_CENTER;
|
||||
enum bi_update update = BI_UPDATE_STORE;
|
||||
enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO;
|
||||
enum bi_source_format source_format;
|
||||
bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
|
||||
bi_index src0 = bi_null();
|
||||
|
||||
/* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
|
||||
* through a compiler input value, falling back to LD_VAR[_IMM] +
|
||||
* Attribute Descriptors otherwise. */
|
||||
bool use_ld_var_buf =
|
||||
b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
|
||||
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
enum bi_vecsize vecsize = (instr->num_components + component - 1);
|
||||
bi_index dest =
|
||||
(component == 0) ? bi_def_index(&instr->def) : bi_temp(b->shader);
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
|
||||
const nir_alu_type type = nir_intrinsic_dest_type(instr);
|
||||
const nir_alu_type base_type = nir_alu_type_get_base_type(type);
|
||||
const nir_alu_type sz = nir_alu_type_get_type_size(type);
|
||||
assert(sz == instr->def.bit_size);
|
||||
assert(sz == 16 || sz == 32);
|
||||
assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float);
|
||||
|
||||
const struct pan_varying_slot *slot = NULL;
|
||||
unsigned src_sz = sz;
|
||||
if (use_ld_var_buf) {
|
||||
pan_varying_layout_require_layout(b->shader->varying_layout);
|
||||
slot = pan_varying_layout_find_slot(b->shader->varying_layout,
|
||||
sem.location);
|
||||
assert(slot);
|
||||
src_sz = nir_alu_type_get_type_size(slot->alu_type);
|
||||
assert(src_sz == 16 || src_sz == 32);
|
||||
}
|
||||
|
||||
if (smooth) {
|
||||
nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
|
||||
assert(parent);
|
||||
|
||||
sample = bi_interp_for_intrinsic(parent->intrinsic);
|
||||
src0 = bi_varying_src0_for_barycentric(b, parent);
|
||||
|
||||
/* Smooth ints don't exist */
|
||||
assert(base_type == nir_type_float);
|
||||
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
|
||||
source_format =
|
||||
(src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32;
|
||||
} else {
|
||||
if (use_ld_var_buf) {
|
||||
/* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers
|
||||
* is okay if the source_format is flat and uses the same bit size.
|
||||
* The conversion is a no-op. */
|
||||
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
|
||||
source_format = (src_sz == 16) ?
|
||||
BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
|
||||
/* conversion MUST be a noop for int varyings to work correctly */
|
||||
assert(base_type == nir_type_float || src_sz == sz);
|
||||
} else {
|
||||
/* Flat loading with i16/u16 is not encodable */
|
||||
assert(base_type == nir_type_float || sz == 32);
|
||||
regfmt = bi_reg_fmt_for_nir(type);
|
||||
}
|
||||
|
||||
/* Valhall can't have bi_null() here, although the source is
|
||||
* logically unused for flat varyings
|
||||
*/
|
||||
if (b->shader->arch >= 9)
|
||||
src0 = bi_preload(b, 61);
|
||||
|
||||
/* Gather info as we go */
|
||||
b->shader->info.bifrost->uses_flat_shading = true;
|
||||
}
|
||||
|
||||
nir_src *offset_src = nir_get_io_offset_src(instr);
|
||||
unsigned imm_index = 0;
|
||||
bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
|
||||
if (use_ld_var_buf) {
|
||||
assert(slot);
|
||||
if (immediate) {
|
||||
assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
|
||||
unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16);
|
||||
|
||||
/* Immediate index given in bytes. */
|
||||
bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
|
||||
update, vecsize, offset);
|
||||
} else {
|
||||
bi_index idx = bi_src_index(offset_src);
|
||||
/* Index needs to be in bytes, but NIR gives the index
|
||||
* in slots. For now assume 16 bytes per element.
|
||||
*/
|
||||
bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4));
|
||||
if (slot->offset != 0)
|
||||
idx_bytes = bi_iadd_u32(b, idx_bytes, bi_imm_u32(slot->offset),
|
||||
false);
|
||||
|
||||
bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample,
|
||||
source_format, update, vecsize);
|
||||
}
|
||||
} else {
|
||||
/* On Valhall, ensure the table and index are valid for usage with
|
||||
* immediate form when IDVS isn't used */
|
||||
if (b->shader->arch >= 9)
|
||||
immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) &&
|
||||
pan_res_handle_get_index(base) < 256;
|
||||
|
||||
if (immediate) {
|
||||
bi_instr *I;
|
||||
|
||||
if (smooth) {
|
||||
I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize,
|
||||
pan_res_handle_get_index(imm_index));
|
||||
} else {
|
||||
I =
|
||||
bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize,
|
||||
pan_res_handle_get_index(imm_index));
|
||||
}
|
||||
|
||||
/* Valhall usually uses LD_VAR_BUF. If this is disabled, use a simple
|
||||
* Midgard-style ABI. */
|
||||
if (b->shader->arch >= 9)
|
||||
I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
|
||||
} else {
|
||||
bi_index idx = bi_src_index(offset_src);
|
||||
|
||||
if (base != 0)
|
||||
idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false);
|
||||
|
||||
if (smooth)
|
||||
bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize);
|
||||
else
|
||||
bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize);
|
||||
}
|
||||
}
|
||||
|
||||
bi_copy_component(b, instr, dest);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_load_var(bi_builder *b, nir_intrinsic_instr *intr)
|
||||
{
|
||||
|
|
@ -2140,9 +1995,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_interpolated_input:
|
||||
case nir_intrinsic_load_input:
|
||||
assert(!b->shader->inputs->is_blend);
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
bi_emit_load_fs_input(b, instr);
|
||||
else if (stage == MESA_SHADER_VERTEX)
|
||||
if (stage == MESA_SHADER_VERTEX)
|
||||
bi_emit_load_attr(b, instr);
|
||||
else
|
||||
UNREACHABLE("Unsupported shader stage");
|
||||
|
|
@ -7247,6 +7100,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
inputs->trust_varying_flat_highp_types, false);
|
||||
info->varyings.noperspective =
|
||||
pan_nir_collect_noperspective_varyings_fs(nir);
|
||||
|
||||
if (!inputs->is_blend)
|
||||
NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id,
|
||||
inputs->varying_layout, inputs->valhall.use_ld_var_buf);
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ libpanfrost_compiler_files = files(
|
|||
'pan_nir_lower_sample_position.c',
|
||||
'pan_nir_lower_store_component.c',
|
||||
'pan_nir_lower_texel_buffer_index.c',
|
||||
'pan_nir_lower_varyings_io.c',
|
||||
'pan_nir_lower_vertex_id.c',
|
||||
'pan_nir_lower_xfb.c',
|
||||
'pan_nir_resize_varying_io.c',
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ bool pan_nir_lower_frag_coord_zw(nir_shader *shader);
|
|||
bool pan_nir_lower_noperspective_vs(nir_shader *shader);
|
||||
bool pan_nir_lower_noperspective_fs(nir_shader *shader);
|
||||
|
||||
/*
 * Lower fragment shader load_input / load_interpolated_input intrinsics to
 * the Panfrost load_var_pan / load_var_flat_pan intrinsics, or to
 * load_var_buf_pan / load_var_buf_flat_pan when valhall_use_ld_var_buf is
 * set (in which case varying_layout is used to look up each varying's slot).
 *
 * Returns the progress flag reported by nir_shader_intrinsics_pass().
 */
bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
                             const struct pan_varying_layout *varying_layout,
                             bool valhall_use_ld_var_buf);
|
||||
|
||||
bool pan_nir_lower_helper_invocation(nir_shader *shader);
|
||||
bool pan_nir_lower_sample_pos(nir_shader *shader);
|
||||
bool pan_nir_lower_xfb(nir_shader *nir);
|
||||
|
|
|
|||
117
src/panfrost/compiler/pan_nir_lower_varyings_io.c
Normal file
117
src/panfrost/compiler/pan_nir_lower_varyings_io.c
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright (C) 2025 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "pan_nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "panfrost/model/pan_model.h"
|
||||
|
||||
/* State handed to lower_fs_input_load() through the pass callback data. */
struct lower_fs_inputs_ctx {
   /* Architecture number derived from the GPU id with pan_arch(). */
   unsigned arch;

   /* Varying layout used to find the slot of a varying by location; only
    * consulted when valhall_use_ld_var_buf is set. */
   const struct pan_varying_layout *varying_layout;

   /* Selects load_var_buf*_pan lowering instead of load_var*_pan. */
   bool valhall_use_ld_var_buf;
};
|
||||
|
||||
static bool
|
||||
lower_fs_input_load(struct nir_builder *b,
|
||||
nir_intrinsic_instr *load, void *cb_data)
|
||||
{
|
||||
const struct lower_fs_inputs_ctx *ctx = cb_data;
|
||||
|
||||
if (load->intrinsic != nir_intrinsic_load_input &&
|
||||
load->intrinsic != nir_intrinsic_load_interpolated_input)
|
||||
return false;
|
||||
|
||||
const nir_io_semantics sem = nir_intrinsic_io_semantics(load);
|
||||
const nir_alu_type dest_type = nir_intrinsic_dest_type(load);
|
||||
|
||||
/* Indirect array varyings are not yet supported (num_slots > 1) */
|
||||
assert(sem.num_slots == 1);
|
||||
assert(nir_src_as_uint(*nir_get_io_offset_src(load)) == 0);
|
||||
|
||||
nir_intrinsic_instr *bary;
|
||||
switch (load->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
bary = NULL;
|
||||
break;
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
/* Cannot interpolate ints */
|
||||
assert(nir_alu_type_get_base_type(dest_type) == nir_type_float);
|
||||
bary = nir_src_as_intrinsic(load->src[0]);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Already handled");
|
||||
}
|
||||
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
||||
const unsigned component = nir_intrinsic_component(load);
|
||||
const unsigned load_comps = load->num_components + component;
|
||||
|
||||
nir_def *res;
|
||||
if (ctx->valhall_use_ld_var_buf) {
|
||||
assert(ctx->arch >= 9);
|
||||
|
||||
pan_varying_layout_require_layout(ctx->varying_layout);
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_find_slot(ctx->varying_layout,
|
||||
sem.location);
|
||||
assert(slot);
|
||||
const nir_alu_type src_type = slot->alu_type;
|
||||
nir_def *offset_B = nir_imm_int(b, slot->offset);
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
res = nir_load_var_buf_pan(b, load_comps, load->def.bit_size,
|
||||
offset_B, &bary->def,
|
||||
.src_type = src_type,
|
||||
.io_semantics = sem);
|
||||
} else {
|
||||
res = nir_load_var_buf_flat_pan(b, load_comps, load->def.bit_size,
|
||||
offset_B,
|
||||
.src_type = src_type,
|
||||
.io_semantics = sem);
|
||||
}
|
||||
} else {
|
||||
const uint32_t base = nir_intrinsic_base(load);
|
||||
nir_def *idx = nir_imm_int(b, base);
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
res = nir_load_var_pan(b, load_comps, load->def.bit_size,
|
||||
idx, &bary->def,
|
||||
.dest_type = dest_type,
|
||||
.io_semantics = sem);
|
||||
} else {
|
||||
res = nir_load_var_flat_pan(b, load_comps, load->def.bit_size, idx,
|
||||
.dest_type = dest_type,
|
||||
.io_semantics = sem);
|
||||
}
|
||||
}
|
||||
|
||||
if (component > 0) {
|
||||
unsigned swiz[NIR_MAX_VEC_COMPONENTS] = {0, };
|
||||
for (unsigned c = 0; c < load->num_components; c++)
|
||||
swiz[c] = component + c;
|
||||
|
||||
res = nir_swizzle(b, res, swiz, load->num_components);
|
||||
}
|
||||
|
||||
nir_def_replace(&load->def, res);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
|
||||
const struct pan_varying_layout *varying_layout,
|
||||
bool valhall_use_ld_var_buf)
|
||||
{
|
||||
const struct lower_fs_inputs_ctx ctx = {
|
||||
.arch = pan_arch(gpu_id),
|
||||
.varying_layout = varying_layout,
|
||||
.valhall_use_ld_var_buf = valhall_use_ld_var_buf,
|
||||
};
|
||||
return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
|
||||
nir_metadata_control_flow,
|
||||
(void *)&ctx);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue