pan/compiler: Mostly remove auto32 varting store

Using explicit types makes the code more easier to reason about, there
is only one edge-case where we still need varying stores but it should
be removed soon.

Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Acked-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38681>
This commit is contained in:
Lorenzo Rossi 2026-02-20 17:48:20 +01:00 committed by Marge Bot
parent 9f8beb3bd5
commit 3b8a87cbe7
2 changed files with 89 additions and 68 deletions

View file

@ -590,6 +590,12 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
bi_index src0 = bi_null();
/* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
* through a compiler input value, falling back to LD_VAR[_IMM] +
* Attribute Descriptors otherwise. */
bool use_ld_var_buf =
b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
unsigned component = nir_intrinsic_component(instr);
enum bi_vecsize vecsize = (instr->num_components + component - 1);
bi_index dest =
@ -597,18 +603,24 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
const nir_alu_type type = nir_intrinsic_dest_type(instr);
const nir_alu_type base_type = nir_alu_type_get_base_type(type);
const nir_alu_type sz = nir_alu_type_get_type_size(type);
assert(sz == instr->def.bit_size);
assert(sz == 16 || sz == 32);
assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float);
const struct pan_varying_slot *slot = NULL;
if (b->shader->varying_layout) {
unsigned src_sz = sz;
if (use_ld_var_buf) {
pan_varying_layout_require_layout(b->shader->varying_layout);
slot = pan_varying_layout_find_slot(b->shader->varying_layout,
sem.location);
assert(slot);
src_sz = nir_alu_type_get_type_size(slot->alu_type);
assert(src_sz == 16 || src_sz == 32);
}
unsigned sz = instr->def.bit_size;
assert(sz == 16 || sz == 32);
/* mediump varyings are always written as 32-bits in the VS, but may be read
* to 16 in the FS. */
unsigned src_sz = sem.medium_precision ? 32 : sz;
if (smooth) {
nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
assert(parent);
@ -616,17 +628,26 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
sample = bi_interp_for_intrinsic(parent->intrinsic);
src0 = bi_varying_src0_for_barycentric(b, parent);
/* Smooth ints don't exist */
assert(base_type == nir_type_float);
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
source_format =
(src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32;
} else {
/* u16 regfmt is not supported by LD_VAR_BUF, but using f16 for integers
* is okay because we use a f16 attribute descriptor for all 16-bit
* varyings regardless of whether they are floats or ints. The
* conversion is a no-op. */
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
source_format = (src_sz == 16) ?
BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
if (use_ld_var_buf) {
/* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers
* is okay if the source_format is flat and uses the same bit size.
* The conversion is a no-op. */
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
source_format = (src_sz == 16) ?
BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
/* conversion MUST be a noop for int varyings to work correctly */
assert(base_type == nir_type_float || src_sz == sz);
} else {
/* Flat loading with i16/u16 is not encodable */
assert(base_type == nir_type_float || sz == 32);
regfmt = bi_reg_fmt_for_nir(type);
}
/* Valhall can't have bi_null() here, although the source is
* logically unused for flat varyings
@ -643,13 +664,8 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
unsigned base = nir_intrinsic_base(instr);
/* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
* through a compiler input value, falling back to LD_VAR[_IMM] +
* Attribute Descriptors otherwise. */
bool use_ld_var_buf =
b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
if (use_ld_var_buf) {
assert(slot);
if (immediate) {
assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16);
@ -1151,11 +1167,20 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset);
} else {
/* 16-bit varyings are always written and loaded as F16, regardless of
* whether they are float or int */
assert(T_size == 32 || T_size == 16);
enum bi_register_format regfmt =
T_size == 16 ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
/* Since v9 we cannot have separate attribute descriptors for VS-FS,
* There might be a mismatch on Gallium where the VS thinks it is storing
* an int, but the data is actually a float, and that's what FS expects.
* So, just for v9 onwards, just until we haven't fixed gallium, use auto32.
* We are still getting around the midgard quirk since we do this only
* from v9.
* TODO: fix all bugs with gallium and remove this patch
*/
if (b->shader->arch >= 9 && T_size == 32)
regfmt = BI_REGISTER_FORMAT_AUTO;
if (immediate) {
bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b),

View file

@ -1322,39 +1322,26 @@ emit_varying_read(compiler_context *ctx, unsigned dest, unsigned offset,
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
ins.load_store.index_format = midgard_index_address_u32;
/* For flat shading, for GPUs supporting auto32, we always use .u32 and
* require 32-bit mode. For smooth shading, we use the appropriate
* floating-point type.
*
* This could be optimized, but it makes it easy to check correctness.
*/
if (ctx->quirks & MIDGARD_NO_AUTO32) {
switch (type) {
case nir_type_uint32:
case nir_type_bool32:
ins.op = midgard_op_ld_vary_32u;
break;
case nir_type_int32:
ins.op = midgard_op_ld_vary_32i;
break;
case nir_type_float32:
ins.op = midgard_op_ld_vary_32;
break;
case nir_type_float16:
ins.op = midgard_op_ld_vary_16;
break;
default:
UNREACHABLE("Attempted to load unknown type");
break;
}
} else if (flat) {
assert(nir_alu_type_get_type_size(type) == 32);
ins.op = midgard_op_ld_vary_32u;
} else {
if (!flat) {
assert(nir_alu_type_get_base_type(type) == nir_type_float);
ins.op = (nir_alu_type_get_type_size(type) == 32) ? midgard_op_ld_vary_32
: midgard_op_ld_vary_16;
}
switch (type) {
case nir_type_uint32:
case nir_type_bool32:
ins.op = midgard_op_ld_vary_32u;
break;
case nir_type_int32:
ins.op = midgard_op_ld_vary_32i;
break;
case nir_type_float32:
ins.op = midgard_op_ld_vary_32;
break;
case nir_type_float16:
ins.op = midgard_op_ld_vary_16;
break;
default:
UNREACHABLE("Attempted to load unknown type");
break;
}
emit_mir_instruction(ctx, &ins);
@ -1868,18 +1855,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
unsigned dst_component = nir_intrinsic_component(instr);
unsigned nr_comp = nir_src_num_components(instr->src[0]);
/* ABI: Format controlled by the attribute descriptor.
* This simplifies flat shading, although it prevents
* certain (unimplemented) 16-bit optimizations.
*
* In particular, it lets the driver handle internal
* TGSI shaders that set flat in the VS but smooth in
* the FS. This matches our handling on Bifrost.
*/
bool auto32 = true;
assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) ==
32);
bool auto32 = false;
/* ABI: varyings in the secondary attribute table */
bool secondary_table = true;
@ -1910,6 +1886,26 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
src_component++;
}
nir_alu_type type = nir_intrinsic_src_type(instr);
switch (type) {
case nir_type_uint32:
case nir_type_bool32:
st.op = midgard_op_st_vary_32u;
break;
case nir_type_int32:
st.op = midgard_op_st_vary_32i;
break;
case nir_type_float32:
st.op = midgard_op_st_vary_32;
break;
case nir_type_float16:
st.op = midgard_op_st_vary_16;
break;
default:
UNREACHABLE("Attempted to store unknown type");
break;
}
emit_mir_instruction(ctx, &st);
} else {
UNREACHABLE("Unknown store");