mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-14 04:58:20 +02:00
pan/compiler: Mostly remove auto32 varying store
Using explicit types makes the code easier to reason about. There is only one edge case where we still need auto32 varying stores, but it should be removed soon. Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38681>
This commit is contained in:
parent
9f8beb3bd5
commit
3b8a87cbe7
2 changed files with 89 additions and 68 deletions
|
|
@ -590,6 +590,12 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
|
||||
bi_index src0 = bi_null();
|
||||
|
||||
/* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
|
||||
* through a compiler input value, falling back to LD_VAR[_IMM] +
|
||||
* Attribute Descriptors otherwise. */
|
||||
bool use_ld_var_buf =
|
||||
b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
|
||||
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
enum bi_vecsize vecsize = (instr->num_components + component - 1);
|
||||
bi_index dest =
|
||||
|
|
@ -597,18 +603,24 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
|
||||
const nir_alu_type type = nir_intrinsic_dest_type(instr);
|
||||
const nir_alu_type base_type = nir_alu_type_get_base_type(type);
|
||||
const nir_alu_type sz = nir_alu_type_get_type_size(type);
|
||||
assert(sz == instr->def.bit_size);
|
||||
assert(sz == 16 || sz == 32);
|
||||
assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float);
|
||||
|
||||
const struct pan_varying_slot *slot = NULL;
|
||||
if (b->shader->varying_layout) {
|
||||
unsigned src_sz = sz;
|
||||
if (use_ld_var_buf) {
|
||||
pan_varying_layout_require_layout(b->shader->varying_layout);
|
||||
slot = pan_varying_layout_find_slot(b->shader->varying_layout,
|
||||
sem.location);
|
||||
assert(slot);
|
||||
src_sz = nir_alu_type_get_type_size(slot->alu_type);
|
||||
assert(src_sz == 16 || src_sz == 32);
|
||||
}
|
||||
|
||||
unsigned sz = instr->def.bit_size;
|
||||
assert(sz == 16 || sz == 32);
|
||||
/* mediump varyings are always written as 32-bits in the VS, but may be read
|
||||
* to 16 in the FS. */
|
||||
unsigned src_sz = sem.medium_precision ? 32 : sz;
|
||||
|
||||
if (smooth) {
|
||||
nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
|
||||
assert(parent);
|
||||
|
|
@ -616,17 +628,26 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
sample = bi_interp_for_intrinsic(parent->intrinsic);
|
||||
src0 = bi_varying_src0_for_barycentric(b, parent);
|
||||
|
||||
/* Smooth ints don't exist */
|
||||
assert(base_type == nir_type_float);
|
||||
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
|
||||
source_format =
|
||||
(src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32;
|
||||
} else {
|
||||
/* u16 regfmt is not supported by LD_VAR_BUF, but using f16 for integers
|
||||
* is okay because we use a f16 attribute descriptor for all 16-bit
|
||||
* varyings regardless of whether they are floats or ints. The
|
||||
* conversion is a no-op. */
|
||||
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
|
||||
source_format = (src_sz == 16) ?
|
||||
BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
|
||||
if (use_ld_var_buf) {
|
||||
/* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers
|
||||
* is okay if the source_format is flat and uses the same bit size.
|
||||
* The conversion is a no-op. */
|
||||
regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
|
||||
source_format = (src_sz == 16) ?
|
||||
BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
|
||||
/* conversion MUST be a noop for int varyings to work correctly */
|
||||
assert(base_type == nir_type_float || src_sz == sz);
|
||||
} else {
|
||||
/* Flat loading with i16/u16 is not encodable */
|
||||
assert(base_type == nir_type_float || sz == 32);
|
||||
regfmt = bi_reg_fmt_for_nir(type);
|
||||
}
|
||||
|
||||
/* Valhall can't have bi_null() here, although the source is
|
||||
* logically unused for flat varyings
|
||||
|
|
@ -643,13 +664,8 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
|
||||
/* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
|
||||
* through a compiler input value, falling back to LD_VAR[_IMM] +
|
||||
* Attribute Descriptors otherwise. */
|
||||
bool use_ld_var_buf =
|
||||
b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
|
||||
|
||||
if (use_ld_var_buf) {
|
||||
assert(slot);
|
||||
if (immediate) {
|
||||
assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
|
||||
unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16);
|
||||
|
|
@ -1151,11 +1167,20 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset);
|
||||
} else {
|
||||
/* 16-bit varyings are always written and loaded as F16, regardless of
|
||||
* whether they are float or int */
|
||||
assert(T_size == 32 || T_size == 16);
|
||||
enum bi_register_format regfmt =
|
||||
T_size == 16 ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
|
||||
|
||||
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
|
||||
|
||||
/* Since v9 we cannot have separate attribute descriptors for VS-FS,
|
||||
* There might be a mismatch on Gallium where the VS thinks it is storing
|
||||
* an int, but the data is actually a float, and that's what FS expects.
|
||||
* So, from v9 onwards, and only until gallium is fixed, use auto32.
|
||||
* We are still getting around the midgard quirk since we do this only
|
||||
* from v9.
|
||||
* TODO: fix all bugs with gallium and remove this patch
|
||||
*/
|
||||
if (b->shader->arch >= 9 && T_size == 32)
|
||||
regfmt = BI_REGISTER_FORMAT_AUTO;
|
||||
|
||||
if (immediate) {
|
||||
bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b),
|
||||
|
|
|
|||
|
|
@ -1322,39 +1322,26 @@ emit_varying_read(compiler_context *ctx, unsigned dest, unsigned offset,
|
|||
ins.load_store.arg_reg = REGISTER_LDST_ZERO;
|
||||
ins.load_store.index_format = midgard_index_address_u32;
|
||||
|
||||
/* For flat shading, for GPUs supporting auto32, we always use .u32 and
|
||||
* require 32-bit mode. For smooth shading, we use the appropriate
|
||||
* floating-point type.
|
||||
*
|
||||
* This could be optimized, but it makes it easy to check correctness.
|
||||
*/
|
||||
if (ctx->quirks & MIDGARD_NO_AUTO32) {
|
||||
switch (type) {
|
||||
case nir_type_uint32:
|
||||
case nir_type_bool32:
|
||||
ins.op = midgard_op_ld_vary_32u;
|
||||
break;
|
||||
case nir_type_int32:
|
||||
ins.op = midgard_op_ld_vary_32i;
|
||||
break;
|
||||
case nir_type_float32:
|
||||
ins.op = midgard_op_ld_vary_32;
|
||||
break;
|
||||
case nir_type_float16:
|
||||
ins.op = midgard_op_ld_vary_16;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Attempted to load unknown type");
|
||||
break;
|
||||
}
|
||||
} else if (flat) {
|
||||
assert(nir_alu_type_get_type_size(type) == 32);
|
||||
ins.op = midgard_op_ld_vary_32u;
|
||||
} else {
|
||||
if (!flat) {
|
||||
assert(nir_alu_type_get_base_type(type) == nir_type_float);
|
||||
|
||||
ins.op = (nir_alu_type_get_type_size(type) == 32) ? midgard_op_ld_vary_32
|
||||
: midgard_op_ld_vary_16;
|
||||
}
|
||||
switch (type) {
|
||||
case nir_type_uint32:
|
||||
case nir_type_bool32:
|
||||
ins.op = midgard_op_ld_vary_32u;
|
||||
break;
|
||||
case nir_type_int32:
|
||||
ins.op = midgard_op_ld_vary_32i;
|
||||
break;
|
||||
case nir_type_float32:
|
||||
ins.op = midgard_op_ld_vary_32;
|
||||
break;
|
||||
case nir_type_float16:
|
||||
ins.op = midgard_op_ld_vary_16;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Attempted to load unknown type");
|
||||
break;
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, &ins);
|
||||
|
|
@ -1868,18 +1855,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
|
||||
unsigned dst_component = nir_intrinsic_component(instr);
|
||||
unsigned nr_comp = nir_src_num_components(instr->src[0]);
|
||||
|
||||
/* ABI: Format controlled by the attribute descriptor.
|
||||
* This simplifies flat shading, although it prevents
|
||||
* certain (unimplemented) 16-bit optimizations.
|
||||
*
|
||||
* In particular, it lets the driver handle internal
|
||||
* TGSI shaders that set flat in the VS but smooth in
|
||||
* the FS. This matches our handling on Bifrost.
|
||||
*/
|
||||
bool auto32 = true;
|
||||
assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) ==
|
||||
32);
|
||||
bool auto32 = false;
|
||||
|
||||
/* ABI: varyings in the secondary attribute table */
|
||||
bool secondary_table = true;
|
||||
|
|
@ -1910,6 +1886,26 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
src_component++;
|
||||
}
|
||||
|
||||
nir_alu_type type = nir_intrinsic_src_type(instr);
|
||||
switch (type) {
|
||||
case nir_type_uint32:
|
||||
case nir_type_bool32:
|
||||
st.op = midgard_op_st_vary_32u;
|
||||
break;
|
||||
case nir_type_int32:
|
||||
st.op = midgard_op_st_vary_32i;
|
||||
break;
|
||||
case nir_type_float32:
|
||||
st.op = midgard_op_st_vary_32;
|
||||
break;
|
||||
case nir_type_float16:
|
||||
st.op = midgard_op_st_vary_16;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Attempted to store unknown type");
|
||||
break;
|
||||
}
|
||||
|
||||
emit_mir_instruction(ctx, &st);
|
||||
} else {
|
||||
UNREACHABLE("Unknown store");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue