diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 1924ab1fddc..99b82202a54 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -590,6 +590,12 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; bi_index src0 = bi_null(); + /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver + * through a compiler input value, falling back to LD_VAR[_IMM] + + * Attribute Descriptors otherwise. */ + bool use_ld_var_buf = + b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf; + unsigned component = nir_intrinsic_component(instr); enum bi_vecsize vecsize = (instr->num_components + component - 1); bi_index dest = @@ -597,18 +603,24 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + const nir_alu_type type = nir_intrinsic_dest_type(instr); + const nir_alu_type base_type = nir_alu_type_get_base_type(type); + const nir_alu_type sz = nir_alu_type_get_type_size(type); + assert(sz == instr->def.bit_size); + assert(sz == 16 || sz == 32); + assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float); + const struct pan_varying_slot *slot = NULL; - if (b->shader->varying_layout) { + unsigned src_sz = sz; + if (use_ld_var_buf) { + pan_varying_layout_require_layout(b->shader->varying_layout); slot = pan_varying_layout_find_slot(b->shader->varying_layout, sem.location); + assert(slot); + src_sz = nir_alu_type_get_type_size(slot->alu_type); + assert(src_sz == 16 || src_sz == 32); } - unsigned sz = instr->def.bit_size; - assert(sz == 16 || sz == 32); - /* mediump varyings are always written as 32-bits in the VS, but may be read - * to 16 in the FS. */ - unsigned src_sz = sem.medium_precision ? 32 : sz; - if (smooth) { nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); assert(parent); @@ -616,17 +628,26 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) sample = bi_interp_for_intrinsic(parent->intrinsic); src0 = bi_varying_src0_for_barycentric(b, parent); + /* Smooth ints don't exist */ + assert(base_type == nir_type_float); regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32; source_format = (src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32; } else { - /* u16 regfmt is not supported by LD_VAR_BUF, but using f16 for integers - * is okay because we use a f16 attribute descriptor for all 16-bit - * varyings regardless of whether they are floats or ints. The - * conversion is a no-op. */ - regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO; - source_format = (src_sz == 16) ? - BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32; + if (use_ld_var_buf) { + /* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers + * is okay if the source_format is flat and uses the same bit size. + * The conversion is a no-op. */ + regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32; + source_format = (src_sz == 16) ? + BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32; + /* conversion MUST be a noop for int varyings to work correctly */ + assert(base_type == nir_type_float || src_sz == sz); + } else { + /* Flat loading with i16/u16 is not encodable */ + assert(base_type == nir_type_float || sz == 32); + regfmt = bi_reg_fmt_for_nir(type); + } /* Valhall can't have bi_null() here, although the source is * logically unused for flat varyings @@ -643,13 +664,8 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index); unsigned base = nir_intrinsic_base(instr); - /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver - * through a compiler input value, falling back to LD_VAR[_IMM] + - * Attribute Descriptors otherwise. */ - bool use_ld_var_buf = - b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf; - if (use_ld_var_buf) { + assert(slot); if (immediate) { assert(nir_src_is_const(*offset_src) && "assumes immediate offset"); unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16); @@ -1151,11 +1167,20 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset); } else { - /* 16-bit varyings are always written and loaded as F16, regardless of - * whether they are float or int */ assert(T_size == 32 || T_size == 16); - enum bi_register_format regfmt = - T_size == 16 ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO; + + enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); + + /* Since v9 we cannot have separate attribute descriptors for VS-FS, + * There might be a mismatch on Gallium where the VS thinks it is storing + * an int, but the data is actually a float, and that's what FS expects. + * So, just for v9 onwards, just until we haven't fixed gallium, use auto32. + * We are still getting around the midgard quirk since we do this only + * from v9. + * TODO: fix all bugs with gallium and remove this patch + */ + if (b->shader->arch >= 9 && T_size == 32) + regfmt = BI_REGISTER_FORMAT_AUTO; if (immediate) { bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b), diff --git a/src/panfrost/compiler/midgard/midgard_compile.c b/src/panfrost/compiler/midgard/midgard_compile.c index 8160b194ab1..ae4cd952c3c 100644 --- a/src/panfrost/compiler/midgard/midgard_compile.c +++ b/src/panfrost/compiler/midgard/midgard_compile.c @@ -1322,39 +1322,26 @@ emit_varying_read(compiler_context *ctx, unsigned dest, unsigned offset, ins.load_store.arg_reg = REGISTER_LDST_ZERO; ins.load_store.index_format = midgard_index_address_u32; - /* For flat shading, for GPUs supporting auto32, we always use .u32 and - * require 32-bit mode. For smooth shading, we use the appropriate - * floating-point type. - * - * This could be optimized, but it makes it easy to check correctness. - */ - if (ctx->quirks & MIDGARD_NO_AUTO32) { - switch (type) { - case nir_type_uint32: - case nir_type_bool32: - ins.op = midgard_op_ld_vary_32u; - break; - case nir_type_int32: - ins.op = midgard_op_ld_vary_32i; - break; - case nir_type_float32: - ins.op = midgard_op_ld_vary_32; - break; - case nir_type_float16: - ins.op = midgard_op_ld_vary_16; - break; - default: - UNREACHABLE("Attempted to load unknown type"); - break; - } - } else if (flat) { - assert(nir_alu_type_get_type_size(type) == 32); - ins.op = midgard_op_ld_vary_32u; - } else { + if (!flat) { assert(nir_alu_type_get_base_type(type) == nir_type_float); - - ins.op = (nir_alu_type_get_type_size(type) == 32) ? midgard_op_ld_vary_32 - : midgard_op_ld_vary_16; + } + switch (type) { + case nir_type_uint32: + case nir_type_bool32: + ins.op = midgard_op_ld_vary_32u; + break; + case nir_type_int32: + ins.op = midgard_op_ld_vary_32i; + break; + case nir_type_float32: + ins.op = midgard_op_ld_vary_32; + break; + case nir_type_float16: + ins.op = midgard_op_ld_vary_16; + break; + default: + UNREACHABLE("Attempted to load unknown type"); + break; } emit_mir_instruction(ctx, &ins); @@ -1868,18 +1855,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) unsigned dst_component = nir_intrinsic_component(instr); unsigned nr_comp = nir_src_num_components(instr->src[0]); - - /* ABI: Format controlled by the attribute descriptor. - * This simplifies flat shading, although it prevents - * certain (unimplemented) 16-bit optimizations. - * - * In particular, it lets the driver handle internal - * TGSI shaders that set flat in the VS but smooth in - * the FS. This matches our handling on Bifrost. - */ - bool auto32 = true; - assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) == - 32); + bool auto32 = false; /* ABI: varyings in the secondary attribute table */ bool secondary_table = true; @@ -1910,6 +1886,26 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) src_component++; } + nir_alu_type type = nir_intrinsic_src_type(instr); + switch (type) { + case nir_type_uint32: + case nir_type_bool32: + st.op = midgard_op_st_vary_32u; + break; + case nir_type_int32: + st.op = midgard_op_st_vary_32i; + break; + case nir_type_float32: + st.op = midgard_op_st_vary_32; + break; + case nir_type_float16: + st.op = midgard_op_st_vary_16; + break; + default: + UNREACHABLE("Attempted to store unknown type"); + break; + } + emit_mir_instruction(ctx, &st); } else { UNREACHABLE("Unknown store");