diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index ab0765547ad..350fd1f397c 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1180,6 +1180,7 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) ASSERTED nir_alu_type T = nir_intrinsic_src_type(instr); ASSERTED unsigned T_size = nir_alu_type_get_type_size(T); + nir_io_semantics sem = nir_intrinsic_io_semantics(instr); assert(T_size == 32 || T_size == 16); /* 16-bit varyings are always written and loaded as F16, regardless of * whether they are float or int */ @@ -1224,11 +1225,6 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) data = tmp; } - bool psiz = - (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ); - bool layer = - (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_LAYER); - bi_index a[4] = {bi_null()}; if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) { @@ -1247,20 +1243,21 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) unsigned pos_attr_offset = 0; unsigned src_bit_sz = nir_src_bit_size(instr->src[0]); - if (psiz || layer) + enum va_shader_output output_type = va_shader_output_from_semantics(&sem); + if (output_type == VA_SHADER_OUTPUT_ATTRIB) index_offset += 4; - if (layer) { + if (sem.location == VARYING_SLOT_LAYER) { assert(nr == 1 && src_bit_sz == 32); src_bit_sz = 8; pos_attr_offset = 2; data = bi_byte(data, 0); } - if (psiz) + if (sem.location == VARYING_SLOT_PSIZ) assert(T_size == 16 && "should've been lowered"); - bool varying = (b->shader->idvs == BI_IDVS_VARYING); + bool varying = (output_type == VA_SHADER_OUTPUT_VARY); if (instr->intrinsic == nir_intrinsic_store_per_view_output) { unsigned view_index = nir_src_as_uint(instr->src[1]); @@ -1287,6 +1284,13 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_instr *I = bi_lea_buf_imm_to(b, address, index); I->table = va_res_fold_table_idx(61); 
I->index = 0; + + /* On Avalon, the hardware-controlled buffer is at index 1 for varyings */ + if (pan_arch(b->shader->inputs->gpu_id) >= 12 && + output_type == VA_SHADER_OUTPUT_VARY) { + I->index = 1; + } + bi_emit_split_i32(b, a, address, 2); bi_store(b, nr * src_bit_sz, data, a[0], a[1], @@ -2233,6 +2237,11 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_mov_i32_to(b, dst, bi_src_index(&instr->src[0])); break; + case nir_intrinsic_load_shader_output_pan: + assert(b->shader->arch >= 12 && "load_shader_output_pan should have been lowered!"); + bi_mov_i32_to(b, dst, bi_fau(BIR_FAU_SHADER_OUTPUT, false)); + break; + default: fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); @@ -5824,7 +5833,7 @@ bi_compile_variant_nir(nir_shader *nir, ctx->idvs = idvs; ctx->malloc_idvs = (ctx->arch >= 9) && !inputs->no_idvs; - if (idvs != BI_IDVS_NONE) { + if (idvs == BI_IDVS_POSITION || idvs == BI_IDVS_VARYING) { /* Specializing shaders for IDVS is destructive, so we need to * clone. However, the last (second) IDVS shader does not need * to be preserved so we can skip cloning that one. 
@@ -6128,6 +6137,16 @@ bi_compile_variant(nir_shader *nir, } else { info->preload = preload; info->work_reg_count = ctx->info.work_reg_count; + + if (idvs == BI_IDVS_ALL) { + /* Varying shader is only enabled if we can have any kind of varying + * written (that means not position, layer or point size) */ + info->vs.secondary_enable = + (nir->info.outputs_written & + ~(BITFIELD64_BIT(VARYING_SLOT_POS) | + BITFIELD64_BIT(VARYING_SLOT_LAYER) | + BITFIELD64_BIT(VARYING_SLOT_PSIZ))) != 0; + } } if (idvs == BI_IDVS_POSITION && !nir->info.internal && @@ -6205,7 +6224,10 @@ bifrost_compile_shader_nir(nir_shader *nir, pan_nir_collect_varyings(nir, info, PAN_MEDIUMP_VARY_32BIT); - if (info->vs.idvs) { + /* On Avalon, IDVS is only in one binary */ + if (info->vs.idvs && pan_arch(inputs->gpu_id) >= 12) { + bi_compile_variant(nir, inputs, binary, info, BI_IDVS_ALL); + } else if (info->vs.idvs) { bi_compile_variant(nir, inputs, binary, info, BI_IDVS_POSITION); bi_compile_variant(nir, inputs, binary, info, BI_IDVS_VARYING); } else { diff --git a/src/panfrost/compiler/compiler.h b/src/panfrost/compiler/compiler.h index f27c7632ccf..e67efaeb931 100644 --- a/src/panfrost/compiler/compiler.h +++ b/src/panfrost/compiler/compiler.h @@ -851,6 +851,9 @@ enum bi_idvs_mode { /* IDVS in use. Compiling a varying shader */ BI_IDVS_VARYING = 2, + + /* IDVS2 in use. 
Compiling a deferred shader (v12+) */ + BI_IDVS_ALL = 3, }; typedef struct { @@ -930,6 +933,9 @@ enum bir_fau { BIR_FAU_WLS_PTR = 17, BIR_FAU_PROGRAM_COUNTER = 18, + /* Avalon only */ + BIR_FAU_SHADER_OUTPUT = (1 << 9), + BIR_FAU_UNIFORM = (1 << 7), /* Look up table on Valhall */ BIR_FAU_IMMEDIATE = (1 << 8), diff --git a/src/panfrost/compiler/valhall/ISA.xml b/src/panfrost/compiler/valhall/ISA.xml index 7469d474344..0047ca5f7dd 100644 --- a/src/panfrost/compiler/valhall/ISA.xml +++ b/src/panfrost/compiler/valhall/ISA.xml @@ -165,7 +165,7 @@ lane_id core_id - + shader_output diff --git a/src/panfrost/compiler/valhall/va_compiler.h b/src/panfrost/compiler/valhall/va_compiler.h index f78584ea105..22f58955e28 100644 --- a/src/panfrost/compiler/valhall/va_compiler.h +++ b/src/panfrost/compiler/valhall/va_compiler.h @@ -67,6 +67,7 @@ va_fau_page(enum bir_fau value) return 1; case BIR_FAU_LANE_ID: case BIR_FAU_CORE_ID: + case BIR_FAU_SHADER_OUTPUT: case BIR_FAU_PROGRAM_COUNTER: return 3; default: diff --git a/src/panfrost/compiler/valhall/va_pack.c b/src/panfrost/compiler/valhall/va_pack.c index 3bc1ac25a73..e241f99a375 100644 --- a/src/panfrost/compiler/valhall/va_pack.c +++ b/src/panfrost/compiler/valhall/va_pack.c @@ -104,6 +104,8 @@ va_pack_fau_special(const bi_instr *I, enum bir_fau fau) return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER; case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID; + case BIR_FAU_SHADER_OUTPUT: + return VA_FAU_SPECIAL_PAGE_3_SHADER_OUTPUT; case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER; case BIR_FAU_SAMPLE_POS_ARRAY: