diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index ab957d2bd76..c890030da51 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -580,151 +580,6 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) bi_copy_component(b, instr, dest); } -static void -bi_emit_load_fs_input(bi_builder *b, nir_intrinsic_instr *instr) -{ - enum bi_sample sample = BI_SAMPLE_CENTER; - enum bi_update update = BI_UPDATE_STORE; - enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; - enum bi_source_format source_format; - bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; - bi_index src0 = bi_null(); - - /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver - * through a compiler input value, falling back to LD_VAR[_IMM] + - * Attribute Descriptors otherwise. */ - bool use_ld_var_buf = - b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf; - - unsigned component = nir_intrinsic_component(instr); - enum bi_vecsize vecsize = (instr->num_components + component - 1); - bi_index dest = - (component == 0) ? 
bi_def_index(&instr->def) : bi_temp(b->shader); - - nir_io_semantics sem = nir_intrinsic_io_semantics(instr); - - const nir_alu_type type = nir_intrinsic_dest_type(instr); - const nir_alu_type base_type = nir_alu_type_get_base_type(type); - const nir_alu_type sz = nir_alu_type_get_type_size(type); - assert(sz == instr->def.bit_size); - assert(sz == 16 || sz == 32); - assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float); - - const struct pan_varying_slot *slot = NULL; - unsigned src_sz = sz; - if (use_ld_var_buf) { - pan_varying_layout_require_layout(b->shader->varying_layout); - slot = pan_varying_layout_find_slot(b->shader->varying_layout, - sem.location); - assert(slot); - src_sz = nir_alu_type_get_type_size(slot->alu_type); - assert(src_sz == 16 || src_sz == 32); - } - - if (smooth) { - nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); - assert(parent); - - sample = bi_interp_for_intrinsic(parent->intrinsic); - src0 = bi_varying_src0_for_barycentric(b, parent); - - /* Smooth ints don't exist */ - assert(base_type == nir_type_float); - regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32; - source_format = - (src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32; - } else { - if (use_ld_var_buf) { - /* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers - * is okay if the source_format is flat and uses the same bit size. - * The conversion is a no-op. */ - regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32; - source_format = (src_sz == 16) ? 
- BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32; - /* conversion MUST be a noop for int varyings to work correctly */ - assert(base_type == nir_type_float || src_sz == sz); - } else { - /* Flat loading with i16/u16 is not encodable */ - assert(base_type == nir_type_float || sz == 32); - regfmt = bi_reg_fmt_for_nir(type); - } - - /* Valhall can't have bi_null() here, although the source is - * logically unused for flat varyings - */ - if (b->shader->arch >= 9) - src0 = bi_preload(b, 61); - - /* Gather info as we go */ - b->shader->info.bifrost->uses_flat_shading = true; - } - - nir_src *offset_src = nir_get_io_offset_src(instr); - unsigned imm_index = 0; - bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index); - unsigned base = nir_intrinsic_base(instr); - - if (use_ld_var_buf) { - assert(slot); - if (immediate) { - assert(nir_src_is_const(*offset_src) && "assumes immediate offset"); - unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16); - - /* Immediate index given in bytes. */ - bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format, - update, vecsize, offset); - } else { - bi_index idx = bi_src_index(offset_src); - /* Index needs to be in bytes, but NIR gives the index - * in slots. For now assume 16 bytes per element. 
- */ - bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4)); - if (slot->offset != 0) - idx_bytes = bi_iadd_u32(b, idx_bytes, bi_imm_u32(slot->offset), - false); - - bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample, - source_format, update, vecsize); - } - } else { - /* On Valhall, ensure the table and index are valid for usage with - * immediate form when IDVS isn't used */ - if (b->shader->arch >= 9) - immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) && - pan_res_handle_get_index(base) < 256; - - if (immediate) { - bi_instr *I; - - if (smooth) { - I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize, - pan_res_handle_get_index(imm_index)); - } else { - I = - bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize, - pan_res_handle_get_index(imm_index)); - } - - /* Valhall usually uses LD_VAR_BUF. If this is disabled, use a simple - * Midgard-style ABI. */ - if (b->shader->arch >= 9) - I->table = va_res_fold_table_idx(pan_res_handle_get_table(base)); - } else { - bi_index idx = bi_src_index(offset_src); - - if (base != 0) - idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - - if (smooth) - bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize); - else - bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize); - } - } - - bi_copy_component(b, instr, dest); -} - static void bi_emit_load_var(bi_builder *b, nir_intrinsic_instr *intr) { @@ -2140,9 +1995,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) case nir_intrinsic_load_interpolated_input: case nir_intrinsic_load_input: assert(!b->shader->inputs->is_blend); - if (stage == MESA_SHADER_FRAGMENT) - bi_emit_load_fs_input(b, instr); - else if (stage == MESA_SHADER_VERTEX) + if (stage == MESA_SHADER_VERTEX) bi_emit_load_attr(b, instr); else UNREACHABLE("Unsupported shader stage"); @@ -7247,6 +7100,10 @@ bifrost_compile_shader_nir(nir_shader *nir, inputs->trust_varying_flat_highp_types, false); 
info->varyings.noperspective = pan_nir_collect_noperspective_varyings_fs(nir); + + if (!inputs->is_blend) + NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id, + inputs->varying_layout, inputs->valhall.use_ld_var_buf); } if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) { diff --git a/src/panfrost/compiler/meson.build b/src/panfrost/compiler/meson.build index 3966b69710f..e34a64cac91 100644 --- a/src/panfrost/compiler/meson.build +++ b/src/panfrost/compiler/meson.build @@ -16,6 +16,7 @@ libpanfrost_compiler_files = files( 'pan_nir_lower_sample_position.c', 'pan_nir_lower_store_component.c', 'pan_nir_lower_texel_buffer_index.c', + 'pan_nir_lower_varyings_io.c', 'pan_nir_lower_vertex_id.c', 'pan_nir_lower_xfb.c', 'pan_nir_resize_varying_io.c', diff --git a/src/panfrost/compiler/pan_nir.h b/src/panfrost/compiler/pan_nir.h index bbc2d1a09bf..97ca12711e4 100644 --- a/src/panfrost/compiler/pan_nir.h +++ b/src/panfrost/compiler/pan_nir.h @@ -57,6 +57,10 @@ bool pan_nir_lower_frag_coord_zw(nir_shader *shader); bool pan_nir_lower_noperspective_vs(nir_shader *shader); bool pan_nir_lower_noperspective_fs(nir_shader *shader); +bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id, + const struct pan_varying_layout *varying_layout, + bool valhall_use_ld_var_buf); + bool pan_nir_lower_helper_invocation(nir_shader *shader); bool pan_nir_lower_sample_pos(nir_shader *shader); bool pan_nir_lower_xfb(nir_shader *nir); diff --git a/src/panfrost/compiler/pan_nir_lower_varyings_io.c b/src/panfrost/compiler/pan_nir_lower_varyings_io.c new file mode 100644 index 00000000000..b90cf8d7935 --- /dev/null +++ b/src/panfrost/compiler/pan_nir_lower_varyings_io.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2025 Collabora, Ltd. 
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "pan_nir.h"
+#include "nir_builder.h"
+
+#include "panfrost/model/pan_model.h"
+
+/* Per-pass state handed to lower_fs_input_load() through cb_data. */
+struct lower_fs_inputs_ctx {
+   unsigned arch; /* pan_arch(gpu_id) */
+   const struct pan_varying_layout *varying_layout; /* for slot lookup */
+   bool valhall_use_ld_var_buf; /* emit load_var_buf[_flat]_pan */
+};
+
+/* Replace one load_input / load_interpolated_input with the matching
+ * load_var[_buf][_flat]_pan intrinsic. The new load covers components
+ * [0, component + num_components) and is swizzled down when component > 0.
+ * Returns true if the instruction was replaced, false if it was skipped. */
+static bool
+lower_fs_input_load(struct nir_builder *b,
+                    nir_intrinsic_instr *load, void *cb_data)
+{
+   const struct lower_fs_inputs_ctx *ctx = cb_data;
+
+   if (load->intrinsic != nir_intrinsic_load_input &&
+       load->intrinsic != nir_intrinsic_load_interpolated_input)
+      return false;
+
+   const nir_io_semantics sem = nir_intrinsic_io_semantics(load);
+   const nir_alu_type dest_type = nir_intrinsic_dest_type(load);
+
+   /* Indirect array varyings are not yet supported (num_slots > 1) */
+   assert(sem.num_slots == 1);
+   assert(nir_src_as_uint(*nir_get_io_offset_src(load)) == 0);
+
+   nir_intrinsic_instr *bary;
+   switch (load->intrinsic) {
+   case nir_intrinsic_load_input:
+      bary = NULL;
+      break;
+   case nir_intrinsic_load_interpolated_input:
+      /* Cannot interpolate ints */
+      assert(nir_alu_type_get_base_type(dest_type) == nir_type_float);
+      bary = nir_src_as_intrinsic(load->src[0]);
+      assert(bary); /* &bary->def is dereferenced below */
+      break;
+   default:
+      UNREACHABLE("Already handled");
+   }
+
+   b->cursor = nir_before_instr(&load->instr);
+
+   const unsigned component = nir_intrinsic_component(load);
+   const unsigned load_comps = load->num_components + component;
+
+   nir_def *res;
+   if (ctx->valhall_use_ld_var_buf) {
+      assert(ctx->arch >= 9);
+      pan_varying_layout_require_layout(ctx->varying_layout);
+      const struct pan_varying_slot *slot =
+         pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
+      assert(slot);
+      const nir_alu_type src_type = slot->alu_type;
+      nir_def *offset_B = nir_imm_int(b, slot->offset);
+      if (load->intrinsic == nir_intrinsic_load_interpolated_input) {
+         res = nir_load_var_buf_pan(b, load_comps, load->def.bit_size,
+                                    offset_B, &bary->def,
+                                    .src_type = src_type, .io_semantics = sem);
+      } else {
+         res = nir_load_var_buf_flat_pan(b, load_comps, load->def.bit_size,
+                                         offset_B, .src_type = src_type,
+                                         .io_semantics = sem);
+      }
+   } else {
+      const uint32_t base = nir_intrinsic_base(load);
+      nir_def *idx = nir_imm_int(b, base);
+      if (load->intrinsic == nir_intrinsic_load_interpolated_input) {
+         res = nir_load_var_pan(b, load_comps, load->def.bit_size,
+                                idx, &bary->def,
+                                .dest_type = dest_type, .io_semantics = sem);
+      } else {
+         res = nir_load_var_flat_pan(b, load_comps, load->def.bit_size, idx,
+                                     .dest_type = dest_type,
+                                     .io_semantics = sem);
+      }
+   }
+
+   if (component > 0) {
+      unsigned swiz[NIR_MAX_VEC_COMPONENTS] = {0, };
+      for (unsigned c = 0; c < load->num_components; c++)
+         swiz[c] = component + c;
+      res = nir_swizzle(b, res, swiz, load->num_components);
+   }
+
+   nir_def_replace(&load->def, res);
+   return true;
+}
+
+/* Lower fragment shader input loads to load_var*_pan intrinsics. The callback
+ * matches load_input / load_interpolated_input without checking the shader
+ * stage, so call this on fragment shaders only. Returns the pass result. */
+bool
+pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
+                        const struct pan_varying_layout *varying_layout,
+                        bool valhall_use_ld_var_buf)
+{
+   const struct lower_fs_inputs_ctx ctx = {
+      .arch = pan_arch(gpu_id),
+      .varying_layout = varying_layout,
+      .valhall_use_ld_var_buf = valhall_use_ld_var_buf,
+   };
+   return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
+                                     nir_metadata_control_flow, (void *)&ctx);
+}