diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index cd28b88f16b..ce9128d114c 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -1518,6 +1518,9 @@ brw_compile_fs(const struct brw_compiler *compiler, brw_nir_lower_fs_inputs(nir, devinfo, key); brw_nir_lower_fs_outputs(nir); + if (!key->coherent_fb_fetch) + NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key); + /* From the SKL PRM, Volume 7, "Alpha Coverage": * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in * hardware, regardless of the state setting for this feature." diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 9a4bfe87c2f..8a733e062e5 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -3647,80 +3647,6 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon return dest; } -/** - * Fake non-coherent framebuffer read implemented using TXF to fetch from the - * framebuffer at the current fragment coordinates and sample index. - */ -static brw_inst * -emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const brw_reg &dst, - unsigned target) -{ - brw_shader &s = ntb.s; - const struct intel_device_info *devinfo = s.devinfo; - - assert(bld.shader->stage == MESA_SHADER_FRAGMENT); - const brw_wm_prog_key *wm_key = - reinterpret_cast<const brw_wm_prog_key *>(s.key); - assert(!wm_key->coherent_fb_fetch); - - /* Calculate the fragment coordinates. */ - const brw_reg coords = bld.vgrf(BRW_TYPE_UD, 3); - bld.MOV(offset(coords, bld, 0), s.pixel_x); - bld.MOV(offset(coords, bld, 1), s.pixel_y); - bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld)); - - /* Calculate the sample index and MCS payload when multisampling. Luckily - * the MCS fetch message behaves deterministically for UMS surfaces, so it - * shouldn't be necessary to recompile based on whether the framebuffer is - * CMS or UMS.
- */ - assert(wm_key->multisample_fbo == INTEL_ALWAYS || - wm_key->multisample_fbo == INTEL_NEVER); - if (wm_key->multisample_fbo && - ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE) - ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb); - - const brw_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]; - const brw_reg mcs = wm_key->multisample_fbo ? - emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), brw_reg()) : brw_reg(); - - /* Use either a normal or a CMS texel fetch message depending on whether - * the framebuffer is single or multisample. On SKL+ use the wide CMS - * message just in case the framebuffer uses 16x multisampling, it should - * be equivalent to the normal CMS fetch for lower multisampling modes. - */ - opcode op; - if (wm_key->multisample_fbo) { - /* On SKL+ use the wide CMS message just in case the framebuffer uses 16x - * multisampling, it should be equivalent to the normal CMS fetch for - * lower multisampling modes. - * - * On Gfx12HP, there is only CMS_W variant available. - */ - if (devinfo->verx10 >= 125) - op = SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL; - else - op = SHADER_OPCODE_TXF_CMS_W_LOGICAL; - } else { - op = SHADER_OPCODE_TXF_LOGICAL; - } - - /* Emit the instruction. */ - brw_reg srcs[TEX_LOGICAL_NUM_SRCS]; - srcs[TEX_LOGICAL_SRC_COORDINATE] = coords; - srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0); - srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = sample; - srcs[TEX_LOGICAL_SRC_MCS] = mcs; - srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target); - srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); - - brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex(); - tex->size_written = 4 * tex->dst.component_size(tex->exec_size); - tex->coord_components = 3; - - return tex; -} - /** * Actual coherent framebuffer read implemented using the native render target * read message. Requires SKL+.
@@ -4260,10 +4186,8 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, const unsigned target = l - FRAG_RESULT_DATA0 + load_offset; const brw_reg tmp = bld.vgrf(dest.type, 4); - if (reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch) - emit_coherent_fb_read(bld, tmp, target); - else - emit_non_coherent_fb_read(ntb, bld, tmp, target); + assert(reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch); + emit_coherent_fb_read(bld, tmp, target); brw_combine_with_vec(bld, dest, offset(tmp, bld, nir_intrinsic_component(instr)), diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index aaff0f97256..fee89cca3fb 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -192,6 +192,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue, enum tess_primitive_mode tes_primitive_mode); void brw_nir_lower_fs_outputs(nir_shader *nir); +bool brw_nir_lower_fs_load_output(nir_shader *shader, + const struct brw_wm_prog_key *key); bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size); diff --git a/src/intel/compiler/brw_nir_lower_fs_load_output.c b/src/intel/compiler/brw_nir_lower_fs_load_output.c new file mode 100644 index 00000000000..8c79f55fb91 --- /dev/null +++ b/src/intel/compiler/brw_nir_lower_fs_load_output.c @@ -0,0 +1,61 @@ +/* + * Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "brw_nir.h" +#include "compiler/nir/nir_builder.h" + +/** + * Lower fragment shader output reads into sampler operations.
+ */ + +static bool +brw_nir_lower_fs_load_output_instr(nir_builder *b, + nir_intrinsic_instr *intrin, + void *data) +{ + if (intrin->intrinsic != nir_intrinsic_load_output) + return false; + + const struct brw_wm_prog_key *key = data; + + /* Only used by Iris, which never sets this to SOMETIMES, so the pass picks exactly one of the txf or txf_ms fetches below when it runs. NOTE(review): the txf_ms path receives the same 3-component coord (x, y, layer) as the txf path but does not set .is_array; confirm this is intended. NOTE(review): texture_index is taken directly from nir_intrinsic_base() and the whole tex result replaces the load, whereas the backend path removed by this change used l - FRAG_RESULT_DATA0 + load_offset and selected components via nir_intrinsic_component(); confirm the two are equivalent. */ + assert(key->multisample_fbo != INTEL_SOMETIMES); + + b->cursor = nir_before_instr(&intrin->instr); + + nir_def *coords[3] = { + nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 0)), + nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 1)), + nir_load_layer_id(b), + }; + nir_def *coord = nir_vec(b, coords, 3); + + nir_def *tex = + key->multisample_fbo == INTEL_NEVER ? + nir_build_tex(b, nir_texop_txf, coord, + .texture_index = nir_intrinsic_base(intrin), + .dim = GLSL_SAMPLER_DIM_2D, + .is_array = true, + .dest_type = nir_type_uint32) : + nir_build_tex(b, nir_texop_txf_ms, coord, + .texture_index = nir_intrinsic_base(intrin), + .ms_index = nir_load_sample_id(b), + .dim = GLSL_SAMPLER_DIM_MS, + .dest_type = nir_type_uint32); + + nir_def_replace(&intrin->def, tex); + + return true; +} + +bool +brw_nir_lower_fs_load_output(nir_shader *shader, + const struct brw_wm_prog_key *key) +{ + return nir_shader_intrinsics_pass(shader, + brw_nir_lower_fs_load_output_instr, + nir_metadata_control_flow, + (void *) key); +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index aae7dc30a42..2fff9740f6e 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -73,6 +73,7 @@ libintel_compiler_brw_files = files( 'brw_nir_lower_cs_intrinsics.c', 'brw_nir_lower_alpha_to_coverage.c', 'brw_nir_lower_fs_barycentrics.c', + 'brw_nir_lower_fs_load_output.c', 'brw_nir_lower_immediate_offsets.c', 'brw_nir_lower_intersection_shader.c', 'brw_nir_lower_ray_queries.c',