brw: lower non coherent FS load_output in NIR

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37527>
This commit is contained in:
Lionel Landwerlin 2025-09-02 13:39:29 +03:00 committed by Marge Bot
parent 3b6b03bd3b
commit d4ab2087cf
5 changed files with 69 additions and 78 deletions

View file

@ -1518,6 +1518,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_nir_lower_fs_inputs(nir, devinfo, key);
brw_nir_lower_fs_outputs(nir);
if (!key->coherent_fb_fetch)
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
/* From the SKL PRM, Volume 7, "Alpha Coverage":
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
* hardware, regardless of the state setting for this feature."

View file

@ -3647,80 +3647,6 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
return dest;
}
/**
 * Emulate a framebuffer read on the non-coherent path: instead of a render
 * target read message, issue a texel fetch (TXF) from the framebuffer surface
 * at the current fragment's position, layer and, when multisampled, sample
 * index.
 *
 * \param ntb     translation state; its cached SAMPLE_ID system value is set
 *                up on demand when the framebuffer is multisampled
 * \param bld     builder the instructions are emitted with
 * \param dst     destination register of the fetch
 * \param target  surface index passed to the fetch (and to the MCS fetch)
 *
 * \return the emitted texture instruction.
 */
static brw_inst *
emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const brw_reg &dst,
                          unsigned target)
{
   brw_shader &shader = ntb.s;
   const struct intel_device_info *devinfo = shader.devinfo;

   assert(bld.shader->stage == MESA_SHADER_FRAGMENT);

   const brw_wm_prog_key *fs_key =
      reinterpret_cast<const brw_wm_prog_key *>(shader.key);
   assert(!fs_key->coherent_fb_fetch);

   /* Texel address: pixel X, pixel Y and the render target array index. */
   const brw_reg texel = bld.vgrf(BRW_TYPE_UD, 3);
   bld.MOV(offset(texel, bld, 0), shader.pixel_x);
   bld.MOV(offset(texel, bld, 1), shader.pixel_y);
   bld.MOV(offset(texel, bld, 2), fetch_render_target_array_index(bld));

   /* The message choice below is made at compile time, so the key must say
    * definitively whether the framebuffer is multisampled.
    */
   assert(fs_key->multisample_fbo == INTEL_ALWAYS ||
          fs_key->multisample_fbo == INTEL_NEVER);
   const bool multisampled = static_cast<bool>(fs_key->multisample_fbo);

   /* Multisampled reads additionally need the sample index (set up lazily
    * and cached in ntb) and the MCS payload. Luckily the MCS fetch message
    * behaves deterministically for UMS surfaces, so it shouldn't be
    * necessary to recompile based on whether the framebuffer is CMS or UMS.
    */
   if (multisampled &&
       ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
      ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb);

   const brw_reg sample_idx = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID];

   brw_reg mcs_data = brw_reg();
   if (multisampled)
      mcs_data = emit_mcs_fetch(ntb, texel, 3, brw_imm_ud(target), brw_reg());

   /* Single-sampled framebuffers use a plain texel fetch. Multisampled ones
    * use the wide CMS message just in case the framebuffer uses 16x
    * multisampling; it should be equivalent to the normal CMS fetch for
    * lower multisampling modes. On Gfx12HP only the CMS_W variant exists.
    */
   opcode fetch_op = SHADER_OPCODE_TXF_LOGICAL;
   if (multisampled) {
      fetch_op = devinfo->verx10 >= 125 ?
         SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL :
         SHADER_OPCODE_TXF_CMS_W_LOGICAL;
   }

   /* Fill in the logical sources and emit the fetch. */
   brw_reg srcs[TEX_LOGICAL_NUM_SRCS];
   srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target);
   srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
   srcs[TEX_LOGICAL_SRC_COORDINATE] = texel;
   srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0);
   srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = sample_idx;
   srcs[TEX_LOGICAL_SRC_MCS] = mcs_data;

   brw_tex_inst *fetch =
      bld.emit(fetch_op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();

   /* Four components are written, and all three coordinates are used. */
   fetch->size_written = 4 * fetch->dst.component_size(fetch->exec_size);
   fetch->coord_components = 3;

   return fetch;
}
/**
* Actual coherent framebuffer read implemented using the native render target
* read message. Requires SKL+.
@ -4260,10 +4186,8 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
const brw_reg tmp = bld.vgrf(dest.type, 4);
if (reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch)
emit_coherent_fb_read(bld, tmp, target);
else
emit_non_coherent_fb_read(ntb, bld, tmp, target);
assert(reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch);
emit_coherent_fb_read(bld, tmp, target);
brw_combine_with_vec(bld, dest,
offset(tmp, bld, nir_intrinsic_component(instr)),

View file

@ -192,6 +192,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir,
const struct intel_vue_map *vue,
enum tess_primitive_mode tes_primitive_mode);
void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_fs_load_output(nir_shader *shader,
const struct brw_wm_prog_key *key);
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);

View file

@ -0,0 +1,61 @@
/*
* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
/**
 * Lower fragment shader output reads into sampler operations.
 *
 * Every load_output intrinsic is replaced by a texel fetch at the current
 * fragment's (x, y, layer) coordinate: txf when the key says the framebuffer
 * is never multisampled, txf_ms at the current sample id otherwise.
 *
 * \param b       builder used to emit the replacement instructions
 * \param intrin  intrinsic being visited by the pass
 * \param data    the const struct brw_wm_prog_key * handed to the pass
 *
 * \return true if \p intrin was a load_output and has been replaced.
 */
static bool
brw_nir_lower_fs_load_output_instr(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   void *data)
{
   if (intrin->intrinsic != nir_intrinsic_load_output)
      return false;

   const struct brw_wm_prog_key *key = data;

   /* The fetch opcode is chosen at compile time, so the key has to be
    * definitive. This path is only used by Iris, which never sets
    * multisample_fbo to SOMETIMES.
    */
   assert(key->multisample_fbo != INTEL_SOMETIMES);

   b->cursor = nir_before_instr(&intrin->instr);

   /* Load the fragment coordinate once and reuse it for both channels
    * instead of emitting the intrinsic twice.
    */
   nir_def *frag_coord = nir_load_frag_coord(b);
   nir_def *coords[3] = {
      nir_f2u32(b, nir_channel(b, frag_coord, 0)),
      nir_f2u32(b, nir_channel(b, frag_coord, 1)),
      nir_load_layer_id(b),
   };
   nir_def *coord = nir_vec(b, coords, 3);

   /* NOTE(review): the backend's render target read computes its target as
    * `l - FRAG_RESULT_DATA0 + load_offset`; confirm that the intrinsic base
    * is already the matching texture index at this point.
    */
   const unsigned target = nir_intrinsic_base(intrin);

   nir_def *tex;
   if (key->multisample_fbo == INTEL_NEVER) {
      tex = nir_build_tex(b, nir_texop_txf, coord,
                          .texture_index = target,
                          .dim = GLSL_SAMPLER_DIM_2D,
                          .is_array = true,
                          .dest_type = nir_type_uint32);
   } else {
      /* The coordinate carries the layer as its third component here too,
       * so the fetch has to be flagged as an array access exactly like the
       * single-sampled one.
       */
      tex = nir_build_tex(b, nir_texop_txf_ms, coord,
                          .texture_index = target,
                          .ms_index = nir_load_sample_id(b),
                          .dim = GLSL_SAMPLER_DIM_MS,
                          .is_array = true,
                          .dest_type = nir_type_uint32);
   }

   /* The load may cover only some of the fetched channels: keep the
    * intrin->def.num_components channels that start at the intrinsic's
    * component, which is where the backend's render target read path also
    * starts selecting from. For a full-width load at component 0 this
    * selects every channel of the fetch.
    */
   const unsigned first_comp = nir_intrinsic_component(intrin);
   const unsigned num_comps = intrin->def.num_components;
   nir_def *value =
      nir_channels(b, tex, nir_component_mask(num_comps) << first_comp);

   nir_def_replace(&intrin->def, value);

   return true;
}
/**
 * Run the load_output lowering over every intrinsic in \p shader.
 *
 * \param shader  shader to process
 * \param key     fragment program key forwarded to the per-instruction
 *                callback
 *
 * \return the result of nir_shader_intrinsics_pass().
 */
bool
brw_nir_lower_fs_load_output(nir_shader *shader,
                             const struct brw_wm_prog_key *key)
{
   /* The pass helper takes a mutable cookie; the callback converts it back
    * to a const key pointer.
    */
   void *cb_data = (void *) key;

   const bool progress =
      nir_shader_intrinsics_pass(shader,
                                 brw_nir_lower_fs_load_output_instr,
                                 nir_metadata_control_flow,
                                 cb_data);
   return progress;
}

View file

@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_lower_alpha_to_coverage.c',
'brw_nir_lower_fs_barycentrics.c',
'brw_nir_lower_fs_load_output.c',
'brw_nir_lower_immediate_offsets.c',
'brw_nir_lower_intersection_shader.c',
'brw_nir_lower_ray_queries.c',