mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 00:30:13 +01:00
brw: lower non coherent FS load_output in NIR
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37527>
This commit is contained in:
parent
3b6b03bd3b
commit
d4ab2087cf
5 changed files with 69 additions and 78 deletions
|
|
@ -1518,6 +1518,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
||||
brw_nir_lower_fs_outputs(nir);
|
||||
|
||||
if (!key->coherent_fb_fetch)
|
||||
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
|
||||
|
||||
/* From the SKL PRM, Volume 7, "Alpha Coverage":
|
||||
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
|
||||
* hardware, regardless of the state setting for this feature."
|
||||
|
|
|
|||
|
|
@ -3647,80 +3647,6 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
|
|||
return dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fake non-coherent framebuffer read implemented using TXF to fetch from the
|
||||
* framebuffer at the current fragment coordinates and sample index.
|
||||
*/
|
||||
static brw_inst *
|
||||
emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const brw_reg &dst,
|
||||
unsigned target)
|
||||
{
|
||||
brw_shader &s = ntb.s;
|
||||
const struct intel_device_info *devinfo = s.devinfo;
|
||||
|
||||
assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
|
||||
const brw_wm_prog_key *wm_key =
|
||||
reinterpret_cast<const brw_wm_prog_key *>(s.key);
|
||||
assert(!wm_key->coherent_fb_fetch);
|
||||
|
||||
/* Calculate the fragment coordinates. */
|
||||
const brw_reg coords = bld.vgrf(BRW_TYPE_UD, 3);
|
||||
bld.MOV(offset(coords, bld, 0), s.pixel_x);
|
||||
bld.MOV(offset(coords, bld, 1), s.pixel_y);
|
||||
bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
|
||||
|
||||
/* Calculate the sample index and MCS payload when multisampling. Luckily
|
||||
* the MCS fetch message behaves deterministically for UMS surfaces, so it
|
||||
* shouldn't be necessary to recompile based on whether the framebuffer is
|
||||
* CMS or UMS.
|
||||
*/
|
||||
assert(wm_key->multisample_fbo == INTEL_ALWAYS ||
|
||||
wm_key->multisample_fbo == INTEL_NEVER);
|
||||
if (wm_key->multisample_fbo &&
|
||||
ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
|
||||
ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb);
|
||||
|
||||
const brw_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID];
|
||||
const brw_reg mcs = wm_key->multisample_fbo ?
|
||||
emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), brw_reg()) : brw_reg();
|
||||
|
||||
/* Use either a normal or a CMS texel fetch message depending on whether
|
||||
* the framebuffer is single or multisample. On SKL+ use the wide CMS
|
||||
* message just in case the framebuffer uses 16x multisampling, it should
|
||||
* be equivalent to the normal CMS fetch for lower multisampling modes.
|
||||
*/
|
||||
opcode op;
|
||||
if (wm_key->multisample_fbo) {
|
||||
/* On SKL+ use the wide CMS message just in case the framebuffer uses 16x
|
||||
* multisampling, it should be equivalent to the normal CMS fetch for
|
||||
* lower multisampling modes.
|
||||
*
|
||||
* On Gfx12HP, there is only CMS_W variant available.
|
||||
*/
|
||||
if (devinfo->verx10 >= 125)
|
||||
op = SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL;
|
||||
else
|
||||
op = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
|
||||
} else {
|
||||
op = SHADER_OPCODE_TXF_LOGICAL;
|
||||
}
|
||||
|
||||
/* Emit the instruction. */
|
||||
brw_reg srcs[TEX_LOGICAL_NUM_SRCS];
|
||||
srcs[TEX_LOGICAL_SRC_COORDINATE] = coords;
|
||||
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0);
|
||||
srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = sample;
|
||||
srcs[TEX_LOGICAL_SRC_MCS] = mcs;
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target);
|
||||
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
|
||||
|
||||
brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
|
||||
tex->size_written = 4 * tex->dst.component_size(tex->exec_size);
|
||||
tex->coord_components = 3;
|
||||
|
||||
return tex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Actual coherent framebuffer read implemented using the native render target
|
||||
* read message. Requires SKL+.
|
||||
|
|
@ -4260,10 +4186,8 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
|||
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
|
||||
const brw_reg tmp = bld.vgrf(dest.type, 4);
|
||||
|
||||
if (reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch)
|
||||
emit_coherent_fb_read(bld, tmp, target);
|
||||
else
|
||||
emit_non_coherent_fb_read(ntb, bld, tmp, target);
|
||||
assert(reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch);
|
||||
emit_coherent_fb_read(bld, tmp, target);
|
||||
|
||||
brw_combine_with_vec(bld, dest,
|
||||
offset(tmp, bld, nir_intrinsic_component(instr)),
|
||||
|
|
|
|||
|
|
@ -192,6 +192,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir,
|
|||
const struct intel_vue_map *vue,
|
||||
enum tess_primitive_mode tes_primitive_mode);
|
||||
void brw_nir_lower_fs_outputs(nir_shader *nir);
|
||||
bool brw_nir_lower_fs_load_output(nir_shader *shader,
|
||||
const struct brw_wm_prog_key *key);
|
||||
|
||||
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
|
||||
|
||||
|
|
|
|||
61
src/intel/compiler/brw_nir_lower_fs_load_output.c
Normal file
61
src/intel/compiler/brw_nir_lower_fs_load_output.c
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright © 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/**
|
||||
* Lower fragment shader output reads into sampler operations.
|
||||
*/
|
||||
|
||||
static bool
|
||||
brw_nir_lower_fs_load_output_instr(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_output)
|
||||
return false;
|
||||
|
||||
const struct brw_wm_prog_key *key = data;
|
||||
|
||||
/* Only used by Iris that never sets this to SOMETIMES */
|
||||
assert(key->multisample_fbo != INTEL_SOMETIMES);
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
nir_def *coords[3] = {
|
||||
nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 0)),
|
||||
nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 1)),
|
||||
nir_load_layer_id(b),
|
||||
};
|
||||
nir_def *coord = nir_vec(b, coords, 3);
|
||||
|
||||
nir_def *tex =
|
||||
key->multisample_fbo == INTEL_NEVER ?
|
||||
nir_build_tex(b, nir_texop_txf, coord,
|
||||
.texture_index = nir_intrinsic_base(intrin),
|
||||
.dim = GLSL_SAMPLER_DIM_2D,
|
||||
.is_array = true,
|
||||
.dest_type = nir_type_uint32) :
|
||||
nir_build_tex(b, nir_texop_txf_ms, coord,
|
||||
.texture_index = nir_intrinsic_base(intrin),
|
||||
.ms_index = nir_load_sample_id(b),
|
||||
.dim = GLSL_SAMPLER_DIM_MS,
|
||||
.dest_type = nir_type_uint32);
|
||||
|
||||
nir_def_replace(&intrin->def, tex);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_fs_load_output(nir_shader *shader,
|
||||
const struct brw_wm_prog_key *key)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader,
|
||||
brw_nir_lower_fs_load_output_instr,
|
||||
nir_metadata_control_flow,
|
||||
(void *) key);
|
||||
}
|
||||
|
|
@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_nir_lower_cs_intrinsics.c',
|
||||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_fs_barycentrics.c',
|
||||
'brw_nir_lower_fs_load_output.c',
|
||||
'brw_nir_lower_immediate_offsets.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue