mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 07:00:12 +01:00
brw: lower non coherent FS load_output in NIR
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37527>
This commit is contained in:
parent
3b6b03bd3b
commit
d4ab2087cf
5 changed files with 69 additions and 78 deletions
|
|
@ -1518,6 +1518,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||||
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
||||||
brw_nir_lower_fs_outputs(nir);
|
brw_nir_lower_fs_outputs(nir);
|
||||||
|
|
||||||
|
if (!key->coherent_fb_fetch)
|
||||||
|
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
|
||||||
|
|
||||||
/* From the SKL PRM, Volume 7, "Alpha Coverage":
|
/* From the SKL PRM, Volume 7, "Alpha Coverage":
|
||||||
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
|
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
|
||||||
* hardware, regardless of the state setting for this feature."
|
* hardware, regardless of the state setting for this feature."
|
||||||
|
|
|
||||||
|
|
@ -3647,80 +3647,6 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Fake non-coherent framebuffer read implemented using TXF to fetch from the
|
|
||||||
* framebuffer at the current fragment coordinates and sample index.
|
|
||||||
*/
|
|
||||||
static brw_inst *
|
|
||||||
emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const brw_reg &dst,
|
|
||||||
unsigned target)
|
|
||||||
{
|
|
||||||
brw_shader &s = ntb.s;
|
|
||||||
const struct intel_device_info *devinfo = s.devinfo;
|
|
||||||
|
|
||||||
assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
|
|
||||||
const brw_wm_prog_key *wm_key =
|
|
||||||
reinterpret_cast<const brw_wm_prog_key *>(s.key);
|
|
||||||
assert(!wm_key->coherent_fb_fetch);
|
|
||||||
|
|
||||||
/* Calculate the fragment coordinates. */
|
|
||||||
const brw_reg coords = bld.vgrf(BRW_TYPE_UD, 3);
|
|
||||||
bld.MOV(offset(coords, bld, 0), s.pixel_x);
|
|
||||||
bld.MOV(offset(coords, bld, 1), s.pixel_y);
|
|
||||||
bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
|
|
||||||
|
|
||||||
/* Calculate the sample index and MCS payload when multisampling. Luckily
|
|
||||||
* the MCS fetch message behaves deterministically for UMS surfaces, so it
|
|
||||||
* shouldn't be necessary to recompile based on whether the framebuffer is
|
|
||||||
* CMS or UMS.
|
|
||||||
*/
|
|
||||||
assert(wm_key->multisample_fbo == INTEL_ALWAYS ||
|
|
||||||
wm_key->multisample_fbo == INTEL_NEVER);
|
|
||||||
if (wm_key->multisample_fbo &&
|
|
||||||
ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
|
|
||||||
ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb);
|
|
||||||
|
|
||||||
const brw_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID];
|
|
||||||
const brw_reg mcs = wm_key->multisample_fbo ?
|
|
||||||
emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), brw_reg()) : brw_reg();
|
|
||||||
|
|
||||||
/* Use either a normal or a CMS texel fetch message depending on whether
|
|
||||||
* the framebuffer is single or multisample. On SKL+ use the wide CMS
|
|
||||||
* message just in case the framebuffer uses 16x multisampling, it should
|
|
||||||
* be equivalent to the normal CMS fetch for lower multisampling modes.
|
|
||||||
*/
|
|
||||||
opcode op;
|
|
||||||
if (wm_key->multisample_fbo) {
|
|
||||||
/* On SKL+ use the wide CMS message just in case the framebuffer uses 16x
|
|
||||||
* multisampling, it should be equivalent to the normal CMS fetch for
|
|
||||||
* lower multisampling modes.
|
|
||||||
*
|
|
||||||
* On Gfx12HP, there is only CMS_W variant available.
|
|
||||||
*/
|
|
||||||
if (devinfo->verx10 >= 125)
|
|
||||||
op = SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL;
|
|
||||||
else
|
|
||||||
op = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
|
|
||||||
} else {
|
|
||||||
op = SHADER_OPCODE_TXF_LOGICAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Emit the instruction. */
|
|
||||||
brw_reg srcs[TEX_LOGICAL_NUM_SRCS];
|
|
||||||
srcs[TEX_LOGICAL_SRC_COORDINATE] = coords;
|
|
||||||
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0);
|
|
||||||
srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = sample;
|
|
||||||
srcs[TEX_LOGICAL_SRC_MCS] = mcs;
|
|
||||||
srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target);
|
|
||||||
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
|
|
||||||
|
|
||||||
brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
|
|
||||||
tex->size_written = 4 * tex->dst.component_size(tex->exec_size);
|
|
||||||
tex->coord_components = 3;
|
|
||||||
|
|
||||||
return tex;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Actual coherent framebuffer read implemented using the native render target
|
* Actual coherent framebuffer read implemented using the native render target
|
||||||
* read message. Requires SKL+.
|
* read message. Requires SKL+.
|
||||||
|
|
@ -4260,10 +4186,8 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
||||||
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
|
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
|
||||||
const brw_reg tmp = bld.vgrf(dest.type, 4);
|
const brw_reg tmp = bld.vgrf(dest.type, 4);
|
||||||
|
|
||||||
if (reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch)
|
assert(reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch);
|
||||||
emit_coherent_fb_read(bld, tmp, target);
|
emit_coherent_fb_read(bld, tmp, target);
|
||||||
else
|
|
||||||
emit_non_coherent_fb_read(ntb, bld, tmp, target);
|
|
||||||
|
|
||||||
brw_combine_with_vec(bld, dest,
|
brw_combine_with_vec(bld, dest,
|
||||||
offset(tmp, bld, nir_intrinsic_component(instr)),
|
offset(tmp, bld, nir_intrinsic_component(instr)),
|
||||||
|
|
|
||||||
|
|
@ -192,6 +192,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir,
|
||||||
const struct intel_vue_map *vue,
|
const struct intel_vue_map *vue,
|
||||||
enum tess_primitive_mode tes_primitive_mode);
|
enum tess_primitive_mode tes_primitive_mode);
|
||||||
void brw_nir_lower_fs_outputs(nir_shader *nir);
|
void brw_nir_lower_fs_outputs(nir_shader *nir);
|
||||||
|
bool brw_nir_lower_fs_load_output(nir_shader *shader,
|
||||||
|
const struct brw_wm_prog_key *key);
|
||||||
|
|
||||||
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
|
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
|
||||||
|
|
||||||
|
|
|
||||||
61
src/intel/compiler/brw_nir_lower_fs_load_output.c
Normal file
61
src/intel/compiler/brw_nir_lower_fs_load_output.c
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2025 Intel Corporation
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "brw_nir.h"
|
||||||
|
#include "compiler/nir/nir_builder.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lower fragment shader output reads into sampler operations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static bool
|
||||||
|
brw_nir_lower_fs_load_output_instr(nir_builder *b,
|
||||||
|
nir_intrinsic_instr *intrin,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
if (intrin->intrinsic != nir_intrinsic_load_output)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const struct brw_wm_prog_key *key = data;
|
||||||
|
|
||||||
|
/* Only used by Iris that never sets this to SOMETIMES */
|
||||||
|
assert(key->multisample_fbo != INTEL_SOMETIMES);
|
||||||
|
|
||||||
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
|
|
||||||
|
nir_def *coords[3] = {
|
||||||
|
nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 0)),
|
||||||
|
nir_f2u32(b, nir_channel(b, nir_load_frag_coord(b), 1)),
|
||||||
|
nir_load_layer_id(b),
|
||||||
|
};
|
||||||
|
nir_def *coord = nir_vec(b, coords, 3);
|
||||||
|
|
||||||
|
nir_def *tex =
|
||||||
|
key->multisample_fbo == INTEL_NEVER ?
|
||||||
|
nir_build_tex(b, nir_texop_txf, coord,
|
||||||
|
.texture_index = nir_intrinsic_base(intrin),
|
||||||
|
.dim = GLSL_SAMPLER_DIM_2D,
|
||||||
|
.is_array = true,
|
||||||
|
.dest_type = nir_type_uint32) :
|
||||||
|
nir_build_tex(b, nir_texop_txf_ms, coord,
|
||||||
|
.texture_index = nir_intrinsic_base(intrin),
|
||||||
|
.ms_index = nir_load_sample_id(b),
|
||||||
|
.dim = GLSL_SAMPLER_DIM_MS,
|
||||||
|
.dest_type = nir_type_uint32);
|
||||||
|
|
||||||
|
nir_def_replace(&intrin->def, tex);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_nir_lower_fs_load_output(nir_shader *shader,
|
||||||
|
const struct brw_wm_prog_key *key)
|
||||||
|
{
|
||||||
|
return nir_shader_intrinsics_pass(shader,
|
||||||
|
brw_nir_lower_fs_load_output_instr,
|
||||||
|
nir_metadata_control_flow,
|
||||||
|
(void *) key);
|
||||||
|
}
|
||||||
|
|
@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
|
||||||
'brw_nir_lower_cs_intrinsics.c',
|
'brw_nir_lower_cs_intrinsics.c',
|
||||||
'brw_nir_lower_alpha_to_coverage.c',
|
'brw_nir_lower_alpha_to_coverage.c',
|
||||||
'brw_nir_lower_fs_barycentrics.c',
|
'brw_nir_lower_fs_barycentrics.c',
|
||||||
|
'brw_nir_lower_fs_load_output.c',
|
||||||
'brw_nir_lower_immediate_offsets.c',
|
'brw_nir_lower_immediate_offsets.c',
|
||||||
'brw_nir_lower_intersection_shader.c',
|
'brw_nir_lower_intersection_shader.c',
|
||||||
'brw_nir_lower_ray_queries.c',
|
'brw_nir_lower_ray_queries.c',
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue