mesa/src/asahi/lib/agx_nir_prolog_epilog.c

/*
* Copyright 2024 Alyssa Rosenzweig
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "gallium/include/pipe/p_defines.h"
#include "poly/cl/libpoly.h"
#include "poly/nir/poly_nir.h"
#include "util/format/u_formats.h"
#include "agx_abi.h"
#include "agx_linker.h"
#include "agx_nir.h"
#include "agx_nir_lower_vbo.h"
#include "agx_pack.h"
#include "agx_tilebuffer.h"
#include "libagx.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_lower_blend.h"
#include "shader_enums.h"
/*
* Insert code into a fragment shader to lower polygon stipple. The stipple is
* passed in a sideband, rather than requiring a texture binding. This is
* simpler for drivers to integrate and might be more efficient.
*/
static bool
agx_nir_lower_poly_stipple(nir_shader *s)
{
assert(s->info.stage == MESA_SHADER_FRAGMENT);
/* Insert at the beginning for performance. */
nir_builder b_ =
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
nir_builder *b = &b_;
/* The stipple coordinate is defined as the window coordinate mod 32. It's
* reversed along the X-axis to simplify the driver, hence the NOT.
*/
nir_def *raw = nir_u2u32(b, nir_load_pixel_coord(b));
nir_def *coord = nir_umod_imm(
b,
nir_vec2(b, nir_inot(b, nir_channel(b, raw, 0)), nir_channel(b, raw, 1)),
32);
/* Extract the column from the packed bitfield */
nir_def *pattern = nir_load_polygon_stipple_agx(b, nir_channel(b, coord, 1));
nir_def *bit = nir_ubitfield_extract(b, pattern, nir_channel(b, coord, 0),
nir_imm_int(b, 1));
/* Discard fragments where the pattern is 0 */
nir_demote_if(b, nir_ieq_imm(b, bit, 0));
s->info.fs.uses_discard = true;
return nir_progress(true, b->impl, nir_metadata_control_flow);
}
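/*
 * Map the uniform-load intrinsics used by vertex shader parts to their
 * slots in the AGX vertex shader uniform ABI, returning -1 for intrinsics
 * that are not part of the mapping.
 */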
static int
map_vs_part_uniform(nir_intrinsic_instr *intr, unsigned nr_attribs)
{
switch (intr->intrinsic) {
case nir_intrinsic_load_vbo_base_agx:
return AGX_ABI_VUNI_VBO_BASE(nir_src_as_uint(intr->src[0]));
case nir_intrinsic_load_attrib_clamp_agx:
return AGX_ABI_VUNI_VBO_CLAMP(nr_attribs, nir_src_as_uint(intr->src[0]));
case nir_intrinsic_load_first_vertex:
return AGX_ABI_VUNI_FIRST_VERTEX(nr_attribs);
case nir_intrinsic_load_base_instance:
return AGX_ABI_VUNI_BASE_INSTANCE(nr_attribs);
case nir_intrinsic_load_vertex_param_buffer_poly:
return AGX_ABI_VUNI_VERTEX_PARAMS(nr_attribs);
default:
return -1;
}
}
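/*
 * Likewise for fragment shader parts: map loads of the blend constant
 * colour channels to their slots in the AGX fragment uniform ABI.
 */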
static int
map_fs_part_uniform(nir_intrinsic_instr *intr)
{
switch (intr->intrinsic) {
case nir_intrinsic_load_blend_const_color_r_float:
return AGX_ABI_FUNI_BLEND_R;
case nir_intrinsic_load_blend_const_color_g_float:
return AGX_ABI_FUNI_BLEND_G;
case nir_intrinsic_load_blend_const_color_b_float:
return AGX_ABI_FUNI_BLEND_B;
case nir_intrinsic_load_blend_const_color_a_float:
return AGX_ABI_FUNI_BLEND_A;
default:
return -1;
}
}
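/*
 * Rewrite ABI uniform loads as preamble loads, and texture handles as
 * bindless handles in descriptor set 0 scaled by the descriptor stride.
 * For vertex shaders, the callback data carries the attribute count used
 * to lay out the uniforms.
 */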
static bool
lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
void *data)
{
int unif;
if (b->shader->info.stage == MESA_SHADER_VERTEX) {
unsigned *nr_attribs = data;
unif = map_vs_part_uniform(intr, *nr_attribs);
} else {
unif = map_fs_part_uniform(intr);
}
if (unif >= 0) {
b->cursor = nir_instr_remove(&intr->instr);
nir_def *load = nir_load_preamble(b, 1, intr->def.bit_size, .base = unif);
nir_def_rewrite_uses(&intr->def, load);
return true;
} else if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
b->cursor = nir_instr_remove(&intr->instr);
nir_def *offs =
nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
nir_def_rewrite_uses(&intr->def,
nir_bindless_image_agx(b, offs, .desc_set = 0));
return true;
} else {
return false;
}
}
bool
agx_nir_lower_non_monolithic_uniforms(nir_shader *nir, unsigned nr)
{
return nir_shader_intrinsics_pass(nir, lower_non_monolithic_uniforms,
nir_metadata_control_flow, &nr);
}
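/*
 * Remap load_vertex_id according to the adjacency topology in the key.
 * Triangle list adjacency simply doubles the ID to skip the adjacent
 * vertices; the strip and line variants are decoded by libpoly helpers.
 */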
static bool
lower_adjacency(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
const struct agx_vs_prolog_key *key = data;
b->cursor = nir_before_instr(&intr->instr);
if (intr->intrinsic != nir_intrinsic_load_vertex_id)
return false;
nir_def *id = nir_load_vertex_id(b);
if (key->adjacency == MESA_PRIM_LINES_ADJACENCY) {
id = poly_map_to_line_adj(b, id);
} else if (key->adjacency == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) {
id = poly_map_to_tri_strip_adj(b, id);
} else if (key->adjacency == MESA_PRIM_LINE_STRIP_ADJACENCY) {
id = poly_map_to_line_strip_adj(b, id);
} else if (key->adjacency == MESA_PRIM_TRIANGLES_ADJACENCY) {
/* Sequence (0, 2, 4), (6, 8, 10), ... */
id = nir_imul_imm(b, id, 2);
} else {
UNREACHABLE("unknown");
}
id = poly_nir_load_vertex_id(b, id);
nir_def_replace(&intr->def, id);
return true;
}
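/*
 * Build a vertex shader prolog from its key: read each attribute the API
 * shader uses and export it, along with the vertex/instance IDs, in the
 * ABI registers, then lower VBO fetch, adjacency remapping, and uniforms
 * to match.
 */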
void
agx_nir_vs_prolog(nir_builder *b, const void *key_)
{
const struct agx_vs_prolog_key *key = key_;
b->shader->info.stage = MESA_SHADER_VERTEX;
b->shader->info.name = "VS prolog";
/* First, construct a passthrough shader reading each attribute and exporting
* the value. We also need to export vertex/instance ID in their usual regs.
*/
if (!key->static_vi) {
unsigned i = 0;
nir_def *vec = NULL;
unsigned vec_idx = ~0;
BITSET_FOREACH_SET(i, key->component_mask, AGX_MAX_ATTRIBS * 4) {
unsigned a = i / 4;
unsigned c = i % 4;
if (vec_idx != a) {
vec = nir_load_input(b, 4, 32, nir_imm_int(b, 0), .base = a);
vec_idx = a;
}
nir_export_agx(b, nir_channel(b, vec, c),
.base = AGX_ABI_VIN_ATTRIB(i));
}
}
if (!key->hw) {
nir_export_agx(b, nir_channel(b, nir_load_global_invocation_id(b, 32), 0),
.base = AGX_ABI_VIN_VERTEX_ID_ZERO_BASE);
}
nir_export_agx(b, nir_load_vertex_id(b), .base = AGX_ABI_VIN_VERTEX_ID);
nir_export_agx(b, nir_load_instance_id(b), .base = AGX_ABI_VIN_INSTANCE_ID);
/* Now lower the resulting program using the key */
if (!key->static_vi) {
agx_nir_lower_vbo(b->shader, key->attribs, key->robustness, false);
/* Clean up redundant vertex ID loads */
if (!key->hw || key->adjacency) {
NIR_PASS(_, b->shader, nir_opt_cse);
NIR_PASS(_, b->shader, nir_opt_dce);
}
}
if (!key->hw) {
b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));
poly_nir_lower_sw_vs(b->shader);
} else if (key->adjacency) {
nir_shader_intrinsics_pass(b->shader, lower_adjacency,
nir_metadata_control_flow, (void *)key);
}
nir_inline_sysval(b->shader, nir_intrinsic_load_index_size_poly,
key->sw_index_size_B);
/* Finally, lower uniforms according to our ABI */
unsigned nr = DIV_ROUND_UP(BITSET_LAST_BIT(key->component_mask), 4);
agx_nir_lower_non_monolithic_uniforms(b->shader, nr);
b->shader->info.io_lowered = true;
}
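/* Record which attribute components the API vertex shader actually reads. */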
static bool
gather_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
if (intr->intrinsic != nir_intrinsic_load_input)
return false;
unsigned idx = nir_src_as_uint(intr->src[0]) + nir_intrinsic_base(intr);
unsigned comp = nir_intrinsic_component(intr);
assert(intr->def.bit_size == 32 && "todo: push conversions up?");
unsigned base = 4 * idx + comp;
b->cursor = nir_before_instr(&intr->instr);
BITSET_WORD *comps_read = data;
nir_component_mask_t mask = nir_def_components_read(&intr->def);
u_foreach_bit(c, mask) {
BITSET_SET(comps_read, base + c);
}
return false;
}
bool
agx_nir_gather_vs_inputs(nir_shader *s, BITSET_WORD *attrib_components_read)
{
return nir_shader_intrinsics_pass(
s, gather_inputs, nir_metadata_control_flow, attrib_components_read);
}
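/*
 * Replace vertex shader input loads with reads of the registers exported
 * by the prolog, per the attribute ABI.
 */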
static bool
lower_input_to_prolog(nir_builder *b, nir_intrinsic_instr *intr, void *_data)
{
if (intr->intrinsic != nir_intrinsic_load_input)
return false;
unsigned idx = nir_src_as_uint(intr->src[0]) + nir_intrinsic_base(intr);
unsigned comp = nir_intrinsic_component(intr);
assert(intr->def.bit_size == 32 && "todo: push conversions up?");
unsigned base = 4 * idx + comp;
b->cursor = nir_before_instr(&intr->instr);
nir_def *val =
nir_load_exported_agx(b, intr->def.num_components, intr->def.bit_size,
.base = AGX_ABI_VIN_ATTRIB(base));
nir_def_replace(&intr->def, val);
return true;
}
bool
agx_nir_lower_vs_input_to_prolog(nir_shader *s)
{
return nir_shader_intrinsics_pass(s, lower_input_to_prolog,
nir_metadata_control_flow, NULL);
}
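/*
 * The set of samples still active in a fragment shader part is kept in a
 * fixed exported register; lower load_active_samples_agx to read it.
 */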
static bool
lower_active_samples_to_register(nir_builder *b, nir_intrinsic_instr *intr,
void *data)
{
if (intr->intrinsic != nir_intrinsic_load_active_samples_agx)
return false;
b->cursor = nir_before_instr(&intr->instr);
nir_def *id =
nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FIN_SAMPLE_MASK);
nir_def_replace(&intr->def, id);
return true;
}
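/*
 * Shader parts query load_shader_part_tests_zs_agx to decide whether they
 * are responsible for late depth/stencil testing. Resolve it to a
 * constant: all samples (0xFF) if this part runs the tests, 0 otherwise.
 * This only matters when the shader can discard.
 */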
static bool
lower_tests_zs_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
bool *value = data;
if (intr->intrinsic != nir_intrinsic_load_shader_part_tests_zs_agx)
return false;
b->cursor = nir_instr_remove(&intr->instr);
nir_def_rewrite_uses(&intr->def, nir_imm_intN_t(b, *value ? 0xFF : 0, 16));
return true;
}
static bool
lower_tests_zs(nir_shader *s, bool value)
{
if (!s->info.fs.uses_discard)
return false;
return nir_shader_intrinsics_pass(s, lower_tests_zs_intr,
nir_metadata_control_flow, &value);
}
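/* Check whether any blend factor of this render target reads the second
 * (dual) source colour.
 */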
static inline bool
blend_uses_2src(struct agx_blend_rt_key rt)
{
enum pipe_blendfactor factors[] = {
rt.rgb_src_factor,
rt.rgb_dst_factor,
rt.alpha_src_factor,
rt.alpha_dst_factor,
};
for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) {
switch (factors[i]) {
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
return true;
default:
break;
}
}
return false;
}
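/*
 * Forward one colour output from the API shader to the epilog: read the
 * exported register for the linked location, reinsert a constant 1.0
 * alpha if the main shader omitted it, and store to the remapped render
 * target.
 */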
static void
copy_colour(nir_builder *b, const struct agx_fs_epilog_key *key,
unsigned out_rt, unsigned in_loc, bool dual_src)
{
unsigned size = (key->link.size_32 & BITFIELD_BIT(in_loc)) ? 32 : 16;
nir_def *value =
nir_load_exported_agx(b, 4, size, .base = AGX_ABI_FOUT_COLOUR(in_loc));
if (key->link.loc0_w_1 && in_loc == 0) {
value =
nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
}
nir_store_output(b, value, nir_imm_int(b, 0),
.io_semantics.location = FRAG_RESULT_DATA0 + out_rt,
.io_semantics.dual_source_blend_index = dual_src,
.src_type = nir_type_float | size);
}
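/*
 * Build a fragment shader epilog from its key: copy each colour written by
 * the API shader, then lower blending, tilebuffer stores, multisampling,
 * and late depth/stencil testing according to the linked state.
 */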
void
agx_nir_fs_epilog(nir_builder *b, const void *key_)
{
const struct agx_fs_epilog_key *key = key_;
b->shader->info.stage = MESA_SHADER_FRAGMENT;
b->shader->info.name = "FS epilog";
/* First, construct a passthrough shader reading each colour and outputting
* the value.
*/
for (unsigned rt = 0; rt < ARRAY_SIZE(key->remap); ++rt) {
int location = key->remap[rt];
/* Negative remaps indicate the attachment isn't written. */
if (location >= 0 && key->link.loc_written & BITFIELD_BIT(location)) {
copy_colour(b, key, rt, location, false);
/* If this render target uses dual source blending, also copy the dual
* source colour. While the copy_colour above is needed even for
* missing attachments to handle alpha-to-coverage, this copy is only
* for blending so should be suppressed for missing attachments to keep
* the assert from blowing up on OpenGL.
*/
if (blend_uses_2src(key->blend.rt[rt]) &&
key->rt_formats[rt] != PIPE_FORMAT_NONE) {
assert(location == 0);
copy_colour(b, key, rt, 1, true);
}
}
}
/* Grab registers early, this has to happen in the first block. */
nir_def *sample_id = NULL, *write_samples = NULL;
if (key->link.sample_shading) {
sample_id =
nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_SAMPLE_MASK);
}
if (key->link.sample_mask_after_force_early) {
write_samples =
nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_WRITE_SAMPLES);
}
/* Now lower the resulting program using the key */
struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);
if (key->force_small_tile)
tib.tile_size = (struct agx_tile_size){16, 16};
bool force_translucent = false;
nir_lower_blend_options opts = {
.scalar_blend_const = true,
.logicop_enable = key->blend.logicop_enable,
.logicop_func = key->blend.logicop_func,
};
static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");
for (unsigned i = 0; i < 8; ++i) {
opts.format[i] = key->rt_formats[i];
opts.rt[i] = (nir_lower_blend_rt){
.rgb.src_factor = key->blend.rt[i].rgb_src_factor,
.rgb.dst_factor = key->blend.rt[i].rgb_dst_factor,
.rgb.func = key->blend.rt[i].rgb_func,
.alpha.src_factor = key->blend.rt[i].alpha_src_factor,
.alpha.dst_factor = key->blend.rt[i].alpha_dst_factor,
.alpha.func = key->blend.rt[i].alpha_func,
.colormask = key->blend.rt[i].colormask,
};
}
/* It's more efficient to use masked stores (with
* agx_nir_lower_tilebuffer) than to emulate colour masking with
* nir_lower_blend.
*/
uint8_t colormasks[8] = {0};
for (unsigned i = 0; i < 8; ++i) {
if (key->rt_formats[i] == PIPE_FORMAT_NONE)
continue;
if (agx_tilebuffer_supports_mask(&tib, i)) {
colormasks[i] = key->blend.rt[i].colormask;
opts.rt[i].colormask = (uint8_t)BITFIELD_MASK(4);
} else {
colormasks[i] = (uint8_t)BITFIELD_MASK(4);
}
/* If not all bound RTs are fully written to, we need to force
* translucent pass type. agx_nir_lower_tilebuffer will take
* care of this for its own colormasks input.
*/
unsigned comps = util_format_get_nr_components(key->rt_formats[i]);
if ((opts.rt[i].colormask & BITFIELD_MASK(comps)) !=
BITFIELD_MASK(comps)) {
force_translucent = true;
}
}
/* Alpha-to-coverage must be lowered before alpha-to-one */
if (key->blend.alpha_to_coverage)
NIR_PASS(_, b->shader, nir_lower_alpha_to_coverage, tib.nr_samples, false,
NULL);
/* Depth/stencil writes must be deferred until after all discards,
* particularly alpha-to-coverage.
*/
if (key->link.write_z || key->link.write_s) {
nir_store_zs_agx(
b, nir_imm_intN_t(b, 0xFF, 16),
nir_load_exported_agx(b, 1, 32, .base = AGX_ABI_FOUT_Z),
nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_S),
.base = (key->link.write_z ? 1 : 0) | (key->link.write_s ? 2 : 0));
if (key->link.write_z)
b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
if (key->link.write_s)
b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
}
/* Alpha-to-one must be lowered before blending */
if (key->blend.alpha_to_one)
NIR_PASS(_, b->shader, nir_lower_alpha_to_one);
NIR_PASS(_, b->shader, nir_lower_blend, &opts);
unsigned rt_spill = key->link.rt_spill_base;
NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
write_samples, &force_translucent);
NIR_PASS(_, b->shader, agx_nir_lower_texture);
NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);
/* If the API shader runs once per sample, then the epilog runs once per
* sample as well, so we need to lower our code to run for a single sample.
*
* If the API shader runs once per pixel, then the epilog runs once per
* pixel. So we run through the monolithic MSAA lowering, which wraps the
* epilog in the sample loop if needed. This localizes sample shading
* to the epilog, when sample shading is not used but blending is.
*/
if (key->link.sample_shading) {
/* Lower the resulting discards. Done in agx_nir_lower_monolithic_msaa for
* the pixel shaded path. Must be done before agx_nir_lower_to_per_sample
* to avoid duplicating tests.
*/
if (key->blend.alpha_to_coverage) {
NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
}
NIR_PASS(_, b->shader, agx_nir_lower_to_per_sample);
NIR_PASS(_, b->shader, agx_nir_lower_fs_active_samples_to_register);
/* Ensure the sample ID is preserved in register. We do this late since it
* has to go in the last block, and the above passes might add control
* flow when lowering.
*/
b->cursor = nir_after_impl(b->impl);
nir_export_agx(b, sample_id, .base = AGX_ABI_FIN_SAMPLE_MASK);
} else {
NIR_PASS(_, b->shader, agx_nir_lower_monolithic_msaa, key->nr_samples);
}
/* Finally, lower uniforms according to our ABI */
nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
nir_metadata_control_flow, NULL);
/* There is no shader part after the epilog, so we're always responsible for
* running our own tests, unless the fragment shader forced early tests.
*/
NIR_PASS(_, b->shader, lower_tests_zs, !key->link.already_ran_zs);
b->shader->info.io_lowered = true;
b->shader->info.fs.uses_fbfetch_output |= force_translucent;
b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
}
struct lower_epilog_ctx {
struct agx_fs_epilog_link_info *info;
nir_variable *masked_samples;
};
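/*
 * Lower the API fragment shader's outputs to the exports consumed by the
 * epilog, recording in the link info which locations are written, their
 * sizes, and whether depth/stencil are written, so a matching epilog can
 * be built.
 */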
static bool
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
struct lower_epilog_ctx *ctx = data;
struct agx_fs_epilog_link_info *info = ctx->info;
if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
b->cursor = nir_instr_remove(&intr->instr);
unsigned base = nir_intrinsic_base(intr);
info->write_z = !!(base & 1);
info->write_s = !!(base & 2);
if (info->write_z)
nir_export_agx(b, intr->src[1].ssa, .base = AGX_ABI_FOUT_Z);
if (info->write_s)
nir_export_agx(b, intr->src[2].ssa, .base = AGX_ABI_FOUT_S);
return true;
}
if (intr->intrinsic == nir_intrinsic_demote_samples &&
b->shader->info.fs.early_fragment_tests) {
if (!ctx->masked_samples) {
b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));
ctx->masked_samples =
nir_local_variable_create(b->impl, glsl_uint16_t_type(), NULL);
nir_store_var(b, ctx->masked_samples, nir_imm_intN_t(b, 0xFF, 16),
nir_component_mask(1));
}
b->cursor = nir_before_instr(&intr->instr);
nir_def *mask = nir_load_var(b, ctx->masked_samples);
nir_def *mask_2 =
nir_ixor(b, intr->src[0].ssa, nir_imm_intN_t(b, 0xff, 16));
mask = nir_iand(b, mask, mask_2);
nir_store_var(b, ctx->masked_samples, mask, nir_component_mask(1));
nir_instr_remove(&intr->instr);
return true;
}
if (intr->intrinsic != nir_intrinsic_store_output)
return false;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
/* Fix up gl_FragColor */
if (sem.location == FRAG_RESULT_COLOR) {
sem.location = FRAG_RESULT_DATA0;
info->broadcast_rt0 = true;
}
/* We don't use the epilog for sample mask writes */
if (sem.location < FRAG_RESULT_DATA0)
return false;
/* Determine the ABI location. Dual source blending aliases a second
* render target, so get that out of the way now.
*/
unsigned loc = sem.location - FRAG_RESULT_DATA0;
loc += nir_src_as_uint(intr->src[1]);
if (sem.dual_source_blend_index) {
assert(loc == 0);
loc = 1;
}
b->cursor = nir_instr_remove(&intr->instr);
nir_def *vec = intr->src[0].ssa;
info->loc_written |= BITFIELD_BIT(loc);
if (vec->bit_size == 32)
info->size_32 |= BITFIELD_BIT(loc);
else
assert(vec->bit_size == 16);
uint32_t one_f = (vec->bit_size == 32 ? fui(1.0) : _mesa_float_to_half(1.0));
unsigned comp = nir_intrinsic_component(intr);
u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
nir_scalar s = nir_scalar_resolved(vec, c);
if (loc == 0 && c == 3 && nir_scalar_is_const(s) &&
nir_scalar_as_uint(s) == one_f) {
info->loc0_w_1 = true;
} else {
unsigned stride = vec->bit_size / 16;
nir_export_agx(b, nir_channel(b, vec, c),
.base = AGX_ABI_FOUT_COLOUR(loc) + (comp + c) * stride);
}
}
return true;
}
bool
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
struct agx_fs_epilog_link_info *out)
{
struct lower_epilog_ctx ctx = {.info = out};
nir_shader_intrinsics_pass(s, lower_output_to_epilog,
nir_metadata_control_flow, &ctx);
if (ctx.masked_samples) {
nir_builder b =
nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(s)));
nir_export_agx(&b, nir_load_var(&b, ctx.masked_samples),
.base = AGX_ABI_FOUT_WRITE_SAMPLES);
out->sample_mask_after_force_early = true;
bool progress;
do {
progress = false;
NIR_PASS(progress, s, nir_lower_vars_to_ssa);
NIR_PASS(progress, s, nir_opt_dce);
} while (progress);
}
out->sample_shading = s->info.fs.uses_sample_shading;
return true;
}
bool
agx_nir_lower_fs_active_samples_to_register(nir_shader *s)
{
return nir_shader_intrinsics_pass(s, lower_active_samples_to_register,
nir_metadata_control_flow, NULL);
}
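/*
 * Accumulate the pipeline statistics fragment shader invocation count:
 * outside helper invocations, atomically add the number of covered samples
 * to the PS_INVOCATIONS query.
 */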
static bool
agx_nir_lower_stats_fs(nir_shader *s)
{
assert(s->info.stage == MESA_SHADER_FRAGMENT);
nir_builder b_ =
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
nir_builder *b = &b_;
nir_push_if(b, nir_inot(b, nir_load_helper_invocation(b, 1)));
nir_def *samples = nir_bit_count(b, nir_load_sample_mask_in(b));
unsigned query = PIPE_STAT_QUERY_PS_INVOCATIONS;
nir_def *addr = nir_load_stat_query_address_poly(b, .base = query);
nir_global_atomic(b, 32, addr, samples, .atomic_op = nir_atomic_op_iadd);
nir_pop_if(b, NULL);
return nir_progress(true, b->impl, nir_metadata_control_flow);
}
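/*
 * Build a fragment shader prolog from its key: emulate the API sample
 * mask, statistics queries, gl_CullDistance, and polygon stipple, then
 * lower discards, sample mask writes, and uniforms for a shader part.
 */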
void
agx_nir_fs_prolog(nir_builder *b, const void *key_)
{
const struct agx_fs_prolog_key *key = key_;
b->shader->info.stage = MESA_SHADER_FRAGMENT;
b->shader->info.name = "FS prolog";
/* First, insert code for any emulated features */
if (key->api_sample_mask != 0xff) {
/* Kill samples that are NOT covered by the mask */
nir_demote_samples(b, nir_imm_intN_t(b, key->api_sample_mask ^ 0xff, 16));
b->shader->info.fs.uses_discard = true;
}
if (key->statistics) {
NIR_PASS(_, b->shader, agx_nir_lower_stats_fs);
}
if (key->cull_distance_size) {
NIR_PASS(_, b->shader, agx_nir_lower_cull_distance_fs,
key->cull_distance_size);
}
if (key->polygon_stipple) {
NIR_PASS(_, b->shader, agx_nir_lower_poly_stipple);
}
/* Then, lower the prolog */
NIR_PASS(_, b->shader, agx_nir_lower_discard_zs_emit);
NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
lower_non_monolithic_uniforms, nir_metadata_control_flow, NULL);
NIR_PASS(_, b->shader, lower_tests_zs, key->run_zs_tests);
b->shader->info.io_lowered = true;
}