mesa/src/panfrost/lib/pan_fb_nir.c
Erik Faye-Lund b8024d7723
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run
pan/lib: harmonize default-case handling
There's little point in having two unreachable blocks here. Yeah, sure,
in theory we are a little bit safer against forgetting to add a case for
newly introduced enum values here. But the UNREACHABLE macro should
already tell us when we trigger such cases anyway, and the cost here is
really readability.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40115>
2026-03-05 10:36:58 +00:00

859 lines
29 KiB
C

/*
* Copyright (C) 2026 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "pan_blend.h"
#include "pan_desc.h"
#include "pan_fb.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/pan_nir.h"
/* Translate a Mali texture dimension into the GLSL sampler dimension used
 * for the texel fetches built in this file.
 */
static enum glsl_sampler_dim
mali_to_glsl_dim(enum mali_texture_dimension dim)
{
   switch (dim) {
   case MALI_TEXTURE_DIMENSION_1D:
      return GLSL_SAMPLER_DIM_1D;

   case MALI_TEXTURE_DIMENSION_2D:
   case MALI_TEXTURE_DIMENSION_CUBE:
      /* Cube maps are mapped to the 2D sampler dimension */
      return GLSL_SAMPLER_DIM_2D;

   case MALI_TEXTURE_DIMENSION_3D:
      return GLSL_SAMPLER_DIM_3D;

   default:
      UNREACHABLE("Unknown mali_texture_dimension");
   }
}
/* Pick the shader data type for a format: pure unsigned integer formats map
 * to U32, pure signed integer formats to I32, and everything else to F32.
 */
static enum pan_fb_shader_data_type
data_type_for_format(enum pipe_format format)
{
   if (util_format_is_pure_uint(format))
      return PAN_FB_SHADER_DATA_TYPE_U32;

   if (util_format_is_pure_sint(format))
      return PAN_FB_SHADER_DATA_TYPE_I32;

   return PAN_FB_SHADER_DATA_TYPE_F32;
}
/* Convert a shader-key data type into the matching 32-bit NIR ALU type. */
static nir_alu_type
nir_alu_type_for_data_type(enum pan_fb_shader_data_type data_type)
{
   switch (data_type) {
   case PAN_FB_SHADER_DATA_TYPE_U32:
      return nir_type_uint32;

   case PAN_FB_SHADER_DATA_TYPE_I32:
      return nir_type_int32;

   case PAN_FB_SHADER_DATA_TYPE_F32:
      return nir_type_float32;

   default:
      UNREACHABLE("Invalid pan_fb_shader_data_type");
   }
}
/* Translate a framebuffer load op into the shader op stored in the key. */
static enum pan_fb_shader_op
get_shader_op_for_load(enum pan_fb_load_op op)
{
   switch (op) {
   case PAN_FB_LOAD_IMAGE:
      return PAN_FB_SHADER_LOAD_IMAGE;

   case PAN_FB_LOAD_CLEAR:
      return PAN_FB_SHADER_LOAD_CLEAR;

   case PAN_FB_LOAD_NONE:
      return PAN_FB_SHADER_DONT_CARE;

   default:
      UNREACHABLE("Invalid load op");
   }
}
/* Translate a framebuffer resolve op into the shader op stored in the key.
 *
 * RESOLVE_NONE keeps the current contents (PRESERVE), RESOLVE_IMAGE loads
 * from an image, and the RT_n / Z / S variants copy from the corresponding
 * attachment.
 */
static inline enum pan_fb_shader_op
get_shader_op_for_resolve(enum pan_fb_resolve_op op)
{
   switch (op) {
   case PAN_FB_RESOLVE_NONE:
      return PAN_FB_SHADER_PRESERVE;

   case PAN_FB_RESOLVE_IMAGE:
      return PAN_FB_SHADER_LOAD_IMAGE;

   /* Copies from another color attachment */
   case PAN_FB_RESOLVE_RT_0:
      return PAN_FB_SHADER_COPY_RT_0;
   case PAN_FB_RESOLVE_RT_1:
      return PAN_FB_SHADER_COPY_RT_1;
   case PAN_FB_RESOLVE_RT_2:
      return PAN_FB_SHADER_COPY_RT_2;
   case PAN_FB_RESOLVE_RT_3:
      return PAN_FB_SHADER_COPY_RT_3;
   case PAN_FB_RESOLVE_RT_4:
      return PAN_FB_SHADER_COPY_RT_4;
   case PAN_FB_RESOLVE_RT_5:
      return PAN_FB_SHADER_COPY_RT_5;
   case PAN_FB_RESOLVE_RT_6:
      return PAN_FB_SHADER_COPY_RT_6;
   case PAN_FB_RESOLVE_RT_7:
      return PAN_FB_SHADER_COPY_RT_7;

   /* Copies from depth / stencil */
   case PAN_FB_RESOLVE_Z:
      return PAN_FB_SHADER_COPY_Z;
   case PAN_FB_RESOLVE_S:
      return PAN_FB_SHADER_COPY_S;

   default:
      UNREACHABLE("Invalid resolve op");
   }
}
/* Canonicalize an MSAA copy mode for the given shader op.
 *
 * Ops that can be implemented as a discard always get COPY_ALL.  Otherwise
 * the source sample count is the image's for LOAD_IMAGE and the
 * framebuffer's for everything else; a source with at most one sample is
 * reduced to COPY_SINGLE and any other mode is returned unchanged.
 */
static enum pan_fb_msaa_copy_op
reduce_msaa_op(enum pan_fb_msaa_copy_op msaa, enum pan_fb_shader_op op,
               uint8_t fb_sample_count, uint8_t image_sample_count)
{
   if (pan_fb_shader_op_can_discard(op))
      return PAN_FB_MSAA_COPY_ALL;

   uint8_t src_sample_count = fb_sample_count;
   if (op == PAN_FB_SHADER_LOAD_IMAGE)
      src_sample_count = image_sample_count;

   /* A per-sample copy only makes sense when the counts match */
   assert(msaa != PAN_FB_MSAA_COPY_ALL ||
          src_sample_count == fb_sample_count);

   return src_sample_count <= 1 ? PAN_FB_MSAA_COPY_SINGLE : msaa;
}
/* Returns true when the MSAA copy mode combines several samples (average,
 * min or max).  The op argument is currently not consulted.
 */
static bool
op_needs_sample_count(enum pan_fb_shader_op op,
                      enum pan_fb_msaa_copy_op msaa)
{
   switch (msaa) {
   case PAN_FB_MSAA_COPY_AVERAGE:
   case PAN_FB_MSAA_COPY_MIN:
   case PAN_FB_MSAA_COPY_MAX:
      return true;

   default:
      return false;
   }
}
/* Key target with both ops set to DONT_CARE.  get_key_target() returns this
 * when there is no attachment or when neither region has a meaningful op.
 */
static const struct pan_fb_shader_key_target key_target_dont_care = {
   .border_op = PAN_FB_SHADER_DONT_CARE,
   .in_bounds_op = PAN_FB_SHADER_DONT_CARE,
};
/* Key target with both ops set to PRESERVE.  get_key_target() returns this
 * when both ops can be implemented as a discard.
 */
static const struct pan_fb_shader_key_target key_target_preserve = {
   .border_op = PAN_FB_SHADER_PRESERVE,
   .in_bounds_op = PAN_FB_SHADER_PRESERVE,
};
/* Build the shader-key entry for a single attachment.
 *
 * Returns key_target_dont_care when there is no attachment
 * (PIPE_FORMAT_NONE) or when both ops are DONT_CARE, and key_target_preserve
 * when both ops can be implemented as a discard.  Otherwise the (possibly
 * reduced) ops and MSAA modes are recorded together with the data type for
 * the format and, when either op loads from an image, the image dimension
 * and arrayness taken from iview.
 *
 * iview is only dereferenced when one of the ops is
 * PAN_FB_SHADER_LOAD_IMAGE.
 */
static struct pan_fb_shader_key_target
get_key_target(enum pipe_format format,
uint8_t fb_sample_count,
bool has_border,
enum pan_fb_shader_op in_bounds_op,
enum pan_fb_shader_op border_op,
enum pan_fb_msaa_copy_op in_bounds_msaa,
enum pan_fb_msaa_copy_op border_msaa,
bool sample0_only,
const struct pan_image_view *iview)
{
/* No attachment bound at this slot */
if (format == PIPE_FORMAT_NONE)
return key_target_dont_care;
/* If we have a full framebuffer, there is no border. Set the border load
* equal to the in-bounds load so things in the shader fold nicely.
*/
if (!has_border) {
border_op = in_bounds_op;
border_msaa = in_bounds_msaa;
}
if (in_bounds_op == PAN_FB_SHADER_DONT_CARE &&
border_op == PAN_FB_SHADER_DONT_CARE)
return key_target_dont_care;
/* If both load ops can be implemented as a discard, set the target to
* always PRESERVE and we just won't emit any code to do a load.
*
* Importantly, this is true for all load ops except PAN_FB_LOAD_IMAGE.
*/
if (pan_fb_shader_op_can_discard(in_bounds_op) &&
pan_fb_shader_op_can_discard(border_op))
return key_target_preserve;
/* Defaults used when neither op reads from an image */
enum mali_texture_dimension dim = MALI_TEXTURE_DIMENSION_2D;
bool is_array = false;
uint8_t image_sample_count = 0;
if (in_bounds_op == PAN_FB_SHADER_LOAD_IMAGE ||
border_op == PAN_FB_SHADER_LOAD_IMAGE) {
/* Sanity-check that the image view is compatible with the attachment:
* matching depth bits for depth formats, a stencil aspect for
* stencil-only formats, and an identical format otherwise.
*/
if (util_format_has_depth(util_format_description(format)))
assert(util_format_get_depth_bits(format) ==
util_format_get_depth_bits(iview->format));
else if (util_format_has_stencil(util_format_description(format)))
assert(util_format_has_stencil(util_format_description(iview->format)));
else
assert(format == iview->format);
dim = iview->dim;
/* A view covering more than one layer is treated as an array */
is_array = iview->first_layer != iview->last_layer;
image_sample_count = pan_image_view_get_nr_samples(iview);
}
in_bounds_msaa = reduce_msaa_op(in_bounds_msaa, in_bounds_op,
fb_sample_count, image_sample_count);
border_msaa = reduce_msaa_op(border_msaa, border_op,
fb_sample_count, image_sample_count);
/* The image sample count only needs to be part of the key when one of the
* regions combines samples (average/min/max).
*/
const bool needs_sample_count =
op_needs_sample_count(in_bounds_op, in_bounds_msaa) ||
op_needs_sample_count(border_op, border_msaa);
/* We don't have a bound on this enum so we need a live assert.
* NOTE(review): the limit of 4 values presumably matches the width of the
* image_dim field in the key -- confirm against the struct definition.
*/
assert(((unsigned)dim) < (1 << 2));
return (struct pan_fb_shader_key_target) {
.in_bounds_op = in_bounds_op,
.border_op = border_op,
.in_bounds_msaa = in_bounds_msaa,
.border_msaa = border_msaa,
.sample0_only = sample0_only,
.image_dim = dim,
.image_is_array = is_array,
.image_samples_log2 = needs_sample_count
? util_logbase2(image_sample_count) : 0,
.data_type = data_type_for_format(format),
};
}
static struct pan_fb_shader_key_target
get_load_key_target(enum pipe_format format,
uint8_t fb_sample_count,
bool has_border,
const struct pan_fb_load_target *load)
{
return get_key_target(format, fb_sample_count, has_border,
get_shader_op_for_load(load->in_bounds_load),
get_shader_op_for_load(load->border_load),
load->msaa, load->msaa, false, load->iview);
}
bool
GENX(pan_fb_load_shader_key_fill)(struct pan_fb_shader_key *key,
const struct pan_fb_layout *fb,
const struct pan_fb_load *load,
bool zs_prepass)
{
*key = (struct pan_fb_shader_key) { };
const bool has_border = pan_fb_has_partial_tiles(fb);
/* Z/S can only be written from the prepass and color can only be written
* from the non-prepass. Setting everything to zero will cause the shader
* to just not write anything in that case.
*/
if (zs_prepass) {
key->z = get_load_key_target(fb->z_format, fb->sample_count,
has_border, &load->z);
key->s = get_load_key_target(fb->s_format, fb->sample_count,
has_border, &load->s);
return pan_fb_shader_key_target_written(&key->z) ||
pan_fb_shader_key_target_written(&key->s);
} else {
bool needs_shader = false;
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
key->rts[rt] = get_load_key_target(fb->rt_formats[rt],
fb->sample_count,
has_border, &load->rts[rt]);
if (pan_fb_shader_key_target_written(&key->rts[rt]))
needs_shader = true;
}
return needs_shader;
}
}
#if PAN_ARCH >= 6
/* Build the key target for one attachment of a framebuffer resolve.
 *
 * Unlike loads, the in-bounds and border regions carry independent resolve
 * ops and MSAA modes, and sample0_only is always true.
 */
static struct pan_fb_shader_key_target
get_resolve_key_target(enum pipe_format format,
                       uint8_t fb_sample_count,
                       bool has_border,
                       const struct pan_fb_resolve_target *load)
{
   return get_key_target(format, fb_sample_count, has_border,
                         get_shader_op_for_resolve(load->in_bounds.resolve),
                         get_shader_op_for_resolve(load->border.resolve),
                         load->in_bounds.msaa, load->border.msaa,
                         true, load->iview);
}
bool
GENX(pan_fb_resolve_shader_key_fill)(struct pan_fb_shader_key *key,
const struct pan_fb_layout *fb,
const struct pan_fb_resolve *resolve)
{
const bool has_border = pan_fb_has_partial_tiles(fb);
*key = (struct pan_fb_shader_key) {
.z = get_resolve_key_target(fb->z_format, fb->sample_count,
has_border, &resolve->z),
.s = get_resolve_key_target(fb->s_format, fb->sample_count,
has_border, &resolve->s),
.z_format = fb->z_format,
.fb_sample_count = fb->sample_count,
};
bool needs_shader = pan_fb_shader_key_target_written(&key->z) ||
pan_fb_shader_key_target_written(&key->s);
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
key->rts[rt] = get_resolve_key_target(fb->rt_formats[rt],
fb->sample_count,
has_border,
&resolve->rts[rt]);
if (pan_fb_shader_key_target_written(&key->rts[rt]))
needs_shader = true;
}
return needs_shader;
}
#endif
/* Recursively combine sample_count per-sample values into one, pairwise.
 *
 * sample_count must be a non-zero power of two.  For AVERAGE this only
 * produces the sum; the caller is responsible for the division.  MIN and MAX
 * use the float, unsigned or signed variant matching nir_type.
 */
static nir_def *
combine_samples_no_div(nir_builder *b, nir_def **samples, uint8_t sample_count,
                       const nir_alu_type nir_type,
                       enum pan_fb_msaa_copy_op msaa)
{
   assert(util_is_power_of_two_nonzero(sample_count));

   /* Base case: nothing to combine */
   if (sample_count == 1)
      return samples[0];

   const uint8_t half = sample_count / 2;
   nir_def *lo = combine_samples_no_div(b, samples, half, nir_type, msaa);
   nir_def *hi = combine_samples_no_div(b, samples + half, half,
                                        nir_type, msaa);

   /* The first half is assumed to always come before the second, so placing
    * the cursor right after the second half's combine instruction puts us at
    * the least common ancestor of both.
    */
   b->cursor = nir_after_instr(nir_def_instr(hi));

   const nir_alu_type base_type = nir_alu_type_get_base_type(nir_type);

   switch (msaa) {
   case PAN_FB_MSAA_COPY_AVERAGE:
      assert(base_type == nir_type_float);
      return nir_fadd(b, lo, hi);

   case PAN_FB_MSAA_COPY_MIN:
      if (base_type == nir_type_float)
         return nir_fmin(b, lo, hi);
      if (base_type == nir_type_uint)
         return nir_umin(b, lo, hi);
      if (base_type == nir_type_int)
         return nir_imin(b, lo, hi);
      UNREACHABLE("Unsupported NIR type");

   case PAN_FB_MSAA_COPY_MAX:
      if (base_type == nir_type_float)
         return nir_fmax(b, lo, hi);
      if (base_type == nir_type_uint)
         return nir_umax(b, lo, hi);
      if (base_type == nir_type_int)
         return nir_imax(b, lo, hi);
      UNREACHABLE("Unsupported NIR type");

   default:
      UNREACHABLE("Invalid MSAA op");
   }
}
/* Combine per-sample values according to the MSAA copy mode.
 *
 * SAMPLE_0 just returns the first sample.  Everything else goes through
 * combine_samples_no_div(); for AVERAGE the resulting sum is then divided by
 * the sample count.
 */
static nir_def *
combine_samples(nir_builder *b, nir_def **samples, uint8_t sample_count,
                const nir_alu_type nir_type, enum pan_fb_msaa_copy_op msaa)
{
   if (msaa == PAN_FB_MSAA_COPY_SAMPLE_0)
      return samples[0];

   nir_def *combined =
      combine_samples_no_div(b, samples, sample_count, nir_type, msaa);

   if (msaa != PAN_FB_MSAA_COPY_AVERAGE)
      return combined;

   return nir_fdiv_imm(b, combined, sample_count);
}
/* Emit a load of the current sample ID and flag the shader as using sample
 * shading.
 */
static nir_def *
build_sample_id(nir_builder *b)
{
   nir_shader *shader = b->shader;

   shader->info.fs.uses_sample_shading = true;

   return nir_load_sample_id(b);
}
/* Emit the texel fetch(es) that load one attachment's value from an image.
 *
 * pos must have three components; which of them are passed to the fetch
 * depends on the image dimension and arrayness.  The texture index is the
 * fragment result location.  The MSAA mode selects between a plain fetch, a
 * single multisample fetch, or one fetch per sample followed by a combine.
 * For depth and stencil only the first channel of the result is returned.
 */
static nir_def *
build_image_load(nir_builder *b, const nir_alu_type nir_type,
nir_def *pos, gl_frag_result location,
enum pan_fb_msaa_copy_op msaa, uint8_t sample_count,
enum mali_texture_dimension dim, bool is_array)
{
assert(pos->num_components == 3);
/* Trim the coordinate down to what the image dimension needs */
switch (dim) {
case MALI_TEXTURE_DIMENSION_1D:
if (is_array)
/* Keep components 0 and 2, dropping component 1 */
pos = nir_channels(b, pos, 0b101);
else
pos = nir_channel(b, pos, 0);
break;
case MALI_TEXTURE_DIMENSION_CUBE:
/* Cubes are only supported as arrays; all three components are used */
assert(is_array);
break;
case MALI_TEXTURE_DIMENSION_2D:
if (!is_array)
/* Non-array 2D only needs the first two components */
pos = nir_channels(b, pos, 0b011);
break;
case MALI_TEXTURE_DIMENSION_3D:
/* All three components are used as-is */
break;
default:
UNREACHABLE("Unsupported dim");
}
nir_def *val;
switch (msaa) {
case PAN_FB_MSAA_COPY_SINGLE:
/* Single-sampled source: plain texel fetch */
val = nir_txf(b, pos,
.texture_index = location,
.dim = mali_to_glsl_dim(dim),
.dest_type = nir_type,
.is_array = is_array);
break;
case PAN_FB_MSAA_COPY_ALL:
case PAN_FB_MSAA_COPY_IDENTICAL:
case PAN_FB_MSAA_COPY_SAMPLE_0: {
assert(dim == MALI_TEXTURE_DIMENSION_2D);
/* COPY_ALL fetches the sample matching the current sample ID; the other
* two modes always fetch sample 0.
*/
nir_def *sample_id = msaa == PAN_FB_MSAA_COPY_ALL
? build_sample_id(b) : nir_imm_int(b, 0);
val = nir_txf_ms(b, pos, sample_id,
.texture_index = location,
.dim = GLSL_SAMPLER_DIM_MS,
.dest_type = nir_type,
.is_array = is_array);
break;
}
case PAN_FB_MSAA_COPY_AVERAGE:
case PAN_FB_MSAA_COPY_MIN:
case PAN_FB_MSAA_COPY_MAX: {
assert(dim == MALI_TEXTURE_DIMENSION_2D);
assert(sample_count > 0);
/* Fetch every sample, then reduce them with combine_samples() */
nir_def *samples[16];
assert(sample_count <= ARRAY_SIZE(samples));
for (uint32_t s = 0; s < sample_count; s++) {
samples[s] = nir_txf_ms(b, pos, nir_imm_int(b, s),
.texture_index = location,
.dim = GLSL_SAMPLER_DIM_MS,
.dest_type = nir_type,
.is_array = is_array);
}
val = combine_samples(b, samples, sample_count, nir_type, msaa);
break;
}
default:
UNREACHABLE("Invalid copy op");
}
/* Depth and stencil are scalar outputs */
if (location == FRAG_RESULT_DEPTH || location == FRAG_RESULT_STENCIL)
val = nir_channel(b, val, 0);
return val;
}
#if PAN_ARCH >= 6
/* Compute the conversion word passed to tile loads for a given register type
 * and memory format.
 *
 * On PAN_ARCH >= 9 this is always 0.  On earlier architectures it is the
 * first word of a packed INTERNAL_CONVERSION descriptor built from the NIR
 * type and the (non-dithered) format.
 */
static uint32_t
get_fb_conversion(nir_alu_type nir_type, enum pipe_format format)
{
#if PAN_ARCH < 9
   struct mali_internal_conversion_packed packed;

   pan_pack(&packed, INTERNAL_CONVERSION, cfg) {
      cfg.memory_format =
         GENX(pan_dithered_format_from_pipe_format)(format, false);
      cfg.register_format = pan_blend_type_from_nir(nir_type);
   }

   return packed.opaque[0];
#else
   return 0;
#endif
}
/* Emit a tile-buffer load of one sample of one attachment.
 *
 * Depth and stencil loads produce a single component, color loads produce
 * four.  The conversion operand is computed with get_fb_conversion() for
 * depth (using z_format) and stencil (using PIPE_FORMAT_S8_UINT); for color
 * it is the high 32 bits of the render target's blend descriptor.
 */
static nir_def *
build_single_fb_load(nir_builder *b, const nir_alu_type nir_type,
gl_frag_result location, nir_def *sample,
enum pipe_format z_format)
{
const bool is_zs = location == FRAG_RESULT_DEPTH ||
location == FRAG_RESULT_STENCIL;
unsigned num_components = is_zs ? 1 : 4;
nir_def *conversion;
if (location == FRAG_RESULT_DEPTH) {
conversion = nir_imm_int(b, get_fb_conversion(nir_type, z_format));
} else if (location == FRAG_RESULT_STENCIL) {
conversion = nir_imm_int(b,
get_fb_conversion(nir_type, PIPE_FORMAT_S8_UINT));
} else {
/* Color: derive the render target index from the location */
assert(location >= FRAG_RESULT_DATA0);
uint8_t rt = location - FRAG_RESULT_DATA0;
assert(rt < PAN_MAX_RTS);
conversion = nir_unpack_64_2x32_split_y(b,
nir_load_blend_descriptor_pan(b, .base = rt));
}
nir_def *val = nir_load_tile_pan(b,
num_components, 32,
pan_nir_tile_location_sample(b, location, sample),
pan_nir_tile_default_coverage(b),
conversion,
.dest_type = nir_type,
.io_semantics.location = location,
.io_semantics.num_slots = 1);
/* If we loaded the stencil value, the upper 24 bits might contain garbage,
* so we need to mask that off before we do any math on the result.
*/
if (location == FRAG_RESULT_STENCIL)
val = nir_iand_imm(b, val, 0xff);
return val;
}
/* Emit the tile-buffer load(s) for one attachment according to the MSAA copy
 * mode.
 *
 * COPY_ALL loads the sample matching the current sample ID; SINGLE,
 * IDENTICAL and SAMPLE_0 load sample 0; AVERAGE, MIN and MAX load every
 * framebuffer sample and reduce them with combine_samples().
 */
static nir_def *
build_fb_load(nir_builder *b, const nir_alu_type nir_type,
              gl_frag_result location, enum pipe_format z_format,
              enum pan_fb_msaa_copy_op msaa, uint8_t fb_sample_count)
{
   switch (msaa) {
   case PAN_FB_MSAA_COPY_AVERAGE:
   case PAN_FB_MSAA_COPY_MIN:
   case PAN_FB_MSAA_COPY_MAX: {
      nir_def *per_sample[16];
      assert(fb_sample_count > 0 &&
             fb_sample_count <= ARRAY_SIZE(per_sample));

      for (uint32_t s = 0; s < fb_sample_count; s++) {
         nir_def *sample_idx = nir_imm_int(b, s);
         per_sample[s] = build_single_fb_load(b, nir_type, location,
                                              sample_idx, z_format);
      }

      return combine_samples(b, per_sample, fb_sample_count, nir_type, msaa);
   }

   case PAN_FB_MSAA_COPY_ALL: {
      nir_def *sample_idx = build_sample_id(b);
      return build_single_fb_load(b, nir_type, location, sample_idx,
                                  z_format);
   }

   case PAN_FB_MSAA_COPY_SINGLE:
   case PAN_FB_MSAA_COPY_IDENTICAL:
   case PAN_FB_MSAA_COPY_SAMPLE_0: {
      nir_def *sample_idx = nir_imm_int(b, 0);
      return build_single_fb_load(b, nir_type, location, sample_idx,
                                  z_format);
   }

   default:
      UNREACHABLE("Invalid copy op");
   }
}
#endif
/* Emit the code producing the value for one target and one shader op.
 *
 * DONT_CARE yields zero, LOAD_CLEAR the clear value and LOAD_IMAGE a texel
 * fetch.  On PAN_ARCH >= 6, PRESERVE re-reads the target itself with
 * COPY_ALL (ignoring msaa), COPY_Z / COPY_S read depth / stencil, and any
 * other op is treated as COPY_RT_n.  On older architectures every other op
 * is unreachable.
 */
static nir_def *
build_load(nir_builder *b, nir_def *pos,
           enum pan_fb_shader_op op, enum pan_fb_msaa_copy_op msaa,
           gl_frag_result location,
           const struct pan_fb_shader_key_target *target,
           const struct pan_fb_shader_key *key)
{
   const nir_alu_type nir_type = nir_alu_type_for_data_type(target->data_type);
   const bool is_zs = location == FRAG_RESULT_DEPTH ||
                      location == FRAG_RESULT_STENCIL;
   const unsigned num_components = is_zs ? 1 : 4;

   switch (op) {
   case PAN_FB_SHADER_DONT_CARE:
      return nir_imm_zero(b, num_components, 32);

   case PAN_FB_SHADER_LOAD_CLEAR:
      return nir_load_clear_value_pan(b, num_components, 32,
                                      .io_semantics.location = location,
                                      .dest_type = nir_type);

   case PAN_FB_SHADER_LOAD_IMAGE:
      return build_image_load(b, nir_type, pos, location, msaa,
                              1 << target->image_samples_log2,
                              target->image_dim, target->image_is_array);

#if PAN_ARCH >= 6
   case PAN_FB_SHADER_PRESERVE:
      return build_fb_load(b, nir_type, location, key->z_format,
                           PAN_FB_MSAA_COPY_ALL, key->fb_sample_count);

   case PAN_FB_SHADER_COPY_Z:
      return build_fb_load(b, nir_type, FRAG_RESULT_DEPTH,
                           key->z_format, msaa, key->fb_sample_count);

   case PAN_FB_SHADER_COPY_S:
      return build_fb_load(b, nir_type, FRAG_RESULT_STENCIL,
                           PIPE_FORMAT_S8_UINT, msaa, key->fb_sample_count);

   default: { /* PAN_FB_SHADER_COPY_RT_N */
      uint8_t rt = op - PAN_FB_SHADER_COPY_RT_0;
      assert(rt < PAN_MAX_RTS);
      return build_fb_load(b, nir_type, FRAG_RESULT_DATA0 + rt,
                           PIPE_FORMAT_NONE, msaa, key->fb_sample_count);
   }
#else
   default:
      UNREACHABLE("Unsupported shader op");
#endif
   }
}
/* Emit the load and output store for one target.
 *
 * If the in-bounds and border regions use the same op and MSAA mode, a
 * single load is emitted.  Otherwise the two loads are emitted on either
 * side of an if on in_bounds and merged with a phi.  For sample0_only
 * targets the whole thing is wrapped in a check that the sample ID is 0.
 */
static void
build_op(nir_builder *b, nir_def *pos, nir_def *in_bounds,
gl_frag_result location,
const struct pan_fb_shader_key_target *target,
const struct pan_fb_shader_key *key)
{
/* Only write from sample 0 when requested */
nir_if *nif_s0 = NULL;
if (target->sample0_only)
nif_s0 = nir_push_if(b, nir_ieq_imm(b, build_sample_id(b), 0));
nir_def *val;
if (target->in_bounds_op == target->border_op &&
target->in_bounds_msaa == target->border_msaa) {
/* Both regions behave identically: no need to branch on in_bounds */
val = build_load(b, pos, target->in_bounds_op, target->in_bounds_msaa,
location, target, key);
} else {
nir_def *in_bounds_val, *border_val;
nir_if *nif = nir_push_if(b, in_bounds);
{
in_bounds_val = build_load(b, pos, target->in_bounds_op,
target->in_bounds_msaa,
location, target, key);
}
nir_push_else(b, nif);
{
border_val = build_load(b, pos, target->border_op,
target->border_msaa,
location, target, key);
}
nir_pop_if(b, nif);
val = nir_if_phi(b, in_bounds_val, border_val);
}
/* Write every component of the loaded value to the output at location */
nir_store_output(b, val, nir_imm_int(b, 0),
.base = location, .range = 1,
.write_mask = nir_component_mask(val->num_components),
.src_type = nir_alu_type_for_data_type(target->data_type),
.io_semantics.location = location,
.io_semantics.num_slots = 1);
if (nif_s0)
nir_pop_if(b, nif_s0);
}
/* Whole-shader properties gathered from all written targets of a key.  Each
 * flag starts out true in gather_shader_info() and is cleared as soon as one
 * written target disagrees.
 */
struct pan_fb_shader_info {
/* Every written target's in-bounds op can be implemented as a discard */
bool discard_in_bounds;
/* Every written target's border op can be implemented as a discard */
bool discard_border;
/* Every written target is sample0_only */
bool sample0_only;
};
/* Fold one target into the shader-wide info.
 *
 * Targets that are not written are ignored.  Each flag in info is cleared
 * when this target does not satisfy the corresponding property; flags are
 * never set here.
 */
static void
gather_target_info(struct pan_fb_shader_info *info,
                   const struct pan_fb_shader_key_target *target)
{
   /* Ignore any targets we don't write */
   if (!pan_fb_shader_key_target_written(target))
      return;

   const bool in_bounds_discards =
      pan_fb_shader_op_can_discard(target->in_bounds_op);
   const bool border_discards =
      pan_fb_shader_op_can_discard(target->border_op);

   if (!in_bounds_discards)
      info->discard_in_bounds = false;

   if (!border_discards)
      info->discard_border = false;

   if (!target->sample0_only)
      info->sample0_only = false;
}
static struct pan_fb_shader_info
gather_shader_info(const struct pan_fb_shader_key *key)
{
struct pan_fb_shader_info info = {
.discard_in_bounds = true,
.discard_border = true,
.sample0_only = true,
};
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++)
gather_target_info(&info, &key->rts[rt]);
gather_target_info(&info, &key->z);
gather_target_info(&info, &key->s);
return info;
}
/* Intrinsic callback used when the whole shader only handles sample 0:
 * replaces load_sample_id with the constant 0 and
 * load_cumulative_coverage_pan with the constant 1.  Returns true when the
 * intrinsic was replaced.
 */
static bool
opt_sample0_only_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   int replacement;

   switch (intr->intrinsic) {
   case nir_intrinsic_load_sample_id:
      replacement = 0;
      break;

   case nir_intrinsic_load_cumulative_coverage_pan:
      replacement = 1;
      break;

   default:
      /* Not something we care about */
      return false;
   }

   b->cursor = nir_before_instr(&intr->instr);
   nir_def_replace(&intr->def, nir_imm_int(b, replacement));

   return true;
}
/* Build the fragment shader described by a framebuffer shader key.
 *
 * The shader computes the pixel position and whether it lies inside the
 * render area, then for every written target emits the in-bounds/border
 * op.  On PAN_ARCH >= 6, when neither depth nor stencil is written, color
 * targets are written with blend instructions and a per-target coverage;
 * otherwise outputs are written with nir_store_output via build_op().
 *
 * Returns the shader owned by the builder created here.
 */
nir_shader *
GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
const struct nir_shader_compiler_options *nir_options)
{
nir_builder builder = nir_builder_init_simple_shader(
MESA_SHADER_FRAGMENT, nir_options, "pan-fb-load");
nir_builder *b = &builder;
const struct pan_fb_shader_info info = gather_shader_info(key);
/* We shouldn't even be building a shader at all in this case but, on the
* off chance that we do, build one that just discards.
*/
if (info.discard_in_bounds && info.discard_border) {
nir_terminate(b);
return builder.shader;
}
/* Position used for image loads: (x, y, layer) as 32-bit values */
nir_def *pos_xy = nir_load_pixel_coord(b);
nir_def *pos_x = nir_u2u32(b, nir_channel(b, pos_xy, 0));
nir_def *pos_y = nir_u2u32(b, nir_channel(b, pos_xy, 1));
nir_def *pos = nir_vec3(b, pos_x, pos_y, nir_load_layer_id(b));
/* The render area is split into its first two components (minimum x/y)
* and last two components (maximum x/y).  A pixel is in bounds when
* min <= pos <= max holds on both axes (the maximum is inclusive).
*/
nir_def *ra = nir_load_fb_render_area_pan(b);
nir_def *ra_min_xy = nir_channels(b, ra, 0b0011);
nir_def *ra_max_xy = nir_channels(b, ra, 0b1100);
nir_def *in_bounds =
nir_ball(b, nir_iand(b, nir_uge(b, pos_xy, ra_min_xy),
nir_uge(b, ra_max_xy, pos_xy)));
if (PAN_ARCH >= 6 && !pan_fb_shader_key_target_written(&key->z) &&
!pan_fb_shader_key_target_written(&key->s)) {
/* We assume that the framebuffer will clear if either load op is set to
* clear. For color-only on bifrost, we can do a bit better by emitting
* BLEND instructions directly only writing the tile buffer if we're
* actually loading the image.
*/
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++) {
const struct pan_fb_shader_key_target *target = &key->rts[rt];
const gl_frag_result location = FRAG_RESULT_DATA0 + rt;
if (!pan_fb_shader_key_target_written(target))
continue;
nir_def *coverage = nir_load_cumulative_coverage_pan(b);
/* z1: zero coverage, used where nothing should be written.
* u4: undefined 4-component color paired with zero coverage.
*/
nir_def *z1 = nir_imm_int(b, 0);
nir_def *u4 = nir_undef(b, 4, 32);
nir_if *nif_s0 = NULL;
if (target->sample0_only)
nif_s0 = nir_push_if(b, nir_ieq_imm(b, build_sample_id(b), 0));
nir_def *color;
if (target->in_bounds_op == target->border_op &&
target->in_bounds_msaa == target->border_msaa) {
/* Same behavior everywhere: no branch on in_bounds needed */
color = build_load(b, pos, target->in_bounds_op,
target->in_bounds_msaa,
location, target, key);
} else {
nir_def *ib_color, *bd_color, *ib_cov, *bd_cov;
nir_if *nif = nir_push_if(b, in_bounds);
{
/* A discardable op becomes "write nothing" (zero coverage) */
if (pan_fb_shader_op_can_discard(target->in_bounds_op)) {
ib_color = u4;
ib_cov = z1;
} else {
ib_color = build_load(b, pos, target->in_bounds_op,
target->in_bounds_msaa,
location, target, key);
ib_cov = coverage;
}
}
nir_push_else(b, nif);
{
if (pan_fb_shader_op_can_discard(target->border_op)) {
bd_color = u4;
bd_cov = z1;
} else {
bd_color = build_load(b, pos, target->border_op,
target->border_msaa,
location, target, key);
bd_cov = coverage;
}
}
nir_pop_if(b, nif);
color = nir_if_phi(b, ib_color, bd_color);
coverage = nir_if_phi(b, ib_cov, bd_cov);
}
if (nif_s0) {
/* Samples other than 0 get undefined color and zero coverage */
nir_pop_if(b, nif_s0);
color = nir_if_phi(b, color, u4);
coverage = nir_if_phi(b, coverage, z1);
}
const nir_alu_type nir_type =
nir_alu_type_for_data_type(target->data_type);
nir_def *blend = nir_load_blend_descriptor_pan(b, .base = rt);
nir_blend_pan(b, coverage, blend, color,
.src_type = nir_type,
.io_semantics.location = location,
.io_semantics.num_slots = 1);
}
} else {
/* If one whole region can be discarded for every written target,
* terminate there and make in_bounds a constant for what remains.
*/
if (info.discard_in_bounds) {
nir_terminate_if(b, in_bounds);
in_bounds = nir_imm_false(b);
} else if (info.discard_border) {
nir_terminate_if(b, nir_inot(b, in_bounds));
in_bounds = nir_imm_true(b);
}
if (info.sample0_only) {
/* The little optimization we do at the end will take care of most
* of what we need for sample0-only but we also need to write
* SAMPLE_MASK so pan_nir_lower_fs_outputs will do the right thing.
*/
nir_store_output(b, nir_imm_int(b, 1), nir_imm_int(b, 0),
.base = FRAG_RESULT_SAMPLE_MASK, .range = 1,
.write_mask = 1,
.src_type = nir_type_uint32,
.io_semantics.location = FRAG_RESULT_SAMPLE_MASK,
.io_semantics.num_slots = 1);
}
/* On this path every written target is expected to agree with the
* shader-wide sample0_only setting.
*/
if (pan_fb_shader_key_target_written(&key->z)) {
assert(key->z.sample0_only == info.sample0_only);
build_op(b, pos, in_bounds, FRAG_RESULT_DEPTH, &key->z, key);
}
if (pan_fb_shader_key_target_written(&key->s)) {
assert(key->s.sample0_only == info.sample0_only);
build_op(b, pos, in_bounds, FRAG_RESULT_STENCIL, &key->s, key);
}
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++) {
if (pan_fb_shader_key_target_written(&key->rts[rt])) {
assert(key->rts[rt].sample0_only == info.sample0_only);
build_op(b, pos, in_bounds, FRAG_RESULT_DATA0 + rt,
&key->rts[rt], key);
}
}
}
/* When every written target is sample0-only, fold sample ID loads to 0 and
* cumulative coverage loads to 1, and drop the sample-shading flag that
* build_sample_id() set.
*/
if (info.sample0_only) {
NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
opt_sample0_only_intr, nir_metadata_control_flow, NULL);
b->shader->info.fs.uses_sample_shading = false;
}
return builder.shader;
}