mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 12:48:13 +02:00
There's little point in having two unreachable blocks here. Yeah, sure, in theory we are a little bit safer against forgetting to add a case for newly introduced enum values here. But the UNREACHABLE macro should already tell us when we trigger such cases anyway, and the cost here is really readability. Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40115>
859 lines
29 KiB
C
859 lines
29 KiB
C
/*
|
|
* Copyright (C) 2026 Collabora, Ltd.
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "pan_blend.h"
|
|
#include "pan_desc.h"
|
|
#include "pan_fb.h"
|
|
|
|
#include "compiler/nir/nir.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "compiler/pan_nir.h"
|
|
|
|
static enum glsl_sampler_dim
|
|
mali_to_glsl_dim(enum mali_texture_dimension dim)
|
|
{
|
|
switch (dim) {
|
|
case MALI_TEXTURE_DIMENSION_1D: return GLSL_SAMPLER_DIM_1D;
|
|
case MALI_TEXTURE_DIMENSION_CUBE:
|
|
case MALI_TEXTURE_DIMENSION_2D: return GLSL_SAMPLER_DIM_2D;
|
|
case MALI_TEXTURE_DIMENSION_3D: return GLSL_SAMPLER_DIM_3D;
|
|
default:
|
|
UNREACHABLE("Unknown mali_texture_dimension");
|
|
}
|
|
}
|
|
|
|
static enum pan_fb_shader_data_type
|
|
data_type_for_format(enum pipe_format format)
|
|
{
|
|
if (util_format_is_pure_uint(format))
|
|
return PAN_FB_SHADER_DATA_TYPE_U32;
|
|
else if (util_format_is_pure_sint(format))
|
|
return PAN_FB_SHADER_DATA_TYPE_I32;
|
|
else
|
|
return PAN_FB_SHADER_DATA_TYPE_F32;
|
|
}
|
|
|
|
static nir_alu_type
|
|
nir_alu_type_for_data_type(enum pan_fb_shader_data_type data_type)
|
|
{
|
|
switch (data_type) {
|
|
case PAN_FB_SHADER_DATA_TYPE_F32: return nir_type_float32;
|
|
case PAN_FB_SHADER_DATA_TYPE_I32: return nir_type_int32;
|
|
case PAN_FB_SHADER_DATA_TYPE_U32: return nir_type_uint32;
|
|
default:
|
|
UNREACHABLE("Invalid pan_fb_shader_data_type");
|
|
}
|
|
}
|
|
|
|
static enum pan_fb_shader_op
|
|
get_shader_op_for_load(enum pan_fb_load_op op)
|
|
{
|
|
switch (op) {
|
|
case PAN_FB_LOAD_NONE: return PAN_FB_SHADER_DONT_CARE;
|
|
case PAN_FB_LOAD_CLEAR: return PAN_FB_SHADER_LOAD_CLEAR;
|
|
case PAN_FB_LOAD_IMAGE: return PAN_FB_SHADER_LOAD_IMAGE;
|
|
default:
|
|
UNREACHABLE("Invalid load op");
|
|
}
|
|
}
|
|
|
|
static inline enum pan_fb_shader_op
|
|
get_shader_op_for_resolve(enum pan_fb_resolve_op op)
|
|
{
|
|
switch (op) {
|
|
case PAN_FB_RESOLVE_NONE: return PAN_FB_SHADER_PRESERVE;
|
|
case PAN_FB_RESOLVE_IMAGE: return PAN_FB_SHADER_LOAD_IMAGE;
|
|
case PAN_FB_RESOLVE_RT_0: return PAN_FB_SHADER_COPY_RT_0;
|
|
case PAN_FB_RESOLVE_RT_1: return PAN_FB_SHADER_COPY_RT_1;
|
|
case PAN_FB_RESOLVE_RT_2: return PAN_FB_SHADER_COPY_RT_2;
|
|
case PAN_FB_RESOLVE_RT_3: return PAN_FB_SHADER_COPY_RT_3;
|
|
case PAN_FB_RESOLVE_RT_4: return PAN_FB_SHADER_COPY_RT_4;
|
|
case PAN_FB_RESOLVE_RT_5: return PAN_FB_SHADER_COPY_RT_5;
|
|
case PAN_FB_RESOLVE_RT_6: return PAN_FB_SHADER_COPY_RT_6;
|
|
case PAN_FB_RESOLVE_RT_7: return PAN_FB_SHADER_COPY_RT_7;
|
|
case PAN_FB_RESOLVE_Z: return PAN_FB_SHADER_COPY_Z;
|
|
case PAN_FB_RESOLVE_S: return PAN_FB_SHADER_COPY_S;
|
|
default:
|
|
UNREACHABLE("Invalid resolve op");
|
|
}
|
|
}
|
|
|
|
static enum pan_fb_msaa_copy_op
|
|
reduce_msaa_op(enum pan_fb_msaa_copy_op msaa, enum pan_fb_shader_op op,
|
|
uint8_t fb_sample_count, uint8_t image_sample_count)
|
|
{
|
|
if (pan_fb_shader_op_can_discard(op))
|
|
return PAN_FB_MSAA_COPY_ALL;
|
|
|
|
const uint8_t src_sample_count = op == PAN_FB_SHADER_LOAD_IMAGE
|
|
? image_sample_count : fb_sample_count;
|
|
if (msaa == PAN_FB_MSAA_COPY_ALL)
|
|
assert(src_sample_count == fb_sample_count);
|
|
|
|
if (src_sample_count <= 1)
|
|
return PAN_FB_MSAA_COPY_SINGLE;
|
|
|
|
return msaa;
|
|
}
|
|
|
|
static bool
|
|
op_needs_sample_count(enum pan_fb_shader_op op,
|
|
enum pan_fb_msaa_copy_op msaa)
|
|
{
|
|
return msaa == PAN_FB_MSAA_COPY_AVERAGE ||
|
|
msaa == PAN_FB_MSAA_COPY_MIN ||
|
|
msaa == PAN_FB_MSAA_COPY_MAX;
|
|
}
|
|
|
|
static const struct pan_fb_shader_key_target key_target_dont_care = {
|
|
.in_bounds_op = PAN_FB_SHADER_DONT_CARE,
|
|
.border_op = PAN_FB_SHADER_DONT_CARE,
|
|
};
|
|
|
|
static const struct pan_fb_shader_key_target key_target_preserve = {
|
|
.in_bounds_op = PAN_FB_SHADER_PRESERVE,
|
|
.border_op = PAN_FB_SHADER_PRESERVE,
|
|
};
|
|
|
|
static struct pan_fb_shader_key_target
|
|
get_key_target(enum pipe_format format,
|
|
uint8_t fb_sample_count,
|
|
bool has_border,
|
|
enum pan_fb_shader_op in_bounds_op,
|
|
enum pan_fb_shader_op border_op,
|
|
enum pan_fb_msaa_copy_op in_bounds_msaa,
|
|
enum pan_fb_msaa_copy_op border_msaa,
|
|
bool sample0_only,
|
|
const struct pan_image_view *iview)
|
|
{
|
|
if (format == PIPE_FORMAT_NONE)
|
|
return key_target_dont_care;
|
|
|
|
/* If we have a full framebuffer, there is no border. Set the boarder load
|
|
* equal to the in-bounds load so things in the shader fold nicely.
|
|
*/
|
|
if (!has_border) {
|
|
border_op = in_bounds_op;
|
|
border_msaa = in_bounds_msaa;
|
|
}
|
|
|
|
if (in_bounds_op == PAN_FB_SHADER_DONT_CARE &&
|
|
border_op == PAN_FB_SHADER_DONT_CARE)
|
|
return key_target_dont_care;
|
|
|
|
/* If both load ops can be implemented as a discard, set the target to
|
|
* always PRESERVE and we just won't emit any code to do a load.
|
|
*
|
|
* Importantly, this is true for all load ops except PAN_FB_LOAD_OP_IMAGE.
|
|
*/
|
|
if (pan_fb_shader_op_can_discard(in_bounds_op) &&
|
|
pan_fb_shader_op_can_discard(border_op))
|
|
return key_target_preserve;
|
|
|
|
/* We don't have a bound on this enum so we need a live assert */
|
|
enum mali_texture_dimension dim = MALI_TEXTURE_DIMENSION_2D;
|
|
bool is_array = false;
|
|
uint8_t image_sample_count = 0;
|
|
if (in_bounds_op == PAN_FB_SHADER_LOAD_IMAGE ||
|
|
border_op == PAN_FB_SHADER_LOAD_IMAGE) {
|
|
if (util_format_has_depth(util_format_description(format)))
|
|
assert(util_format_get_depth_bits(format) ==
|
|
util_format_get_depth_bits(iview->format));
|
|
else if (util_format_has_stencil(util_format_description(format)))
|
|
assert(util_format_has_stencil(util_format_description(iview->format)));
|
|
else
|
|
assert(format == iview->format);
|
|
|
|
dim = iview->dim;
|
|
is_array = iview->first_layer != iview->last_layer;
|
|
image_sample_count = pan_image_view_get_nr_samples(iview);
|
|
}
|
|
|
|
in_bounds_msaa = reduce_msaa_op(in_bounds_msaa, in_bounds_op,
|
|
fb_sample_count, image_sample_count);
|
|
border_msaa = reduce_msaa_op(border_msaa, border_op,
|
|
fb_sample_count, image_sample_count);
|
|
|
|
const bool needs_sample_count =
|
|
op_needs_sample_count(in_bounds_op, in_bounds_msaa) ||
|
|
op_needs_sample_count(border_op, border_msaa);
|
|
|
|
assert(((unsigned)dim) < (1 << 2));
|
|
|
|
return (struct pan_fb_shader_key_target) {
|
|
.in_bounds_op = in_bounds_op,
|
|
.border_op = border_op,
|
|
.in_bounds_msaa = in_bounds_msaa,
|
|
.border_msaa = border_msaa,
|
|
.sample0_only = sample0_only,
|
|
.image_dim = dim,
|
|
.image_is_array = is_array,
|
|
.image_samples_log2 = needs_sample_count
|
|
? util_logbase2(image_sample_count) : 0,
|
|
.data_type = data_type_for_format(format),
|
|
};
|
|
}
|
|
|
|
static struct pan_fb_shader_key_target
|
|
get_load_key_target(enum pipe_format format,
|
|
uint8_t fb_sample_count,
|
|
bool has_border,
|
|
const struct pan_fb_load_target *load)
|
|
{
|
|
return get_key_target(format, fb_sample_count, has_border,
|
|
get_shader_op_for_load(load->in_bounds_load),
|
|
get_shader_op_for_load(load->border_load),
|
|
load->msaa, load->msaa, false, load->iview);
|
|
}
|
|
|
|
bool
|
|
GENX(pan_fb_load_shader_key_fill)(struct pan_fb_shader_key *key,
|
|
const struct pan_fb_layout *fb,
|
|
const struct pan_fb_load *load,
|
|
bool zs_prepass)
|
|
{
|
|
*key = (struct pan_fb_shader_key) { };
|
|
|
|
const bool has_border = pan_fb_has_partial_tiles(fb);
|
|
|
|
/* Z/S can only be written from the prepass and color can only be written
|
|
* from the non-prepass. Setting everything to zero will cause the shader
|
|
* to just not write anything in that case.
|
|
*/
|
|
if (zs_prepass) {
|
|
key->z = get_load_key_target(fb->z_format, fb->sample_count,
|
|
has_border, &load->z);
|
|
key->s = get_load_key_target(fb->s_format, fb->sample_count,
|
|
has_border, &load->s);
|
|
return pan_fb_shader_key_target_written(&key->z) ||
|
|
pan_fb_shader_key_target_written(&key->s);
|
|
} else {
|
|
bool needs_shader = false;
|
|
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
|
|
key->rts[rt] = get_load_key_target(fb->rt_formats[rt],
|
|
fb->sample_count,
|
|
has_border, &load->rts[rt]);
|
|
if (pan_fb_shader_key_target_written(&key->rts[rt]))
|
|
needs_shader = true;
|
|
}
|
|
return needs_shader;
|
|
}
|
|
}
|
|
|
|
#if PAN_ARCH >= 6
|
|
static struct pan_fb_shader_key_target
|
|
get_resolve_key_target(enum pipe_format format,
|
|
uint8_t fb_sample_count,
|
|
bool has_border,
|
|
const struct pan_fb_resolve_target *load)
|
|
{
|
|
const enum pan_fb_shader_op in_bounds_op =
|
|
get_shader_op_for_resolve(load->in_bounds.resolve);
|
|
const enum pan_fb_shader_op border_op =
|
|
get_shader_op_for_resolve(load->border.resolve);
|
|
return get_key_target(format, fb_sample_count, has_border,
|
|
in_bounds_op, border_op,
|
|
load->in_bounds.msaa, load->border.msaa,
|
|
true, load->iview);
|
|
}
|
|
|
|
bool
|
|
GENX(pan_fb_resolve_shader_key_fill)(struct pan_fb_shader_key *key,
|
|
const struct pan_fb_layout *fb,
|
|
const struct pan_fb_resolve *resolve)
|
|
{
|
|
const bool has_border = pan_fb_has_partial_tiles(fb);
|
|
|
|
*key = (struct pan_fb_shader_key) {
|
|
.z = get_resolve_key_target(fb->z_format, fb->sample_count,
|
|
has_border, &resolve->z),
|
|
.s = get_resolve_key_target(fb->s_format, fb->sample_count,
|
|
has_border, &resolve->s),
|
|
.z_format = fb->z_format,
|
|
.fb_sample_count = fb->sample_count,
|
|
};
|
|
bool needs_shader = pan_fb_shader_key_target_written(&key->z) ||
|
|
pan_fb_shader_key_target_written(&key->s);
|
|
|
|
for (unsigned rt = 0; rt < fb->rt_count; rt++) {
|
|
key->rts[rt] = get_resolve_key_target(fb->rt_formats[rt],
|
|
fb->sample_count,
|
|
has_border,
|
|
&resolve->rts[rt]);
|
|
if (pan_fb_shader_key_target_written(&key->rts[rt]))
|
|
needs_shader = true;
|
|
}
|
|
|
|
return needs_shader;
|
|
}
|
|
#endif
|
|
|
|
static nir_def *
|
|
combine_samples_no_div(nir_builder *b, nir_def **samples, uint8_t sample_count,
|
|
const nir_alu_type nir_type,
|
|
enum pan_fb_msaa_copy_op msaa)
|
|
{
|
|
assert(util_is_power_of_two_nonzero(sample_count));
|
|
if (sample_count == 1)
|
|
return samples[0];
|
|
|
|
nir_def *lo = combine_samples_no_div(b, samples,
|
|
sample_count / 2, nir_type, msaa);
|
|
nir_def *hi = combine_samples_no_div(b, samples + (sample_count / 2),
|
|
sample_count / 2, nir_type, msaa);
|
|
|
|
/* We assume that first half always comes before the second so setting the
|
|
* cursor after the second half combine instruction will give us the least
|
|
* common ancestor.
|
|
*/
|
|
b->cursor = nir_after_instr(nir_def_instr(hi));
|
|
|
|
switch (msaa) {
|
|
case PAN_FB_MSAA_COPY_AVERAGE:
|
|
assert(nir_alu_type_get_base_type(nir_type) == nir_type_float);
|
|
return nir_fadd(b, lo, hi);
|
|
|
|
case PAN_FB_MSAA_COPY_MIN:
|
|
switch (nir_alu_type_get_base_type(nir_type)) {
|
|
case nir_type_float:
|
|
return nir_fmin(b, lo, hi);
|
|
case nir_type_uint:
|
|
return nir_umin(b, lo, hi);
|
|
case nir_type_int:
|
|
return nir_imin(b, lo, hi);
|
|
default:
|
|
UNREACHABLE("Unsupported NIR type");
|
|
}
|
|
|
|
case PAN_FB_MSAA_COPY_MAX:
|
|
switch (nir_alu_type_get_base_type(nir_type)) {
|
|
case nir_type_float:
|
|
return nir_fmax(b, lo, hi);
|
|
case nir_type_uint:
|
|
return nir_umax(b, lo, hi);
|
|
case nir_type_int:
|
|
return nir_imax(b, lo, hi);
|
|
default:
|
|
UNREACHABLE("Unsupported NIR type");
|
|
}
|
|
|
|
default:
|
|
UNREACHABLE("Invalid MSAA op");
|
|
}
|
|
}
|
|
|
|
static nir_def *
|
|
combine_samples(nir_builder *b, nir_def **samples, uint8_t sample_count,
|
|
const nir_alu_type nir_type, enum pan_fb_msaa_copy_op msaa)
|
|
{
|
|
if (msaa == PAN_FB_MSAA_COPY_SAMPLE_0)
|
|
return samples[0];
|
|
|
|
nir_def *val = combine_samples_no_div(b, samples, sample_count,
|
|
nir_type, msaa);
|
|
if (msaa == PAN_FB_MSAA_COPY_AVERAGE)
|
|
val = nir_fdiv_imm(b, val, sample_count);
|
|
|
|
return val;
|
|
}
|
|
|
|
static nir_def *
|
|
build_sample_id(nir_builder *b)
|
|
{
|
|
b->shader->info.fs.uses_sample_shading = true;
|
|
return nir_load_sample_id(b);
|
|
}
|
|
|
|
static nir_def *
|
|
build_image_load(nir_builder *b, const nir_alu_type nir_type,
|
|
nir_def *pos, gl_frag_result location,
|
|
enum pan_fb_msaa_copy_op msaa, uint8_t sample_count,
|
|
enum mali_texture_dimension dim, bool is_array)
|
|
{
|
|
assert(pos->num_components == 3);
|
|
switch (dim) {
|
|
case MALI_TEXTURE_DIMENSION_1D:
|
|
if (is_array)
|
|
pos = nir_channels(b, pos, 0b101);
|
|
else
|
|
pos = nir_channel(b, pos, 0);
|
|
break;
|
|
|
|
case MALI_TEXTURE_DIMENSION_CUBE:
|
|
assert(is_array);
|
|
break;
|
|
|
|
case MALI_TEXTURE_DIMENSION_2D:
|
|
if (!is_array)
|
|
pos = nir_channels(b, pos, 0b011);
|
|
break;
|
|
|
|
case MALI_TEXTURE_DIMENSION_3D:
|
|
break;
|
|
|
|
default:
|
|
UNREACHABLE("Unsupported dim");
|
|
}
|
|
|
|
nir_def *val;
|
|
switch (msaa) {
|
|
case PAN_FB_MSAA_COPY_SINGLE:
|
|
val = nir_txf(b, pos,
|
|
.texture_index = location,
|
|
.dim = mali_to_glsl_dim(dim),
|
|
.dest_type = nir_type,
|
|
.is_array = is_array);
|
|
break;
|
|
|
|
case PAN_FB_MSAA_COPY_ALL:
|
|
case PAN_FB_MSAA_COPY_IDENTICAL:
|
|
case PAN_FB_MSAA_COPY_SAMPLE_0: {
|
|
assert(dim == MALI_TEXTURE_DIMENSION_2D);
|
|
|
|
nir_def *sample_id = msaa == PAN_FB_MSAA_COPY_ALL
|
|
? build_sample_id(b) : nir_imm_int(b, 0);
|
|
|
|
val = nir_txf_ms(b, pos, sample_id,
|
|
.texture_index = location,
|
|
.dim = GLSL_SAMPLER_DIM_MS,
|
|
.dest_type = nir_type,
|
|
.is_array = is_array);
|
|
break;
|
|
}
|
|
|
|
case PAN_FB_MSAA_COPY_AVERAGE:
|
|
case PAN_FB_MSAA_COPY_MIN:
|
|
case PAN_FB_MSAA_COPY_MAX: {
|
|
assert(dim == MALI_TEXTURE_DIMENSION_2D);
|
|
assert(sample_count > 0);
|
|
|
|
nir_def *samples[16];
|
|
assert(sample_count <= ARRAY_SIZE(samples));
|
|
for (uint32_t s = 0; s < sample_count; s++) {
|
|
samples[s] = nir_txf_ms(b, pos, nir_imm_int(b, s),
|
|
.texture_index = location,
|
|
.dim = GLSL_SAMPLER_DIM_MS,
|
|
.dest_type = nir_type,
|
|
.is_array = is_array);
|
|
}
|
|
val = combine_samples(b, samples, sample_count, nir_type, msaa);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
UNREACHABLE("Invalid copy op");
|
|
}
|
|
|
|
if (location == FRAG_RESULT_DEPTH || location == FRAG_RESULT_STENCIL)
|
|
val = nir_channel(b, val, 0);
|
|
|
|
return val;
|
|
}
|
|
|
|
#if PAN_ARCH >= 6
|
|
static uint32_t
|
|
get_fb_conversion(nir_alu_type nir_type, enum pipe_format format)
|
|
{
|
|
#if PAN_ARCH >= 9
|
|
return 0;
|
|
#else
|
|
struct mali_internal_conversion_packed conv;
|
|
pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
|
|
cfg.register_format = pan_blend_type_from_nir(nir_type);
|
|
cfg.memory_format = GENX(pan_dithered_format_from_pipe_format)(
|
|
format, false);
|
|
}
|
|
return conv.opaque[0];
|
|
#endif
|
|
}
|
|
|
|
static nir_def *
|
|
build_single_fb_load(nir_builder *b, const nir_alu_type nir_type,
|
|
gl_frag_result location, nir_def *sample,
|
|
enum pipe_format z_format)
|
|
{
|
|
const bool is_zs = location == FRAG_RESULT_DEPTH ||
|
|
location == FRAG_RESULT_STENCIL;
|
|
unsigned num_components = is_zs ? 1 : 4;
|
|
|
|
nir_def *conversion;
|
|
if (location == FRAG_RESULT_DEPTH) {
|
|
conversion = nir_imm_int(b, get_fb_conversion(nir_type, z_format));
|
|
} else if (location == FRAG_RESULT_STENCIL) {
|
|
conversion = nir_imm_int(b,
|
|
get_fb_conversion(nir_type, PIPE_FORMAT_S8_UINT));
|
|
} else {
|
|
assert(location >= FRAG_RESULT_DATA0);
|
|
uint8_t rt = location - FRAG_RESULT_DATA0;
|
|
assert(rt < PAN_MAX_RTS);
|
|
|
|
conversion = nir_unpack_64_2x32_split_y(b,
|
|
nir_load_blend_descriptor_pan(b, .base = rt));
|
|
}
|
|
|
|
nir_def *val = nir_load_tile_pan(b,
|
|
num_components, 32,
|
|
pan_nir_tile_location_sample(b, location, sample),
|
|
pan_nir_tile_default_coverage(b),
|
|
conversion,
|
|
.dest_type = nir_type,
|
|
.io_semantics.location = location,
|
|
.io_semantics.num_slots = 1);
|
|
|
|
/* If we loaded the stencil value, the upper 24 bits might contain garbage,
|
|
* so we need to mask that off before we do any math on the result.
|
|
*/
|
|
if (location == FRAG_RESULT_STENCIL)
|
|
val = nir_iand_imm(b, val, 0xff);
|
|
|
|
return val;
|
|
}
|
|
|
|
static nir_def *
|
|
build_fb_load(nir_builder *b, const nir_alu_type nir_type,
|
|
gl_frag_result location, enum pipe_format z_format,
|
|
enum pan_fb_msaa_copy_op msaa, uint8_t fb_sample_count)
|
|
{
|
|
switch (msaa) {
|
|
case PAN_FB_MSAA_COPY_SINGLE:
|
|
case PAN_FB_MSAA_COPY_ALL:
|
|
case PAN_FB_MSAA_COPY_IDENTICAL:
|
|
case PAN_FB_MSAA_COPY_SAMPLE_0: {
|
|
nir_def *sample_id = msaa == PAN_FB_MSAA_COPY_ALL
|
|
? build_sample_id(b) : nir_imm_int(b, 0);
|
|
|
|
return build_single_fb_load(b, nir_type, location, sample_id, z_format);
|
|
}
|
|
|
|
case PAN_FB_MSAA_COPY_AVERAGE:
|
|
case PAN_FB_MSAA_COPY_MIN:
|
|
case PAN_FB_MSAA_COPY_MAX: {
|
|
nir_def *samples[16];
|
|
assert(fb_sample_count > 0 && fb_sample_count <= ARRAY_SIZE(samples));
|
|
for (uint32_t s = 0; s < fb_sample_count; s++) {
|
|
samples[s] = build_single_fb_load(b, nir_type, location,
|
|
nir_imm_int(b, s), z_format);
|
|
}
|
|
return combine_samples(b, samples, fb_sample_count, nir_type, msaa);
|
|
}
|
|
|
|
default:
|
|
UNREACHABLE("Invalid copy op");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static nir_def *
|
|
build_load(nir_builder *b, nir_def *pos,
|
|
enum pan_fb_shader_op op, enum pan_fb_msaa_copy_op msaa,
|
|
gl_frag_result location,
|
|
const struct pan_fb_shader_key_target *target,
|
|
const struct pan_fb_shader_key *key)
|
|
{
|
|
const nir_alu_type nir_type = nir_alu_type_for_data_type(target->data_type);
|
|
const bool is_zs = location == FRAG_RESULT_DEPTH ||
|
|
location == FRAG_RESULT_STENCIL;
|
|
|
|
switch (op) {
|
|
#if PAN_ARCH >= 6
|
|
case PAN_FB_SHADER_PRESERVE:
|
|
return build_fb_load(b, nir_type, location, key->z_format,
|
|
PAN_FB_MSAA_COPY_ALL, key->fb_sample_count);
|
|
#endif
|
|
|
|
case PAN_FB_SHADER_DONT_CARE:
|
|
return nir_imm_zero(b, is_zs ? 1 : 4, 32);
|
|
|
|
case PAN_FB_SHADER_LOAD_CLEAR:
|
|
return nir_load_clear_value_pan(b, is_zs ? 1 : 4, 32,
|
|
.io_semantics.location = location,
|
|
.dest_type = nir_type);
|
|
|
|
case PAN_FB_SHADER_LOAD_IMAGE:
|
|
return build_image_load(b, nir_type, pos, location, msaa,
|
|
1 << target->image_samples_log2,
|
|
target->image_dim, target->image_is_array);
|
|
|
|
#if PAN_ARCH >= 6
|
|
default: { /* PAN_FB_SHADER_COPY_RT_N */
|
|
uint8_t rt = op - PAN_FB_SHADER_COPY_RT_0;
|
|
assert(rt < PAN_MAX_RTS);
|
|
return build_fb_load(b, nir_type, FRAG_RESULT_DATA0 + rt,
|
|
PIPE_FORMAT_NONE, msaa, key->fb_sample_count);
|
|
}
|
|
|
|
case PAN_FB_SHADER_COPY_Z:
|
|
return build_fb_load(b, nir_type, FRAG_RESULT_DEPTH,
|
|
key->z_format, msaa, key->fb_sample_count);
|
|
|
|
case PAN_FB_SHADER_COPY_S:
|
|
return build_fb_load(b, nir_type, FRAG_RESULT_STENCIL,
|
|
PIPE_FORMAT_S8_UINT, msaa, key->fb_sample_count);
|
|
#else
|
|
default:
|
|
UNREACHABLE("Unsupported shader op");
|
|
#endif
|
|
}
|
|
}
|
|
|
|
static void
|
|
build_op(nir_builder *b, nir_def *pos, nir_def *in_bounds,
|
|
gl_frag_result location,
|
|
const struct pan_fb_shader_key_target *target,
|
|
const struct pan_fb_shader_key *key)
|
|
{
|
|
nir_if *nif_s0 = NULL;
|
|
if (target->sample0_only)
|
|
nif_s0 = nir_push_if(b, nir_ieq_imm(b, build_sample_id(b), 0));
|
|
|
|
nir_def *val;
|
|
if (target->in_bounds_op == target->border_op &&
|
|
target->in_bounds_msaa == target->border_msaa) {
|
|
val = build_load(b, pos, target->in_bounds_op, target->in_bounds_msaa,
|
|
location, target, key);
|
|
} else {
|
|
nir_def *in_bounds_val, *border_val;
|
|
nir_if *nif = nir_push_if(b, in_bounds);
|
|
{
|
|
in_bounds_val = build_load(b, pos, target->in_bounds_op,
|
|
target->in_bounds_msaa,
|
|
location, target, key);
|
|
}
|
|
nir_push_else(b, nif);
|
|
{
|
|
border_val = build_load(b, pos, target->border_op,
|
|
target->border_msaa,
|
|
location, target, key);
|
|
}
|
|
nir_pop_if(b, nif);
|
|
val = nir_if_phi(b, in_bounds_val, border_val);
|
|
}
|
|
|
|
nir_store_output(b, val, nir_imm_int(b, 0),
|
|
.base = location, .range = 1,
|
|
.write_mask = nir_component_mask(val->num_components),
|
|
.src_type = nir_alu_type_for_data_type(target->data_type),
|
|
.io_semantics.location = location,
|
|
.io_semantics.num_slots = 1);
|
|
|
|
if (nif_s0)
|
|
nir_pop_if(b, nif_s0);
|
|
}
|
|
|
|
struct pan_fb_shader_info {
|
|
bool discard_in_bounds;
|
|
bool discard_border;
|
|
bool sample0_only;
|
|
};
|
|
|
|
static void
|
|
gather_target_info(struct pan_fb_shader_info *info,
|
|
const struct pan_fb_shader_key_target *target)
|
|
{
|
|
/* Ignore any targets we don't write */
|
|
if (!pan_fb_shader_key_target_written(target))
|
|
return;
|
|
|
|
if (!pan_fb_shader_op_can_discard(target->in_bounds_op))
|
|
info->discard_in_bounds = false;
|
|
|
|
if (!pan_fb_shader_op_can_discard(target->border_op))
|
|
info->discard_border = false;
|
|
|
|
if (!target->sample0_only)
|
|
info->sample0_only = false;
|
|
}
|
|
|
|
static struct pan_fb_shader_info
|
|
gather_shader_info(const struct pan_fb_shader_key *key)
|
|
{
|
|
struct pan_fb_shader_info info = {
|
|
.discard_in_bounds = true,
|
|
.discard_border = true,
|
|
.sample0_only = true,
|
|
};
|
|
|
|
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++)
|
|
gather_target_info(&info, &key->rts[rt]);
|
|
|
|
gather_target_info(&info, &key->z);
|
|
gather_target_info(&info, &key->s);
|
|
|
|
return info;
|
|
}
|
|
|
|
static bool
|
|
opt_sample0_only_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|
{
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_sample_id:
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
nir_def_replace(&intr->def, nir_imm_int(b, 0));
|
|
return true;
|
|
|
|
case nir_intrinsic_load_cumulative_coverage_pan:
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
nir_def_replace(&intr->def, nir_imm_int(b, 1));
|
|
return true;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
nir_shader *
|
|
GENX(pan_get_fb_shader)(const struct pan_fb_shader_key *key,
|
|
const struct nir_shader_compiler_options *nir_options)
|
|
{
|
|
nir_builder builder = nir_builder_init_simple_shader(
|
|
MESA_SHADER_FRAGMENT, nir_options, "pan-fb-load");
|
|
nir_builder *b = &builder;
|
|
|
|
const struct pan_fb_shader_info info = gather_shader_info(key);
|
|
|
|
/* We shouldn't even be building a shader at all in this case but, on the
|
|
* off chance that we do, build one that just discards.
|
|
*/
|
|
if (info.discard_in_bounds && info.discard_border) {
|
|
nir_terminate(b);
|
|
return builder.shader;
|
|
}
|
|
|
|
nir_def *pos_xy = nir_load_pixel_coord(b);
|
|
nir_def *pos_x = nir_u2u32(b, nir_channel(b, pos_xy, 0));
|
|
nir_def *pos_y = nir_u2u32(b, nir_channel(b, pos_xy, 1));
|
|
nir_def *pos = nir_vec3(b, pos_x, pos_y, nir_load_layer_id(b));
|
|
|
|
nir_def *ra = nir_load_fb_render_area_pan(b);
|
|
nir_def *ra_min_xy = nir_channels(b, ra, 0b0011);
|
|
nir_def *ra_max_xy = nir_channels(b, ra, 0b1100);
|
|
|
|
nir_def *in_bounds =
|
|
nir_ball(b, nir_iand(b, nir_uge(b, pos_xy, ra_min_xy),
|
|
nir_uge(b, ra_max_xy, pos_xy)));
|
|
|
|
if (PAN_ARCH >= 6 && !pan_fb_shader_key_target_written(&key->z) &&
|
|
!pan_fb_shader_key_target_written(&key->s)) {
|
|
/* We assume that the framebuffer will clear if either load op is set to
|
|
* clear. For color-only on bifrost, we can do a bit better by emitting
|
|
* BLEND instructions directly only writing the tile buffer if we're
|
|
* actually loading the image.
|
|
*/
|
|
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++) {
|
|
const struct pan_fb_shader_key_target *target = &key->rts[rt];
|
|
const gl_frag_result location = FRAG_RESULT_DATA0 + rt;
|
|
|
|
if (!pan_fb_shader_key_target_written(target))
|
|
continue;
|
|
|
|
nir_def *coverage = nir_load_cumulative_coverage_pan(b);
|
|
nir_def *z1 = nir_imm_int(b, 0);
|
|
nir_def *u4 = nir_undef(b, 4, 32);
|
|
|
|
nir_if *nif_s0 = NULL;
|
|
if (target->sample0_only)
|
|
nif_s0 = nir_push_if(b, nir_ieq_imm(b, build_sample_id(b), 0));
|
|
|
|
nir_def *color;
|
|
if (target->in_bounds_op == target->border_op &&
|
|
target->in_bounds_msaa == target->border_msaa) {
|
|
color = build_load(b, pos, target->in_bounds_op,
|
|
target->in_bounds_msaa,
|
|
location, target, key);
|
|
} else {
|
|
nir_def *ib_color, *bd_color, *ib_cov, *bd_cov;
|
|
nir_if *nif = nir_push_if(b, in_bounds);
|
|
{
|
|
if (pan_fb_shader_op_can_discard(target->in_bounds_op)) {
|
|
ib_color = u4;
|
|
ib_cov = z1;
|
|
} else {
|
|
ib_color = build_load(b, pos, target->in_bounds_op,
|
|
target->in_bounds_msaa,
|
|
location, target, key);
|
|
ib_cov = coverage;
|
|
}
|
|
}
|
|
nir_push_else(b, nif);
|
|
{
|
|
if (pan_fb_shader_op_can_discard(target->border_op)) {
|
|
bd_color = u4;
|
|
bd_cov = z1;
|
|
} else {
|
|
bd_color = build_load(b, pos, target->border_op,
|
|
target->border_msaa,
|
|
location, target, key);
|
|
bd_cov = coverage;
|
|
}
|
|
}
|
|
nir_pop_if(b, nif);
|
|
color = nir_if_phi(b, ib_color, bd_color);
|
|
coverage = nir_if_phi(b, ib_cov, bd_cov);
|
|
}
|
|
|
|
if (nif_s0) {
|
|
nir_pop_if(b, nif_s0);
|
|
color = nir_if_phi(b, color, u4);
|
|
coverage = nir_if_phi(b, coverage, z1);
|
|
}
|
|
|
|
const nir_alu_type nir_type =
|
|
nir_alu_type_for_data_type(target->data_type);
|
|
nir_def *blend = nir_load_blend_descriptor_pan(b, .base = rt);
|
|
|
|
nir_blend_pan(b, coverage, blend, color,
|
|
.src_type = nir_type,
|
|
.io_semantics.location = location,
|
|
.io_semantics.num_slots = 1);
|
|
}
|
|
} else {
|
|
if (info.discard_in_bounds) {
|
|
nir_terminate_if(b, in_bounds);
|
|
in_bounds = nir_imm_false(b);
|
|
} else if (info.discard_border) {
|
|
nir_terminate_if(b, nir_inot(b, in_bounds));
|
|
in_bounds = nir_imm_true(b);
|
|
}
|
|
|
|
if (info.sample0_only) {
|
|
/* The little optimization we do at the end will take care of most
|
|
* of what we need for sample0-only but we also need to write
|
|
* SAMPLE_MASK so pan_nir_lower_fs_outputs will do the right thing.
|
|
*/
|
|
nir_store_output(b, nir_imm_int(b, 1), nir_imm_int(b, 0),
|
|
.base = FRAG_RESULT_SAMPLE_MASK, .range = 1,
|
|
.write_mask = 1,
|
|
.src_type = nir_type_uint32,
|
|
.io_semantics.location = FRAG_RESULT_SAMPLE_MASK,
|
|
.io_semantics.num_slots = 1);
|
|
}
|
|
|
|
if (pan_fb_shader_key_target_written(&key->z)) {
|
|
assert(key->z.sample0_only == info.sample0_only);
|
|
build_op(b, pos, in_bounds, FRAG_RESULT_DEPTH, &key->z, key);
|
|
}
|
|
|
|
if (pan_fb_shader_key_target_written(&key->s)) {
|
|
assert(key->s.sample0_only == info.sample0_only);
|
|
build_op(b, pos, in_bounds, FRAG_RESULT_STENCIL, &key->s, key);
|
|
}
|
|
|
|
for (unsigned rt = 0; rt < PAN_MAX_RTS; rt++) {
|
|
if (pan_fb_shader_key_target_written(&key->rts[rt])) {
|
|
assert(key->rts[rt].sample0_only == info.sample0_only);
|
|
build_op(b, pos, in_bounds, FRAG_RESULT_DATA0 + rt,
|
|
&key->rts[rt], key);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (info.sample0_only) {
|
|
NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
|
|
opt_sample0_only_intr, nir_metadata_control_flow, NULL);
|
|
b->shader->info.fs.uses_sample_shading = false;
|
|
}
|
|
|
|
return builder.shader;
|
|
}
|