mesa/src/compiler/nir/nir_lower_blend.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1183 lines
40 KiB
C
Raw Normal View History

/*
* Copyright (C) 2025 Google LLC
* Copyright (C) 2019-2021 Collabora, Ltd.
* Copyright (C) 2019 Alyssa Rosenzweig
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file
*
* Implements the fragment pipeline (blending and writeout) in software, to be
* run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
* shader variant on typical GPUs. This pass is useful if hardware lacks
* fixed-function blending in part or in full.
*/
#include "nir_lower_blend.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_blend_equation_advanced_helper.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/blend.h"
#include "nir_builder_opcodes.h"
/* State shared by this file's lowering code. */
struct ctx {
/* Caller-supplied lowering options; read-only through this pointer. */
const nir_lower_blend_options *options;
/* Eight nir_def slots. NOTE(review): presumably the dual-source (src1)
 * colour per render target -- confirm against the code that fills it in. */
nir_def *src1[8];
};
/*
 * Combine an already-factored source term and destination term according to
 * a blend function.
 *
 * b:    builder used to emit the ALU instruction
 * func: blend function selecting the operation
 * src:  source term
 * dst:  destination term
 *
 * Returns the combined value. SUBTRACT is src - dst and REVERSE_SUBTRACT is
 * dst - src. A func outside the five handled enumerants reaches
 * UNREACHABLE().
 */
static nir_def *
nir_blend_func(
   nir_builder *b,
   enum pipe_blend_func func,
   nir_def *src, nir_def *dst)
{
   switch (func) {
   case PIPE_BLEND_ADD:
      return nir_fadd(b, src, dst);
   case PIPE_BLEND_SUBTRACT:
      return nir_fsub(b, src, dst);
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return nir_fsub(b, dst, src);
   case PIPE_BLEND_MIN:
      return nir_fmin(b, src, dst);
   case PIPE_BLEND_MAX:
      return nir_fmax(b, src, dst);
   }

   UNREACHABLE("Invalid blend function");
}
/*
 * Tell whether a blend function scales its operands by blend factors.
 * Only ADD, SUBTRACT and REVERSE_SUBTRACT do; everything else does not.
 */
static bool
nir_blend_factored(enum pipe_blend_func func)
{
   return func == PIPE_BLEND_ADD ||
          func == PIPE_BLEND_SUBTRACT ||
          func == PIPE_BLEND_REVERSE_SUBTRACT;
}
/*
 * Build the src_alpha_saturate factor for one channel:
 * min(As, 1 - Ad) for the colour channels, and 1 for alpha (chan >= 3).
 */
static nir_def *
nir_alpha_saturate(
   nir_builder *b,
   nir_def *src, nir_def *dst,
   unsigned chan)
{
   nir_def *src_alpha = nir_channel(b, src, 3);
   nir_def *dst_alpha = nir_channel(b, dst, 3);
   nir_def *unity = nir_imm_floatN_t(b, 1.0, src->bit_size);
   nir_def *inv_dst_alpha = nir_fsub(b, unity, dst_alpha);

   /* The alpha channel always uses a factor of exactly one */
   if (chan >= 3)
      return unity;

   return nir_fmin(b, src_alpha, inv_dst_alpha);
}
/*
 * Build the scalar value of a single non-inverted blend factor. The result is
 * the factor itself; it has not been multiplied into any colour yet.
 *
 * b:       builder used to emit instructions
 * src:     source colour
 * src1:    second source colour, read by the SRC1_* factors
 * dst:     destination colour
 * bconst:  blend constant colour
 * chan:    channel the factor is being built for; selects the component read
 *          by the *_COLOR factors (the *_ALPHA factors always read
 *          component 3)
 * factor_without_invert: blend factor with any inversion already stripped
 *
 * Any value not listed lands in the default case, which asserts that the
 * value is an inverted factor and then reaches UNREACHABLE().
 */
static nir_def *
nir_blend_factor_value(
   nir_builder *b,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor_without_invert)
{
   switch (factor_without_invert) {
   case PIPE_BLENDFACTOR_ONE:
      return nir_imm_floatN_t(b, 1.0, src->bit_size);
   case PIPE_BLENDFACTOR_SRC_COLOR:
      return nir_channel(b, src, chan);
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      return nir_channel(b, src1, chan);
   case PIPE_BLENDFACTOR_DST_COLOR:
      return nir_channel(b, dst, chan);
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      return nir_channel(b, src, 3);
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      return nir_channel(b, src1, 3);
   case PIPE_BLENDFACTOR_DST_ALPHA:
      return nir_channel(b, dst, 3);
   case PIPE_BLENDFACTOR_CONST_COLOR:
      return nir_channel(b, bconst, chan);
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      return nir_channel(b, bconst, 3);
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      return nir_alpha_saturate(b, src, dst, chan);
   default:
      assert(util_blendfactor_is_inverted(factor_without_invert));
      UNREACHABLE("Unexpected inverted factor");
   }
}
/* Clamp x to the signed-normalized range [-1, +1] at x's own bit size */
static nir_def *
nir_build_fsat_signed(nir_builder *b, nir_def *x)
{
   const unsigned bits = x->bit_size;
   nir_def *lower = nir_imm_floatN_t(b, -1.0, bits);
   nir_def *upper = nir_imm_floatN_t(b, +1.0, bits);

   return nir_fclamp(b, x, lower, upper);
}
/*
 * Clamp x to the range implied by the format: fsat for UNORM, [-1, +1]
 * for SNORM, and untouched for every other format.
 */
static nir_def *
nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return nir_fsat(b, x);

   if (util_format_is_snorm(format))
      return nir_build_fsat_signed(b, x);

   return x;
}
/*
 * Decide whether blending this channel reads the destination at all.
 *
 * The destination is read when the function ignores blend factors (min/max),
 * when the destination factor is anything but ZERO, or when the source
 * factor is itself derived from the destination.
 */
static bool
channel_uses_dest(nir_lower_blend_channel chan)
{
   if (!nir_blend_factored(chan.func) ||
       chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
      return true;

   /* Only the source factor is left to inspect */
   const enum pipe_blendfactor base =
      util_blendfactor_without_invert(chan.src_factor);

   return base == PIPE_BLENDFACTOR_DST_COLOR ||
          base == PIPE_BLENDFACTOR_DST_ALPHA ||
          base == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
}
/*
 * Multiply raw_scalar by a blend factor for channel chan.
 *
 * The factor is first built without its inversion; if the requested factor
 * is an inverted one, the value is turned into (1 - value) before the
 * multiply. The format argument is not consulted here.
 */
static nir_def *
nir_blend_factor(
   nir_builder *b,
   nir_def *raw_scalar,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor,
   enum pipe_format format)
{
   const enum pipe_blendfactor base = util_blendfactor_without_invert(factor);
   nir_def *value =
      nir_blend_factor_value(b, src, src1, dst, bconst, chan, base);

   if (util_blendfactor_is_inverted(factor)) {
      nir_def *negated = nir_fneg(b, value);
      value = nir_fadd_imm(b, negated, 1.0);
   }

   return nir_fmul(b, raw_scalar, value);
}
/*
 * Apply a 4-bit colour write mask: each set bit takes that channel from src,
 * each clear bit keeps the channel from dst. Bits above the low four are
 * ignored.
 */
nir_def *
nir_color_mask(nir_builder *b, nir_def *src, nir_def *dst, unsigned mask)
{
   mask &= 0xf;

   /* All-or-nothing masks need no per-channel selection */
   switch (mask) {
   case 0:
      return dst;
   case 0xf:
      return src;
   default:
      break;
   }

   nir_def *c0 = nir_channel(b, (mask & 0x1) ? src : dst, 0);
   nir_def *c1 = nir_channel(b, (mask & 0x2) ? src : dst, 1);
   nir_def *c2 = nir_channel(b, (mask & 0x4) ? src : dst, 2);
   nir_def *c3 = nir_channel(b, (mask & 0x8) ? src : dst, 3);

   return nir_vec4(b, c0, c1, c2, c3);
}
/*
 * Emit the integer ALU sequence for one logic op.
 *
 * b:       builder used to emit instructions
 * func:    logic op to apply
 * src:     source value
 * dst:     destination value
 * bitmask: value XORed in wherever an operand or result must be inverted
 *
 * Returns the result of the operation. CLEAR and SET return immediate
 * vec4s of all-zeros / all-ones. NOOP is never expected here and reaches
 * UNREACHABLE(), as does any value outside the enum.
 */
static nir_def *
nir_logicop_func(
   nir_builder *b,
   enum pipe_logicop func,
   nir_def *src, nir_def *dst, nir_def *bitmask)
{
   switch (func) {
   case PIPE_LOGICOP_CLEAR:
      return nir_imm_ivec4(b, 0, 0, 0, 0);
   case PIPE_LOGICOP_NOR:
      return nir_ixor(b, nir_ior(b, src, dst), bitmask);
   case PIPE_LOGICOP_AND_INVERTED:
      return nir_iand(b, nir_ixor(b, src, bitmask), dst);
   case PIPE_LOGICOP_COPY_INVERTED:
      return nir_ixor(b, src, bitmask);
   case PIPE_LOGICOP_AND_REVERSE:
      return nir_iand(b, src, nir_ixor(b, dst, bitmask));
   case PIPE_LOGICOP_INVERT:
      return nir_ixor(b, dst, bitmask);
   case PIPE_LOGICOP_XOR:
      return nir_ixor(b, src, dst);
   case PIPE_LOGICOP_NAND:
      return nir_ixor(b, nir_iand(b, src, dst), bitmask);
   case PIPE_LOGICOP_AND:
      return nir_iand(b, src, dst);
   case PIPE_LOGICOP_EQUIV:
      return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
   case PIPE_LOGICOP_NOOP:
      UNREACHABLE("optimized out");
   case PIPE_LOGICOP_OR_INVERTED:
      return nir_ior(b, nir_ixor(b, src, bitmask), dst);
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_OR_REVERSE:
      return nir_ior(b, src, nir_ixor(b, dst, bitmask));
   case PIPE_LOGICOP_OR:
      return nir_ior(b, src, dst);
   case PIPE_LOGICOP_SET:
      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
   }

   UNREACHABLE("Invalid logicop function");
}
/*
 * Apply a logic op between src and dst for a colour buffer of the given
 * format and return the resulting colour at src's original bit size.
 *
 * Float and sRGB formats, as well as the COPY op, return src unchanged; NOOP
 * returns dst. Otherwise both colours are widened to 32 bits, converted to
 * the format's integer representation (UNORM/SNORM), combined, and converted
 * back.
 */
nir_def *
nir_color_logicop(nir_builder *b, nir_def *src, nir_def *dst,
                  enum pipe_logicop func, enum pipe_format format)
{
   const unsigned orig_bit_size = src->bit_size;
   const struct util_format_description *desc =
      util_format_description(format);

   /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
    *
    *    Logical operation has no effect on a floating-point destination color
    *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
    *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
    *    corresponding to the destination buffer is SRGB (see section 9.2.3).
    *    However, if logical operation is enabled, blending is still disabled.
    */
   if (util_format_is_float(format) || util_format_is_srgb(format))
      return src;

   /* Ops that simply pick one of the inputs need no ALU work */
   if (func == PIPE_LOGICOP_COPY)
      return src;
   if (func == PIPE_LOGICOP_NOOP)
      return dst;

   const bool pure_int = util_format_is_pure_integer(format);
   const nir_alu_type type = pure_int ? nir_type_uint : nir_type_float;
   const bool resize = (orig_bit_size != 32);

   /* All of the work below is done on 32-bit values */
   if (resize) {
      src = nir_convert_to_bit_size(b, src, type, 32);
      dst = nir_convert_to_bit_size(b, dst, type, 32);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   /* Per-channel widths and the matching all-ones masks */
   unsigned bits[4];
   nir_const_value mask[4];
   for (unsigned c = 0; c < 4; c++) {
      bits[c] = desc->channel[c].size;
      mask[c] = nir_const_value_for_uint(BITFIELD_MASK(bits[c]), 32);
   }

   const bool unorm = util_format_is_unorm(format);
   const bool snorm = !unorm && util_format_is_snorm(format);

   /* Float -> integer representation of the format */
   if (unorm) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (snorm) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(pure_int);
   }

   nir_def *bitmask = nir_build_imm(b, 4, 32, mask);
   nir_def *out = nir_logicop_func(b, func, src, dst, bitmask);

   /* Integer representation -> float again */
   if (unorm) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (snorm) {
      /* Sign extend before converting so the i2f in snorm_to_float works */
      out = nir_format_sign_extend_ivec(b, out, bits);
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(pure_int);
   }

   if (resize)
      out = nir_convert_to_bit_size(b, out, type, orig_bit_size);

   return out;
}
/*
 * True when channel i is within the format's channel count and is not a
 * VOID channel.
 */
static bool
channel_exists(const struct util_format_description *desc, unsigned i)
{
   if (i >= desc->nr_channels)
      return false;

   return desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
}
/*
 * Check whether a channel's blend state is the "replace" mode, i.e.
 * dest = source (ADD with factors ONE and ZERO). Callers may use this to
 * special-case blending.
 */
static bool
nir_blend_replace_channel(const nir_lower_blend_channel *c)
{
   if (c->func != PIPE_BLEND_ADD)
      return false;

   if (c->src_factor != PIPE_BLENDFACTOR_ONE)
      return false;

   return c->dst_factor == PIPE_BLENDFACTOR_ZERO;
}
/* A render target is in "replace" mode when both its RGB and alpha are */
static bool
nir_blend_replace_rt(const nir_lower_blend_rt *rt)
{
   if (!nir_blend_replace_channel(&rt->rgb))
      return false;

   return nir_blend_replace_channel(&rt->alpha);
}
/* Smallest of the first three components of v */
static nir_def *
minv3(nir_builder *b, nir_def *v)
{
   nir_def *xy = nir_fmin(b, nir_channel(b, v, 0), nir_channel(b, v, 1));
   nir_def *z = nir_channel(b, v, 2);

   return nir_fmin(b, xy, z);
}
/* Largest of the first three components of v */
static nir_def *
maxv3(nir_builder *b, nir_def *v)
{
   nir_def *xy = nir_fmax(b, nir_channel(b, v, 0), nir_channel(b, v, 1));
   nir_def *z = nir_channel(b, v, 2);

   return nir_fmax(b, xy, z);
}
/* Luminosity of c: dot product with the weights (0.30, 0.59, 0.11) */
static nir_def *
lumv3(nir_builder *b, nir_def *c)
{
   nir_def *weights = nir_imm_vec3(b, 0.30, 0.59, 0.11);

   return nir_fdot(b, c, weights);
}
/* Saturation of c: largest component minus smallest component */
static nir_def *
satv3(nir_builder *b, nir_def *c)
{
   nir_def *hi = maxv3(b, c);
   nir_def *lo = minv3(b, c);

   return nir_fsub(b, hi, lo);
}
/*
 * Bring a colour back into [0,1] while keeping its luminosity.
 *
 * A negative minimum takes priority over a maximum above one; if neither
 * holds the colour is returned as is.
 */
static nir_def *
clip_color(nir_builder *b, nir_def *c)
{
   nir_def *luma = lumv3(b, c);
   nir_def *lo = minv3(b, c);
   nir_def *hi = maxv3(b, c);

   /* Minimum below 0: luma + (c - luma) * luma / (luma - lo) */
   nir_def *under_num = nir_fmul(b, nir_fsub(b, c, luma), luma);
   nir_def *under_den = nir_fsub(b, luma, lo);
   nir_def *under_term = nir_fdiv(b, under_num, under_den);
   nir_def *fixed_under = nir_fadd(b, luma, under_term);

   /* Maximum above 1: luma + (c - luma) * (1 - luma) / (hi - luma) */
   nir_def *over_num = nir_fmul(b, nir_fsub(b, c, luma),
                                nir_fsub_imm(b, 1.0, luma));
   nir_def *over_den = nir_fsub(b, hi, luma);
   nir_def *over_term = nir_fdiv(b, over_num, over_den);
   nir_def *fixed_over = nir_fadd(b, luma, over_term);

   nir_def *underflows = nir_flt_imm(b, lo, 0.0);
   nir_def *overflows = nir_fgt_imm(b, hi, 1.0);

   nir_def *without_underflow = nir_bcsel(b, overflows, fixed_over, c);
   return nir_bcsel(b, underflows, fixed_under, without_underflow);
}
/*
 * Shift cbase so that its luminosity equals that of clum, then clip the
 * result back into range.
 */
static nir_def *
set_lum(nir_builder *b, nir_def *cbase, nir_def *clum)
{
   nir_def *base_luma = lumv3(b, cbase);
   nir_def *target_luma = lumv3(b, clum);

   /* Uniform offset that moves the base onto the target luminosity */
   nir_def *offset = nir_fsub(b, target_luma, base_luma);
   nir_def *shifted = nir_fadd(b, cbase, offset);

   return clip_color(b, shifted);
}
/*
 * Give cbase the saturation of csat and then the luminosity of clum.
 * A base with zero saturation is replaced by zero before the luminosity
 * step.
 */
static nir_def *
set_lum_sat(nir_builder *b, nir_def *cbase, nir_def *csat, nir_def *clum)
{
   nir_def *base_sat = satv3(b, cbase);
   nir_def *target_sat = satv3(b, csat);
   nir_def *base_min = minv3(b, cbase);

   /* (cbase - min) * target_sat / base_sat, guarded against base_sat <= 0 */
   nir_def *has_sat = nir_fgt_imm(b, base_sat, 0.0);
   nir_def *shifted = nir_fsub(b, cbase, base_min);
   nir_def *rescaled = nir_fdiv(b, nir_fmul(b, shifted, target_sat), base_sat);
   nir_def *zero = imm3(b, 0.0);
   nir_def *resaturated = nir_bcsel(b, has_sat, rescaled, zero);

   return set_lum(b, resaturated, clum);
}
/* HSL hue: src is the base colour; dst supplies saturation and luminosity */
static nir_def *
blend_hsl_hue(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *blended = set_lum_sat(b, src, dst, dst);

   return blended;
}
/* HSL saturation: dst is the base colour and luminosity; src supplies saturation */
static nir_def *
blend_hsl_saturation(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *blended = set_lum_sat(b, dst, src, dst);

   return blended;
}
/* HSL color: src is the base colour; dst supplies the luminosity */
static nir_def *
blend_hsl_color(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *blended = set_lum(b, src, dst);

   return blended;
}
/* HSL luminosity: dst is the base colour; src supplies the luminosity */
static nir_def *
blend_hsl_luminosity(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *blended = set_lum(b, dst, src);

   return blended;
}
/* 1 - dst; the source colour is not read */
static nir_def *
blend_invert(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *inverted_dst = nir_fsub_imm(b, 1.0, dst);

   return inverted_dst;
}
/* src * (1 - dst) */
static nir_def *
blend_invert_rgb(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *inverted_dst = nir_fsub_imm(b, 1.0, dst);

   return nir_fmul(b, src, inverted_dst);
}
/* Sum of src and dst, capped at 1 */
static nir_def *
blend_lineardodge(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *cap = imm3(b, 1.0);
   nir_def *sum = nir_fadd(b, src, dst);

   return nir_fmin(b, cap, sum);
}
/* src + dst - 1, floored at 0 */
static nir_def *
blend_linearburn(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *floor_val = nir_imm_float(b, 0.0);
   nir_def *dst_minus_one = nir_fadd_imm(b, dst, -1.0);
   nir_def *sum = nir_fadd(b, src, dst_minus_one);

   return nir_fmax(b, floor_val, sum);
}
/*
 * Vivid light, selected on the value of src:
 *
 *    src <= 0    ->  0
 *    src <  0.5  ->  1 - min(1, (1 - dst) / (2 * src))
 *    src <  1    ->  min(1, dst / (2 * (1 - src)))
 *    otherwise   ->  1
 */
static nir_def *
blend_vividlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *inv_dst = nir_fsub_imm(b, 1.0, dst);
   nir_def *inv_src = nir_fsub_imm(b, 1.0, src);

   /* Value for 0 < src < 0.5 */
   nir_def *low_ratio = nir_fdiv(b, inv_dst, src_x2);
   nir_def *below_half =
      nir_fsub_imm(b, 1.0, nir_fmin(b, imm3(b, 1.0), low_ratio));

   /* Value for 0.5 <= src < 1 */
   nir_def *high_ratio = nir_fdiv(b, dst, nir_fmul_imm(b, inv_src, 2.0));
   nir_def *below_one = nir_fmin(b, imm3(b, 1.0), high_ratio);

   /* Build the select chain from the innermost case outwards */
   nir_def *result =
      nir_bcsel(b, nir_flt_imm(b, src, 1.0), below_one, imm3(b, 1.0));
   result = nir_bcsel(b, nir_flt_imm(b, src, 0.5), below_half, result);
   result = nir_bcsel(b, nir_fle_imm(b, src, 0.0), imm3(b, 0.0), result);

   return result;
}
/*
 * Linear light, with s = 2 * src + dst:
 *
 *    s >  2  ->  1
 *    s <= 1  ->  0
 *    else    ->  s - 1
 */
static nir_def *
blend_linearlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *s = nir_fadd(b, src_x2, dst);
   nir_def *s_minus_one = nir_fsub(b, s, imm3(b, 1.0));

   nir_def *at_most_one = nir_fge(b, imm3(b, 1.0), s);
   nir_def *lower = nir_bcsel(b, at_most_one, imm3(b, 0.0), s_minus_one);

   nir_def *above_two = nir_fgt_imm(b, s, 2.0);
   return nir_bcsel(b, above_two, imm3(b, 1.0), lower);
}
/*
 * Pin light:
 *
 *    2*src - 1 >  dst  and  src <  0.5      ->  0
 *    2*src - 1 >  dst  and  src >= 0.5      ->  2*src - 1
 *    2*src - 1 <= dst  and  src <  0.5*dst  ->  2*src
 *    2*src - 1 <= dst  and  src >= 0.5*dst  ->  dst
 */
static nir_def *
blend_pinlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *src_x2_m1 = nir_fsub(b, src_x2, imm3(b, 1.0));
   nir_def *dst_half = nir_fmul_imm(b, dst, 0.5);

   nir_def *dst_is_below = nir_flt(b, dst, src_x2_m1);
   nir_def *src_below_half = nir_flt_imm(b, src, 0.5);
   nir_def *src_below_half_dst = nir_flt(b, src, dst_half);

   nir_def *when_below =
      nir_bcsel(b, src_below_half, imm3(b, 0.0), src_x2_m1);
   nir_def *when_not_below =
      nir_bcsel(b, src_below_half_dst, src_x2, dst);

   return nir_bcsel(b, dst_is_below, when_below, when_not_below);
}
/*
 * Hard mix: 1 where src + dst reaches the threshold, 0 elsewhere.
 * The threshold sits half an 8-bit step below 1 to absorb 8-bit
 * quantization.
 */
static nir_def *
blend_hardmix(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *total = nir_fadd(b, src, dst);
   nir_def *cutoff = nir_imm_float(b, 1.0 - 0.5 / 255.0); /* ~0.998039 */
   nir_def *reached = nir_fge(b, total, cutoff);

   return nir_bcsel(b, reached, imm3(b, 1.0), imm3(b, 0.0));
}
/*
 * Build f(Cs', Cd') for an advanced blend mode.
 *
 * Modes with X = 0 never use f(), so NULL is returned for them. A mode that
 * is not listed reaches UNREACHABLE().
 */
static nir_def *
calc_blend_factor(nir_builder *b, enum pipe_advanced_blend_mode blend_op, nir_def *src, nir_def *dst)
{
   switch (blend_op) {
   /* X = 0: nothing to compute */
   case PIPE_ADVANCED_BLEND_NONE:
   case PIPE_ADVANCED_BLEND_SRC_OUT:
   case PIPE_ADVANCED_BLEND_DST_OUT:
   case PIPE_ADVANCED_BLEND_XOR:
      return NULL;

   /* Porter-Duff modes: f() is simply the source colour ... */
   case PIPE_ADVANCED_BLEND_SRC:
   case PIPE_ADVANCED_BLEND_SRC_OVER:
   case PIPE_ADVANCED_BLEND_SRC_IN:
   case PIPE_ADVANCED_BLEND_SRC_ATOP:
      return src;

   /* ... or simply the destination colour */
   case PIPE_ADVANCED_BLEND_DST:
   case PIPE_ADVANCED_BLEND_DST_OVER:
   case PIPE_ADVANCED_BLEND_DST_IN:
   case PIPE_ADVANCED_BLEND_DST_ATOP:
      return dst;

   /* Separable standard modes */
   case PIPE_ADVANCED_BLEND_MULTIPLY:
      return blend_multiply(b, src, dst);
   case PIPE_ADVANCED_BLEND_SCREEN:
      return blend_screen(b, src, dst);
   case PIPE_ADVANCED_BLEND_OVERLAY:
      return blend_overlay(b, src, dst);
   case PIPE_ADVANCED_BLEND_DARKEN:
      return blend_darken(b, src, dst);
   case PIPE_ADVANCED_BLEND_LIGHTEN:
      return blend_lighten(b, src, dst);
   case PIPE_ADVANCED_BLEND_COLORDODGE:
      return blend_colordodge(b, src, dst);
   case PIPE_ADVANCED_BLEND_COLORBURN:
      return blend_colorburn(b, src, dst);
   case PIPE_ADVANCED_BLEND_HARDLIGHT:
      return blend_hardlight(b, src, dst);
   case PIPE_ADVANCED_BLEND_SOFTLIGHT:
      return blend_softlight(b, src, dst);
   case PIPE_ADVANCED_BLEND_DIFFERENCE:
      return blend_difference(b, src, dst);
   case PIPE_ADVANCED_BLEND_EXCLUSION:
      return blend_exclusion(b, src, dst);

   /* HSL modes */
   case PIPE_ADVANCED_BLEND_HSL_HUE:
      return blend_hsl_hue(b, src, dst);
   case PIPE_ADVANCED_BLEND_HSL_SATURATION:
      return blend_hsl_saturation(b, src, dst);
   case PIPE_ADVANCED_BLEND_HSL_COLOR:
      return blend_hsl_color(b, src, dst);
   case PIPE_ADVANCED_BLEND_HSL_LUMINOSITY:
      return blend_hsl_luminosity(b, src, dst);

   /* Extended modes */
   case PIPE_ADVANCED_BLEND_INVERT:
      return blend_invert(b, src, dst);
   case PIPE_ADVANCED_BLEND_INVERT_RGB:
      return blend_invert_rgb(b, src, dst);
   case PIPE_ADVANCED_BLEND_LINEARDODGE:
      return blend_lineardodge(b, src, dst);
   case PIPE_ADVANCED_BLEND_LINEARBURN:
      return blend_linearburn(b, src, dst);
   case PIPE_ADVANCED_BLEND_VIVIDLIGHT:
      return blend_vividlight(b, src, dst);
   case PIPE_ADVANCED_BLEND_LINEARLIGHT:
      return blend_linearlight(b, src, dst);
   case PIPE_ADVANCED_BLEND_PINLIGHT:
      return blend_pinlight(b, src, dst);
   case PIPE_ADVANCED_BLEND_HARDMIX:
      return blend_hardmix(b, src, dst);

   default:
      UNREACHABLE("Invalid advanced blend op");
   }
}
/*
 * Build the result of one of the "additional RGB" advanced blend modes
 * (the PLUS ... BLUE cases handled by the switch below) for render target rt.
 *
 * src and dst are split into RGB and alpha. Inputs flagged as
 * non-premultiplied are multiplied by their alpha first, so the per-mode
 * arithmetic always works on premultiplied colours. If dst is flagged as
 * non-premultiplied, the RGB result is divided by the result alpha again
 * (or set to zero when that alpha is not greater than 0).
 *
 * Returns a vec4 of the blended RGB and alpha. A mode not handled by the
 * switch reaches UNREACHABLE().
 */
static nir_def *
calc_additional_rgb_blend(nir_builder *b, const nir_lower_blend_options *options,
unsigned rt,
nir_def *src, nir_def *dst)
{
nir_def *src_rgb = nir_trim_vector(b, src, 3);
nir_def *dst_rgb = nir_trim_vector(b, dst, 3);
nir_def *src_a = nir_channel(b, src, 3);
nir_def *dst_a = nir_channel(b, dst, 3);
/* Premultiply if non-premultiplied */
if (!options->rt[rt].src_premultiplied)
src_rgb = nir_fmul(b, src_rgb, src_a);
if (!options->rt[rt].dst_premultiplied)
dst_rgb = nir_fmul(b, dst_rgb, dst_a);
nir_def *rgb, *a;
switch (options->rt[rt].blend_mode) {
case PIPE_ADVANCED_BLEND_PLUS:
/* Plain sum of colour and of alpha, no clamping */
rgb = nir_fadd(b, src_rgb, dst_rgb);
a = nir_fadd(b, src_a, dst_a);
break;
case PIPE_ADVANCED_BLEND_PLUS_CLAMPED:
/* Sum with colour and alpha each clamped to at most 1 */
rgb = nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src_rgb, dst_rgb));
a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
break;
case PIPE_ADVANCED_BLEND_PLUS_CLAMPED_ALPHA: {
/* Despite its name, max_a is min(1, src_a + dst_a); the colour sum is
 * clamped to that alpha rather than to 1 */
nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
rgb = nir_fmin(b, max_a, nir_fadd(b, src_rgb, dst_rgb));
a = max_a;
break;
}
case PIPE_ADVANCED_BLEND_PLUS_DARKER: {
/* As above, max_a is min(1, src_a + dst_a) */
nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
/* max(0, max_a - ((src_a - src_rgb) + (dst_a - dst_rgb))) */
nir_def *src_diff = nir_fsub(b, src_a, src_rgb);
nir_def *dst_diff = nir_fsub(b, dst_a, dst_rgb);
rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, max_a, nir_fadd(b, src_diff, dst_diff)));
a = max_a;
break;
}
case PIPE_ADVANCED_BLEND_MINUS:
/* dst - src for both colour and alpha, no clamping */
rgb = nir_fsub(b, dst_rgb, src_rgb);
a = nir_fsub(b, dst_a, src_a);
break;
case PIPE_ADVANCED_BLEND_MINUS_CLAMPED:
/* dst - src, floored at 0 */
rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, dst_rgb, src_rgb));
a = nir_fmax(b, nir_imm_float(b, 0.0), nir_fsub(b, dst_a, src_a));
break;
case PIPE_ADVANCED_BLEND_CONTRAST: {
/* res.rgb = (dst_a / 2) + 2 * (dst_rgb - dst_a/2) * (src_rgb - src_a/2) */
nir_def *half_dst_a = nir_fmul_imm(b, dst_a, 0.5);
nir_def *half_src_a = nir_fmul_imm(b, src_a, 0.5);
nir_def *dst_centered = nir_fsub(b, dst_rgb, half_dst_a);
nir_def *src_centered = nir_fsub(b, src_rgb, half_src_a);
rgb = nir_fadd(b, half_dst_a,
nir_fmul_imm(b, nir_fmul(b, dst_centered, src_centered), 2.0));
/* Result alpha is the destination alpha */
a = dst_a;
break;
}
case PIPE_ADVANCED_BLEND_INVERT_OVG: {
/* res.rgb = src_a * (1 - dst_rgb) + (1 - src_a) * dst_rgb */
nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst_rgb);
nir_def *one_minus_src_a = nir_fsub_imm(b, 1.0, src_a);
rgb = nir_fadd(b, nir_fmul(b, src_a, one_minus_dst),
nir_fmul(b, one_minus_src_a, dst_rgb));
/* res.a = src_a + dst_a - src_a * dst_a */
a = nir_fsub(b, nir_fadd(b, src_a, dst_a), nir_fmul(b, src_a, dst_a));
break;
}
case PIPE_ADVANCED_BLEND_RED:
/* Red from src; green, blue and alpha from dst */
rgb = nir_vec3(b, nir_channel(b, src_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, dst_rgb, 2));
a = dst_a;
break;
case PIPE_ADVANCED_BLEND_GREEN:
/* Green from src; red, blue and alpha from dst */
rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, src_rgb, 1), nir_channel(b, dst_rgb, 2));
a = dst_a;
break;
case PIPE_ADVANCED_BLEND_BLUE:
/* Blue from src; red, green and alpha from dst */
rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, src_rgb, 2));
a = dst_a;
break;
default:
UNREACHABLE("Invalid additional RGB blend op");
}
/* If dst is non-premultiplied, the output should also be non-premultiplied */
if (!options->rt[rt].dst_premultiplied) {
/* Divide by the result alpha only when it is positive; otherwise use 0 */
rgb = nir_bcsel(b,
nir_fgt_imm(b, a, 0.0),
nir_fdiv(b, rgb, a),
imm3(b, 0.0));
}
return nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1),
nir_channel(b, rgb, 2), a);
}
/*
* X, Y, Z blend factors for the advanced blend equation:
* RGB = f(Cs',Cd') * X * p0 + Cs' * Y * p1 + Cd' * Z * p2
* A = X * p0 + Y * p1 + Z * p2
*
* Each row is { X, Y, Z } and rows are indexed by enum
* pipe_advanced_blend_mode. The rows with X = 0 (NONE, SRC_OUT, DST_OUT,
* XOR) are exactly the modes for which calc_blend_factor() returns NULL,
* because f() does not contribute to the result.
*
* The table ends at PIPE_ADVANCED_BLEND_HARDMIX. Modes >=
* PIPE_ADVANCED_BLEND_PLUS use separate calc_additional_rgb_blend() and have
* no row here.
*/
static const float blend_xyz[][3] = {
[PIPE_ADVANCED_BLEND_NONE] = { 0, 0, 0 },
[PIPE_ADVANCED_BLEND_MULTIPLY] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_SCREEN] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_OVERLAY] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_DARKEN] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_LIGHTEN] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_COLORDODGE] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_COLORBURN] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HARDLIGHT] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_SOFTLIGHT] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_DIFFERENCE] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_EXCLUSION] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HSL_HUE] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HSL_SATURATION] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HSL_COLOR] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HSL_LUMINOSITY] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_SRC] = { 1, 1, 0 },
[PIPE_ADVANCED_BLEND_DST] = { 1, 0, 1 },
[PIPE_ADVANCED_BLEND_SRC_OVER] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_DST_OVER] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_SRC_IN] = { 1, 0, 0 },
[PIPE_ADVANCED_BLEND_DST_IN] = { 1, 0, 0 },
[PIPE_ADVANCED_BLEND_SRC_OUT] = { 0, 1, 0 },
[PIPE_ADVANCED_BLEND_DST_OUT] = { 0, 0, 1 },
[PIPE_ADVANCED_BLEND_SRC_ATOP] = { 1, 0, 1 },
[PIPE_ADVANCED_BLEND_DST_ATOP] = { 1, 1, 0 },
[PIPE_ADVANCED_BLEND_XOR] = { 0, 1, 1 },
[PIPE_ADVANCED_BLEND_INVERT] = { 1, 0, 1 },
[PIPE_ADVANCED_BLEND_INVERT_RGB] = { 1, 0, 1 },
[PIPE_ADVANCED_BLEND_LINEARDODGE] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_LINEARBURN] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_VIVIDLIGHT] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_LINEARLIGHT] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_PINLIGHT] = { 1, 1, 1 },
[PIPE_ADVANCED_BLEND_HARDMIX] = { 1, 1, 1 },
};
/**
 * Emit an advanced blend of src with dst for render target rt.
 *
 * The math is done on 32-bit floats because the advanced blend code uses
 * hardcoded 32-bit constants; the result is converted back to the bit size
 * that src arrived with.
 *
 * Modes in [PIPE_ADVANCED_BLEND_PLUS, PIPE_ADVANCED_BLEND_BLUE] are handed to
 * calc_additional_rgb_blend(). Every other mode is evaluated as
 *
 *    RGB = f(Cs', Cd') * X * p0 + Cs' * Y * p1 + Cd' * Z * p2
 *    A   =               X * p0 +       Y * p1 +       Z * p2
 *
 * where (X, Y, Z) come from blend_xyz[] and (p0, p1, p2) are selected by the
 * render target's overlap mode.
 */
static nir_def *
nir_blend_advanced(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_def *src, nir_def *dst)
{
   const nir_lower_blend_rt *state = &options->rt[rt];
   const unsigned out_bits = src->bit_size;

   nir_def *src32 = nir_f2f32(b, src);
   nir_def *dst32 = nir_f2f32(b, dst);

   /* The additional RGB blend ops have their own lowering */
   if (state->blend_mode >= PIPE_ADVANCED_BLEND_PLUS &&
       state->blend_mode <= PIPE_ADVANCED_BLEND_BLUE) {
      nir_def *extra = calc_additional_rgb_blend(b, options, rt, src32, dst32);
      return nir_f2fN(b, extra, out_bits);
   }

   nir_def *cs = nir_trim_vector(b, src32, 3);
   nir_def *cd = nir_trim_vector(b, dst32, 3);
   nir_def *as = nir_channel(b, src32, 3);
   nir_def *ad = nir_channel(b, dst32, 3);

   /* Undo premultiplication, mapping a zero alpha to a zero colour rather
    * than dividing by zero.
    */
   nir_def *cs_unpre = cs;
   if (state->src_premultiplied) {
      nir_def *src_transparent = nir_feq_imm(b, as, 0.0);
      nir_def *black = imm3(b, 0.0);
      nir_def *divided = nir_fdiv(b, cs, as);
      cs_unpre = nir_bcsel(b, src_transparent, black, divided);
   }

   nir_def *cd_unpre = cd;
   if (state->dst_premultiplied) {
      nir_def *dst_transparent = nir_feq_imm(b, ad, 0.0);
      nir_def *black = imm3(b, 0.0);
      nir_def *divided = nir_fdiv(b, cd, ad);
      cd_unpre = nir_bcsel(b, dst_transparent, black, divided);
   }

   /* f(Cs', Cd'); may come back NULL when X = 0 and the value is unused */
   nir_def *f = calc_blend_factor(b, state->blend_mode, cs_unpre, cd_unpre);

   /* Coverage terms for the "both", "source only" and "destination only"
    * regions, as defined by the overlap mode.
    */
   nir_def *p0, *p1, *p2;
   switch (state->overlap) {
   case PIPE_BLEND_OVERLAP_UNCORRELATED: {
      /* p0 = As * Ad, p1 = As * (1 - Ad), p2 = Ad * (1 - As) */
      p0 = nir_fmul(b, as, ad);

      nir_def *inv_ad = nir_fsub_imm(b, 1.0, ad);
      p1 = nir_fmul(b, as, inv_ad);

      nir_def *inv_as = nir_fsub_imm(b, 1.0, as);
      p2 = nir_fmul(b, ad, inv_as);
      break;
   }

   case PIPE_BLEND_OVERLAP_CONJOINT: {
      /* p0 = min(As, Ad), p1 = max(As - Ad, 0), p2 = max(Ad - As, 0) */
      p0 = nir_fmin(b, as, ad);

      nir_def *as_minus_ad = nir_fsub(b, as, ad);
      p1 = nir_fmax(b, as_minus_ad, nir_imm_float(b, 0.0));

      nir_def *ad_minus_as = nir_fsub(b, ad, as);
      p2 = nir_fmax(b, ad_minus_as, nir_imm_float(b, 0.0));
      break;
   }

   case PIPE_BLEND_OVERLAP_DISJOINT: {
      /* p0 = max(As + Ad - 1, 0), p1 = min(As, 1 - Ad), p2 = min(Ad, 1 - As) */
      nir_def *sum = nir_fadd(b, as, ad);
      nir_def *excess = nir_fadd_imm(b, sum, -1.0);
      p0 = nir_fmax(b, excess, nir_imm_float(b, 0.0));

      nir_def *inv_ad = nir_fsub_imm(b, 1.0, ad);
      p1 = nir_fmin(b, as, inv_ad);

      nir_def *inv_as = nir_fsub_imm(b, 1.0, as);
      p2 = nir_fmin(b, ad, inv_as);
      break;
   }

   default:
      UNREACHABLE("invalid overlap");
   }

   const float *xyz = blend_xyz[state->blend_mode];
   const float x = xyz[0];
   const float y = xyz[1];
   const float z = xyz[2];
   const bool has_x = (x != 0.0);
   const bool has_y = (y != 0.0);
   const bool has_z = (z != 0.0);

   /* RGB = f * X * p0 + Cs' * Y * p1 + Cd' * Z * p2 */
   nir_def *rgb = imm3(b, 0.0);
   if (f) {
      nir_def *w0 = nir_fmul_imm(b, p0, x);
      rgb = nir_fmul(b, f, w0);
   }
   if (has_y) {
      nir_def *w1 = nir_fmul_imm(b, p1, y);
      rgb = nir_fadd(b, rgb, nir_fmul(b, cs_unpre, w1));
   }
   if (has_z) {
      nir_def *w2 = nir_fmul_imm(b, p2, z);
      rgb = nir_fadd(b, rgb, nir_fmul(b, cd_unpre, w2));
   }

   /* A = X * p0 + Y * p1 + Z * p2 */
   nir_def *a = nir_imm_float(b, 0.0);
   if (has_x)
      a = nir_fmul_imm(b, p0, x);
   if (has_y)
      a = nir_fadd(b, a, nir_fmul_imm(b, p1, y));
   if (has_z)
      a = nir_fadd(b, a, nir_fmul_imm(b, p2, z));

   /* A non-premultiplied destination expects a non-premultiplied result */
   if (!state->dst_premultiplied) {
      nir_def *has_coverage = nir_fgt_imm(b, a, 0.0);
      nir_def *divided = nir_fdiv(b, rgb, a);
      nir_def *black = imm3(b, 0.0);
      rgb = nir_bcsel(b, has_coverage, divided, black);
   }

   nir_def *r = nir_channel(b, rgb, 0);
   nir_def *g = nir_channel(b, rgb, 1);
   nir_def *bl = nir_channel(b, rgb, 2);
   nir_def *rgba = nir_vec4(b, r, g, bl, a);

   return nir_f2fN(b, rgba, out_bits);
}
/**
 * Given a blend state, the source color, and the destination color, return
 * the blended color.
 *
 * Parameters:
 *  - b:    builder used to emit the blend math at its current cursor.
 *  - src:  source colour, read as a 4-channel vector.
 *  - src1: dual-source colour, or NULL if the shader never wrote one (zero is
 *          substituted in that case).
 *  - dst:  destination colour, read as a 4-channel vector.
 *  - rt:   per-render-target blend state (format plus RGB/alpha equations).
 *  - scalar_blend_const: load the blend constant with four per-channel
 *          intrinsics instead of a single RGBA one.
 *
 * Returns src untouched for pure-integer formats and when
 * nir_blend_replace_rt() is true for rt; otherwise a 4-channel vector built
 * from the per-channel blend results.
 */
nir_def *
nir_color_blend(nir_builder *b, nir_def *src, nir_def *src1, nir_def *dst,
                const nir_lower_blend_rt *rt, bool scalar_blend_const)
{
   if (util_format_is_pure_integer(rt->format) || nir_blend_replace_rt(rt))
      return src;

   /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
    * blend with in that case, as long as we don't dereference NULL.
    */
   if (!src1)
      src1 = nir_imm_zero(b, 4, src->bit_size);

   /* Grab the blend constant ahead of time */
   nir_def *bconst;
   if (scalar_blend_const) {
      bconst = nir_vec4(b,
                        nir_load_blend_const_color_r_float(b),
                        nir_load_blend_const_color_g_float(b),
                        nir_load_blend_const_color_b_float(b),
                        nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   if (src->bit_size == 16) {
      bconst = nir_f2f16(b, bconst);
      src1 = nir_f2f16(b, src1);
   }

   /* The input colours need to be clamped to the format. Contrary to the
    * OpenGL/Vulkan specs, it really is the inputs that get clamped and not the
    * intermediate blend factors. This matches the CTS and hardware behaviour.
    *
    * For unsigned normalized formats the two readings agree: if x is in
    * [0, 1] then so is 1 - x. For signed normalized formats they do not, as
    * 1 - x can reach 2, and clamping the factor again fails the CTS.
    *
    * src1 is always non-NULL here thanks to the substitution above.
    */
   src = nir_fsat_to_format(b, src, rt->format);
   bconst = nir_fsat_to_format(b, bconst, rt->format);
   src1 = nir_fsat_to_format(b, src1, rt->format);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(rt->format);

   nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
   nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

   dst = nir_vec4(b,
                  channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
                  channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
                  channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
                  channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);

   /* We blend per channel and recombine later */
   nir_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Decide properties based on channel: RGB equation for the first three
       * channels, alpha equation for the last.
       */
      nir_lower_blend_channel chan = (c < 3) ? rt->rgb : rt->alpha;

      nir_def *psrc = nir_channel(b, src, c);
      nir_def *pdst = nir_channel(b, dst, c);

      /* Only apply the factors for blend functions that use them */
      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
            b, psrc,
            src, src1, dst, bconst, c,
            chan.src_factor, rt->format);

         pdst = nir_blend_factor(
            b, pdst,
            src, src1, dst, bconst, c,
            chan.dst_factor, rt->format);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}
/**
 * Map a fragment output location to a colour render target index.
 *
 * Returns the offset of location from FRAG_RESULT_DATA0, or -1 for locations
 * below FRAG_RESULT_DATA0. FRAG_RESULT_COLOR is rejected by assertion since
 * it has to be lowered before this pass runs.
 */
static int
color_index_for_location(unsigned location)
{
   assert(location != FRAG_RESULT_COLOR &&
          "gl_FragColor must be lowered before nir_lower_blend");

   if (location >= FRAG_RESULT_DATA0)
      return location - FRAG_RESULT_DATA0;

   return -1;
}
/**
 * Per-intrinsic callback that lowers blending for one store_output.
 *
 * data points at the pass's struct ctx, which carries the lowering options
 * and the dual-source colours collected by consume_dual_stores().
 *
 * Colour stores to a render target with a format set are replaced by a
 * framebuffer fetch (when the destination is needed), the blend / logic op
 * math, a colour mask, and a store sunk to the end of the block. Stores to
 * fully masked render targets, or under a NOOP logic op, are removed.
 *
 * Returns true if the store was rewritten or removed, false if it was left
 * alone.
 */
static bool
nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   struct ctx *ctx = data;
   const nir_lower_blend_options *options = ctx->options;

   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
   int rt = color_index_for_location(sem.location);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->rt[rt].format == PIPE_FORMAT_NONE)
      return false;

   /* Only process stores once. Pass flags are cleared by consume_dual_stores */
   if (store->instr.pass_flags)
      return false;

   store->instr.pass_flags = 1;

   /* Store are sunk to the bottom of the block to ensure that the dual
    * source colour is already written.
    */
   b->cursor = nir_after_block(store->instr.block);

   const enum pipe_format format = options->rt[rt].format;
   enum pipe_logicop logicop_func = options->logicop_func;

   /* From the Vulkan spec ("Logical operations"):
    *
    *    Logical operations are not applied to floating-point or sRGB format
    *    color attachments...
    *
    *    If logicOpEnable is VK_TRUE... blending of all attachments is treated
    *    as if it were disabled. Any attachments using color formats for which
    *    logical operations are not supported simply pass through the color
    *    values unmodified.
    *
    * The semantic for unsupported formats is equivalent to a logicop of COPY.
    * It is /not/ equivalent to disabled logicops (which would incorrectly apply
    * blending). To implement this spec text with minimal special casing, we
    * override the logicop func to COPY for unsupported formats.
    */
   if (util_format_is_float(format) || util_format_is_srgb(format)) {
      logicop_func = PIPE_LOGICOP_COPY;
   }

   /* Don't bother copying the destination to the source for disabled RTs */
   if (options->rt[rt].colormask == 0 ||
       (options->logicop_enable && logicop_func == PIPE_LOGICOP_NOOP)) {

      nir_instr_remove(&store->instr);
      return true;
   }

   /* Grab the input color. We always want 4 channels during blend. Dead
    * code will clean up any channels we don't need.
    */
   nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);

   assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");

   /* Grab the previous fragment color if we need it */
   nir_def *dst;

   if (options->rt[rt].advanced_blend ||
       channel_uses_dest(options->rt[rt].rgb) ||
       channel_uses_dest(options->rt[rt].alpha) ||
       options->logicop_enable ||
       options->rt[rt].colormask != BITFIELD_MASK(4)) {

      b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
      b->shader->info.fs.uses_fbfetch_output = true;
      b->shader->info.fs.uses_sample_shading = true;
      sem.fb_fetch_output = true;

      dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
                            nir_imm_int(b, 0),
                            .dest_type = nir_intrinsic_src_type(store),
                            .io_semantics = sem);
   } else {
      dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
   }

   /* Blend the two colors per the passed options. Blending is prioritized as:
    * 1. Logic operations (if logicop_enable is true) - mutually exclusive with blending
    * 2. Advanced blending (if advanced_blend is set) - uses complex blend equations
    * 3. Standard blending (if configured) - uses traditional blend factors
    *
    * We only call nir_color_blend if blending is enabled with a blend mode
    * other than replace (independent of the color mask). That avoids
    * unnecessary fsat instructions in the common case where blending is
    * disabled at an API level, but the driver still runs this pass (possibly
    * for color masking).
    */
   nir_def *blended = src;

   if (options->logicop_enable) {
      /* Use the local logicop_func, not options->logicop_func: it carries the
       * COPY override for float/sRGB formats computed above.
       */
      blended = nir_color_logicop(b, src, dst, logicop_func, format);
   } else if (options->rt[rt].advanced_blend) {
      blended = nir_blend_advanced(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(format) &&
              !nir_blend_replace_rt(&options->rt[rt])) {
      assert(!util_format_is_scaled(format));
      blended = nir_color_blend(b, src, ctx->src1[rt], dst, &options->rt[rt],
                                options->scalar_blend_const);
   }

   /* Apply a colormask if necessary */
   blended = nir_color_mask(b, blended, dst, options->rt[rt].colormask);

   /* Shave off any components we don't want to store */
   const unsigned num_components = util_format_get_nr_components(format);
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store destination as needed */
   store->num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
                                          nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_src_rewrite(&store->src[0], blended);

   /* Sink to bottom */
   nir_instr_remove(&store->instr);
   nir_builder_instr_insert(b, &store->instr);
   return true;
}
/*
 * Dual-source colours exist purely to feed blending, so once nir_lower_blend
 * runs their store_output belongs to this pass alone. Strip those stores so
 * the backend never sees them, and remember each stored value for blending.
 *
 * data is the array of per-render-target dual-source colours to fill in.
 * Returns true when a dual-source store was consumed and removed.
 */
static bool
consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_def **dual_colours = data;

   /* Reset the pass flags on every store_output while we are visiting them;
    * the blend lowering sets them afterwards.
    */
   store->instr.pass_flags = 0;

   const nir_io_semantics sem = nir_intrinsic_io_semantics(store);

   int rt;
   if (sem.dual_source_blend_index) {
      rt = color_index_for_location(sem.location);
   } else if (sem.location == FRAG_RESULT_DUAL_SRC_BLEND) {
      rt = 0;
   } else {
      /* Not a dual-source store */
      return false;
   }

   assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");

   dual_colours[rt] = store->src[0].ssa;
   nir_instr_remove(&store->instr);
   return true;
}
/** Lower blending to framebuffer fetch and some math
 *
 * Shader I/O must already be lowered to explicit load/store instructions
 * with nir_lower_io before this pass runs, and the shader must be a fragment
 * shader.
 *
 * Runs two intrinsic passes in order: the first collects and removes
 * dual-source stores, the second lowers the remaining colour stores.
 * Returns true if either pass made progress.
 */
bool
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   struct ctx ctx = { .options = options };

   const bool consumed_dual =
      nir_shader_intrinsics_pass(shader, consume_dual_stores,
                                 nir_metadata_control_flow,
                                 ctx.src1);

   const bool lowered =
      nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
                                 nir_metadata_control_flow,
                                 &ctx);

   return consumed_dual || lowered;
}