2021-05-04 14:11:12 -04:00
|
|
|
/*
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
* Copyright (C) 2025 Google LLC
|
2021-05-04 14:11:12 -04:00
|
|
|
* Copyright (C) 2019-2021 Collabora, Ltd.
|
|
|
|
|
* Copyright (C) 2019 Alyssa Rosenzweig
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @file
|
|
|
|
|
*
|
|
|
|
|
* Implements the fragment pipeline (blending and writeout) in software, to be
|
|
|
|
|
* run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
|
|
|
|
|
* shader variant on typical GPUs. This pass is useful if hardware lacks
|
|
|
|
|
* fixed-function blending in part or in full.
|
|
|
|
|
*/
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
#include "nir_lower_blend.h"
|
2021-05-04 14:11:12 -04:00
|
|
|
#include "compiler/nir/nir.h"
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
#include "compiler/nir/nir_blend_equation_advanced_helper.h"
|
2021-05-04 14:11:12 -04:00
|
|
|
#include "compiler/nir/nir_builder.h"
|
|
|
|
|
#include "compiler/nir/nir_format_convert.h"
|
2023-07-11 07:51:03 -04:00
|
|
|
#include "util/blend.h"
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
#include "nir_builder_opcodes.h"
|
2021-05-04 14:11:12 -04:00
|
|
|
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
struct ctx {
|
|
|
|
|
const nir_lower_blend_options *options;
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src1[8];
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
};
|
|
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
/* Given processed factors, combine them per a blend function */
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2021-05-04 14:11:12 -04:00
|
|
|
nir_blend_func(
|
|
|
|
|
nir_builder *b,
|
2023-07-11 07:51:03 -04:00
|
|
|
enum pipe_blend_func func,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src, nir_def *dst)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
|
|
|
|
switch (func) {
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_ADD:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_fadd(b, src, dst);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_SUBTRACT:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_fsub(b, src, dst);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_REVERSE_SUBTRACT:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_fsub(b, dst, src);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_MIN:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_fmin(b, src, dst);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_MAX:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_fmax(b, src, dst);
|
|
|
|
|
}
|
|
|
|
|
|
2025-07-23 09:17:35 +02:00
|
|
|
UNREACHABLE("Invalid blend function");
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Does this blend function multiply by a blend factor? */
|
|
|
|
|
|
|
|
|
|
static bool
|
2023-07-11 07:51:03 -04:00
|
|
|
nir_blend_factored(enum pipe_blend_func func)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
|
|
|
|
switch (func) {
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLEND_ADD:
|
|
|
|
|
case PIPE_BLEND_SUBTRACT:
|
|
|
|
|
case PIPE_BLEND_REVERSE_SUBTRACT:
|
2021-05-04 14:11:12 -04:00
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compute a src_alpha_saturate factor */
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2021-05-04 14:11:12 -04:00
|
|
|
nir_alpha_saturate(
|
|
|
|
|
nir_builder *b,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src, nir_def *dst,
|
2021-05-04 14:11:12 -04:00
|
|
|
unsigned chan)
|
|
|
|
|
{
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *Asrc = nir_channel(b, src, 3);
|
|
|
|
|
nir_def *Adst = nir_channel(b, dst, 3);
|
|
|
|
|
nir_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
|
|
|
|
|
nir_def *Adsti = nir_fsub(b, one, Adst);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns a scalar single factor, unmultiplied */
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2021-05-04 14:11:12 -04:00
|
|
|
nir_blend_factor_value(
|
|
|
|
|
nir_builder *b,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
|
2021-05-04 14:11:12 -04:00
|
|
|
unsigned chan,
|
2023-07-11 07:51:03 -04:00
|
|
|
enum pipe_blendfactor factor_without_invert)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
2023-07-11 07:51:03 -04:00
|
|
|
switch (factor_without_invert) {
|
|
|
|
|
case PIPE_BLENDFACTOR_ONE:
|
|
|
|
|
return nir_imm_floatN_t(b, 1.0, src->bit_size);
|
|
|
|
|
case PIPE_BLENDFACTOR_SRC_COLOR:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, src, chan);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_SRC1_COLOR:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, src1, chan);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_DST_COLOR:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, dst, chan);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, src, 3);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_SRC1_ALPHA:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, src1, 3);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_DST_ALPHA:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, dst, 3);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_CONST_COLOR:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, bconst, chan);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_CONST_ALPHA:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_channel(b, bconst, 3);
|
2023-07-11 07:51:03 -04:00
|
|
|
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_alpha_saturate(b, src, dst, chan);
|
2023-07-11 07:51:03 -04:00
|
|
|
default:
|
|
|
|
|
assert(util_blendfactor_is_inverted(factor_without_invert));
|
2025-07-23 09:17:35 +02:00
|
|
|
UNREACHABLE("Unexpected inverted factor");
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2024-10-02 09:03:01 +02:00
|
|
|
nir_build_fsat_signed(nir_builder *b, nir_def *x)
|
2022-11-25 21:40:16 -05:00
|
|
|
{
|
|
|
|
|
return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_imm_floatN_t(b, +1.0, x->bit_size));
|
2022-11-25 21:40:16 -05:00
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
|
|
|
|
nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
|
2022-11-25 21:40:16 -05:00
|
|
|
{
|
|
|
|
|
if (util_format_is_unorm(format))
|
|
|
|
|
return nir_fsat(b, x);
|
|
|
|
|
else if (util_format_is_snorm(format))
|
2024-10-02 09:03:01 +02:00
|
|
|
return nir_build_fsat_signed(b, x);
|
2022-11-25 21:40:16 -05:00
|
|
|
else
|
|
|
|
|
return x;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-09 10:14:29 -04:00
|
|
|
static bool
|
|
|
|
|
channel_uses_dest(nir_lower_blend_channel chan)
|
|
|
|
|
{
|
2023-07-11 07:51:03 -04:00
|
|
|
/* If blend factors are ignored, dest is used (min/max) */
|
|
|
|
|
if (!nir_blend_factored(chan.func))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/* If dest has a nonzero factor, it is used */
|
|
|
|
|
if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
/* Else, check the source factor */
|
|
|
|
|
switch (util_blendfactor_without_invert(chan.src_factor)) {
|
|
|
|
|
case PIPE_BLENDFACTOR_DST_COLOR:
|
|
|
|
|
case PIPE_BLENDFACTOR_DST_ALPHA:
|
|
|
|
|
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
|
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2023-04-09 10:14:29 -04:00
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2021-05-04 14:11:12 -04:00
|
|
|
nir_blend_factor(
|
|
|
|
|
nir_builder *b,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *raw_scalar,
|
|
|
|
|
nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
|
2021-05-04 14:11:12 -04:00
|
|
|
unsigned chan,
|
2023-07-11 07:51:03 -04:00
|
|
|
enum pipe_blendfactor factor,
|
2022-11-25 21:40:16 -05:00
|
|
|
enum pipe_format format)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *f =
|
2023-07-11 07:51:03 -04:00
|
|
|
nir_blend_factor_value(b, src, src1, dst, bconst, chan,
|
|
|
|
|
util_blendfactor_without_invert(factor));
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2023-07-11 07:51:03 -04:00
|
|
|
if (util_blendfactor_is_inverted(factor))
|
2021-05-04 14:11:12 -04:00
|
|
|
f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
|
|
|
|
|
|
|
|
|
|
return nir_fmul(b, raw_scalar, f);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Given a colormask, "blend" with the destination */
|
|
|
|
|
|
2026-01-15 15:43:25 -05:00
|
|
|
nir_def *
|
|
|
|
|
nir_color_mask(nir_builder *b, nir_def *src, nir_def *dst, unsigned mask)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
2026-01-15 15:43:25 -05:00
|
|
|
mask &= 0xf;
|
|
|
|
|
if (mask == 0)
|
|
|
|
|
return dst;
|
|
|
|
|
else if (mask == 0xf)
|
|
|
|
|
return src;
|
|
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
return nir_vec4(b,
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
|
|
|
|
|
nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
|
|
|
|
|
nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
|
|
|
|
|
nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
static nir_def *
|
2021-05-04 14:11:12 -04:00
|
|
|
nir_logicop_func(
|
|
|
|
|
nir_builder *b,
|
2023-07-11 07:51:03 -04:00
|
|
|
enum pipe_logicop func,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src, nir_def *dst, nir_def *bitmask)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
|
|
|
|
switch (func) {
|
|
|
|
|
case PIPE_LOGICOP_CLEAR:
|
|
|
|
|
return nir_imm_ivec4(b, 0, 0, 0, 0);
|
|
|
|
|
case PIPE_LOGICOP_NOR:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ixor(b, nir_ior(b, src, dst), bitmask);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_AND_INVERTED:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_iand(b, nir_ixor(b, src, bitmask), dst);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_COPY_INVERTED:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ixor(b, src, bitmask);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_AND_REVERSE:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_iand(b, src, nir_ixor(b, dst, bitmask));
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_INVERT:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ixor(b, dst, bitmask);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_XOR:
|
|
|
|
|
return nir_ixor(b, src, dst);
|
|
|
|
|
case PIPE_LOGICOP_NAND:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ixor(b, nir_iand(b, src, dst), bitmask);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_AND:
|
|
|
|
|
return nir_iand(b, src, dst);
|
|
|
|
|
case PIPE_LOGICOP_EQUIV:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ixor(b, nir_ixor(b, src, dst), bitmask);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_NOOP:
|
2025-07-23 09:17:35 +02:00
|
|
|
UNREACHABLE("optimized out");
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_OR_INVERTED:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ior(b, nir_ixor(b, src, bitmask), dst);
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_COPY:
|
|
|
|
|
return src;
|
|
|
|
|
case PIPE_LOGICOP_OR_REVERSE:
|
2022-11-25 21:40:38 -05:00
|
|
|
return nir_ior(b, src, nir_ixor(b, dst, bitmask));
|
2021-05-04 14:11:12 -04:00
|
|
|
case PIPE_LOGICOP_OR:
|
|
|
|
|
return nir_ior(b, src, dst);
|
|
|
|
|
case PIPE_LOGICOP_SET:
|
|
|
|
|
return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
|
|
|
|
|
}
|
|
|
|
|
|
2025-07-23 09:17:35 +02:00
|
|
|
UNREACHABLE("Invalid logciop function");
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
|
2026-01-15 15:43:25 -05:00
|
|
|
nir_def *
|
|
|
|
|
nir_color_logicop(nir_builder *b, nir_def *src, nir_def *dst,
|
|
|
|
|
enum pipe_logicop func, enum pipe_format format)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
|
|
|
|
unsigned bit_size = src->bit_size;
|
|
|
|
|
const struct util_format_description *format_desc =
|
2022-05-03 10:55:24 -05:00
|
|
|
util_format_description(format);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-11-25 21:40:42 -05:00
|
|
|
/* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
|
|
|
|
|
*
|
|
|
|
|
* Logical operation has no effect on a floating-point destination color
|
|
|
|
|
* buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
|
|
|
|
|
* FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
|
|
|
|
|
* corresponding to the destination buffer is SRGB (see section 9.2.3).
|
|
|
|
|
* However, if logical operation is enabled, blending is still disabled.
|
|
|
|
|
*/
|
|
|
|
|
if (util_format_is_float(format) || util_format_is_srgb(format))
|
|
|
|
|
return src;
|
2026-01-15 16:40:15 -05:00
|
|
|
else if (func == PIPE_LOGICOP_COPY)
|
|
|
|
|
return src;
|
|
|
|
|
else if (func == PIPE_LOGICOP_NOOP)
|
|
|
|
|
return dst;
|
2022-11-25 21:40:42 -05:00
|
|
|
|
2024-04-20 19:10:10 +02:00
|
|
|
nir_alu_type type =
|
|
|
|
|
util_format_is_pure_integer(format) ? nir_type_uint : nir_type_float;
|
|
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
if (bit_size != 32) {
|
2024-04-20 19:10:10 +02:00
|
|
|
src = nir_convert_to_bit_size(b, src, type, 32);
|
|
|
|
|
dst = nir_convert_to_bit_size(b, dst, type, 32);
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(src->num_components <= 4);
|
|
|
|
|
assert(dst->num_components <= 4);
|
|
|
|
|
|
|
|
|
|
unsigned bits[4];
|
|
|
|
|
for (int i = 0; i < 4; ++i)
|
2023-08-08 12:00:35 -05:00
|
|
|
bits[i] = format_desc->channel[i].size;
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-05-03 10:55:24 -05:00
|
|
|
if (util_format_is_unorm(format)) {
|
|
|
|
|
src = nir_format_float_to_unorm(b, src, bits);
|
|
|
|
|
dst = nir_format_float_to_unorm(b, dst, bits);
|
|
|
|
|
} else if (util_format_is_snorm(format)) {
|
|
|
|
|
src = nir_format_float_to_snorm(b, src, bits);
|
|
|
|
|
dst = nir_format_float_to_snorm(b, dst, bits);
|
|
|
|
|
} else {
|
|
|
|
|
assert(util_format_is_pure_integer(format));
|
|
|
|
|
}
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-11-25 21:40:38 -05:00
|
|
|
nir_const_value mask[4];
|
|
|
|
|
for (int i = 0; i < 4; ++i)
|
2023-07-19 18:22:02 -04:00
|
|
|
mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2025-04-06 18:20:33 -04:00
|
|
|
nir_def *out = nir_logicop_func(b, func, src, dst,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_build_imm(b, 4, 32, mask));
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-05-03 10:55:24 -05:00
|
|
|
if (util_format_is_unorm(format)) {
|
|
|
|
|
out = nir_format_unorm_to_float(b, out, bits);
|
|
|
|
|
} else if (util_format_is_snorm(format)) {
|
2022-11-25 21:40:30 -05:00
|
|
|
/* Sign extend before converting so the i2f in snorm_to_float works */
|
|
|
|
|
out = nir_format_sign_extend_ivec(b, out, bits);
|
2022-05-03 10:55:24 -05:00
|
|
|
out = nir_format_snorm_to_float(b, out, bits);
|
|
|
|
|
} else {
|
|
|
|
|
assert(util_format_is_pure_integer(format));
|
|
|
|
|
}
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2024-04-20 19:10:10 +02:00
|
|
|
if (bit_size != 32)
|
|
|
|
|
out = nir_convert_to_bit_size(b, out, type, bit_size);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-12 13:50:07 -05:00
|
|
|
static bool
|
|
|
|
|
channel_exists(const struct util_format_description *desc, unsigned i)
|
|
|
|
|
{
|
|
|
|
|
return (i < desc->nr_channels) &&
|
|
|
|
|
desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-15 15:43:25 -05:00
|
|
|
/*
|
|
|
|
|
* Test if the blending options for a given channel encode the "replace" blend
|
|
|
|
|
* mode: dest = source. In this case, blending may be specially optimized.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
nir_blend_replace_channel(const nir_lower_blend_channel *c)
|
|
|
|
|
{
|
|
|
|
|
return (c->func == PIPE_BLEND_ADD) &&
|
|
|
|
|
(c->src_factor == PIPE_BLENDFACTOR_ONE) &&
|
|
|
|
|
(c->dst_factor == PIPE_BLENDFACTOR_ZERO);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nir_blend_replace_rt(const nir_lower_blend_rt *rt)
|
|
|
|
|
{
|
|
|
|
|
return nir_blend_replace_channel(&rt->rgb) &&
|
|
|
|
|
nir_blend_replace_channel(&rt->alpha);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
static nir_def *
|
|
|
|
|
minv3(nir_builder *b, nir_def *v)
|
|
|
|
|
{
|
|
|
|
|
return nir_fmin(b, nir_fmin(b, nir_channel(b, v, 0), nir_channel(b, v, 1)),
|
|
|
|
|
nir_channel(b, v, 2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
maxv3(nir_builder *b, nir_def *v)
|
|
|
|
|
{
|
|
|
|
|
return nir_fmax(b, nir_fmax(b, nir_channel(b, v, 0), nir_channel(b, v, 1)),
|
|
|
|
|
nir_channel(b, v, 2));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
lumv3(nir_builder *b, nir_def *c)
|
|
|
|
|
{
|
|
|
|
|
return nir_fdot(b, c, nir_imm_vec3(b, 0.30, 0.59, 0.11));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
satv3(nir_builder *b, nir_def *c)
|
|
|
|
|
{
|
|
|
|
|
return nir_fsub(b, maxv3(b, c), minv3(b, c));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Clip color to [0,1] while preserving luminosity */
|
|
|
|
|
static nir_def *
|
|
|
|
|
clip_color(nir_builder *b, nir_def *c)
|
|
|
|
|
{
|
|
|
|
|
nir_def *lum = lumv3(b, c);
|
|
|
|
|
nir_def *mincol = minv3(b, c);
|
|
|
|
|
nir_def *maxcol = maxv3(b, c);
|
|
|
|
|
|
|
|
|
|
/* If min < 0: c = lum + (c - lum) * lum / (lum - min) */
|
|
|
|
|
nir_def *t1 = nir_fdiv(b,
|
|
|
|
|
nir_fmul(b, nir_fsub(b, c, lum), lum),
|
|
|
|
|
nir_fsub(b, lum, mincol));
|
|
|
|
|
nir_def *c1 = nir_fadd(b, lum, t1);
|
|
|
|
|
|
|
|
|
|
/* If max > 1: c = lum + (c - lum) * (1 - lum) / (max - lum) */
|
|
|
|
|
nir_def *t2 = nir_fdiv(b,
|
|
|
|
|
nir_fmul(b, nir_fsub(b, c, lum), nir_fsub_imm(b, 1.0, lum)),
|
|
|
|
|
nir_fsub(b, maxcol, lum));
|
|
|
|
|
nir_def *c2 = nir_fadd(b, lum, t2);
|
|
|
|
|
|
|
|
|
|
nir_def *min_neg = nir_flt_imm(b, mincol, 0.0);
|
|
|
|
|
nir_def *max_gt1 = nir_fgt_imm(b, maxcol, 1.0);
|
|
|
|
|
|
|
|
|
|
return nir_bcsel(b, min_neg, c1,
|
|
|
|
|
nir_bcsel(b, max_gt1, c2, c));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set luminosity of cbase to match clum */
|
|
|
|
|
static nir_def *
|
|
|
|
|
set_lum(nir_builder *b, nir_def *cbase, nir_def *clum)
|
|
|
|
|
{
|
|
|
|
|
nir_def *lbase = lumv3(b, cbase);
|
|
|
|
|
nir_def *llum = lumv3(b, clum);
|
|
|
|
|
nir_def *diff = nir_fsub(b, llum, lbase);
|
|
|
|
|
nir_def *c = nir_fadd(b, cbase, diff);
|
|
|
|
|
|
|
|
|
|
return clip_color(b, c);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set saturation of cbase to match csat, then luminosity to match clum */
|
|
|
|
|
static nir_def *
|
|
|
|
|
set_lum_sat(nir_builder *b, nir_def *cbase, nir_def *csat, nir_def *clum)
|
|
|
|
|
{
|
|
|
|
|
nir_def *sbase = satv3(b, cbase);
|
|
|
|
|
nir_def *ssat = satv3(b, csat);
|
|
|
|
|
nir_def *minbase = minv3(b, cbase);
|
|
|
|
|
|
|
|
|
|
/* Scale saturation: (cbase - min) * ssat / sbase */
|
|
|
|
|
nir_def *scaled = nir_bcsel(b,
|
|
|
|
|
nir_fgt_imm(b, sbase, 0.0),
|
|
|
|
|
nir_fdiv(b, nir_fmul(b, nir_fsub(b, cbase, minbase), ssat), sbase),
|
|
|
|
|
imm3(b, 0.0));
|
|
|
|
|
|
|
|
|
|
return set_lum(b, scaled, clum);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_hsl_hue(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* Hue from src, saturation and luminosity from dst */
|
|
|
|
|
return set_lum_sat(b, src, dst, dst);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_hsl_saturation(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* Saturation from src, hue and luminosity from dst */
|
|
|
|
|
return set_lum_sat(b, dst, src, dst);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_hsl_color(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* Hue and saturation from src, luminosity from dst */
|
|
|
|
|
return set_lum(b, src, dst);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_hsl_luminosity(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* Luminosity from src, hue and saturation from dst */
|
|
|
|
|
return set_lum(b, dst, src);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_invert(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
return nir_fsub_imm(b, 1.0, dst);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_invert_rgb(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
return nir_fmul(b, src, nir_fsub_imm(b, 1.0, dst));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_lineardodge(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* min(1, src + dst) */
|
|
|
|
|
return nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src, dst));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_linearburn(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* max(0, src + dst - 1) */
|
|
|
|
|
return nir_fmax(b, nir_imm_float(b, 0.0),
|
|
|
|
|
nir_fadd(b, src, nir_fadd_imm(b, dst, -1.0)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_vividlight(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* if src <= 0: 0
|
|
|
|
|
* if src < 0.5: 1 - min(1, (1-dst) / (2*src))
|
|
|
|
|
* if src < 1: min(1, dst / (2*(1-src)))
|
|
|
|
|
* else: 1
|
|
|
|
|
*/
|
|
|
|
|
nir_def *two_src = nir_fmul_imm(b, src, 2.0);
|
|
|
|
|
nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst);
|
|
|
|
|
nir_def *one_minus_src = nir_fsub_imm(b, 1.0, src);
|
|
|
|
|
|
|
|
|
|
nir_def *case_lt_half = nir_fsub_imm(b, 1.0,
|
|
|
|
|
nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, one_minus_dst, two_src)));
|
|
|
|
|
nir_def *case_lt_one = nir_fmin(b, imm3(b, 1.0),
|
|
|
|
|
nir_fdiv(b, dst, nir_fmul_imm(b, one_minus_src, 2.0)));
|
|
|
|
|
|
|
|
|
|
return nir_bcsel(b, nir_fle_imm(b, src, 0.0), imm3(b, 0.0),
|
|
|
|
|
nir_bcsel(b, nir_flt_imm(b, src, 0.5), case_lt_half,
|
|
|
|
|
nir_bcsel(b, nir_flt_imm(b, src, 1.0), case_lt_one,
|
|
|
|
|
imm3(b, 1.0))));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_linearlight(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* if 2*src + dst > 2: 1
|
|
|
|
|
* if 2*src + dst <= 1: 0
|
|
|
|
|
* else: 2*src + dst - 1
|
|
|
|
|
*/
|
|
|
|
|
nir_def *two_src = nir_fmul_imm(b, src, 2.0);
|
|
|
|
|
nir_def *sum = nir_fadd(b, two_src, dst);
|
|
|
|
|
nir_def *result = nir_fsub(b, sum, imm3(b, 1.0));
|
|
|
|
|
|
|
|
|
|
return nir_bcsel(b, nir_fgt_imm(b, sum, 2.0), imm3(b, 1.0),
|
|
|
|
|
nir_bcsel(b, nir_fge(b, imm3(b, 1.0), sum), imm3(b, 0.0),
|
|
|
|
|
result));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_pinlight(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* if (2*src - 1 > dst) && src < 0.5: 0
|
|
|
|
|
* if (2*src - 1 > dst) && src >= 0.5: 2*src - 1
|
|
|
|
|
* if (2*src - 1 <= dst) && src < 0.5*dst: 2*src
|
|
|
|
|
* if (2*src - 1 <= dst) && src >= 0.5*dst: dst
|
|
|
|
|
*/
|
|
|
|
|
nir_def *two_src = nir_fmul_imm(b, src, 2.0);
|
|
|
|
|
nir_def *two_src_minus_1 = nir_fsub(b, two_src, imm3(b, 1.0));
|
|
|
|
|
nir_def *half_dst = nir_fmul_imm(b, dst, 0.5);
|
|
|
|
|
|
|
|
|
|
nir_def *cond1 = nir_flt(b, dst, two_src_minus_1);
|
|
|
|
|
nir_def *cond2 = nir_flt_imm(b, src, 0.5);
|
|
|
|
|
nir_def *cond3 = nir_flt(b, src, half_dst);
|
|
|
|
|
|
|
|
|
|
return nir_bcsel(b, cond1,
|
|
|
|
|
nir_bcsel(b, cond2, imm3(b, 0.0), two_src_minus_1),
|
|
|
|
|
nir_bcsel(b, cond3, two_src, dst));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
blend_hardmix(nir_builder *b, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* if src + dst >= 1: 1, else 0.
|
|
|
|
|
* Use small epsilon to handle 8-bit quantization.
|
|
|
|
|
*/
|
|
|
|
|
nir_def *sum = nir_fadd(b, src, dst);
|
|
|
|
|
nir_def *threshold = nir_imm_float(b, 1.0 - 0.5 / 255.0); /* ~0.998039 */
|
|
|
|
|
return nir_bcsel(b, nir_fge(b, sum, threshold),
|
|
|
|
|
imm3(b, 1.0), imm3(b, 0.0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Calculate the blend factor f(Cs', Cd').
|
|
|
|
|
* Returns NULL for blend modes where X=0, meaning f() is not used.
|
|
|
|
|
*/
|
|
|
|
|
static nir_def *
|
|
|
|
|
calc_blend_factor(nir_builder *b, enum pipe_advanced_blend_mode blend_op, nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
switch (blend_op) {
|
|
|
|
|
/* f() result unused (X=0) */
|
|
|
|
|
case PIPE_ADVANCED_BLEND_NONE:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SRC_OUT:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DST_OUT:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_XOR:
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
/* Standard blend modes */
|
|
|
|
|
case PIPE_ADVANCED_BLEND_MULTIPLY:
|
|
|
|
|
return blend_multiply(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SCREEN:
|
|
|
|
|
return blend_screen(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_OVERLAY:
|
|
|
|
|
return blend_overlay(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DARKEN:
|
|
|
|
|
return blend_darken(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_LIGHTEN:
|
|
|
|
|
return blend_lighten(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_COLORDODGE:
|
|
|
|
|
return blend_colordodge(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_COLORBURN:
|
|
|
|
|
return blend_colorburn(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HARDLIGHT:
|
|
|
|
|
return blend_hardlight(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SOFTLIGHT:
|
|
|
|
|
return blend_softlight(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DIFFERENCE:
|
|
|
|
|
return blend_difference(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_EXCLUSION:
|
|
|
|
|
return blend_exclusion(b, src, dst);
|
|
|
|
|
|
|
|
|
|
/* HSL blend modes */
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HSL_HUE:
|
|
|
|
|
return blend_hsl_hue(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HSL_SATURATION:
|
|
|
|
|
return blend_hsl_saturation(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HSL_COLOR:
|
|
|
|
|
return blend_hsl_color(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HSL_LUMINOSITY:
|
|
|
|
|
return blend_hsl_luminosity(b, src, dst);
|
|
|
|
|
|
|
|
|
|
/* Porter-Duff modes where f(Cs,Cd) = Cs or Cd */
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SRC:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SRC_OVER:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SRC_IN:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_SRC_ATOP:
|
|
|
|
|
return src;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DST:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DST_OVER:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DST_IN:
|
|
|
|
|
case PIPE_ADVANCED_BLEND_DST_ATOP:
|
|
|
|
|
return dst;
|
|
|
|
|
|
|
|
|
|
/* Extended blend modes */
|
|
|
|
|
case PIPE_ADVANCED_BLEND_INVERT:
|
|
|
|
|
return blend_invert(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_INVERT_RGB:
|
|
|
|
|
return blend_invert_rgb(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_LINEARDODGE:
|
|
|
|
|
return blend_lineardodge(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_LINEARBURN:
|
|
|
|
|
return blend_linearburn(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_VIVIDLIGHT:
|
|
|
|
|
return blend_vividlight(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_LINEARLIGHT:
|
|
|
|
|
return blend_linearlight(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_PINLIGHT:
|
|
|
|
|
return blend_pinlight(b, src, dst);
|
|
|
|
|
case PIPE_ADVANCED_BLEND_HARDMIX:
|
|
|
|
|
return blend_hardmix(b, src, dst);
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid advanced blend op");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
calc_additional_rgb_blend(nir_builder *b, const nir_lower_blend_options *options,
|
|
|
|
|
unsigned rt,
|
|
|
|
|
nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
nir_def *src_rgb = nir_trim_vector(b, src, 3);
|
|
|
|
|
nir_def *dst_rgb = nir_trim_vector(b, dst, 3);
|
|
|
|
|
nir_def *src_a = nir_channel(b, src, 3);
|
|
|
|
|
nir_def *dst_a = nir_channel(b, dst, 3);
|
|
|
|
|
|
|
|
|
|
/* Premultiply if non-premultiplied */
|
|
|
|
|
if (!options->rt[rt].src_premultiplied)
|
|
|
|
|
src_rgb = nir_fmul(b, src_rgb, src_a);
|
|
|
|
|
if (!options->rt[rt].dst_premultiplied)
|
|
|
|
|
dst_rgb = nir_fmul(b, dst_rgb, dst_a);
|
|
|
|
|
|
|
|
|
|
nir_def *rgb, *a;
|
|
|
|
|
|
|
|
|
|
switch (options->rt[rt].blend_mode) {
|
|
|
|
|
case PIPE_ADVANCED_BLEND_PLUS:
|
|
|
|
|
rgb = nir_fadd(b, src_rgb, dst_rgb);
|
|
|
|
|
a = nir_fadd(b, src_a, dst_a);
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_PLUS_CLAMPED:
|
|
|
|
|
rgb = nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src_rgb, dst_rgb));
|
|
|
|
|
a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_PLUS_CLAMPED_ALPHA: {
|
|
|
|
|
nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
|
|
|
|
|
rgb = nir_fmin(b, max_a, nir_fadd(b, src_rgb, dst_rgb));
|
|
|
|
|
a = max_a;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case PIPE_ADVANCED_BLEND_PLUS_DARKER: {
|
|
|
|
|
nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
|
|
|
|
|
/* max(0, max_a - ((src_a - src_rgb) + (dst_a - dst_rgb))) */
|
|
|
|
|
nir_def *src_diff = nir_fsub(b, src_a, src_rgb);
|
|
|
|
|
nir_def *dst_diff = nir_fsub(b, dst_a, dst_rgb);
|
|
|
|
|
rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, max_a, nir_fadd(b, src_diff, dst_diff)));
|
|
|
|
|
a = max_a;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case PIPE_ADVANCED_BLEND_MINUS:
|
|
|
|
|
rgb = nir_fsub(b, dst_rgb, src_rgb);
|
|
|
|
|
a = nir_fsub(b, dst_a, src_a);
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_MINUS_CLAMPED:
|
|
|
|
|
rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, dst_rgb, src_rgb));
|
|
|
|
|
a = nir_fmax(b, nir_imm_float(b, 0.0), nir_fsub(b, dst_a, src_a));
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_CONTRAST: {
|
|
|
|
|
/* res.rgb = (dst_a / 2) + 2 * (dst_rgb - dst_a/2) * (src_rgb - src_a/2) */
|
|
|
|
|
nir_def *half_dst_a = nir_fmul_imm(b, dst_a, 0.5);
|
|
|
|
|
nir_def *half_src_a = nir_fmul_imm(b, src_a, 0.5);
|
|
|
|
|
nir_def *dst_centered = nir_fsub(b, dst_rgb, half_dst_a);
|
|
|
|
|
nir_def *src_centered = nir_fsub(b, src_rgb, half_src_a);
|
|
|
|
|
rgb = nir_fadd(b, half_dst_a,
|
|
|
|
|
nir_fmul_imm(b, nir_fmul(b, dst_centered, src_centered), 2.0));
|
|
|
|
|
a = dst_a;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case PIPE_ADVANCED_BLEND_INVERT_OVG: {
|
|
|
|
|
/* res.rgb = src_a * (1 - dst_rgb) + (1 - src_a) * dst_rgb */
|
|
|
|
|
nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst_rgb);
|
|
|
|
|
nir_def *one_minus_src_a = nir_fsub_imm(b, 1.0, src_a);
|
|
|
|
|
rgb = nir_fadd(b, nir_fmul(b, src_a, one_minus_dst),
|
|
|
|
|
nir_fmul(b, one_minus_src_a, dst_rgb));
|
|
|
|
|
a = nir_fsub(b, nir_fadd(b, src_a, dst_a), nir_fmul(b, src_a, dst_a));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case PIPE_ADVANCED_BLEND_RED:
|
|
|
|
|
rgb = nir_vec3(b, nir_channel(b, src_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, dst_rgb, 2));
|
|
|
|
|
a = dst_a;
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_GREEN:
|
|
|
|
|
rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, src_rgb, 1), nir_channel(b, dst_rgb, 2));
|
|
|
|
|
a = dst_a;
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_ADVANCED_BLEND_BLUE:
|
|
|
|
|
rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, src_rgb, 2));
|
|
|
|
|
a = dst_a;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("Invalid additional RGB blend op");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If dst is non-premultiplied, the output should also be non-premultiplied */
|
|
|
|
|
if (!options->rt[rt].dst_premultiplied) {
|
|
|
|
|
rgb = nir_bcsel(b,
|
|
|
|
|
nir_fgt_imm(b, a, 0.0),
|
|
|
|
|
nir_fdiv(b, rgb, a),
|
|
|
|
|
imm3(b, 0.0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1),
|
|
|
|
|
nir_channel(b, rgb, 2), a);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* X, Y, Z blend factors for the advanced blend equation:
|
|
|
|
|
* RGB = f(Cs',Cd') * X * p0 + Cs' * Y * p1 + Cd' * Z * p2
|
|
|
|
|
* A = X * p0 + Y * p1 + Z * p2
|
|
|
|
|
*
|
|
|
|
|
* Index by enum pipe_advanced_blend_mode.
|
|
|
|
|
* Modes >= PIPE_ADVANCED_BLEND_PLUS use separate calc_additional_rgb_blend().
|
|
|
|
|
*/
|
|
|
|
|
static const float blend_xyz[][3] = {
|
|
|
|
|
[PIPE_ADVANCED_BLEND_NONE] = { 0, 0, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_MULTIPLY] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SCREEN] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_OVERLAY] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DARKEN] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_LIGHTEN] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_COLORDODGE] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_COLORBURN] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HARDLIGHT] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SOFTLIGHT] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DIFFERENCE] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_EXCLUSION] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HSL_HUE] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HSL_SATURATION] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HSL_COLOR] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HSL_LUMINOSITY] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SRC] = { 1, 1, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DST] = { 1, 0, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SRC_OVER] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DST_OVER] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SRC_IN] = { 1, 0, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DST_IN] = { 1, 0, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SRC_OUT] = { 0, 1, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DST_OUT] = { 0, 0, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_SRC_ATOP] = { 1, 0, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_DST_ATOP] = { 1, 1, 0 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_XOR] = { 0, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_INVERT] = { 1, 0, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_INVERT_RGB] = { 1, 0, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_LINEARDODGE] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_LINEARBURN] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_VIVIDLIGHT] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_LINEARLIGHT] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_PINLIGHT] = { 1, 1, 1 },
|
|
|
|
|
[PIPE_ADVANCED_BLEND_HARDMIX] = { 1, 1, 1 },
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
nir_blend_advanced(
|
|
|
|
|
nir_builder *b,
|
|
|
|
|
const nir_lower_blend_options *options,
|
|
|
|
|
unsigned rt,
|
|
|
|
|
nir_def *src, nir_def *dst)
|
|
|
|
|
{
|
|
|
|
|
/* Advanced blend uses hardcoded 32-bit constants. Convert inputs to f32
|
|
|
|
|
* and convert back at the end.
|
|
|
|
|
*/
|
|
|
|
|
const unsigned bit_size = src->bit_size;
|
|
|
|
|
src = nir_f2f32(b, src);
|
|
|
|
|
dst = nir_f2f32(b, dst);
|
|
|
|
|
|
|
|
|
|
/* Check if this is an additional RGB blend op */
|
|
|
|
|
if (options->rt[rt].blend_mode >= PIPE_ADVANCED_BLEND_PLUS &&
|
|
|
|
|
options->rt[rt].blend_mode <= PIPE_ADVANCED_BLEND_BLUE) {
|
|
|
|
|
nir_def *result = calc_additional_rgb_blend(b, options, rt, src, dst);
|
|
|
|
|
return nir_f2fN(b, result, bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_def *src_rgb = nir_trim_vector(b, src, 3);
|
|
|
|
|
nir_def *dst_rgb = nir_trim_vector(b, dst, 3);
|
|
|
|
|
nir_def *src_a = nir_channel(b, src, 3);
|
|
|
|
|
nir_def *dst_a = nir_channel(b, dst, 3);
|
|
|
|
|
|
|
|
|
|
/* Unpremultiply */
|
|
|
|
|
nir_def *src_rgb_unpre;
|
|
|
|
|
if (options->rt[rt].src_premultiplied) {
|
|
|
|
|
src_rgb_unpre = nir_bcsel(b,
|
|
|
|
|
nir_feq_imm(b, src_a, 0.0),
|
|
|
|
|
imm3(b, 0.0),
|
|
|
|
|
nir_fdiv(b, src_rgb, src_a));
|
|
|
|
|
} else {
|
|
|
|
|
src_rgb_unpre = src_rgb;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_def *dst_rgb_unpre;
|
|
|
|
|
if (options->rt[rt].dst_premultiplied) {
|
|
|
|
|
dst_rgb_unpre = nir_bcsel(b,
|
|
|
|
|
nir_feq_imm(b, dst_a, 0.0),
|
|
|
|
|
imm3(b, 0.0),
|
|
|
|
|
nir_fdiv(b, dst_rgb, dst_a));
|
|
|
|
|
} else {
|
|
|
|
|
dst_rgb_unpre = dst_rgb;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* f(Cs', Cd') - may be NULL if X=0 (result unused) */
|
|
|
|
|
nir_def *factor = calc_blend_factor(b, options->rt[rt].blend_mode, src_rgb_unpre, dst_rgb_unpre);
|
|
|
|
|
|
|
|
|
|
nir_def *p0, *p1, *p2;
|
|
|
|
|
|
|
|
|
|
switch (options->rt[rt].overlap) {
|
|
|
|
|
case PIPE_BLEND_OVERLAP_UNCORRELATED:
|
|
|
|
|
/* p0 = As * Ad, p1 = As * (1 - Ad), p2 = Ad * (1 - As) */
|
|
|
|
|
p0 = nir_fmul(b, src_a, dst_a);
|
|
|
|
|
p1 = nir_fmul(b, src_a, nir_fsub_imm(b, 1.0, dst_a));
|
|
|
|
|
p2 = nir_fmul(b, dst_a, nir_fsub_imm(b, 1.0, src_a));
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_BLEND_OVERLAP_CONJOINT:
|
|
|
|
|
/* p0 = min(As, Ad), p1 = max(As - Ad, 0), p2 = max(Ad - As, 0) */
|
|
|
|
|
p0 = nir_fmin(b, src_a, dst_a);
|
|
|
|
|
p1 = nir_fmax(b, nir_fsub(b, src_a, dst_a), nir_imm_float(b, 0.0));
|
|
|
|
|
p2 = nir_fmax(b, nir_fsub(b, dst_a, src_a), nir_imm_float(b, 0.0));
|
|
|
|
|
break;
|
|
|
|
|
case PIPE_BLEND_OVERLAP_DISJOINT:
|
|
|
|
|
/* p0 = max(As + Ad - 1, 0), p1 = min(As, 1 - Ad), p2 = min(Ad, 1 - As) */
|
|
|
|
|
p0 = nir_fmax(b, nir_fadd_imm(b, nir_fadd(b, src_a, dst_a), -1.0), nir_imm_float(b, 0.0));
|
|
|
|
|
p1 = nir_fmin(b, src_a, nir_fsub_imm(b, 1.0, dst_a));
|
|
|
|
|
p2 = nir_fmin(b, dst_a, nir_fsub_imm(b, 1.0, src_a));
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
UNREACHABLE("invalid overlap");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const float x = blend_xyz[options->rt[rt].blend_mode][0];
|
|
|
|
|
const float y = blend_xyz[options->rt[rt].blend_mode][1];
|
|
|
|
|
const float z = blend_xyz[options->rt[rt].blend_mode][2];
|
|
|
|
|
|
|
|
|
|
/* RGB = f * X * p0 + Cs' * Y * p1 + Cd' * Z * p2 */
|
|
|
|
|
nir_def *rgb = imm3(b, 0.0);
|
|
|
|
|
if (factor)
|
|
|
|
|
rgb = nir_fmul(b, factor, nir_fmul_imm(b, p0, x));
|
|
|
|
|
if (y != 0.0)
|
|
|
|
|
rgb = nir_fadd(b, rgb, nir_fmul(b, src_rgb_unpre, nir_fmul_imm(b, p1, y)));
|
|
|
|
|
if (z != 0.0)
|
|
|
|
|
rgb = nir_fadd(b, rgb, nir_fmul(b, dst_rgb_unpre, nir_fmul_imm(b, p2, z)));
|
|
|
|
|
|
|
|
|
|
/* A = X * p0 + Y * p1 + Z * p2 */
|
|
|
|
|
nir_def *a = nir_imm_float(b, 0.0);
|
|
|
|
|
if (x != 0.0)
|
|
|
|
|
a = nir_fmul_imm(b, p0, x);
|
|
|
|
|
if (y != 0.0)
|
|
|
|
|
a = nir_fadd(b, a, nir_fmul_imm(b, p1, y));
|
|
|
|
|
if (z != 0.0)
|
|
|
|
|
a = nir_fadd(b, a, nir_fmul_imm(b, p2, z));
|
|
|
|
|
|
|
|
|
|
/* If dst is non-premultiplied, the output should also be non-premultiplied */
|
|
|
|
|
if (!options->rt[rt].dst_premultiplied) {
|
|
|
|
|
rgb = nir_bcsel(b,
|
|
|
|
|
nir_fgt_imm(b, a, 0.0),
|
|
|
|
|
nir_fdiv(b, rgb, a),
|
|
|
|
|
imm3(b, 0.0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nir_def *result = nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1),
|
|
|
|
|
nir_channel(b, rgb, 2), a);
|
|
|
|
|
return nir_f2fN(b, result, bit_size);
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
/* Given a blend state, the source color, and the destination color,
|
|
|
|
|
* return the blended color
|
|
|
|
|
*/
|
|
|
|
|
|
2026-01-15 15:43:25 -05:00
|
|
|
nir_def *
|
|
|
|
|
nir_color_blend(nir_builder *b, nir_def *src, nir_def *src1, nir_def *dst,
|
|
|
|
|
const nir_lower_blend_rt *rt, bool scalar_blend_const)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
2026-01-15 15:43:25 -05:00
|
|
|
if (util_format_is_pure_integer(rt->format) || nir_blend_replace_rt(rt))
|
|
|
|
|
return src;
|
|
|
|
|
|
2023-02-26 19:15:48 -05:00
|
|
|
/* Don't crash if src1 isn't written. It doesn't matter what dual colour we
|
|
|
|
|
* blend with in that case, as long as we don't dereference NULL.
|
|
|
|
|
*/
|
|
|
|
|
if (!src1)
|
|
|
|
|
src1 = nir_imm_zero(b, 4, src->bit_size);
|
|
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
/* Grab the blend constant ahead of time */
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *bconst;
|
2026-01-15 15:43:25 -05:00
|
|
|
if (scalar_blend_const) {
|
2021-05-04 14:11:12 -04:00
|
|
|
bconst = nir_vec4(b,
|
|
|
|
|
nir_load_blend_const_color_r_float(b),
|
|
|
|
|
nir_load_blend_const_color_g_float(b),
|
|
|
|
|
nir_load_blend_const_color_b_float(b),
|
|
|
|
|
nir_load_blend_const_color_a_float(b));
|
|
|
|
|
} else {
|
|
|
|
|
bconst = nir_load_blend_const_color_rgba(b);
|
|
|
|
|
}
|
|
|
|
|
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
if (src->bit_size == 16) {
|
2021-05-04 14:11:12 -04:00
|
|
|
bconst = nir_f2f16(b, bconst);
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
src1 = nir_f2f16(b, src1);
|
|
|
|
|
}
|
2021-05-04 14:11:12 -04:00
|
|
|
|
nir/lower_blend: fix snorm factor clamping
The spec says (emphasis mine):
If the color attachment is fixed-point, the components of the source and
destination values **AND BLEND FACTORS** are each clamped to [0,1] or [-1,1]
respectively for an unsigned normalized or signed normalized color attachment
prior to evaluating the blend operations. If the color attachment is
floating-point, no clamping occurs.
However, neither the CTS nor any hardware implement this semantic.
For unsigned normalized formats, the definitions are roughly equivalent (except
perhaps around constant colours). 0 <= x <= 1 implies that 0 <= 1 - x <= 1.
Therefore if the source/destination colours are clamped to [0, 1], then their
complements are also in [0, 1], so clamping any blend factor (except constant
colour) has no effect if the source/dest were already clamped.
For signed normalized formats, however, this difference matters. -1 <= x <= 1
implies that 0 <= 1 - x <= 2... so to implement the spec text faithfully, we
would need to clamp again the complemented colour blend factors to return back
to signed normalized range. Software blending implementations can of course do
that... but doing so causes CTS fails, as the CTS reference renderer does not do
this.
This commit adjusts nir_lower_blend to match what actual hardware does, what CTS
requires, and what the spec should have said.
See https://gitlab.khronos.org/vulkan/vulkan/-/issues/4293 for the spec
resolution.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35519>
2025-06-13 14:30:27 -04:00
|
|
|
/* The input colours need to be clamped to the format. Contrary to the
|
|
|
|
|
* OpenGL/Vulkan specs, it really is the inputs that get clamped and not the
|
|
|
|
|
* intermediate blend factors. This matches the CTS and hardware behaviour.
|
2021-10-22 21:12:59 -04:00
|
|
|
*/
|
2026-01-15 15:43:25 -05:00
|
|
|
src = nir_fsat_to_format(b, src, rt->format);
|
|
|
|
|
bconst = nir_fsat_to_format(b, bconst, rt->format);
|
2022-11-25 21:40:16 -05:00
|
|
|
|
|
|
|
|
if (src1)
|
2026-01-15 15:43:25 -05:00
|
|
|
src1 = nir_fsat_to_format(b, src1, rt->format);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
/* DST_ALPHA reads back 1.0 if there is no alpha channel */
|
|
|
|
|
const struct util_format_description *desc =
|
2026-01-15 15:43:25 -05:00
|
|
|
util_format_description(rt->format);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
|
|
|
|
|
nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-11-12 13:50:07 -05:00
|
|
|
dst = nir_vec4(b,
|
2023-08-08 12:00:35 -05:00
|
|
|
channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
|
|
|
|
|
channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
|
|
|
|
|
channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
|
|
|
|
|
channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
/* We blend per channel and recombine later */
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *channels[4];
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
/* Decide properties based on channel */
|
2026-01-15 15:43:25 -05:00
|
|
|
nir_lower_blend_channel chan = (c < 3) ? rt->rgb : rt->alpha;
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *psrc = nir_channel(b, src, c);
|
|
|
|
|
nir_def *pdst = nir_channel(b, dst, c);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
if (nir_blend_factored(chan.func)) {
|
|
|
|
|
psrc = nir_blend_factor(
|
2023-08-08 12:00:35 -05:00
|
|
|
b, psrc,
|
|
|
|
|
src, src1, dst, bconst, c,
|
2026-01-15 15:43:25 -05:00
|
|
|
chan.src_factor, rt->format);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
|
|
|
|
pdst = nir_blend_factor(
|
2023-08-08 12:00:35 -05:00
|
|
|
b, pdst,
|
|
|
|
|
src, src1, dst, bconst, c,
|
2026-01-15 15:43:25 -05:00
|
|
|
chan.dst_factor, rt->format);
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nir_vec(b, channels, 4);
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-03 12:34:44 -05:00
|
|
|
static int
|
2022-11-28 22:28:13 -05:00
|
|
|
color_index_for_location(unsigned location)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
2022-11-28 22:28:13 -05:00
|
|
|
assert(location != FRAG_RESULT_COLOR &&
|
2023-02-15 10:27:48 -05:00
|
|
|
"gl_FragColor must be lowered before nir_lower_blend");
|
2022-05-03 12:34:44 -05:00
|
|
|
|
2022-11-28 22:28:13 -05:00
|
|
|
if (location < FRAG_RESULT_DATA0)
|
2023-02-15 10:27:48 -05:00
|
|
|
return -1;
|
|
|
|
|
else
|
2022-11-28 22:28:13 -05:00
|
|
|
return location - FRAG_RESULT_DATA0;
|
2022-05-03 12:34:44 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
2023-08-17 17:47:42 -05:00
|
|
|
nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
|
2022-05-03 12:34:44 -05:00
|
|
|
{
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
struct ctx *ctx = data;
|
|
|
|
|
const nir_lower_blend_options *options = ctx->options;
|
2022-11-28 22:28:13 -05:00
|
|
|
if (store->intrinsic != nir_intrinsic_store_output)
|
|
|
|
|
return false;
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-11-28 22:28:13 -05:00
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
|
|
|
|
int rt = color_index_for_location(sem.location);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2021-06-10 16:29:34 +02:00
|
|
|
/* No blend lowering requested on this RT */
|
2026-01-15 15:26:30 -05:00
|
|
|
if (rt < 0 || options->rt[rt].format == PIPE_FORMAT_NONE)
|
2021-06-10 16:29:34 +02:00
|
|
|
return false;
|
|
|
|
|
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
/* Only process stores once. Pass flags are cleared by consume_dual_stores */
|
2023-08-17 17:47:42 -05:00
|
|
|
if (store->instr.pass_flags)
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
return false;
|
|
|
|
|
|
2023-08-17 17:47:42 -05:00
|
|
|
store->instr.pass_flags = 1;
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
|
|
|
|
|
/* Store are sunk to the bottom of the block to ensure that the dual
|
|
|
|
|
* source colour is already written.
|
|
|
|
|
*/
|
2023-08-17 17:47:42 -05:00
|
|
|
b->cursor = nir_after_block(store->instr.block);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2026-01-15 15:26:30 -05:00
|
|
|
const enum pipe_format format = options->rt[rt].format;
|
2025-04-06 18:20:33 -04:00
|
|
|
enum pipe_logicop logicop_func = options->logicop_func;
|
|
|
|
|
|
2025-04-06 18:20:50 -04:00
|
|
|
/* From the Vulkan spec ("Logical operations"):
|
|
|
|
|
*
|
|
|
|
|
* Logical operations are not applied to floating-point or sRGB format
|
|
|
|
|
* color attachments...
|
|
|
|
|
*
|
|
|
|
|
* If logicOpEnable is VK_TRUE... blending of all attachments is treated
|
|
|
|
|
* as if it were disabled. Any attachments using color formats for which
|
|
|
|
|
* logical operations are not supported simply pass through the color
|
|
|
|
|
* values unmodified.
|
|
|
|
|
*
|
|
|
|
|
* The semantic for unsupported formats is equivalent to a logicop of COPY.
|
|
|
|
|
* It is /not/ equivalent to disabled logicops (which would incorrectly apply
|
|
|
|
|
* blending). To implement this spec text with minimal special casing, we
|
|
|
|
|
* override the logicop func to COPY for unsupported formats.
|
|
|
|
|
*/
|
|
|
|
|
if (util_format_is_float(format) || util_format_is_srgb(format)) {
|
|
|
|
|
logicop_func = PIPE_LOGICOP_COPY;
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-26 12:54:58 -04:00
|
|
|
/* Don't bother copying the destination to the source for disabled RTs */
|
2023-07-19 13:30:54 -04:00
|
|
|
if (options->rt[rt].colormask == 0 ||
|
2025-04-06 18:20:33 -04:00
|
|
|
(options->logicop_enable && logicop_func == PIPE_LOGICOP_NOOP)) {
|
2023-07-19 13:30:54 -04:00
|
|
|
|
2023-08-17 17:47:42 -05:00
|
|
|
nir_instr_remove(&store->instr);
|
2023-06-26 12:54:58 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-03 12:34:44 -05:00
|
|
|
/* Grab the input color. We always want 4 channels during blend. Dead
|
|
|
|
|
* code will clean up any channels we don't need.
|
|
|
|
|
*/
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-11-28 22:28:13 -05:00
|
|
|
assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");
|
|
|
|
|
|
2023-04-09 10:14:29 -04:00
|
|
|
/* Grab the previous fragment color if we need it */
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *dst;
|
2023-04-09 10:14:29 -04:00
|
|
|
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
if (options->rt[rt].advanced_blend ||
|
|
|
|
|
channel_uses_dest(options->rt[rt].rgb) ||
|
2023-04-09 10:14:29 -04:00
|
|
|
channel_uses_dest(options->rt[rt].alpha) ||
|
|
|
|
|
options->logicop_enable ||
|
|
|
|
|
options->rt[rt].colormask != BITFIELD_MASK(4)) {
|
|
|
|
|
|
|
|
|
|
b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
|
|
|
|
|
b->shader->info.fs.uses_fbfetch_output = true;
|
2023-04-09 10:21:32 -04:00
|
|
|
b->shader->info.fs.uses_sample_shading = true;
|
2023-04-09 10:14:29 -04:00
|
|
|
sem.fb_fetch_output = true;
|
|
|
|
|
|
|
|
|
|
dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
|
|
|
|
|
nir_imm_int(b, 0),
|
|
|
|
|
.dest_type = nir_intrinsic_src_type(store),
|
|
|
|
|
.io_semantics = sem);
|
|
|
|
|
} else {
|
2023-08-12 16:17:15 -04:00
|
|
|
dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
|
2023-04-09 10:14:29 -04:00
|
|
|
}
|
2021-05-04 14:11:12 -04:00
|
|
|
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
/* Blend the two colors per the passed options. Blending is prioritized as:
|
|
|
|
|
* 1. Logic operations (if logicop_enable is true) - mutually exclusive with blending
|
|
|
|
|
* 2. Advanced blending (if advanced_blend is set) - uses complex blend equations
|
|
|
|
|
* 3. Standard blending (if configured) - uses traditional blend factors
|
|
|
|
|
*
|
|
|
|
|
* We only call nir_blend if blending is enabled with a blend mode other than replace
|
|
|
|
|
* (independent of the color mask). That avoids unnecessary fsat instructions in the
|
|
|
|
|
* common case where blending is disabled at an API level, but the driver calls
|
2022-09-10 16:08:13 -04:00
|
|
|
* nir_blend (possibly for color masking).
|
|
|
|
|
*/
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *blended = src;
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2021-09-06 12:24:29 +02:00
|
|
|
if (options->logicop_enable) {
|
2026-01-15 15:43:25 -05:00
|
|
|
blended = nir_color_logicop(b, src, dst, options->logicop_func, format);
|
nir/lower_blend: Add advanced blending support
Add support for advanced blending (VK_EXT_blend_operation_advanced and
GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes
including multiply, screen, overlay, HSL modes (hue, saturation, color,
luminosity), Porter-Duff modes, and extended modes like lineardodge
and vividlight.
Advanced blending slots into the existing blending logic alongside logic
operations and standard blending. The implementation supports both
premultiplied and non-premultiplied alpha for source and destination, and
provides three overlap modes (uncorrelated, conjoint, disjoint).
Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
2025-12-12 21:15:36 +01:00
|
|
|
} else if (options->rt[rt].advanced_blend) {
|
|
|
|
|
blended = nir_blend_advanced(b, options, rt, src, dst);
|
2025-04-06 18:20:33 -04:00
|
|
|
} else if (!util_format_is_pure_integer(format) &&
|
2022-09-10 16:08:13 -04:00
|
|
|
!nir_blend_replace_rt(&options->rt[rt])) {
|
2025-04-06 18:20:33 -04:00
|
|
|
assert(!util_format_is_scaled(format));
|
2026-01-15 15:43:25 -05:00
|
|
|
blended = nir_color_blend(b, src, ctx->src1[rt], dst, &options->rt[rt],
|
|
|
|
|
options->scalar_blend_const);
|
2021-09-06 12:24:29 +02:00
|
|
|
}
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-12-17 23:56:52 -05:00
|
|
|
/* Apply a colormask if necessary */
|
2026-01-15 15:43:25 -05:00
|
|
|
blended = nir_color_mask(b, blended, dst, options->rt[rt].colormask);
|
2021-05-04 14:11:12 -04:00
|
|
|
|
2022-05-03 12:34:44 -05:00
|
|
|
/* Shave off any components we don't want to store */
|
2025-04-06 18:20:33 -04:00
|
|
|
const unsigned num_components = util_format_get_nr_components(format);
|
2022-05-03 12:34:44 -05:00
|
|
|
blended = nir_trim_vector(b, blended, num_components);
|
|
|
|
|
|
|
|
|
|
/* Grow or shrink the store destination as needed */
|
|
|
|
|
store->num_components = num_components;
|
2022-11-28 20:26:26 -05:00
|
|
|
nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
|
2023-08-08 12:00:35 -05:00
|
|
|
nir_component_mask(num_components));
|
2021-09-25 14:08:58 +02:00
|
|
|
|
2021-05-04 14:11:12 -04:00
|
|
|
/* Write out the final color instead of the input */
|
2023-08-17 15:44:47 -05:00
|
|
|
nir_src_rewrite(&store->src[0], blended);
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
|
|
|
|
|
/* Sink to bottom */
|
2023-08-17 17:47:42 -05:00
|
|
|
nir_instr_remove(&store->instr);
|
|
|
|
|
nir_builder_instr_insert(b, &store->instr);
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Dual-source colours are only for blending, so when nir_lower_blend is used,
|
|
|
|
|
* the dual source store_output is for us (only). Remove dual stores so the
|
|
|
|
|
* backend doesn't have to deal with them, collecting the sources for blending.
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
2023-08-17 17:47:42 -05:00
|
|
|
consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
{
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def **outputs = data;
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
if (store->intrinsic != nir_intrinsic_store_output)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* While we're here, clear the pass flags for store_outputs, since we'll set
|
|
|
|
|
* them later.
|
|
|
|
|
*/
|
2023-08-17 17:47:42 -05:00
|
|
|
store->instr.pass_flags = 0;
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
|
|
|
|
|
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
2025-11-23 13:11:11 -05:00
|
|
|
int rt = 0;
|
|
|
|
|
if (sem.dual_source_blend_index)
|
|
|
|
|
rt = color_index_for_location(sem.location);
|
|
|
|
|
else if (sem.location != FRAG_RESULT_DUAL_SRC_BLEND)
|
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour
via a sideband doesn't make any sense. The primary source blend colours are
implicitly passed in as the sources of store_output intrinsics; likewise, we
should get dual source blend colours from their respective stores. And since
dual colours are only needed by blending, we can delete the stores as we go.
That means nir_lower_blend now provides an all-in-one software lowering of dual
source blending with no driver support needed! It even works for 8 dual-src
render targets, but I don't have a use case for that.
The only tricky bit here is making sure we are robust against different orders
of store_output within the exit block. In particular, if we naively lower
x = ...
primary color = x
y = ...
dual color = y
we end up emitting uses of y before it has been defined, something like
x = ...
primary color = blend(x, y)
y = ...
Instead, we remove dual stores and sink blend stores to the bottom of the block,
so we end up with the correct
x = ...
y = ...
primary color = blend(x, y)
lower_io_to_temporaries ensures that the stores will be in the same (exit)
block, so we don't need to sink further than that ourselves.
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
2023-02-19 23:08:58 -05:00
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");
|
|
|
|
|
|
|
|
|
|
outputs[rt] = store->src[0].ssa;
|
2023-08-17 17:47:42 -05:00
|
|
|
nir_instr_remove(&store->instr);
|
2021-05-04 14:11:12 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-03 09:28:01 -05:00
|
|
|
/** Lower blending to framebuffer fetch and some math
|
|
|
|
|
*
|
2022-11-28 22:28:13 -05:00
|
|
|
* This pass requires that shader I/O is lowered to explicit load/store
|
|
|
|
|
* instructions using nir_lower_io.
|
2022-05-03 09:28:01 -05:00
|
|
|
*/
|
2024-01-09 15:56:56 -04:00
|
|
|
bool
|
2022-05-03 10:44:30 -05:00
|
|
|
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
|
2021-05-04 14:11:12 -04:00
|
|
|
{
|
|
|
|
|
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
|
|
|
|
|
2023-08-08 12:00:35 -05:00
|
|
|
struct ctx ctx = { .options = options };
|
2024-01-09 15:56:56 -04:00
|
|
|
bool progress = nir_shader_intrinsics_pass(shader, consume_dual_stores,
|
2024-06-16 16:32:01 -04:00
|
|
|
nir_metadata_control_flow,
|
2024-01-09 15:56:56 -04:00
|
|
|
ctx.src1);
|
|
|
|
|
|
|
|
|
|
progress |= nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
|
2024-06-16 16:32:01 -04:00
|
|
|
nir_metadata_control_flow,
|
2024-01-09 15:56:56 -04:00
|
|
|
&ctx);
|
|
|
|
|
return progress;
|
2021-05-04 14:11:12 -04:00
|
|
|
}
|