From 4fedf6bedded4094834571ca6405bb24c68b0cf1 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Fri, 12 Dec 2025 21:15:36 +0100 Subject: [PATCH] nir/lower_blend: Add advanced blending support Add support for advanced blending (VK_EXT_blend_operation_advanced and GL_KHR_blend_equation_advanced), enabling around 40 advanced blend modes including multiply, screen, overlay, HSL modes (hue, saturation, color, luminosity), Porter-Duff modes, and extended modes like lineardodge and vividlight. Advanced blending slots into the existing blending logic alongside logic operations and standard blending. The implementation supports both premultiplied and non-premultiplied alpha for source and destination, and provides three overlap modes (uncorrelated, conjoint, disjoint). Signed-off-by: Christian Gmeiner Acked-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_lower_blend.c | 569 ++++++++++++++++++++++++++++- src/compiler/nir/nir_lower_blend.h | 8 +- 2 files changed, 571 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_lower_blend.c b/src/compiler/nir/nir_lower_blend.c index 7b0f68f04c1..726503fdfa9 100644 --- a/src/compiler/nir/nir_lower_blend.c +++ b/src/compiler/nir/nir_lower_blend.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2025 Google LLC * Copyright (C) 2019-2021 Collabora, Ltd. * Copyright (C) 2019 Alyssa Rosenzweig * @@ -33,9 +34,11 @@ #include "nir_lower_blend.h" #include "compiler/nir/nir.h" +#include "compiler/nir/nir_blend_equation_advanced_helper.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_format_convert.h" #include "util/blend.h" +#include "nir_builder_opcodes.h" struct ctx { const nir_lower_blend_options *options; @@ -351,6 +354,555 @@ nir_blend_replace_rt(const nir_lower_blend_rt *rt) } +static nir_def * +minv3(nir_builder *b, nir_def *v) +{ + return nir_fmin(b, nir_fmin(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), + nir_channel(b, v, 2)); +} + +static nir_def * +maxv3(nir_builder *b, nir_def *v) +{ + return nir_fmax(b, nir_fmax(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), + nir_channel(b, v, 2)); +} + +static nir_def * +lumv3(nir_builder *b, nir_def *c) +{ + return nir_fdot(b, c, nir_imm_vec3(b, 0.30, 0.59, 0.11)); +} + +static nir_def * +satv3(nir_builder *b, nir_def *c) +{ + return nir_fsub(b, maxv3(b, c), minv3(b, c)); +} + +/* Clip color to [0,1] while preserving luminosity */ +static nir_def * +clip_color(nir_builder *b, nir_def *c) +{ + nir_def *lum = lumv3(b, c); + nir_def *mincol = minv3(b, c); + nir_def *maxcol = maxv3(b, c); + + /* If min < 0: c = lum + (c - lum) * lum / (lum - min) */ + nir_def *t1 = nir_fdiv(b, + nir_fmul(b, nir_fsub(b, c, lum), lum), + nir_fsub(b, lum, mincol)); + nir_def *c1 = nir_fadd(b, lum, t1); + + /* If max > 1: c = lum + (c - lum) * (1 - lum) / (max - lum) */ + nir_def *t2 = nir_fdiv(b, + nir_fmul(b, nir_fsub(b, c, lum), nir_fsub_imm(b, 1.0, lum)), + nir_fsub(b, maxcol, lum)); + nir_def *c2 = nir_fadd(b, lum, t2); + + nir_def *min_neg = nir_flt_imm(b, mincol, 0.0); + nir_def *max_gt1 = nir_fgt_imm(b, maxcol, 1.0); + + return nir_bcsel(b, min_neg, c1, + nir_bcsel(b, max_gt1, c2, c)); +} + +/* Set luminosity of cbase to match clum */ +static nir_def * +set_lum(nir_builder *b, nir_def *cbase, nir_def *clum) +{ + nir_def *lbase = lumv3(b, cbase); + nir_def *llum = lumv3(b, clum); + nir_def *diff = nir_fsub(b, llum, lbase); + nir_def *c = nir_fadd(b, cbase, diff); + + return clip_color(b, c); +} + +/* Set saturation of cbase to match csat, then luminosity to match clum */ +static nir_def * +set_lum_sat(nir_builder *b, nir_def *cbase, nir_def *csat, nir_def *clum) +{ + nir_def *sbase = satv3(b, cbase); + nir_def *ssat = satv3(b, csat); + nir_def *minbase = minv3(b, cbase); + + /* Scale saturation: (cbase - min) * ssat / sbase */ + nir_def *scaled = nir_bcsel(b, + nir_fgt_imm(b, sbase, 0.0), + nir_fdiv(b, nir_fmul(b, nir_fsub(b, cbase, minbase), ssat), sbase), + imm3(b, 0.0)); + + return set_lum(b, scaled, clum); +} + +static nir_def * +blend_hsl_hue(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* Hue from src, saturation and luminosity from dst */ + return set_lum_sat(b, src, dst, dst); +} + +static nir_def * +blend_hsl_saturation(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* Saturation from src, hue and luminosity from dst */ + return set_lum_sat(b, dst, src, dst); +} + +static nir_def * +blend_hsl_color(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* Hue and saturation from src, luminosity from dst */ + return set_lum(b, src, dst); +} + +static nir_def * +blend_hsl_luminosity(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* Luminosity from src, hue and saturation from dst */ + return set_lum(b, dst, src); +} + +static nir_def * +blend_invert(nir_builder *b, nir_def *src, nir_def *dst) +{ + return nir_fsub_imm(b, 1.0, dst); +} + +static nir_def * +blend_invert_rgb(nir_builder *b, nir_def *src, nir_def *dst) +{ + return nir_fmul(b, src, nir_fsub_imm(b, 1.0, dst)); +} + +static nir_def * +blend_lineardodge(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* min(1, src + dst) */ + return nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src, dst)); +} + +static nir_def * +blend_linearburn(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* max(0, src + dst - 1) */ + return nir_fmax(b, nir_imm_float(b, 0.0), + nir_fadd(b, src, nir_fadd_imm(b, dst, -1.0))); +} + +static nir_def * +blend_vividlight(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* + * if src <= 0: 0 + * if src < 0.5: 1 - min(1, (1-dst) / (2*src)) + * if src < 1: min(1, dst / (2*(1-src))) + * else: 1 + */ + nir_def *two_src = nir_fmul_imm(b, src, 2.0); + nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst); + nir_def *one_minus_src = nir_fsub_imm(b, 1.0, src); + + nir_def *case_lt_half = nir_fsub_imm(b, 1.0, + nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, one_minus_dst, two_src))); + nir_def *case_lt_one = nir_fmin(b, imm3(b, 1.0), + nir_fdiv(b, dst, nir_fmul_imm(b, one_minus_src, 2.0))); + + return nir_bcsel(b, nir_fle_imm(b, src, 0.0), imm3(b, 0.0), + nir_bcsel(b, nir_flt_imm(b, src, 0.5), case_lt_half, + nir_bcsel(b, nir_flt_imm(b, src, 1.0), case_lt_one, + imm3(b, 1.0)))); +} + +static nir_def * +blend_linearlight(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* + * if 2*src + dst > 2: 1 + * if 2*src + dst <= 1: 0 + * else: 2*src + dst - 1 + */ + nir_def *two_src = nir_fmul_imm(b, src, 2.0); + nir_def *sum = nir_fadd(b, two_src, dst); + nir_def *result = nir_fsub(b, sum, imm3(b, 1.0)); + + return nir_bcsel(b, nir_fgt_imm(b, sum, 2.0), imm3(b, 1.0), + nir_bcsel(b, nir_fge(b, imm3(b, 1.0), sum), imm3(b, 0.0), + result)); +} + +static nir_def * +blend_pinlight(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* + * if (2*src - 1 > dst) && src < 0.5: 0 + * if (2*src - 1 > dst) && src >= 0.5: 2*src - 1 + * if (2*src - 1 <= dst) && src < 0.5*dst: 2*src + * if (2*src - 1 <= dst) && src >= 0.5*dst: dst + */ + nir_def *two_src = nir_fmul_imm(b, src, 2.0); + nir_def *two_src_minus_1 = nir_fsub(b, two_src, imm3(b, 1.0)); + nir_def *half_dst = nir_fmul_imm(b, dst, 0.5); + + nir_def *cond1 = nir_flt(b, dst, two_src_minus_1); + nir_def *cond2 = nir_flt_imm(b, src, 0.5); + nir_def *cond3 = nir_flt(b, src, half_dst); + + return nir_bcsel(b, cond1, + nir_bcsel(b, cond2, imm3(b, 0.0), two_src_minus_1), + nir_bcsel(b, cond3, two_src, dst)); +} + +static nir_def * +blend_hardmix(nir_builder *b, nir_def *src, nir_def *dst) +{ + /* if src + dst >= 1: 1, else 0. + * Use small epsilon to handle 8-bit quantization. + */ + nir_def *sum = nir_fadd(b, src, dst); + nir_def *threshold = nir_imm_float(b, 1.0 - 0.5 / 255.0); /* ~0.998039 */ + return nir_bcsel(b, nir_fge(b, sum, threshold), + imm3(b, 1.0), imm3(b, 0.0)); +} + +/* + * Calculate the blend factor f(Cs', Cd'). + * Returns NULL for blend modes where X=0, meaning f() is not used. + */ +static nir_def * +calc_blend_factor(nir_builder *b, enum pipe_advanced_blend_mode blend_op, nir_def *src, nir_def *dst) +{ + switch (blend_op) { + /* f() result unused (X=0) */ + case PIPE_ADVANCED_BLEND_NONE: + case PIPE_ADVANCED_BLEND_SRC_OUT: + case PIPE_ADVANCED_BLEND_DST_OUT: + case PIPE_ADVANCED_BLEND_XOR: + return NULL; + + /* Standard blend modes */ + case PIPE_ADVANCED_BLEND_MULTIPLY: + return blend_multiply(b, src, dst); + case PIPE_ADVANCED_BLEND_SCREEN: + return blend_screen(b, src, dst); + case PIPE_ADVANCED_BLEND_OVERLAY: + return blend_overlay(b, src, dst); + case PIPE_ADVANCED_BLEND_DARKEN: + return blend_darken(b, src, dst); + case PIPE_ADVANCED_BLEND_LIGHTEN: + return blend_lighten(b, src, dst); + case PIPE_ADVANCED_BLEND_COLORDODGE: + return blend_colordodge(b, src, dst); + case PIPE_ADVANCED_BLEND_COLORBURN: + return blend_colorburn(b, src, dst); + case PIPE_ADVANCED_BLEND_HARDLIGHT: + return blend_hardlight(b, src, dst); + case PIPE_ADVANCED_BLEND_SOFTLIGHT: + return blend_softlight(b, src, dst); + case PIPE_ADVANCED_BLEND_DIFFERENCE: + return blend_difference(b, src, dst); + case PIPE_ADVANCED_BLEND_EXCLUSION: + return blend_exclusion(b, src, dst); + + /* HSL blend modes */ + case PIPE_ADVANCED_BLEND_HSL_HUE: + return blend_hsl_hue(b, src, dst); + case PIPE_ADVANCED_BLEND_HSL_SATURATION: + return blend_hsl_saturation(b, src, dst); + case PIPE_ADVANCED_BLEND_HSL_COLOR: + return blend_hsl_color(b, src, dst); + case PIPE_ADVANCED_BLEND_HSL_LUMINOSITY: + return blend_hsl_luminosity(b, src, dst); + + /* Porter-Duff modes where f(Cs,Cd) = Cs or Cd */ + case PIPE_ADVANCED_BLEND_SRC: + case PIPE_ADVANCED_BLEND_SRC_OVER: + case PIPE_ADVANCED_BLEND_SRC_IN: + case PIPE_ADVANCED_BLEND_SRC_ATOP: + return src; + case PIPE_ADVANCED_BLEND_DST: + case PIPE_ADVANCED_BLEND_DST_OVER: + case PIPE_ADVANCED_BLEND_DST_IN: + case PIPE_ADVANCED_BLEND_DST_ATOP: + return dst; + + /* Extended blend modes */ + case PIPE_ADVANCED_BLEND_INVERT: + return blend_invert(b, src, dst); + case PIPE_ADVANCED_BLEND_INVERT_RGB: + return blend_invert_rgb(b, src, dst); + case PIPE_ADVANCED_BLEND_LINEARDODGE: + return blend_lineardodge(b, src, dst); + case PIPE_ADVANCED_BLEND_LINEARBURN: + return blend_linearburn(b, src, dst); + case PIPE_ADVANCED_BLEND_VIVIDLIGHT: + return blend_vividlight(b, src, dst); + case PIPE_ADVANCED_BLEND_LINEARLIGHT: + return blend_linearlight(b, src, dst); + case PIPE_ADVANCED_BLEND_PINLIGHT: + return blend_pinlight(b, src, dst); + case PIPE_ADVANCED_BLEND_HARDMIX: + return blend_hardmix(b, src, dst); + default: + UNREACHABLE("Invalid advanced blend op"); + } +} + +static nir_def * +calc_additional_rgb_blend(nir_builder *b, const nir_lower_blend_options *options, + unsigned rt, + nir_def *src, nir_def *dst) +{ + nir_def *src_rgb = nir_trim_vector(b, src, 3); + nir_def *dst_rgb = nir_trim_vector(b, dst, 3); + nir_def *src_a = nir_channel(b, src, 3); + nir_def *dst_a = nir_channel(b, dst, 3); + + /* Premultiply if non-premultiplied */ + if (!options->rt[rt].src_premultiplied) + src_rgb = nir_fmul(b, src_rgb, src_a); + if (!options->rt[rt].dst_premultiplied) + dst_rgb = nir_fmul(b, dst_rgb, dst_a); + + nir_def *rgb, *a; + + switch (options->rt[rt].blend_mode) { + case PIPE_ADVANCED_BLEND_PLUS: + rgb = nir_fadd(b, src_rgb, dst_rgb); + a = nir_fadd(b, src_a, dst_a); + break; + case PIPE_ADVANCED_BLEND_PLUS_CLAMPED: + rgb = nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src_rgb, dst_rgb)); + a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a)); + break; + case PIPE_ADVANCED_BLEND_PLUS_CLAMPED_ALPHA: { + nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a)); + rgb = nir_fmin(b, max_a, nir_fadd(b, src_rgb, dst_rgb)); + a = max_a; + break; + } + case PIPE_ADVANCED_BLEND_PLUS_DARKER: { + nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a)); + /* max(0, max_a - ((src_a - src_rgb) + (dst_a - dst_rgb))) */ + nir_def *src_diff = nir_fsub(b, src_a, src_rgb); + nir_def *dst_diff = nir_fsub(b, dst_a, dst_rgb); + rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, max_a, nir_fadd(b, src_diff, dst_diff))); + a = max_a; + break; + } + case PIPE_ADVANCED_BLEND_MINUS: + rgb = nir_fsub(b, dst_rgb, src_rgb); + a = nir_fsub(b, dst_a, src_a); + break; + case PIPE_ADVANCED_BLEND_MINUS_CLAMPED: + rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, dst_rgb, src_rgb)); + a = nir_fmax(b, nir_imm_float(b, 0.0), nir_fsub(b, dst_a, src_a)); + break; + case PIPE_ADVANCED_BLEND_CONTRAST: { + /* res.rgb = (dst_a / 2) + 2 * (dst_rgb - dst_a/2) * (src_rgb - src_a/2) */ + nir_def *half_dst_a = nir_fmul_imm(b, dst_a, 0.5); + nir_def *half_src_a = nir_fmul_imm(b, src_a, 0.5); + nir_def *dst_centered = nir_fsub(b, dst_rgb, half_dst_a); + nir_def *src_centered = nir_fsub(b, src_rgb, half_src_a); + rgb = nir_fadd(b, half_dst_a, + nir_fmul_imm(b, nir_fmul(b, dst_centered, src_centered), 2.0)); + a = dst_a; + break; + } + case PIPE_ADVANCED_BLEND_INVERT_OVG: { + /* res.rgb = src_a * (1 - dst_rgb) + (1 - src_a) * dst_rgb */ + nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst_rgb); + nir_def *one_minus_src_a = nir_fsub_imm(b, 1.0, src_a); + rgb = nir_fadd(b, nir_fmul(b, src_a, one_minus_dst), + nir_fmul(b, one_minus_src_a, dst_rgb)); + a = nir_fsub(b, nir_fadd(b, src_a, dst_a), nir_fmul(b, src_a, dst_a)); + break; + } + case PIPE_ADVANCED_BLEND_RED: + rgb = nir_vec3(b, nir_channel(b, src_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, dst_rgb, 2)); + a = dst_a; + break; + case PIPE_ADVANCED_BLEND_GREEN: + rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, src_rgb, 1), nir_channel(b, dst_rgb, 2)); + a = dst_a; + break; + case PIPE_ADVANCED_BLEND_BLUE: + rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, src_rgb, 2)); + a = dst_a; + break; + default: + UNREACHABLE("Invalid additional RGB blend op"); + } + + /* If dst is non-premultiplied, the output should also be non-premultiplied */ + if (!options->rt[rt].dst_premultiplied) { + rgb = nir_bcsel(b, + nir_fgt_imm(b, a, 0.0), + nir_fdiv(b, rgb, a), + imm3(b, 0.0)); + } + + return nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1), + nir_channel(b, rgb, 2), a); +} + +/* + * X, Y, Z blend factors for the advanced blend equation: + * RGB = f(Cs',Cd') * X * p0 + Cs' * Y * p1 + Cd' * Z * p2 + * A = X * p0 + Y * p1 + Z * p2 + * + * Index by enum pipe_advanced_blend_mode. + * Modes >= PIPE_ADVANCED_BLEND_PLUS use separate calc_additional_rgb_blend(). + */ +static const float blend_xyz[][3] = { + [PIPE_ADVANCED_BLEND_NONE] = { 0, 0, 0 }, + [PIPE_ADVANCED_BLEND_MULTIPLY] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_SCREEN] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_OVERLAY] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_DARKEN] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_LIGHTEN] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_COLORDODGE] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_COLORBURN] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HARDLIGHT] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_SOFTLIGHT] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_DIFFERENCE] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_EXCLUSION] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HSL_HUE] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HSL_SATURATION] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HSL_COLOR] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HSL_LUMINOSITY] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_SRC] = { 1, 1, 0 }, + [PIPE_ADVANCED_BLEND_DST] = { 1, 0, 1 }, + [PIPE_ADVANCED_BLEND_SRC_OVER] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_DST_OVER] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_SRC_IN] = { 1, 0, 0 }, + [PIPE_ADVANCED_BLEND_DST_IN] = { 1, 0, 0 }, + [PIPE_ADVANCED_BLEND_SRC_OUT] = { 0, 1, 0 }, + [PIPE_ADVANCED_BLEND_DST_OUT] = { 0, 0, 1 }, + [PIPE_ADVANCED_BLEND_SRC_ATOP] = { 1, 0, 1 }, + [PIPE_ADVANCED_BLEND_DST_ATOP] = { 1, 1, 0 }, + [PIPE_ADVANCED_BLEND_XOR] = { 0, 1, 1 }, + [PIPE_ADVANCED_BLEND_INVERT] = { 1, 0, 1 }, + [PIPE_ADVANCED_BLEND_INVERT_RGB] = { 1, 0, 1 }, + [PIPE_ADVANCED_BLEND_LINEARDODGE] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_LINEARBURN] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_VIVIDLIGHT] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_LINEARLIGHT] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_PINLIGHT] = { 1, 1, 1 }, + [PIPE_ADVANCED_BLEND_HARDMIX] = { 1, 1, 1 }, +}; + +static nir_def * +nir_blend_advanced( + nir_builder *b, + const nir_lower_blend_options *options, + unsigned rt, + nir_def *src, nir_def *dst) +{ + /* Advanced blend uses hardcoded 32-bit constants. Convert inputs to f32 + * and convert back at the end. + */ + const unsigned bit_size = src->bit_size; + src = nir_f2f32(b, src); + dst = nir_f2f32(b, dst); + + /* Check if this is an additional RGB blend op */ + if (options->rt[rt].blend_mode >= PIPE_ADVANCED_BLEND_PLUS && + options->rt[rt].blend_mode <= PIPE_ADVANCED_BLEND_BLUE) { + nir_def *result = calc_additional_rgb_blend(b, options, rt, src, dst); + return nir_f2fN(b, result, bit_size); + } + + nir_def *src_rgb = nir_trim_vector(b, src, 3); + nir_def *dst_rgb = nir_trim_vector(b, dst, 3); + nir_def *src_a = nir_channel(b, src, 3); + nir_def *dst_a = nir_channel(b, dst, 3); + + /* Unpremultiply */ + nir_def *src_rgb_unpre; + if (options->rt[rt].src_premultiplied) { + src_rgb_unpre = nir_bcsel(b, + nir_feq_imm(b, src_a, 0.0), + imm3(b, 0.0), + nir_fdiv(b, src_rgb, src_a)); + } else { + src_rgb_unpre = src_rgb; + } + + nir_def *dst_rgb_unpre; + if (options->rt[rt].dst_premultiplied) { + dst_rgb_unpre = nir_bcsel(b, + nir_feq_imm(b, dst_a, 0.0), + imm3(b, 0.0), + nir_fdiv(b, dst_rgb, dst_a)); + } else { + dst_rgb_unpre = dst_rgb; + } + + /* f(Cs', Cd') - may be NULL if X=0 (result unused) */ + nir_def *factor = calc_blend_factor(b, options->rt[rt].blend_mode, src_rgb_unpre, dst_rgb_unpre); + + nir_def *p0, *p1, *p2; + + switch (options->rt[rt].overlap) { + case PIPE_BLEND_OVERLAP_UNCORRELATED: + /* p0 = As * Ad, p1 = As * (1 - Ad), p2 = Ad * (1 - As) */ + p0 = nir_fmul(b, src_a, dst_a); + p1 = nir_fmul(b, src_a, nir_fsub_imm(b, 1.0, dst_a)); + p2 = nir_fmul(b, dst_a, nir_fsub_imm(b, 1.0, src_a)); + break; + case PIPE_BLEND_OVERLAP_CONJOINT: + /* p0 = min(As, Ad), p1 = max(As - Ad, 0), p2 = max(Ad - As, 0) */ + p0 = nir_fmin(b, src_a, dst_a); + p1 = nir_fmax(b, nir_fsub(b, src_a, dst_a), nir_imm_float(b, 0.0)); + p2 = nir_fmax(b, nir_fsub(b, dst_a, src_a), nir_imm_float(b, 0.0)); + break; + case PIPE_BLEND_OVERLAP_DISJOINT: + /* p0 = max(As + Ad - 1, 0), p1 = min(As, 1 - Ad), p2 = min(Ad, 1 - As) */ + p0 = nir_fmax(b, nir_fadd_imm(b, nir_fadd(b, src_a, dst_a), -1.0), nir_imm_float(b, 0.0)); + p1 = nir_fmin(b, src_a, nir_fsub_imm(b, 1.0, dst_a)); + p2 = nir_fmin(b, dst_a, nir_fsub_imm(b, 1.0, src_a)); + break; + default: + UNREACHABLE("invalid overlap"); + } + + const float x = blend_xyz[options->rt[rt].blend_mode][0]; + const float y = blend_xyz[options->rt[rt].blend_mode][1]; + const float z = blend_xyz[options->rt[rt].blend_mode][2]; + + /* RGB = f * X * p0 + Cs' * Y * p1 + Cd' * Z * p2 */ + nir_def *rgb = imm3(b, 0.0); + if (factor) + rgb = nir_fmul(b, factor, nir_fmul_imm(b, p0, x)); + if (y != 0.0) + rgb = nir_fadd(b, rgb, nir_fmul(b, src_rgb_unpre, nir_fmul_imm(b, p1, y))); + if (z != 0.0) + rgb = nir_fadd(b, rgb, nir_fmul(b, dst_rgb_unpre, nir_fmul_imm(b, p2, z))); + + /* A = X * p0 + Y * p1 + Z * p2 */ + nir_def *a = nir_imm_float(b, 0.0); + if (x != 0.0) + a = nir_fmul_imm(b, p0, x); + if (y != 0.0) + a = nir_fadd(b, a, nir_fmul_imm(b, p1, y)); + if (z != 0.0) + a = nir_fadd(b, a, nir_fmul_imm(b, p2, z)); + + /* If dst is non-premultiplied, the output should also be non-premultiplied */ + if (!options->rt[rt].dst_premultiplied) { + rgb = nir_bcsel(b, + nir_fgt_imm(b, a, 0.0), + nir_fdiv(b, rgb, a), + imm3(b, 0.0)); + } + + nir_def *result = nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1), + nir_channel(b, rgb, 2), a); + return nir_f2fN(b, result, bit_size); +} + /* Given a blend state, the source color, and the destination color, * return the blended color */ @@ -514,7 +1066,8 @@ nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data) /* Grab the previous fragment color if we need it */ nir_def *dst; - if (channel_uses_dest(options->rt[rt].rgb) || + if (options->rt[rt].advanced_blend || + channel_uses_dest(options->rt[rt].rgb) || channel_uses_dest(options->rt[rt].alpha) || options->logicop_enable || options->rt[rt].colormask != BITFIELD_MASK(4)) { @@ -532,16 +1085,22 @@ nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data) dst = nir_undef(b, 4, nir_src_bit_size(store->src[0])); } - /* Blend the two colors per the passed options. We only call nir_blend if - * blending is enabled with a blend mode other than replace (independent of - * the color mask). That avoids unnecessary fsat instructions in the common - * case where blending is disabled at an API level, but the driver calls + /* Blend the two colors per the passed options. Blending is prioritized as: + * 1. Logic operations (if logicop_enable is true) - mutually exclusive with blending + * 2. Advanced blending (if advanced_blend is set) - uses complex blend equations + * 3. Standard blending (if configured) - uses traditional blend factors + * + * We only call nir_blend if blending is enabled with a blend mode other than replace + * (independent of the color mask). That avoids unnecessary fsat instructions in the + * common case where blending is disabled at an API level, but the driver calls * nir_blend (possibly for color masking). */ nir_def *blended = src; if (options->logicop_enable) { blended = nir_color_logicop(b, src, dst, options->logicop_func, format); + } else if (options->rt[rt].advanced_blend) { + blended = nir_blend_advanced(b, options, rt, src, dst); } else if (!util_format_is_pure_integer(format) && !nir_blend_replace_rt(&options->rt[rt])) { assert(!util_format_is_scaled(format)); diff --git a/src/compiler/nir/nir_lower_blend.h b/src/compiler/nir/nir_lower_blend.h index 4d6697cc913..bc979a79efc 100644 --- a/src/compiler/nir/nir_lower_blend.h +++ b/src/compiler/nir/nir_lower_blend.h @@ -46,7 +46,13 @@ typedef struct { nir_lower_blend_channel alpha; /* 4-bit colormask. 0x0 for none, 0xF for RGBA, 0x1 for R */ - unsigned colormask; + unsigned colormask:4; + + unsigned advanced_blend:1; + enum pipe_advanced_blend_mode blend_mode; + bool src_premultiplied; + bool dst_premultiplied; + enum pipe_blend_overlap_mode overlap; } nir_lower_blend_rt; typedef struct {