/*
 * Copyright (C) 2025 Google LLC
 * Copyright (C) 2019-2021 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Implements the fragment pipeline (blending and writeout) in software, to be
 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
 * shader variant on typical GPUs. This pass is useful if hardware lacks
 * fixed-function blending in part or in full.
 */

#include "nir_lower_blend.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_blend_equation_advanced_helper.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/blend.h"
#include "nir_builder_opcodes.h"

/* State handed to the per-instruction lowering callback through its opaque
 * data pointer.
 */
struct ctx {
   /* Blend configuration being lowered; never written through this pointer */
   const nir_lower_blend_options *options;

   /* Eight colour slots. NOTE(review): presumably the dual-source (src1)
    * colour recorded per render target -- confirm against the code that
    * fills this array.
    */
   nir_def *src1[8];
};

/*
 * Emit the blend equation proper. For the factored functions the operands
 * passed in are expected to be already multiplied by their blend factors.
 */

static nir_def *
nir_blend_func(
   nir_builder *b,
   enum pipe_blend_func func,
   nir_def *src, nir_def *dst)
{
   switch (func) {
   case PIPE_BLEND_MIN:
      return nir_fmin(b, src, dst);
   case PIPE_BLEND_MAX:
      return nir_fmax(b, src, dst);
   case PIPE_BLEND_ADD:
      return nir_fadd(b, src, dst);
   case PIPE_BLEND_SUBTRACT:
      /* src - dst */
      return nir_fsub(b, src, dst);
   case PIPE_BLEND_REVERSE_SUBTRACT:
      /* dst - src */
      return nir_fsub(b, dst, src);
   }

   UNREACHABLE("Invalid blend function");
}

/*
 * Returns true for the blend functions whose operands are scaled by a blend
 * factor (add, subtract, reverse subtract) and false for everything else.
 */

static bool
nir_blend_factored(enum pipe_blend_func func)
{
   return func == PIPE_BLEND_ADD ||
          func == PIPE_BLEND_SUBTRACT ||
          func == PIPE_BLEND_REVERSE_SUBTRACT;
}

/*
 * Build the src_alpha_saturate factor for one channel: min(As, 1 - Ad) for
 * the colour channels (chan < 3) and 1.0 for any other channel.
 */
static nir_def *
nir_alpha_saturate(
   nir_builder *b,
   nir_def *src, nir_def *dst,
   unsigned chan)
{
   nir_def *src_alpha = nir_channel(b, src, 3);
   nir_def *dst_alpha = nir_channel(b, dst, 3);
   nir_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
   nir_def *inv_dst_alpha = nir_fsub(b, one, dst_alpha);

   /* Non-colour channels always get a factor of one */
   if (chan >= 3)
      return one;

   return nir_fmin(b, src_alpha, inv_dst_alpha);
}

/*
 * Fetch the scalar value of a non-inverted blend factor for channel chan.
 * The caller is responsible for applying the inversion and for multiplying
 * the colour by the returned value.
 */

static nir_def *
nir_blend_factor_value(
   nir_builder *b,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor_without_invert)
{
   switch (factor_without_invert) {
   case PIPE_BLENDFACTOR_ONE:
      return nir_imm_floatN_t(b, 1.0, src->bit_size);

   /* Colour factors read the channel currently being blended */
   case PIPE_BLENDFACTOR_SRC_COLOR:
      return nir_channel(b, src, chan);
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      return nir_channel(b, src1, chan);
   case PIPE_BLENDFACTOR_DST_COLOR:
      return nir_channel(b, dst, chan);
   case PIPE_BLENDFACTOR_CONST_COLOR:
      return nir_channel(b, bconst, chan);

   /* Alpha factors always read channel 3 */
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      return nir_channel(b, src, 3);
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      return nir_channel(b, src1, 3);
   case PIPE_BLENDFACTOR_DST_ALPHA:
      return nir_channel(b, dst, 3);
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      return nir_channel(b, bconst, 3);

   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      return nir_alpha_saturate(b, src, dst, chan);

   default:
      /* Only inverted factors are left, and those must not get here */
      assert(util_blendfactor_is_inverted(factor_without_invert));
      UNREACHABLE("Unexpected inverted factor");
   }
}

/* Clamp x to the range [-1.0, +1.0] at x's own bit size */
static nir_def *
nir_build_fsat_signed(nir_builder *b, nir_def *x)
{
   nir_def *lower = nir_imm_floatN_t(b, -1.0, x->bit_size);
   nir_def *upper = nir_imm_floatN_t(b, +1.0, x->bit_size);

   return nir_fclamp(b, x, lower, upper);
}

/*
 * Clamp x to the range of a normalized format: [0, 1] for unorm, [-1, 1] for
 * snorm. Any other format leaves x untouched.
 */
static nir_def *
nir_fsat_to_format(nir_builder *b, nir_def *x, enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return nir_fsat(b, x);

   if (util_format_is_snorm(format))
      return nir_build_fsat_signed(b, x);

   return x;
}

/* Does blending this channel depend on the destination colour at all? */
static bool
channel_uses_dest(nir_lower_blend_channel chan)
{
   /* Unfactored functions (min/max) always consume the destination */
   if (!nir_blend_factored(chan.func))
      return true;

   /* A destination factor other than zero keeps the destination term alive */
   if (chan.dst_factor != PIPE_BLENDFACTOR_ZERO)
      return true;

   /* Otherwise the destination only matters if the source factor reads it */
   const enum pipe_blendfactor src_factor =
      util_blendfactor_without_invert(chan.src_factor);

   return src_factor == PIPE_BLENDFACTOR_DST_COLOR ||
          src_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
          src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
}

/*
 * Multiply raw_scalar by the given blend factor evaluated for channel chan.
 * Inverted factors are computed as one minus the underlying factor.
 * The format argument is currently not consulted.
 */
static nir_def *
nir_blend_factor(
   nir_builder *b,
   nir_def *raw_scalar,
   nir_def *src, nir_def *src1, nir_def *dst, nir_def *bconst,
   unsigned chan,
   enum pipe_blendfactor factor,
   enum pipe_format format)
{
   const enum pipe_blendfactor base = util_blendfactor_without_invert(factor);
   nir_def *value =
      nir_blend_factor_value(b, src, src1, dst, bconst, chan, base);

   /* 1 - f, built as (-f) + 1 */
   if (util_blendfactor_is_inverted(factor)) {
      nir_def *negated = nir_fneg(b, value);
      value = nir_fadd_imm(b, negated, 1.0);
   }

   return nir_fmul(b, raw_scalar, value);
}

/*
 * Apply a 4-bit colour write mask: channel i of the result comes from src if
 * bit i of mask is set and from dst otherwise. Bits above the low four are
 * ignored.
 */

nir_def *
nir_color_mask(nir_builder *b, nir_def *src, nir_def *dst, unsigned mask)
{
   const unsigned enabled = mask & 0xf;

   /* Nothing or everything written: no need to build a new vector */
   if (enabled == 0)
      return dst;
   if (enabled == 0xf)
      return src;

   nir_def *red = nir_channel(b, (enabled & 0x1) ? src : dst, 0);
   nir_def *green = nir_channel(b, (enabled & 0x2) ? src : dst, 1);
   nir_def *blue = nir_channel(b, (enabled & 0x4) ? src : dst, 2);
   nir_def *alpha = nir_channel(b, (enabled & 0x8) ? src : dst, 3);

   return nir_vec4(b, red, green, blue, alpha);
}

/*
 * Emit the bitwise operation for a logic op. Inverting an operand or a result
 * is expressed as an XOR with bitmask. PIPE_LOGICOP_NOOP must be handled by
 * the caller and never reaches the emitted code.
 */
static nir_def *
nir_logicop_func(
   nir_builder *b,
   enum pipe_logicop func,
   nir_def *src, nir_def *dst, nir_def *bitmask)
{
   switch (func) {
   /* Constant results */
   case PIPE_LOGICOP_CLEAR:
      return nir_imm_ivec4(b, 0, 0, 0, 0);
   case PIPE_LOGICOP_SET:
      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);

   /* Single-operand results */
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_COPY_INVERTED:
      return nir_ixor(b, src, bitmask);
   case PIPE_LOGICOP_INVERT:
      return nir_ixor(b, dst, bitmask);
   case PIPE_LOGICOP_NOOP:
      UNREACHABLE("optimized out");

   /* AND family */
   case PIPE_LOGICOP_AND:
      return nir_iand(b, src, dst);
   case PIPE_LOGICOP_NAND: {
      nir_def *both = nir_iand(b, src, dst);
      return nir_ixor(b, both, bitmask);
   }
   case PIPE_LOGICOP_AND_INVERTED: {
      nir_def *not_src = nir_ixor(b, src, bitmask);
      return nir_iand(b, not_src, dst);
   }
   case PIPE_LOGICOP_AND_REVERSE: {
      nir_def *not_dst = nir_ixor(b, dst, bitmask);
      return nir_iand(b, src, not_dst);
   }

   /* OR family */
   case PIPE_LOGICOP_OR:
      return nir_ior(b, src, dst);
   case PIPE_LOGICOP_NOR: {
      nir_def *either = nir_ior(b, src, dst);
      return nir_ixor(b, either, bitmask);
   }
   case PIPE_LOGICOP_OR_INVERTED: {
      nir_def *not_src = nir_ixor(b, src, bitmask);
      return nir_ior(b, not_src, dst);
   }
   case PIPE_LOGICOP_OR_REVERSE: {
      nir_def *not_dst = nir_ixor(b, dst, bitmask);
      return nir_ior(b, src, not_dst);
   }

   /* XOR family */
   case PIPE_LOGICOP_XOR:
      return nir_ixor(b, src, dst);
   case PIPE_LOGICOP_EQUIV: {
      nir_def *differs = nir_ixor(b, src, dst);
      return nir_ixor(b, differs, bitmask);
   }
   }

   UNREACHABLE("Invalid logciop function");
}

/*
 * Apply a logic op between src and dst for a render target of the given
 * format, returning the result at src's original bit size.
 *
 * Float and sRGB formats, as well as PIPE_LOGICOP_COPY, return src unchanged
 * and PIPE_LOGICOP_NOOP returns dst. Otherwise both colours are widened to 32
 * bits if necessary, normalized formats are converted to their fixed-point
 * integer encoding, the bitwise op is applied together with a per-channel
 * mask built from the format's channel sizes, and the result is converted
 * back the same way.
 */
nir_def *
nir_color_logicop(nir_builder *b, nir_def *src, nir_def *dst,
                  enum pipe_logicop func, enum pipe_format format)
{
   unsigned bit_size = src->bit_size;
   const struct util_format_description *format_desc =
      util_format_description(format);

   /* From section 17.3.9 ("Logical Operation") of the OpenGL 4.6 core spec:
    *
    *    Logical operation has no effect on a floating-point destination color
    *    buffer, or when FRAMEBUFFER_SRGB is enabled and the value of
    *    FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer attachment
    *    corresponding to the destination buffer is SRGB (see section 9.2.3).
    *    However, if logical operation is enabled, blending is still disabled.
    */
   if (util_format_is_float(format) || util_format_is_srgb(format))
      return src;
   else if (func == PIPE_LOGICOP_COPY)
      return src;
   else if (func == PIPE_LOGICOP_NOOP)
      return dst;

   /* Type used for the bit-size conversions on the way in and out */
   nir_alu_type type =
      util_format_is_pure_integer(format) ? nir_type_uint : nir_type_float;

   /* The logic op itself is always carried out on 32-bit values */
   if (bit_size != 32) {
      src = nir_convert_to_bit_size(b, src, type, 32);
      dst = nir_convert_to_bit_size(b, dst, type, 32);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   /* Per-channel bit widths of the render target format */
   unsigned bits[4];
   for (int i = 0; i < 4; ++i)
      bits[i] = format_desc->channel[i].size;

   /* Normalized formats: go from float to the integer encoding first */
   if (util_format_is_unorm(format)) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (util_format_is_snorm(format)) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   /* Mask with the low bits[i] bits set for channel i; nir_logicop_func()
    * XORs with it to express inversion.
    */
   nir_const_value mask[4];
   for (int i = 0; i < 4; ++i)
      mask[i] = nir_const_value_for_uint(BITFIELD_MASK(bits[i]), 32);

   nir_def *out = nir_logicop_func(b, func, src, dst,
                                   nir_build_imm(b, 4, 32, mask));

   /* Undo the integer encoding applied above */
   if (util_format_is_unorm(format)) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (util_format_is_snorm(format)) {
      /* Sign extend before converting so the i2f in snorm_to_float works */
      out = nir_format_sign_extend_ivec(b, out, bits);
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   /* Return at the bit size the caller handed us */
   if (bit_size != 32)
      out = nir_convert_to_bit_size(b, out, type, bit_size);

   return out;
}

/*
 * A channel exists if its index is below the format's channel count and the
 * format does not declare it as void.
 */
static bool
channel_exists(const struct util_format_description *desc, unsigned i)
{
   if (i >= desc->nr_channels)
      return false;

   return desc->channel[i].type != UTIL_FORMAT_TYPE_VOID;
}

/*
 * Check whether a channel's blend state is plain "replace" (dest = source),
 * i.e. an ADD of source * ONE and dest * ZERO. Callers can skip the blend
 * arithmetic entirely in that case.
 */
static bool
nir_blend_replace_channel(const nir_lower_blend_channel *c)
{
   if (c->func != PIPE_BLEND_ADD)
      return false;

   return c->src_factor == PIPE_BLENDFACTOR_ONE &&
          c->dst_factor == PIPE_BLENDFACTOR_ZERO;
}

/* A render target is in replace mode when both its RGB and alpha state are */
static bool
nir_blend_replace_rt(const nir_lower_blend_rt *rt)
{
   if (!nir_blend_replace_channel(&rt->rgb))
      return false;

   return nir_blend_replace_channel(&rt->alpha);
}


/* Smallest of the first three components of v */
static nir_def *
minv3(nir_builder *b, nir_def *v)
{
   nir_def *x = nir_channel(b, v, 0);
   nir_def *y = nir_channel(b, v, 1);
   nir_def *z = nir_channel(b, v, 2);

   return nir_fmin(b, nir_fmin(b, x, y), z);
}

/* Largest of the first three components of v */
static nir_def *
maxv3(nir_builder *b, nir_def *v)
{
   nir_def *x = nir_channel(b, v, 0);
   nir_def *y = nir_channel(b, v, 1);
   nir_def *z = nir_channel(b, v, 2);

   return nir_fmax(b, nir_fmax(b, x, y), z);
}

/* Luminosity of a colour: dot product with the weights (0.30, 0.59, 0.11) */
static nir_def *
lumv3(nir_builder *b, nir_def *c)
{
   nir_def *weights = nir_imm_vec3(b, 0.30, 0.59, 0.11);

   return nir_fdot(b, c, weights);
}

/* Saturation of a colour: largest component minus smallest component */
static nir_def *
satv3(nir_builder *b, nir_def *c)
{
   nir_def *largest = maxv3(b, c);
   nir_def *smallest = minv3(b, c);

   return nir_fsub(b, largest, smallest);
}

/*
 * Bring a colour back into [0,1] without changing its luminosity. A colour
 * whose minimum is negative takes the first correction; otherwise one whose
 * maximum exceeds one takes the second; otherwise it is returned as is.
 */
static nir_def *
clip_color(nir_builder *b, nir_def *c)
{
   nir_def *lum = lumv3(b, c);
   nir_def *lowest = minv3(b, c);
   nir_def *highest = maxv3(b, c);

   /* min < 0: lum + (c - lum) * lum / (lum - min) */
   nir_def *under_num = nir_fmul(b, nir_fsub(b, c, lum), lum);
   nir_def *under_den = nir_fsub(b, lum, lowest);
   nir_def *under_fixed = nir_fadd(b, lum, nir_fdiv(b, under_num, under_den));

   /* max > 1: lum + (c - lum) * (1 - lum) / (max - lum) */
   nir_def *over_num = nir_fmul(b, nir_fsub(b, c, lum),
                                nir_fsub_imm(b, 1.0, lum));
   nir_def *over_den = nir_fsub(b, highest, lum);
   nir_def *over_fixed = nir_fadd(b, lum, nir_fdiv(b, over_num, over_den));

   nir_def *is_under = nir_flt_imm(b, lowest, 0.0);
   nir_def *is_over = nir_fgt_imm(b, highest, 1.0);

   /* The underflow case takes priority over the overflow case */
   nir_def *over_or_same = nir_bcsel(b, is_over, over_fixed, c);
   return nir_bcsel(b, is_under, under_fixed, over_or_same);
}

/*
 * Shift cbase uniformly so that its luminosity equals that of clum, then
 * clip the result with clip_color().
 */
static nir_def *
set_lum(nir_builder *b, nir_def *cbase, nir_def *clum)
{
   nir_def *base_lum = lumv3(b, cbase);
   nir_def *target_lum = lumv3(b, clum);
   nir_def *delta = nir_fsub(b, target_lum, base_lum);
   nir_def *shifted = nir_fadd(b, cbase, delta);

   return clip_color(b, shifted);
}

/*
 * Rescale cbase to the saturation of csat, then give it the luminosity of
 * clum. A base colour with no saturation becomes black before the luminosity
 * is applied.
 */
static nir_def *
set_lum_sat(nir_builder *b, nir_def *cbase, nir_def *csat, nir_def *clum)
{
   nir_def *base_sat = satv3(b, cbase);
   nir_def *target_sat = satv3(b, csat);
   nir_def *base_min = minv3(b, cbase);

   /* (cbase - min) * ssat / sbase, used only when sbase > 0 */
   nir_def *has_saturation = nir_fgt_imm(b, base_sat, 0.0);
   nir_def *above_min = nir_fsub(b, cbase, base_min);
   nir_def *rescaled =
      nir_fdiv(b, nir_fmul(b, above_min, target_sat), base_sat);
   nir_def *color = nir_bcsel(b, has_saturation, rescaled, imm3(b, 0.0));

   return set_lum(b, color, clum);
}

/* HSL hue: src is the base colour; dst supplies saturation and luminosity */
static nir_def *
blend_hsl_hue(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *base = src;
   nir_def *sat_from = dst;
   nir_def *lum_from = dst;

   return set_lum_sat(b, base, sat_from, lum_from);
}

/* HSL saturation: dst is the base colour and supplies luminosity; src
 * supplies the saturation.
 */
static nir_def *
blend_hsl_saturation(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *base = dst;
   nir_def *sat_from = src;
   nir_def *lum_from = dst;

   return set_lum_sat(b, base, sat_from, lum_from);
}

/* HSL color: src is the base colour; dst supplies the luminosity */
static nir_def *
blend_hsl_color(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *base = src;
   nir_def *lum_from = dst;

   return set_lum(b, base, lum_from);
}

/* HSL luminosity: dst is the base colour; src supplies the luminosity */
static nir_def *
blend_hsl_luminosity(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *base = dst;
   nir_def *lum_from = src;

   return set_lum(b, base, lum_from);
}

/* f = 1 - dst; src is accepted for a uniform signature but not read */
static nir_def *
blend_invert(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *inverted_dst = nir_fsub_imm(b, 1.0, dst);

   return inverted_dst;
}

/* f = src * (1 - dst) */
static nir_def *
blend_invert_rgb(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *inverted_dst = nir_fsub_imm(b, 1.0, dst);

   return nir_fmul(b, src, inverted_dst);
}

/* f = min(1, src + dst) */
static nir_def *
blend_lineardodge(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *ceiling = imm3(b, 1.0);
   nir_def *sum = nir_fadd(b, src, dst);

   return nir_fmin(b, ceiling, sum);
}

/* f = max(0, src + dst - 1) */
static nir_def *
blend_linearburn(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *floor_value = nir_imm_float(b, 0.0);
   nir_def *dst_minus_one = nir_fadd_imm(b, dst, -1.0);
   nir_def *sum = nir_fadd(b, src, dst_minus_one);

   return nir_fmax(b, floor_value, sum);
}

/*
 * Vivid light, selected per component on the value of src:
 *
 *    src <= 0        -> 0
 *    src <  0.5      -> 1 - min(1, (1 - dst) / (2 * src))
 *    src <  1        -> min(1, dst / (2 * (1 - src)))
 *    otherwise       -> 1
 */
static nir_def *
blend_vividlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *inv_dst = nir_fsub_imm(b, 1.0, dst);
   nir_def *inv_src = nir_fsub_imm(b, 1.0, src);

   /* Value for the src < 0.5 range */
   nir_def *burn_ratio = nir_fdiv(b, inv_dst, src_x2);
   nir_def *burn =
      nir_fsub_imm(b, 1.0, nir_fmin(b, imm3(b, 1.0), burn_ratio));

   /* Value for the src < 1 range */
   nir_def *dodge_ratio = nir_fdiv(b, dst, nir_fmul_imm(b, inv_src, 2.0));
   nir_def *dodge = nir_fmin(b, imm3(b, 1.0), dodge_ratio);

   /* Build the selection chain from the last case outwards */
   nir_def *result = imm3(b, 1.0);
   result = nir_bcsel(b, nir_flt_imm(b, src, 1.0), dodge, result);
   result = nir_bcsel(b, nir_flt_imm(b, src, 0.5), burn, result);
   result = nir_bcsel(b, nir_fle_imm(b, src, 0.0), imm3(b, 0.0), result);

   return result;
}

/*
 * Linear light, with s = 2 * src + dst:
 *
 *    s >  2    -> 1
 *    s <= 1    -> 0
 *    otherwise -> s - 1
 */
static nir_def *
blend_linearlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *s = nir_fadd(b, src_x2, dst);
   nir_def *s_minus_one = nir_fsub(b, s, imm3(b, 1.0));

   nir_def *above_two = nir_fgt_imm(b, s, 2.0);
   nir_def *at_most_one = nir_fge(b, imm3(b, 1.0), s);

   nir_def *lower_cases = nir_bcsel(b, at_most_one, imm3(b, 0.0), s_minus_one);
   return nir_bcsel(b, above_two, imm3(b, 1.0), lower_cases);
}

/*
 * Pin light:
 *
 *    2*src - 1 >  dst, src <  0.5      -> 0
 *    2*src - 1 >  dst, src >= 0.5      -> 2*src - 1
 *    2*src - 1 <= dst, src <  0.5*dst  -> 2*src
 *    2*src - 1 <= dst, src >= 0.5*dst  -> dst
 */
static nir_def *
blend_pinlight(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *src_x2 = nir_fmul_imm(b, src, 2.0);
   nir_def *src_x2_m1 = nir_fsub(b, src_x2, imm3(b, 1.0));
   nir_def *dst_half = nir_fmul_imm(b, dst, 0.5);

   nir_def *dst_below = nir_flt(b, dst, src_x2_m1);
   nir_def *src_below_half = nir_flt_imm(b, src, 0.5);
   nir_def *src_below_half_dst = nir_flt(b, src, dst_half);

   /* First two rows of the table */
   nir_def *when_dst_below =
      nir_bcsel(b, src_below_half, imm3(b, 0.0), src_x2_m1);

   /* Last two rows of the table */
   nir_def *when_dst_not_below =
      nir_bcsel(b, src_below_half_dst, src_x2, dst);

   return nir_bcsel(b, dst_below, when_dst_below, when_dst_not_below);
}

/*
 * Hard mix: 1 where src + dst reaches one, 0 elsewhere. The comparison uses
 * a threshold of 1 - 0.5/255 (about 0.998039) rather than exactly 1 to
 * handle 8-bit quantization.
 */
static nir_def *
blend_hardmix(nir_builder *b, nir_def *src, nir_def *dst)
{
   nir_def *total = nir_fadd(b, src, dst);
   nir_def *cutoff = nir_imm_float(b, 1.0 - 0.5 / 255.0);
   nir_def *reaches_one = nir_fge(b, total, cutoff);

   return nir_bcsel(b, reaches_one, imm3(b, 1.0), imm3(b, 0.0));
}

/*
 * Evaluate f(Cs', Cd') of the advanced blend equation for the given mode.
 *
 * Modes whose X coefficient is zero never use f(), so NULL is returned for
 * them instead of emitting any code.
 */
static nir_def *
calc_blend_factor(nir_builder *b, enum pipe_advanced_blend_mode blend_op, nir_def *src, nir_def *dst)
{
   nir_def *f = NULL;

   switch (blend_op) {
   /* X = 0: leave f as NULL */
   case PIPE_ADVANCED_BLEND_NONE:
   case PIPE_ADVANCED_BLEND_SRC_OUT:
   case PIPE_ADVANCED_BLEND_DST_OUT:
   case PIPE_ADVANCED_BLEND_XOR:
      break;

   /* Porter-Duff modes: f() is simply one of the inputs */
   case PIPE_ADVANCED_BLEND_SRC:
   case PIPE_ADVANCED_BLEND_SRC_OVER:
   case PIPE_ADVANCED_BLEND_SRC_IN:
   case PIPE_ADVANCED_BLEND_SRC_ATOP:
      f = src;
      break;
   case PIPE_ADVANCED_BLEND_DST:
   case PIPE_ADVANCED_BLEND_DST_OVER:
   case PIPE_ADVANCED_BLEND_DST_IN:
   case PIPE_ADVANCED_BLEND_DST_ATOP:
      f = dst;
      break;

   /* Standard separable modes */
   case PIPE_ADVANCED_BLEND_MULTIPLY:
      f = blend_multiply(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_SCREEN:
      f = blend_screen(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_OVERLAY:
      f = blend_overlay(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_DARKEN:
      f = blend_darken(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_LIGHTEN:
      f = blend_lighten(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_COLORDODGE:
      f = blend_colordodge(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_COLORBURN:
      f = blend_colorburn(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_HARDLIGHT:
      f = blend_hardlight(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_SOFTLIGHT:
      f = blend_softlight(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_DIFFERENCE:
      f = blend_difference(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_EXCLUSION:
      f = blend_exclusion(b, src, dst);
      break;

   /* HSL modes */
   case PIPE_ADVANCED_BLEND_HSL_HUE:
      f = blend_hsl_hue(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_HSL_SATURATION:
      f = blend_hsl_saturation(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_HSL_COLOR:
      f = blend_hsl_color(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_HSL_LUMINOSITY:
      f = blend_hsl_luminosity(b, src, dst);
      break;

   /* Extended modes */
   case PIPE_ADVANCED_BLEND_INVERT:
      f = blend_invert(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_INVERT_RGB:
      f = blend_invert_rgb(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_LINEARDODGE:
      f = blend_lineardodge(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_LINEARBURN:
      f = blend_linearburn(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_VIVIDLIGHT:
      f = blend_vividlight(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_LINEARLIGHT:
      f = blend_linearlight(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_PINLIGHT:
      f = blend_pinlight(b, src, dst);
      break;
   case PIPE_ADVANCED_BLEND_HARDMIX:
      f = blend_hardmix(b, src, dst);
      break;

   default:
      UNREACHABLE("Invalid advanced blend op");
   }

   return f;
}

/*
 * Evaluate the "additional RGB" advanced blend modes, i.e. the ones handled
 * by the switch below (PLUS, PLUS_CLAMPED, PLUS_CLAMPED_ALPHA, PLUS_DARKER,
 * MINUS, MINUS_CLAMPED, CONTRAST, INVERT_OVG, RED, GREEN, BLUE). These
 * compute RGB and alpha directly rather than through the X/Y/Z equation.
 *
 * src and dst are read as RGBA (channels 0-2 colour, channel 3 alpha). Colours
 * flagged as non-premultiplied in the render target options are multiplied by
 * their alpha before the mode is evaluated. If the destination is
 * non-premultiplied, the resulting RGB is divided by the resulting alpha, or
 * set to zero when that alpha is not greater than zero.
 *
 * Returns the blended colour as a vec4.
 */
static nir_def *
calc_additional_rgb_blend(nir_builder *b, const nir_lower_blend_options *options,
                          unsigned rt,
                          nir_def *src, nir_def *dst)
{
   nir_def *src_rgb = nir_trim_vector(b, src, 3);
   nir_def *dst_rgb = nir_trim_vector(b, dst, 3);
   nir_def *src_a = nir_channel(b, src, 3);
   nir_def *dst_a = nir_channel(b, dst, 3);

   /* Premultiply if non-premultiplied */
   if (!options->rt[rt].src_premultiplied)
      src_rgb = nir_fmul(b, src_rgb, src_a);
   if (!options->rt[rt].dst_premultiplied)
      dst_rgb = nir_fmul(b, dst_rgb, dst_a);

   nir_def *rgb, *a;

   switch (options->rt[rt].blend_mode) {
   case PIPE_ADVANCED_BLEND_PLUS:
      /* Plain sum of colour and of alpha, no clamping */
      rgb = nir_fadd(b, src_rgb, dst_rgb);
      a = nir_fadd(b, src_a, dst_a);
      break;
   case PIPE_ADVANCED_BLEND_PLUS_CLAMPED:
      /* Sum clamped to at most 1 for colour and alpha alike */
      rgb = nir_fmin(b, imm3(b, 1.0), nir_fadd(b, src_rgb, dst_rgb));
      a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
      break;
   case PIPE_ADVANCED_BLEND_PLUS_CLAMPED_ALPHA: {
      /* Colour sum is clamped to the (clamped) result alpha instead of 1 */
      nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
      rgb = nir_fmin(b, max_a, nir_fadd(b, src_rgb, dst_rgb));
      a = max_a;
      break;
   }
   case PIPE_ADVANCED_BLEND_PLUS_DARKER: {
      nir_def *max_a = nir_fmin(b, nir_imm_float(b, 1.0), nir_fadd(b, src_a, dst_a));
      /* max(0, max_a - ((src_a - src_rgb) + (dst_a - dst_rgb))) */
      nir_def *src_diff = nir_fsub(b, src_a, src_rgb);
      nir_def *dst_diff = nir_fsub(b, dst_a, dst_rgb);
      rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, max_a, nir_fadd(b, src_diff, dst_diff)));
      a = max_a;
      break;
   }
   case PIPE_ADVANCED_BLEND_MINUS:
      /* Destination minus source, no clamping */
      rgb = nir_fsub(b, dst_rgb, src_rgb);
      a = nir_fsub(b, dst_a, src_a);
      break;
   case PIPE_ADVANCED_BLEND_MINUS_CLAMPED:
      /* Destination minus source, clamped to at least 0 */
      rgb = nir_fmax(b, imm3(b, 0.0), nir_fsub(b, dst_rgb, src_rgb));
      a = nir_fmax(b, nir_imm_float(b, 0.0), nir_fsub(b, dst_a, src_a));
      break;
   case PIPE_ADVANCED_BLEND_CONTRAST: {
      /* res.rgb = (dst_a / 2) + 2 * (dst_rgb - dst_a/2) * (src_rgb - src_a/2) */
      nir_def *half_dst_a = nir_fmul_imm(b, dst_a, 0.5);
      nir_def *half_src_a = nir_fmul_imm(b, src_a, 0.5);
      nir_def *dst_centered = nir_fsub(b, dst_rgb, half_dst_a);
      nir_def *src_centered = nir_fsub(b, src_rgb, half_src_a);
      rgb = nir_fadd(b, half_dst_a,
                     nir_fmul_imm(b, nir_fmul(b, dst_centered, src_centered), 2.0));
      a = dst_a;
      break;
   }
   case PIPE_ADVANCED_BLEND_INVERT_OVG: {
      /* res.rgb = src_a * (1 - dst_rgb) + (1 - src_a) * dst_rgb */
      nir_def *one_minus_dst = nir_fsub_imm(b, 1.0, dst_rgb);
      nir_def *one_minus_src_a = nir_fsub_imm(b, 1.0, src_a);
      rgb = nir_fadd(b, nir_fmul(b, src_a, one_minus_dst),
                     nir_fmul(b, one_minus_src_a, dst_rgb));
      /* res.a = src_a + dst_a - src_a * dst_a */
      a = nir_fsub(b, nir_fadd(b, src_a, dst_a), nir_fmul(b, src_a, dst_a));
      break;
   }
   /* RED/GREEN/BLUE: take that one channel from src, the rest from dst */
   case PIPE_ADVANCED_BLEND_RED:
      rgb = nir_vec3(b, nir_channel(b, src_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, dst_rgb, 2));
      a = dst_a;
      break;
   case PIPE_ADVANCED_BLEND_GREEN:
      rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, src_rgb, 1), nir_channel(b, dst_rgb, 2));
      a = dst_a;
      break;
   case PIPE_ADVANCED_BLEND_BLUE:
      rgb = nir_vec3(b, nir_channel(b, dst_rgb, 0), nir_channel(b, dst_rgb, 1), nir_channel(b, src_rgb, 2));
      a = dst_a;
      break;
   default:
      UNREACHABLE("Invalid additional RGB blend op");
   }

   /* If dst is non-premultiplied, the output should also be non-premultiplied */
   if (!options->rt[rt].dst_premultiplied) {
      rgb = nir_bcsel(b,
                      nir_fgt_imm(b, a, 0.0),
                      nir_fdiv(b, rgb, a),
                      imm3(b, 0.0));
   }

   return nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1),
                   nir_channel(b, rgb, 2), a);
}

/*
 * X, Y, Z blend factors for the advanced blend equation:
 *   RGB = f(Cs',Cd') * X * p0 + Cs' * Y * p1 + Cd' * Z * p2
 *   A   = X * p0 + Y * p1 + Z * p2
 *
 * Indexed by enum pipe_advanced_blend_mode; each row is { X, Y, Z }.
 * Rows with X = 0 are exactly the modes for which calc_blend_factor()
 * returns NULL.
 * Modes >= PIPE_ADVANCED_BLEND_PLUS use separate calc_additional_rgb_blend()
 * and have no row here.
 */
static const float blend_xyz[][3] = {
   [PIPE_ADVANCED_BLEND_NONE] = { 0, 0, 0 },
   [PIPE_ADVANCED_BLEND_MULTIPLY] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_SCREEN] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_OVERLAY] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_DARKEN] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_LIGHTEN] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_COLORDODGE] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_COLORBURN] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HARDLIGHT] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_SOFTLIGHT] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_DIFFERENCE] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_EXCLUSION] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HSL_HUE] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HSL_SATURATION] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HSL_COLOR] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HSL_LUMINOSITY] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_SRC] = { 1, 1, 0 },
   [PIPE_ADVANCED_BLEND_DST] = { 1, 0, 1 },
   [PIPE_ADVANCED_BLEND_SRC_OVER] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_DST_OVER] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_SRC_IN] = { 1, 0, 0 },
   [PIPE_ADVANCED_BLEND_DST_IN] = { 1, 0, 0 },
   [PIPE_ADVANCED_BLEND_SRC_OUT] = { 0, 1, 0 },
   [PIPE_ADVANCED_BLEND_DST_OUT] = { 0, 0, 1 },
   [PIPE_ADVANCED_BLEND_SRC_ATOP] = { 1, 0, 1 },
   [PIPE_ADVANCED_BLEND_DST_ATOP] = { 1, 1, 0 },
   [PIPE_ADVANCED_BLEND_XOR] = { 0, 1, 1 },
   [PIPE_ADVANCED_BLEND_INVERT] = { 1, 0, 1 },
   [PIPE_ADVANCED_BLEND_INVERT_RGB] = { 1, 0, 1 },
   [PIPE_ADVANCED_BLEND_LINEARDODGE] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_LINEARBURN] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_VIVIDLIGHT] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_LINEARLIGHT] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_PINLIGHT] = { 1, 1, 1 },
   [PIPE_ADVANCED_BLEND_HARDMIX] = { 1, 1, 1 },
};

/*
 * Lower an advanced blend mode for render target rt.
 *
 * src and dst are read as RGBA colours. All arithmetic is done in 32-bit
 * float; the result is converted back to src's original bit size.
 *
 * Modes in the range PIPE_ADVANCED_BLEND_PLUS..PIPE_ADVANCED_BLEND_BLUE are
 * forwarded to calc_additional_rgb_blend(). Every other mode is evaluated as
 *
 *    RGB = f(Cs,Cd) * X * p0 + Cs * Y * p1 + Cd * Z * p2
 *    A   = X * p0 + Y * p1 + Z * p2
 *
 * where Cs/Cd are the unpremultiplied colours, X/Y/Z come from blend_xyz and
 * p0/p1/p2 are the overlap weights chosen by options->rt[rt].overlap.
 *
 * Returns the blended colour as a vec4.
 */
static nir_def *
nir_blend_advanced(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_def *src, nir_def *dst)
{
   /* Advanced blend uses hardcoded 32-bit constants. Convert inputs to f32
    * and convert back at the end.
    */
   const unsigned bit_size = src->bit_size;
   src = nir_f2f32(b, src);
   dst = nir_f2f32(b, dst);

   /* Check if this is an additional RGB blend op */
   if (options->rt[rt].blend_mode >= PIPE_ADVANCED_BLEND_PLUS &&
       options->rt[rt].blend_mode <= PIPE_ADVANCED_BLEND_BLUE) {
      nir_def *result = calc_additional_rgb_blend(b, options, rt, src, dst);
      return nir_f2fN(b, result, bit_size);
   }

   nir_def *src_rgb = nir_trim_vector(b, src, 3);
   nir_def *dst_rgb = nir_trim_vector(b, dst, 3);
   nir_def *src_a = nir_channel(b, src, 3);
   nir_def *dst_a = nir_channel(b, dst, 3);

   /* Unpremultiply. A zero alpha yields a zero colour rather than a division
    * by zero.
    */
   nir_def *src_rgb_unpre;
   if (options->rt[rt].src_premultiplied) {
      src_rgb_unpre = nir_bcsel(b,
                                nir_feq_imm(b, src_a, 0.0),
                                imm3(b, 0.0),
                                nir_fdiv(b, src_rgb, src_a));
   } else {
      src_rgb_unpre = src_rgb;
   }

   nir_def *dst_rgb_unpre;
   if (options->rt[rt].dst_premultiplied) {
      dst_rgb_unpre = nir_bcsel(b,
                                nir_feq_imm(b, dst_a, 0.0),
                                imm3(b, 0.0),
                                nir_fdiv(b, dst_rgb, dst_a));
   } else {
      dst_rgb_unpre = dst_rgb;
   }

   /* f(Cs', Cd') - may be NULL if X=0 (result unused) */
   nir_def *factor = calc_blend_factor(b, options->rt[rt].blend_mode, src_rgb_unpre, dst_rgb_unpre);

   /* Overlap weights: p0 multiplies f(), p1 the source, p2 the destination */
   nir_def *p0, *p1, *p2;

   switch (options->rt[rt].overlap) {
   case PIPE_BLEND_OVERLAP_UNCORRELATED:
      /* p0 = As * Ad, p1 = As * (1 - Ad), p2 = Ad * (1 - As) */
      p0 = nir_fmul(b, src_a, dst_a);
      p1 = nir_fmul(b, src_a, nir_fsub_imm(b, 1.0, dst_a));
      p2 = nir_fmul(b, dst_a, nir_fsub_imm(b, 1.0, src_a));
      break;
   case PIPE_BLEND_OVERLAP_CONJOINT:
      /* p0 = min(As, Ad), p1 = max(As - Ad, 0), p2 = max(Ad - As, 0) */
      p0 = nir_fmin(b, src_a, dst_a);
      p1 = nir_fmax(b, nir_fsub(b, src_a, dst_a), nir_imm_float(b, 0.0));
      p2 = nir_fmax(b, nir_fsub(b, dst_a, src_a), nir_imm_float(b, 0.0));
      break;
   case PIPE_BLEND_OVERLAP_DISJOINT:
      /* p0 = max(As + Ad - 1, 0), p1 = min(As, 1 - Ad), p2 = min(Ad, 1 - As) */
      p0 = nir_fmax(b, nir_fadd_imm(b, nir_fadd(b, src_a, dst_a), -1.0), nir_imm_float(b, 0.0));
      p1 = nir_fmin(b, src_a, nir_fsub_imm(b, 1.0, dst_a));
      p2 = nir_fmin(b, dst_a, nir_fsub_imm(b, 1.0, src_a));
      break;
   default:
      UNREACHABLE("invalid overlap");
   }

   const float x = blend_xyz[options->rt[rt].blend_mode][0];
   const float y = blend_xyz[options->rt[rt].blend_mode][1];
   const float z = blend_xyz[options->rt[rt].blend_mode][2];

   /* RGB = f * X * p0 + Cs' * Y * p1 + Cd' * Z * p2
    *
    * Terms whose coefficient is zero are skipped so no code is emitted for
    * them; the f() term is skipped when calc_blend_factor() returned NULL.
    */
   nir_def *rgb = imm3(b, 0.0);
   if (factor)
      rgb = nir_fmul(b, factor, nir_fmul_imm(b, p0, x));
   if (y != 0.0)
      rgb = nir_fadd(b, rgb, nir_fmul(b, src_rgb_unpre, nir_fmul_imm(b, p1, y)));
   if (z != 0.0)
      rgb = nir_fadd(b, rgb, nir_fmul(b, dst_rgb_unpre, nir_fmul_imm(b, p2, z)));

   /* A = X * p0 + Y * p1 + Z * p2 */
   nir_def *a = nir_imm_float(b, 0.0);
   if (x != 0.0)
      a = nir_fmul_imm(b, p0, x);
   if (y != 0.0)
      a = nir_fadd(b, a, nir_fmul_imm(b, p1, y));
   if (z != 0.0)
      a = nir_fadd(b, a, nir_fmul_imm(b, p2, z));

   /* If dst is non-premultiplied, the output should also be non-premultiplied */
   if (!options->rt[rt].dst_premultiplied) {
      rgb = nir_bcsel(b,
                      nir_fgt_imm(b, a, 0.0),
                      nir_fdiv(b, rgb, a),
                      imm3(b, 0.0));
   }

   nir_def *result = nir_vec4(b, nir_channel(b, rgb, 0), nir_channel(b, rgb, 1),
                              nir_channel(b, rgb, 2), a);
   return nir_f2fN(b, result, bit_size);
}

/*
 * Given a blend state, the source colour, and the destination colour, return
 * the blended colour.
 *
 * b                  - builder; instructions are emitted at its cursor
 * src                - source colour; channels 0-3 are read
 * src1               - dual-source colour, or NULL if the shader does not
 *                      write one (zero is substituted)
 * dst                - destination colour; channels missing from the format
 *                      read back as 0 (colour) or 1 (alpha)
 * rt                 - blend state and format of the render target
 * scalar_blend_const - load the blend constant as four scalar loads rather
 *                      than a single RGBA load
 *
 * Pure integer formats and the "replace" blend state return src unchanged.
 * Otherwise a new vec4 is returned.
 */

nir_def *
nir_color_blend(nir_builder *b, nir_def *src, nir_def *src1, nir_def *dst,
                const nir_lower_blend_rt *rt, bool scalar_blend_const)
{
   if (util_format_is_pure_integer(rt->format) || nir_blend_replace_rt(rt))
      return src;

   /* Don't crash if src1 isn't written. It doesn't matter what dual colour we
    * blend with in that case, as long as we don't dereference NULL. From here
    * on src1 is always valid.
    */
   if (!src1)
      src1 = nir_imm_zero(b, 4, src->bit_size);

   /* Grab the blend constant ahead of time */
   nir_def *bconst;
   if (scalar_blend_const) {
      bconst = nir_vec4(b,
                        nir_load_blend_const_color_r_float(b),
                        nir_load_blend_const_color_g_float(b),
                        nir_load_blend_const_color_b_float(b),
                        nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   /* Keep every operand at the source colour's precision */
   if (src->bit_size == 16) {
      bconst = nir_f2f16(b, bconst);
      src1 = nir_f2f16(b, src1);
   }

   /* The input colours need to be clamped to the format. Contrary to the
    * OpenGL/Vulkan specs, it really is the inputs that get clamped and not the
    * intermediate blend factors. This matches the CTS and hardware behaviour.
    */
   src = nir_fsat_to_format(b, src, rt->format);
   bconst = nir_fsat_to_format(b, bconst, rt->format);
   src1 = nir_fsat_to_format(b, src1, rt->format);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(rt->format);

   nir_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
   nir_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

   dst = nir_vec4(b,
                  channel_exists(desc, 0) ? nir_channel(b, dst, 0) : zero,
                  channel_exists(desc, 1) ? nir_channel(b, dst, 1) : zero,
                  channel_exists(desc, 2) ? nir_channel(b, dst, 2) : zero,
                  channel_exists(desc, 3) ? nir_channel(b, dst, 3) : one);

   /* We blend per channel and recombine later */
   nir_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Channels 0-2 use the RGB state, channel 3 the alpha state */
      nir_lower_blend_channel chan = (c < 3) ? rt->rgb : rt->alpha;

      nir_def *psrc = nir_channel(b, src, c);
      nir_def *pdst = nir_channel(b, dst, c);

      /* min/max take the raw colours; everything else scales by factors */
      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
            b, psrc,
            src, src1, dst, bconst, c,
            chan.src_factor, rt->format);

         pdst = nir_blend_factor(
            b, pdst,
            src, src1, dst, bconst, c,
            chan.dst_factor, rt->format);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}

/* Map a fragment output location to a colour render target index.
 *
 * Returns the offset of the location from FRAG_RESULT_DATA0, or -1 for any
 * location below FRAG_RESULT_DATA0. FRAG_RESULT_COLOR is not accepted here
 * (asserted).
 */
static int
color_index_for_location(unsigned location)
{
   assert(location != FRAG_RESULT_COLOR &&
          "gl_FragColor must be lowered before nir_lower_blend");

   const bool is_data_slot = location >= FRAG_RESULT_DATA0;

   return is_data_slot ? (int)(location - FRAG_RESULT_DATA0) : -1;
}

/**
 * Lower one colour store_output to software blending.
 *
 * Callback for nir_shader_intrinsics_pass. Stores to a render target that has
 * a format set in the options are either removed (when nothing can be
 * written) or rewritten to store the logic-op'd / blended / colour-masked
 * value, and moved to the end of their block.
 *
 * @param b      builder; its cursor is moved to the end of the store's block
 * @param store  candidate intrinsic; anything but store_output is ignored
 * @param data   the pass's struct ctx (options plus collected dual-source
 *               colours)
 * @return true if the store was removed or rewritten, false if untouched
 */
static bool
nir_lower_blend_instr(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   struct ctx *ctx = data;
   const nir_lower_blend_options *options = ctx->options;
   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(store);
   int rt = color_index_for_location(sem.location);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->rt[rt].format == PIPE_FORMAT_NONE)
      return false;

   /* Only process stores once. Pass flags are cleared by consume_dual_stores */
   if (store->instr.pass_flags)
      return false;

   store->instr.pass_flags = 1;

   /* Stores are sunk to the bottom of the block to ensure that the dual
    * source colour is already written.
    */
   b->cursor = nir_after_block(store->instr.block);

   const enum pipe_format format = options->rt[rt].format;
   enum pipe_logicop logicop_func = options->logicop_func;

   /* From the Vulkan spec ("Logical operations"):
    *
    *    Logical operations are not applied to floating-point or sRGB format
    *    color attachments...
    *
    *    If logicOpEnable is VK_TRUE... blending of all attachments is treated
    *    as if it were disabled. Any attachments using color formats for which
    *    logical operations are not supported simply pass through the color
    *    values unmodified.
    *
    * The semantic for unsupported formats is equivalent to a logicop of COPY.
    * It is /not/ equivalent to disabled logicops (which would incorrectly apply
    * blending). To implement this spec text with minimal special casing, we
    * override the logicop func to COPY for unsupported formats.
    */
   if (util_format_is_float(format) || util_format_is_srgb(format)) {
      logicop_func = PIPE_LOGICOP_COPY;
   }

   /* Don't bother copying the destination to the source for disabled RTs */
   if (options->rt[rt].colormask == 0 ||
       (options->logicop_enable && logicop_func == PIPE_LOGICOP_NOOP)) {

      nir_instr_remove(&store->instr);
      return true;
   }

   /* Grab the input color.  We always want 4 channels during blend.  Dead
    * code will clean up any channels we don't need.
    */
   nir_def *src = nir_pad_vector(b, store->src[0].ssa, 4);

   assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");

   /* Grab the previous fragment color if we need it */
   nir_def *dst;

   if (options->rt[rt].advanced_blend ||
       channel_uses_dest(options->rt[rt].rgb) ||
       channel_uses_dest(options->rt[rt].alpha) ||
       options->logicop_enable ||
       options->rt[rt].colormask != BITFIELD_MASK(4)) {

      /* Reading the destination turns this into a framebuffer fetch; record
       * that in the shader info and in the load's I/O semantics.
       */
      b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
      b->shader->info.fs.uses_fbfetch_output = true;
      b->shader->info.fs.uses_sample_shading = true;
      sem.fb_fetch_output = true;

      dst = nir_load_output(b, 4, nir_src_bit_size(store->src[0]),
                            nir_imm_int(b, 0),
                            .dest_type = nir_intrinsic_src_type(store),
                            .io_semantics = sem);
   } else {
      dst = nir_undef(b, 4, nir_src_bit_size(store->src[0]));
   }

   /* Blend the two colors per the passed options. Blending is prioritized as:
    * 1. Logic operations (if logicop_enable is true) - mutually exclusive with
    *    blending
    * 2. Advanced blending (if advanced_blend is set) - uses complex blend
    *    equations
    * 3. Standard blending (if configured) - uses traditional blend factors
    *
    * We only call nir_blend if blending is enabled with a blend mode other
    * than replace (independent of the color mask). That avoids unnecessary
    * fsat instructions in the common case where blending is disabled at an API
    * level, but the driver calls nir_blend (possibly for color masking).
    */
   nir_def *blended = src;

   if (options->logicop_enable) {
      /* Use the local logicop_func, not options->logicop_func: it carries the
       * COPY override for float/sRGB formats applied above, so those formats
       * pass the source colour through instead of having the op applied.
       */
      blended = nir_color_logicop(b, src, dst, logicop_func, format);
   } else if (options->rt[rt].advanced_blend) {
      blended = nir_blend_advanced(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(format) &&
              !nir_blend_replace_rt(&options->rt[rt])) {
      assert(!util_format_is_scaled(format));
      blended = nir_color_blend(b, src, ctx->src1[rt], dst, &options->rt[rt],
                                options->scalar_blend_const);
   }

   /* Apply a colormask if necessary */
   blended = nir_color_mask(b, blended, dst, options->rt[rt].colormask);

   /* Shave off any components we don't want to store */
   const unsigned num_components = util_format_get_nr_components(format);
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store destination as needed */
   store->num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(store) &
                                          nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_src_rewrite(&store->src[0], blended);

   /* Sink to bottom */
   nir_instr_remove(&store->instr);
   nir_builder_instr_insert(b, &store->instr);
   return true;
}

/*
 * When nir_lower_blend runs, a dual-source colour is consumed by the blend
 * math alone. Record the stored value per render target and delete the
 * store_output, so the backend never sees dual-source stores.
 *
 * `data` is the array of per-RT dual-source colours to fill in.
 */
static bool
consume_dual_stores(nir_builder *b, nir_intrinsic_instr *store, void *data)
{
   if (store->intrinsic != nir_intrinsic_store_output)
      return false;

   /* Reset the pass flags on every store_output we visit; the blend lowering
    * callback uses them afterwards to mark stores it has already handled.
    */
   store->instr.pass_flags = 0;

   const nir_io_semantics io = nir_intrinsic_io_semantics(store);
   int rt;

   if (io.dual_source_blend_index != 0) {
      rt = color_index_for_location(io.location);
   } else if (io.location == FRAG_RESULT_DUAL_SRC_BLEND) {
      rt = 0;
   } else {
      /* Not a dual-source store: leave it in place */
      return false;
   }

   assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");

   nir_def **dual_colours = data;
   dual_colours[rt] = store->src[0].ssa;

   nir_instr_remove(&store->instr);
   return true;
}

/** Lower blending to framebuffer fetch and some math
 *
 * Shader I/O must already be lowered to explicit load/store instructions
 * (nir_lower_io) before this pass runs, and the shader must be a fragment
 * shader.
 *
 * Returns true if either of the two internal passes made progress.
 */
bool
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   struct ctx state = { .options = options };

   /* First collect (and remove) the dual-source stores... */
   const bool consumed_dual =
      nir_shader_intrinsics_pass(shader, consume_dual_stores,
                                 nir_metadata_control_flow,
                                 state.src1);

   /* ...then lower the remaining colour stores using what was collected */
   const bool lowered_stores =
      nir_shader_intrinsics_pass(shader, nir_lower_blend_instr,
                                 nir_metadata_control_flow,
                                 &state);

   return consumed_dual || lowered_stores;
}