asahi/hk: Implement VK_EXT_blend_operation_advanced
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Add support for VK_EXT_blend_operation_advanced to the Honeykrisp
Vulkan driver. Extend agx_blend_rt_key with a tagged mode field
interpreted via pack/unpack helpers for standard and advanced blend
state.

The driver advertises full extension support including all operations,
independent blend, non-premultiplied colors, and correlated overlap.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38929>
This commit is contained in:
Christian Gmeiner 2025-12-03 17:03:10 +01:00 committed by Marge Bot
parent 4fedf6bedd
commit e9e4bd8e62
6 changed files with 162 additions and 59 deletions

View file

@ -597,7 +597,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_acquire_xlib_display DONE (anv, hk, lvp, nvk, panvk, radv, tu, v3dv, vn)
VK_EXT_attachment_feedback_loop_dynamic_state DONE (anv, lvp, radv, tu, vn)
VK_EXT_attachment_feedback_loop_layout DONE (anv, hk, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_blend_operation_advanced DONE (vn)
VK_EXT_blend_operation_advanced DONE (hk, vn)
VK_EXT_border_color_swizzle DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv/gfx10+, tu, v3dv, vn)
VK_EXT_buffer_device_address DONE (anv, dzn/sm6.6+, hasvk, hk, nvk, panvk, radv, vn)
VK_EXT_calibrated_timestamps DONE (anv, hasvk, hk, kk, nvk, panvk/v10+, lvp, radv, vn, tu/a750+)

View file

@ -10,6 +10,7 @@
#include "agx_nir_lower_vbo.h"
#include "agx_pack.h"
#include "nir_lower_blend.h"
#include "util/macros.h"
struct agx_linked_shader {
/* Mapped executable memory */
@ -100,16 +101,88 @@ struct agx_fs_prolog_key {
};
/*
 * Per-render-target blend key. The bitfields below add up to exactly 32 bits
 * (4 + 1 + 1 + 26). `mode` is an opaque payload whose meaning is selected by
 * `advanced_blend`: callers decode it with agx_unpack_blend_advanced() when the
 * flag is set and with agx_unpack_blend_standard() otherwise.
 */
struct agx_blend_rt_key {
/* Colour write mask for this render target (one bit per channel). */
unsigned colormask : 4;
/* Tag for `mode`: 1 = agx_blend_advanced payload, 0 = agx_blend_standard. */
unsigned advanced_blend : 1;
unsigned pad : 1;
/* Packed blend state produced by agx_pack_blend_standard/_advanced().
 * Only 26 bits are stored, so any bits the pack helpers return above bit 25
 * are dropped on assignment.
 * NOTE(review): this relies on the compiler allocating the first-declared
 * bitfields of the payload structs in the low-order bits - confirm for any
 * non-little-endian target.
 */
unsigned mode : 26;
};
static_assert(sizeof(struct agx_blend_rt_key) == 4, "packed");
/*
 * Decoded view of agx_blend_rt_key::mode for conventional (func/factor)
 * blending. The six meaningful fields occupy 3 + 5 + 5 + 3 + 5 + 5 = 26 bits,
 * matching the width of `mode`; the remaining 6 bits are padding so that the
 * struct is exactly 32 bits and can be copied to/from an `unsigned`.
 *
 * The colour mask is not part of this payload: it is stored directly in
 * agx_blend_rt_key.
 */
struct agx_blend_standard {
   /* RGB blend equation and its source/destination factors. */
   enum pipe_blend_func rgb_func : 3;
   enum pipe_blendfactor rgb_src_factor : 5;
   enum pipe_blendfactor rgb_dst_factor : 5;

   /* Alpha blend equation and its source/destination factors. */
   enum pipe_blend_func alpha_func : 3;
   enum pipe_blendfactor alpha_src_factor : 5;
   enum pipe_blendfactor alpha_dst_factor : 5;

   /* Unused; left zero by agx_pack_blend_standard()'s initializer. */
   unsigned pad : 6;
};
static_assert(sizeof(struct agx_blend_rt_key) == 4, "packed");
static_assert(sizeof(struct agx_blend_standard) == 4, "packed");
static inline unsigned
agx_pack_blend_standard(enum pipe_blend_func rgb_func,
enum pipe_blendfactor rgb_src_factor,
enum pipe_blendfactor rgb_dst_factor,
enum pipe_blend_func alpha_func,
enum pipe_blendfactor alpha_src_factor,
enum pipe_blendfactor alpha_dst_factor)
{
struct agx_blend_standard blend = {
.rgb_func = rgb_func,
.rgb_src_factor = rgb_src_factor,
.rgb_dst_factor = rgb_dst_factor,
.alpha_func = alpha_func,
.alpha_src_factor = alpha_src_factor,
.alpha_dst_factor = alpha_dst_factor,
};
unsigned val;
typed_memcpy(&val, &blend, 1);
return val;
}
static inline struct agx_blend_standard
agx_unpack_blend_standard(unsigned mode)
{
struct agx_blend_standard blend;
typed_memcpy(&blend, &mode, 1);
return blend;
}
/*
 * Decoded view of agx_blend_rt_key::mode for advanced blend operations
 * (VK_EXT_blend_operation_advanced). The meaningful fields use
 * 8 + 2 + 1 + 1 + 1 = 13 bits; the rest is padding so the struct is exactly
 * 32 bits and can be copied to/from an `unsigned`.
 */
struct agx_blend_advanced {
/* Advanced blend operation to apply. */
enum pipe_advanced_blend_mode op : 8;
/* Overlap mode between source and destination coverage. */
enum pipe_blend_overlap_mode overlap : 2;
/* Whether the source / destination colours are already premultiplied. */
unsigned src_premultiplied : 1;
unsigned dst_premultiplied : 1;
/* Stored in the key by agx_pack_blend_advanced().
 * NOTE(review): the visible epilog code does not forward this to the blend
 * lowering options, and the Vulkan caller asserts it is false - confirm
 * whether it is consumed anywhere.
 */
unsigned clamp_results : 1;
unsigned pad : 19;
};
static_assert(sizeof(struct agx_blend_advanced) == 4, "packed");
static inline unsigned
agx_pack_blend_advanced(enum pipe_advanced_blend_mode op,
enum pipe_blend_overlap_mode overlap,
bool src_premultiplied, bool dst_premultiplied,
bool clamp_results)
{
struct agx_blend_advanced blend = {
.op = op,
.overlap = overlap,
.src_premultiplied = src_premultiplied,
.dst_premultiplied = dst_premultiplied,
.clamp_results = clamp_results,
};
unsigned val;
typed_memcpy(&val, &blend, 1);
return val;
}
static inline struct agx_blend_advanced
agx_unpack_blend_advanced(unsigned mode)
{
struct agx_blend_advanced blend;
typed_memcpy(&blend, &mode, 1);
return blend;
}
struct agx_blend_key {
struct agx_blend_rt_key rt[8];

View file

@ -329,11 +329,14 @@ lower_tests_zs(nir_shader *s, bool value)
static inline bool
blend_uses_2src(struct agx_blend_rt_key rt)
{
assert(rt.advanced_blend == false);
const struct agx_blend_standard blend = agx_unpack_blend_standard(rt.mode);
enum pipe_blendfactor factors[] = {
rt.rgb_src_factor,
rt.rgb_dst_factor,
rt.alpha_src_factor,
rt.alpha_dst_factor,
blend.rgb_src_factor,
blend.rgb_dst_factor,
blend.alpha_src_factor,
blend.alpha_dst_factor,
};
for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) {
@ -394,7 +397,8 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
* for blending so should be suppressed for missing attachments to keep
* the assert from blowing up on OpenGL.
*/
if (blend_uses_2src(key->blend.rt[rt]) &&
if (!key->blend.rt[rt].advanced_blend &&
blend_uses_2src(key->blend.rt[rt]) &&
key->rt_formats[rt] != PIPE_FORMAT_NONE) {
assert(location == 0);
@ -432,19 +436,37 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
static_assert(ARRAY_SIZE(opts.rt) == 8, "max RTs out of sync");
for (unsigned i = 0; i < 8; ++i) {
opts.rt[i] = (nir_lower_blend_rt){
.format = key->rt_formats[i],
if (key->blend.rt[i].advanced_blend) {
const struct agx_blend_advanced blend =
agx_unpack_blend_advanced(key->blend.rt[i].mode);
.rgb.src_factor = key->blend.rt[i].rgb_src_factor,
.rgb.dst_factor = key->blend.rt[i].rgb_dst_factor,
.rgb.func = key->blend.rt[i].rgb_func,
opts.rt[i] = (nir_lower_blend_rt){
.format = key->rt_formats[i],
.advanced_blend = true,
.colormask = key->blend.rt[i].colormask,
.blend_mode = blend.op,
.src_premultiplied = blend.src_premultiplied,
.dst_premultiplied = blend.dst_premultiplied,
.overlap = blend.overlap,
};
} else {
const struct agx_blend_standard blend =
agx_unpack_blend_standard(key->blend.rt[i].mode);
.alpha.src_factor = key->blend.rt[i].alpha_src_factor,
.alpha.dst_factor = key->blend.rt[i].alpha_dst_factor,
.alpha.func = key->blend.rt[i].alpha_func,
opts.rt[i] = (nir_lower_blend_rt){
.format = key->rt_formats[i],
.colormask = key->blend.rt[i].colormask,
};
.rgb.src_factor = blend.rgb_src_factor,
.rgb.dst_factor = blend.rgb_dst_factor,
.rgb.func = blend.rgb_func,
.alpha.src_factor = blend.alpha_src_factor,
.alpha.dst_factor = blend.alpha_dst_factor,
.alpha.func = blend.alpha_func,
.colormask = key->blend.rt[i].colormask,
};
}
}
/* It's more efficient to use masked stores (with

View file

@ -128,6 +128,7 @@ hk_cmd_buffer_dirty_render_pass(struct hk_cmd_buffer *cmd)
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES);
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS);
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS);
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ADVANCED);
/* These depend on the depth/stencil format */
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE);
@ -2796,7 +2797,7 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
IS_DIRTY(CB_LOGIC_OP_ENABLE) || IS_DIRTY(CB_WRITE_MASKS) ||
IS_DIRTY(CB_COLOR_WRITE_ENABLES) || IS_DIRTY(CB_ATTACHMENT_COUNT) ||
IS_DIRTY(CB_BLEND_ENABLES) || IS_DIRTY(CB_BLEND_EQUATIONS) ||
IS_DIRTY(CB_BLEND_CONSTANTS) ||
IS_DIRTY(CB_BLEND_CONSTANTS) || IS_DIRTY(CB_BLEND_ADVANCED) ||
desc->root_dirty /* for pipeline stats */ || true) {
unsigned tib_sample_mask = BITFIELD_MASK(dyn->ms.rasterization_samples);
@ -2890,32 +2891,32 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
if (!dyn->cb.attachments[i].blend_enable) {
key.epilog.blend.rt[i] = (struct agx_blend_rt_key){
.colormask = write_mask,
.rgb_func = PIPE_BLEND_ADD,
.alpha_func = PIPE_BLEND_ADD,
.rgb_src_factor = PIPE_BLENDFACTOR_ONE,
.alpha_src_factor = PIPE_BLENDFACTOR_ONE,
.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO,
.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO,
.mode = agx_pack_blend_standard(
PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO,
PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO),
};
} else if (cb->color_blend_op >= VK_BLEND_OP_ZERO_EXT) {
key.epilog.blend.rt[i] = (struct agx_blend_rt_key){
.colormask = write_mask,
.advanced_blend = 1,
.mode = agx_pack_blend_advanced(
vk_advanced_blend_op_to_pipe(cb->color_blend_op),
vk_blend_overlap_to_pipe(cb->blend_overlap),
cb->src_premultiplied, cb->dst_premultiplied,
cb->clamp_results),
};
assert(cb->clamp_results == false);
} else {
key.epilog.blend.rt[i] = (struct agx_blend_rt_key){
.colormask = write_mask,
.rgb_src_factor =
.mode = agx_pack_blend_standard(
vk_blend_op_to_pipe(cb->color_blend_op),
vk_blend_factor_to_pipe(cb->src_color_blend_factor),
.rgb_dst_factor =
vk_blend_factor_to_pipe(cb->dst_color_blend_factor),
.rgb_func = vk_blend_op_to_pipe(cb->color_blend_op),
.alpha_src_factor =
vk_blend_op_to_pipe(cb->alpha_blend_op),
vk_blend_factor_to_pipe(cb->src_alpha_blend_factor),
.alpha_dst_factor =
vk_blend_factor_to_pipe(cb->dst_alpha_blend_factor),
.alpha_func = vk_blend_op_to_pipe(cb->alpha_blend_op),
vk_blend_factor_to_pipe(cb->dst_alpha_blend_factor)),
};
}
}

View file

@ -140,6 +140,7 @@ hk_get_device_extensions(const struct hk_instance *instance,
.KHR_zero_initialize_workgroup_memory = true,
.EXT_4444_formats = true,
.EXT_attachment_feedback_loop_layout = true,
.EXT_blend_operation_advanced = true,
.EXT_border_color_swizzle = true,
.EXT_buffer_device_address = true,
.EXT_calibrated_timestamps = true,
@ -632,6 +633,9 @@ hk_get_device_features(
/* VK_KHR_shader_relaxed_extended_instruction */
.shaderRelaxedExtendedInstruction = true,
/* VK_EXT_blend_operation_advanced */
.advancedBlendCoherentOperations = true,
};
}
@ -1012,6 +1016,14 @@ hk_get_device_properties(const struct agx_device *dev,
/* VK_KHR_fragment_shader_barycentric */
.triStripVertexOrderIndependentOfProvokingVertex = false,
/* VK_EXT_blend_operation_advanced */
.advancedBlendMaxColorAttachments = HK_MAX_RTS,
.advancedBlendIndependentBlend = true,
.advancedBlendNonPremultipliedSrcColor = true,
.advancedBlendNonPremultipliedDstColor = true,
.advancedBlendCorrelatedOverlap = true,
.advancedBlendAllOperations = true,
};
strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName));

View file

@ -218,23 +218,13 @@ agx_create_blend_state(struct pipe_context *ctx,
if (state->logicop_enable || !rt.blend_enable) {
/* No blending, but we get the colour mask below */
key->rt[i] = (struct agx_blend_rt_key){
.rgb_func = PIPE_BLEND_ADD,
.rgb_src_factor = PIPE_BLENDFACTOR_ONE,
.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO,
.alpha_func = PIPE_BLEND_ADD,
.alpha_src_factor = PIPE_BLENDFACTOR_ONE,
.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO,
};
key->rt[i].mode = agx_pack_blend_standard(
PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO,
PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO);
} else {
key->rt[i].rgb_func = rt.rgb_func;
key->rt[i].rgb_src_factor = rt.rgb_src_factor;
key->rt[i].rgb_dst_factor = rt.rgb_dst_factor;
key->rt[i].alpha_func = rt.alpha_func;
key->rt[i].alpha_src_factor = rt.alpha_src_factor;
key->rt[i].alpha_dst_factor = rt.alpha_dst_factor;
key->rt[i].mode = agx_pack_blend_standard(
rt.rgb_func, rt.rgb_src_factor, rt.rgb_dst_factor, rt.alpha_func,
rt.alpha_src_factor, rt.alpha_dst_factor);
}
key->rt[i].colormask = rt.colormask;
@ -2357,12 +2347,17 @@ agx_update_fs(struct agx_batch *batch)
/* Try to disable blending to get rid of some fsats */
if (link_key.epilog.fs.link.loc0_w_1) {
struct agx_blend_rt_key *k = &link_key.epilog.fs.blend.rt[0];
struct agx_blend_standard b = agx_unpack_blend_standard(k->mode);
k->rgb_src_factor = optimize_blend_factor_w_1(k->rgb_src_factor);
k->rgb_dst_factor = optimize_blend_factor_w_1(k->rgb_dst_factor);
b.rgb_src_factor = optimize_blend_factor_w_1(b.rgb_src_factor);
b.rgb_dst_factor = optimize_blend_factor_w_1(b.rgb_dst_factor);
k->alpha_src_factor = optimize_blend_factor_w_1(k->alpha_src_factor);
k->alpha_dst_factor = optimize_blend_factor_w_1(k->alpha_dst_factor);
b.alpha_src_factor = optimize_blend_factor_w_1(b.alpha_src_factor);
b.alpha_dst_factor = optimize_blend_factor_w_1(b.alpha_dst_factor);
k->mode = agx_pack_blend_standard(b.rgb_func, b.rgb_src_factor,
b.rgb_dst_factor, b.alpha_func,
b.alpha_src_factor, b.alpha_dst_factor);
}
link_key.epilog.fs.blend.alpha_to_coverage &= msaa;