diff --git a/docs/features.txt b/docs/features.txt index c24c5a81ed9..bdf7524703d 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -597,7 +597,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_acquire_xlib_display DONE (anv, hk, lvp, nvk, panvk, radv, tu, v3dv, vn) VK_EXT_attachment_feedback_loop_dynamic_state DONE (anv, lvp, radv, tu, vn) VK_EXT_attachment_feedback_loop_layout DONE (anv, hk, lvp, nvk, radv, tu, v3dv, vn) - VK_EXT_blend_operation_advanced DONE (vn) + VK_EXT_blend_operation_advanced DONE (hk, vn) VK_EXT_border_color_swizzle DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv/gfx10+, tu, v3dv, vn) VK_EXT_buffer_device_address DONE (anv, dzn/sm6.6+, hasvk, hk, nvk, panvk, radv, vn) VK_EXT_calibrated_timestamps DONE (anv, hasvk, hk, kk, nvk, panvk/v10+, lvp, radv, vn, tu/a750+) diff --git a/src/asahi/lib/agx_linker.h b/src/asahi/lib/agx_linker.h index 9532fa6b80f..419dccf1b9b 100644 --- a/src/asahi/lib/agx_linker.h +++ b/src/asahi/lib/agx_linker.h @@ -10,6 +10,7 @@ #include "agx_nir_lower_vbo.h" #include "agx_pack.h" #include "nir_lower_blend.h" +#include "util/macros.h" struct agx_linked_shader { /* Mapped executable memory */ @@ -100,16 +101,88 @@ struct agx_fs_prolog_key { }; struct agx_blend_rt_key { + unsigned colormask : 4; + unsigned advanced_blend : 1; + unsigned pad : 1; + unsigned mode : 26; +}; +static_assert(sizeof(struct agx_blend_rt_key) == 4, "packed"); + +struct agx_blend_standard { enum pipe_blend_func rgb_func : 3; enum pipe_blendfactor rgb_src_factor : 5; enum pipe_blendfactor rgb_dst_factor : 5; enum pipe_blend_func alpha_func : 3; enum pipe_blendfactor alpha_src_factor : 5; enum pipe_blendfactor alpha_dst_factor : 5; - unsigned colormask : 4; - unsigned pad : 2; + unsigned pad : 6; }; -static_assert(sizeof(struct agx_blend_rt_key) == 4, "packed"); +static_assert(sizeof(struct agx_blend_standard) == 4, "packed"); + +static inline unsigned +agx_pack_blend_standard(enum pipe_blend_func rgb_func, + enum pipe_blendfactor rgb_src_factor, + enum pipe_blendfactor rgb_dst_factor, + enum pipe_blend_func alpha_func, + enum pipe_blendfactor alpha_src_factor, + enum pipe_blendfactor alpha_dst_factor) +{ + struct agx_blend_standard blend = { + .rgb_func = rgb_func, + .rgb_src_factor = rgb_src_factor, + .rgb_dst_factor = rgb_dst_factor, + .alpha_func = alpha_func, + .alpha_src_factor = alpha_src_factor, + .alpha_dst_factor = alpha_dst_factor, + }; + unsigned val; + typed_memcpy(&val, &blend, 1); + return val; +} + +static inline struct agx_blend_standard +agx_unpack_blend_standard(unsigned mode) +{ + struct agx_blend_standard blend; + typed_memcpy(&blend, &mode, 1); + return blend; +} + +struct agx_blend_advanced { + enum pipe_advanced_blend_mode op : 8; + enum pipe_blend_overlap_mode overlap : 2; + unsigned src_premultiplied : 1; + unsigned dst_premultiplied : 1; + unsigned clamp_results : 1; + unsigned pad : 19; +}; +static_assert(sizeof(struct agx_blend_advanced) == 4, "packed"); + +static inline unsigned +agx_pack_blend_advanced(enum pipe_advanced_blend_mode op, + enum pipe_blend_overlap_mode overlap, + bool src_premultiplied, bool dst_premultiplied, + bool clamp_results) +{ + struct agx_blend_advanced blend = { + .op = op, + .overlap = overlap, + .src_premultiplied = src_premultiplied, + .dst_premultiplied = dst_premultiplied, + .clamp_results = clamp_results, + }; + unsigned val; + typed_memcpy(&val, &blend, 1); + return val; +} + +static inline struct agx_blend_advanced +agx_unpack_blend_advanced(unsigned mode) +{ + struct agx_blend_advanced blend; + typed_memcpy(&blend, &mode, 1); + return blend; +} struct agx_blend_key { struct agx_blend_rt_key rt[8]; diff --git a/src/asahi/lib/agx_nir_prolog_epilog.c b/src/asahi/lib/agx_nir_prolog_epilog.c index d2f228ef263..86a8ba67dab 100644 --- a/src/asahi/lib/agx_nir_prolog_epilog.c +++ b/src/asahi/lib/agx_nir_prolog_epilog.c @@ -329,11 +329,14 @@ lower_tests_zs(nir_shader *s, bool value) static inline bool blend_uses_2src(struct agx_blend_rt_key rt) { + assert(rt.advanced_blend == false); + const struct agx_blend_standard blend = agx_unpack_blend_standard(rt.mode); + enum pipe_blendfactor factors[] = { - rt.rgb_src_factor, - rt.rgb_dst_factor, - rt.alpha_src_factor, - rt.alpha_dst_factor, + blend.rgb_src_factor, + blend.rgb_dst_factor, + blend.alpha_src_factor, + blend.alpha_dst_factor, }; for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) { @@ -394,7 +397,8 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_) * for blending so should be suppressed for missing attachments to keep * the assert from blowing up on OpenGL. */ - if (blend_uses_2src(key->blend.rt[rt]) && + if (!key->blend.rt[rt].advanced_blend && + blend_uses_2src(key->blend.rt[rt]) && key->rt_formats[rt] != PIPE_FORMAT_NONE) { assert(location == 0); @@ -432,19 +436,37 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_) static_assert(ARRAY_SIZE(opts.rt) == 8, "max RTs out of sync"); for (unsigned i = 0; i < 8; ++i) { - opts.rt[i] = (nir_lower_blend_rt){ - .format = key->rt_formats[i], + if (key->blend.rt[i].advanced_blend) { + const struct agx_blend_advanced blend = + agx_unpack_blend_advanced(key->blend.rt[i].mode); - .rgb.src_factor = key->blend.rt[i].rgb_src_factor, - .rgb.dst_factor = key->blend.rt[i].rgb_dst_factor, - .rgb.func = key->blend.rt[i].rgb_func, + opts.rt[i] = (nir_lower_blend_rt){ + .format = key->rt_formats[i], + .advanced_blend = true, + .colormask = key->blend.rt[i].colormask, + .blend_mode = blend.op, + .src_premultiplied = blend.src_premultiplied, + .dst_premultiplied = blend.dst_premultiplied, + .overlap = blend.overlap, + }; + } else { + const struct agx_blend_standard blend = + agx_unpack_blend_standard(key->blend.rt[i].mode); - .alpha.src_factor = key->blend.rt[i].alpha_src_factor, - .alpha.dst_factor = key->blend.rt[i].alpha_dst_factor, - .alpha.func = key->blend.rt[i].alpha_func, + opts.rt[i] = (nir_lower_blend_rt){ + .format = key->rt_formats[i], - .colormask = key->blend.rt[i].colormask, - }; + .rgb.src_factor = blend.rgb_src_factor, + .rgb.dst_factor = blend.rgb_dst_factor, + .rgb.func = blend.rgb_func, + + .alpha.src_factor = blend.alpha_src_factor, + .alpha.dst_factor = blend.alpha_dst_factor, + .alpha.func = blend.alpha_func, + + .colormask = key->blend.rt[i].colormask, + }; + } } /* It's more efficient to use masked stores (with diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index bcbec0a0fac..781b7d86bac 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -128,6 +128,7 @@ hk_cmd_buffer_dirty_render_pass(struct hk_cmd_buffer *cmd) BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES); BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS); BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ADVANCED); /* These depend on the depth/stencil format */ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE); @@ -2796,7 +2797,7 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, IS_DIRTY(CB_LOGIC_OP_ENABLE) || IS_DIRTY(CB_WRITE_MASKS) || IS_DIRTY(CB_COLOR_WRITE_ENABLES) || IS_DIRTY(CB_ATTACHMENT_COUNT) || IS_DIRTY(CB_BLEND_ENABLES) || IS_DIRTY(CB_BLEND_EQUATIONS) || - IS_DIRTY(CB_BLEND_CONSTANTS) || + IS_DIRTY(CB_BLEND_CONSTANTS) || IS_DIRTY(CB_BLEND_ADVANCED) || desc->root_dirty /* for pipeline stats */ || true) { unsigned tib_sample_mask = BITFIELD_MASK(dyn->ms.rasterization_samples); @@ -2890,32 +2891,32 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, if (!dyn->cb.attachments[i].blend_enable) { key.epilog.blend.rt[i] = (struct agx_blend_rt_key){ .colormask = write_mask, - .rgb_func = PIPE_BLEND_ADD, - .alpha_func = PIPE_BLEND_ADD, - .rgb_src_factor = PIPE_BLENDFACTOR_ONE, - .alpha_src_factor = PIPE_BLENDFACTOR_ONE, - .rgb_dst_factor = PIPE_BLENDFACTOR_ZERO, - .alpha_dst_factor = PIPE_BLENDFACTOR_ZERO, + .mode = agx_pack_blend_standard( + PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO, + PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO), }; + } else if (cb->color_blend_op >= VK_BLEND_OP_ZERO_EXT) { + key.epilog.blend.rt[i] = (struct agx_blend_rt_key){ + .colormask = write_mask, + .advanced_blend = 1, + .mode = agx_pack_blend_advanced( + vk_advanced_blend_op_to_pipe(cb->color_blend_op), + vk_blend_overlap_to_pipe(cb->blend_overlap), + cb->src_premultiplied, cb->dst_premultiplied, + cb->clamp_results), + }; + + assert(cb->clamp_results == false); } else { key.epilog.blend.rt[i] = (struct agx_blend_rt_key){ .colormask = write_mask, - - .rgb_src_factor = + .mode = agx_pack_blend_standard( + vk_blend_op_to_pipe(cb->color_blend_op), vk_blend_factor_to_pipe(cb->src_color_blend_factor), - - .rgb_dst_factor = vk_blend_factor_to_pipe(cb->dst_color_blend_factor), - - .rgb_func = vk_blend_op_to_pipe(cb->color_blend_op), - - .alpha_src_factor = + vk_blend_op_to_pipe(cb->alpha_blend_op), vk_blend_factor_to_pipe(cb->src_alpha_blend_factor), - - .alpha_dst_factor = - vk_blend_factor_to_pipe(cb->dst_alpha_blend_factor), - - .alpha_func = vk_blend_op_to_pipe(cb->alpha_blend_op), + vk_blend_factor_to_pipe(cb->dst_alpha_blend_factor)), }; } } diff --git a/src/asahi/vulkan/hk_physical_device.c b/src/asahi/vulkan/hk_physical_device.c index b32acd48abb..1a46dfa96d5 100644 --- a/src/asahi/vulkan/hk_physical_device.c +++ b/src/asahi/vulkan/hk_physical_device.c @@ -140,6 +140,7 @@ hk_get_device_extensions(const struct hk_instance *instance, .KHR_zero_initialize_workgroup_memory = true, .EXT_4444_formats = true, .EXT_attachment_feedback_loop_layout = true, + .EXT_blend_operation_advanced = true, .EXT_border_color_swizzle = true, .EXT_buffer_device_address = true, .EXT_calibrated_timestamps = true, @@ -632,6 +633,9 @@ hk_get_device_features( /* VK_KHR_shader_relaxed_extended_instruction */ .shaderRelaxedExtendedInstruction = true, + + /* VK_EXT_blend_operation_advanced */ + .advancedBlendCoherentOperations = true, }; } @@ -1012,6 +1016,14 @@ hk_get_device_properties(const struct agx_device *dev, /* VK_KHR_fragment_shader_barycentric */ .triStripVertexOrderIndependentOfProvokingVertex = false, + + /* VK_EXT_blend_operation_advanced */ + .advancedBlendMaxColorAttachments = HK_MAX_RTS, + .advancedBlendIndependentBlend = true, + .advancedBlendNonPremultipliedSrcColor = true, + .advancedBlendNonPremultipliedDstColor = true, + .advancedBlendCorrelatedOverlap = true, + .advancedBlendAllOperations = true, }; strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName)); diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 6269ed55bc7..1488d487bc2 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -218,23 +218,13 @@ agx_create_blend_state(struct pipe_context *ctx, if (state->logicop_enable || !rt.blend_enable) { /* No blending, but we get the colour mask below */ - key->rt[i] = (struct agx_blend_rt_key){ - .rgb_func = PIPE_BLEND_ADD, - .rgb_src_factor = PIPE_BLENDFACTOR_ONE, - .rgb_dst_factor = PIPE_BLENDFACTOR_ZERO, - - .alpha_func = PIPE_BLEND_ADD, - .alpha_src_factor = PIPE_BLENDFACTOR_ONE, - .alpha_dst_factor = PIPE_BLENDFACTOR_ZERO, - }; + key->rt[i].mode = agx_pack_blend_standard( + PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO, + PIPE_BLEND_ADD, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO); } else { - key->rt[i].rgb_func = rt.rgb_func; - key->rt[i].rgb_src_factor = rt.rgb_src_factor; - key->rt[i].rgb_dst_factor = rt.rgb_dst_factor; - - key->rt[i].alpha_func = rt.alpha_func; - key->rt[i].alpha_src_factor = rt.alpha_src_factor; - key->rt[i].alpha_dst_factor = rt.alpha_dst_factor; + key->rt[i].mode = agx_pack_blend_standard( + rt.rgb_func, rt.rgb_src_factor, rt.rgb_dst_factor, rt.alpha_func, + rt.alpha_src_factor, rt.alpha_dst_factor); } key->rt[i].colormask = rt.colormask; @@ -2357,12 +2347,17 @@ agx_update_fs(struct agx_batch *batch) /* Try to disable blending to get rid of some fsats */ if (link_key.epilog.fs.link.loc0_w_1) { struct agx_blend_rt_key *k = &link_key.epilog.fs.blend.rt[0]; + struct agx_blend_standard b = agx_unpack_blend_standard(k->mode); - k->rgb_src_factor = optimize_blend_factor_w_1(k->rgb_src_factor); - k->rgb_dst_factor = optimize_blend_factor_w_1(k->rgb_dst_factor); + b.rgb_src_factor = optimize_blend_factor_w_1(b.rgb_src_factor); + b.rgb_dst_factor = optimize_blend_factor_w_1(b.rgb_dst_factor); - k->alpha_src_factor = optimize_blend_factor_w_1(k->alpha_src_factor); - k->alpha_dst_factor = optimize_blend_factor_w_1(k->alpha_dst_factor); + b.alpha_src_factor = optimize_blend_factor_w_1(b.alpha_src_factor); + b.alpha_dst_factor = optimize_blend_factor_w_1(b.alpha_dst_factor); + + k->mode = agx_pack_blend_standard(b.rgb_func, b.rgb_src_factor, + b.rgb_dst_factor, b.alpha_func, + b.alpha_src_factor, b.alpha_dst_factor); } link_key.epilog.fs.blend.alpha_to_coverage &= msaa;