diff --git a/docs/features.txt b/docs/features.txt index af9eff19ab8..8ca11e43320 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -598,7 +598,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_acquire_xlib_display DONE (anv, hk, lvp, nvk, panvk, radv, tu, v3dv, vn) VK_EXT_attachment_feedback_loop_dynamic_state DONE (anv, lvp, radv, tu, vn) VK_EXT_attachment_feedback_loop_layout DONE (anv, hk, lvp, nvk, radv, tu, v3dv, vn) - VK_EXT_blend_operation_advanced DONE (hk, vn) + VK_EXT_blend_operation_advanced DONE (hk, lvp, vn) VK_EXT_border_color_swizzle DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv/gfx10+, tu, v3dv, vn) VK_EXT_buffer_device_address DONE (anv, dzn/sm6.6+, hasvk, hk, nvk, panvk, radv, vn) VK_EXT_calibrated_timestamps DONE (anv, hasvk, hk, kk, nvk, panvk/v10+, lvp, radv, vn, tu/a750+) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 8c615e7c54d..66cc468efe9 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -3,3 +3,4 @@ VK_EXT_present_timing on RADV, NVK, Turnip, ANV, Honeykrisp, panvk VK_KHR_sampler_ycbcr_conversion on pvr VK_EXT_image_drm_format_modifier on pvr VK_KHR_internally_synchronized_queues on RADV +VK_EXT_blend_operation_advanced on lavapipe diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c index 67f48e3ca29..0d70ab98e95 100644 --- a/src/gallium/frontends/lavapipe/lvp_device.c +++ b/src/gallium/frontends/lavapipe/lvp_device.c @@ -218,6 +218,7 @@ static const struct vk_device_extension_table lvp_device_extensions_supported = .EXT_4444_formats = true, .EXT_attachment_feedback_loop_layout = true, .EXT_attachment_feedback_loop_dynamic_state = true, + .EXT_blend_operation_advanced = true, .EXT_border_color_swizzle = true, .EXT_calibrated_timestamps = true, .EXT_color_write_enable = true, @@ -629,6 +630,9 @@ lvp_get_features(const struct lvp_physical_device *pdevice, /* 
VK_EXT_attachment_feedback_loop_layout_dynamic_state */ .attachmentFeedbackLoopDynamicState = true, + /* VK_EXT_blend_operation_advanced */ + .advancedBlendCoherentOperations = true, + /* VK_KHR_ray_query */ .rayQuery = true, @@ -1156,6 +1160,14 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie .copyDstLayoutCount = ARRAY_SIZE(lvp_host_copy_image_layouts), .identicalMemoryTypeRequirements = VK_FALSE, + /* VK_EXT_blend_operation_advanced */ + .advancedBlendMaxColorAttachments = device->pscreen->caps.max_render_targets, + .advancedBlendIndependentBlend = true, + .advancedBlendNonPremultipliedSrcColor = true, + .advancedBlendNonPremultipliedDstColor = true, + .advancedBlendCorrelatedOverlap = true, + .advancedBlendAllOperations = true, + /* VK_EXT_transform_feedback */ .maxTransformFeedbackStreams = device->pscreen->caps.max_vertex_streams, .maxTransformFeedbackBuffers = device->pscreen->caps.max_stream_output_buffers, diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c index 890ffbe95f1..03dbbefea83 100644 --- a/src/gallium/frontends/lavapipe/lvp_execute.c +++ b/src/gallium/frontends/lavapipe/lvp_execute.c @@ -33,6 +33,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_from_mesa.h" +#include "util/blend.h" #include "util/format/u_format.h" #include "util/u_surface.h" #include "util/u_sampler.h" @@ -223,6 +224,17 @@ struct rendering_state { struct lvp_shader *compute_shader; uint8_t push_constants[128 * 4]; } saved; + + struct { + VkBlendOp op; + bool blend_enable; + bool src_premultiplied; + bool dst_premultiplied; + VkBlendOverlapEXT overlap; + } advanced_blend[PIPE_MAX_COLOR_BUFS]; + bool advanced_blend_dirty; + void *advanced_blend_fs_variant; /* cached lowered FS for cleanup */ + struct lvp_shader *advanced_blend_fs_shader; /* shader used to build variant */ }; static struct pipe_resource * @@ -380,8 +392,88 @@ static void update_vertex_elements_buffer_index(struct 
rendering_state *state) state->velem.velems[i].vertex_buffer_index = state->vertex_buffer_index[i] - state->start_vb; } +static void +emit_advanced_blend_fs(struct rendering_state *state) +{ + struct lvp_shader *shader = state->shaders[MESA_SHADER_FRAGMENT]; + if (!shader || !shader->pipeline_nir) + return; + + /* Compute which RTs need advanced blend lowering. + * Use advanced_blend[].blend_enable instead of blend_state.rt[].blend_enable + * because the latter is set to false after lowering to disable HW blending. + */ + unsigned advanced_rts = 0; + for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (state->advanced_blend[i].blend_enable && + state->advanced_blend[i].op >= VK_BLEND_OP_ZERO_EXT) + advanced_rts |= BITFIELD_BIT(i); + } + + if (!advanced_rts) { + /* Rebind the original shader first so the lowered variant is no longer bound when it is deleted */ + if (state->advanced_blend_fs_variant) { + state->pctx->bind_fs_state(state->pctx, shader->shader_cso); + state->pctx->delete_fs_state(state->pctx, state->advanced_blend_fs_variant); + state->advanced_blend_fs_variant = NULL; + state->advanced_blend_fs_shader = NULL; + } + return; + } + + /* Reuse the cached variant if the shader and blend state haven't changed */ + if (state->advanced_blend_fs_variant && + state->advanced_blend_fs_shader == shader && + !state->advanced_blend_dirty) + return; + + /* Clone and lower the shader */ + nir_shader *nir = nir_shader_clone(NULL, shader->pipeline_nir->nir); + nir_lower_blend_options opts = { 0 }; + + u_foreach_bit(rt, advanced_rts) { + const bool write_enable = !(state->color_write_disables & BITFIELD_BIT(rt)); + const unsigned write_mask = write_enable ?
state->blend_state.rt[rt].colormask : 0; + + opts.rt[rt] = (nir_lower_blend_rt){ + .format = state->framebuffer.cbufs[rt].format, + .advanced_blend = true, + .colormask = write_mask, + .blend_mode = vk_advanced_blend_op_to_pipe(state->advanced_blend[rt].op), + .src_premultiplied = state->advanced_blend[rt].src_premultiplied, + .dst_premultiplied = state->advanced_blend[rt].dst_premultiplied, + .overlap = vk_blend_overlap_to_pipe(state->advanced_blend[rt].overlap), + }; + + /* Set passthrough blending for this RT since shader does the blend */ + state->blend_state.rt[rt].blend_enable = false; + } + state->blend_dirty = true; + + lvp_nir_lower_blend(nir, &opts); + + /* Compile the variant */ + void *cso = state->pctx->create_fs_state(state->pctx, + &(struct pipe_shader_state) { + .type = PIPE_SHADER_IR_NIR, + .ir.nir = nir, + }); + + /* Bind the new variant before deleting the previous one, which may still be bound */ + state->pctx->bind_fs_state(state->pctx, cso); + if (state->advanced_blend_fs_variant) + state->pctx->delete_fs_state(state->pctx, state->advanced_blend_fs_variant); + + state->advanced_blend_fs_variant = cso; + state->advanced_blend_fs_shader = shader; + state->advanced_blend_dirty = false; + state->noop_fs_bound = false; +} + static void emit_state(struct rendering_state *state) { + emit_advanced_blend_fs(state); + if (!state->shaders[MESA_SHADER_FRAGMENT] && !state->noop_fs_bound) { state->pctx->bind_fs_state(state->pctx, state->device->noop_fs); state->noop_fs_bound = true; @@ -877,8 +969,21 @@ static void handle_graphics_pipeline(struct lvp_pipeline *pipeline, if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) state->color_write_disables = ~ps->cb->color_write_enables; + if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ADVANCED)) { + for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + state->advanced_blend[i].op = 0; + } + for (unsigned i = 0; i < ps->cb->attachment_count; i++) { const struct vk_color_blend_attachment_state *att = &ps->cb->attachments[i]; + + /*
Advanced blend is lowered in the fragment shader - use passthrough */ + if (pipeline->advanced_blend_rts & BITFIELD_BIT(i)) { + state->blend_state.rt[i].blend_enable = false; + state->blend_state.rt[i].colormask = att->write_mask; + continue; + } + if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS)) state->blend_state.rt[i].colormask = att->write_mask; if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES)) @@ -3624,6 +3729,7 @@ static void handle_set_color_write_enable(struct vk_cmd_queue_entry *cmd, disable_mask |= BITFIELD_BIT(i); } + state->advanced_blend_dirty |= state->color_write_disables != disable_mask; state->blend_dirty |= state->color_write_disables != disable_mask; state->color_write_disables = disable_mask; } @@ -3769,10 +3875,16 @@ static void handle_set_color_blend_enable(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { for (unsigned i = 0; i < cmd->u.set_color_blend_enable_ext.attachment_count; i++) { - if (state->blend_state.rt[cmd->u.set_color_blend_enable_ext.first_attachment + i].blend_enable != !!cmd->u.set_color_blend_enable_ext.color_blend_enables[i]) { + unsigned idx = cmd->u.set_color_blend_enable_ext.first_attachment + i; + bool enable = !!cmd->u.set_color_blend_enable_ext.color_blend_enables[i]; + /* blend_state.rt[].blend_enable is forced to false on RTs lowered by emit_advanced_blend_fs(), so also compare the tracked advanced_blend[] enable */ + if (state->blend_state.rt[idx].blend_enable != enable || + state->advanced_blend[idx].blend_enable != enable) { state->blend_dirty = true; + state->advanced_blend_dirty = true; } - state->blend_state.rt[cmd->u.set_color_blend_enable_ext.first_attachment + i].blend_enable = !!cmd->u.set_color_blend_enable_ext.color_blend_enables[i]; + state->blend_state.rt[idx].blend_enable = enable; + state->advanced_blend[idx].blend_enable = enable; } } @@ -3780,8 +3890,10 @@ static void handle_set_color_write_mask(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { for (unsigned i = 0; i < cmd->u.set_color_write_mask_ext.attachment_count; i++) { - if (state->blend_state.rt[cmd->u.set_color_write_mask_ext.first_attachment + i].colormask !=
cmd->u.set_color_write_mask_ext.color_write_masks[i]) + if (state->blend_state.rt[cmd->u.set_color_write_mask_ext.first_attachment + i].colormask != cmd->u.set_color_write_mask_ext.color_write_masks[i]) { state->blend_dirty = true; + state->advanced_blend_dirty = true; + } state->blend_state.rt[cmd->u.set_color_write_mask_ext.first_attachment + i].colormask = cmd->u.set_color_write_mask_ext.color_write_masks[i]; } } @@ -3792,12 +3904,16 @@ static void handle_set_color_blend_equation(struct vk_cmd_queue_entry *cmd, const VkColorBlendEquationEXT *cb = cmd->u.set_color_blend_equation_ext.color_blend_equations; state->blend_dirty = true; for (unsigned i = 0; i < cmd->u.set_color_blend_equation_ext.attachment_count; i++) { - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_func = vk_blend_op_to_pipe(cb[i].colorBlendOp); - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_src_factor = vk_blend_factor_to_pipe(cb[i].srcColorBlendFactor); - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_dst_factor = vk_blend_factor_to_pipe(cb[i].dstColorBlendFactor); - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_func = vk_blend_op_to_pipe(cb[i].alphaBlendOp); - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_src_factor = vk_blend_factor_to_pipe(cb[i].srcAlphaBlendFactor); - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_dst_factor = vk_blend_factor_to_pipe(cb[i].dstAlphaBlendFactor); + unsigned idx = cmd->u.set_color_blend_equation_ext.first_attachment + i; + state->blend_state.rt[idx].rgb_func = vk_blend_op_to_pipe(cb[i].colorBlendOp); + state->blend_state.rt[idx].rgb_src_factor = vk_blend_factor_to_pipe(cb[i].srcColorBlendFactor); + state->blend_state.rt[idx].rgb_dst_factor = vk_blend_factor_to_pipe(cb[i].dstColorBlendFactor); + state->blend_state.rt[idx].alpha_func = 
vk_blend_op_to_pipe(cb[i].alphaBlendOp); + state->blend_state.rt[idx].alpha_src_factor = vk_blend_factor_to_pipe(cb[i].srcAlphaBlendFactor); + state->blend_state.rt[idx].alpha_dst_factor = vk_blend_factor_to_pipe(cb[i].dstAlphaBlendFactor); + /* Setting a regular blend equation clears the advanced blend op for this RT; mark the cached lowered FS stale if an advanced op was set */ + state->advanced_blend_dirty |= state->advanced_blend[idx].op >= VK_BLEND_OP_ZERO_EXT; + state->advanced_blend[idx].op = 0; /* At least llvmpipe applies the blend factor prior to the blend function, * regardless of what function is used. (like i965 hardware). @@ -3805,18 +3921,35 @@ static void handle_set_color_blend_equation(struct vk_cmd_queue_entry *cmd, */ if (cb[i].colorBlendOp == VK_BLEND_OP_MIN || cb[i].colorBlendOp == VK_BLEND_OP_MAX) { - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + state->blend_state.rt[idx].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + state->blend_state.rt[idx].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; } if (cb[i].alphaBlendOp == VK_BLEND_OP_MIN || cb[i].alphaBlendOp == VK_BLEND_OP_MAX) { - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - state->blend_state.rt[cmd->u.set_color_blend_equation_ext.first_attachment + i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + state->blend_state.rt[idx].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + state->blend_state.rt[idx].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; } } } +static void handle_set_color_blend_advanced(struct vk_cmd_queue_entry *cmd, + struct rendering_state *state) +{ + const struct vk_cmd_set_color_blend_advanced_ext *adv = &cmd->u.set_color_blend_advanced_ext; + state->blend_dirty = true; + state->advanced_blend_dirty = true; + for (unsigned i = 0; i < adv->attachment_count; i++) { + const VkColorBlendAdvancedEXT *a = &adv->color_blend_advanced[i]; + unsigned idx = adv->first_attachment + i; +
assert(a->clampResults == false); + state->advanced_blend[idx].op = a->advancedBlendOp; + state->advanced_blend[idx].src_premultiplied = a->srcPremultiplied; + state->advanced_blend[idx].dst_premultiplied = a->dstPremultiplied; + state->advanced_blend[idx].overlap = a->blendOverlap; + } +} + static void handle_shaders(struct vk_cmd_queue_entry *cmd, struct rendering_state *state) { @@ -4947,6 +5080,7 @@ void lvp_add_enqueue_cmd_entrypoints(struct vk_device_dispatch_table *disp) ENQUEUE_CMD(CmdSetSampleLocationsEnableEXT) ENQUEUE_CMD(CmdSetSampleLocationsEXT) + ENQUEUE_CMD(CmdSetColorBlendAdvancedEXT) #undef ENQUEUE_CMD } @@ -5369,6 +5503,9 @@ static void lvp_execute_cmd_buffer(struct list_head *cmds, case VK_CMD_SET_SAMPLE_LOCATIONS_EXT: handle_set_sample_locations(cmd, state); break; + case VK_CMD_SET_COLOR_BLEND_ADVANCED_EXT: + handle_set_color_blend_advanced(cmd, state); + break; default: fprintf(stderr, "Unsupported command %s\n", vk_cmd_queue_type_names[cmd->type]); UNREACHABLE("Unsupported command"); @@ -5451,6 +5588,9 @@ VkResult lvp_execute_cmds(struct lvp_device *device, for (unsigned i = 0; i < ARRAY_SIZE(state->desc_buffers); i++) pipe_resource_reference(&state->desc_buffers[i], NULL); + if (state->advanced_blend_fs_variant) + state->pctx->delete_fs_state(state->pctx, state->advanced_blend_fs_variant); + return VK_SUCCESS; } diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c index d5bb0040d25..e9544b21a83 100644 --- a/src/gallium/frontends/lavapipe/lvp_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c @@ -22,6 +22,7 @@ */ #include "lvp_private.h" +#include "vk_blend.h" #include "vk_nir_convert_ycbcr.h" #include "vk_pipeline.h" #include "vk_render_pass.h" @@ -131,6 +132,74 @@ shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) *align = comp_size; } +static bool +lvp_needs_advanced_blend_lowering(struct lvp_pipeline *pipeline) +{ + const struct 
vk_color_blend_state *cb = pipeline->graphics_state.cb; + if (!cb) + return false; + + for (uint32_t i = 0; i < cb->attachment_count; i++) + if (cb->attachments[i].blend_enable && cb->attachments[i].color_blend_op >= VK_BLEND_OP_ZERO_EXT) + return true; + + return false; +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +void +lvp_nir_lower_blend(nir_shader *nir, const nir_lower_blend_options *opts) +{ + /* nir_lower_blend operates on IO intrinsics, so lower derefs to intrinsics + * first, run the blend lowering, then convert back to derefs for llvmpipe. + */ + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, 0); + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out, NULL); + NIR_PASS(_, nir, nir_lower_blend, opts); + NIR_PASS(_, nir, nir_unlower_io_to_vars, false); +} + +static void +lvp_lower_advanced_blend(struct lvp_pipeline *pipeline) +{ + const struct vk_color_blend_state *cb = pipeline->graphics_state.cb; + const struct vk_render_pass_state *rp = pipeline->graphics_state.rp; + nir_shader *nir = pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir; + nir_lower_blend_options opts = { 0 }; + + for (unsigned rt = 0; rt < cb->attachment_count; rt++) { + const struct vk_color_blend_attachment_state *att = &cb->attachments[rt]; + + /* Skip RTs with blending disabled or a non-advanced op (advanced ops start at VK_BLEND_OP_ZERO_EXT) */ + if (!att->blend_enable || att->color_blend_op < VK_BLEND_OP_ZERO_EXT) + continue; + + const bool write_enable = cb->color_write_enables & BITFIELD_BIT(rt); + const unsigned write_mask = write_enable ?
att->write_mask : 0; + + opts.rt[rt] = (nir_lower_blend_rt){ + .format = lvp_vk_format_to_pipe_format(rp->color_attachment_formats[rt]), + .advanced_blend = true, + .colormask = write_mask, + .blend_mode = vk_advanced_blend_op_to_pipe(att->color_blend_op), + .src_premultiplied = att->src_premultiplied, + .dst_premultiplied = att->dst_premultiplied, + .overlap = vk_blend_overlap_to_pipe(att->blend_overlap), + }; + + assert(att->clamp_results == false); + pipeline->advanced_blend_rts |= BITFIELD_BIT(rt); + } + + lvp_nir_lower_blend(nir, &opts); +} + static bool remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data) { @@ -915,6 +984,17 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline, pipeline->line_rectangular = true; lvp_pipeline_xfb_init(pipeline); } + + if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir && pipeline->graphics_state.cb) { + if (lvp_needs_advanced_blend_lowering(pipeline)) { + /* Clone to avoid modifying shared library NIR. */ + nir_shader *cloned = nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir); + lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, NULL); + pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir = lvp_create_pipeline_nir(cloned); + lvp_lower_advanced_blend(pipeline); + } + } + if (!libstate && !pipeline->library) lvp_pipeline_shaders_compile(pipeline, false); diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h index 6dc5193d7d9..137d7fe4fbf 100644 --- a/src/gallium/frontends/lavapipe/lvp_private.h +++ b/src/gallium/frontends/lavapipe/lvp_private.h @@ -43,6 +43,7 @@ #include "pipe/p_state.h" #include "cso_cache/cso_context.h" #include "nir.h" +#include "nir/nir_lower_blend.h" #ifdef HAVE_LIBDRM #include @@ -539,6 +540,8 @@ struct lvp_pipeline { uint32_t group_count; } rt; + uint8_t advanced_blend_rts; + unsigned num_groups; unsigned num_groups_total; VkPipeline groups[0]; @@ -751,6 +754,9 
@@ lvp_vk_format_to_pipe_format(VkFormat format) } } +void +lvp_nir_lower_blend(nir_shader *nir, const nir_lower_blend_options *opts); + void lvp_sampler_init(struct lvp_device *device, struct lp_descriptor *desc, const VkSamplerCreateInfo *pCreateInfo, const struct vk_sampler *sampler);