diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 56ea06648b1..752990c3c3f 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4099,6 +4099,93 @@ radv_cmp_ps_epilog(const void *a_, const void *b_)
    return memcmp(a, b, sizeof(*a)) == 0;
 }
 
+/**
+ * Look up, or create on demand, the PS epilog matching the state currently bound on the
+ * command buffer.
+ *
+ * The key is built from the color attachment formats of the current rendering state, the
+ * per-attachment write mask and blend enable of the dynamic state, the dynamic
+ * alpha-to-coverage enable, and the pipeline's mrt0_is_dual_src / need_src_alpha values.
+ * Epilogs are stored in device->ps_epilogs, which is guarded by device->ps_epilogs_lock.
+ *
+ * Returns the epilog, or NULL if creating it or allocating its table key failed.
+ */
+static struct radv_shader_part *
+lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
+{
+   const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
+   const struct radv_rendering_state *render = &cmd_buffer->state.render;
+   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   struct radv_device *device = cmd_buffer->device;
+   struct radv_shader_part *epilog = NULL;
+   struct radv_ps_epilog_state state = {0};
+
+   state.color_attachment_count = render->color_att_count;
+   for (unsigned i = 0; i < render->color_att_count; ++i) {
+      state.color_attachment_formats[i] = render->color_att[i].format;
+   }
+
+   /* Pack 4 bits per render target. Shift as uint32_t: a narrower operand would be promoted
+    * to signed int, and left-shifting a set bit into the sign bit (shift count 28 when
+    * MAX_RTS is 8) is undefined behavior.
+    */
+   for (unsigned i = 0; i < MAX_RTS; i++) {
+      state.color_write_mask |= (uint32_t)d->vk.cb.attachments[i].write_mask << (4 * i);
+      state.color_blend_enable |= (uint32_t)d->vk.cb.attachments[i].blend_enable << (4 * i);
+   }
+
+   state.mrt0_is_dual_src = pipeline->mrt0_is_dual_src;
+
+   state.need_src_alpha = pipeline->need_src_alpha;
+   if (d->vk.ms.alpha_to_coverage_enable) {
+      /* Select a color export format with alpha when alpha to coverage is enabled. */
+      state.need_src_alpha |= 0x1;
+   }
+
+   struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(pipeline, &state, true);
+   uint32_t hash = radv_hash_ps_epilog(&key);
+
+   /* Fast path: search while holding only the read lock. */
+   u_rwlock_rdlock(&device->ps_epilogs_lock);
+   struct hash_entry *epilog_entry =
+      _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
+   u_rwlock_rdunlock(&device->ps_epilogs_lock);
+
+   if (!epilog_entry) {
+      u_rwlock_wrlock(&device->ps_epilogs_lock);
+      /* Search again: another thread may have inserted the entry between dropping the read
+       * lock and taking the write lock.
+       */
+      epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key);
+      if (epilog_entry) {
+         u_rwlock_wrunlock(&device->ps_epilogs_lock);
+         return epilog_entry->data;
+      }
+
+      epilog = radv_create_ps_epilog(device, &key);
+      /* Heap copy of the key for the table; the local key does not outlive this call. */
+      struct radv_ps_epilog_key *key2 = malloc(sizeof(*key2));
+      if (!epilog || !key2) {
+         /* Only drop the reference when an epilog was actually created.
+          * NOTE(review): radv_shader_part_unref() is not visible here and is assumed not to
+          * tolerate NULL - confirm.
+          */
+         if (epilog)
+            radv_shader_part_unref(device, epilog);
+         free(key2);
+         u_rwlock_wrunlock(&device->ps_epilogs_lock);
+         return NULL;
+      }
+      memcpy(key2, &key, sizeof(*key2));
+      _mesa_hash_table_insert_pre_hashed(device->ps_epilogs, hash, key2, epilog);
+
+      u_rwlock_wrunlock(&device->ps_epilogs_lock);
+      return epilog;
+   }
+
+   return epilog_entry->data;
+}
+
 static void
 radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -8457,9 +8544,31 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
    const struct radv_device *device = cmd_buffer->device;
    bool late_scissor_emission;
 
-   if (cmd_buffer->state.graphics_pipeline->ps_epilog)
-      radv_emit_ps_epilog_state(cmd_buffer, cmd_buffer->state.graphics_pipeline->ps_epilog,
-                                pipeline_is_dirty);
+   if (cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog) {
+      struct radv_shader_part *ps_epilog = NULL;
+
+      if (cmd_buffer->state.graphics_pipeline->ps_epilog) {
+         ps_epilog = cmd_buffer->state.graphics_pipeline->ps_epilog;
+      } else if ((cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline ||
+                  (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK |
+                                              RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_ENABLE |
+                                              RADV_CMD_DIRTY_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE)))) {
+         /* The pipeline carries no epilog of its own: look one up, but only when the bound
+          * pipeline or one of the dynamic states read by lookup_ps_epilog() changed.
+          */
+         ps_epilog = lookup_ps_epilog(cmd_buffer);
+         if (!ps_epilog) {
+            vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
+            return;
+         }
+
+         cmd_buffer->state.col_format_non_compacted = ps_epilog->spi_shader_col_format;
+         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS;
+      }
+
+      if (ps_epilog)
+         radv_emit_ps_epilog_state(cmd_buffer, ps_epilog, pipeline_is_dirty);
+   }
 
    if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
       radv_emit_rbplus_state(cmd_buffer);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8fc97e279ec..0c2ac8aabdf 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2305,6 +2305,15 @@ radv_graphics_pipeline_link(const struct radv_pipeline *pipeline,
    }
 }
 
+static bool
+radv_pipeline_has_dynamic_ps_epilog(const struct radv_graphics_pipeline *pipeline)
+{
+   /* These dynamic states need to compile PS epilogs on-demand. */
+   return pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE |
+                                      RADV_DYNAMIC_COLOR_WRITE_MASK |
+                                      RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE);
+}
+
 struct radv_pipeline_key
 radv_generate_pipeline_key(const struct radv_pipeline *pipeline, VkPipelineCreateFlags flags)
 {
@@ -2327,19 +2336,7 @@ radv_generate_pipeline_key(const struct radv_pipeline *pipeline, VkPipelineCreat
    return key;
 }
 
-struct radv_ps_epilog_state
-{
-   uint8_t color_attachment_count;
-   VkFormat color_attachment_formats[MAX_RTS];
-
-   uint32_t color_write_mask;
-   uint32_t color_blend_enable;
-
-   bool mrt0_is_dual_src;
-   uint8_t need_src_alpha;
-};
-
-static struct radv_ps_epilog_key
+struct radv_ps_epilog_key
 radv_generate_ps_epilog_key(const struct radv_graphics_pipeline *pipeline,
                             const struct radv_ps_epilog_state *state,
                             bool disable_mrt_compaction)
@@ -2404,6 +2401,7 @@ radv_generate_ps_epilog_key(const struct radv_graphics_pipeline *pipeline,
    key.color_is_int10 = device->physical_device->rad_info.gfx_level < GFX8 ? is_int10 : 0;
    key.enable_mrt_output_nan_fixup = device->instance->enable_mrt_output_nan_fixup ? is_float32 : 0;
    key.mrt0_is_dual_src = state->mrt0_is_dual_src;
+   key.need_src_alpha = state->need_src_alpha;
 
    return key;
 }
@@ -3920,7 +3918,7 @@ radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs,
                                const struct radv_graphics_pipeline *pipeline,
                                const struct radv_blend_state *blend)
 {
-   if (pipeline->ps_epilog)
+   if (pipeline->ps_epilog || radv_pipeline_has_dynamic_ps_epilog(pipeline))
       return;
 
    radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
@@ -5189,6 +5187,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
 
    pipeline->col_format_non_compacted = blend.spi_shader_col_format;
    pipeline->mrt0_is_dual_src = key.ps.epilog.mrt0_is_dual_src;
+   pipeline->need_src_alpha = key.ps.epilog.need_src_alpha;
 
    struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
    bool enable_mrt_compaction = !key.ps.epilog.mrt0_is_dual_src && !ps->info.ps.has_epilog;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 2e1850b2b22..5664f7ce756 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1712,6 +1712,22 @@ bool radv_cmp_vs_prolog(const void *a_, const void *b_);
 uint32_t radv_hash_ps_epilog(const void *key_);
 bool radv_cmp_ps_epilog(const void *a_, const void *b_);
 
+struct radv_ps_epilog_state
+{
+   uint8_t color_attachment_count;
+   VkFormat color_attachment_formats[MAX_RTS];
+
+   uint32_t color_write_mask; /* 4 bits per color attachment */
+   uint32_t color_blend_enable; /* 4 bits per color attachment */
+
+   bool mrt0_is_dual_src;
+   uint8_t need_src_alpha;
+};
+
+struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_graphics_pipeline *pipeline,
+                                                      const struct radv_ps_epilog_state *state,
+                                                      bool disable_mrt_compaction);
+
 void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                   unsigned *out_offset, void **ptr);
@@ -2044,6 +2060,7 @@ struct radv_graphics_pipeline {
    uint32_t col_format_non_compacted;
 
    bool mrt0_is_dual_src;
+   uint8_t need_src_alpha;
 
    bool uses_drawid;
    bool uses_baseinstance;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 5837e3df2be..477201b13e1 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -61,6 +61,8 @@ struct radv_ps_epilog_key {
    uint8_t enable_mrt_output_nan_fixup;
 
    bool mrt0_is_dual_src;
+
+   uint8_t need_src_alpha; /* XXX: Remove this when color blend equations are dynamic! */
 };
 
 struct radv_pipeline_key {