diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index c58388ee325..44562fca48e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -402,6 +402,13 @@ struct si_shader_info { * fragment shader invocations if flat shading. */ bool allow_flat_shading; + + /* Optimization: if the texture bound to the texunit recorded here has been cleared to 1, + * then the draw can be skipped (see si_draw_blend_dst_sampler_noop in si_state.c). Initially the + * value is 0xff (undetermined), or 0 if the shader writes memory, and can be later changed to 0 (= false) or + * texunit + 1. + */ + uint8_t writes_1_if_tex_is_1; }; /* A shader selector is a gallium CSO and contains shader variants and diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 199f6528275..98c2a0cf08d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -418,6 +418,10 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : TGSI_INTERPOLATE_LOC_CENTER; + /* Shaders that write memory are excluded up front (0). Otherwise store 0xff (undetermined); the real value is + * computed at draw time if needed when the expected conditions are met. + */ + info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 
0 : 0xff; } info->constbuf0_num_slots = nir->num_uniforms; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7a3b5818370..25db0ca54aa 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -24,6 +24,7 @@ #include "si_build_pm4.h" #include "si_query.h" +#include "si_shader_internal.h" #include "sid.h" #include "util/fast_idiv_by_const.h" #include "util/format/u_format.h" @@ -445,6 +446,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend->alpha_to_one = state->alpha_to_one; blend->dual_src_blend = util_blend_state_is_dual(state, 0); blend->logicop_enable = logicop_enable; + /* Only rt[0] is inspected: ADD with source factor DST_COLOR and destination factor ZERO blends to src * dst, so a source of all 1s leaves the destination unchanged. */ blend->allows_noop_optimization = + state->rt[0].rgb_func == PIPE_BLEND_ADD && + state->rt[0].alpha_func == PIPE_BLEND_ADD && + state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR && + state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR && + state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && + state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO && + mode == V_028808_CB_NORMAL; unsigned num_shader_outputs = state->max_rt + 1; /* estimate */ if (blend->dual_src_blend) @@ -627,6 +636,57 @@ static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_b return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); } +/* Draw wrapper that si_bind_blend_state installs while the bound blend state has allows_noop_optimization set. With exactly one color buffer, if the pixel shader is found to write vec4(1) whenever one texture reads as 1 and that texture is a depth texture cleared to 1, the draw is dropped; every other case is forwarded unchanged to sctx->real_draw_vbo. NOTE(review): sctx->shader.ps.cso is dereferenced without a NULL check; confirm a pixel shader is always bound when this wrapper runs. */ static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) { + struct si_context *sctx = (struct si_context *)ctx; + + if (sctx->framebuffer.state.nr_cbufs == 1) { + struct si_shader_selector *sel = sctx->shader.ps.cso; + bool free_nir; + if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) { + struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir); + + /* Determine if this fragment shader always writes vec4(1) if a 
specific texture + * is all 1s. The verdict is cached in sel->info.writes_1_if_tex_is_1 as texunit + 1, or 0 when it does not hold. + */ + float in[4] = { 1.0, 1.0, 1.0, 1.0 }; + float out[4]; + int texunit; + if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) && + !memcmp(in, out, 4 * sizeof(float))) { + sel->info.writes_1_if_tex_is_1 = 1 + texunit; + } else { + sel->info.writes_1_if_tex_is_1 = 0; + } + + if (free_nir) + ralloc_free(nir); + } + + if (sel->info.writes_1_if_tex_is_1 && + sel->info.writes_1_if_tex_is_1 != 0xff) { + /* Now check if the texture is a depth texture whose level u.tex.first_level was cleared to 1. NOTE(review): unit is used as a shift count on a 32-bit value and views[unit]->texture is cast to si_texture; confirm unit < 32 and that a buffer resource can never be bound here. */ + int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1; + struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT]; + if ((1u << unit) & samp->enabled_mask) { + struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture; + if (tex->is_depth && + tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) && + tex->depth_clear_value[0] == 1) { + return; /* skip: real_draw_vbo is never called for this draw */ + } + /* TODO: handle color textures */ + } + } + } + + sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); +} + static void si_bind_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; @@ -664,6 +724,14 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) old_blend->commutative_4bit != blend->commutative_4bit || old_blend->logicop_enable != blend->logicop_enable))) si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); + + if (likely(!radeon_uses_secure_bos(sctx->ws))) { + if (unlikely(blend->allows_noop_optimization)) { + si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop); + } else { /* presumably NULL removes any installed wrapper; confirm in si_install_draw_wrapper */ + si_install_draw_wrapper(sctx, NULL); + } + } } static void si_delete_blend_state(struct pipe_context *ctx, void *state) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fc7ba98800b..4981d490db0 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -65,6 +65,7 @@ struct si_state_blend { 
bool alpha_to_one : 1; bool dual_src_blend : 1; bool logicop_enable : 1; + bool allows_noop_optimization : 1; /* computed in si_create_blend_state_mode; selects the skip-draw wrapper in si_bind_blend_state */ }; struct si_state_rasterizer {