mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 17:40:11 +01:00
radeonsi: use si_nir_is_output_const_if_tex_is_const
When a blending mode producing "color = src * dst" is used and we can determine that src is 1 (i.e. the fragment shader always outputs 1), the result is simply dst, so the draw call can be dropped completely. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10979>
This commit is contained in:
parent
83250036be
commit
b78a38bd02
4 changed files with 80 additions and 0 deletions
|
|
@ -402,6 +402,13 @@ struct si_shader_info {
|
||||||
* fragment shader invocations if flat shading.
|
* fragment shader invocations if flat shading.
|
||||||
*/
|
*/
|
||||||
bool allow_flat_shading;
|
bool allow_flat_shading;
|
||||||
|
|
||||||
|
/* Optimization: if the texture bound to this texunit has been cleared to 1,
|
||||||
|
* then the draw can be skipped (see si_draw_blend_dst_sampler_noop). Initially the
|
||||||
|
* value is 0xff (undetermined) and can be later changed to 0 (= false) or
|
||||||
|
* texunit + 1.
|
||||||
|
*/
|
||||||
|
uint8_t writes_1_if_tex_is_1;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* A shader selector is a gallium CSO and contains shader variants and
|
/* A shader selector is a gallium CSO and contains shader variants and
|
||||||
|
|
|
||||||
|
|
@ -418,6 +418,10 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
||||||
info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
|
info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
|
||||||
nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
|
nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
|
||||||
TGSI_INTERPOLATE_LOC_CENTER;
|
TGSI_INTERPOLATE_LOC_CENTER;
|
||||||
|
/* Set an invalid value. Will be determined at draw time if needed when the expected
|
||||||
|
* conditions are met.
|
||||||
|
*/
|
||||||
|
info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
|
||||||
}
|
}
|
||||||
|
|
||||||
info->constbuf0_num_slots = nir->num_uniforms;
|
info->constbuf0_num_slots = nir->num_uniforms;
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
#include "si_build_pm4.h"
|
#include "si_build_pm4.h"
|
||||||
#include "si_query.h"
|
#include "si_query.h"
|
||||||
|
#include "si_shader_internal.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
#include "util/fast_idiv_by_const.h"
|
#include "util/fast_idiv_by_const.h"
|
||||||
#include "util/format/u_format.h"
|
#include "util/format/u_format.h"
|
||||||
|
|
@ -445,6 +446,14 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||||
blend->alpha_to_one = state->alpha_to_one;
|
blend->alpha_to_one = state->alpha_to_one;
|
||||||
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
|
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
|
||||||
blend->logicop_enable = logicop_enable;
|
blend->logicop_enable = logicop_enable;
|
||||||
|
blend->allows_noop_optimization =
|
||||||
|
state->rt[0].rgb_func == PIPE_BLEND_ADD &&
|
||||||
|
state->rt[0].alpha_func == PIPE_BLEND_ADD &&
|
||||||
|
state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
|
||||||
|
state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&
|
||||||
|
state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
|
||||||
|
state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&
|
||||||
|
mode == V_028808_CB_NORMAL;
|
||||||
|
|
||||||
unsigned num_shader_outputs = state->max_rt + 1; /* estimate */
|
unsigned num_shader_outputs = state->max_rt + 1; /* estimate */
|
||||||
if (blend->dual_src_blend)
|
if (blend->dual_src_blend)
|
||||||
|
|
@ -627,6 +636,57 @@ static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_b
|
||||||
return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
|
return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,
|
||||||
|
const struct pipe_draw_info *info,
|
||||||
|
unsigned drawid_offset,
|
||||||
|
const struct pipe_draw_indirect_info *indirect,
|
||||||
|
const struct pipe_draw_start_count_bias *draws,
|
||||||
|
unsigned num_draws) {
|
||||||
|
struct si_context *sctx = (struct si_context *)ctx;
|
||||||
|
|
||||||
|
if (sctx->framebuffer.state.nr_cbufs == 1) {
|
||||||
|
struct si_shader_selector *sel = sctx->shader.ps.cso;
|
||||||
|
bool free_nir;
|
||||||
|
if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) {
|
||||||
|
struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir);
|
||||||
|
|
||||||
|
/* Determine if this fragment shader always writes vec4(1) if a specific texture
|
||||||
|
* is all 1s.
|
||||||
|
*/
|
||||||
|
float in[4] = { 1.0, 1.0, 1.0, 1.0 };
|
||||||
|
float out[4];
|
||||||
|
int texunit;
|
||||||
|
if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) &&
|
||||||
|
!memcmp(in, out, 4 * sizeof(float))) {
|
||||||
|
sel->info.writes_1_if_tex_is_1 = 1 + texunit;
|
||||||
|
} else {
|
||||||
|
sel->info.writes_1_if_tex_is_1 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (free_nir)
|
||||||
|
ralloc_free(nir);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sel->info.writes_1_if_tex_is_1 &&
|
||||||
|
sel->info.writes_1_if_tex_is_1 != 0xff) {
|
||||||
|
/* Now check if the texture is cleared to 1 */
|
||||||
|
int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1;
|
||||||
|
struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||||
|
if ((1u << unit) & samp->enabled_mask) {
|
||||||
|
struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture;
|
||||||
|
if (tex->is_depth &&
|
||||||
|
tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) &&
|
||||||
|
tex->depth_clear_value[0] == 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* TODO: handle color textures */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
|
||||||
|
}
|
||||||
|
|
||||||
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
||||||
{
|
{
|
||||||
struct si_context *sctx = (struct si_context *)ctx;
|
struct si_context *sctx = (struct si_context *)ctx;
|
||||||
|
|
@ -664,6 +724,14 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
||||||
old_blend->commutative_4bit != blend->commutative_4bit ||
|
old_blend->commutative_4bit != blend->commutative_4bit ||
|
||||||
old_blend->logicop_enable != blend->logicop_enable)))
|
old_blend->logicop_enable != blend->logicop_enable)))
|
||||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||||
|
|
||||||
|
if (likely(!radeon_uses_secure_bos(sctx->ws))) {
|
||||||
|
if (unlikely(blend->allows_noop_optimization)) {
|
||||||
|
si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop);
|
||||||
|
} else {
|
||||||
|
si_install_draw_wrapper(sctx, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
|
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,7 @@ struct si_state_blend {
|
||||||
bool alpha_to_one : 1;
|
bool alpha_to_one : 1;
|
||||||
bool dual_src_blend : 1;
|
bool dual_src_blend : 1;
|
||||||
bool logicop_enable : 1;
|
bool logicop_enable : 1;
|
||||||
|
bool allows_noop_optimization : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct si_state_rasterizer {
|
struct si_state_rasterizer {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue