mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-23 18:10:36 +02:00
ac/nir: optimize txd(coord, ddx/ddy(coord))
This is done in ac_nir_lower_tex so that we can optimize derivative
calculations with a different exec mask than the texture sample by using the
nir_strict_wqm_coord_amd path. It's also more aware of divergence than
nir_lower_tex is.

fossil-db (gfx1201):
Totals from 103 (0.13% of 79839) affected shaders:
MaxWaves: 2610 -> 2620 (+0.38%)
Instrs: 347283 -> 345912 (-0.39%); split: -0.40%, +0.00%
CodeSize: 1892380 -> 1883824 (-0.45%); split: -0.46%, +0.00%
VGPRs: 8028 -> 7824 (-2.54%)
Latency: 3942575 -> 3939623 (-0.07%); split: -0.08%, +0.01%
InvThroughput: 867147 -> 865281 (-0.22%); split: -0.24%, +0.02%
VClause: 6230 -> 6221 (-0.14%); split: -0.19%, +0.05%
SClause: 3910 -> 3914 (+0.10%); split: -0.26%, +0.36%
Copies: 16091 -> 15721 (-2.30%); split: -2.74%, +0.44%
PreSGPRs: 4651 -> 4658 (+0.15%)
PreVGPRs: 6389 -> 6320 (-1.08%); split: -1.17%, +0.09%
VALU: 228715 -> 227490 (-0.54%); split: -0.54%, +0.01%
SALU: 32763 -> 32767 (+0.01%); split: -0.06%, +0.07%
VMEM: 9027 -> 9024 (-0.03%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37561>
This commit is contained in:
parent
309ac1f0c0
commit
7d552d71e9
1 changed file with 89 additions and 18 deletions
|
|
@ -221,12 +221,20 @@ typedef struct {
|
|||
nir_intrinsic_instr *load;
|
||||
} coord_info;
|
||||
|
||||
static bool
|
||||
can_move_coord(nir_scalar scalar, coord_info *info)
|
||||
static bool can_move_coord(nir_scalar scalar, coord_info *info, nir_block *toplevel_block, bool txd)
|
||||
{
|
||||
if (scalar.def->bit_size != 32)
|
||||
return false;
|
||||
|
||||
/* Allow any def that is reachable from the nir_strict_wqm_coord_amd when
|
||||
* optimizing nir_texop_txd. Otherwise, we only use nir_strict_wqm_coord_amd
|
||||
* for cases that D3D11 requires.
|
||||
*/
|
||||
if (txd && nir_block_dominates(scalar.def->parent_instr->block, toplevel_block)) {
|
||||
info->load = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (nir_scalar_is_const(scalar))
|
||||
return true;
|
||||
|
||||
|
|
@ -273,7 +281,8 @@ struct move_tex_coords_state {
|
|||
|
||||
struct loop_if_state {
|
||||
bool inside_loop;
|
||||
bool divergent_discard;
|
||||
unsigned prev_terminate;
|
||||
unsigned prev_break_continue;
|
||||
};
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -284,6 +293,9 @@ build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_i
|
|||
if (nir_scalar_is_const(scalar))
|
||||
return nir_imm_intN_t(b, nir_scalar_as_uint(scalar), scalar.def->bit_size);
|
||||
|
||||
if (!info.load)
|
||||
return nir_mov_scalar(b, scalar);
|
||||
|
||||
ASSERTED nir_src offset = *nir_get_io_offset_src(info.load);
|
||||
assert(nir_src_is_const(offset) && !nir_src_as_uint(offset));
|
||||
|
||||
|
|
@ -304,11 +316,48 @@ build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_i
|
|||
return res;
|
||||
}
|
||||
|
||||
/**
 * Check whether a texture instruction with explicit derivatives can be
 * handled by optimize_txd(), and whether doing so requires the strict-WQM
 * coordinate path.
 *
 * The derivative instructions are collected by nir_tex_parse_txd_coords();
 * a result of zero means nothing could be collected and the function fails.
 * (Presumably that helper matches ddx/ddy(coord) operands — confirm against
 * its definition.)
 *
 * \param shader  Forwarded to nir_tex_parse_txd_coords().
 * \param loop_if Control-flow tracking state; inside_loop, prev_terminate and
 *                prev_break_continue are read.
 * \param tex     The texture instruction being examined.
 * \param need_strict_wqm_coord
 *                Out-parameter, always written. Set to true when the quad may
 *                be incomplete at \p tex and at least one collected derivative
 *                instruction either lives under a different parent CF node
 *                than \p tex or has an index lower than the recorded
 *                terminate / break-continue index.
 *
 * \return false if nir_tex_parse_txd_coords() reported zero instructions,
 *         true otherwise.
 */
static bool can_optimize_txd(nir_shader *shader, struct loop_if_state *loop_if, nir_tex_instr *tex,
                             bool *need_strict_wqm_coord)
{
   /* Give the out-parameter a defined value on every path, including the
    * early failure return below, so callers never read an indeterminate flag.
    */
   *need_strict_wqm_coord = false;

   nir_instr *ddxy_instrs[NIR_MAX_VEC_COMPONENTS * 2];
   unsigned size = nir_tex_parse_txd_coords(shader, tex, ddxy_instrs);
   if (!size)
      return false;

   /* Same incomplete-quad condition used by the caller's CF walk: divergent
    * block, an earlier terminate, or being inside a loop.
    */
   bool incomplete_quad =
      tex->instr.block->divergent || loop_if->prev_terminate || loop_if->inside_loop;
   if (!incomplete_quad)
      return true;

   nir_cf_node *tex_parent = tex->instr.block->cf_node.parent;
   for (unsigned i = 0; i < size; i++) {
      nir_instr *instr = ddxy_instrs[i];

      /* The derivative is in a different CF node than the sample. */
      bool different_cf = instr->block->cf_node.parent != tex_parent;
      /* A terminate or divergent jump was recorded after the derivative. */
      bool lanes_lost_since = loop_if->prev_terminate > instr->index ||
                              loop_if->prev_break_continue > instr->index;

      *need_strict_wqm_coord |= different_cf || lanes_lost_since;
   }

   return true;
}
|
||||
|
||||
/**
 * Convert a nir_texop_txd instruction into nir_texop_tex by dropping its
 * ddx and ddy sources.
 *
 * \param tex Texture instruction to rewrite in place.
 * \return true if \p tex was a txd and has been rewritten, false if it was
 *         left untouched.
 */
static bool optimize_txd(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_txd)
      return false;

   tex->op = nir_texop_tex;

   /* Look up each source index right before removing it: removing ddx may
    * shift the position of ddy.
    */
   const int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
   nir_tex_instr_remove_src(tex, ddx_idx);

   const int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
   nir_tex_instr_remove_src(tex, ddy_idx);

   return true;
}
|
||||
|
||||
static bool
|
||||
move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, nir_instr *instr)
|
||||
{
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
if (tex->op != nir_texop_tex && tex->op != nir_texop_txb && tex->op != nir_texop_lod)
|
||||
if (tex->op != nir_texop_tex && tex->op != nir_texop_txb && tex->op != nir_texop_lod &&
|
||||
tex->op != nir_texop_txd)
|
||||
return false;
|
||||
|
||||
switch (tex->sampler_dim) {
|
||||
|
|
@ -333,9 +382,11 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
|
|||
nir_scalar components[NIR_MAX_VEC_COMPONENTS];
|
||||
coord_info infos[NIR_MAX_VEC_COMPONENTS];
|
||||
bool can_move_all = true;
|
||||
nir_block *toplevel_block = nir_cursor_current_block(state->toplevel_b.cursor);
|
||||
for (unsigned i = 0; i < tex->coord_components; i++) {
|
||||
components[i] = nir_scalar_resolved(src->src.ssa, i);
|
||||
can_move_all &= can_move_coord(components[i], &infos[i]);
|
||||
can_move_all &=
|
||||
can_move_coord(components[i], &infos[i], toplevel_block, tex->op == nir_texop_txd);
|
||||
}
|
||||
if (!can_move_all)
|
||||
return false;
|
||||
|
|
@ -377,6 +428,8 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
|
|||
if (offset_src >= 0) /* Workaround requirement in nir_tex_instr_src_size(). */
|
||||
tex->src[offset_src].src_type = nir_tex_src_backend2;
|
||||
|
||||
optimize_txd(tex);
|
||||
|
||||
state->num_wqm_vgprs += linear_vgpr_size;
|
||||
|
||||
return true;
|
||||
|
|
@ -391,7 +444,7 @@ move_ddxy(struct move_tex_coords_state *state, nir_function_impl *impl, nir_intr
|
|||
bool can_move_all = true;
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
components[i] = nir_scalar_resolved(instr->src[0].ssa, i);
|
||||
can_move_all &= can_move_coord(components[i], &infos[i]);
|
||||
can_move_all &= can_move_coord(components[i], &infos[i], NULL, false);
|
||||
}
|
||||
if (!can_move_all || state->num_wqm_vgprs + num_components > state->options->max_wqm_vgprs)
|
||||
return false;
|
||||
|
|
@ -415,6 +468,7 @@ static bool move_coords_from_divergent_cf(struct move_tex_coords_state *state,
|
|||
struct loop_if_state *loop_if, struct exec_list *cf_list)
|
||||
{
|
||||
nir_function_impl *impl = state->toplevel_b.impl;
|
||||
nir_shader *shader = impl->function->shader;
|
||||
|
||||
bool progress = false;
|
||||
foreach_list_typed (nir_cf_node, cf_node, node, cf_list) {
|
||||
|
|
@ -425,27 +479,38 @@ static bool move_coords_from_divergent_cf(struct move_tex_coords_state *state,
|
|||
bool top_level = cf_list == &impl->body;
|
||||
|
||||
nir_foreach_instr (instr, block) {
|
||||
if (top_level && !loop_if->divergent_discard)
|
||||
if (top_level && !loop_if->prev_terminate)
|
||||
state->toplevel_b.cursor = nir_before_instr(instr);
|
||||
|
||||
/* Assume quads might be incomplete when inside loops in case of a
|
||||
* divergent terminate from a previous iteration.
|
||||
*/
|
||||
bool incomplete_quad =
|
||||
block->divergent || loop_if->divergent_discard || loop_if->inside_loop;
|
||||
block->divergent || loop_if->prev_terminate || loop_if->inside_loop;
|
||||
|
||||
if (instr->type == nir_instr_type_tex && incomplete_quad) {
|
||||
progress |= move_tex_coords(state, impl, instr);
|
||||
if (instr->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
if (tex->op == nir_texop_txd) {
|
||||
bool txd_need_strict_wqm_coord = false;
|
||||
if (!can_optimize_txd(shader, loop_if, tex, &txd_need_strict_wqm_coord))
|
||||
continue;
|
||||
if (!txd_need_strict_wqm_coord)
|
||||
progress |= optimize_txd(tex);
|
||||
}
|
||||
|
||||
if (state->options->fix_derivs_in_divergent_cf && incomplete_quad)
|
||||
progress |= move_tex_coords(state, impl, instr);
|
||||
} else if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_terminate:
|
||||
if (block->divergent)
|
||||
loop_if->divergent_discard = true;
|
||||
loop_if->prev_terminate = instr->index;
|
||||
break;
|
||||
case nir_intrinsic_terminate_if:
|
||||
if (block->divergent || nir_src_is_divergent(&intrin->src[0]))
|
||||
loop_if->divergent_discard = true;
|
||||
loop_if->prev_terminate = instr->index;
|
||||
break;
|
||||
case nir_intrinsic_ddx:
|
||||
case nir_intrinsic_ddy:
|
||||
|
|
@ -459,10 +524,12 @@ static bool move_coords_from_divergent_cf(struct move_tex_coords_state *state,
|
|||
default:
|
||||
break;
|
||||
}
|
||||
} else if (instr->type == nir_instr_type_jump && block->divergent) {
|
||||
loop_if->prev_break_continue = instr->index;
|
||||
}
|
||||
}
|
||||
|
||||
if (top_level && !loop_if->divergent_discard)
|
||||
if (top_level && !loop_if->prev_terminate)
|
||||
state->toplevel_b.cursor = nir_after_block_before_jump(block);
|
||||
break;
|
||||
}
|
||||
|
|
@ -472,7 +539,9 @@ static bool move_coords_from_divergent_cf(struct move_tex_coords_state *state,
|
|||
struct loop_if_state inner_else = *loop_if;
|
||||
progress |= move_coords_from_divergent_cf(state, &inner_then, &nif->then_list);
|
||||
progress |= move_coords_from_divergent_cf(state, &inner_else, &nif->else_list);
|
||||
loop_if->divergent_discard |= inner_then.divergent_discard || inner_else.divergent_discard;
|
||||
loop_if->prev_terminate = MAX2(inner_then.prev_terminate, inner_else.prev_terminate);
|
||||
loop_if->prev_break_continue =
|
||||
MAX2(inner_then.prev_break_continue, inner_else.prev_break_continue);
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_loop: {
|
||||
|
|
@ -481,7 +550,7 @@ static bool move_coords_from_divergent_cf(struct move_tex_coords_state *state,
|
|||
struct loop_if_state inner = *loop_if;
|
||||
inner.inside_loop = true;
|
||||
progress |= move_coords_from_divergent_cf(state, &inner, &loop->body);
|
||||
loop_if->divergent_discard |= inner.divergent_discard;
|
||||
loop_if->prev_terminate = inner.prev_terminate;
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_function:
|
||||
|
|
@ -496,9 +565,10 @@ bool
|
|||
ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options)
|
||||
{
|
||||
bool progress = false;
|
||||
if (options->fix_derivs_in_divergent_cf) {
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_metadata_require(impl, nir_metadata_divergence);
|
||||
nir_metadata_require(
|
||||
impl, nir_metadata_divergence | nir_metadata_dominance | nir_metadata_instr_index);
|
||||
|
||||
struct move_tex_coords_state state;
|
||||
state.toplevel_b = nir_builder_create(impl);
|
||||
|
|
@ -507,7 +577,8 @@ ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options)
|
|||
|
||||
struct loop_if_state loop_if;
|
||||
loop_if.inside_loop = false;
|
||||
loop_if.divergent_discard = false;
|
||||
loop_if.prev_terminate = 0;
|
||||
loop_if.prev_break_continue = 0;
|
||||
bool impl_progress = move_coords_from_divergent_cf(&state, &loop_if, &impl->body);
|
||||
progress |= nir_progress(impl_progress, impl, nir_metadata_control_flow);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue