nir/lower_tex: optimize txd(coord, ddx/ddy(coord))

fossil-db (gfx1201):
Totals from 73 (0.09% of 79839) affected shaders:
MaxWaves: 1668 -> 1670 (+0.12%)
Instrs: 352537 -> 347991 (-1.29%); split: -1.29%, +0.00%
CodeSize: 1924140 -> 1887660 (-1.90%); split: -1.90%, +0.00%
VGPRs: 6360 -> 6324 (-0.57%)
Latency: 3891330 -> 3888192 (-0.08%); split: -0.10%, +0.02%
InvThroughput: 789998 -> 783583 (-0.81%); split: -0.84%, +0.03%
VClause: 6409 -> 6408 (-0.02%); split: -0.06%, +0.05%
SClause: 4071 -> 4102 (+0.76%); split: -0.10%, +0.86%
Copies: 16756 -> 16316 (-2.63%); split: -2.94%, +0.32%
PreVGPRs: 5456 -> 5432 (-0.44%); split: -0.57%, +0.13%
VALU: 232982 -> 228117 (-2.09%)
SALU: 32853 -> 32848 (-0.02%); split: -0.05%, +0.03%
VMEM: 9234 -> 9237 (+0.03%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37561>
This commit is contained in:
Rhys Perry 2025-09-24 16:00:42 +01:00 committed by Marge Bot
parent 8e7ea4a882
commit 92beca9aa5
3 changed files with 113 additions and 3 deletions

View file

@ -5843,12 +5843,17 @@ typedef struct nir_lower_tex_options {
*/
bool lower_index_to_offset;
/* Optimize txd(coord, ddxy_coarse(coord)) to tex(coord). */
bool optimize_txd;
/**
* Payload data to be sent to callback / filter functions.
*/
void *callback_data;
} nir_lower_tex_options;
/**
 * Checks whether a txd instruction's explicit derivatives are the (coarse)
 * ddx/ddy of its own coordinate.
 *
 * Returns the number of derivative components on a match and 0 otherwise.
 * On a match, ddxy_instrs[i * 2 + 0] and ddxy_instrs[i * 2 + 1] hold the
 * ddx and ddy instruction for component i, so the array must have room for
 * twice the returned number of entries.
 */
unsigned nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs);
/** Lowers complex texture instructions to simpler ones */
bool nir_lower_tex(nir_shader *shader,
const nir_lower_tex_options *options);

View file

@ -42,6 +42,7 @@
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_format_convert.h"
#include "nir_loop_analyze.h"
typedef struct nir_const_value_3_4 {
nir_const_value v[3][4];
@ -1530,16 +1531,105 @@ lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
return progress;
}
/**
 * Checks whether the explicit derivatives of a txd instruction are exactly the
 * coarse screen-space derivatives of the instruction's own coordinate, i.e.
 * whether the instruction has the shape txd(coord, ddx(coord), ddy(coord)).
 *
 * \param shader       Shader owning \p tex; only its compiler options are read
 *                     (coarse_ddx decides whether plain ddx/ddy count as coarse).
 * \param tex          Texture instruction to inspect.
 * \param ddxy_instrs  Output array. For every derivative component i, entry
 *                     i * 2 + 0 receives the ddx instruction and entry
 *                     i * 2 + 1 the ddy instruction. It must have room for
 *                     twice the number of derivative components. On failure
 *                     the array may have been partially written.
 *
 * \return The number of derivative components on a match, 0 otherwise.
 */
unsigned
nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs)
{
   if (tex->op != nir_texop_txd)
      return 0;

   /* Non-uniform texture samples with implicit LOD might require that the resource is quad-uniform. */
   if (tex->texture_non_uniform || tex->sampler_non_uniform)
      return 0;

   nir_def *coord = nir_get_tex_src(tex, nir_tex_src_coord);
   nir_def *ddxy[] = { nir_get_tex_src(tex, nir_tex_src_ddx), nir_get_tex_src(tex, nir_tex_src_ddy) };
   /* All three sources are dereferenced below, so check both derivatives. */
   assert(coord && ddxy[0] && ddxy[1]);

   /* Plain ddx/ddy are only acceptable when the driver guarantees that they
    * behave like the coarse variants.
    */
   const bool coarse_default = shader->options->coarse_ddx;

   /* NOTE(review): ddy is assumed to have as many components as ddx. */
   for (unsigned i = 0; i < ddxy[0]->num_components; i++) {
      nir_scalar coord_comp = nir_scalar_resolved(coord, i);

      for (unsigned j = 0; j < 2; j++) {
         nir_scalar ddxy_comp = nir_scalar_resolved(ddxy[j], i);
         if (!nir_scalar_is_intrinsic(ddxy_comp))
            return 0;

         nir_intrinsic_op op = nir_scalar_intrinsic_op(ddxy_comp);

         /* j == 0 must be a ddx flavour, j == 1 a ddy flavour. */
         if (j == 0 && (op != nir_intrinsic_ddx || !coarse_default) &&
             op != nir_intrinsic_ddx_coarse)
            return 0;

         if (j == 1 && (op != nir_intrinsic_ddy || !coarse_default) &&
             op != nir_intrinsic_ddy_coarse)
            return 0;

         ddxy_instrs[i * 2 + j] = ddxy_comp.def->parent_instr;

         /* Look through the derivative: its operand must be the very same
          * scalar as the matching coordinate component.
          */
         nir_def *def = nir_def_as_intrinsic(ddxy_comp.def)->src[0].ssa;
         ddxy_comp = nir_scalar_resolved(def, ddxy_comp.comp);
         if (!nir_scalar_equal(coord_comp, ddxy_comp))
            return 0;
      }
   }

   return ddxy[0]->num_components;
}
/**
 * Turns txd(coord, ddx(coord), ddy(coord)) into a plain tex(coord) when the
 * derivative instructions are in a position where implicit derivatives at the
 * texture instruction would see the same set of active invocations.
 *
 * \param shader                 Shader owning \p tex.
 * \param tex                    Candidate txd instruction; rewritten in place
 *                               on success.
 * \param prev_terminate_return  Instruction index recorded by the caller for
 *                               the last terminate/halt/return seen so far.
 *
 * \return true if \p tex was converted to nir_texop_tex.
 */
static bool
optimize_txd(nir_shader *shader, nir_tex_instr *tex, unsigned prev_terminate_return)
{
   nir_instr *ddxy_instrs[NIR_MAX_VEC_COMPONENTS * 2];
   unsigned size = nir_tex_parse_txd_coords(shader, tex, ddxy_instrs);
   if (!size)
      return false;

   /* The parser stores one ddx and one ddy instruction per component, so
    * there are 2 * size entries to validate, not just size.
    */
   for (unsigned i = 0; i < size * 2; i++) {
      nir_instr *instr = ddxy_instrs[i];

      /* The derivative must live in the same control-flow list as the tex. */
      if (instr->block->cf_node.parent != tex->instr.block->cf_node.parent)
         return false;

      /* A terminate/return recorded after the derivative may have changed
       * which invocations are still active by the time the tex executes.
       */
      if (prev_terminate_return > instr->index)
         return false;

      /* Walk backwards from the tex to the derivative's block and bail out
       * if contains_other_jump() flags any node in between.
       */
      nir_cf_node *cur = &tex->instr.block->cf_node;
      while (cur != &instr->block->cf_node) {
         cur = nir_cf_node_prev(cur);
         if (contains_other_jump(cur, NULL))
            return false;
      }
   }

   tex->op = nir_texop_tex;
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   return true;
}
static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
const nir_lower_tex_options *options,
const struct nir_shader_compiler_options *compiler_options)
const struct nir_shader_compiler_options *compiler_options,
unsigned *prev_terminate_return)
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_tex)
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_terminate:
case nir_intrinsic_terminate_if:
*prev_terminate_return = instr->index;
break;
default:
break;
}
continue;
} else if (instr->type == nir_instr_type_jump) {
if (nir_instr_as_jump(instr)->type == nir_jump_halt ||
nir_instr_as_jump(instr)->type == nir_jump_return)
*prev_terminate_return = instr->index;
continue;
} else if (instr->type != nir_instr_type_tex) {
continue;
}
nir_tex_instr *tex = nir_instr_as_tex(instr);
bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
@ -1717,6 +1807,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
progress = true;
}
/* saturate_src() replaces tex with txd, so skip if sat_mask!=0. */
if (options->optimize_txd && tex->op == nir_texop_txd && !sat_mask &&
nir_shader_supports_implicit_lod(b->shader)) {
progress |= optimize_txd(b->shader, tex, *prev_terminate_return);
}
if (tex->op == nir_texop_txd &&
(options->lower_txd ||
(options->lower_txd_clamp && has_min_lod) ||
@ -1810,8 +1906,11 @@ nir_lower_tex_impl(nir_function_impl *impl,
bool progress = false;
nir_builder builder = nir_builder_create(impl);
nir_metadata_require(impl, nir_metadata_instr_index);
unsigned prev_terminate_return = 0;
nir_foreach_block(block, impl) {
progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
progress |= nir_lower_tex_block(block, &builder, options, compiler_options, &prev_terminate_return);
}
nir_progress(true, impl, nir_metadata_control_flow);

View file

@ -787,6 +787,12 @@ typedef struct nir_shader_compiler_options {
/** Whether derivative intrinsics must be scalarized. */
bool scalarize_ddx;
/**
* Whether unspecified derivative intrinsics are always coarse.
* If this is false, they might be either coarse or fine.
*/
bool coarse_ddx;
/**
* Assign a range of driver locations to per-view outputs, with unique
* slots for each view. If unset, per-view outputs will be treated