mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-22 07:30:37 +02:00
nir/lower_tex: optimize txd(coord, ddx/ddy(coord))
fossil-db (gfx1201): Totals from 73 (0.09% of 79839) affected shaders: MaxWaves: 1668 -> 1670 (+0.12%) Instrs: 352537 -> 347991 (-1.29%); split: -1.29%, +0.00% CodeSize: 1924140 -> 1887660 (-1.90%); split: -1.90%, +0.00% VGPRs: 6360 -> 6324 (-0.57%) Latency: 3891330 -> 3888192 (-0.08%); split: -0.10%, +0.02% InvThroughput: 789998 -> 783583 (-0.81%); split: -0.84%, +0.03% VClause: 6409 -> 6408 (-0.02%); split: -0.06%, +0.05% SClause: 4071 -> 4102 (+0.76%); split: -0.10%, +0.86% Copies: 16756 -> 16316 (-2.63%); split: -2.94%, +0.32% PreVGPRs: 5456 -> 5432 (-0.44%); split: -0.57%, +0.13% VALU: 232982 -> 228117 (-2.09%) SALU: 32853 -> 32848 (-0.02%); split: -0.05%, +0.03% VMEM: 9234 -> 9237 (+0.03%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37561>
This commit is contained in:
parent
8e7ea4a882
commit
92beca9aa5
3 changed files with 113 additions and 3 deletions
|
|
@ -5843,12 +5843,17 @@ typedef struct nir_lower_tex_options {
|
|||
*/
|
||||
bool lower_index_to_offset;
|
||||
|
||||
/* Optimize txd(coord, ddxy_coarse(coord)) to tex(coord). */
|
||||
bool optimize_txd;
|
||||
|
||||
/**
|
||||
* Payload data to be sent to callback / filter functions.
|
||||
*/
|
||||
void *callback_data;
|
||||
} nir_lower_tex_options;
|
||||
|
||||
unsigned nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs);
|
||||
|
||||
/** Lowers complex texture instructions to simpler ones */
|
||||
bool nir_lower_tex(nir_shader *shader,
|
||||
const nir_lower_tex_options *options);
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@
|
|||
#include "nir_builder.h"
|
||||
#include "nir_builtin_builder.h"
|
||||
#include "nir_format_convert.h"
|
||||
#include "nir_loop_analyze.h"
|
||||
|
||||
typedef struct nir_const_value_3_4 {
|
||||
nir_const_value v[3][4];
|
||||
|
|
@ -1530,16 +1531,105 @@ lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
|
|||
return progress;
|
||||
}
|
||||
|
||||
unsigned
|
||||
nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs)
|
||||
{
|
||||
if (tex->op != nir_texop_txd)
|
||||
return 0;
|
||||
|
||||
/* Non-uniform texture samples with implicit LOD might require that the resource is quad-uniform. */
|
||||
if (tex->texture_non_uniform || tex->sampler_non_uniform)
|
||||
return 0;
|
||||
|
||||
nir_def *coord = nir_get_tex_src(tex, nir_tex_src_coord);
|
||||
nir_def *ddxy[] = { nir_get_tex_src(tex, nir_tex_src_ddx), nir_get_tex_src(tex, nir_tex_src_ddy) };
|
||||
assert(coord && ddxy[0] && ddxy[0]);
|
||||
for (unsigned i = 0; i < ddxy[0]->num_components; i++) {
|
||||
nir_scalar coord_comp = nir_scalar_resolved(coord, i);
|
||||
for (unsigned j = 0; j < 2; j++) {
|
||||
nir_scalar ddxy_comp = nir_scalar_resolved(ddxy[j], i);
|
||||
if (!nir_scalar_is_intrinsic(ddxy_comp))
|
||||
return 0;
|
||||
|
||||
nir_intrinsic_op op = nir_scalar_intrinsic_op(ddxy_comp);
|
||||
bool coarse_default = shader->options->coarse_ddx;
|
||||
if (j == 0 && (op != nir_intrinsic_ddx || !coarse_default) &&
|
||||
op != nir_intrinsic_ddx_coarse)
|
||||
return 0;
|
||||
if (j == 1 && (op != nir_intrinsic_ddy || !coarse_default) &&
|
||||
op != nir_intrinsic_ddy_coarse)
|
||||
return 0;
|
||||
|
||||
ddxy_instrs[i * 2 + j] = ddxy_comp.def->parent_instr;
|
||||
|
||||
nir_def *def = nir_def_as_intrinsic(ddxy_comp.def)->src[0].ssa;
|
||||
ddxy_comp = nir_scalar_resolved(def, ddxy_comp.comp);
|
||||
if (!nir_scalar_equal(coord_comp, ddxy_comp))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return ddxy[0]->num_components;
|
||||
}
|
||||
|
||||
static bool
|
||||
optimize_txd(nir_shader *shader, nir_tex_instr *tex, unsigned prev_terminate_return)
|
||||
{
|
||||
nir_instr *ddxy_instrs[NIR_MAX_VEC_COMPONENTS * 2];
|
||||
unsigned size = nir_tex_parse_txd_coords(shader, tex, ddxy_instrs);
|
||||
if (!size)
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
nir_instr *instr = ddxy_instrs[i];
|
||||
if (instr->block->cf_node.parent != tex->instr.block->cf_node.parent)
|
||||
return false;
|
||||
|
||||
if (prev_terminate_return > instr->index)
|
||||
return false;
|
||||
|
||||
nir_cf_node *cur = &tex->instr.block->cf_node;
|
||||
while (cur != &instr->block->cf_node) {
|
||||
cur = nir_cf_node_prev(cur);
|
||||
if (contains_other_jump(cur, NULL))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
tex->op = nir_texop_tex;
|
||||
nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
|
||||
nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_tex_block(nir_block *block, nir_builder *b,
|
||||
const nir_lower_tex_options *options,
|
||||
const struct nir_shader_compiler_options *compiler_options)
|
||||
const struct nir_shader_compiler_options *compiler_options,
|
||||
unsigned *prev_terminate_return)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_terminate:
|
||||
case nir_intrinsic_terminate_if:
|
||||
*prev_terminate_return = instr->index;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
} else if (instr->type == nir_instr_type_jump) {
|
||||
if (nir_instr_as_jump(instr)->type == nir_jump_halt ||
|
||||
nir_instr_as_jump(instr)->type == nir_jump_return)
|
||||
*prev_terminate_return = instr->index;
|
||||
continue;
|
||||
} else if (instr->type != nir_instr_type_tex) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
|
||||
|
|
@ -1717,6 +1807,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
|
|||
progress = true;
|
||||
}
|
||||
|
||||
/* saturate_src() replaces tex with txd, so skip if sat_mask!=0. */
|
||||
if (options->optimize_txd && tex->op == nir_texop_txd && !sat_mask &&
|
||||
nir_shader_supports_implicit_lod(b->shader)) {
|
||||
progress |= optimize_txd(b->shader, tex, *prev_terminate_return);
|
||||
}
|
||||
|
||||
if (tex->op == nir_texop_txd &&
|
||||
(options->lower_txd ||
|
||||
(options->lower_txd_clamp && has_min_lod) ||
|
||||
|
|
@ -1810,8 +1906,11 @@ nir_lower_tex_impl(nir_function_impl *impl,
|
|||
bool progress = false;
|
||||
nir_builder builder = nir_builder_create(impl);
|
||||
|
||||
nir_metadata_require(impl, nir_metadata_instr_index);
|
||||
|
||||
unsigned prev_terminate_return = 0;
|
||||
nir_foreach_block(block, impl) {
|
||||
progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
|
||||
progress |= nir_lower_tex_block(block, &builder, options, compiler_options, &prev_terminate_return);
|
||||
}
|
||||
|
||||
nir_progress(true, impl, nir_metadata_control_flow);
|
||||
|
|
|
|||
|
|
@ -787,6 +787,12 @@ typedef struct nir_shader_compiler_options {
|
|||
/** Whether derivative intrinsics must be scalarized. */
|
||||
bool scalarize_ddx;
|
||||
|
||||
/**
|
||||
* Whether unspecified derivative intrinsics are always coarse.
|
||||
* If this is false, they might be either coarse or fine.
|
||||
*/
|
||||
bool coarse_ddx;
|
||||
|
||||
/**
|
||||
* Assign a range of driver locations to per-view outputs, with unique
|
||||
* slots for each view. If unset, per-view outputs will be treated
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue