mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
nir/loop_unroll: unroll more aggressively if it can improve load scheduling
Significantly improves performance of a Control compute shader. Also seems to increase FPS at the very start of the game by ~5% (RX 580, 1080p, medium settings, no MSAA).

fossil-db (Sienna):
Totals from 81 (0.06% of 139391) affected shaders:
SGPRs: 3848 -> 4362 (+13.36%); split: -0.99%, +14.35%
VGPRs: 4132 -> 4648 (+12.49%)
CodeSize: 275532 -> 659188 (+139.24%)
MaxWaves: 986 -> 906 (-8.11%)
Instrs: 54422 -> 126865 (+133.11%)
Cycles: 1057240 -> 750464 (-29.02%); split: -42.61%, +13.60%
VMEM: 26507 -> 61829 (+133.26%); split: +135.56%, -2.30%
SMEM: 4748 -> 5895 (+24.16%); split: +31.47%, -7.31%
VClause: 1933 -> 6802 (+251.89%); split: -0.72%, +252.61%
SClause: 1179 -> 1810 (+53.52%); split: -3.14%, +56.66%
Branches: 1174 -> 1157 (-1.45%); split: -23.94%, +22.49%
PreVGPRs: 3219 -> 3387 (+5.22%); split: -0.96%, +6.18%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6538>
This commit is contained in:
parent
74748f16c9
commit
dfe429eb41
3 changed files with 83 additions and 7 deletions
|
|
@ -83,6 +83,7 @@ static const struct nir_shader_compiler_options nir_options = {
|
||||||
.has_isub = true,
|
.has_isub = true,
|
||||||
.use_scoped_barrier = true,
|
.use_scoped_barrier = true,
|
||||||
.max_unroll_iterations = 32,
|
.max_unroll_iterations = 32,
|
||||||
|
.max_unroll_iterations_aggressive = 128,
|
||||||
.use_interpolated_input_intrinsics = true,
|
.use_interpolated_input_intrinsics = true,
|
||||||
.vectorize_vec2_16bit = true,
|
.vectorize_vec2_16bit = true,
|
||||||
/* nir_lower_int64() isn't actually called for the LLVM backend, but
|
/* nir_lower_int64() isn't actually called for the LLVM backend, but
|
||||||
|
|
|
||||||
|
|
@ -3390,6 +3390,7 @@ typedef struct nir_shader_compiler_options {
|
||||||
bool support_16bit_alu;
|
bool support_16bit_alu;
|
||||||
|
|
||||||
unsigned max_unroll_iterations;
|
unsigned max_unroll_iterations;
|
||||||
|
unsigned max_unroll_iterations_aggressive;
|
||||||
|
|
||||||
/* For the non-zero value of the enum corresponds multiplier when
|
/* For the non-zero value of the enum corresponds multiplier when
|
||||||
* calling lower_uniforms_to_ubo */
|
* calling lower_uniforms_to_ubo */
|
||||||
|
|
|
||||||
|
|
@ -750,6 +750,77 @@ partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
|
||||||
_mesa_hash_table_destroy(remap_table, NULL);
|
_mesa_hash_table_destroy(remap_table, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_indirect_load(nir_instr *instr)
|
||||||
|
{
|
||||||
|
if (instr->type == nir_instr_type_intrinsic) {
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
|
||||||
|
if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
|
||||||
|
intrin->intrinsic == nir_intrinsic_load_ssbo ||
|
||||||
|
intrin->intrinsic == nir_intrinsic_load_global) &&
|
||||||
|
!nir_src_is_const(intrin->src[1])) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intrin->intrinsic == nir_intrinsic_load_deref ||
|
||||||
|
intrin->intrinsic == nir_intrinsic_store_deref) {
|
||||||
|
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||||
|
nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
|
||||||
|
if (!nir_deref_mode_may_be(deref, mem_modes))
|
||||||
|
return false;
|
||||||
|
while (deref) {
|
||||||
|
if ((deref->deref_type == nir_deref_type_array ||
|
||||||
|
deref->deref_type == nir_deref_type_ptr_as_array) &&
|
||||||
|
!nir_src_is_const(deref->arr.index)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
deref = nir_deref_instr_parent(deref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (instr->type == nir_instr_type_tex) {
|
||||||
|
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||||
|
if (!nir_src_is_const(tex->src[i].src))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
can_pipeline_loads(nir_loop *loop)
|
||||||
|
{
|
||||||
|
if (!loop->info->exact_trip_count_known)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
bool interesting_loads = false;
|
||||||
|
|
||||||
|
foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
|
||||||
|
if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Control flow usually prevents useful scheduling */
|
||||||
|
if (cf_node->type != nir_cf_node_block)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (interesting_loads)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nir_block *block = nir_cf_node_as_block(cf_node);
|
||||||
|
nir_foreach_instr(instr, block) {
|
||||||
|
if (is_indirect_load(instr)) {
|
||||||
|
interesting_loads = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting_loads;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if we should unroll the loop, otherwise false.
|
* Returns true if we should unroll the loop, otherwise false.
|
||||||
*/
|
*/
|
||||||
|
|
@ -764,19 +835,22 @@ check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
|
||||||
|
|
||||||
nir_loop_info *li = loop->info;
|
nir_loop_info *li = loop->info;
|
||||||
unsigned max_iter = shader->options->max_unroll_iterations;
|
unsigned max_iter = shader->options->max_unroll_iterations;
|
||||||
|
/* Unroll much more aggressively if it can hide load latency. */
|
||||||
|
if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
|
||||||
|
max_iter = shader->options->max_unroll_iterations_aggressive;
|
||||||
unsigned trip_count =
|
unsigned trip_count =
|
||||||
li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
|
li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
|
||||||
|
|
||||||
if (trip_count > max_iter)
|
if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
|
||||||
return false;
|
|
||||||
|
|
||||||
if (li->force_unroll && !li->guessed_trip_count)
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
bool loop_not_too_large =
|
unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
|
||||||
li->instr_cost * trip_count <= max_iter * LOOP_UNROLL_LIMIT;
|
unsigned cost = li->instr_cost * trip_count;
|
||||||
|
|
||||||
return loop_not_too_large;
|
if (cost <= cost_limit && trip_count <= max_iter)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue