mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
nir/algebraic: ad-hoc constant-fold ALU instructions
Slight differences due to different optimization order.

Totals from 135 (0.17% of 79839) affected shaders: (Navi48)
Instrs: 287852 -> 287527 (-0.11%); split: -0.15%, +0.03%
CodeSize: 1522972 -> 1521764 (-0.08%); split: -0.12%, +0.04%
Latency: 1806803 -> 1825754 (+1.05%); split: -0.08%, +1.12%
InvThroughput: 242693 -> 244703 (+0.83%); split: -0.02%, +0.84%
VClause: 4092 -> 4084 (-0.20%)
SClause: 7462 -> 7478 (+0.21%)
Copies: 20509 -> 20401 (-0.53%); split: -0.74%, +0.21%
Branches: 6395 -> 6386 (-0.14%)
PreSGPRs: 7334 -> 7337 (+0.04%); split: -0.03%, +0.07%
PreVGPRs: 6375 -> 6382 (+0.11%)
VALU: 151787 -> 151595 (-0.13%); split: -0.15%, +0.02%
SALU: 52967 -> 52910 (-0.11%); split: -0.23%, +0.12%
VMEM: 6704 -> 6696 (-0.12%)
SMEM: 12099 -> 12129 (+0.25%)

Tested on a small collection of 2518 shaders from Dredge with callgrind using RADV:

baseline:
  nir_opt_algebraic was called 12917 times from radv_optimize_nir()
  nir_opt_cse was called 15204 times from radv_optimize_nir()
  relative time spent in radv_optimize_nir(): 31.48%
  total instruction fetch cost: 28,642,638,021

with nir/algebraic: ad-hoc constant-fold ALU instructions
  nir_opt_algebraic was called 12797 times from radv_optimize_nir()
  nir_opt_cse was called 12963 times from radv_optimize_nir()
  relative time spent in radv_optimize_nir(): 30.63%
  total instruction fetch cost: 28,284,386,123

=> ~1.27% improvement in total compile times

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37195>
This commit is contained in:
parent
10be538851
commit
b3615e5d6f
2 changed files with 20 additions and 11 deletions
|
|
@@ -494,15 +494,24 @@ construct_value(nir_builder *build,
|
|||
state, instr);
|
||||
}
|
||||
|
||||
nir_builder_instr_insert(build, &alu->instr);
|
||||
|
||||
assert(alu->def.index ==
|
||||
/* Immediately try to constant-fold the expression, in order to allow
|
||||
* for more expressions to be matched within a single pass.
|
||||
*/
|
||||
nir_def *def = &alu->def;
|
||||
nir_def *const_expr = nir_try_constant_fold_alu(build, alu);
|
||||
if (const_expr) {
|
||||
nir_instr_free(&alu->instr);
|
||||
def = const_expr;
|
||||
} else {
|
||||
nir_builder_instr_insert(build, &alu->instr);
|
||||
}
|
||||
assert(def->index ==
|
||||
util_dynarray_num_elements(state->states, uint16_t));
|
||||
util_dynarray_append_typed(state->states, uint16_t, 0);
|
||||
nir_algebraic_automaton(&alu->instr, state->states, state->pass_op_table);
|
||||
nir_algebraic_automaton(def->parent_instr, state->states, state->pass_op_table);
|
||||
|
||||
nir_alu_src val;
|
||||
val.src = nir_src_for_ssa(&alu->def);
|
||||
val.src = nir_src_for_ssa(def);
|
||||
if (expr->swizzle < 0)
|
||||
memcpy(val.swizzle, identity_swizzle, sizeof(val.swizzle));
|
||||
else
|
||||
|
|
|
|||
|
|
@@ -36,20 +36,20 @@ traces:
|
|||
checksum: f53ac20e17da91c0359c31f2fa3f401e
|
||||
0ad/0ad-v2.trace:
|
||||
gl-intel-apl:
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
gl-intel-glk:
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
gl-intel-amly:
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
gl-intel-kbl:
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
gl-intel-whl:
|
||||
checksum: e67b7a93bac02e41de0326419ee17a3e
|
||||
gl-intel-cml:
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
gl-intel-adl:
|
||||
label: [no-perf]
|
||||
checksum: b25b9dfce05def046f6277d05b3cba98
|
||||
checksum: 6c9f66db38f03847574c543de48fa203
|
||||
pathfinder/demo-v2.trace:
|
||||
gl-intel-apl:
|
||||
checksum: d9b33f0a2efe17c21b7933242afd9ec7
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue