mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-15 09:58:05 +02:00
aco: don't use v_mad_mix on GFX9 if 16-bit denormals must be preserved
This probably effectively disables the v_mad_mix optimization on GFX9.

fossil-db (Vega):
Totals from 11545 (7.15% of 161366) affected shaders:
MaxWaves: 43025 -> 42780 (-0.57%); split: +0.06%, -0.63%
Instrs: 18571635 -> 18734201 (+0.88%); split: -0.00%, +0.88%
CodeSize: 96483568 -> 96611012 (+0.13%); split: -0.11%, +0.24%
SGPRs: 1079056 -> 1077616 (-0.13%); split: -0.14%, +0.01%
VGPRs: 819248 -> 821868 (+0.32%); split: -0.04%, +0.36%
SpillSGPRs: 13313 -> 12464 (-6.38%)
Latency: 293804093 -> 295046122 (+0.42%); split: -0.09%, +0.51%
InvThroughput: 110002239 -> 110994978 (+0.90%); split: -0.03%, +0.93%
VClause: 342458 -> 342596 (+0.04%); split: -0.12%, +0.16%
SClause: 648566 -> 648046 (-0.08%); split: -0.12%, +0.04%
Copies: 1728225 -> 1726679 (-0.09%); split: -0.66%, +0.57%
Branches: 552973 -> 552963 (-0.00%); split: -0.02%, +0.02%
PreSGPRs: 862360 -> 856820 (-0.64%); split: -0.69%, +0.05%
PreVGPRs: 773689 -> 776818 (+0.40%); split: -0.02%, +0.42%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6178
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15718>
This commit is contained in:
parent
03cf788891
commit
5b4e41e4db
2 changed files with 24 additions and 0 deletions
|
|
@@ -3473,6 +3473,10 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (ctx.program->chip_class < GFX9)
|
||||
return false;
|
||||
|
||||
/* v_mad_mix* on GFX9 always flushes denormals for 16-bit inputs/outputs */
|
||||
if (ctx.program->chip_class == GFX9 && ctx.fp_mode.denorm16_64)
|
||||
return false;
|
||||
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::v_add_f32:
|
||||
case aco_opcode::v_sub_f32:
|
||||
|
|
|
|||
|
|
@@ -1164,6 +1164,8 @@ BEGIN_TEST(optimize.mad_mix.input_conv.basic)
|
|||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a16 = inputs[1];
|
||||
|
||||
|
|
@@ -1197,6 +1199,8 @@ BEGIN_TEST(optimize.mad_mix.input_conv.precision)
|
|||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a16 = inputs[1];
|
||||
|
||||
|
|
@@ -1248,6 +1252,8 @@ BEGIN_TEST(optimize.mad_mix.input_conv.modifiers)
|
|||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a16 = inputs[1];
|
||||
|
||||
|
|
@@ -1341,6 +1347,8 @@ BEGIN_TEST(optimize.mad_mix.output_conv.basic)
|
|||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
Temp c = inputs[2];
|
||||
|
|
@@ -1381,6 +1389,8 @@ BEGIN_TEST(optimize.mad_mix.output_conv.precision)
|
|||
if (!setup_cs("v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a16 = inputs[0];
|
||||
|
||||
//! v2b: %res0_tmp = v_mul_f16 %a16, %a16
|
||||
|
|
@@ -1403,6 +1413,8 @@ BEGIN_TEST(optimize.mad_mix.output_conv.modifiers)
|
|||
if (!setup_cs("v1 v1 v2b v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
Temp a16 = inputs[2];
|
||||
|
|
@@ -1450,6 +1462,8 @@ BEGIN_TEST(optimize.mad_mix.fma.basic)
|
|||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
Temp c = inputs[2];
|
||||
|
|
@@ -1502,6 +1516,8 @@ BEGIN_TEST(optimize.mad_mix.fma.precision)
|
|||
if (!setup_cs("v1 v1 v1 v2b v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
Temp c = inputs[2];
|
||||
|
|
@@ -1562,6 +1578,8 @@ BEGIN_TEST(optimize.mad_mix.clamp)
|
|||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a16 = inputs[1];
|
||||
|
||||
|
|
@@ -1587,6 +1605,8 @@ BEGIN_TEST(optimize.mad_mix.cast)
|
|||
if (!setup_cs("v1 v2b", (chip_class)i))
|
||||
continue;
|
||||
|
||||
program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp a16 = inputs[1];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue