mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
radeonsi: enable uniform propagation for varyings except VP/Energy
Uniform propagation regresses the performance of Viewperf/Energy (VP/Energy), so it stays disabled for that shader profile. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
This commit is contained in:
parent
db5cf460d7
commit
abf0de57ab
3 changed files with 139 additions and 0 deletions
|
|
@ -1339,6 +1339,137 @@ static int si_get_screen_fd(struct pipe_screen *screen)
|
|||
return ws->get_fd(ws);
|
||||
}
|
||||
|
||||
/**
 * Return the highest expression cost that is still allowed when an output
 * expression of \p producer is moved into \p consumer.
 *
 * A result of 0 restricts propagation to constants, while UINT_MAX places no
 * limit on the expression cost. The limit is chosen from the consumer's
 * shader profile (if any) and from the consumer's shader stage.
 */
static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{
   const unsigned profile_count = si_get_num_shader_profiles();
   unsigned idx = 0;

   /* Locate the first profile whose SHA1 matches the consumer's source. */
   while (idx < profile_count &&
          !_mesa_printed_sha1_equal(consumer->info.source_sha1, si_shader_profiles[idx].sha1))
      idx++;

   /* A matching profile can opt out of uniform propagation entirely. */
   if (idx < profile_count &&
       (si_shader_profiles[idx].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS))
      return 0; /* only propagate constants */

   switch (consumer->info.stage) {
   case MESA_SHADER_TESS_CTRL: /* VS->TCS */
      /* Non-amplifying shaders can always have their varying expressions
       * moved into later shaders.
       */
      return UINT_MAX;

   case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
      /* The allowed cost shrinks as the number of input vertices grows. */
      switch (consumer->info.gs.vertices_in) {
      case 1:
         return UINT_MAX;
      case 2:
         return 20;
      default:
         return 14;
      }

   case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
   case MESA_SHADER_FRAGMENT:
      /* Up to 3 uniforms and 5 ALUs. */
      return 14;

   default:
      unreachable("unexpected shader stage");
   }
}
|
||||
|
||||
/**
 * Estimate the cost of a single instruction that is part of a varying
 * expression.
 *
 * Only ALU instructions and load_deref intrinsics are expected; anything
 * else hits unreachable(). The numbers are a very loose approximation
 * based on gfx10.
 */
static unsigned si_varying_estimate_instr_cost(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu: {
      const nir_op op = nir_instr_as_alu(instr)->op;
      const unsigned dst_bits = nir_instr_as_alu(instr)->def.bit_size;
      const unsigned src_bits = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
      const unsigned dst_dwords = DIV_ROUND_UP(dst_bits, 32);

      switch (op) {
      /* Moves, vector construction and FP modifiers are counted as free. */
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
      case nir_op_vec5:
      case nir_op_vec8:
      case nir_op_vec16:
      case nir_op_fabs:
      case nir_op_fneg:
      case nir_op_fsat:
         return 0;

      /* Integer multiplies: 1 for up to 16 bits, else 4 per result dword. */
      case nir_op_imul:
      case nir_op_umul_low:
         if (dst_bits <= 16)
            return 1;
         return 4 * dst_dwords;

      case nir_op_imul_high:
      case nir_op_umul_high:
      case nir_op_imul_2x32_64:
      case nir_op_umul_2x32_64:
         return 4;

      /* Transcendentals. */
      case nir_op_fexp2:
      case nir_op_flog2:
      case nir_op_frcp:
      case nir_op_frsq:
      case nir_op_fsqrt:
      case nir_op_fsin:
      case nir_op_fcos:
      case nir_op_fsin_amd:
      case nir_op_fcos_amd:
         return 4; /* FP16 & FP32. */

      case nir_op_fpow:
         return 4 + 1 + 4; /* log2 + mul + exp2 */

      case nir_op_fsign:
         /* See ac_build_fsign. */
         if (dst_bits == 64)
            return 4;
         return 3;

      /* Integer division and remainder. */
      case nir_op_idiv:
      case nir_op_udiv:
      case nir_op_imod:
      case nir_op_umod:
      case nir_op_irem:
         if (dst_bits == 64)
            return 80;
         return 40;

      case nir_op_fdiv:
         if (dst_bits == 64)
            return 80;
         return 5; /* FP16 & FP32: rcp + mul */

      case nir_op_fmod:
      case nir_op_frem:
         if (dst_bits == 64)
            return 80;
         return 8;

      default:
         /* Double opcodes. Comparisons have always full performance. */
         if (dst_bits == 64 && (nir_op_infos[op].output_type & nir_type_float))
            return 16;

         if (dst_bits >= 8 && src_bits == 64 &&
             (nir_op_infos[op].input_types[0] & nir_type_float))
            return 16;

         /* Everything else: one unit per dword of the wider operand. */
         return DIV_ROUND_UP(MAX2(dst_bits, src_bits), 32);
      }
   }

   case nir_instr_type_intrinsic: {
      const unsigned dst_bits = nir_instr_as_intrinsic(instr)->def.bit_size;
      const unsigned dst_dwords = DIV_ROUND_UP(dst_bits, 32);

      switch (nir_instr_as_intrinsic(instr)->intrinsic) {
      case nir_intrinsic_load_deref:
         /* Uniform or UBO load.
          * Set a low cost to balance the number of scalar loads and ALUs.
          */
         return 3 * dst_dwords;

      default:
         unreachable("unexpected intrinsic");
      }
   }

   default:
      unreachable("unexpected instr type");
   }
}
|
||||
|
||||
void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
{
|
||||
sscreen->b.get_name = si_get_name;
|
||||
|
|
@ -1439,4 +1570,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
options->support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
|
||||
BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
|
||||
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
|
||||
options->varying_expression_max_cost = si_varying_expression_max_cost;
|
||||
options->varying_estimate_instr_cost = si_varying_estimate_instr_cost;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -326,6 +326,7 @@ unsigned si_get_num_shader_profiles(void);
|
|||
#define SI_PROFILE_VS_NO_BINNING (1 << 3)
|
||||
#define SI_PROFILE_GFX9_GFX10_PS_NO_BINNING (1 << 4)
|
||||
#define SI_PROFILE_CLAMP_DIV_BY_ZERO (1 << 5)
|
||||
/* Profile option: when set on the profile matching the consumer shader,
 * si_varying_expression_max_cost returns 0 so that only constants are
 * propagated.
 */
#define SI_PROFILE_NO_OPT_UNIFORM_VARYINGS (1 << 6)
|
||||
|
||||
enum si_shader_dump_type {
|
||||
SI_DUMP_SHADER_KEY,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,11 @@ struct si_shader_profile si_shader_profiles[] =
|
|||
{0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451},
|
||||
SI_PROFILE_VS_NO_BINNING,
|
||||
},
|
||||
{
|
||||
/* Viewperf/Energy */
|
||||
{0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0},
|
||||
SI_PROFILE_NO_OPT_UNIFORM_VARYINGS, /* Uniform propagation regresses performance. */
|
||||
},
|
||||
{
|
||||
/* Viewperf/Medical */
|
||||
{0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue