mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
radeonsi: double pixel throughput in certain cases of PS without inputs
When no barycentric VGPRs are needed, we always enabled one of the barycentric pairs (e.g. PERSP_SAMPLE_ENA) because the hardware requires it. However, the requirement also allows LINE_STIPPLE_TEX_ENA to be enabled instead, which occupies only 1 VGPR.

Maximum pixel throughput is only possible with at most 2 initialized VGPRs. By reducing the initialized VGPRs from 2 (with PERSP_SAMPLE_ENA) to 1 (with LINE_STIPPLE_TEX_ENA), we can have 1 additional initialized VGPR for free while keeping maximum pixel throughput, such as POS_FIXED_PT for frag_coord.xy without MSAA.

Only ACO gets this perf improvement because the change would be more complicated with LLVM.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38915>
This commit is contained in:
parent
6592a18cd7
commit
5acabdd1f8
3 changed files with 13 additions and 4 deletions
|
|
@@ -671,7 +671,7 @@ void si_get_ps_prolog_args(struct si_shader_args *args,
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample);
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample);
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center);
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center);
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid);
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid);
|
||||||
/* skip LINE_STIPPLE_TEX */
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, NULL); /* LINE_STIPPLE_TEX */
|
||||||
|
|
||||||
/* POS_X|Y|Z|W_FLOAT */
|
/* POS_X|Y|Z|W_FLOAT */
|
||||||
u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask)
|
u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask)
|
||||||
|
|
|
||||||
|
|
@@ -16,6 +16,7 @@
|
||||||
S_0286D0_LINEAR_SAMPLE_ENA(1) | \
|
S_0286D0_LINEAR_SAMPLE_ENA(1) | \
|
||||||
S_0286D0_LINEAR_CENTER_ENA(1) | \
|
S_0286D0_LINEAR_CENTER_ENA(1) | \
|
||||||
S_0286D0_LINEAR_CENTROID_ENA(1) | \
|
S_0286D0_LINEAR_CENTROID_ENA(1) | \
|
||||||
|
S_0286D0_LINE_STIPPLE_TEX_ENA(1) | \
|
||||||
S_0286D0_FRONT_FACE_ENA(1) | \
|
S_0286D0_FRONT_FACE_ENA(1) | \
|
||||||
S_0286D0_ANCILLARY_ENA(1) | \
|
S_0286D0_ANCILLARY_ENA(1) | \
|
||||||
S_0286D0_SAMPLE_COVERAGE_ENA(1) | \
|
S_0286D0_SAMPLE_COVERAGE_ENA(1) | \
|
||||||
|
|
|
||||||
|
|
@@ -423,7 +423,15 @@ void si_fixup_spi_ps_input_config(struct si_shader *shader)
|
||||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* At least one pair of interpolation weights must be enabled. */
|
/* At least one pair of barycentric coordinates or LINE_STIPPLE_TEX_ENA must be enabled.
|
||||||
if (!(shader->config.spi_ps_input_ena & 0x7f))
|
* Since LINE_STIPPLE_TEX_ENA is the only one that loads only 1 VGPR, use it.
|
||||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
*/
|
||||||
|
if (!(shader->config.spi_ps_input_ena & 0x7f) &&
|
||||||
|
!G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_ena)) {
|
||||||
|
/* LLVM sets PERSP_SAMPLE_ENA in this case, so we have to do the same. */
|
||||||
|
if (shader->selector->info.base.use_aco_amd)
|
||||||
|
shader->config.spi_ps_input_ena |= S_0286CC_LINE_STIPPLE_TEX_ENA(1);
|
||||||
|
else
|
||||||
|
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue