radv: always run nir_opt_16bit_tex_image

The pass can optimize pack_half and constants sources even when
no 16bit instructions exist.

Foz-DB Navi21:
Totals from 3042 (3.83% of 79395) affected shaders:
MaxWaves: 69039 -> 69031 (-0.01%); split: +0.01%, -0.02%
Instrs: 2292054 -> 2291874 (-0.01%); split: -0.03%, +0.02%
CodeSize: 12567868 -> 12544888 (-0.18%); split: -0.23%, +0.05%
VGPRs: 145384 -> 145352 (-0.02%); split: -0.06%, +0.04%
SpillSGPRs: 451 -> 452 (+0.22%)
Latency: 23546543 -> 23536416 (-0.04%); split: -0.07%, +0.03%
InvThroughput: 5180446 -> 5164437 (-0.31%); split: -0.35%, +0.04%
VClause: 50537 -> 50535 (-0.00%); split: -0.05%, +0.04%
SClause: 84726 -> 84750 (+0.03%); split: -0.04%, +0.06%
Copies: 140384 -> 140421 (+0.03%); split: -0.34%, +0.37%
Branches: 40412 -> 40413 (+0.00%)
PreVGPRs: 120213 -> 120262 (+0.04%); split: -0.03%, +0.07%
VALU: 1607545 -> 1607593 (+0.00%); split: -0.03%, +0.03%
SALU: 215846 -> 215837 (-0.00%); split: -0.03%, +0.02%

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28730>
This commit is contained in:
Georg Lehmann 2024-04-13 16:27:55 +02:00 committed by Marge Bot
parent 3a35522c8a
commit e63afdc681
2 changed files with 11 additions and 10 deletions

View file

@ -132,18 +132,18 @@ BEGIN_TEST(isel.sparse.clause)
};
void main() {
//>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_ dmask:xyzw 2d tfe a16
//>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d tfe
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_ dmask:xyzw 2d tfe a16
//>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe
//>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_ dmask:xyzw 2d tfe a16
//>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
//>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe
//>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_ dmask:xyzw 2d tfe a16
//>> s_clause 0x3
//! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
//! image_sample_lz_o v[#_:#_], v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe
//! image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe
code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]);
code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]);
code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]);

View file

@ -678,7 +678,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
if (gfx_level >= GFX8)
NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
}
if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && gfx_level >= GFX9) {
if (gfx_level >= GFX9) {
bool separate_g16 = gfx_level >= GFX10;
struct nir_opt_tex_srcs_options opt_srcs_options[] = {
{
@ -703,7 +703,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
};
NIR_PASS(_, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);
if (!stage->key.optimisations_disabled) {
if (!stage->key.optimisations_disabled &&
((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16)) {
NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device);
}
}