From e63afdc6815e4461c75faa217062d2de595c86da Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 13 Apr 2024 16:27:55 +0200 Subject: [PATCH] radv: always run nir_opt_16bit_tex_image The pass can optimize pack_half and constant sources even when no 16-bit instructions exist. Foz-DB Navi21: Totals from 3042 (3.83% of 79395) affected shaders: MaxWaves: 69039 -> 69031 (-0.01%); split: +0.01%, -0.02% Instrs: 2292054 -> 2291874 (-0.01%); split: -0.03%, +0.02% CodeSize: 12567868 -> 12544888 (-0.18%); split: -0.23%, +0.05% VGPRs: 145384 -> 145352 (-0.02%); split: -0.06%, +0.04% SpillSGPRs: 451 -> 452 (+0.22%) Latency: 23546543 -> 23536416 (-0.04%); split: -0.07%, +0.03% InvThroughput: 5180446 -> 5164437 (-0.31%); split: -0.35%, +0.04% VClause: 50537 -> 50535 (-0.00%); split: -0.05%, +0.04% SClause: 84726 -> 84750 (+0.03%); split: -0.04%, +0.06% Copies: 140384 -> 140421 (+0.03%); split: -0.34%, +0.37% Branches: 40412 -> 40413 (+0.00%) PreVGPRs: 120213 -> 120262 (+0.04%); split: -0.03%, +0.07% VALU: 1607545 -> 1607593 (+0.00%); split: -0.03%, +0.03% SALU: 215846 -> 215837 (-0.00%); split: -0.03%, +0.02% Reviewed-by: Alyssa Rosenzweig Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/tests/test_isel.cpp | 16 ++++++++-------- src/amd/vulkan/radv_pipeline.c | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 8917f63b734..5e454eb9617 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -132,18 +132,18 @@ BEGIN_TEST(isel.sparse.clause) }; void main() { //>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_, %_ dmask:xyzw 2d tfe + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero0, (kill)%_, %_ dmask:xyzw 2d tfe a16 //>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_, %_ dmask:xyzw 2d 
tfe + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero1, (kill)%_, %_ dmask:xyzw 2d tfe a16 //>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_, %_ dmask:xyzw 2d tfe + //>> v5: %_ = image_sample_lz_o %_, %_, (kill)%zero2, (kill)%_, %_ dmask:xyzw 2d tfe a16 //>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0 - //>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_, (kill)%_ dmask:xyzw 2d tfe + //>> v5: %_ = image_sample_lz_o (kill)%_, (kill)%_, (kill)%zero3, (kill)%_, (kill)%_ dmask:xyzw 2d tfe a16 //>> s_clause 0x3 - //! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe - //! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe - //! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe - //! image_sample_lz_o v[#_:#_], [v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe + //! image_sample_lz_o v[#_:#_], v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe + //! image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe + //! image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe + //! 
image_sample_lz_o v[#_:#_], [v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D a16 tfe code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]); code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]); code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 25eae2db519..951508026a2 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -678,7 +678,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat if (gfx_level >= GFX8) NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */ } - if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && gfx_level >= GFX9) { + if (gfx_level >= GFX9) { bool separate_g16 = gfx_level >= GFX10; struct nir_opt_tex_srcs_options opt_srcs_options[] = { { @@ -703,7 +703,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat }; NIR_PASS(_, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options); - if (!stage->key.optimisations_disabled) { + if (!stage->key.optimisations_disabled && + ((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16)) { NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device); } }