From 1d50ef9ca64ccaf56f812f627bdb9e1587b3ece8 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 12 Feb 2021 10:49:03 +0000 Subject: [PATCH] aco: adjust the condition for expanding vertex fetch data format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of avoiding out-of-bounds access, avoid creating a load larger than the original attribute. This should work just as well, since the only situations expending a load helped was because we shrunk it first. Also fixes a bug where a 3 component load (4 components with the first component skipped) would be incorrectly expanded to 4 components because the stride check would never be performed. Maybe we should avoid skipping the first component in some situations, but I'm not sure if it's worth the VGPR cost. fossil-db (vega10): Totals from 583 (0.39% of 149974) affected shaders: CodeSize: 1496848 -> 1500868 (+0.27%); split: -0.03%, +0.30% Instrs: 286155 -> 286575 (+0.15%); split: -0.07%, +0.22% Latency: 2947101 -> 2946865 (-0.01%); split: -0.23%, +0.22% InvThroughput: 797396 -> 797127 (-0.03%); split: -0.08%, +0.04% fossil-db (polaris10): Totals from 583 (0.39% of 151365) affected shaders: SGPRs: 38880 -> 39216 (+0.86%) VGPRs: 24440 -> 24356 (-0.34%) CodeSize: 1506808 -> 1510876 (+0.27%); split: -0.01%, +0.28% Instrs: 288735 -> 289167 (+0.15%); split: -0.06%, +0.21% Latency: 2963263 -> 2961884 (-0.05%); split: -0.24%, +0.19% InvThroughput: 802351 -> 801665 (-0.09%); split: -0.12%, +0.04% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 112e4c3f227..8f8b5348a0d 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4609,7 +4609,7 @@ bool check_vertex_fetch_size(isel_context *ctx, const ac_data_format_info *vtx_i } uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_info, - unsigned offset, unsigned stride, unsigned *channels, + unsigned offset, unsigned *channels, unsigned max_channels, unsigned binding_align) { if (!vtx_info->chan_byte_size) { @@ -4621,15 +4621,12 @@ uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_ if (!check_vertex_fetch_size(ctx, vtx_info, offset, binding_align, *channels)) { unsigned new_channels = num_channels + 1; /* first, assume more loads is worse and try using a larger data format */ - while (new_channels <= 4 && + while (new_channels <= max_channels && !check_vertex_fetch_size(ctx, vtx_info, offset, binding_align, new_channels)) { new_channels++; - /* don't make the attribute potentially out-of-bounds */ - if (offset + new_channels * vtx_info->chan_byte_size > stride) - new_channels = 5; } - if (new_channels == 5) { + if (new_channels > max_channels) { /* then try decreasing load size (at the cost of more loads) */ new_channels = *channels; while (new_channels > 1 && @@ -4781,8 +4778,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) vtx_info->chan_byte_size == 4; unsigned fetch_dfmt = V_008F0C_BUF_DATA_FORMAT_INVALID; if (!use_mubuf) { - fetch_dfmt = get_fetch_data_format(ctx, vtx_info, fetch_offset, attrib_stride, &fetch_component, - binding_align); + fetch_dfmt = get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component, + vtx_info->num_channels - channel_start, binding_align); } else { if (fetch_component == 3 && ctx->options->chip_class == GFX6) { /* GFX6 only supports loading vec3 with MTBUF, expand to vec4. */