mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 09:00:10 +01:00
radv: optimize the number of loaded components for VS inputs in NIR
fossils-db (Sienna Cichlid):
Totals from 3691 (2.74% of 134913) affected shaders:
VGPRs: 121368 -> 121584 (+0.18%); split: -0.36%, +0.54%
CodeSize: 7597912 -> 7561140 (-0.48%); split: -0.66%, +0.18%
MaxWaves: 104706 -> 104772 (+0.06%)
Instrs: 1441229 -> 1437652 (-0.25%); split: -0.53%, +0.28%
Latency: 5500766 -> 5482101 (-0.34%); split: -0.45%, +0.11%
InvThroughput: 804401 -> 797178 (-0.90%); split: -1.09%, +0.20%
VClause: 25185 -> 25143 (-0.17%); split: -0.50%, +0.33%
SClause: 27486 -> 27445 (-0.15%); split: -0.57%, +0.42%
Copies: 143816 -> 147900 (+2.84%); split: -0.54%, +3.38%
PreSGPRs: 109584 -> 110396 (+0.74%); split: -0.04%, +0.79%
PreVGPRs: 95541 -> 94583 (-1.00%); split: -1.12%, +0.12%

fossils-db (Polaris10):
Totals from 1773 (1.30% of 135960) affected shaders:
SGPRs: 80848 -> 80864 (+0.02%); split: -0.14%, +0.16%
VGPRs: 56424 -> 55600 (-1.46%); split: -1.47%, +0.01%
CodeSize: 1732588 -> 1696840 (-2.06%); split: -2.07%, +0.01%
MaxWaves: 12103 -> 12106 (+0.02%)
Instrs: 347684 -> 341597 (-1.75%); split: -1.76%, +0.01%
Latency: 2542840 -> 2523946 (-0.74%); split: -0.95%, +0.21%
InvThroughput: 924601 -> 905102 (-2.11%); split: -2.13%, +0.02%
VClause: 9565 -> 9545 (-0.21%); split: -0.51%, +0.30%
SClause: 10587 -> 10333 (-2.40%); split: -2.82%, +0.43%
Copies: 19321 -> 20307 (+5.10%); split: -0.78%, +5.88%
PreSGPRs: 30879 -> 30875 (-0.01%); split: -0.20%, +0.18%
PreVGPRs: 41211 -> 41270 (+0.14%); split: -0.73%, +0.87%

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15317>
This commit is contained in:
parent
1ec4e568de
commit
b366fef091
1 changed file with 19 additions and 4 deletions
|
|
@@ -3762,12 +3762,19 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
|
||||||
enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
|
enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
|
||||||
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
|
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
|
||||||
|
|
||||||
if (alpha_adjust == ALPHA_ADJUST_NONE && !post_shuffle)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
unsigned component = nir_intrinsic_component(intrin);
|
unsigned component = nir_intrinsic_component(intrin);
|
||||||
unsigned num_components = intrin->dest.ssa.num_components;
|
unsigned num_components = intrin->dest.ssa.num_components;
|
||||||
|
|
||||||
|
unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
|
||||||
|
unsigned dfmt = attrib_format & 0xf;
|
||||||
|
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
||||||
|
const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt);
|
||||||
|
bool is_float =
|
||||||
|
nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
|
||||||
|
|
||||||
|
unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
|
||||||
|
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
|
||||||
|
|
||||||
static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
|
static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
|
||||||
static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
|
static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
|
||||||
const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
|
const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
|
||||||
|
|
@@ -3781,12 +3788,20 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
|
||||||
intrin->dest.ssa.num_components = intrin->num_components;
|
intrin->dest.ssa.num_components = intrin->num_components;
|
||||||
|
|
||||||
nir_intrinsic_set_component(intrin, 0);
|
nir_intrinsic_set_component(intrin, 0);
|
||||||
|
|
||||||
|
num_channels = MAX2(num_channels, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < num_components; i++) {
|
for (uint32_t i = 0; i < num_components; i++) {
|
||||||
unsigned idx = i + (post_shuffle ? component : 0);
|
unsigned idx = i + (post_shuffle ? component : 0);
|
||||||
|
|
||||||
|
if (swizzle[i + component] < num_channels) {
|
||||||
channels[i] = nir_channel(&b, &intrin->dest.ssa, swizzle[idx]);
|
channels[i] = nir_channel(&b, &intrin->dest.ssa, swizzle[idx]);
|
||||||
|
} else if (i + component == 3) {
|
||||||
|
channels[i] = is_float ? nir_imm_float(&b, 1.0f) : nir_imm_int(&b, 1u);
|
||||||
|
} else {
|
||||||
|
channels[i] = nir_imm_zero(&b, 1, 32);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
|
if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue