radv: vectorize some integer arithmetic and bcsel with scalar condition

Totals from 106 (0.13% of 79839) affected shaders: (Navi48)

Instrs: 131026 -> 130042 (-0.75%); split: -0.82%, +0.07%
CodeSize: 719120 -> 711516 (-1.06%); split: -1.20%, +0.14%
VGPRs: 5244 -> 5232 (-0.23%)
Latency: 2020748 -> 2004602 (-0.80%); split: -0.81%, +0.01%
InvThroughput: 393330 -> 385414 (-2.01%); split: -2.01%, +0.00%
VClause: 2193 -> 2192 (-0.05%)
Copies: 13963 -> 13558 (-2.90%); split: -2.91%, +0.01%
PreVGPRs: 2953 -> 2921 (-1.08%)
VALU: 65595 -> 64835 (-1.16%); split: -1.16%, +0.00%
SALU: 26887 -> 26611 (-1.03%)
VMEM: 2921 -> 3005 (+2.88%)
VOPD: 168 -> 173 (+2.98%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35784>
This commit is contained in:
Daniel Schürmann 2025-06-27 08:29:39 +02:00 committed by Marge Bot
parent 764ee3a834
commit 4671e5f20d
2 changed files with 49 additions and 2 deletions

View file

@ -253,6 +253,23 @@ ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32
return ycbcr_samplers + array_index;
}
/*
 * Compute how many leading components of one ALU source are laid out with
 * an identity-style swizzle, i.e. component i reads swizzle[0] + i.
 *
 * alu: instruction whose destination bit size and component count bound the
 *      search.
 * src: the source of that instruction to inspect.
 *
 * Returns 32 / bit_size (the number of components that fit into 32 bits)
 * when the source is constant or when every component follows the
 * consecutive pattern. Otherwise returns the length of the consecutive run
 * with its lowest bit masked off, but never less than 1.
 */
static uint8_t
max_alu_src_identity_swizzle(const nir_alu_instr *alu, const nir_alu_src *src)
{
   const uint8_t full_width = 32 / alu->def.bit_size;

   /* Constant sources are accepted at full width regardless of swizzle. */
   if (!nir_src_is_const(src->src)) {
      const unsigned num_comps = alu->def.num_components;
      unsigned run = 1;

      /* Grow the run while each component continues the consecutive pattern. */
      while (run < num_comps && src->swizzle[run] == src->swizzle[0] + run)
         run++;

      if (run < num_comps) {
         /* The pattern broke at component 'run'. Masking with 0x6 maps run
          * lengths 2..3 to 2 and 4..5 to 4; MAX2 lifts the remaining
          * zero results to 1.
          * NOTE(review): for run lengths of 6 and above the mask no longer
          * produces a power of two (6..7 -> 6, 8..9 -> 1) - presumably not
          * reachable or harmless because callers clamp the result; confirm.
          */
         return MAX2(run & 0x6, 1);
      }
   }

   return full_width;
}
static uint8_t
opt_vectorize_callback(const nir_instr *instr, const void *_)
{
@ -281,10 +298,38 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
}
const unsigned bit_size = alu->def.bit_size;
if (bit_size != 16)
if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu))
return 2;
if (bit_size != 8 && bit_size != 16)
return 1;
return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
/* Keep some opcodes vectorized if the operation can be performed as
* 32-bit instruction with packed sources. The condition is that the
* sources must have identity swizzles. */
uint8_t target_width = 32 / bit_size;
switch (alu->op) {
case nir_op_bcsel:
/* Must have scalar condition. */
for (unsigned i = 1; i < alu->def.num_components; i++) {
if (alu->src[0].swizzle[i] != alu->src[0].swizzle[0])
return 1;
}
for (unsigned idx = 1; idx < 3; idx++)
target_width = MIN2(target_width, max_alu_src_identity_swizzle(alu, &alu->src[idx]));
break;
case nir_op_iand:
case nir_op_ior:
case nir_op_ixor:
case nir_op_inot:
for (unsigned idx = 0; idx < nir_op_infos[alu->op].num_inputs; idx++)
target_width = MIN2(target_width, max_alu_src_identity_swizzle(alu, &alu->src[idx]));
break;
default:
return 1;
}
return target_width;
}
static nir_component_mask_t

View file

@ -98,6 +98,8 @@ vectorize_vec2_16bit(const nir_instr *instr, const void *_)
const unsigned bit_size = alu->def.bit_size;
if (bit_size == 16)
return 2;
else if (bit_size == 8)
return 4;
else
return 1;
}