mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
radv: vectorize some integer arithmetic and bcsel with scalar condition
Totals from 106 (0.13% of 79839) affected shaders: (Navi48)
Instrs: 131026 -> 130042 (-0.75%); split: -0.82%, +0.07%
CodeSize: 719120 -> 711516 (-1.06%); split: -1.20%, +0.14%
VGPRs: 5244 -> 5232 (-0.23%)
Latency: 2020748 -> 2004602 (-0.80%); split: -0.81%, +0.01%
InvThroughput: 393330 -> 385414 (-2.01%); split: -2.01%, +0.00%
VClause: 2193 -> 2192 (-0.05%)
Copies: 13963 -> 13558 (-2.90%); split: -2.91%, +0.01%
PreVGPRs: 2953 -> 2921 (-1.08%)
VALU: 65595 -> 64835 (-1.16%); split: -1.16%, +0.00%
SALU: 26887 -> 26611 (-1.03%)
VMEM: 2921 -> 3005 (+2.88%)
VOPD: 168 -> 173 (+2.98%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35784>
This commit is contained in:
parent
764ee3a834
commit
4671e5f20d
2 changed files with 49 additions and 2 deletions
|
|
@ -253,6 +253,23 @@ ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32
|
|||
return ycbcr_samplers + array_index;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
max_alu_src_identity_swizzle(const nir_alu_instr *alu, const nir_alu_src *src)
|
||||
{
|
||||
uint8_t max_vector = 32 / alu->def.bit_size;
|
||||
if (nir_src_is_const(src->src))
|
||||
return max_vector;
|
||||
|
||||
/* Return the number of correctly swizzled components. */
|
||||
for (unsigned i = 1; i < alu->def.num_components; i++) {
|
||||
if (src->swizzle[i] != src->swizzle[0] + i)
|
||||
/* Ensure that the result is a power of 2. */
|
||||
return MAX2(i & 0x6, 1);
|
||||
}
|
||||
|
||||
return max_vector;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
opt_vectorize_callback(const nir_instr *instr, const void *_)
|
||||
{
|
||||
|
|
@ -281,10 +298,38 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
|
|||
}
|
||||
|
||||
const unsigned bit_size = alu->def.bit_size;
|
||||
if (bit_size != 16)
|
||||
if (bit_size == 16 && aco_nir_op_supports_packed_math_16bit(alu))
|
||||
return 2;
|
||||
|
||||
if (bit_size != 8 && bit_size != 16)
|
||||
return 1;
|
||||
|
||||
return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
|
||||
/* Keep some opcodes vectorized if the operation can be performed as
|
||||
* 32-bit instruction with packed sources. The condition is that the
|
||||
* sources must have identity swizzles. */
|
||||
uint8_t target_width = 32 / bit_size;
|
||||
switch (alu->op) {
|
||||
case nir_op_bcsel:
|
||||
/* Must have scalar condition. */
|
||||
for (unsigned i = 1; i < alu->def.num_components; i++) {
|
||||
if (alu->src[0].swizzle[i] != alu->src[0].swizzle[0])
|
||||
return 1;
|
||||
}
|
||||
for (unsigned idx = 1; idx < 3; idx++)
|
||||
target_width = MIN2(target_width, max_alu_src_identity_swizzle(alu, &alu->src[idx]));
|
||||
break;
|
||||
case nir_op_iand:
|
||||
case nir_op_ior:
|
||||
case nir_op_ixor:
|
||||
case nir_op_inot:
|
||||
for (unsigned idx = 0; idx < nir_op_infos[alu->op].num_inputs; idx++)
|
||||
target_width = MIN2(target_width, max_alu_src_identity_swizzle(alu, &alu->src[idx]));
|
||||
break;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
||||
return target_width;
|
||||
}
|
||||
|
||||
static nir_component_mask_t
|
||||
|
|
|
|||
|
|
@ -98,6 +98,8 @@ vectorize_vec2_16bit(const nir_instr *instr, const void *_)
|
|||
const unsigned bit_size = alu->def.bit_size;
|
||||
if (bit_size == 16)
|
||||
return 2;
|
||||
else if (bit_size == 8)
|
||||
return 4;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue