mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
pan/compiler: Use SHADDX instruction for i64 add
For Valhall, use the SHADDX instruction for 64-bit integer addition instead of lowering it to 32-bit operations. The lowered 32-bit instruction sequence costs 3 cycles, whereas SHADDX performs the addition in only 2 cycles. Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40841>
This commit is contained in:
parent
d4b843c24d
commit
4542982062
8 changed files with 57 additions and 10 deletions
|
|
@ -8,6 +8,7 @@
|
|||
#include "bi_builder.h"
|
||||
#include "compiler.h"
|
||||
#include "nodearray.h"
|
||||
#include "valhall.h"
|
||||
|
||||
struct lcra_state {
|
||||
unsigned node_count;
|
||||
|
|
@ -380,6 +381,9 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
|
|||
bi_foreach_ssa_src(ins, s) {
|
||||
if (bi_count_read_registers(ins, s) >= 2)
|
||||
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
|
||||
else if (s < valhall_opcodes[ins->op].nr_srcs &&
|
||||
va_src_info(ins->op, s).size > VA_SIZE_32)
|
||||
l->affinity[ins->src[s].value] &= EVEN_BITS_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2310,6 +2310,15 @@ bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
|
|||
{
|
||||
unsigned bitsize = nir_src_bit_size(src.src);
|
||||
|
||||
if (b->shader->arch >= 9 && bitsize == 64) {
|
||||
/* For Valhall, 64-bit instructions only encode one register but will read
|
||||
* the adjacent register that comes right after as well. Therefore we
|
||||
* don't need to extract a single register here.
|
||||
*/
|
||||
assert(comps == 1);
|
||||
return bi_src_index(&src.src);
|
||||
}
|
||||
|
||||
/* the bi_index carries the 32-bit (word) offset separate from the
|
||||
* subword swizzle, first handle the offset */
|
||||
|
||||
|
|
@ -3373,7 +3382,14 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_op_iadd:
|
||||
bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false);
|
||||
if (sz == 64) {
|
||||
assert(b->shader->arch >= 9);
|
||||
bi_shaddx_s64_to(b, dst, s0, s1, 0);
|
||||
bi_index dsts[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
|
||||
bi_emit_split_i32(b, dsts, dst, 2);
|
||||
bi_cache_collect(b, dst, dsts, 2);
|
||||
} else
|
||||
bi_iadd_to(b, nir_type_int, sz, dst, s0, s1, false);
|
||||
break;
|
||||
|
||||
case nir_op_iadd_sat:
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ bool valhall_can_merge_workgroups(nir_shader *nir);
|
|||
\
|
||||
.lower_doubles_options = \
|
||||
nir_lower_dmod, /* TODO: Don't lower supported 64-bit operations */ \
|
||||
.lower_int64_options = ~0, /* TODO: Use IMULD on v7 */ \
|
||||
.lower_int64_options = arch >= 9 ? ~(nir_lower_iadd64) : ~0, \
|
||||
.lower_mul_high = true, \
|
||||
.lower_fisnormal = true, \
|
||||
.lower_uadd_carry = true, \
|
||||
|
|
|
|||
|
|
@ -77,6 +77,8 @@ bi_count_read_registers(const bi_instr *ins, unsigned s)
|
|||
return ins->sr_count_2; /* Dual source blending */
|
||||
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
|
||||
return ins->nr_dests;
|
||||
else if (ins->op == BI_OPCODE_SHADDX_S64 || ins->op == BI_OPCODE_SHADDX_U64)
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -123,7 +125,9 @@ bi_count_write_registers(const bi_instr *ins, unsigned d)
|
|||
default:
|
||||
return bi_count_staging_registers(ins);
|
||||
}
|
||||
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
|
||||
} else if (ins->op == BI_OPCODE_SEG_ADD_I64 ||
|
||||
ins->op == BI_OPCODE_SHADDX_S64 ||
|
||||
ins->op == BI_OPCODE_SHADDX_U64) {
|
||||
return 2;
|
||||
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
|
||||
return ins->sr_count_2;
|
||||
|
|
|
|||
|
|
@ -2626,7 +2626,7 @@
|
|||
<src widen="true">B</src>
|
||||
</group>
|
||||
|
||||
<group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" unused="true" unit="CVT">
|
||||
<group name="SHADDX" title="Shift, extend, and 64-bit add" dests="1" unit="CVT">
|
||||
<desc>
|
||||
Sign or zero extend B to 64-bits, left-shift by `shift`, and add the
|
||||
64-bit value A. These instructions accelerate address arithmetic, but may
|
||||
|
|
|
|||
|
|
@ -26,14 +26,18 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s, bi_instr** lut)
|
|||
return;
|
||||
}
|
||||
|
||||
/* Check if the source regs are already coming from a split. */
|
||||
/* Check if the source regs are already coming from a split/collect pair. */
|
||||
bi_index *src_a = &I->src[s];
|
||||
bi_index *src_b = &I->src[s + 1];
|
||||
if (bi_is_ssa(*src_a) && bi_is_ssa(*src_b)) {
|
||||
bi_instr *src_ins_a = lut[src_a->value];
|
||||
bi_instr *src_ins_b = lut[src_b->value];
|
||||
if (src_ins_a->op == BI_OPCODE_SPLIT_I32 && src_ins_a == src_ins_b)
|
||||
return;
|
||||
if (src_ins_a->op == BI_OPCODE_SPLIT_I32 && src_ins_a == src_ins_b) {
|
||||
bi_index split_src = src_ins_a->src[0];
|
||||
if (!bi_is_ssa(split_src) ||
|
||||
lut[split_src.value]->op == BI_OPCODE_COLLECT_I32)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate temporary before the instruction */
|
||||
|
|
@ -76,7 +80,9 @@ va_lower_split_64bit(bi_context *ctx)
|
|||
|
||||
struct va_src_info info = va_src_info(I->op, s);
|
||||
|
||||
if (info.size == VA_SIZE_64)
|
||||
/* Only split if the instruction expects 64-bit inputs as two separate
|
||||
* sources. */
|
||||
if (info.size == VA_SIZE_64 && bi_count_read_registers(I, s) == 1)
|
||||
lower_split_src(ctx, I, s, lut);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -325,6 +325,25 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size)
|
|||
default:
|
||||
invalid_instruction(I, "32-bit widen");
|
||||
}
|
||||
} else if (size == VA_SIZE_64) {
|
||||
switch (swz) {
|
||||
case BI_SWIZZLE_H01:
|
||||
return VA_SWIZZLES_64_BIT_NONE;
|
||||
case BI_SWIZZLE_H0:
|
||||
return VA_SWIZZLES_64_BIT_H0;
|
||||
case BI_SWIZZLE_H1:
|
||||
return VA_SWIZZLES_64_BIT_H1;
|
||||
case BI_SWIZZLE_B0:
|
||||
return VA_SWIZZLES_64_BIT_B0;
|
||||
case BI_SWIZZLE_B1:
|
||||
return VA_SWIZZLES_64_BIT_B1;
|
||||
case BI_SWIZZLE_B2:
|
||||
return VA_SWIZZLES_64_BIT_B2;
|
||||
case BI_SWIZZLE_B3:
|
||||
return VA_SWIZZLES_64_BIT_B3;
|
||||
default:
|
||||
invalid_instruction(I, "64-bit widen");
|
||||
}
|
||||
} else {
|
||||
invalid_instruction(I, "type size for widen");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,8 +34,6 @@ SKIP = set([
|
|||
"ISUB.u64",
|
||||
"ISUB.s64",
|
||||
"IMULD.u64",
|
||||
"SHADDX.u64",
|
||||
"SHADDX.s64",
|
||||
"IMULD.u64",
|
||||
"LSHIFT_AND.i64",
|
||||
"RSHIFT_AND.i64",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue