aco: split vop3p results

Call emit_split_vector() on the destination of each VOP3P instruction.
This removes copies in cases such as:
a = fmul
b = fmul
c = vec4(a.x, a.y, b.x, b.y)

fossil-db (navi31):
Totals from 21 (0.03% of 79395) affected shaders:
Instrs: 96481 -> 96338 (-0.15%)
CodeSize: 548452 -> 548196 (-0.05%); split: -0.13%, +0.09%
Latency: 1514460 -> 1514238 (-0.01%); split: -0.02%, +0.00%
InvThroughput: 683048 -> 682942 (-0.02%); split: -0.02%, +0.00%
VClause: 1611 -> 1613 (+0.12%)
Copies: 21326 -> 21190 (-0.64%)
Branches: 2427 -> 2426 (-0.04%)
PreVGPRs: 2289 -> 2298 (+0.39%)
VALU: 59090 -> 58954 (-0.23%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
This commit is contained in:
Rhys Perry 2024-04-15 11:22:08 +01:00 committed by Marge Bot
parent 88e03feb27
commit 37e9e8b06c

View file

@@ -953,6 +953,7 @@ emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T
Builder bld(ctx->program, ctx->block);
bld.is_precise = instr->exact;
Builder::Result res = bld.vop3p(op, Definition(dst), src0, src1, opsel_lo, opsel_hi);
emit_split_vector(ctx, dst, 2);
return res;
}
@@ -1515,6 +1516,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
src, opsel_lo, opsel_hi);
bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
emit_split_vector(ctx, dst, 2);
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2411,6 +2413,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
emit_split_vector(ctx, dst, 2);
} else if (dst.regClass() == v1) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
ctx->block->fp_mode.must_flush_denorms32, 3);
@@ -2547,6 +2550,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
vop3p->valu().neg_lo[0] = true;
vop3p->valu().neg_hi[0] = true;
emit_split_vector(ctx, dst, 2);
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2577,6 +2581,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
.instr;
vop3p->valu().neg_lo[1] = true;
vop3p->valu().neg_hi[1] = true;
emit_split_vector(ctx, dst, 2);
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2610,6 +2615,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
vop3p->valu().clamp = true;
emit_split_vector(ctx, dst, 2);
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
@@ -3922,6 +3928,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
.instr->valu();
sub.neg_lo[1] = true;
sub.neg_hi[1] = true;
emit_split_vector(ctx, dst, 2);
} else {
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));