From 37e9e8b06cab385c1e74333bcf6b289ded884b6d Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 15 Apr 2024 11:22:08 +0100
Subject: [PATCH] aco: split vop3p results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes copies in the case of:
a = fmul
b = fmul
c = vec4(a.x, a.y, b.x, b.y)

fossil-db (navi31):
Totals from 21 (0.03% of 79395) affected shaders:
Instrs: 96481 -> 96338 (-0.15%)
CodeSize: 548452 -> 548196 (-0.05%); split: -0.13%, +0.09%
Latency: 1514460 -> 1514238 (-0.01%); split: -0.02%, +0.00%
InvThroughput: 683048 -> 682942 (-0.02%); split: -0.02%, +0.00%
VClause: 1611 -> 1613 (+0.12%)
Copies: 21326 -> 21190 (-0.64%)
Branches: 2427 -> 2426 (-0.04%)
PreVGPRs: 2289 -> 2298 (+0.39%)
VALU: 59090 -> 58954 (-0.23%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 755f3e43bd5..f98912d2980 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -953,6 +953,7 @@ emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T
    Builder bld(ctx->program, ctx->block);
    bld.is_precise = instr->exact;
    Builder::Result res = bld.vop3p(op, Definition(dst), src0, src1, opsel_lo, opsel_hi);
+   emit_split_vector(ctx, dst, 2);
    return res;
 }
 
@@ -1515,6 +1516,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
                               src, opsel_lo, opsel_hi);
          bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
+         emit_split_vector(ctx, dst, 2);
          break;
       }
       Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2411,6 +2413,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          }
 
          bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
+         emit_split_vector(ctx, dst, 2);
       } else if (dst.regClass() == v1) {
          emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
                                 ctx->block->fp_mode.must_flush_denorms32, 3);
@@ -2547,6 +2550,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
                       instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
          vop3p->valu().neg_lo[0] = true;
          vop3p->valu().neg_hi[0] = true;
+         emit_split_vector(ctx, dst, 2);
          break;
       }
       Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2577,6 +2581,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
                .instr;
          vop3p->valu().neg_lo[1] = true;
          vop3p->valu().neg_hi[1] = true;
+         emit_split_vector(ctx, dst, 2);
          break;
       }
       Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2610,6 +2615,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
             bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
                       instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
          vop3p->valu().clamp = true;
+         emit_split_vector(ctx, dst, 2);
          break;
       }
       Temp src = get_alu_src(ctx, instr->src[0]);
@@ -3922,6 +3928,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
                .instr->valu();
          sub.neg_lo[1] = true;
          sub.neg_hi[1] = true;
+         emit_split_vector(ctx, dst, 2);
       } else {
          Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));