From d495a5c1837c7744bd1f49c823b0e56d51623f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 27 Aug 2020 15:11:13 +0100 Subject: [PATCH] radv: enable .lower_ineg We already emit ineg as isub most of the time. The results are a bit mixed, but shouldn't really make a difference. A couple of additional copies are needed as isub writes scc. Totals from 5975 (4.29% of 139391) affected shaders: CodeSize: 31508648 -> 31509264 (+0.00%); split: -0.00%, +0.00% Instrs: 6073379 -> 6073531 (+0.00%); split: -0.00%, +0.00% Cycles: 47186280 -> 47187116 (+0.00%); split: -0.00%, +0.00% VMEM: 2528515 -> 2529139 (+0.02%); split: +0.03%, -0.01% SMEM: 596842 -> 596924 (+0.01%); split: +0.02%, -0.00% SClause: 280596 -> 280594 (-0.00%) Copies: 288554 -> 288669 (+0.04%); split: -0.00%, +0.04% PreSGPRs: 240390 -> 240397 (+0.00%) PreVGPRs: 349630 -> 349749 (+0.03%) Reviewed-by: Rhys Perry Part-of: --- .../compiler/aco_instruction_selection.cpp | 27 ------------------- src/amd/vulkan/radv_shader.c | 1 + 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index e9907ec4ac5..9c6bbe00003 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1316,33 +1316,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } break; } - case nir_op_ineg: { - Temp src = get_alu_src(ctx, instr->src[0]); - if (dst.regClass() == v1) { - bld.vsub32(Definition(dst), Operand(0u), Operand(src)); - } else if (dst.regClass() == s1) { - bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand((uint32_t) -1), src); - } else if (dst.size() == 2) { - Temp src0 = bld.tmp(dst.type(), 1); - Temp src1 = bld.tmp(dst.type(), 1); - bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src); - - if (dst.regClass() == s2) { - Temp borrow = bld.tmp(s1); - Temp dst0 = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), Operand(0u), src0); - Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), Operand(0u), src1, bld.scc(borrow)); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1); - } else { - Temp lower = bld.tmp(v1); - Temp borrow = bld.vsub32(Definition(lower), Operand(0u), src0, true).def(1).getTemp(); - Temp upper = bld.vsub32(bld.def(v1), Operand(0u), src1, false, borrow); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); - } - } else { - isel_err(&instr->instr, "Unimplemented NIR instr bit size"); - } - break; - } case nir_op_iabs: { Temp src = get_alu_src(ctx, instr->src[0]); if (dst.regClass() == s1) { diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index ebaa567873a..653bb0b5ca4 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -55,6 +55,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_device_index_to_zero = true, .lower_fdiv = true, .lower_fmod = true, + .lower_ineg = true, .lower_bitfield_insert_to_bitfield_select = true, .lower_bitfield_extract = true, .lower_pack_snorm_2x16 = true,