diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 33042af6828..ee5509efd53 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3875,7 +3875,6 @@ radv_postprocess_nir(struct radv_pipeline *pipeline, NIR_PASS(_, stage->nir, nir_lower_idiv, &(nir_lower_idiv_options){ - .imprecise_32bit_lowering = false, .allow_fp16 = gfx_level >= GFX9, }); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b7287bf6f57..af5fbfb6f55 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5254,18 +5254,6 @@ bool nir_lower_non_uniform_access(nir_shader *shader, const nir_lower_non_uniform_access_options *options); typedef struct { - /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV() - * is used. It is the faster of the two but it is not exact in some cases - * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432). - * - * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and - * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more - * instructions than the nv50 path and many of them are integer - * multiplications, so it is probably slower. It should always return the - * correct result, though. - */ - bool imprecise_32bit_lowering; - /* Whether 16-bit floating point arithmetic should be allowed in 8-bit * division lowering */ diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c index ebbdaac1bcd..3b0b99a373f 100644 --- a/src/compiler/nir/nir_lower_idiv.c +++ b/src/compiler/nir/nir_lower_idiv.c @@ -27,98 +27,6 @@ #include "nir.h" #include "nir_builder.h" -/* Has two paths - * One (nir_lower_idiv_fast) lowers idiv/udiv/umod and is based on - * NV50LegalizeSSA::handleDIV() - * - * Note that this path probably does not have not enough precision for - * compute shaders. Perhaps we want a second higher precision (looping) - * version of this? Or perhaps we assume if you can do compute shaders you - * can also branch out to a pre-optimized shader library routine.. - * - * The other path (nir_lower_idiv_precise) is based off of code used by LLVM's - * AMDGPU target. It should handle 32-bit idiv/irem/imod/udiv/umod exactly. - */ - -static nir_ssa_def * -convert_instr(nir_builder *bld, nir_op op, - nir_ssa_def *numer, nir_ssa_def *denom) -{ - nir_ssa_def *af, *bf, *a, *b, *q, *r, *rt; - bool is_signed; - - is_signed = (op == nir_op_idiv || - op == nir_op_imod || - op == nir_op_irem); - - if (is_signed) { - af = nir_i2f32(bld, numer); - bf = nir_i2f32(bld, denom); - af = nir_fabs(bld, af); - bf = nir_fabs(bld, bf); - a = nir_iabs(bld, numer); - b = nir_iabs(bld, denom); - } else { - af = nir_u2f32(bld, numer); - bf = nir_u2f32(bld, denom); - a = numer; - b = denom; - } - - /* get first result: */ - bf = nir_frcp(bld, bf); - bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ - q = nir_fmul(bld, af, bf); - - if (is_signed) { - q = nir_f2i32(bld, q); - } else { - q = nir_f2u32(bld, q); - } - - /* get error of first result: */ - r = nir_imul(bld, q, b); - r = nir_isub(bld, a, r); - r = nir_u2f32(bld, r); - r = nir_fmul(bld, r, bf); - r = nir_f2u32(bld, r); - - /* add quotients: */ - q = nir_iadd(bld, q, r); - - /* correction: if modulus >= divisor, add 1 */ - r = nir_imul(bld, q, b); - r = nir_isub(bld, a, r); - rt = nir_uge(bld, r, b); - - if (op == nir_op_umod) { - q = nir_bcsel(bld, rt, nir_isub(bld, r, b), r); - } else { - r = nir_b2i32(bld, rt); - - q = nir_iadd(bld, q, r); - if (is_signed) { - /* fix the sign: */ - r = nir_ixor(bld, numer, denom); - r = nir_ilt(bld, r, nir_imm_int(bld, 0)); - b = nir_ineg(bld, q); - q = nir_bcsel(bld, r, b, q); - - if (op == nir_op_imod || op == nir_op_irem) { - q = nir_imul(bld, q, denom); - q = nir_isub(bld, numer, q); - if (op == nir_op_imod) { - q = nir_bcsel(bld, nir_ieq_imm(bld, q, 0), - nir_imm_int(bld, 0), - nir_bcsel(bld, r, nir_iadd(bld, q, denom), q)); - } - } - } - } - - return q; -} - /* ported from LLVM's AMDGPUTargetLowering::LowerUDIVREM */ static nir_ssa_def * emit_udiv(nir_builder *bld, nir_ssa_def *numer, nir_ssa_def *denom, bool modulo) @@ -245,8 +153,6 @@ lower_idiv(nir_builder *b, nir_instr *instr, void *_data) if (numer->bit_size < 32) return convert_instr_small(b, alu->op, numer, denom, options); - else if (options->imprecise_32bit_lowering) - return convert_instr(b, alu->op, numer, denom); else return convert_instr_precise(b, alu->op, numer, denom); } diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index aa47e597523..cb88c50ffa6 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1254,7 +1254,6 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, /* Lower integer division by constants before nir_lower_idiv. */ OPT(nir_opt_idiv_const, 32); const nir_lower_idiv_options options = { - .imprecise_32bit_lowering = false, .allow_fp16 = false }; OPT(nir_lower_idiv, &options); diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index 0ba00ee8626..cbefa93d25f 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3356,7 +3356,6 @@ Converter::run() * nir_opt_idiv_const effectively before this. */ nir_lower_idiv_options idiv_options = { - .imprecise_32bit_lowering = false, .allow_fp16 = true, }; NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);