From 73e8fc3efbfe4a46bfd5ca5b3269d06f086bc224 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 3 Aug 2022 10:44:45 +0200 Subject: [PATCH] broadcom/compiler: don't use imprecise_32bit_lowering for idiv lowering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is known to produce bogus results for certain combinations of operands, so don't use it. See this issue for details: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6555 With this change, the idiv lowering will produce mul_high instructions, so we need to instruct the compiler to lower those with the ALU lowering right after the idiv lowering by adding the lower_mul_high option (we only need to add this to V3D, since V3DV already had it set). This will cause injection of uadd_carry instructions, for which we have backend implementations that produce better code for us than the NIR lowering. total instructions in shared programs: 12457692 -> 12463625 (0.05%) instructions in affected programs: 23115 -> 29048 (25.67%) helped: 0 HURT: 111 total threads in shared programs: 416372 -> 416368 (<.01%) threads in affected programs: 8 -> 4 (-50.00%) helped: 0 HURT: 2 total uniforms in shared programs: 3704067 -> 3704589 (0.01%) uniforms in affected programs: 5804 -> 6326 (8.99%) helped: 2 HURT: 109 total max-temps in shared programs: 2147845 -> 2148088 (0.01%) max-temps in affected programs: 2456 -> 2699 (9.89%) helped: 6 HURT: 91 Reviewed-by: Alyssa Rosenzweig Reviewed-by: Alejandro PiƱeiro Part-of: --- src/broadcom/compiler/vir.c | 2 +- src/gallium/drivers/v3d/v3d_screen.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 5ab6300b8bb..719cb2ab9e5 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1563,10 +1563,10 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS(_, c->s, v3d_nir_lower_txf_ms, c); NIR_PASS(_, c->s, v3d_nir_lower_image_load_store); nir_lower_idiv_options idiv_options = { - .imprecise_32bit_lowering = true, .allow_fp16 = true, }; NIR_PASS(_, c->s, nir_lower_idiv, &idiv_options); + NIR_PASS(_, c->s, nir_lower_alu); if (c->key->robust_buffer_access) { /* v3d_nir_lower_robust_buffer_access assumes constant buffer diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 6b79a4978cb..83fe69b31ca 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -732,6 +732,7 @@ static const nir_shader_compiler_options v3d_nir_options = { .lower_int64_options = nir_lower_imul_2x32_64, .has_fsub = true, .has_isub = true, + .lower_mul_high = true, .divergence_analysis_options = nir_divergence_multiple_workgroup_per_compute_subgroup, /* This will enable loop unrolling in the state tracker so we won't