ir3: Use the new NIR lowering pass for integer multiplication

Shader-db stats courtesy of Eric Anholt:

total instructions in shared programs: 6480215 -> 6475457 (-0.07%)
instructions in affected programs: 662105 -> 657347 (-0.72%)
helped: 1209
HURT: 13

total constlen in shared programs: 1432704 -> 1427769 (-0.34%)
constlen in affected programs: 100063 -> 95128 (-4.93%)
helped: 512
HURT: 0

total max_sun in shared programs: 875561 -> 873387 (-0.25%)
max_sun in affected programs: 46179 -> 44005 (-4.71%)
helped: 1087
HURT: 0

Reviewed-by: Eric Anholt <eric@anholt.net>
2025-12-24 13:10:10 +01:00 · 2019-05-13 00:33:57 +02:00 · 2019-05-13 00:33:57 +02:00 · c02ffd2700
commit c02ffd2700
parent 340277ad71
2 changed files with 16 additions and 17 deletions
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -542,23 +542,6 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
 	case nir_op_umin:
 		dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0);
 		break;
-	case nir_op_imul:
-		if (bs[0] > 16 || bs[1] > 16) {
-			/*
-			 * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16)
-			 *   mull.u tmp0, a, b           ; mul low, i.e. al * bl
-			 *   madsh.m16 tmp1, a, b, tmp0  ; mul-add shift high mix,
-			 *                               ; i.e. ah * bl << 16
-			 *   madsh.m16 dst, b, a, tmp1   ; i.e. al * bh << 16
-			 */
-			dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0,
-								   ir3_MADSH_M16(b, src[0], 0, src[1], 0,
-												 ir3_MULL_U(b, src[0], 0,
-															src[1], 0), 0), 0);
-		} else {
-			dst[0] = ir3_MUL_S(b, src[0], 0, src[1], 0);
-		}
-		break;
 	case nir_op_umul_low:
 		dst[0] = ir3_MULL_U(b, src[0], 0, src[1], 0);
 		break;
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -84,6 +84,22 @@ ir3_context_init(struct ir3_compiler *compiler,
 	 */
 	NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
 	NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
+
+	/* We want to lower nir_op_imul as late as possible, to also catch
+	 * those generated by earlier passes (e.g., nir_lower_locals_to_regs).
+	 * However, we want a final swing of a few passes to have a chance
+	 * at optimizing the result.
+	 */
+	bool progress;
+	NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
+	if (progress) {
+		NIR_PASS_V(ctx->s, nir_opt_algebraic);
+		NIR_PASS_V(ctx->s, nir_opt_copy_prop_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dead_write_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dce);
+		NIR_PASS_V(ctx->s, nir_opt_constant_folding);
+	}
+
 	NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);

 	if (ir3_shader_debug & IR3_DBG_DISASM) {