r300: move the ROUND+ARL->ARR fusing to main optimization loop

It's particularly important to have the copy-propagate pass run first, so that when the round is vectorized, we don't have to follow the MOVs to find out whether it leads to an ARL or not (we don't vectorize ARR/ARL at the moment). No shader-db change. Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23560>
2026-05-05 09:38:07 +02:00 · 2023-06-07 14:02:47 +02:00 · 2023-06-07 14:02:47 +02:00 · 886a6aa5be
commit 886a6aa5be
parent f82574fb2c
2 changed files with 31 additions and 28 deletions
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@ -1374,6 +1374,35 @@ static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
 	}
 }

+/**
+ * According to the GLSL spec, round is only 1.30 and up
+ * so the only reason why we should ever see round is if it actually
+ * is lowered ARR (from nine->ttn). In that case we want to reconstruct
+ * the ARR instead of lowering the round.
+ */
+static void transform_vertex_ROUND(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_reader_data readers;
+	rc_get_readers(c, inst, &readers, NULL, NULL, NULL);
+
+	assert(readers.ReaderCount > 0);
+	for (unsigned i = 0; i < readers.ReaderCount; i++) {
+		struct rc_instruction *reader = readers.Readers[i].Inst;
+		if (reader->U.I.Opcode != RC_OPCODE_ARL) {
+			assert(!"Unable to convert ROUND+ARL to ARR\n");
+			return;
+		}
+	}
+
+	/* Only ARL readers, convert all to ARR */
+	for (unsigned i = 0; i < readers.ReaderCount; i++) {
+		readers.Readers[i].Inst->U.I.Opcode = RC_OPCODE_ARR;
+	}
+	/* Switch ROUND to MOV and let copy propagate sort it out later. */
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
 /**
 * Apply various optimizations specific to the A0 adress register loads.
 */
@ -1385,6 +1414,8 @@ static void optimize_A0_loads(struct radeon_compiler * c) {
 		inst = inst->Next;
 		if (cur->U.I.Opcode == RC_OPCODE_ARL) {
 			merge_ARL(c, cur);
+		} else if (cur->U.I.Opcode == RC_OPCODE_ROUND) {
+			transform_vertex_ROUND(c, cur);
 		}
 	}
 }
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@ -447,33 +447,6 @@ static void transform_ROUND(struct radeon_compiler* c,
 	rc_remove_instruction(inst);
 }

-/**
- * According to the GLSL spec, round is only 1.30 and up
- * so the only reason why we should ever see round is if it actually
- * is lowered ARR (from nine->ttn). In that case we want to reconstruct
- * the ARR instead of lowering the round.
- */
-static void transform_vertex_ROUND(struct radeon_compiler* c,
-	struct rc_instruction* inst)
-{
-	struct rc_reader_data readers;
-	rc_get_readers(c, inst, &readers, NULL, NULL, NULL);
-
-	assert(readers.ReaderCount > 0);
-	for (unsigned i = 0; i < readers.ReaderCount; i++) {
-		struct rc_instruction *reader = readers.Readers[i].Inst;
-		if (reader->U.I.Opcode != RC_OPCODE_ARL)
-			return;
-	}
-
-	/* Only ARL readers, convert all to ARR */
-	for (unsigned i = 0; i < readers.ReaderCount; i++) {
-		readers.Readers[i].Inst->U.I.Opcode = RC_OPCODE_ARR;
-	}
-	/* Switch ROUND to MOV and let copy propagate sort it out later. */
-	inst->U.I.Opcode = RC_OPCODE_MOV;
-}
-
 static void transform_RSQ(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
@ -860,7 +833,6 @@ int r300_transform_vertex_alu(
 	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
 	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
 	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
-	case RC_OPCODE_ROUND: transform_vertex_ROUND(c, inst); return 1;
 	case RC_OPCODE_SEQ:
 		if (!c->is_r500) {
 			transform_r300_vertex_SEQ(c, inst);