mesa: optimize _mesa_matrix_is_identity

+5% performance in VP13/Sw/teslaTower_shaded

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26548>
2026-01-13 09:50:17 +01:00 · 2023-11-26 21:30:12 -05:00 · 2023-11-26 21:30:12 -05:00 · d321b1500b
commit d321b1500b
parent d17ddcc847
3 changed files with 17 additions and 21 deletions
--- a/src/mesa/main/glthread_marshal.h
+++ b/src/mesa/main/glthread_marshal.h
@ -33,6 +33,7 @@
 #include "main/glthread.h"
 #include "main/context.h"
 #include "main/macros.h"
+#include "main/matrix.h"
 #include "marshal_generated.h"

 struct marshal_cmd_base
@ -399,19 +400,6 @@ _mesa_get_matrix_index(struct gl_context *ctx, GLenum mode)
   return M_DUMMY;
 }

-static inline bool
-_mesa_matrix_is_identity(const float *m)
-{
-   static float identity[16] = {
-      1, 0, 0, 0,
-      0, 1, 0, 0,
-      0, 0, 1, 0,
-      0, 0, 0, 1
-   };
-
-   return !memcmp(m, identity, sizeof(identity));
-}
-
 static inline void
 _mesa_glthread_Enable(struct gl_context *ctx, GLenum cap)
 {
--- a/src/mesa/main/matrix.c
+++ b/src/mesa/main/matrix.c
@ -568,15 +568,8 @@ _mesa_MatrixLoadfEXT( GLenum matrixMode, const GLfloat *m )
 static void
 matrix_mult(struct gl_matrix_stack *stack, const GLfloat *m, const char* caller)
 {
-   static float identity[16] = {
-      1, 0, 0, 0,
-      0, 1, 0, 0,
-      0, 0, 1, 0,
-      0, 0, 0, 1,
-   };
-
   GET_CURRENT_CONTEXT(ctx);
-   if (!m || !memcmp(m, identity, sizeof(identity)))
+   if (!m || _mesa_matrix_is_identity(m))
      return;

   if (MESA_VERBOSE & VERBOSE_API)
--- a/src/mesa/main/matrix.h
+++ b/src/mesa/main/matrix.h
@ -52,5 +52,20 @@ _mesa_free_matrix_data( struct gl_context *ctx );
 extern void 
 _mesa_update_modelview_project( struct gl_context *ctx, GLuint newstate );

+/* "m" must be a 4x4 matrix. Return true if it's bit-exactly the identity. */
+static inline bool
+_mesa_matrix_is_identity(const float *m)
+{
+   const uint32_t *u = (const uint32_t *)m;
+   const uint32_t one = IEEE_ONE; /* kept as an integer bit pattern */
+
+   /* Faster than memcmp with a static identity matrix (verified with
+    * Viewperf13/Sw/teslaTower_shaded): compare raw bits and OR the 12
+    * off-diagonal words so zero is tested once. "one" must be an integer;
+    * as a float, u[i] == one would accept bit patterns that round to it. */
+   return u[0] == one && u[5] == one && u[10] == one && u[15] == one &&
+          !(u[1] | u[2] | u[3] | u[4] | u[6] | u[7] | u[8] | u[9] | u[11] |
+            u[12] | u[13] | u[14]);
+}

 #endif