mesa: optimize _mesa_matrix_is_identity

+5% performance in VP13/Sw/teslaTower_shaded

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26548>
This commit is contained in:
Marek Olšák 2023-11-26 21:30:12 -05:00 committed by Marge Bot
parent d17ddcc847
commit d321b1500b
3 changed files with 17 additions and 21 deletions

View file

@ -33,6 +33,7 @@
#include "main/glthread.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/matrix.h"
#include "marshal_generated.h"
struct marshal_cmd_base
@ -399,19 +400,6 @@ _mesa_get_matrix_index(struct gl_context *ctx, GLenum mode)
return M_DUMMY;
}
/* Return true when the 16 floats at "m" are byte-for-byte equal to the 4x4
 * identity matrix. The comparison is on raw memory, so an entry such as -0.0
 * makes the matrix count as non-identity.
 */
static inline bool
_mesa_matrix_is_identity(const float *m)
{
   /* Reference matrix; 16 contiguous floats, same layout as "m". */
   static const float ident[4][4] = {
      { 1.0f, 0.0f, 0.0f, 0.0f },
      { 0.0f, 1.0f, 0.0f, 0.0f },
      { 0.0f, 0.0f, 1.0f, 0.0f },
      { 0.0f, 0.0f, 0.0f, 1.0f },
   };

   return memcmp(ident, m, sizeof(ident)) == 0;
}
static inline void
_mesa_glthread_Enable(struct gl_context *ctx, GLenum cap)
{

View file

@ -568,15 +568,8 @@ _mesa_MatrixLoadfEXT( GLenum matrixMode, const GLfloat *m )
static void
matrix_mult(struct gl_matrix_stack *stack, const GLfloat *m, const char* caller)
{
static float identity[16] = {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1,
};
GET_CURRENT_CONTEXT(ctx);
if (!m || !memcmp(m, identity, sizeof(identity)))
if (!m || _mesa_matrix_is_identity(m))
return;
if (MESA_VERBOSE & VERBOSE_API)

View file

@ -52,5 +52,20 @@ _mesa_free_matrix_data( struct gl_context *ctx );
extern void
_mesa_update_modelview_project( struct gl_context *ctx, GLuint newstate );
/* "m" must be a 4x4 matrix. Return true if it's the identity matrix. */
static inline bool
_mesa_matrix_is_identity(const float *m)
{
   /* Look at the raw 32-bit words of the 16 floats. The check is bit-exact:
    * an off-diagonal -0.0 (sign bit set) makes the matrix non-identity.
    */
   const uint32_t *u = (const uint32_t *)m;

   /* "one" must be an integer, not a float. With a float, every
    * "u[i] == one" below would first convert u[i] to float, rounding away
    * its low bits, so diagonal values that are merely close to 1.0 would
    * compare equal. Assumes IEEE_ONE is the raw bit pattern of 1.0f, as its
    * comparison against uint32_t words implies.
    */
   const uint32_t one = IEEE_ONE;

   /* This is faster than memcmp with static identity matrix. Instead of
    * comparing every non-diagonal element against zero, OR them and compare
    * the result. Verified with Viewperf13/Sw/teslaTower_shaded.
    */
   return u[0] == one && u[5] == one && u[10] == one && u[15] == one &&
          !(u[1] | u[2] | u[3] | u[4] | u[6] | u[7] | u[8] | u[9] | u[11] |
            u[12] | u[13] | u[14]);
}
#endif