tnl: Optimize SSE load[23]f_1 since they don't need the identity swizzle.

SSE movss from memory zeroes out everything above the destination dword, so
we get the (a, 0) or (a, 0, 0) result that these functions needed.

Bug #16520.
This commit is contained in:
Guillaume Melquiond 2008-12-09 13:10:56 -08:00 committed by Eric Anholt
parent 6e29a3c8e2
commit b66495a0d9

View file

@@ -146,7 +146,8 @@ static void emit_load3f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
-   emit_load4f_1(p, dest, arg0);
+   /* Loading from memory erases the upper bits. */
+   sse_movss(&p->func, dest, arg0);
}
static void emit_load2f_2( struct x86_program *p,
@@ -160,7 +161,8 @@ static void emit_load2f_1( struct x86_program *p,
struct x86_reg dest,
struct x86_reg arg0 )
{
-   emit_load4f_1(p, dest, arg0);
+   /* Loading from memory erases the upper bits. */
+   sse_movss(&p->func, dest, arg0);
}
static void emit_load1f_1( struct x86_program *p,