Remove the tcl fallback for texture rectangle (by manipulating the texture matrix) (tested with texrect). Enable texgen for r/q coordinates (tested with projtex). Fix projected texcoords when an app uses TexCoord3x and the texture matrix to save on vertex size (fixes ut2k3 shadow projectors in tcl mode). Results from texgenmix: all cases with either all texgen or no texgen work, with one exception: texgen enabled for s/t only works with hw tcl, but not with vtxfmt (suspect issues with vtxfmt). The mixed cases do not work (which is expected, and should be rare in practice), with the exception of the first one, which hits a tcl fallback.

2026-04-23 16:00:41 +02:00 · 2005-10-05 11:42:44 +00:00 · 2005-10-05 11:42:44 +00:00 · a3c8de2fa7
commit a3c8de2fa7
parent 06f606ce57
10 changed files with 299 additions and 100 deletions
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@ -405,10 +405,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,

   _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
   _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
-   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_ctr( &rmesa->tmpmat[0] );
+   _math_matrix_ctr( &rmesa->tmpmat[1] );
   _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
   _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
-   _math_matrix_set_identity( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->tmpmat[0] );
+   _math_matrix_set_identity( &rmesa->tmpmat[1] );

   driInitExtensions( ctx, card_extensions, GL_TRUE );
   if (rmesa->glCtx->Mesa_DXTn) {
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@ -753,9 +753,10 @@ struct radeon_context {
   GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
   GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
   GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
-   GLuint TexMatEnabled;
   GLuint TexGenEnabled;
-   GLmatrix tmpmat;
+   GLuint NeedTexMatrix;
+   GLuint TexMatColSwap;
+   GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
   GLuint last_ReallyEnabled;

   /* VBI
--- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
@ -387,6 +387,7 @@ static void emit_tex_vector( GLcontext *ctx,

   switch (size) {
   case 4: emitsize = 3; break;
+   case 3: emitsize = 3; break;
   default: emitsize = 2; break;
   }

@ -416,7 +417,7 @@ static void emit_tex_vector( GLcontext *ctx,
      emit_vec8( ctx, rvb, data, stride, count );
      break;
   case 3:
-      emit_vec8( ctx, rvb, data, stride, count );
+      emit_vec12( ctx, rvb, data, stride, count );
      break;
   case 4:
      emit_stq_vec( ctx, rvb, data, stride, count );
@ -529,38 +530,52 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )

   if (inputs & VERT_BIT_TEX0) {
      if (!rmesa->tcl.tex[0].buf)
-	 emit_tex_vector( ctx, 
-			  &(rmesa->tcl.tex[0]), 
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[0]),
 			  (char *)VB->TexCoordPtr[0]->data,
 			  VB->TexCoordPtr[0]->size,
 			  VB->TexCoordPtr[0]->stride,
 			  count );

-      switch( VB->TexCoordPtr[0]->size ) {
-      case 4:
-	 vtx |= RADEON_TCL_VTX_Q0; 
+      vfmt |= RADEON_CP_VC_FRMT_ST0;
+      /* assume we need the 3rd coord if texgen is active for r/q OR at least 3
+         coords are submitted. This may not be 100% correct */
+      if (VB->TexCoordPtr[0]->size >= 3) {
+	 vtx |= RADEON_TCL_VTX_Q0;
 	 vfmt |= RADEON_CP_VC_FRMT_Q0;
-      default: 
-	 vfmt |= RADEON_CP_VC_FRMT_ST0;
+      }
+      if ( (ctx->Texture.Unit[0].TexGenEnabled & (R_BIT | Q_BIT)) )
+	 vtx |= RADEON_TCL_VTX_Q0;
+      else if (VB->TexCoordPtr[0]->size >= 3) {
+	 GLuint swaptexmatcol = (VB->TexCoordPtr[0]->size - 3); /* 0 for 3 coords, 1 for 4 */
+	 if ((rmesa->NeedTexMatrix & 1) &&
+		(swaptexmatcol != (rmesa->TexMatColSwap & 1))) /* uploaded swap state is stale */
+	    radeonUploadTexMatrix( rmesa, rmesa->tmpmat[0].m, 0, swaptexmatcol ) ;
      }
      component[nr++] = &rmesa->tcl.tex[0];
   }

   if (inputs & VERT_BIT_TEX1) {
      if (!rmesa->tcl.tex[1].buf)
-	 emit_tex_vector( ctx, 
-			  &(rmesa->tcl.tex[1]), 
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[1]),
 			  (char *)VB->TexCoordPtr[1]->data,
 			  VB->TexCoordPtr[1]->size,
 			  VB->TexCoordPtr[1]->stride,
 			  count );
 	 
-      switch( VB->TexCoordPtr[1]->size ) {
-      case 4: 
+      vfmt |= RADEON_CP_VC_FRMT_ST1;
+      if (VB->TexCoordPtr[1]->size >= 3) { /* 3 or more coords submitted: emit the third one */
 	 vtx |= RADEON_TCL_VTX_Q1;
 	 vfmt |= RADEON_CP_VC_FRMT_Q1;
-      default: 
-	 vfmt |= RADEON_CP_VC_FRMT_ST1;
+      }
+      if ( (ctx->Texture.Unit[1].TexGenEnabled & (R_BIT | Q_BIT)) )
+	 vtx |= RADEON_TCL_VTX_Q1;
+      else if (VB->TexCoordPtr[1]->size >= 3) {
+	 GLuint swaptexmatcol = (VB->TexCoordPtr[1]->size - 3); /* 0 for 3 coords, 1 for 4 */
+	 if (((rmesa->NeedTexMatrix >> 1) & 1) &&
+		(swaptexmatcol != ((rmesa->TexMatColSwap >> 1) & 1))) /* uploaded swap state is stale */
+	    radeonUploadTexMatrix( rmesa, rmesa->tmpmat[1].m, 1, swaptexmatcol ) ;
      }
      component[nr++] = &rmesa->tcl.tex[1];
   }
--- a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
@ -47,6 +47,7 @@ static void TAG(emit)( GLcontext *ctx,
   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
   GLuint tc2_stride, norm_stride;
   GLuint fill_tex = 0;
+   GLuint rqcoordsnoswap = 0;
   GLuint (*coord)[4];
   GLuint coord_stride; /* object coordinates */
   GLubyte dummy[4];
@ -65,9 +66,14 @@ static void TAG(emit)( GLcontext *ctx,
 	 const GLuint t2 = GET_TEXSOURCE(2);
 	 tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
 	 tc2_stride = VB->TexCoordPtr[t2]->stride;
-	 if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	 if (DO_PTEX && VB->TexCoordPtr[t2]->size < 3) {
+	 /* since DO_PTEX is only true when we have 3 or more coords
+	    in the first place we don't really need this right? */
 	    fill_tex |= (1<<2);
 	 }
+	 else if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	    rqcoordsnoswap |= (1<<2);
+	 }
      } else {
 	 tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2];
 	 tc2_stride = 0;
@ -79,9 +85,12 @@ static void TAG(emit)( GLcontext *ctx,
 	 const GLuint t1 = GET_TEXSOURCE(1);
 	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
 	 tc1_stride = VB->TexCoordPtr[t1]->stride;
-	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 3) {
 	    fill_tex |= (1<<1);
 	 }
+	 else if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+	    rqcoordsnoswap |= (1<<1);
+	 }
      } else {
 	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1];
 	 tc1_stride = 0;
@ -93,9 +102,12 @@ static void TAG(emit)( GLcontext *ctx,
 	 const GLuint t0 = GET_TEXSOURCE(0);
 	 tc0_stride = VB->TexCoordPtr[t0]->stride;
 	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
-	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 3) {
 	    fill_tex |= (1<<0);
 	 }
+	 else if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+	    rqcoordsnoswap |= (1<<0);
+	 }
      } else {
 	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0];
 	 tc0_stride = 0;
@ -213,6 +225,8 @@ static void TAG(emit)( GLcontext *ctx,
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<0))
 		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<0))
+		  v[2].ui = tc0[0][2];
 	       else
 		  v[2].ui = tc0[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
@ -229,6 +243,8 @@ static void TAG(emit)( GLcontext *ctx,
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<1))
 		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<1))
+		  v[2].ui = tc1[0][2];
 	       else
 		  v[2].ui = tc1[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
@ -244,6 +260,8 @@ static void TAG(emit)( GLcontext *ctx,
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<2))
 		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<2))
+		  v[2].ui = tc2[0][2];
 	       else
 		  v[2].ui = tc2[0][3];
 	       v += 3;
--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@ -243,7 +243,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
      init_tcl_verts();
      firsttime = 0;
   }
-		     
+
   if (1) {
      req |= RADEON_CP_VC_FRMT_Z;
      if (VB->ObjPtr->size == 4) {
@ -254,7 +254,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
   if (inputs & VERT_BIT_NORMAL) {
      req |= RADEON_CP_VC_FRMT_N0;
   }
-   
+
   if (inputs & VERT_BIT_COLOR0) {
      req |= RADEON_CP_VC_FRMT_PKCOLOR;
   }
@ -265,20 +265,38 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )

   if (inputs & VERT_BIT_TEX0) {
      req |= RADEON_CP_VC_FRMT_ST0;
-
-      if (VB->TexCoordPtr[0]->size == 4) {
+      /* assume we need the 3rd coord if texgen is active for r/q OR at least 3
+         coords are submitted. This may not be 100% correct */
+      if (VB->TexCoordPtr[0]->size >= 3) {
 	 req |= RADEON_CP_VC_FRMT_Q0;
 	 vtx |= RADEON_TCL_VTX_Q0;
      }
+      if ( (ctx->Texture.Unit[0].TexGenEnabled & (R_BIT | Q_BIT)) )
+	 vtx |= RADEON_TCL_VTX_Q0;
+      else if (VB->TexCoordPtr[0]->size >= 3) {
+	 GLuint swaptexmatcol = (VB->TexCoordPtr[0]->size - 3);
+	 if ((rmesa->NeedTexMatrix & 1) &&
+		(swaptexmatcol != (rmesa->TexMatColSwap & 1)))
+	    radeonUploadTexMatrix( rmesa, rmesa->tmpmat[0].m, 0, swaptexmatcol ) ;
+      }
   }

+
   if (inputs & VERT_BIT_TEX1) {
      req |= RADEON_CP_VC_FRMT_ST1;

-      if (VB->TexCoordPtr[1]->size == 4) {
+      if (VB->TexCoordPtr[1]->size >= 3) {
 	 req |= RADEON_CP_VC_FRMT_Q1;
 	 vtx |= RADEON_TCL_VTX_Q1;
      }
+      if ( (ctx->Texture.Unit[1].TexGenEnabled & (R_BIT | Q_BIT)) )
+	 vtx |= RADEON_TCL_VTX_Q1;
+      else if (VB->TexCoordPtr[1]->size >= 3) {
+	 GLuint swaptexmatcol = (VB->TexCoordPtr[1]->size - 3);
+	 if (((rmesa->NeedTexMatrix >> 1) & 1) &&
+		(swaptexmatcol != ((rmesa->TexMatColSwap >> 1) & 1)))
+	    radeonUploadTexMatrix( rmesa, rmesa->tmpmat[1].m, 1, swaptexmatcol ) ;
+      }
   }

   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@ -2024,7 +2024,105 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
 * Deferred state management - matrices, textures, other?
 */

+static void texmat_set_texrect( radeonContextPtr rmesa,
+				struct gl_texture_object *tObj, GLuint unit )
+{  /* Set tmpmat[unit] to identity, then scale by the base image size of tObj. */
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   _math_matrix_set_identity( &rmesa->tmpmat[unit] );
+   rmesa->tmpmat[unit].m[0] = 1.0 / baseImage->Width;   /* element 0: 1/width */
+   rmesa->tmpmat[unit].m[5] = 1.0 / baseImage->Height;  /* element 5: 1/height */

+}
+
+static void texmat_fixup_texrect( radeonContextPtr rmesa,
+				  struct gl_texture_object *tObj, GLuint unit )
+{  /* Rescale the matrix already held in tmpmat[unit] by the base image size of tObj. */
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLuint i;
+   for (i = 0; i < 4; i++) {
+      rmesa->tmpmat[unit].m[i] = rmesa->tmpmat[unit].m[i] / baseImage->Width;  /* elements 0..3 */
+      rmesa->tmpmat[unit].m[i+4] = rmesa->tmpmat[unit].m[i+4] / baseImage->Height;  /* elements 4..7 */
+   }}
+
+
+void radeonUploadTexMatrix( radeonContextPtr rmesa, GLfloat *src,
+			    int unit, GLboolean swapcols )
+{
+/* Write src into the mat[TEXMAT_0 + unit] state, reordering rows/columns.
+   Here's how this works: on r100, only 3 tex coords can be submitted, so the
+   vector looks like this probably: (s t r|q 0) (not sure if the last coord
+   is hardwired to 0, could be 1 too). Interestingly, it actually looks like
+   texgen generates all 4 coords, at least tests with projtex indicated that.
+   So: if we need the q coord in the end (solely determined by the texture
+   target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row.
+   Additionally, if we don't have texgen but 4 tex coords submitted, we swap
+   column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord
+   will get submitted in the "wrong", i.e. 3rd, slot.
+   If an app submits 3 coords for 2d targets, we assume it is saving on vertex
+   size and using the texture matrix to swap the r and q coords around (ut2k3
+   does exactly that), so we don't need the 3rd / 4th column swap - still need
+   the 3rd / 4th row swap of course. This will potentially break for apps which
+   use TexCoord3x just for fun. Additionally, it will never work if an app uses
+   an "advanced" texture matrix and relies on all 4 texcoord inputs to generate
+   the maximum needed 3. This seems impossible to do with hw tcl on r100, and
+   incredibly hard to detect so we can't just fallback in such a case. Assume
+   it never happens... - rs
+*/
+   int idx = TEXMAT_0 + unit;
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
+   int i;
+   const struct gl_texture_unit *tUnit = &rmesa->glCtx->Texture.Unit[unit]; /* no struct copy */
+
+   rmesa->TexMatColSwap &= ~(1 << unit); /* cleared here, set again only on the swapcols path */
+   if ((tUnit->_ReallyEnabled & (TEXTURE_3D_BIT | TEXTURE_CUBE_BIT)) == 0) {
+      if (swapcols) {
+	 rmesa->TexMatColSwap |= 1 << unit;
+	 /* attention some elems are swapped 2 times! */
+	 *dest++ = src[0];
+	 *dest++ = src[4];
+	 *dest++ = src[12];
+	 *dest++ = src[8];
+	 *dest++ = src[1];
+	 *dest++ = src[5];
+	 *dest++ = src[13];
+	 *dest++ = src[9];
+	 *dest++ = src[2];   /* NOTE(review): the !swapcols path writes src[3], src[7] as the */
+	 *dest++ = src[6];   /* first two elems of this row - confirm src[2], src[6] is intended */
+	 *dest++ = src[15];
+	 *dest++ = src[11];
+	 /* those last 4 are probably never used */
+	 *dest++ = src[3];
+	 *dest++ = src[7];
+	 *dest++ = src[14];
+	 *dest++ = src[10];
+      }
+      else {
+	 for (i = 0; i < 2; i++) { /* source rows 0 and 1 in order */
+	    *dest++ = src[i];
+	    *dest++ = src[i+4];
+	    *dest++ = src[i+8];
+	    *dest++ = src[i+12];
+	 }
+	 for (i = 3; i >= 2; i--) { /* then source row 3 before row 2 (3rd/4th row swap) */
+	    *dest++ = src[i];
+	    *dest++ = src[i+4];
+	    *dest++ = src[i+8];
+	    *dest++ = src[i+12];
+	 }
+      }
+   }
+   else {
+      /* never used currently - no swapping needed at all presumably */
+      for (i = 0 ; i < 4 ; i++) {
+	 *dest++ = src[i];
+	 *dest++ = src[i+4];
+	 *dest++ = src[i+8];
+	 *dest++ = src[i+12];
+      }
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}


 static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
@ -2057,42 +2155,53 @@ static void update_texturematrix( GLcontext *ctx )
   GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
   GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
   int unit;
-
-   rmesa->TexMatEnabled = 0;
+   GLuint texMatEnabled = 0;
+   rmesa->NeedTexMatrix = 0;
+   rmesa->TexMatColSwap = 0;

   for (unit = 0 ; unit < 2; unit++) {
-      if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
-      }
-      else if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
-	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-	 
-	 rmesa->TexMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE|
-				  RADEON_TEXMAT_0_ENABLE) << unit;
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+	 GLboolean needMatrix = GL_FALSE;
+	 if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+	    needMatrix = GL_TRUE;
+	    texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE |
+			      RADEON_TEXMAT_0_ENABLE) << unit;

-	 if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
-	    /* Need to preconcatenate any active texgen 
-	     * obj/eyeplane matrices:
-	     */
-	    _math_matrix_mul_matrix( &rmesa->tmpmat,
+	    if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	       /* Need to preconcatenate any active texgen
+	        * obj/eyeplane matrices:
+	        */
+	       _math_matrix_mul_matrix( &rmesa->tmpmat[unit],
 				     ctx->TextureMatrixStack[unit].Top,
 				     &rmesa->TexGenMatrix[unit] );
-	    upload_matrix( rmesa, rmesa->tmpmat.m, TEXMAT_0+unit );
+	    }
+	    else {
+	       _math_matrix_copy( &rmesa->tmpmat[unit],
+		  ctx->TextureMatrixStack[unit].Top );
+	    }
 	 }
-	 else {
-	    rmesa->TexMatEnabled |= 
-	       (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-	    upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, 
-			   TEXMAT_0+unit );
+	 else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	    _math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] );
+	    needMatrix = GL_TRUE;
+	 }
+	 if (ctx->Texture.Unit[unit]._ReallyEnabled == TEXTURE_RECT_BIT) {
+	    texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE |
+			      RADEON_TEXMAT_0_ENABLE) << unit;
+	    if (needMatrix)
+	       texmat_fixup_texrect( rmesa, ctx->Texture.Unit[unit]._Current, unit );
+	    else
+	       texmat_set_texrect( rmesa, ctx->Texture.Unit[unit]._Current, unit );
+	    needMatrix = GL_TRUE;
+	 }
+	 if (needMatrix) {
+	    rmesa->NeedTexMatrix |= 1 << unit;
+	    radeonUploadTexMatrix( rmesa, rmesa->tmpmat[unit].m, unit,
+			!ctx->Texture.Unit[unit].TexGenEnabled );
 	 }
-      }
-      else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
-	 upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
-			TEXMAT_0+unit );
      }
   }

-
-   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   tpc = (texMatEnabled | rmesa->TexGenEnabled);

   vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
 	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
@ -2109,7 +2218,7 @@ static void update_texturematrix( GLcontext *ctx )

   if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
       vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
-      
+
      RADEON_STATECHANGE(rmesa, tcl);
      rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
@ -2188,7 +2297,7 @@ void radeonValidateState( GLcontext *ctx )
    */
   if (new_state & _NEW_TEXTURE_MATRIX) {
      update_texturematrix( ctx );
-   }      
+   }

   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
      update_light( ctx );
--- a/src/mesa/drivers/dri/radeon/radeon_state.h
+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
@ -49,7 +49,9 @@ extern void radeonSetCliprects( radeonContextPtr rmesa );
 extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
 extern void radeonUpdateViewportOffset( GLcontext *ctx );
 extern void radeonUpdateWindow( GLcontext *ctx );
-extern void radeonUpdateDrawBuffer(GLcontext *ctx);
+extern void radeonUpdateDrawBuffer( GLcontext *ctx );
+extern void radeonUploadTexMatrix( radeonContextPtr rmesa, GLfloat *src,
+				       int unit, GLboolean swapcols );

 extern void radeonValidateState( GLcontext *ctx );

--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@ -320,6 +320,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,

   for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
      if (ctx->Texture.Unit[i]._ReallyEnabled) {
+      /* TODO: probably should not emit texture coords when texgen is enabled */
 	 if (rmesa->TexGenNeedNormals[i]) {
 	    inputs |= VERT_BIT_NORMAL;
 	 }
@ -444,10 +445,7 @@ static char *fallbackStrings[] = {
   "Texgen unit 0",
   "Texgen unit 1",
   "Texgen unit 2",
-   "User disable",
-   "texture rectangle unit 0",
-   "texture rectangle unit 1",
-   "texture rectangle unit 2"
+   "User disable"
 };


--- a/src/mesa/drivers/dri/radeon/radeon_tcl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.h
@ -55,9 +55,6 @@ extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
 #define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
 #define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
 #define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
-#define RADEON_TCL_FALLBACK_TEXRECT_0         0x100 /* texture rectangle */
-#define RADEON_TCL_FALLBACK_TEXRECT_1         0x200 /* texture rectangle */
-#define RADEON_TCL_FALLBACK_TEXRECT_2         0x400 /* texture rectangle */

 #define RADEON_MAX_TCL_VERTSIZE (15*4)

--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@ -834,7 +834,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
 static void set_texgen_matrix( radeonContextPtr rmesa, 
 			       GLuint unit,
 			       const GLfloat *s_plane,
-			       const GLfloat *t_plane )
+			       const GLfloat *t_plane,
+			       const GLfloat *r_plane,
+			       const GLfloat *q_plane )
 {
   rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
   rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
@ -846,78 +848,119 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
   rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
   rmesa->TexGenMatrix[unit].m[13] = t_plane[3];

+   rmesa->TexGenMatrix[unit].m[2]  = r_plane[0];
+   rmesa->TexGenMatrix[unit].m[6]  = r_plane[1];
+   rmesa->TexGenMatrix[unit].m[10] = r_plane[2];
+   rmesa->TexGenMatrix[unit].m[14] = r_plane[3];
+
+   rmesa->TexGenMatrix[unit].m[3]  = q_plane[0];
+   rmesa->TexGenMatrix[unit].m[7]  = q_plane[1];
+   rmesa->TexGenMatrix[unit].m[11] = q_plane[2];
+   rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
+
   rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
 }

-/* Ignoring the Q texcoord for now.
- *
- * Returns GL_FALSE if fallback required.  
+/* Returns GL_FALSE if fallback required.
 */
 static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
-{  
+{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
   GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
   GLuint tmp = rmesa->TexGenEnabled;
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  -1, 0,
+       0,  0,  0,  1 };

-   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
-   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
-   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift);
   rmesa->TexGenNeedNormals[unit] = 0;

-   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) == 0) {
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) {
      /* Disabled, no fallback:
       */
-      rmesa->TexGenEnabled |= 
-	 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+      rmesa->TexGenEnabled |=
+	 (RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift;
      return GL_TRUE;
   }
-   else if (texUnit->TexGenEnabled & Q_BIT) {
-      /* Very easy to do this, in fact would remove a fallback case
-       * elsewhere, but I haven't done it yet...  Fallback: 
-       */
-      if (RADEON_DEBUG & DEBUG_FALLBACKS) 
-	fprintf(stderr, "fallback Q_BIT\n");
-      return GL_FALSE;
-   }
-   else if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) != (S_BIT|T_BIT) ||
-	    texUnit->GenModeS != texUnit->GenModeT) {
-      /* Mixed modes, fallback:
-       */
-      if (RADEON_DEBUG & DEBUG_FALLBACKS) 
-        fprintf(stderr, "fallback mixed texgen\n");
-      return GL_FALSE;
-   }
-   else
+   /* the r100 cannot do texgen for some coords and not for others
+    * we do not detect such cases (certainly can't do it here) and just
+    * ASSUME that when S and T are texgen enabled we do not need other
+    * non-texgen enabled coords, no matter if the R and Q bits are texgen
+    * enabled. Still check for mixed mode texgen for all coords.
+    */
+   else if ( (texUnit->TexGenEnabled & S_BIT) &&
+	     (texUnit->TexGenEnabled & T_BIT) &&
+	     (texUnit->GenModeS == texUnit->GenModeT) ) {
+      if ( ((texUnit->TexGenEnabled & R_BIT) &&
+	    (texUnit->GenModeS != texUnit->GenModeR)) ||
+	   ((texUnit->TexGenEnabled & Q_BIT) &&
+	    (texUnit->GenModeS != texUnit->GenModeQ)) ) {
+	 /* Mixed modes, fallback:
+	  */
+	 if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	    fprintf(stderr, "fallback mixed texgen\n");
+	 return GL_FALSE;
+      }
      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+   }
+   else {
+   /* some texgen mode not including both S and T bits */
+      if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "fallback mixed texgen/nontexgen\n");
+      return GL_FALSE;
+   }
+
+   if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) {
+      /* need this here for vtxfmt presumably. Argh we need to set
+         this from way too many places, would be much easier if we could leave
+         tcl q coord always enabled as on r200) */
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (unit == 0)
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_Q0;
+      else
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_Q1;
+   }

   switch (texUnit->GenModeS) {
   case GL_OBJECT_LINEAR:
      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
-      set_texgen_matrix( rmesa, unit, 
+      set_texgen_matrix( rmesa, unit,
 			 texUnit->ObjectPlaneS,
-			 texUnit->ObjectPlaneT);
+			 texUnit->ObjectPlaneT,
+			 texUnit->ObjectPlaneR,
+			 texUnit->ObjectPlaneQ);
      break;

   case GL_EYE_LINEAR:
      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
-      set_texgen_matrix( rmesa, unit, 
+      set_texgen_matrix( rmesa, unit,
 			 texUnit->EyePlaneS,
-			 texUnit->EyePlaneT);
+			 texUnit->EyePlaneT,
+			 texUnit->EyePlaneR,
+			 texUnit->EyePlaneQ);
      break;

   case GL_REFLECTION_MAP_NV:
      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
-      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift;
+      /* TODO: unknown if this is needed/correct */
+      set_texgen_matrix( rmesa, unit, reflect, reflect + 4,
+			reflect + 8, reflect + 12 );
      break;

   case GL_NORMAL_MAP_NV:
      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
-      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift;
      break;

   case GL_SPHERE_MAP:
+      /* the mode which everyone uses :-( */
   default:
      /* Unsupported mode, fallback:
       */
@ -1131,11 +1174,7 @@ static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
 {
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];

-   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_TEXRECT_0 << unit, 0 );
-
   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
-      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_TEXRECT_0 << unit, 1 );
-
      return (enable_tex_rect( ctx, unit ) &&
 	      update_tex_common( ctx, unit ));
   }