Enable USER_BUFFERS, RADEON_VTXFMT_A and HW_VBOS on big endian by forcing all elts

to 32 bits (it would be possible to do a half-word swap for 16 bits if somebody is interested) and by using the input route mechanism to swap ubytes properly in r300EmitArrays. Tested with cva, varray and quake3.
This commit is contained in:
Benjamin Herrenschmidt 2006-04-23 05:54:06 +00:00
parent 2f92adb9e0
commit aa93b4ced0
4 changed files with 101 additions and 43 deletions

View file

@ -384,6 +384,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
resize_u_list(r300);
#endif
#ifdef USER_BUFFERS
for (i = r300->rmm->u_last + 1; i > 0; i--) {
if (r300->rmm->u_list[i].ptr == NULL) {
continue;
@ -417,6 +418,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
}
}
r300->rmm->u_head = i;
#endif /* USER_BUFFERS */
}
/* Destroy the device specific context.

View file

@ -47,11 +47,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "colormac.h"
#include "radeon_context.h"
/* PPC doesnt support 16 bit elts ... */
#ifndef MESA_BIG_ENDIAN
#define USER_BUFFERS
#define RADEON_VTXFMT_A
#define HW_VBOS
/* We don't handle 16 bits elts swapping yet */
#ifdef MESA_BIG_ENDIAN
#define FORCE_32BITS_ELTS
#endif
//#define OPTIMIZE_ELTS

View file

@ -252,6 +252,43 @@ void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_siz
memcpy(out, elts, n_elts * elt_size);
}
/* Input-route selector words for the two extreme cases.
 *
 * ALL_COMPONENTS routes X, Y, Z and W of the source into the matching slot.
 * ALL_DEFAULT fills every slot with the value Mesa assumes for a missing
 * component, i.e. (0, 0, 0, 1).
 */
#define ALL_COMPONENTS \
	((R300_INPUT_ROUTE_SELECT_X << R300_INPUT_ROUTE_X_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_Y << R300_INPUT_ROUTE_Y_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_Z << R300_INPUT_ROUTE_Z_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_W << R300_INPUT_ROUTE_W_SHIFT))
#define ALL_DEFAULT \
	((R300_INPUT_ROUTE_SELECT_ZERO << R300_INPUT_ROUTE_X_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_ZERO << R300_INPUT_ROUTE_Y_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_ZERO << R300_INPUT_ROUTE_Z_SHIFT) | \
	 (R300_INPUT_ROUTE_SELECT_ONE << R300_INPUT_ROUTE_W_SHIFT))
/* Build the component selectors for an array that supplies aos_size
 * components per vertex.
 *
 * Each selector field is treated as 3 bits wide: the lowest aos_size fields
 * are taken from ALL_COMPONENTS (real data), every field above them from
 * ALL_DEFAULT (the (0, 0, 0, 1) filler).
 */
static GLuint t_comps(GLuint aos_size)
{
	const GLuint present = (1 << (aos_size * 3)) - 1;
	const GLuint from_data = ALL_COMPONENTS & present;
	const GLuint from_default = ALL_DEFAULT & ~present;

	return from_data | from_default;
}
/* Adjust a component-selector word for the host byte order.
 *
 * On big-endian builds (MESA_BIG_ENDIAN defined) and for format 2 the four
 * selectors are mirrored: X<->W and Y<->Z. Only the four selector fields are
 * carried over into the result. In every other case dw is returned as is.
 *
 * NOTE(review): format 2 is presumably the unsigned-byte array format whose
 * bytes arrive reversed on big-endian hosts -- confirm against the
 * aos_format definitions.
 */
static GLuint fix_comps(GLuint dw, int fmt)
{
#ifdef MESA_BIG_ENDIAN
	if (fmt == 2) {
		const GLuint sel_x = (dw >> R300_INPUT_ROUTE_X_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK;
		const GLuint sel_y = (dw >> R300_INPUT_ROUTE_Y_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK;
		const GLuint sel_z = (dw >> R300_INPUT_ROUTE_Z_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK;
		const GLuint sel_w = (dw >> R300_INPUT_ROUTE_W_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK;

		/* Write each selector into the opposite slot. */
		return (sel_x << R300_INPUT_ROUTE_W_SHIFT)
		     | (sel_y << R300_INPUT_ROUTE_Z_SHIFT)
		     | (sel_z << R300_INPUT_ROUTE_Y_SHIFT)
		     | (sel_w << R300_INPUT_ROUTE_X_SHIFT);
	}
#endif /* MESA_BIG_ENDIAN */
	return dw;
}
/* Emit vertex data to GART memory (unless immediate mode)
* Route inputs to the vertex processor
*/
@ -264,7 +301,7 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
//struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint nr = 0;
GLuint count = VB->Count;
GLuint dw,mask;
GLuint dw;
GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */
GLuint aa_vap_reg = 0; /* VAP register assignment */
GLuint i;
@ -469,17 +506,6 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = (nr+1)>>1;
/* Mesa assumes that all missing components are from (0, 0, 0, 1) */
#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<<R300_INPUT_ROUTE_X_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_Y<<R300_INPUT_ROUTE_Y_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_Z<<R300_INPUT_ROUTE_Z_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_W<<R300_INPUT_ROUTE_W_SHIFT))
#define ALL_DEFAULT ((R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_X_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Y_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_ZERO<<R300_INPUT_ROUTE_Z_SHIFT) \
| (R300_INPUT_ROUTE_SELECT_ONE<<R300_INPUT_ROUTE_W_SHIFT))
R300_STATECHANGE(r300, vir[1]);
for(i=0; i < nr; i++)
@ -488,33 +514,22 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd)
r300->state.aos[i].aos_size=/*3*/4; /* XXX */
}
for(i=0;i+1<nr;i+=2){
for (i=0;i+1<nr;i+=2) {
/* do i first.. */
mask=(1<<(r300->state.aos[i].aos_size*3))-1;
dw=(ALL_COMPONENTS & mask)
| (ALL_DEFAULT & ~mask)
| R300_INPUT_ROUTE_ENABLE;
dw = fix_comps(t_comps(r300->state.aos[i].aos_size), r300->state.aos[i].aos_format) | R300_INPUT_ROUTE_ENABLE;
/* i+1 */
mask=(1<<(r300->state.aos[i+1].aos_size*3))-1;
dw|=(
(ALL_COMPONENTS & mask)
| (ALL_DEFAULT & ~mask)
| R300_INPUT_ROUTE_ENABLE
)<<16;
dw |= (fix_comps(t_comps(r300->state.aos[i+1].aos_size), r300->state.aos[i+1].aos_format) | R300_INPUT_ROUTE_ENABLE) << 16;
//fprintf(stderr, "vir1 dw=%08x\n", dw);
r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw;
}
if(nr & 1){
mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1;
dw=(ALL_COMPONENTS & mask)
| (ALL_DEFAULT & ~mask)
| R300_INPUT_ROUTE_ENABLE;
r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
}
if (nr & 1) {
dw = fix_comps(t_comps(r300->state.aos[nr-1].aos_size), r300->state.aos[nr-1].aos_format) | R300_INPUT_ROUTE_ENABLE;
//fprintf(stderr, "vir1 dw=%08x\n", dw);
}
r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw;
}
/* Set the rest of INPUT_ROUTE_1 to 0 */
//for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0;
((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = (nr+1)>>1;

View file

@ -216,14 +216,22 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
max = ((unsigned char *)indices)[i];
}
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
elt_size = 2;
#endif
r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
#ifdef FORCE_32BITS_ELTS
for (i=0; i < count; i++)
((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
#else
for (i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
#endif
break;
case GL_UNSIGNED_SHORT:
@ -234,14 +242,23 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
max = ((unsigned short int *)indices)[i];
}
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
elt_size = 2;
#endif
r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
#ifdef FORCE_32BITS_ELTS
for (i=0; i < count; i++)
((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
#else
for (i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
#endif
break;
case GL_UNSIGNED_INT:
@ -252,17 +269,20 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G
max = ((unsigned int *)indices)[i];
}
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
if (max - min <= 65535)
elt_size = 2;
else
elt_size = 4;
#endif
r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
if (max - min <= 65535)
if (elt_size == 2)
for (i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
else
@ -388,19 +408,30 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
memset(&rvb, 0, sizeof(rvb));
switch (type){
case GL_UNSIGNED_BYTE:
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
elt_size = 2;
#endif
r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
#ifdef FORCE_32BITS_ELTS
for(i=0; i < count; i++)
((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
#else
for(i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min;
#endif
break;
case GL_UNSIGNED_SHORT:
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
elt_size = 2;
#endif
#ifdef OPTIMIZE_ELTS
if (min == 0 && ctx->Array.ElementArrayBufferObj->Name){
ptr = indices;
@ -411,21 +442,29 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
#ifdef FORCE_32BITS_ELTS
for(i=0; i < count; i++)
((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
#else
for(i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min;
#endif
break;
case GL_UNSIGNED_INT:
#ifdef FORCE_32BITS_ELTS
elt_size = 4;
#else
if (max - min <= 65535)
elt_size = 2;
else
elt_size = 4;
#endif
r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size);
rvb.aos_offset = GET_START(&rvb);
ptr = rvb.address + rvb.start;
if (max - min <= 65535)
if (elt_size == 2)
for (i=0; i < count; i++)
((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min;
else