mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-15 19:30:38 +02:00
r200r300: start merging span code
This commit is contained in:
parent
0788e42471
commit
eda2284961
5 changed files with 241 additions and 590 deletions
|
|
@ -52,21 +52,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
* information.
|
||||
*/
|
||||
#define LOCAL_VARS \
|
||||
driRenderbuffer *drb = (driRenderbuffer *) rb; \
|
||||
const __DRIdrawablePrivate *dPriv = drb->dPriv; \
|
||||
struct radeon_renderbuffer *rrb = (void *) rb; \
|
||||
const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
|
||||
const GLuint bottom = dPriv->h - 1; \
|
||||
GLubyte *buf = (GLubyte *) drb->flippedData \
|
||||
+ (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
|
||||
GLuint p; \
|
||||
(void) p;
|
||||
|
||||
#define LOCAL_DEPTH_VARS \
|
||||
driRenderbuffer *drb = (driRenderbuffer *) rb; \
|
||||
const __DRIdrawablePrivate *dPriv = drb->dPriv; \
|
||||
struct radeon_renderbuffer *rrb = (void *) rb; \
|
||||
const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
|
||||
const GLuint bottom = dPriv->h - 1; \
|
||||
GLuint xo = dPriv->x; \
|
||||
GLuint yo = dPriv->y; \
|
||||
GLubyte *buf = (GLubyte *) drb->Base.Data;
|
||||
GLuint yo = dPriv->y;
|
||||
|
||||
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
|
||||
|
||||
|
|
@ -89,7 +86,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
#define TAG(x) radeon##x##_RGB565
|
||||
#define TAG2(x,y) radeon##x##_RGB565##y
|
||||
#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
|
||||
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
|
||||
#include "spantmp2.h"
|
||||
|
||||
/* 32 bit, ARGB8888 color spanline and pixel functions
|
||||
|
|
@ -99,7 +96,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
#define TAG(x) radeon##x##_ARGB8888
|
||||
#define TAG2(x,y) radeon##x##_ARGB8888##y
|
||||
#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
|
||||
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
|
||||
#include "spantmp2.h"
|
||||
|
||||
|
||||
|
|
@ -116,70 +113,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
* are set up correctly. It is not quite enough to get it working with hyperz too...
|
||||
*/
|
||||
|
||||
/* extract bit 'b' of x, result is zero or one */
|
||||
#define BIT(x,b) ((x & (1<<b))>>b)
|
||||
|
||||
static GLuint
|
||||
r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
|
||||
{
|
||||
GLuint pitch = drb->pitch;
|
||||
if (drb->depthHasSurface) {
|
||||
return 4 * (x + y * pitch);
|
||||
}
|
||||
else {
|
||||
GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
|
||||
GLuint a =
|
||||
(BIT(x,0) << 2) |
|
||||
(BIT(y,0) << 3) |
|
||||
(BIT(x,1) << 4) |
|
||||
(BIT(y,1) << 5) |
|
||||
(BIT(x,3) << 6) |
|
||||
(BIT(x,4) << 7) |
|
||||
(BIT(x,2) << 8) |
|
||||
(BIT(y,2) << 9) |
|
||||
(BIT(y,3) << 10) |
|
||||
(((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
|
||||
((b >> 1) << 12);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
static GLuint
|
||||
r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
|
||||
{
|
||||
GLuint pitch = drb->pitch;
|
||||
if (drb->depthHasSurface) {
|
||||
return 2 * (x + y * pitch);
|
||||
}
|
||||
else {
|
||||
GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
|
||||
GLuint a =
|
||||
(BIT(x,0) << 1) |
|
||||
(BIT(y,0) << 2) |
|
||||
(BIT(x,1) << 3) |
|
||||
(BIT(y,1) << 4) |
|
||||
(BIT(x,2) << 5) |
|
||||
(BIT(x,4) << 6) |
|
||||
(BIT(x,5) << 7) |
|
||||
(BIT(x,3) << 8) |
|
||||
(BIT(y,2) << 9) |
|
||||
(BIT(y,3) << 10) |
|
||||
(((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
|
||||
((b >> 1) << 12);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* 16-bit depth buffer functions
|
||||
*/
|
||||
#define VALUE_TYPE GLushort
|
||||
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
*(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
|
||||
*(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d
|
||||
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
|
||||
d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)
|
||||
|
||||
#define TAG(x) radeon##x##_z16
|
||||
#include "depthtmp.h"
|
||||
|
|
@ -191,16 +133,17 @@ r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
|
|||
|
||||
#define WRITE_DEPTH( _x, _y, d ) \
|
||||
do { \
|
||||
GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
|
||||
GLuint tmp = *(GLuint *)(buf + offset); \
|
||||
GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0xff000000; \
|
||||
tmp |= ((d) & 0x00ffffff); \
|
||||
*(GLuint *)(buf + offset) = tmp; \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define READ_DEPTH( d, _x, _y ) \
|
||||
d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \
|
||||
_y + yo )) & 0x00ffffff;
|
||||
do { \
|
||||
d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0x00ffffff); \
|
||||
}while(0)
|
||||
|
||||
#define TAG(x) radeon##x##_z24_s8
|
||||
#include "depthtmp.h"
|
||||
|
|
@ -214,17 +157,17 @@ do { \
|
|||
*/
|
||||
#define WRITE_STENCIL( _x, _y, d ) \
|
||||
do { \
|
||||
GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
|
||||
GLuint tmp = *(GLuint *)(buf + offset); \
|
||||
GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0x00ffffff; \
|
||||
tmp |= (((d) & 0xff) << 24); \
|
||||
*(GLuint *)(buf + offset) = tmp; \
|
||||
*_ptr = tmp; \
|
||||
} while (0)
|
||||
|
||||
#define READ_STENCIL( d, _x, _y ) \
|
||||
do { \
|
||||
GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
|
||||
GLuint tmp = *(GLuint *)(buf + offset); \
|
||||
GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
|
||||
GLuint tmp = *_ptr; \
|
||||
tmp &= 0xff000000; \
|
||||
d = tmp >> 24; \
|
||||
} while (0)
|
||||
|
|
@ -233,51 +176,11 @@ do { \
|
|||
#include "stenciltmp.h"
|
||||
|
||||
|
||||
/* Move locking out to get reasonable span performance (10x better
|
||||
* than doing this in HW_LOCK above). WaitForIdle() is the main
|
||||
* culprit.
|
||||
*/
|
||||
|
||||
/**
 * Prepare for software span rendering: fire pending vertices, take the
 * hardware lock and wait for the engine to go idle.  The lock stays held
 * on return.
 */
static void r200SpanRenderStart( GLcontext *ctx )
{
   r200ContextPtr rmesa = R200_CONTEXT( ctx );
   driRenderbuffer *color_rb;
   volatile int *first_pixel;
   int saved;

   R200_FIREVERTICES( rmesa );
   LOCK_HARDWARE( &rmesa->radeon );
   radeonWaitForIdleLocked( &rmesa->radeon );

   /* Read and rewrite the first pixel of the frame buffer.  This ought to
    * be a no-op, but without it conform fails: framebuffer reads
    * sometimes return stale data because the on-card read cache does not
    * notice that the framebuffer has been updated.
    *
    * In the worst case this is buggy too, since the very first read may
    * itself be stale; a hidden pixel would really be needed for this.
    */
   color_rb = (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
   first_pixel =
      (volatile int *)(rmesa->radeon.dri.screen->pFB + color_rb->offset);
   saved = *first_pixel;
   *first_pixel = saved;
}
|
||||
|
||||
/**
 * Finish software span rendering: flush swrast, then release the
 * hardware lock.
 */
static void r200SpanRenderFinish( GLcontext *ctx )
{
   r200ContextPtr r200 = R200_CONTEXT( ctx );

   _swrast_flush( ctx );
   UNLOCK_HARDWARE( &r200->radeon );
}
|
||||
|
||||
/**
 * Install the span render start/finish hooks in the swrast device driver
 * table of the given context.
 *
 * Each hook used to be assigned twice in a row; only the final value
 * (the shared radeonSpanRender* functions) ever took effect, so the
 * overwritten stores are dropped.
 */
void r200InitSpanFuncs( GLcontext *ctx )
{
   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);

   swdd->SpanRenderStart = radeonSpanRenderStart;
   swdd->SpanRenderFinish = radeonSpanRenderFinish;
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -141,259 +141,6 @@ static const struct tx_table tx_table_le[] =
|
|||
#undef _ALPHA
|
||||
#undef _INVALID
|
||||
|
||||
#if 0
|
||||
|
||||
/**
|
||||
* This function computes the number of bytes of storage needed for
|
||||
* the given texture object (all mipmap levels, all cube faces).
|
||||
* The \c image[face][level].x/y/width/height parameters for upload/blitting
|
||||
* are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
|
||||
* too.
|
||||
*
|
||||
* \param rmesa Context pointer
|
||||
* \param tObj GL texture object whose images are to be posted to
|
||||
* hardware state.
|
||||
*/
|
||||
static void r200SetTexImages( r200ContextPtr rmesa,
|
||||
struct gl_texture_object *tObj )
|
||||
{
|
||||
radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
|
||||
const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
|
||||
GLint curOffset, blitWidth;
|
||||
GLint i, texelBytes;
|
||||
GLint numLevels;
|
||||
GLint log2Width, log2Height, log2Depth;
|
||||
|
||||
/* Set the hardware texture format
|
||||
*/
|
||||
if ( !t->image_override ) {
|
||||
if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
|
||||
const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
|
||||
tx_table_be;
|
||||
|
||||
t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
|
||||
R200_TXFORMAT_ALPHA_IN_MAP);
|
||||
t->pp_txfilter &= ~R200_YUV_TO_RGB;
|
||||
|
||||
t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
|
||||
t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
|
||||
}
|
||||
else {
|
||||
_mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Compute which mipmap levels we really want to send to the hardware.
|
||||
*/
|
||||
|
||||
driCalculateTextureFirstLastLevel( (driTextureObject *) t );
|
||||
log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
|
||||
log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
|
||||
log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
|
||||
|
||||
numLevels = t->base.lastLevel - t->base.firstLevel + 1;
|
||||
|
||||
assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
|
||||
|
||||
/* Calculate mipmap offsets and dimensions for blitting (uploading)
|
||||
* The idea is that we lay out the mipmap levels within a block of
|
||||
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
|
||||
*/
|
||||
curOffset = 0;
|
||||
blitWidth = BLIT_WIDTH_BYTES;
|
||||
t->tile_bits = 0;
|
||||
|
||||
/* figure out if this texture is suitable for tiling. */
|
||||
if (texelBytes) {
|
||||
if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
|
||||
/* texrect might be able to use micro tiling too in theory? */
|
||||
(baseImage->Height > 1)) {
|
||||
/* allow 32 (bytes) x 1 mip (which will use two times the space
|
||||
the non-tiled version would use) max if base texture is large enough */
|
||||
if ((numLevels == 1) ||
|
||||
(((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
|
||||
(baseImage->Width * texelBytes > 64)) ||
|
||||
((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
|
||||
t->tile_bits |= R200_TXO_MICRO_TILE;
|
||||
}
|
||||
}
|
||||
if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
|
||||
/* we can set macro tiling even for small textures, they will be untiled anyway */
|
||||
t->tile_bits |= R200_TXO_MACRO_TILE;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < numLevels; i++) {
|
||||
const struct gl_texture_image *texImage;
|
||||
GLuint size;
|
||||
|
||||
texImage = tObj->Image[0][i + t->base.firstLevel];
|
||||
if ( !texImage )
|
||||
break;
|
||||
|
||||
/* find image size in bytes */
|
||||
if (texImage->IsCompressed) {
|
||||
/* need to calculate the size AFTER padding even though the texture is
|
||||
submitted without padding.
|
||||
Only handle pot textures currently - don't know if npot is even possible,
|
||||
size calculation would certainly need (trivial) adjustments.
|
||||
Align (and later pad) to 32byte, not sure what that 64byte blit width is
|
||||
good for? */
|
||||
if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
|
||||
/* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
|
||||
if ((texImage->Width + 3) < 8) /* width one block */
|
||||
size = texImage->CompressedSize * 4;
|
||||
else if ((texImage->Width + 3) < 16)
|
||||
size = texImage->CompressedSize * 2;
|
||||
else size = texImage->CompressedSize;
|
||||
}
|
||||
else /* DXT3/5, 16 bytes per block */
|
||||
if ((texImage->Width + 3) < 8)
|
||||
size = texImage->CompressedSize * 2;
|
||||
else size = texImage->CompressedSize;
|
||||
}
|
||||
else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
|
||||
size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
|
||||
}
|
||||
else if (t->tile_bits & R200_TXO_MICRO_TILE) {
|
||||
/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
|
||||
though the actual offset may be different (if texture is less than
|
||||
32 bytes width) to the untiled case */
|
||||
int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
|
||||
size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
|
||||
blitWidth = MAX2(texImage->Width, 64 / texelBytes);
|
||||
}
|
||||
else {
|
||||
int w = (texImage->Width * texelBytes + 31) & ~31;
|
||||
size = w * texImage->Height * texImage->Depth;
|
||||
blitWidth = MAX2(texImage->Width, 64 / texelBytes);
|
||||
}
|
||||
assert(size > 0);
|
||||
|
||||
/* Align to 32-byte offset. It is faster to do this unconditionally
|
||||
* (no branch penalty).
|
||||
*/
|
||||
|
||||
curOffset = (curOffset + 0x1f) & ~0x1f;
|
||||
|
||||
if (texelBytes) {
|
||||
t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
|
||||
t->image[0][i].y = 0;
|
||||
t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
|
||||
t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
|
||||
}
|
||||
else {
|
||||
t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
|
||||
t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
|
||||
t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
|
||||
t->image[0][i].height = size / t->image[0][i].width;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* for debugging only and only applicable to non-rectangle targets */
|
||||
assert(size % t->image[0][i].width == 0);
|
||||
assert(t->image[0][i].x == 0
|
||||
|| (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
|
||||
#endif
|
||||
|
||||
if (0)
|
||||
fprintf(stderr,
|
||||
"level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
|
||||
i, texImage->Width, texImage->Height,
|
||||
t->image[0][i].x, t->image[0][i].y,
|
||||
t->image[0][i].width, t->image[0][i].height, size, curOffset);
|
||||
|
||||
curOffset += size;
|
||||
|
||||
}
|
||||
|
||||
/* Align the total size of texture memory block.
|
||||
*/
|
||||
t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
|
||||
|
||||
/* Setup remaining cube face blits, if needed */
|
||||
if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
|
||||
const GLuint faceSize = t->base.totalSize;
|
||||
GLuint face;
|
||||
/* reuse face 0 x/y/width/height - just update the offset when uploading */
|
||||
for (face = 1; face < 6; face++) {
|
||||
for (i = 0; i < numLevels; i++) {
|
||||
t->image[face][i].x = t->image[0][i].x;
|
||||
t->image[face][i].y = t->image[0][i].y;
|
||||
t->image[face][i].width = t->image[0][i].width;
|
||||
t->image[face][i].height = t->image[0][i].height;
|
||||
}
|
||||
}
|
||||
t->base.totalSize = 6 * faceSize; /* total texmem needed */
|
||||
}
|
||||
|
||||
|
||||
/* Hardware state:
|
||||
*/
|
||||
t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
|
||||
t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
|
||||
|
||||
t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
|
||||
R200_TXFORMAT_HEIGHT_MASK |
|
||||
R200_TXFORMAT_CUBIC_MAP_ENABLE |
|
||||
R200_TXFORMAT_F5_WIDTH_MASK |
|
||||
R200_TXFORMAT_F5_HEIGHT_MASK);
|
||||
t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
|
||||
(log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
|
||||
|
||||
t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
|
||||
if (tObj->Target == GL_TEXTURE_3D) {
|
||||
t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
|
||||
t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
|
||||
}
|
||||
else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
|
||||
ASSERT(log2Width == log2Height);
|
||||
t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
|
||||
(log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
|
||||
/* don't think we need this bit, if it exists at all - fglrx does not set it */
|
||||
(R200_TXFORMAT_CUBIC_MAP_ENABLE));
|
||||
t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
|
||||
t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
|
||||
(log2Height << R200_FACE_HEIGHT_1_SHIFT) |
|
||||
(log2Width << R200_FACE_WIDTH_2_SHIFT) |
|
||||
(log2Height << R200_FACE_HEIGHT_2_SHIFT) |
|
||||
(log2Width << R200_FACE_WIDTH_3_SHIFT) |
|
||||
(log2Height << R200_FACE_HEIGHT_3_SHIFT) |
|
||||
(log2Width << R200_FACE_WIDTH_4_SHIFT) |
|
||||
(log2Height << R200_FACE_HEIGHT_4_SHIFT));
|
||||
}
|
||||
else {
|
||||
/* If we don't in fact send enough texture coordinates, q will be 1,
|
||||
* making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
|
||||
*/
|
||||
t->pp_txformat_x |= R200_TEXCOORD_PROJ;
|
||||
}
|
||||
|
||||
t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
|
||||
((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
|
||||
|
||||
/* Only need to round to nearest 32 for textures, but the blitter
|
||||
* requires 64-byte aligned pitches, and we may/may not need the
|
||||
* blitter. NPOT only!
|
||||
*/
|
||||
if ( !t->image_override ) {
|
||||
if (baseImage->IsCompressed)
|
||||
t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
|
||||
else
|
||||
t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
|
||||
t->pp_txpitch -= 32;
|
||||
}
|
||||
|
||||
t->dirty_state = R200_TEX_ALL;
|
||||
|
||||
/* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* ================================================================
|
||||
* Texture combine functions
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -79,133 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
#define HW_UNLOCK()
|
||||
|
||||
/**
 * Return the address of the 4-byte pixel at (x, y) inside the mapped
 * buffer object of a renderbuffer.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise the offset follows the
 * macro and/or micro tiling flags.
 */
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *base = rrb->bo->ptr;
	const uint32_t tiling = rrb->bo->flags &
		(RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE);
	GLint blocks_per_line;
	GLint offset;

	if (rrb->has_surface || !tiling) {
		offset = x * rrb->cpp + y * rrb->pitch;
		return &base[offset];
	}

	if (!(tiling & RADEON_BO_FLAGS_MACRO_TILE)) {
		/* micro tiling only: 8 pixels per 32-byte block */
		blocks_per_line = ((rrb->pitch + 31) & ~31) >> 5;
		offset = ((y * blocks_per_line) << 5)
			+ ((x >> 3) << 5)
			+ ((x & 7) << 2);
	} else if (tiling & RADEON_BO_FLAGS_MICRO_TILE) {
		/* macro + micro tiling */
		blocks_per_line = rrb->pitch >> 5;
		offset = (((y >> 4) * blocks_per_line) << 11)
			+ (((y & 15) >> 1) << 8)
			+ ((y & 1) << 4)
			+ ((x >> 5) << 11)
			+ (((x & 31) >> 2) << 5)
			+ ((x & 3) << 2);
	} else {
		/* macro tiling only */
		blocks_per_line = rrb->pitch >> 6;
		offset = (((y >> 3) * blocks_per_line) << 11)
			+ ((y & 7) << 8)
			+ ((x >> 6) << 11)
			+ (((x & 63) >> 3) << 5)
			+ ((x & 7) << 2);
	}
	return &base[offset];
}
|
||||
|
||||
/**
 * Return the address of the 2-byte pixel at (x, y) inside the mapped
 * buffer object of a renderbuffer.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise the offset follows the
 * macro and/or micro tiling flags.
 *
 * Fix: where 16 pixels share one 32-byte block, the in-block offset is
 * now (x & 15) << 1.  The former << 2 reached up to 60 bytes and made
 * e.g. x = 8 and x = 16 resolve to the same address.
 */
static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 1;
			} else {
				nmacroblkpl = rrb->pitch >> 7;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 7) << 11;
				offset += ((x & 127) >> 4) << 5;
				/* two bytes per pixel within the 32-byte block */
				offset += (x & 15) << 1;
			}
		} else {
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 4) << 5;
			/* two bytes per pixel within the 32-byte block */
			offset += (x & 15) << 1;
		}
	}
	return &ptr[offset];
}
|
||||
|
||||
/**
 * Return the address of the pixel at (x, y) inside the mapped buffer
 * object of a renderbuffer, for any bytes-per-pixel value in rrb->cpp.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise block widths are
 * derived from cpp and the offset follows the tiling flags.
 */
static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
			   GLint x, GLint y)
{
	GLubyte *base = rrb->bo->ptr;
	const uint32_t tiling = rrb->bo->flags &
		(RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE);
	GLint micro_w;		/* pixels across one micro block */
	GLint macro_w;		/* pixels across one macro block */
	GLint blocks_per_line;
	GLint offset;

	if (rrb->has_surface || !tiling) {
		offset = x * rrb->cpp + y * rrb->pitch;
		return &base[offset];
	}

	offset = 0;
	if (!(tiling & RADEON_BO_FLAGS_MACRO_TILE)) {
		/* micro tiling only */
		micro_w = 32 / rrb->cpp;
		blocks_per_line = ((rrb->pitch + 31) & ~31) >> 5;
		offset += (y * blocks_per_line) << 5;
		offset += (x / micro_w) << 5;
		offset += (x & (micro_w - 1)) * rrb->cpp;
		return &base[offset];
	}

	if (tiling & RADEON_BO_FLAGS_MICRO_TILE) {
		/* macro + micro tiling */
		micro_w = 16 / rrb->cpp;
		macro_w = 128 / rrb->cpp;
		blocks_per_line = rrb->pitch / macro_w;
		offset += ((y >> 4) * blocks_per_line) << 11;
		offset += ((y & 15) >> 1) << 8;
		offset += (y & 1) << 4;
	} else {
		/* macro tiling only */
		micro_w = 32 / rrb->cpp;
		macro_w = 256 / rrb->cpp;
		blocks_per_line = rrb->pitch / macro_w;
		offset += ((y >> 3) * blocks_per_line) << 11;
		offset += (y & 7) << 8;
	}
	/* the x contribution has the same shape for both macro layouts */
	offset += (x / macro_w) << 11;
	offset += ((x & (macro_w - 1)) / micro_w) << 5;
	offset += (x & (micro_w - 1)) * rrb->cpp;

	return &base[offset];
}
|
||||
|
||||
/* ================================================================
|
||||
* Color buffer
|
||||
*/
|
||||
|
|
@ -345,96 +218,6 @@ do { \
|
|||
#define TAG(x) radeon##x##_z24_s8
|
||||
#include "stenciltmp.h"
|
||||
|
||||
/**
 * Map the buffer object behind a renderbuffer, reporting a failure on
 * stderr.
 *
 * \param rb     renderbuffer to map; NULL, or one without a buffer
 *               object, is silently ignored
 * \param write  passed through to radeon_bo_map()
 */
static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
{
	struct radeon_renderbuffer *rrb = (void*)rb;
	int r;

	/* Callers pass framebuffer attachment pointers without checking
	 * them first, so tolerate a missing renderbuffer here. */
	if (!rrb || !rrb->bo)
		return;

	r = radeon_bo_map(rrb->bo, write);
	if (r) {
		fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
			__FUNCTION__, r);
	}
}
|
||||
|
||||
static void unmap_buffer(struct gl_renderbuffer *rb)
|
||||
{
|
||||
struct radeon_renderbuffer *rrb = (void*)rb;
|
||||
|
||||
if (rrb->bo) {
|
||||
radeon_bo_unmap(rrb->bo);
|
||||
}
|
||||
}
|
||||
|
||||
/* Move locking out to get reasonable span performance (10x better
|
||||
* than doing this in HW_LOCK above). WaitForIdle() is the main
|
||||
* culprit.
|
||||
*/
|
||||
|
||||
/**
 * Prepare for software span rendering: fire pending vertices, map the
 * textures of all enabled units plus the colour, depth and stencil
 * buffers, then take the hardware lock and wait for idle.  The lock
 * stays held on return.
 */
static void radeonSpanRenderStart(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int unit;
	int buf;
#ifdef COMPILE_R300
	r300ContextPtr r300 = (r300ContextPtr) rmesa;
	R300_FIREVERTICES(r300);
#else
	RADEON_FIREVERTICES(rmesa);
#endif

	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
			continue;
		ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[unit]._Current);
	}

	/* colour draw buffers are mapped for writing */
	for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++)
		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[buf], GL_TRUE);

	/* the colour read buffer is mapped read-only */
	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);

	if (ctx->DrawBuffer->_DepthBuffer)
		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);

	if (ctx->DrawBuffer->_StencilBuffer)
		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);

	/* Locking and waiting for idle should only be needed in classic
	 * mode.  With a memory manager, mapping buffers and textures should
	 * itself wait for any earlier rendering that has to complete. */
	LOCK_HARDWARE(rmesa);
	radeonWaitForIdleLocked(rmesa);
}
|
||||
|
||||
/**
 * Finish software span rendering: flush swrast, drop the hardware lock,
 * then unmap the textures of all enabled units and the colour, depth
 * and stencil buffers.
 */
static void radeonSpanRenderFinish(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int unit;
	int buf;

	_swrast_flush(ctx);
	UNLOCK_HARDWARE(rmesa);

	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
			continue;
		ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[unit]._Current);
	}

	/* colour draw buffers */
	for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[buf]);
	}

	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);

	if (ctx->DrawBuffer->_DepthBuffer) {
		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
	}
	if (ctx->DrawBuffer->_StencilBuffer) {
		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
	}
}
|
||||
|
||||
void radeonInitSpanFuncs(GLcontext * ctx)
|
||||
{
|
||||
struct swrast_device_driver *swdd =
|
||||
|
|
|
|||
|
|
@ -2110,3 +2110,214 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
|
|||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Return the address of the 4-byte pixel at (x, y) inside the mapped
 * buffer object of a renderbuffer.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise the offset follows the
 * macro and/or micro tiling flags.
 *
 * \param rrb  renderbuffer whose bo->ptr is the base address
 * \param x    pixel column
 * \param y    pixel row
 * \return     pointer to the first byte of the pixel
 */
GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
		      GLint x, GLint y)
{
	GLubyte *base = rrb->bo->ptr;
	const uint32_t tiling = rrb->bo->flags &
		(RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE);
	GLint blocks_per_line;
	GLint offset;

	if (rrb->has_surface || !tiling) {
		offset = x * rrb->cpp + y * rrb->pitch;
		return &base[offset];
	}

	if (!(tiling & RADEON_BO_FLAGS_MACRO_TILE)) {
		/* micro tiling only: 8 pixels per 32-byte block */
		blocks_per_line = ((rrb->pitch + 31) & ~31) >> 5;
		offset = ((y * blocks_per_line) << 5)
			+ ((x >> 3) << 5)
			+ ((x & 7) << 2);
	} else if (tiling & RADEON_BO_FLAGS_MICRO_TILE) {
		/* macro + micro tiling */
		blocks_per_line = rrb->pitch >> 5;
		offset = (((y >> 4) * blocks_per_line) << 11)
			+ (((y & 15) >> 1) << 8)
			+ ((y & 1) << 4)
			+ ((x >> 5) << 11)
			+ (((x & 31) >> 2) << 5)
			+ ((x & 3) << 2);
	} else {
		/* macro tiling only */
		blocks_per_line = rrb->pitch >> 6;
		offset = (((y >> 3) * blocks_per_line) << 11)
			+ ((y & 7) << 8)
			+ ((x >> 6) << 11)
			+ (((x & 63) >> 3) << 5)
			+ ((x & 7) << 2);
	}
	return &base[offset];
}
|
||||
|
||||
/**
 * Return the address of the 2-byte pixel at (x, y) inside the mapped
 * buffer object of a renderbuffer.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise the offset follows the
 * macro and/or micro tiling flags.
 *
 * Fix: where 16 pixels share one 32-byte block, the in-block offset is
 * now (x & 15) << 1.  The former << 2 reached up to 60 bytes and made
 * e.g. x = 8 and x = 16 resolve to the same address; it also disagreed
 * with radeon_ptr(), which yields (x & 15) * 2 for cpp == 2.
 *
 * \param rrb  renderbuffer whose bo->ptr is the base address
 * \param x    pixel column
 * \param y    pixel row
 * \return     pointer to the first byte of the pixel
 */
GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
		      GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 1;
			} else {
				nmacroblkpl = rrb->pitch >> 7;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 7) << 11;
				offset += ((x & 127) >> 4) << 5;
				/* two bytes per pixel within the 32-byte block */
				offset += (x & 15) << 1;
			}
		} else {
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 4) << 5;
			/* two bytes per pixel within the 32-byte block */
			offset += (x & 15) << 1;
		}
	}
	return &ptr[offset];
}
|
||||
|
||||
/**
 * Return the address of the pixel at (x, y) inside the mapped buffer
 * object of a renderbuffer, for any bytes-per-pixel value in rrb->cpp.
 *
 * Linear addressing is used when the renderbuffer has a surface or the
 * buffer object carries no tiling flag; otherwise block widths are
 * derived from cpp and the offset follows the tiling flags.
 *
 * \param rrb  renderbuffer whose bo->ptr is the base address
 * \param x    pixel column
 * \param y    pixel row
 * \return     pointer to the first byte of the pixel
 */
GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
		    GLint x, GLint y)
{
	GLubyte *base = rrb->bo->ptr;
	const uint32_t tiling = rrb->bo->flags &
		(RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE);
	GLint micro_w;		/* pixels across one micro block */
	GLint macro_w;		/* pixels across one macro block */
	GLint blocks_per_line;
	GLint offset;

	if (rrb->has_surface || !tiling) {
		offset = x * rrb->cpp + y * rrb->pitch;
		return &base[offset];
	}

	offset = 0;
	if (!(tiling & RADEON_BO_FLAGS_MACRO_TILE)) {
		/* micro tiling only */
		micro_w = 32 / rrb->cpp;
		blocks_per_line = ((rrb->pitch + 31) & ~31) >> 5;
		offset += (y * blocks_per_line) << 5;
		offset += (x / micro_w) << 5;
		offset += (x & (micro_w - 1)) * rrb->cpp;
		return &base[offset];
	}

	if (tiling & RADEON_BO_FLAGS_MICRO_TILE) {
		/* macro + micro tiling */
		micro_w = 16 / rrb->cpp;
		macro_w = 128 / rrb->cpp;
		blocks_per_line = rrb->pitch / macro_w;
		offset += ((y >> 4) * blocks_per_line) << 11;
		offset += ((y & 15) >> 1) << 8;
		offset += (y & 1) << 4;
	} else {
		/* macro tiling only */
		micro_w = 32 / rrb->cpp;
		macro_w = 256 / rrb->cpp;
		blocks_per_line = rrb->pitch / macro_w;
		offset += ((y >> 3) * blocks_per_line) << 11;
		offset += (y & 7) << 8;
	}
	/* the x contribution has the same shape for both macro layouts */
	offset += (x / macro_w) << 11;
	offset += ((x & (macro_w - 1)) / micro_w) << 5;
	offset += (x & (micro_w - 1)) * rrb->cpp;

	return &base[offset];
}
|
||||
|
||||
|
||||
/**
 * Map the buffer object behind a renderbuffer, reporting a failure on
 * stderr.
 *
 * \param rb     renderbuffer to map; NULL, or one without a buffer
 *               object, is silently ignored
 * \param write  passed through to radeon_bo_map()
 */
static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
{
	struct radeon_renderbuffer *rrb = (void*)rb;
	int r;

	/* Callers pass framebuffer attachment pointers without checking
	 * them first, so tolerate a missing renderbuffer here. */
	if (!rrb || !rrb->bo)
		return;

	r = radeon_bo_map(rrb->bo, write);
	if (r) {
		fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
			__FUNCTION__, r);
	}
}
|
||||
|
||||
static void unmap_buffer(struct gl_renderbuffer *rb)
|
||||
{
|
||||
struct radeon_renderbuffer *rrb = (void*)rb;
|
||||
|
||||
if (rrb->bo) {
|
||||
radeon_bo_unmap(rrb->bo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prepare for software span rendering: flush pending vertices through
 * the context vtbl, map the textures of all enabled units plus the
 * colour, depth and stencil buffers, then take the hardware lock and
 * wait for idle.  The lock stays held on return.
 */
void radeonSpanRenderStart(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int unit;
	int buf;

	rmesa->vtbl.flush_vertices(rmesa);

	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
			continue;
		ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[unit]._Current);
	}

	/* colour draw buffers are mapped for writing */
	for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++)
		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[buf], GL_TRUE);

	/* the colour read buffer is mapped read-only */
	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);

	if (ctx->DrawBuffer->_DepthBuffer)
		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);

	if (ctx->DrawBuffer->_StencilBuffer)
		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);

	/* Locking and waiting for idle should only be needed in classic
	 * mode.  With a memory manager, mapping buffers and textures should
	 * itself wait for any earlier rendering that has to complete. */
	LOCK_HARDWARE(rmesa);
	radeonWaitForIdleLocked(rmesa);
}
|
||||
|
||||
/**
 * Finish software span rendering: flush swrast, drop the hardware lock,
 * then unmap the textures of all enabled units and the colour, depth
 * and stencil buffers.
 */
void radeonSpanRenderFinish(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int unit;
	int buf;

	_swrast_flush(ctx);
	UNLOCK_HARDWARE(rmesa);

	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
			continue;
		ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[unit]._Current);
	}

	/* colour draw buffers */
	for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[buf]);
	}

	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);

	if (ctx->DrawBuffer->_DepthBuffer) {
		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
	}
	if (ctx->DrawBuffer->_StencilBuffer) {
		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
	}
}
|
||||
|
||||
|
|
|
|||
|
|
@ -110,5 +110,12 @@ void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
|
|||
struct gl_texture_object *texObj,
|
||||
struct gl_texture_image *texImage);
|
||||
|
||||
|
||||
void radeonSpanRenderStart(GLcontext * ctx);
|
||||
void radeonSpanRenderFinish(GLcontext * ctx);
|
||||
GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y);
|
||||
GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y);
|
||||
GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
|
||||
GLint x, GLint y);
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue