mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 11:00:11 +01:00
Initial pass at vertex cache, more vertex fetch changes
This is just another step towards dynamic generation of vertex fetch code.
This commit is contained in:
parent
c290367985
commit
ca1a2da645
2 changed files with 368 additions and 119 deletions
|
|
@ -40,41 +40,83 @@
|
|||
#include "spu_vertex_shader.h"
|
||||
#include "spu_main.h"
|
||||
|
||||
#define CACHE_NAME attribute
|
||||
#define CACHED_TYPE qword
|
||||
#define CACHE_TYPE CACHE_TYPE_RO
|
||||
#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
|
||||
#define CACHE_LOG2NNWAY 2
|
||||
#define CACHE_LOG2NSETS 6
|
||||
#include <cache-api.h>
|
||||
|
||||
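/* Yes folks, this is ugly.
 *
 * CACHE_NAME, CACHE_NWAY and CACHE_NSETS are (re)defined by hand below,
 * after <cache-api.h> has been included, so that the cache-invalidation
 * loop in spu_update_vertex_fetch() can iterate over
 * CACHE_NWAY * CACHE_NSETS lines with CACHELINE_CLEARVALID().  The values
 * must be kept in sync with CACHE_LOG2NNWAY and CACHE_LOG2NSETS above.
 */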
|
||||
#undef CACHE_NWAY
|
||||
#undef CACHE_NSETS
|
||||
#define CACHE_NAME attribute
|
||||
#define CACHE_NWAY 4
|
||||
#define CACHE_NSETS (1U << 6)
|
||||
|
||||
|
||||
#define DRAW_DBG 0
|
||||
|
||||
|
||||
static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 };
|
||||
|
||||
static INLINE qword
|
||||
fetch_unaligned_qword(const void *ptr)
|
||||
/**
|
||||
* Fetch between 1 and 32 bytes from an unaligned address
|
||||
*/
|
||||
static INLINE void
|
||||
fetch_unaligned(qword *dst, unsigned ea, unsigned size)
|
||||
{
|
||||
const int shift = (unsigned)(ptr) & 0x0f;
|
||||
const qword x = *(qword *)(ptr);
|
||||
const qword y = *(qword *)(ptr + 16);
|
||||
qword tmp[4];
|
||||
const int shift = ea & 0x0f;
|
||||
const unsigned aligned_start_ea = ea & ~0x0f;
|
||||
const unsigned aligned_end_ea = (ea + size) & ~0x0f;
|
||||
const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1;
|
||||
unsigned i;
|
||||
|
||||
return si_or((qword) spu_slqwbyte(x, shift),
|
||||
(qword) spu_rlmaskqwbyte(y, shift - 16));
|
||||
|
||||
if (shift == 0) {
|
||||
/* Data is already aligned. Fetch directly into the destination buffer.
|
||||
*/
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
|
||||
}
|
||||
} else {
|
||||
/* Fetch data from the cache to the local buffer.
|
||||
*/
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
|
||||
}
|
||||
|
||||
|
||||
/* Fix the alignment of the data and write to the destination buffer.
|
||||
*/
|
||||
for (i = 0; i < ((size + 15) / 16); i++) {
|
||||
dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift),
|
||||
(qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static qword
|
||||
fetch_R32G32B32A32_FLOAT(const void *ptr)
|
||||
fetch_R32G32B32A32_FLOAT(const qword *qw)
|
||||
{
|
||||
return fetch_unaligned_qword(ptr);
|
||||
return *qw;
|
||||
}
|
||||
|
||||
|
||||
static qword
|
||||
fetch_R32G32B32A32_USCALED(const void *ptr)
|
||||
fetch_R32G32B32A32_USCALED(const qword *qw)
|
||||
{
|
||||
return si_cuflt(fetch_unaligned_qword(ptr), 0);
|
||||
return si_cuflt(*qw, 0);
|
||||
}
|
||||
|
||||
|
||||
static qword
|
||||
fetch_R32G32B32A32_UNORM(const void *ptr)
|
||||
fetch_R32G32B32A32_UNORM(const qword *qw)
|
||||
{
|
||||
qword x = si_cuflt(fetch_unaligned_qword(ptr), 0);
|
||||
qword x = si_cuflt(*qw, 0);
|
||||
vec_float4 scale = spu_splats(1.0f / 255.0f);
|
||||
|
||||
return si_fm(x, (qword) scale);
|
||||
|
|
@ -82,12 +124,147 @@ fetch_R32G32B32A32_UNORM(const void *ptr)
|
|||
|
||||
|
||||
static qword
|
||||
fetch_R32G32B32A32_SSCALED(const void *ptr)
|
||||
fetch_R32G32B32A32_SSCALED(const qword *qw)
|
||||
{
|
||||
return si_csflt(fetch_unaligned_qword(ptr), 0);
|
||||
return si_csflt(*qw, 0);
|
||||
}
|
||||
|
||||
|
||||
/* 32-bit float data needs no conversion: yield the value q points to.
 * The argument is parenthesised so that expressions such as "p + 1"
 * are dereferenced as a whole.
 */
#define CVT_32_FLOAT(q)    (*(q))
|
||||
|
||||
static INLINE qword
|
||||
CVT_64_FLOAT(const qword *qw)
|
||||
{
|
||||
qword shuf_first = (qword) {
|
||||
0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
};
|
||||
|
||||
qword a = si_frds(qw[0]);
|
||||
qword b = si_frds(si_rotqbyi(qw[0], 8));
|
||||
qword c = si_frds(qw[1]);
|
||||
qword d = si_frds(si_rotqbyi(qw[1], 8));
|
||||
|
||||
qword ab = si_shufb(a, b, shuf_first);
|
||||
qword cd = si_shufb(c, d, si_rotqbyi(shuf_first, 8));
|
||||
|
||||
return si_or(ab, cd);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_8_USCALED(const qword *qw)
|
||||
{
|
||||
qword shuffle = (qword) {
|
||||
0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
|
||||
0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
|
||||
};
|
||||
|
||||
return si_cuflt(si_shufb(*qw, *qw, shuffle), 0);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_16_USCALED(const qword *qw)
|
||||
{
|
||||
qword shuffle = (qword) {
|
||||
0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
|
||||
0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
|
||||
};
|
||||
|
||||
return si_cuflt(si_shufb(*qw, *qw, shuffle), 0);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_32_USCALED(const qword *qw)
|
||||
{
|
||||
return si_cuflt(*qw, 0);
|
||||
}
|
||||
|
||||
static INLINE qword
|
||||
CVT_8_SSCALED(const qword *qw)
|
||||
{
|
||||
qword shuffle = (qword) {
|
||||
0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
|
||||
0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
|
||||
};
|
||||
|
||||
return si_csflt(si_shufb(*qw, *qw, shuffle), 0);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_16_SSCALED(const qword *qw)
|
||||
{
|
||||
qword shuffle = (qword) {
|
||||
0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
|
||||
0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
|
||||
};
|
||||
|
||||
return si_csflt(si_shufb(*qw, *qw, shuffle), 0);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_32_SSCALED(const qword *qw)
|
||||
{
|
||||
return si_csflt(*qw, 0);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_8_UNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 255.0f);
|
||||
return si_fm(CVT_8_USCALED(qw), scale);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_16_UNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 65535.0f);
|
||||
return si_fm(CVT_16_USCALED(qw), scale);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_32_UNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 4294967295.0f);
|
||||
return si_fm(CVT_32_USCALED(qw), scale);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_8_SNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 127.0f);
|
||||
return si_fm(CVT_8_SSCALED(qw), scale);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_16_SNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 32767.0f);
|
||||
return si_fm(CVT_16_SSCALED(qw), scale);
|
||||
}
|
||||
|
||||
|
||||
static INLINE qword
|
||||
CVT_32_SNORM(const qword *qw)
|
||||
{
|
||||
const qword scale = (qword) spu_splats(1.0f / 2147483647.0f);
|
||||
return si_fm(CVT_32_SSCALED(qw), scale);
|
||||
}
|
||||
|
||||
/* Component-count masks passed as the select operand of si_selb() by
 * FETCH_ATTRIB.  Bits that are set take the value from 'defaults'; clear
 * bits keep the converted attribute data.  SZ_n keeps the first n 32-bit
 * components: an all-ones quadword is shifted right by 4 * n bytes, which
 * leaves ones only over the trailing (4 - n) components.  SZ_4 is all
 * zeros, i.e. no defaults are used.
 */
#define SZ_1 si_rotqmbyi(si_il(~0), -4)
#define SZ_2 si_rotqmbyi(si_il(~0), -8)
#define SZ_3 si_rotqmbyi(si_il(~0), -12)
#define SZ_4 si_il(0U)
|
||||
|
||||
/**
|
||||
* Fetch a float[4] vertex attribute from memory, doing format/type
|
||||
* conversion as needed.
|
||||
|
|
@ -97,117 +274,84 @@ fetch_R32G32B32A32_SSCALED(const void *ptr)
|
|||
*/
|
||||
/* Generates fetch_<NAME>(qw): the data at qw is converted with CVT, then
 * every bit that is set in the SZ mask is replaced by the corresponding
 * bit of 'defaults'.
 */
#define FETCH_ATTRIB( NAME, SZ, CVT )                          \
static qword                                                   \
fetch_##NAME(const qword *qw)                                  \
{                                                              \
   const qword converted = CVT(qw);                            \
   const qword fallback = (qword) defaults;                    \
   return si_selb(converted, fallback, SZ);                    \
}
|
||||
|
||||
#define CVT_64_FLOAT (float) ((double *) ptr)[i]
|
||||
#define CVT_32_FLOAT ((float *) ptr)[i]
|
||||
FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT )
|
||||
|
||||
#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
|
||||
#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
|
||||
#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
|
||||
FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT )
|
||||
|
||||
#define CVT_8_SSCALED (float) ((char *) ptr)[i]
|
||||
#define CVT_16_SSCALED (float) ((short *) ptr)[i]
|
||||
#define CVT_32_SSCALED (float) ((int *) ptr)[i]
|
||||
FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED )
|
||||
|
||||
#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
|
||||
#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
|
||||
#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
|
||||
FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED )
|
||||
|
||||
#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f
|
||||
#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
|
||||
#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
|
||||
FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM )
|
||||
|
||||
FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
|
||||
|
||||
FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
|
||||
//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM )
|
||||
|
||||
|
||||
|
||||
static spu_fetch_func get_fetch_func( enum pipe_format format )
|
||||
{
|
||||
#if 0
|
||||
{
|
||||
char tmp[80];
|
||||
pf_sprint_name(tmp, format);
|
||||
_mesa_printf("%s: %s\n", __FUNCTION__, tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R64_FLOAT:
|
||||
return fetch_R64_FLOAT;
|
||||
|
|
@ -348,6 +492,96 @@ static spu_fetch_func get_fetch_func( enum pipe_format format )
|
|||
}
|
||||
|
||||
|
||||
static unsigned get_vertex_size( enum pipe_format format )
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R64_FLOAT:
|
||||
return 8;
|
||||
case PIPE_FORMAT_R64G64_FLOAT:
|
||||
return 2 * 8;
|
||||
case PIPE_FORMAT_R64G64B64_FLOAT:
|
||||
return 3 * 8;
|
||||
case PIPE_FORMAT_R64G64B64A64_FLOAT:
|
||||
return 4 * 8;
|
||||
|
||||
case PIPE_FORMAT_R32_SSCALED:
|
||||
case PIPE_FORMAT_R32_SNORM:
|
||||
case PIPE_FORMAT_R32_USCALED:
|
||||
case PIPE_FORMAT_R32_UNORM:
|
||||
case PIPE_FORMAT_R32_FLOAT:
|
||||
return 4;
|
||||
case PIPE_FORMAT_R32G32_SSCALED:
|
||||
case PIPE_FORMAT_R32G32_SNORM:
|
||||
case PIPE_FORMAT_R32G32_USCALED:
|
||||
case PIPE_FORMAT_R32G32_UNORM:
|
||||
case PIPE_FORMAT_R32G32_FLOAT:
|
||||
return 2 * 4;
|
||||
case PIPE_FORMAT_R32G32B32_SSCALED:
|
||||
case PIPE_FORMAT_R32G32B32_SNORM:
|
||||
case PIPE_FORMAT_R32G32B32_USCALED:
|
||||
case PIPE_FORMAT_R32G32B32_UNORM:
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
return 3 * 4;
|
||||
case PIPE_FORMAT_R32G32B32A32_SSCALED:
|
||||
case PIPE_FORMAT_R32G32B32A32_SNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_USCALED:
|
||||
case PIPE_FORMAT_R32G32B32A32_UNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
return 4 * 4;
|
||||
|
||||
case PIPE_FORMAT_R16_SSCALED:
|
||||
case PIPE_FORMAT_R16_SNORM:
|
||||
case PIPE_FORMAT_R16_UNORM:
|
||||
case PIPE_FORMAT_R16_USCALED:
|
||||
return 2;
|
||||
case PIPE_FORMAT_R16G16_SSCALED:
|
||||
case PIPE_FORMAT_R16G16_SNORM:
|
||||
case PIPE_FORMAT_R16G16_USCALED:
|
||||
case PIPE_FORMAT_R16G16_UNORM:
|
||||
return 2 * 2;
|
||||
case PIPE_FORMAT_R16G16B16_SSCALED:
|
||||
case PIPE_FORMAT_R16G16B16_SNORM:
|
||||
case PIPE_FORMAT_R16G16B16_USCALED:
|
||||
case PIPE_FORMAT_R16G16B16_UNORM:
|
||||
return 3 * 2;
|
||||
case PIPE_FORMAT_R16G16B16A16_SSCALED:
|
||||
case PIPE_FORMAT_R16G16B16A16_SNORM:
|
||||
case PIPE_FORMAT_R16G16B16A16_USCALED:
|
||||
case PIPE_FORMAT_R16G16B16A16_UNORM:
|
||||
return 4 * 2;
|
||||
|
||||
case PIPE_FORMAT_R8_SSCALED:
|
||||
case PIPE_FORMAT_R8_SNORM:
|
||||
case PIPE_FORMAT_R8_USCALED:
|
||||
case PIPE_FORMAT_R8_UNORM:
|
||||
return 1;
|
||||
case PIPE_FORMAT_R8G8_SSCALED:
|
||||
case PIPE_FORMAT_R8G8_SNORM:
|
||||
case PIPE_FORMAT_R8G8_USCALED:
|
||||
case PIPE_FORMAT_R8G8_UNORM:
|
||||
return 2 * 1;
|
||||
case PIPE_FORMAT_R8G8B8_SSCALED:
|
||||
case PIPE_FORMAT_R8G8B8_SNORM:
|
||||
case PIPE_FORMAT_R8G8B8_USCALED:
|
||||
case PIPE_FORMAT_R8G8B8_UNORM:
|
||||
return 3 * 1;
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_SSCALED:
|
||||
case PIPE_FORMAT_R8G8B8A8_SNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_USCALED:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
return 4 * 1;
|
||||
|
||||
case 0:
|
||||
return 0; /* not sure why this is needed */
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetch vertex attributes for 'count' vertices.
|
||||
*/
|
||||
|
|
@ -361,8 +595,6 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
|||
|
||||
assert(count <= 4);
|
||||
|
||||
wait_on_mask(1 << TAG_VERTEX_BUFFER);
|
||||
|
||||
#if DRAW_DBG
|
||||
printf("SPU: %s count = %u, nr_attrs = %u\n",
|
||||
__FUNCTION__, count, nr_attrs);
|
||||
|
|
@ -375,33 +607,40 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
|||
const uint64_t src = draw->vertex_fetch.src_ptr[attr];
|
||||
const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr];
|
||||
unsigned i;
|
||||
unsigned idx;
|
||||
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
|
||||
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
|
||||
qword p[4];
|
||||
qword in[2 * 4];
|
||||
|
||||
|
||||
/* Fetch four attributes for four vertices.
|
||||
*
|
||||
* Could fetch directly into AOS format, but this is meant to be
|
||||
* a prototype for an sse implementation, which would have
|
||||
* difficulties doing that.
|
||||
*/
|
||||
idx = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
uint8_t buffer[32] ALIGN16_ATTRIB;
|
||||
const uint64_t addr = src + (elts[i] * pitch);
|
||||
const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
|
||||
|
||||
#if DRAW_DBG
|
||||
printf("SPU: fetching = 0x%llx\n", addr);
|
||||
#endif
|
||||
mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
|
||||
wait_on_mask(1 << TAG_VERTEX_BUFFER);
|
||||
|
||||
p[i] = (*fetch)(buffer + (addr & 0x0f));
|
||||
fetch_unaligned(& in[idx], addr, bytes_per_entry);
|
||||
idx += quads_per_entry;
|
||||
}
|
||||
|
||||
/* Be nice and zero out any missing vertices:
|
||||
/* Be nice and zero out any missing vertices.
|
||||
*/
|
||||
for (/* empty */; i < 4; i++)
|
||||
p[i] = si_xor(p[i], p[i]);
|
||||
(void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
|
||||
|
||||
|
||||
/* Convert all 4 vertices to vectors of float.
|
||||
*/
|
||||
idx = 0;
|
||||
for (i = 0; i < 4; i++) {
|
||||
p[i] = (*fetch)(in + idx);
|
||||
idx += quads_per_entry;
|
||||
}
|
||||
|
||||
|
||||
/* Transpose/swizzle into vector-friendly format. Currently
|
||||
* assuming that all vertex shader inputs are float[4], but this
|
||||
|
|
@ -422,9 +661,18 @@ void spu_update_vertex_fetch( struct spu_vs_context *draw )
|
|||
unsigned i;
|
||||
|
||||
|
||||
/* Invalidate the vertex cache.
|
||||
*/
|
||||
for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
|
||||
CACHELINE_CLEARVALID(i);
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
|
||||
draw->vertex_fetch.fetch[i] =
|
||||
get_fetch_func(draw->vertex_fetch.format[i]);
|
||||
draw->vertex_fetch.size[i] =
|
||||
get_vertex_size(draw->vertex_fetch.format[i]);
|
||||
}
|
||||
|
||||
draw->vertex_fetch.fetch_func = generic_vertex_fetch;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
struct spu_vs_context;
|
||||
|
||||
typedef qword (*spu_fetch_func)(const void *ptr);
|
||||
typedef qword (*spu_fetch_func)(const qword *qw);
|
||||
typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
|
||||
struct spu_exec_machine *machine,
|
||||
const unsigned *elts,
|
||||
|
|
@ -18,6 +18,7 @@ struct spu_vs_context {
|
|||
struct {
|
||||
uint64_t src_ptr[PIPE_ATTRIB_MAX];
|
||||
unsigned pitch[PIPE_ATTRIB_MAX];
|
||||
unsigned size[PIPE_ATTRIB_MAX];
|
||||
enum pipe_format format[PIPE_ATTRIB_MAX];
|
||||
unsigned nr_attrs;
|
||||
boolean dirty;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue