mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
Fetch routines convert and transpose all 4 vertices at once.
This commit is contained in:
parent
125451b9f0
commit
dd07e154d2
2 changed files with 71 additions and 83 deletions
|
|
@ -100,7 +100,7 @@ fetch_unaligned(qword *dst, unsigned ea, unsigned size)
|
|||
}
|
||||
|
||||
|
||||
#define CVT_32_FLOAT(q) (*q)
|
||||
#define CVT_32_FLOAT(q) (*(q))
|
||||
|
||||
static INLINE qword
|
||||
CVT_64_FLOAT(const qword *qw)
|
||||
|
|
@ -242,85 +242,90 @@ CVT_32_SNORM(const qword *qw)
|
|||
* This is probably needed/duplicated elsewhere, e.g. format
|
||||
* conversion, texture sampling etc.
|
||||
*/
|
||||
#define FETCH_ATTRIB( NAME, SZ, CVT ) \
|
||||
static qword \
|
||||
fetch_##NAME(const qword *qw) \
|
||||
{ \
|
||||
qword expanded = CVT(qw); \
|
||||
return si_selb(expanded, (qword) defaults, SZ); \
|
||||
#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \
|
||||
static void \
|
||||
fetch_##NAME(qword *out, const qword *in) \
|
||||
{ \
|
||||
qword tmp[4]; \
|
||||
\
|
||||
tmp[0] = si_selb(CVT(in + (0 * N)), (qword) defaults, SZ); \
|
||||
tmp[1] = si_selb(CVT(in + (1 * N)), (qword) defaults, SZ); \
|
||||
tmp[2] = si_selb(CVT(in + (2 * N)), (qword) defaults, SZ); \
|
||||
tmp[3] = si_selb(CVT(in + (3 * N)), (qword) defaults, SZ); \
|
||||
_transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) tmp); \
|
||||
}
|
||||
|
||||
FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT )
|
||||
FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 )
|
||||
FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 )
|
||||
FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 )
|
||||
FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT )
|
||||
FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 )
|
||||
FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 )
|
||||
FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 )
|
||||
FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED )
|
||||
FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 )
|
||||
FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 )
|
||||
FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 )
|
||||
FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED )
|
||||
FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM )
|
||||
FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 )
|
||||
FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 )
|
||||
FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 )
|
||||
FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM )
|
||||
FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 )
|
||||
FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 )
|
||||
FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 )
|
||||
FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED )
|
||||
FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 )
|
||||
FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 )
|
||||
FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 )
|
||||
FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED )
|
||||
FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM )
|
||||
FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 )
|
||||
FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 )
|
||||
FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 )
|
||||
FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM )
|
||||
FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 )
|
||||
FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 )
|
||||
FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 )
|
||||
FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED )
|
||||
FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 )
|
||||
FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 )
|
||||
FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 )
|
||||
FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED )
|
||||
FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 )
|
||||
FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 )
|
||||
FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 )
|
||||
FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 )
|
||||
FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM )
|
||||
FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 )
|
||||
FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 )
|
||||
FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 )
|
||||
FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 )
|
||||
|
||||
FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM )
|
||||
FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 )
|
||||
|
||||
|
||||
|
||||
|
|
@ -584,7 +589,6 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
|||
unsigned idx;
|
||||
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
|
||||
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
|
||||
qword p[4];
|
||||
qword in[2 * 4];
|
||||
|
||||
|
||||
|
|
@ -609,23 +613,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
|
|||
|
||||
/* Convert all 4 vertices to vectors of float.
|
||||
*/
|
||||
idx = 0;
|
||||
for (i = 0; i < 4; i++) {
|
||||
p[i] = (*fetch)(in + idx);
|
||||
idx += quads_per_entry;
|
||||
}
|
||||
|
||||
|
||||
/* Transpose/swizzle into vector-friendly format. Currently
|
||||
* assuming that all vertex shader inputs are float[4], but this
|
||||
* isn't true -- if the vertex shader only wants tex0.xy, we
|
||||
* could optimize for that.
|
||||
*
|
||||
* To do so fully without codegen would probably require an
|
||||
* excessive number of fetch functions, but we could at least
|
||||
* minimize the transpose step:
|
||||
*/
|
||||
_transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p);
|
||||
(*fetch)(&machine->Inputs[attr].xyzw[0].q, in);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
struct spu_vs_context;
|
||||
|
||||
typedef qword (*spu_fetch_func)(const qword *qw);
|
||||
typedef void (*spu_fetch_func)(qword *out, const qword *in);
|
||||
typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
|
||||
struct spu_exec_machine *machine,
|
||||
const unsigned *elts,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue