mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 12:30:09 +01:00
Merge ../mesa into vulkan
This commit is contained in:
commit
f0993f81c7
316 changed files with 6879 additions and 3530 deletions
76
configure.ac
76
configure.ac
|
|
@ -396,6 +396,61 @@ fi
|
|||
AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
|
||||
AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
|
||||
|
||||
dnl Check for Endianness
|
||||
AC_C_BIGENDIAN(
|
||||
little_endian=no,
|
||||
little_endian=yes,
|
||||
little_endian=no,
|
||||
little_endian=no
|
||||
)
|
||||
|
||||
dnl Check for POWER8 Architecture
|
||||
PWR8_CFLAGS="-mpower8-vector"
|
||||
have_pwr8_intrinsics=no
|
||||
AC_MSG_CHECKING(whether gcc supports -mpower8-vector)
|
||||
save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$PWR8_CFLAGS $CFLAGS"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
|
||||
#error "Need GCC >= 4.8 for sane POWER8 support"
|
||||
#endif
|
||||
#include <altivec.h>
|
||||
int main () {
|
||||
vector unsigned char r;
|
||||
vector unsigned int v = vec_splat_u32 (1);
|
||||
r = __builtin_vec_vgbbd ((vector unsigned char) v);
|
||||
return 0;
|
||||
}]])], have_pwr8_intrinsics=yes)
|
||||
CFLAGS=$save_CFLAGS
|
||||
|
||||
AC_ARG_ENABLE(pwr8,
|
||||
[AC_HELP_STRING([--disable-pwr8-inst],
|
||||
[disable POWER8-specific instructions])],
|
||||
[enable_pwr8=$enableval], [enable_pwr8=auto])
|
||||
|
||||
if test "x$enable_pwr8" = xno ; then
|
||||
have_pwr8_intrinsics=disabled
|
||||
fi
|
||||
|
||||
if test $have_pwr8_intrinsics = yes && test $little_endian = yes ; then
|
||||
DEFINES="$DEFINES -D_ARCH_PWR8"
|
||||
CXXFLAGS="$CXXFLAGS $PWR8_CFLAGS"
|
||||
CFLAGS="$CFLAGS $PWR8_CFLAGS"
|
||||
else
|
||||
PWR8_CFLAGS=
|
||||
fi
|
||||
|
||||
AC_MSG_RESULT($have_pwr8_intrinsics)
|
||||
if test "x$enable_pwr8" = xyes && test $have_pwr8_intrinsics = no ; then
|
||||
AC_MSG_ERROR([POWER8 compiler support not detected])
|
||||
fi
|
||||
|
||||
if test $have_pwr8_intrinsics = yes && test $little_endian = no ; then
|
||||
AC_MSG_WARN([POWER8 optimization is enabled only on POWER8 Little-Endian])
|
||||
fi
|
||||
|
||||
AC_SUBST([PWR8_CFLAGS], $PWR8_CFLAGS)
|
||||
|
||||
dnl Can't have static and shared libraries, default to static if user
|
||||
dnl explicitly requested. If both disabled, set to static since shared
|
||||
dnl was explicitly requested.
|
||||
|
|
@ -421,8 +476,29 @@ AC_ARG_ENABLE([debug],
|
|||
[enable_debug="$enableval"],
|
||||
[enable_debug=no]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE([profile],
|
||||
[AS_HELP_STRING([--enable-profile],
|
||||
[enable profiling of code @<:@default=disabled@:>@])],
|
||||
[enable_profile="$enableval"],
|
||||
[enable_profile=no]
|
||||
)
|
||||
|
||||
if test "x$enable_profile" = xyes; then
|
||||
DEFINES="$DEFINES -DPROFILE"
|
||||
if test "x$GCC" = xyes; then
|
||||
CFLAGS="$CFLAGS -fno-omit-frame-pointer"
|
||||
fi
|
||||
if test "x$GXX" = xyes; then
|
||||
CXXFLAGS="$CXXFLAGS -fno-omit-frame-pointer"
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "x$enable_debug" = xyes; then
|
||||
DEFINES="$DEFINES -DDEBUG"
|
||||
if test "x$enable_profile" = xyes; then
|
||||
AC_MSG_WARN([Debug and Profile are enabled at the same time])
|
||||
fi
|
||||
if test "x$GCC" = xyes; then
|
||||
if ! echo "$CFLAGS" | grep -q -e '-g'; then
|
||||
CFLAGS="$CFLAGS -g"
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@
|
|||
<li><a href="http://www.opengl.org" target="_parent">OpenGL website</a>
|
||||
<li><a href="http://dri.freedesktop.org" target="_parent">DRI website</a>
|
||||
<li><a href="http://www.freedesktop.org" target="_parent">freedesktop.org</a>
|
||||
<li><a href="http://planet.freedesktop.org" target="_parent">Developer blogs</a>
|
||||
</ul>
|
||||
|
||||
<b>Hosted by:</b>
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_base_instance on freedreno/a4xx</li>
|
||||
<li>GL_ARB_compute_shader on i965</li>
|
||||
<li>GL_ARB_copy_image on r600</li>
|
||||
<li>GL_ARB_indirect_parameters on nvc0</li>
|
||||
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
|
||||
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
|
||||
|
|
|
|||
|
|
@ -132,6 +132,28 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
|
|||
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1")
|
||||
CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
|
||||
CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
|
||||
CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
|
||||
CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
|
||||
CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
|
||||
CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
|
||||
CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3")
|
||||
CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3")
|
||||
CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4")
|
||||
CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4")
|
||||
CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
|
||||
CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4")
|
||||
CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
|
|
|
|||
|
|
@ -938,7 +938,7 @@ draw_aaline_prepare_outputs(struct draw_context *draw,
|
|||
const struct pipe_rasterizer_state *rast = draw->rasterizer;
|
||||
|
||||
/* update vertex attrib info */
|
||||
aaline->pos_slot = draw_current_shader_position_output(draw);;
|
||||
aaline->pos_slot = draw_current_shader_position_output(draw);
|
||||
|
||||
if (!rast->line_smooth)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -611,6 +611,8 @@ do_clip_line(struct draw_stage *stage,
|
|||
struct prim_header newprim;
|
||||
int viewport_index;
|
||||
|
||||
newprim.flags = header->flags;
|
||||
|
||||
if (stage->draw->rasterizer->flatshade_first) {
|
||||
prov_vertex = v0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage,
|
|||
}
|
||||
|
||||
|
||||
static void point( struct draw_stage *stage,
|
||||
struct vertex_header *v0 )
|
||||
static void point(struct draw_stage *stage,
|
||||
struct prim_header *header,
|
||||
struct vertex_header *v0)
|
||||
{
|
||||
struct prim_header tmp;
|
||||
tmp.det = header->det;
|
||||
tmp.flags = 0;
|
||||
tmp.v[0] = v0;
|
||||
stage->next->point( stage->next, &tmp );
|
||||
stage->next->point(stage->next, &tmp);
|
||||
}
|
||||
|
||||
static void line( struct draw_stage *stage,
|
||||
struct vertex_header *v0,
|
||||
struct vertex_header *v1 )
|
||||
static void line(struct draw_stage *stage,
|
||||
struct prim_header *header,
|
||||
struct vertex_header *v0,
|
||||
struct vertex_header *v1)
|
||||
{
|
||||
struct prim_header tmp;
|
||||
tmp.det = header->det;
|
||||
tmp.flags = 0;
|
||||
tmp.v[0] = v0;
|
||||
tmp.v[1] = v1;
|
||||
stage->next->line( stage->next, &tmp );
|
||||
stage->next->line(stage->next, &tmp);
|
||||
}
|
||||
|
||||
|
||||
static void points( struct draw_stage *stage,
|
||||
struct prim_header *header )
|
||||
static void points(struct draw_stage *stage,
|
||||
struct prim_header *header)
|
||||
{
|
||||
struct vertex_header *v0 = header->v[0];
|
||||
struct vertex_header *v1 = header->v[1];
|
||||
|
|
@ -114,27 +120,41 @@ static void points( struct draw_stage *stage,
|
|||
|
||||
inject_front_face_info(stage, header);
|
||||
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
|
||||
point(stage, header, v0);
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
|
||||
point(stage, header, v1);
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
|
||||
point(stage, header, v2);
|
||||
}
|
||||
|
||||
|
||||
static void lines( struct draw_stage *stage,
|
||||
struct prim_header *header )
|
||||
static void lines(struct draw_stage *stage,
|
||||
struct prim_header *header)
|
||||
{
|
||||
struct vertex_header *v0 = header->v[0];
|
||||
struct vertex_header *v1 = header->v[1];
|
||||
struct vertex_header *v2 = header->v[2];
|
||||
|
||||
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
|
||||
stage->next->reset_stipple_counter( stage->next );
|
||||
/*
|
||||
* XXX could revisit this. The only stage which cares is the line
|
||||
* stipple stage. Could just emit correct reset flags here and not
|
||||
* bother about all the calling through reset_stipple_counter
|
||||
* stages. Though technically it is necessary if line stipple is
|
||||
* handled by the driver, but this is not actually hooked up when
|
||||
* using vbuf (vbuf stage reset_stipple_counter does nothing).
|
||||
*/
|
||||
stage->next->reset_stipple_counter(stage->next);
|
||||
|
||||
inject_front_face_info(stage, header);
|
||||
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 );
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
|
||||
line(stage, header, v2, v0);
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
|
||||
line(stage, header, v0, v1);
|
||||
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
|
||||
line(stage, header, v1, v2);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -74,9 +74,10 @@ struct vbuf_stage {
|
|||
unsigned max_indices;
|
||||
unsigned nr_indices;
|
||||
|
||||
/* Cache point size somewhere it's address won't change:
|
||||
/* Cache point size somewhere its address won't change:
|
||||
*/
|
||||
float point_size;
|
||||
float zero4[4];
|
||||
|
||||
struct translate_cache *cache;
|
||||
};
|
||||
|
|
@ -205,6 +206,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
|
|||
struct translate_key hw_key;
|
||||
unsigned dst_offset;
|
||||
unsigned i;
|
||||
const struct vertex_info *vinfo;
|
||||
|
||||
vbuf->render->set_primitive(vbuf->render, prim);
|
||||
|
||||
|
|
@ -215,27 +217,33 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
|
|||
* state change.
|
||||
*/
|
||||
vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
|
||||
vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
|
||||
vinfo = vbuf->vinfo;
|
||||
vbuf->vertex_size = vinfo->size * sizeof(float);
|
||||
|
||||
/* Translate from pipeline vertices to hw vertices.
|
||||
*/
|
||||
dst_offset = 0;
|
||||
|
||||
for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
|
||||
for (i = 0; i < vinfo->num_attribs; i++) {
|
||||
unsigned emit_sz = 0;
|
||||
unsigned src_buffer = 0;
|
||||
enum pipe_format output_format;
|
||||
unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
|
||||
unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
|
||||
|
||||
output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit);
|
||||
emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit);
|
||||
output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
|
||||
emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
|
||||
|
||||
/* doesn't handle EMIT_OMIT */
|
||||
assert(emit_sz != 0);
|
||||
|
||||
if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
|
||||
src_buffer = 1;
|
||||
src_offset = 0;
|
||||
if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
|
||||
src_buffer = 1;
|
||||
src_offset = 0;
|
||||
}
|
||||
else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
|
||||
/* elements which don't exist will get assigned zeros */
|
||||
src_buffer = 2;
|
||||
src_offset = 0;
|
||||
}
|
||||
|
||||
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
|
||||
|
|
@ -249,7 +257,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
|
|||
dst_offset += emit_sz;
|
||||
}
|
||||
|
||||
hw_key.nr_elements = vbuf->vinfo->num_attribs;
|
||||
hw_key.nr_elements = vinfo->num_attribs;
|
||||
hw_key.output_stride = vbuf->vertex_size;
|
||||
|
||||
/* Don't bother with caching at this stage:
|
||||
|
|
@ -261,6 +269,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
|
|||
vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
|
||||
|
||||
vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0);
|
||||
vbuf->translate->set_buffer(vbuf->translate, 2, &vbuf->zero4[0], 0, ~0);
|
||||
}
|
||||
|
||||
vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
|
||||
|
|
@ -428,7 +437,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
|
|||
struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
|
||||
if (!vbuf)
|
||||
goto fail;
|
||||
|
||||
|
||||
vbuf->stage.draw = draw;
|
||||
vbuf->stage.name = "vbuf";
|
||||
vbuf->stage.point = vbuf_first_point;
|
||||
|
|
@ -437,29 +446,30 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
|
|||
vbuf->stage.flush = vbuf_flush;
|
||||
vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
|
||||
vbuf->stage.destroy = vbuf_destroy;
|
||||
|
||||
|
||||
vbuf->render = render;
|
||||
vbuf->max_indices = MIN2(render->max_indices, UNDEFINED_VERTEX_ID-1);
|
||||
|
||||
vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
|
||||
sizeof(vbuf->indices[0]),
|
||||
16 );
|
||||
vbuf->indices = (ushort *) align_malloc(vbuf->max_indices *
|
||||
sizeof(vbuf->indices[0]),
|
||||
16);
|
||||
if (!vbuf->indices)
|
||||
goto fail;
|
||||
|
||||
vbuf->cache = translate_cache_create();
|
||||
if (!vbuf->cache)
|
||||
if (!vbuf->cache)
|
||||
goto fail;
|
||||
|
||||
|
||||
|
||||
vbuf->vertices = NULL;
|
||||
vbuf->vertex_ptr = vbuf->vertices;
|
||||
|
||||
|
||||
vbuf->zero4[0] = vbuf->zero4[1] = vbuf->zero4[2] = vbuf->zero4[3] = 0.0f;
|
||||
|
||||
return &vbuf->stage;
|
||||
|
||||
fail:
|
||||
fail:
|
||||
if (vbuf)
|
||||
vbuf_destroy(&vbuf->stage);
|
||||
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -524,7 +524,7 @@ draw_vbo(struct draw_context *draw,
|
|||
#endif
|
||||
{
|
||||
if (index_limit == 0) {
|
||||
/* one of the buffers is too small to do any valid drawing */
|
||||
/* one of the buffers is too small to do any valid drawing */
|
||||
debug_warning("draw: VBO too small to draw anything\n");
|
||||
util_fpstate_set(fpstate);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ struct pt_emit {
|
|||
unsigned prim;
|
||||
|
||||
const struct vertex_info *vinfo;
|
||||
|
||||
float zero4[4];
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -92,6 +95,11 @@ draw_pt_emit_prepare(struct pt_emit *emit,
|
|||
src_buffer = 1;
|
||||
src_offset = 0;
|
||||
}
|
||||
else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
|
||||
/* elements which don't exist will get assigned zeros */
|
||||
src_buffer = 2;
|
||||
src_offset = 0;
|
||||
}
|
||||
|
||||
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
|
||||
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
|
||||
|
|
@ -111,6 +119,8 @@ draw_pt_emit_prepare(struct pt_emit *emit,
|
|||
translate_key_compare(&emit->translate->key, &hw_key) != 0) {
|
||||
translate_key_sanitize(&hw_key);
|
||||
emit->translate = translate_cache_find(emit->cache, &hw_key);
|
||||
|
||||
emit->translate->set_buffer(emit->translate, 2, &emit->zero4[0], 0, ~0);
|
||||
}
|
||||
|
||||
if (!vinfo->size)
|
||||
|
|
@ -287,6 +297,8 @@ draw_pt_emit_create(struct draw_context *draw)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
emit->zero4[0] = emit->zero4[1] = emit->zero4[2] = emit->zero4[3] = 0.0f;
|
||||
|
||||
return emit;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@
|
|||
#include "util/u_debug.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#define DRAW_ATTR_NONEXIST 255
|
||||
|
||||
/**
|
||||
* Vertex attribute emit modes
|
||||
|
|
@ -60,18 +61,6 @@ enum attrib_emit {
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
* Attribute interpolation mode
|
||||
*/
|
||||
enum interp_mode {
|
||||
INTERP_NONE, /**< never interpolate vertex header info */
|
||||
INTERP_POS, /**< special case for frag position */
|
||||
INTERP_CONSTANT,
|
||||
INTERP_LINEAR,
|
||||
INTERP_PERSPECTIVE
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Information about hardware/rasterization vertex layout.
|
||||
*/
|
||||
|
|
@ -85,8 +74,7 @@ struct vertex_info
|
|||
* memcmp() comparisons.
|
||||
*/
|
||||
struct {
|
||||
unsigned interp_mode:4; /**< INTERP_x */
|
||||
unsigned emit:4; /**< EMIT_x */
|
||||
unsigned emit:8; /**< EMIT_x */
|
||||
unsigned src_index:8; /**< map to post-xform attribs */
|
||||
} attrib[PIPE_MAX_SHADER_OUTPUTS];
|
||||
};
|
||||
|
|
@ -124,20 +112,18 @@ draw_vinfo_copy( struct vertex_info *dst,
|
|||
static inline uint
|
||||
draw_emit_vertex_attr(struct vertex_info *vinfo,
|
||||
enum attrib_emit emit,
|
||||
enum interp_mode interp, /* only used by softpipe??? */
|
||||
int src_index)
|
||||
{
|
||||
const uint n = vinfo->num_attribs;
|
||||
|
||||
/* If the src_index is negative, meaning it hasn't been found
|
||||
* lets just redirect it to the first output slot */
|
||||
* we'll assign it all zeros later - set to DRAW_ATTR_NONEXIST */
|
||||
if (src_index < 0) {
|
||||
src_index = 0;
|
||||
src_index = DRAW_ATTR_NONEXIST;
|
||||
}
|
||||
|
||||
assert(n < Elements(vinfo->attrib));
|
||||
vinfo->attrib[n].emit = emit;
|
||||
vinfo->attrib[n].interp_mode = interp;
|
||||
vinfo->attrib[n].src_index = src_index;
|
||||
vinfo->num_attribs++;
|
||||
return n;
|
||||
|
|
|
|||
|
|
@ -458,7 +458,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
|
|||
{
|
||||
/* Special case 4x4f --> 1x16ub */
|
||||
if (src_type.length == 4 &&
|
||||
util_cpu_caps.has_sse2)
|
||||
(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
|
||||
{
|
||||
num_dsts = (num_srcs + 3) / 4;
|
||||
dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
|
||||
|
|
@ -545,7 +545,7 @@ lp_build_conv(struct gallivm_state *gallivm,
|
|||
((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
|
||||
(num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&
|
||||
|
||||
util_cpu_caps.has_sse2)
|
||||
(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
|
||||
{
|
||||
struct lp_build_context bld;
|
||||
struct lp_type int16_type, int32_type;
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
|||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
|
|
|||
|
|
@ -720,7 +720,7 @@ lp_build_transpose_aos_n(struct gallivm_state *gallivm,
|
|||
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1197,7 +1197,7 @@ get_soa_array_offsets(struct lp_build_context *uint_bld,
|
|||
|
||||
if (need_perelement_offset) {
|
||||
LLVMValueRef pixel_offsets;
|
||||
int i;
|
||||
unsigned i;
|
||||
/* build pixel offset vector: {0, 1, 2, 3, ...} */
|
||||
pixel_offsets = uint_bld->undef;
|
||||
for (i = 0; i < uint_bld->type.length; i++) {
|
||||
|
|
@ -1809,7 +1809,7 @@ emit_store_double_chan(struct lp_build_tgsi_context *bld_base,
|
|||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct lp_build_context *float_bld = &bld_base->base;
|
||||
int i;
|
||||
unsigned i;
|
||||
LLVMValueRef temp, temp2;
|
||||
LLVMValueRef shuffles[8];
|
||||
LLVMValueRef shuffles2[8];
|
||||
|
|
@ -2713,7 +2713,7 @@ static boolean
|
|||
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
|
||||
int pc)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
unsigned opcode;
|
||||
|
|
|
|||
|
|
@ -431,7 +431,7 @@ hud_alloc_vertices(struct hud_context *hud, struct vertex_queue *v,
|
|||
v->max_num_vertices = num_vertices;
|
||||
v->vbuf.stride = stride;
|
||||
u_upload_alloc(hud->uploader, 0, v->vbuf.stride * v->max_num_vertices,
|
||||
&v->vbuf.buffer_offset, &v->vbuf.buffer,
|
||||
16, &v->vbuf.buffer_offset, &v->vbuf.buffer,
|
||||
(void**)&v->vertices);
|
||||
}
|
||||
|
||||
|
|
@ -1176,8 +1176,8 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
|
|||
|
||||
hud->pipe = pipe;
|
||||
hud->cso = cso;
|
||||
hud->uploader = u_upload_create(pipe, 256 * 1024, 16,
|
||||
PIPE_BIND_VERTEX_BUFFER);
|
||||
hud->uploader = u_upload_create(pipe, 256 * 1024,
|
||||
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
|
||||
|
||||
/* font */
|
||||
if (!util_font_create(pipe, UTIL_FONT_FIXED_8X13, &hud->font)) {
|
||||
|
|
|
|||
|
|
@ -153,10 +153,11 @@ util_primconvert_draw_vbo(struct primconvert_context *pc,
|
|||
}
|
||||
|
||||
if (!pc->upload) {
|
||||
pc->upload = u_upload_create(pc->pipe, 4096, 4, PIPE_BIND_INDEX_BUFFER);
|
||||
pc->upload = u_upload_create(pc->pipe, 4096, PIPE_BIND_INDEX_BUFFER,
|
||||
PIPE_USAGE_STREAM);
|
||||
}
|
||||
|
||||
u_upload_alloc(pc->upload, 0, new_ib.index_size * new_info.count,
|
||||
u_upload_alloc(pc->upload, 0, new_ib.index_size * new_info.count, 4,
|
||||
&new_ib.offset, &new_ib.buffer, &dst);
|
||||
|
||||
if (info->indexed) {
|
||||
|
|
|
|||
|
|
@ -1950,7 +1950,7 @@ tgsi_processor_to_shader_stage(unsigned processor)
|
|||
case TGSI_PROCESSOR_COMPUTE: return MESA_SHADER_COMPUTE;
|
||||
default:
|
||||
unreachable("invalid TGSI processor");
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
struct nir_shader *
|
||||
|
|
|
|||
|
|
@ -259,36 +259,39 @@ tgsi_build_declaration_semantic(
|
|||
return ds;
|
||||
}
|
||||
|
||||
static struct tgsi_declaration_resource
|
||||
tgsi_default_declaration_resource(void)
|
||||
static struct tgsi_declaration_image
|
||||
tgsi_default_declaration_image(void)
|
||||
{
|
||||
struct tgsi_declaration_resource dr;
|
||||
struct tgsi_declaration_image di;
|
||||
|
||||
dr.Resource = TGSI_TEXTURE_BUFFER;
|
||||
dr.Raw = 0;
|
||||
dr.Writable = 0;
|
||||
dr.Padding = 0;
|
||||
di.Resource = TGSI_TEXTURE_BUFFER;
|
||||
di.Raw = 0;
|
||||
di.Writable = 0;
|
||||
di.Format = 0;
|
||||
di.Padding = 0;
|
||||
|
||||
return dr;
|
||||
return di;
|
||||
}
|
||||
|
||||
static struct tgsi_declaration_resource
|
||||
tgsi_build_declaration_resource(unsigned texture,
|
||||
unsigned raw,
|
||||
unsigned writable,
|
||||
struct tgsi_declaration *declaration,
|
||||
struct tgsi_header *header)
|
||||
static struct tgsi_declaration_image
|
||||
tgsi_build_declaration_image(unsigned texture,
|
||||
unsigned format,
|
||||
unsigned raw,
|
||||
unsigned writable,
|
||||
struct tgsi_declaration *declaration,
|
||||
struct tgsi_header *header)
|
||||
{
|
||||
struct tgsi_declaration_resource dr;
|
||||
struct tgsi_declaration_image di;
|
||||
|
||||
dr = tgsi_default_declaration_resource();
|
||||
dr.Resource = texture;
|
||||
dr.Raw = raw;
|
||||
dr.Writable = writable;
|
||||
di = tgsi_default_declaration_image();
|
||||
di.Resource = texture;
|
||||
di.Format = format;
|
||||
di.Raw = raw;
|
||||
di.Writable = writable;
|
||||
|
||||
declaration_grow(declaration, header);
|
||||
|
||||
return dr;
|
||||
return di;
|
||||
}
|
||||
|
||||
static struct tgsi_declaration_sampler_view
|
||||
|
|
@ -364,7 +367,7 @@ tgsi_default_full_declaration( void )
|
|||
full_declaration.Range = tgsi_default_declaration_range();
|
||||
full_declaration.Semantic = tgsi_default_declaration_semantic();
|
||||
full_declaration.Interp = tgsi_default_declaration_interp();
|
||||
full_declaration.Resource = tgsi_default_declaration_resource();
|
||||
full_declaration.Image = tgsi_default_declaration_image();
|
||||
full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
|
||||
full_declaration.Array = tgsi_default_declaration_array();
|
||||
|
||||
|
|
@ -454,20 +457,21 @@ tgsi_build_full_declaration(
|
|||
header );
|
||||
}
|
||||
|
||||
if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
|
||||
struct tgsi_declaration_resource *dr;
|
||||
if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
|
||||
struct tgsi_declaration_image *di;
|
||||
|
||||
if (maxsize <= size) {
|
||||
return 0;
|
||||
}
|
||||
dr = (struct tgsi_declaration_resource *)&tokens[size];
|
||||
di = (struct tgsi_declaration_image *)&tokens[size];
|
||||
size++;
|
||||
|
||||
*dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
|
||||
full_decl->Resource.Raw,
|
||||
full_decl->Resource.Writable,
|
||||
declaration,
|
||||
header);
|
||||
*di = tgsi_build_declaration_image(full_decl->Image.Resource,
|
||||
full_decl->Image.Format,
|
||||
full_decl->Image.Raw,
|
||||
full_decl->Image.Writable,
|
||||
declaration,
|
||||
header);
|
||||
}
|
||||
|
||||
if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
|
|
@ -616,7 +620,8 @@ tgsi_default_instruction( void )
|
|||
instruction.NumSrcRegs = 1;
|
||||
instruction.Label = 0;
|
||||
instruction.Texture = 0;
|
||||
instruction.Padding = 0;
|
||||
instruction.Memory = 0;
|
||||
instruction.Padding = 0;
|
||||
|
||||
return instruction;
|
||||
}
|
||||
|
|
@ -762,6 +767,34 @@ tgsi_build_instruction_texture(
|
|||
return instruction_texture;
|
||||
}
|
||||
|
||||
static struct tgsi_instruction_memory
|
||||
tgsi_default_instruction_memory( void )
|
||||
{
|
||||
struct tgsi_instruction_memory instruction_memory;
|
||||
|
||||
instruction_memory.Qualifier = 0;
|
||||
instruction_memory.Padding = 0;
|
||||
|
||||
return instruction_memory;
|
||||
}
|
||||
|
||||
static struct tgsi_instruction_memory
|
||||
tgsi_build_instruction_memory(
|
||||
unsigned qualifier,
|
||||
struct tgsi_token *prev_token,
|
||||
struct tgsi_instruction *instruction,
|
||||
struct tgsi_header *header )
|
||||
{
|
||||
struct tgsi_instruction_memory instruction_memory;
|
||||
|
||||
instruction_memory.Qualifier = qualifier;
|
||||
instruction_memory.Padding = 0;
|
||||
instruction->Memory = 1;
|
||||
|
||||
instruction_grow( instruction, header );
|
||||
|
||||
return instruction_memory;
|
||||
}
|
||||
|
||||
static struct tgsi_texture_offset
|
||||
tgsi_default_texture_offset( void )
|
||||
|
|
@ -1008,6 +1041,7 @@ tgsi_default_full_instruction( void )
|
|||
full_instruction.Predicate = tgsi_default_instruction_predicate();
|
||||
full_instruction.Label = tgsi_default_instruction_label();
|
||||
full_instruction.Texture = tgsi_default_instruction_texture();
|
||||
full_instruction.Memory = tgsi_default_instruction_memory();
|
||||
for( i = 0; i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) {
|
||||
full_instruction.TexOffsets[i] = tgsi_default_texture_offset();
|
||||
}
|
||||
|
|
@ -1119,6 +1153,24 @@ tgsi_build_full_instruction(
|
|||
prev_token = (struct tgsi_token *) texture_offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (full_inst->Instruction.Memory) {
|
||||
struct tgsi_instruction_memory *instruction_memory;
|
||||
|
||||
if( maxsize <= size )
|
||||
return 0;
|
||||
instruction_memory =
|
||||
(struct tgsi_instruction_memory *) &tokens[size];
|
||||
size++;
|
||||
|
||||
*instruction_memory = tgsi_build_instruction_memory(
|
||||
full_inst->Memory.Qualifier,
|
||||
prev_token,
|
||||
instruction,
|
||||
header );
|
||||
prev_token = (struct tgsi_token *) instruction_memory;
|
||||
}
|
||||
|
||||
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
|
||||
const struct tgsi_full_dst_register *reg = &full_inst->Dst[i];
|
||||
struct tgsi_dst_register *dst_register;
|
||||
|
|
|
|||
|
|
@ -348,15 +348,22 @@ iter_declaration(
|
|||
}
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
|
||||
if (decl->Declaration.File == TGSI_FILE_IMAGE) {
|
||||
TXT(", ");
|
||||
ENM(decl->Resource.Resource, tgsi_texture_names);
|
||||
if (decl->Resource.Writable)
|
||||
ENM(decl->Image.Resource, tgsi_texture_names);
|
||||
TXT(", ");
|
||||
UID(decl->Image.Format);
|
||||
if (decl->Image.Writable)
|
||||
TXT(", WR");
|
||||
if (decl->Resource.Raw)
|
||||
if (decl->Image.Raw)
|
||||
TXT(", RAW");
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_BUFFER) {
|
||||
if (decl->Declaration.Atomic)
|
||||
TXT(", ATOMIC");
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
TXT(", ");
|
||||
ENM(decl->SamplerView.Resource, tgsi_texture_names);
|
||||
|
|
@ -617,6 +624,16 @@ iter_instruction(
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->Instruction.Memory) {
|
||||
uint32_t qualifier = inst->Memory.Qualifier;
|
||||
while (qualifier) {
|
||||
int bit = ffs(qualifier) - 1;
|
||||
qualifier &= ~(1U << bit);
|
||||
TXT(", ");
|
||||
ENM(bit, tgsi_memory_names);
|
||||
}
|
||||
}
|
||||
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
|
|
|
|||
|
|
@ -473,6 +473,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
|
|||
return 1;
|
||||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
|
|
|||
|
|
@ -37,231 +37,231 @@
|
|||
|
||||
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
||||
{
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
|
||||
{ 1, 1, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
|
||||
{ 1, 1, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
|
||||
{ 1, 1, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
|
||||
{ 1, 2, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
|
||||
{ 1, 2, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
|
||||
{ 1, 2, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
|
||||
{ 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
|
||||
{ 1, 2, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "PK2H", TGSI_OPCODE_PK2H },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
|
||||
{ 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
|
||||
{ 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UP2H", TGSI_OPCODE_UP2H },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
|
||||
{ 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
|
||||
{ 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
|
||||
{ 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
|
||||
{ 0, 1, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
|
||||
{ 0, 1, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
|
||||
{ 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
|
||||
{ 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
|
||||
{ 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
|
||||
{ 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
|
||||
{ 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
|
||||
{ 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
|
||||
{ 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
|
||||
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
|
||||
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
|
||||
{ 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
|
||||
{ 0, 1, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
|
||||
{ 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
|
||||
{ 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
|
||||
{ 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
|
||||
{ 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
|
||||
{ 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
|
||||
{ 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
|
||||
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
|
||||
{ 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
|
||||
{ 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
|
||||
{ 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
|
||||
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
|
||||
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
|
||||
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, NONE, "RESQ", TGSI_OPCODE_RESQ },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
|
||||
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
|
||||
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
|
||||
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
|
||||
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
|
||||
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
|
||||
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
|
||||
{ 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
|
||||
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
|
||||
{ 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
|
||||
{ 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
|
||||
{ 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
|
||||
{ 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
|
||||
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
|
||||
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
|
||||
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
|
||||
{ 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
|
||||
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
|
||||
{ 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
|
||||
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
|
||||
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
|
||||
{ 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
|
||||
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
|
||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
|
||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
|
||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
|
||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
|
||||
{ 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
|
||||
{ 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
|
||||
{ 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
|
||||
{ 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
|
||||
{ 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
|
||||
{ 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
|
||||
{ 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
|
||||
{ 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
|
||||
{ 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
|
||||
{ 1, 1, 0, 0 ,0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
|
||||
{ 1, 1, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
|
||||
{ 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
|
||||
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
|
||||
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
|
||||
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
|
||||
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
|
||||
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
|
||||
{ 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
|
||||
{ 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
|
||||
};
|
||||
|
||||
const struct tgsi_opcode_info *
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ struct tgsi_opcode_info
|
|||
unsigned num_dst:3;
|
||||
unsigned num_src:3;
|
||||
unsigned is_tex:1;
|
||||
unsigned is_store:1;
|
||||
unsigned is_branch:1;
|
||||
int pre_dedent:2;
|
||||
int post_indent:2;
|
||||
|
|
|
|||
|
|
@ -121,8 +121,8 @@ tgsi_parse_token(
|
|||
next_token( ctx, &decl->Semantic );
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
|
||||
next_token(ctx, &decl->Resource);
|
||||
if (decl->Declaration.File == TGSI_FILE_IMAGE) {
|
||||
next_token(ctx, &decl->Image);
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
|
|
@ -195,6 +195,10 @@ tgsi_parse_token(
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->Instruction.Memory) {
|
||||
next_token(ctx, &inst->Memory);
|
||||
}
|
||||
|
||||
assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
|
||||
|
||||
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ struct tgsi_full_declaration
|
|||
struct tgsi_declaration_dimension Dim;
|
||||
struct tgsi_declaration_interp Interp;
|
||||
struct tgsi_declaration_semantic Semantic;
|
||||
struct tgsi_declaration_resource Resource;
|
||||
struct tgsi_declaration_image Image;
|
||||
struct tgsi_declaration_sampler_view SamplerView;
|
||||
struct tgsi_declaration_array Array;
|
||||
};
|
||||
|
|
@ -91,6 +91,7 @@ struct tgsi_full_instruction
|
|||
struct tgsi_instruction_predicate Predicate;
|
||||
struct tgsi_instruction_label Label;
|
||||
struct tgsi_instruction_texture Texture;
|
||||
struct tgsi_instruction_memory Memory;
|
||||
struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS];
|
||||
struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS];
|
||||
struct tgsi_texture_offset TexOffsets[TGSI_FULL_MAX_TEX_OFFSETS];
|
||||
|
|
|
|||
|
|
@ -187,13 +187,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_FRAGMENT &&
|
||||
info->reads_position &&
|
||||
src->Register.Index == 0 &&
|
||||
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
|
||||
info->reads_z = TRUE;
|
||||
!src->Register.Indirect) {
|
||||
unsigned name =
|
||||
info->input_semantic_name[src->Register.Index];
|
||||
unsigned index =
|
||||
info->input_semantic_index[src->Register.Index];
|
||||
|
||||
if (name == TGSI_SEMANTIC_POSITION &&
|
||||
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
|
||||
src->Register.SwizzleW == TGSI_SWIZZLE_Z))
|
||||
info->reads_z = TRUE;
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR) {
|
||||
unsigned mask =
|
||||
(1 << src->Register.SwizzleX) |
|
||||
(1 << src->Register.SwizzleY) |
|
||||
(1 << src->Register.SwizzleZ) |
|
||||
(1 << src->Register.SwizzleW);
|
||||
|
||||
info->colors_read |= mask << (index * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -358,7 +373,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
info->uses_primid = TRUE;
|
||||
} else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
|
||||
info->uses_invocationid = TRUE;
|
||||
}
|
||||
} else if (semName == TGSI_SEMANTIC_POSITION)
|
||||
info->reads_position = TRUE;
|
||||
else if (semName == TGSI_SEMANTIC_FACE)
|
||||
info->uses_frontface = TRUE;
|
||||
}
|
||||
else if (file == TGSI_FILE_OUTPUT) {
|
||||
info->output_semantic_name[reg] = (ubyte) semName;
|
||||
|
|
@ -392,6 +410,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
}
|
||||
else if (semName == TGSI_SEMANTIC_STENCIL) {
|
||||
info->writes_stencil = TRUE;
|
||||
} else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
|
||||
info->writes_samplemask = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -77,11 +77,13 @@ struct tgsi_shader_info
|
|||
|
||||
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
|
||||
|
||||
ubyte colors_read; /**< which color components are read by the FS */
|
||||
ubyte colors_written;
|
||||
boolean reads_position; /**< does fragment shader read position? */
|
||||
boolean reads_z; /**< does fragment shader read depth? */
|
||||
boolean writes_z; /**< does fragment shader write Z value? */
|
||||
boolean writes_stencil; /**< does fragment shader write stencil value? */
|
||||
boolean writes_samplemask; /**< does fragment shader write sample mask? */
|
||||
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
|
||||
boolean uses_kill; /**< KILL or KILL_IF instruction used? */
|
||||
boolean uses_persp_center;
|
||||
|
|
|
|||
|
|
@ -54,8 +54,9 @@ static const char *tgsi_file_names[] =
|
|||
"IMM",
|
||||
"PRED",
|
||||
"SV",
|
||||
"RES",
|
||||
"SVIEW"
|
||||
"IMAGE",
|
||||
"SVIEW",
|
||||
"BUFFER",
|
||||
};
|
||||
|
||||
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
||||
|
|
@ -96,6 +97,8 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
|||
"TESSINNER",
|
||||
"VERTICESIN",
|
||||
"HELPER_INVOCATION",
|
||||
"BASEINSTANCE",
|
||||
"DRAWID",
|
||||
};
|
||||
|
||||
const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
|
||||
|
|
@ -205,6 +208,13 @@ const char *tgsi_immediate_type_names[4] =
|
|||
"FLT64"
|
||||
};
|
||||
|
||||
const char *tgsi_memory_names[3] =
|
||||
{
|
||||
"COHERENT",
|
||||
"RESTRICT",
|
||||
"VOLATILE",
|
||||
};
|
||||
|
||||
|
||||
static inline void
|
||||
tgsi_strings_check(void)
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@ extern const char *tgsi_fs_coord_pixel_center_names[2];
|
|||
|
||||
extern const char *tgsi_immediate_type_names[4];
|
||||
|
||||
extern const char *tgsi_memory_names[3];
|
||||
|
||||
|
||||
const char *
|
||||
tgsi_file_name(unsigned file);
|
||||
|
|
|
|||
|
|
@ -1039,6 +1039,12 @@ parse_instruction(
|
|||
inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN;
|
||||
}
|
||||
|
||||
if ((i >= TGSI_OPCODE_LOAD && i <= TGSI_OPCODE_ATOMIMAX) ||
|
||||
i == TGSI_OPCODE_RESQ) {
|
||||
inst.Instruction.Memory = 1;
|
||||
inst.Memory.Qualifier = 0;
|
||||
}
|
||||
|
||||
/* Parse instruction operands.
|
||||
*/
|
||||
for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
|
||||
|
|
@ -1090,6 +1096,27 @@ parse_instruction(
|
|||
}
|
||||
inst.Texture.NumOffsets = i;
|
||||
|
||||
cur = ctx->cur;
|
||||
eat_opt_white(&cur);
|
||||
for (i = 0; inst.Instruction.Memory && *cur == ','; i++) {
|
||||
uint j;
|
||||
cur++;
|
||||
eat_opt_white(&cur);
|
||||
ctx->cur = cur;
|
||||
for (j = 0; j < 3; j++) {
|
||||
if (str_match_nocase_whole(&ctx->cur, tgsi_memory_names[j])) {
|
||||
inst.Memory.Qualifier |= 1U << j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == 3) {
|
||||
report_error(ctx, "Expected memory qualifier");
|
||||
return FALSE;
|
||||
}
|
||||
cur = ctx->cur;
|
||||
eat_opt_white(&cur);
|
||||
}
|
||||
|
||||
cur = ctx->cur;
|
||||
eat_opt_white( &cur );
|
||||
if (info->is_branch && *cur == ':') {
|
||||
|
|
@ -1251,10 +1278,10 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
|
||||
cur++;
|
||||
eat_opt_white( &cur );
|
||||
if (file == TGSI_FILE_RESOURCE) {
|
||||
if (file == TGSI_FILE_IMAGE) {
|
||||
for (i = 0; i < TGSI_TEXTURE_COUNT; i++) {
|
||||
if (str_match_nocase_whole(&cur, tgsi_texture_names[i])) {
|
||||
decl.Resource.Resource = i;
|
||||
decl.Image.Resource = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -1263,16 +1290,18 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/* XXX format */
|
||||
|
||||
cur2 = cur;
|
||||
eat_opt_white(&cur2);
|
||||
while (*cur2 == ',') {
|
||||
cur2++;
|
||||
eat_opt_white(&cur2);
|
||||
if (str_match_nocase_whole(&cur2, "RAW")) {
|
||||
decl.Resource.Raw = 1;
|
||||
decl.Image.Raw = 1;
|
||||
|
||||
} else if (str_match_nocase_whole(&cur2, "WR")) {
|
||||
decl.Resource.Writable = 1;
|
||||
decl.Image.Writable = 1;
|
||||
|
||||
} else {
|
||||
break;
|
||||
|
|
@ -1348,6 +1377,11 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
|||
decl.SamplerView.ReturnTypeX;
|
||||
}
|
||||
ctx->cur = cur;
|
||||
} else if (file == TGSI_FILE_BUFFER) {
|
||||
if (str_match_nocase_whole(&cur, "ATOMIC")) {
|
||||
decl.Declaration.Atomic = 1;
|
||||
ctx->cur = cur;
|
||||
}
|
||||
} else {
|
||||
if (str_match_nocase_whole(&cur, "LOCAL")) {
|
||||
decl.Declaration.Local = 1;
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ union tgsi_any_token {
|
|||
struct tgsi_declaration_range decl_range;
|
||||
struct tgsi_declaration_dimension decl_dim;
|
||||
struct tgsi_declaration_interp decl_interp;
|
||||
struct tgsi_declaration_image decl_image;
|
||||
struct tgsi_declaration_semantic decl_semantic;
|
||||
struct tgsi_declaration_sampler_view decl_sampler_view;
|
||||
struct tgsi_declaration_array array;
|
||||
|
|
@ -59,6 +60,7 @@ union tgsi_any_token {
|
|||
struct tgsi_instruction_predicate insn_predicate;
|
||||
struct tgsi_instruction_label insn_label;
|
||||
struct tgsi_instruction_texture insn_texture;
|
||||
struct tgsi_instruction_memory insn_memory;
|
||||
struct tgsi_texture_offset insn_texture_offset;
|
||||
struct tgsi_src_register src;
|
||||
struct tgsi_ind_register ind;
|
||||
|
|
@ -115,7 +117,6 @@ struct ureg_program
|
|||
unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
|
||||
|
||||
struct {
|
||||
unsigned index;
|
||||
unsigned semantic_name;
|
||||
unsigned semantic_index;
|
||||
} system_value[UREG_MAX_SYSTEM_VALUE];
|
||||
|
|
@ -155,6 +156,21 @@ struct ureg_program
|
|||
} sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
|
||||
unsigned nr_sampler_views;
|
||||
|
||||
struct {
|
||||
unsigned index;
|
||||
unsigned target;
|
||||
unsigned format;
|
||||
boolean wr;
|
||||
boolean raw;
|
||||
} image[PIPE_MAX_SHADER_IMAGES];
|
||||
unsigned nr_images;
|
||||
|
||||
struct {
|
||||
unsigned index;
|
||||
bool atomic;
|
||||
} buffer[PIPE_MAX_SHADER_BUFFERS];
|
||||
unsigned nr_buffers;
|
||||
|
||||
struct util_bitmask *free_temps;
|
||||
struct util_bitmask *local_temps;
|
||||
struct util_bitmask *decl_temps;
|
||||
|
|
@ -320,20 +336,29 @@ ureg_DECL_input(struct ureg_program *ureg,
|
|||
|
||||
struct ureg_src
|
||||
ureg_DECL_system_value(struct ureg_program *ureg,
|
||||
unsigned index,
|
||||
unsigned semantic_name,
|
||||
unsigned semantic_index)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ureg->nr_system_values; i++) {
|
||||
if (ureg->system_value[i].semantic_name == semantic_name &&
|
||||
ureg->system_value[i].semantic_index == semantic_index) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
|
||||
ureg->system_value[ureg->nr_system_values].index = index;
|
||||
ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
|
||||
ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
|
||||
i = ureg->nr_system_values;
|
||||
ureg->nr_system_values++;
|
||||
} else {
|
||||
set_bad(ureg);
|
||||
}
|
||||
|
||||
return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
|
||||
out:
|
||||
return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -648,6 +673,60 @@ ureg_DECL_sampler_view(struct ureg_program *ureg,
|
|||
return reg;
|
||||
}
|
||||
|
||||
/* Allocate a new image.
|
||||
*/
|
||||
struct ureg_src
|
||||
ureg_DECL_image(struct ureg_program *ureg,
|
||||
unsigned index,
|
||||
unsigned target,
|
||||
unsigned format,
|
||||
boolean wr,
|
||||
boolean raw)
|
||||
{
|
||||
struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ureg->nr_images; i++)
|
||||
if (ureg->image[i].index == index)
|
||||
return reg;
|
||||
|
||||
if (i < PIPE_MAX_SHADER_IMAGES) {
|
||||
ureg->image[i].index = index;
|
||||
ureg->image[i].target = target;
|
||||
ureg->image[i].wr = wr;
|
||||
ureg->image[i].raw = raw;
|
||||
ureg->image[i].format = format;
|
||||
ureg->nr_images++;
|
||||
return reg;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return reg;
|
||||
}
|
||||
|
||||
/* Allocate a new buffer.
|
||||
*/
|
||||
struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
|
||||
bool atomic)
|
||||
{
|
||||
struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ureg->nr_buffers; i++)
|
||||
if (ureg->buffer[i].index == nr)
|
||||
return reg;
|
||||
|
||||
if (i < PIPE_MAX_SHADER_BUFFERS) {
|
||||
ureg->buffer[i].index = nr;
|
||||
ureg->buffer[i].atomic = atomic;
|
||||
ureg->nr_buffers++;
|
||||
return reg;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return reg;
|
||||
}
|
||||
|
||||
static int
|
||||
match_or_expand_immediate64( const unsigned *v,
|
||||
int type,
|
||||
|
|
@ -1148,6 +1227,21 @@ ureg_emit_texture_offset(struct ureg_program *ureg,
|
|||
|
||||
}
|
||||
|
||||
void
|
||||
ureg_emit_memory(struct ureg_program *ureg,
|
||||
unsigned extended_token,
|
||||
unsigned qualifier)
|
||||
{
|
||||
union tgsi_any_token *out, *insn;
|
||||
|
||||
out = get_tokens( ureg, DOMAIN_INSN, 1 );
|
||||
insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
|
||||
|
||||
insn->insn.Memory = 1;
|
||||
|
||||
out[0].value = 0;
|
||||
out[0].insn_memory.Qualifier = qualifier;
|
||||
}
|
||||
|
||||
void
|
||||
ureg_fixup_insn_size(struct ureg_program *ureg,
|
||||
|
|
@ -1300,6 +1394,42 @@ ureg_label_insn(struct ureg_program *ureg,
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
ureg_memory_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_dst *dst,
|
||||
unsigned nr_dst,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src,
|
||||
unsigned qualifier)
|
||||
{
|
||||
struct ureg_emit_insn_result insn;
|
||||
unsigned i;
|
||||
|
||||
insn = ureg_emit_insn(ureg,
|
||||
opcode,
|
||||
FALSE,
|
||||
FALSE,
|
||||
FALSE,
|
||||
TGSI_SWIZZLE_X,
|
||||
TGSI_SWIZZLE_Y,
|
||||
TGSI_SWIZZLE_Z,
|
||||
TGSI_SWIZZLE_W,
|
||||
nr_dst,
|
||||
nr_src);
|
||||
|
||||
ureg_emit_memory(ureg, insn.extended_token, qualifier);
|
||||
|
||||
for (i = 0; i < nr_dst; i++)
|
||||
ureg_emit_dst(ureg, dst[i]);
|
||||
|
||||
for (i = 0; i < nr_src; i++)
|
||||
ureg_emit_src(ureg, src[i]);
|
||||
|
||||
ureg_fixup_insn_size(ureg, insn.insn_token);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
emit_decl_semantic(struct ureg_program *ureg,
|
||||
unsigned file,
|
||||
|
|
@ -1477,6 +1607,52 @@ emit_decl_sampler_view(struct ureg_program *ureg,
|
|||
out[2].decl_sampler_view.ReturnTypeW = return_type_w;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_image(struct ureg_program *ureg,
|
||||
unsigned index,
|
||||
unsigned target,
|
||||
unsigned format,
|
||||
boolean wr,
|
||||
boolean raw)
|
||||
{
|
||||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
|
||||
|
||||
out[0].value = 0;
|
||||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 3;
|
||||
out[0].decl.File = TGSI_FILE_IMAGE;
|
||||
out[0].decl.UsageMask = 0xf;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = index;
|
||||
out[1].decl_range.Last = index;
|
||||
|
||||
out[2].value = 0;
|
||||
out[2].decl_image.Resource = target;
|
||||
out[2].decl_image.Writable = wr;
|
||||
out[2].decl_image.Raw = raw;
|
||||
out[2].decl_image.Format = format;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_buffer(struct ureg_program *ureg,
|
||||
unsigned index,
|
||||
bool atomic)
|
||||
{
|
||||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
|
||||
|
||||
out[0].value = 0;
|
||||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 2;
|
||||
out[0].decl.File = TGSI_FILE_BUFFER;
|
||||
out[0].decl.UsageMask = 0xf;
|
||||
out[0].decl.Atomic = atomic;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = index;
|
||||
out[1].decl_range.Last = index;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_immediate( struct ureg_program *ureg,
|
||||
const unsigned *v,
|
||||
|
|
@ -1587,8 +1763,8 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
for (i = 0; i < ureg->nr_system_values; i++) {
|
||||
emit_decl_semantic(ureg,
|
||||
TGSI_FILE_SYSTEM_VALUE,
|
||||
ureg->system_value[i].index,
|
||||
ureg->system_value[i].index,
|
||||
i,
|
||||
i,
|
||||
ureg->system_value[i].semantic_name,
|
||||
ureg->system_value[i].semantic_index,
|
||||
TGSI_WRITEMASK_XYZW, 0);
|
||||
|
|
@ -1636,6 +1812,19 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
ureg->sampler_view[i].return_type_w);
|
||||
}
|
||||
|
||||
for (i = 0; i < ureg->nr_images; i++) {
|
||||
emit_decl_image(ureg,
|
||||
ureg->image[i].index,
|
||||
ureg->image[i].target,
|
||||
ureg->image[i].format,
|
||||
ureg->image[i].wr,
|
||||
ureg->image[i].raw);
|
||||
}
|
||||
|
||||
for (i = 0; i < ureg->nr_buffers; i++) {
|
||||
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
|
||||
}
|
||||
|
||||
if (ureg->const_decls.nr_constant_ranges) {
|
||||
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
|
||||
emit_decl_range(ureg,
|
||||
|
|
|
|||
|
|
@ -221,7 +221,6 @@ ureg_DECL_input(struct ureg_program *,
|
|||
|
||||
struct ureg_src
|
||||
ureg_DECL_system_value(struct ureg_program *,
|
||||
unsigned index,
|
||||
unsigned semantic_name,
|
||||
unsigned semantic_index);
|
||||
|
||||
|
|
@ -327,6 +326,16 @@ ureg_DECL_sampler_view(struct ureg_program *,
|
|||
unsigned return_type_z,
|
||||
unsigned return_type_w );
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_image(struct ureg_program *ureg,
|
||||
unsigned index,
|
||||
unsigned target,
|
||||
unsigned format,
|
||||
boolean wr,
|
||||
boolean raw);
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
|
||||
|
||||
static inline struct ureg_src
|
||||
ureg_imm4f( struct ureg_program *ureg,
|
||||
|
|
@ -522,6 +531,14 @@ ureg_label_insn(struct ureg_program *ureg,
|
|||
unsigned nr_src,
|
||||
unsigned *label);
|
||||
|
||||
void
|
||||
ureg_memory_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_dst *dst,
|
||||
unsigned nr_dst,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src,
|
||||
unsigned qualifier);
|
||||
|
||||
/***********************************************************************
|
||||
* Internal instruction helpers, don't call these directly:
|
||||
|
|
@ -559,6 +576,11 @@ void
|
|||
ureg_emit_texture_offset(struct ureg_program *ureg,
|
||||
const struct tgsi_texture_offset *offset);
|
||||
|
||||
void
|
||||
ureg_emit_memory(struct ureg_program *ureg,
|
||||
unsigned insn_token,
|
||||
unsigned qualifier);
|
||||
|
||||
void
|
||||
ureg_emit_dst( struct ureg_program *ureg,
|
||||
struct ureg_dst dst );
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
#include "pipe/p_shader_tokens.h"
|
||||
#include "tgsi_parse.h"
|
||||
#include "tgsi_util.h"
|
||||
#include "tgsi_exec.h"
|
||||
|
||||
union pointer_hack
|
||||
{
|
||||
|
|
@ -53,17 +54,17 @@ tgsi_util_get_src_register_swizzle(
|
|||
const struct tgsi_src_register *reg,
|
||||
unsigned component )
|
||||
{
|
||||
switch( component ) {
|
||||
case 0:
|
||||
switch (component) {
|
||||
case TGSI_CHAN_X:
|
||||
return reg->SwizzleX;
|
||||
case 1:
|
||||
case TGSI_CHAN_Y:
|
||||
return reg->SwizzleY;
|
||||
case 2:
|
||||
case TGSI_CHAN_Z:
|
||||
return reg->SwizzleZ;
|
||||
case 3:
|
||||
case TGSI_CHAN_W:
|
||||
return reg->SwizzleW;
|
||||
default:
|
||||
assert( 0 );
|
||||
assert(0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -320,7 +320,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
|
|||
for (i = 0; i < 4; i++)
|
||||
ctx->vertices[i][0][3] = 1; /*v.w*/
|
||||
|
||||
ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
|
||||
ctx->upload = u_upload_create(pipe, 65536, PIPE_BIND_VERTEX_BUFFER,
|
||||
PIPE_USAGE_STREAM);
|
||||
|
||||
return &ctx->base;
|
||||
}
|
||||
|
|
@ -1191,7 +1192,7 @@ static void blitter_draw(struct blitter_context_priv *ctx,
|
|||
|
||||
vb.stride = 8 * sizeof(float);
|
||||
|
||||
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
|
||||
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), 4, ctx->vertices,
|
||||
&vb.buffer_offset, &vb.buffer);
|
||||
if (!vb.buffer)
|
||||
return;
|
||||
|
|
@ -2111,7 +2112,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
|
|||
return;
|
||||
}
|
||||
|
||||
u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
|
||||
u_upload_data(ctx->upload, 0, num_channels*4, 4, clear_value,
|
||||
&vb.buffer_offset, &vb.buffer);
|
||||
if (!vb.buffer)
|
||||
goto out;
|
||||
|
|
|
|||
|
|
@ -727,6 +727,65 @@ error1:
|
|||
;
|
||||
}
|
||||
|
||||
void
|
||||
debug_dump_ubyte_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
const ubyte *rgba, unsigned stride)
|
||||
{
|
||||
FILE *stream;
|
||||
struct bmp_file_header bmfh;
|
||||
struct bmp_info_header bmih;
|
||||
unsigned x, y;
|
||||
|
||||
assert(rgba);
|
||||
if(!rgba)
|
||||
goto error1;
|
||||
|
||||
bmfh.bfType = 0x4d42;
|
||||
bmfh.bfSize = 14 + 40 + height*width*4;
|
||||
bmfh.bfReserved1 = 0;
|
||||
bmfh.bfReserved2 = 0;
|
||||
bmfh.bfOffBits = 14 + 40;
|
||||
|
||||
bmih.biSize = 40;
|
||||
bmih.biWidth = width;
|
||||
bmih.biHeight = height;
|
||||
bmih.biPlanes = 1;
|
||||
bmih.biBitCount = 32;
|
||||
bmih.biCompression = 0;
|
||||
bmih.biSizeImage = height*width*4;
|
||||
bmih.biXPelsPerMeter = 0;
|
||||
bmih.biYPelsPerMeter = 0;
|
||||
bmih.biClrUsed = 0;
|
||||
bmih.biClrImportant = 0;
|
||||
|
||||
stream = fopen(filename, "wb");
|
||||
assert(stream);
|
||||
if(!stream)
|
||||
goto error1;
|
||||
|
||||
fwrite(&bmfh, 14, 1, stream);
|
||||
fwrite(&bmih, 40, 1, stream);
|
||||
|
||||
y = height;
|
||||
while(y--) {
|
||||
const ubyte *ptr = rgba + (stride * y * 4);
|
||||
for(x = 0; x < width; ++x)
|
||||
{
|
||||
struct bmp_rgb_quad pixel;
|
||||
pixel.rgbRed = ptr[x*4 + 0];
|
||||
pixel.rgbGreen = ptr[x*4 + 1];
|
||||
pixel.rgbBlue = ptr[x*4 + 2];
|
||||
pixel.rgbAlpha = ptr[x*4 + 3];
|
||||
fwrite(&pixel, 1, 4, stream);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(stream);
|
||||
error1:
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Print PIPE_TRANSFER_x flags with a message.
|
||||
|
|
|
|||
|
|
@ -490,12 +490,16 @@ void debug_dump_transfer_bmp(struct pipe_context *pipe,
|
|||
void debug_dump_float_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
float *rgba, unsigned stride);
|
||||
void debug_dump_ubyte_rgba_bmp(const char *filename,
|
||||
unsigned width, unsigned height,
|
||||
const ubyte *rgba, unsigned stride);
|
||||
#else
|
||||
#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
|
||||
#define debug_dump_surface(pipe, prefix, surface) ((void)0)
|
||||
#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
|
||||
#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
|
||||
#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
|
||||
#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -177,6 +177,7 @@ struct pstip_transform_context {
|
|||
struct tgsi_shader_info info;
|
||||
uint tempsUsed; /**< bitmask */
|
||||
int wincoordInput;
|
||||
unsigned wincoordFile;
|
||||
int maxInput;
|
||||
uint samplersUsed; /**< bitfield of samplers used */
|
||||
int freeSampler; /** an available sampler for the pstipple */
|
||||
|
|
@ -206,7 +207,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
|
|||
pctx->samplersUsed |= 1 << i;
|
||||
}
|
||||
}
|
||||
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
|
||||
else if (decl->Declaration.File == pctx->wincoordFile) {
|
||||
pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
|
||||
if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
|
||||
pctx->wincoordInput = (int) decl->Range.First;
|
||||
|
|
@ -275,10 +276,22 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
wincoordInput = pctx->wincoordInput;
|
||||
|
||||
if (pctx->wincoordInput < 0) {
|
||||
struct tgsi_full_declaration decl;
|
||||
|
||||
decl = tgsi_default_full_declaration();
|
||||
/* declare new position input reg */
|
||||
tgsi_transform_input_decl(ctx, wincoordInput,
|
||||
TGSI_SEMANTIC_POSITION, 1,
|
||||
TGSI_INTERPOLATE_LINEAR);
|
||||
decl.Declaration.File = pctx->wincoordFile;
|
||||
decl.Declaration.Semantic = 1;
|
||||
decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
|
||||
decl.Range.First =
|
||||
decl.Range.Last = wincoordInput;
|
||||
|
||||
if (pctx->wincoordFile == TGSI_FILE_INPUT) {
|
||||
decl.Declaration.Interpolate = 1;
|
||||
decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR;
|
||||
}
|
||||
|
||||
ctx->emit_declaration(ctx, &decl);
|
||||
}
|
||||
|
||||
sampIdx = pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler;
|
||||
|
|
@ -327,7 +340,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
|
||||
TGSI_FILE_TEMPORARY, texTemp,
|
||||
TGSI_WRITEMASK_XYZW,
|
||||
TGSI_FILE_INPUT, wincoordInput,
|
||||
pctx->wincoordFile, wincoordInput,
|
||||
TGSI_FILE_IMMEDIATE, pctx->numImmed);
|
||||
|
||||
/* TEX texTemp, texTemp, sampler; */
|
||||
|
|
@ -351,11 +364,15 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
* will be used to sample the stipple texture;
|
||||
* if NULL, the fixed unit is used
|
||||
* \param fixedUnit fixed texture unit used for the stipple texture
|
||||
* \param wincoordFile TGSI_FILE_INPUT or TGSI_FILE_SYSTEM_VALUE,
|
||||
* depending on which one is supported by the driver
|
||||
* for TGSI_SEMANTIC_POSITION in the fragment shader
|
||||
*/
|
||||
struct tgsi_token *
|
||||
util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
|
||||
unsigned *samplerUnitOut,
|
||||
unsigned fixedUnit)
|
||||
unsigned fixedUnit,
|
||||
unsigned wincoordFile)
|
||||
{
|
||||
struct pstip_transform_context transform;
|
||||
const uint newLen = tgsi_num_tokens(tokens) + NUM_NEW_TOKENS;
|
||||
|
|
@ -370,6 +387,7 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
|
|||
*/
|
||||
memset(&transform, 0, sizeof(transform));
|
||||
transform.wincoordInput = -1;
|
||||
transform.wincoordFile = wincoordFile;
|
||||
transform.maxInput = -1;
|
||||
transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
|
||||
transform.hasFixedUnit = !samplerUnitOut;
|
||||
|
|
|
|||
|
|
@ -50,7 +50,8 @@ util_pstipple_create_sampler(struct pipe_context *pipe);
|
|||
struct tgsi_token *
|
||||
util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
|
||||
unsigned *samplerUnitOut,
|
||||
unsigned fixed_unit);
|
||||
unsigned fixed_unit,
|
||||
unsigned wincoordFile);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
|||
310
src/gallium/auxiliary/util/u_pwr8.h
Normal file
310
src/gallium/auxiliary/util/u_pwr8.h
Normal file
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright 2015 Red Hat Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Author: Oded Gabbay <oded.gabbay@redhat.com>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* POWER8 intrinsics portability header.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef U_PWR8_H_
|
||||
#define U_PWR8_H_
|
||||
|
||||
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
|
||||
|
||||
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||
|
||||
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;
|
||||
|
||||
typedef VECTOR_ALIGN_16 union m128i {
|
||||
__m128i m128i;
|
||||
vector signed int m128si;
|
||||
vector unsigned int m128ui;
|
||||
ubyte ub[16];
|
||||
ushort us[8];
|
||||
int i[4];
|
||||
uint ui[4];
|
||||
} __m128i_union;
|
||||
|
||||
static inline __m128i
|
||||
vec_set_epi32 (int i3, int i2, int i1, int i0)
|
||||
{
|
||||
__m128i_union vdst;
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
vdst.i[0] = i0;
|
||||
vdst.i[1] = i1;
|
||||
vdst.i[2] = i2;
|
||||
vdst.i[3] = i3;
|
||||
#else
|
||||
vdst.i[3] = i0;
|
||||
vdst.i[2] = i1;
|
||||
vdst.i[1] = i2;
|
||||
vdst.i[0] = i3;
|
||||
#endif
|
||||
|
||||
return (__m128i) vdst.m128si;
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_setr_epi32 (int i0, int i1, int i2, int i3)
|
||||
{
|
||||
return vec_set_epi32 (i3, i2, i1, i0);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_unpacklo_epi32 (__m128i even, __m128i odd)
|
||||
{
|
||||
static const __m128i perm_mask =
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
|
||||
#else
|
||||
{24, 25, 26, 27, 8, 9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
|
||||
#endif
|
||||
|
||||
return vec_perm (even, odd, perm_mask);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_unpackhi_epi32 (__m128i even, __m128i odd)
|
||||
{
|
||||
static const __m128i perm_mask =
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
{ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
|
||||
#else
|
||||
{16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7};
|
||||
#endif
|
||||
|
||||
return vec_perm (even, odd, perm_mask);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_unpacklo_epi64 (__m128i even, __m128i odd)
|
||||
{
|
||||
static const __m128i perm_mask =
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
|
||||
#else
|
||||
{24, 25, 26, 27, 28, 29, 30, 31, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
#endif
|
||||
|
||||
return vec_perm (even, odd, perm_mask);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_unpackhi_epi64 (__m128i even, __m128i odd)
|
||||
{
|
||||
static const __m128i perm_mask =
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
{ 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||
#else
|
||||
{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||
#endif
|
||||
|
||||
return vec_perm (even, odd, perm_mask);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_add_epi32 (__m128i a, __m128i b)
|
||||
{
|
||||
return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_sub_epi32 (__m128i a, __m128i b)
|
||||
{
|
||||
return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
|
||||
}
|
||||
|
||||
/* Call this function ONLY on POWER8 and newer platforms */
|
||||
static inline __m128i
|
||||
vec_mullo_epi32 (__m128i a, __m128i b)
|
||||
{
|
||||
__m128i v;
|
||||
|
||||
__asm__(
|
||||
"vmuluwm %0, %1, %2 \n"
|
||||
: "=v" (v)
|
||||
: "v" (a), "v" (b)
|
||||
);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline void
|
||||
transpose4_epi32(const __m128i * restrict a,
|
||||
const __m128i * restrict b,
|
||||
const __m128i * restrict c,
|
||||
const __m128i * restrict d,
|
||||
__m128i * restrict o,
|
||||
__m128i * restrict p,
|
||||
__m128i * restrict q,
|
||||
__m128i * restrict r)
|
||||
{
|
||||
__m128i t0 = vec_unpacklo_epi32(*a, *b);
|
||||
__m128i t1 = vec_unpacklo_epi32(*c, *d);
|
||||
__m128i t2 = vec_unpackhi_epi32(*a, *b);
|
||||
__m128i t3 = vec_unpackhi_epi32(*c, *d);
|
||||
|
||||
*o = vec_unpacklo_epi64(t0, t1);
|
||||
*p = vec_unpackhi_epi64(t0, t1);
|
||||
*q = vec_unpacklo_epi64(t2, t3);
|
||||
*r = vec_unpackhi_epi64(t2, t3);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_slli_epi32 (__m128i vsrc, unsigned int count)
|
||||
{
|
||||
__m128i_union vec_count;
|
||||
|
||||
if (count >= 32)
|
||||
return (__m128i) vec_splats (0);
|
||||
else if (count == 0)
|
||||
return vsrc;
|
||||
|
||||
/* In VMX, all shift count fields must contain the same value */
|
||||
vec_count.m128si = (vector signed int) vec_splats (count);
|
||||
return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_srli_epi32 (__m128i vsrc, unsigned int count)
|
||||
{
|
||||
__m128i_union vec_count;
|
||||
|
||||
if (count >= 32)
|
||||
return (__m128i) vec_splats (0);
|
||||
else if (count == 0)
|
||||
return vsrc;
|
||||
|
||||
/* In VMX, all shift count fields must contain the same value */
|
||||
vec_count.m128si = (vector signed int) vec_splats (count);
|
||||
return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_srai_epi32 (__m128i vsrc, unsigned int count)
|
||||
{
|
||||
__m128i_union vec_count;
|
||||
|
||||
if (count >= 32)
|
||||
return (__m128i) vec_splats (0);
|
||||
else if (count == 0)
|
||||
return vsrc;
|
||||
|
||||
/* In VMX, all shift count fields must contain the same value */
|
||||
vec_count.m128si = (vector signed int) vec_splats (count);
|
||||
return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_cmpeq_epi32 (__m128i a, __m128i b)
|
||||
{
|
||||
return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_loadu_si128 (const uint32_t* src)
|
||||
{
|
||||
__m128i_union vsrc;
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
|
||||
vsrc.m128ui = *((vector unsigned int *) src);
|
||||
|
||||
#else
|
||||
|
||||
__m128i vmask, tmp1, tmp2;
|
||||
|
||||
vmask = vec_lvsl(0, src);
|
||||
|
||||
tmp1 = (__m128i) vec_ld (0, src);
|
||||
tmp2 = (__m128i) vec_ld (15, src);
|
||||
vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);
|
||||
|
||||
#endif
|
||||
|
||||
return vsrc.m128i;
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_load_si128 (const uint32_t* src)
|
||||
{
|
||||
__m128i_union vsrc;
|
||||
|
||||
vsrc.m128ui = *((vector unsigned int *) src);
|
||||
|
||||
return vsrc.m128i;
|
||||
}
|
||||
|
||||
static inline void
|
||||
vec_store_si128 (uint32_t* dest, __m128i vdata)
|
||||
{
|
||||
vec_st ((vector unsigned int) vdata, 0, dest);
|
||||
}
|
||||
|
||||
/* Call this function ONLY on POWER8 and newer platforms */
|
||||
static inline int
|
||||
vec_movemask_epi8 (__m128i vsrc)
|
||||
{
|
||||
__m128i_union vtemp;
|
||||
int result;
|
||||
|
||||
vtemp.m128i = vec_vgbbd(vsrc);
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
result = vtemp.ub[15] << 8 | vtemp.ub[7];
|
||||
#else
|
||||
result = vtemp.ub[0] << 8 | vtemp.ub[8];
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_packs_epi16 (__m128i a, __m128i b)
|
||||
{
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
return (__m128i) vec_packs ((vector signed short) a,
|
||||
(vector signed short) b);
|
||||
#else
|
||||
return (__m128i) vec_packs ((vector signed short) b,
|
||||
(vector signed short) a);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
vec_packs_epi32 (__m128i a, __m128i b)
|
||||
{
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
|
||||
#else
|
||||
return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
|
||||
|
||||
#endif /* U_PWR8_H_ */
|
||||
|
|
@ -166,14 +166,49 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
|
|||
#endif /* !PIPE_ARCH_SSSE3 */
|
||||
|
||||
|
||||
/*
|
||||
* Provide an SSE implementation of _mm_mul_epi32() in terms of
|
||||
* _mm_mul_epu32().
|
||||
*
|
||||
* Basically, albeit surprising at first (and second, and third...) look
|
||||
* if a * b is done signed instead of unsigned, can just
|
||||
* subtract b from the high bits of the result if a is negative
|
||||
* (and the same for a if b is negative). Modular arithmetic at its best!
|
||||
*
|
||||
* So for int32 a,b in crude pseudo-code ("*" here denoting a widening mul)
|
||||
* fixupb = (signmask(b) & a) << 32ULL
|
||||
* fixupa = (signmask(a) & b) << 32ULL
|
||||
* a * b = (unsigned)a * (unsigned)b - fixupb - fixupa
|
||||
* = (unsigned)a * (unsigned)b -(fixupb + fixupa)
|
||||
*
|
||||
* This does both lo (dwords 0/2) and hi parts (1/3) at the same time due
|
||||
* to some optimization potential.
|
||||
*/
|
||||
static inline __m128i
|
||||
mm_mullohi_epi32(const __m128i a, const __m128i b, __m128i *res13)
|
||||
{
|
||||
__m128i a13, b13, mul02, mul13;
|
||||
__m128i anegmask, bnegmask, fixup, fixup02, fixup13;
|
||||
a13 = _mm_shuffle_epi32(a, _MM_SHUFFLE(2,3,0,1));
|
||||
b13 = _mm_shuffle_epi32(b, _MM_SHUFFLE(2,3,0,1));
|
||||
anegmask = _mm_srai_epi32(a, 31);
|
||||
bnegmask = _mm_srai_epi32(b, 31);
|
||||
fixup = _mm_add_epi32(_mm_and_si128(anegmask, b),
|
||||
_mm_and_si128(bnegmask, a));
|
||||
mul02 = _mm_mul_epu32(a, b);
|
||||
mul13 = _mm_mul_epu32(a13, b13);
|
||||
fixup02 = _mm_slli_epi64(fixup, 32);
|
||||
fixup13 = _mm_and_si128(fixup, _mm_set_epi32(-1,0,-1,0));
|
||||
*res13 = _mm_sub_epi64(mul13, fixup13);
|
||||
return _mm_sub_epi64(mul02, fixup02);
|
||||
}
|
||||
|
||||
|
||||
/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of
|
||||
* _mm_mul_epu32().
|
||||
*
|
||||
* I suspect this works fine for us because one of our operands is
|
||||
* always positive, but not sure that this can be used for general
|
||||
* signed integer multiplication.
|
||||
* This always works regardless the signs of the operands, since
|
||||
* the high bits (which would be different) aren't used.
|
||||
*
|
||||
* This seems close enough to the speed of SSE4 and the real
|
||||
* _mm_mullo_epi32() intrinsic as to not justify adding an sse4
|
||||
|
|
@ -188,6 +223,12 @@ static inline __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
|
|||
|
||||
/* Interleave the results, either with shuffles or (slightly
|
||||
* faster) direct bit operations:
|
||||
* XXX: might be only true for some cpus (in particular 65nm
|
||||
* Core 2). On most cpus (including that Core 2, but not Nehalem...)
|
||||
* using _mm_shuffle_ps/_mm_shuffle_epi32 might also be faster
|
||||
* than using the 3 instructions below. But logic should be fine
|
||||
* as well, we can't have optimal solution for all cpus (if anything,
|
||||
* should just use _mm_mullo_epi32() if sse41 is available...).
|
||||
*/
|
||||
#if 0
|
||||
__m128i ba8 = _mm_shuffle_epi32(ba, 8);
|
||||
|
|
@ -214,17 +255,44 @@ transpose4_epi32(const __m128i * restrict a,
|
|||
__m128i * restrict q,
|
||||
__m128i * restrict r)
|
||||
{
|
||||
__m128i t0 = _mm_unpacklo_epi32(*a, *b);
|
||||
__m128i t1 = _mm_unpacklo_epi32(*c, *d);
|
||||
__m128i t2 = _mm_unpackhi_epi32(*a, *b);
|
||||
__m128i t3 = _mm_unpackhi_epi32(*c, *d);
|
||||
__m128i t0 = _mm_unpacklo_epi32(*a, *b);
|
||||
__m128i t1 = _mm_unpacklo_epi32(*c, *d);
|
||||
__m128i t2 = _mm_unpackhi_epi32(*a, *b);
|
||||
__m128i t3 = _mm_unpackhi_epi32(*c, *d);
|
||||
|
||||
*o = _mm_unpacklo_epi64(t0, t1);
|
||||
*p = _mm_unpackhi_epi64(t0, t1);
|
||||
*q = _mm_unpacklo_epi64(t2, t3);
|
||||
*r = _mm_unpackhi_epi64(t2, t3);
|
||||
*o = _mm_unpacklo_epi64(t0, t1);
|
||||
*p = _mm_unpackhi_epi64(t0, t1);
|
||||
*q = _mm_unpacklo_epi64(t2, t3);
|
||||
*r = _mm_unpackhi_epi64(t2, t3);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Same as above, except the first two values are already interleaved
|
||||
* (i.e. contain 64bit values).
|
||||
*/
|
||||
static inline void
|
||||
transpose2_64_2_32(const __m128i * restrict a01,
|
||||
const __m128i * restrict a23,
|
||||
const __m128i * restrict c,
|
||||
const __m128i * restrict d,
|
||||
__m128i * restrict o,
|
||||
__m128i * restrict p,
|
||||
__m128i * restrict q,
|
||||
__m128i * restrict r)
|
||||
{
|
||||
__m128i t0 = *a01;
|
||||
__m128i t1 = _mm_unpacklo_epi32(*c, *d);
|
||||
__m128i t2 = *a23;
|
||||
__m128i t3 = _mm_unpackhi_epi32(*c, *d);
|
||||
|
||||
*o = _mm_unpacklo_epi64(t0, t1);
|
||||
*p = _mm_unpackhi_epi64(t0, t1);
|
||||
*q = _mm_unpacklo_epi64(t2, t3);
|
||||
*r = _mm_unpackhi_epi64(t2, t3);
|
||||
}
|
||||
|
||||
|
||||
/* Replicate 32-bit element i of m into all four element positions of the
 * result. i ends up in the shuffle selector built by _MM_SHUFFLE, so it is
 * presumably required to be a compile-time constant in the range 0..3.
 */
#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -600,7 +600,8 @@ is_box_inside_resource(const struct pipe_resource *res,
|
|||
depth = res->array_size;
|
||||
assert(res->array_size % 6 == 0);
|
||||
break;
|
||||
case PIPE_MAX_TEXTURE_TYPES:;
|
||||
case PIPE_MAX_TEXTURE_TYPES:
|
||||
break;
|
||||
}
|
||||
|
||||
return box->x >= 0 &&
|
||||
|
|
|
|||
|
|
@ -42,8 +42,8 @@ struct u_upload_mgr {
|
|||
struct pipe_context *pipe;
|
||||
|
||||
unsigned default_size; /* Minimum size of the upload buffer, in bytes. */
|
||||
unsigned alignment; /* Alignment of each sub-allocation. */
|
||||
unsigned bind; /* Bitmask of PIPE_BIND_* flags. */
|
||||
unsigned usage; /* PIPE_USAGE_* */
|
||||
unsigned map_flags; /* Bitmask of PIPE_TRANSFER_* flags. */
|
||||
boolean map_persistent; /* If persistent mappings are supported. */
|
||||
|
||||
|
|
@ -55,10 +55,9 @@ struct u_upload_mgr {
|
|||
};
|
||||
|
||||
|
||||
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
|
||||
unsigned default_size,
|
||||
unsigned alignment,
|
||||
unsigned bind )
|
||||
struct u_upload_mgr *
|
||||
u_upload_create(struct pipe_context *pipe, unsigned default_size,
|
||||
unsigned bind, unsigned usage)
|
||||
{
|
||||
struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
|
||||
if (!upload)
|
||||
|
|
@ -66,8 +65,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
|
|||
|
||||
upload->pipe = pipe;
|
||||
upload->default_size = default_size;
|
||||
upload->alignment = alignment;
|
||||
upload->bind = bind;
|
||||
upload->usage = usage;
|
||||
|
||||
upload->map_persistent =
|
||||
pipe->screen->get_param(pipe->screen,
|
||||
|
|
@ -149,7 +148,7 @@ u_upload_alloc_buffer(struct u_upload_mgr *upload,
|
|||
buffer.target = PIPE_BUFFER;
|
||||
buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
|
||||
buffer.bind = upload->bind;
|
||||
buffer.usage = PIPE_USAGE_STREAM;
|
||||
buffer.usage = upload->usage;
|
||||
buffer.width0 = size;
|
||||
buffer.height0 = 1;
|
||||
buffer.depth0 = 1;
|
||||
|
|
@ -181,19 +180,24 @@ void
|
|||
u_upload_alloc(struct u_upload_mgr *upload,
|
||||
unsigned min_out_offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf,
|
||||
void **ptr)
|
||||
{
|
||||
unsigned alloc_size = align(size, upload->alignment);
|
||||
unsigned alloc_offset = align(min_out_offset, upload->alignment);
|
||||
unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
|
||||
unsigned offset;
|
||||
|
||||
min_out_offset = align(min_out_offset, alignment);
|
||||
|
||||
offset = align(upload->offset, alignment);
|
||||
offset = MAX2(offset, min_out_offset);
|
||||
|
||||
/* Make sure we have enough space in the upload buffer
|
||||
* for the sub-allocation. */
|
||||
if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
|
||||
u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
|
||||
* for the sub-allocation.
|
||||
*/
|
||||
if (unlikely(!upload->buffer || offset + size > buffer_size)) {
|
||||
u_upload_alloc_buffer(upload, min_out_offset + size);
|
||||
|
||||
if (unlikely(!upload->buffer)) {
|
||||
*out_offset = ~0;
|
||||
|
|
@ -202,11 +206,10 @@ u_upload_alloc(struct u_upload_mgr *upload,
|
|||
return;
|
||||
}
|
||||
|
||||
offset = min_out_offset;
|
||||
buffer_size = upload->buffer->width0;
|
||||
}
|
||||
|
||||
offset = MAX2(upload->offset, alloc_offset);
|
||||
|
||||
if (unlikely(!upload->map)) {
|
||||
upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
|
||||
offset,
|
||||
|
|
@ -224,8 +227,8 @@ u_upload_alloc(struct u_upload_mgr *upload,
|
|||
upload->map -= offset;
|
||||
}
|
||||
|
||||
assert(offset < upload->buffer->width0);
|
||||
assert(offset + size <= upload->buffer->width0);
|
||||
assert(offset < buffer_size);
|
||||
assert(offset + size <= buffer_size);
|
||||
assert(size);
|
||||
|
||||
/* Emit the return values: */
|
||||
|
|
@ -233,19 +236,20 @@ u_upload_alloc(struct u_upload_mgr *upload,
|
|||
pipe_resource_reference(outbuf, upload->buffer);
|
||||
*out_offset = offset;
|
||||
|
||||
upload->offset = offset + alloc_size;
|
||||
upload->offset = offset + size;
|
||||
}
|
||||
|
||||
void u_upload_data(struct u_upload_mgr *upload,
|
||||
unsigned min_out_offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
const void *data,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf)
|
||||
{
|
||||
uint8_t *ptr;
|
||||
|
||||
u_upload_alloc(upload, min_out_offset, size,
|
||||
u_upload_alloc(upload, min_out_offset, size, alignment,
|
||||
out_offset, outbuf,
|
||||
(void**)&ptr);
|
||||
if (ptr)
|
||||
|
|
@ -257,6 +261,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
|
|||
unsigned min_out_offset,
|
||||
unsigned offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
struct pipe_resource *inbuf,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf)
|
||||
|
|
@ -278,6 +283,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
|
|||
if (0)
|
||||
debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
|
||||
|
||||
u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
|
||||
u_upload_data(upload, min_out_offset, size, alignment,
|
||||
map, out_offset, outbuf);
|
||||
pipe_buffer_unmap( upload->pipe, transfer );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,13 +43,12 @@ struct pipe_resource;
|
|||
*
|
||||
* \param pipe Pipe driver.
|
||||
* \param default_size Minimum size of the upload buffer, in bytes.
|
||||
* \param alignment Alignment of each suballocation in the upload buffer.
|
||||
* \param bind Bitmask of PIPE_BIND_* flags.
|
||||
* \param usage PIPE_USAGE_*
|
||||
*/
|
||||
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
|
||||
unsigned default_size,
|
||||
unsigned alignment,
|
||||
unsigned bind );
|
||||
struct u_upload_mgr *
|
||||
u_upload_create(struct pipe_context *pipe, unsigned default_size,
|
||||
unsigned bind, unsigned usage);
|
||||
|
||||
/**
|
||||
* Destroy the upload manager.
|
||||
|
|
@ -74,6 +73,7 @@ void u_upload_unmap( struct u_upload_mgr *upload );
|
|||
* \param upload Upload manager
|
||||
* \param min_out_offset Minimum offset that should be returned in out_offset.
|
||||
* \param size Size of the allocation.
|
||||
* \param alignment Alignment of the suballocation within the buffer
|
||||
* \param out_offset Pointer to where the new buffer offset will be returned.
|
||||
* \param outbuf Pointer to where the upload buffer will be returned.
|
||||
* \param ptr Pointer to the allocated memory that is returned.
|
||||
|
|
@ -81,6 +81,7 @@ void u_upload_unmap( struct u_upload_mgr *upload );
|
|||
void u_upload_alloc(struct u_upload_mgr *upload,
|
||||
unsigned min_out_offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf,
|
||||
void **ptr);
|
||||
|
|
@ -95,6 +96,7 @@ void u_upload_alloc(struct u_upload_mgr *upload,
|
|||
void u_upload_data(struct u_upload_mgr *upload,
|
||||
unsigned min_out_offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
const void *data,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf);
|
||||
|
|
@ -110,6 +112,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
|
|||
unsigned min_out_offset,
|
||||
unsigned offset,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
struct pipe_resource *inbuf,
|
||||
unsigned *out_offset,
|
||||
struct pipe_resource **outbuf);
|
||||
|
|
|
|||
|
|
@ -314,8 +314,9 @@ u_vbuf_create(struct pipe_context *pipe,
|
|||
mgr->translate_cache = translate_cache_create();
|
||||
memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
|
||||
|
||||
mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4,
|
||||
PIPE_BIND_VERTEX_BUFFER);
|
||||
mgr->uploader = u_upload_create(pipe, 1024 * 1024,
|
||||
PIPE_BIND_VERTEX_BUFFER,
|
||||
PIPE_USAGE_STREAM);
|
||||
|
||||
return mgr;
|
||||
}
|
||||
|
|
@ -454,7 +455,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
|
|||
|
||||
/* Create and map the output buffer. */
|
||||
u_upload_alloc(mgr->uploader, 0,
|
||||
key->output_stride * num_indices,
|
||||
key->output_stride * num_indices, 4,
|
||||
&out_offset, &out_buffer,
|
||||
(void**)&out_map);
|
||||
if (!out_buffer)
|
||||
|
|
@ -487,7 +488,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
|
|||
/* Create and map the output buffer. */
|
||||
u_upload_alloc(mgr->uploader,
|
||||
key->output_stride * start_vertex,
|
||||
key->output_stride * num_vertices,
|
||||
key->output_stride * num_vertices, 4,
|
||||
&out_offset, &out_buffer,
|
||||
(void**)&out_map);
|
||||
if (!out_buffer)
|
||||
|
|
@ -987,7 +988,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
|
|||
real_vb = &mgr->real_vertex_buffer[i];
|
||||
ptr = mgr->vertex_buffer[i].user_buffer;
|
||||
|
||||
u_upload_data(mgr->uploader, start, end - start, ptr + start,
|
||||
u_upload_data(mgr->uploader, start, end - start, 4, ptr + start,
|
||||
&real_vb->buffer_offset, &real_vb->buffer);
|
||||
if (!real_vb->buffer)
|
||||
return PIPE_ERROR_OUT_OF_MEMORY;
|
||||
|
|
|
|||
|
|
@ -716,6 +716,7 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
|
|||
/* Allocate new memory for vertices. */
|
||||
u_upload_alloc(c->upload, 0,
|
||||
c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4, /* size */
|
||||
4, /* alignment */
|
||||
&c->vertex_buf.buffer_offset, &c->vertex_buf.buffer,
|
||||
(void**)&vb);
|
||||
|
||||
|
|
@ -1090,7 +1091,8 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
|
|||
|
||||
c->pipe = pipe;
|
||||
|
||||
c->upload = u_upload_create(pipe, 128 * 1024, 4, PIPE_BIND_VERTEX_BUFFER);
|
||||
c->upload = u_upload_create(pipe, 128 * 1024, PIPE_BIND_VERTEX_BUFFER,
|
||||
PIPE_USAGE_STREAM);
|
||||
|
||||
if (!c->upload)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -79,14 +79,18 @@ calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src bloc
|
|||
}
|
||||
|
||||
static struct ureg_dst
|
||||
calc_line(struct ureg_program *shader)
|
||||
calc_line(struct pipe_screen *screen, struct ureg_program *shader)
|
||||
{
|
||||
struct ureg_dst tmp;
|
||||
struct ureg_src pos;
|
||||
|
||||
tmp = ureg_DECL_temporary(shader);
|
||||
|
||||
pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);
|
||||
if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL))
|
||||
pos = ureg_DECL_system_value(shader, TGSI_SEMANTIC_POSITION, 0);
|
||||
else
|
||||
pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS,
|
||||
TGSI_INTERPOLATE_LINEAR);
|
||||
|
||||
/*
|
||||
* tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
|
||||
|
|
@ -177,7 +181,7 @@ create_ref_frag_shader(struct vl_mc *r)
|
|||
|
||||
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
|
||||
|
||||
field = calc_line(shader);
|
||||
field = calc_line(r->pipe->screen, shader);
|
||||
|
||||
/*
|
||||
* ref = field.z ? tc[1] : tc[0]
|
||||
|
|
@ -324,7 +328,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
|
|||
|
||||
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
|
||||
|
||||
tmp = calc_line(shader);
|
||||
tmp = calc_line(r->pipe->screen, shader);
|
||||
|
||||
/*
|
||||
* if (field == tc.w)
|
||||
|
|
|
|||
|
|
@ -792,7 +792,7 @@ vl_mpeg12_end_frame(struct pipe_video_codec *decoder,
|
|||
for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
|
||||
if (!ref_frames[j] || !ref_frames[j][i]) continue;
|
||||
|
||||
vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
|
||||
vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);
|
||||
dec->context->set_vertex_buffers(dec->context, 0, 3, vb);
|
||||
|
||||
vl_mc_render_ref(i ? &dec->mc_c : &dec->mc_y, &buf->mc[i], ref_frames[j][i]);
|
||||
|
|
|
|||
|
|
@ -213,6 +213,11 @@ The integer capabilities:
|
|||
* ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments
|
||||
{ count, instance_count, start, index_bias } from a PIPE_BUFFER resource.
|
||||
See pipe_draw_info.
|
||||
* ``PIPE_CAP_MULTI_DRAW_INDIRECT``: Whether the driver supports
|
||||
pipe_draw_info::indirect_stride and ::indirect_count
|
||||
* ``PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS``: Whether the driver supports
|
||||
taking the number of indirect draws from a separate parameter
|
||||
buffer, see pipe_draw_info::indirect_params.
|
||||
* ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports
|
||||
the FINE versions of DDX/DDY.
|
||||
* ``PIPE_CAP_VENDOR_ID``: The vendor ID of the underlying hardware. If it's
|
||||
|
|
@ -239,8 +244,7 @@ The integer capabilities:
|
|||
will need to lower TGSI_SEMANTIC_VERTEXID to TGSI_SEMANTIC_VERTEXID_NOBASE
|
||||
and TGSI_SEMANTIC_BASEVERTEX, so drivers setting this must handle both these
|
||||
semantics. Only relevant if geometry shaders are supported.
|
||||
(Currently not possible to query availability of these two semantics outside
|
||||
this, at least BASEVERTEX should be exposed separately too).
|
||||
(BASEVERTEX could be exposed separately too via ``PIPE_CAP_DRAW_PARAMETERS``).
|
||||
* ``PIPE_CAP_POLYGON_OFFSET_CLAMP``: If true, the driver implements support
|
||||
for ``pipe_rasterizer_state::offset_clamp``.
|
||||
* ``PIPE_CAP_MULTISAMPLE_Z_RESOLVE``: Whether the driver supports blitting
|
||||
|
|
@ -283,6 +287,20 @@ The integer capabilities:
|
|||
a compressed block is copied to/from a plain pixel of the same size.
|
||||
* ``PIPE_CAP_CLEAR_TEXTURE``: Whether ``clear_texture`` will be
|
||||
available in contexts.
|
||||
* ``PIPE_CAP_DRAW_PARAMETERS``: Whether ``TGSI_SEMANTIC_BASEVERTEX``,
|
||||
``TGSI_SEMANTIC_BASEINSTANCE``, and ``TGSI_SEMANTIC_DRAWID`` are
|
||||
supported in vertex shaders.
|
||||
* ``PIPE_CAP_TGSI_PACK_HALF_FLOAT``: Whether the ``UP2H`` and ``PK2H``
|
||||
TGSI opcodes are supported.
|
||||
* ``PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL``: If state trackers should use
|
||||
a system value for the POSITION fragment shader input.
|
||||
* ``PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL``: If state trackers should use
|
||||
a system value for the FACE fragment shader input.
|
||||
Also, the FACE system value is integer, not float.
|
||||
* ``PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT``: Describes the required
|
||||
alignment for pipe_shader_buffer::buffer_offset, in bytes. Maximum
|
||||
value allowed is 256 (for GL conformance). 0 is only allowed if
|
||||
shader buffers are not supported.
|
||||
|
||||
|
||||
.. _pipe_capf:
|
||||
|
|
@ -375,6 +393,10 @@ to be 0.
|
|||
of iterations that loops are allowed to have to be unrolled. It is only
|
||||
a hint to state trackers. Whether any loops will be unrolled is not
|
||||
guaranteed.
|
||||
* ``PIPE_SHADER_CAP_MAX_SHADER_BUFFERS``: Maximum number of memory buffers
|
||||
(also used to implement atomic counters). Having this be non-0 also
|
||||
implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI
|
||||
opcodes.
|
||||
|
||||
|
||||
.. _pipe_compute_cap:
|
||||
|
|
|
|||
|
|
@ -458,7 +458,11 @@ while DDY is allowed to be the same for the entire 2x2 quad.
|
|||
|
||||
.. opcode:: PK2H - Pack Two 16-bit Floats
|
||||
|
||||
TBD
|
||||
This instruction replicates its result.
|
||||
|
||||
.. math::
|
||||
|
||||
dst = f32\_to\_f16(src.x) | f32\_to\_f16(src.y) << 16
|
||||
|
||||
|
||||
.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
|
||||
|
|
@ -615,7 +619,15 @@ This instruction replicates its result.
|
|||
|
||||
.. opcode:: UP2H - Unpack Two 16-Bit Floats
|
||||
|
||||
TBD
|
||||
.. math::
|
||||
|
||||
dst.x = f16\_to\_f32(src0.x \& 0xffff)
|
||||
|
||||
dst.y = f16\_to\_f32(src0.x >> 16)
|
||||
|
||||
dst.z = f16\_to\_f32(src0.x \& 0xffff)
|
||||
|
||||
dst.w = f16\_to\_f32(src0.x >> 16)
|
||||
|
||||
.. note::
|
||||
|
||||
|
|
@ -2252,11 +2264,11 @@ after lookup.
|
|||
Resource Access Opcodes
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. opcode:: LOAD - Fetch data from a shader resource
|
||||
.. opcode:: LOAD - Fetch data from a shader buffer or image
|
||||
|
||||
Syntax: ``LOAD dst, resource, address``
|
||||
|
||||
Example: ``LOAD TEMP[0], RES[0], TEMP[1]``
|
||||
Example: ``LOAD TEMP[0], BUFFER[0], TEMP[1]``
|
||||
|
||||
Using the provided integer address, LOAD fetches data
|
||||
from the specified buffer or texture without any
|
||||
|
|
@ -2280,7 +2292,7 @@ Resource Access Opcodes
|
|||
|
||||
Syntax: ``STORE resource, address, src``
|
||||
|
||||
Example: ``STORE RES[0], TEMP[0], TEMP[1]``
|
||||
Example: ``STORE BUFFER[0], TEMP[0], TEMP[1]``
|
||||
|
||||
Using the provided integer address, STORE writes data
|
||||
to the specified buffer or texture.
|
||||
|
|
@ -2299,6 +2311,18 @@ Resource Access Opcodes
|
|||
texture arrays and 2D textures. address.w is always
|
||||
ignored.
|
||||
|
||||
.. opcode:: RESQ - Query information about a resource
|
||||
|
||||
Syntax: ``RESQ dst, resource``
|
||||
|
||||
Example: ``RESQ TEMP[0], BUFFER[0]``
|
||||
|
||||
Returns information about the buffer or image resource. For buffer
|
||||
resources, the size (in bytes) is returned in the x component. For
|
||||
image resources, .xyz will contain the width/height/layers of the
|
||||
image, while .w will contain the number of samples for multi-sampled
|
||||
images.
|
||||
|
||||
|
||||
.. _threadsyncopcodes:
|
||||
|
||||
|
|
@ -2358,158 +2382,159 @@ These opcodes provide atomic variants of some common arithmetic and
|
|||
logical operations. In this context atomicity means that another
|
||||
concurrent memory access operation that affects the same memory
|
||||
location is guaranteed to be performed strictly before or after the
|
||||
entire execution of the atomic operation.
|
||||
|
||||
For the moment they're only valid in compute programs.
|
||||
entire execution of the atomic operation. The resource may be a buffer
|
||||
or an image. In the case of an image, the offset works the same as for
|
||||
``LOAD`` and ``STORE``, specified above. These atomic operations may
|
||||
only be used with 32-bit integer image formats.
|
||||
|
||||
.. opcode:: ATOMUADD - Atomic integer addition
|
||||
|
||||
Syntax: ``ATOMUADD dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMUADD TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMUADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = dst_i + src_i
|
||||
resource[offset] = dst_x + src_x
|
||||
|
||||
|
||||
.. opcode:: ATOMXCHG - Atomic exchange
|
||||
|
||||
Syntax: ``ATOMXCHG dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMXCHG TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMXCHG TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = src_i
|
||||
resource[offset] = src_x
|
||||
|
||||
|
||||
.. opcode:: ATOMCAS - Atomic compare-and-exchange
|
||||
|
||||
Syntax: ``ATOMCAS dst, resource, offset, cmp, src``
|
||||
|
||||
Example: ``ATOMCAS TEMP[0], RES[0], TEMP[1], TEMP[2], TEMP[3]``
|
||||
Example: ``ATOMCAS TEMP[0], BUFFER[0], TEMP[1], TEMP[2], TEMP[3]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = (dst_i == cmp_i ? src_i : dst_i)
|
||||
resource[offset] = (dst_x == cmp_x ? src_x : dst_x)
|
||||
|
||||
|
||||
.. opcode:: ATOMAND - Atomic bitwise And
|
||||
|
||||
Syntax: ``ATOMAND dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMAND TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMAND TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = dst_i \& src_i
|
||||
resource[offset] = dst_x \& src_x
|
||||
|
||||
|
||||
.. opcode:: ATOMOR - Atomic bitwise Or
|
||||
|
||||
Syntax: ``ATOMOR dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = dst_i | src_i
|
||||
resource[offset] = dst_x | src_x
|
||||
|
||||
|
||||
.. opcode:: ATOMXOR - Atomic bitwise Xor
|
||||
|
||||
Syntax: ``ATOMXOR dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMXOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMXOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = dst_i \oplus src_i
|
||||
resource[offset] = dst_x \oplus src_x
|
||||
|
||||
|
||||
.. opcode:: ATOMUMIN - Atomic unsigned minimum
|
||||
|
||||
Syntax: ``ATOMUMIN dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMUMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMUMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
|
||||
resource[offset] = (dst_x < src_x ? dst_x : src_x)
|
||||
|
||||
|
||||
.. opcode:: ATOMUMAX - Atomic unsigned maximum
|
||||
|
||||
Syntax: ``ATOMUMAX dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMUMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMUMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
|
||||
resource[offset] = (dst_x > src_x ? dst_x : src_x)
|
||||
|
||||
|
||||
.. opcode:: ATOMIMIN - Atomic signed minimum
|
||||
|
||||
Syntax: ``ATOMIMIN dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMIMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMIMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
|
||||
resource[offset] = (dst_x < src_x ? dst_x : src_x)
|
||||
|
||||
|
||||
.. opcode:: ATOMIMAX - Atomic signed maximum
|
||||
|
||||
Syntax: ``ATOMIMAX dst, resource, offset, src``
|
||||
|
||||
Example: ``ATOMIMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
|
||||
Example: ``ATOMIMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
|
||||
|
||||
The following operation is performed atomically on each component:
|
||||
The following operation is performed atomically:
|
||||
|
||||
.. math::
|
||||
|
||||
dst_i = resource[offset]_i
|
||||
dst_x = resource[offset]
|
||||
|
||||
resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
|
||||
resource[offset] = (dst_x > src_x ? dst_x : src_x)
|
||||
|
||||
|
||||
|
||||
|
|
@ -2646,7 +2671,8 @@ space coordinate system. After clipping, the X, Y and Z components of the
|
|||
vertex will be divided by the W value to get normalized device coordinates.
|
||||
|
||||
For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
|
||||
fragment shader input contains the fragment's window position. The X
|
||||
fragment shader input (or system value, depending on which one is
|
||||
supported by the driver) contains the fragment's window position. The X
|
||||
component starts at zero and always increases from left to right.
|
||||
The Y component starts at zero and always increases but Y=0 may either
|
||||
indicate the top of the window or the bottom depending on the fragment
|
||||
|
|
@ -2758,11 +2784,17 @@ typically only used for legacy graphics APIs.
|
|||
TGSI_SEMANTIC_FACE
|
||||
""""""""""""""""""
|
||||
|
||||
This label applies to fragment shader inputs only and indicates that
|
||||
the register contains front/back-face information of the form (F, 0,
|
||||
0, 1). The first component will be positive when the fragment belongs
|
||||
to a front-facing polygon, and negative when the fragment belongs to a
|
||||
back-facing polygon.
|
||||
This label applies to fragment shader inputs (or system values,
|
||||
depending on which one is supported by the driver) and indicates that
|
||||
the register contains front/back-face information.
|
||||
|
||||
If it is an input, it will be a floating-point vector in the form (F, 0, 0, 1),
|
||||
where F will be positive when the fragment belongs to a front-facing polygon,
|
||||
and negative when the fragment belongs to a back-facing polygon.
|
||||
|
||||
If it is a system value, it will be an integer vector in the form (F, 0, 0, 1),
|
||||
where F is 0xffffffff when the fragment belongs to a front-facing polygon and
|
||||
0 when the fragment belongs to a back-facing polygon.
|
||||
|
||||
|
||||
TGSI_SEMANTIC_EDGEFLAG
|
||||
|
|
@ -2949,6 +2981,19 @@ invocation is covered or not. Helper invocations are created in order
|
|||
to properly compute derivatives, however it may be desirable to skip
|
||||
some of the logic in those cases. See ``gl_HelperInvocation`` documentation.
|
||||
|
||||
TGSI_SEMANTIC_BASEINSTANCE
|
||||
""""""""""""""""""""""""""
|
||||
|
||||
For vertex shaders, the base instance argument supplied for this
|
||||
draw. This is an integer value, and only the X component is used.
|
||||
|
||||
TGSI_SEMANTIC_DRAWID
|
||||
""""""""""""""""""""
|
||||
|
||||
For vertex shaders, the zero-based index of the current draw in a
|
||||
``glMultiDraw*`` invocation. This is an integer value, and only the X
|
||||
component is used.
|
||||
|
||||
|
||||
Declaration Interpolate
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
|||
1
src/gallium/drivers/freedreno/.gitignore
vendored
Normal file
1
src/gallium/drivers/freedreno/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
ir3_compiler
|
||||
|
|
@ -128,6 +128,7 @@ ir3_SOURCES := \
|
|||
ir3/ir3_group.c \
|
||||
ir3/ir3.h \
|
||||
ir3/ir3_legalize.c \
|
||||
ir3/ir3_nir.c \
|
||||
ir3/ir3_nir.h \
|
||||
ir3/ir3_nir_lower_if_else.c \
|
||||
ir3/ir3_print.c \
|
||||
|
|
|
|||
|
|
@ -171,8 +171,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
|||
|
||||
fd3_query_context_init(pctx);
|
||||
|
||||
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
|
||||
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
|
||||
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
|
||||
PIPE_USAGE_STREAM);
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,7 +145,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
void *ptr;
|
||||
|
||||
u_upload_alloc(fd3_ctx->border_color_uploader,
|
||||
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
|
||||
0, BORDER_COLOR_UPLOAD_SIZE,
|
||||
BORDER_COLOR_UPLOAD_SIZE, &off,
|
||||
&fd3_ctx->border_color_buf,
|
||||
&ptr);
|
||||
|
||||
|
|
|
|||
|
|
@ -171,8 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
|||
|
||||
fd4_query_context_init(pctx);
|
||||
|
||||
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
|
||||
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
|
||||
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
|
||||
PIPE_USAGE_STREAM);
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
void *ptr;
|
||||
|
||||
u_upload_alloc(fd4_ctx->border_color_uploader,
|
||||
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
|
||||
0, BORDER_COLOR_UPLOAD_SIZE,
|
||||
BORDER_COLOR_UPLOAD_SIZE, &off,
|
||||
&fd4_ctx->border_color_buf,
|
||||
&ptr);
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@
|
|||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
|
||||
|
||||
struct fd_vertex_stateobj;
|
||||
|
||||
struct fd_texture_stateobj {
|
||||
|
|
|
|||
|
|
@ -226,6 +226,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
|
||||
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
|
||||
case PIPE_CAP_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
|
||||
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
|
||||
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
|
||||
|
|
@ -238,6 +240,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
@ -414,6 +421,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@
|
|||
#include "freedreno_util.h"
|
||||
|
||||
#include "ir3_compiler.h"
|
||||
#include "ir3_nir.h"
|
||||
#include "instr-a3xx.h"
|
||||
#include "ir3.h"
|
||||
|
||||
|
|
@ -105,10 +106,10 @@ int main(int argc, char **argv)
|
|||
const char *filename;
|
||||
struct tgsi_token toks[65536];
|
||||
struct tgsi_parse_context parse;
|
||||
struct ir3_compiler *compiler;
|
||||
struct ir3_shader_variant v;
|
||||
struct ir3_shader s;
|
||||
struct ir3_shader_key key = {};
|
||||
/* TODO cmdline option to target different gpus: */
|
||||
unsigned gpu_id = 320;
|
||||
const char *info;
|
||||
void *ptr;
|
||||
|
|
@ -228,7 +229,12 @@ int main(int argc, char **argv)
|
|||
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
|
||||
errx(1, "could not parse `%s'", filename);
|
||||
|
||||
s.tokens = toks;
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS)
|
||||
tgsi_dump(toks, 0);
|
||||
|
||||
nir_shader *nir = ir3_tgsi_to_nir(toks);
|
||||
s.compiler = ir3_compiler_create(gpu_id);
|
||||
s.nir = ir3_optimize_nir(&s, nir, NULL);
|
||||
|
||||
v.key = key;
|
||||
v.shader = &s;
|
||||
|
|
@ -246,11 +252,8 @@ int main(int argc, char **argv)
|
|||
break;
|
||||
}
|
||||
|
||||
/* TODO cmdline option to target different gpus: */
|
||||
compiler = ir3_compiler_create(gpu_id);
|
||||
|
||||
info = "NIR compiler";
|
||||
ret = ir3_compile_shader_nir(compiler, &v);
|
||||
ret = ir3_compile_shader_nir(s.compiler, &v);
|
||||
if (ret) {
|
||||
fprintf(stderr, "compiler failed!\n");
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -32,10 +32,6 @@
|
|||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "tgsi/tgsi_lowering.h"
|
||||
#include "tgsi/tgsi_strings.h"
|
||||
|
||||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
#include "freedreno_util.h"
|
||||
|
||||
|
|
@ -123,97 +119,10 @@ struct ir3_compile {
|
|||
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
|
||||
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
|
||||
|
||||
static struct nir_shader *to_nir(struct ir3_compile *ctx,
|
||||
const struct tgsi_token *tokens, struct ir3_shader_variant *so)
|
||||
{
|
||||
static const nir_shader_compiler_options options = {
|
||||
.lower_fpow = true,
|
||||
.lower_fsat = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp = true,
|
||||
.lower_ffract = true,
|
||||
.native_integers = true,
|
||||
};
|
||||
struct nir_lower_tex_options tex_options = {
|
||||
.lower_rect = 0,
|
||||
};
|
||||
bool progress;
|
||||
|
||||
switch (so->type) {
|
||||
case SHADER_FRAGMENT:
|
||||
case SHADER_COMPUTE:
|
||||
tex_options.saturate_s = so->key.fsaturate_s;
|
||||
tex_options.saturate_t = so->key.fsaturate_t;
|
||||
tex_options.saturate_r = so->key.fsaturate_r;
|
||||
break;
|
||||
case SHADER_VERTEX:
|
||||
tex_options.saturate_s = so->key.vsaturate_s;
|
||||
tex_options.saturate_t = so->key.vsaturate_t;
|
||||
tex_options.saturate_r = so->key.vsaturate_r;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ctx->compiler->gpu_id >= 400) {
|
||||
/* a4xx seems to have *no* sam.p */
|
||||
tex_options.lower_txp = ~0; /* lower all txp */
|
||||
} else {
|
||||
/* a3xx just needs to avoid sam.p for 3d tex */
|
||||
tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
|
||||
}
|
||||
|
||||
struct nir_shader *s = tgsi_to_nir(tokens, &options);
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
debug_printf("----------------------\n");
|
||||
nir_print_shader(s, stdout);
|
||||
debug_printf("----------------------\n");
|
||||
}
|
||||
|
||||
nir_opt_global_to_local(s);
|
||||
nir_convert_to_ssa(s);
|
||||
if (s->stage == MESA_SHADER_VERTEX) {
|
||||
nir_lower_clip_vs(s, so->key.ucp_enables);
|
||||
} else if (s->stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_lower_clip_fs(s, so->key.ucp_enables);
|
||||
}
|
||||
nir_lower_tex(s, &tex_options);
|
||||
if (so->key.color_two_side)
|
||||
nir_lower_two_sided_color(s);
|
||||
nir_lower_idiv(s);
|
||||
nir_lower_load_const_to_scalar(s);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
nir_lower_vars_to_ssa(s);
|
||||
nir_lower_alu_to_scalar(s);
|
||||
nir_lower_phis_to_scalar(s);
|
||||
|
||||
progress |= nir_copy_prop(s);
|
||||
progress |= nir_opt_dce(s);
|
||||
progress |= nir_opt_cse(s);
|
||||
progress |= ir3_nir_lower_if_else(s);
|
||||
progress |= nir_opt_algebraic(s);
|
||||
progress |= nir_opt_constant_folding(s);
|
||||
|
||||
} while (progress);
|
||||
|
||||
nir_remove_dead_variables(s);
|
||||
nir_validate_shader(s);
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
debug_printf("----------------------\n");
|
||||
nir_print_shader(s, stdout);
|
||||
debug_printf("----------------------\n");
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static struct ir3_compile *
|
||||
compile_init(struct ir3_compiler *compiler,
|
||||
struct ir3_shader_variant *so,
|
||||
const struct tgsi_token *tokens)
|
||||
struct ir3_shader_variant *so)
|
||||
{
|
||||
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
|
||||
|
||||
|
|
@ -239,7 +148,28 @@ compile_init(struct ir3_compiler *compiler,
|
|||
ctx->block_ht = _mesa_hash_table_create(ctx,
|
||||
_mesa_hash_pointer, _mesa_key_pointer_equal);
|
||||
|
||||
ctx->s = to_nir(ctx, tokens, so);
|
||||
/* TODO: maybe generate some sort of bitmask of what key
|
||||
* lowers vs what shader has (ie. no need to lower
|
||||
* texture clamp lowering if no texture sample instrs)..
|
||||
* although should be done further up the stack to avoid
|
||||
* creating duplicate variants..
|
||||
*/
|
||||
|
||||
if (ir3_key_lowers_nir(&so->key)) {
|
||||
nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
|
||||
ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
|
||||
} else {
|
||||
/* fast-path for shader key that lowers nothing in NIR: */
|
||||
ctx->s = so->shader->nir;
|
||||
}
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump nir%dv%d: type=%d, k={bp=%u,cts=%u,hp=%u}",
|
||||
so->shader->id, so->id, so->type,
|
||||
so->key.binning_pass, so->key.color_two_side,
|
||||
so->key.half_precision);
|
||||
nir_print_shader(ctx->s, stdout);
|
||||
}
|
||||
|
||||
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
|
||||
|
||||
|
|
@ -1954,8 +1884,6 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
|
|||
case nir_texop_query_levels:
|
||||
emit_tex_query_levels(ctx, tex);
|
||||
break;
|
||||
case nir_texop_samples_identical:
|
||||
unreachable("nir_texop_samples_identical");
|
||||
default:
|
||||
emit_tex(ctx, tex);
|
||||
break;
|
||||
|
|
@ -2170,6 +2098,8 @@ emit_stream_out(struct ir3_compile *ctx)
|
|||
static void
|
||||
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
|
||||
{
|
||||
nir_metadata_require(impl, nir_metadata_block_index);
|
||||
|
||||
emit_cf_list(ctx, &impl->body);
|
||||
emit_block(ctx, impl->end_block);
|
||||
|
||||
|
|
@ -2499,7 +2429,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
|
||||
assert(!so->ir);
|
||||
|
||||
ctx = compile_init(compiler, so, so->shader->tokens);
|
||||
ctx = compile_init(compiler, so);
|
||||
if (!ctx) {
|
||||
DBG("INIT failed!");
|
||||
ret = -1;
|
||||
|
|
|
|||
153
src/gallium/drivers/freedreno/ir3/ir3_nir.c
Normal file
153
src/gallium/drivers/freedreno/ir3/ir3_nir.c
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#include "ir3_nir.h"
|
||||
#include "ir3_compiler.h"
|
||||
#include "ir3_shader.h"
|
||||
|
||||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
struct nir_shader *
|
||||
ir3_tgsi_to_nir(const struct tgsi_token *tokens)
|
||||
{
|
||||
static const nir_shader_compiler_options options = {
|
||||
.lower_fpow = true,
|
||||
.lower_fsat = true,
|
||||
.lower_scmp = true,
|
||||
.lower_flrp = true,
|
||||
.lower_ffract = true,
|
||||
.native_integers = true,
|
||||
};
|
||||
return tgsi_to_nir(tokens, &options);
|
||||
}
|
||||
|
||||
/* for given shader key, are any steps handled in nir? */
|
||||
bool
|
||||
ir3_key_lowers_nir(const struct ir3_shader_key *key)
|
||||
{
|
||||
return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
|
||||
key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
|
||||
key->ucp_enables | key->color_two_side;
|
||||
}
|
||||
|
||||
/* Run a NIR pass through NIR_PASS and evaluate to a bool holding that pass's
 * progress result (presumably true when the pass changed the shader --
 * NIR_PASS is defined elsewhere, confirm there).  Uses a statement
 * expression so it can appear on the right-hand side of `|=`.
 */
#define OPT(nir, pass, ...) ({                                   \
		bool pass_progress_ = false;                             \
		NIR_PASS(pass_progress_, nir, pass, ##__VA_ARGS__);      \
		pass_progress_;                                          \
	})

/* Run a NIR pass through NIR_PASS_V, for callers that do not need a
 * progress result.
 */
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
|
||||
|
||||
struct nir_shader *
|
||||
ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
|
||||
const struct ir3_shader_key *key)
|
||||
{
|
||||
struct nir_lower_tex_options tex_options = {
|
||||
.lower_rect = 0,
|
||||
};
|
||||
bool progress;
|
||||
|
||||
if (key) {
|
||||
switch (shader->type) {
|
||||
case SHADER_FRAGMENT:
|
||||
case SHADER_COMPUTE:
|
||||
tex_options.saturate_s = key->fsaturate_s;
|
||||
tex_options.saturate_t = key->fsaturate_t;
|
||||
tex_options.saturate_r = key->fsaturate_r;
|
||||
break;
|
||||
case SHADER_VERTEX:
|
||||
tex_options.saturate_s = key->vsaturate_s;
|
||||
tex_options.saturate_t = key->vsaturate_t;
|
||||
tex_options.saturate_r = key->vsaturate_r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (shader->compiler->gpu_id >= 400) {
|
||||
/* a4xx seems to have *no* sam.p */
|
||||
tex_options.lower_txp = ~0; /* lower all txp */
|
||||
} else {
|
||||
/* a3xx just needs to avoid sam.p for 3d tex */
|
||||
tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
|
||||
}
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
debug_printf("----------------------\n");
|
||||
nir_print_shader(s, stdout);
|
||||
debug_printf("----------------------\n");
|
||||
}
|
||||
|
||||
OPT_V(s, nir_opt_global_to_local);
|
||||
OPT_V(s, nir_convert_to_ssa);
|
||||
|
||||
if (key) {
|
||||
if (s->stage == MESA_SHADER_VERTEX) {
|
||||
OPT_V(s, nir_lower_clip_vs, key->ucp_enables);
|
||||
} else if (s->stage == MESA_SHADER_FRAGMENT) {
|
||||
OPT_V(s, nir_lower_clip_fs, key->ucp_enables);
|
||||
}
|
||||
if (key->color_two_side) {
|
||||
OPT_V(s, nir_lower_two_sided_color);
|
||||
}
|
||||
}
|
||||
|
||||
OPT_V(s, nir_lower_tex, &tex_options);
|
||||
OPT_V(s, nir_lower_idiv);
|
||||
OPT_V(s, nir_lower_load_const_to_scalar);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
OPT_V(s, nir_lower_vars_to_ssa);
|
||||
OPT_V(s, nir_lower_alu_to_scalar);
|
||||
OPT_V(s, nir_lower_phis_to_scalar);
|
||||
|
||||
progress |= OPT(s, nir_copy_prop);
|
||||
progress |= OPT(s, nir_opt_dce);
|
||||
progress |= OPT(s, nir_opt_cse);
|
||||
progress |= OPT(s, ir3_nir_lower_if_else);
|
||||
progress |= OPT(s, nir_opt_algebraic);
|
||||
progress |= OPT(s, nir_opt_constant_folding);
|
||||
|
||||
} while (progress);
|
||||
|
||||
OPT_V(s, nir_remove_dead_variables);
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
debug_printf("----------------------\n");
|
||||
nir_print_shader(s, stdout);
|
||||
debug_printf("----------------------\n");
|
||||
}
|
||||
|
||||
nir_sweep(s);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
|
@ -32,6 +32,13 @@
|
|||
#include "glsl/nir/nir.h"
|
||||
#include "glsl/nir/shader_enums.h"
|
||||
|
||||
#include "ir3_shader.h"
|
||||
|
||||
bool ir3_nir_lower_if_else(nir_shader *shader);
|
||||
|
||||
struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens);
|
||||
bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
|
||||
struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
|
||||
const struct ir3_shader_key *key);
|
||||
|
||||
#endif /* IR3_NIR_H_ */
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
#include "ir3_shader.h"
|
||||
#include "ir3_compiler.h"
|
||||
|
||||
#include "ir3_nir.h"
|
||||
|
||||
static void
|
||||
delete_variant(struct ir3_shader_variant *v)
|
||||
|
|
@ -187,12 +187,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
|
|||
v->key = key;
|
||||
v->type = shader->type;
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
|
||||
key.binning_pass, key.color_two_side, key.half_precision);
|
||||
tgsi_dump(shader->tokens, 0);
|
||||
}
|
||||
|
||||
ret = ir3_compile_shader_nir(shader->compiler, v);
|
||||
if (ret) {
|
||||
debug_error("compile failed!");
|
||||
|
|
@ -267,7 +261,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
|
|||
v = v->next;
|
||||
delete_variant(t);
|
||||
}
|
||||
free((void *)shader->tokens);
|
||||
ralloc_free(shader->nir);
|
||||
free(shader);
|
||||
}
|
||||
|
||||
|
|
@ -281,14 +275,24 @@ ir3_shader_create(struct pipe_context *pctx,
|
|||
shader->id = ++shader->compiler->shader_count;
|
||||
shader->pctx = pctx;
|
||||
shader->type = type;
|
||||
shader->tokens = tgsi_dup_tokens(cso->tokens);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump tgsi: type=%d", shader->type);
|
||||
tgsi_dump(cso->tokens, 0);
|
||||
}
|
||||
nir_shader *nir = ir3_tgsi_to_nir(cso->tokens);
|
||||
/* do first pass optimization, ignoring the key: */
|
||||
shader->nir = ir3_optimize_nir(shader, nir, NULL);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump nir%d: type=%d", shader->id, shader->type);
|
||||
nir_print_shader(shader->nir, stdout);
|
||||
}
|
||||
shader->stream_output = cso->stream_output;
|
||||
if (fd_mesa_debug & FD_DBG_SHADERDB) {
|
||||
/* if shader-db run, create a standard variant immediately
|
||||
* (as otherwise nothing will trigger the shader to be
|
||||
* actually compiled)
|
||||
*/
|
||||
static struct ir3_shader_key key = {};
|
||||
static struct ir3_shader_key key = {0};
|
||||
ir3_shader_variant(shader, key);
|
||||
}
|
||||
return shader;
|
||||
|
|
|
|||
|
|
@ -230,6 +230,8 @@ struct ir3_shader_variant {
|
|||
struct ir3_shader *shader;
|
||||
};
|
||||
|
||||
typedef struct nir_shader nir_shader;
|
||||
|
||||
struct ir3_shader {
|
||||
enum shader_t type;
|
||||
|
||||
|
|
@ -240,7 +242,7 @@ struct ir3_shader {
|
|||
struct ir3_compiler *compiler;
|
||||
|
||||
struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
|
||||
const struct tgsi_token *tokens;
|
||||
nir_shader *nir;
|
||||
struct pipe_stream_output_info stream_output;
|
||||
|
||||
struct ir3_shader_variant *variants;
|
||||
|
|
|
|||
|
|
@ -195,7 +195,6 @@ struct i915_rasterizer_state {
|
|||
|
||||
unsigned light_twoside : 1;
|
||||
unsigned st;
|
||||
enum interp_mode color_interp;
|
||||
|
||||
unsigned LIS4;
|
||||
unsigned LIS7;
|
||||
|
|
|
|||
|
|
@ -254,6 +254,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
@ -264,6 +269,8 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
|
||||
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
||||
case PIPE_CAP_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ i915_prepare_vertex_sampling(struct i915_context *i915)
|
|||
for (j = view->u.tex.first_level; j <= tex->last_level; j++) {
|
||||
mip_offsets[j] = i915_texture_offset(i915_tex, j , 0 /* FIXME depth */);
|
||||
row_stride[j] = i915_tex->stride;
|
||||
img_stride[j] = 0; /* FIXME */;
|
||||
img_stride[j] = 0; /* FIXME */
|
||||
}
|
||||
|
||||
draw_set_mapped_texture(i915->draw,
|
||||
|
|
@ -920,7 +920,6 @@ i915_create_rasterizer_state(struct pipe_context *pipe,
|
|||
struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state );
|
||||
|
||||
cso->templ = *rasterizer;
|
||||
cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
|
||||
cso->light_twoside = rasterizer->light_twoside;
|
||||
cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE;
|
||||
cso->ds[1].f = rasterizer->offset_scale;
|
||||
|
|
|
|||
|
|
@ -57,7 +57,6 @@ static uint find_mapping(const struct i915_fragment_shader* fs, int unit)
|
|||
static void calculate_vertex_layout(struct i915_context *i915)
|
||||
{
|
||||
const struct i915_fragment_shader *fs = i915->fs;
|
||||
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
|
||||
struct vertex_info vinfo;
|
||||
boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face;
|
||||
uint i;
|
||||
|
|
@ -107,12 +106,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
|
|||
/* pos */
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
|
||||
if (needW) {
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
|
||||
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
|
||||
vinfo.attrib[0].emit = EMIT_4F;
|
||||
}
|
||||
else {
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_3F, src);
|
||||
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
|
||||
vinfo.attrib[0].emit = EMIT_3F;
|
||||
}
|
||||
|
|
@ -123,21 +122,21 @@ static void calculate_vertex_layout(struct i915_context *i915)
|
|||
/* primary color */
|
||||
if (colors[0]) {
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
|
||||
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
|
||||
}
|
||||
|
||||
/* secondary color */
|
||||
if (colors[1]) {
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
|
||||
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
|
||||
}
|
||||
|
||||
/* fog coord, not fog blend factor */
|
||||
if (fog) {
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
|
||||
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
|
||||
}
|
||||
|
||||
|
|
@ -147,7 +146,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
|
|||
if (texCoords[i]) {
|
||||
hwtc = TEXCOORDFMT_4D;
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
|
||||
}
|
||||
else {
|
||||
hwtc = TEXCOORDFMT_NOT_PRESENT;
|
||||
|
|
@ -164,7 +163,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
|
|||
* module by adding an extra shader output.
|
||||
*/
|
||||
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src);
|
||||
draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
|
||||
vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4));
|
||||
vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4);
|
||||
}
|
||||
|
|
@ -185,7 +184,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
|
|||
struct i915_tracked_state i915_update_vertex_layout = {
|
||||
"vertex_layout",
|
||||
calculate_vertex_layout,
|
||||
I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS
|
||||
I915_NEW_FS | I915_NEW_VS
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -333,7 +333,7 @@ ilo_builder_init(struct ilo_builder *builder,
|
|||
const struct ilo_dev *dev,
|
||||
struct intel_winsys *winsys)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
assert(ilo_is_zeroed(builder, sizeof(*builder)));
|
||||
|
||||
|
|
@ -366,7 +366,7 @@ ilo_builder_init(struct ilo_builder *builder,
|
|||
void
|
||||
ilo_builder_reset(struct ilo_builder *builder)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++)
|
||||
ilo_builder_writer_reset(builder, i);
|
||||
|
|
@ -382,7 +382,7 @@ ilo_builder_reset(struct ilo_builder *builder)
|
|||
bool
|
||||
ilo_builder_begin(struct ilo_builder *builder)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) {
|
||||
if (!ilo_builder_writer_alloc_and_map(builder, i)) {
|
||||
|
|
@ -407,7 +407,7 @@ struct intel_bo *
|
|||
ilo_builder_end(struct ilo_builder *builder, unsigned *used)
|
||||
{
|
||||
struct ilo_builder_writer *bat;
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
ilo_builder_batch_patch_sba(builder);
|
||||
|
||||
|
|
|
|||
|
|
@ -189,8 +189,9 @@ ilo_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
|
|||
* These must be called last as u_upload/u_blitter are clients of the pipe
|
||||
* context.
|
||||
*/
|
||||
ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024, 16,
|
||||
PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER);
|
||||
ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024,
|
||||
PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER,
|
||||
PIPE_USAGE_STREAM);
|
||||
if (!ilo->uploader) {
|
||||
ilo_context_destroy(&ilo->base);
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ ilo_launch_grid(struct pipe_context *pipe,
|
|||
input_buf.buffer_size =
|
||||
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
|
||||
if (input_buf.buffer_size) {
|
||||
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, input,
|
||||
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
|
||||
&input_buf.buffer_offset, &input_buf.buffer);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -463,6 +463,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
|
||||
case PIPE_CAP_MAX_VERTEX_STREAMS:
|
||||
case PIPE_CAP_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
|
|
@ -476,6 +478,11 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -376,7 +376,7 @@ finalize_cbuf_state(struct ilo_context *ilo,
|
|||
if (cbuf->cso[i].resource)
|
||||
continue;
|
||||
|
||||
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
|
||||
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size, 16,
|
||||
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
|
||||
|
||||
cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
|
||||
|
|
@ -426,12 +426,12 @@ finalize_index_buffer(struct ilo_context *ilo)
|
|||
unsigned hw_offset;
|
||||
|
||||
if (vec->ib.state.user_buffer) {
|
||||
u_upload_data(ilo->uploader, 0, size,
|
||||
u_upload_data(ilo->uploader, 0, size, 16,
|
||||
vec->ib.state.user_buffer + offset,
|
||||
&hw_offset, &vec->ib.hw_resource);
|
||||
} else {
|
||||
u_upload_buffer(ilo->uploader, 0,
|
||||
vec->ib.state.offset + offset, size, vec->ib.state.buffer,
|
||||
vec->ib.state.offset + offset, size, 16, vec->ib.state.buffer,
|
||||
&hw_offset, &vec->ib.hw_resource);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
|
|||
struct toy_inst *inst;
|
||||
struct toy_src desc, real_src[4];
|
||||
struct toy_dst tmp, real_dst[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
tsrc_transpose(idx, real_src);
|
||||
|
||||
|
|
@ -319,7 +319,7 @@ fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
|
|||
const int grf_subreg = (idx.val32 & 1) * 16;
|
||||
struct toy_src src;
|
||||
struct toy_dst real_dst[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
|
||||
grf >= fcc->first_attr_grf)
|
||||
|
|
@ -350,7 +350,7 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
|
|||
struct toy_inst *inst;
|
||||
struct toy_src desc;
|
||||
struct toy_dst tmp, real_dst[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
|
||||
return;
|
||||
|
|
@ -396,7 +396,7 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
|
|||
struct toy_src desc;
|
||||
struct toy_inst *inst;
|
||||
struct toy_dst tmp, real_dst[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
|
||||
return;
|
||||
|
|
@ -1168,7 +1168,7 @@ fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
|
|||
{
|
||||
struct toy_dst dst[4];
|
||||
struct toy_src src[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
tdst_transpose(inst->dst, dst);
|
||||
tsrc_transpose(inst->src[0], src);
|
||||
|
|
@ -1257,7 +1257,7 @@ fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
|
|||
}
|
||||
else {
|
||||
struct toy_src src[4];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
tsrc_transpose(inst->src[0], src);
|
||||
/* mask out killed pixels */
|
||||
|
|
@ -1583,7 +1583,7 @@ fs_write_fb(struct fs_compile_context *fcc)
|
|||
static void
|
||||
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
sh->out.count = tgsi->num_outputs;
|
||||
for (i = 0; i < tgsi->num_outputs; i++) {
|
||||
|
|
@ -1603,7 +1603,7 @@ static void
|
|||
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
|
||||
bool flatshade)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
sh->in.count = tgsi->num_inputs;
|
||||
for (i = 0; i < tgsi->num_inputs; i++) {
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
|
|||
tc_MOV(tc, block_offsets, idx);
|
||||
|
||||
msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ;
|
||||
msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;;
|
||||
msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;
|
||||
msg_len = 2;
|
||||
|
||||
desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
|
||||
|
|
@ -522,7 +522,7 @@ vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
|
|||
if (num_coords >= 3) {
|
||||
struct toy_dst tmp, max;
|
||||
struct toy_src abs_coords[3];
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
tmp = tc_alloc_tmp(tc);
|
||||
max = tdst_writemask(tmp, TOY_WRITEMASK_W);
|
||||
|
|
@ -804,7 +804,7 @@ static int
|
|||
vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
|
||||
{
|
||||
const struct toy_tgsi *tgsi = &vcc->tgsi;
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < vcc->shader->out.count; i++) {
|
||||
const int slot = vcc->output_map[i];
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ struct linear_scan {
|
|||
static void
|
||||
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
|
||||
|
|
@ -221,7 +221,7 @@ linear_scan_spill(struct linear_scan *ls,
|
|||
static void
|
||||
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
|
||||
{
|
||||
int i;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct linear_scan_live_interval *interval = &ls->intervals[first + i];
|
||||
|
|
|
|||
|
|
@ -1593,7 +1593,7 @@ ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst
|
|||
tgsi_inst->Src[operand].Register.File;
|
||||
switch (file) {
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_RESOURCE:
|
||||
case TGSI_FILE_IMAGE:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
type = TOY_TYPE_D;
|
||||
break;
|
||||
|
|
@ -1834,7 +1834,7 @@ ra_get_src_indirect(struct toy_tgsi *tgsi,
|
|||
src = tsrc_null();
|
||||
break;
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_RESOURCE:
|
||||
case TGSI_FILE_IMAGE:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
is_resource = true;
|
||||
/* fall through */
|
||||
|
|
@ -1918,7 +1918,7 @@ ra_get_src(struct toy_tgsi *tgsi,
|
|||
need_vrf = true;
|
||||
break;
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_RESOURCE:
|
||||
case TGSI_FILE_IMAGE:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
assert(!s->Register.Dimension);
|
||||
src = tsrc_imm_d(s->Register.Index);
|
||||
|
|
@ -2256,7 +2256,7 @@ parse_declaration(struct toy_tgsi *tgsi,
|
|||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_PREDICATE:
|
||||
case TGSI_FILE_ADDRESS:
|
||||
case TGSI_FILE_RESOURCE:
|
||||
case TGSI_FILE_IMAGE:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
/* nothing to do */
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -63,8 +63,7 @@ enum lp_interp {
|
|||
LP_INTERP_LINEAR,
|
||||
LP_INTERP_PERSPECTIVE,
|
||||
LP_INTERP_POSITION,
|
||||
LP_INTERP_FACING,
|
||||
LP_INTERP_ZERO
|
||||
LP_INTERP_FACING
|
||||
};
|
||||
|
||||
struct lp_shader_input {
|
||||
|
|
|
|||
|
|
@ -108,28 +108,22 @@ struct llvmpipe_context {
|
|||
struct vertex_info vertex_info;
|
||||
|
||||
/** Which vertex shader output slot contains color */
|
||||
uint8_t color_slot[2];
|
||||
int8_t color_slot[2];
|
||||
|
||||
/** Which vertex shader output slot contains bcolor */
|
||||
uint8_t bcolor_slot[2];
|
||||
int8_t bcolor_slot[2];
|
||||
|
||||
/** Which vertex shader output slot contains point size */
|
||||
uint8_t psize_slot;
|
||||
int8_t psize_slot;
|
||||
|
||||
/** Which vertex shader output slot contains viewport index */
|
||||
uint8_t viewport_index_slot;
|
||||
int8_t viewport_index_slot;
|
||||
|
||||
/** Which geometry shader output slot contains layer */
|
||||
uint8_t layer_slot;
|
||||
int8_t layer_slot;
|
||||
|
||||
/** A fake frontface output for unfilled primitives */
|
||||
uint8_t face_slot;
|
||||
|
||||
/** Which output slot is used for the fake vp index info */
|
||||
uint8_t fake_vpindex_slot;
|
||||
|
||||
/** Which output slot is used for the fake layer info */
|
||||
uint8_t fake_layer_slot;
|
||||
int8_t face_slot;
|
||||
|
||||
/** Depth format and bias settings. */
|
||||
boolean floating_point_depth;
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ struct lp_rast_plane {
|
|||
int32_t dcdy;
|
||||
|
||||
/* one-pixel sized trivial reject offsets for each plane */
|
||||
int64_t eo;
|
||||
uint32_t eo;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -133,36 +133,8 @@ lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
|
|||
lp_rast_triangle_4(task, arg2);
|
||||
}
|
||||
|
||||
#if !defined(PIPE_ARCH_SSE)
|
||||
#if defined(PIPE_ARCH_SSE)
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
union lp_rast_cmd_arg arg2;
|
||||
arg2.triangle.tri = arg.triangle.tri;
|
||||
arg2.triangle.plane_mask = (1<<3)-1;
|
||||
lp_rast_triangle_32_3(task, arg2);
|
||||
}
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
union lp_rast_cmd_arg arg2;
|
||||
arg2.triangle.tri = arg.triangle.tri;
|
||||
arg2.triangle.plane_mask = (1<<4)-1;
|
||||
lp_rast_triangle_32_4(task, arg2);
|
||||
}
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
lp_rast_triangle_32_3_16(task, arg);
|
||||
}
|
||||
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#include "util/u_sse.h"
|
||||
|
||||
|
|
@ -265,12 +237,6 @@ sign_bits4(const __m128i *cstep, int cdiff)
|
|||
|
||||
#define NR_PLANES 3
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
|
|
@ -381,10 +347,6 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
|||
0xffff & ~out[i].mask);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
|
|
@ -471,6 +433,254 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
|||
}
|
||||
|
||||
#undef NR_PLANES
|
||||
|
||||
#else
|
||||
|
||||
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
|
||||
|
||||
#include <altivec.h>
|
||||
#include "util/u_pwr8.h"
|
||||
|
||||
static inline void
|
||||
build_masks_32(int c,
|
||||
int cdiff,
|
||||
int dcdx,
|
||||
int dcdy,
|
||||
unsigned *outmask,
|
||||
unsigned *partmask)
|
||||
{
|
||||
__m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
|
||||
__m128i xdcdy = (__m128i) vec_splats(dcdy);
|
||||
|
||||
/* Get values across the quad
|
||||
*/
|
||||
__m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
|
||||
__m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
|
||||
__m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
|
||||
|
||||
{
|
||||
__m128i cstep01, cstep23, result;
|
||||
|
||||
cstep01 = vec_packs_epi32(cstep0, cstep1);
|
||||
cstep23 = vec_packs_epi32(cstep2, cstep3);
|
||||
result = vec_packs_epi16(cstep01, cstep23);
|
||||
|
||||
*outmask |= vec_movemask_epi8(result);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
__m128i cio4 = (__m128i) vec_splats(cdiff);
|
||||
__m128i cstep01, cstep23, result;
|
||||
|
||||
cstep0 = vec_add_epi32(cstep0, cio4);
|
||||
cstep1 = vec_add_epi32(cstep1, cio4);
|
||||
cstep2 = vec_add_epi32(cstep2, cio4);
|
||||
cstep3 = vec_add_epi32(cstep3, cio4);
|
||||
|
||||
cstep01 = vec_packs_epi32(cstep0, cstep1);
|
||||
cstep23 = vec_packs_epi32(cstep2, cstep3);
|
||||
result = vec_packs_epi16(cstep01, cstep23);
|
||||
|
||||
*partmask |= vec_movemask_epi8(result);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
build_mask_linear_32(int c, int dcdx, int dcdy)
|
||||
{
|
||||
__m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
|
||||
__m128i xdcdy = (__m128i) vec_splats(dcdy);
|
||||
|
||||
/* Get values across the quad
|
||||
*/
|
||||
__m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
|
||||
__m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
|
||||
__m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
|
||||
|
||||
/* pack pairs of results into epi16
|
||||
*/
|
||||
__m128i cstep01 = vec_packs_epi32(cstep0, cstep1);
|
||||
__m128i cstep23 = vec_packs_epi32(cstep2, cstep3);
|
||||
|
||||
/* pack into epi8, preserving sign bits
|
||||
*/
|
||||
__m128i result = vec_packs_epi16(cstep01, cstep23);
|
||||
|
||||
/* extract sign bits to create mask
|
||||
*/
|
||||
return vec_movemask_epi8(result);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
lp_plane_to_m128i(const struct lp_rast_plane *plane)
|
||||
{
|
||||
return vec_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
|
||||
(int32_t)plane->dcdy, (int32_t)plane->eo);
|
||||
}
|
||||
|
||||
#define NR_PLANES 3
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
const struct lp_rast_triangle *tri = arg.triangle.tri;
|
||||
const struct lp_rast_plane *plane = GET_PLANES(tri);
|
||||
int x = (arg.triangle.plane_mask & 0xff) + task->x;
|
||||
int y = (arg.triangle.plane_mask >> 8) + task->y;
|
||||
unsigned i, j;
|
||||
|
||||
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
|
||||
unsigned nr = 0;
|
||||
|
||||
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
|
||||
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
|
||||
__m128i zero = vec_splats((unsigned char) 0);
|
||||
|
||||
__m128i c;
|
||||
__m128i dcdx;
|
||||
__m128i dcdy;
|
||||
__m128i rej4;
|
||||
|
||||
__m128i dcdx2;
|
||||
__m128i dcdx3;
|
||||
|
||||
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
|
||||
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
|
||||
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
|
||||
__m128i unused;
|
||||
|
||||
__m128i vshuf_mask0;
|
||||
__m128i vshuf_mask1;
|
||||
__m128i vshuf_mask2;
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x03020100);
|
||||
vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x07060504);
|
||||
vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x0B0A0908);
|
||||
#else
|
||||
vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x0C0D0E0F);
|
||||
vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x08090A0B);
|
||||
vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x04050607);
|
||||
#endif
|
||||
|
||||
transpose4_epi32(&p0, &p1, &p2, &zero,
|
||||
&c, &dcdx, &dcdy, &rej4);
|
||||
|
||||
/* Adjust dcdx;
|
||||
*/
|
||||
dcdx = vec_sub_epi32(zero, dcdx);
|
||||
|
||||
c = vec_add_epi32(c, vec_mullo_epi32(dcdx, (__m128i) vec_splats(x)));
|
||||
c = vec_add_epi32(c, vec_mullo_epi32(dcdy, (__m128i) vec_splats(y)));
|
||||
rej4 = vec_slli_epi32(rej4, 2);
|
||||
|
||||
/*
|
||||
* Adjust so we can just check the sign bit (< 0 comparison),
|
||||
* instead of having to do a less efficient <= 0 comparison
|
||||
*/
|
||||
c = vec_sub_epi32(c, (__m128i) vec_splats((unsigned int) 1));
|
||||
rej4 = vec_add_epi32(rej4, (__m128i) vec_splats((unsigned int) 1));
|
||||
|
||||
dcdx2 = vec_add_epi32(dcdx, dcdx);
|
||||
dcdx3 = vec_add_epi32(dcdx2, dcdx);
|
||||
|
||||
transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
|
||||
&span_0, &span_1, &span_2, &unused);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
__m128i cx = c;
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
__m128i c4rej = vec_add_epi32(cx, rej4);
|
||||
__m128i rej_masks = vec_srai_epi32(c4rej, 31);
|
||||
|
||||
/* if (is_zero(rej_masks)) */
|
||||
if (vec_movemask_epi8(rej_masks) == 0) {
|
||||
__m128i c0_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask0), span_0);
|
||||
__m128i c1_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask1), span_1);
|
||||
__m128i c2_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask2), span_2);
|
||||
|
||||
__m128i c_0 = vec_or(vec_or(c0_0, c1_0), c2_0);
|
||||
|
||||
__m128i c0_1 = vec_add_epi32(c0_0, vec_perm(dcdy, dcdy, vshuf_mask0));
|
||||
__m128i c1_1 = vec_add_epi32(c1_0, vec_perm(dcdy, dcdy, vshuf_mask1));
|
||||
__m128i c2_1 = vec_add_epi32(c2_0, vec_perm(dcdy, dcdy, vshuf_mask2));
|
||||
|
||||
__m128i c_1 = vec_or(vec_or(c0_1, c1_1), c2_1);
|
||||
__m128i c_01 = vec_packs_epi32(c_0, c_1);
|
||||
|
||||
__m128i c0_2 = vec_add_epi32(c0_1, vec_perm(dcdy, dcdy, vshuf_mask0));
|
||||
__m128i c1_2 = vec_add_epi32(c1_1, vec_perm(dcdy, dcdy, vshuf_mask1));
|
||||
__m128i c2_2 = vec_add_epi32(c2_1, vec_perm(dcdy, dcdy, vshuf_mask2));
|
||||
|
||||
__m128i c_2 = vec_or(vec_or(c0_2, c1_2), c2_2);
|
||||
|
||||
__m128i c0_3 = vec_add_epi32(c0_2, vec_perm(dcdy, dcdy, vshuf_mask0));
|
||||
__m128i c1_3 = vec_add_epi32(c1_2, vec_perm(dcdy, dcdy, vshuf_mask1));
|
||||
__m128i c2_3 = vec_add_epi32(c2_2, vec_perm(dcdy, dcdy, vshuf_mask2));
|
||||
|
||||
__m128i c_3 = vec_or(vec_or(c0_3, c1_3), c2_3);
|
||||
__m128i c_23 = vec_packs_epi32(c_2, c_3);
|
||||
__m128i c_0123 = vec_packs_epi16(c_01, c_23);
|
||||
|
||||
unsigned mask = vec_movemask_epi8(c_0123);
|
||||
|
||||
out[nr].i = i;
|
||||
out[nr].j = j;
|
||||
out[nr].mask = mask;
|
||||
if (mask != 0xffff)
|
||||
nr++;
|
||||
}
|
||||
cx = vec_add_epi32(cx, vec_slli_epi32(dcdx, 2));
|
||||
}
|
||||
|
||||
c = vec_add_epi32(c, vec_slli_epi32(dcdy, 2));
|
||||
}
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
lp_rast_shade_quads_mask(task,
|
||||
&tri->inputs,
|
||||
x + 4 * out[i].j,
|
||||
y + 4 * out[i].i,
|
||||
0xffff & ~out[i].mask);
|
||||
}
|
||||
|
||||
#undef NR_PLANES
|
||||
|
||||
#else
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
union lp_rast_cmd_arg arg2;
|
||||
arg2.triangle.tri = arg.triangle.tri;
|
||||
arg2.triangle.plane_mask = (1<<3)-1;
|
||||
lp_rast_triangle_32_3(task, arg2);
|
||||
}
|
||||
|
||||
#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
union lp_rast_cmd_arg arg2;
|
||||
arg2.triangle.tri = arg.triangle.tri;
|
||||
arg2.triangle.plane_mask = (1<<4)-1;
|
||||
lp_rast_triangle_32_4(task, arg2);
|
||||
}
|
||||
|
||||
void
|
||||
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
lp_rast_triangle_32_3_16(task, arg);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -512,7 +722,7 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
|
|||
#define NR_PLANES 8
|
||||
#include "lp_rast_tri_tmp.h"
|
||||
|
||||
#ifdef PIPE_ARCH_SSE
|
||||
#if defined(PIPE_ARCH_SSE) || (defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN))
|
||||
#undef BUILD_MASKS
|
||||
#undef BUILD_MASK_LINEAR
|
||||
#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
|
|||
const int64_t dcdx = -IMUL64(plane[j].dcdx, 4);
|
||||
const int64_t dcdy = IMUL64(plane[j].dcdy, 4);
|
||||
const int64_t cox = IMUL64(plane[j].eo, 4);
|
||||
const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
|
||||
const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
|
||||
const int64_t cio = IMUL64(ei, 4) - 1;
|
||||
|
||||
BUILD_MASKS(c[j] + cox,
|
||||
|
|
@ -182,7 +182,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
|
|||
const int64_t dcdx = -IMUL64(plane[j].dcdx, 16);
|
||||
const int64_t dcdy = IMUL64(plane[j].dcdy, 16);
|
||||
const int64_t cox = IMUL64(plane[j].eo, 16);
|
||||
const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
|
||||
const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
|
||||
const int64_t cio = IMUL64(ei, 16) - 1;
|
||||
|
||||
BUILD_MASKS(c[j] + cox,
|
||||
|
|
|
|||
|
|
@ -301,6 +301,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -486,6 +486,11 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
|
|||
depth,
|
||||
stencil);
|
||||
|
||||
/*
|
||||
* XXX: should make a full mask here for things like D24X8,
|
||||
* otherwise we'll do a read-modify-write clear later which
|
||||
* should be unnecessary.
|
||||
*/
|
||||
zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
|
||||
zmask32,
|
||||
smask8);
|
||||
|
|
|
|||
|
|
@ -105,10 +105,10 @@ struct lp_setup_context
|
|||
float pixel_offset;
|
||||
float line_width;
|
||||
float point_size;
|
||||
uint8_t psize_slot;
|
||||
uint8_t viewport_index_slot;
|
||||
uint8_t layer_slot;
|
||||
uint8_t face_slot;
|
||||
int8_t psize_slot;
|
||||
int8_t viewport_index_slot;
|
||||
int8_t layer_slot;
|
||||
int8_t face_slot;
|
||||
|
||||
struct pipe_framebuffer_state fb;
|
||||
struct u_rect framebuffer;
|
||||
|
|
|
|||
|
|
@ -644,19 +644,25 @@ try_setup_line( struct lp_setup_context *setup,
|
|||
line->inputs.layer = layer;
|
||||
line->inputs.viewport_index = viewport_index;
|
||||
|
||||
/*
|
||||
* XXX: this code is mostly identical to the one in lp_setup_tri, except it
|
||||
* uses 4 planes instead of 3. Could share the code (including the sse
|
||||
* assembly, in fact we'd get the 4th plane for free).
|
||||
* The only difference apart from storing the 4th plane would be some
|
||||
* different shuffle for calculating dcdx/dcdy.
|
||||
*/
|
||||
for (i = 0; i < 4; i++) {
|
||||
|
||||
/* half-edge constants, will be interated over the whole render
|
||||
/* half-edge constants, will be iterated over the whole render
|
||||
* target.
|
||||
*/
|
||||
plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]);
|
||||
|
||||
|
||||
/* correct for top-left vs. bottom-left fill convention.
|
||||
*/
|
||||
/* correct for top-left vs. bottom-left fill convention.
|
||||
*/
|
||||
if (plane[i].dcdx < 0) {
|
||||
/* both fill conventions want this - adjust for left edges */
|
||||
plane[i].c++;
|
||||
plane[i].c++;
|
||||
}
|
||||
else if (plane[i].dcdx == 0) {
|
||||
if (setup->pixel_offset == 0) {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,9 @@
|
|||
|
||||
#if defined(PIPE_ARCH_SSE)
|
||||
#include <emmintrin.h>
|
||||
#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
|
||||
#include <altivec.h>
|
||||
#include "util/u_pwr8.h"
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
|
|
@ -387,25 +390,21 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
plane = GET_PLANES(tri);
|
||||
|
||||
#if defined(PIPE_ARCH_SSE)
|
||||
if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
|
||||
setup->fb.height <= MAX_FIXED_LENGTH32 &&
|
||||
(bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
|
||||
(bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
|
||||
if (1) {
|
||||
__m128i vertx, verty;
|
||||
__m128i shufx, shufy;
|
||||
__m128i dcdx, dcdy, c;
|
||||
__m128i unused;
|
||||
__m128i dcdx, dcdy;
|
||||
__m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
|
||||
__m128i c01, c23, unused;
|
||||
__m128i dcdx_neg_mask;
|
||||
__m128i dcdy_neg_mask;
|
||||
__m128i dcdx_zero_mask;
|
||||
__m128i top_left_flag;
|
||||
__m128i c_inc_mask, c_inc;
|
||||
__m128i top_left_flag, c_dec;
|
||||
__m128i eo, p0, p1, p2;
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
|
||||
|
||||
vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
|
||||
verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
|
||||
vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
|
||||
verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
|
||||
|
||||
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
|
||||
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
|
||||
|
|
@ -419,42 +418,161 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
|
||||
top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
|
||||
|
||||
c_inc_mask = _mm_or_si128(dcdx_neg_mask,
|
||||
_mm_and_si128(dcdx_zero_mask,
|
||||
_mm_xor_si128(dcdy_neg_mask,
|
||||
top_left_flag)));
|
||||
c_dec = _mm_or_si128(dcdx_neg_mask,
|
||||
_mm_and_si128(dcdx_zero_mask,
|
||||
_mm_xor_si128(dcdy_neg_mask,
|
||||
top_left_flag)));
|
||||
|
||||
c_inc = _mm_srli_epi32(c_inc_mask, 31);
|
||||
/*
|
||||
* 64 bit arithmetic.
|
||||
* Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
|
||||
*/
|
||||
cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
|
||||
cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
|
||||
c02 = _mm_sub_epi64(cdx02, cdy02);
|
||||
c13 = _mm_sub_epi64(cdx13, cdy13);
|
||||
c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
|
||||
_MM_SHUFFLE(2,2,0,0)));
|
||||
c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
|
||||
_MM_SHUFFLE(3,3,1,1)));
|
||||
|
||||
c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
|
||||
mm_mullo_epi32(dcdy, verty));
|
||||
|
||||
c = _mm_add_epi32(c, c_inc);
|
||||
/*
|
||||
* Useful for very small fbs/tris (or fewer subpixel bits) only:
|
||||
* c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
|
||||
* mm_mullo_epi32(dcdy, verty));
|
||||
*
|
||||
* c = _mm_sub_epi32(c, c_dec);
|
||||
*/
|
||||
|
||||
/* Scale up to match c:
|
||||
*/
|
||||
dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
|
||||
dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
|
||||
|
||||
/* Calculate trivial reject values:
|
||||
/*
|
||||
* Calculate trivial reject values:
|
||||
* Note eo cannot overflow even if dcdx/dcdy would already have
|
||||
* 31 bits (which they shouldn't have). This is because eo
|
||||
* is never negative (albeit if we rely on that need to be careful...)
|
||||
*/
|
||||
eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
|
||||
_mm_and_si128(dcdx_neg_mask, dcdx));
|
||||
|
||||
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
|
||||
|
||||
/*
|
||||
* Pointless transpose which gets undone immediately in
|
||||
* rasterization.
|
||||
* It is actually difficult to do away with it - would essentially
|
||||
* need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
|
||||
* for this then would need to depend on the number of planes.
|
||||
* The transpose is quite special here due to c being 64bit...
|
||||
* The store has to be unaligned (unless we'd make the plane size
|
||||
* a multiple of 128), and of course storing eo separately...
|
||||
*/
|
||||
c01 = _mm_unpacklo_epi64(c02, c13);
|
||||
c23 = _mm_unpackhi_epi64(c02, c13);
|
||||
transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
|
||||
&p0, &p1, &p2, &unused);
|
||||
_mm_storeu_si128((__m128i *)&plane[0], p0);
|
||||
plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
||||
_mm_storeu_si128((__m128i *)&plane[1], p1);
|
||||
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
|
||||
plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
||||
_mm_storeu_si128((__m128i *)&plane[2], p2);
|
||||
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
|
||||
plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
||||
} else
|
||||
#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
|
||||
/*
|
||||
* XXX this code is effectively disabled for all practical purposes,
|
||||
* as the allowed fb size is tiny if FIXED_ORDER is 8.
|
||||
*/
|
||||
if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
|
||||
setup->fb.height <= MAX_FIXED_LENGTH32 &&
|
||||
(bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
|
||||
(bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
|
||||
unsigned int bottom_edge;
|
||||
__m128i vertx, verty;
|
||||
__m128i shufx, shufy;
|
||||
__m128i dcdx, dcdy, c;
|
||||
__m128i unused;
|
||||
__m128i dcdx_neg_mask;
|
||||
__m128i dcdy_neg_mask;
|
||||
__m128i dcdx_zero_mask;
|
||||
__m128i top_left_flag;
|
||||
__m128i c_inc_mask, c_inc;
|
||||
__m128i eo, p0, p1, p2;
|
||||
__m128i_union vshuf_mask;
|
||||
__m128i zero = vec_splats((unsigned char) 0);
|
||||
PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
vshuf_mask.i[0] = 0x07060504;
|
||||
vshuf_mask.i[1] = 0x0B0A0908;
|
||||
vshuf_mask.i[2] = 0x03020100;
|
||||
vshuf_mask.i[3] = 0x0F0E0D0C;
|
||||
#else
|
||||
vshuf_mask.i[0] = 0x00010203;
|
||||
vshuf_mask.i[1] = 0x0C0D0E0F;
|
||||
vshuf_mask.i[2] = 0x04050607;
|
||||
vshuf_mask.i[3] = 0x08090A0B;
|
||||
#endif
|
||||
|
||||
/* vertex x coords */
|
||||
vertx = vec_load_si128((const uint32_t *) position->x);
|
||||
/* vertex y coords */
|
||||
verty = vec_load_si128((const uint32_t *) position->y);
|
||||
|
||||
shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
|
||||
shufy = vec_perm (verty, verty, vshuf_mask.m128i);
|
||||
|
||||
dcdx = vec_sub_epi32(verty, shufy);
|
||||
dcdy = vec_sub_epi32(vertx, shufx);
|
||||
|
||||
dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
|
||||
dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
|
||||
dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
|
||||
|
||||
bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
|
||||
top_left_flag = (__m128i) vec_splats(bottom_edge);
|
||||
|
||||
c_inc_mask = vec_or(dcdx_neg_mask,
|
||||
vec_and(dcdx_zero_mask,
|
||||
vec_xor(dcdy_neg_mask,
|
||||
top_left_flag)));
|
||||
|
||||
c_inc = vec_srli_epi32(c_inc_mask, 31);
|
||||
|
||||
c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
|
||||
vec_mullo_epi32(dcdy, verty));
|
||||
|
||||
c = vec_add_epi32(c, c_inc);
|
||||
|
||||
/* Scale up to match c:
|
||||
*/
|
||||
dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
|
||||
dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
|
||||
|
||||
/* Calculate trivial reject values:
|
||||
*/
|
||||
eo = vec_sub_epi32(vec_andc(dcdy_neg_mask, dcdy),
|
||||
vec_and(dcdx_neg_mask, dcdx));
|
||||
|
||||
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
|
||||
|
||||
/* Pointless transpose which gets undone immediately in
|
||||
* rasterization:
|
||||
*/
|
||||
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
|
||||
&p0, &p1, &p2, &unused);
|
||||
|
||||
#define STORE_PLANE(plane, vec) do { \
|
||||
_mm_store_si128((__m128i *)&temp_vec, vec); \
|
||||
plane.c = (int64_t)temp_vec[0]; \
|
||||
plane.dcdx = temp_vec[1]; \
|
||||
plane.dcdy = temp_vec[2]; \
|
||||
plane.eo = temp_vec[3]; \
|
||||
#define STORE_PLANE(plane, vec) do { \
|
||||
vec_store_si128((uint32_t *)&temp_vec, vec); \
|
||||
plane.c = (int64_t)temp_vec[0]; \
|
||||
plane.dcdx = temp_vec[1]; \
|
||||
plane.dcdy = temp_vec[2]; \
|
||||
plane.eo = temp_vec[3]; \
|
||||
} while(0)
|
||||
|
||||
STORE_PLANE(plane[0], p0);
|
||||
|
|
@ -473,17 +591,17 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
plane[2].dcdx = position->dy20;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
/* half-edge constants, will be interated over the whole render
|
||||
/* half-edge constants, will be iterated over the whole render
|
||||
* target.
|
||||
*/
|
||||
plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
|
||||
IMUL64(plane[i].dcdy, position->y[i]);
|
||||
IMUL64(plane[i].dcdy, position->y[i]);
|
||||
|
||||
/* correct for top-left vs. bottom-left fill convention.
|
||||
*/
|
||||
*/
|
||||
if (plane[i].dcdx < 0) {
|
||||
/* both fill conventions want this - adjust for left edges */
|
||||
plane[i].c++;
|
||||
plane[i].c++;
|
||||
}
|
||||
else if (plane[i].dcdx == 0) {
|
||||
if (setup->bottom_edge_rule == 0){
|
||||
|
|
@ -517,19 +635,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
}
|
||||
|
||||
if (0) {
|
||||
debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
|
||||
debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
|
||||
plane[0].c,
|
||||
plane[0].dcdx,
|
||||
plane[0].dcdy,
|
||||
plane[0].eo);
|
||||
|
||||
debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
|
||||
|
||||
debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
|
||||
plane[1].c,
|
||||
plane[1].dcdx,
|
||||
plane[1].dcdy,
|
||||
plane[1].eo);
|
||||
|
||||
debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
|
||||
|
||||
debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
|
||||
plane[2].c,
|
||||
plane[2].dcdx,
|
||||
plane[2].dcdy,
|
||||
|
|
@ -590,7 +708,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
|
|||
static inline uint32_t
|
||||
floor_pot(uint32_t n)
|
||||
{
|
||||
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
|
||||
#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
|
|
@ -738,9 +856,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
|
|||
|
||||
ei[i] = (plane[i].dcdy -
|
||||
plane[i].dcdx -
|
||||
plane[i].eo) << TILE_ORDER;
|
||||
(int64_t)plane[i].eo) << TILE_ORDER;
|
||||
|
||||
eo[i] = plane[i].eo << TILE_ORDER;
|
||||
eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
|
||||
xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
|
||||
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
|
||||
}
|
||||
|
|
@ -932,12 +1050,12 @@ rotate_fixed_position_12( struct fixed_position* position )
|
|||
/**
|
||||
* Draw triangle if it's CW, cull otherwise.
|
||||
*/
|
||||
static void triangle_cw( struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4] )
|
||||
static void triangle_cw(struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4])
|
||||
{
|
||||
struct fixed_position position;
|
||||
PIPE_ALIGN_VAR(16) struct fixed_position position;
|
||||
|
||||
calc_fixed_position(setup, &position, v0, v1, v2);
|
||||
|
||||
|
|
@ -953,12 +1071,12 @@ static void triangle_cw( struct lp_setup_context *setup,
|
|||
}
|
||||
|
||||
|
||||
static void triangle_ccw( struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4])
|
||||
static void triangle_ccw(struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4])
|
||||
{
|
||||
struct fixed_position position;
|
||||
PIPE_ALIGN_VAR(16) struct fixed_position position;
|
||||
|
||||
calc_fixed_position(setup, &position, v0, v1, v2);
|
||||
|
||||
|
|
@ -969,12 +1087,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
|
|||
/**
|
||||
* Draw triangle whether it's CW or CCW.
|
||||
*/
|
||||
static void triangle_both( struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4] )
|
||||
static void triangle_both(struct lp_setup_context *setup,
|
||||
const float (*v0)[4],
|
||||
const float (*v1)[4],
|
||||
const float (*v2)[4])
|
||||
{
|
||||
struct fixed_position position;
|
||||
PIPE_ALIGN_VAR(16) struct fixed_position position;
|
||||
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
|
||||
|
||||
if (lp_context->active_statistics_queries &&
|
||||
|
|
|
|||
|
|
@ -48,21 +48,26 @@
|
|||
static void
|
||||
compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
||||
{
|
||||
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
|
||||
const struct tgsi_shader_info *fsInfo = &llvmpipe->fs->info.base;
|
||||
struct vertex_info *vinfo = &llvmpipe->vertex_info;
|
||||
int vs_index;
|
||||
uint i;
|
||||
|
||||
draw_prepare_shader_outputs(llvmpipe->draw);
|
||||
|
||||
llvmpipe->color_slot[0] = 0;
|
||||
llvmpipe->color_slot[1] = 0;
|
||||
llvmpipe->bcolor_slot[0] = 0;
|
||||
llvmpipe->bcolor_slot[1] = 0;
|
||||
llvmpipe->viewport_index_slot = 0;
|
||||
llvmpipe->layer_slot = 0;
|
||||
llvmpipe->face_slot = 0;
|
||||
llvmpipe->psize_slot = 0;
|
||||
/*
|
||||
* Those can't actually be 0 (because pos is always at 0).
|
||||
* But use ints anyway to avoid confusion (in vs outputs, they
|
||||
* can very well be at pos 0).
|
||||
*/
|
||||
llvmpipe->color_slot[0] = -1;
|
||||
llvmpipe->color_slot[1] = -1;
|
||||
llvmpipe->bcolor_slot[0] = -1;
|
||||
llvmpipe->bcolor_slot[1] = -1;
|
||||
llvmpipe->viewport_index_slot = -1;
|
||||
llvmpipe->layer_slot = -1;
|
||||
llvmpipe->face_slot = -1;
|
||||
llvmpipe->psize_slot = -1;
|
||||
|
||||
/*
|
||||
* Match FS inputs against VS outputs, emitting the necessary
|
||||
|
|
@ -73,60 +78,49 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
vinfo->num_attribs = 0;
|
||||
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_POSITION,
|
||||
0);
|
||||
TGSI_SEMANTIC_POSITION, 0);
|
||||
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
|
||||
for (i = 0; i < lpfs->info.base.num_inputs; i++) {
|
||||
for (i = 0; i < fsInfo->num_inputs; i++) {
|
||||
/*
|
||||
* Search for each input in current vs output:
|
||||
*/
|
||||
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
lpfs->info.base.input_semantic_name[i],
|
||||
lpfs->info.base.input_semantic_index[i]);
|
||||
fsInfo->input_semantic_name[i],
|
||||
fsInfo->input_semantic_index[i]);
|
||||
|
||||
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
|
||||
lpfs->info.base.input_semantic_index[i] < 2) {
|
||||
int idx = lpfs->info.base.input_semantic_index[i];
|
||||
llvmpipe->color_slot[idx] = vinfo->num_attribs;
|
||||
if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
|
||||
fsInfo->input_semantic_index[i] < 2) {
|
||||
int idx = fsInfo->input_semantic_index[i];
|
||||
llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
|
||||
}
|
||||
|
||||
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
|
||||
llvmpipe->face_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
|
||||
llvmpipe->face_slot = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
/*
|
||||
* For vp index and layer, if the fs requires them but the vs doesn't
|
||||
* provide them, store the slot - we'll later replace the data directly
|
||||
* with zero (as required by ARB_fragment_layer_viewport). This is
|
||||
* because draw itself just redirects them to whatever was at output 0.
|
||||
* We'll also store the real vpindex/layer slot for setup use.
|
||||
* provide them, draw (vbuf) will give us the required 0 (slot -1).
|
||||
* (This means in this case we'll also use those slots in setup, which
|
||||
* isn't necessary but they'll contain the correct (0) value.)
|
||||
*/
|
||||
} else if (lpfs->info.base.input_semantic_name[i] ==
|
||||
} else if (fsInfo->input_semantic_name[i] ==
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX) {
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
}
|
||||
else {
|
||||
llvmpipe->fake_vpindex_slot = vinfo->num_attribs;
|
||||
}
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->layer_slot = vinfo->num_attribs;
|
||||
}
|
||||
else {
|
||||
llvmpipe->fake_layer_slot = vinfo->num_attribs;
|
||||
}
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
llvmpipe->viewport_index_slot = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
} else if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
|
||||
llvmpipe->layer_slot = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
} else {
|
||||
/*
|
||||
* Emit the requested fs attribute for all but position.
|
||||
* Note that we'd actually want to skip position (as we won't use
|
||||
* the attribute in the fs) but can't. The reason is that we don't
|
||||
* actually have a input/output map for setup (even though it looks
|
||||
* like we do...). Could adjust for this though even without a map
|
||||
* (in llvmpipe_create_fs_state()).
|
||||
*/
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -137,8 +131,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
TGSI_SEMANTIC_BCOLOR, i);
|
||||
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->bcolor_slot[i] = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
|
||||
llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -148,29 +142,29 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
TGSI_SEMANTIC_PSIZE, 0);
|
||||
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->psize_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
llvmpipe->psize_slot = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
}
|
||||
|
||||
/* Figure out if we need viewport index (if it wasn't already in fs input) */
|
||||
if (llvmpipe->viewport_index_slot == 0) {
|
||||
if (llvmpipe->viewport_index_slot < 0) {
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
llvmpipe->viewport_index_slot =(int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Figure out if we need layer (if it wasn't already in fs input) */
|
||||
if (llvmpipe->layer_slot == 0) {
|
||||
if (llvmpipe->layer_slot < 0) {
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_LAYER,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->layer_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
llvmpipe->layer_slot = (int)vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -197,10 +191,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
|
|||
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
|
||||
}
|
||||
|
||||
if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
|
||||
LP_NEW_FS |
|
||||
if (llvmpipe->dirty & (LP_NEW_FS |
|
||||
LP_NEW_VS))
|
||||
compute_vertex_info( llvmpipe );
|
||||
compute_vertex_info(llvmpipe);
|
||||
|
||||
if (llvmpipe->dirty & (LP_NEW_FS |
|
||||
LP_NEW_FRAMEBUFFER |
|
||||
|
|
|
|||
|
|
@ -2695,34 +2695,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
|
|||
|
||||
switch (shader->info.base.input_interpolate[i]) {
|
||||
case TGSI_INTERPOLATE_CONSTANT:
|
||||
shader->inputs[i].interp = LP_INTERP_CONSTANT;
|
||||
break;
|
||||
shader->inputs[i].interp = LP_INTERP_CONSTANT;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_LINEAR:
|
||||
shader->inputs[i].interp = LP_INTERP_LINEAR;
|
||||
break;
|
||||
shader->inputs[i].interp = LP_INTERP_LINEAR;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_PERSPECTIVE:
|
||||
shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
|
||||
break;
|
||||
shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
|
||||
break;
|
||||
case TGSI_INTERPOLATE_COLOR:
|
||||
shader->inputs[i].interp = LP_INTERP_COLOR;
|
||||
break;
|
||||
shader->inputs[i].interp = LP_INTERP_COLOR;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (shader->info.base.input_semantic_name[i]) {
|
||||
case TGSI_SEMANTIC_FACE:
|
||||
shader->inputs[i].interp = LP_INTERP_FACING;
|
||||
break;
|
||||
shader->inputs[i].interp = LP_INTERP_FACING;
|
||||
break;
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
/* Position was already emitted above
|
||||
*/
|
||||
shader->inputs[i].interp = LP_INTERP_POSITION;
|
||||
shader->inputs[i].src_index = 0;
|
||||
continue;
|
||||
/* Position was already emitted above
|
||||
*/
|
||||
shader->inputs[i].interp = LP_INTERP_POSITION;
|
||||
shader->inputs[i].src_index = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* XXX this is a completely pointless index map... */
|
||||
shader->inputs[i].src_index = i+1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -372,9 +372,9 @@ load_attribute(struct gallivm_state *gallivm,
|
|||
/* Potentially modify it according to twoside, etc:
|
||||
*/
|
||||
if (key->twoside) {
|
||||
if (vert_attr == key->color_slot && key->bcolor_slot > 0)
|
||||
if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
|
||||
lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
|
||||
else if (vert_attr == key->spec_slot && key->bspec_slot > 0)
|
||||
else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
|
||||
lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
|
||||
}
|
||||
}
|
||||
|
|
@ -602,13 +602,6 @@ emit_tri_coef( struct gallivm_state *gallivm,
|
|||
*/
|
||||
break;
|
||||
|
||||
case LP_INTERP_ZERO:
|
||||
/*
|
||||
* The information we get from the output is bogus, replace it
|
||||
* with zero.
|
||||
*/
|
||||
emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
|
||||
break;
|
||||
case LP_INTERP_FACING:
|
||||
emit_facing_coef(gallivm, args, slot+1);
|
||||
break;
|
||||
|
|
@ -879,13 +872,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
|
|||
key->pad = 0;
|
||||
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
|
||||
for (i = 0; i < key->num_inputs; i++) {
|
||||
if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
|
||||
if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
|
||||
key->inputs[i].src_index == lp->fake_layer_slot) {
|
||||
key->inputs[i].interp = LP_INTERP_ZERO;
|
||||
}
|
||||
}
|
||||
else if (key->inputs[i].interp == LP_INTERP_COLOR) {
|
||||
if (key->inputs[i].interp == LP_INTERP_COLOR) {
|
||||
if (lp->rasterizer->flatshade)
|
||||
key->inputs[i].interp = LP_INTERP_CONSTANT;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ struct lp_setup_variant_list_item
|
|||
struct lp_setup_variant_key {
|
||||
unsigned size:16;
|
||||
unsigned num_inputs:8;
|
||||
unsigned color_slot:8;
|
||||
unsigned bcolor_slot:8;
|
||||
unsigned spec_slot:8;
|
||||
unsigned bspec_slot:8;
|
||||
int color_slot:8;
|
||||
int bcolor_slot:8;
|
||||
int spec_slot:8;
|
||||
int bspec_slot:8;
|
||||
unsigned flatshade_first:1;
|
||||
unsigned pixel_center_half:1;
|
||||
unsigned twoside:1;
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ add_blend_test(struct gallivm_state *gallivm,
|
|||
|
||||
LLVMBuildStore(builder, res, res_ptr);
|
||||
|
||||
LLVMBuildRetVoid(builder);;
|
||||
LLVMBuildRetVoid(builder);
|
||||
|
||||
gallivm_verify_function(gallivm, func);
|
||||
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ add_conv_test(struct gallivm_state *gallivm,
|
|||
LLVMBuildStore(builder, dst[i], ptr);
|
||||
}
|
||||
|
||||
LLVMBuildRetVoid(builder);;
|
||||
LLVMBuildRetVoid(builder);
|
||||
|
||||
gallivm_verify_function(gallivm, func);
|
||||
|
||||
|
|
|
|||
|
|
@ -390,6 +390,9 @@ enum SVSemantic
|
|||
SV_VERTEX_STRIDE,
|
||||
SV_INVOCATION_INFO,
|
||||
SV_THREAD_KILL,
|
||||
SV_BASEVERTEX,
|
||||
SV_BASEINSTANCE,
|
||||
SV_DRAWID,
|
||||
SV_UNDEFINED,
|
||||
SV_LAST
|
||||
};
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ struct nv50_ir_prog_info
|
|||
union {
|
||||
struct {
|
||||
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
|
||||
bool usesDrawParameters;
|
||||
} vp;
|
||||
struct {
|
||||
uint8_t inputPatchSize;
|
||||
|
|
@ -160,8 +161,9 @@ struct nv50_ir_prog_info
|
|||
uint8_t clipDistances; /* number of clip distance outputs */
|
||||
uint8_t cullDistances; /* number of cull distance outputs */
|
||||
int8_t genUserClip; /* request user clip planes for ClipVertex */
|
||||
uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
|
||||
uint16_t ucpBase; /* base address for UCPs */
|
||||
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
|
||||
uint16_t drawInfoBase; /* base address for draw parameters */
|
||||
uint8_t pointSize; /* output index for PointSize */
|
||||
uint8_t instanceId; /* system value index of InstanceID */
|
||||
uint8_t vertexId; /* system value index of VertexID */
|
||||
|
|
|
|||
|
|
@ -740,6 +740,7 @@ CodeEmitterGM107::emitF2F()
|
|||
emitCC (0x2f);
|
||||
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
|
||||
emitFMZ (0x2c, 1);
|
||||
emitField(0x29, 1, insn->subOp);
|
||||
emitRND (0x27, rnd, 0x2a);
|
||||
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
|
||||
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
|
||||
|
|
|
|||
|
|
@ -1030,7 +1030,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
|
|||
|
||||
// for 8/16 source types, the byte/word is in subOp. word 1 is
|
||||
// represented as 2.
|
||||
code[1] |= i->subOp << 0x17;
|
||||
if (!isFloatType(i->sType))
|
||||
code[1] |= i->subOp << 0x17;
|
||||
else
|
||||
code[1] |= i->subOp << 0x18;
|
||||
|
||||
if (sat)
|
||||
code[0] |= 0x20;
|
||||
|
|
|
|||
|
|
@ -319,6 +319,10 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
|||
x |= 2;
|
||||
return x;
|
||||
}
|
||||
case TGSI_OPCODE_PK2H:
|
||||
return 0x3;
|
||||
case TGSI_OPCODE_UP2H:
|
||||
return 0x1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -348,7 +352,7 @@ static nv50_ir::DataFile translateFile(uint file)
|
|||
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
|
||||
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
|
||||
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
|
||||
case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
//case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
|
||||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_NULL:
|
||||
default:
|
||||
|
|
@ -377,6 +381,9 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
|
|||
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
|
||||
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
|
||||
case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
|
||||
case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
|
||||
case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
|
||||
case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
|
||||
default:
|
||||
assert(0);
|
||||
return nv50_ir::SV_CLOCK;
|
||||
|
|
@ -449,6 +456,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
|
|||
case TGSI_OPCODE_ATOMUMAX:
|
||||
case TGSI_OPCODE_UBFE:
|
||||
case TGSI_OPCODE_UMSB:
|
||||
case TGSI_OPCODE_UP2H:
|
||||
return nv50_ir::TYPE_U32;
|
||||
case TGSI_OPCODE_I2F:
|
||||
case TGSI_OPCODE_I2D:
|
||||
|
|
@ -513,10 +521,12 @@ nv50_ir::DataType Instruction::inferDstType() const
|
|||
case TGSI_OPCODE_DSGE:
|
||||
case TGSI_OPCODE_DSLT:
|
||||
case TGSI_OPCODE_DSNE:
|
||||
case TGSI_OPCODE_PK2H:
|
||||
return nv50_ir::TYPE_U32;
|
||||
case TGSI_OPCODE_I2F:
|
||||
case TGSI_OPCODE_U2F:
|
||||
case TGSI_OPCODE_D2F:
|
||||
case TGSI_OPCODE_UP2H:
|
||||
return nv50_ir::TYPE_F32;
|
||||
case TGSI_OPCODE_I2D:
|
||||
case TGSI_OPCODE_U2D:
|
||||
|
|
@ -861,7 +871,7 @@ bool Source::scanSource()
|
|||
clipVertexOutput = -1;
|
||||
|
||||
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
|
||||
resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
||||
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
||||
|
||||
info->immd.bufSize = 0;
|
||||
|
||||
|
|
@ -1128,6 +1138,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
case TGSI_SEMANTIC_SAMPLEPOS:
|
||||
info->prop.fp.sampleInterp = 1;
|
||||
break;
|
||||
case TGSI_SEMANTIC_BASEVERTEX:
|
||||
case TGSI_SEMANTIC_BASEINSTANCE:
|
||||
case TGSI_SEMANTIC_DRAWID:
|
||||
info->prop.vp.usesDrawParameters = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1144,6 +1159,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
}
|
||||
}
|
||||
break;
|
||||
/*
|
||||
case TGSI_FILE_RESOURCE:
|
||||
for (i = first; i <= last; ++i) {
|
||||
resources[i].target = decl->Resource.Resource;
|
||||
|
|
@ -1151,6 +1167,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
resources[i].slot = i;
|
||||
}
|
||||
break;
|
||||
*/
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
for (i = first; i <= last; ++i)
|
||||
textureViews[i].target = decl->SamplerView.Resource;
|
||||
|
|
@ -1216,11 +1233,13 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
|||
if (src.isIndirect(0))
|
||||
mainTempsInLMem = true;
|
||||
} else
|
||||
/*
|
||||
if (src.getFile() == TGSI_FILE_RESOURCE) {
|
||||
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
|
||||
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
|
||||
0x1 : 0x2;
|
||||
} else
|
||||
*/
|
||||
if (src.getFile() == TGSI_FILE_OUTPUT) {
|
||||
if (src.isIndirect(0)) {
|
||||
// We don't know which one is accessed, just mark everything for
|
||||
|
|
@ -1271,9 +1290,11 @@ Instruction::getTexture(const tgsi::Source *code, int s) const
|
|||
unsigned int r;
|
||||
|
||||
switch (getSrc(s).getFile()) {
|
||||
/*
|
||||
case TGSI_FILE_RESOURCE:
|
||||
r = getSrc(s).getIndex(0);
|
||||
return translateTexture(code->resources.at(r).target);
|
||||
*/
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
r = getSrc(s).getIndex(0);
|
||||
return translateTexture(code->textureViews.at(r).target);
|
||||
|
|
@ -1639,8 +1660,6 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
|
|||
// don't load masked inputs, won't be assigned a slot
|
||||
if (!ptr && !(info->in[idx].mask & (1 << swz)))
|
||||
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
|
||||
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
|
||||
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
|
||||
return interpolate(src, c, shiftAddress(ptr));
|
||||
} else
|
||||
if (prog->getType() == Program::TYPE_GEOMETRY) {
|
||||
|
|
@ -1681,7 +1700,7 @@ Converter::acquireDst(int d, int c)
|
|||
const int idx = dst.getIndex(0);
|
||||
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
|
||||
|
||||
if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
|
||||
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
|
||||
return NULL;
|
||||
|
||||
if (dst.isIndirect(0) ||
|
||||
|
|
@ -2799,6 +2818,21 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
|
||||
break;
|
||||
case TGSI_OPCODE_PK2H:
|
||||
val0 = getScratch();
|
||||
val1 = getScratch();
|
||||
mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
|
||||
mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
|
||||
break;
|
||||
case TGSI_OPCODE_UP2H:
|
||||
src0 = fetchSrc(0, 0);
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
|
||||
geni->subOp = c & 1;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_EMIT:
|
||||
/* export the saved viewport index */
|
||||
if (viewport != NULL) {
|
||||
|
|
@ -3252,7 +3286,7 @@ Converter::handleUserClipPlanes()
|
|||
|
||||
for (c = 0; c < 4; ++c) {
|
||||
for (i = 0; i < info->io.genUserClip; ++i) {
|
||||
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
|
||||
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
|
||||
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
|
||||
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
|
||||
if (c == 0)
|
||||
|
|
|
|||
|
|
@ -1576,6 +1576,17 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
|
|||
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
|
||||
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
|
||||
break;
|
||||
case SV_BASEVERTEX:
|
||||
case SV_BASEINSTANCE:
|
||||
case SV_DRAWID:
|
||||
ld = bld.mkLoad(TYPE_U32, i->getDef(0),
|
||||
bld.mkSymbol(FILE_MEMORY_CONST,
|
||||
prog->driver->io.auxCBSlot,
|
||||
TYPE_U32,
|
||||
prog->driver->io.drawInfoBase +
|
||||
4 * (sv - SV_BASEVERTEX)),
|
||||
NULL);
|
||||
break;
|
||||
default:
|
||||
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
|
||||
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
|
||||
|
|
|
|||
|
|
@ -676,23 +676,22 @@ ConstantFolding::expr(Instruction *i,
|
|||
switch (i->op) {
|
||||
case OP_MAD:
|
||||
case OP_FMA: {
|
||||
i->op = OP_ADD;
|
||||
ImmediateValue src0, src1 = *i->getSrc(0)->asImm();
|
||||
|
||||
/* Move the immediate to the second arg, otherwise the ADD operation
|
||||
* won't be emittable
|
||||
*/
|
||||
i->setSrc(1, i->getSrc(0));
|
||||
// Move the immediate into position 1, where we know it might be
|
||||
// emittable. However it might not be anyways, as there may be other
|
||||
// restrictions, so move it into a separate LValue.
|
||||
bld.setPosition(i, false);
|
||||
i->op = OP_ADD;
|
||||
i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0));
|
||||
i->setSrc(0, i->getSrc(2));
|
||||
i->src(0).mod = i->src(2).mod;
|
||||
i->setSrc(2, NULL);
|
||||
|
||||
ImmediateValue src0;
|
||||
if (i->src(0).getImmediate(src0))
|
||||
expr(i, src0, *i->getSrc(1)->asImm());
|
||||
if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
|
||||
bld.setPosition(i, false);
|
||||
i->setSrc(1, bld.loadImm(NULL, res.data.u32));
|
||||
}
|
||||
expr(i, src0, src1);
|
||||
else
|
||||
opnd(i, src1, 1);
|
||||
break;
|
||||
}
|
||||
case OP_PFETCH:
|
||||
|
|
|
|||
|
|
@ -295,6 +295,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
|
|||
case SV_SAMPLE_INDEX: return 0;
|
||||
case SV_SAMPLE_POS: return 0;
|
||||
case SV_SAMPLE_MASK: return 0;
|
||||
case SV_BASEVERTEX: return 0;
|
||||
case SV_BASEINSTANCE: return 0;
|
||||
case SV_DRAWID: return 0;
|
||||
default:
|
||||
return 0xffffffff;
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue