Merge ../mesa into vulkan

This commit is contained in:
Kristian Høgsberg Kristensen 2016-01-08 22:16:43 -08:00
commit f0993f81c7
316 changed files with 6879 additions and 3530 deletions

View file

@ -396,6 +396,61 @@ fi
AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
dnl Check for Endianness
AC_C_BIGENDIAN(
little_endian=no,
little_endian=yes,
little_endian=no,
little_endian=no
)
dnl Check for POWER8 Architecture
PWR8_CFLAGS="-mpower8-vector"
have_pwr8_intrinsics=no
AC_MSG_CHECKING(whether gcc supports -mpower8-vector)
save_CFLAGS=$CFLAGS
CFLAGS="$PWR8_CFLAGS $CFLAGS"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
#error "Need GCC >= 4.8 for sane POWER8 support"
#endif
#include <altivec.h>
int main () {
vector unsigned char r;
vector unsigned int v = vec_splat_u32 (1);
r = __builtin_vec_vgbbd ((vector unsigned char) v);
return 0;
}]])], have_pwr8_intrinsics=yes)
CFLAGS=$save_CFLAGS
AC_ARG_ENABLE(pwr8,
[AC_HELP_STRING([--disable-pwr8-inst],
[disable POWER8-specific instructions])],
[enable_pwr8=$enableval], [enable_pwr8=auto])
if test "x$enable_pwr8" = xno ; then
have_pwr8_intrinsics=disabled
fi
if test $have_pwr8_intrinsics = yes && test $little_endian = yes ; then
DEFINES="$DEFINES -D_ARCH_PWR8"
CXXFLAGS="$CXXFLAGS $PWR8_CFLAGS"
CFLAGS="$CFLAGS $PWR8_CFLAGS"
else
PWR8_CFLAGS=
fi
AC_MSG_RESULT($have_pwr8_intrinsics)
if test "x$enable_pwr8" = xyes && test $have_pwr8_intrinsics = no ; then
AC_MSG_ERROR([POWER8 compiler support not detected])
fi
if test $have_pwr8_intrinsics = yes && test $little_endian = no ; then
AC_MSG_WARN([POWER8 optimization is enabled only on POWER8 Little-Endian])
fi
AC_SUBST([PWR8_CFLAGS], $PWR8_CFLAGS)
dnl Can't have static and shared libraries, default to static if user
dnl explicitly requested. If both disabled, set to static since shared
dnl was explicitly requested.
@ -421,8 +476,29 @@ AC_ARG_ENABLE([debug],
[enable_debug="$enableval"],
[enable_debug=no]
)
AC_ARG_ENABLE([profile],
[AS_HELP_STRING([--enable-profile],
[enable profiling of code @<:@default=disabled@:>@])],
[enable_profile="$enableval"],
[enable_profile=no]
)
if test "x$enable_profile" = xyes; then
DEFINES="$DEFINES -DPROFILE"
if test "x$GCC" = xyes; then
CFLAGS="$CFLAGS -fno-omit-frame-pointer"
fi
if test "x$GXX" = xyes; then
CXXFLAGS="$CXXFLAGS -fno-omit-frame-pointer"
fi
fi
if test "x$enable_debug" = xyes; then
DEFINES="$DEFINES -DDEBUG"
if test "x$enable_profile" = xyes; then
AC_MSG_WARN([Debug and Profile are enabled at the same time])
fi
if test "x$GCC" = xyes; then
if ! echo "$CFLAGS" | grep -q -e '-g'; then
CFLAGS="$CFLAGS -g"

View file

@ -90,6 +90,7 @@
<li><a href="http://www.opengl.org" target="_parent">OpenGL website</a>
<li><a href="http://dri.freedesktop.org" target="_parent">DRI website</a>
<li><a href="http://www.freedesktop.org" target="_parent">freedesktop.org</a>
<li><a href="http://planet.freedesktop.org" target="_parent">Developer blogs</a>
</ul>
<b>Hosted by:</b>

View file

@ -47,6 +47,8 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_base_instance on freedreno/a4xx</li>
<li>GL_ARB_compute_shader on i965</li>
<li>GL_ARB_copy_image on r600</li>
<li>GL_ARB_indirect_parameters on nvc0</li>
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>

View file

@ -132,6 +132,28 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3")
CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3")
CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")

View file

@ -938,7 +938,7 @@ draw_aaline_prepare_outputs(struct draw_context *draw,
const struct pipe_rasterizer_state *rast = draw->rasterizer;
/* update vertex attrib info */
aaline->pos_slot = draw_current_shader_position_output(draw);;
aaline->pos_slot = draw_current_shader_position_output(draw);
if (!rast->line_smooth)
return;

View file

@ -611,6 +611,8 @@ do_clip_line(struct draw_stage *stage,
struct prim_header newprim;
int viewport_index;
newprim.flags = header->flags;
if (stage->draw->rasterizer->flatshade_first) {
prov_vertex = v0;
}

View file

@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage,
}
static void point( struct draw_stage *stage,
struct vertex_header *v0 )
static void point(struct draw_stage *stage,
struct prim_header *header,
struct vertex_header *v0)
{
struct prim_header tmp;
tmp.det = header->det;
tmp.flags = 0;
tmp.v[0] = v0;
stage->next->point( stage->next, &tmp );
stage->next->point(stage->next, &tmp);
}
static void line( struct draw_stage *stage,
struct vertex_header *v0,
struct vertex_header *v1 )
static void line(struct draw_stage *stage,
struct prim_header *header,
struct vertex_header *v0,
struct vertex_header *v1)
{
struct prim_header tmp;
tmp.det = header->det;
tmp.flags = 0;
tmp.v[0] = v0;
tmp.v[1] = v1;
stage->next->line( stage->next, &tmp );
stage->next->line(stage->next, &tmp);
}
static void points( struct draw_stage *stage,
struct prim_header *header )
static void points(struct draw_stage *stage,
struct prim_header *header)
{
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
@ -114,27 +120,41 @@ static void points( struct draw_stage *stage,
inject_front_face_info(stage, header);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
point(stage, header, v0);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
point(stage, header, v1);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
point(stage, header, v2);
}
static void lines( struct draw_stage *stage,
struct prim_header *header )
static void lines(struct draw_stage *stage,
struct prim_header *header)
{
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
struct vertex_header *v2 = header->v[2];
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
stage->next->reset_stipple_counter( stage->next );
/*
* XXX could revisit this. The only stage which cares is the line
* stipple stage. Could just emit correct reset flags here and not
* bother about all the calling through reset_stipple_counter
* stages. Though technically it is necessary if line stipple is
* handled by the driver, but this is not actually hooked up when
* using vbuf (vbuf stage reset_stipple_counter does nothing).
*/
stage->next->reset_stipple_counter(stage->next);
inject_front_face_info(stage, header);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 );
if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
line(stage, header, v2, v0);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
line(stage, header, v0, v1);
if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
line(stage, header, v1, v2);
}

View file

@ -74,9 +74,10 @@ struct vbuf_stage {
unsigned max_indices;
unsigned nr_indices;
/* Cache point size somewhere it's address won't change:
/* Cache point size somewhere its address won't change:
*/
float point_size;
float zero4[4];
struct translate_cache *cache;
};
@ -205,6 +206,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
struct translate_key hw_key;
unsigned dst_offset;
unsigned i;
const struct vertex_info *vinfo;
vbuf->render->set_primitive(vbuf->render, prim);
@ -215,27 +217,33 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
* state change.
*/
vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);
vinfo = vbuf->vinfo;
vbuf->vertex_size = vinfo->size * sizeof(float);
/* Translate from pipeline vertices to hw vertices.
*/
dst_offset = 0;
for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
for (i = 0; i < vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
unsigned src_buffer = 0;
enum pipe_format output_format;
unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit);
emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit);
output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
/* doesn't handle EMIT_OMIT */
assert(emit_sz != 0);
if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
src_buffer = 1;
src_offset = 0;
if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
src_buffer = 1;
src_offset = 0;
}
else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
/* elements which don't exist will get assigned zeros */
src_buffer = 2;
src_offset = 0;
}
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@ -249,7 +257,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
dst_offset += emit_sz;
}
hw_key.nr_elements = vbuf->vinfo->num_attribs;
hw_key.nr_elements = vinfo->num_attribs;
hw_key.output_stride = vbuf->vertex_size;
/* Don't bother with caching at this stage:
@ -261,6 +269,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0);
vbuf->translate->set_buffer(vbuf->translate, 2, &vbuf->zero4[0], 0, ~0);
}
vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
@ -428,7 +437,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
if (!vbuf)
goto fail;
vbuf->stage.draw = draw;
vbuf->stage.name = "vbuf";
vbuf->stage.point = vbuf_first_point;
@ -437,29 +446,30 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
vbuf->stage.flush = vbuf_flush;
vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
vbuf->stage.destroy = vbuf_destroy;
vbuf->render = render;
vbuf->max_indices = MIN2(render->max_indices, UNDEFINED_VERTEX_ID-1);
vbuf->indices = (ushort *) align_malloc( vbuf->max_indices *
sizeof(vbuf->indices[0]),
16 );
vbuf->indices = (ushort *) align_malloc(vbuf->max_indices *
sizeof(vbuf->indices[0]),
16);
if (!vbuf->indices)
goto fail;
vbuf->cache = translate_cache_create();
if (!vbuf->cache)
if (!vbuf->cache)
goto fail;
vbuf->vertices = NULL;
vbuf->vertex_ptr = vbuf->vertices;
vbuf->zero4[0] = vbuf->zero4[1] = vbuf->zero4[2] = vbuf->zero4[3] = 0.0f;
return &vbuf->stage;
fail:
fail:
if (vbuf)
vbuf_destroy(&vbuf->stage);
return NULL;
}

View file

@ -524,7 +524,7 @@ draw_vbo(struct draw_context *draw,
#endif
{
if (index_limit == 0) {
/* one of the buffers is too small to do any valid drawing */
/* one of the buffers is too small to do any valid drawing */
debug_warning("draw: VBO too small to draw anything\n");
util_fpstate_set(fpstate);
return;

View file

@ -44,6 +44,9 @@ struct pt_emit {
unsigned prim;
const struct vertex_info *vinfo;
float zero4[4];
};
@ -92,6 +95,11 @@ draw_pt_emit_prepare(struct pt_emit *emit,
src_buffer = 1;
src_offset = 0;
}
else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
/* elements which don't exist will get assigned zeros */
src_buffer = 2;
src_offset = 0;
}
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
@ -111,6 +119,8 @@ draw_pt_emit_prepare(struct pt_emit *emit,
translate_key_compare(&emit->translate->key, &hw_key) != 0) {
translate_key_sanitize(&hw_key);
emit->translate = translate_cache_find(emit->cache, &hw_key);
emit->translate->set_buffer(emit->translate, 2, &emit->zero4[0], 0, ~0);
}
if (!vinfo->size)
@ -287,6 +297,8 @@ draw_pt_emit_create(struct draw_context *draw)
return NULL;
}
emit->zero4[0] = emit->zero4[1] = emit->zero4[2] = emit->zero4[3] = 0.0f;
return emit;
}

View file

@ -44,6 +44,7 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
#define DRAW_ATTR_NONEXIST 255
/**
* Vertex attribute emit modes
@ -60,18 +61,6 @@ enum attrib_emit {
};
/**
* Attribute interpolation mode
*/
enum interp_mode {
INTERP_NONE, /**< never interpolate vertex header info */
INTERP_POS, /**< special case for frag position */
INTERP_CONSTANT,
INTERP_LINEAR,
INTERP_PERSPECTIVE
};
/**
* Information about hardware/rasterization vertex layout.
*/
@ -85,8 +74,7 @@ struct vertex_info
* memcmp() comparisons.
*/
struct {
unsigned interp_mode:4; /**< INTERP_x */
unsigned emit:4; /**< EMIT_x */
unsigned emit:8; /**< EMIT_x */
unsigned src_index:8; /**< map to post-xform attribs */
} attrib[PIPE_MAX_SHADER_OUTPUTS];
};
@ -124,20 +112,18 @@ draw_vinfo_copy( struct vertex_info *dst,
static inline uint
draw_emit_vertex_attr(struct vertex_info *vinfo,
enum attrib_emit emit,
enum interp_mode interp, /* only used by softpipe??? */
int src_index)
{
const uint n = vinfo->num_attribs;
/* If the src_index is negative, meaning it hasn't been found
* lets just redirect it to the first output slot */
* we'll assign it all zeros later - set to DRAW_ATTR_NONEXIST */
if (src_index < 0) {
src_index = 0;
src_index = DRAW_ATTR_NONEXIST;
}
assert(n < Elements(vinfo->attrib));
vinfo->attrib[n].emit = emit;
vinfo->attrib[n].interp_mode = interp;
vinfo->attrib[n].src_index = src_index;
vinfo->num_attribs++;
return n;

View file

@ -458,7 +458,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
{
/* Special case 4x4f --> 1x16ub */
if (src_type.length == 4 &&
util_cpu_caps.has_sse2)
(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
{
num_dsts = (num_srcs + 3) / 4;
dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
@ -545,7 +545,7 @@ lp_build_conv(struct gallivm_state *gallivm,
((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
(num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&
util_cpu_caps.has_sse2)
(util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
{
struct lp_build_context bld;
struct lp_type int16_type, int32_type;

View file

@ -136,6 +136,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;

View file

@ -720,7 +720,7 @@ lp_build_transpose_aos_n(struct gallivm_state *gallivm,
default:
assert(0);
};
}
}

View file

@ -1197,7 +1197,7 @@ get_soa_array_offsets(struct lp_build_context *uint_bld,
if (need_perelement_offset) {
LLVMValueRef pixel_offsets;
int i;
unsigned i;
/* build pixel offset vector: {0, 1, 2, 3, ...} */
pixel_offsets = uint_bld->undef;
for (i = 0; i < uint_bld->type.length; i++) {
@ -1809,7 +1809,7 @@ emit_store_double_chan(struct lp_build_tgsi_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *float_bld = &bld_base->base;
int i;
unsigned i;
LLVMValueRef temp, temp2;
LLVMValueRef shuffles[8];
LLVMValueRef shuffles2[8];
@ -2713,7 +2713,7 @@ static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
int pc)
{
int i;
unsigned i;
for (i = 0; i < 5; i++) {
unsigned opcode;

View file

@ -431,7 +431,7 @@ hud_alloc_vertices(struct hud_context *hud, struct vertex_queue *v,
v->max_num_vertices = num_vertices;
v->vbuf.stride = stride;
u_upload_alloc(hud->uploader, 0, v->vbuf.stride * v->max_num_vertices,
&v->vbuf.buffer_offset, &v->vbuf.buffer,
16, &v->vbuf.buffer_offset, &v->vbuf.buffer,
(void**)&v->vertices);
}
@ -1176,8 +1176,8 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
hud->pipe = pipe;
hud->cso = cso;
hud->uploader = u_upload_create(pipe, 256 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
hud->uploader = u_upload_create(pipe, 256 * 1024,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM);
/* font */
if (!util_font_create(pipe, UTIL_FONT_FIXED_8X13, &hud->font)) {

View file

@ -153,10 +153,11 @@ util_primconvert_draw_vbo(struct primconvert_context *pc,
}
if (!pc->upload) {
pc->upload = u_upload_create(pc->pipe, 4096, 4, PIPE_BIND_INDEX_BUFFER);
pc->upload = u_upload_create(pc->pipe, 4096, PIPE_BIND_INDEX_BUFFER,
PIPE_USAGE_STREAM);
}
u_upload_alloc(pc->upload, 0, new_ib.index_size * new_info.count,
u_upload_alloc(pc->upload, 0, new_ib.index_size * new_info.count, 4,
&new_ib.offset, &new_ib.buffer, &dst);
if (info->indexed) {

View file

@ -1950,7 +1950,7 @@ tgsi_processor_to_shader_stage(unsigned processor)
case TGSI_PROCESSOR_COMPUTE: return MESA_SHADER_COMPUTE;
default:
unreachable("invalid TGSI processor");
};
}
}
struct nir_shader *

View file

@ -259,36 +259,39 @@ tgsi_build_declaration_semantic(
return ds;
}
static struct tgsi_declaration_resource
tgsi_default_declaration_resource(void)
static struct tgsi_declaration_image
tgsi_default_declaration_image(void)
{
struct tgsi_declaration_resource dr;
struct tgsi_declaration_image di;
dr.Resource = TGSI_TEXTURE_BUFFER;
dr.Raw = 0;
dr.Writable = 0;
dr.Padding = 0;
di.Resource = TGSI_TEXTURE_BUFFER;
di.Raw = 0;
di.Writable = 0;
di.Format = 0;
di.Padding = 0;
return dr;
return di;
}
static struct tgsi_declaration_resource
tgsi_build_declaration_resource(unsigned texture,
unsigned raw,
unsigned writable,
struct tgsi_declaration *declaration,
struct tgsi_header *header)
static struct tgsi_declaration_image
tgsi_build_declaration_image(unsigned texture,
unsigned format,
unsigned raw,
unsigned writable,
struct tgsi_declaration *declaration,
struct tgsi_header *header)
{
struct tgsi_declaration_resource dr;
struct tgsi_declaration_image di;
dr = tgsi_default_declaration_resource();
dr.Resource = texture;
dr.Raw = raw;
dr.Writable = writable;
di = tgsi_default_declaration_image();
di.Resource = texture;
di.Format = format;
di.Raw = raw;
di.Writable = writable;
declaration_grow(declaration, header);
return dr;
return di;
}
static struct tgsi_declaration_sampler_view
@ -364,7 +367,7 @@ tgsi_default_full_declaration( void )
full_declaration.Range = tgsi_default_declaration_range();
full_declaration.Semantic = tgsi_default_declaration_semantic();
full_declaration.Interp = tgsi_default_declaration_interp();
full_declaration.Resource = tgsi_default_declaration_resource();
full_declaration.Image = tgsi_default_declaration_image();
full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
full_declaration.Array = tgsi_default_declaration_array();
@ -454,20 +457,21 @@ tgsi_build_full_declaration(
header );
}
if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) {
struct tgsi_declaration_resource *dr;
if (full_decl->Declaration.File == TGSI_FILE_IMAGE) {
struct tgsi_declaration_image *di;
if (maxsize <= size) {
return 0;
}
dr = (struct tgsi_declaration_resource *)&tokens[size];
di = (struct tgsi_declaration_image *)&tokens[size];
size++;
*dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
full_decl->Resource.Raw,
full_decl->Resource.Writable,
declaration,
header);
*di = tgsi_build_declaration_image(full_decl->Image.Resource,
full_decl->Image.Format,
full_decl->Image.Raw,
full_decl->Image.Writable,
declaration,
header);
}
if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
@ -616,7 +620,8 @@ tgsi_default_instruction( void )
instruction.NumSrcRegs = 1;
instruction.Label = 0;
instruction.Texture = 0;
instruction.Padding = 0;
instruction.Memory = 0;
instruction.Padding = 0;
return instruction;
}
@ -762,6 +767,34 @@ tgsi_build_instruction_texture(
return instruction_texture;
}
static struct tgsi_instruction_memory
tgsi_default_instruction_memory( void )
{
struct tgsi_instruction_memory instruction_memory;
instruction_memory.Qualifier = 0;
instruction_memory.Padding = 0;
return instruction_memory;
}
static struct tgsi_instruction_memory
tgsi_build_instruction_memory(
unsigned qualifier,
struct tgsi_token *prev_token,
struct tgsi_instruction *instruction,
struct tgsi_header *header )
{
struct tgsi_instruction_memory instruction_memory;
instruction_memory.Qualifier = qualifier;
instruction_memory.Padding = 0;
instruction->Memory = 1;
instruction_grow( instruction, header );
return instruction_memory;
}
static struct tgsi_texture_offset
tgsi_default_texture_offset( void )
@ -1008,6 +1041,7 @@ tgsi_default_full_instruction( void )
full_instruction.Predicate = tgsi_default_instruction_predicate();
full_instruction.Label = tgsi_default_instruction_label();
full_instruction.Texture = tgsi_default_instruction_texture();
full_instruction.Memory = tgsi_default_instruction_memory();
for( i = 0; i < TGSI_FULL_MAX_TEX_OFFSETS; i++ ) {
full_instruction.TexOffsets[i] = tgsi_default_texture_offset();
}
@ -1119,6 +1153,24 @@ tgsi_build_full_instruction(
prev_token = (struct tgsi_token *) texture_offset;
}
}
if (full_inst->Instruction.Memory) {
struct tgsi_instruction_memory *instruction_memory;
if( maxsize <= size )
return 0;
instruction_memory =
(struct tgsi_instruction_memory *) &tokens[size];
size++;
*instruction_memory = tgsi_build_instruction_memory(
full_inst->Memory.Qualifier,
prev_token,
instruction,
header );
prev_token = (struct tgsi_token *) instruction_memory;
}
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
const struct tgsi_full_dst_register *reg = &full_inst->Dst[i];
struct tgsi_dst_register *dst_register;

View file

@ -348,15 +348,22 @@ iter_declaration(
}
}
if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
if (decl->Declaration.File == TGSI_FILE_IMAGE) {
TXT(", ");
ENM(decl->Resource.Resource, tgsi_texture_names);
if (decl->Resource.Writable)
ENM(decl->Image.Resource, tgsi_texture_names);
TXT(", ");
UID(decl->Image.Format);
if (decl->Image.Writable)
TXT(", WR");
if (decl->Resource.Raw)
if (decl->Image.Raw)
TXT(", RAW");
}
if (decl->Declaration.File == TGSI_FILE_BUFFER) {
if (decl->Declaration.Atomic)
TXT(", ATOMIC");
}
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
TXT(", ");
ENM(decl->SamplerView.Resource, tgsi_texture_names);
@ -617,6 +624,16 @@ iter_instruction(
}
}
if (inst->Instruction.Memory) {
uint32_t qualifier = inst->Memory.Qualifier;
while (qualifier) {
int bit = ffs(qualifier) - 1;
qualifier &= ~(1U << bit);
TXT(", ");
ENM(bit, tgsi_memory_names);
}
}
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:

View file

@ -473,6 +473,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;

View file

@ -37,231 +37,231 @@
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
{ 1, 1, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
{ 1, 1, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
{ 1, 1, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
{ 1, 1, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
{ 1, 1, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
{ 1, 1, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
{ 1, 1, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
{ 1, 2, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
{ 1, 2, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
{ 1, 2, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
{ 1, 2, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
{ 1, 2, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
{ 1, 2, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
{ 1, 2, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
{ 1, 2, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
{ 1, 2, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
{ 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
{ 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
{ 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
{ 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
{ 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
{ 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
{ 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
{ 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
{ 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
{ 1, 1, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
{ 1, 1, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
{ 1, 1, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
{ 1, 2, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
{ 1, 2, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
{ 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
{ 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
{ 1, 2, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
{ 1, 1, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
{ 1, 1, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
{ 1, 1, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
{ 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
{ 1, 1, 0, 0, 0, 0, COMP, "PK2H", TGSI_OPCODE_PK2H },
{ 1, 1, 0, 0, 0, 0, COMP, "PK2US", TGSI_OPCODE_PK2US },
{ 1, 1, 0, 0, 0, 0, COMP, "PK4B", TGSI_OPCODE_PK4B },
{ 1, 1, 0, 0, 0, 0, COMP, "PK4UB", TGSI_OPCODE_PK4UB },
{ 0, 1, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
{ 1, 2, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
{ 0, 1, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
{ 1, 2, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
{ 1, 1, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
{ 1, 2, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
{ 1, 2, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
{ 0, 1, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
{ 1, 2, 1, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
{ 1, 4, 1, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
{ 1, 2, 1, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
{ 1, 1, 0, 0, 0, 0, COMP, "UP2H", TGSI_OPCODE_UP2H },
{ 1, 1, 0, 0, 0, 0, COMP, "UP2US", TGSI_OPCODE_UP2US },
{ 1, 1, 0, 0, 0, 0, COMP, "UP4B", TGSI_OPCODE_UP4B },
{ 1, 1, 0, 0, 0, 0, COMP, "UP4UB", TGSI_OPCODE_UP4UB },
{ 0, 1, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
{ 0, 1, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
{ 0, 1, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
{ 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
{ 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
{ 1, 1, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
{ 1, 3, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
{ 1, 1, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
{ 1, 2, 1, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
{ 0, 1, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
{ 1, 2, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
{ 1, 2, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
{ 1, 2, 1, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
{ 0, 1, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
{ 0, 1, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
{ 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
{ 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
{ 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
{ 0, 1, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
{ 1, 1, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
{ 1, 1, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
{ 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
{ 1, 2, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
{ 1, 2, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
{ 1, 3, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
{ 1, 2, 1, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
{ 1, 2, 1, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
{ 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
{ 0, 1, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
{ 0, 1, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
{ 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
{ 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
{ 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
{ 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 0, 0, 0, 0, 0, 0, NONE, "", 105 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
{ 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
{ 1, 2, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
{ 1, 2, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
{ 0, 1, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
{ 0, 1, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
{ 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
{ 1, 3, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
{ 1, 1, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
{ 1, 2, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
{ 1, 2, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
{ 1, 2, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
{ 1, 1, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
{ 1, 2, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
{ 1, 2, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
{ 1, 2, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
{ 1, 1, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
{ 1, 1, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
{ 1, 2, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
{ 1, 2, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
{ 1, 3, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
{ 1, 2, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
{ 1, 2, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
{ 1, 2, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
{ 1, 2, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
{ 1, 2, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
{ 1, 2, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
{ 1, 2, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
{ 1, 2, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
{ 1, 2, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
{ 0, 1, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
{ 0, 1, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
{ 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
{ 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
{ 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
{ 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 }, /* removed */
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 }, /* removed */
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 }, /* removed */
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 }, /* removed */
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 }, /* removed */
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 }, /* removed */
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 }, /* removed */
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
{ 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 }, /* removed */
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 }, /* removed */
{ 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 69 }, /* removed */
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 }, /* removed */
{ 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 }, /* removed */
{ 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
{ 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
{ 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
{ 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 1, 1, 0, 0, 0, 0, 0, NONE, "RESQ", TGSI_OPCODE_RESQ },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
{ 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
{ 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
{ 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
{ 1, 3, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
{ 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
{ 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
{ 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
{ 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
{ 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
{ 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
{ 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
{ 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
{ 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
{ 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
{ 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
{ 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
{ 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
{ 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
{ 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
{ 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
{ 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
{ 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
{ 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
{ 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
{ 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
{ 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
{ 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
{ 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
{ 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
{ 1, 2, 1, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
{ 1, 3, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
{ 1, 3, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
{ 1, 4, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
{ 1, 1, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
{ 1, 1, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
{ 1, 1, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
{ 1, 1, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
{ 1, 1, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
{ 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
{ 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
{ 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
{ 1, 1, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
{ 1, 1, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
{ 1, 1, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
{ 1, 1, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
{ 1, 2, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
{ 1, 2, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
{ 1, 2, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
{ 1, 2, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
{ 1, 2, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
{ 1, 2, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
{ 1, 2, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
{ 1, 2, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
{ 1, 1, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
{ 1, 1, 0, 0 ,0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
{ 1, 3, 0, 0 ,0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
{ 1, 1, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
{ 1, 2, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
{ 2, 1, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
{ 1, 1, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
{ 1, 1, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
{ 1, 1, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
{ 1, 1, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
{ 1, 1, 0, 0 ,0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
{ 1, 1, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
{ 1, 1, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
{ 1, 1, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
{ 1, 1, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
{ 1, 1, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
{ 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
{ 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
{ 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
{ 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
{ 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
{ 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
};
const struct tgsi_opcode_info *

View file

@ -74,6 +74,7 @@ struct tgsi_opcode_info
unsigned num_dst:3;
unsigned num_src:3;
unsigned is_tex:1;
unsigned is_store:1;
unsigned is_branch:1;
int pre_dedent:2;
int post_indent:2;

View file

@ -121,8 +121,8 @@ tgsi_parse_token(
next_token( ctx, &decl->Semantic );
}
if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
next_token(ctx, &decl->Resource);
if (decl->Declaration.File == TGSI_FILE_IMAGE) {
next_token(ctx, &decl->Image);
}
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
@ -195,6 +195,10 @@ tgsi_parse_token(
}
}
if (inst->Instruction.Memory) {
next_token(ctx, &inst->Memory);
}
assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {

View file

@ -64,7 +64,7 @@ struct tgsi_full_declaration
struct tgsi_declaration_dimension Dim;
struct tgsi_declaration_interp Interp;
struct tgsi_declaration_semantic Semantic;
struct tgsi_declaration_resource Resource;
struct tgsi_declaration_image Image;
struct tgsi_declaration_sampler_view SamplerView;
struct tgsi_declaration_array Array;
};
@ -91,6 +91,7 @@ struct tgsi_full_instruction
struct tgsi_instruction_predicate Predicate;
struct tgsi_instruction_label Label;
struct tgsi_instruction_texture Texture;
struct tgsi_instruction_memory Memory;
struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS];
struct tgsi_texture_offset TexOffsets[TGSI_FULL_MAX_TEX_OFFSETS];

View file

@ -187,13 +187,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
if (procType == TGSI_PROCESSOR_FRAGMENT &&
info->reads_position &&
src->Register.Index == 0 &&
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
info->reads_z = TRUE;
!src->Register.Indirect) {
unsigned name =
info->input_semantic_name[src->Register.Index];
unsigned index =
info->input_semantic_index[src->Register.Index];
if (name == TGSI_SEMANTIC_POSITION &&
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
src->Register.SwizzleW == TGSI_SWIZZLE_Z))
info->reads_z = TRUE;
if (name == TGSI_SEMANTIC_COLOR) {
unsigned mask =
(1 << src->Register.SwizzleX) |
(1 << src->Register.SwizzleY) |
(1 << src->Register.SwizzleZ) |
(1 << src->Register.SwizzleW);
info->colors_read |= mask << (index * 4);
}
}
}
@ -358,7 +373,10 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->uses_primid = TRUE;
} else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
info->uses_invocationid = TRUE;
}
} else if (semName == TGSI_SEMANTIC_POSITION)
info->reads_position = TRUE;
else if (semName == TGSI_SEMANTIC_FACE)
info->uses_frontface = TRUE;
}
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte) semName;
@ -392,6 +410,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}
else if (semName == TGSI_SEMANTIC_STENCIL) {
info->writes_stencil = TRUE;
} else if (semName == TGSI_SEMANTIC_SAMPLEMASK) {
info->writes_samplemask = TRUE;
}
}

View file

@ -77,11 +77,13 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
ubyte colors_read; /**< which color components are read by the FS */
ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_samplemask; /**< does fragment shader write sample mask? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KILL or KILL_IF instruction used? */
boolean uses_persp_center;

View file

@ -54,8 +54,9 @@ static const char *tgsi_file_names[] =
"IMM",
"PRED",
"SV",
"RES",
"SVIEW"
"IMAGE",
"SVIEW",
"BUFFER",
};
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
@ -96,6 +97,8 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
"TESSINNER",
"VERTICESIN",
"HELPER_INVOCATION",
"BASEINSTANCE",
"DRAWID",
};
const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
@ -205,6 +208,13 @@ const char *tgsi_immediate_type_names[4] =
"FLT64"
};
const char *tgsi_memory_names[3] =
{
"COHERENT",
"RESTRICT",
"VOLATILE",
};
static inline void
tgsi_strings_check(void)

View file

@ -60,6 +60,8 @@ extern const char *tgsi_fs_coord_pixel_center_names[2];
extern const char *tgsi_immediate_type_names[4];
extern const char *tgsi_memory_names[3];
const char *
tgsi_file_name(unsigned file);

View file

@ -1039,6 +1039,12 @@ parse_instruction(
inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN;
}
if ((i >= TGSI_OPCODE_LOAD && i <= TGSI_OPCODE_ATOMIMAX) ||
i == TGSI_OPCODE_RESQ) {
inst.Instruction.Memory = 1;
inst.Memory.Qualifier = 0;
}
/* Parse instruction operands.
*/
for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {
@ -1090,6 +1096,27 @@ parse_instruction(
}
inst.Texture.NumOffsets = i;
cur = ctx->cur;
eat_opt_white(&cur);
for (i = 0; inst.Instruction.Memory && *cur == ','; i++) {
uint j;
cur++;
eat_opt_white(&cur);
ctx->cur = cur;
for (j = 0; j < 3; j++) {
if (str_match_nocase_whole(&ctx->cur, tgsi_memory_names[j])) {
inst.Memory.Qualifier |= 1U << j;
break;
}
}
if (j == 3) {
report_error(ctx, "Expected memory qualifier");
return FALSE;
}
cur = ctx->cur;
eat_opt_white(&cur);
}
cur = ctx->cur;
eat_opt_white( &cur );
if (info->is_branch && *cur == ':') {
@ -1251,10 +1278,10 @@ static boolean parse_declaration( struct translate_ctx *ctx )
cur++;
eat_opt_white( &cur );
if (file == TGSI_FILE_RESOURCE) {
if (file == TGSI_FILE_IMAGE) {
for (i = 0; i < TGSI_TEXTURE_COUNT; i++) {
if (str_match_nocase_whole(&cur, tgsi_texture_names[i])) {
decl.Resource.Resource = i;
decl.Image.Resource = i;
break;
}
}
@ -1263,16 +1290,18 @@ static boolean parse_declaration( struct translate_ctx *ctx )
return FALSE;
}
/* XXX format */
cur2 = cur;
eat_opt_white(&cur2);
while (*cur2 == ',') {
cur2++;
eat_opt_white(&cur2);
if (str_match_nocase_whole(&cur2, "RAW")) {
decl.Resource.Raw = 1;
decl.Image.Raw = 1;
} else if (str_match_nocase_whole(&cur2, "WR")) {
decl.Resource.Writable = 1;
decl.Image.Writable = 1;
} else {
break;
@ -1348,6 +1377,11 @@ static boolean parse_declaration( struct translate_ctx *ctx )
decl.SamplerView.ReturnTypeX;
}
ctx->cur = cur;
} else if (file == TGSI_FILE_BUFFER) {
if (str_match_nocase_whole(&cur, "ATOMIC")) {
decl.Declaration.Atomic = 1;
ctx->cur = cur;
}
} else {
if (str_match_nocase_whole(&cur, "LOCAL")) {
decl.Declaration.Local = 1;

View file

@ -50,6 +50,7 @@ union tgsi_any_token {
struct tgsi_declaration_range decl_range;
struct tgsi_declaration_dimension decl_dim;
struct tgsi_declaration_interp decl_interp;
struct tgsi_declaration_image decl_image;
struct tgsi_declaration_semantic decl_semantic;
struct tgsi_declaration_sampler_view decl_sampler_view;
struct tgsi_declaration_array array;
@ -59,6 +60,7 @@ union tgsi_any_token {
struct tgsi_instruction_predicate insn_predicate;
struct tgsi_instruction_label insn_label;
struct tgsi_instruction_texture insn_texture;
struct tgsi_instruction_memory insn_memory;
struct tgsi_texture_offset insn_texture_offset;
struct tgsi_src_register src;
struct tgsi_ind_register ind;
@ -115,7 +117,6 @@ struct ureg_program
unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
struct {
unsigned index;
unsigned semantic_name;
unsigned semantic_index;
} system_value[UREG_MAX_SYSTEM_VALUE];
@ -155,6 +156,21 @@ struct ureg_program
} sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned nr_sampler_views;
struct {
unsigned index;
unsigned target;
unsigned format;
boolean wr;
boolean raw;
} image[PIPE_MAX_SHADER_IMAGES];
unsigned nr_images;
struct {
unsigned index;
bool atomic;
} buffer[PIPE_MAX_SHADER_BUFFERS];
unsigned nr_buffers;
struct util_bitmask *free_temps;
struct util_bitmask *local_temps;
struct util_bitmask *decl_temps;
@ -320,20 +336,29 @@ ureg_DECL_input(struct ureg_program *ureg,
struct ureg_src
ureg_DECL_system_value(struct ureg_program *ureg,
unsigned index,
unsigned semantic_name,
unsigned semantic_index)
{
unsigned i;
for (i = 0; i < ureg->nr_system_values; i++) {
if (ureg->system_value[i].semantic_name == semantic_name &&
ureg->system_value[i].semantic_index == semantic_index) {
goto out;
}
}
if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
ureg->system_value[ureg->nr_system_values].index = index;
ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
i = ureg->nr_system_values;
ureg->nr_system_values++;
} else {
set_bad(ureg);
}
return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, index);
out:
return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
}
@ -648,6 +673,60 @@ ureg_DECL_sampler_view(struct ureg_program *ureg,
return reg;
}
/* Return a register for the given image unit, declaring it on first use.
 */
struct ureg_src
ureg_DECL_image(struct ureg_program *ureg,
                unsigned index,
                unsigned target,
                unsigned format,
                boolean wr,
                boolean raw)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
   unsigned slot = 0;

   /* Reuse an existing declaration for this image index, if any. */
   while (slot < ureg->nr_images) {
      if (ureg->image[slot].index == index)
         return reg;
      slot++;
   }

   if (slot >= PIPE_MAX_SHADER_IMAGES) {
      assert(0);
      return reg;
   }

   ureg->image[slot].index = index;
   ureg->image[slot].target = target;
   ureg->image[slot].format = format;
   ureg->image[slot].wr = wr;
   ureg->image[slot].raw = raw;
   ureg->nr_images++;
   return reg;
}
/* Return a register for the given buffer slot, declaring it on first use.
 */
struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
                                 bool atomic)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
   unsigned slot = 0;

   /* Already declared? */
   while (slot < ureg->nr_buffers) {
      if (ureg->buffer[slot].index == nr)
         return reg;
      slot++;
   }

   if (slot >= PIPE_MAX_SHADER_BUFFERS) {
      assert(0);
      return reg;
   }

   ureg->buffer[slot].index = nr;
   ureg->buffer[slot].atomic = atomic;
   ureg->nr_buffers++;
   return reg;
}
static int
match_or_expand_immediate64( const unsigned *v,
int type,
@ -1148,6 +1227,21 @@ ureg_emit_texture_offset(struct ureg_program *ureg,
}
/**
 * Emit the tgsi_instruction_memory token for a memory instruction and
 * flag the parent instruction as carrying one.
 *
 * \param extended_token  token offset of the previously emitted
 *                        instruction word to patch (see ureg_emit_insn)
 * \param qualifier       bitmask of memory qualifier flags
 */
void
ureg_emit_memory(struct ureg_program *ureg,
                 unsigned extended_token,
                 unsigned qualifier)
{
   union tgsi_any_token *out, *insn;

   /* Reserve one new token, then reach back and set Memory on the
    * instruction word so the parser knows a memory token follows.
    */
   out = get_tokens( ureg, DOMAIN_INSN, 1 );
   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );

   insn->insn.Memory = 1;

   out[0].value = 0;
   out[0].insn_memory.Qualifier = qualifier;
}
void
ureg_fixup_insn_size(struct ureg_program *ureg,
@ -1300,6 +1394,42 @@ ureg_label_insn(struct ureg_program *ureg,
}
/**
 * Emit a complete memory instruction: the base opcode token, a memory
 * qualifier token, then all destination and source operands.
 *
 * \param opcode     TGSI opcode
 * \param dst        array of nr_dst destination operands
 * \param src        array of nr_src source operands
 * \param qualifier  bitmask of memory qualifier flags
 */
void
ureg_memory_insn(struct ureg_program *ureg,
                 unsigned opcode,
                 const struct ureg_dst *dst,
                 unsigned nr_dst,
                 const struct ureg_src *src,
                 unsigned nr_src,
                 unsigned qualifier)
{
   struct ureg_emit_insn_result insn;
   unsigned i;

   /* Base instruction: optional flags off, identity XYZW swizzle. */
   insn = ureg_emit_insn(ureg,
                         opcode,
                         FALSE,
                         FALSE,
                         FALSE,
                         TGSI_SWIZZLE_X,
                         TGSI_SWIZZLE_Y,
                         TGSI_SWIZZLE_Z,
                         TGSI_SWIZZLE_W,
                         nr_dst,
                         nr_src);

   /* The memory token must directly follow the instruction word. */
   ureg_emit_memory(ureg, insn.extended_token, qualifier);

   for (i = 0; i < nr_dst; i++)
      ureg_emit_dst(ureg, dst[i]);

   for (i = 0; i < nr_src; i++)
      ureg_emit_src(ureg, src[i]);

   /* Patch the final token count now that all operands are emitted. */
   ureg_fixup_insn_size(ureg, insn.insn_token);
}
static void
emit_decl_semantic(struct ureg_program *ureg,
unsigned file,
@ -1477,6 +1607,52 @@ emit_decl_sampler_view(struct ureg_program *ureg,
out[2].decl_sampler_view.ReturnTypeW = return_type_w;
}
/* Emit the three-token declaration for an image resource:
 * header, register range, image-specific token.
 */
static void
emit_decl_image(struct ureg_program *ureg,
                unsigned index,
                unsigned target,
                unsigned format,
                boolean wr,
                boolean raw)
{
   union tgsi_any_token *tok = get_tokens(ureg, DOMAIN_DECL, 3);

   tok[0].value = 0;
   tok[1].value = 0;
   tok[2].value = 0;

   tok[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   tok[0].decl.NrTokens = 3;
   tok[0].decl.File = TGSI_FILE_IMAGE;
   tok[0].decl.UsageMask = 0xf;

   tok[1].decl_range.First = index;
   tok[1].decl_range.Last = index;

   tok[2].decl_image.Resource = target;
   tok[2].decl_image.Format = format;
   tok[2].decl_image.Writable = wr;
   tok[2].decl_image.Raw = raw;
}
/* Emit the two-token declaration for a shader buffer:
 * header (with Atomic flag), then register range.
 */
static void
emit_decl_buffer(struct ureg_program *ureg,
                 unsigned index,
                 bool atomic)
{
   union tgsi_any_token *tok = get_tokens(ureg, DOMAIN_DECL, 2);

   tok[0].value = 0;
   tok[1].value = 0;

   tok[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
   tok[0].decl.NrTokens = 2;
   tok[0].decl.File = TGSI_FILE_BUFFER;
   tok[0].decl.UsageMask = 0xf;
   tok[0].decl.Atomic = atomic;

   tok[1].decl_range.First = index;
   tok[1].decl_range.Last = index;
}
static void
emit_immediate( struct ureg_program *ureg,
const unsigned *v,
@ -1587,8 +1763,8 @@ static void emit_decls( struct ureg_program *ureg )
for (i = 0; i < ureg->nr_system_values; i++) {
emit_decl_semantic(ureg,
TGSI_FILE_SYSTEM_VALUE,
ureg->system_value[i].index,
ureg->system_value[i].index,
i,
i,
ureg->system_value[i].semantic_name,
ureg->system_value[i].semantic_index,
TGSI_WRITEMASK_XYZW, 0);
@ -1636,6 +1812,19 @@ static void emit_decls( struct ureg_program *ureg )
ureg->sampler_view[i].return_type_w);
}
for (i = 0; i < ureg->nr_images; i++) {
emit_decl_image(ureg,
ureg->image[i].index,
ureg->image[i].target,
ureg->image[i].format,
ureg->image[i].wr,
ureg->image[i].raw);
}
for (i = 0; i < ureg->nr_buffers; i++) {
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
}
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
emit_decl_range(ureg,

View file

@ -221,7 +221,6 @@ ureg_DECL_input(struct ureg_program *,
struct ureg_src
ureg_DECL_system_value(struct ureg_program *,
unsigned index,
unsigned semantic_name,
unsigned semantic_index);
@ -327,6 +326,16 @@ ureg_DECL_sampler_view(struct ureg_program *,
unsigned return_type_z,
unsigned return_type_w );
struct ureg_src
ureg_DECL_image(struct ureg_program *ureg,
unsigned index,
unsigned target,
unsigned format,
boolean wr,
boolean raw);
struct ureg_src
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
@ -522,6 +531,14 @@ ureg_label_insn(struct ureg_program *ureg,
unsigned nr_src,
unsigned *label);
void
ureg_memory_insn(struct ureg_program *ureg,
unsigned opcode,
const struct ureg_dst *dst,
unsigned nr_dst,
const struct ureg_src *src,
unsigned nr_src,
unsigned qualifier);
/***********************************************************************
* Internal instruction helpers, don't call these directly:
@ -559,6 +576,11 @@ void
ureg_emit_texture_offset(struct ureg_program *ureg,
const struct tgsi_texture_offset *offset);
void
ureg_emit_memory(struct ureg_program *ureg,
unsigned insn_token,
unsigned qualifier);
void
ureg_emit_dst( struct ureg_program *ureg,
struct ureg_dst dst );

View file

@ -29,6 +29,7 @@
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_util.h"
#include "tgsi_exec.h"
union pointer_hack
{
@ -53,17 +54,17 @@ tgsi_util_get_src_register_swizzle(
const struct tgsi_src_register *reg,
unsigned component )
{
switch( component ) {
case 0:
switch (component) {
case TGSI_CHAN_X:
return reg->SwizzleX;
case 1:
case TGSI_CHAN_Y:
return reg->SwizzleY;
case 2:
case TGSI_CHAN_Z:
return reg->SwizzleZ;
case 3:
case TGSI_CHAN_W:
return reg->SwizzleW;
default:
assert( 0 );
assert(0);
}
return 0;
}

View file

@ -320,7 +320,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
for (i = 0; i < 4; i++)
ctx->vertices[i][0][3] = 1; /*v.w*/
ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
ctx->upload = u_upload_create(pipe, 65536, PIPE_BIND_VERTEX_BUFFER,
PIPE_USAGE_STREAM);
return &ctx->base;
}
@ -1191,7 +1192,7 @@ static void blitter_draw(struct blitter_context_priv *ctx,
vb.stride = 8 * sizeof(float);
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), 4, ctx->vertices,
&vb.buffer_offset, &vb.buffer);
if (!vb.buffer)
return;
@ -2111,7 +2112,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
return;
}
u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
u_upload_data(ctx->upload, 0, num_channels*4, 4, clear_value,
&vb.buffer_offset, &vb.buffer);
if (!vb.buffer)
goto out;

View file

@ -727,6 +727,65 @@ error1:
;
}
/**
 * Dump an 8-bit-per-channel RGBA image to a .bmp file for debugging.
 *
 * \param filename  path of the BMP file to create
 * \param width     image width in pixels
 * \param height    image height in pixels
 * \param rgba      pixel data, 4 bytes per pixel
 * \param stride    row stride in pixels (not bytes): each row starts at
 *                  rgba + stride * row * 4
 *
 * NOTE(review): writing the header structs directly assumes
 * bmp_file_header/bmp_info_header are packed as declared elsewhere in
 * this file -- confirm if reusing this code.
 */
void
debug_dump_ubyte_rgba_bmp(const char *filename,
                          unsigned width, unsigned height,
                          const ubyte *rgba, unsigned stride)
{
   FILE *stream;
   struct bmp_file_header bmfh;
   struct bmp_info_header bmih;
   unsigned x, y;

   assert(rgba);
   if(!rgba)
      goto error1;

   /* 14-byte file header: 'BM' magic, total size, offset to pixels. */
   bmfh.bfType = 0x4d42;
   bmfh.bfSize = 14 + 40 + height*width*4;
   bmfh.bfReserved1 = 0;
   bmfh.bfReserved2 = 0;
   bmfh.bfOffBits = 14 + 40;

   /* 40-byte info header: 32 bpp, uncompressed. */
   bmih.biSize = 40;
   bmih.biWidth = width;
   bmih.biHeight = height;
   bmih.biPlanes = 1;
   bmih.biBitCount = 32;
   bmih.biCompression = 0;
   bmih.biSizeImage = height*width*4;
   bmih.biXPelsPerMeter = 0;
   bmih.biYPelsPerMeter = 0;
   bmih.biClrUsed = 0;
   bmih.biClrImportant = 0;

   stream = fopen(filename, "wb");
   assert(stream);
   if(!stream)
      goto error1;

   fwrite(&bmfh, 14, 1, stream);
   fwrite(&bmih, 40, 1, stream);

   /* BMP with positive biHeight is stored bottom-up, so emit the last
    * row first.
    */
   y = height;
   while(y--) {
      const ubyte *ptr = rgba + (stride * y * 4);
      for(x = 0; x < width; ++x)
      {
         struct bmp_rgb_quad pixel;
         pixel.rgbRed = ptr[x*4 + 0];
         pixel.rgbGreen = ptr[x*4 + 1];
         pixel.rgbBlue = ptr[x*4 + 2];
         pixel.rgbAlpha = ptr[x*4 + 3];
         fwrite(&pixel, 1, 4, stream);
      }
   }

   fclose(stream);
error1:
   ;
}
/**
* Print PIPE_TRANSFER_x flags with a message.

View file

@ -490,12 +490,16 @@ void debug_dump_transfer_bmp(struct pipe_context *pipe,
void debug_dump_float_rgba_bmp(const char *filename,
unsigned width, unsigned height,
float *rgba, unsigned stride);
void debug_dump_ubyte_rgba_bmp(const char *filename,
unsigned width, unsigned height,
const ubyte *rgba, unsigned stride);
#else
#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
#define debug_dump_surface(pipe, prefix, surface) ((void)0)
#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
#define debug_dump_transfer_bmp(filename, transfer, ptr) ((void)0)
#define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
#define debug_dump_ubyte_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
#endif

View file

@ -177,6 +177,7 @@ struct pstip_transform_context {
struct tgsi_shader_info info;
uint tempsUsed; /**< bitmask */
int wincoordInput;
unsigned wincoordFile;
int maxInput;
uint samplersUsed; /**< bitfield of samplers used */
int freeSampler; /** an available sampler for the pstipple */
@ -206,7 +207,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
pctx->samplersUsed |= 1 << i;
}
}
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
else if (decl->Declaration.File == pctx->wincoordFile) {
pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
pctx->wincoordInput = (int) decl->Range.First;
@ -275,10 +276,22 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
wincoordInput = pctx->wincoordInput;
if (pctx->wincoordInput < 0) {
struct tgsi_full_declaration decl;
decl = tgsi_default_full_declaration();
/* declare new position input reg */
tgsi_transform_input_decl(ctx, wincoordInput,
TGSI_SEMANTIC_POSITION, 1,
TGSI_INTERPOLATE_LINEAR);
decl.Declaration.File = pctx->wincoordFile;
decl.Declaration.Semantic = 1;
decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
decl.Range.First =
decl.Range.Last = wincoordInput;
if (pctx->wincoordFile == TGSI_FILE_INPUT) {
decl.Declaration.Interpolate = 1;
decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR;
}
ctx->emit_declaration(ctx, &decl);
}
sampIdx = pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler;
@ -327,7 +340,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
TGSI_FILE_TEMPORARY, texTemp,
TGSI_WRITEMASK_XYZW,
TGSI_FILE_INPUT, wincoordInput,
pctx->wincoordFile, wincoordInput,
TGSI_FILE_IMMEDIATE, pctx->numImmed);
/* TEX texTemp, texTemp, sampler; */
@ -351,11 +364,15 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
* will be used to sample the stipple texture;
* if NULL, the fixed unit is used
* \param fixedUnit fixed texture unit used for the stipple texture
* \param wincoordFile TGSI_FILE_INPUT or TGSI_FILE_SYSTEM_VALUE,
* depending on which one is supported by the driver
* for TGSI_SEMANTIC_POSITION in the fragment shader
*/
struct tgsi_token *
util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
unsigned *samplerUnitOut,
unsigned fixedUnit)
unsigned fixedUnit,
unsigned wincoordFile)
{
struct pstip_transform_context transform;
const uint newLen = tgsi_num_tokens(tokens) + NUM_NEW_TOKENS;
@ -370,6 +387,7 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
*/
memset(&transform, 0, sizeof(transform));
transform.wincoordInput = -1;
transform.wincoordFile = wincoordFile;
transform.maxInput = -1;
transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
transform.hasFixedUnit = !samplerUnitOut;

View file

@ -50,7 +50,8 @@ util_pstipple_create_sampler(struct pipe_context *pipe);
struct tgsi_token *
util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
unsigned *samplerUnitOut,
unsigned fixed_unit);
unsigned fixed_unit,
unsigned wincoordFile);
#endif

View file

@ -0,0 +1,310 @@
/*
* Copyright 2015 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Oded Gabbay <oded.gabbay@redhat.com>
*/
/**
* @file
* POWER8 intrinsics portability header.
*
*/
#ifndef U_PWR8_H_
#define U_PWR8_H_
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;
typedef VECTOR_ALIGN_16 union m128i {
__m128i m128i;
vector signed int m128si;
vector unsigned int m128ui;
ubyte ub[16];
ushort us[8];
int i[4];
uint ui[4];
} __m128i_union;
/**
 * Analogue of SSE2 _mm_set_epi32(): build a 128-bit vector from four
 * ints given in high-to-low element order (i3 is the highest element).
 */
static inline __m128i
vec_set_epi32 (int i3, int i2, int i1, int i0)
{
  __m128i_union vdst;

  /* Element order in the union's int array depends on endianness. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
  vdst.i[0] = i0;
  vdst.i[1] = i1;
  vdst.i[2] = i2;
  vdst.i[3] = i3;
#else
  vdst.i[3] = i0;
  vdst.i[2] = i1;
  vdst.i[1] = i2;
  vdst.i[0] = i3;
#endif

  return (__m128i) vdst.m128si;
}
static inline __m128i
vec_setr_epi32 (int i0, int i1, int i2, int i3)
{
return vec_set_epi32 (i3, i2, i1, i0);
}
static inline __m128i
vec_unpacklo_epi32 (__m128i even, __m128i odd)
{
static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
#else
{24, 25, 26, 27, 8, 9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
#endif
return vec_perm (even, odd, perm_mask);
}
static inline __m128i
vec_unpackhi_epi32 (__m128i even, __m128i odd)
{
static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
{ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
#else
{16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7};
#endif
return vec_perm (even, odd, perm_mask);
}
static inline __m128i
vec_unpacklo_epi64 (__m128i even, __m128i odd)
{
static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
{ 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
#else
{24, 25, 26, 27, 28, 29, 30, 31, 8, 9, 10, 11, 12, 13, 14, 15};
#endif
return vec_perm (even, odd, perm_mask);
}
static inline __m128i
vec_unpackhi_epi64 (__m128i even, __m128i odd)
{
static const __m128i perm_mask =
#ifdef PIPE_ARCH_LITTLE_ENDIAN
{ 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
#else
{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7};
#endif
return vec_perm (even, odd, perm_mask);
}
static inline __m128i
vec_add_epi32 (__m128i a, __m128i b)
{
return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
}
static inline __m128i
vec_sub_epi32 (__m128i a, __m128i b)
{
return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
}
/* Call this function ONLY on POWER8 and newer platforms */
static inline __m128i
vec_mullo_epi32 (__m128i a, __m128i b)
{
__m128i v;
__asm__(
"vmuluwm %0, %1, %2 \n"
: "=v" (v)
: "v" (a), "v" (b)
);
return v;
}
static inline void
transpose4_epi32(const __m128i * restrict a,
const __m128i * restrict b,
const __m128i * restrict c,
const __m128i * restrict d,
__m128i * restrict o,
__m128i * restrict p,
__m128i * restrict q,
__m128i * restrict r)
{
__m128i t0 = vec_unpacklo_epi32(*a, *b);
__m128i t1 = vec_unpacklo_epi32(*c, *d);
__m128i t2 = vec_unpackhi_epi32(*a, *b);
__m128i t3 = vec_unpackhi_epi32(*c, *d);
*o = vec_unpacklo_epi64(t0, t1);
*p = vec_unpackhi_epi64(t0, t1);
*q = vec_unpacklo_epi64(t2, t3);
*r = vec_unpackhi_epi64(t2, t3);
}
/**
 * Analogue of SSE2 _mm_slli_epi32(): logical left shift of each 32-bit
 * element by an immediate count.
 */
static inline __m128i
vec_slli_epi32 (__m128i vsrc, unsigned int count)
{
  __m128i_union vec_count;

  /* Shifting by the element width or more yields zero; by zero is a
   * no-op -- handle both without touching the vector unit.
   */
  if (count >= 32)
    return (__m128i) vec_splats (0);
  else if (count == 0)
    return vsrc;

  /* In VMX, all shift count fields must contain the same value */
  vec_count.m128si = (vector signed int) vec_splats (count);

  return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
}
static inline __m128i
vec_srli_epi32 (__m128i vsrc, unsigned int count)
{
__m128i_union vec_count;
if (count >= 32)
return (__m128i) vec_splats (0);
else if (count == 0)
return vsrc;
/* In VMX, all shift count fields must contain the same value */
vec_count.m128si = (vector signed int) vec_splats (count);
return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
}
static inline __m128i
vec_srai_epi32 (__m128i vsrc, unsigned int count)
{
__m128i_union vec_count;
if (count >= 32)
return (__m128i) vec_splats (0);
else if (count == 0)
return vsrc;
/* In VMX, all shift count fields must contain the same value */
vec_count.m128si = (vector signed int) vec_splats (count);
return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
}
static inline __m128i
vec_cmpeq_epi32 (__m128i a, __m128i b)
{
return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
}
static inline __m128i
vec_loadu_si128 (const uint32_t* src)
{
__m128i_union vsrc;
#ifdef PIPE_ARCH_LITTLE_ENDIAN
vsrc.m128ui = *((vector unsigned int *) src);
#else
__m128i vmask, tmp1, tmp2;
vmask = vec_lvsl(0, src);
tmp1 = (__m128i) vec_ld (0, src);
tmp2 = (__m128i) vec_ld (15, src);
vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);
#endif
return vsrc.m128i;
}
static inline __m128i
vec_load_si128 (const uint32_t* src)
{
__m128i_union vsrc;
vsrc.m128ui = *((vector unsigned int *) src);
return vsrc.m128i;
}
static inline void
vec_store_si128 (uint32_t* dest, __m128i vdata)
{
vec_st ((vector unsigned int) vdata, 0, dest);
}
/* Call this function ONLY on POWER8 and newer platforms */
/**
 * Analogue of SSE2 _mm_movemask_epi8(): collect the most significant
 * bit of each of the 16 bytes into a 16-bit integer.
 *
 * Uses the POWER8 vgbbd (vector gather bits by bytes) builtin; after
 * it, the gathered MSBs sit in the two bytes extracted below
 * (which byte depends on endianness).
 */
static inline int
vec_movemask_epi8 (__m128i vsrc)
{
  __m128i_union vtemp;
  int result;

  vtemp.m128i = vec_vgbbd(vsrc);

#ifdef PIPE_ARCH_LITTLE_ENDIAN
  result = vtemp.ub[15] << 8 | vtemp.ub[7];
#else
  result = vtemp.ub[0] << 8 | vtemp.ub[8];
#endif

  return result;
}
static inline __m128i
vec_packs_epi16 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
return (__m128i) vec_packs ((vector signed short) a,
(vector signed short) b);
#else
return (__m128i) vec_packs ((vector signed short) b,
(vector signed short) a);
#endif
}
static inline __m128i
vec_packs_epi32 (__m128i a, __m128i b)
{
#ifdef PIPE_ARCH_LITTLE_ENDIAN
return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
#else
return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif
}
#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
#endif /* U_PWR8_H_ */

View file

@ -166,14 +166,49 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
#endif /* !PIPE_ARCH_SSSE3 */
/*
 * Signed 32x32 -> 64-bit multiply of all four lanes using only SSE2's
 * unsigned _mm_mul_epu32().
 *
 * An unsigned widening multiply is corrected to a signed one by
 * subtracting the other operand from the high half whenever an input
 * lane is negative (plain modular arithmetic):
 *   fixup  = (signmask(a) & b) + (signmask(b) & a)
 *   a * b  = (unsigned)a * (unsigned)b - (fixup << 32)
 *
 * Products of lanes 0 and 2 are returned; lanes 1 and 3 land in *res13.
 */
static inline __m128i
mm_mullohi_epi32(const __m128i a, const __m128i b, __m128i *res13)
{
   const __m128i sign_a = _mm_srai_epi32(a, 31);
   const __m128i sign_b = _mm_srai_epi32(b, 31);
   const __m128i corr = _mm_add_epi32(_mm_and_si128(sign_a, b),
                                      _mm_and_si128(sign_b, a));

   /* Move odd lanes into even positions so _mm_mul_epu32 sees them. */
   const __m128i a_odd = _mm_shuffle_epi32(a, _MM_SHUFFLE(2,3,0,1));
   const __m128i b_odd = _mm_shuffle_epi32(b, _MM_SHUFFLE(2,3,0,1));

   const __m128i prod02 = _mm_mul_epu32(a, b);
   const __m128i prod13 = _mm_mul_epu32(a_odd, b_odd);

   /* Position each lane's correction in the high dword of its 64-bit
    * product: shift for lanes 0/2, mask for lanes 1/3 (already high).
    */
   const __m128i corr02 = _mm_slli_epi64(corr, 32);
   const __m128i corr13 = _mm_and_si128(corr, _mm_set_epi32(-1, 0, -1, 0));

   *res13 = _mm_sub_epi64(prod13, corr13);
   return _mm_sub_epi64(prod02, corr02);
}
/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of
* _mm_mul_epu32().
*
* I suspect this works fine for us because one of our operands is
* always positive, but not sure that this can be used for general
* signed integer multiplication.
* This always works regardless the signs of the operands, since
* the high bits (which would be different) aren't used.
*
* This seems close enough to the speed of SSE4 and the real
* _mm_mullo_epi32() intrinsic as to not justify adding an sse4
@ -188,6 +223,12 @@ static inline __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
/* Interleave the results, either with shuffles or (slightly
* faster) direct bit operations:
* XXX: might be only true for some cpus (in particular 65nm
* Core 2). On most cpus (including that Core 2, but not Nehalem...)
* using _mm_shuffle_ps/_mm_shuffle_epi32 might also be faster
* than using the 3 instructions below. But logic should be fine
* as well, we can't have optimal solution for all cpus (if anything,
* should just use _mm_mullo_epi32() if sse41 is available...).
*/
#if 0
__m128i ba8 = _mm_shuffle_epi32(ba, 8);
@ -214,17 +255,44 @@ transpose4_epi32(const __m128i * restrict a,
__m128i * restrict q,
__m128i * restrict r)
{
__m128i t0 = _mm_unpacklo_epi32(*a, *b);
__m128i t1 = _mm_unpacklo_epi32(*c, *d);
__m128i t2 = _mm_unpackhi_epi32(*a, *b);
__m128i t3 = _mm_unpackhi_epi32(*c, *d);
__m128i t0 = _mm_unpacklo_epi32(*a, *b);
__m128i t1 = _mm_unpacklo_epi32(*c, *d);
__m128i t2 = _mm_unpackhi_epi32(*a, *b);
__m128i t3 = _mm_unpackhi_epi32(*c, *d);
*o = _mm_unpacklo_epi64(t0, t1);
*p = _mm_unpackhi_epi64(t0, t1);
*q = _mm_unpacklo_epi64(t2, t3);
*r = _mm_unpackhi_epi64(t2, t3);
*o = _mm_unpacklo_epi64(t0, t1);
*p = _mm_unpackhi_epi64(t0, t1);
*q = _mm_unpacklo_epi64(t2, t3);
*r = _mm_unpackhi_epi64(t2, t3);
}
/*
 * Like transpose4_epi32(), except the first two inputs already hold
 * the interleaved 32-bit pairs (64-bit values) of the first two rows.
 */
static inline void
transpose2_64_2_32(const __m128i * restrict a01,
                   const __m128i * restrict a23,
                   const __m128i * restrict c,
                   const __m128i * restrict d,
                   __m128i * restrict o,
                   __m128i * restrict p,
                   __m128i * restrict q,
                   __m128i * restrict r)
{
   /* Only rows c/d still need their dwords interleaved. */
   const __m128i cd_lo = _mm_unpacklo_epi32(*c, *d);
   const __m128i cd_hi = _mm_unpackhi_epi32(*c, *d);
   const __m128i ab_lo = *a01;
   const __m128i ab_hi = *a23;

   *o = _mm_unpacklo_epi64(ab_lo, cd_lo);
   *p = _mm_unpackhi_epi64(ab_lo, cd_lo);
   *q = _mm_unpacklo_epi64(ab_hi, cd_hi);
   *r = _mm_unpackhi_epi64(ab_hi, cd_hi);
}
#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))

View file

@ -600,7 +600,8 @@ is_box_inside_resource(const struct pipe_resource *res,
depth = res->array_size;
assert(res->array_size % 6 == 0);
break;
case PIPE_MAX_TEXTURE_TYPES:;
case PIPE_MAX_TEXTURE_TYPES:
break;
}
return box->x >= 0 &&

View file

@ -42,8 +42,8 @@ struct u_upload_mgr {
struct pipe_context *pipe;
unsigned default_size; /* Minimum size of the upload buffer, in bytes. */
unsigned alignment; /* Alignment of each sub-allocation. */
unsigned bind; /* Bitmask of PIPE_BIND_* flags. */
unsigned usage; /* PIPE_USAGE_* */
unsigned map_flags; /* Bitmask of PIPE_TRANSFER_* flags. */
boolean map_persistent; /* If persistent mappings are supported. */
@ -55,10 +55,9 @@ struct u_upload_mgr {
};
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
unsigned bind )
struct u_upload_mgr *
u_upload_create(struct pipe_context *pipe, unsigned default_size,
unsigned bind, unsigned usage)
{
struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
if (!upload)
@ -66,8 +65,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
upload->pipe = pipe;
upload->default_size = default_size;
upload->alignment = alignment;
upload->bind = bind;
upload->usage = usage;
upload->map_persistent =
pipe->screen->get_param(pipe->screen,
@ -149,7 +148,7 @@ u_upload_alloc_buffer(struct u_upload_mgr *upload,
buffer.target = PIPE_BUFFER;
buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
buffer.bind = upload->bind;
buffer.usage = PIPE_USAGE_STREAM;
buffer.usage = upload->usage;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
@ -181,19 +180,24 @@ void
u_upload_alloc(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned size,
unsigned alignment,
unsigned *out_offset,
struct pipe_resource **outbuf,
void **ptr)
{
unsigned alloc_size = align(size, upload->alignment);
unsigned alloc_offset = align(min_out_offset, upload->alignment);
unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
unsigned offset;
min_out_offset = align(min_out_offset, alignment);
offset = align(upload->offset, alignment);
offset = MAX2(offset, min_out_offset);
/* Make sure we have enough space in the upload buffer
* for the sub-allocation. */
if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
* for the sub-allocation.
*/
if (unlikely(!upload->buffer || offset + size > buffer_size)) {
u_upload_alloc_buffer(upload, min_out_offset + size);
if (unlikely(!upload->buffer)) {
*out_offset = ~0;
@ -202,11 +206,10 @@ u_upload_alloc(struct u_upload_mgr *upload,
return;
}
offset = min_out_offset;
buffer_size = upload->buffer->width0;
}
offset = MAX2(upload->offset, alloc_offset);
if (unlikely(!upload->map)) {
upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
offset,
@ -224,8 +227,8 @@ u_upload_alloc(struct u_upload_mgr *upload,
upload->map -= offset;
}
assert(offset < upload->buffer->width0);
assert(offset + size <= upload->buffer->width0);
assert(offset < buffer_size);
assert(offset + size <= buffer_size);
assert(size);
/* Emit the return values: */
@ -233,19 +236,20 @@ u_upload_alloc(struct u_upload_mgr *upload,
pipe_resource_reference(outbuf, upload->buffer);
*out_offset = offset;
upload->offset = offset + alloc_size;
upload->offset = offset + size;
}
void u_upload_data(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned size,
unsigned alignment,
const void *data,
unsigned *out_offset,
struct pipe_resource **outbuf)
{
uint8_t *ptr;
u_upload_alloc(upload, min_out_offset, size,
u_upload_alloc(upload, min_out_offset, size, alignment,
out_offset, outbuf,
(void**)&ptr);
if (ptr)
@ -257,6 +261,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned offset,
unsigned size,
unsigned alignment,
struct pipe_resource *inbuf,
unsigned *out_offset,
struct pipe_resource **outbuf)
@ -278,6 +283,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
if (0)
debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);
u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
u_upload_data(upload, min_out_offset, size, alignment,
map, out_offset, outbuf);
pipe_buffer_unmap( upload->pipe, transfer );
}

View file

@ -43,13 +43,12 @@ struct pipe_resource;
*
* \param pipe Pipe driver.
* \param default_size Minimum size of the upload buffer, in bytes.
* \param alignment Alignment of each suballocation in the upload buffer.
* \param bind Bitmask of PIPE_BIND_* flags.
* \param usage PIPE_USAGE_*
*/
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,
unsigned default_size,
unsigned alignment,
unsigned bind );
struct u_upload_mgr *
u_upload_create(struct pipe_context *pipe, unsigned default_size,
unsigned bind, unsigned usage);
/**
* Destroy the upload manager.
@ -74,6 +73,7 @@ void u_upload_unmap( struct u_upload_mgr *upload );
* \param upload Upload manager
* \param min_out_offset Minimum offset that should be returned in out_offset.
* \param size Size of the allocation.
* \param alignment Alignment of the suballocation within the buffer
* \param out_offset Pointer to where the new buffer offset will be returned.
* \param outbuf Pointer to where the upload buffer will be returned.
* \param ptr Pointer to the allocated memory that is returned.
@ -81,6 +81,7 @@ void u_upload_unmap( struct u_upload_mgr *upload );
void u_upload_alloc(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned size,
unsigned alignment,
unsigned *out_offset,
struct pipe_resource **outbuf,
void **ptr);
@ -95,6 +96,7 @@ void u_upload_alloc(struct u_upload_mgr *upload,
void u_upload_data(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned size,
unsigned alignment,
const void *data,
unsigned *out_offset,
struct pipe_resource **outbuf);
@ -110,6 +112,7 @@ void u_upload_buffer(struct u_upload_mgr *upload,
unsigned min_out_offset,
unsigned offset,
unsigned size,
unsigned alignment,
struct pipe_resource *inbuf,
unsigned *out_offset,
struct pipe_resource **outbuf);

View file

@ -314,8 +314,9 @@ u_vbuf_create(struct pipe_context *pipe,
mgr->translate_cache = translate_cache_create();
memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4,
PIPE_BIND_VERTEX_BUFFER);
mgr->uploader = u_upload_create(pipe, 1024 * 1024,
PIPE_BIND_VERTEX_BUFFER,
PIPE_USAGE_STREAM);
return mgr;
}
@ -454,7 +455,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
/* Create and map the output buffer. */
u_upload_alloc(mgr->uploader, 0,
key->output_stride * num_indices,
key->output_stride * num_indices, 4,
&out_offset, &out_buffer,
(void**)&out_map);
if (!out_buffer)
@ -487,7 +488,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
/* Create and map the output buffer. */
u_upload_alloc(mgr->uploader,
key->output_stride * start_vertex,
key->output_stride * num_vertices,
key->output_stride * num_vertices, 4,
&out_offset, &out_buffer,
(void**)&out_map);
if (!out_buffer)
@ -987,7 +988,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
real_vb = &mgr->real_vertex_buffer[i];
ptr = mgr->vertex_buffer[i].user_buffer;
u_upload_data(mgr->uploader, start, end - start, ptr + start,
u_upload_data(mgr->uploader, start, end - start, 4, ptr + start,
&real_vb->buffer_offset, &real_vb->buffer);
if (!real_vb->buffer)
return PIPE_ERROR_OUT_OF_MEMORY;

View file

@ -716,6 +716,7 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
/* Allocate new memory for vertices. */
u_upload_alloc(c->upload, 0,
c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4, /* size */
4, /* alignment */
&c->vertex_buf.buffer_offset, &c->vertex_buf.buffer,
(void**)&vb);
@ -1090,7 +1091,8 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
c->pipe = pipe;
c->upload = u_upload_create(pipe, 128 * 1024, 4, PIPE_BIND_VERTEX_BUFFER);
c->upload = u_upload_create(pipe, 128 * 1024, PIPE_BIND_VERTEX_BUFFER,
PIPE_USAGE_STREAM);
if (!c->upload)
return false;

View file

@ -79,14 +79,18 @@ calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src bloc
}
static struct ureg_dst
calc_line(struct ureg_program *shader)
calc_line(struct pipe_screen *screen, struct ureg_program *shader)
{
struct ureg_dst tmp;
struct ureg_src pos;
tmp = ureg_DECL_temporary(shader);
pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);
if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL))
pos = ureg_DECL_system_value(shader, TGSI_SEMANTIC_POSITION, 0);
else
pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS,
TGSI_INTERPOLATE_LINEAR);
/*
* tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
@ -177,7 +181,7 @@ create_ref_frag_shader(struct vl_mc *r)
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
field = calc_line(shader);
field = calc_line(r->pipe->screen, shader);
/*
* ref = field.z ? tc[1] : tc[0]
@ -324,7 +328,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
tmp = calc_line(shader);
tmp = calc_line(r->pipe->screen, shader);
/*
* if (field == tc.w)

View file

@ -792,7 +792,7 @@ vl_mpeg12_end_frame(struct pipe_video_codec *decoder,
for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
if (!ref_frames[j] || !ref_frames[j][i]) continue;
vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);
dec->context->set_vertex_buffers(dec->context, 0, 3, vb);
vl_mc_render_ref(i ? &dec->mc_c : &dec->mc_y, &buf->mc[i], ref_frames[j][i]);

View file

@ -213,6 +213,11 @@ The integer capabilities:
* ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments
{ count, instance_count, start, index_bias } from a PIPE_BUFFER resource.
See pipe_draw_info.
* ``PIPE_CAP_MULTI_DRAW_INDIRECT``: Whether the driver supports
pipe_draw_info::indirect_stride and ::indirect_count
* ``PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS``: Whether the driver supports
taking the number of indirect draws from a separate parameter
buffer, see pipe_draw_info::indirect_params.
* ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports
the FINE versions of DDX/DDY.
* ``PIPE_CAP_VENDOR_ID``: The vendor ID of the underlying hardware. If it's
@ -239,8 +244,7 @@ The integer capabilities:
will need to lower TGSI_SEMANTIC_VERTEXID to TGSI_SEMANTIC_VERTEXID_NOBASE
and TGSI_SEMANTIC_BASEVERTEX, so drivers setting this must handle both these
semantics. Only relevant if geometry shaders are supported.
(Currently not possible to query availability of these two semantics outside
this, at least BASEVERTEX should be exposed separately too).
(BASEVERTEX could be exposed separately too via ``PIPE_CAP_DRAW_PARAMETERS``).
* ``PIPE_CAP_POLYGON_OFFSET_CLAMP``: If true, the driver implements support
for ``pipe_rasterizer_state::offset_clamp``.
* ``PIPE_CAP_MULTISAMPLE_Z_RESOLVE``: Whether the driver supports blitting
@ -283,6 +287,20 @@ The integer capabilities:
a compressed block is copied to/from a plain pixel of the same size.
* ``PIPE_CAP_CLEAR_TEXTURE``: Whether `clear_texture` will be
available in contexts.
* ``PIPE_CAP_DRAW_PARAMETERS``: Whether ``TGSI_SEMANTIC_BASEVERTEX``,
``TGSI_SEMANTIC_BASEINSTANCE``, and ``TGSI_SEMANTIC_DRAWID`` are
supported in vertex shaders.
* ``PIPE_CAP_TGSI_PACK_HALF_FLOAT``: Whether the ``UP2H`` and ``PK2H``
TGSI opcodes are supported.
* ``PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL``: If state trackers should use
a system value for the POSITION fragment shader input.
* ``PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL``: If state trackers should use
a system value for the FACE fragment shader input.
Also, the FACE system value is integer, not float.
* ``PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT``: Describes the required
alignment for pipe_shader_buffer::buffer_offset, in bytes. Maximum
value allowed is 256 (for GL conformance). 0 is only allowed if
shader buffers are not supported.
.. _pipe_capf:
@ -375,6 +393,10 @@ to be 0.
of iterations that loops are allowed to have to be unrolled. It is only
a hint to state trackers. Whether any loops will be unrolled is not
guaranteed.
* ``PIPE_SHADER_CAP_MAX_SHADER_BUFFERS``: Maximum number of memory buffers
(also used to implement atomic counters). Having this be non-0 also
implies support for the ``LOAD``, ``STORE``, and ``ATOM*`` TGSI
opcodes.
.. _pipe_compute_cap:

View file

@ -458,7 +458,11 @@ while DDY is allowed to be the same for the entire 2x2 quad.
.. opcode:: PK2H - Pack Two 16-bit Floats
TBD
This instruction replicates its result.
.. math::
dst = f32\_to\_f16(src.x) | f32\_to\_f16(src.y) << 16
.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
@ -615,7 +619,15 @@ This instruction replicates its result.
.. opcode:: UP2H - Unpack Two 16-Bit Floats
TBD
.. math::
dst.x = f16\_to\_f32(src0.x \& 0xffff)
dst.y = f16\_to\_f32(src0.x >> 16)
dst.z = f16\_to\_f32(src0.x \& 0xffff)
dst.w = f16\_to\_f32(src0.x >> 16)
.. note::
@ -2252,11 +2264,11 @@ after lookup.
Resource Access Opcodes
^^^^^^^^^^^^^^^^^^^^^^^
.. opcode:: LOAD - Fetch data from a shader resource
.. opcode:: LOAD - Fetch data from a shader buffer or image
Syntax: ``LOAD dst, resource, address``
Example: ``LOAD TEMP[0], RES[0], TEMP[1]``
Example: ``LOAD TEMP[0], BUFFER[0], TEMP[1]``
Using the provided integer address, LOAD fetches data
from the specified buffer or texture without any
@ -2280,7 +2292,7 @@ Resource Access Opcodes
Syntax: ``STORE resource, address, src``
Example: ``STORE RES[0], TEMP[0], TEMP[1]``
Example: ``STORE BUFFER[0], TEMP[0], TEMP[1]``
Using the provided integer address, STORE writes data
to the specified buffer or texture.
@ -2299,6 +2311,18 @@ Resource Access Opcodes
texture arrays and 2D textures. address.w is always
ignored.
.. opcode:: RESQ - Query information about a resource
Syntax: ``RESQ dst, resource``
Example: ``RESQ TEMP[0], BUFFER[0]``
Returns information about the buffer or image resource. For buffer
resources, the size (in bytes) is returned in the x component. For
image resources, .xyz will contain the width/height/layers of the
image, while .w will contain the number of samples for multi-sampled
images.
.. _threadsyncopcodes:
@ -2358,158 +2382,159 @@ These opcodes provide atomic variants of some common arithmetic and
logical operations. In this context atomicity means that another
concurrent memory access operation that affects the same memory
location is guaranteed to be performed strictly before or after the
entire execution of the atomic operation.
For the moment they're only valid in compute programs.
entire execution of the atomic operation. The resource may be a buffer
or an image. In the case of an image, the offset works the same as for
``LOAD`` and ``STORE``, specified above. These atomic operations may
only be used with 32-bit integer image formats.
.. opcode:: ATOMUADD - Atomic integer addition
Syntax: ``ATOMUADD dst, resource, offset, src``
Example: ``ATOMUADD TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMUADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = dst_i + src_i
resource[offset] = dst_x + src_x
.. opcode:: ATOMXCHG - Atomic exchange
Syntax: ``ATOMXCHG dst, resource, offset, src``
Example: ``ATOMXCHG TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMXCHG TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = src_i
resource[offset] = src_x
.. opcode:: ATOMCAS - Atomic compare-and-exchange
Syntax: ``ATOMCAS dst, resource, offset, cmp, src``
Example: ``ATOMCAS TEMP[0], RES[0], TEMP[1], TEMP[2], TEMP[3]``
Example: ``ATOMCAS TEMP[0], BUFFER[0], TEMP[1], TEMP[2], TEMP[3]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = (dst_i == cmp_i ? src_i : dst_i)
resource[offset] = (dst_x == cmp_x ? src_x : dst_x)
.. opcode:: ATOMAND - Atomic bitwise And
Syntax: ``ATOMAND dst, resource, offset, src``
Example: ``ATOMAND TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMAND TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = dst_i \& src_i
resource[offset] = dst_x \& src_x
.. opcode:: ATOMOR - Atomic bitwise Or
Syntax: ``ATOMOR dst, resource, offset, src``
Example: ``ATOMOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = dst_i | src_i
resource[offset] = dst_x | src_x
.. opcode:: ATOMXOR - Atomic bitwise Xor
Syntax: ``ATOMXOR dst, resource, offset, src``
Example: ``ATOMXOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMXOR TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = dst_i \oplus src_i
resource[offset] = dst_x \oplus src_x
.. opcode:: ATOMUMIN - Atomic unsigned minimum
Syntax: ``ATOMUMIN dst, resource, offset, src``
Example: ``ATOMUMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMUMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
resource[offset] = (dst_x < src_x ? dst_x : src_x)
.. opcode:: ATOMUMAX - Atomic unsigned maximum
Syntax: ``ATOMUMAX dst, resource, offset, src``
Example: ``ATOMUMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMUMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
resource[offset] = (dst_x > src_x ? dst_x : src_x)
.. opcode:: ATOMIMIN - Atomic signed minimum
Syntax: ``ATOMIMIN dst, resource, offset, src``
Example: ``ATOMIMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMIMIN TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
resource[offset] = (dst_x < src_x ? dst_x : src_x)
.. opcode:: ATOMIMAX - Atomic signed maximum
Syntax: ``ATOMIMAX dst, resource, offset, src``
Example: ``ATOMIMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
Example: ``ATOMIMAX TEMP[0], BUFFER[0], TEMP[1], TEMP[2]``
The following operation is performed atomically on each component:
The following operation is performed atomically:
.. math::
dst_i = resource[offset]_i
dst_x = resource[offset]
resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
resource[offset] = (dst_x > src_x ? dst_x : src_x)
@ -2646,7 +2671,8 @@ space coordinate system. After clipping, the X, Y and Z components of the
vertex will be divided by the W value to get normalized device coordinates.
For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
fragment shader input contains the fragment's window position. The X
fragment shader input (or system value, depending on which one is
supported by the driver) contains the fragment's window position. The X
component starts at zero and always increases from left to right.
The Y component starts at zero and always increases but Y=0 may either
indicate the top of the window or the bottom depending on the fragment
@ -2758,11 +2784,17 @@ typically only used for legacy graphics APIs.
TGSI_SEMANTIC_FACE
""""""""""""""""""
This label applies to fragment shader inputs only and indicates that
the register contains front/back-face information of the form (F, 0,
0, 1). The first component will be positive when the fragment belongs
to a front-facing polygon, and negative when the fragment belongs to a
back-facing polygon.
This label applies to fragment shader inputs (or system values,
depending on which one is supported by the driver) and indicates that
the register contains front/back-face information.
If it is an input, it will be a floating-point vector in the form (F, 0, 0, 1),
where F will be positive when the fragment belongs to a front-facing polygon,
and negative when the fragment belongs to a back-facing polygon.
If it is a system value, it will be an integer vector in the form (F, 0, 0, 1),
where F is 0xffffffff when the fragment belongs to a front-facing polygon and
0 when the fragment belongs to a back-facing polygon.
TGSI_SEMANTIC_EDGEFLAG
@ -2949,6 +2981,19 @@ invocation is covered or not. Helper invocations are created in order
to properly compute derivatives, however it may be desirable to skip
some of the logic in those cases. See ``gl_HelperInvocation`` documentation.
TGSI_SEMANTIC_BASEINSTANCE
""""""""""""""""""""""""""
For vertex shaders, the base instance argument supplied for this
draw. This is an integer value, and only the X component is used.
TGSI_SEMANTIC_DRAWID
""""""""""""""""""""
For vertex shaders, the zero-based index of the current draw in a
``glMultiDraw*`` invocation. This is an integer value, and only the X
component is used.
Declaration Interpolate
^^^^^^^^^^^^^^^^^^^^^^^

View file

@ -0,0 +1 @@
ir3_compiler

View file

@ -128,6 +128,7 @@ ir3_SOURCES := \
ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
ir3/ir3_nir.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
ir3/ir3_print.c \

View file

@ -171,8 +171,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd3_query_context_init(pctx);
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
PIPE_USAGE_STREAM);
return pctx;
}

View file

@ -145,7 +145,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
void *ptr;
u_upload_alloc(fd3_ctx->border_color_uploader,
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
0, BORDER_COLOR_UPLOAD_SIZE,
BORDER_COLOR_UPLOAD_SIZE, &off,
&fd3_ctx->border_color_buf,
&ptr);

View file

@ -171,8 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
fd4_query_context_init(pctx);
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0,
PIPE_USAGE_STREAM);
return pctx;
}

View file

@ -133,7 +133,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
void *ptr;
u_upload_alloc(fd4_ctx->border_color_uploader,
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
0, BORDER_COLOR_UPLOAD_SIZE,
BORDER_COLOR_UPLOAD_SIZE, &off,
&fd4_ctx->border_color_buf,
&ptr);

View file

@ -40,6 +40,8 @@
#include "freedreno_gmem.h"
#include "freedreno_util.h"
#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
struct fd_vertex_stateobj;
struct fd_texture_stateobj {

View file

@ -226,6 +226,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
@ -238,6 +240,11 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@ -414,6 +421,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
}
debug_printf("unknown shader param %d\n", param);
return 0;

View file

@ -40,6 +40,7 @@
#include "freedreno_util.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
#include "instr-a3xx.h"
#include "ir3.h"
@ -105,10 +106,10 @@ int main(int argc, char **argv)
const char *filename;
struct tgsi_token toks[65536];
struct tgsi_parse_context parse;
struct ir3_compiler *compiler;
struct ir3_shader_variant v;
struct ir3_shader s;
struct ir3_shader_key key = {};
/* TODO cmdline option to target different gpus: */
unsigned gpu_id = 320;
const char *info;
void *ptr;
@ -228,7 +229,12 @@ int main(int argc, char **argv)
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
errx(1, "could not parse `%s'", filename);
s.tokens = toks;
if (fd_mesa_debug & FD_DBG_OPTMSGS)
tgsi_dump(toks, 0);
nir_shader *nir = ir3_tgsi_to_nir(toks);
s.compiler = ir3_compiler_create(gpu_id);
s.nir = ir3_optimize_nir(&s, nir, NULL);
v.key = key;
v.shader = &s;
@ -246,11 +252,8 @@ int main(int argc, char **argv)
break;
}
/* TODO cmdline option to target different gpus: */
compiler = ir3_compiler_create(gpu_id);
info = "NIR compiler";
ret = ir3_compile_shader_nir(compiler, &v);
ret = ir3_compile_shader_nir(s.compiler, &v);
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;

View file

@ -32,10 +32,6 @@
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
#include "nir/tgsi_to_nir.h"
#include "freedreno_util.h"
@ -123,97 +119,10 @@ struct ir3_compile {
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
static struct nir_shader *to_nir(struct ir3_compile *ctx,
const struct tgsi_token *tokens, struct ir3_shader_variant *so)
{
static const nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_fsat = true,
.lower_scmp = true,
.lower_flrp = true,
.lower_ffract = true,
.native_integers = true,
};
struct nir_lower_tex_options tex_options = {
.lower_rect = 0,
};
bool progress;
switch (so->type) {
case SHADER_FRAGMENT:
case SHADER_COMPUTE:
tex_options.saturate_s = so->key.fsaturate_s;
tex_options.saturate_t = so->key.fsaturate_t;
tex_options.saturate_r = so->key.fsaturate_r;
break;
case SHADER_VERTEX:
tex_options.saturate_s = so->key.vsaturate_s;
tex_options.saturate_t = so->key.vsaturate_t;
tex_options.saturate_r = so->key.vsaturate_r;
break;
}
if (ctx->compiler->gpu_id >= 400) {
/* a4xx seems to have *no* sam.p */
tex_options.lower_txp = ~0; /* lower all txp */
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
}
struct nir_shader *s = tgsi_to_nir(tokens, &options);
if (fd_mesa_debug & FD_DBG_DISASM) {
debug_printf("----------------------\n");
nir_print_shader(s, stdout);
debug_printf("----------------------\n");
}
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
if (s->stage == MESA_SHADER_VERTEX) {
nir_lower_clip_vs(s, so->key.ucp_enables);
} else if (s->stage == MESA_SHADER_FRAGMENT) {
nir_lower_clip_fs(s, so->key.ucp_enables);
}
nir_lower_tex(s, &tex_options);
if (so->key.color_two_side)
nir_lower_two_sided_color(s);
nir_lower_idiv(s);
nir_lower_load_const_to_scalar(s);
do {
progress = false;
nir_lower_vars_to_ssa(s);
nir_lower_alu_to_scalar(s);
nir_lower_phis_to_scalar(s);
progress |= nir_copy_prop(s);
progress |= nir_opt_dce(s);
progress |= nir_opt_cse(s);
progress |= ir3_nir_lower_if_else(s);
progress |= nir_opt_algebraic(s);
progress |= nir_opt_constant_folding(s);
} while (progress);
nir_remove_dead_variables(s);
nir_validate_shader(s);
if (fd_mesa_debug & FD_DBG_DISASM) {
debug_printf("----------------------\n");
nir_print_shader(s, stdout);
debug_printf("----------------------\n");
}
return s;
}
static struct ir3_compile *
compile_init(struct ir3_compiler *compiler,
struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
struct ir3_shader_variant *so)
{
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
@ -239,7 +148,28 @@ compile_init(struct ir3_compiler *compiler,
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->s = to_nir(ctx, tokens, so);
/* TODO: maybe generate some sort of bitmask of what key
* lowers vs what shader has (ie. no need to lower
* texture clamp lowering if no texture sample instrs)..
* although should be done further up the stack to avoid
* creating duplicate variants..
*/
if (ir3_key_lowers_nir(&so->key)) {
nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
} else {
/* fast-path for shader key that lowers nothing in NIR: */
ctx->s = so->shader->nir;
}
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump nir%dv%d: type=%d, k={bp=%u,cts=%u,hp=%u}",
so->shader->id, so->id, so->type,
so->key.binning_pass, so->key.color_two_side,
so->key.half_precision);
nir_print_shader(ctx->s, stdout);
}
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
@ -1954,8 +1884,6 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
case nir_texop_samples_identical:
unreachable("nir_texop_samples_identical");
default:
emit_tex(ctx, tex);
break;
@ -2170,6 +2098,8 @@ emit_stream_out(struct ir3_compile *ctx)
static void
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
{
nir_metadata_require(impl, nir_metadata_block_index);
emit_cf_list(ctx, &impl->body);
emit_block(ctx, impl->end_block);
@ -2499,7 +2429,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
assert(!so->ir);
ctx = compile_init(compiler, so, so->shader->tokens);
ctx = compile_init(compiler, so);
if (!ctx) {
DBG("INIT failed!");
ret = -1;

View file

@ -0,0 +1,153 @@
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
/*
* Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "freedreno_util.h"
#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "ir3_shader.h"
#include "nir/tgsi_to_nir.h"
/* Translate a TGSI token stream into a fresh NIR shader using the
 * compiler options the ir3 backend expects (pow/sat/scmp/lrp/fract
 * lowered, native integer support enabled).
 *
 * Ownership of the returned nir_shader passes to the caller.
 */
struct nir_shader *
ir3_tgsi_to_nir(const struct tgsi_token *tokens)
{
	/* One shared, immutable options block for every translation. */
	static const nir_shader_compiler_options options = {
			.lower_fpow = true,
			.lower_fsat = true,
			.lower_scmp = true,
			.lower_flrp = true,
			.lower_ffract = true,
			.native_integers = true,
	};

	struct nir_shader *nir = tgsi_to_nir(tokens, &options);

	return nir;
}
/* for given shader key, are any steps handled in nir? */
bool
ir3_key_lowers_nir(const struct ir3_shader_key *key)
{
	/* Accumulate every key bit that forces a NIR-level lowering pass:
	 * fragment/vertex texture-coordinate saturation, user clip planes,
	 * and two-sided color.  Non-zero means the per-variant compile must
	 * clone and re-optimize the shader's NIR.
	 */
	unsigned lowers = key->fsaturate_s | key->fsaturate_t | key->fsaturate_r;

	lowers |= key->vsaturate_s | key->vsaturate_t | key->vsaturate_r;
	lowers |= key->ucp_enables | key->color_two_side;

	return lowers != 0;
}
/* Run a NIR pass and evaluate to true iff the pass reported progress
 * (uses a GCC/Clang statement expression so it can be used inline in
 * `progress |= OPT(...)` chains).
 */
#define OPT(nir, pass, ...) ({ \
		bool this_progress = false; \
		NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
		this_progress; \
	})

/* Run a NIR pass unconditionally, discarding its progress result. */
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
/* Shared NIR lowering/optimization pipeline.
 *
 * Called in two modes:
 *  - key == NULL: first-pass, key-independent optimization done once at
 *    shader create time;
 *  - key != NULL: per-variant re-optimization when the shader key requires
 *    extra lowering (saturation, clip planes, two-sided color).
 *
 * \param shader  owning ir3_shader (used for stage and gpu_id)
 * \param s       the NIR shader; mutated in place
 * \param key     variant key, or NULL for the key-independent pass
 * \return        `s`, after lowering/optimization and nir_sweep()
 */
struct nir_shader *
ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
		const struct ir3_shader_key *key)
{
	struct nir_lower_tex_options tex_options = {
			.lower_rect = 0,
	};
	bool progress;

	if (key) {
		/* The saturate bits live in stage-specific key fields. */
		switch (shader->type) {
		case SHADER_FRAGMENT:
		case SHADER_COMPUTE:
			tex_options.saturate_s = key->fsaturate_s;
			tex_options.saturate_t = key->fsaturate_t;
			tex_options.saturate_r = key->fsaturate_r;
			break;
		case SHADER_VERTEX:
			tex_options.saturate_s = key->vsaturate_s;
			tex_options.saturate_t = key->vsaturate_t;
			tex_options.saturate_r = key->vsaturate_r;
			break;
		}
	}

	if (shader->compiler->gpu_id >= 400) {
		/* a4xx seems to have *no* sam.p */
		tex_options.lower_txp = ~0; /* lower all txp */
	} else {
		/* a3xx just needs to avoid sam.p for 3d tex */
		tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
	}

	/* Dump the shader before optimization when disasm debug is on. */
	if (fd_mesa_debug & FD_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	OPT_V(s, nir_opt_global_to_local);
	OPT_V(s, nir_convert_to_ssa);

	if (key) {
		/* Key-dependent lowering: clip-plane lowering differs per stage,
		 * two-sided color only when the key asks for it.
		 */
		if (s->stage == MESA_SHADER_VERTEX) {
			OPT_V(s, nir_lower_clip_vs, key->ucp_enables);
		} else if (s->stage == MESA_SHADER_FRAGMENT) {
			OPT_V(s, nir_lower_clip_fs, key->ucp_enables);
		}
		if (key->color_two_side) {
			OPT_V(s, nir_lower_two_sided_color);
		}
	}

	OPT_V(s, nir_lower_tex, &tex_options);
	OPT_V(s, nir_lower_idiv);
	OPT_V(s, nir_lower_load_const_to_scalar);

	/* Iterate the cleanup/optimization passes to a fixed point; the
	 * lowering passes at the top of the loop are run unconditionally so
	 * the optimizers always see scalar SSA form.
	 */
	do {
		progress = false;

		OPT_V(s, nir_lower_vars_to_ssa);
		OPT_V(s, nir_lower_alu_to_scalar);
		OPT_V(s, nir_lower_phis_to_scalar);

		progress |= OPT(s, nir_copy_prop);
		progress |= OPT(s, nir_opt_dce);
		progress |= OPT(s, nir_opt_cse);
		progress |= OPT(s, ir3_nir_lower_if_else);
		progress |= OPT(s, nir_opt_algebraic);
		progress |= OPT(s, nir_opt_constant_folding);
	} while (progress);

	OPT_V(s, nir_remove_dead_variables);

	/* Dump the shader again after optimization for comparison. */
	if (fd_mesa_debug & FD_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	/* Compact the ralloc allocations now that optimization is done. */
	nir_sweep(s);

	return s;
}

View file

@ -32,6 +32,13 @@
#include "glsl/nir/nir.h"
#include "glsl/nir/shader_enums.h"
#include "ir3_shader.h"
bool ir3_nir_lower_if_else(nir_shader *shader);
struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens);
bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
const struct ir3_shader_key *key);
#endif /* IR3_NIR_H_ */

View file

@ -39,7 +39,7 @@
#include "ir3_shader.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
static void
delete_variant(struct ir3_shader_variant *v)
@ -187,12 +187,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
v->key = key;
v->type = shader->type;
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
key.binning_pass, key.color_two_side, key.half_precision);
tgsi_dump(shader->tokens, 0);
}
ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
@ -267,7 +261,7 @@ ir3_shader_destroy(struct ir3_shader *shader)
v = v->next;
delete_variant(t);
}
free((void *)shader->tokens);
ralloc_free(shader->nir);
free(shader);
}
@ -281,14 +275,24 @@ ir3_shader_create(struct pipe_context *pctx,
shader->id = ++shader->compiler->shader_count;
shader->pctx = pctx;
shader->type = type;
shader->tokens = tgsi_dup_tokens(cso->tokens);
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d", shader->type);
tgsi_dump(cso->tokens, 0);
}
nir_shader *nir = ir3_tgsi_to_nir(cso->tokens);
/* do first pass optimization, ignoring the key: */
shader->nir = ir3_optimize_nir(shader, nir, NULL);
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump nir%d: type=%d", shader->id, shader->type);
nir_print_shader(shader->nir, stdout);
}
shader->stream_output = cso->stream_output;
if (fd_mesa_debug & FD_DBG_SHADERDB) {
/* if shader-db run, create a standard variant immediately
* (as otherwise nothing will trigger the shader to be
* actually compiled)
*/
static struct ir3_shader_key key = {};
static struct ir3_shader_key key = {0};
ir3_shader_variant(shader, key);
}
return shader;

View file

@ -230,6 +230,8 @@ struct ir3_shader_variant {
struct ir3_shader *shader;
};
typedef struct nir_shader nir_shader;
struct ir3_shader {
enum shader_t type;
@ -240,7 +242,7 @@ struct ir3_shader {
struct ir3_compiler *compiler;
struct pipe_context *pctx; /* TODO replace w/ pipe_screen */
const struct tgsi_token *tokens;
nir_shader *nir;
struct pipe_stream_output_info stream_output;
struct ir3_shader_variant *variants;

View file

@ -195,7 +195,6 @@ struct i915_rasterizer_state {
unsigned light_twoside : 1;
unsigned st;
enum interp_mode color_interp;
unsigned LIS4;
unsigned LIS7;

View file

@ -254,6 +254,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@ -264,6 +269,8 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
return 0;

View file

@ -423,7 +423,7 @@ i915_prepare_vertex_sampling(struct i915_context *i915)
for (j = view->u.tex.first_level; j <= tex->last_level; j++) {
mip_offsets[j] = i915_texture_offset(i915_tex, j , 0 /* FIXME depth */);
row_stride[j] = i915_tex->stride;
img_stride[j] = 0; /* FIXME */;
img_stride[j] = 0; /* FIXME */
}
draw_set_mapped_texture(i915->draw,
@ -920,7 +920,6 @@ i915_create_rasterizer_state(struct pipe_context *pipe,
struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state );
cso->templ = *rasterizer;
cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
cso->light_twoside = rasterizer->light_twoside;
cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE;
cso->ds[1].f = rasterizer->offset_scale;

View file

@ -57,7 +57,6 @@ static uint find_mapping(const struct i915_fragment_shader* fs, int unit)
static void calculate_vertex_layout(struct i915_context *i915)
{
const struct i915_fragment_shader *fs = i915->fs;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW, face;
uint i;
@ -107,12 +106,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
/* pos */
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
vinfo.attrib[0].emit = EMIT_4F;
}
else {
draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
draw_emit_vertex_attr(&vinfo, EMIT_3F, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
vinfo.attrib[0].emit = EMIT_3F;
}
@ -123,21 +122,21 @@ static void calculate_vertex_layout(struct i915_context *i915)
/* primary color */
if (colors[0]) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
}
/* secondary color */
if (colors[1]) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, colorInterp, src);
draw_emit_vertex_attr(&vinfo, EMIT_4UB_BGRA, src);
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
}
/* fog coord, not fog blend factor */
if (fog) {
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
@ -147,7 +146,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
draw_emit_vertex_attr(&vinfo, EMIT_4F, src);
}
else {
hwtc = TEXCOORDFMT_NOT_PRESENT;
@ -164,7 +163,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
* module by adding an extra shader output.
*/
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FACE, 0);
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_CONSTANT, src);
draw_emit_vertex_attr(&vinfo, EMIT_1F, src);
vinfo.hwfmt[1] &= ~(TEXCOORDFMT_NOT_PRESENT << (slot * 4));
vinfo.hwfmt[1] |= TEXCOORDFMT_1D << (slot * 4);
}
@ -185,7 +184,7 @@ static void calculate_vertex_layout(struct i915_context *i915)
struct i915_tracked_state i915_update_vertex_layout = {
"vertex_layout",
calculate_vertex_layout,
I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS
I915_NEW_FS | I915_NEW_VS
};

View file

@ -333,7 +333,7 @@ ilo_builder_init(struct ilo_builder *builder,
const struct ilo_dev *dev,
struct intel_winsys *winsys)
{
int i;
unsigned i;
assert(ilo_is_zeroed(builder, sizeof(*builder)));
@ -366,7 +366,7 @@ ilo_builder_init(struct ilo_builder *builder,
void
ilo_builder_reset(struct ilo_builder *builder)
{
int i;
unsigned i;
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++)
ilo_builder_writer_reset(builder, i);
@ -382,7 +382,7 @@ ilo_builder_reset(struct ilo_builder *builder)
bool
ilo_builder_begin(struct ilo_builder *builder)
{
int i;
unsigned i;
for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++) {
if (!ilo_builder_writer_alloc_and_map(builder, i)) {
@ -407,7 +407,7 @@ struct intel_bo *
ilo_builder_end(struct ilo_builder *builder, unsigned *used)
{
struct ilo_builder_writer *bat;
int i;
unsigned i;
ilo_builder_batch_patch_sba(builder);

View file

@ -189,8 +189,9 @@ ilo_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
* These must be called last as u_upload/u_blitter are clients of the pipe
* context.
*/
ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024, 16,
PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER);
ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024,
PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER,
PIPE_USAGE_STREAM);
if (!ilo->uploader) {
ilo_context_destroy(&ilo->base);
return NULL;

View file

@ -92,7 +92,7 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) {
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, input,
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input,
&input_buf.buffer_offset, &input_buf.buffer);
}

View file

@ -463,6 +463,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
@ -476,6 +478,11 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
case PIPE_CAP_VENDOR_ID:

View file

@ -376,7 +376,7 @@ finalize_cbuf_state(struct ilo_context *ilo,
if (cbuf->cso[i].resource)
continue;
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size, 16,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
@ -426,12 +426,12 @@ finalize_index_buffer(struct ilo_context *ilo)
unsigned hw_offset;
if (vec->ib.state.user_buffer) {
u_upload_data(ilo->uploader, 0, size,
u_upload_data(ilo->uploader, 0, size, 16,
vec->ib.state.user_buffer + offset,
&hw_offset, &vec->ib.hw_resource);
} else {
u_upload_buffer(ilo->uploader, 0,
vec->ib.state.offset + offset, size, vec->ib.state.buffer,
vec->ib.state.offset + offset, size, 16, vec->ib.state.buffer,
&hw_offset, &vec->ib.hw_resource);
}

View file

@ -266,7 +266,7 @@ fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
struct toy_inst *inst;
struct toy_src desc, real_src[4];
struct toy_dst tmp, real_dst[4];
int i;
unsigned i;
tsrc_transpose(idx, real_src);
@ -319,7 +319,7 @@ fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
const int grf_subreg = (idx.val32 & 1) * 16;
struct toy_src src;
struct toy_dst real_dst[4];
int i;
unsigned i;
if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
grf >= fcc->first_attr_grf)
@ -350,7 +350,7 @@ fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
struct toy_inst *inst;
struct toy_src desc;
struct toy_dst tmp, real_dst[4];
int i;
unsigned i;
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
return;
@ -396,7 +396,7 @@ fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
struct toy_src desc;
struct toy_inst *inst;
struct toy_dst tmp, real_dst[4];
int i;
unsigned i;
if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
return;
@ -1168,7 +1168,7 @@ fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_dst dst[4];
struct toy_src src[4];
int i;
unsigned i;
tdst_transpose(inst->dst, dst);
tsrc_transpose(inst->src[0], src);
@ -1257,7 +1257,7 @@ fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
}
else {
struct toy_src src[4];
int i;
unsigned i;
tsrc_transpose(inst->src[0], src);
/* mask out killed pixels */
@ -1583,7 +1583,7 @@ fs_write_fb(struct fs_compile_context *fcc)
static void
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
{
int i;
unsigned i;
sh->out.count = tgsi->num_outputs;
for (i = 0; i < tgsi->num_outputs; i++) {
@ -1603,7 +1603,7 @@ static void
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
bool flatshade)
{
int i;
unsigned i;
sh->in.count = tgsi->num_inputs;
for (i = 0; i < tgsi->num_inputs; i++) {

View file

@ -126,7 +126,7 @@ vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
tc_MOV(tc, block_offsets, idx);
msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ;
msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;;
msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;
msg_len = 2;
desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
@ -522,7 +522,7 @@ vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
if (num_coords >= 3) {
struct toy_dst tmp, max;
struct toy_src abs_coords[3];
int i;
unsigned i;
tmp = tc_alloc_tmp(tc);
max = tdst_writemask(tmp, TOY_WRITEMASK_W);
@ -804,7 +804,7 @@ static int
vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
{
const struct toy_tgsi *tgsi = &vcc->tgsi;
int i;
unsigned i;
for (i = 0; i < vcc->shader->out.count; i++) {
const int slot = vcc->output_map[i];

View file

@ -70,7 +70,7 @@ struct linear_scan {
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
int i;
unsigned i;
for (i = 0; i < count; i++)
ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
@ -221,7 +221,7 @@ linear_scan_spill(struct linear_scan *ls,
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
int i;
unsigned i;
for (i = 0; i < count; i++) {
struct linear_scan_live_interval *interval = &ls->intervals[first + i];

View file

@ -1593,7 +1593,7 @@ ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst
tgsi_inst->Src[operand].Register.File;
switch (file) {
case TGSI_FILE_SAMPLER:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
type = TOY_TYPE_D;
break;
@ -1834,7 +1834,7 @@ ra_get_src_indirect(struct toy_tgsi *tgsi,
src = tsrc_null();
break;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
is_resource = true;
/* fall through */
@ -1918,7 +1918,7 @@ ra_get_src(struct toy_tgsi *tgsi,
need_vrf = true;
break;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
assert(!s->Register.Dimension);
src = tsrc_imm_d(s->Register.Index);
@ -2256,7 +2256,7 @@ parse_declaration(struct toy_tgsi *tgsi,
case TGSI_FILE_SAMPLER:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_IMAGE:
case TGSI_FILE_SAMPLER_VIEW:
/* nothing to do */
break;

View file

@ -63,8 +63,7 @@ enum lp_interp {
LP_INTERP_LINEAR,
LP_INTERP_PERSPECTIVE,
LP_INTERP_POSITION,
LP_INTERP_FACING,
LP_INTERP_ZERO
LP_INTERP_FACING
};
struct lp_shader_input {

View file

@ -108,28 +108,22 @@ struct llvmpipe_context {
struct vertex_info vertex_info;
/** Which vertex shader output slot contains color */
uint8_t color_slot[2];
int8_t color_slot[2];
/** Which vertex shader output slot contains bcolor */
uint8_t bcolor_slot[2];
int8_t bcolor_slot[2];
/** Which vertex shader output slot contains point size */
uint8_t psize_slot;
int8_t psize_slot;
/** Which vertex shader output slot contains viewport index */
uint8_t viewport_index_slot;
int8_t viewport_index_slot;
/** Which geometry shader output slot contains layer */
uint8_t layer_slot;
int8_t layer_slot;
/** A fake frontface output for unfilled primitives */
uint8_t face_slot;
/** Which output slot is used for the fake vp index info */
uint8_t fake_vpindex_slot;
/** Which output slot is used for the fake layer info */
uint8_t fake_layer_slot;
int8_t face_slot;
/** Depth format and bias settings. */
boolean floating_point_depth;

View file

@ -115,7 +115,7 @@ struct lp_rast_plane {
int32_t dcdy;
/* one-pixel sized trivial reject offsets for each plane */
int64_t eo;
uint32_t eo;
};
/**

View file

@ -133,36 +133,8 @@ lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
lp_rast_triangle_4(task, arg2);
}
#if !defined(PIPE_ARCH_SSE)
#if defined(PIPE_ARCH_SSE)
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
union lp_rast_cmd_arg arg2;
arg2.triangle.tri = arg.triangle.tri;
arg2.triangle.plane_mask = (1<<3)-1;
lp_rast_triangle_32_3(task, arg2);
}
void
lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
union lp_rast_cmd_arg arg2;
arg2.triangle.tri = arg.triangle.tri;
arg2.triangle.plane_mask = (1<<4)-1;
lp_rast_triangle_32_4(task, arg2);
}
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
lp_rast_triangle_32_3_16(task, arg);
}
#else
#include <emmintrin.h>
#include "util/u_sse.h"
@ -265,12 +237,6 @@ sign_bits4(const __m128i *cstep, int cdiff)
#define NR_PLANES 3
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
@ -381,10 +347,6 @@ lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
0xffff & ~out[i].mask);
}
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
@ -471,6 +433,254 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
}
#undef NR_PLANES
#else
#if defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
#include <altivec.h>
#include "util/u_pwr8.h"
static inline void
build_masks_32(int c,
int cdiff,
int dcdx,
int dcdy,
unsigned *outmask,
unsigned *partmask)
{
__m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
__m128i xdcdy = (__m128i) vec_splats(dcdy);
/* Get values across the quad
*/
__m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
__m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
__m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
{
__m128i cstep01, cstep23, result;
cstep01 = vec_packs_epi32(cstep0, cstep1);
cstep23 = vec_packs_epi32(cstep2, cstep3);
result = vec_packs_epi16(cstep01, cstep23);
*outmask |= vec_movemask_epi8(result);
}
{
__m128i cio4 = (__m128i) vec_splats(cdiff);
__m128i cstep01, cstep23, result;
cstep0 = vec_add_epi32(cstep0, cio4);
cstep1 = vec_add_epi32(cstep1, cio4);
cstep2 = vec_add_epi32(cstep2, cio4);
cstep3 = vec_add_epi32(cstep3, cio4);
cstep01 = vec_packs_epi32(cstep0, cstep1);
cstep23 = vec_packs_epi32(cstep2, cstep3);
result = vec_packs_epi16(cstep01, cstep23);
*partmask |= vec_movemask_epi8(result);
}
}
static inline unsigned
build_mask_linear_32(int c, int dcdx, int dcdy)
{
__m128i cstep0 = vec_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
__m128i xdcdy = (__m128i) vec_splats(dcdy);
/* Get values across the quad
*/
__m128i cstep1 = vec_add_epi32(cstep0, xdcdy);
__m128i cstep2 = vec_add_epi32(cstep1, xdcdy);
__m128i cstep3 = vec_add_epi32(cstep2, xdcdy);
/* pack pairs of results into epi16
*/
__m128i cstep01 = vec_packs_epi32(cstep0, cstep1);
__m128i cstep23 = vec_packs_epi32(cstep2, cstep3);
/* pack into epi8, preserving sign bits
*/
__m128i result = vec_packs_epi16(cstep01, cstep23);
/* extract sign bits to create mask
*/
return vec_movemask_epi8(result);
}
static inline __m128i
lp_plane_to_m128i(const struct lp_rast_plane *plane)
{
return vec_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
(int32_t)plane->dcdy, (int32_t)plane->eo);
}
#define NR_PLANES 3
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
int x = (arg.triangle.plane_mask & 0xff) + task->x;
int y = (arg.triangle.plane_mask >> 8) + task->y;
unsigned i, j;
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
unsigned nr = 0;
__m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
__m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
__m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
__m128i zero = vec_splats((unsigned char) 0);
__m128i c;
__m128i dcdx;
__m128i dcdy;
__m128i rej4;
__m128i dcdx2;
__m128i dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
__m128i vshuf_mask0;
__m128i vshuf_mask1;
__m128i vshuf_mask2;
#ifdef PIPE_ARCH_LITTLE_ENDIAN
vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x03020100);
vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x07060504);
vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x0B0A0908);
#else
vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x0C0D0E0F);
vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x08090A0B);
vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x04050607);
#endif
transpose4_epi32(&p0, &p1, &p2, &zero,
&c, &dcdx, &dcdy, &rej4);
/* Adjust dcdx;
*/
dcdx = vec_sub_epi32(zero, dcdx);
c = vec_add_epi32(c, vec_mullo_epi32(dcdx, (__m128i) vec_splats(x)));
c = vec_add_epi32(c, vec_mullo_epi32(dcdy, (__m128i) vec_splats(y)));
rej4 = vec_slli_epi32(rej4, 2);
/*
* Adjust so we can just check the sign bit (< 0 comparison),
* instead of having to do a less efficient <= 0 comparison
*/
c = vec_sub_epi32(c, (__m128i) vec_splats((unsigned int) 1));
rej4 = vec_add_epi32(rej4, (__m128i) vec_splats((unsigned int) 1));
dcdx2 = vec_add_epi32(dcdx, dcdx);
dcdx3 = vec_add_epi32(dcdx2, dcdx);
transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
&span_0, &span_1, &span_2, &unused);
for (i = 0; i < 4; i++) {
__m128i cx = c;
for (j = 0; j < 4; j++) {
__m128i c4rej = vec_add_epi32(cx, rej4);
__m128i rej_masks = vec_srai_epi32(c4rej, 31);
/* if (is_zero(rej_masks)) */
if (vec_movemask_epi8(rej_masks) == 0) {
__m128i c0_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask0), span_0);
__m128i c1_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask1), span_1);
__m128i c2_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask2), span_2);
__m128i c_0 = vec_or(vec_or(c0_0, c1_0), c2_0);
__m128i c0_1 = vec_add_epi32(c0_0, vec_perm(dcdy, dcdy, vshuf_mask0));
__m128i c1_1 = vec_add_epi32(c1_0, vec_perm(dcdy, dcdy, vshuf_mask1));
__m128i c2_1 = vec_add_epi32(c2_0, vec_perm(dcdy, dcdy, vshuf_mask2));
__m128i c_1 = vec_or(vec_or(c0_1, c1_1), c2_1);
__m128i c_01 = vec_packs_epi32(c_0, c_1);
__m128i c0_2 = vec_add_epi32(c0_1, vec_perm(dcdy, dcdy, vshuf_mask0));
__m128i c1_2 = vec_add_epi32(c1_1, vec_perm(dcdy, dcdy, vshuf_mask1));
__m128i c2_2 = vec_add_epi32(c2_1, vec_perm(dcdy, dcdy, vshuf_mask2));
__m128i c_2 = vec_or(vec_or(c0_2, c1_2), c2_2);
__m128i c0_3 = vec_add_epi32(c0_2, vec_perm(dcdy, dcdy, vshuf_mask0));
__m128i c1_3 = vec_add_epi32(c1_2, vec_perm(dcdy, dcdy, vshuf_mask1));
__m128i c2_3 = vec_add_epi32(c2_2, vec_perm(dcdy, dcdy, vshuf_mask2));
__m128i c_3 = vec_or(vec_or(c0_3, c1_3), c2_3);
__m128i c_23 = vec_packs_epi32(c_2, c_3);
__m128i c_0123 = vec_packs_epi16(c_01, c_23);
unsigned mask = vec_movemask_epi8(c_0123);
out[nr].i = i;
out[nr].j = j;
out[nr].mask = mask;
if (mask != 0xffff)
nr++;
}
cx = vec_add_epi32(cx, vec_slli_epi32(dcdx, 2));
}
c = vec_add_epi32(c, vec_slli_epi32(dcdy, 2));
}
for (i = 0; i < nr; i++)
lp_rast_shade_quads_mask(task,
&tri->inputs,
x + 4 * out[i].j,
y + 4 * out[i].i,
0xffff & ~out[i].mask);
}
#undef NR_PLANES
#else
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
union lp_rast_cmd_arg arg2;
arg2.triangle.tri = arg.triangle.tri;
arg2.triangle.plane_mask = (1<<3)-1;
lp_rast_triangle_32_3(task, arg2);
}
#endif /* _ARCH_PWR8 && PIPE_ARCH_LITTLE_ENDIAN */
void
lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
union lp_rast_cmd_arg arg2;
arg2.triangle.tri = arg.triangle.tri;
arg2.triangle.plane_mask = (1<<4)-1;
lp_rast_triangle_32_4(task, arg2);
}
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
lp_rast_triangle_32_3_16(task, arg);
}
#endif
@ -512,7 +722,7 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
#define NR_PLANES 8
#include "lp_rast_tri_tmp.h"
#ifdef PIPE_ARCH_SSE
#if defined(PIPE_ARCH_SSE) || (defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN))
#undef BUILD_MASKS
#undef BUILD_MASK_LINEAR
#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)

View file

@ -82,7 +82,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int64_t dcdx = -IMUL64(plane[j].dcdx, 4);
const int64_t dcdy = IMUL64(plane[j].dcdy, 4);
const int64_t cox = IMUL64(plane[j].eo, 4);
const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
const int64_t cio = IMUL64(ei, 4) - 1;
BUILD_MASKS(c[j] + cox,
@ -182,7 +182,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
const int64_t dcdx = -IMUL64(plane[j].dcdx, 16);
const int64_t dcdy = IMUL64(plane[j].dcdy, 16);
const int64_t cox = IMUL64(plane[j].eo, 16);
const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
const int64_t ei = plane[j].dcdy - plane[j].dcdx - (int64_t)plane[j].eo;
const int64_t cio = IMUL64(ei, 16) - 1;
BUILD_MASKS(c[j] + cox,

View file

@ -301,6 +301,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 0;
}
/* should only get here on unhandled cases */

View file

@ -486,6 +486,11 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
depth,
stencil);
/*
* XXX: should make a full mask here for things like D24X8,
* otherwise we'll do a read-modify-write clear later which
* should be unnecessary.
*/
zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
zmask32,
smask8);

View file

@ -105,10 +105,10 @@ struct lp_setup_context
float pixel_offset;
float line_width;
float point_size;
uint8_t psize_slot;
uint8_t viewport_index_slot;
uint8_t layer_slot;
uint8_t face_slot;
int8_t psize_slot;
int8_t viewport_index_slot;
int8_t layer_slot;
int8_t face_slot;
struct pipe_framebuffer_state fb;
struct u_rect framebuffer;

View file

@ -644,19 +644,25 @@ try_setup_line( struct lp_setup_context *setup,
line->inputs.layer = layer;
line->inputs.viewport_index = viewport_index;
/*
* XXX: this code is mostly identical to the one in lp_setup_tri, except it
* uses 4 planes instead of 3. Could share the code (including the sse
* assembly, in fact we'd get the 4th plane for free).
* The only difference apart from storing the 4th plane would be some
* different shuffle for calculating dcdx/dcdy.
*/
for (i = 0; i < 4; i++) {
/* half-edge constants, will be interated over the whole render
/* half-edge constants, will be iterated over the whole render
* target.
*/
plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]);
/* correct for top-left vs. bottom-left fill convention.
*/
/* correct for top-left vs. bottom-left fill convention.
*/
if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
plane[i].c++;
plane[i].c++;
}
else if (plane[i].dcdx == 0) {
if (setup->pixel_offset == 0) {

View file

@ -46,6 +46,9 @@
#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
#include <altivec.h>
#include "util/u_pwr8.h"
#endif
static inline int
@ -387,25 +390,21 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane = GET_PLANES(tri);
#if defined(PIPE_ARCH_SSE)
if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
setup->fb.height <= MAX_FIXED_LENGTH32 &&
(bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
(bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
if (1) {
__m128i vertx, verty;
__m128i shufx, shufy;
__m128i dcdx, dcdy, c;
__m128i unused;
__m128i dcdx, dcdy;
__m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
__m128i c01, c23, unused;
__m128i dcdx_neg_mask;
__m128i dcdy_neg_mask;
__m128i dcdx_zero_mask;
__m128i top_left_flag;
__m128i c_inc_mask, c_inc;
__m128i top_left_flag, c_dec;
__m128i eo, p0, p1, p2;
__m128i zero = _mm_setzero_si128();
PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
@ -419,42 +418,161 @@ do_triangle_ccw(struct lp_setup_context *setup,
top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
c_inc_mask = _mm_or_si128(dcdx_neg_mask,
_mm_and_si128(dcdx_zero_mask,
_mm_xor_si128(dcdy_neg_mask,
top_left_flag)));
c_dec = _mm_or_si128(dcdx_neg_mask,
_mm_and_si128(dcdx_zero_mask,
_mm_xor_si128(dcdy_neg_mask,
top_left_flag)));
c_inc = _mm_srli_epi32(c_inc_mask, 31);
/*
* 64 bit arithmetic.
* Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
*/
cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
c02 = _mm_sub_epi64(cdx02, cdy02);
c13 = _mm_sub_epi64(cdx13, cdy13);
c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
_MM_SHUFFLE(2,2,0,0)));
c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
_MM_SHUFFLE(3,3,1,1)));
c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
mm_mullo_epi32(dcdy, verty));
c = _mm_add_epi32(c, c_inc);
/*
* Useful for very small fbs/tris (or fewer subpixel bits) only:
* c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
* mm_mullo_epi32(dcdy, verty));
*
* c = _mm_sub_epi32(c, c_dec);
*/
/* Scale up to match c:
*/
dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
/* Calculate trivial reject values:
/*
* Calculate trivial reject values:
* Note eo cannot overflow even if dcdx/dcdy would already have
* 31 bits (which they shouldn't have). This is because eo
* is never negative (albeit if we rely on that need to be careful...)
*/
eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
_mm_and_si128(dcdx_neg_mask, dcdx));
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
/*
* Pointless transpose which gets undone immediately in
* rasterization.
* It is actually difficult to do away with it - would essentially
* need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
* for this then would need to depend on the number of planes.
* The transpose is quite special here due to c being 64bit...
* The store has to be unaligned (unless we'd make the plane size
* a multiple of 128), and of course storing eo separately...
*/
c01 = _mm_unpacklo_epi64(c02, c13);
c23 = _mm_unpackhi_epi64(c02, c13);
transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
&p0, &p1, &p2, &unused);
_mm_storeu_si128((__m128i *)&plane[0], p0);
plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
_mm_storeu_si128((__m128i *)&plane[1], p1);
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
_mm_storeu_si128((__m128i *)&plane[2], p2);
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
} else
#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
/*
* XXX this code is effectively disabled for all practical purposes,
* as the allowed fb size is tiny if FIXED_ORDER is 8.
*/
if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
setup->fb.height <= MAX_FIXED_LENGTH32 &&
(bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
(bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
unsigned int bottom_edge;
__m128i vertx, verty;
__m128i shufx, shufy;
__m128i dcdx, dcdy, c;
__m128i unused;
__m128i dcdx_neg_mask;
__m128i dcdy_neg_mask;
__m128i dcdx_zero_mask;
__m128i top_left_flag;
__m128i c_inc_mask, c_inc;
__m128i eo, p0, p1, p2;
__m128i_union vshuf_mask;
__m128i zero = vec_splats((unsigned char) 0);
PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
#ifdef PIPE_ARCH_LITTLE_ENDIAN
vshuf_mask.i[0] = 0x07060504;
vshuf_mask.i[1] = 0x0B0A0908;
vshuf_mask.i[2] = 0x03020100;
vshuf_mask.i[3] = 0x0F0E0D0C;
#else
vshuf_mask.i[0] = 0x00010203;
vshuf_mask.i[1] = 0x0C0D0E0F;
vshuf_mask.i[2] = 0x04050607;
vshuf_mask.i[3] = 0x08090A0B;
#endif
/* vertex x coords */
vertx = vec_load_si128((const uint32_t *) position->x);
/* vertex y coords */
verty = vec_load_si128((const uint32_t *) position->y);
shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
shufy = vec_perm (verty, verty, vshuf_mask.m128i);
dcdx = vec_sub_epi32(verty, shufy);
dcdy = vec_sub_epi32(vertx, shufx);
dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
top_left_flag = (__m128i) vec_splats(bottom_edge);
c_inc_mask = vec_or(dcdx_neg_mask,
vec_and(dcdx_zero_mask,
vec_xor(dcdy_neg_mask,
top_left_flag)));
c_inc = vec_srli_epi32(c_inc_mask, 31);
c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
vec_mullo_epi32(dcdy, verty));
c = vec_add_epi32(c, c_inc);
/* Scale up to match c:
*/
dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
/* Calculate trivial reject values:
*/
eo = vec_sub_epi32(vec_andc(dcdy_neg_mask, dcdy),
vec_and(dcdx_neg_mask, dcdx));
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
/* Pointless transpose which gets undone immediately in
* rasterization:
*/
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
&p0, &p1, &p2, &unused);
#define STORE_PLANE(plane, vec) do { \
_mm_store_si128((__m128i *)&temp_vec, vec); \
plane.c = (int64_t)temp_vec[0]; \
plane.dcdx = temp_vec[1]; \
plane.dcdy = temp_vec[2]; \
plane.eo = temp_vec[3]; \
#define STORE_PLANE(plane, vec) do { \
vec_store_si128((uint32_t *)&temp_vec, vec); \
plane.c = (int64_t)temp_vec[0]; \
plane.dcdx = temp_vec[1]; \
plane.dcdy = temp_vec[2]; \
plane.eo = temp_vec[3]; \
} while(0)
STORE_PLANE(plane[0], p0);
@ -473,17 +591,17 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane[2].dcdx = position->dy20;
for (i = 0; i < 3; i++) {
/* half-edge constants, will be interated over the whole render
/* half-edge constants, will be iterated over the whole render
* target.
*/
plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
IMUL64(plane[i].dcdy, position->y[i]);
IMUL64(plane[i].dcdy, position->y[i]);
/* correct for top-left vs. bottom-left fill convention.
*/
*/
if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
plane[i].c++;
plane[i].c++;
}
else if (plane[i].dcdx == 0) {
if (setup->bottom_edge_rule == 0){
@ -517,19 +635,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
if (0) {
debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
plane[0].c,
plane[0].dcdx,
plane[0].dcdy,
plane[0].eo);
debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
plane[1].c,
plane[1].dcdx,
plane[1].dcdy,
plane[1].eo);
debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
plane[2].c,
plane[2].dcdx,
plane[2].dcdy,
@ -590,7 +708,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
if (n == 0)
return 0;
@ -738,9 +856,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
ei[i] = (plane[i].dcdy -
plane[i].dcdx -
plane[i].eo) << TILE_ORDER;
(int64_t)plane[i].eo) << TILE_ORDER;
eo[i] = plane[i].eo << TILE_ORDER;
eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
}
@ -932,12 +1050,12 @@ rotate_fixed_position_12( struct fixed_position* position )
/**
* Draw triangle if it's CW, cull otherwise.
*/
static void triangle_cw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
static void triangle_cw(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4])
{
struct fixed_position position;
PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@ -953,12 +1071,12 @@ static void triangle_cw( struct lp_setup_context *setup,
}
static void triangle_ccw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4])
static void triangle_ccw(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4])
{
struct fixed_position position;
PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@ -969,12 +1087,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
/**
* Draw triangle whether it's CW or CCW.
*/
static void triangle_both( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
static void triangle_both(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4])
{
struct fixed_position position;
PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries &&

View file

@ -48,21 +48,26 @@
static void
compute_vertex_info(struct llvmpipe_context *llvmpipe)
{
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
const struct tgsi_shader_info *fsInfo = &llvmpipe->fs->info.base;
struct vertex_info *vinfo = &llvmpipe->vertex_info;
int vs_index;
uint i;
draw_prepare_shader_outputs(llvmpipe->draw);
llvmpipe->color_slot[0] = 0;
llvmpipe->color_slot[1] = 0;
llvmpipe->bcolor_slot[0] = 0;
llvmpipe->bcolor_slot[1] = 0;
llvmpipe->viewport_index_slot = 0;
llvmpipe->layer_slot = 0;
llvmpipe->face_slot = 0;
llvmpipe->psize_slot = 0;
/*
* Those can't actually be 0 (because pos is always at 0).
* But use ints anyway to avoid confusion (in vs outputs, they
* can very well be at pos 0).
*/
llvmpipe->color_slot[0] = -1;
llvmpipe->color_slot[1] = -1;
llvmpipe->bcolor_slot[0] = -1;
llvmpipe->bcolor_slot[1] = -1;
llvmpipe->viewport_index_slot = -1;
llvmpipe->layer_slot = -1;
llvmpipe->face_slot = -1;
llvmpipe->psize_slot = -1;
/*
* Match FS inputs against VS outputs, emitting the necessary
@ -73,60 +78,49 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
vs_index = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_POSITION,
0);
TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
for (i = 0; i < lpfs->info.base.num_inputs; i++) {
for (i = 0; i < fsInfo->num_inputs; i++) {
/*
* Search for each input in current vs output:
*/
vs_index = draw_find_shader_output(llvmpipe->draw,
lpfs->info.base.input_semantic_name[i],
lpfs->info.base.input_semantic_index[i]);
fsInfo->input_semantic_name[i],
fsInfo->input_semantic_index[i]);
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
lpfs->info.base.input_semantic_index[i] < 2) {
int idx = lpfs->info.base.input_semantic_index[i];
llvmpipe->color_slot[idx] = vinfo->num_attribs;
if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
fsInfo->input_semantic_index[i] < 2) {
int idx = fsInfo->input_semantic_index[i];
llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
}
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
llvmpipe->face_slot = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
llvmpipe->face_slot = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
/*
* For vp index and layer, if the fs requires them but the vs doesn't
* provide them, store the slot - we'll later replace the data directly
* with zero (as required by ARB_fragment_layer_viewport). This is
* because draw itself just redirects them to whatever was at output 0.
* We'll also store the real vpindex/layer slot for setup use.
* provide them, draw (vbuf) will give us the required 0 (slot -1).
* (This means in this case we'll also use those slots in setup, which
* isn't necessary but they'll contain the correct (0) value.)
*/
} else if (lpfs->info.base.input_semantic_name[i] ==
} else if (fsInfo->input_semantic_name[i] ==
TGSI_SEMANTIC_VIEWPORT_INDEX) {
if (vs_index >= 0) {
llvmpipe->viewport_index_slot = vinfo->num_attribs;
}
else {
llvmpipe->fake_vpindex_slot = vinfo->num_attribs;
}
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
if (vs_index >= 0) {
llvmpipe->layer_slot = vinfo->num_attribs;
}
else {
llvmpipe->fake_layer_slot = vinfo->num_attribs;
}
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
llvmpipe->viewport_index_slot = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
} else if (fsInfo->input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
llvmpipe->layer_slot = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
} else {
/*
* Emit the requested fs attribute for all but position.
* Note that we'd actually want to skip position (as we won't use
* the attribute in the fs) but can't. The reason is that we don't
* actually have a input/output map for setup (even though it looks
* like we do...). Could adjust for this though even without a map
* (in llvmpipe_create_fs_state()).
*/
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@ -137,8 +131,8 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
TGSI_SEMANTIC_BCOLOR, i);
if (vs_index >= 0) {
llvmpipe->bcolor_slot[i] = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@ -148,29 +142,29 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
TGSI_SEMANTIC_PSIZE, 0);
if (vs_index >= 0) {
llvmpipe->psize_slot = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
llvmpipe->psize_slot = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
/* Figure out if we need viewport index (if it wasn't already in fs input) */
if (llvmpipe->viewport_index_slot == 0) {
if (llvmpipe->viewport_index_slot < 0) {
vs_index = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_VIEWPORT_INDEX,
0);
if (vs_index >= 0) {
llvmpipe->viewport_index_slot = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
llvmpipe->viewport_index_slot =(int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
/* Figure out if we need layer (if it wasn't already in fs input) */
if (llvmpipe->layer_slot == 0) {
if (llvmpipe->layer_slot < 0) {
vs_index = draw_find_shader_output(llvmpipe->draw,
TGSI_SEMANTIC_LAYER,
0);
if (vs_index >= 0) {
llvmpipe->layer_slot = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
llvmpipe->layer_slot = (int)vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, vs_index);
}
}
@ -197,10 +191,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
}
if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
LP_NEW_FS |
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_VS))
compute_vertex_info( llvmpipe );
compute_vertex_info(llvmpipe);
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_FRAMEBUFFER |

View file

@ -2695,34 +2695,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
switch (shader->info.base.input_interpolate[i]) {
case TGSI_INTERPOLATE_CONSTANT:
shader->inputs[i].interp = LP_INTERP_CONSTANT;
break;
shader->inputs[i].interp = LP_INTERP_CONSTANT;
break;
case TGSI_INTERPOLATE_LINEAR:
shader->inputs[i].interp = LP_INTERP_LINEAR;
break;
shader->inputs[i].interp = LP_INTERP_LINEAR;
break;
case TGSI_INTERPOLATE_PERSPECTIVE:
shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
break;
shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
break;
case TGSI_INTERPOLATE_COLOR:
shader->inputs[i].interp = LP_INTERP_COLOR;
break;
shader->inputs[i].interp = LP_INTERP_COLOR;
break;
default:
assert(0);
break;
assert(0);
break;
}
switch (shader->info.base.input_semantic_name[i]) {
case TGSI_SEMANTIC_FACE:
shader->inputs[i].interp = LP_INTERP_FACING;
break;
shader->inputs[i].interp = LP_INTERP_FACING;
break;
case TGSI_SEMANTIC_POSITION:
/* Position was already emitted above
*/
shader->inputs[i].interp = LP_INTERP_POSITION;
shader->inputs[i].src_index = 0;
continue;
/* Position was already emitted above
*/
shader->inputs[i].interp = LP_INTERP_POSITION;
shader->inputs[i].src_index = 0;
continue;
}
/* XXX this is a completely pointless index map... */
shader->inputs[i].src_index = i+1;
}

View file

@ -372,9 +372,9 @@ load_attribute(struct gallivm_state *gallivm,
/* Potentially modify it according to twoside, etc:
*/
if (key->twoside) {
if (vert_attr == key->color_slot && key->bcolor_slot > 0)
if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
else if (vert_attr == key->spec_slot && key->bspec_slot > 0)
else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
}
}
@ -602,13 +602,6 @@ emit_tri_coef( struct gallivm_state *gallivm,
*/
break;
case LP_INTERP_ZERO:
/*
* The information we get from the output is bogus, replace it
* with zero.
*/
emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
break;
case LP_INTERP_FACING:
emit_facing_coef(gallivm, args, slot+1);
break;
@ -879,13 +872,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->pad = 0;
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
for (i = 0; i < key->num_inputs; i++) {
if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
key->inputs[i].src_index == lp->fake_layer_slot) {
key->inputs[i].interp = LP_INTERP_ZERO;
}
}
else if (key->inputs[i].interp == LP_INTERP_COLOR) {
if (key->inputs[i].interp == LP_INTERP_COLOR) {
if (lp->rasterizer->flatshade)
key->inputs[i].interp = LP_INTERP_CONSTANT;
else

View file

@ -17,10 +17,10 @@ struct lp_setup_variant_list_item
struct lp_setup_variant_key {
unsigned size:16;
unsigned num_inputs:8;
unsigned color_slot:8;
unsigned bcolor_slot:8;
unsigned spec_slot:8;
unsigned bspec_slot:8;
int color_slot:8;
int bcolor_slot:8;
int spec_slot:8;
int bspec_slot:8;
unsigned flatshade_first:1;
unsigned pixel_center_half:1;
unsigned twoside:1;

View file

@ -184,7 +184,7 @@ add_blend_test(struct gallivm_state *gallivm,
LLVMBuildStore(builder, res, res_ptr);
LLVMBuildRetVoid(builder);;
LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, func);

View file

@ -140,7 +140,7 @@ add_conv_test(struct gallivm_state *gallivm,
LLVMBuildStore(builder, dst[i], ptr);
}
LLVMBuildRetVoid(builder);;
LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, func);

View file

@ -390,6 +390,9 @@ enum SVSemantic
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
SV_THREAD_KILL,
SV_BASEVERTEX,
SV_BASEINSTANCE,
SV_DRAWID,
SV_UNDEFINED,
SV_LAST
};

View file

@ -124,6 +124,7 @@ struct nv50_ir_prog_info
union {
struct {
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
bool usesDrawParameters;
} vp;
struct {
uint8_t inputPatchSize;
@ -160,8 +161,9 @@ struct nv50_ir_prog_info
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
uint16_t drawInfoBase; /* base address for draw parameters */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */

View file

@ -740,6 +740,7 @@ CodeEmitterGM107::emitF2F()
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
emitFMZ (0x2c, 1);
emitField(0x29, 1, insn->subOp);
emitRND (0x27, rnd, 0x2a);
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));

View file

@ -1030,7 +1030,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
// for 8/16 source types, the byte/word is in subOp. word 1 is
// represented as 2.
code[1] |= i->subOp << 0x17;
if (!isFloatType(i->sType))
code[1] |= i->subOp << 0x17;
else
code[1] |= i->subOp << 0x18;
if (sat)
code[0] |= 0x20;

View file

@ -319,6 +319,10 @@ unsigned int Instruction::srcMask(unsigned int s) const
x |= 2;
return x;
}
case TGSI_OPCODE_PK2H:
return 0x3;
case TGSI_OPCODE_UP2H:
return 0x1;
default:
break;
}
@ -348,7 +352,7 @@ static nv50_ir::DataFile translateFile(uint file)
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
//case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
@ -377,6 +381,9 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@ -449,6 +456,7 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_UBFE:
case TGSI_OPCODE_UMSB:
case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
@ -513,10 +521,12 @@ nv50_ir::DataType Instruction::inferDstType() const
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
@ -861,7 +871,7 @@ bool Source::scanSource()
clipVertexOutput = -1;
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
info->immd.bufSize = 0;
@ -1128,6 +1138,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_SAMPLEPOS:
info->prop.fp.sampleInterp = 1;
break;
case TGSI_SEMANTIC_BASEVERTEX:
case TGSI_SEMANTIC_BASEINSTANCE:
case TGSI_SEMANTIC_DRAWID:
info->prop.vp.usesDrawParameters = true;
break;
default:
break;
}
@ -1144,6 +1159,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
}
}
break;
/*
case TGSI_FILE_RESOURCE:
for (i = first; i <= last; ++i) {
resources[i].target = decl->Resource.Resource;
@ -1151,6 +1167,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
resources[i].slot = i;
}
break;
*/
case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i)
textureViews[i].target = decl->SamplerView.Resource;
@ -1216,11 +1233,13 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (src.isIndirect(0))
mainTempsInLMem = true;
} else
/*
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
@ -1271,9 +1290,11 @@ Instruction::getTexture(const tgsi::Source *code, int s) const
unsigned int r;
switch (getSrc(s).getFile()) {
/*
case TGSI_FILE_RESOURCE:
r = getSrc(s).getIndex(0);
return translateTexture(code->resources.at(r).target);
*/
case TGSI_FILE_SAMPLER_VIEW:
r = getSrc(s).getIndex(0);
return translateTexture(code->textureViews.at(r).target);
@ -1639,8 +1660,6 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
return interpolate(src, c, shiftAddress(ptr));
} else
if (prog->getType() == Program::TYPE_GEOMETRY) {
@ -1681,7 +1700,7 @@ Converter::acquireDst(int d, int c)
const int idx = dst.getIndex(0);
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE)
if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
return NULL;
if (dst.isIndirect(0) ||
@ -2799,6 +2818,21 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_PK2H:
val0 = getScratch();
val1 = getScratch();
mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
break;
case TGSI_OPCODE_UP2H:
src0 = fetchSrc(0, 0);
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
geni->subOp = c & 1;
}
break;
case TGSI_OPCODE_EMIT:
/* export the saved viewport index */
if (viewport != NULL) {
@ -3252,7 +3286,7 @@ Converter::handleUserClipPlanes()
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
if (c == 0)

View file

@ -1576,6 +1576,17 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
case SV_BASEVERTEX:
case SV_BASEINSTANCE:
case SV_DRAWID:
ld = bld.mkLoad(TYPE_U32, i->getDef(0),
bld.mkSymbol(FILE_MEMORY_CONST,
prog->driver->io.auxCBSlot,
TYPE_U32,
prog->driver->io.drawInfoBase +
4 * (sv - SV_BASEVERTEX)),
NULL);
break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));

View file

@ -676,23 +676,22 @@ ConstantFolding::expr(Instruction *i,
switch (i->op) {
case OP_MAD:
case OP_FMA: {
i->op = OP_ADD;
ImmediateValue src0, src1 = *i->getSrc(0)->asImm();
/* Move the immediate to the second arg, otherwise the ADD operation
* won't be emittable
*/
i->setSrc(1, i->getSrc(0));
// Move the immediate into position 1, where we know it might be
// emittable. However it might not be anyways, as there may be other
// restrictions, so move it into a separate LValue.
bld.setPosition(i, false);
i->op = OP_ADD;
i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0));
i->setSrc(0, i->getSrc(2));
i->src(0).mod = i->src(2).mod;
i->setSrc(2, NULL);
ImmediateValue src0;
if (i->src(0).getImmediate(src0))
expr(i, src0, *i->getSrc(1)->asImm());
if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
bld.setPosition(i, false);
i->setSrc(1, bld.loadImm(NULL, res.data.u32));
}
expr(i, src0, src1);
else
opnd(i, src1, 1);
break;
}
case OP_PFETCH:

View file

@ -295,6 +295,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
case SV_SAMPLE_MASK: return 0;
case SV_BASEVERTEX: return 0;
case SV_BASEINSTANCE: return 0;
case SV_DRAWID: return 0;
default:
return 0xffffffff;
}

Some files were not shown because too many files have changed in this diff Show more