Merge remote-tracking branch 'mesa-public/master' into vulkan

Jason Ekstrand 2015-10-21 17:40:13 -07:00
commit fed60e3c73
102 changed files with 2593 additions and 1232 deletions

View file

@ -108,6 +108,8 @@ AC_SYS_LARGEFILE
LT_PREREQ([2.2])
LT_INIT([disable-static])
AC_CHECK_PROG(RM, rm, [rm -f])
AX_PROG_BISON([],
AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))

View file

@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe, softpipe)
GL_ARB_texture_view DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe)
GL_ARB_vertex_attrib_binding DONE (all drivers)

View file

@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
<li>GL_ARB_texture_query_lod on softpipe</li>
<li>GL_ARB_texture_view on radeonsi</li>
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
</ul>

View file

@ -355,8 +355,9 @@ struct draw_vertex_info {
};
/* these flags are set if the primitive is a segment of a larger one */
#define DRAW_SPLIT_BEFORE 0x1
#define DRAW_SPLIT_AFTER 0x2
#define DRAW_SPLIT_BEFORE 0x1
#define DRAW_SPLIT_AFTER 0x2
#define DRAW_LINE_LOOP_AS_STRIP 0x4
struct draw_prim_info {
boolean linear;

View file

@ -359,6 +359,16 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
}
static inline unsigned
prim_type(unsigned prim, unsigned flags)
{
if (flags & DRAW_LINE_LOOP_AS_STRIP)
return PIPE_PRIM_LINE_STRIP;
else
return prim;
}
static void
fetch_pipeline_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
@ -380,7 +390,7 @@ fetch_pipeline_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@ -408,7 +418,7 @@ fetch_pipeline_linear_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@ -439,7 +449,7 @@ fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;

View file

@ -473,6 +473,16 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
}
static inline unsigned
prim_type(unsigned prim, unsigned flags)
{
if (flags & DRAW_LINE_LOOP_AS_STRIP)
return PIPE_PRIM_LINE_STRIP;
else
return prim;
}
static void
llvm_middle_end_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
@ -494,7 +504,7 @@ llvm_middle_end_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@ -522,7 +532,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@ -552,7 +562,7 @@ llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;

View file

@ -249,6 +249,9 @@ vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
assert(icount + !!close_loop <= vsplit->segment_size);
/* need to draw the sections of the line loop as line strips */
flags |= DRAW_LINE_LOOP_AS_STRIP;
if (close_loop) {
for (nr = 0; nr < icount; nr++)
vsplit->fetch_elts[nr] = istart + nr;
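To make the flag's effect concrete, here is a hedged sketch of how one segment of a split line loop could be turned into fetch elements; the helper and its argument names are illustrative (not part of the patch), only the flag and close-loop handling mirror the code above and the prim_type() helpers added in the middle ends.

/* Illustrative helper (assumed names): build the element list for one
 * segment of a line loop.  Every segment is drawn as a line strip because
 * the frontend sets DRAW_LINE_LOOP_AS_STRIP; for the final segment
 * (close_loop), the loop's first vertex is appended so the strip closes
 * the loop. */
static void
example_loop_segment_elts(unsigned *fetch_elts, unsigned istart,
                          unsigned icount, boolean close_loop,
                          unsigned loop_start)
{
   unsigned nr;

   for (nr = 0; nr < icount; nr++)
      fetch_elts[nr] = istart + nr;
   if (close_loop)
      fetch_elts[nr] = loop_start;
}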

View file

@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
/* if we get here, we missed a shader cap above (and should have seen
* a compiler warning.)

View file

@ -474,6 +474,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
/* if we get here, we missed a shader cap above (and should have seen
* a compiler warning.)

View file

@ -369,19 +369,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
procType == TGSI_PROCESSOR_GEOMETRY ||
procType == TGSI_PROCESSOR_TESS_CTRL ||
procType == TGSI_PROCESSOR_TESS_EVAL) {
if (semName == TGSI_SEMANTIC_CLIPDIST) {
info->num_written_clipdistance +=
util_bitcount(fulldecl->Declaration.UsageMask);
info->clipdist_writemask |=
fulldecl->Declaration.UsageMask << (semIndex*4);
}
else if (semName == TGSI_SEMANTIC_CULLDIST) {
info->num_written_culldistance +=
util_bitcount(fulldecl->Declaration.UsageMask);
info->culldist_writemask |=
fulldecl->Declaration.UsageMask << (semIndex*4);
}
else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
info->writes_viewport_index = TRUE;
}
else if (semName == TGSI_SEMANTIC_LAYER) {
@ -432,9 +420,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
const struct tgsi_full_property *fullprop
= &parse.FullToken.FullProperty;
unsigned name = fullprop->Property.PropertyName;
unsigned value = fullprop->u[0].Data;
assert(name < Elements(info->properties));
info->properties[name] = fullprop->u[0].Data;
info->properties[name] = value;
switch (name) {
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
info->num_written_clipdistance = value;
info->clipdist_writemask |= (1 << value) - 1;
break;
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
info->num_written_culldistance = value;
info->culldist_writemask |= (1 << value) - 1;
break;
}
}
break;
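A short hedged example of what the new property handling yields; the surrounding setup (a valid token stream in "tokens") is assumed and not part of the patch.

/* Sketch: after tgsi_scan_shader(), a shader whose NUM_CLIPDIST_ENABLED
 * property is 5 reports num_written_clipdistance == 5 and
 * clipdist_writemask == (1 << 5) - 1 == 0x1f. */
struct tgsi_shader_info info;
tgsi_scan_shader(tokens, &info);
printf("clip: %u scalars, writemask 0x%x\n",
       info.num_written_clipdistance, info.clipdist_writemask);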

View file

@ -137,6 +137,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"TES_SPACING",
"TES_VERTEX_ORDER_CW",
"TES_POINT_MODE",
"NUM_CLIPDIST_ENABLED",
"NUM_CULLDIST_ENABLED",
};
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =

View file

@ -276,6 +276,8 @@ The integer capabilities:
GL4 hardware will likely need to emulate it with a shader variant, or by
selecting the interpolation weights with a conditional assignment
in the shader.
* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
pipe_context.
@ -365,6 +367,10 @@ to be 0.
are supported.
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
ignore tgsi_declaration_range::Last for shader inputs and outputs.
* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: The maximum number of
iterations a loop may have and still be considered for unrolling. This is
only a hint to state trackers; whether any loop is actually unrolled is not
guaranteed.
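For reference, a hedged sketch of how a state tracker might consult the two capabilities documented above; the helper names are made up, the entry points are the standard pipe_screen query hooks.

/* Illustrative only (assumed context, not part of the patch). */
static int
shaders_are_shareable(struct pipe_screen *screen)
{
   return screen->get_param(screen, PIPE_CAP_SHAREABLE_SHADERS);
}

static unsigned
loop_unroll_hint(struct pipe_screen *screen, unsigned shader)
{
   /* Purely advisory: e.g. 32 means loops of up to 32 iterations are
    * probably worth unrolling; nothing is guaranteed. */
   return screen->get_shader_param(screen, shader,
                                   PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
}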
.. _pipe_compute_cap:

View file

@ -3126,6 +3126,16 @@ TES_POINT_MODE
If set to a non-zero value, this turns on point mode for the tessellator,
which means that points will be generated instead of primitives.
NUM_CLIPDIST_ENABLED
""""""""""""""""
How many clip distance scalar outputs are enabled.
NUM_CULLDIST_ENABLED
""""""""""""""""
How many cull distance scalar outputs are enabled.
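Tying these properties back to the tgsi_scan change earlier in this commit, a hedged sketch of declaring them when building a shader with the ureg API (assuming ureg_property() is available, as it is elsewhere in this tree; the values are made up for the example):

/* Illustrative values: a vertex shader writing two clip distances and no
 * cull distances. */
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, 2);
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED, 0);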
Texture Sampling and Texture Formats
------------------------------------

View file

@ -237,6 +237,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@ -411,6 +412,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
debug_printf("unknown shader param %d\n", param);
return 0;

View file

@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
return 0;
@ -249,6 +251,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:

View file

@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
return 0;
@ -471,6 +473,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:

View file

@ -298,6 +298,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
/* should only get here on unhandled cases */

View file

@ -171,6 +171,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -263,6 +264,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
debug_printf("unknown vertex shader param %d\n", param);
return 0;
@ -304,6 +307,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
debug_printf("unknown fragment shader param %d\n", param);
return 0;

View file

@ -216,6 +216,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -299,6 +300,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;

View file

@ -202,6 +202,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -312,6 +313,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16; /* XXX not sure if more are really safe */
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;

View file

@ -197,6 +197,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* SWTCL-only features. */
@ -302,6 +303,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
@ -358,6 +361,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}

View file

@ -343,6 +343,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* Stream output. */
@ -510,6 +511,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
/* due to a bug in the shader compiler, some loops hang
* if they are not unrolled, see:
* https://bugs.freedesktop.org/show_bug.cgi?id=86720
*/
return 255;
}
return 0;
}

View file

@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);

View file

@ -31,15 +31,15 @@
#include "ddebug/dd_util.h"
static void si_dump_shader(struct si_shader_selector *sel, const char *name,
static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
FILE *f)
{
if (!sel || !sel->current)
if (!state->cso || !state->current)
return;
fprintf(f, "%s shader disassembly:\n", name);
si_dump_shader_key(sel->type, &sel->current->key, f);
fprintf(f, "%s\n\n", sel->current->binary.disasm_string);
si_dump_shader_key(state->cso->type, &state->current->key, f);
fprintf(f, "%s\n\n", state->current->binary.disasm_string);
}
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
if (flags & PIPE_DEBUG_DEVICE_IS_HUNG)
si_dump_debug_registers(sctx, f);
si_dump_shader(sctx->vs_shader, "Vertex", f);
si_dump_shader(sctx->tcs_shader, "Tessellation control", f);
si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f);
si_dump_shader(sctx->gs_shader, "Geometry", f);
si_dump_shader(sctx->ps_shader, "Fragment", f);
si_dump_shader(&sctx->vs_shader, "Vertex", f);
si_dump_shader(&sctx->tcs_shader, "Tessellation control", f);
si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f);
si_dump_shader(&sctx->gs_shader, "Geometry", f);
si_dump_shader(&sctx->ps_shader, "Fragment", f);
si_dump_last_bo_list(sctx, f);
si_dump_last_ib(sctx, f);

View file

@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx,
void si_shader_change_notify(struct si_context *sctx)
{
/* VS can be bound as VS, ES, or LS. */
if (sctx->tes_shader)
if (sctx->tes_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B530_SPI_SHADER_USER_DATA_LS_0);
else if (sctx->gs_shader)
else if (sctx->gs_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
else
@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx)
R_00B130_SPI_SHADER_USER_DATA_VS_0);
/* TES can be bound as ES, VS, or not bound. */
if (sctx->tes_shader) {
if (sctx->gs_shader)
if (sctx->tes_shader.cso) {
if (sctx->gs_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
else
@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
unsigned i;
uint32_t *sh_base = sctx->shader_userdata.sh_base;
if (sctx->gs_shader) {
if (sctx->gs_shader.cso) {
/* The VS copy shader needs these for clipping, streamout, and rings. */
unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
unsigned i = PIPE_SHADER_VERTEX;
@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
/* The TESSEVAL shader needs this for the ESGS ring buffer. */
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
} else if (sctx->tes_shader) {
} else if (sctx->tes_shader.cso) {
/* The TESSEVAL shader needs this for streamout. */
si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
R_00B130_SPI_SHADER_USER_DATA_VS_0, true);

View file

@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
if (sctx->dummy_pixel_shader)
sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
if (sctx->fixed_func_tcs_shader)
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
if (sctx->fixed_func_tcs_shader.cso)
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
if (sctx->custom_dsa_flush)
sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
if (sctx->custom_blend_resolve)
@ -293,7 +293,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
@ -335,7 +337,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
@ -507,6 +508,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
return 0;
}

View file

@ -152,6 +152,15 @@ struct si_viewports {
struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
};
/* A shader state consists of the shader selector, which is a constant state
* object shared by multiple contexts and shouldn't be modified, and
* the current shader variant selected for this context.
*/
struct si_shader_ctx_state {
struct si_shader_selector *cso;
struct si_shader *current;
};
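A minimal hedged sketch of how the new split is meant to be used (the helper itself is illustrative, the field names are those of this commit): the selector is the shared, immutable CSO, while "current" starts at the first precompiled variant and is refined per draw by si_shader_select().

static void
example_bind(struct si_shader_ctx_state *state,
             struct si_shader_selector *sel)
{
   state->cso = sel;
   state->current = sel ? sel->first_variant : NULL;
}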
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@ -162,7 +171,7 @@ struct si_context {
void *pstipple_sampler_state;
struct si_screen *screen;
struct pipe_fence_handle *last_gfx_fence;
struct si_shader_selector *fixed_func_tcs_shader;
struct si_shader_ctx_state fixed_func_tcs_shader;
LLVMTargetMachineRef tm;
/* Atoms (direct states). */
@ -199,11 +208,11 @@ struct si_context {
void *dummy_pixel_shader;
/* shaders */
struct si_shader_selector *ps_shader;
struct si_shader_selector *gs_shader;
struct si_shader_selector *vs_shader;
struct si_shader_selector *tcs_shader;
struct si_shader_selector *tes_shader;
struct si_shader_ctx_state ps_shader;
struct si_shader_ctx_state gs_shader;
struct si_shader_ctx_state vs_shader;
struct si_shader_ctx_state tcs_shader;
struct si_shader_ctx_state tes_shader;
struct si_cs_shader_state cs_shader_state;
/* shader information */

View file

@ -179,15 +179,18 @@ struct radeon_shader_reloc;
struct si_shader;
/* A shader selector is a gallium CSO and contains shader variants and
* binaries for one TGSI program. This can be shared by multiple contexts.
*/
struct si_shader_selector {
struct si_shader *current;
pipe_mutex mutex;
struct si_shader *first_variant; /* immutable after the first variant */
struct si_shader *last_variant; /* mutable */
struct tgsi_token *tokens;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
unsigned num_shaders;
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
@ -241,7 +244,7 @@ union si_shader_key {
uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
unsigned export_prim_id; /* when PS needs it and GS is disabled */
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} vs;
struct {
unsigned prim_mode:3;
@ -252,7 +255,7 @@ union si_shader_key {
* This describes how outputs are laid out in memory. */
uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
unsigned export_prim_id; /* when PS needs it and GS is disabled */
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} tes; /* tessellation evaluation shader */
};
@ -293,24 +296,24 @@ struct si_shader {
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
{
if (sctx->gs_shader)
return &sctx->gs_shader->info;
else if (sctx->tes_shader)
return &sctx->tes_shader->info;
else if (sctx->vs_shader)
return &sctx->vs_shader->info;
if (sctx->gs_shader.cso)
return &sctx->gs_shader.cso->info;
else if (sctx->tes_shader.cso)
return &sctx->tes_shader.cso->info;
else if (sctx->vs_shader.cso)
return &sctx->vs_shader.cso->info;
else
return NULL;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
{
if (sctx->gs_shader)
return sctx->gs_shader->current->gs_copy_shader;
else if (sctx->tes_shader)
return sctx->tes_shader->current;
if (sctx->gs_shader.current)
return sctx->gs_shader.current->gs_copy_shader;
else if (sctx->tes_shader.current)
return sctx->tes_shader.current;
else
return sctx->vs_shader->current;
return sctx->vs_shader.current;
}
static inline bool si_vs_exports_prim_id(struct si_shader *shader)

View file

@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
if (blend->dual_src_blend &&
(sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
@ -1535,9 +1535,14 @@ static unsigned si_tex_compare(unsigned compare)
}
}
static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
unsigned nr_samples)
{
switch (dim) {
if (view_target == PIPE_TEXTURE_CUBE ||
view_target == PIPE_TEXTURE_CUBE_ARRAY)
res_target = view_target;
switch (res_target) {
default:
case PIPE_TEXTURE_1D:
return V_008F1C_SQ_RSRC_IMG_1D;
@ -2391,6 +2396,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
struct radeon_surf_level *surflevel;
int first_non_void;
uint64_t va;
unsigned last_layer = state->u.tex.last_layer;
if (view == NULL)
return NULL;
@ -2596,6 +2602,13 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
depth = texture->array_size / 6;
/* This is not needed if state trackers set last_layer correctly. */
if (state->target == PIPE_TEXTURE_1D ||
state->target == PIPE_TEXTURE_2D ||
state->target == PIPE_TEXTURE_RECT ||
state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
va = tmp->resource.gpu_address + surflevel[base_level].offset;
view->state[0] = va >> 8;
@ -2615,10 +2628,11 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
last_level) |
S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
S_008F1C_POW2_PAD(texture->last_level > 0) |
S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
S_008F1C_TYPE(si_tex_dim(texture->target, state->target,
texture->nr_samples)));
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(state->u.tex.last_layer));
S_008F24_LAST_ARRAY(last_layer));
view->state[6] = 0;
view->state[7] = 0;
@ -2653,11 +2667,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
S_008F1C_TYPE(si_tex_dim(texture->target, 0));
S_008F1C_TYPE(si_tex_dim(texture->target,
state->target, 0));
view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
S_008F20_PITCH(tmp->fmask.pitch - 1);
view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
S_008F24_LAST_ARRAY(state->u.tex.last_layer);
S_008F24_LAST_ARRAY(last_layer);
view->fmask_state[6] = 0;
view->fmask_state[7] = 0;
}

View file

@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
unsigned *num_patches)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader_selector *ls = sctx->vs_shader;
struct si_shader_ctx_state *ls = &sctx->vs_shader;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
num_tcs_inputs = util_last_bit64(ls->outputs_written);
num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
if (sctx->tcs_shader) {
if (sctx->tcs_shader.cso) {
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
bool partial_vs_wave = false;
bool partial_es_wave = false;
if (sctx->gs_shader)
if (sctx->gs_shader.cso)
primgroup_size = 64; /* recommended with a GS */
if (sctx->tes_shader) {
if (sctx->tes_shader.cso) {
unsigned num_cp_out =
sctx->tcs_shader ?
sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
sctx->tcs_shader.cso ?
sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
info->vertices_per_patch;
unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* SWITCH_ON_EOI must be set if PrimID is used.
* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
sctx->tes_shader->info.uses_primid) {
if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
sctx->tes_shader.cso->info.uses_primid) {
ia_switch_on_eoi = true;
partial_es_wave = true;
}
@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if ((sctx->b.family == CHIP_TAHITI ||
sctx->b.family == CHIP_PITCAIRN ||
sctx->b.family == CHIP_BONAIRE) &&
sctx->gs_shader)
sctx->gs_shader.cso)
partial_vs_wave = true;
}
@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
{
unsigned num_output_cp;
if (!sctx->tes_shader)
if (!sctx->tes_shader.cso)
return 0;
num_output_cp = sctx->tcs_shader ?
sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
num_output_cp = sctx->tcs_shader.cso ?
sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
info->vertices_per_patch;
return S_028B58_NUM_PATCHES(num_patches) |
@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
if (sctx->tes_shader)
if (sctx->tes_shader.cso)
si_emit_derived_tess_state(sctx, info, &num_patches);
ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
if (!sctx->ps_shader || !sctx->vs_shader) {
if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
assert(0);
return;
}
if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) {
assert(0);
return;
}
@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* This must be done after si_decompress_textures, which can call
* draw_vbo recursively, and before si_update_shaders, which uses
* current_rast_prim for this draw_vbo call. */
if (sctx->gs_shader)
sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
else if (sctx->tes_shader)
if (sctx->gs_shader.cso)
sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim;
else if (sctx->tes_shader.cso)
sctx->current_rast_prim =
sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
else
sctx->current_rast_prim = info->mode;

View file

@ -404,6 +404,7 @@ static void si_shader_ps(struct si_shader *shader)
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = 0;
uint64_t va;
bool has_centroid;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@ -435,8 +436,11 @@ static void si_shader_ps(struct si_shader *shader)
}
}
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
S_0286D8_BC_OPTIMIZE_DISABLE(1);
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
@ -523,26 +527,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
if (sctx->tes_shader)
if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
else if (sctx->gs_shader) {
else if (sctx->gs_shader.cso) {
key->vs.as_es = 1;
key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
}
if (!sctx->gs_shader && sctx->ps_shader &&
sctx->ps_shader->info.uses_primid)
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
sctx->ps_shader.cso->info.uses_primid)
key->vs.export_prim_id = 1;
break;
case PIPE_SHADER_TESS_CTRL:
key->tcs.prim_mode =
sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
if (sctx->gs_shader) {
if (sctx->gs_shader.cso) {
key->tes.as_es = 1;
key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
} else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
} else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
@ -589,11 +593,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
/* Select the hw shader variant depending on the current state. */
static int si_shader_select(struct pipe_context *ctx,
struct si_shader_selector *sel)
struct si_shader_ctx_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
union si_shader_key key;
struct si_shader * shader = NULL;
struct si_shader *iter, *shader = NULL;
int r;
si_shader_selector_key(ctx, sel, &key);
@ -602,49 +608,51 @@ static int si_shader_select(struct pipe_context *ctx,
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
return 0;
}
/* lookup if we have other variants in the list */
if (sel->num_shaders > 1) {
struct si_shader *p = sel->current, *c = p->next_variant;
pipe_mutex_lock(sel->mutex);
while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
p = c;
c = c->next_variant;
}
if (c) {
p->next_variant = c->next_variant;
shader = c;
/* Find the shader variant. */
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
memcmp(&iter->key, &key, sizeof(key)) == 0) {
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
}
}
if (shader) {
shader->next_variant = sel->current;
sel->current = shader;
/* Build a new shader. */
shader = CALLOC_STRUCT(si_shader);
if (!shader) {
pipe_mutex_unlock(sel->mutex);
return -ENOMEM;
}
shader->selector = sel;
shader->key = key;
r = si_shader_create(sctx->screen, sctx->tm, shader);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
FREE(shader);
pipe_mutex_unlock(sel->mutex);
return r;
}
si_shader_init_pm4_state(shader);
if (!sel->last_variant) {
sel->first_variant = shader;
sel->last_variant = shader;
} else {
shader = CALLOC(1, sizeof(struct si_shader));
shader->selector = sel;
shader->key = key;
shader->next_variant = sel->current;
sel->current = shader;
r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
shader);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
sel->current = NULL;
FREE(shader);
return r;
}
si_shader_init_pm4_state(shader);
sel->num_shaders++;
p_atomic_inc(&sctx->screen->b.num_compilations);
sel->last_variant->next_variant = shader;
sel->last_variant = shader;
}
state->current = shader;
p_atomic_inc(&sctx->screen->b.num_compilations);
pipe_mutex_unlock(sel->mutex);
return 0;
}
@ -752,14 +760,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
break;
}
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
if (si_shader_select(ctx, sel)) {
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
if (si_shader_select(ctx, &state)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);
return NULL;
}
}
pipe_mutex_init(sel->mutex);
return sel;
}
@ -787,10 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
if (sctx->vs_shader == sel || !sel)
if (sctx->vs_shader.cso == sel || !sel)
return;
sctx->vs_shader = sel;
sctx->vs_shader.cso = sel;
sctx->vs_shader.current = sel->first_variant;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
si_update_viewports_and_scissors(sctx);
}
@ -799,12 +812,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
bool enable_changed = !!sctx->gs_shader != !!sel;
bool enable_changed = !!sctx->gs_shader.cso != !!sel;
if (sctx->gs_shader == sel)
if (sctx->gs_shader.cso == sel)
return;
sctx->gs_shader = sel;
sctx->gs_shader.cso = sel;
sctx->gs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
@ -817,12 +831,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
bool enable_changed = !!sctx->tcs_shader != !!sel;
bool enable_changed = !!sctx->tcs_shader.cso != !!sel;
if (sctx->tcs_shader == sel)
if (sctx->tcs_shader.cso == sel)
return;
sctx->tcs_shader = sel;
sctx->tcs_shader.cso = sel;
sctx->tcs_shader.current = sel ? sel->first_variant : NULL;
if (enable_changed)
sctx->last_tcs = NULL; /* invalidate derived tess state */
@ -832,12 +847,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
bool enable_changed = !!sctx->tes_shader != !!sel;
bool enable_changed = !!sctx->tes_shader.cso != !!sel;
if (sctx->tes_shader == sel)
if (sctx->tes_shader.cso == sel)
return;
sctx->tes_shader = sel;
sctx->tes_shader.cso = sel;
sctx->tes_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
@ -864,7 +880,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
struct si_shader_selector *sel = state;
/* skip if supplied shader is one already in use */
if (sctx->ps_shader == sel)
if (sctx->ps_shader.cso == sel)
return;
/* use a dummy shader if binding a NULL shader */
@ -873,7 +889,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sel = sctx->dummy_pixel_shader;
}
sctx->ps_shader = sel;
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel->first_variant;
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
@ -881,8 +898,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = (struct si_shader_selector *)state;
struct si_shader *p = sel->current, *c;
struct si_shader_selector **current_shader[SI_NUM_SHADERS] = {
struct si_shader *p = sel->first_variant, *c;
struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
[PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader,
[PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader,
@ -890,8 +907,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
};
if (*current_shader[sel->type] == sel)
*current_shader[sel->type] = NULL;
if (current_shader[sel->type]->cso == sel) {
current_shader[sel->type]->cso = NULL;
current_shader[sel->type]->current = NULL;
}
while (p) {
c = p->next_variant;
@ -927,6 +946,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
p = c;
}
pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);
}
@ -934,7 +954,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader->current;
struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo = &ps->selector->info;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
@ -1004,7 +1024,7 @@ bcolor:
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
struct si_shader *ps = sctx->ps_shader->current;
struct si_shader *ps = sctx->ps_shader.current;
unsigned input_ena = ps->spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
@ -1133,7 +1153,7 @@ static void si_init_gs_rings(struct si_context *sctx)
static void si_update_gs_rings(struct si_context *sctx)
{
unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
uint64_t offset;
if (gsvs_itemsize == sctx->last_gsvs_itemsize)
@ -1167,17 +1187,14 @@ static void si_update_gs_rings(struct si_context *sctx)
* < 0 if there was a failure
*/
static int si_update_scratch_buffer(struct si_context *sctx,
struct si_shader_selector *sel)
struct si_shader *shader)
{
struct si_shader *shader;
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
int r;
if (!sel)
if (!shader)
return 0;
shader = sel->current;
/* This shader doesn't need a scratch buffer */
if (shader->scratch_bytes_per_wave == 0)
return 0;
@ -1209,20 +1226,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
}
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel)
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
return sel ? sel->current->scratch_bytes_per_wave : 0;
return shader ? shader->scratch_bytes_per_wave : 0;
}
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
unsigned bytes = 0;
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current));
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
return bytes;
}
@ -1256,46 +1273,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
* last used, so we still need to try to update them, even if
* they require scratch buffers smaller than the current size.
*/
r = si_update_scratch_buffer(sctx, sctx->ps_shader);
r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
if (r < 0)
return false;
if (r == 1)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
r = si_update_scratch_buffer(sctx, sctx->gs_shader);
r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
if (r < 0)
return false;
if (r == 1)
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
if (r < 0)
return false;
if (r == 1)
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
/* VS can be bound as LS, ES, or VS. */
r = si_update_scratch_buffer(sctx, sctx->vs_shader);
r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
if (r < 0)
return false;
if (r == 1) {
if (sctx->tes_shader)
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
else if (sctx->gs_shader)
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
if (sctx->tes_shader.current)
si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
else if (sctx->gs_shader.current)
si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
else
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
}
/* TES can be bound as ES or VS. */
r = si_update_scratch_buffer(sctx, sctx->tes_shader);
r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
if (r < 0)
return false;
if (r == 1) {
if (sctx->gs_shader)
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
if (sctx->gs_shader.current)
si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
else
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
}
}
@ -1361,7 +1378,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
if (!ureg)
return; /* if we get here, we're screwed */
assert(!sctx->fixed_func_tcs_shader);
assert(!sctx->fixed_func_tcs_shader.cso);
ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
@ -1376,7 +1393,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
ureg_MOV(ureg, tessinner, const1);
ureg_END(ureg);
sctx->fixed_func_tcs_shader =
sctx->fixed_func_tcs_shader.cso =
ureg_create_shader_and_destroy(ureg, &sctx->b.b);
}
@ -1384,7 +1401,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
{
/* Calculate the index of the config.
* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
if (!*pm4) {
@ -1392,17 +1409,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
*pm4 = CALLOC_STRUCT(si_pm4_state);
if (sctx->tes_shader) {
if (sctx->tes_shader.cso) {
stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
S_028B54_HS_EN(1);
if (sctx->gs_shader)
if (sctx->gs_shader.cso)
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
else
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
} else if (sctx->gs_shader) {
} else if (sctx->gs_shader.cso) {
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
@ -1432,7 +1449,7 @@ bool si_update_shaders(struct si_context *sctx)
int r;
/* Update stages before GS. */
if (sctx->tes_shader) {
if (sctx->tes_shader.cso) {
if (!sctx->tf_ring) {
si_init_tess_factor_ring(sctx);
if (!sctx->tf_ring)
@ -1440,65 +1457,65 @@ bool si_update_shaders(struct si_context *sctx)
}
/* VS as LS */
r = si_shader_select(ctx, sctx->vs_shader);
r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
if (sctx->tcs_shader) {
r = si_shader_select(ctx, sctx->tcs_shader);
if (sctx->tcs_shader.cso) {
r = si_shader_select(ctx, &sctx->tcs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
} else {
if (!sctx->fixed_func_tcs_shader) {
if (!sctx->fixed_func_tcs_shader.cso) {
si_generate_fixed_func_tcs(sctx);
if (!sctx->fixed_func_tcs_shader)
if (!sctx->fixed_func_tcs_shader.cso)
return false;
}
r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, hs,
sctx->fixed_func_tcs_shader->current->pm4);
sctx->fixed_func_tcs_shader.current->pm4);
}
r = si_shader_select(ctx, sctx->tes_shader);
r = si_shader_select(ctx, &sctx->tes_shader);
if (r)
return false;
if (sctx->gs_shader) {
if (sctx->gs_shader.cso) {
/* TES as ES */
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
} else {
/* TES as VS */
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
si_update_so(sctx, sctx->tes_shader);
si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
si_update_so(sctx, sctx->tes_shader.cso);
}
} else if (sctx->gs_shader) {
} else if (sctx->gs_shader.cso) {
/* VS as ES */
r = si_shader_select(ctx, sctx->vs_shader);
r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
} else {
/* VS as VS */
r = si_shader_select(ctx, sctx->vs_shader);
r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
si_update_so(sctx, sctx->vs_shader);
si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
si_update_so(sctx, sctx->vs_shader.cso);
}
/* Update GS. */
if (sctx->gs_shader) {
r = si_shader_select(ctx, sctx->gs_shader);
if (sctx->gs_shader.cso) {
r = si_shader_select(ctx, &sctx->gs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
si_update_so(sctx, sctx->gs_shader);
si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
si_update_so(sctx, sctx->gs_shader.cso);
if (!sctx->gsvs_ring) {
si_init_gs_rings(sctx);
@ -1514,10 +1531,10 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
r = si_shader_select(ctx, sctx->ps_shader);
r = si_shader_select(ctx, &sctx->ps_shader);
if (r)
return false;
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
sctx->sprite_coord_enable != rs->sprite_coord_enable ||
@ -1543,13 +1560,13 @@ bool si_update_shaders(struct si_context *sctx)
return false;
}
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)

View file

@ -248,6 +248,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
/* should only get here on unhandled cases */

View file

@ -381,6 +381,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
@ -455,6 +456,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
/* If we get here, we failed to handle a cap above */
debug_printf("Unexpected fragment shader query %u\n", param);
@ -511,6 +514,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
}
/* If we get here, we failed to handle a cap above */
debug_printf("Unexpected vertex shader query %u\n", param);
@ -600,6 +605,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
debug_printf("Unexpected vgpu10 shader query %u\n", param);
return 0;

View file

@ -393,7 +393,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
continue;
nir_variable *output_var = NULL;
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
nir_foreach_variable(var, &c->s->outputs) {
if (var->data.driver_location == intr->const_index[0]) {
output_var = var;
break;

View file

@ -23,6 +23,7 @@
#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"
#include "util/u_format.h"
/**
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
@ -50,20 +51,188 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
nir_instr_remove(&intr->instr);
}
static nir_ssa_def *
vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
return nir_ubitfield_extract(b,
src,
nir_imm_int(b, 8 * chan),
nir_imm_int(b, 8));
}
/** Returns the 16 bit field as a sign-extended 32-bit value. */
static nir_ssa_def *
vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
return nir_ibitfield_extract(b,
src,
nir_imm_int(b, 16 * chan),
nir_imm_int(b, 16));
}
/** Returns the 16 bit field as an unsigned 32 bit value. */
static nir_ssa_def *
vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
if (chan == 0) {
return nir_iand(b, src, nir_imm_int(b, 0xffff));
} else {
return nir_ushr(b, src, nir_imm_int(b, 16));
}
}
static nir_ssa_def *
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
}
static nir_ssa_def *
vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
nir_builder *b,
nir_ssa_def **vpm_reads,
uint8_t swiz,
const struct util_format_description *desc)
{
const struct util_format_channel_description *chan =
&desc->channel[swiz];
nir_ssa_def *temp;
if (swiz > UTIL_FORMAT_SWIZZLE_W) {
return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
} else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
} else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
if (chan->normalized) {
return nir_fmul(b,
nir_i2f(b, vpm_reads[swiz]),
nir_imm_float(b,
1.0 / 0x7fffffff));
} else {
return nir_i2f(b, vpm_reads[swiz]);
}
} else if (chan->size == 8 &&
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
nir_ssa_def *vpm = vpm_reads[0];
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
if (chan->normalized) {
return nir_fsub(b, nir_fmul(b,
vc4_nir_unpack_8f(b, temp, swiz),
nir_imm_float(b, 2.0)),
nir_imm_float(b, 1.0));
} else {
return nir_fadd(b,
nir_i2f(b,
vc4_nir_unpack_8i(b, temp,
swiz)),
nir_imm_float(b, -128.0));
}
} else {
if (chan->normalized) {
return vc4_nir_unpack_8f(b, vpm, swiz);
} else {
return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
}
}
} else if (chan->size == 16 &&
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
nir_ssa_def *vpm = vpm_reads[swiz / 2];
/* Note that UNPACK_16F eats a half float, not ints, so we use
* UNPACK_16_I for all of these.
*/
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
if (chan->normalized) {
return nir_fmul(b, temp,
nir_imm_float(b, 1/32768.0f));
} else {
return temp;
}
} else {
temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
if (chan->normalized) {
return nir_fmul(b, temp,
nir_imm_float(b, 1 / 65535.0));
} else {
return temp;
}
}
} else {
return NULL;
}
}
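As a quick sanity check on the signed-normalized 8-bit path above, a hedged, self-contained reproduction of the arithmetic the NIR sequence performs (illustrative only, not part of the patch):

#include <stdint.h>

/* XOR with 0x80 rebiases a signed byte s to s + 128; the unorm unpack
 * divides by 255; *2 - 1 recentres it.  Net result: (2*s + 1) / 255,
 * which closely tracks the ideal snorm value s / 127 and hits -1.0 and
 * 1.0 exactly at the endpoints. */
static float
example_snorm8(int8_t s)
{
   uint8_t biased = (uint8_t)s ^ 0x80;   /* s + 128 */
   float unorm = biased / 255.0f;        /* what nir_unpack_unorm_4x8 yields */
   return unorm * 2.0f - 1.0f;           /* (2*s + 1) / 255 */
}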
static void
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
VC4_NIR_TLB_COLOR_READ_INPUT) {
int attr = intr->const_index[0];
enum pipe_format format = c->vs_key->attr_formats[attr];
uint32_t attr_size = util_format_get_blocksize(format);
/* All TGSI-to-NIR inputs are vec4. */
assert(intr->num_components == 4);
/* Generate dword loads for the VPM values (since these intrinsics may
* be reordered, the actual reads will be generated at the top of the
* shader by ntq_setup_inputs()).
*/
nir_ssa_def *vpm_reads[4];
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
nir_intrinsic_instr *intr_comp =
nir_intrinsic_instr_create(c->s,
nir_intrinsic_load_input);
intr_comp->num_components = 1;
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
vpm_reads[i] = &intr_comp->dest.ssa;
}
bool format_warned = false;
const struct util_format_description *desc =
util_format_description(format);
nir_ssa_def *dests[4];
for (int i = 0; i < 4; i++) {
uint8_t swiz = desc->swizzle[i];
dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
desc);
if (!dests[i]) {
if (!format_warned) {
fprintf(stderr,
"vtx element %d unsupported type: %s\n",
attr, util_format_name(format));
format_warned = true;
}
dests[i] = nir_imm_float(b, 0.0);
}
}
replace_intrinsic_with_vec4(b, intr, dests);
}
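
As a quick illustration of the align(attr_size, 4) / 4 read count used above (a standalone sketch, not driver code):

/* Number of 32-bit VPM reads needed for a vertex attribute: a 4-byte
 * RGBA8 attribute needs 1 read, a 12-byte R32G32B32_FLOAT attribute
 * needs 3, and a 6-byte R16G16B16 attribute is rounded up to 2.
 */
static unsigned vpm_dword_reads(unsigned attr_size_bytes)
{
        return (attr_size_bytes + 3) / 4;   /* same as align(attr_size, 4) / 4 */
}
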
static void
vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
/* This doesn't need any lowering. */
return;
}
nir_variable *input_var = NULL;
foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
nir_foreach_variable(var, &c->s->inputs) {
if (var->data.driver_location == intr->const_index[0]) {
input_var = var;
break;
@ -87,38 +256,31 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
dests[i] = &intr_comp->dest.ssa;
}
switch (c->stage) {
case QSTAGE_FRAG:
if (input_var->data.location == VARYING_SLOT_FACE) {
dests[0] = nir_fsub(b,
nir_imm_float(b, 1.0),
nir_fmul(b,
nir_i2f(b, dests[0]),
nir_imm_float(b, 2.0)));
dests[1] = nir_imm_float(b, 0.0);
if (input_var->data.location == VARYING_SLOT_FACE) {
dests[0] = nir_fsub(b,
nir_imm_float(b, 1.0),
nir_fmul(b,
nir_i2f(b, dests[0]),
nir_imm_float(b, 2.0)));
dests[1] = nir_imm_float(b, 0.0);
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
} else if (input_var->data.location >= VARYING_SLOT_VAR0) {
if (c->fs_key->point_sprite_mask &
(1 << (input_var->data.location -
VARYING_SLOT_VAR0))) {
if (!c->fs_key->is_points) {
dests[0] = nir_imm_float(b, 0.0);
dests[1] = nir_imm_float(b, 0.0);
}
if (c->fs_key->point_coord_upper_left) {
dests[1] = nir_fsub(b,
nir_imm_float(b, 1.0),
dests[1]);
}
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
} else if (input_var->data.location >= VARYING_SLOT_VAR0) {
if (c->fs_key->point_sprite_mask &
(1 << (input_var->data.location -
VARYING_SLOT_VAR0))) {
if (!c->fs_key->is_points) {
dests[0] = nir_imm_float(b, 0.0);
dests[1] = nir_imm_float(b, 0.0);
}
if (c->fs_key->point_coord_upper_left) {
dests[1] = nir_fsub(b,
nir_imm_float(b, 1.0),
dests[1]);
}
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
}
}
break;
case QSTAGE_COORD:
case QSTAGE_VERT:
break;
}
replace_intrinsic_with_vec4(b, intr, dests);
@ -129,7 +291,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
nir_variable *output_var = NULL;
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
nir_foreach_variable(var, &c->s->outputs) {
if (var->data.driver_location == intr->const_index[0]) {
output_var = var;
break;
@ -232,7 +394,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
vc4_nir_lower_input(c, b, intr);
if (c->stage == QSTAGE_FRAG)
vc4_nir_lower_fs_input(c, b, intr);
else
vc4_nir_lower_vertex_attr(c, b, intr);
break;
case nir_intrinsic_store_output:

View file

@ -602,126 +602,18 @@ ntq_fsign(struct vc4_compile *c, struct qreg src)
qir_uniform_f(c, -1.0));
}
static struct qreg
get_channel_from_vpm(struct vc4_compile *c,
struct qreg *vpm_reads,
uint8_t swiz,
const struct util_format_description *desc)
{
const struct util_format_channel_description *chan =
&desc->channel[swiz];
struct qreg temp;
if (swiz > UTIL_FORMAT_SWIZZLE_W)
return get_swizzled_channel(c, vpm_reads, swiz);
else if (chan->size == 32 &&
chan->type == UTIL_FORMAT_TYPE_FLOAT) {
return get_swizzled_channel(c, vpm_reads, swiz);
} else if (chan->size == 32 &&
chan->type == UTIL_FORMAT_TYPE_SIGNED) {
if (chan->normalized) {
return qir_FMUL(c,
qir_ITOF(c, vpm_reads[swiz]),
qir_uniform_f(c,
1.0 / 0x7fffffff));
} else {
return qir_ITOF(c, vpm_reads[swiz]);
}
} else if (chan->size == 8 &&
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
struct qreg vpm = vpm_reads[0];
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
if (chan->normalized) {
return qir_FSUB(c, qir_FMUL(c,
qir_UNPACK_8_F(c, temp, swiz),
qir_uniform_f(c, 2.0)),
qir_uniform_f(c, 1.0));
} else {
return qir_FADD(c,
qir_ITOF(c,
qir_UNPACK_8_I(c, temp,
swiz)),
qir_uniform_f(c, -128.0));
}
} else {
if (chan->normalized) {
return qir_UNPACK_8_F(c, vpm, swiz);
} else {
return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
}
}
} else if (chan->size == 16 &&
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
struct qreg vpm = vpm_reads[swiz / 2];
/* Note that UNPACK_16F eats a half float, not ints, so we use
* UNPACK_16_I for all of these.
*/
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
if (chan->normalized) {
return qir_FMUL(c, temp,
qir_uniform_f(c, 1/32768.0f));
} else {
return temp;
}
} else {
/* UNPACK_16I sign-extends, so we have to emit ANDs. */
temp = vpm;
if (swiz == 1 || swiz == 3)
temp = qir_UNPACK_16_I(c, temp, 1);
temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
temp = qir_ITOF(c, temp);
if (chan->normalized) {
return qir_FMUL(c, temp,
qir_uniform_f(c, 1 / 65535.0));
} else {
return temp;
}
}
} else {
return c->undef;
}
}
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
enum pipe_format format = c->vs_key->attr_formats[attr];
uint32_t attr_size = util_format_get_blocksize(format);
struct qreg vpm_reads[4];
c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
struct qreg vpm = { QFILE_VPM, attr * 4 + i };
vpm_reads[i] = qir_MOV(c, vpm);
c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
c->num_inputs++;
}
bool format_warned = false;
const struct util_format_description *desc =
util_format_description(format);
for (int i = 0; i < 4; i++) {
uint8_t swiz = desc->swizzle[i];
struct qreg result = get_channel_from_vpm(c, vpm_reads,
swiz, desc);
if (result.file == QFILE_NULL) {
if (!format_warned) {
fprintf(stderr,
"vtx element %d unsupported type: %s\n",
attr, util_format_name(format));
format_warned = true;
}
result = qir_uniform_f(c, 0.0);
}
c->inputs[attr * 4 + i] = result;
}
}
static void
@ -876,6 +768,40 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
*dest = result;
}
/** Handles sign-extended bitfield extracts for 16 bits. */
static struct qreg
ntq_emit_ibfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
struct qreg bits)
{
assert(bits.file == QFILE_UNIF &&
c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
c->uniform_data[bits.index] == 16);
assert(offset.file == QFILE_UNIF &&
c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
int offset_bit = c->uniform_data[offset.index];
assert(offset_bit % 16 == 0);
return qir_UNPACK_16_I(c, base, offset_bit / 16);
}
/** Handles unsigned bitfield extracts for 8 bits. */
static struct qreg
ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
struct qreg bits)
{
assert(bits.file == QFILE_UNIF &&
c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
c->uniform_data[bits.index] == 8);
assert(offset.file == QFILE_UNIF &&
c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
int offset_bit = c->uniform_data[offset.index];
assert(offset_bit % 8 == 0);
return qir_UNPACK_8_I(c, base, offset_bit / 8);
}
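
For comparison, the generic bitfield-extract semantics that the two helpers above specialize can be written as a plain C sketch (assuming 0 < bits < 32 and offset + bits <= 32):

#include <stdint.h>

/* Generic signed bitfield extract: shift the field to the top of the word,
 * then arithmetic-shift it back down so the result is sign-extended.
 * ntq_emit_ibfe() above only has to handle offset in {0, 16} with
 * bits == 16, which maps directly onto the hardware 16-bit unpack.
 */
static int32_t ibfe(int32_t base, unsigned offset, unsigned bits)
{
        return (int32_t)((uint32_t)base << (32 - bits - offset)) >> (32 - bits);
}

/* Unsigned variant; ntq_emit_ubfe() above handles offset in
 * {0, 8, 16, 24} with bits == 8 via the hardware 8-bit unpack.
 */
static uint32_t ubfe(uint32_t base, unsigned offset, unsigned bits)
{
        return (base >> offset) & ((1u << bits) - 1u);
}
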
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
@ -1106,6 +1032,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
qir_SUB(c, qir_uniform_ui(c, 0), src[0]));
break;
case nir_op_ibitfield_extract:
*dest = ntq_emit_ibfe(c, src[0], src[1], src[2]);
break;
case nir_op_ubitfield_extract:
*dest = ntq_emit_ubfe(c, src[0], src[1], src[2]);
break;
default:
fprintf(stderr, "unknown NIR ALU inst: ");
nir_print_instr(&instr->instr, stderr);
@ -1383,13 +1317,13 @@ static void
ntq_setup_inputs(struct vc4_compile *c)
{
unsigned num_entries = 0;
foreach_list_typed(nir_variable, var, node, &c->s->inputs)
nir_foreach_variable(var, &c->s->inputs)
num_entries++;
nir_variable *vars[num_entries];
unsigned i = 0;
foreach_list_typed(nir_variable, var, node, &c->s->inputs)
nir_foreach_variable(var, &c->s->inputs)
vars[i++] = var;
/* Sort the variables so that we emit the input setup in
@ -1432,7 +1366,7 @@ ntq_setup_inputs(struct vc4_compile *c)
static void
ntq_setup_outputs(struct vc4_compile *c)
{
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
nir_foreach_variable(var, &c->s->outputs) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
unsigned loc = var->data.driver_location * 4;
@ -1471,7 +1405,7 @@ ntq_setup_outputs(struct vc4_compile *c)
static void
ntq_setup_uniforms(struct vc4_compile *c)
{
foreach_list_typed(nir_variable, var, node, &c->s->uniforms) {
nir_foreach_variable(var, &c->s->uniforms) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
unsigned array_elem_size = 4 * sizeof(float);

View file

@ -182,6 +182,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* Stream output. */
@ -336,6 +337,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return VC4_MAX_TEXTURE_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
default:
fprintf(stderr, "unknown shader param %d\n", param);
return 0;


View file

@ -632,6 +632,7 @@ enum pipe_cap
PIPE_CAP_DEPTH_BOUNDS_TEST,
PIPE_CAP_TGSI_TXQS,
PIPE_CAP_FORCE_PERSAMPLE_INTERP,
PIPE_CAP_SHAREABLE_SHADERS,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@ -696,7 +697,8 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
};
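
A state tracker would query the new hint through the existing get_shader_param hook; a minimal sketch (assuming a valid struct pipe_screen *screen) could look like:

/* Ask the driver how many iterations a loop may have before unrolling it
 * stops being worthwhile; vc4 reports 32 above.
 */
int max_unroll =
        screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                                 PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
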
/**

View file

@ -267,7 +267,9 @@ union tgsi_immediate_data
#define TGSI_PROPERTY_TES_SPACING 12
#define TGSI_PROPERTY_TES_VERTEX_ORDER_CW 13
#define TGSI_PROPERTY_TES_POINT_MODE 14
#define TGSI_PROPERTY_COUNT 15
#define TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 15
#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
#define TGSI_PROPERTY_COUNT 17
struct tgsi_property {
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */

View file

@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
}
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
if (!priv->picture.h264.field_pic_flag)
priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
if (!priv->picture.h264.field_pic_flag) {
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] +
priv->codec_data.h264.delta_pic_order_cnt_bottom;
} else if (!priv->picture.h264.bottom_field_flag)
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
else
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
} else if (sps->pic_order_cnt_type == 1) {
unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);

View file

@ -14,3 +14,340 @@ EXPORTS
OSMesaGetProcAddress
OSMesaColorClamp
OSMesaPostprocess
glAccum
glAlphaFunc
glAreTexturesResident
glArrayElement
glBegin
glBindTexture
glBitmap
glBlendFunc
glCallList
glCallLists
glClear
glClearAccum
glClearColor
glClearDepth
glClearIndex
glClearStencil
glClipPlane
glColor3b
glColor3bv
glColor3d
glColor3dv
glColor3f
glColor3fv
glColor3i
glColor3iv
glColor3s
glColor3sv
glColor3ub
glColor3ubv
glColor3ui
glColor3uiv
glColor3us
glColor3usv
glColor4b
glColor4bv
glColor4d
glColor4dv
glColor4f
glColor4fv
glColor4i
glColor4iv
glColor4s
glColor4sv
glColor4ub
glColor4ubv
glColor4ui
glColor4uiv
glColor4us
glColor4usv
glColorMask
glColorMaterial
glColorPointer
glCopyPixels
glCopyTexImage1D
glCopyTexImage2D
glCopyTexSubImage1D
glCopyTexSubImage2D
glCullFace
; glDebugEntry
glDeleteLists
glDeleteTextures
glDepthFunc
glDepthMask
glDepthRange
glDisable
glDisableClientState
glDrawArrays
glDrawBuffer
glDrawElements
glDrawPixels
glEdgeFlag
glEdgeFlagPointer
glEdgeFlagv
glEnable
glEnableClientState
glEnd
glEndList
glEvalCoord1d
glEvalCoord1dv
glEvalCoord1f
glEvalCoord1fv
glEvalCoord2d
glEvalCoord2dv
glEvalCoord2f
glEvalCoord2fv
glEvalMesh1
glEvalMesh2
glEvalPoint1
glEvalPoint2
glFeedbackBuffer
glFinish
glFlush
glFogf
glFogfv
glFogi
glFogiv
glFrontFace
glFrustum
glGenLists
glGenTextures
glGetBooleanv
glGetClipPlane
glGetDoublev
glGetError
glGetFloatv
glGetIntegerv
glGetLightfv
glGetLightiv
glGetMapdv
glGetMapfv
glGetMapiv
glGetMaterialfv
glGetMaterialiv
glGetPixelMapfv
glGetPixelMapuiv
glGetPixelMapusv
glGetPointerv
glGetPolygonStipple
glGetString
glGetTexEnvfv
glGetTexEnviv
glGetTexGendv
glGetTexGenfv
glGetTexGeniv
glGetTexImage
glGetTexLevelParameterfv
glGetTexLevelParameteriv
glGetTexParameterfv
glGetTexParameteriv
glHint
glIndexMask
glIndexPointer
glIndexd
glIndexdv
glIndexf
glIndexfv
glIndexi
glIndexiv
glIndexs
glIndexsv
glIndexub
glIndexubv
glInitNames
glInterleavedArrays
glIsEnabled
glIsList
glIsTexture
glLightModelf
glLightModelfv
glLightModeli
glLightModeliv
glLightf
glLightfv
glLighti
glLightiv
glLineStipple
glLineWidth
glListBase
glLoadIdentity
glLoadMatrixd
glLoadMatrixf
glLoadName
glLogicOp
glMap1d
glMap1f
glMap2d
glMap2f
glMapGrid1d
glMapGrid1f
glMapGrid2d
glMapGrid2f
glMaterialf
glMaterialfv
glMateriali
glMaterialiv
glMatrixMode
glMultMatrixd
glMultMatrixf
glNewList
glNormal3b
glNormal3bv
glNormal3d
glNormal3dv
glNormal3f
glNormal3fv
glNormal3i
glNormal3iv
glNormal3s
glNormal3sv
glNormalPointer
glOrtho
glPassThrough
glPixelMapfv
glPixelMapuiv
glPixelMapusv
glPixelStoref
glPixelStorei
glPixelTransferf
glPixelTransferi
glPixelZoom
glPointSize
glPolygonMode
glPolygonOffset
glPolygonStipple
glPopAttrib
glPopClientAttrib
glPopMatrix
glPopName
glPrioritizeTextures
glPushAttrib
glPushClientAttrib
glPushMatrix
glPushName
glRasterPos2d
glRasterPos2dv
glRasterPos2f
glRasterPos2fv
glRasterPos2i
glRasterPos2iv
glRasterPos2s
glRasterPos2sv
glRasterPos3d
glRasterPos3dv
glRasterPos3f
glRasterPos3fv
glRasterPos3i
glRasterPos3iv
glRasterPos3s
glRasterPos3sv
glRasterPos4d
glRasterPos4dv
glRasterPos4f
glRasterPos4fv
glRasterPos4i
glRasterPos4iv
glRasterPos4s
glRasterPos4sv
glReadBuffer
glReadPixels
glRectd
glRectdv
glRectf
glRectfv
glRecti
glRectiv
glRects
glRectsv
glRenderMode
glRotated
glRotatef
glScaled
glScalef
glScissor
glSelectBuffer
glShadeModel
glStencilFunc
glStencilMask
glStencilOp
glTexCoord1d
glTexCoord1dv
glTexCoord1f
glTexCoord1fv
glTexCoord1i
glTexCoord1iv
glTexCoord1s
glTexCoord1sv
glTexCoord2d
glTexCoord2dv
glTexCoord2f
glTexCoord2fv
glTexCoord2i
glTexCoord2iv
glTexCoord2s
glTexCoord2sv
glTexCoord3d
glTexCoord3dv
glTexCoord3f
glTexCoord3fv
glTexCoord3i
glTexCoord3iv
glTexCoord3s
glTexCoord3sv
glTexCoord4d
glTexCoord4dv
glTexCoord4f
glTexCoord4fv
glTexCoord4i
glTexCoord4iv
glTexCoord4s
glTexCoord4sv
glTexCoordPointer
glTexEnvf
glTexEnvfv
glTexEnvi
glTexEnviv
glTexGend
glTexGendv
glTexGenf
glTexGenfv
glTexGeni
glTexGeniv
glTexImage1D
glTexImage2D
glTexParameterf
glTexParameterfv
glTexParameteri
glTexParameteriv
glTexSubImage1D
glTexSubImage2D
glTranslated
glTranslatef
glVertex2d
glVertex2dv
glVertex2f
glVertex2fv
glVertex2i
glVertex2iv
glVertex2s
glVertex2sv
glVertex3d
glVertex3dv
glVertex3f
glVertex3fv
glVertex3i
glVertex3iv
glVertex3s
glVertex3sv
glVertex4d
glVertex4dv
glVertex4f
glVertex4fv
glVertex4i
glVertex4iv
glVertex4s
glVertex4sv
glVertexPointer
glViewport

View file

@ -11,3 +11,340 @@ EXPORTS
OSMesaGetProcAddress = OSMesaGetProcAddress@4
OSMesaColorClamp = OSMesaColorClamp@4
OSMesaPostprocess = OSMesaPostprocess@12
glAccum = glAccum@8
glAlphaFunc = glAlphaFunc@8
glAreTexturesResident = glAreTexturesResident@12
glArrayElement = glArrayElement@4
glBegin = glBegin@4
glBindTexture = glBindTexture@8
glBitmap = glBitmap@28
glBlendFunc = glBlendFunc@8
glCallList = glCallList@4
glCallLists = glCallLists@12
glClear = glClear@4
glClearAccum = glClearAccum@16
glClearColor = glClearColor@16
glClearDepth = glClearDepth@8
glClearIndex = glClearIndex@4
glClearStencil = glClearStencil@4
glClipPlane = glClipPlane@8
glColor3b = glColor3b@12
glColor3bv = glColor3bv@4
glColor3d = glColor3d@24
glColor3dv = glColor3dv@4
glColor3f = glColor3f@12
glColor3fv = glColor3fv@4
glColor3i = glColor3i@12
glColor3iv = glColor3iv@4
glColor3s = glColor3s@12
glColor3sv = glColor3sv@4
glColor3ub = glColor3ub@12
glColor3ubv = glColor3ubv@4
glColor3ui = glColor3ui@12
glColor3uiv = glColor3uiv@4
glColor3us = glColor3us@12
glColor3usv = glColor3usv@4
glColor4b = glColor4b@16
glColor4bv = glColor4bv@4
glColor4d = glColor4d@32
glColor4dv = glColor4dv@4
glColor4f = glColor4f@16
glColor4fv = glColor4fv@4
glColor4i = glColor4i@16
glColor4iv = glColor4iv@4
glColor4s = glColor4s@16
glColor4sv = glColor4sv@4
glColor4ub = glColor4ub@16
glColor4ubv = glColor4ubv@4
glColor4ui = glColor4ui@16
glColor4uiv = glColor4uiv@4
glColor4us = glColor4us@16
glColor4usv = glColor4usv@4
glColorMask = glColorMask@16
glColorMaterial = glColorMaterial@8
glColorPointer = glColorPointer@16
glCopyPixels = glCopyPixels@20
glCopyTexImage1D = glCopyTexImage1D@28
glCopyTexImage2D = glCopyTexImage2D@32
glCopyTexSubImage1D = glCopyTexSubImage1D@24
glCopyTexSubImage2D = glCopyTexSubImage2D@32
glCullFace = glCullFace@4
; glDebugEntry = glDebugEntry@8
glDeleteLists = glDeleteLists@8
glDeleteTextures = glDeleteTextures@8
glDepthFunc = glDepthFunc@4
glDepthMask = glDepthMask@4
glDepthRange = glDepthRange@16
glDisable = glDisable@4
glDisableClientState = glDisableClientState@4
glDrawArrays = glDrawArrays@12
glDrawBuffer = glDrawBuffer@4
glDrawElements = glDrawElements@16
glDrawPixels = glDrawPixels@20
glEdgeFlag = glEdgeFlag@4
glEdgeFlagPointer = glEdgeFlagPointer@8
glEdgeFlagv = glEdgeFlagv@4
glEnable = glEnable@4
glEnableClientState = glEnableClientState@4
glEnd = glEnd@0
glEndList = glEndList@0
glEvalCoord1d = glEvalCoord1d@8
glEvalCoord1dv = glEvalCoord1dv@4
glEvalCoord1f = glEvalCoord1f@4
glEvalCoord1fv = glEvalCoord1fv@4
glEvalCoord2d = glEvalCoord2d@16
glEvalCoord2dv = glEvalCoord2dv@4
glEvalCoord2f = glEvalCoord2f@8
glEvalCoord2fv = glEvalCoord2fv@4
glEvalMesh1 = glEvalMesh1@12
glEvalMesh2 = glEvalMesh2@20
glEvalPoint1 = glEvalPoint1@4
glEvalPoint2 = glEvalPoint2@8
glFeedbackBuffer = glFeedbackBuffer@12
glFinish = glFinish@0
glFlush = glFlush@0
glFogf = glFogf@8
glFogfv = glFogfv@8
glFogi = glFogi@8
glFogiv = glFogiv@8
glFrontFace = glFrontFace@4
glFrustum = glFrustum@48
glGenLists = glGenLists@4
glGenTextures = glGenTextures@8
glGetBooleanv = glGetBooleanv@8
glGetClipPlane = glGetClipPlane@8
glGetDoublev = glGetDoublev@8
glGetError = glGetError@0
glGetFloatv = glGetFloatv@8
glGetIntegerv = glGetIntegerv@8
glGetLightfv = glGetLightfv@12
glGetLightiv = glGetLightiv@12
glGetMapdv = glGetMapdv@12
glGetMapfv = glGetMapfv@12
glGetMapiv = glGetMapiv@12
glGetMaterialfv = glGetMaterialfv@12
glGetMaterialiv = glGetMaterialiv@12
glGetPixelMapfv = glGetPixelMapfv@8
glGetPixelMapuiv = glGetPixelMapuiv@8
glGetPixelMapusv = glGetPixelMapusv@8
glGetPointerv = glGetPointerv@8
glGetPolygonStipple = glGetPolygonStipple@4
glGetString = glGetString@4
glGetTexEnvfv = glGetTexEnvfv@12
glGetTexEnviv = glGetTexEnviv@12
glGetTexGendv = glGetTexGendv@12
glGetTexGenfv = glGetTexGenfv@12
glGetTexGeniv = glGetTexGeniv@12
glGetTexImage = glGetTexImage@20
glGetTexLevelParameterfv = glGetTexLevelParameterfv@16
glGetTexLevelParameteriv = glGetTexLevelParameteriv@16
glGetTexParameterfv = glGetTexParameterfv@12
glGetTexParameteriv = glGetTexParameteriv@12
glHint = glHint@8
glIndexMask = glIndexMask@4
glIndexPointer = glIndexPointer@12
glIndexd = glIndexd@8
glIndexdv = glIndexdv@4
glIndexf = glIndexf@4
glIndexfv = glIndexfv@4
glIndexi = glIndexi@4
glIndexiv = glIndexiv@4
glIndexs = glIndexs@4
glIndexsv = glIndexsv@4
glIndexub = glIndexub@4
glIndexubv = glIndexubv@4
glInitNames = glInitNames@0
glInterleavedArrays = glInterleavedArrays@12
glIsEnabled = glIsEnabled@4
glIsList = glIsList@4
glIsTexture = glIsTexture@4
glLightModelf = glLightModelf@8
glLightModelfv = glLightModelfv@8
glLightModeli = glLightModeli@8
glLightModeliv = glLightModeliv@8
glLightf = glLightf@12
glLightfv = glLightfv@12
glLighti = glLighti@12
glLightiv = glLightiv@12
glLineStipple = glLineStipple@8
glLineWidth = glLineWidth@4
glListBase = glListBase@4
glLoadIdentity = glLoadIdentity@0
glLoadMatrixd = glLoadMatrixd@4
glLoadMatrixf = glLoadMatrixf@4
glLoadName = glLoadName@4
glLogicOp = glLogicOp@4
glMap1d = glMap1d@32
glMap1f = glMap1f@24
glMap2d = glMap2d@56
glMap2f = glMap2f@40
glMapGrid1d = glMapGrid1d@20
glMapGrid1f = glMapGrid1f@12
glMapGrid2d = glMapGrid2d@40
glMapGrid2f = glMapGrid2f@24
glMaterialf = glMaterialf@12
glMaterialfv = glMaterialfv@12
glMateriali = glMateriali@12
glMaterialiv = glMaterialiv@12
glMatrixMode = glMatrixMode@4
glMultMatrixd = glMultMatrixd@4
glMultMatrixf = glMultMatrixf@4
glNewList = glNewList@8
glNormal3b = glNormal3b@12
glNormal3bv = glNormal3bv@4
glNormal3d = glNormal3d@24
glNormal3dv = glNormal3dv@4
glNormal3f = glNormal3f@12
glNormal3fv = glNormal3fv@4
glNormal3i = glNormal3i@12
glNormal3iv = glNormal3iv@4
glNormal3s = glNormal3s@12
glNormal3sv = glNormal3sv@4
glNormalPointer = glNormalPointer@12
glOrtho = glOrtho@48
glPassThrough = glPassThrough@4
glPixelMapfv = glPixelMapfv@12
glPixelMapuiv = glPixelMapuiv@12
glPixelMapusv = glPixelMapusv@12
glPixelStoref = glPixelStoref@8
glPixelStorei = glPixelStorei@8
glPixelTransferf = glPixelTransferf@8
glPixelTransferi = glPixelTransferi@8
glPixelZoom = glPixelZoom@8
glPointSize = glPointSize@4
glPolygonMode = glPolygonMode@8
glPolygonOffset = glPolygonOffset@8
glPolygonStipple = glPolygonStipple@4
glPopAttrib = glPopAttrib@0
glPopClientAttrib = glPopClientAttrib@0
glPopMatrix = glPopMatrix@0
glPopName = glPopName@0
glPrioritizeTextures = glPrioritizeTextures@12
glPushAttrib = glPushAttrib@4
glPushClientAttrib = glPushClientAttrib@4
glPushMatrix = glPushMatrix@0
glPushName = glPushName@4
glRasterPos2d = glRasterPos2d@16
glRasterPos2dv = glRasterPos2dv@4
glRasterPos2f = glRasterPos2f@8
glRasterPos2fv = glRasterPos2fv@4
glRasterPos2i = glRasterPos2i@8
glRasterPos2iv = glRasterPos2iv@4
glRasterPos2s = glRasterPos2s@8
glRasterPos2sv = glRasterPos2sv@4
glRasterPos3d = glRasterPos3d@24
glRasterPos3dv = glRasterPos3dv@4
glRasterPos3f = glRasterPos3f@12
glRasterPos3fv = glRasterPos3fv@4
glRasterPos3i = glRasterPos3i@12
glRasterPos3iv = glRasterPos3iv@4
glRasterPos3s = glRasterPos3s@12
glRasterPos3sv = glRasterPos3sv@4
glRasterPos4d = glRasterPos4d@32
glRasterPos4dv = glRasterPos4dv@4
glRasterPos4f = glRasterPos4f@16
glRasterPos4fv = glRasterPos4fv@4
glRasterPos4i = glRasterPos4i@16
glRasterPos4iv = glRasterPos4iv@4
glRasterPos4s = glRasterPos4s@16
glRasterPos4sv = glRasterPos4sv@4
glReadBuffer = glReadBuffer@4
glReadPixels = glReadPixels@28
glRectd = glRectd@32
glRectdv = glRectdv@8
glRectf = glRectf@16
glRectfv = glRectfv@8
glRecti = glRecti@16
glRectiv = glRectiv@8
glRects = glRects@16
glRectsv = glRectsv@8
glRenderMode = glRenderMode@4
glRotated = glRotated@32
glRotatef = glRotatef@16
glScaled = glScaled@24
glScalef = glScalef@12
glScissor = glScissor@16
glSelectBuffer = glSelectBuffer@8
glShadeModel = glShadeModel@4
glStencilFunc = glStencilFunc@12
glStencilMask = glStencilMask@4
glStencilOp = glStencilOp@12
glTexCoord1d = glTexCoord1d@8
glTexCoord1dv = glTexCoord1dv@4
glTexCoord1f = glTexCoord1f@4
glTexCoord1fv = glTexCoord1fv@4
glTexCoord1i = glTexCoord1i@4
glTexCoord1iv = glTexCoord1iv@4
glTexCoord1s = glTexCoord1s@4
glTexCoord1sv = glTexCoord1sv@4
glTexCoord2d = glTexCoord2d@16
glTexCoord2dv = glTexCoord2dv@4
glTexCoord2f = glTexCoord2f@8
glTexCoord2fv = glTexCoord2fv@4
glTexCoord2i = glTexCoord2i@8
glTexCoord2iv = glTexCoord2iv@4
glTexCoord2s = glTexCoord2s@8
glTexCoord2sv = glTexCoord2sv@4
glTexCoord3d = glTexCoord3d@24
glTexCoord3dv = glTexCoord3dv@4
glTexCoord3f = glTexCoord3f@12
glTexCoord3fv = glTexCoord3fv@4
glTexCoord3i = glTexCoord3i@12
glTexCoord3iv = glTexCoord3iv@4
glTexCoord3s = glTexCoord3s@12
glTexCoord3sv = glTexCoord3sv@4
glTexCoord4d = glTexCoord4d@32
glTexCoord4dv = glTexCoord4dv@4
glTexCoord4f = glTexCoord4f@16
glTexCoord4fv = glTexCoord4fv@4
glTexCoord4i = glTexCoord4i@16
glTexCoord4iv = glTexCoord4iv@4
glTexCoord4s = glTexCoord4s@16
glTexCoord4sv = glTexCoord4sv@4
glTexCoordPointer = glTexCoordPointer@16
glTexEnvf = glTexEnvf@12
glTexEnvfv = glTexEnvfv@12
glTexEnvi = glTexEnvi@12
glTexEnviv = glTexEnviv@12
glTexGend = glTexGend@16
glTexGendv = glTexGendv@12
glTexGenf = glTexGenf@12
glTexGenfv = glTexGenfv@12
glTexGeni = glTexGeni@12
glTexGeniv = glTexGeniv@12
glTexImage1D = glTexImage1D@32
glTexImage2D = glTexImage2D@36
glTexParameterf = glTexParameterf@12
glTexParameterfv = glTexParameterfv@12
glTexParameteri = glTexParameteri@12
glTexParameteriv = glTexParameteriv@12
glTexSubImage1D = glTexSubImage1D@28
glTexSubImage2D = glTexSubImage2D@36
glTranslated = glTranslated@24
glTranslatef = glTranslatef@12
glVertex2d = glVertex2d@16
glVertex2dv = glVertex2dv@4
glVertex2f = glVertex2f@8
glVertex2fv = glVertex2fv@4
glVertex2i = glVertex2i@8
glVertex2iv = glVertex2iv@4
glVertex2s = glVertex2s@8
glVertex2sv = glVertex2sv@4
glVertex3d = glVertex3d@24
glVertex3dv = glVertex3dv@4
glVertex3f = glVertex3f@12
glVertex3fv = glVertex3fv@4
glVertex3i = glVertex3i@12
glVertex3iv = glVertex3iv@4
glVertex3s = glVertex3s@12
glVertex3sv = glVertex3sv@4
glVertex4d = glVertex4d@32
glVertex4dv = glVertex4dv@4
glVertex4f = glVertex4f@16
glVertex4fv = glVertex4fv@4
glVertex4i = glVertex4i@16
glVertex4iv = glVertex4iv@4
glVertex4s = glVertex4s@16
glVertex4sv = glVertex4sv@4
glVertexPointer = glVertexPointer@16
glViewport = glViewport@16

View file

@ -610,6 +610,37 @@ match_subroutine_by_name(const char *name,
return sig;
}
static ir_rvalue *
generate_array_index(void *mem_ctx, exec_list *instructions,
struct _mesa_glsl_parse_state *state, YYLTYPE loc,
const ast_expression *array, ast_expression *idx,
const char **function_name, exec_list *actual_parameters)
{
if (array->oper == ast_array_index) {
/* This handles arrays of arrays */
ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions,
state, loc,
array->subexpressions[0],
array->subexpressions[1],
function_name, actual_parameters);
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
YYLTYPE index_loc = idx->get_location();
return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array,
outer_array_idx, loc,
index_loc);
} else {
ir_variable *sub_var = NULL;
*function_name = array->primary_expression.identifier;
match_subroutine_by_name(*function_name, actual_parameters,
state, &sub_var);
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
}
}
static void
print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc,
ir_function *f)
@ -1989,16 +2020,18 @@ ast_function_expression::hir(exec_list *instructions,
ir_variable *sub_var = NULL;
ir_rvalue *array_idx = NULL;
process_parameters(instructions, &actual_parameters, &this->expressions,
state);
if (id->oper == ast_array_index) {
func_name = id->subexpressions[0]->primary_expression.identifier;
array_idx = id->subexpressions[1]->hir(instructions, state);
array_idx = generate_array_index(ctx, instructions, state, loc,
id->subexpressions[0],
id->subexpressions[1], &func_name,
&actual_parameters);
} else {
func_name = id->primary_expression.identifier;
}
process_parameters(instructions, &actual_parameters, &this->expressions,
state);
ir_function_signature *sig =
match_function_by_name(func_name, &actual_parameters, state);

View file

@ -487,54 +487,54 @@ bit_logic_result_type(const struct glsl_type *type_a,
ast_operators op,
struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
{
if (!state->check_bitwise_operations_allowed(loc)) {
return glsl_type::error_type;
}
if (!state->check_bitwise_operations_allowed(loc)) {
return glsl_type::error_type;
}
/* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
*
* "The bitwise operators and (&), exclusive-or (^), and inclusive-or
* (|). The operands must be of type signed or unsigned integers or
* integer vectors."
*/
if (!type_a->is_integer()) {
_mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
ast_expression::operator_string(op));
return glsl_type::error_type;
}
if (!type_b->is_integer()) {
_mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
/* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
*
* "The bitwise operators and (&), exclusive-or (^), and inclusive-or
* (|). The operands must be of type signed or unsigned integers or
* integer vectors."
*/
if (!type_a->is_integer()) {
_mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
ast_expression::operator_string(op));
return glsl_type::error_type;
}
return glsl_type::error_type;
}
if (!type_b->is_integer()) {
_mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
ast_expression::operator_string(op));
return glsl_type::error_type;
}
/* "The fundamental types of the operands (signed or unsigned) must
* match,"
*/
if (type_a->base_type != type_b->base_type) {
_mesa_glsl_error(loc, state, "operands of `%s' must have the same "
"base type", ast_expression::operator_string(op));
return glsl_type::error_type;
}
/* "The fundamental types of the operands (signed or unsigned) must
* match,"
*/
if (type_a->base_type != type_b->base_type) {
_mesa_glsl_error(loc, state, "operands of `%s' must have the same "
"base type", ast_expression::operator_string(op));
return glsl_type::error_type;
}
/* "The operands cannot be vectors of differing size." */
if (type_a->is_vector() &&
type_b->is_vector() &&
type_a->vector_elements != type_b->vector_elements) {
_mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
"different sizes", ast_expression::operator_string(op));
return glsl_type::error_type;
}
/* "The operands cannot be vectors of differing size." */
if (type_a->is_vector() &&
type_b->is_vector() &&
type_a->vector_elements != type_b->vector_elements) {
_mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
"different sizes", ast_expression::operator_string(op));
return glsl_type::error_type;
}
/* "If one operand is a scalar and the other a vector, the scalar is
* applied component-wise to the vector, resulting in the same type as
* the vector. The fundamental types of the operands [...] will be the
* resulting fundamental type."
*/
if (type_a->is_scalar())
return type_b;
else
return type_a;
/* "If one operand is a scalar and the other a vector, the scalar is
* applied component-wise to the vector, resulting in the same type as
* the vector. The fundamental types of the operands [...] will be the
* resulting fundamental type."
*/
if (type_a->is_scalar())
return type_b;
else
return type_a;
}
static const struct glsl_type *
@ -6294,6 +6294,18 @@ ast_interface_block::hir(exec_list *instructions,
state->struct_specifier_depth--;
for (unsigned i = 0; i < num_variables; i++) {
if (fields[i].stream != -1 &&
(unsigned) fields[i].stream != this->layout.stream) {
_mesa_glsl_error(&loc, state,
"stream layout qualifier on "
"interface block member `%s' does not match "
"the interface block (%d vs %d)",
fields[i].name, fields[i].stream,
this->layout.stream);
}
}
if (!redeclaring_per_vertex) {
validate_identifier(this->block_name, loc, state);
@ -6634,6 +6646,8 @@ ast_interface_block::hir(exec_list *instructions,
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
var->data.binding = this->layout.binding;
var->data.stream = this->layout.stream;
state->symbols->add_variable(var);
instructions->push_tail(var);
}
@ -6652,6 +6666,7 @@ ast_interface_block::hir(exec_list *instructions,
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
var->data.stream = this->layout.stream;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@ -6664,17 +6679,6 @@ ast_interface_block::hir(exec_list *instructions,
var->data.matrix_layout = fields[i].matrix_layout;
}
if (fields[i].stream != -1 &&
((unsigned)fields[i].stream) != this->layout.stream) {
_mesa_glsl_error(&loc, state,
"stream layout qualifier on "
"interface block member `%s' does not match "
"the interface block (%d vs %d)",
var->name, fields[i].stream, this->layout.stream);
}
var->data.stream = this->layout.stream;
if (var->data.mode == ir_var_shader_storage) {
var->data.image_read_only = fields[i].image_read_only;
var->data.image_write_only = fields[i].image_write_only;

View file

@ -2609,17 +2609,6 @@ interface_block:
block->layout.is_default_qualifier = false;
foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
ast_type_qualifier& qualifier = member->type->qualifier;
if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
_mesa_glsl_error(& @1, state,
"stream layout qualifier on "
"interface block member does not match "
"the interface block (%d vs %d)",
qualifier.stream, block->layout.stream);
YYERROR;
}
}
$$ = block;
}
| memory_qualifier interface_block

View file

@ -763,7 +763,8 @@ private:
/* Assign explicit locations. */
if (current_var->data.explicit_location) {
/* Set sequential locations for struct fields. */
if (record_type != NULL) {
if (current_var->type->without_array()->is_record() ||
current_var->type->is_array_of_arrays()) {
const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
this->uniforms[id].remap_location =
this->explicit_location + field_counter;
@ -1180,7 +1181,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* Reserve all the explicit locations of the active uniforms. */
for (unsigned i = 0; i < num_uniforms; i++) {
if (uniforms[i].type->is_subroutine())
if (uniforms[i].type->is_subroutine() ||
uniforms[i].is_shader_storage)
continue;
if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
@ -1200,8 +1202,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* Reserve locations for rest of the uniforms. */
for (unsigned i = 0; i < num_uniforms; i++) {
if (uniforms[i].type->is_subroutine())
if (uniforms[i].type->is_subroutine() ||
uniforms[i].is_shader_storage)
continue;
/* Built-in uniforms should not get any location. */
if (uniforms[i].builtin)
continue;

View file

@ -651,7 +651,7 @@ link_invalidate_variable_locations(exec_list *ir)
/**
* Set UsesClipDistance and ClipDistanceArraySize based on the given shader.
* Set clip_distance_array_size based on the given shader.
*
* Also check for errors based on incorrect usage of gl_ClipVertex and
* gl_ClipDistance.
@ -660,10 +660,10 @@ link_invalidate_variable_locations(exec_list *ir)
*/
static void
analyze_clip_usage(struct gl_shader_program *prog,
struct gl_shader *shader, GLboolean *UsesClipDistance,
GLuint *ClipDistanceArraySize)
struct gl_shader *shader,
GLuint *clip_distance_array_size)
{
*ClipDistanceArraySize = 0;
*clip_distance_array_size = 0;
if (!prog->IsES && prog->Version >= 130) {
/* From section 7.1 (Vertex Shader Special Variables) of the
@ -686,13 +686,14 @@ analyze_clip_usage(struct gl_shader_program *prog,
_mesa_shader_stage_to_string(shader->Stage));
return;
}
*UsesClipDistance = clip_distance.variable_found();
ir_variable *clip_distance_var =
shader->symbols->get_variable("gl_ClipDistance");
if (clip_distance_var)
*ClipDistanceArraySize = clip_distance_var->type->length;
} else {
*UsesClipDistance = false;
if (clip_distance.variable_found()) {
ir_variable *clip_distance_var =
shader->symbols->get_variable("gl_ClipDistance");
assert(clip_distance_var);
*clip_distance_array_size = clip_distance_var->type->length;
}
}
}
@ -700,8 +701,7 @@ analyze_clip_usage(struct gl_shader_program *prog,
/**
* Verify that a vertex shader executable meets all semantic requirements.
*
* Also sets prog->Vert.UsesClipDistance and prog->Vert.ClipDistanceArraySize
* as a side effect.
* Also sets prog->Vert.ClipDistanceArraySize as a side effect.
*
* \param shader Vertex shader executable to be verified
*/
@ -754,8 +754,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog,
}
}
analyze_clip_usage(prog, shader, &prog->Vert.UsesClipDistance,
&prog->Vert.ClipDistanceArraySize);
analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize);
}
void
@ -765,8 +764,7 @@ validate_tess_eval_shader_executable(struct gl_shader_program *prog,
if (shader == NULL)
return;
analyze_clip_usage(prog, shader, &prog->TessEval.UsesClipDistance,
&prog->TessEval.ClipDistanceArraySize);
analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize);
}
@ -797,8 +795,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog,
/**
* Verify that a geometry shader executable meets all semantic requirements
*
* Also sets prog->Geom.VerticesIn, prog->Geom.UsesClipDistance, and
* prog->Geom.ClipDistanceArraySize as a side effect.
* Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as
* a side effect.
*
* \param shader Geometry shader executable to be verified
*/
@ -812,8 +810,7 @@ validate_geometry_shader_executable(struct gl_shader_program *prog,
unsigned num_vertices = vertices_per_prim(prog->Geom.InputType);
prog->Geom.VerticesIn = num_vertices;
analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance,
&prog->Geom.ClipDistanceArraySize);
analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize);
}
/**
@ -3117,8 +3114,8 @@ check_explicit_uniform_locations(struct gl_context *ctx,
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *var = node->as_variable();
if (var && (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) &&
var->data.explicit_location) {
if (var && (var->data.mode == ir_var_uniform &&
var->data.explicit_location)) {
bool ret;
if (var->type->is_subroutine())
ret = reserve_subroutine_explicit_locations(prog, sh, var);

View file

@ -186,6 +186,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new_var->data.centroid = iface_t->fields.structure[i].centroid;
new_var->data.sample = iface_t->fields.structure[i].sample;
new_var->data.patch = iface_t->fields.structure[i].patch;
new_var->data.stream = var->data.stream;
new_var->init_interface_type(iface_t);
hash_table_insert(interface_namespace, new_var,

View file

@ -84,7 +84,7 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
continue;
if (ir->array_idx != NULL)
var = new(mem_ctx) ir_dereference_array(ir->sub_var, ir->array_idx->clone(mem_ctx, NULL));
var = ir->array_idx->clone(mem_ctx, NULL);
else
var = new(mem_ctx) ir_dereference_variable(ir->sub_var);

View file

@ -238,6 +238,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
case ir_type_swizzle: {
ir_swizzle *s = (ir_swizzle *) ir;
ir = s->val->as_dereference();
/* Skip swizzle in the next pass */
d = ir;
break;
}

View file

@ -164,15 +164,20 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader->info.outputs_written = sh->Program->OutputsWritten;
shader->info.system_values_read = sh->Program->SystemValuesRead;
shader->info.uses_texture_gather = sh->Program->UsesGather;
shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
shader->info.uses_clip_distance_out =
sh->Program->ClipDistanceArraySize != 0;
shader->info.separate_shader = shader_prog->SeparateShader;
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
switch (stage) {
case MESA_SHADER_GEOMETRY:
shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
shader->info.gs.output_primitive = sh->Geom.OutputType;
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
shader->info.gs.invocations = sh->Geom.Invocations;
shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
break;
case MESA_SHADER_FRAGMENT: {

View file

@ -521,6 +521,11 @@ struct glsl_type {
return base_type == GLSL_TYPE_ARRAY;
}
bool is_array_of_arrays() const
{
return is_array() && fields.array->is_array();
}
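
Callers can combine the new predicate with the existing accessors; a small illustrative fragment (not from the patch, assuming an ir_variable *var in scope) would be:

if (var->type->is_array_of_arrays()) {
   /* Outer dimension length and the inner (array) element type. */
   unsigned outer_len = var->type->length;
   const glsl_type *inner = var->type->fields.array;
   assert(inner->is_array());
}
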
/**
* Query whether or not a type is a record
*/

View file

@ -1521,11 +1521,23 @@ typedef struct nir_shader_info {
union {
struct {
/** The number of vertices received per input primitive */
unsigned vertices_in;
/** The output primitive type (GL enum value) */
unsigned output_primitive;
/** The maximum number of vertices the geometry shader might write. */
unsigned vertices_out;
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
unsigned invocations;
/** Whether or not this shader uses EndPrimitive */
bool uses_end_primitive;
/** Whether or not this shader uses non-zero streams */
bool uses_streams;
} gs;
struct {
@ -1924,7 +1936,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);
int nir_gs_count_vertices(nir_shader *shader);
int nir_gs_count_vertices(const nir_shader *shader);
bool nir_split_var_copies(nir_shader *shader);

View file

@ -51,7 +51,7 @@ as_set_vertex_count(nir_instr *instr)
* counting at the NIR level.
*/
int
nir_gs_count_vertices(nir_shader *shader)
nir_gs_count_vertices(const nir_shader *shader)
{
int count = -1;

View file

@ -327,12 +327,12 @@ struct cfg_t {
#define foreach_inst_in_block_reverse_safe(__type, __inst, __block) \
foreach_in_list_reverse_safe(__type, __inst, &(__block)->instructions)
#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst, __block) \
#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst) \
for (__type *__scan_inst = (__type *)__inst->next; \
!__scan_inst->is_tail_sentinel(); \
__scan_inst = (__type *)__scan_inst->next)
#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst, __block) \
#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst) \
for (__type *__scan_inst = (__type *)__inst->prev; \
!__scan_inst->is_head_sentinel(); \
__scan_inst = (__type *)__scan_inst->prev)

View file

@ -90,6 +90,7 @@ struct brw_compiler {
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
bool scalar_vs;
bool scalar_gs;
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
};
@ -488,6 +489,9 @@ struct brw_vue_prog_data {
struct brw_stage_prog_data base;
struct brw_vue_map vue_map;
/** Should the hardware deliver input VUE handles for URB pull loads? */
bool include_vue_handles;
GLuint urb_read_length;
GLuint total_grf;
@ -596,21 +600,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
unsigned *final_assembly_size,
char **error_str);
/**
* Scratch data used when compiling a GLSL geometry shader.
*/
struct brw_gs_compile
{
struct brw_gs_prog_key key;
struct brw_gs_prog_data prog_data;
struct brw_vue_map input_vue_map;
struct brw_geometry_program *gp;
unsigned control_data_bits_per_vertex;
unsigned control_data_header_size_bits;
};
/**
* Compile a vertex shader.
*
@ -618,10 +607,11 @@ struct brw_gs_compile
*/
const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
struct brw_gs_compile *c,
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
const struct nir_shader *shader,
struct gl_shader_program *shader_prog,
void *mem_ctx,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str);

View file

@ -918,8 +918,8 @@ enum opcode {
* Source 0: [required] Color 0.
* Source 1: [optional] Color 1 (for dual source blend messages).
* Source 2: [optional] Src0 Alpha.
* Source 3: [optional] Source Depth (passthrough from the thread payload).
* Source 4: [optional] Destination Depth (gl_FragDepth).
* Source 3: [optional] Source Depth (gl_FragDepth).
* Source 4: [optional (gen4-5)] Destination Depth passthrough from thread
* payload.
* Source 5: [optional] Sample Mask (gl_SampleMask).
* Source 6: [required] Number of color components (as a UD immediate).
*/
@ -1033,7 +1033,19 @@ enum opcode {
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
/**
* Gen8+ SIMD8 URB Read message.
*
* Source 0: The header register, containing URB handles (g1).
*
* Currently only supports constant offsets, in inst->offset.
*/
SHADER_OPCODE_URB_READ_SIMD8,
SHADER_OPCODE_URB_WRITE_SIMD8,
SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
/**
* Return the index of an arbitrary live channel (i.e. one of the channels
@ -2385,7 +2397,7 @@ enum brw_pixel_shader_coverage_mask_mode {
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
# define GEN9_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)

View file

@ -690,7 +690,7 @@ set_control_index(const struct brw_device_info *devinfo,
for (int i = 0; i < 32; i++) {
if (control_index_table[i] == uncompacted) {
brw_compact_inst_set_control_index(dst, i);
brw_compact_inst_set_control_index(devinfo, dst, i);
return true;
}
}
@ -711,7 +711,7 @@ set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
for (int i = 0; i < 32; i++) {
if (datatype_table[i] == uncompacted) {
brw_compact_inst_set_datatype_index(dst, i);
brw_compact_inst_set_datatype_index(devinfo, dst, i);
return true;
}
}
@ -732,7 +732,7 @@ set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
for (int i = 0; i < 32; i++) {
if (subreg_table[i] == uncompacted) {
brw_compact_inst_set_subreg_index(dst, i);
brw_compact_inst_set_subreg_index(devinfo, dst, i);
return true;
}
}
@ -764,7 +764,7 @@ set_src0_index(const struct brw_device_info *devinfo,
if (!get_src_index(uncompacted, &compacted))
return false;
brw_compact_inst_set_src0_index(dst, compacted);
brw_compact_inst_set_src0_index(devinfo, dst, compacted);
return true;
}
@ -784,7 +784,7 @@ set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
return false;
}
brw_compact_inst_set_src1_index(dst, compacted);
brw_compact_inst_set_src1_index(devinfo, dst, compacted);
return true;
}
@ -804,7 +804,7 @@ set_3src_control_index(const struct brw_device_info *devinfo,
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
if (gen8_3src_control_index_table[i] == uncompacted) {
brw_compact_inst_set_3src_control_index(dst, i);
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
return true;
}
}
@ -838,7 +838,7 @@ set_3src_source_index(const struct brw_device_info *devinfo,
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
if (gen8_3src_source_index_table[i] == uncompacted) {
brw_compact_inst_set_3src_source_index(dst, i);
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
return true;
}
}
@ -909,7 +909,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
return false;
#define compact(field) \
brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
compact(opcode);
@ -921,7 +921,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
compact(dst_reg_nr);
compact(src0_rep_ctrl);
brw_compact_inst_set_3src_cmpt_control(dst, true);
brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
compact(debug_control);
compact(saturate);
compact(src1_rep_ctrl);
@ -1003,36 +1003,52 @@ brw_try_compact_instruction(const struct brw_device_info *devinfo,
memset(&temp, 0, sizeof(temp));
brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
#define compact(field) \
brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
compact(opcode);
compact(debug_control);
if (!set_control_index(devinfo, &temp, src))
return false;
if (!set_datatype_index(devinfo, &temp, src))
return false;
if (!set_subreg_index(devinfo, &temp, src, is_immediate))
return false;
brw_compact_inst_set_acc_wr_control(&temp,
brw_inst_acc_wr_control(devinfo, src));
brw_compact_inst_set_cond_modifier(&temp,
brw_inst_cond_modifier(devinfo, src));
if (devinfo->gen >= 6) {
compact(acc_wr_control);
} else {
compact(mask_control_ex);
}
compact(cond_modifier);
if (devinfo->gen <= 6)
brw_compact_inst_set_flag_subreg_nr(&temp,
brw_inst_flag_subreg_nr(devinfo, src));
brw_compact_inst_set_cmpt_control(&temp, true);
compact(flag_subreg_nr);
brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
if (!set_src0_index(devinfo, &temp, src))
return false;
if (!set_src1_index(devinfo, &temp, src, is_immediate))
return false;
brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
brw_inst_dst_da_reg_nr(devinfo, src));
brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
brw_inst_src0_da_reg_nr(devinfo, src));
if (is_immediate) {
brw_compact_inst_set_src1_reg_nr(&temp,
brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
brw_inst_imm_ud(devinfo, src) & 0xff);
} else {
brw_compact_inst_set_src1_reg_nr(&temp,
brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
brw_inst_src1_da_reg_nr(devinfo, src));
}
#undef compact
*dst = temp;
return true;
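
To make the macro-heavy diff above easier to read: with the compact(field) definition introduced here, a line such as compact(cond_modifier) expands to the following (shown for illustration only):

/* Expansion of compact(cond_modifier) under the new #define above: the
 * devinfo argument is now threaded through both the getter and setter.
 */
brw_compact_inst_set_cond_modifier(devinfo, &temp,
                                   brw_inst_cond_modifier(devinfo, src));
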
@ -1043,7 +1059,7 @@ set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
uint32_t uncompacted =
control_index_table[brw_compact_inst_control_index(src)];
control_index_table[brw_compact_inst_control_index(devinfo, src)];
if (devinfo->gen >= 8) {
brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
@ -1064,7 +1080,8 @@ static void
set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
uint32_t uncompacted =
datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
if (devinfo->gen >= 8) {
brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
@ -1080,7 +1097,8 @@ static void
set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
uint16_t uncompacted =
subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
@ -1091,7 +1109,7 @@ static void
set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
uint32_t compacted = brw_compact_inst_src0_index(src);
uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
uint16_t uncompacted = src_index_table[compacted];
brw_inst_set_bits(dst, 88, 77, uncompacted);
@ -1102,11 +1120,12 @@ set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src, bool is_immediate)
{
if (is_immediate) {
signed high5 = brw_compact_inst_src1_index(src);
signed high5 = brw_compact_inst_src1_index(devinfo, src);
/* Replicate top bit of src1_index into high 20 bits of the immediate. */
brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
} else {
uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
uint16_t uncompacted =
src_index_table[brw_compact_inst_src1_index(devinfo, src)];
brw_inst_set_bits(dst, 120, 109, uncompacted);
}
@ -1118,7 +1137,7 @@ set_uncompacted_3src_control_index(const struct brw_device_info *devinfo,
{
assert(devinfo->gen >= 8);
uint32_t compacted = brw_compact_inst_3src_control_index(src);
uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
uint32_t uncompacted = gen8_3src_control_index_table[compacted];
brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
@ -1134,7 +1153,7 @@ set_uncompacted_3src_source_index(const struct brw_device_info *devinfo,
{
assert(devinfo->gen >= 8);
uint32_t compacted = brw_compact_inst_3src_source_index(src);
uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
uint64_t uncompacted = gen8_3src_source_index_table[compacted];
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
@ -1160,7 +1179,7 @@ brw_uncompact_3src_instruction(const struct brw_device_info *devinfo,
assert(devinfo->gen >= 8);
#define uncompact(field) \
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
uncompact(opcode);
@ -1190,13 +1209,16 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
{
memset(dst, 0, sizeof(*dst));
if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(devinfo, src))) {
brw_uncompact_3src_instruction(devinfo, dst, src);
return;
}
brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
#define uncompact(field) \
brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
uncompact(opcode);
uncompact(debug_control);
set_uncompacted_control(devinfo, dst, src);
set_uncompacted_datatype(devinfo, dst, src);
@ -1206,22 +1228,36 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
set_uncompacted_subreg(devinfo, dst, src);
brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
if (devinfo->gen >= 6) {
uncompact(acc_wr_control);
} else {
uncompact(mask_control_ex);
}
uncompact(cond_modifier);
if (devinfo->gen <= 6)
brw_inst_set_flag_subreg_nr(devinfo, dst,
brw_compact_inst_flag_subreg_nr(src));
uncompact(flag_subreg_nr);
set_uncompacted_src0(devinfo, dst, src);
set_uncompacted_src1(devinfo, dst, src, is_immediate);
brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
brw_inst_set_dst_da_reg_nr(devinfo, dst,
brw_compact_inst_dst_reg_nr(devinfo, src));
brw_inst_set_src0_da_reg_nr(devinfo, dst,
brw_compact_inst_src0_reg_nr(devinfo, src));
if (is_immediate) {
brw_inst_set_imm_ud(devinfo, dst,
brw_inst_imm_ud(devinfo, dst) |
brw_compact_inst_src1_reg_nr(src));
brw_compact_inst_src1_reg_nr(devinfo, src));
} else {
brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
brw_inst_set_src1_da_reg_nr(devinfo, dst,
brw_compact_inst_src1_reg_nr(devinfo, src));
}
#undef uncompact
}
void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
@ -1415,8 +1451,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
brw_compact_inst_set_cmpt_control(align, true);
brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
brw_compact_inst_set_cmpt_control(devinfo, align, true);
offset += sizeof(brw_compact_inst);
compacted_count--;
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
@ -1524,8 +1560,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
if (p->next_insn_offset & sizeof(brw_compact_inst)) {
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
brw_compact_inst_set_cmpt_control(align, true);
brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
brw_compact_inst_set_cmpt_control(devinfo, align, true);
p->next_insn_offset += sizeof(brw_compact_inst);
}
p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
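
The two padding paths above (a compacted NENOP on G4x mid-stream, a compacted NOP at the end of the stream) key off the same test: assuming a compacted instruction is one 64-bit word and a full instruction is 128 bits, `offset & sizeof(brw_compact_inst)` is non-zero exactly when the stream sits halfway into a 16-byte slot. A minimal standalone sketch of that check, with the sizes hard-coded as assumptions:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define COMPACT_INST_SIZE 8   /* assumed sizeof(brw_compact_inst): one 64-bit word */
#define FULL_INST_SIZE    16  /* assumed sizeof(brw_inst): 128 bits                */

/* True when 'offset' sits halfway into a 16-byte slot, i.e. a full-size
 * instruction emitted here would straddle the alignment boundary and a
 * compacted NOP/NENOP has to be inserted first. */
static bool
needs_alignment_nop(size_t offset)
{
   return (offset & COMPACT_INST_SIZE) != 0;
}

int
main(void)
{
   assert(!needs_alignment_nop(0));    /* 16-byte aligned           */
   assert(needs_alignment_nop(8));     /* half a slot in: pad first */
   assert(needs_alignment_nop(24));    /* 16 + 8: pad again         */
   assert(!needs_alignment_nop(32));
   return 0;
}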

View file

@ -281,6 +281,10 @@ fs_inst::is_send_from_grf() const
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == GRF;
@ -782,6 +786,10 @@ fs_inst::regs_read(int arg) const
switch (opcode) {
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@ -911,6 +919,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
@ -2239,13 +2250,15 @@ fs_visitor::opt_sampler_eot()
if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex())
return false;
/* This optimisation doesn't seem to work for textureGather for some
* reason. I can't find any documentation or known workarounds to indicate
* that this is expected, but considering that it is probably pretty
* unlikely that a shader would directly write out the results from
* textureGather we might as well just disable it.
/* 3D Sampler » Messages » Message Format
*
* Response Length of zero is allowed on all SIMD8* and SIMD16* sampler
* messages except sample+killpix, resinfo, sampleinfo, LOD, and gather4*
*/
if (tex_inst->opcode == SHADER_OPCODE_TG4 ||
if (tex_inst->opcode == SHADER_OPCODE_TXS ||
tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO ||
tex_inst->opcode == SHADER_OPCODE_LOD ||
tex_inst->opcode == SHADER_OPCODE_TG4 ||
tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)
return false;
@ -2457,7 +2470,7 @@ fs_visitor::compute_to_mrf()
/* Found a move of a GRF to a MRF. Let's see if we can go
* rewrite the instruction that produced this GRF to write into the MRF.
*/
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg) {
/* Found the last thing to write our reg we want to turn
@ -2805,7 +2818,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
* we assume that there are no outstanding dependencies on entry to the
* program.
*/
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
/* If we hit control flow, assume that there *are* outstanding
* dependencies, and force their cleanup before our instruction.
*/
@ -2871,7 +2884,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
/* Walk forwards looking for writes to registers we're writing which aren't
* read before being written.
*/
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst, block) {
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) {
/* If we hit control flow, force resolve all remaining dependencies. */
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {

View file

@ -62,6 +62,8 @@ namespace brw {
class fs_live_variables;
}
struct brw_gs_compile;
static inline fs_reg
offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
{
@ -99,7 +101,12 @@ public:
const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index);
fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *gs_compile,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader);
void init();
~fs_visitor();
fs_reg vgrf(const glsl_type *const type);
@ -298,6 +305,8 @@ public:
const void *const key;
const struct brw_sampler_prog_key_data *key_tex;
struct brw_gs_compile *gs_compile;
struct brw_stage_prog_data *prog_data;
struct gl_program *prog;
@ -415,6 +424,7 @@ private:
struct brw_reg implied_header,
GLuint nr);
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
void generate_barrier(fs_inst *inst, struct brw_reg src);

View file

@ -87,8 +87,7 @@ opt_cmod_propagation_local(bblock_t *block)
continue;
bool read_flag = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst,
block) {
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
scan_inst->dst.reg_offset != inst->src[0].reg_offset)

View file

@ -354,6 +354,28 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
}
}
void
fs_generator::generate_urb_read(fs_inst *inst,
struct brw_reg dst,
struct brw_reg header)
{
assert(header.file == BRW_GENERAL_REGISTER_FILE);
assert(header.type == BRW_REGISTER_TYPE_UD);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
brw_set_src1(p, send, brw_imm_ud(0u));
brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
brw_inst_set_mlen(p->devinfo, send, inst->mlen);
brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
brw_inst_set_header_present(p->devinfo, send, true);
brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
}
void
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
{
@ -368,6 +390,14 @@ fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true);
if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true);
brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
brw_inst_set_rlen(p->devinfo, insn, 0);
brw_inst_set_eot(p->devinfo, insn, inst->eot);
@ -2001,7 +2031,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
fill_count++;
break;
case SHADER_OPCODE_URB_READ_SIMD8:
generate_urb_read(inst, dst, src[0]);
break;
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
generate_urb_write(inst, src[0]);
break;
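
As the generator changes above show, the four SIMD8 URB write opcodes differ only in which of two message bits get set: the *_PER_SLOT variants let each channel supply its own URB offset in the payload, and the *_MASKED variants add a per-channel write mask. A small sketch of that mapping; the enum and struct are illustrative stand-ins, not the driver's types:

#include <assert.h>
#include <stdbool.h>

/* Illustrative stand-ins for the four opcode variants handled above. */
enum urb_write_variant {
   URB_WRITE_SIMD8,
   URB_WRITE_SIMD8_PER_SLOT,
   URB_WRITE_SIMD8_MASKED,
   URB_WRITE_SIMD8_MASKED_PER_SLOT,
};

struct urb_write_bits {
   bool per_slot_offset;       /* payload carries one URB offset per channel */
   bool channel_mask_present;  /* payload carries a per-channel write mask   */
};

static struct urb_write_bits
classify(enum urb_write_variant v)
{
   struct urb_write_bits b = {
      .per_slot_offset = (v == URB_WRITE_SIMD8_PER_SLOT ||
                          v == URB_WRITE_SIMD8_MASKED_PER_SLOT),
      .channel_mask_present = (v == URB_WRITE_SIMD8_MASKED ||
                               v == URB_WRITE_SIMD8_MASKED_PER_SLOT),
   };
   return b;
}

int
main(void)
{
   assert(!classify(URB_WRITE_SIMD8).per_slot_offset);
   assert(classify(URB_WRITE_SIMD8_MASKED_PER_SLOT).per_slot_offset);
   assert(classify(URB_WRITE_SIMD8_MASKED).channel_mask_present);
   return 0;
}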

View file

@ -30,6 +30,7 @@
#include "brw_fs_surface_builder.h"
#include "brw_nir.h"
#include "brw_fs_surface_builder.h"
#include "brw_vec4_gs_visitor.h"
using namespace brw;
using namespace brw::surface_access;
@ -188,6 +189,18 @@ emit_system_values_block(nir_block *block, void *void_visitor)
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
break;
case nir_intrinsic_load_invocation_id:
assert(v->stage == MESA_SHADER_GEOMETRY);
reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
if (reg->file == BAD_FILE) {
const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL);
fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
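/* Shifting the 32-bit payload dword right by 27 keeps only its top five
 * bits (e.g. 0xc8000000u >> 27 == 25), so the SHR below yields a 0..31
 * invocation index; bits 31:27 of r1.0 are assumed to be where the GS
 * payload delivers gl_InvocationID.
 */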
abld.SHR(iid, g1, fs_reg(27u));
*reg = iid;
}
break;
case nir_intrinsic_load_sample_pos:
assert(v->stage == MESA_SHADER_FRAGMENT);
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
@ -1367,9 +1380,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
case nir_intrinsic_load_primitive_id:
assert(stage == MESA_SHADER_GEOMETRY);
assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);

View file

@ -64,7 +64,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
int src_end_ip = v->live_intervals->end[src_var];
bool interfered = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
(scan_inst->dst.type != inst->dst.type &&

View file

@ -41,6 +41,7 @@
#include "brw_wm.h"
#include "brw_cs.h"
#include "brw_vec4.h"
#include "brw_vec4_gs_visitor.h"
#include "brw_fs.h"
#include "main/uniforms.h"
#include "glsl/nir/glsl_types.h"
@ -868,13 +869,14 @@ void
fs_visitor::emit_urb_writes()
{
int slot, urb_offset, length;
struct brw_vs_prog_data *vs_prog_data =
(struct brw_vs_prog_data *) prog_data;
const struct brw_vs_prog_key *key =
int starting_urb_offset = 0;
const struct brw_vue_prog_data *vue_prog_data =
(const struct brw_vue_prog_data *) this->prog_data;
const struct brw_vs_prog_key *vs_key =
(const struct brw_vs_prog_key *) this->key;
const GLbitfield64 psiz_mask =
VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ;
const struct brw_vue_map *vue_map = &vs_prog_data->base.vue_map;
const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
bool flush;
fs_reg sources[8];
@ -900,8 +902,21 @@ fs_visitor::emit_urb_writes()
return;
}
if (stage == MESA_SHADER_GEOMETRY) {
const struct brw_gs_prog_data *gs_prog_data =
(const struct brw_gs_prog_data *) prog_data;
/* We need to increment the Global Offset to skip over the control data
* header and the extra "Vertex Count" field (1 HWord) at the beginning
* of the VUE. We're counting in OWords, so the units are doubled.
*/
starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
if (gs_prog_data->static_vertex_count == -1)
starting_urb_offset += 2;
}
length = 0;
urb_offset = 0;
urb_offset = starting_urb_offset;
flush = false;
for (slot = 0; slot < vue_map->num_slots; slot++) {
int varying = vue_map->slot_to_varying[slot];
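
The starting offset above is pure unit conversion: the control data header size is tracked in HWords (32 bytes) while URB write offsets count OWords (16 bytes), hence the factor of two, and a non-static vertex count on Broadwell costs one more 32-byte slot. A small sketch of the arithmetic; the function and argument names are illustrative:

#include <assert.h>
#include <stdbool.h>

/* Offset, in OWords (16 bytes), of the first vertex slot a GS thread writes.
 * 1 HWord = 32 bytes = 2 OWords, so the header contributes twice its HWord
 * count; a dynamic "Vertex Count" slot adds one more HWord on Broadwell. */
static unsigned
gs_starting_urb_offset(unsigned control_data_header_size_hwords,
                       bool has_dynamic_vertex_count)
{
   unsigned offset = 2 * control_data_header_size_hwords;
   if (has_dynamic_vertex_count)
      offset += 2;
   return offset;
}

int
main(void)
{
   assert(gs_starting_urb_offset(0, false) == 0); /* no control data header */
   assert(gs_starting_urb_offset(1, false) == 2); /* one HWord of cut bits  */
   assert(gs_starting_urb_offset(1, true) == 4);  /* plus the vertex count  */
   return 0;
}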
@ -961,11 +976,11 @@ fs_visitor::emit_urb_writes()
break;
}
if ((varying == VARYING_SLOT_COL0 ||
if (stage == MESA_SHADER_VERTEX && vs_key->clamp_vertex_color &&
(varying == VARYING_SLOT_COL0 ||
varying == VARYING_SLOT_COL1 ||
varying == VARYING_SLOT_BFC0 ||
varying == VARYING_SLOT_BFC1) &&
key->clamp_vertex_color) {
varying == VARYING_SLOT_BFC1)) {
/* We need to clamp these guys, so do a saturating MOV into a
* temp register and use that for the payload.
*/
@ -1005,10 +1020,10 @@ fs_visitor::emit_urb_writes()
fs_inst *inst =
abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
inst->eot = last;
inst->eot = last && stage == MESA_SHADER_VERTEX;
inst->mlen = length + 1;
inst->offset = urb_offset;
urb_offset = slot + 1;
urb_offset = starting_urb_offset + slot + 1;
length = 0;
flush = false;
}
@ -1071,11 +1086,33 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
unsigned dispatch_width,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
key(key), prog_data(prog_data), prog(prog),
key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
dispatch_width(dispatch_width),
shader_time_index(shader_time_index),
promoted_constants(0),
bld(fs_builder(this, dispatch_width).at_end())
{
init();
}
fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader)
: backend_shader(compiler, log_data, mem_ctx, shader,
&prog_data->base.base),
key(&c->key), gs_compile(c),
prog_data(&prog_data->base.base), prog(NULL),
dispatch_width(8),
shader_time_index(ST_GS),
bld(fs_builder(this, dispatch_width).at_end())
{
init();
}
void
fs_visitor::init()
{
switch (stage) {
case MESA_SHADER_FRAGMENT:
@ -1094,6 +1131,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
unreachable("unhandled shader stage");
}
this->prog_data = this->stage_prog_data;
this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;
@ -1119,6 +1158,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->pull_constant_loc = NULL;
this->push_constant_loc = NULL;
this->promoted_constants = 0,
this->spilled_any_registers = false;
this->do_dual_src = false;

View file

@ -57,20 +57,14 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct brw_geometry_program *gp,
struct brw_gs_prog_key *key)
{
struct brw_compiler *compiler = brw->intelScreen->compiler;
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
c.key = *key;
c.gp = gp;
c.prog_data.include_primitive_id =
(gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
c.prog_data.invocations = gp->program.Invocations;
struct brw_gs_prog_data prog_data;
memset(&prog_data, 0, sizeof(prog_data));
assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
&gp->program.Base, &c.prog_data);
&gp->program.Base, &prog_data);
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
@ -83,215 +77,24 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
int param_count = gp->program.Base.nir->num_uniforms * 4;
c.prog_data.base.base.param =
prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
c.prog_data.base.base.pull_param =
prog_data.base.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
c.prog_data.base.base.image_param =
prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
prog_data.base.base.nr_params = param_count;
prog_data.base.base.nr_image_params = gs->NumImages;
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
&c.prog_data.base.base, false);
if (brw->gen >= 8) {
c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
nir_gs_count_vertices(gp->program.Base.nir);
}
if (brw->gen >= 7) {
if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
/* We only have to emit control bits if we are using streams */
if (prog->Geom.UsesStreams)
c.control_data_bits_per_vertex = 2;
else
c.control_data_bits_per_vertex = 0;
} else {
/* When the output type is triangle_strip or line_strip, EndPrimitive()
* may be used to terminate the current strip and start a new one
* (similar to primitive restart), and outputting data to multiple
* streams is not supported. So we configure the hardware to interpret
* the control data as EndPrimitive information (a.k.a. "cut bits").
*/
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
/* We only need to output control data if the shader actually calls
* EndPrimitive().
*/
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
}
} else {
/* There are no control data bits in gen6. */
c.control_data_bits_per_vertex = 0;
/* If it is using transform feedback, enable it */
if (prog->TransformFeedback.NumVarying)
c.prog_data.gen6_xfb_enabled = true;
else
c.prog_data.gen6_xfb_enabled = false;
}
c.control_data_header_size_bits =
gp->program.VerticesOut * c.control_data_bits_per_vertex;
/* 1 HWORD = 32 bytes = 256 bits */
c.prog_data.control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
&prog_data.base.base, compiler->scalar_gs);
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
brw_compute_vue_map(brw->intelScreen->devinfo,
&c.prog_data.base.vue_map, outputs_written,
&prog_data.base.vue_map, outputs_written,
prog ? prog->SeparateShader : false);
/* Compute the output vertex size.
*
* From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
* Size (p168):
*
* [0,62] indicating [1,63] 16B units
*
* Specifies the size of each vertex stored in the GS output entry
* (following any Control Header data) as a number of 128-bit units
* (minus one).
*
* Programming Restrictions: The vertex size must be programmed as a
* multiple of 32B units with the following exception: Rendering is
* disabled (as per SOL stage state) and the vertex size output by the
* GS thread is 16B.
*
* If rendering is enabled (as per SOL state) the vertex size must be
* programmed as a multiple of 32B units. In other words, the only time
* software can program a vertex size with an odd number of 16B units
* is when rendering is disabled.
*
* Note: B=bytes in the above text.
*
* It doesn't seem worth the extra trouble to optimize the case where the
* vertex size is 16B (especially since this would require special-casing
* the GEN assembly that writes to the URB). So we just set the vertex
* size to a multiple of 32B (2 vec4's) in all cases.
*
* The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
* budget that as follows:
*
* 512 bytes for varyings (a varying component is 4 bytes and
* gl_MaxGeometryOutputComponents = 128)
* 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
* bytes)
* 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
* even if it's not used)
* 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
* whenever clip planes are enabled, even if the shader doesn't
* write to gl_ClipDistance)
* 16 bytes overhead since the VUE size must be a multiple of 32 bytes
* (see below)--this causes up to 1 VUE slot to be wasted
* 400 bytes available for varying packing overhead
*
* Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
* per interpolation type, so this is plenty.
*
*/
unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
assert(brw->gen == 6 ||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
c.prog_data.output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
*
* 64 bytes for the control data header (cut indices or StreamID bits)
* 4096 bytes for varyings (a varying component is 4 bytes and
* gl_MaxGeometryTotalOutputComponents = 1024)
* 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
* bytes/vertex and gl_MaxGeometryOutputVertices is 256)
* 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
* even if it's not used)
* 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
* whenever clip planes are enabled, even if the shader doesn't
* write to gl_ClipDistance)
* 4096 bytes overhead since the VUE size must be a multiple of 32
* bytes (see above)--this causes up to 1 VUE slot to be wasted
* 8128 bytes available for varying packing overhead
*
* Worst-case varying packing overhead is 3/4 of a varying slot per
* interpolation type, which works out to 3072 bytes, so this would allow
* us to accommodate 2 interpolation types without any danger of running
* out of URB space.
*
* In practice, the risk of running out of URB space is very small, since
* the above figures are all worst-case, and most of them scale with the
* number of output vertices. So we'll just calculate the amount of space
* we need, and if it's too large, fail to compile.
*
* The above is for gen7+ where we have a single URB entry that will hold
* all the output. In gen6, we will have to allocate URB entries for every
* vertex we emit, so our URB entries only need to be large enough to hold
* a single vertex. Also, gen6 does not have a control data header.
*/
unsigned output_size_bytes;
if (brw->gen >= 7) {
output_size_bytes =
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
} else {
output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
}
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
* which comes before the control header.
*/
if (brw->gen >= 8)
output_size_bytes += 32;
assert(output_size_bytes >= 1);
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (brw->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
return false;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6.
*/
if (brw->gen >= 7)
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
else
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
c.prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
* geometry shader support. So we can safely ignore that.
*
* For SSO pipelines, we use a fixed VUE map layout based on variable
* locations, so we can rely on rendezvous-by-location making this work.
*
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
* written by previous stages and shows up via payload magic.
*/
GLbitfield64 inputs_read =
gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
brw_compute_vue_map(brw->intelScreen->devinfo,
&c.input_vue_map, inputs_read,
prog->SeparateShader);
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
*/
c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
if (unlikely(INTEL_DEBUG & DEBUG_GS))
brw_dump_ir("geometry", prog, gs, NULL);
@ -303,25 +106,25 @@ brw_codegen_gs_prog(struct brw_context *brw,
unsigned program_size;
char *error_str;
const unsigned *program =
brw_compile_gs(brw->intelScreen->compiler, brw, &c,
shader->Program->nir, prog,
mem_ctx, st_index, &program_size, &error_str);
brw_compile_gs(brw->intelScreen->compiler, brw, mem_ctx, key,
&prog_data, shader->Program->nir, prog,
st_index, &program_size, &error_str);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
}
/* Scratch space is used for register spilling */
if (c.prog_data.base.base.total_scratch) {
if (prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
c.prog_data.base.base.total_scratch *
prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
&c.key, sizeof(c.key),
key, sizeof(*key),
program, program_size,
&c.prog_data, sizeof(c.prog_data),
&prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->gs.prog_data);
ralloc_free(mem_ctx);

View file

@ -181,7 +181,8 @@ F(saturate, 31, 31)
F(debug_control, 30, 30)
F(cmpt_control, 29, 29)
FC(branch_control, 28, 28, devinfo->gen >= 8)
F(acc_wr_control, 28, 28)
FC(acc_wr_control, 28, 28, devinfo->gen >= 6)
FC(mask_control_ex, 28, 28, devinfo->is_g4x || devinfo->gen == 5)
F(cond_modifier, 27, 24)
FC(math_function, 27, 24, devinfo->gen >= 6)
F(exec_size, 23, 21)
@ -392,6 +393,7 @@ FF(urb_per_slot_offset,
/* 4-6: */ -1, -1, -1, -1, -1, -1, -1, -1,
/* 7: */ MD(16), MD(16),
/* 8: */ MD(17), MD(17))
FC(urb_channel_mask_present, MD(15), MD(15), devinfo->gen >= 8)
FC(urb_complete, MD(15), MD(15), devinfo->gen < 8)
FC(urb_used, MD(14), MD(14), devinfo->gen < 7)
FC(urb_allocate, MD(13), MD(13), devinfo->gen < 7)
@ -738,7 +740,7 @@ typedef struct {
* Bits indices range from 0..63.
*/
static inline unsigned
brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low)
brw_compact_inst_bits(const brw_compact_inst *inst, unsigned high, unsigned low)
{
const uint64_t mask = (1ull << (high - low + 1)) - 1;
@ -762,56 +764,65 @@ brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
inst->data = (inst->data & ~mask) | (value << low);
}
#define F(name, high, low) \
static inline void \
brw_compact_inst_set_##name(brw_compact_inst *inst, unsigned v) \
{ \
brw_compact_inst_set_bits(inst, high, low, v); \
} \
\
static inline unsigned \
brw_compact_inst_##name(brw_compact_inst *inst) \
{ \
return brw_compact_inst_bits(inst, high, low); \
#define FC(name, high, low, assertions) \
static inline void \
brw_compact_inst_set_##name(const struct brw_device_info *devinfo, \
brw_compact_inst *inst, unsigned v) \
{ \
assert(assertions); \
(void) devinfo; \
brw_compact_inst_set_bits(inst, high, low, v); \
} \
static inline unsigned \
brw_compact_inst_##name(const struct brw_device_info *devinfo, \
const brw_compact_inst *inst) \
{ \
assert(assertions); \
(void) devinfo; \
return brw_compact_inst_bits(inst, high, low); \
}
F(src1_reg_nr, 63, 56)
F(src0_reg_nr, 55, 48)
F(dst_reg_nr, 47, 40)
F(src1_index, 39, 35)
F(src0_index, 34, 30)
F(cmpt_control, 29, 29) /* Same location as brw_inst */
F(flag_subreg_nr, 28, 28) /* <= Gen6 only */
F(cond_modifier, 27, 24) /* Same location as brw_inst */
F(acc_wr_control, 23, 23)
F(subreg_index, 22, 18)
F(datatype_index, 17, 13)
F(control_index, 12, 8)
F(debug_control, 7, 7)
F(opcode, 6, 0) /* Same location as brw_inst */
/* A simple macro for fields which stay in the same place on all generations. */
#define F(name, high, low) FC(name, high, low, true)
F(src1_reg_nr, 63, 56)
F(src0_reg_nr, 55, 48)
F(dst_reg_nr, 47, 40)
F(src1_index, 39, 35)
F(src0_index, 34, 30)
F(cmpt_control, 29, 29) /* Same location as brw_inst */
FC(flag_subreg_nr, 28, 28, devinfo->gen <= 6)
F(cond_modifier, 27, 24) /* Same location as brw_inst */
FC(acc_wr_control, 23, 23, devinfo->gen >= 6)
FC(mask_control_ex, 23, 23, devinfo->is_g4x || devinfo->gen == 5)
F(subreg_index, 22, 18)
F(datatype_index, 17, 13)
F(control_index, 12, 8)
F(debug_control, 7, 7)
F(opcode, 6, 0) /* Same location as brw_inst */
/**
* (Gen8+) Compacted three-source instructions:
* @{
*/
F(3src_src2_reg_nr, 63, 57)
F(3src_src1_reg_nr, 56, 50)
F(3src_src0_reg_nr, 49, 43)
F(3src_src2_subreg_nr, 42, 40)
F(3src_src1_subreg_nr, 39, 37)
F(3src_src0_subreg_nr, 36, 34)
F(3src_src2_rep_ctrl, 33, 33)
F(3src_src1_rep_ctrl, 32, 32)
F(3src_saturate, 31, 31)
F(3src_debug_control, 30, 30)
F(3src_cmpt_control, 29, 29)
F(3src_src0_rep_ctrl, 28, 28)
FC(3src_src2_reg_nr, 63, 57, devinfo->gen >= 8)
FC(3src_src1_reg_nr, 56, 50, devinfo->gen >= 8)
FC(3src_src0_reg_nr, 49, 43, devinfo->gen >= 8)
FC(3src_src2_subreg_nr, 42, 40, devinfo->gen >= 8)
FC(3src_src1_subreg_nr, 39, 37, devinfo->gen >= 8)
FC(3src_src0_subreg_nr, 36, 34, devinfo->gen >= 8)
FC(3src_src2_rep_ctrl, 33, 33, devinfo->gen >= 8)
FC(3src_src1_rep_ctrl, 32, 32, devinfo->gen >= 8)
FC(3src_saturate, 31, 31, devinfo->gen >= 8)
FC(3src_debug_control, 30, 30, devinfo->gen >= 8)
FC(3src_cmpt_control, 29, 29, devinfo->gen >= 8)
FC(3src_src0_rep_ctrl, 28, 28, devinfo->gen >= 8)
/* Reserved */
F(3src_dst_reg_nr, 18, 12)
F(3src_source_index, 11, 10)
F(3src_control_index, 9, 8)
FC(3src_dst_reg_nr, 18, 12, devinfo->gen >= 8)
FC(3src_source_index, 11, 10, devinfo->gen >= 8)
FC(3src_control_index, 9, 8, devinfo->gen >= 8)
/* Bit 7 is Reserved (for future Opcode expansion) */
F(3src_opcode, 6, 0)
FC(3src_opcode, 6, 0, devinfo->gen >= 8)
/** @} */
#undef F
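
For a concrete picture of what the FC macro emits, here is a hand-expanded, self-contained version of the getter for a single field, acc_wr_control at bit 23 gated on gen >= 6 as above; the struct and helper are simplified stand-ins for the real brw_compact_inst and brw_compact_inst_bits:

#include <assert.h>
#include <stdint.h>

struct devinfo   { int gen; };
struct cmpt_inst { uint64_t data; };   /* stand-in for brw_compact_inst */

/* Same bit-extraction scheme as brw_compact_inst_bits above: build a
 * (high - low + 1)-bit mask and shift the field down to bit 0. */
static unsigned
cmpt_bits(const struct cmpt_inst *inst, unsigned high, unsigned low)
{
   const uint64_t mask = (1ull << (high - low + 1)) - 1;
   return (inst->data >> low) & mask;
}

/* Hand expansion of FC(acc_wr_control, 23, 23, devinfo->gen >= 6):
 * the getter asserts the generation gate, then reads bit 23. */
static unsigned
cmpt_acc_wr_control(const struct devinfo *devinfo, const struct cmpt_inst *inst)
{
   assert(devinfo->gen >= 6);
   return cmpt_bits(inst, 23, 23);
}

int
main(void)
{
   struct devinfo gen8 = { .gen = 8 };
   struct cmpt_inst inst = { .data = 1ull << 23 };
   assert(cmpt_acc_wr_control(&gen8, &inst) == 1);
   return 0;
}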

View file

@ -91,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
return _mesa_init_gl_program(&prog->program, target, id);
return _mesa_init_gl_program(&prog->program.Base, target, id);
} else {
return NULL;
}

View file

@ -79,6 +79,8 @@ is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
case MESA_SHADER_FRAGMENT:
case MESA_SHADER_COMPUTE:
return true;
case MESA_SHADER_GEOMETRY:
return compiler->scalar_gs;
case MESA_SHADER_VERTEX:
return compiler->scalar_vs;
default:
@ -101,6 +103,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
compiler->scalar_vs = true;
if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false))
compiler->scalar_gs = true;
nir_shader_compiler_options *nir_options =
rzalloc(compiler, nir_shader_compiler_options);
nir_options->native_integers = true;
@ -411,6 +416,14 @@ brw_instruction_name(enum opcode op)
return "gen7_scratch_read";
case SHADER_OPCODE_URB_WRITE_SIMD8:
return "gen8_urb_write_simd8";
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
return "gen8_urb_write_simd8_per_slot";
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
return "gen8_urb_write_simd8_masked";
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
return "gen8_urb_write_simd8_masked_per_slot";
case SHADER_OPCODE_URB_READ_SIMD8:
return "urb_read_simd8";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
@ -964,6 +977,9 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_BARRIER:
return true;

View file

@ -233,6 +233,18 @@ bool opt_predicated_break(struct backend_shader *s);
extern "C" {
#endif
/**
* Scratch data used when compiling a GLSL geometry shader.
*/
struct brw_gs_compile
{
struct brw_gs_prog_key key;
struct brw_vue_map input_vue_map;
unsigned control_data_bits_per_vertex;
unsigned control_data_header_size_bits;
};
void
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
const struct brw_device_info *devinfo,

View file

@ -40,36 +40,32 @@
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
static unsigned int
tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
const struct intel_mipmap_tree *mt)
tr_mode_horizontal_texture_alignment(const struct intel_mipmap_tree *mt)
{
const unsigned *align_yf, *align_ys;
const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
unsigned ret_align, divisor;
unsigned ret_align, divisor, multiplier_ys;
/* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below
* tables specifies the horizontal alignment requirement in elements
* for the surface. An element is defined as a pixel in uncompressed
* surface formats, and as a compression block in compressed surface
* formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
/* Values in the tables below specify the horizontal alignment requirement
* in elements for a TRMODE_YF surface. An element is defined as a pixel in
* uncompressed surface formats, and as a compression block in compressed
* surface formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
* element is a sample.
*/
const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256};
const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096};
const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};
const unsigned align_2d_ys[] = {256, 256, 128, 128, 64};
const unsigned align_3d_yf[] = {16, 8, 8, 8, 4};
const unsigned align_3d_ys[] = {64, 32, 32, 32, 16};
int i = 0;
/* Alignment computations below assume bpp >= 8 and a power of 2. */
assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
/* Alignment computations below assume a power of 2 cpp. */
assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp));
/* Compute array index. */
const int i = ffs(mt->cpp) - 1;
switch(mt->target) {
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
align_yf = align_1d_yf;
align_ys = align_1d_ys;
ret_align = align_1d_yf[i];
multiplier_ys = 16;
break;
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE:
@ -78,22 +74,19 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
align_yf = align_2d_yf;
align_ys = align_2d_ys;
ret_align = align_2d_yf[i];
multiplier_ys = 4;
break;
case GL_TEXTURE_3D:
align_yf = align_3d_yf;
align_ys = align_3d_ys;
ret_align = align_3d_yf[i];
multiplier_ys = 4;
break;
default:
unreachable("not reached");
}
/* Compute array index. */
i = ffs(bpp/8) - 1;
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
align_yf[i] : align_ys[i];
if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
ret_align *= multiplier_ys;
assert(_mesa_is_pow_two(mt->num_samples));
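
A worked example of the lookup above, assuming a common 32-bit format (4 bytes per pixel) on a 2D surface: ffs(4) - 1 = 2, so the YF horizontal alignment is align_2d_yf[2] = 32 elements, and YS multiplies that by 4 to get 128. A self-contained sketch of just that arithmetic:

#include <assert.h>
#include <strings.h>   /* ffs() */

/* YF horizontal alignment for 2D surfaces, indexed by log2(cpp) as above. */
static const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};

/* Horizontal alignment in elements for a 2D YF/YS surface with the given
 * bytes-per-element; YS is the YF value scaled by 4. */
static unsigned
horizontal_align_2d(unsigned cpp, int is_ys)
{
   const int i = ffs(cpp) - 1;       /* cpp is a power of two: 1,2,4,8,16 */
   unsigned align = align_2d_yf[i];
   if (is_ys)
      align *= 4;
   return align;
}

int
main(void)
{
   assert(horizontal_align_2d(4, 0) == 32);    /* e.g. a 32bpp format, TRMODE_YF */
   assert(horizontal_align_2d(4, 1) == 128);   /* same format, TRMODE_YS         */
   assert(horizontal_align_2d(16, 0) == 16);   /* 128-bit formats                */
   return 0;
}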
@ -148,26 +141,20 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
}
static unsigned int
tr_mode_vertical_texture_alignment(const struct brw_context *brw,
const struct intel_mipmap_tree *mt)
tr_mode_vertical_texture_alignment(const struct intel_mipmap_tree *mt)
{
const unsigned *align_yf, *align_ys;
const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
unsigned ret_align, divisor;
unsigned ret_align, divisor, multiplier_ys;
/* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */
/* Vertical alignment tables for TRMODE_YF */
const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
const unsigned align_3d_yf[] = {16, 16, 16, 8, 8};
const unsigned align_3d_ys[] = {32, 32, 32, 16, 16};
int i = 0;
assert(brw->gen >= 9 &&
mt->target != GL_TEXTURE_1D &&
mt->target != GL_TEXTURE_1D_ARRAY);
assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
/* Alignment computations below assume bpp >= 8 and a power of 2. */
assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ;
/* Alignment computations below assume a power of 2 cpp. */
assert(mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp));
/* Compute array index. */
const int i = ffs(mt->cpp) - 1;
switch(mt->target) {
case GL_TEXTURE_2D:
@ -177,22 +164,21 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw,
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
align_yf = align_2d_yf;
align_ys = align_2d_ys;
ret_align = align_2d_yf[i];
multiplier_ys = 4;
break;
case GL_TEXTURE_3D:
align_yf = align_3d_yf;
align_ys = align_3d_ys;
ret_align = align_3d_yf[i];
multiplier_ys = 2;
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
default:
unreachable("not reached");
unreachable("Unexpected miptree target");
}
/* Compute array index. */
i = ffs(bpp / 8) - 1;
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
align_yf[i] : align_ys[i];
if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
ret_align *= multiplier_ys;
assert(_mesa_is_pow_two(mt->num_samples));
@ -779,8 +765,8 @@ intel_miptree_set_alignment(struct brw_context *brw,
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
* vertical alignment < 64. */
mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
mt->halign = MAX2(tr_mode_horizontal_texture_alignment(mt), 32);
mt->valign = MAX2(tr_mode_vertical_texture_alignment(mt), 64);
} else {
mt->halign =
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);

View file

@ -1111,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
*/
vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev;
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
inst, block) {
inst) {
_scan_inst = scan_inst;
if (inst->src[0].in_range(scan_inst->dst, scan_inst->regs_written)) {

View file

@ -104,7 +104,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_primitive_id:
assert(c->prog_data.include_primitive_id);
assert(gs_prog_data->include_primitive_id);
dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
break;

View file

@ -35,14 +35,16 @@ namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
: vec4_visitor(compiler, log_data, &c->key.tex,
&c->prog_data.base, shader, mem_ctx,
&prog_data->base, shader, mem_ctx,
no_spills, shader_time_index),
c(c)
c(c),
gs_prog_data(prog_data)
{
}
@ -78,9 +80,9 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
* so the total number of input slots that will be delivered to the GS (and
* thus the stride of the input arrays) is urb_read_length * 2.
*/
const unsigned num_input_vertices = c->gp->program.VerticesIn;
const unsigned num_input_vertices = nir->info.gs.vertices_in;
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
unsigned input_array_stride = c->prog_data.base.urb_read_length * 2;
unsigned input_array_stride = prog_data->urb_read_length * 2;
for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
int varying = c->input_vue_map.slot_to_varying[slot];
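
Since GS inputs are read from the VUE two vec4 slots (256 bits) at a time, the read length is the slot count rounded up to a pair, and the per-vertex input stride is twice that again in slots. A worked sketch of the arithmetic; the slot counts are arbitrary examples:

#include <assert.h>

/* URB read length in 256-bit units: ceiling(num_slots / 2), matching the
 * urb_read_length computation elsewhere in this series. */
static unsigned
urb_read_length(unsigned num_input_slots)
{
   return (num_input_slots + 1) / 2;
}

/* Input array stride in vec4 slots per vertex: each 256-bit read delivers
 * two slots, so the stride is read length * 2 (an even slot count). */
static unsigned
input_array_stride(unsigned num_input_slots)
{
   return urb_read_length(num_input_slots) * 2;
}

int
main(void)
{
   assert(urb_read_length(9) == 5);      /* 9 slots need 5 paired reads  */
   assert(input_array_stride(9) == 10);  /* stride rounds up to 10 slots */
   assert(input_array_stride(8) == 8);   /* even counts are unchanged    */
   return 0;
}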
@ -106,7 +108,7 @@ vec4_gs_visitor::setup_payload()
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
/* If a geometry shader tries to read from an input that wasn't written by
* the vertex shader, that produces undefined results, but it shouldn't
@ -124,7 +126,7 @@ vec4_gs_visitor::setup_payload()
reg++;
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
if (c->prog_data.include_primitive_id)
if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
reg = setup_uniforms(reg);
@ -182,9 +184,9 @@ vec4_gs_visitor::emit_prolog()
* to account for the fact that the vertex shader stored it in the w
* component of VARYING_SLOT_PSIZ.
*/
if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
if (nir->info.inputs_read & VARYING_BIT_PSIZ) {
this->current_annotation = "swizzle gl_PointSize input";
for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) {
dst_reg dst(ATTR,
BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
dst.type = BRW_REGISTER_TYPE_F;
@ -222,7 +224,7 @@ vec4_gs_visitor::emit_thread_end()
*/
int base_mrf = 1;
bool static_vertex_count = c->prog_data.static_vertex_count != -1;
bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
/* If the previous instruction was a URB write, we don't need to issue
* a second one - we can just set the EOT bit on the previous write.
@ -271,7 +273,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
(uint32_t) c->prog_data.output_vertex_size_hwords);
(uint32_t) gs_prog_data->output_vertex_size_hwords);
}
@ -285,12 +287,12 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
(void) complete;
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
inst->offset = c->prog_data.control_data_header_size_hwords;
inst->offset = gs_prog_data->control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
@ -409,7 +411,7 @@ vec4_gs_visitor::emit_control_data_bits()
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
@ -536,7 +538,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
* do for GL_POINTS outputs that don't use streams).
*/
if (c->control_data_header_size_bits > 0 &&
c->prog_data.control_data_format ==
gs_prog_data->control_data_format ==
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
this->current_annotation = "emit vertex: Stream control data bits";
set_stream_control_data_bits(stream_id);
@ -552,7 +554,7 @@ vec4_gs_visitor::gs_end_primitive()
* consists of cut bits. Fortunately, the only time it isn't is when the
* output type is points, in which case EndPrimitive() is a no-op.
*/
if (c->prog_data.control_data_format !=
if (gs_prog_data->control_data_format !=
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
return;
}
@ -598,27 +600,231 @@ vec4_gs_visitor::gs_end_primitive()
extern "C" const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
struct brw_gs_compile *c,
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
struct gl_shader_program *shader_prog,
void *mem_ctx,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str)
{
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
c.key = *key;
prog_data->include_primitive_id =
(shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
prog_data->invocations = shader->info.gs.invocations;
if (compiler->devinfo->gen >= 8)
prog_data->static_vertex_count = nir_gs_count_vertices(shader);
if (compiler->devinfo->gen >= 7) {
if (shader->info.gs.output_primitive == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
/* We only have to emit control bits if we are using streams */
if (shader_prog && shader_prog->Geom.UsesStreams)
c.control_data_bits_per_vertex = 2;
else
c.control_data_bits_per_vertex = 0;
} else {
/* When the output type is triangle_strip or line_strip, EndPrimitive()
* may be used to terminate the current strip and start a new one
* (similar to primitive restart), and outputting data to multiple
* streams is not supported. So we configure the hardware to interpret
* the control data as EndPrimitive information (a.k.a. "cut bits").
*/
prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
/* We only need to output control data if the shader actually calls
* EndPrimitive().
*/
c.control_data_bits_per_vertex =
shader->info.gs.uses_end_primitive ? 1 : 0;
}
} else {
/* There are no control data bits in gen6. */
c.control_data_bits_per_vertex = 0;
/* If it is using transform feedback, enable it */
if (shader->info.has_transform_feedback_varyings)
prog_data->gen6_xfb_enabled = true;
else
prog_data->gen6_xfb_enabled = false;
}
c.control_data_header_size_bits =
shader->info.gs.vertices_out * c.control_data_bits_per_vertex;
/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
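/* Worked example: a triangle_strip shader that calls EndPrimitive() uses
 * 1 cut bit per vertex, so gs.vertices_out = 256 needs 256 bits, i.e.
 * ALIGN(256, 256) / 256 = 1 HWord of control data header; a 64-vertex
 * shader still pays a full HWord because the header is allocated in
 * 256-bit units.
 */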
/* Compute the output vertex size.
*
* From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
* Size (p168):
*
* [0,62] indicating [1,63] 16B units
*
* Specifies the size of each vertex stored in the GS output entry
* (following any Control Header data) as a number of 128-bit units
* (minus one).
*
* Programming Restrictions: The vertex size must be programmed as a
* multiple of 32B units with the following exception: Rendering is
* disabled (as per SOL stage state) and the vertex size output by the
* GS thread is 16B.
*
* If rendering is enabled (as per SOL state) the vertex size must be
* programmed as a multiple of 32B units. In other words, the only time
* software can program a vertex size with an odd number of 16B units
* is when rendering is disabled.
*
* Note: B=bytes in the above text.
*
* It doesn't seem worth the extra trouble to optimize the case where the
* vertex size is 16B (especially since this would require special-casing
* the GEN assembly that writes to the URB). So we just set the vertex
* size to a multiple of 32B (2 vec4's) in all cases.
*
* The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
* budget that as follows:
*
* 512 bytes for varyings (a varying component is 4 bytes and
* gl_MaxGeometryOutputComponents = 128)
* 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
* bytes)
* 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
* even if it's not used)
* 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
* whenever clip planes are enabled, even if the shader doesn't
* write to gl_ClipDistance)
* 16 bytes overhead since the VUE size must be a multiple of 32 bytes
* (see below)--this causes up to 1 VUE slot to be wasted
* 400 bytes available for varying packing overhead
*
* Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
* per interpolation type, so this is plenty.
*
*/
unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
assert(compiler->devinfo->gen == 6 ||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
prog_data->output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
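/* Worked example: a VUE map with 7 slots gives 7 * 16 = 112 bytes per
 * vertex, which rounds up to ALIGN(112, 32) / 32 = 4 HWords; the rounding
 * enforces the 32B-multiple restriction quoted above even though the data
 * itself would fit in 3.5 HWords.
 */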
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
*
* 64 bytes for the control data header (cut indices or StreamID bits)
* 4096 bytes for varyings (a varying component is 4 bytes and
* gl_MaxGeometryTotalOutputComponents = 1024)
* 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
* bytes/vertex and gl_MaxGeometryOutputVertices is 256)
* 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
* even if it's not used)
* 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
* whenever clip planes are enabled, even if the shader doesn't
* write to gl_ClipDistance)
* 4096 bytes overhead since the VUE size must be a multiple of 32
* bytes (see above)--this causes up to 1 VUE slot to be wasted
* 8128 bytes available for varying packing overhead
*
* Worst-case varying packing overhead is 3/4 of a varying slot per
* interpolation type, which works out to 3072 bytes, so this would allow
* us to accommodate 2 interpolation types without any danger of running
* out of URB space.
*
* In practice, the risk of running out of URB space is very small, since
* the above figures are all worst-case, and most of them scale with the
* number of output vertices. So we'll just calculate the amount of space
* we need, and if it's too large, fail to compile.
*
* The above is for gen7+ where we have a single URB entry that will hold
* all the output. In gen6, we will have to allocate URB entries for every
* vertex we emit, so our URB entries only need to be large enough to hold
* a single vertex. Also, gen6 does not have a control data header.
*/
unsigned output_size_bytes;
if (compiler->devinfo->gen >= 7) {
output_size_bytes =
prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out;
output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
} else {
output_size_bytes = prog_data->output_vertex_size_hwords * 32;
}
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
* which comes before the control header.
*/
if (compiler->devinfo->gen >= 8)
output_size_bytes += 32;
assert(output_size_bytes >= 1);
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
return false;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6.
*/
if (compiler->devinfo->gen >= 7)
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
else
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
prog_data->output_topology =
get_hw_prim_for_gl_prim(shader->info.gs.output_primitive);
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
* geometry shader support. So we can safely ignore that.
*
* For SSO pipelines, we use a fixed VUE map layout based on variable
* locations, so we can rely on rendezvous-by-location making this work.
*
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
* written by previous stages and shows up via payload magic.
*/
GLbitfield64 inputs_read =
shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
brw_compute_vue_map(compiler->devinfo,
&c.input_vue_map, inputs_read,
shader->info.separate_shader);
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
*/
prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
/* Now that prog_data setup is done, we are ready to actually compile the
* program.
*/
if (compiler->devinfo->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
* dual object mode.
*/
if (c->prog_data.invocations <= 1 &&
if (prog_data->invocations <= 1 &&
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_gs_visitor v(compiler, log_data, c, shader,
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
return g.generate_assembly(v.cfg, final_assembly_size, shader);
}
@ -648,28 +854,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
* mode is more performant when invocations > 1. Gen6 only supports
* SINGLE mode.
*/
if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7)
prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
if (compiler->devinfo->gen >= 7)
gs = new vec4_gs_visitor(compiler, log_data, c, shader,
mem_ctx, false /* no_spills */,
gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
shader, mem_ctx, false /* no_spills */,
shader_time_index);
else
gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
mem_ctx, false /* no_spills */,
gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog,
shader, mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
}
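
Putting the two branches above together, the dispatch mode selection reduces to a small decision function. This sketch ignores the spill-failure fallback out of DUAL_OBJECT and the INTEL_DEBUG override, and the enum is an illustrative stand-in for the hardware dispatch modes:

#include <assert.h>
#include <stdbool.h>

/* Illustrative stand-ins for the vec4 GS dispatch modes used above. */
enum gs_dispatch_mode { GS_4X1_SINGLE, GS_4X2_DUAL_INSTANCE, GS_4X2_DUAL_OBJECT };

/* DUAL_OBJECT is only attempted on gen7+ single-invocation shaders (and
 * only kept if register allocation succeeds without spilling, which this
 * sketch does not model). */
static bool
can_try_dual_object(int gen, int invocations)
{
   return gen >= 7 && invocations <= 1;
}

/* Mode used when DUAL_OBJECT is not attempted or spills: gen6 and
 * single-invocation shaders use SINGLE, otherwise DUAL_INSTANCE. */
static enum gs_dispatch_mode
fallback_dispatch_mode(int gen, int invocations)
{
   if (invocations <= 1 || gen < 7)
      return GS_4X1_SINGLE;
   return GS_4X2_DUAL_INSTANCE;
}

int
main(void)
{
   assert(can_try_dual_object(8, 1));
   assert(!can_try_dual_object(6, 1));
   assert(fallback_dispatch_mode(6, 1) == GS_4X1_SINGLE);
   assert(fallback_dispatch_mode(7, 4) == GS_4X2_DUAL_INSTANCE);
   return 0;
}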

View file

@ -41,6 +41,7 @@ public:
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
@ -70,6 +71,7 @@ protected:
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
struct brw_gs_prog_data * const gs_prog_data;
};
} /* namespace brw */

View file

@ -1222,6 +1222,9 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
void
vec4_visitor::emit_ndc_computation()
{
if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
return;
/* Get the position */
src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
@ -1287,7 +1290,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
if (devinfo->has_negative_rhw_bug) {
if (devinfo->has_negative_rhw_bug &&
output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
@ -1335,8 +1339,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
assert(varying < VARYING_SLOT_MAX);
assert(output_reg[varying].type == reg.type);
current_annotation = output_reg_annotation[varying];
/* Copy the register, saturating if necessary */
return emit(MOV(reg, src_reg(output_reg[varying])));
if (output_reg[varying].file != BAD_FILE)
return emit(MOV(reg, src_reg(output_reg[varying])));
else
return NULL;
}
void
@ -1355,11 +1361,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
}
case BRW_VARYING_SLOT_NDC:
current_annotation = "NDC";
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
break;
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy

View file

@ -217,7 +217,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
* shader.
*/
vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
if (key->clamp_vertex_color)
if (inst && key->clamp_vertex_color)
inst->saturate = true;
break;
}

View file

@ -312,7 +312,7 @@ brw_vs_populate_key(struct brw_context *brw,
if (ctx->Transform.ClipPlanesEnabled != 0 &&
ctx->API == API_OPENGL_COMPAT &&
!vp->program.Base.UsesClipDistanceOut) {
vp->program.Base.ClipDistanceArraySize == 0) {
key->nr_userclip_plane_consts =
_mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
}

View file

@ -63,7 +63,7 @@ gen6_gs_visitor::emit_prolog()
this->vertex_output = src_reg(this,
glsl_type::uint_type,
(prog_data->vue_map.num_slots + 1) *
c->gp->program.VerticesOut);
nir->info.gs.vertices_out);
this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
@ -95,7 +95,7 @@ gen6_gs_visitor::emit_prolog()
this->prim_count = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->prim_count), 0u));
if (c->prog_data.gen6_xfb_enabled) {
if (gs_prog_data->gen6_xfb_enabled) {
/* Create a virtual register to hold destination indices in SOL */
this->destination_indices = src_reg(this, glsl_type::uvec4_type);
/* Create a virtual register to hold number of written primitives */
@ -128,7 +128,7 @@ gen6_gs_visitor::emit_prolog()
* in the 3DSTATE_GS state packet. That information can be obtained by other
* means though, so we can safely use r1 for this purpose.
*/
if (c->prog_data.include_primitive_id) {
if (gs_prog_data->include_primitive_id) {
this->primitive_id =
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
@ -177,7 +177,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
if (c->gp->program.OutputType == GL_POINTS) {
if (nir->info.gs.output_primitive == GL_POINTS) {
/* If we are outputting points, then every vertex has PrimStart and
* PrimEnd set.
*/
@ -191,7 +191,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
* vertex.
*/
emit(OR(dst, this->first_vertex,
(c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
(gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
emit(MOV(dst_reg(this->first_vertex), 0u));
}
emit(ADD(dst_reg(this->vertex_output_offset),
@ -205,7 +205,7 @@ gen6_gs_visitor::gs_end_primitive()
/* Calling EndPrimitive() is optional for point output. In this case we set
* the PrimEnd flag when we process EmitVertex().
*/
if (c->gp->program.OutputType == GL_POINTS)
if (nir->info.gs.output_primitive == GL_POINTS)
return;
/* Otherwise we know that the last vertex we have processed was the last
@ -217,7 +217,7 @@ gen6_gs_visitor::gs_end_primitive()
* comparison below (hence the num_output_vertices + 1 in the comparison
* below).
*/
unsigned num_output_vertices = c->gp->program.VerticesOut;
unsigned num_output_vertices = nir->info.gs.vertices_out;
emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
BRW_CONDITIONAL_L));
vec4_instruction *inst = emit(CMP(dst_null_d(),
@ -320,7 +320,7 @@ gen6_gs_visitor::emit_thread_end()
* first_vertex is not zero. This is only relevant for outputs other than
* points because in the point case we set PrimEnd on all vertices.
*/
if (c->gp->program.OutputType != GL_POINTS) {
if (nir->info.gs.output_primitive != GL_POINTS) {
emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_NORMAL));
gs_end_primitive();
@ -353,7 +353,7 @@ gen6_gs_visitor::emit_thread_end()
this->current_annotation = "gen6 thread end: ff_sync";
vec4_instruction *inst;
if (c->prog_data.gen6_xfb_enabled) {
if (gs_prog_data->gen6_xfb_enabled) {
src_reg sol_temp(this, glsl_type::uvec4_type);
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
dst_reg(this->svbi),
@ -443,7 +443,7 @@ gen6_gs_visitor::emit_thread_end()
}
emit(BRW_OPCODE_WHILE);
if (c->prog_data.gen6_xfb_enabled)
if (gs_prog_data->gen6_xfb_enabled)
xfb_write();
}
emit(BRW_OPCODE_ENDIF);
@ -465,7 +465,7 @@ gen6_gs_visitor::emit_thread_end()
*/
this->current_annotation = "gen6 thread end: EOT";
if (c->prog_data.gen6_xfb_enabled) {
if (gs_prog_data->gen6_xfb_enabled) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_type::uint_type);
emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
@ -507,7 +507,7 @@ gen6_gs_visitor::setup_payload()
* information (and move the original value to a virtual register if
* necessary).
*/
if (c->prog_data.include_primitive_id)
if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
reg++;
@ -530,9 +530,6 @@ gen6_gs_visitor::xfb_setup()
BRW_SWIZZLE4(3, 3, 3, 3)
};
struct brw_gs_prog_data *prog_data =
(struct brw_gs_prog_data *) &c->prog_data;
const struct gl_transform_feedback_info *linked_xfb_info =
&this->shader_prog->LinkedTransformFeedback;
int i;
@ -548,11 +545,11 @@ gen6_gs_visitor::xfb_setup()
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
prog_data->transform_feedback_bindings[i] =
gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
gs_prog_data->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
prog_data->transform_feedback_swizzles[i] =
gs_prog_data->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
@ -561,13 +558,11 @@ void
gen6_gs_visitor::xfb_write()
{
unsigned num_verts;
struct brw_gs_prog_data *prog_data =
(struct brw_gs_prog_data *) &c->prog_data;
if (!prog_data->num_transform_feedback_bindings)
if (!gs_prog_data->num_transform_feedback_bindings)
return;
switch (c->prog_data.output_topology) {
switch (gs_prog_data->output_topology) {
case _3DPRIM_POINTLIST:
num_verts = 1;
break;
@ -627,7 +622,7 @@ gen6_gs_visitor::xfb_write()
emit(BRW_OPCODE_ENDIF);
/* Write transform feedback data for all processed vertices. */
for (int i = 0; i < c->gp->program.VerticesOut; i++) {
for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
emit(MOV(dst_reg(sol_temp), i));
emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
BRW_CONDITIONAL_L));
@ -642,10 +637,8 @@ gen6_gs_visitor::xfb_write()
void
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
{
struct brw_gs_prog_data *prog_data =
(struct brw_gs_prog_data *) &c->prog_data;
unsigned binding;
unsigned num_bindings = prog_data->num_transform_feedback_bindings;
unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
src_reg sol_temp(this, glsl_type::uvec4_type);
/* Check for buffer overflow: we need room to write the complete primitive
@ -666,7 +659,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
*/
for (binding = 0; binding < num_bindings; ++binding) {
unsigned char varying =
prog_data->transform_feedback_bindings[binding];
gs_prog_data->transform_feedback_bindings[binding];
/* Set up the correct destination index for this vertex */
vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
@ -704,7 +697,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
else if (varying == VARYING_SLOT_VIEWPORT)
data.swizzle = BRW_SWIZZLE_ZZZZ;
else
data.swizzle = prog_data->transform_feedback_swizzles[binding];
data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
/* Write data */
inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);

View file

@ -38,12 +38,13 @@ public:
gen6_gs_visitor(const struct brw_compiler *comp,
void *log_data,
struct brw_gs_compile *c,
struct brw_gs_prog_data *prog_data,
struct gl_shader_program *prog,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills,
shader_time_index),
shader_prog(prog)
{

View file

@ -68,6 +68,8 @@ gen8_upload_gs_state(struct brw_context *brw)
GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
(brw->gs.prog_data->output_topology <<
GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
(prog_data->include_vue_handles ?
GEN7_GS_INCLUDE_VERTEX_HANDLES : 0) |
(prog_data->urb_read_length <<
GEN6_GS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |

View file

@ -1400,7 +1400,7 @@ save_BlendFunci(GLuint buf, GLenum sfactor, GLenum dfactor)
GET_CURRENT_CONTEXT(ctx);
Node *n;
ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_SEPARATE_I, 3);
n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_I, 3);
if (n) {
n[1].ui = buf;
n[2].e = sfactor;
@ -9741,6 +9741,46 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
n[3].f, n[4].f, n[5].f, n[6].f,
get_pointer(&n[7]));
break;
case OPCODE_BLEND_COLOR:
fprintf(f, "BlendColor %f, %f, %f, %f\n",
n[1].f, n[2].f, n[3].f, n[4].f);
break;
case OPCODE_BLEND_EQUATION:
fprintf(f, "BlendEquation %s\n",
enum_string(n[1].e));
break;
case OPCODE_BLEND_EQUATION_SEPARATE:
fprintf(f, "BlendEquationSeparate %s, %s\n",
enum_string(n[1].e),
enum_string(n[2].e));
break;
case OPCODE_BLEND_FUNC_SEPARATE:
fprintf(f, "BlendFuncSeparate %s, %s, %s, %s\n",
enum_string(n[1].e),
enum_string(n[2].e),
enum_string(n[3].e),
enum_string(n[4].e));
break;
case OPCODE_BLEND_EQUATION_I:
fprintf(f, "BlendEquationi %u, %s\n",
n[1].ui, enum_string(n[2].e));
break;
case OPCODE_BLEND_EQUATION_SEPARATE_I:
fprintf(f, "BlendEquationSeparatei %u, %s, %s\n",
n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
break;
case OPCODE_BLEND_FUNC_I:
fprintf(f, "BlendFunci %u, %s, %s\n",
n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
break;
case OPCODE_BLEND_FUNC_SEPARATE_I:
fprintf(f, "BlendFuncSeparatei %u, %s, %s, %s, %s\n",
n[1].ui,
enum_string(n[2].e),
enum_string(n[3].e),
enum_string(n[4].e),
enum_string(n[5].e));
break;
case OPCODE_CALL_LIST:
fprintf(f, "CallList %d\n", (int) n[1].ui);
break;
@ -9761,6 +9801,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
case OPCODE_LINE_STIPPLE:
fprintf(f, "LineStipple %d %x\n", n[1].i, (int) n[2].us);
break;
case OPCODE_LINE_WIDTH:
fprintf(f, "LineWidth %f\n", n[1].f);
break;
case OPCODE_LOAD_IDENTITY:
fprintf(f, "LoadIdentity\n");
break;
@ -9790,6 +9833,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
fprintf(f, "Ortho %g %g %g %g %g %g\n",
n[1].f, n[2].f, n[3].f, n[4].f, n[5].f, n[6].f);
break;
case OPCODE_POINT_SIZE:
fprintf(f, "PointSize %f\n", n[1].f);
break;
case OPCODE_POP_ATTRIB:
fprintf(f, "PopAttrib\n");
break;

View file

@ -2275,45 +2275,16 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
; /* fallthrough */
}
if (ctx->Extensions.TDFX_texture_compression_FXT1) {
switch (internalFormat) {
case GL_COMPRESSED_RGB_FXT1_3DFX:
return GL_RGB;
case GL_COMPRESSED_RGBA_FXT1_3DFX:
return GL_RGBA;
default:
; /* fallthrough */
}
if (_mesa_is_compressed_format(ctx, internalFormat)) {
GLenum base_compressed =
_mesa_gl_compressed_format_base_format(internalFormat);
if (base_compressed)
return base_compressed;
}
/* Assume that the ANGLE flag will always be set if the EXT flag is set.
*/
if (ctx->Extensions.ANGLE_texture_compression_dxt) {
switch (internalFormat) {
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
return GL_RGB;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return GL_RGBA;
default:
; /* fallthrough */
}
}
if (_mesa_is_desktop_gl(ctx)
&& ctx->Extensions.ANGLE_texture_compression_dxt) {
switch (internalFormat) {
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
return GL_RGB;
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
return GL_RGBA;
default:
; /* fallthrough */
}
}
if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
_mesa_is_astc_format(internalFormat))
return GL_RGBA;
if (ctx->Extensions.MESA_ycbcr_texture) {
if (internalFormat == GL_YCBCR_MESA)
@ -2390,16 +2361,10 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
case GL_SRGB8_EXT:
case GL_COMPRESSED_SRGB_EXT:
return GL_RGB;
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
case GL_SRGB_ALPHA_EXT:
case GL_SRGB8_ALPHA8_EXT:
case GL_COMPRESSED_SRGB_ALPHA_EXT:
return GL_RGBA;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
case GL_SLUMINANCE_ALPHA_EXT:
case GL_SLUMINANCE8_ALPHA8_EXT:
case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
@ -2544,104 +2509,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
}
}
if (ctx->Extensions.ARB_texture_compression_rgtc) {
switch (internalFormat) {
case GL_COMPRESSED_RED_RGTC1:
case GL_COMPRESSED_SIGNED_RED_RGTC1:
return GL_RED;
case GL_COMPRESSED_RG_RGTC2:
case GL_COMPRESSED_SIGNED_RG_RGTC2:
return GL_RG;
default:
; /* fallthrough */
}
}
if (ctx->Extensions.EXT_texture_compression_latc) {
switch (internalFormat) {
case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
return GL_LUMINANCE;
case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
return GL_LUMINANCE_ALPHA;
default:
; /* fallthrough */
}
}
if (ctx->Extensions.ATI_texture_compression_3dc) {
switch (internalFormat) {
case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
return GL_LUMINANCE_ALPHA;
default:
; /* fallthrough */
}
}
if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
switch (internalFormat) {
case GL_ETC1_RGB8_OES:
return GL_RGB;
default:
; /* fallthrough */
}
}
if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
switch (internalFormat) {
case GL_COMPRESSED_RGB8_ETC2:
case GL_COMPRESSED_SRGB8_ETC2:
return GL_RGB;
case GL_COMPRESSED_RGBA8_ETC2_EAC:
case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
return GL_RGBA;
case GL_COMPRESSED_R11_EAC:
case GL_COMPRESSED_SIGNED_R11_EAC:
return GL_RED;
case GL_COMPRESSED_RG11_EAC:
case GL_COMPRESSED_SIGNED_RG11_EAC:
return GL_RG;
default:
; /* fallthrough */
}
}
if (_mesa_is_desktop_gl(ctx) &&
ctx->Extensions.ARB_texture_compression_bptc) {
switch (internalFormat) {
case GL_COMPRESSED_RGBA_BPTC_UNORM:
case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
return GL_RGBA;
case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
return GL_RGB;
default:
; /* fallthrough */
}
}
if (ctx->API == API_OPENGLES) {
switch (internalFormat) {
case GL_PALETTE4_RGB8_OES:
case GL_PALETTE4_R5_G6_B5_OES:
case GL_PALETTE8_RGB8_OES:
case GL_PALETTE8_R5_G6_B5_OES:
return GL_RGB;
case GL_PALETTE4_RGBA8_OES:
case GL_PALETTE8_RGB5_A1_OES:
case GL_PALETTE4_RGBA4_OES:
case GL_PALETTE4_RGB5_A1_OES:
case GL_PALETTE8_RGBA8_OES:
case GL_PALETTE8_RGBA4_OES:
return GL_RGBA;
default:
; /* fallthrough */
}
}
return -1; /* error */
}

View file

@ -1891,7 +1891,7 @@ struct gl_program
* For vertex and geometry shaders, true if the program uses the
* gl_ClipDistance output. Ignored for fragment shaders.
*/
GLboolean UsesClipDistanceOut;
unsigned ClipDistanceArraySize;
/** Named parameters, constants, etc. from program text */
@ -2619,7 +2619,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into
* gl_tess_eval_program by _mesa_copy_linked_program_data().
*/
GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
} TessEval;
@ -2642,7 +2641,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into
* gl_geometry_program by _mesa_copy_linked_program_data().
*/
GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
bool UsesEndPrimitive;
@ -2655,7 +2653,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into gl_vertex_program
* by _mesa_copy_linked_program_data().
*/
GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
} Vert;

View file

@ -2068,7 +2068,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
{
switch (type) {
case MESA_SHADER_VERTEX:
dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
dst->ClipDistanceArraySize = src->Vert.ClipDistanceArraySize;
break;
case MESA_SHADER_TESS_CTRL: {
struct gl_tess_ctrl_program *dst_tcp =
@ -2083,7 +2083,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
dst_tep->Spacing = src->TessEval.Spacing;
dst_tep->VertexOrder = src->TessEval.VertexOrder;
dst_tep->PointMode = src->TessEval.PointMode;
dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance;
dst->ClipDistanceArraySize = src->TessEval.ClipDistanceArraySize;
break;
}
case MESA_SHADER_GEOMETRY: {
@ -2093,7 +2093,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
dst_gp->Invocations = src->Geom.Invocations;
dst_gp->InputType = src->Geom.InputType;
dst_gp->OutputType = src->Geom.OutputType;
dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
dst->ClipDistanceArraySize = src->Geom.ClipDistanceArraySize;
dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
dst_gp->UsesStreams = src->Geom.UsesStreams;
break;

View file

@ -97,16 +97,16 @@ static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
* No pixel transfer operations or special texel encodings allowed.
* 1D, 2D and 3D images supported.
*/
static void
memcpy_texture(struct gl_context *ctx,
GLuint dimensions,
mesa_format dstFormat,
GLint dstRowStride,
GLubyte **dstSlices,
GLint srcWidth, GLint srcHeight, GLint srcDepth,
GLenum srcFormat, GLenum srcType,
const GLvoid *srcAddr,
const struct gl_pixelstore_attrib *srcPacking)
void
_mesa_memcpy_texture(struct gl_context *ctx,
GLuint dimensions,
mesa_format dstFormat,
GLint dstRowStride,
GLubyte **dstSlices,
GLint srcWidth, GLint srcHeight, GLint srcDepth,
GLenum srcFormat, GLenum srcType,
const GLvoid *srcAddr,
const struct gl_pixelstore_attrib *srcPacking)
{
const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
srcFormat, srcType);
@ -296,11 +296,11 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS)
assert(baseInternalFormat == GL_YCBCR_MESA);
/* always just memcpy since no pixel transfer ops apply */
memcpy_texture(ctx, dims,
dstFormat,
dstRowStride, dstSlices,
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
srcAddr, srcPacking);
_mesa_memcpy_texture(ctx, dims,
dstFormat,
dstRowStride, dstSlices,
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
srcAddr, srcPacking);
/* Check if we need byte swapping */
/* XXX the logic here _might_ be wrong */
@ -899,13 +899,15 @@ _mesa_texstore_memcpy(TEXSTORE_PARAMS)
return GL_FALSE;
}
memcpy_texture(ctx, dims,
dstFormat,
dstRowStride, dstSlices,
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
srcAddr, srcPacking);
_mesa_memcpy_texture(ctx, dims,
dstFormat,
dstRowStride, dstSlices,
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
srcAddr, srcPacking);
return GL_TRUE;
}
/**
* Store user data into texture memory.
* Called via glTex[Sub]Image1/2/3D()

View file

@ -74,6 +74,17 @@ _mesa_texstore_needs_transfer_ops(struct gl_context *ctx,
GLenum baseInternalFormat,
mesa_format dstFormat);
extern void
_mesa_memcpy_texture(struct gl_context *ctx,
GLuint dimensions,
mesa_format dstFormat,
GLint dstRowStride,
GLubyte **dstSlices,
GLint srcWidth, GLint srcHeight, GLint srcDepth,
GLenum srcFormat, GLenum srcType,
const GLvoid *srcAddr,
const struct gl_pixelstore_attrib *srcPacking);
extern GLboolean
_mesa_texstore_can_use_memcpy(struct gl_context *ctx,
GLenum baseInternalFormat, mesa_format dstFormat,

View file

@ -239,7 +239,7 @@ static void update_raster_state( struct st_context *st )
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
raster->force_persample_interp =
st->can_force_persample_interp &&
!st->force_persample_in_shader &&
ctx->Multisample._Enabled &&
ctx->Multisample.SampleShading &&
ctx->Multisample.MinSampleShadingValue *

View file

@ -64,7 +64,7 @@ update_fp( struct st_context *st )
assert(stfp->Base.Base.Target == GL_FRAGMENT_PROGRAM_ARB);
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
/* _NEW_FRAG_CLAMP */
key.clamp_color = st->clamp_frag_color_in_shader &&
@ -76,7 +76,7 @@ update_fp( struct st_context *st )
* Ignore sample qualifier while computing this flag.
*/
key.persample_shading =
!st->can_force_persample_interp &&
st->force_persample_in_shader &&
!(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS)) &&
_mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1;
@ -119,7 +119,7 @@ update_vp( struct st_context *st )
assert(stvp->Base.Base.Target == GL_VERTEX_PROGRAM_ARB);
memset(&key, 0, sizeof key);
key.st = st; /* variants are per-context */
key.st = st->has_shareable_shaders ? NULL : st;
/* When this is true, we will add an extra input to the vertex
* shader translation (for edgeflags), an extra output with
@ -174,7 +174,7 @@ update_gp( struct st_context *st )
assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st->gp_variant = st_get_gp_variant(st, stgp, &key);
@ -210,7 +210,7 @@ update_tcp( struct st_context *st )
assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st->tcp_variant = st_get_tcp_variant(st, sttcp, &key);
@ -246,7 +246,7 @@ update_tep( struct st_context *st )
assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st->tep_variant = st_get_tep_variant(st, sttep, &key);
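
The hunks above change each variant key from key.st = st to key.st = st->has_shareable_shaders ? NULL : st. As a minimal sketch of why that enables cross-context sharing — assuming a memcmp()-keyed variant list, with variant_key and find_variant as illustrative names rather than Mesa API — a per-context pointer embedded in the key can never match another context's key, so writing NULL there lets every context reuse the same cached variant. The memset(&key, 0, sizeof(key)) shown in the hunks also zeroes struct padding, which is what makes a whole-struct memcmp() meaningful.

#include <stddef.h>
#include <string.h>

struct variant_key {
   void *st;             /* context pointer, or NULL when shaders are shareable */
   unsigned clamp_color; /* ... other per-variant state bits ... */
};

struct variant {
   struct variant_key key;
   struct variant *next;
};

/* Return an existing variant whose key matches, or NULL so the caller
 * compiles and caches a new one. */
struct variant *
find_variant(struct variant *list, const struct variant_key *key)
{
   for (; list; list = list->next) {
      if (memcmp(&list->key, key, sizeof(*key)) == 0)
         return list;
   }
   return NULL;
}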

View file

@ -269,7 +269,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
struct pipe_resource *vbuf = NULL;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
key.bitmap = GL_TRUE;
key.clamp_color = st->clamp_frag_color_in_shader &&
st->ctx->Color._ClampFragmentColor;

View file

@ -395,15 +395,35 @@ make_texture(struct st_context *st,
* Note that the image is actually going to be upside down in
* the texture. We deal with that with texcoords.
*/
success = _mesa_texstore(ctx, 2, /* dims */
baseInternalFormat, /* baseInternalFormat */
mformat, /* mesa_format */
transfer->stride, /* dstRowStride, bytes */
&dest, /* destSlices */
width, height, 1, /* size */
format, type, /* src format/type */
pixels, /* data source */
unpack);
if ((format == GL_RGBA || format == GL_BGRA)
&& type == GL_UNSIGNED_BYTE) {
/* Use a memcpy-based texstore to avoid software pixel swizzling.
* We'll do the necessary swizzling with the pipe_sampler_view to
* give much better performance.
* XXX in the future, expand this to accomodate more format and
* type combinations.
*/
_mesa_memcpy_texture(ctx, 2,
mformat, /* mesa_format */
transfer->stride, /* dstRowStride, bytes */
&dest, /* destSlices */
width, height, 1, /* size */
format, type, /* src format/type */
pixels, /* data source */
unpack);
success = GL_TRUE;
}
else {
success = _mesa_texstore(ctx, 2, /* dims */
baseInternalFormat, /* baseInternalFormat */
mformat, /* mesa_format */
transfer->stride, /* dstRowStride, bytes */
&dest, /* destSlices */
width, height, 1, /* size */
format, type, /* src format/type */
pixels, /* data source */
unpack);
}
/* unmap */
pipe_transfer_unmap(pipe, transfer);
@ -667,7 +687,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* user textures, plus the drawpix textures */
if (fpv) {
struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
uint num = MAX3(fpv->drawpix_sampler + 1,
fpv->pixelmap_sampler + 1,
st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
@ -914,7 +935,7 @@ get_color_fp_variant(struct st_context *st)
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
key.drawpixels = 1;
key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
ctx->Pixel.RedScale != 1.0 ||
@ -956,6 +977,69 @@ clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height,
}
/**
* Search the array of 4 swizzle components for the named component and return
* its position.
*/
static unsigned
search_swizzle(const unsigned char swizzle[4], unsigned component)
{
unsigned i;
for (i = 0; i < 4; i++) {
if (swizzle[i] == component)
return i;
}
assert(!"search_swizzle() failed");
return 0;
}
/**
* Set the sampler view's swizzle terms. This is used to handle RGBA
* swizzling when the incoming image format isn't an exact match for
* the actual texture format. For example, if we have glDrawPixels(
* GL_RGBA, GL_UNSIGNED_BYTE) and we chose the texture format
* PIPE_FORMAT_B8G8R8A8 then we can use the sampler view swizzle to
* avoid swizzling all the pixels in software in the texstore code.
*/
static void
setup_sampler_swizzle(struct pipe_sampler_view *sv, GLenum format, GLenum type)
{
if ((format == GL_RGBA || format == GL_BGRA) && type == GL_UNSIGNED_BYTE) {
const struct util_format_description *desc =
util_format_description(sv->texture->format);
unsigned c0, c1, c2, c3;
/* Every gallium driver supports at least one 32-bit packed RGBA format.
* We must have chosen one for (GL_RGBA, GL_UNSIGNED_BYTE).
*/
assert(desc->block.bits == 32);
/* invert the format's swizzle to setup the sampler's swizzle */
if (format == GL_RGBA) {
c0 = UTIL_FORMAT_SWIZZLE_X;
c1 = UTIL_FORMAT_SWIZZLE_Y;
c2 = UTIL_FORMAT_SWIZZLE_Z;
c3 = UTIL_FORMAT_SWIZZLE_W;
}
else {
assert(format == GL_BGRA);
c0 = UTIL_FORMAT_SWIZZLE_Z;
c1 = UTIL_FORMAT_SWIZZLE_Y;
c2 = UTIL_FORMAT_SWIZZLE_X;
c3 = UTIL_FORMAT_SWIZZLE_W;
}
sv->swizzle_r = search_swizzle(desc->swizzle, c0);
sv->swizzle_g = search_swizzle(desc->swizzle, c1);
sv->swizzle_b = search_swizzle(desc->swizzle, c2);
sv->swizzle_a = search_swizzle(desc->swizzle, c3);
}
else {
/* use the default sampler swizzle */
}
}
/**
* Called via ctx->Driver.DrawPixels()
*/
@ -974,6 +1058,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
int num_sampler_view = 1;
struct gl_pixelstore_attrib clippedUnpack;
struct st_fp_variant *fpv = NULL;
struct pipe_resource *pt;
/* Mesa state should be up to date by now */
assert(ctx->NewState == 0x0);
@ -1029,42 +1114,56 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
}
/* draw with textured quad */
{
struct pipe_resource *pt
= make_texture(st, width, height, format, type, unpack, pixels);
if (pt) {
sv[0] = st_create_texture_sampler_view(st->pipe, pt);
if (sv[0]) {
/* Create a second sampler view to read stencil.
* The stencil is written using the shader stencil export
* functionality. */
if (write_stencil) {
enum pipe_format stencil_format =
util_format_stencil_only(pt->format);
/* we should not be doing pixel map/transfer (see above) */
assert(num_sampler_view == 1);
sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
stencil_format);
num_sampler_view++;
}
draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
width, height,
ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
sv,
num_sampler_view,
driver_vp,
driver_fp, fpv,
color, GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
pipe_sampler_view_reference(&sv[1], NULL);
}
pipe_resource_reference(&pt, NULL);
}
/* Put glDrawPixels image into a texture */
pt = make_texture(st, width, height, format, type, unpack, pixels);
if (!pt) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
return;
}
/* create sampler view for the image */
sv[0] = st_create_texture_sampler_view(st->pipe, pt);
if (!sv[0]) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
pipe_resource_reference(&pt, NULL);
return;
}
/* Set up the sampler view's swizzle */
setup_sampler_swizzle(sv[0], format, type);
/* Create a second sampler view to read stencil. The stencil is
* written using the shader stencil export functionality.
*/
if (write_stencil) {
enum pipe_format stencil_format =
util_format_stencil_only(pt->format);
/* we should not be doing pixel map/transfer (see above) */
assert(num_sampler_view == 1);
sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
stencil_format);
if (!sv[1]) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
pipe_resource_reference(&pt, NULL);
pipe_sampler_view_reference(&sv[0], NULL);
return;
}
num_sampler_view++;
}
draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
width, height,
ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
sv,
num_sampler_view,
driver_vp,
driver_fp, fpv,
color, GL_FALSE, write_depth, write_stencil);
pipe_sampler_view_reference(&sv[0], NULL);
if (num_sampler_view > 1)
pipe_sampler_view_reference(&sv[1], NULL);
pipe_resource_reference(&pt, NULL);
}
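
The setup_sampler_swizzle()/search_swizzle() hunk above inverts the texture format's component swizzle so the sampler view undoes the channel order of the memcpy'd glDrawPixels data. Below is a standalone sketch of that inversion, with plain arrays standing in for util_format_description() and a B8G8R8A8-style layout assumed purely for illustration:

#include <assert.h>
#include <stdio.h>

enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W };   /* stored components, like UTIL_FORMAT_SWIZZLE_* in the diff */

/* Find which output channel of the format carries stored component 'comp'
 * (the same search that search_swizzle() performs in the diff). */
static unsigned
invert(const unsigned char fmt_swizzle[4], unsigned comp)
{
   unsigned i;
   for (i = 0; i < 4; i++) {
      if (fmt_swizzle[i] == comp)
         return i;
   }
   assert(!"component not found");
   return 0;
}

int main(void)
{
   /* A B8G8R8A8-style description (assumed layout): red is read from stored Z,
    * green from Y, blue from X, alpha from W. */
   const unsigned char bgra_desc[4] = { SWZ_Z, SWZ_Y, SWZ_X, SWZ_W };

   /* GL_RGBA bytes were memcpy'd straight into stored X,Y,Z,W, so the view
    * must fetch stored X for red, Y for green, Z for blue, W for alpha. */
   printf("view swizzle = %u %u %u %u\n",
          invert(bgra_desc, SWZ_X), invert(bgra_desc, SWZ_Y),
          invert(bgra_desc, SWZ_Z), invert(bgra_desc, SWZ_W)); /* 2 1 0 3: swaps R and B back */
   return 0;
}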

View file

@ -224,6 +224,7 @@ st_program_string_notify( struct gl_context *ctx,
struct gl_program *prog )
{
struct st_context *st = st_context(ctx);
gl_shader_stage stage = _mesa_program_enum_to_shader_stage(target);
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
@ -278,10 +279,10 @@ st_program_string_notify( struct gl_context *ctx,
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
}
if (ST_DEBUG & DEBUG_PRECOMPILE)
if (ST_DEBUG & DEBUG_PRECOMPILE ||
st->shader_has_one_variant[stage])
st_precompile_shader_variant(st, prog);
/* XXX check if program is legal, within limits */
return GL_TRUE;
}

View file

@ -235,9 +235,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
PIPE_BIND_SAMPLER_VIEW);
st->prefer_blit_based_texture_transfer = screen->get_param(screen,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
st->can_force_persample_interp = screen->get_param(screen,
PIPE_CAP_FORCE_PERSAMPLE_INTERP);
st->force_persample_in_shader =
screen->get_param(screen, PIPE_CAP_SAMPLE_SHADING) &&
!screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP);
st->has_shareable_shaders = screen->get_param(screen,
PIPE_CAP_SHAREABLE_SHADERS);
st->needs_texcoord_semantic =
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
st->apply_texture_swizzle_to_border_color =
@ -292,6 +294,20 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
}
/* Set which shader types can be compiled at link time. */
st->shader_has_one_variant[MESA_SHADER_VERTEX] =
st->has_shareable_shaders &&
!st->clamp_vert_color_in_shader;
st->shader_has_one_variant[MESA_SHADER_FRAGMENT] =
st->has_shareable_shaders &&
!st->clamp_frag_color_in_shader &&
!st->force_persample_in_shader;
st->shader_has_one_variant[MESA_SHADER_TESS_CTRL] = st->has_shareable_shaders;
st->shader_has_one_variant[MESA_SHADER_TESS_EVAL] = st->has_shareable_shaders;
st->shader_has_one_variant[MESA_SHADER_GEOMETRY] = st->has_shareable_shaders;
_mesa_compute_version(ctx);
if (ctx->Version == 0) {

View file

@ -98,7 +98,15 @@ struct st_context
boolean has_etc1;
boolean has_etc2;
boolean prefer_blit_based_texture_transfer;
boolean can_force_persample_interp;
boolean force_persample_in_shader;
boolean has_shareable_shaders;
/**
* Whether a shader can be created when we get its source.
* This means it has only 1 variant, not counting glBitmap and
* glDrawPixels.
*/
boolean shader_has_one_variant[MESA_SHADER_STAGES];
boolean needs_texcoord_semantic;
boolean apply_texture_swizzle_to_border_color;

View file

@ -249,6 +249,9 @@ void st_init_limits(struct pipe_screen *screen,
if (options->EmitNoLoops)
options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
else
options->MaxUnrollIterations = screen->get_shader_param(screen, sh,
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
options->LowerClipDistance = true;
}

View file

@ -395,6 +395,10 @@ st_translate_vertex_program(struct st_context *st,
if (ureg == NULL)
return false;
if (stvp->Base.Base.ClipDistanceArraySize)
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
stvp->Base.Base.ClipDistanceArraySize);
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stvp->Base.Base);
_mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
@ -1049,6 +1053,10 @@ st_translate_program_common(struct st_context *st,
memset(outputMapping, 0, sizeof(outputMapping));
memset(out_state, 0, sizeof(*out_state));
if (prog->ClipDistanceArraySize)
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
prog->ClipDistanceArraySize);
/*
* Convert Mesa program inputs to TGSI input register semantics.
*/
@ -1728,6 +1736,12 @@ destroy_program_variants_cb(GLuint key, void *data, void *userData)
void
st_destroy_program_variants(struct st_context *st)
{
/* If shaders can be shared with other contexts, the last context will
* call DeleteProgram on all shaders, releasing everything.
*/
if (st->has_shareable_shaders)
return;
/* ARB vert/frag program */
_mesa_HashWalk(st->ctx->Shared->Programs,
destroy_program_variants_cb, st);
@ -1774,7 +1788,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_vp_variant_key key;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st_get_vp_variant(st, p, &key);
break;
}
@ -1784,7 +1798,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_tcp_variant_key key;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st_get_tcp_variant(st, p, &key);
break;
}
@ -1794,7 +1808,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_tep_variant_key key;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st_get_tep_variant(st, p, &key);
break;
}
@ -1804,7 +1818,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_gp_variant_key key;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st_get_gp_variant(st, p, &key);
break;
}
@ -1814,7 +1828,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_fp_variant_key key;
memset(&key, 0, sizeof(key));
key.st = st;
key.st = st->has_shareable_shaders ? NULL : st;
st_get_fp_variant(st, p, &key);
break;
}

View file

@ -124,19 +124,19 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
GLuint i;
LOCAL_VARS;
(void) flags;
INIT(GL_LINE_LOOP);
if (start+1 < count) {
if (TEST_PRIM_BEGIN(flags)) {
RESET_STIPPLE;
/* draw the first line from v[0] to v[1] */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(start), ELT(start+1) );
else
RENDER_LINE( ELT(start+1), ELT(start) );
}
/* draw lines from v[1] to v[n-1] */
for ( i = start+2 ; i < count ; i++) {
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(i-1), ELT(i) );
@ -145,6 +145,7 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
}
if ( TEST_PRIM_END(flags)) {
/* draw final line from v[n-1] to v[0] (the very first vertex) */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(count-1), ELT(start) );
else

View file

@ -196,6 +196,26 @@ vbo_get_default_vals_as_union(GLenum format)
}
}
/**
* Compute the max number of vertices which can be stored in
* a vertex buffer, given the current vertex size, and the amount
* of space already used.
*/
static inline unsigned
vbo_compute_max_verts(const struct vbo_exec_context *exec)
{
unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat));
assert(n > 0);
/* Subtract one so we're always sure to have room for an extra
* vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion.
*/
n--;
return n;
}
#ifdef __cplusplus
} // extern "C"
#endif
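
The vbo_compute_max_verts() helper above divides the bytes left in the vertex buffer by the current vertex size and then reserves one slot for the extra GL_LINE_LOOP closing vertex. A worked example with hypothetical numbers (not taken from the diff):

#include <assert.h>

int main(void)
{
   /* hypothetical: 4160 bytes of buffer space left, 16-float (64-byte) vertices */
   const unsigned space_left = 4160;
   const unsigned vertex_bytes = 16 * sizeof(float);   /* 64 bytes per vertex */
   unsigned n = space_left / vertex_bytes;             /* 65 vertices would fit */
   n--;               /* keep one slot free for the loop-closing vertex copy */
   assert(n == 64);
   return 0;
}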

View file

@ -160,8 +160,6 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
void vbo_exec_vtx_map( struct vbo_exec_context *exec );
void vbo_exec_vtx_wrap( struct vbo_exec_context *exec );
void vbo_exec_eval_update( struct vbo_exec_context *exec );
void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec,

View file

@ -61,7 +61,8 @@ static void reset_attrfv( struct vbo_exec_context *exec );
/**
* Close off the last primitive, execute the buffer, restart the
* primitive.
* primitive. This is called when we fill a vertex buffer before
* hitting glEnd.
*/
static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
{
@ -71,17 +72,31 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
}
else {
GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
const GLuint last_begin = last_prim->begin;
GLuint last_count;
if (_mesa_inside_begin_end(exec->ctx)) {
GLint i = exec->vtx.prim_count - 1;
assert(i >= 0);
exec->vtx.prim[i].count = (exec->vtx.vert_count -
exec->vtx.prim[i].start);
last_prim->count = exec->vtx.vert_count - last_prim->start;
}
last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
last_count = last_prim->count;
/* Special handling for wrapping GL_LINE_LOOP */
if (last_prim->mode == GL_LINE_LOOP &&
last_count > 0 &&
!last_prim->end) {
/* draw this section of the incomplete line loop as a line strip */
last_prim->mode = GL_LINE_STRIP;
if (!last_prim->begin) {
/* This is not the first section of the line loop, so don't
* draw the 0th vertex. We're saving it until we draw the
* very last section of the loop.
*/
last_prim->start++;
last_prim->count--;
}
}
/* Execute the buffer and save copied vertices.
*/
@ -98,6 +113,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
if (_mesa_inside_begin_end(exec->ctx)) {
exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
exec->vtx.prim[0].begin = 0;
exec->vtx.prim[0].start = 0;
exec->vtx.prim[0].count = 0;
exec->vtx.prim_count++;
@ -113,7 +129,8 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
* Deal with buffer wrapping where provoked by the vertex buffer
* filling up, as opposed to upgrade_vertex().
*/
void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
static void
vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
{
fi_type *data = exec->vtx.copied.buffer;
GLuint i;
@ -292,8 +309,7 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
*/
exec->vtx.attrsz[attr] = newSize;
exec->vtx.vertex_size += newSize - oldSize;
exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat)));
exec->vtx.max_vert = vbo_compute_max_verts(exec);
exec->vtx.vert_count = 0;
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
@ -446,10 +462,6 @@ do { \
\
assert(sz == 1 || sz == 2); \
\
if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \
vbo_exec_begin_vertices(ctx); \
} \
\
/* check if attribute size or type is changing */ \
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
unlikely(exec->vtx.attrtype[A] != T)) { \
@ -470,6 +482,15 @@ do { \
/* This is a glVertex call */ \
GLuint i; \
\
if (unlikely((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0)) { \
vbo_exec_begin_vertices(ctx); \
} \
\
if (unlikely(!exec->vtx.buffer_ptr)) { \
vbo_exec_vtx_map(exec); \
} \
assert(exec->vtx.buffer_ptr); \
\
/* copy 32-bit words */ \
for (i = 0; i < exec->vtx.vertex_size; i++) \
exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \
@ -482,7 +503,10 @@ do { \
\
if (++exec->vtx.vert_count >= exec->vtx.max_vert) \
vbo_exec_vtx_wrap( exec ); \
} \
} else { \
/* we now have accumulated per-vertex attributes */ \
ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \
} \
} while (0)
#define ERROR(err) _mesa_error( ctx, err, __func__ )
@ -814,11 +838,28 @@ static void GLAPIENTRY vbo_exec_End( void )
if (exec->vtx.prim_count > 0) {
/* close off current primitive */
int idx = exec->vtx.vert_count;
int i = exec->vtx.prim_count - 1;
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
exec->vtx.prim[i].end = 1;
exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start;
last_prim->end = 1;
last_prim->count = exec->vtx.vert_count - last_prim->start;
/* Special handling for GL_LINE_LOOP */
if (last_prim->mode == GL_LINE_LOOP && last_prim->begin == 0) {
/* We're finishing drawing a line loop. Append 0th vertex onto
* end of vertex buffer so we can draw it as a line strip.
*/
const fi_type *src = exec->vtx.buffer_map;
fi_type *dst = exec->vtx.buffer_map +
exec->vtx.vert_count * exec->vtx.vertex_size;
/* copy 0th vertex to end of buffer */
memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
assert(last_prim->start == 0);
last_prim->start++; /* skip vertex0 */
/* note that last_prim->count stays unchanged */
last_prim->mode = GL_LINE_STRIP;
}
try_vbo_merge(exec);
}
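
The GL_LINE_LOOP handling in vbo_exec_End() above closes the loop by appending a copy of the loop's first vertex and drawing the result as GL_LINE_STRIP. The sketch below illustrates only the identity this relies on; the buffer-wrap bookkeeping in vbo_exec_wrap_buffers() is not modeled, and the names are illustrative:

#include <stdio.h>

static void draw_line(int a, int b) { printf("line %d-%d\n", a, b); }

/* Draw the segments of a line loop over v[0..n-1] by walking the
 * equivalent strip v[0..n-1] followed by a copy of v[0]. */
static void loop_as_strip(const int *v, int n)
{
   int i;
   for (i = 0; i + 1 < n; i++)
      draw_line(v[i], v[i + 1]);
   draw_line(v[n - 1], v[0]);   /* segment contributed by the appended copy of v[0] */
}

int main(void)
{
   const int v[4] = { 10, 11, 12, 13 };
   loop_as_strip(v, 4);   /* 10-11, 11-12, 12-13, 13-10 */
   return 0;
}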

Some files were not shown because too many files have changed in this diff