mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
Merge remote-tracking branch 'mesa-public/master' into vulkan
This commit is contained in:
commit
fed60e3c73
102 changed files with 2593 additions and 1232 deletions
|
|
@ -108,6 +108,8 @@ AC_SYS_LARGEFILE
|
|||
LT_PREREQ([2.2])
|
||||
LT_INIT([disable-static])
|
||||
|
||||
AC_CHECK_PROG(RM, rm, [rm -f])
|
||||
|
||||
AX_PROG_BISON([],
|
||||
AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
|
||||
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30:
|
|||
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe, softpipe)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
|
||||
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
|
||||
<li>GL_ARB_texture_query_lod on softpipe</li>
|
||||
<li>GL_ARB_texture_view on radeonsi</li>
|
||||
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
|
||||
<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -355,8 +355,9 @@ struct draw_vertex_info {
|
|||
};
|
||||
|
||||
/* these flags are set if the primitive is a segment of a larger one */
|
||||
#define DRAW_SPLIT_BEFORE 0x1
|
||||
#define DRAW_SPLIT_AFTER 0x2
|
||||
#define DRAW_SPLIT_BEFORE 0x1
|
||||
#define DRAW_SPLIT_AFTER 0x2
|
||||
#define DRAW_LINE_LOOP_AS_STRIP 0x4
|
||||
|
||||
struct draw_prim_info {
|
||||
boolean linear;
|
||||
|
|
|
|||
|
|
@ -359,6 +359,16 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
|
|||
}
|
||||
|
||||
|
||||
/**
 * Pick the primitive type to report for a run.
 *
 * \param prim   the input primitive type
 * \param flags  DRAW_* flags for this run
 * \return PIPE_PRIM_LINE_STRIP when DRAW_LINE_LOOP_AS_STRIP is set in
 *         \p flags, otherwise \p prim unchanged.
 */
static inline unsigned
|
||||
prim_type(unsigned prim, unsigned flags)
|
||||
{
|
||||
if (flags & DRAW_LINE_LOOP_AS_STRIP)
|
||||
return PIPE_PRIM_LINE_STRIP;
|
||||
else
|
||||
return prim;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
fetch_pipeline_run(struct draw_pt_middle_end *middle,
|
||||
const unsigned *fetch_elts,
|
||||
|
|
@ -380,7 +390,7 @@ fetch_pipeline_run(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = draw_count;
|
||||
prim_info.elts = draw_elts;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &draw_count;
|
||||
|
|
@ -408,7 +418,7 @@ fetch_pipeline_linear_run(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = count;
|
||||
prim_info.elts = NULL;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &count;
|
||||
|
|
@ -439,7 +449,7 @@ fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = draw_count;
|
||||
prim_info.elts = draw_elts;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &draw_count;
|
||||
|
|
|
|||
|
|
@ -473,6 +473,16 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
|
|||
}
|
||||
|
||||
|
||||
/**
 * Pick the primitive type to report for a run.
 *
 * \param prim   the input primitive type
 * \param flags  DRAW_* flags for this run
 * \return PIPE_PRIM_LINE_STRIP when DRAW_LINE_LOOP_AS_STRIP is set in
 *         \p flags, otherwise \p prim unchanged.
 */
static inline unsigned
|
||||
prim_type(unsigned prim, unsigned flags)
|
||||
{
|
||||
if (flags & DRAW_LINE_LOOP_AS_STRIP)
|
||||
return PIPE_PRIM_LINE_STRIP;
|
||||
else
|
||||
return prim;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
llvm_middle_end_run(struct draw_pt_middle_end *middle,
|
||||
const unsigned *fetch_elts,
|
||||
|
|
@ -494,7 +504,7 @@ llvm_middle_end_run(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = draw_count;
|
||||
prim_info.elts = draw_elts;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &draw_count;
|
||||
|
|
@ -522,7 +532,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = count;
|
||||
prim_info.elts = NULL;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &count;
|
||||
|
|
@ -552,7 +562,7 @@ llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
|
|||
prim_info.start = 0;
|
||||
prim_info.count = draw_count;
|
||||
prim_info.elts = draw_elts;
|
||||
prim_info.prim = fpme->input_prim;
|
||||
prim_info.prim = prim_type(fpme->input_prim, prim_flags);
|
||||
prim_info.flags = prim_flags;
|
||||
prim_info.primitive_count = 1;
|
||||
prim_info.primitive_lengths = &draw_count;
|
||||
|
|
|
|||
|
|
@ -249,6 +249,9 @@ vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
|
|||
|
||||
assert(icount + !!close_loop <= vsplit->segment_size);
|
||||
|
||||
/* need to draw the sections of the line loop as line strips */
|
||||
flags |= DRAW_LINE_LOOP_AS_STRIP;
|
||||
|
||||
if (close_loop) {
|
||||
for (nr = 0; nr < icount; nr++)
|
||||
vsplit->fetch_elts[nr] = istart + nr;
|
||||
|
|
|
|||
|
|
@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* if we get here, we missed a shader cap above (and should have seen
|
||||
* a compiler warning.)
|
||||
|
|
|
|||
|
|
@ -474,6 +474,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
|
|||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* if we get here, we missed a shader cap above (and should have seen
|
||||
* a compiler warning.)
|
||||
|
|
|
|||
|
|
@ -369,19 +369,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
procType == TGSI_PROCESSOR_GEOMETRY ||
|
||||
procType == TGSI_PROCESSOR_TESS_CTRL ||
|
||||
procType == TGSI_PROCESSOR_TESS_EVAL) {
|
||||
if (semName == TGSI_SEMANTIC_CLIPDIST) {
|
||||
info->num_written_clipdistance +=
|
||||
util_bitcount(fulldecl->Declaration.UsageMask);
|
||||
info->clipdist_writemask |=
|
||||
fulldecl->Declaration.UsageMask << (semIndex*4);
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_CULLDIST) {
|
||||
info->num_written_culldistance +=
|
||||
util_bitcount(fulldecl->Declaration.UsageMask);
|
||||
info->culldist_writemask |=
|
||||
fulldecl->Declaration.UsageMask << (semIndex*4);
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
|
||||
if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
|
||||
info->writes_viewport_index = TRUE;
|
||||
}
|
||||
else if (semName == TGSI_SEMANTIC_LAYER) {
|
||||
|
|
@ -432,9 +420,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
const struct tgsi_full_property *fullprop
|
||||
= &parse.FullToken.FullProperty;
|
||||
unsigned name = fullprop->Property.PropertyName;
|
||||
unsigned value = fullprop->u[0].Data;
|
||||
|
||||
assert(name < Elements(info->properties));
|
||||
info->properties[name] = fullprop->u[0].Data;
|
||||
info->properties[name] = value;
|
||||
|
||||
switch (name) {
|
||||
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
|
||||
info->num_written_clipdistance = value;
|
||||
info->clipdist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
|
||||
info->num_written_culldistance = value;
|
||||
info->culldist_writemask |= (1 << value) - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -137,6 +137,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
|
|||
"TES_SPACING",
|
||||
"TES_VERTEX_ORDER_CW",
|
||||
"TES_POINT_MODE",
|
||||
"NUM_CLIPDIST_ENABLED",
|
||||
"NUM_CULLDIST_ENABLED",
|
||||
};
|
||||
|
||||
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
|
||||
|
|
|
|||
|
|
@ -276,6 +276,8 @@ The integer capabilities:
|
|||
GL4 hardware will likely need to emulate it with a shader variant, or by
|
||||
selecting the interpolation weights with a conditional assignment
|
||||
in the shader.
|
||||
* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
|
||||
pipe_context.
|
||||
|
||||
|
||||
|
||||
|
|
@ -365,6 +367,10 @@ to be 0.
|
|||
are supported.
|
||||
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
|
||||
ignore tgsi_declaration_range::Last for shader inputs and outputs.
|
||||
* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number
|
||||
of iterations that a loop may have and still be unrolled. It is only
|
||||
a hint to state trackers. Whether any loops will be unrolled is not
|
||||
guaranteed.
|
||||
|
||||
|
||||
.. _pipe_compute_cap:
|
||||
|
|
|
|||
|
|
@ -3126,6 +3126,16 @@ TES_POINT_MODE
|
|||
If set to a non-zero value, this turns on point mode for the tessellator,
|
||||
which means that points will be generated instead of primitives.
|
||||
|
||||
NUM_CLIPDIST_ENABLED
|
||||
""""""""""""""""""""
|
||||
|
||||
How many clip distance scalar outputs are enabled.
|
||||
|
||||
NUM_CULLDIST_ENABLED
|
||||
""""""""""""""""""""
|
||||
|
||||
How many cull distance scalar outputs are enabled.
|
||||
|
||||
|
||||
Texture Sampling and Texture Formats
|
||||
------------------------------------
|
||||
|
|
|
|||
|
|
@ -237,6 +237,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
@ -411,6 +412,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
|
||||
return 0;
|
||||
|
|
@ -249,6 +251,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
|||
|
|
@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
|||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
|
@ -471,6 +473,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
|||
|
|
@ -298,6 +298,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -171,6 +171,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -263,6 +264,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("unknown vertex shader param %d\n", param);
|
||||
return 0;
|
||||
|
|
@ -304,6 +307,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("unknown fragment shader param %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -216,6 +216,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -299,6 +300,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -202,6 +202,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
@ -312,6 +313,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return 16; /* would be 32 in linked (OpenGL-style) mode */
|
||||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return 16; /* XXX not sure if more are really safe */
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -197,6 +197,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
/* SWTCL-only features. */
|
||||
|
|
@ -302,6 +303,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
|
|
@ -358,6 +361,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -343,6 +343,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
@ -510,6 +511,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
/* due to a bug in the shader compiler, some loops hang
|
||||
* if they are not unrolled, see:
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=86720
|
||||
*/
|
||||
return 255;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
|
|||
util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
|
||||
util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
|
||||
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
|
||||
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
|
||||
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
|
||||
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
|
||||
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
|
||||
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
|
||||
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
|
||||
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
|
||||
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
|
||||
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
|
||||
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
|
||||
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
|
||||
util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
|
||||
util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
|
||||
|
|
|
|||
|
|
@ -31,15 +31,15 @@
|
|||
#include "ddebug/dd_util.h"
|
||||
|
||||
|
||||
static void si_dump_shader(struct si_shader_selector *sel, const char *name,
|
||||
static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
|
||||
FILE *f)
|
||||
{
|
||||
if (!sel || !sel->current)
|
||||
if (!state->cso || !state->current)
|
||||
return;
|
||||
|
||||
fprintf(f, "%s shader disassembly:\n", name);
|
||||
si_dump_shader_key(sel->type, &sel->current->key, f);
|
||||
fprintf(f, "%s\n\n", sel->current->binary.disasm_string);
|
||||
si_dump_shader_key(state->cso->type, &state->current->key, f);
|
||||
fprintf(f, "%s\n\n", state->current->binary.disasm_string);
|
||||
}
|
||||
|
||||
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
|
||||
|
|
@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
|
|||
if (flags & PIPE_DEBUG_DEVICE_IS_HUNG)
|
||||
si_dump_debug_registers(sctx, f);
|
||||
|
||||
si_dump_shader(sctx->vs_shader, "Vertex", f);
|
||||
si_dump_shader(sctx->tcs_shader, "Tessellation control", f);
|
||||
si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f);
|
||||
si_dump_shader(sctx->gs_shader, "Geometry", f);
|
||||
si_dump_shader(sctx->ps_shader, "Fragment", f);
|
||||
si_dump_shader(&sctx->vs_shader, "Vertex", f);
|
||||
si_dump_shader(&sctx->tcs_shader, "Tessellation control", f);
|
||||
si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f);
|
||||
si_dump_shader(&sctx->gs_shader, "Geometry", f);
|
||||
si_dump_shader(&sctx->ps_shader, "Fragment", f);
|
||||
|
||||
si_dump_last_bo_list(sctx, f);
|
||||
si_dump_last_ib(sctx, f);
|
||||
|
|
|
|||
|
|
@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx,
|
|||
void si_shader_change_notify(struct si_context *sctx)
|
||||
{
|
||||
/* VS can be bound as VS, ES, or LS. */
|
||||
if (sctx->tes_shader)
|
||||
if (sctx->tes_shader.cso)
|
||||
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
|
||||
R_00B530_SPI_SHADER_USER_DATA_LS_0);
|
||||
else if (sctx->gs_shader)
|
||||
else if (sctx->gs_shader.cso)
|
||||
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
|
||||
R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
||||
else
|
||||
|
|
@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx)
|
|||
R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
||||
|
||||
/* TES can be bound as ES, VS, or not bound. */
|
||||
if (sctx->tes_shader) {
|
||||
if (sctx->gs_shader)
|
||||
if (sctx->tes_shader.cso) {
|
||||
if (sctx->gs_shader.cso)
|
||||
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
|
||||
R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
||||
else
|
||||
|
|
@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
|
|||
unsigned i;
|
||||
uint32_t *sh_base = sctx->shader_userdata.sh_base;
|
||||
|
||||
if (sctx->gs_shader) {
|
||||
if (sctx->gs_shader.cso) {
|
||||
/* The VS copy shader needs these for clipping, streamout, and rings. */
|
||||
unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
|
||||
unsigned i = PIPE_SHADER_VERTEX;
|
||||
|
|
@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
|
|||
/* The TESSEVAL shader needs this for the ESGS ring buffer. */
|
||||
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
|
||||
R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
|
||||
} else if (sctx->tes_shader) {
|
||||
} else if (sctx->tes_shader.cso) {
|
||||
/* The TESSEVAL shader needs this for streamout. */
|
||||
si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
|
||||
R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
|
||||
|
|
|
|||
|
|
@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
|
||||
if (sctx->dummy_pixel_shader)
|
||||
sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
|
||||
if (sctx->fixed_func_tcs_shader)
|
||||
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
|
||||
if (sctx->fixed_func_tcs_shader.cso)
|
||||
sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
|
||||
if (sctx->custom_dsa_flush)
|
||||
sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
|
||||
if (sctx->custom_blend_resolve)
|
||||
|
|
@ -293,7 +293,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
|
||||
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
|
||||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
case PIPE_CAP_TEXTURE_QUERY_LOD:
|
||||
case PIPE_CAP_TEXTURE_GATHER_SM5:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
|
|
@ -335,7 +337,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_USER_VERTEX_BUFFERS:
|
||||
case PIPE_CAP_FAKE_SW_MSAA:
|
||||
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
|
||||
case PIPE_CAP_SAMPLER_VIEW_TARGET:
|
||||
case PIPE_CAP_VERTEXID_NOBASE:
|
||||
return 0;
|
||||
|
||||
|
|
@ -507,6 +508,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -152,6 +152,15 @@ struct si_viewports {
|
|||
struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
|
||||
};
|
||||
|
||||
/* A shader state consists of the shader selector, which is a constant state
|
||||
* object shared by multiple contexts and shouldn't be modified, and
|
||||
* the current shader variant selected for this context.
|
||||
*/
|
||||
struct si_shader_ctx_state {
|
||||
struct si_shader_selector *cso; /* shader selector: constant state object, shared by multiple contexts */
|
||||
struct si_shader *current; /* shader variant currently selected for this context */
|
||||
};
|
||||
|
||||
struct si_context {
|
||||
struct r600_common_context b;
|
||||
struct blitter_context *blitter;
|
||||
|
|
@ -162,7 +171,7 @@ struct si_context {
|
|||
void *pstipple_sampler_state;
|
||||
struct si_screen *screen;
|
||||
struct pipe_fence_handle *last_gfx_fence;
|
||||
struct si_shader_selector *fixed_func_tcs_shader;
|
||||
struct si_shader_ctx_state fixed_func_tcs_shader;
|
||||
LLVMTargetMachineRef tm;
|
||||
|
||||
/* Atoms (direct states). */
|
||||
|
|
@ -199,11 +208,11 @@ struct si_context {
|
|||
void *dummy_pixel_shader;
|
||||
|
||||
/* shaders */
|
||||
struct si_shader_selector *ps_shader;
|
||||
struct si_shader_selector *gs_shader;
|
||||
struct si_shader_selector *vs_shader;
|
||||
struct si_shader_selector *tcs_shader;
|
||||
struct si_shader_selector *tes_shader;
|
||||
struct si_shader_ctx_state ps_shader;
|
||||
struct si_shader_ctx_state gs_shader;
|
||||
struct si_shader_ctx_state vs_shader;
|
||||
struct si_shader_ctx_state tcs_shader;
|
||||
struct si_shader_ctx_state tes_shader;
|
||||
struct si_cs_shader_state cs_shader_state;
|
||||
|
||||
/* shader information */
|
||||
|
|
|
|||
|
|
@ -179,15 +179,18 @@ struct radeon_shader_reloc;
|
|||
|
||||
struct si_shader;
|
||||
|
||||
/* A shader selector is a gallium CSO and contains shader variants and
|
||||
* binaries for one TGSI program. This can be shared by multiple contexts.
|
||||
*/
|
||||
struct si_shader_selector {
|
||||
struct si_shader *current;
|
||||
pipe_mutex mutex;
|
||||
struct si_shader *first_variant; /* immutable after the first variant */
|
||||
struct si_shader *last_variant; /* mutable */
|
||||
|
||||
struct tgsi_token *tokens;
|
||||
struct pipe_stream_output_info so;
|
||||
struct tgsi_shader_info info;
|
||||
|
||||
unsigned num_shaders;
|
||||
|
||||
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
|
||||
unsigned type;
|
||||
|
||||
|
|
@ -241,7 +244,7 @@ union si_shader_key {
|
|||
uint64_t es_enabled_outputs;
|
||||
unsigned as_es:1; /* export shader */
|
||||
unsigned as_ls:1; /* local shader */
|
||||
unsigned export_prim_id; /* when PS needs it and GS is disabled */
|
||||
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
|
||||
} vs;
|
||||
struct {
|
||||
unsigned prim_mode:3;
|
||||
|
|
@ -252,7 +255,7 @@ union si_shader_key {
|
|||
* This describes how outputs are laid out in memory. */
|
||||
uint64_t es_enabled_outputs;
|
||||
unsigned as_es:1; /* export shader */
|
||||
unsigned export_prim_id; /* when PS needs it and GS is disabled */
|
||||
unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
|
||||
} tes; /* tessellation evaluation shader */
|
||||
};
|
||||
|
||||
|
|
@ -293,24 +296,24 @@ struct si_shader {
|
|||
|
||||
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
|
||||
{
|
||||
if (sctx->gs_shader)
|
||||
return &sctx->gs_shader->info;
|
||||
else if (sctx->tes_shader)
|
||||
return &sctx->tes_shader->info;
|
||||
else if (sctx->vs_shader)
|
||||
return &sctx->vs_shader->info;
|
||||
if (sctx->gs_shader.cso)
|
||||
return &sctx->gs_shader.cso->info;
|
||||
else if (sctx->tes_shader.cso)
|
||||
return &sctx->tes_shader.cso->info;
|
||||
else if (sctx->vs_shader.cso)
|
||||
return &sctx->vs_shader.cso->info;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
|
||||
{
|
||||
if (sctx->gs_shader)
|
||||
return sctx->gs_shader->current->gs_copy_shader;
|
||||
else if (sctx->tes_shader)
|
||||
return sctx->tes_shader->current;
|
||||
if (sctx->gs_shader.current)
|
||||
return sctx->gs_shader.current->gs_copy_shader;
|
||||
else if (sctx->tes_shader.current)
|
||||
return sctx->tes_shader.current;
|
||||
else
|
||||
return sctx->vs_shader->current;
|
||||
return sctx->vs_shader.current;
|
||||
}
|
||||
|
||||
static inline bool si_vs_exports_prim_id(struct si_shader *shader)
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
|
|||
* Reproducible with Unigine Heaven 4.0 and drirc missing.
|
||||
*/
|
||||
if (blend->dual_src_blend &&
|
||||
(sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
|
||||
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
|
||||
mask = 0;
|
||||
|
||||
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
|
||||
|
|
@ -1535,9 +1535,14 @@ static unsigned si_tex_compare(unsigned compare)
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
|
||||
static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
|
||||
unsigned nr_samples)
|
||||
{
|
||||
switch (dim) {
|
||||
if (view_target == PIPE_TEXTURE_CUBE ||
|
||||
view_target == PIPE_TEXTURE_CUBE_ARRAY)
|
||||
res_target = view_target;
|
||||
|
||||
switch (res_target) {
|
||||
default:
|
||||
case PIPE_TEXTURE_1D:
|
||||
return V_008F1C_SQ_RSRC_IMG_1D;
|
||||
|
|
@ -2391,6 +2396,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
struct radeon_surf_level *surflevel;
|
||||
int first_non_void;
|
||||
uint64_t va;
|
||||
unsigned last_layer = state->u.tex.last_layer;
|
||||
|
||||
if (view == NULL)
|
||||
return NULL;
|
||||
|
|
@ -2596,6 +2602,13 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
|
||||
depth = texture->array_size / 6;
|
||||
|
||||
/* This is not needed if state trackers set last_layer correctly. */
|
||||
if (state->target == PIPE_TEXTURE_1D ||
|
||||
state->target == PIPE_TEXTURE_2D ||
|
||||
state->target == PIPE_TEXTURE_RECT ||
|
||||
state->target == PIPE_TEXTURE_CUBE)
|
||||
last_layer = state->u.tex.first_layer;
|
||||
|
||||
va = tmp->resource.gpu_address + surflevel[base_level].offset;
|
||||
|
||||
view->state[0] = va >> 8;
|
||||
|
|
@ -2615,10 +2628,11 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
last_level) |
|
||||
S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
|
||||
S_008F1C_POW2_PAD(texture->last_level > 0) |
|
||||
S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
|
||||
S_008F1C_TYPE(si_tex_dim(texture->target, state->target,
|
||||
texture->nr_samples)));
|
||||
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
|
||||
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
|
||||
S_008F24_LAST_ARRAY(state->u.tex.last_layer));
|
||||
S_008F24_LAST_ARRAY(last_layer));
|
||||
view->state[6] = 0;
|
||||
view->state[7] = 0;
|
||||
|
||||
|
|
@ -2653,11 +2667,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
|
||||
S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
|
||||
S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
|
||||
S_008F1C_TYPE(si_tex_dim(texture->target, 0));
|
||||
S_008F1C_TYPE(si_tex_dim(texture->target,
|
||||
state->target, 0));
|
||||
view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
|
||||
S_008F20_PITCH(tmp->fmask.pitch - 1);
|
||||
view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
|
||||
S_008F24_LAST_ARRAY(state->u.tex.last_layer);
|
||||
S_008F24_LAST_ARRAY(last_layer);
|
||||
view->fmask_state[6] = 0;
|
||||
view->fmask_state[7] = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
|||
unsigned *num_patches)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
|
||||
struct si_shader_selector *ls = sctx->vs_shader;
|
||||
struct si_shader_ctx_state *ls = &sctx->vs_shader;
|
||||
/* The TES pointer will only be used for sctx->last_tcs.
|
||||
* It would be wrong to think that TCS = TES. */
|
||||
struct si_shader_selector *tcs =
|
||||
sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
|
||||
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
|
||||
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
|
||||
unsigned num_tcs_input_cp = info->vertices_per_patch;
|
||||
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
|
||||
|
|
@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
|||
|
||||
/* This calculates how shader inputs and outputs among VS, TCS, and TES
|
||||
* are laid out in LDS. */
|
||||
num_tcs_inputs = util_last_bit64(ls->outputs_written);
|
||||
num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
|
||||
|
||||
if (sctx->tcs_shader) {
|
||||
if (sctx->tcs_shader.cso) {
|
||||
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
|
||||
num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
|
||||
num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
|
||||
|
|
@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
|||
pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
|
||||
output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
|
||||
|
||||
output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
|
||||
output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0;
|
||||
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
|
||||
|
||||
lds_size = output_patch0_offset + output_patch_size * *num_patches;
|
||||
|
|
@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
bool partial_vs_wave = false;
|
||||
bool partial_es_wave = false;
|
||||
|
||||
if (sctx->gs_shader)
|
||||
if (sctx->gs_shader.cso)
|
||||
primgroup_size = 64; /* recommended with a GS */
|
||||
|
||||
if (sctx->tes_shader) {
|
||||
if (sctx->tes_shader.cso) {
|
||||
unsigned num_cp_out =
|
||||
sctx->tcs_shader ?
|
||||
sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
|
||||
sctx->tcs_shader.cso ?
|
||||
sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
|
||||
info->vertices_per_patch;
|
||||
unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
|
||||
|
||||
|
|
@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
|
||||
/* SWITCH_ON_EOI must be set if PrimID is used.
|
||||
* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
|
||||
if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
|
||||
sctx->tes_shader->info.uses_primid) {
|
||||
if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
|
||||
sctx->tes_shader.cso->info.uses_primid) {
|
||||
ia_switch_on_eoi = true;
|
||||
partial_es_wave = true;
|
||||
}
|
||||
|
|
@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
if ((sctx->b.family == CHIP_TAHITI ||
|
||||
sctx->b.family == CHIP_PITCAIRN ||
|
||||
sctx->b.family == CHIP_BONAIRE) &&
|
||||
sctx->gs_shader)
|
||||
sctx->gs_shader.cso)
|
||||
partial_vs_wave = true;
|
||||
}
|
||||
|
||||
|
|
@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
|
|||
{
|
||||
unsigned num_output_cp;
|
||||
|
||||
if (!sctx->tes_shader)
|
||||
if (!sctx->tes_shader.cso)
|
||||
return 0;
|
||||
|
||||
num_output_cp = sctx->tcs_shader ?
|
||||
sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
|
||||
num_output_cp = sctx->tcs_shader.cso ?
|
||||
sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
|
||||
info->vertices_per_patch;
|
||||
|
||||
return S_028B58_NUM_PATCHES(num_patches) |
|
||||
|
|
@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
|
|||
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
|
||||
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
|
||||
|
||||
if (sctx->tes_shader)
|
||||
if (sctx->tes_shader.cso)
|
||||
si_emit_derived_tess_state(sctx, info, &num_patches);
|
||||
|
||||
ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
|
||||
|
|
@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
(info->indexed || !info->count_from_stream_output))
|
||||
return;
|
||||
|
||||
if (!sctx->ps_shader || !sctx->vs_shader) {
|
||||
if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
|
||||
if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) {
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
|
|
@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
* This must be done after si_decompress_textures, which can call
|
||||
* draw_vbo recursively, and before si_update_shaders, which uses
|
||||
* current_rast_prim for this draw_vbo call. */
|
||||
if (sctx->gs_shader)
|
||||
sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
|
||||
else if (sctx->tes_shader)
|
||||
if (sctx->gs_shader.cso)
|
||||
sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim;
|
||||
else if (sctx->tes_shader.cso)
|
||||
sctx->current_rast_prim =
|
||||
sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
|
||||
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
|
||||
else
|
||||
sctx->current_rast_prim = info->mode;
|
||||
|
||||
|
|
|
|||
|
|
@ -404,6 +404,7 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned spi_baryc_cntl = 0;
|
||||
uint64_t va;
|
||||
bool has_centroid;
|
||||
|
||||
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
|
|
@ -435,8 +436,11 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
}
|
||||
}
|
||||
|
||||
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
|
||||
|
||||
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
|
||||
S_0286D8_BC_OPTIMIZE_DISABLE(1);
|
||||
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
|
||||
|
||||
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
|
||||
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
|
||||
|
|
@ -523,26 +527,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
|||
key->vs.instance_divisors[i] =
|
||||
sctx->vertex_elements->elements[i].instance_divisor;
|
||||
|
||||
if (sctx->tes_shader)
|
||||
if (sctx->tes_shader.cso)
|
||||
key->vs.as_ls = 1;
|
||||
else if (sctx->gs_shader) {
|
||||
else if (sctx->gs_shader.cso) {
|
||||
key->vs.as_es = 1;
|
||||
key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
|
||||
key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
|
||||
}
|
||||
|
||||
if (!sctx->gs_shader && sctx->ps_shader &&
|
||||
sctx->ps_shader->info.uses_primid)
|
||||
if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
|
||||
sctx->ps_shader.cso->info.uses_primid)
|
||||
key->vs.export_prim_id = 1;
|
||||
break;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
key->tcs.prim_mode =
|
||||
sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
|
||||
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
|
||||
break;
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
if (sctx->gs_shader) {
|
||||
if (sctx->gs_shader.cso) {
|
||||
key->tes.as_es = 1;
|
||||
key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
|
||||
} else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
|
||||
key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
|
||||
} else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
|
||||
key->tes.export_prim_id = 1;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
|
|
@ -589,11 +593,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
|||
|
||||
/* Select the hw shader variant depending on the current state. */
|
||||
static int si_shader_select(struct pipe_context *ctx,
|
||||
struct si_shader_selector *sel)
|
||||
struct si_shader_ctx_state *state)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = state->cso;
|
||||
struct si_shader *current = state->current;
|
||||
union si_shader_key key;
|
||||
struct si_shader * shader = NULL;
|
||||
struct si_shader *iter, *shader = NULL;
|
||||
int r;
|
||||
|
||||
si_shader_selector_key(ctx, sel, &key);
|
||||
|
|
@ -602,49 +608,51 @@ static int si_shader_select(struct pipe_context *ctx,
|
|||
* This path is also used for most shaders that don't need multiple
|
||||
* variants, it will cost just a computation of the key and this
|
||||
* test. */
|
||||
if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
|
||||
if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* lookup if we have other variants in the list */
|
||||
if (sel->num_shaders > 1) {
|
||||
struct si_shader *p = sel->current, *c = p->next_variant;
|
||||
pipe_mutex_lock(sel->mutex);
|
||||
|
||||
while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
|
||||
p = c;
|
||||
c = c->next_variant;
|
||||
}
|
||||
|
||||
if (c) {
|
||||
p->next_variant = c->next_variant;
|
||||
shader = c;
|
||||
/* Find the shader variant. */
|
||||
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
|
||||
/* Don't check the "current" shader. We checked it above. */
|
||||
if (current != iter &&
|
||||
memcmp(&iter->key, &key, sizeof(key)) == 0) {
|
||||
state->current = iter;
|
||||
pipe_mutex_unlock(sel->mutex);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (shader) {
|
||||
shader->next_variant = sel->current;
|
||||
sel->current = shader;
|
||||
/* Build a new shader. */
|
||||
shader = CALLOC_STRUCT(si_shader);
|
||||
if (!shader) {
|
||||
pipe_mutex_unlock(sel->mutex);
|
||||
return -ENOMEM;
|
||||
}
|
||||
shader->selector = sel;
|
||||
shader->key = key;
|
||||
|
||||
r = si_shader_create(sctx->screen, sctx->tm, shader);
|
||||
if (unlikely(r)) {
|
||||
R600_ERR("Failed to build shader variant (type=%u) %d\n",
|
||||
sel->type, r);
|
||||
FREE(shader);
|
||||
pipe_mutex_unlock(sel->mutex);
|
||||
return r;
|
||||
}
|
||||
si_shader_init_pm4_state(shader);
|
||||
|
||||
if (!sel->last_variant) {
|
||||
sel->first_variant = shader;
|
||||
sel->last_variant = shader;
|
||||
} else {
|
||||
shader = CALLOC(1, sizeof(struct si_shader));
|
||||
shader->selector = sel;
|
||||
shader->key = key;
|
||||
|
||||
shader->next_variant = sel->current;
|
||||
sel->current = shader;
|
||||
r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
|
||||
shader);
|
||||
if (unlikely(r)) {
|
||||
R600_ERR("Failed to build shader variant (type=%u) %d\n",
|
||||
sel->type, r);
|
||||
sel->current = NULL;
|
||||
FREE(shader);
|
||||
return r;
|
||||
}
|
||||
si_shader_init_pm4_state(shader);
|
||||
sel->num_shaders++;
|
||||
p_atomic_inc(&sctx->screen->b.num_compilations);
|
||||
sel->last_variant->next_variant = shader;
|
||||
sel->last_variant = shader;
|
||||
}
|
||||
|
||||
state->current = shader;
|
||||
p_atomic_inc(&sctx->screen->b.num_compilations);
|
||||
pipe_mutex_unlock(sel->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -752,14 +760,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
break;
|
||||
}
|
||||
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
|
||||
if (si_shader_select(ctx, sel)) {
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
|
||||
struct si_shader_ctx_state state = {sel};
|
||||
|
||||
if (si_shader_select(ctx, &state)) {
|
||||
fprintf(stderr, "radeonsi: can't create a shader\n");
|
||||
tgsi_free_tokens(sel->tokens);
|
||||
FREE(sel);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
pipe_mutex_init(sel->mutex);
|
||||
return sel;
|
||||
}
|
||||
|
||||
|
|
@ -787,10 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
|||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = state;
|
||||
|
||||
if (sctx->vs_shader == sel || !sel)
|
||||
if (sctx->vs_shader.cso == sel || !sel)
|
||||
return;
|
||||
|
||||
sctx->vs_shader = sel;
|
||||
sctx->vs_shader.cso = sel;
|
||||
sctx->vs_shader.current = sel->first_variant;
|
||||
si_mark_atom_dirty(sctx, &sctx->clip_regs);
|
||||
si_update_viewports_and_scissors(sctx);
|
||||
}
|
||||
|
|
@ -799,12 +812,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
|
|||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = state;
|
||||
bool enable_changed = !!sctx->gs_shader != !!sel;
|
||||
bool enable_changed = !!sctx->gs_shader.cso != !!sel;
|
||||
|
||||
if (sctx->gs_shader == sel)
|
||||
if (sctx->gs_shader.cso == sel)
|
||||
return;
|
||||
|
||||
sctx->gs_shader = sel;
|
||||
sctx->gs_shader.cso = sel;
|
||||
sctx->gs_shader.current = sel ? sel->first_variant : NULL;
|
||||
si_mark_atom_dirty(sctx, &sctx->clip_regs);
|
||||
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
|
||||
|
||||
|
|
@ -817,12 +831,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
|
|||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = state;
|
||||
bool enable_changed = !!sctx->tcs_shader != !!sel;
|
||||
bool enable_changed = !!sctx->tcs_shader.cso != !!sel;
|
||||
|
||||
if (sctx->tcs_shader == sel)
|
||||
if (sctx->tcs_shader.cso == sel)
|
||||
return;
|
||||
|
||||
sctx->tcs_shader = sel;
|
||||
sctx->tcs_shader.cso = sel;
|
||||
sctx->tcs_shader.current = sel ? sel->first_variant : NULL;
|
||||
|
||||
if (enable_changed)
|
||||
sctx->last_tcs = NULL; /* invalidate derived tess state */
|
||||
|
|
@ -832,12 +847,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
|
|||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = state;
|
||||
bool enable_changed = !!sctx->tes_shader != !!sel;
|
||||
bool enable_changed = !!sctx->tes_shader.cso != !!sel;
|
||||
|
||||
if (sctx->tes_shader == sel)
|
||||
if (sctx->tes_shader.cso == sel)
|
||||
return;
|
||||
|
||||
sctx->tes_shader = sel;
|
||||
sctx->tes_shader.cso = sel;
|
||||
sctx->tes_shader.current = sel ? sel->first_variant : NULL;
|
||||
si_mark_atom_dirty(sctx, &sctx->clip_regs);
|
||||
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
|
||||
|
||||
|
|
@ -864,7 +880,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
|||
struct si_shader_selector *sel = state;
|
||||
|
||||
/* skip if supplied shader is one already in use */
|
||||
if (sctx->ps_shader == sel)
|
||||
if (sctx->ps_shader.cso == sel)
|
||||
return;
|
||||
|
||||
/* use a dummy shader if binding a NULL shader */
|
||||
|
|
@ -873,7 +889,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
|||
sel = sctx->dummy_pixel_shader;
|
||||
}
|
||||
|
||||
sctx->ps_shader = sel;
|
||||
sctx->ps_shader.cso = sel;
|
||||
sctx->ps_shader.current = sel->first_variant;
|
||||
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
|
||||
}
|
||||
|
||||
|
|
@ -881,8 +898,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_shader_selector *sel = (struct si_shader_selector *)state;
|
||||
struct si_shader *p = sel->current, *c;
|
||||
struct si_shader_selector **current_shader[SI_NUM_SHADERS] = {
|
||||
struct si_shader *p = sel->first_variant, *c;
|
||||
struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
|
||||
[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
|
||||
[PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader,
|
||||
[PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader,
|
||||
|
|
@ -890,8 +907,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
|
||||
};
|
||||
|
||||
if (*current_shader[sel->type] == sel)
|
||||
*current_shader[sel->type] = NULL;
|
||||
if (current_shader[sel->type]->cso == sel) {
|
||||
current_shader[sel->type]->cso = NULL;
|
||||
current_shader[sel->type]->current = NULL;
|
||||
}
|
||||
|
||||
while (p) {
|
||||
c = p->next_variant;
|
||||
|
|
@ -927,6 +946,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
p = c;
|
||||
}
|
||||
|
||||
pipe_mutex_destroy(sel->mutex);
|
||||
free(sel->tokens);
|
||||
free(sel);
|
||||
}
|
||||
|
|
@ -934,7 +954,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
|
|||
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
|
||||
struct si_shader *ps = sctx->ps_shader->current;
|
||||
struct si_shader *ps = sctx->ps_shader.current;
|
||||
struct si_shader *vs = si_get_vs_state(sctx);
|
||||
struct tgsi_shader_info *psinfo = &ps->selector->info;
|
||||
struct tgsi_shader_info *vsinfo = &vs->selector->info;
|
||||
|
|
@ -1004,7 +1024,7 @@ bcolor:
|
|||
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
|
||||
struct si_shader *ps = sctx->ps_shader->current;
|
||||
struct si_shader *ps = sctx->ps_shader.current;
|
||||
unsigned input_ena = ps->spi_ps_input_ena;
|
||||
|
||||
/* we need to enable at least one of them, otherwise we hang the GPU */
|
||||
|
|
@ -1133,7 +1153,7 @@ static void si_init_gs_rings(struct si_context *sctx)
|
|||
|
||||
static void si_update_gs_rings(struct si_context *sctx)
|
||||
{
|
||||
unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
|
||||
unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
|
||||
uint64_t offset;
|
||||
|
||||
if (gsvs_itemsize == sctx->last_gsvs_itemsize)
|
||||
|
|
@ -1167,17 +1187,14 @@ static void si_update_gs_rings(struct si_context *sctx)
|
|||
* < 0 if there was a failure
|
||||
*/
|
||||
static int si_update_scratch_buffer(struct si_context *sctx,
|
||||
struct si_shader_selector *sel)
|
||||
struct si_shader *shader)
|
||||
{
|
||||
struct si_shader *shader;
|
||||
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
|
||||
int r;
|
||||
|
||||
if (!sel)
|
||||
if (!shader)
|
||||
return 0;
|
||||
|
||||
shader = sel->current;
|
||||
|
||||
/* This shader doesn't need a scratch buffer */
|
||||
if (shader->scratch_bytes_per_wave == 0)
|
||||
return 0;
|
||||
|
|
@ -1209,20 +1226,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
|
|||
return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
|
||||
}
|
||||
|
||||
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel)
|
||||
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
|
||||
{
|
||||
return sel ? sel->current->scratch_bytes_per_wave : 0;
|
||||
return shader ? shader->scratch_bytes_per_wave : 0;
|
||||
}
|
||||
|
||||
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
|
||||
{
|
||||
unsigned bytes = 0;
|
||||
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current));
|
||||
bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
|
@ -1256,46 +1273,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
|
|||
* last used, so we still need to try to update them, even if
|
||||
* they require scratch buffers smaller than the current size.
|
||||
*/
|
||||
r = si_update_scratch_buffer(sctx, sctx->ps_shader);
|
||||
r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
|
||||
|
||||
r = si_update_scratch_buffer(sctx, sctx->gs_shader);
|
||||
r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
|
||||
|
||||
r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
|
||||
r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
|
||||
|
||||
/* VS can be bound as LS, ES, or VS. */
|
||||
r = si_update_scratch_buffer(sctx, sctx->vs_shader);
|
||||
r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1) {
|
||||
if (sctx->tes_shader)
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
|
||||
else if (sctx->gs_shader)
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
|
||||
if (sctx->tes_shader.current)
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
|
||||
else if (sctx->gs_shader.current)
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
|
||||
else
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
|
||||
}
|
||||
|
||||
/* TES can be bound as ES or VS. */
|
||||
r = si_update_scratch_buffer(sctx, sctx->tes_shader);
|
||||
r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1) {
|
||||
if (sctx->gs_shader)
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
|
||||
if (sctx->gs_shader.current)
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
|
||||
else
|
||||
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1361,7 +1378,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
|
|||
if (!ureg)
|
||||
return; /* if we get here, we're screwed */
|
||||
|
||||
assert(!sctx->fixed_func_tcs_shader);
|
||||
assert(!sctx->fixed_func_tcs_shader.cso);
|
||||
|
||||
ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
|
||||
const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
|
||||
|
|
@ -1376,7 +1393,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
|
|||
ureg_MOV(ureg, tessinner, const1);
|
||||
ureg_END(ureg);
|
||||
|
||||
sctx->fixed_func_tcs_shader =
|
||||
sctx->fixed_func_tcs_shader.cso =
|
||||
ureg_create_shader_and_destroy(ureg, &sctx->b.b);
|
||||
}
|
||||
|
||||
|
|
@ -1384,7 +1401,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
|
|||
{
|
||||
/* Calculate the index of the config.
|
||||
* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
|
||||
unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
|
||||
unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
|
||||
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
|
||||
|
||||
if (!*pm4) {
|
||||
|
|
@ -1392,17 +1409,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
|
|||
|
||||
*pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
if (sctx->tes_shader) {
|
||||
if (sctx->tes_shader.cso) {
|
||||
stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
|
||||
S_028B54_HS_EN(1);
|
||||
|
||||
if (sctx->gs_shader)
|
||||
if (sctx->gs_shader.cso)
|
||||
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
|
||||
S_028B54_GS_EN(1) |
|
||||
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||
else
|
||||
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
|
||||
} else if (sctx->gs_shader) {
|
||||
} else if (sctx->gs_shader.cso) {
|
||||
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
|
||||
S_028B54_GS_EN(1) |
|
||||
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||
|
|
@ -1432,7 +1449,7 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
int r;
|
||||
|
||||
/* Update stages before GS. */
|
||||
if (sctx->tes_shader) {
|
||||
if (sctx->tes_shader.cso) {
|
||||
if (!sctx->tf_ring) {
|
||||
si_init_tess_factor_ring(sctx);
|
||||
if (!sctx->tf_ring)
|
||||
|
|
@ -1440,65 +1457,65 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
}
|
||||
|
||||
/* VS as LS */
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, &sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
|
||||
|
||||
if (sctx->tcs_shader) {
|
||||
r = si_shader_select(ctx, sctx->tcs_shader);
|
||||
if (sctx->tcs_shader.cso) {
|
||||
r = si_shader_select(ctx, &sctx->tcs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
|
||||
} else {
|
||||
if (!sctx->fixed_func_tcs_shader) {
|
||||
if (!sctx->fixed_func_tcs_shader.cso) {
|
||||
si_generate_fixed_func_tcs(sctx);
|
||||
if (!sctx->fixed_func_tcs_shader)
|
||||
if (!sctx->fixed_func_tcs_shader.cso)
|
||||
return false;
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
|
||||
r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs,
|
||||
sctx->fixed_func_tcs_shader->current->pm4);
|
||||
sctx->fixed_func_tcs_shader.current->pm4);
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, sctx->tes_shader);
|
||||
r = si_shader_select(ctx, &sctx->tes_shader);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
if (sctx->gs_shader) {
|
||||
if (sctx->gs_shader.cso) {
|
||||
/* TES as ES */
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
|
||||
} else {
|
||||
/* TES as VS */
|
||||
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
|
||||
si_update_so(sctx, sctx->tes_shader);
|
||||
si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
|
||||
si_update_so(sctx, sctx->tes_shader.cso);
|
||||
}
|
||||
} else if (sctx->gs_shader) {
|
||||
} else if (sctx->gs_shader.cso) {
|
||||
/* VS as ES */
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, &sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
|
||||
} else {
|
||||
/* VS as VS */
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, &sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
|
||||
si_update_so(sctx, sctx->vs_shader);
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
|
||||
si_update_so(sctx, sctx->vs_shader.cso);
|
||||
}
|
||||
|
||||
/* Update GS. */
|
||||
if (sctx->gs_shader) {
|
||||
r = si_shader_select(ctx, sctx->gs_shader);
|
||||
if (sctx->gs_shader.cso) {
|
||||
r = si_shader_select(ctx, &sctx->gs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
|
||||
si_update_so(sctx, sctx->gs_shader);
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
|
||||
si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
|
||||
si_update_so(sctx, sctx->gs_shader.cso);
|
||||
|
||||
if (!sctx->gsvs_ring) {
|
||||
si_init_gs_rings(sctx);
|
||||
|
|
@ -1514,10 +1531,10 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
|
||||
si_update_vgt_shader_config(sctx);
|
||||
|
||||
r = si_shader_select(ctx, sctx->ps_shader);
|
||||
r = si_shader_select(ctx, &sctx->ps_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
|
||||
|
||||
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
|
||||
sctx->sprite_coord_enable != rs->sprite_coord_enable ||
|
||||
|
|
@ -1543,13 +1560,13 @@ bool si_update_shaders(struct si_context *sctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
|
||||
sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
|
||||
if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
|
||||
sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
|
||||
if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
|
||||
if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
|
||||
sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
|
||||
si_mark_atom_dirty(sctx, &sctx->msaa_config);
|
||||
|
||||
if (sctx->b.chip_class == SI)
|
||||
|
|
|
|||
|
|
@ -248,6 +248,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
|||
|
|
@ -381,6 +381,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -455,6 +456,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* If we get here, we failed to handle a cap above */
|
||||
debug_printf("Unexpected fragment shader query %u\n", param);
|
||||
|
|
@ -511,6 +514,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* If we get here, we failed to handle a cap above */
|
||||
debug_printf("Unexpected vertex shader query %u\n", param);
|
||||
|
|
@ -600,6 +605,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("Unexpected vgpu10 shader query %u\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -393,7 +393,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
|
|||
continue;
|
||||
|
||||
nir_variable *output_var = NULL;
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
|
||||
nir_foreach_variable(var, &c->s->outputs) {
|
||||
if (var->data.driver_location == intr->const_index[0]) {
|
||||
output_var = var;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "vc4_qir.h"
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
#include "util/u_format.h"
|
||||
|
||||
/**
|
||||
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
|
||||
|
|
@ -50,20 +51,188 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
nir_instr_remove(&intr->instr);
|
||||
}
|
||||
|
||||
/**
 * Returns byte number chan of src as an unsigned 32-bit value, by emitting
 * an unsigned bitfield extract of 8 bits starting at bit 8 * chan.
 */
static nir_ssa_def *
|
||||
vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
{
|
||||
return nir_ubitfield_extract(b,
|
||||
src,
|
||||
nir_imm_int(b, 8 * chan),
|
||||
nir_imm_int(b, 8));
|
||||
}
|
||||
|
||||
/**
 * Returns the 16 bit field as a sign-extended 32-bit value.
 *
 * The field is selected by chan and extracted with a signed bitfield
 * extract of 16 bits starting at bit 16 * chan of src.
 */
|
||||
static nir_ssa_def *
|
||||
vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
{
|
||||
return nir_ibitfield_extract(b,
|
||||
src,
|
||||
nir_imm_int(b, 16 * chan),
|
||||
nir_imm_int(b, 16));
|
||||
}
|
||||
|
||||
/**
 * Returns the 16 bit field as an unsigned 32 bit value.
 *
 * chan == 0 masks src with 0xffff (low half); any other chan value shifts
 * src right by 16 with an unsigned shift (high half).
 */
|
||||
static nir_ssa_def *
|
||||
vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
{
|
||||
if (chan == 0) {
|
||||
return nir_iand(b, src, nir_imm_int(b, 0xffff));
|
||||
} else {
|
||||
return nir_ushr(b, src, nir_imm_int(b, 16));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Unpacks src with nir_unpack_unorm_4x8() and returns component chan of the
 * result, selected with a one-component swizzle.
 */
static nir_ssa_def *
|
||||
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
{
|
||||
return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
|
||||
}
|
||||
|
||||
/**
 * Builds the NIR value for one channel of a vertex attribute.
 *
 * vpm_reads holds the attribute's raw dword loads, swiz is the format
 * swizzle selecting the channel, and desc is the format description whose
 * channel[swiz] entry (size, type, normalized) picks the conversion.
 * Returns NULL when none of the branches below handles the channel's
 * size/type combination.
 *
 * NOTE(review): c is not referenced in this function body.
 */
static nir_ssa_def *
|
||||
vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
|
||||
nir_builder *b,
|
||||
nir_ssa_def **vpm_reads,
|
||||
uint8_t swiz,
|
||||
const struct util_format_description *desc)
|
||||
{
|
||||
/* NOTE(review): channel[swiz] is addressed before swiz is range-checked;
 * chan is only dereferenced after the swiz > UTIL_FORMAT_SWIZZLE_W test
 * below has returned.
 */
const struct util_format_channel_description *chan =
|
||||
&desc->channel[swiz];
|
||||
nir_ssa_def *temp;
|
||||
|
||||
/* Swizzles past W (presumably the constant 0/1 selectors -- TODO confirm)
 * and 32-bit floats need no conversion and go straight to
 * vc4_nir_get_swizzled_channel().
 */
if (swiz > UTIL_FORMAT_SWIZZLE_W) {
|
||||
return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
|
||||
} else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
|
||||
return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
|
||||
} else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
/* 32-bit signed: one dword per channel, converted int -> float and
 * scaled by 1 / 0x7fffffff when normalized.
 */
if (chan->normalized) {
|
||||
return nir_fmul(b,
|
||||
nir_i2f(b, vpm_reads[swiz]),
|
||||
nir_imm_float(b,
|
||||
1.0 / 0x7fffffff));
|
||||
} else {
|
||||
return nir_i2f(b, vpm_reads[swiz]);
|
||||
}
|
||||
} else if (chan->size == 8 &&
|
||||
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
|
||||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
|
||||
/* 8-bit channels are all taken from the first dword. */
nir_ssa_def *vpm = vpm_reads[0];
|
||||
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
/* XOR with 0x80 in every byte flips each byte's top bit; the bias is
 * then undone in float: x * 2 - 1 when normalized, x - 128 otherwise.
 */
temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
|
||||
if (chan->normalized) {
|
||||
return nir_fsub(b, nir_fmul(b,
|
||||
vc4_nir_unpack_8f(b, temp, swiz),
|
||||
nir_imm_float(b, 2.0)),
|
||||
nir_imm_float(b, 1.0));
|
||||
} else {
|
||||
return nir_fadd(b,
|
||||
nir_i2f(b,
|
||||
vc4_nir_unpack_8i(b, temp,
|
||||
swiz)),
|
||||
nir_imm_float(b, -128.0));
|
||||
}
|
||||
} else {
|
||||
if (chan->normalized) {
|
||||
return vc4_nir_unpack_8f(b, vpm, swiz);
|
||||
} else {
|
||||
return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
|
||||
}
|
||||
}
|
||||
} else if (chan->size == 16 &&
|
||||
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
|
||||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
|
||||
/* Two 16-bit channels per dword: swiz / 2 picks the dword and
 * swiz & 1 (below) picks the half.
 */
nir_ssa_def *vpm = vpm_reads[swiz / 2];
|
||||
|
||||
/* Note that UNPACK_16F eats a half float, not ints, so we use
|
||||
* UNPACK_16_I for all of these.
|
||||
*/
|
||||
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
|
||||
if (chan->normalized) {
|
||||
return nir_fmul(b, temp,
|
||||
nir_imm_float(b, 1/32768.0f));
|
||||
} else {
|
||||
return temp;
|
||||
}
|
||||
} else {
|
||||
temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
|
||||
if (chan->normalized) {
|
||||
return nir_fmul(b, temp,
|
||||
nir_imm_float(b, 1 / 65535.0));
|
||||
} else {
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* No conversion implemented for this size/type. */
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Lowers a vec4 load_input of a vertex attribute: emits one scalar
 * load_input per dword of the attribute (const_index attr * 4 + i),
 * converts each of the four channels with vc4_nir_get_vattr_channel_vpm(),
 * and replaces the original intrinsic with the resulting vec4.  Channels
 * the helper returns NULL for are reported once on stderr and read as 0.0.
 *
 * NOTE(review): this text is a rendered diff; the vc4_nir_lower_input
 * signature and the QSTAGE_FRAG / VC4_NIR_TLB_COLOR_READ_INPUT test below
 * look like the removed side of the hunk -- confirm against the real file.
 */
static void
|
||||
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
|
||||
VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
int attr = intr->const_index[0];
|
||||
enum pipe_format format = c->vs_key->attr_formats[attr];
|
||||
uint32_t attr_size = util_format_get_blocksize(format);
|
||||
|
||||
/* All TGSI-to-NIR inputs are vec4. */
|
||||
assert(intr->num_components == 4);
|
||||
|
||||
/* Generate dword loads for the VPM values (Since these intrinsics may
|
||||
* be reordered, the actual reads will be generated at the top of the
|
||||
* shader by ntq_setup_inputs()).
|
||||
*/
|
||||
/* NOTE(review): assumes attr_size <= 16 so that at most four entries of
 * vpm_reads are written -- TODO confirm.
 */
nir_ssa_def *vpm_reads[4];
|
||||
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
|
||||
nir_intrinsic_instr *intr_comp =
|
||||
nir_intrinsic_instr_create(c->s,
|
||||
nir_intrinsic_load_input);
|
||||
intr_comp->num_components = 1;
|
||||
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
|
||||
nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
|
||||
nir_builder_instr_insert(b, &intr_comp->instr);
|
||||
|
||||
vpm_reads[i] = &intr_comp->dest.ssa;
|
||||
}
|
||||
|
||||
/* Warn at most once per call about an unsupported format. */
bool format_warned = false;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
|
||||
nir_ssa_def *dests[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
uint8_t swiz = desc->swizzle[i];
|
||||
dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
|
||||
desc);
|
||||
|
||||
if (!dests[i]) {
|
||||
if (!format_warned) {
|
||||
fprintf(stderr,
|
||||
"vtx element %d unsupported type: %s\n",
|
||||
attr, util_format_name(format));
|
||||
format_warned = true;
|
||||
}
|
||||
/* Unsupported channels read as 0.0. */
dests[i] = nir_imm_float(b, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
replace_intrinsic_with_vec4(b, intr, dests);
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
/* This doesn't need any lowering. */
|
||||
return;
|
||||
}
|
||||
|
||||
nir_variable *input_var = NULL;
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
|
||||
nir_foreach_variable(var, &c->s->inputs) {
|
||||
if (var->data.driver_location == intr->const_index[0]) {
|
||||
input_var = var;
|
||||
break;
|
||||
|
|
@ -87,38 +256,31 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
|
|||
dests[i] = &intr_comp->dest.ssa;
|
||||
}
|
||||
|
||||
switch (c->stage) {
|
||||
case QSTAGE_FRAG:
|
||||
if (input_var->data.location == VARYING_SLOT_FACE) {
|
||||
dests[0] = nir_fsub(b,
|
||||
nir_imm_float(b, 1.0),
|
||||
nir_fmul(b,
|
||||
nir_i2f(b, dests[0]),
|
||||
nir_imm_float(b, 2.0)));
|
||||
dests[1] = nir_imm_float(b, 0.0);
|
||||
if (input_var->data.location == VARYING_SLOT_FACE) {
|
||||
dests[0] = nir_fsub(b,
|
||||
nir_imm_float(b, 1.0),
|
||||
nir_fmul(b,
|
||||
nir_i2f(b, dests[0]),
|
||||
nir_imm_float(b, 2.0)));
|
||||
dests[1] = nir_imm_float(b, 0.0);
|
||||
dests[2] = nir_imm_float(b, 0.0);
|
||||
dests[3] = nir_imm_float(b, 1.0);
|
||||
} else if (input_var->data.location >= VARYING_SLOT_VAR0) {
|
||||
if (c->fs_key->point_sprite_mask &
|
||||
(1 << (input_var->data.location -
|
||||
VARYING_SLOT_VAR0))) {
|
||||
if (!c->fs_key->is_points) {
|
||||
dests[0] = nir_imm_float(b, 0.0);
|
||||
dests[1] = nir_imm_float(b, 0.0);
|
||||
}
|
||||
if (c->fs_key->point_coord_upper_left) {
|
||||
dests[1] = nir_fsub(b,
|
||||
nir_imm_float(b, 1.0),
|
||||
dests[1]);
|
||||
}
|
||||
dests[2] = nir_imm_float(b, 0.0);
|
||||
dests[3] = nir_imm_float(b, 1.0);
|
||||
} else if (input_var->data.location >= VARYING_SLOT_VAR0) {
|
||||
if (c->fs_key->point_sprite_mask &
|
||||
(1 << (input_var->data.location -
|
||||
VARYING_SLOT_VAR0))) {
|
||||
if (!c->fs_key->is_points) {
|
||||
dests[0] = nir_imm_float(b, 0.0);
|
||||
dests[1] = nir_imm_float(b, 0.0);
|
||||
}
|
||||
if (c->fs_key->point_coord_upper_left) {
|
||||
dests[1] = nir_fsub(b,
|
||||
nir_imm_float(b, 1.0),
|
||||
dests[1]);
|
||||
}
|
||||
dests[2] = nir_imm_float(b, 0.0);
|
||||
dests[3] = nir_imm_float(b, 1.0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case QSTAGE_COORD:
|
||||
case QSTAGE_VERT:
|
||||
break;
|
||||
}
|
||||
|
||||
replace_intrinsic_with_vec4(b, intr, dests);
|
||||
|
|
@ -129,7 +291,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
|
|||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
nir_variable *output_var = NULL;
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
|
||||
nir_foreach_variable(var, &c->s->outputs) {
|
||||
if (var->data.driver_location == intr->const_index[0]) {
|
||||
output_var = var;
|
||||
break;
|
||||
|
|
@ -232,7 +394,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
vc4_nir_lower_input(c, b, intr);
|
||||
if (c->stage == QSTAGE_FRAG)
|
||||
vc4_nir_lower_fs_input(c, b, intr);
|
||||
else
|
||||
vc4_nir_lower_vertex_attr(c, b, intr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
|
|
|
|||
|
|
@ -602,126 +602,18 @@ ntq_fsign(struct vc4_compile *c, struct qreg src)
|
|||
qir_uniform_f(c, -1.0));
|
||||
}
|
||||
|
||||
/**
 * Builds the QIR value for one channel of a vertex attribute.
 *
 * vpm_reads holds the attribute's raw dword reads, swiz is the format
 * swizzle selecting the channel, and desc is the format description whose
 * channel[swiz] entry (size, type, normalized) picks the conversion.
 * Returns c->undef when none of the branches below handles the channel's
 * size/type combination.
 */
static struct qreg
|
||||
get_channel_from_vpm(struct vc4_compile *c,
|
||||
struct qreg *vpm_reads,
|
||||
uint8_t swiz,
|
||||
const struct util_format_description *desc)
|
||||
{
|
||||
/* NOTE(review): channel[swiz] is addressed before swiz is range-checked;
 * chan is only dereferenced after the swiz > UTIL_FORMAT_SWIZZLE_W test
 * below has returned.
 */
const struct util_format_channel_description *chan =
|
||||
&desc->channel[swiz];
|
||||
struct qreg temp;
|
||||
|
||||
/* Swizzles past W and 32-bit floats need no conversion and go straight
 * to get_swizzled_channel().
 */
if (swiz > UTIL_FORMAT_SWIZZLE_W)
|
||||
return get_swizzled_channel(c, vpm_reads, swiz);
|
||||
else if (chan->size == 32 &&
|
||||
chan->type == UTIL_FORMAT_TYPE_FLOAT) {
|
||||
return get_swizzled_channel(c, vpm_reads, swiz);
|
||||
} else if (chan->size == 32 &&
|
||||
chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
/* 32-bit signed: one dword per channel, converted int -> float and
 * scaled by 1 / 0x7fffffff when normalized.
 */
if (chan->normalized) {
|
||||
return qir_FMUL(c,
|
||||
qir_ITOF(c, vpm_reads[swiz]),
|
||||
qir_uniform_f(c,
|
||||
1.0 / 0x7fffffff));
|
||||
} else {
|
||||
return qir_ITOF(c, vpm_reads[swiz]);
|
||||
}
|
||||
} else if (chan->size == 8 &&
|
||||
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
|
||||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
|
||||
/* 8-bit channels are all taken from the first dword. */
struct qreg vpm = vpm_reads[0];
|
||||
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
/* XOR with 0x80 in every byte flips each byte's top bit; the bias is
 * then undone in float: x * 2 - 1 when normalized, x - 128 otherwise.
 */
temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
|
||||
if (chan->normalized) {
|
||||
return qir_FSUB(c, qir_FMUL(c,
|
||||
qir_UNPACK_8_F(c, temp, swiz),
|
||||
qir_uniform_f(c, 2.0)),
|
||||
qir_uniform_f(c, 1.0));
|
||||
} else {
|
||||
return qir_FADD(c,
|
||||
qir_ITOF(c,
|
||||
qir_UNPACK_8_I(c, temp,
|
||||
swiz)),
|
||||
qir_uniform_f(c, -128.0));
|
||||
}
|
||||
} else {
|
||||
if (chan->normalized) {
|
||||
return qir_UNPACK_8_F(c, vpm, swiz);
|
||||
} else {
|
||||
return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
|
||||
}
|
||||
}
|
||||
} else if (chan->size == 16 &&
|
||||
(chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
|
||||
chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
|
||||
/* Two 16-bit channels per dword: swiz / 2 picks the dword. */
struct qreg vpm = vpm_reads[swiz / 2];
|
||||
|
||||
/* Note that UNPACK_16F eats a half float, not ints, so we use
|
||||
* UNPACK_16_I for all of these.
|
||||
*/
|
||||
if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
|
||||
if (chan->normalized) {
|
||||
return qir_FMUL(c, temp,
|
||||
qir_uniform_f(c, 1/32768.0f));
|
||||
} else {
|
||||
return temp;
|
||||
}
|
||||
} else {
|
||||
/* UNPACK_16I sign-extends, so we have to emit ANDs. */
|
||||
/* Odd swizzles first unpack half 1 of the dword; the AND with 0xffff
 * then keeps only the low 16 bits in either case.
 */
temp = vpm;
|
||||
if (swiz == 1 || swiz == 3)
|
||||
temp = qir_UNPACK_16_I(c, temp, 1);
|
||||
temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
|
||||
temp = qir_ITOF(c, temp);
|
||||
|
||||
if (chan->normalized) {
|
||||
return qir_FMUL(c, temp,
|
||||
qir_uniform_f(c, 1 / 65535.0));
|
||||
} else {
|
||||
return temp;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* No conversion implemented for this size/type. */
return c->undef;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sets up the QIR inputs for vertex attribute attr: records the
 * attribute's dword-aligned size in c->vattr_sizes[attr] and emits a MOV
 * from QFILE_VPM index attr * 4 + i for each dword, counting each one in
 * c->num_inputs.
 *
 * NOTE(review): this text is a rendered diff with removed and added lines
 * interleaved (for example, both the vpm_reads[i] and the c->inputs[...]
 * MOV assignments appear in the first loop) -- confirm which lines are
 * live against the real file before relying on the details below.
 */
static void
|
||||
emit_vertex_input(struct vc4_compile *c, int attr)
|
||||
{
|
||||
enum pipe_format format = c->vs_key->attr_formats[attr];
|
||||
uint32_t attr_size = util_format_get_blocksize(format);
|
||||
struct qreg vpm_reads[4];
|
||||
|
||||
c->vattr_sizes[attr] = align(attr_size, 4);
|
||||
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
|
||||
struct qreg vpm = { QFILE_VPM, attr * 4 + i };
|
||||
vpm_reads[i] = qir_MOV(c, vpm);
|
||||
c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
|
||||
c->num_inputs++;
|
||||
}
|
||||
|
||||
/* Warn at most once per call about an unsupported format. */
bool format_warned = false;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
uint8_t swiz = desc->swizzle[i];
|
||||
struct qreg result = get_channel_from_vpm(c, vpm_reads,
|
||||
swiz, desc);
|
||||
|
||||
/* A QFILE_NULL result is treated as "unsupported": warn and
 * substitute 0.0.
 */
if (result.file == QFILE_NULL) {
|
||||
if (!format_warned) {
|
||||
fprintf(stderr,
|
||||
"vtx element %d unsupported type: %s\n",
|
||||
attr, util_format_name(format));
|
||||
format_warned = true;
|
||||
}
|
||||
result = qir_uniform_f(c, 0.0);
|
||||
}
|
||||
c->inputs[attr * 4 + i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -876,6 +768,40 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
|
|||
*dest = result;
|
||||
}
|
||||
|
||||
/**
 * Handles sign-extended bitfield extracts for 16 bits.
 *
 * Both bits and offset must be constant uniforms (asserted): bits must be
 * 16 and offset a multiple of 16.  The extract is emitted as
 * qir_UNPACK_16_I on half number offset / 16 of base.
 */
|
||||
static struct qreg
|
||||
ntq_emit_ibfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
|
||||
struct qreg bits)
|
||||
{
|
||||
/* Only a compile-time constant width of 16 is supported. */
assert(bits.file == QFILE_UNIF &&
|
||||
c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
|
||||
c->uniform_data[bits.index] == 16);
|
||||
|
||||
/* The offset must also be a compile-time constant, aligned to 16 bits. */
assert(offset.file == QFILE_UNIF &&
|
||||
c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
|
||||
int offset_bit = c->uniform_data[offset.index];
|
||||
assert(offset_bit % 16 == 0);
|
||||
|
||||
return qir_UNPACK_16_I(c, base, offset_bit / 16);
|
||||
}
|
||||
|
||||
/**
 * Handles unsigned bitfield extracts for 8 bits.
 *
 * Both bits and offset must be constant uniforms (asserted): bits must be
 * 8 and offset a multiple of 8.  The extract is emitted as
 * qir_UNPACK_8_I on byte number offset / 8 of base.
 */
|
||||
static struct qreg
|
||||
ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
|
||||
struct qreg bits)
|
||||
{
|
||||
/* Only a compile-time constant width of 8 is supported. */
assert(bits.file == QFILE_UNIF &&
|
||||
c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
|
||||
c->uniform_data[bits.index] == 8);
|
||||
|
||||
/* The offset must also be a compile-time constant, aligned to 8 bits. */
assert(offset.file == QFILE_UNIF &&
|
||||
c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
|
||||
int offset_bit = c->uniform_data[offset.index];
|
||||
assert(offset_bit % 8 == 0);
|
||||
|
||||
return qir_UNPACK_8_I(c, base, offset_bit / 8);
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
|
||||
{
|
||||
|
|
@ -1106,6 +1032,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
|
|||
qir_SUB(c, qir_uniform_ui(c, 0), src[0]));
|
||||
break;
|
||||
|
||||
case nir_op_ibitfield_extract:
|
||||
*dest = ntq_emit_ibfe(c, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case nir_op_ubitfield_extract:
|
||||
*dest = ntq_emit_ubfe(c, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "unknown NIR ALU inst: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
|
@ -1383,13 +1317,13 @@ static void
|
|||
ntq_setup_inputs(struct vc4_compile *c)
|
||||
{
|
||||
unsigned num_entries = 0;
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->inputs)
|
||||
nir_foreach_variable(var, &c->s->inputs)
|
||||
num_entries++;
|
||||
|
||||
nir_variable *vars[num_entries];
|
||||
|
||||
unsigned i = 0;
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->inputs)
|
||||
nir_foreach_variable(var, &c->s->inputs)
|
||||
vars[i++] = var;
|
||||
|
||||
/* Sort the variables so that we emit the input setup in
|
||||
|
|
@ -1432,7 +1366,7 @@ ntq_setup_inputs(struct vc4_compile *c)
|
|||
static void
|
||||
ntq_setup_outputs(struct vc4_compile *c)
|
||||
{
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
|
||||
nir_foreach_variable(var, &c->s->outputs) {
|
||||
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
|
||||
unsigned loc = var->data.driver_location * 4;
|
||||
|
||||
|
|
@ -1471,7 +1405,7 @@ ntq_setup_outputs(struct vc4_compile *c)
|
|||
static void
|
||||
ntq_setup_uniforms(struct vc4_compile *c)
|
||||
{
|
||||
foreach_list_typed(nir_variable, var, node, &c->s->uniforms) {
|
||||
nir_foreach_variable(var, &c->s->uniforms) {
|
||||
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
|
||||
unsigned array_elem_size = 4 * sizeof(float);
|
||||
|
||||
|
|
|
|||
|
|
@ -182,6 +182,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
@ -336,6 +337,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return VC4_MAX_TEXTURE_SAMPLERS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
fprintf(stderr, "unknown shader param %d\n", param);
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -632,6 +632,7 @@ enum pipe_cap
|
|||
PIPE_CAP_DEPTH_BOUNDS_TEST,
|
||||
PIPE_CAP_TGSI_TXQS,
|
||||
PIPE_CAP_FORCE_PERSAMPLE_INTERP,
|
||||
PIPE_CAP_SHAREABLE_SHADERS,
|
||||
};
|
||||
|
||||
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
|
||||
|
|
@ -696,7 +697,8 @@ enum pipe_shader_cap
|
|||
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
|
||||
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
|
||||
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
|
||||
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
|
||||
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
|
||||
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -267,7 +267,9 @@ union tgsi_immediate_data
|
|||
#define TGSI_PROPERTY_TES_SPACING 12
|
||||
#define TGSI_PROPERTY_TES_VERTEX_ORDER_CW 13
|
||||
#define TGSI_PROPERTY_TES_POINT_MODE 14
|
||||
#define TGSI_PROPERTY_COUNT 15
|
||||
#define TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 15
|
||||
#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
|
||||
#define TGSI_PROPERTY_COUNT 17
|
||||
|
||||
struct tgsi_property {
|
||||
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
|
||||
|
|
|
|||
|
|
@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
|
|||
priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
|
||||
}
|
||||
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
if (!priv->picture.h264.field_pic_flag)
|
||||
priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
|
||||
if (!priv->picture.h264.field_pic_flag) {
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
|
||||
priv->codec_data.h264.delta_pic_order_cnt_bottom;
|
||||
} else if (!priv->picture.h264.bottom_field_flag)
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
else
|
||||
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
|
||||
} else if (sps->pic_order_cnt_type == 1) {
|
||||
unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
|
||||
|
|
|
|||
|
|
@ -14,3 +14,340 @@ EXPORTS
|
|||
OSMesaGetProcAddress
|
||||
OSMesaColorClamp
|
||||
OSMesaPostprocess
|
||||
glAccum
|
||||
glAlphaFunc
|
||||
glAreTexturesResident
|
||||
glArrayElement
|
||||
glBegin
|
||||
glBindTexture
|
||||
glBitmap
|
||||
glBlendFunc
|
||||
glCallList
|
||||
glCallLists
|
||||
glClear
|
||||
glClearAccum
|
||||
glClearColor
|
||||
glClearDepth
|
||||
glClearIndex
|
||||
glClearStencil
|
||||
glClipPlane
|
||||
glColor3b
|
||||
glColor3bv
|
||||
glColor3d
|
||||
glColor3dv
|
||||
glColor3f
|
||||
glColor3fv
|
||||
glColor3i
|
||||
glColor3iv
|
||||
glColor3s
|
||||
glColor3sv
|
||||
glColor3ub
|
||||
glColor3ubv
|
||||
glColor3ui
|
||||
glColor3uiv
|
||||
glColor3us
|
||||
glColor3usv
|
||||
glColor4b
|
||||
glColor4bv
|
||||
glColor4d
|
||||
glColor4dv
|
||||
glColor4f
|
||||
glColor4fv
|
||||
glColor4i
|
||||
glColor4iv
|
||||
glColor4s
|
||||
glColor4sv
|
||||
glColor4ub
|
||||
glColor4ubv
|
||||
glColor4ui
|
||||
glColor4uiv
|
||||
glColor4us
|
||||
glColor4usv
|
||||
glColorMask
|
||||
glColorMaterial
|
||||
glColorPointer
|
||||
glCopyPixels
|
||||
glCopyTexImage1D
|
||||
glCopyTexImage2D
|
||||
glCopyTexSubImage1D
|
||||
glCopyTexSubImage2D
|
||||
glCullFace
|
||||
; glDebugEntry
|
||||
glDeleteLists
|
||||
glDeleteTextures
|
||||
glDepthFunc
|
||||
glDepthMask
|
||||
glDepthRange
|
||||
glDisable
|
||||
glDisableClientState
|
||||
glDrawArrays
|
||||
glDrawBuffer
|
||||
glDrawElements
|
||||
glDrawPixels
|
||||
glEdgeFlag
|
||||
glEdgeFlagPointer
|
||||
glEdgeFlagv
|
||||
glEnable
|
||||
glEnableClientState
|
||||
glEnd
|
||||
glEndList
|
||||
glEvalCoord1d
|
||||
glEvalCoord1dv
|
||||
glEvalCoord1f
|
||||
glEvalCoord1fv
|
||||
glEvalCoord2d
|
||||
glEvalCoord2dv
|
||||
glEvalCoord2f
|
||||
glEvalCoord2fv
|
||||
glEvalMesh1
|
||||
glEvalMesh2
|
||||
glEvalPoint1
|
||||
glEvalPoint2
|
||||
glFeedbackBuffer
|
||||
glFinish
|
||||
glFlush
|
||||
glFogf
|
||||
glFogfv
|
||||
glFogi
|
||||
glFogiv
|
||||
glFrontFace
|
||||
glFrustum
|
||||
glGenLists
|
||||
glGenTextures
|
||||
glGetBooleanv
|
||||
glGetClipPlane
|
||||
glGetDoublev
|
||||
glGetError
|
||||
glGetFloatv
|
||||
glGetIntegerv
|
||||
glGetLightfv
|
||||
glGetLightiv
|
||||
glGetMapdv
|
||||
glGetMapfv
|
||||
glGetMapiv
|
||||
glGetMaterialfv
|
||||
glGetMaterialiv
|
||||
glGetPixelMapfv
|
||||
glGetPixelMapuiv
|
||||
glGetPixelMapusv
|
||||
glGetPointerv
|
||||
glGetPolygonStipple
|
||||
glGetString
|
||||
glGetTexEnvfv
|
||||
glGetTexEnviv
|
||||
glGetTexGendv
|
||||
glGetTexGenfv
|
||||
glGetTexGeniv
|
||||
glGetTexImage
|
||||
glGetTexLevelParameterfv
|
||||
glGetTexLevelParameteriv
|
||||
glGetTexParameterfv
|
||||
glGetTexParameteriv
|
||||
glHint
|
||||
glIndexMask
|
||||
glIndexPointer
|
||||
glIndexd
|
||||
glIndexdv
|
||||
glIndexf
|
||||
glIndexfv
|
||||
glIndexi
|
||||
glIndexiv
|
||||
glIndexs
|
||||
glIndexsv
|
||||
glIndexub
|
||||
glIndexubv
|
||||
glInitNames
|
||||
glInterleavedArrays
|
||||
glIsEnabled
|
||||
glIsList
|
||||
glIsTexture
|
||||
glLightModelf
|
||||
glLightModelfv
|
||||
glLightModeli
|
||||
glLightModeliv
|
||||
glLightf
|
||||
glLightfv
|
||||
glLighti
|
||||
glLightiv
|
||||
glLineStipple
|
||||
glLineWidth
|
||||
glListBase
|
||||
glLoadIdentity
|
||||
glLoadMatrixd
|
||||
glLoadMatrixf
|
||||
glLoadName
|
||||
glLogicOp
|
||||
glMap1d
|
||||
glMap1f
|
||||
glMap2d
|
||||
glMap2f
|
||||
glMapGrid1d
|
||||
glMapGrid1f
|
||||
glMapGrid2d
|
||||
glMapGrid2f
|
||||
glMaterialf
|
||||
glMaterialfv
|
||||
glMateriali
|
||||
glMaterialiv
|
||||
glMatrixMode
|
||||
glMultMatrixd
|
||||
glMultMatrixf
|
||||
glNewList
|
||||
glNormal3b
|
||||
glNormal3bv
|
||||
glNormal3d
|
||||
glNormal3dv
|
||||
glNormal3f
|
||||
glNormal3fv
|
||||
glNormal3i
|
||||
glNormal3iv
|
||||
glNormal3s
|
||||
glNormal3sv
|
||||
glNormalPointer
|
||||
glOrtho
|
||||
glPassThrough
|
||||
glPixelMapfv
|
||||
glPixelMapuiv
|
||||
glPixelMapusv
|
||||
glPixelStoref
|
||||
glPixelStorei
|
||||
glPixelTransferf
|
||||
glPixelTransferi
|
||||
glPixelZoom
|
||||
glPointSize
|
||||
glPolygonMode
|
||||
glPolygonOffset
|
||||
glPolygonStipple
|
||||
glPopAttrib
|
||||
glPopClientAttrib
|
||||
glPopMatrix
|
||||
glPopName
|
||||
glPrioritizeTextures
|
||||
glPushAttrib
|
||||
glPushClientAttrib
|
||||
glPushMatrix
|
||||
glPushName
|
||||
glRasterPos2d
|
||||
glRasterPos2dv
|
||||
glRasterPos2f
|
||||
glRasterPos2fv
|
||||
glRasterPos2i
|
||||
glRasterPos2iv
|
||||
glRasterPos2s
|
||||
glRasterPos2sv
|
||||
glRasterPos3d
|
||||
glRasterPos3dv
|
||||
glRasterPos3f
|
||||
glRasterPos3fv
|
||||
glRasterPos3i
|
||||
glRasterPos3iv
|
||||
glRasterPos3s
|
||||
glRasterPos3sv
|
||||
glRasterPos4d
|
||||
glRasterPos4dv
|
||||
glRasterPos4f
|
||||
glRasterPos4fv
|
||||
glRasterPos4i
|
||||
glRasterPos4iv
|
||||
glRasterPos4s
|
||||
glRasterPos4sv
|
||||
glReadBuffer
|
||||
glReadPixels
|
||||
glRectd
|
||||
glRectdv
|
||||
glRectf
|
||||
glRectfv
|
||||
glRecti
|
||||
glRectiv
|
||||
glRects
|
||||
glRectsv
|
||||
glRenderMode
|
||||
glRotated
|
||||
glRotatef
|
||||
glScaled
|
||||
glScalef
|
||||
glScissor
|
||||
glSelectBuffer
|
||||
glShadeModel
|
||||
glStencilFunc
|
||||
glStencilMask
|
||||
glStencilOp
|
||||
glTexCoord1d
|
||||
glTexCoord1dv
|
||||
glTexCoord1f
|
||||
glTexCoord1fv
|
||||
glTexCoord1i
|
||||
glTexCoord1iv
|
||||
glTexCoord1s
|
||||
glTexCoord1sv
|
||||
glTexCoord2d
|
||||
glTexCoord2dv
|
||||
glTexCoord2f
|
||||
glTexCoord2fv
|
||||
glTexCoord2i
|
||||
glTexCoord2iv
|
||||
glTexCoord2s
|
||||
glTexCoord2sv
|
||||
glTexCoord3d
|
||||
glTexCoord3dv
|
||||
glTexCoord3f
|
||||
glTexCoord3fv
|
||||
glTexCoord3i
|
||||
glTexCoord3iv
|
||||
glTexCoord3s
|
||||
glTexCoord3sv
|
||||
glTexCoord4d
|
||||
glTexCoord4dv
|
||||
glTexCoord4f
|
||||
glTexCoord4fv
|
||||
glTexCoord4i
|
||||
glTexCoord4iv
|
||||
glTexCoord4s
|
||||
glTexCoord4sv
|
||||
glTexCoordPointer
|
||||
glTexEnvf
|
||||
glTexEnvfv
|
||||
glTexEnvi
|
||||
glTexEnviv
|
||||
glTexGend
|
||||
glTexGendv
|
||||
glTexGenf
|
||||
glTexGenfv
|
||||
glTexGeni
|
||||
glTexGeniv
|
||||
glTexImage1D
|
||||
glTexImage2D
|
||||
glTexParameterf
|
||||
glTexParameterfv
|
||||
glTexParameteri
|
||||
glTexParameteriv
|
||||
glTexSubImage1D
|
||||
glTexSubImage2D
|
||||
glTranslated
|
||||
glTranslatef
|
||||
glVertex2d
|
||||
glVertex2dv
|
||||
glVertex2f
|
||||
glVertex2fv
|
||||
glVertex2i
|
||||
glVertex2iv
|
||||
glVertex2s
|
||||
glVertex2sv
|
||||
glVertex3d
|
||||
glVertex3dv
|
||||
glVertex3f
|
||||
glVertex3fv
|
||||
glVertex3i
|
||||
glVertex3iv
|
||||
glVertex3s
|
||||
glVertex3sv
|
||||
glVertex4d
|
||||
glVertex4dv
|
||||
glVertex4f
|
||||
glVertex4fv
|
||||
glVertex4i
|
||||
glVertex4iv
|
||||
glVertex4s
|
||||
glVertex4sv
|
||||
glVertexPointer
|
||||
glViewport
|
||||
|
|
|
|||
|
|
@ -11,3 +11,340 @@ EXPORTS
|
|||
OSMesaGetProcAddress = OSMesaGetProcAddress@4
|
||||
OSMesaColorClamp = OSMesaColorClamp@4
|
||||
OSMesaPostprocess = OSMesaPostprocess@12
|
||||
glAccum = glAccum@8
|
||||
glAlphaFunc = glAlphaFunc@8
|
||||
glAreTexturesResident = glAreTexturesResident@12
|
||||
glArrayElement = glArrayElement@4
|
||||
glBegin = glBegin@4
|
||||
glBindTexture = glBindTexture@8
|
||||
glBitmap = glBitmap@28
|
||||
glBlendFunc = glBlendFunc@8
|
||||
glCallList = glCallList@4
|
||||
glCallLists = glCallLists@12
|
||||
glClear = glClear@4
|
||||
glClearAccum = glClearAccum@16
|
||||
glClearColor = glClearColor@16
|
||||
glClearDepth = glClearDepth@8
|
||||
glClearIndex = glClearIndex@4
|
||||
glClearStencil = glClearStencil@4
|
||||
glClipPlane = glClipPlane@8
|
||||
glColor3b = glColor3b@12
|
||||
glColor3bv = glColor3bv@4
|
||||
glColor3d = glColor3d@24
|
||||
glColor3dv = glColor3dv@4
|
||||
glColor3f = glColor3f@12
|
||||
glColor3fv = glColor3fv@4
|
||||
glColor3i = glColor3i@12
|
||||
glColor3iv = glColor3iv@4
|
||||
glColor3s = glColor3s@12
|
||||
glColor3sv = glColor3sv@4
|
||||
glColor3ub = glColor3ub@12
|
||||
glColor3ubv = glColor3ubv@4
|
||||
glColor3ui = glColor3ui@12
|
||||
glColor3uiv = glColor3uiv@4
|
||||
glColor3us = glColor3us@12
|
||||
glColor3usv = glColor3usv@4
|
||||
glColor4b = glColor4b@16
|
||||
glColor4bv = glColor4bv@4
|
||||
glColor4d = glColor4d@32
|
||||
glColor4dv = glColor4dv@4
|
||||
glColor4f = glColor4f@16
|
||||
glColor4fv = glColor4fv@4
|
||||
glColor4i = glColor4i@16
|
||||
glColor4iv = glColor4iv@4
|
||||
glColor4s = glColor4s@16
|
||||
glColor4sv = glColor4sv@4
|
||||
glColor4ub = glColor4ub@16
|
||||
glColor4ubv = glColor4ubv@4
|
||||
glColor4ui = glColor4ui@16
|
||||
glColor4uiv = glColor4uiv@4
|
||||
glColor4us = glColor4us@16
|
||||
glColor4usv = glColor4usv@4
|
||||
glColorMask = glColorMask@16
|
||||
glColorMaterial = glColorMaterial@8
|
||||
glColorPointer = glColorPointer@16
|
||||
glCopyPixels = glCopyPixels@20
|
||||
glCopyTexImage1D = glCopyTexImage1D@28
|
||||
glCopyTexImage2D = glCopyTexImage2D@32
|
||||
glCopyTexSubImage1D = glCopyTexSubImage1D@24
|
||||
glCopyTexSubImage2D = glCopyTexSubImage2D@32
|
||||
glCullFace = glCullFace@4
|
||||
; glDebugEntry = glDebugEntry@8
|
||||
glDeleteLists = glDeleteLists@8
|
||||
glDeleteTextures = glDeleteTextures@8
|
||||
glDepthFunc = glDepthFunc@4
|
||||
glDepthMask = glDepthMask@4
|
||||
glDepthRange = glDepthRange@16
|
||||
glDisable = glDisable@4
|
||||
glDisableClientState = glDisableClientState@4
|
||||
glDrawArrays = glDrawArrays@12
|
||||
glDrawBuffer = glDrawBuffer@4
|
||||
glDrawElements = glDrawElements@16
|
||||
glDrawPixels = glDrawPixels@20
|
||||
glEdgeFlag = glEdgeFlag@4
|
||||
glEdgeFlagPointer = glEdgeFlagPointer@8
|
||||
glEdgeFlagv = glEdgeFlagv@4
|
||||
glEnable = glEnable@4
|
||||
glEnableClientState = glEnableClientState@4
|
||||
glEnd = glEnd@0
|
||||
glEndList = glEndList@0
|
||||
glEvalCoord1d = glEvalCoord1d@8
|
||||
glEvalCoord1dv = glEvalCoord1dv@4
|
||||
glEvalCoord1f = glEvalCoord1f@4
|
||||
glEvalCoord1fv = glEvalCoord1fv@4
|
||||
glEvalCoord2d = glEvalCoord2d@16
|
||||
glEvalCoord2dv = glEvalCoord2dv@4
|
||||
glEvalCoord2f = glEvalCoord2f@8
|
||||
glEvalCoord2fv = glEvalCoord2fv@4
|
||||
glEvalMesh1 = glEvalMesh1@12
|
||||
glEvalMesh2 = glEvalMesh2@20
|
||||
glEvalPoint1 = glEvalPoint1@4
|
||||
glEvalPoint2 = glEvalPoint2@8
|
||||
glFeedbackBuffer = glFeedbackBuffer@12
|
||||
glFinish = glFinish@0
|
||||
glFlush = glFlush@0
|
||||
glFogf = glFogf@8
|
||||
glFogfv = glFogfv@8
|
||||
glFogi = glFogi@8
|
||||
glFogiv = glFogiv@8
|
||||
glFrontFace = glFrontFace@4
|
||||
glFrustum = glFrustum@48
|
||||
glGenLists = glGenLists@4
|
||||
glGenTextures = glGenTextures@8
|
||||
glGetBooleanv = glGetBooleanv@8
|
||||
glGetClipPlane = glGetClipPlane@8
|
||||
glGetDoublev = glGetDoublev@8
|
||||
glGetError = glGetError@0
|
||||
glGetFloatv = glGetFloatv@8
|
||||
glGetIntegerv = glGetIntegerv@8
|
||||
glGetLightfv = glGetLightfv@12
|
||||
glGetLightiv = glGetLightiv@12
|
||||
glGetMapdv = glGetMapdv@12
|
||||
glGetMapfv = glGetMapfv@12
|
||||
glGetMapiv = glGetMapiv@12
|
||||
glGetMaterialfv = glGetMaterialfv@12
|
||||
glGetMaterialiv = glGetMaterialiv@12
|
||||
glGetPixelMapfv = glGetPixelMapfv@8
|
||||
glGetPixelMapuiv = glGetPixelMapuiv@8
|
||||
glGetPixelMapusv = glGetPixelMapusv@8
|
||||
glGetPointerv = glGetPointerv@8
|
||||
glGetPolygonStipple = glGetPolygonStipple@4
|
||||
glGetString = glGetString@4
|
||||
glGetTexEnvfv = glGetTexEnvfv@12
|
||||
glGetTexEnviv = glGetTexEnviv@12
|
||||
glGetTexGendv = glGetTexGendv@12
|
||||
glGetTexGenfv = glGetTexGenfv@12
|
||||
glGetTexGeniv = glGetTexGeniv@12
|
||||
glGetTexImage = glGetTexImage@20
|
||||
glGetTexLevelParameterfv = glGetTexLevelParameterfv@16
|
||||
glGetTexLevelParameteriv = glGetTexLevelParameteriv@16
|
||||
glGetTexParameterfv = glGetTexParameterfv@12
|
||||
glGetTexParameteriv = glGetTexParameteriv@12
|
||||
glHint = glHint@8
|
||||
glIndexMask = glIndexMask@4
|
||||
glIndexPointer = glIndexPointer@12
|
||||
glIndexd = glIndexd@8
|
||||
glIndexdv = glIndexdv@4
|
||||
glIndexf = glIndexf@4
|
||||
glIndexfv = glIndexfv@4
|
||||
glIndexi = glIndexi@4
|
||||
glIndexiv = glIndexiv@4
|
||||
glIndexs = glIndexs@4
|
||||
glIndexsv = glIndexsv@4
|
||||
glIndexub = glIndexub@4
|
||||
glIndexubv = glIndexubv@4
|
||||
glInitNames = glInitNames@0
|
||||
glInterleavedArrays = glInterleavedArrays@12
|
||||
glIsEnabled = glIsEnabled@4
|
||||
glIsList = glIsList@4
|
||||
glIsTexture = glIsTexture@4
|
||||
glLightModelf = glLightModelf@8
|
||||
glLightModelfv = glLightModelfv@8
|
||||
glLightModeli = glLightModeli@8
|
||||
glLightModeliv = glLightModeliv@8
|
||||
glLightf = glLightf@12
|
||||
glLightfv = glLightfv@12
|
||||
glLighti = glLighti@12
|
||||
glLightiv = glLightiv@12
|
||||
glLineStipple = glLineStipple@8
|
||||
glLineWidth = glLineWidth@4
|
||||
glListBase = glListBase@4
|
||||
glLoadIdentity = glLoadIdentity@0
|
||||
glLoadMatrixd = glLoadMatrixd@4
|
||||
glLoadMatrixf = glLoadMatrixf@4
|
||||
glLoadName = glLoadName@4
|
||||
glLogicOp = glLogicOp@4
|
||||
glMap1d = glMap1d@32
|
||||
glMap1f = glMap1f@24
|
||||
glMap2d = glMap2d@56
|
||||
glMap2f = glMap2f@40
|
||||
glMapGrid1d = glMapGrid1d@20
|
||||
glMapGrid1f = glMapGrid1f@12
|
||||
glMapGrid2d = glMapGrid2d@40
|
||||
glMapGrid2f = glMapGrid2f@24
|
||||
glMaterialf = glMaterialf@12
|
||||
glMaterialfv = glMaterialfv@12
|
||||
glMateriali = glMateriali@12
|
||||
glMaterialiv = glMaterialiv@12
|
||||
glMatrixMode = glMatrixMode@4
|
||||
glMultMatrixd = glMultMatrixd@4
|
||||
glMultMatrixf = glMultMatrixf@4
|
||||
glNewList = glNewList@8
|
||||
glNormal3b = glNormal3b@12
|
||||
glNormal3bv = glNormal3bv@4
|
||||
glNormal3d = glNormal3d@24
|
||||
glNormal3dv = glNormal3dv@4
|
||||
glNormal3f = glNormal3f@12
|
||||
glNormal3fv = glNormal3fv@4
|
||||
glNormal3i = glNormal3i@12
|
||||
glNormal3iv = glNormal3iv@4
|
||||
glNormal3s = glNormal3s@12
|
||||
glNormal3sv = glNormal3sv@4
|
||||
glNormalPointer = glNormalPointer@12
|
||||
glOrtho = glOrtho@48
|
||||
glPassThrough = glPassThrough@4
|
||||
glPixelMapfv = glPixelMapfv@12
|
||||
glPixelMapuiv = glPixelMapuiv@12
|
||||
glPixelMapusv = glPixelMapusv@12
|
||||
glPixelStoref = glPixelStoref@8
|
||||
glPixelStorei = glPixelStorei@8
|
||||
glPixelTransferf = glPixelTransferf@8
|
||||
glPixelTransferi = glPixelTransferi@8
|
||||
glPixelZoom = glPixelZoom@8
|
||||
glPointSize = glPointSize@4
|
||||
glPolygonMode = glPolygonMode@8
|
||||
glPolygonOffset = glPolygonOffset@8
|
||||
glPolygonStipple = glPolygonStipple@4
|
||||
glPopAttrib = glPopAttrib@0
|
||||
glPopClientAttrib = glPopClientAttrib@0
|
||||
glPopMatrix = glPopMatrix@0
|
||||
glPopName = glPopName@0
|
||||
glPrioritizeTextures = glPrioritizeTextures@12
|
||||
glPushAttrib = glPushAttrib@4
|
||||
glPushClientAttrib = glPushClientAttrib@4
|
||||
glPushMatrix = glPushMatrix@0
|
||||
glPushName = glPushName@4
|
||||
glRasterPos2d = glRasterPos2d@16
|
||||
glRasterPos2dv = glRasterPos2dv@4
|
||||
glRasterPos2f = glRasterPos2f@8
|
||||
glRasterPos2fv = glRasterPos2fv@4
|
||||
glRasterPos2i = glRasterPos2i@8
|
||||
glRasterPos2iv = glRasterPos2iv@4
|
||||
glRasterPos2s = glRasterPos2s@8
|
||||
glRasterPos2sv = glRasterPos2sv@4
|
||||
glRasterPos3d = glRasterPos3d@24
|
||||
glRasterPos3dv = glRasterPos3dv@4
|
||||
glRasterPos3f = glRasterPos3f@12
|
||||
glRasterPos3fv = glRasterPos3fv@4
|
||||
glRasterPos3i = glRasterPos3i@12
|
||||
glRasterPos3iv = glRasterPos3iv@4
|
||||
glRasterPos3s = glRasterPos3s@12
|
||||
glRasterPos3sv = glRasterPos3sv@4
|
||||
glRasterPos4d = glRasterPos4d@32
|
||||
glRasterPos4dv = glRasterPos4dv@4
|
||||
glRasterPos4f = glRasterPos4f@16
|
||||
glRasterPos4fv = glRasterPos4fv@4
|
||||
glRasterPos4i = glRasterPos4i@16
|
||||
glRasterPos4iv = glRasterPos4iv@4
|
||||
glRasterPos4s = glRasterPos4s@16
|
||||
glRasterPos4sv = glRasterPos4sv@4
|
||||
glReadBuffer = glReadBuffer@4
|
||||
glReadPixels = glReadPixels@28
|
||||
glRectd = glRectd@32
|
||||
glRectdv = glRectdv@8
|
||||
glRectf = glRectf@16
|
||||
glRectfv = glRectfv@8
|
||||
glRecti = glRecti@16
|
||||
glRectiv = glRectiv@8
|
||||
glRects = glRects@16
|
||||
glRectsv = glRectsv@8
|
||||
glRenderMode = glRenderMode@4
|
||||
glRotated = glRotated@32
|
||||
glRotatef = glRotatef@16
|
||||
glScaled = glScaled@24
|
||||
glScalef = glScalef@12
|
||||
glScissor = glScissor@16
|
||||
glSelectBuffer = glSelectBuffer@8
|
||||
glShadeModel = glShadeModel@4
|
||||
glStencilFunc = glStencilFunc@12
|
||||
glStencilMask = glStencilMask@4
|
||||
glStencilOp = glStencilOp@12
|
||||
glTexCoord1d = glTexCoord1d@8
|
||||
glTexCoord1dv = glTexCoord1dv@4
|
||||
glTexCoord1f = glTexCoord1f@4
|
||||
glTexCoord1fv = glTexCoord1fv@4
|
||||
glTexCoord1i = glTexCoord1i@4
|
||||
glTexCoord1iv = glTexCoord1iv@4
|
||||
glTexCoord1s = glTexCoord1s@4
|
||||
glTexCoord1sv = glTexCoord1sv@4
|
||||
glTexCoord2d = glTexCoord2d@16
|
||||
glTexCoord2dv = glTexCoord2dv@4
|
||||
glTexCoord2f = glTexCoord2f@8
|
||||
glTexCoord2fv = glTexCoord2fv@4
|
||||
glTexCoord2i = glTexCoord2i@8
|
||||
glTexCoord2iv = glTexCoord2iv@4
|
||||
glTexCoord2s = glTexCoord2s@8
|
||||
glTexCoord2sv = glTexCoord2sv@4
|
||||
glTexCoord3d = glTexCoord3d@24
|
||||
glTexCoord3dv = glTexCoord3dv@4
|
||||
glTexCoord3f = glTexCoord3f@12
|
||||
glTexCoord3fv = glTexCoord3fv@4
|
||||
glTexCoord3i = glTexCoord3i@12
|
||||
glTexCoord3iv = glTexCoord3iv@4
|
||||
glTexCoord3s = glTexCoord3s@12
|
||||
glTexCoord3sv = glTexCoord3sv@4
|
||||
glTexCoord4d = glTexCoord4d@32
|
||||
glTexCoord4dv = glTexCoord4dv@4
|
||||
glTexCoord4f = glTexCoord4f@16
|
||||
glTexCoord4fv = glTexCoord4fv@4
|
||||
glTexCoord4i = glTexCoord4i@16
|
||||
glTexCoord4iv = glTexCoord4iv@4
|
||||
glTexCoord4s = glTexCoord4s@16
|
||||
glTexCoord4sv = glTexCoord4sv@4
|
||||
glTexCoordPointer = glTexCoordPointer@16
|
||||
glTexEnvf = glTexEnvf@12
|
||||
glTexEnvfv = glTexEnvfv@12
|
||||
glTexEnvi = glTexEnvi@12
|
||||
glTexEnviv = glTexEnviv@12
|
||||
glTexGend = glTexGend@16
|
||||
glTexGendv = glTexGendv@12
|
||||
glTexGenf = glTexGenf@12
|
||||
glTexGenfv = glTexGenfv@12
|
||||
glTexGeni = glTexGeni@12
|
||||
glTexGeniv = glTexGeniv@12
|
||||
glTexImage1D = glTexImage1D@32
|
||||
glTexImage2D = glTexImage2D@36
|
||||
glTexParameterf = glTexParameterf@12
|
||||
glTexParameterfv = glTexParameterfv@12
|
||||
glTexParameteri = glTexParameteri@12
|
||||
glTexParameteriv = glTexParameteriv@12
|
||||
glTexSubImage1D = glTexSubImage1D@28
|
||||
glTexSubImage2D = glTexSubImage2D@36
|
||||
glTranslated = glTranslated@24
|
||||
glTranslatef = glTranslatef@12
|
||||
glVertex2d = glVertex2d@16
|
||||
glVertex2dv = glVertex2dv@4
|
||||
glVertex2f = glVertex2f@8
|
||||
glVertex2fv = glVertex2fv@4
|
||||
glVertex2i = glVertex2i@8
|
||||
glVertex2iv = glVertex2iv@4
|
||||
glVertex2s = glVertex2s@8
|
||||
glVertex2sv = glVertex2sv@4
|
||||
glVertex3d = glVertex3d@24
|
||||
glVertex3dv = glVertex3dv@4
|
||||
glVertex3f = glVertex3f@12
|
||||
glVertex3fv = glVertex3fv@4
|
||||
glVertex3i = glVertex3i@12
|
||||
glVertex3iv = glVertex3iv@4
|
||||
glVertex3s = glVertex3s@12
|
||||
glVertex3sv = glVertex3sv@4
|
||||
glVertex4d = glVertex4d@32
|
||||
glVertex4dv = glVertex4dv@4
|
||||
glVertex4f = glVertex4f@16
|
||||
glVertex4fv = glVertex4fv@4
|
||||
glVertex4i = glVertex4i@16
|
||||
glVertex4iv = glVertex4iv@4
|
||||
glVertex4s = glVertex4s@16
|
||||
glVertex4sv = glVertex4sv@4
|
||||
glVertexPointer = glVertexPointer@16
|
||||
glViewport = glViewport@16
|
||||
|
|
|
|||
|
|
@ -610,6 +610,37 @@ match_subroutine_by_name(const char *name,
|
|||
return sig;
|
||||
}
|
||||
|
||||
static ir_rvalue *
|
||||
generate_array_index(void *mem_ctx, exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state, YYLTYPE loc,
|
||||
const ast_expression *array, ast_expression *idx,
|
||||
const char **function_name, exec_list *actual_parameters)
|
||||
{
|
||||
if (array->oper == ast_array_index) {
|
||||
/* This handles arrays of arrays */
|
||||
ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions,
|
||||
state, loc,
|
||||
array->subexpressions[0],
|
||||
array->subexpressions[1],
|
||||
function_name, actual_parameters);
|
||||
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
|
||||
|
||||
YYLTYPE index_loc = idx->get_location();
|
||||
return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array,
|
||||
outer_array_idx, loc,
|
||||
index_loc);
|
||||
} else {
|
||||
ir_variable *sub_var = NULL;
|
||||
*function_name = array->primary_expression.identifier;
|
||||
|
||||
match_subroutine_by_name(*function_name, actual_parameters,
|
||||
state, &sub_var);
|
||||
|
||||
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
|
||||
return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc,
|
||||
ir_function *f)
|
||||
|
|
@ -1989,16 +2020,18 @@ ast_function_expression::hir(exec_list *instructions,
|
|||
ir_variable *sub_var = NULL;
|
||||
ir_rvalue *array_idx = NULL;
|
||||
|
||||
process_parameters(instructions, &actual_parameters, &this->expressions,
|
||||
state);
|
||||
|
||||
if (id->oper == ast_array_index) {
|
||||
func_name = id->subexpressions[0]->primary_expression.identifier;
|
||||
array_idx = id->subexpressions[1]->hir(instructions, state);
|
||||
array_idx = generate_array_index(ctx, instructions, state, loc,
|
||||
id->subexpressions[0],
|
||||
id->subexpressions[1], &func_name,
|
||||
&actual_parameters);
|
||||
} else {
|
||||
func_name = id->primary_expression.identifier;
|
||||
}
|
||||
|
||||
process_parameters(instructions, &actual_parameters, &this->expressions,
|
||||
state);
|
||||
|
||||
ir_function_signature *sig =
|
||||
match_function_by_name(func_name, &actual_parameters, state);
|
||||
|
||||
|
|
|
|||
|
|
@ -487,54 +487,54 @@ bit_logic_result_type(const struct glsl_type *type_a,
|
|||
ast_operators op,
|
||||
struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
|
||||
{
|
||||
if (!state->check_bitwise_operations_allowed(loc)) {
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
if (!state->check_bitwise_operations_allowed(loc)) {
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
|
||||
/* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
|
||||
*
|
||||
* "The bitwise operators and (&), exclusive-or (^), and inclusive-or
|
||||
* (|). The operands must be of type signed or unsigned integers or
|
||||
* integer vectors."
|
||||
*/
|
||||
if (!type_a->is_integer()) {
|
||||
_mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
|
||||
ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
if (!type_b->is_integer()) {
|
||||
_mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
|
||||
/* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
|
||||
*
|
||||
* "The bitwise operators and (&), exclusive-or (^), and inclusive-or
|
||||
* (|). The operands must be of type signed or unsigned integers or
|
||||
* integer vectors."
|
||||
*/
|
||||
if (!type_a->is_integer()) {
|
||||
_mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
|
||||
ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
if (!type_b->is_integer()) {
|
||||
_mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
|
||||
ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
|
||||
/* "The fundamental types of the operands (signed or unsigned) must
|
||||
* match,"
|
||||
*/
|
||||
if (type_a->base_type != type_b->base_type) {
|
||||
_mesa_glsl_error(loc, state, "operands of `%s' must have the same "
|
||||
"base type", ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
/* "The fundamental types of the operands (signed or unsigned) must
|
||||
* match,"
|
||||
*/
|
||||
if (type_a->base_type != type_b->base_type) {
|
||||
_mesa_glsl_error(loc, state, "operands of `%s' must have the same "
|
||||
"base type", ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
|
||||
/* "The operands cannot be vectors of differing size." */
|
||||
if (type_a->is_vector() &&
|
||||
type_b->is_vector() &&
|
||||
type_a->vector_elements != type_b->vector_elements) {
|
||||
_mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
|
||||
"different sizes", ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
/* "The operands cannot be vectors of differing size." */
|
||||
if (type_a->is_vector() &&
|
||||
type_b->is_vector() &&
|
||||
type_a->vector_elements != type_b->vector_elements) {
|
||||
_mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
|
||||
"different sizes", ast_expression::operator_string(op));
|
||||
return glsl_type::error_type;
|
||||
}
|
||||
|
||||
/* "If one operand is a scalar and the other a vector, the scalar is
|
||||
* applied component-wise to the vector, resulting in the same type as
|
||||
* the vector. The fundamental types of the operands [...] will be the
|
||||
* resulting fundamental type."
|
||||
*/
|
||||
if (type_a->is_scalar())
|
||||
return type_b;
|
||||
else
|
||||
return type_a;
|
||||
/* "If one operand is a scalar and the other a vector, the scalar is
|
||||
* applied component-wise to the vector, resulting in the same type as
|
||||
* the vector. The fundamental types of the operands [...] will be the
|
||||
* resulting fundamental type."
|
||||
*/
|
||||
if (type_a->is_scalar())
|
||||
return type_b;
|
||||
else
|
||||
return type_a;
|
||||
}
|
||||
|
||||
static const struct glsl_type *
|
||||
|
|
@ -6294,6 +6294,18 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
|
||||
state->struct_specifier_depth--;
|
||||
|
||||
for (unsigned i = 0; i < num_variables; i++) {
|
||||
if (fields[i].stream != -1 &&
|
||||
(unsigned) fields[i].stream != this->layout.stream) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"stream layout qualifier on "
|
||||
"interface block member `%s' does not match "
|
||||
"the interface block (%d vs %d)",
|
||||
fields[i].name, fields[i].stream,
|
||||
this->layout.stream);
|
||||
}
|
||||
}
|
||||
|
||||
if (!redeclaring_per_vertex) {
|
||||
validate_identifier(this->block_name, loc, state);
|
||||
|
||||
|
|
@ -6634,6 +6646,8 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
|
||||
var->data.binding = this->layout.binding;
|
||||
|
||||
var->data.stream = this->layout.stream;
|
||||
|
||||
state->symbols->add_variable(var);
|
||||
instructions->push_tail(var);
|
||||
}
|
||||
|
|
@ -6652,6 +6666,7 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
var->data.centroid = fields[i].centroid;
|
||||
var->data.sample = fields[i].sample;
|
||||
var->data.patch = fields[i].patch;
|
||||
var->data.stream = this->layout.stream;
|
||||
var->init_interface_type(block_type);
|
||||
|
||||
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
|
||||
|
|
@ -6664,17 +6679,6 @@ ast_interface_block::hir(exec_list *instructions,
|
|||
var->data.matrix_layout = fields[i].matrix_layout;
|
||||
}
|
||||
|
||||
if (fields[i].stream != -1 &&
|
||||
((unsigned)fields[i].stream) != this->layout.stream) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"stream layout qualifier on "
|
||||
"interface block member `%s' does not match "
|
||||
"the interface block (%d vs %d)",
|
||||
var->name, fields[i].stream, this->layout.stream);
|
||||
}
|
||||
|
||||
var->data.stream = this->layout.stream;
|
||||
|
||||
if (var->data.mode == ir_var_shader_storage) {
|
||||
var->data.image_read_only = fields[i].image_read_only;
|
||||
var->data.image_write_only = fields[i].image_write_only;
|
||||
|
|
|
|||
|
|
@ -2609,17 +2609,6 @@ interface_block:
|
|||
|
||||
block->layout.is_default_qualifier = false;
|
||||
|
||||
foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
|
||||
ast_type_qualifier& qualifier = member->type->qualifier;
|
||||
if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
|
||||
_mesa_glsl_error(& @1, state,
|
||||
"stream layout qualifier on "
|
||||
"interface block member does not match "
|
||||
"the interface block (%d vs %d)",
|
||||
qualifier.stream, block->layout.stream);
|
||||
YYERROR;
|
||||
}
|
||||
}
|
||||
$$ = block;
|
||||
}
|
||||
| memory_qualifier interface_block
|
||||
|
|
|
|||
|
|
@ -763,7 +763,8 @@ private:
|
|||
/* Assign explicit locations. */
|
||||
if (current_var->data.explicit_location) {
|
||||
/* Set sequential locations for struct fields. */
|
||||
if (record_type != NULL) {
|
||||
if (current_var->type->without_array()->is_record() ||
|
||||
current_var->type->is_array_of_arrays()) {
|
||||
const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
|
||||
this->uniforms[id].remap_location =
|
||||
this->explicit_location + field_counter;
|
||||
|
|
@ -1180,7 +1181,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
|
||||
/* Reserve all the explicit locations of the active uniforms. */
|
||||
for (unsigned i = 0; i < num_uniforms; i++) {
|
||||
if (uniforms[i].type->is_subroutine())
|
||||
if (uniforms[i].type->is_subroutine() ||
|
||||
uniforms[i].is_shader_storage)
|
||||
continue;
|
||||
|
||||
if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
|
||||
|
|
@ -1200,8 +1202,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
/* Reserve locations for rest of the uniforms. */
|
||||
for (unsigned i = 0; i < num_uniforms; i++) {
|
||||
|
||||
if (uniforms[i].type->is_subroutine())
|
||||
if (uniforms[i].type->is_subroutine() ||
|
||||
uniforms[i].is_shader_storage)
|
||||
continue;
|
||||
|
||||
/* Built-in uniforms should not get any location. */
|
||||
if (uniforms[i].builtin)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -651,7 +651,7 @@ link_invalidate_variable_locations(exec_list *ir)
|
|||
|
||||
|
||||
/**
|
||||
* Set UsesClipDistance and ClipDistanceArraySize based on the given shader.
|
||||
* Set clip_distance_array_size based on the given shader.
|
||||
*
|
||||
* Also check for errors based on incorrect usage of gl_ClipVertex and
|
||||
* gl_ClipDistance.
|
||||
|
|
@ -660,10 +660,10 @@ link_invalidate_variable_locations(exec_list *ir)
|
|||
*/
|
||||
static void
|
||||
analyze_clip_usage(struct gl_shader_program *prog,
|
||||
struct gl_shader *shader, GLboolean *UsesClipDistance,
|
||||
GLuint *ClipDistanceArraySize)
|
||||
struct gl_shader *shader,
|
||||
GLuint *clip_distance_array_size)
|
||||
{
|
||||
*ClipDistanceArraySize = 0;
|
||||
*clip_distance_array_size = 0;
|
||||
|
||||
if (!prog->IsES && prog->Version >= 130) {
|
||||
/* From section 7.1 (Vertex Shader Special Variables) of the
|
||||
|
|
@ -686,13 +686,14 @@ analyze_clip_usage(struct gl_shader_program *prog,
|
|||
_mesa_shader_stage_to_string(shader->Stage));
|
||||
return;
|
||||
}
|
||||
*UsesClipDistance = clip_distance.variable_found();
|
||||
ir_variable *clip_distance_var =
|
||||
shader->symbols->get_variable("gl_ClipDistance");
|
||||
if (clip_distance_var)
|
||||
*ClipDistanceArraySize = clip_distance_var->type->length;
|
||||
} else {
|
||||
*UsesClipDistance = false;
|
||||
|
||||
if (clip_distance.variable_found()) {
|
||||
ir_variable *clip_distance_var =
|
||||
shader->symbols->get_variable("gl_ClipDistance");
|
||||
|
||||
assert(clip_distance_var);
|
||||
*clip_distance_array_size = clip_distance_var->type->length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -700,8 +701,7 @@ analyze_clip_usage(struct gl_shader_program *prog,
|
|||
/**
|
||||
* Verify that a vertex shader executable meets all semantic requirements.
|
||||
*
|
||||
* Also sets prog->Vert.UsesClipDistance and prog->Vert.ClipDistanceArraySize
|
||||
* as a side effect.
|
||||
* Also sets prog->Vert.ClipDistanceArraySize as a side effect.
|
||||
*
|
||||
* \param shader Vertex shader executable to be verified
|
||||
*/
|
||||
|
|
@ -754,8 +754,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog,
|
|||
}
|
||||
}
|
||||
|
||||
analyze_clip_usage(prog, shader, &prog->Vert.UsesClipDistance,
|
||||
&prog->Vert.ClipDistanceArraySize);
|
||||
analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -765,8 +764,7 @@ validate_tess_eval_shader_executable(struct gl_shader_program *prog,
|
|||
if (shader == NULL)
|
||||
return;
|
||||
|
||||
analyze_clip_usage(prog, shader, &prog->TessEval.UsesClipDistance,
|
||||
&prog->TessEval.ClipDistanceArraySize);
|
||||
analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -797,8 +795,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog,
|
|||
/**
|
||||
* Verify that a geometry shader executable meets all semantic requirements
|
||||
*
|
||||
* Also sets prog->Geom.VerticesIn, prog->Geom.UsesClipDistance, and
|
||||
* prog->Geom.ClipDistanceArraySize as a side effect.
|
||||
* Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as
|
||||
* a side effect.
|
||||
*
|
||||
* \param shader Geometry shader executable to be verified
|
||||
*/
|
||||
|
|
@ -812,8 +810,7 @@ validate_geometry_shader_executable(struct gl_shader_program *prog,
|
|||
unsigned num_vertices = vertices_per_prim(prog->Geom.InputType);
|
||||
prog->Geom.VerticesIn = num_vertices;
|
||||
|
||||
analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance,
|
||||
&prog->Geom.ClipDistanceArraySize);
|
||||
analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -3117,8 +3114,8 @@ check_explicit_uniform_locations(struct gl_context *ctx,
|
|||
|
||||
foreach_in_list(ir_instruction, node, sh->ir) {
|
||||
ir_variable *var = node->as_variable();
|
||||
if (var && (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) &&
|
||||
var->data.explicit_location) {
|
||||
if (var && (var->data.mode == ir_var_uniform &&
|
||||
var->data.explicit_location)) {
|
||||
bool ret;
|
||||
if (var->type->is_subroutine())
|
||||
ret = reserve_subroutine_explicit_locations(prog, sh, var);
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
|
|||
new_var->data.centroid = iface_t->fields.structure[i].centroid;
|
||||
new_var->data.sample = iface_t->fields.structure[i].sample;
|
||||
new_var->data.patch = iface_t->fields.structure[i].patch;
|
||||
new_var->data.stream = var->data.stream;
|
||||
|
||||
new_var->init_interface_type(iface_t);
|
||||
hash_table_insert(interface_namespace, new_var,
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
|
|||
continue;
|
||||
|
||||
if (ir->array_idx != NULL)
|
||||
var = new(mem_ctx) ir_dereference_array(ir->sub_var, ir->array_idx->clone(mem_ctx, NULL));
|
||||
var = ir->array_idx->clone(mem_ctx, NULL);
|
||||
else
|
||||
var = new(mem_ctx) ir_dereference_variable(ir->sub_var);
|
||||
|
||||
|
|
|
|||
|
|
@ -238,6 +238,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
|
|||
case ir_type_swizzle: {
|
||||
ir_swizzle *s = (ir_swizzle *) ir;
|
||||
ir = s->val->as_dereference();
|
||||
/* Skip swizzle in the next pass */
|
||||
d = ir;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,15 +164,20 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
|
|||
shader->info.outputs_written = sh->Program->OutputsWritten;
|
||||
shader->info.system_values_read = sh->Program->SystemValuesRead;
|
||||
shader->info.uses_texture_gather = sh->Program->UsesGather;
|
||||
shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
|
||||
shader->info.uses_clip_distance_out =
|
||||
sh->Program->ClipDistanceArraySize != 0;
|
||||
shader->info.separate_shader = shader_prog->SeparateShader;
|
||||
shader->info.has_transform_feedback_varyings =
|
||||
shader_prog->TransformFeedback.NumVarying > 0;
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
|
||||
shader->info.gs.output_primitive = sh->Geom.OutputType;
|
||||
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
|
||||
shader->info.gs.invocations = sh->Geom.Invocations;
|
||||
shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
|
||||
shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
|
||||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT: {
|
||||
|
|
|
|||
|
|
@ -521,6 +521,11 @@ struct glsl_type {
|
|||
return base_type == GLSL_TYPE_ARRAY;
|
||||
}
|
||||
|
||||
bool is_array_of_arrays() const
|
||||
{
|
||||
return is_array() && fields.array->is_array();
|
||||
}
|
||||
|
||||
/**
|
||||
* Query whether or not a type is a record
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1521,11 +1521,23 @@ typedef struct nir_shader_info {
|
|||
|
||||
union {
|
||||
struct {
|
||||
/** The number of vertices received per input primitive */
|
||||
unsigned vertices_in;
|
||||
|
||||
/** The output primitive type (GL enum value) */
|
||||
unsigned output_primitive;
|
||||
|
||||
/** The maximum number of vertices the geometry shader might write. */
|
||||
unsigned vertices_out;
|
||||
|
||||
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
|
||||
unsigned invocations;
|
||||
|
||||
/** Whether or not this shader uses EndPrimitive */
|
||||
bool uses_end_primitive;
|
||||
|
||||
/** Whether or not this shader uses non-zero streams */
|
||||
bool uses_streams;
|
||||
} gs;
|
||||
|
||||
struct {
|
||||
|
|
@ -1924,7 +1936,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
|
|||
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
|
||||
void nir_dump_cfg(nir_shader *shader, FILE *fp);
|
||||
|
||||
int nir_gs_count_vertices(nir_shader *shader);
|
||||
int nir_gs_count_vertices(const nir_shader *shader);
|
||||
|
||||
bool nir_split_var_copies(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ as_set_vertex_count(nir_instr *instr)
|
|||
* counting at the NIR level.
|
||||
*/
|
||||
int
|
||||
nir_gs_count_vertices(nir_shader *shader)
|
||||
nir_gs_count_vertices(const nir_shader *shader)
|
||||
{
|
||||
int count = -1;
|
||||
|
||||
|
|
|
|||
|
|
@ -327,12 +327,12 @@ struct cfg_t {
|
|||
#define foreach_inst_in_block_reverse_safe(__type, __inst, __block) \
|
||||
foreach_in_list_reverse_safe(__type, __inst, &(__block)->instructions)
|
||||
|
||||
#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst, __block) \
|
||||
#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst) \
|
||||
for (__type *__scan_inst = (__type *)__inst->next; \
|
||||
!__scan_inst->is_tail_sentinel(); \
|
||||
__scan_inst = (__type *)__scan_inst->next)
|
||||
|
||||
#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst, __block) \
|
||||
#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst) \
|
||||
for (__type *__scan_inst = (__type *)__inst->prev; \
|
||||
!__scan_inst->is_head_sentinel(); \
|
||||
__scan_inst = (__type *)__scan_inst->prev)
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ struct brw_compiler {
|
|||
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
|
||||
|
||||
bool scalar_vs;
|
||||
bool scalar_gs;
|
||||
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
|
||||
};
|
||||
|
||||
|
|
@ -488,6 +489,9 @@ struct brw_vue_prog_data {
|
|||
struct brw_stage_prog_data base;
|
||||
struct brw_vue_map vue_map;
|
||||
|
||||
/** Should the hardware deliver input VUE handles for URB pull loads? */
|
||||
bool include_vue_handles;
|
||||
|
||||
GLuint urb_read_length;
|
||||
GLuint total_grf;
|
||||
|
||||
|
|
@ -596,21 +600,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
||||
/**
|
||||
* Scratch data used when compiling a GLSL geometry shader.
|
||||
*/
|
||||
struct brw_gs_compile
|
||||
{
|
||||
struct brw_gs_prog_key key;
|
||||
struct brw_gs_prog_data prog_data;
|
||||
struct brw_vue_map input_vue_map;
|
||||
|
||||
struct brw_geometry_program *gp;
|
||||
|
||||
unsigned control_data_bits_per_vertex;
|
||||
unsigned control_data_header_size_bits;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compile a geometry shader.
|
||||
*
|
||||
|
|
@ -618,10 +607,11 @@ struct brw_gs_compile
|
|||
*/
|
||||
const unsigned *
|
||||
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
void *mem_ctx,
|
||||
const struct brw_gs_prog_key *key,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const struct nir_shader *shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
void *mem_ctx,
|
||||
int shader_time_index,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
|
|
|||
|
|
@ -918,8 +918,8 @@ enum opcode {
|
|||
* Source 0: [required] Color 0.
|
||||
* Source 1: [optional] Color 1 (for dual source blend messages).
|
||||
* Source 2: [optional] Src0 Alpha.
|
||||
* Source 3: [optional] Source Depth (passthrough from the thread payload).
|
||||
* Source 4: [optional] Destination Depth (gl_FragDepth).
|
||||
* Source 3: [optional] Source Depth (gl_FragDepth)
|
||||
* Source 4: [optional (gen4-5)] Destination Depth passthrough from the thread payload.
|
||||
* Source 5: [optional] Sample Mask (gl_SampleMask).
|
||||
* Source 6: [required] Number of color components (as a UD immediate).
|
||||
*/
|
||||
|
|
@ -1033,7 +1033,19 @@ enum opcode {
|
|||
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
|
||||
SHADER_OPCODE_GEN7_SCRATCH_READ,
|
||||
|
||||
/**
|
||||
* Gen8+ SIMD8 URB Read message.
|
||||
*
|
||||
* Source 0: The header register, containing URB handles (g1).
|
||||
*
|
||||
* Currently only supports constant offsets, in inst->offset.
|
||||
*/
|
||||
SHADER_OPCODE_URB_READ_SIMD8,
|
||||
|
||||
SHADER_OPCODE_URB_WRITE_SIMD8,
|
||||
SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
|
||||
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
|
||||
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
|
||||
|
||||
/**
|
||||
* Return the index of an arbitrary live channel (i.e. one of the channels
|
||||
|
|
@ -2385,7 +2397,7 @@ enum brw_pixel_shader_coverage_mask_mode {
|
|||
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
|
||||
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
|
||||
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
|
||||
# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
|
||||
# define GEN9_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
|
||||
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
|
||||
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
|
||||
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
|
||||
|
|
|
|||
|
|
@ -690,7 +690,7 @@ set_control_index(const struct brw_device_info *devinfo,
|
|||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (control_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_control_index(dst, i);
|
||||
brw_compact_inst_set_control_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -711,7 +711,7 @@ set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
|
|||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (datatype_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_datatype_index(dst, i);
|
||||
brw_compact_inst_set_datatype_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -732,7 +732,7 @@ set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
|
|||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (subreg_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_subreg_index(dst, i);
|
||||
brw_compact_inst_set_subreg_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -764,7 +764,7 @@ set_src0_index(const struct brw_device_info *devinfo,
|
|||
if (!get_src_index(uncompacted, &compacted))
|
||||
return false;
|
||||
|
||||
brw_compact_inst_set_src0_index(dst, compacted);
|
||||
brw_compact_inst_set_src0_index(devinfo, dst, compacted);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -784,7 +784,7 @@ set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
|
|||
return false;
|
||||
}
|
||||
|
||||
brw_compact_inst_set_src1_index(dst, compacted);
|
||||
brw_compact_inst_set_src1_index(devinfo, dst, compacted);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -804,7 +804,7 @@ set_3src_control_index(const struct brw_device_info *devinfo,
|
|||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
|
||||
if (gen8_3src_control_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_control_index(dst, i);
|
||||
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -838,7 +838,7 @@ set_3src_source_index(const struct brw_device_info *devinfo,
|
|||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
|
||||
if (gen8_3src_source_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_source_index(dst, i);
|
||||
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -909,7 +909,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
|
|||
return false;
|
||||
|
||||
#define compact(field) \
|
||||
brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
|
||||
brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
|
||||
|
||||
compact(opcode);
|
||||
|
||||
|
|
@ -921,7 +921,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
|
|||
|
||||
compact(dst_reg_nr);
|
||||
compact(src0_rep_ctrl);
|
||||
brw_compact_inst_set_3src_cmpt_control(dst, true);
|
||||
brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
|
||||
compact(debug_control);
|
||||
compact(saturate);
|
||||
compact(src1_rep_ctrl);
|
||||
|
|
@ -1003,36 +1003,52 @@ brw_try_compact_instruction(const struct brw_device_info *devinfo,
|
|||
|
||||
memset(&temp, 0, sizeof(temp));
|
||||
|
||||
brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
|
||||
brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
|
||||
#define compact(field) \
|
||||
brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
|
||||
|
||||
compact(opcode);
|
||||
compact(debug_control);
|
||||
|
||||
if (!set_control_index(devinfo, &temp, src))
|
||||
return false;
|
||||
if (!set_datatype_index(devinfo, &temp, src))
|
||||
return false;
|
||||
if (!set_subreg_index(devinfo, &temp, src, is_immediate))
|
||||
return false;
|
||||
brw_compact_inst_set_acc_wr_control(&temp,
|
||||
brw_inst_acc_wr_control(devinfo, src));
|
||||
brw_compact_inst_set_cond_modifier(&temp,
|
||||
brw_inst_cond_modifier(devinfo, src));
|
||||
|
||||
if (devinfo->gen >= 6) {
|
||||
compact(acc_wr_control);
|
||||
} else {
|
||||
compact(mask_control_ex);
|
||||
}
|
||||
|
||||
compact(cond_modifier);
|
||||
|
||||
if (devinfo->gen <= 6)
|
||||
brw_compact_inst_set_flag_subreg_nr(&temp,
|
||||
brw_inst_flag_subreg_nr(devinfo, src));
|
||||
brw_compact_inst_set_cmpt_control(&temp, true);
|
||||
compact(flag_subreg_nr);
|
||||
|
||||
brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
|
||||
|
||||
if (!set_src0_index(devinfo, &temp, src))
|
||||
return false;
|
||||
if (!set_src1_index(devinfo, &temp, src, is_immediate))
|
||||
return false;
|
||||
brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
|
||||
brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
|
||||
|
||||
brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
|
||||
brw_inst_dst_da_reg_nr(devinfo, src));
|
||||
brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
|
||||
brw_inst_src0_da_reg_nr(devinfo, src));
|
||||
|
||||
if (is_immediate) {
|
||||
brw_compact_inst_set_src1_reg_nr(&temp,
|
||||
brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
|
||||
brw_inst_imm_ud(devinfo, src) & 0xff);
|
||||
} else {
|
||||
brw_compact_inst_set_src1_reg_nr(&temp,
|
||||
brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
|
||||
brw_inst_src1_da_reg_nr(devinfo, src));
|
||||
}
|
||||
|
||||
#undef compact
|
||||
|
||||
*dst = temp;
|
||||
|
||||
return true;
|
||||
|
|
@ -1043,7 +1059,7 @@ set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst,
|
|||
brw_compact_inst *src)
|
||||
{
|
||||
uint32_t uncompacted =
|
||||
control_index_table[brw_compact_inst_control_index(src)];
|
||||
control_index_table[brw_compact_inst_control_index(devinfo, src)];
|
||||
|
||||
if (devinfo->gen >= 8) {
|
||||
brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
|
||||
|
|
@ -1064,7 +1080,8 @@ static void
|
|||
set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst,
|
||||
brw_compact_inst *src)
|
||||
{
|
||||
uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
|
||||
uint32_t uncompacted =
|
||||
datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
|
||||
|
||||
if (devinfo->gen >= 8) {
|
||||
brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
|
||||
|
|
@ -1080,7 +1097,8 @@ static void
|
|||
set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst,
|
||||
brw_compact_inst *src)
|
||||
{
|
||||
uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
|
||||
uint16_t uncompacted =
|
||||
subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
|
||||
|
||||
brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
|
||||
brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
|
||||
|
|
@ -1091,7 +1109,7 @@ static void
|
|||
set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst,
|
||||
brw_compact_inst *src)
|
||||
{
|
||||
uint32_t compacted = brw_compact_inst_src0_index(src);
|
||||
uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
|
||||
uint16_t uncompacted = src_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 88, 77, uncompacted);
|
||||
|
|
@ -1102,11 +1120,12 @@ set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst,
|
|||
brw_compact_inst *src, bool is_immediate)
|
||||
{
|
||||
if (is_immediate) {
|
||||
signed high5 = brw_compact_inst_src1_index(src);
|
||||
signed high5 = brw_compact_inst_src1_index(devinfo, src);
|
||||
/* Replicate top bit of src1_index into high 20 bits of the immediate. */
|
||||
brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
|
||||
} else {
|
||||
uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
|
||||
uint16_t uncompacted =
|
||||
src_index_table[brw_compact_inst_src1_index(devinfo, src)];
|
||||
|
||||
brw_inst_set_bits(dst, 120, 109, uncompacted);
|
||||
}
|
||||
|
|
@ -1118,7 +1137,7 @@ set_uncompacted_3src_control_index(const struct brw_device_info *devinfo,
|
|||
{
|
||||
assert(devinfo->gen >= 8);
|
||||
|
||||
uint32_t compacted = brw_compact_inst_3src_control_index(src);
|
||||
uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
|
||||
uint32_t uncompacted = gen8_3src_control_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
|
||||
|
|
@ -1134,7 +1153,7 @@ set_uncompacted_3src_source_index(const struct brw_device_info *devinfo,
|
|||
{
|
||||
assert(devinfo->gen >= 8);
|
||||
|
||||
uint32_t compacted = brw_compact_inst_3src_source_index(src);
|
||||
uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
|
||||
uint64_t uncompacted = gen8_3src_source_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
|
||||
|
|
@ -1160,7 +1179,7 @@ brw_uncompact_3src_instruction(const struct brw_device_info *devinfo,
|
|||
assert(devinfo->gen >= 8);
|
||||
|
||||
#define uncompact(field) \
|
||||
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
|
||||
brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
|
||||
|
||||
uncompact(opcode);
|
||||
|
||||
|
|
@ -1190,13 +1209,16 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
|
|||
{
|
||||
memset(dst, 0, sizeof(*dst));
|
||||
|
||||
if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
|
||||
if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(devinfo, src))) {
|
||||
brw_uncompact_3src_instruction(devinfo, dst, src);
|
||||
return;
|
||||
}
|
||||
|
||||
brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
|
||||
brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
|
||||
#define uncompact(field) \
|
||||
brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
|
||||
|
||||
uncompact(opcode);
|
||||
uncompact(debug_control);
|
||||
|
||||
set_uncompacted_control(devinfo, dst, src);
|
||||
set_uncompacted_datatype(devinfo, dst, src);
|
||||
|
|
@ -1206,22 +1228,36 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
|
|||
brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
|
||||
|
||||
set_uncompacted_subreg(devinfo, dst, src);
|
||||
brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
|
||||
brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
|
||||
|
||||
if (devinfo->gen >= 6) {
|
||||
uncompact(acc_wr_control);
|
||||
} else {
|
||||
uncompact(mask_control_ex);
|
||||
}
|
||||
|
||||
uncompact(cond_modifier);
|
||||
|
||||
if (devinfo->gen <= 6)
|
||||
brw_inst_set_flag_subreg_nr(devinfo, dst,
|
||||
brw_compact_inst_flag_subreg_nr(src));
|
||||
uncompact(flag_subreg_nr);
|
||||
|
||||
set_uncompacted_src0(devinfo, dst, src);
|
||||
set_uncompacted_src1(devinfo, dst, src, is_immediate);
|
||||
brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
|
||||
brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
|
||||
|
||||
brw_inst_set_dst_da_reg_nr(devinfo, dst,
|
||||
brw_compact_inst_dst_reg_nr(devinfo, src));
|
||||
brw_inst_set_src0_da_reg_nr(devinfo, dst,
|
||||
brw_compact_inst_src0_reg_nr(devinfo, src));
|
||||
|
||||
if (is_immediate) {
|
||||
brw_inst_set_imm_ud(devinfo, dst,
|
||||
brw_inst_imm_ud(devinfo, dst) |
|
||||
brw_compact_inst_src1_reg_nr(src));
|
||||
brw_compact_inst_src1_reg_nr(devinfo, src));
|
||||
} else {
|
||||
brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
|
||||
brw_inst_set_src1_da_reg_nr(devinfo, dst,
|
||||
brw_compact_inst_src1_reg_nr(devinfo, src));
|
||||
}
|
||||
|
||||
#undef uncompact
|
||||
}
|
||||
|
||||
void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
|
||||
|
|
@ -1415,8 +1451,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
|
||||
brw_compact_inst *align = store + offset;
|
||||
memset(align, 0, sizeof(*align));
|
||||
brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
|
||||
brw_compact_inst_set_cmpt_control(align, true);
|
||||
brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
|
||||
brw_compact_inst_set_cmpt_control(devinfo, align, true);
|
||||
offset += sizeof(brw_compact_inst);
|
||||
compacted_count--;
|
||||
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
|
||||
|
|
@ -1524,8 +1560,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
if (p->next_insn_offset & sizeof(brw_compact_inst)) {
|
||||
brw_compact_inst *align = store + offset;
|
||||
memset(align, 0, sizeof(*align));
|
||||
brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
|
||||
brw_compact_inst_set_cmpt_control(align, true);
|
||||
brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
|
||||
brw_compact_inst_set_cmpt_control(devinfo, align, true);
|
||||
p->next_insn_offset += sizeof(brw_compact_inst);
|
||||
}
|
||||
p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
|
||||
|
|
|
|||
|
|
@ -281,6 +281,10 @@ fs_inst::is_send_from_grf() const
|
|||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
return true;
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
return src[1].file == GRF;
|
||||
|
|
@ -782,6 +786,10 @@ fs_inst::regs_read(int arg) const
|
|||
switch (opcode) {
|
||||
case FS_OPCODE_FB_WRITE:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||
|
|
@ -911,6 +919,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
|
|||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||
|
|
@ -2239,13 +2250,15 @@ fs_visitor::opt_sampler_eot()
|
|||
if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex())
|
||||
return false;
|
||||
|
||||
/* This optimisation doesn't seem to work for textureGather for some
|
||||
* reason. I can't find any documentation or known workarounds to indicate
|
||||
* that this is expected, but considering that it is probably pretty
|
||||
* unlikely that a shader would directly write out the results from
|
||||
* textureGather we might as well just disable it.
|
||||
/* 3D Sampler » Messages » Message Format
|
||||
*
|
||||
* “Response Length of zero is allowed on all SIMD8* and SIMD16* sampler
|
||||
* messages except sample+killpix, resinfo, sampleinfo, LOD, and gather4*”
|
||||
*/
|
||||
if (tex_inst->opcode == SHADER_OPCODE_TG4 ||
|
||||
if (tex_inst->opcode == SHADER_OPCODE_TXS ||
|
||||
tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO ||
|
||||
tex_inst->opcode == SHADER_OPCODE_LOD ||
|
||||
tex_inst->opcode == SHADER_OPCODE_TG4 ||
|
||||
tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)
|
||||
return false;
|
||||
|
||||
|
|
@ -2457,7 +2470,7 @@ fs_visitor::compute_to_mrf()
|
|||
/* Found a move of a GRF to a MRF. Let's see if we can go
|
||||
* rewrite the thing that made this GRF to write into the MRF.
|
||||
*/
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->dst.file == GRF &&
|
||||
scan_inst->dst.reg == inst->src[0].reg) {
|
||||
/* Found the last thing to write our reg we want to turn
|
||||
|
|
@ -2805,7 +2818,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
|||
* we assume that there are no outstanding dependencies on entry to the
|
||||
* program.
|
||||
*/
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
/* If we hit control flow, assume that there *are* outstanding
|
||||
* dependencies, and force their cleanup before our instruction.
|
||||
*/
|
||||
|
|
@ -2871,7 +2884,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
|
|||
/* Walk forwards looking for writes to registers we're writing which aren't
|
||||
* read before being written.
|
||||
*/
|
||||
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst, block) {
|
||||
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) {
|
||||
/* If we hit control flow, force resolve all remaining dependencies. */
|
||||
if (block->end() == scan_inst) {
|
||||
for (int i = 0; i < write_len; i++) {
|
||||
|
|
|
|||
|
|
@ -62,6 +62,8 @@ namespace brw {
|
|||
class fs_live_variables;
|
||||
}
|
||||
|
||||
struct brw_gs_compile;
|
||||
|
||||
static inline fs_reg
|
||||
offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
|
||||
{
|
||||
|
|
@ -99,7 +101,12 @@ public:
|
|||
const nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
int shader_time_index);
|
||||
|
||||
fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
struct brw_gs_compile *gs_compile,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const nir_shader *shader);
|
||||
void init();
|
||||
~fs_visitor();
|
||||
|
||||
fs_reg vgrf(const glsl_type *const type);
|
||||
|
|
@ -298,6 +305,8 @@ public:
|
|||
const void *const key;
|
||||
const struct brw_sampler_prog_key_data *key_tex;
|
||||
|
||||
struct brw_gs_compile *gs_compile;
|
||||
|
||||
struct brw_stage_prog_data *prog_data;
|
||||
struct gl_program *prog;
|
||||
|
||||
|
|
@ -415,6 +424,7 @@ private:
|
|||
struct brw_reg implied_header,
|
||||
GLuint nr);
|
||||
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
|
||||
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
|
||||
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
|
||||
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
|
||||
void generate_barrier(fs_inst *inst, struct brw_reg src);
|
||||
|
|
|
|||
|
|
@ -87,8 +87,7 @@ opt_cmod_propagation_local(bblock_t *block)
|
|||
continue;
|
||||
|
||||
bool read_flag = false;
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst,
|
||||
block) {
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->overwrites_reg(inst->src[0])) {
|
||||
if (scan_inst->is_partial_write() ||
|
||||
scan_inst->dst.reg_offset != inst->src[0].reg_offset)
|
||||
|
|
|
|||
|
|
@ -354,6 +354,28 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_urb_read(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg header)
|
||||
{
|
||||
assert(header.file == BRW_GENERAL_REGISTER_FILE);
|
||||
assert(header.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_set_dest(p, send, dst);
|
||||
brw_set_src0(p, send, header);
|
||||
brw_set_src1(p, send, brw_imm_ud(0u));
|
||||
|
||||
brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
|
||||
brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
|
||||
|
||||
brw_inst_set_mlen(p->devinfo, send, inst->mlen);
|
||||
brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
|
||||
brw_inst_set_header_present(p->devinfo, send, true);
|
||||
brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
|
||||
{
|
||||
|
|
@ -368,6 +390,14 @@ fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
|
|||
brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
|
||||
brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
|
||||
inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
|
||||
brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true);
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
|
||||
inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
|
||||
brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true);
|
||||
|
||||
brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
|
||||
brw_inst_set_rlen(p->devinfo, insn, 0);
|
||||
brw_inst_set_eot(p->devinfo, insn, inst->eot);
|
||||
|
|
@ -2001,7 +2031,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||
fill_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
generate_urb_read(inst, dst, src[0]);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
generate_urb_write(inst, src[0]);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
#include "brw_fs_surface_builder.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_fs_surface_builder.h"
|
||||
#include "brw_vec4_gs_visitor.h"
|
||||
|
||||
using namespace brw;
|
||||
using namespace brw::surface_access;
|
||||
|
|
@ -188,6 +189,18 @@ emit_system_values_block(nir_block *block, void *void_visitor)
|
|||
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
assert(v->stage == MESA_SHADER_GEOMETRY);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
|
||||
if (reg->file == BAD_FILE) {
|
||||
const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL);
|
||||
fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
abld.SHR(iid, g1, fs_reg(27u));
|
||||
*reg = iid;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
assert(v->stage == MESA_SHADER_FRAGMENT);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
|
||||
|
|
@ -1367,9 +1380,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
case nir_intrinsic_load_vertex_id:
|
||||
unreachable("should be lowered by lower_vertex_id()");
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
assert(stage == MESA_SHADER_GEOMETRY);
|
||||
assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
|
||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
|
||||
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_vertex_id_zero_base:
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
case nir_intrinsic_load_sample_mask_in:
|
||||
case nir_intrinsic_load_sample_id: {
|
||||
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
|
|||
int src_end_ip = v->live_intervals->end[src_var];
|
||||
|
||||
bool interfered = false;
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->overwrites_reg(inst->src[0])) {
|
||||
if (scan_inst->is_partial_write() ||
|
||||
(scan_inst->dst.type != inst->dst.type &&
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@
|
|||
#include "brw_wm.h"
|
||||
#include "brw_cs.h"
|
||||
#include "brw_vec4.h"
|
||||
#include "brw_vec4_gs_visitor.h"
|
||||
#include "brw_fs.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "glsl/nir/glsl_types.h"
|
||||
|
|
@ -868,13 +869,14 @@ void
|
|||
fs_visitor::emit_urb_writes()
|
||||
{
|
||||
int slot, urb_offset, length;
|
||||
struct brw_vs_prog_data *vs_prog_data =
|
||||
(struct brw_vs_prog_data *) prog_data;
|
||||
const struct brw_vs_prog_key *key =
|
||||
int starting_urb_offset = 0;
|
||||
const struct brw_vue_prog_data *vue_prog_data =
|
||||
(const struct brw_vue_prog_data *) this->prog_data;
|
||||
const struct brw_vs_prog_key *vs_key =
|
||||
(const struct brw_vs_prog_key *) this->key;
|
||||
const GLbitfield64 psiz_mask =
|
||||
VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ;
|
||||
const struct brw_vue_map *vue_map = &vs_prog_data->base.vue_map;
|
||||
const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
|
||||
bool flush;
|
||||
fs_reg sources[8];
|
||||
|
||||
|
|
@ -900,8 +902,21 @@ fs_visitor::emit_urb_writes()
|
|||
return;
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_GEOMETRY) {
|
||||
const struct brw_gs_prog_data *gs_prog_data =
|
||||
(const struct brw_gs_prog_data *) prog_data;
|
||||
|
||||
/* We need to increment the Global Offset to skip over the control data
|
||||
* header and the extra "Vertex Count" field (1 HWord) at the beginning
|
||||
* of the VUE. We're counting in OWords, so the units are doubled.
|
||||
*/
|
||||
starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
|
||||
if (gs_prog_data->static_vertex_count == -1)
|
||||
starting_urb_offset += 2;
|
||||
}
|
||||
|
||||
length = 0;
|
||||
urb_offset = 0;
|
||||
urb_offset = starting_urb_offset;
|
||||
flush = false;
|
||||
for (slot = 0; slot < vue_map->num_slots; slot++) {
|
||||
int varying = vue_map->slot_to_varying[slot];
|
||||
|
|
@ -961,11 +976,11 @@ fs_visitor::emit_urb_writes()
|
|||
break;
|
||||
}
|
||||
|
||||
if ((varying == VARYING_SLOT_COL0 ||
|
||||
if (stage == MESA_SHADER_VERTEX && vs_key->clamp_vertex_color &&
|
||||
(varying == VARYING_SLOT_COL0 ||
|
||||
varying == VARYING_SLOT_COL1 ||
|
||||
varying == VARYING_SLOT_BFC0 ||
|
||||
varying == VARYING_SLOT_BFC1) &&
|
||||
key->clamp_vertex_color) {
|
||||
varying == VARYING_SLOT_BFC1)) {
|
||||
/* We need to clamp these guys, so do a saturating MOV into a
|
||||
* temp register and use that for the payload.
|
||||
*/
|
||||
|
|
@ -1005,10 +1020,10 @@ fs_visitor::emit_urb_writes()
|
|||
|
||||
fs_inst *inst =
|
||||
abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
|
||||
inst->eot = last;
|
||||
inst->eot = last && stage == MESA_SHADER_VERTEX;
|
||||
inst->mlen = length + 1;
|
||||
inst->offset = urb_offset;
|
||||
urb_offset = slot + 1;
|
||||
urb_offset = starting_urb_offset + slot + 1;
|
||||
length = 0;
|
||||
flush = false;
|
||||
}
|
||||
|
|
@ -1071,11 +1086,33 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
|||
unsigned dispatch_width,
|
||||
int shader_time_index)
|
||||
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
|
||||
key(key), prog_data(prog_data), prog(prog),
|
||||
key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
|
||||
dispatch_width(dispatch_width),
|
||||
shader_time_index(shader_time_index),
|
||||
promoted_constants(0),
|
||||
bld(fs_builder(this, dispatch_width).at_end())
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
struct brw_gs_compile *c,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const nir_shader *shader)
|
||||
: backend_shader(compiler, log_data, mem_ctx, shader,
|
||||
&prog_data->base.base),
|
||||
key(&c->key), gs_compile(c),
|
||||
prog_data(&prog_data->base.base), prog(NULL),
|
||||
dispatch_width(8),
|
||||
shader_time_index(ST_GS),
|
||||
bld(fs_builder(this, dispatch_width).at_end())
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
fs_visitor::init()
|
||||
{
|
||||
switch (stage) {
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
|
|
@ -1094,6 +1131,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
|||
unreachable("unhandled shader stage");
|
||||
}
|
||||
|
||||
this->prog_data = this->stage_prog_data;
|
||||
|
||||
this->failed = false;
|
||||
this->simd16_unsupported = false;
|
||||
this->no16_msg = NULL;
|
||||
|
|
@ -1119,6 +1158,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
|||
this->pull_constant_loc = NULL;
|
||||
this->push_constant_loc = NULL;
|
||||
|
||||
this->promoted_constants = 0,
|
||||
|
||||
this->spilled_any_registers = false;
|
||||
this->do_dual_src = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,20 +57,14 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
struct brw_geometry_program *gp,
|
||||
struct brw_gs_prog_key *key)
|
||||
{
|
||||
struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
struct brw_stage_state *stage_state = &brw->gs.base;
|
||||
struct brw_gs_compile c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
c.key = *key;
|
||||
c.gp = gp;
|
||||
|
||||
c.prog_data.include_primitive_id =
|
||||
(gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
|
||||
|
||||
c.prog_data.invocations = gp->program.Invocations;
|
||||
struct brw_gs_prog_data prog_data;
|
||||
memset(&prog_data, 0, sizeof(prog_data));
|
||||
|
||||
assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
|
||||
&gp->program.Base, &c.prog_data);
|
||||
&gp->program.Base, &prog_data);
|
||||
|
||||
/* Allocate the references to the uniforms that will end up in the
|
||||
* prog_data associated with the compiled program, and which will be freed
|
||||
|
|
@ -83,215 +77,24 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
|
||||
int param_count = gp->program.Base.nir->num_uniforms * 4;
|
||||
|
||||
c.prog_data.base.base.param =
|
||||
prog_data.base.base.param =
|
||||
rzalloc_array(NULL, const gl_constant_value *, param_count);
|
||||
c.prog_data.base.base.pull_param =
|
||||
prog_data.base.base.pull_param =
|
||||
rzalloc_array(NULL, const gl_constant_value *, param_count);
|
||||
c.prog_data.base.base.image_param =
|
||||
prog_data.base.base.image_param =
|
||||
rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
|
||||
c.prog_data.base.base.nr_params = param_count;
|
||||
c.prog_data.base.base.nr_image_params = gs->NumImages;
|
||||
prog_data.base.base.nr_params = param_count;
|
||||
prog_data.base.base.nr_image_params = gs->NumImages;
|
||||
|
||||
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
|
||||
&c.prog_data.base.base, false);
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
|
||||
nir_gs_count_vertices(gp->program.Base.nir);
|
||||
}
|
||||
|
||||
if (brw->gen >= 7) {
|
||||
if (gp->program.OutputType == GL_POINTS) {
|
||||
/* When the output type is points, the geometry shader may output data
|
||||
* to multiple streams, and EndPrimitive() has no effect. So we
|
||||
* configure the hardware to interpret the control data as stream ID.
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
|
||||
|
||||
/* We only have to emit control bits if we are using streams */
|
||||
if (prog->Geom.UsesStreams)
|
||||
c.control_data_bits_per_vertex = 2;
|
||||
else
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
} else {
|
||||
/* When the output type is triangle_strip or line_strip, EndPrimitive()
|
||||
* may be used to terminate the current strip and start a new one
|
||||
* (similar to primitive restart), and outputting data to multiple
|
||||
* streams is not supported. So we configure the hardware to interpret
|
||||
* the control data as EndPrimitive information (a.k.a. "cut bits").
|
||||
*/
|
||||
c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
|
||||
|
||||
/* We only need to output control data if the shader actually calls
|
||||
* EndPrimitive().
|
||||
*/
|
||||
c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
/* There are no control data bits in gen6. */
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
|
||||
/* If it is using transform feedback, enable it */
|
||||
if (prog->TransformFeedback.NumVarying)
|
||||
c.prog_data.gen6_xfb_enabled = true;
|
||||
else
|
||||
c.prog_data.gen6_xfb_enabled = false;
|
||||
}
|
||||
c.control_data_header_size_bits =
|
||||
gp->program.VerticesOut * c.control_data_bits_per_vertex;
|
||||
|
||||
/* 1 HWORD = 32 bytes = 256 bits */
|
||||
c.prog_data.control_data_header_size_hwords =
|
||||
ALIGN(c.control_data_header_size_bits, 256) / 256;
|
||||
&prog_data.base.base, compiler->scalar_gs);
|
||||
|
||||
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
|
||||
|
||||
brw_compute_vue_map(brw->intelScreen->devinfo,
|
||||
&c.prog_data.base.vue_map, outputs_written,
|
||||
&prog_data.base.vue_map, outputs_written,
|
||||
prog ? prog->SeparateShader : false);
|
||||
|
||||
/* Compute the output vertex size.
|
||||
*
|
||||
* From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
|
||||
* Size (p168):
|
||||
*
|
||||
* [0,62] indicating [1,63] 16B units
|
||||
*
|
||||
* Specifies the size of each vertex stored in the GS output entry
|
||||
* (following any Control Header data) as a number of 128-bit units
|
||||
* (minus one).
|
||||
*
|
||||
* Programming Restrictions: The vertex size must be programmed as a
|
||||
* multiple of 32B units with the following exception: Rendering is
|
||||
* disabled (as per SOL stage state) and the vertex size output by the
|
||||
* GS thread is 16B.
|
||||
*
|
||||
* If rendering is enabled (as per SOL state) the vertex size must be
|
||||
* programmed as a multiple of 32B units. In other words, the only time
|
||||
* software can program a vertex size with an odd number of 16B units
|
||||
* is when rendering is disabled.
|
||||
*
|
||||
* Note: B=bytes in the above text.
|
||||
*
|
||||
* It doesn't seem worth the extra trouble to optimize the case where the
|
||||
* vertex size is 16B (especially since this would require special-casing
|
||||
* the GEN assembly that writes to the URB). So we just set the vertex
|
||||
* size to a multiple of 32B (2 vec4's) in all cases.
|
||||
*
|
||||
* The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
|
||||
* budget that as follows:
|
||||
*
|
||||
* 512 bytes for varyings (a varying component is 4 bytes and
|
||||
* gl_MaxGeometryOutputComponents = 128)
|
||||
* 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
|
||||
* bytes)
|
||||
* 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
|
||||
* even if it's not used)
|
||||
* 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
|
||||
* whenever clip planes are enabled, even if the shader doesn't
|
||||
* write to gl_ClipDistance)
|
||||
* 16 bytes overhead since the VUE size must be a multiple of 32 bytes
|
||||
* (see below)--this causes up to 1 VUE slot to be wasted
|
||||
* 400 bytes available for varying packing overhead
|
||||
*
|
||||
* Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
|
||||
* per interpolation type, so this is plenty.
|
||||
*
|
||||
*/
|
||||
unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
|
||||
assert(brw->gen == 6 ||
|
||||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
|
||||
c.prog_data.output_vertex_size_hwords =
|
||||
ALIGN(output_vertex_size_bytes, 32) / 32;
|
||||
|
||||
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
|
||||
* That divides up as follows:
|
||||
*
|
||||
* 64 bytes for the control data header (cut indices or StreamID bits)
|
||||
* 4096 bytes for varyings (a varying component is 4 bytes and
|
||||
* gl_MaxGeometryTotalOutputComponents = 1024)
|
||||
* 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
|
||||
* bytes/vertex and gl_MaxGeometryOutputVertices is 256)
|
||||
* 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
|
||||
* even if it's not used)
|
||||
* 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
|
||||
* whenever clip planes are enabled, even if the shader doesn't
|
||||
* write to gl_ClipDistance)
|
||||
* 4096 bytes overhead since the VUE size must be a multiple of 32
|
||||
* bytes (see above)--this causes up to 1 VUE slot to be wasted
|
||||
* 8128 bytes available for varying packing overhead
|
||||
*
|
||||
* Worst-case varying packing overhead is 3/4 of a varying slot per
|
||||
* interpolation type, which works out to 3072 bytes, so this would allow
|
||||
* us to accommodate 2 interpolation types without any danger of running
|
||||
* out of URB space.
|
||||
*
|
||||
* In practice, the risk of running out of URB space is very small, since
|
||||
* the above figures are all worst-case, and most of them scale with the
|
||||
* number of output vertices. So we'll just calculate the amount of space
|
||||
* we need, and if it's too large, fail to compile.
|
||||
*
|
||||
* The above is for gen7+ where we have a single URB entry that will hold
|
||||
* all the output. In gen6, we will have to allocate URB entries for every
|
||||
* vertex we emit, so our URB entries only need to be large enough to hold
|
||||
* a single vertex. Also, gen6 does not have a control data header.
|
||||
*/
|
||||
unsigned output_size_bytes;
|
||||
if (brw->gen >= 7) {
|
||||
output_size_bytes =
|
||||
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
|
||||
output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
|
||||
} else {
|
||||
output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
|
||||
}
|
||||
|
||||
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
|
||||
* which comes before the control header.
|
||||
*/
|
||||
if (brw->gen >= 8)
|
||||
output_size_bytes += 32;
|
||||
|
||||
assert(output_size_bytes >= 1);
|
||||
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (brw->gen == 6)
|
||||
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (output_size_bytes > max_output_size_bytes)
|
||||
return false;
|
||||
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
||||
* a multiple of 128 bytes in gen6.
|
||||
*/
|
||||
if (brw->gen >= 7)
|
||||
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
else
|
||||
c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
|
||||
|
||||
c.prog_data.output_topology =
|
||||
get_hw_prim_for_gl_prim(gp->program.OutputType);
|
||||
|
||||
/* The GLSL linker will have already matched up GS inputs and the outputs
|
||||
* of prior stages. The driver does extend VS outputs in some cases, but
|
||||
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
|
||||
* geometry shader support. So we can safely ignore that.
|
||||
*
|
||||
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
||||
* locations, so we can rely on rendezvous-by-location making this work.
|
||||
*
|
||||
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
|
||||
* written by previous stages and shows up via payload magic.
|
||||
*/
|
||||
GLbitfield64 inputs_read =
|
||||
gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
|
||||
brw_compute_vue_map(brw->intelScreen->devinfo,
|
||||
&c.input_vue_map, inputs_read,
|
||||
prog->SeparateShader);
|
||||
|
||||
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
|
||||
* need to program a URB read length of ceiling(num_slots / 2).
|
||||
*/
|
||||
c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_GS))
|
||||
brw_dump_ir("geometry", prog, gs, NULL);
|
||||
|
||||
|
|
@ -303,25 +106,25 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
unsigned program_size;
|
||||
char *error_str;
|
||||
const unsigned *program =
|
||||
brw_compile_gs(brw->intelScreen->compiler, brw, &c,
|
||||
shader->Program->nir, prog,
|
||||
mem_ctx, st_index, &program_size, &error_str);
|
||||
brw_compile_gs(brw->intelScreen->compiler, brw, mem_ctx, key,
|
||||
&prog_data, shader->Program->nir, prog,
|
||||
st_index, &program_size, &error_str);
|
||||
if (program == NULL) {
|
||||
ralloc_free(mem_ctx);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Scratch space is used for register spilling */
|
||||
if (c.prog_data.base.base.total_scratch) {
|
||||
if (prog_data.base.base.total_scratch) {
|
||||
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
|
||||
c.prog_data.base.base.total_scratch *
|
||||
prog_data.base.base.total_scratch *
|
||||
brw->max_gs_threads);
|
||||
}
|
||||
|
||||
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
|
||||
&c.key, sizeof(c.key),
|
||||
key, sizeof(*key),
|
||||
program, program_size,
|
||||
&c.prog_data, sizeof(c.prog_data),
|
||||
&prog_data, sizeof(prog_data),
|
||||
&stage_state->prog_offset, &brw->gs.prog_data);
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,8 @@ F(saturate, 31, 31)
|
|||
F(debug_control, 30, 30)
|
||||
F(cmpt_control, 29, 29)
|
||||
FC(branch_control, 28, 28, devinfo->gen >= 8)
|
||||
F(acc_wr_control, 28, 28)
|
||||
FC(acc_wr_control, 28, 28, devinfo->gen >= 6)
|
||||
FC(mask_control_ex, 28, 28, devinfo->is_g4x || devinfo->gen == 5)
|
||||
F(cond_modifier, 27, 24)
|
||||
FC(math_function, 27, 24, devinfo->gen >= 6)
|
||||
F(exec_size, 23, 21)
|
||||
|
|
@ -392,6 +393,7 @@ FF(urb_per_slot_offset,
|
|||
/* 4-6: */ -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
/* 7: */ MD(16), MD(16),
|
||||
/* 8: */ MD(17), MD(17))
|
||||
FC(urb_channel_mask_present, MD(15), MD(15), devinfo->gen >= 8)
|
||||
FC(urb_complete, MD(15), MD(15), devinfo->gen < 8)
|
||||
FC(urb_used, MD(14), MD(14), devinfo->gen < 7)
|
||||
FC(urb_allocate, MD(13), MD(13), devinfo->gen < 7)
|
||||
|
|
@ -738,7 +740,7 @@ typedef struct {
|
|||
* Bits indices range from 0..63.
|
||||
*/
|
||||
static inline unsigned
|
||||
brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low)
|
||||
brw_compact_inst_bits(const brw_compact_inst *inst, unsigned high, unsigned low)
|
||||
{
|
||||
const uint64_t mask = (1ull << (high - low + 1)) - 1;
|
||||
|
||||
|
|
@ -762,56 +764,65 @@ brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
|
|||
inst->data = (inst->data & ~mask) | (value << low);
|
||||
}
|
||||
|
||||
#define F(name, high, low) \
|
||||
static inline void \
|
||||
brw_compact_inst_set_##name(brw_compact_inst *inst, unsigned v) \
|
||||
{ \
|
||||
brw_compact_inst_set_bits(inst, high, low, v); \
|
||||
} \
|
||||
\
|
||||
static inline unsigned \
|
||||
brw_compact_inst_##name(brw_compact_inst *inst) \
|
||||
{ \
|
||||
return brw_compact_inst_bits(inst, high, low); \
|
||||
#define FC(name, high, low, assertions) \
|
||||
static inline void \
|
||||
brw_compact_inst_set_##name(const struct brw_device_info *devinfo, \
|
||||
brw_compact_inst *inst, unsigned v) \
|
||||
{ \
|
||||
assert(assertions); \
|
||||
(void) devinfo; \
|
||||
brw_compact_inst_set_bits(inst, high, low, v); \
|
||||
} \
|
||||
static inline unsigned \
|
||||
brw_compact_inst_##name(const struct brw_device_info *devinfo, \
|
||||
const brw_compact_inst *inst) \
|
||||
{ \
|
||||
assert(assertions); \
|
||||
(void) devinfo; \
|
||||
return brw_compact_inst_bits(inst, high, low); \
|
||||
}
|
||||
|
||||
F(src1_reg_nr, 63, 56)
|
||||
F(src0_reg_nr, 55, 48)
|
||||
F(dst_reg_nr, 47, 40)
|
||||
F(src1_index, 39, 35)
|
||||
F(src0_index, 34, 30)
|
||||
F(cmpt_control, 29, 29) /* Same location as brw_inst */
|
||||
F(flag_subreg_nr, 28, 28) /* <= Gen6 only */
|
||||
F(cond_modifier, 27, 24) /* Same location as brw_inst */
|
||||
F(acc_wr_control, 23, 23)
|
||||
F(subreg_index, 22, 18)
|
||||
F(datatype_index, 17, 13)
|
||||
F(control_index, 12, 8)
|
||||
F(debug_control, 7, 7)
|
||||
F(opcode, 6, 0) /* Same location as brw_inst */
|
||||
/* A simple macro for fields which stay in the same place on all generations. */
|
||||
#define F(name, high, low) FC(name, high, low, true)
|
||||
|
||||
F(src1_reg_nr, 63, 56)
|
||||
F(src0_reg_nr, 55, 48)
|
||||
F(dst_reg_nr, 47, 40)
|
||||
F(src1_index, 39, 35)
|
||||
F(src0_index, 34, 30)
|
||||
F(cmpt_control, 29, 29) /* Same location as brw_inst */
|
||||
FC(flag_subreg_nr, 28, 28, devinfo->gen <= 6)
|
||||
F(cond_modifier, 27, 24) /* Same location as brw_inst */
|
||||
FC(acc_wr_control, 23, 23, devinfo->gen >= 6)
|
||||
FC(mask_control_ex, 23, 23, devinfo->is_g4x || devinfo->gen == 5)
|
||||
F(subreg_index, 22, 18)
|
||||
F(datatype_index, 17, 13)
|
||||
F(control_index, 12, 8)
|
||||
F(debug_control, 7, 7)
|
||||
F(opcode, 6, 0) /* Same location as brw_inst */
|
||||
|
||||
/**
|
||||
* (Gen8+) Compacted three-source instructions:
|
||||
* @{
|
||||
*/
|
||||
F(3src_src2_reg_nr, 63, 57)
|
||||
F(3src_src1_reg_nr, 56, 50)
|
||||
F(3src_src0_reg_nr, 49, 43)
|
||||
F(3src_src2_subreg_nr, 42, 40)
|
||||
F(3src_src1_subreg_nr, 39, 37)
|
||||
F(3src_src0_subreg_nr, 36, 34)
|
||||
F(3src_src2_rep_ctrl, 33, 33)
|
||||
F(3src_src1_rep_ctrl, 32, 32)
|
||||
F(3src_saturate, 31, 31)
|
||||
F(3src_debug_control, 30, 30)
|
||||
F(3src_cmpt_control, 29, 29)
|
||||
F(3src_src0_rep_ctrl, 28, 28)
|
||||
FC(3src_src2_reg_nr, 63, 57, devinfo->gen >= 8)
|
||||
FC(3src_src1_reg_nr, 56, 50, devinfo->gen >= 8)
|
||||
FC(3src_src0_reg_nr, 49, 43, devinfo->gen >= 8)
|
||||
FC(3src_src2_subreg_nr, 42, 40, devinfo->gen >= 8)
|
||||
FC(3src_src1_subreg_nr, 39, 37, devinfo->gen >= 8)
|
||||
FC(3src_src0_subreg_nr, 36, 34, devinfo->gen >= 8)
|
||||
FC(3src_src2_rep_ctrl, 33, 33, devinfo->gen >= 8)
|
||||
FC(3src_src1_rep_ctrl, 32, 32, devinfo->gen >= 8)
|
||||
FC(3src_saturate, 31, 31, devinfo->gen >= 8)
|
||||
FC(3src_debug_control, 30, 30, devinfo->gen >= 8)
|
||||
FC(3src_cmpt_control, 29, 29, devinfo->gen >= 8)
|
||||
FC(3src_src0_rep_ctrl, 28, 28, devinfo->gen >= 8)
|
||||
/* Reserved */
|
||||
F(3src_dst_reg_nr, 18, 12)
|
||||
F(3src_source_index, 11, 10)
|
||||
F(3src_control_index, 9, 8)
|
||||
FC(3src_dst_reg_nr, 18, 12, devinfo->gen >= 8)
|
||||
FC(3src_source_index, 11, 10, devinfo->gen >= 8)
|
||||
FC(3src_control_index, 9, 8, devinfo->gen >= 8)
|
||||
/* Bit 7 is Reserved (for future Opcode expansion) */
|
||||
F(3src_opcode, 6, 0)
|
||||
FC(3src_opcode, 6, 0, devinfo->gen >= 8)
|
||||
/** @} */
|
||||
|
||||
#undef F
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
|
|||
if (prog) {
|
||||
prog->id = get_new_program_id(brw->intelScreen);
|
||||
|
||||
return _mesa_init_gl_program(&prog->program, target, id);
|
||||
return _mesa_init_gl_program(&prog->program.Base, target, id);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,6 +79,8 @@ is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
|
|||
case MESA_SHADER_FRAGMENT:
|
||||
case MESA_SHADER_COMPUTE:
|
||||
return true;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
return compiler->scalar_gs;
|
||||
case MESA_SHADER_VERTEX:
|
||||
return compiler->scalar_vs;
|
||||
default:
|
||||
|
|
@ -101,6 +103,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|||
if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
|
||||
compiler->scalar_vs = true;
|
||||
|
||||
if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false))
|
||||
compiler->scalar_gs = true;
|
||||
|
||||
nir_shader_compiler_options *nir_options =
|
||||
rzalloc(compiler, nir_shader_compiler_options);
|
||||
nir_options->native_integers = true;
|
||||
|
|
@ -411,6 +416,14 @@ brw_instruction_name(enum opcode op)
|
|||
return "gen7_scratch_read";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
return "gen8_urb_write_simd8";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
return "gen8_urb_write_simd8_per_slot";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
return "gen8_urb_write_simd8_masked";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
return "gen8_urb_write_simd8_masked_per_slot";
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
return "urb_read_simd8";
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
return "find_live_channel";
|
||||
|
|
@ -964,6 +977,9 @@ backend_instruction::has_side_effects() const
|
|||
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
case FS_OPCODE_FB_WRITE:
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -233,6 +233,18 @@ bool opt_predicated_break(struct backend_shader *s);
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Scratch data used when compiling a GLSL geometry shader.
|
||||
*/
|
||||
struct brw_gs_compile
|
||||
{
|
||||
struct brw_gs_prog_key key;
|
||||
struct brw_vue_map input_vue_map;
|
||||
|
||||
unsigned control_data_bits_per_vertex;
|
||||
unsigned control_data_header_size_bits;
|
||||
};
|
||||
|
||||
void
|
||||
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
||||
const struct brw_device_info *devinfo,
|
||||
|
|
|
|||
|
|
@ -40,36 +40,32 @@
|
|||
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
|
||||
|
||||
static unsigned int
|
||||
tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
|
||||
const struct intel_mipmap_tree *mt)
|
||||
tr_mode_horizontal_texture_alignment(const struct intel_mipmap_tree *mt)
|
||||
{
|
||||
const unsigned *align_yf, *align_ys;
|
||||
const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
|
||||
unsigned ret_align, divisor;
|
||||
unsigned ret_align, divisor, multiplier_ys;
|
||||
|
||||
/* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below
|
||||
* tables specifies the horizontal alignment requirement in elements
|
||||
* for the surface. An element is defined as a pixel in uncompressed
|
||||
* surface formats, and as a compression block in compressed surface
|
||||
* formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
|
||||
/* Values in below tables specifiy the horizontal alignment requirement
|
||||
* in elements for TRMODE_YF surface. An element is defined as a pixel in
|
||||
* uncompressed surface formats, and as a compression block in compressed
|
||||
* surface formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
|
||||
* element is a sample.
|
||||
*/
|
||||
const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256};
|
||||
const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096};
|
||||
const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};
|
||||
const unsigned align_2d_ys[] = {256, 256, 128, 128, 64};
|
||||
const unsigned align_3d_yf[] = {16, 8, 8, 8, 4};
|
||||
const unsigned align_3d_ys[] = {64, 32, 32, 32, 16};
|
||||
int i = 0;
|
||||
|
||||
/* Alignment computations below assume bpp >= 8 and a power of 2. */
|
||||
assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
|
||||
assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
|
||||
|
||||
/* Alignment computations below assume a power of 2 cpp. */
|
||||
assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp));
|
||||
/* Compute array index. */
|
||||
const int i = ffs(mt->cpp) - 1;
|
||||
|
||||
switch(mt->target) {
|
||||
case GL_TEXTURE_1D:
|
||||
case GL_TEXTURE_1D_ARRAY:
|
||||
align_yf = align_1d_yf;
|
||||
align_ys = align_1d_ys;
|
||||
ret_align = align_1d_yf[i];
|
||||
multiplier_ys = 16;
|
||||
break;
|
||||
case GL_TEXTURE_2D:
|
||||
case GL_TEXTURE_RECTANGLE:
|
||||
|
|
@ -78,22 +74,19 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
|
|||
case GL_TEXTURE_CUBE_MAP_ARRAY:
|
||||
case GL_TEXTURE_2D_MULTISAMPLE:
|
||||
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
|
||||
align_yf = align_2d_yf;
|
||||
align_ys = align_2d_ys;
|
||||
ret_align = align_2d_yf[i];
|
||||
multiplier_ys = 4;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
align_yf = align_3d_yf;
|
||||
align_ys = align_3d_ys;
|
||||
ret_align = align_3d_yf[i];
|
||||
multiplier_ys = 4;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
/* Compute array index. */
|
||||
i = ffs(bpp/8) - 1;
|
||||
|
||||
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
|
||||
align_yf[i] : align_ys[i];
|
||||
if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
|
||||
ret_align *= multiplier_ys;
|
||||
|
||||
assert(_mesa_is_pow_two(mt->num_samples));
|
||||
|
||||
|
|
@ -148,26 +141,20 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
|
|||
}
|
||||
|
||||
static unsigned int
|
||||
tr_mode_vertical_texture_alignment(const struct brw_context *brw,
|
||||
const struct intel_mipmap_tree *mt)
|
||||
tr_mode_vertical_texture_alignment(const struct intel_mipmap_tree *mt)
|
||||
{
|
||||
const unsigned *align_yf, *align_ys;
|
||||
const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
|
||||
unsigned ret_align, divisor;
|
||||
unsigned ret_align, divisor, multiplier_ys;
|
||||
|
||||
/* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */
|
||||
/* Vertical alignment tables for TRMODE_YF */
|
||||
const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
|
||||
const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
|
||||
const unsigned align_3d_yf[] = {16, 16, 16, 8, 8};
|
||||
const unsigned align_3d_ys[] = {32, 32, 32, 16, 16};
|
||||
int i = 0;
|
||||
|
||||
assert(brw->gen >= 9 &&
|
||||
mt->target != GL_TEXTURE_1D &&
|
||||
mt->target != GL_TEXTURE_1D_ARRAY);
|
||||
assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
|
||||
|
||||
/* Alignment computations below assume bpp >= 8 and a power of 2. */
|
||||
assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ;
|
||||
/* Alignment computations below assume a power of 2 cpp. */
|
||||
assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp)) ;
|
||||
/* Compute array index. */
|
||||
const int i = ffs(mt->cpp) - 1;
|
||||
|
||||
switch(mt->target) {
|
||||
case GL_TEXTURE_2D:
|
||||
|
|
@ -177,22 +164,21 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw,
|
|||
case GL_TEXTURE_CUBE_MAP_ARRAY:
|
||||
case GL_TEXTURE_2D_MULTISAMPLE:
|
||||
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
|
||||
align_yf = align_2d_yf;
|
||||
align_ys = align_2d_ys;
|
||||
ret_align = align_2d_yf[i];
|
||||
multiplier_ys = 4;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
align_yf = align_3d_yf;
|
||||
align_ys = align_3d_ys;
|
||||
ret_align = align_3d_yf[i];
|
||||
multiplier_ys = 2;
|
||||
break;
|
||||
case GL_TEXTURE_1D:
|
||||
case GL_TEXTURE_1D_ARRAY:
|
||||
default:
|
||||
unreachable("not reached");
|
||||
unreachable("Unexpected miptree target");
|
||||
}
|
||||
|
||||
/* Compute array index. */
|
||||
i = ffs(bpp / 8) - 1;
|
||||
|
||||
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
|
||||
align_yf[i] : align_ys[i];
|
||||
if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
|
||||
ret_align *= multiplier_ys;
|
||||
|
||||
assert(_mesa_is_pow_two(mt->num_samples));
|
||||
|
||||
|
|
@ -779,8 +765,8 @@ intel_miptree_set_alignment(struct brw_context *brw,
|
|||
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
|
||||
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
|
||||
* vertical alignment < 64. */
|
||||
mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
|
||||
mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
|
||||
mt->halign = MAX2(tr_mode_horizontal_texture_alignment(mt), 32);
|
||||
mt->valign = MAX2(tr_mode_vertical_texture_alignment(mt), 64);
|
||||
} else {
|
||||
mt->halign =
|
||||
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
|
||||
|
|
|
|||
|
|
@ -1111,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
|
|||
*/
|
||||
vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev;
|
||||
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
|
||||
inst, block) {
|
||||
inst) {
|
||||
_scan_inst = scan_inst;
|
||||
|
||||
if (inst->src[0].in_range(scan_inst->dst, scan_inst->regs_written)) {
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
assert(c->prog_data.include_primitive_id);
|
||||
assert(gs_prog_data->include_primitive_id);
|
||||
dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
|
||||
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -35,14 +35,16 @@ namespace brw {
|
|||
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index)
|
||||
: vec4_visitor(compiler, log_data, &c->key.tex,
|
||||
&c->prog_data.base, shader, mem_ctx,
|
||||
&prog_data->base, shader, mem_ctx,
|
||||
no_spills, shader_time_index),
|
||||
c(c)
|
||||
c(c),
|
||||
gs_prog_data(prog_data)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -78,9 +80,9 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
|
|||
* so the total number of input slots that will be delivered to the GS (and
|
||||
* thus the stride of the input arrays) is urb_read_length * 2.
|
||||
*/
|
||||
const unsigned num_input_vertices = c->gp->program.VerticesIn;
|
||||
const unsigned num_input_vertices = nir->info.gs.vertices_in;
|
||||
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
|
||||
unsigned input_array_stride = c->prog_data.base.urb_read_length * 2;
|
||||
unsigned input_array_stride = prog_data->urb_read_length * 2;
|
||||
|
||||
for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
|
||||
int varying = c->input_vue_map.slot_to_varying[slot];
|
||||
|
|
@ -106,7 +108,7 @@ vec4_gs_visitor::setup_payload()
|
|||
* to be interleaved, so one register contains two attribute slots.
|
||||
*/
|
||||
int attributes_per_reg =
|
||||
c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
|
||||
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
|
||||
|
||||
/* If a geometry shader tries to read from an input that wasn't written by
|
||||
* the vertex shader, that produces undefined results, but it shouldn't
|
||||
|
|
@ -124,7 +126,7 @@ vec4_gs_visitor::setup_payload()
|
|||
reg++;
|
||||
|
||||
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
|
||||
if (c->prog_data.include_primitive_id)
|
||||
if (gs_prog_data->include_primitive_id)
|
||||
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
|
||||
|
||||
reg = setup_uniforms(reg);
|
||||
|
|
@ -182,9 +184,9 @@ vec4_gs_visitor::emit_prolog()
|
|||
* to account for the fact that the vertex shader stored it in the w
|
||||
* component of VARYING_SLOT_PSIZ.
|
||||
*/
|
||||
if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
|
||||
if (nir->info.inputs_read & VARYING_BIT_PSIZ) {
|
||||
this->current_annotation = "swizzle gl_PointSize input";
|
||||
for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
|
||||
for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) {
|
||||
dst_reg dst(ATTR,
|
||||
BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
|
||||
dst.type = BRW_REGISTER_TYPE_F;
|
||||
|
|
@ -222,7 +224,7 @@ vec4_gs_visitor::emit_thread_end()
|
|||
*/
|
||||
int base_mrf = 1;
|
||||
|
||||
bool static_vertex_count = c->prog_data.static_vertex_count != -1;
|
||||
bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
|
||||
|
||||
/* If the previous instruction was a URB write, we don't need to issue
|
||||
* a second one - we can just set the EOT bit on the previous write.
|
||||
|
|
@ -271,7 +273,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
|
|||
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
|
||||
inst->force_writemask_all = true;
|
||||
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
|
||||
(uint32_t) c->prog_data.output_vertex_size_hwords);
|
||||
(uint32_t) gs_prog_data->output_vertex_size_hwords);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -285,12 +287,12 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
|
|||
(void) complete;
|
||||
|
||||
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
|
||||
inst->offset = c->prog_data.control_data_header_size_hwords;
|
||||
inst->offset = gs_prog_data->control_data_header_size_hwords;
|
||||
|
||||
/* We need to increment Global Offset by 1 to make room for Broadwell's
|
||||
* extra "Vertex Count" payload at the beginning of the URB entry.
|
||||
*/
|
||||
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
|
||||
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
|
||||
inst->offset++;
|
||||
|
||||
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
|
|
@ -409,7 +411,7 @@ vec4_gs_visitor::emit_control_data_bits()
|
|||
* URB entry. Since this is an OWord message, Global Offset is counted
|
||||
* in 128-bit units, so we must set it to 2.
|
||||
*/
|
||||
if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
|
||||
if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
|
||||
inst->offset = 2;
|
||||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = 2;
|
||||
|
|
@ -536,7 +538,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
* do for GL_POINTS outputs that don't use streams).
|
||||
*/
|
||||
if (c->control_data_header_size_bits > 0 &&
|
||||
c->prog_data.control_data_format ==
|
||||
gs_prog_data->control_data_format ==
|
||||
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
|
||||
this->current_annotation = "emit vertex: Stream control data bits";
|
||||
set_stream_control_data_bits(stream_id);
|
||||
|
|
@ -552,7 +554,7 @@ vec4_gs_visitor::gs_end_primitive()
|
|||
* consists of cut bits. Fortunately, the only time it isn't is when the
|
||||
* output type is points, in which case EndPrimitive() is a no-op.
|
||||
*/
|
||||
if (c->prog_data.control_data_format !=
|
||||
if (gs_prog_data->control_data_format !=
|
||||
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -598,27 +600,231 @@ vec4_gs_visitor::gs_end_primitive()
|
|||
|
||||
extern "C" const unsigned *
|
||||
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
void *mem_ctx,
|
||||
const struct brw_gs_prog_key *key,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
struct gl_shader_program *shader_prog,
|
||||
void *mem_ctx,
|
||||
int shader_time_index,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str)
|
||||
{
|
||||
struct brw_gs_compile c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
c.key = *key;
|
||||
|
||||
prog_data->include_primitive_id =
|
||||
(shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
|
||||
|
||||
prog_data->invocations = shader->info.gs.invocations;
|
||||
|
||||
if (compiler->devinfo->gen >= 8)
|
||||
prog_data->static_vertex_count = nir_gs_count_vertices(shader);
|
||||
|
||||
if (compiler->devinfo->gen >= 7) {
|
||||
if (shader->info.gs.output_primitive == GL_POINTS) {
|
||||
/* When the output type is points, the geometry shader may output data
|
||||
* to multiple streams, and EndPrimitive() has no effect. So we
|
||||
* configure the hardware to interpret the control data as stream ID.
|
||||
*/
|
||||
prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
|
||||
|
||||
/* We only have to emit control bits if we are using streams */
|
||||
if (shader_prog && shader_prog->Geom.UsesStreams)
|
||||
c.control_data_bits_per_vertex = 2;
|
||||
else
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
} else {
|
||||
/* When the output type is triangle_strip or line_strip, EndPrimitive()
|
||||
* may be used to terminate the current strip and start a new one
|
||||
* (similar to primitive restart), and outputting data to multiple
|
||||
* streams is not supported. So we configure the hardware to interpret
|
||||
* the control data as EndPrimitive information (a.k.a. "cut bits").
|
||||
*/
|
||||
prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
|
||||
|
||||
/* We only need to output control data if the shader actually calls
|
||||
* EndPrimitive().
|
||||
*/
|
||||
c.control_data_bits_per_vertex =
|
||||
shader->info.gs.uses_end_primitive ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
/* There are no control data bits in gen6. */
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
|
||||
/* If it is using transform feedback, enable it */
|
||||
if (shader->info.has_transform_feedback_varyings)
|
||||
prog_data->gen6_xfb_enabled = true;
|
||||
else
|
||||
prog_data->gen6_xfb_enabled = false;
|
||||
}
|
||||
c.control_data_header_size_bits =
|
||||
shader->info.gs.vertices_out * c.control_data_bits_per_vertex;
|
||||
|
||||
/* 1 HWORD = 32 bytes = 256 bits */
|
||||
prog_data->control_data_header_size_hwords =
|
||||
ALIGN(c.control_data_header_size_bits, 256) / 256;
|
||||
|
||||
/* Compute the output vertex size.
|
||||
*
|
||||
* From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
|
||||
* Size (p168):
|
||||
*
|
||||
* [0,62] indicating [1,63] 16B units
|
||||
*
|
||||
* Specifies the size of each vertex stored in the GS output entry
|
||||
* (following any Control Header data) as a number of 128-bit units
|
||||
* (minus one).
|
||||
*
|
||||
* Programming Restrictions: The vertex size must be programmed as a
|
||||
* multiple of 32B units with the following exception: Rendering is
|
||||
* disabled (as per SOL stage state) and the vertex size output by the
|
||||
* GS thread is 16B.
|
||||
*
|
||||
* If rendering is enabled (as per SOL state) the vertex size must be
|
||||
* programmed as a multiple of 32B units. In other words, the only time
|
||||
* software can program a vertex size with an odd number of 16B units
|
||||
* is when rendering is disabled.
|
||||
*
|
||||
* Note: B=bytes in the above text.
|
||||
*
|
||||
* It doesn't seem worth the extra trouble to optimize the case where the
|
||||
* vertex size is 16B (especially since this would require special-casing
|
||||
* the GEN assembly that writes to the URB). So we just set the vertex
|
||||
* size to a multiple of 32B (2 vec4's) in all cases.
|
||||
*
|
||||
* The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
|
||||
* budget that as follows:
|
||||
*
|
||||
* 512 bytes for varyings (a varying component is 4 bytes and
|
||||
* gl_MaxGeometryOutputComponents = 128)
|
||||
* 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
|
||||
* bytes)
|
||||
* 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
|
||||
* even if it's not used)
|
||||
* 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
|
||||
* whenever clip planes are enabled, even if the shader doesn't
|
||||
* write to gl_ClipDistance)
|
||||
* 16 bytes overhead since the VUE size must be a multiple of 32 bytes
|
||||
* (see below)--this causes up to 1 VUE slot to be wasted
|
||||
* 400 bytes available for varying packing overhead
|
||||
*
|
||||
* Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
|
||||
* per interpolation type, so this is plenty.
|
||||
*
|
||||
*/
|
||||
unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
|
||||
assert(compiler->devinfo->gen == 6 ||
|
||||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
|
||||
prog_data->output_vertex_size_hwords =
|
||||
ALIGN(output_vertex_size_bytes, 32) / 32;
|
||||
|
||||
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
|
||||
* That divides up as follows:
|
||||
*
|
||||
* 64 bytes for the control data header (cut indices or StreamID bits)
|
||||
* 4096 bytes for varyings (a varying component is 4 bytes and
|
||||
* gl_MaxGeometryTotalOutputComponents = 1024)
|
||||
* 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
|
||||
* bytes/vertex and gl_MaxGeometryOutputVertices is 256)
|
||||
* 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
|
||||
* even if it's not used)
|
||||
* 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
|
||||
* whenever clip planes are enabled, even if the shader doesn't
|
||||
* write to gl_ClipDistance)
|
||||
* 4096 bytes overhead since the VUE size must be a multiple of 32
|
||||
* bytes (see above)--this causes up to 1 VUE slot to be wasted
|
||||
* 8128 bytes available for varying packing overhead
|
||||
*
|
||||
* Worst-case varying packing overhead is 3/4 of a varying slot per
|
||||
* interpolation type, which works out to 3072 bytes, so this would allow
|
||||
* us to accommodate 2 interpolation types without any danger of running
|
||||
* out of URB space.
|
||||
*
|
||||
* In practice, the risk of running out of URB space is very small, since
|
||||
* the above figures are all worst-case, and most of them scale with the
|
||||
* number of output vertices. So we'll just calculate the amount of space
|
||||
* we need, and if it's too large, fail to compile.
|
||||
*
|
||||
* The above is for gen7+ where we have a single URB entry that will hold
|
||||
* all the output. In gen6, we will have to allocate URB entries for every
|
||||
* vertex we emit, so our URB entries only need to be large enough to hold
|
||||
* a single vertex. Also, gen6 does not have a control data header.
|
||||
*/
|
||||
unsigned output_size_bytes;
|
||||
if (compiler->devinfo->gen >= 7) {
|
||||
output_size_bytes =
|
||||
prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out;
|
||||
output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
|
||||
} else {
|
||||
output_size_bytes = prog_data->output_vertex_size_hwords * 32;
|
||||
}
|
||||
|
||||
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
|
||||
* which comes before the control header.
|
||||
*/
|
||||
if (compiler->devinfo->gen >= 8)
|
||||
output_size_bytes += 32;
|
||||
|
||||
assert(output_size_bytes >= 1);
|
||||
int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (compiler->devinfo->gen == 6)
|
||||
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
|
||||
if (output_size_bytes > max_output_size_bytes)
|
||||
return false;
|
||||
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
||||
* a multiple of 128 bytes in gen6.
|
||||
*/
|
||||
if (compiler->devinfo->gen >= 7)
|
||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
else
|
||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
|
||||
|
||||
prog_data->output_topology =
|
||||
get_hw_prim_for_gl_prim(shader->info.gs.output_primitive);
|
||||
|
||||
/* The GLSL linker will have already matched up GS inputs and the outputs
|
||||
* of prior stages. The driver does extend VS outputs in some cases, but
|
||||
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
|
||||
* geometry shader support. So we can safely ignore that.
|
||||
*
|
||||
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
||||
* locations, so we can rely on rendezvous-by-location making this work.
|
||||
*
|
||||
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
|
||||
* written by previous stages and shows up via payload magic.
|
||||
*/
|
||||
GLbitfield64 inputs_read =
|
||||
shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
|
||||
brw_compute_vue_map(compiler->devinfo,
|
||||
&c.input_vue_map, inputs_read,
|
||||
shader->info.separate_shader);
|
||||
|
||||
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
|
||||
* need to program a URB read length of ceiling(num_slots / 2).
|
||||
*/
|
||||
prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
|
||||
|
||||
/* Now that prog_data setup is done, we are ready to actually compile the
|
||||
* program.
|
||||
*/
|
||||
|
||||
if (compiler->devinfo->gen >= 7) {
|
||||
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
|
||||
* so without spilling. If the GS invocations count > 1, then we can't use
|
||||
* dual object mode.
|
||||
*/
|
||||
if (c->prog_data.invocations <= 1 &&
|
||||
if (prog_data->invocations <= 1 &&
|
||||
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
|
||||
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
|
||||
|
||||
vec4_gs_visitor v(compiler, log_data, c, shader,
|
||||
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
|
||||
mem_ctx, true /* no_spills */, shader_time_index);
|
||||
if (v.run()) {
|
||||
vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
|
||||
vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
|
||||
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
|
||||
return g.generate_assembly(v.cfg, final_assembly_size, shader);
|
||||
}
|
||||
|
|
@ -648,28 +854,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
* mode is more performant when invocations > 1. Gen6 only supports
|
||||
* SINGLE mode.
|
||||
*/
|
||||
if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
|
||||
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
|
||||
if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7)
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
|
||||
else
|
||||
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
|
||||
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
|
||||
|
||||
vec4_gs_visitor *gs = NULL;
|
||||
const unsigned *ret = NULL;
|
||||
|
||||
if (compiler->devinfo->gen >= 7)
|
||||
gs = new vec4_gs_visitor(compiler, log_data, c, shader,
|
||||
mem_ctx, false /* no_spills */,
|
||||
gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
|
||||
shader, mem_ctx, false /* no_spills */,
|
||||
shader_time_index);
|
||||
else
|
||||
gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
|
||||
mem_ctx, false /* no_spills */,
|
||||
gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog,
|
||||
shader, mem_ctx, false /* no_spills */,
|
||||
shader_time_index);
|
||||
|
||||
if (!gs->run()) {
|
||||
if (error_str)
|
||||
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
|
||||
} else {
|
||||
vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
|
||||
vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
|
||||
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
|
||||
ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ public:
|
|||
vec4_gs_visitor(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
|
|
@ -70,6 +71,7 @@ protected:
|
|||
src_reg vertex_count;
|
||||
src_reg control_data_bits;
|
||||
const struct brw_gs_compile * const c;
|
||||
struct brw_gs_prog_data * const gs_prog_data;
|
||||
};
|
||||
|
||||
} /* namespace brw */
|
||||
|
|
|
|||
|
|
@ -1222,6 +1222,9 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
|
|||
void
|
||||
vec4_visitor::emit_ndc_computation()
|
||||
{
|
||||
if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
|
||||
return;
|
||||
|
||||
/* Get the position */
|
||||
src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
|
||||
|
||||
|
|
@ -1287,7 +1290,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
|
|||
* Later, clipping will detect ucp[6] and ensure the primitive is
|
||||
* clipped against all fixed planes.
|
||||
*/
|
||||
if (devinfo->has_negative_rhw_bug) {
|
||||
if (devinfo->has_negative_rhw_bug &&
|
||||
output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
|
||||
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
|
||||
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
|
||||
emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
|
||||
|
|
@ -1335,8 +1339,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
|
|||
assert(varying < VARYING_SLOT_MAX);
|
||||
assert(output_reg[varying].type == reg.type);
|
||||
current_annotation = output_reg_annotation[varying];
|
||||
/* Copy the register, saturating if necessary */
|
||||
return emit(MOV(reg, src_reg(output_reg[varying])));
|
||||
if (output_reg[varying].file != BAD_FILE)
|
||||
return emit(MOV(reg, src_reg(output_reg[varying])));
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1355,11 +1361,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
|
|||
}
|
||||
case BRW_VARYING_SLOT_NDC:
|
||||
current_annotation = "NDC";
|
||||
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
|
||||
if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
|
||||
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
|
||||
break;
|
||||
case VARYING_SLOT_POS:
|
||||
current_annotation = "gl_Position";
|
||||
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
|
||||
if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
|
||||
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
|
||||
break;
|
||||
case VARYING_SLOT_EDGE:
|
||||
/* This is present when doing unfilled polygons. We're supposed to copy
|
||||
|
|
|
|||
|
|
@ -217,7 +217,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
|
|||
* shader.
|
||||
*/
|
||||
vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
|
||||
if (key->clamp_vertex_color)
|
||||
if (inst && key->clamp_vertex_color)
|
||||
inst->saturate = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -312,7 +312,7 @@ brw_vs_populate_key(struct brw_context *brw,
|
|||
|
||||
if (ctx->Transform.ClipPlanesEnabled != 0 &&
|
||||
ctx->API == API_OPENGL_COMPAT &&
|
||||
!vp->program.Base.UsesClipDistanceOut) {
|
||||
vp->program.Base.ClipDistanceArraySize == 0) {
|
||||
key->nr_userclip_plane_consts =
|
||||
_mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ gen6_gs_visitor::emit_prolog()
|
|||
this->vertex_output = src_reg(this,
|
||||
glsl_type::uint_type,
|
||||
(prog_data->vue_map.num_slots + 1) *
|
||||
c->gp->program.VerticesOut);
|
||||
nir->info.gs.vertices_out);
|
||||
this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
|
||||
emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ gen6_gs_visitor::emit_prolog()
|
|||
this->prim_count = src_reg(this, glsl_type::uint_type);
|
||||
emit(MOV(dst_reg(this->prim_count), 0u));
|
||||
|
||||
if (c->prog_data.gen6_xfb_enabled) {
|
||||
if (gs_prog_data->gen6_xfb_enabled) {
|
||||
/* Create a virtual register to hold destination indices in SOL */
|
||||
this->destination_indices = src_reg(this, glsl_type::uvec4_type);
|
||||
/* Create a virtual register to hold number of written primitives */
|
||||
|
|
@ -128,7 +128,7 @@ gen6_gs_visitor::emit_prolog()
|
|||
* in the 3DSTATE_GS state packet. That information can be obtained by other
|
||||
* means though, so we can safely use r1 for this purpose.
|
||||
*/
|
||||
if (c->prog_data.include_primitive_id) {
|
||||
if (gs_prog_data->include_primitive_id) {
|
||||
this->primitive_id =
|
||||
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
|
||||
|
|
@ -177,7 +177,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
dst_reg dst(this->vertex_output);
|
||||
dst.reladdr = ralloc(mem_ctx, src_reg);
|
||||
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
|
||||
if (c->gp->program.OutputType == GL_POINTS) {
|
||||
if (nir->info.gs.output_primitive == GL_POINTS) {
|
||||
/* If we are outputting points, then every vertex has PrimStart and
|
||||
* PrimEnd set.
|
||||
*/
|
||||
|
|
@ -191,7 +191,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
* vertex.
|
||||
*/
|
||||
emit(OR(dst, this->first_vertex,
|
||||
(c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
|
||||
(gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
|
||||
emit(MOV(dst_reg(this->first_vertex), 0u));
|
||||
}
|
||||
emit(ADD(dst_reg(this->vertex_output_offset),
|
||||
|
|
@ -205,7 +205,7 @@ gen6_gs_visitor::gs_end_primitive()
|
|||
/* Calling EndPrimitive() is optional for point output. In this case we set
|
||||
* the PrimEnd flag when we process EmitVertex().
|
||||
*/
|
||||
if (c->gp->program.OutputType == GL_POINTS)
|
||||
if (nir->info.gs.output_primitive == GL_POINTS)
|
||||
return;
|
||||
|
||||
/* Otherwise we know that the last vertex we have processed was the last
|
||||
|
|
@ -217,7 +217,7 @@ gen6_gs_visitor::gs_end_primitive()
|
|||
* comparison below (hence the num_output_vertices + 1 in the comparison
|
||||
* below).
|
||||
*/
|
||||
unsigned num_output_vertices = c->gp->program.VerticesOut;
|
||||
unsigned num_output_vertices = nir->info.gs.vertices_out;
|
||||
emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
|
||||
BRW_CONDITIONAL_L));
|
||||
vec4_instruction *inst = emit(CMP(dst_null_d(),
|
||||
|
|
@ -320,7 +320,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
* first_vertex is not zero. This is only relevant for outputs other than
|
||||
* points because in the point case we set PrimEnd on all vertices.
|
||||
*/
|
||||
if (c->gp->program.OutputType != GL_POINTS) {
|
||||
if (nir->info.gs.output_primitive != GL_POINTS) {
|
||||
emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
gs_end_primitive();
|
||||
|
|
@ -353,7 +353,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
this->current_annotation = "gen6 thread end: ff_sync";
|
||||
|
||||
vec4_instruction *inst;
|
||||
if (c->prog_data.gen6_xfb_enabled) {
|
||||
if (gs_prog_data->gen6_xfb_enabled) {
|
||||
src_reg sol_temp(this, glsl_type::uvec4_type);
|
||||
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
|
||||
dst_reg(this->svbi),
|
||||
|
|
@ -443,7 +443,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
}
|
||||
emit(BRW_OPCODE_WHILE);
|
||||
|
||||
if (c->prog_data.gen6_xfb_enabled)
|
||||
if (gs_prog_data->gen6_xfb_enabled)
|
||||
xfb_write();
|
||||
}
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
|
|
@ -465,7 +465,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
*/
|
||||
this->current_annotation = "gen6 thread end: EOT";
|
||||
|
||||
if (c->prog_data.gen6_xfb_enabled) {
|
||||
if (gs_prog_data->gen6_xfb_enabled) {
|
||||
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
|
||||
src_reg data(this, glsl_type::uint_type);
|
||||
emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
|
||||
|
|
@ -507,7 +507,7 @@ gen6_gs_visitor::setup_payload()
|
|||
* information (and move the original value to a virtual register if
|
||||
* necessary).
|
||||
*/
|
||||
if (c->prog_data.include_primitive_id)
|
||||
if (gs_prog_data->include_primitive_id)
|
||||
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
|
||||
reg++;
|
||||
|
||||
|
|
@ -530,9 +530,6 @@ gen6_gs_visitor::xfb_setup()
|
|||
BRW_SWIZZLE4(3, 3, 3, 3)
|
||||
};
|
||||
|
||||
struct brw_gs_prog_data *prog_data =
|
||||
(struct brw_gs_prog_data *) &c->prog_data;
|
||||
|
||||
const struct gl_transform_feedback_info *linked_xfb_info =
|
||||
&this->shader_prog->LinkedTransformFeedback;
|
||||
int i;
|
||||
|
|
@ -548,11 +545,11 @@ gen6_gs_visitor::xfb_setup()
|
|||
*/
|
||||
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
|
||||
|
||||
prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
|
||||
for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
|
||||
prog_data->transform_feedback_bindings[i] =
|
||||
gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
|
||||
for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
|
||||
gs_prog_data->transform_feedback_bindings[i] =
|
||||
linked_xfb_info->Outputs[i].OutputRegister;
|
||||
prog_data->transform_feedback_swizzles[i] =
|
||||
gs_prog_data->transform_feedback_swizzles[i] =
|
||||
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
|
||||
}
|
||||
}
|
||||
|
|
@ -561,13 +558,11 @@ void
|
|||
gen6_gs_visitor::xfb_write()
|
||||
{
|
||||
unsigned num_verts;
|
||||
struct brw_gs_prog_data *prog_data =
|
||||
(struct brw_gs_prog_data *) &c->prog_data;
|
||||
|
||||
if (!prog_data->num_transform_feedback_bindings)
|
||||
if (!gs_prog_data->num_transform_feedback_bindings)
|
||||
return;
|
||||
|
||||
switch (c->prog_data.output_topology) {
|
||||
switch (gs_prog_data->output_topology) {
|
||||
case _3DPRIM_POINTLIST:
|
||||
num_verts = 1;
|
||||
break;
|
||||
|
|
@ -627,7 +622,7 @@ gen6_gs_visitor::xfb_write()
|
|||
emit(BRW_OPCODE_ENDIF);
|
||||
|
||||
/* Write transform feedback data for all processed vertices. */
|
||||
for (int i = 0; i < c->gp->program.VerticesOut; i++) {
|
||||
for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
|
||||
emit(MOV(dst_reg(sol_temp), i));
|
||||
emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
|
||||
BRW_CONDITIONAL_L));
|
||||
|
|
@ -642,10 +637,8 @@ gen6_gs_visitor::xfb_write()
|
|||
void
|
||||
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
||||
{
|
||||
struct brw_gs_prog_data *prog_data =
|
||||
(struct brw_gs_prog_data *) &c->prog_data;
|
||||
unsigned binding;
|
||||
unsigned num_bindings = prog_data->num_transform_feedback_bindings;
|
||||
unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
|
||||
src_reg sol_temp(this, glsl_type::uvec4_type);
|
||||
|
||||
/* Check for buffer overflow: we need room to write the complete primitive
|
||||
|
|
@ -666,7 +659,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
|||
*/
|
||||
for (binding = 0; binding < num_bindings; ++binding) {
|
||||
unsigned char varying =
|
||||
prog_data->transform_feedback_bindings[binding];
|
||||
gs_prog_data->transform_feedback_bindings[binding];
|
||||
|
||||
/* Set up the correct destination index for this vertex */
|
||||
vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
|
||||
|
|
@ -704,7 +697,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
|||
else if (varying == VARYING_SLOT_VIEWPORT)
|
||||
data.swizzle = BRW_SWIZZLE_ZZZZ;
|
||||
else
|
||||
data.swizzle = prog_data->transform_feedback_swizzles[binding];
|
||||
data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
|
||||
|
||||
/* Write data */
|
||||
inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
|
||||
|
|
|
|||
|
|
@ -38,12 +38,13 @@ public:
|
|||
gen6_gs_visitor(const struct brw_compiler *comp,
|
||||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
struct gl_shader_program *prog,
|
||||
const nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
bool no_spills,
|
||||
int shader_time_index) :
|
||||
vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
|
||||
vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills,
|
||||
shader_time_index),
|
||||
shader_prog(prog)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -68,6 +68,8 @@ gen8_upload_gs_state(struct brw_context *brw)
|
|||
GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
|
||||
(brw->gs.prog_data->output_topology <<
|
||||
GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
|
||||
(prog_data->include_vue_handles ?
|
||||
GEN7_GS_INCLUDE_VERTEX_HANDLES : 0) |
|
||||
(prog_data->urb_read_length <<
|
||||
GEN6_GS_URB_READ_LENGTH_SHIFT) |
|
||||
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
|
||||
|
|
|
|||
|
|
@ -1400,7 +1400,7 @@ save_BlendFunci(GLuint buf, GLenum sfactor, GLenum dfactor)
|
|||
GET_CURRENT_CONTEXT(ctx);
|
||||
Node *n;
|
||||
ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
|
||||
n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_SEPARATE_I, 3);
|
||||
n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_I, 3);
|
||||
if (n) {
|
||||
n[1].ui = buf;
|
||||
n[2].e = sfactor;
|
||||
|
|
@ -9741,6 +9741,46 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
|
|||
n[3].f, n[4].f, n[5].f, n[6].f,
|
||||
get_pointer(&n[7]));
|
||||
break;
|
||||
case OPCODE_BLEND_COLOR:
|
||||
fprintf(f, "BlendColor %f, %f, %f, %f\n",
|
||||
n[1].f, n[2].f, n[3].f, n[4].f);
|
||||
break;
|
||||
case OPCODE_BLEND_EQUATION:
|
||||
fprintf(f, "BlendEquation %s\n",
|
||||
enum_string(n[1].e));
|
||||
break;
|
||||
case OPCODE_BLEND_EQUATION_SEPARATE:
|
||||
fprintf(f, "BlendEquationSeparate %s, %s\n",
|
||||
enum_string(n[1].e),
|
||||
enum_string(n[2].e));
|
||||
break;
|
||||
case OPCODE_BLEND_FUNC_SEPARATE:
|
||||
fprintf(f, "BlendFuncSeparate %s, %s, %s, %s\n",
|
||||
enum_string(n[1].e),
|
||||
enum_string(n[2].e),
|
||||
enum_string(n[3].e),
|
||||
enum_string(n[4].e));
|
||||
break;
|
||||
case OPCODE_BLEND_EQUATION_I:
|
||||
fprintf(f, "BlendEquationi %u, %s\n",
|
||||
n[1].ui, enum_string(n[2].e));
|
||||
break;
|
||||
case OPCODE_BLEND_EQUATION_SEPARATE_I:
|
||||
fprintf(f, "BlendEquationSeparatei %u, %s, %s\n",
|
||||
n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
|
||||
break;
|
||||
case OPCODE_BLEND_FUNC_I:
|
||||
fprintf(f, "BlendFunci %u, %s, %s\n",
|
||||
n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
|
||||
break;
|
||||
case OPCODE_BLEND_FUNC_SEPARATE_I:
|
||||
fprintf(f, "BlendFuncSeparatei %u, %s, %s, %s, %s\n",
|
||||
n[1].ui,
|
||||
enum_string(n[2].e),
|
||||
enum_string(n[3].e),
|
||||
enum_string(n[4].e),
|
||||
enum_string(n[5].e));
|
||||
break;
|
||||
case OPCODE_CALL_LIST:
|
||||
fprintf(f, "CallList %d\n", (int) n[1].ui);
|
||||
break;
|
||||
|
|
@ -9761,6 +9801,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
|
|||
case OPCODE_LINE_STIPPLE:
|
||||
fprintf(f, "LineStipple %d %x\n", n[1].i, (int) n[2].us);
|
||||
break;
|
||||
case OPCODE_LINE_WIDTH:
|
||||
fprintf(f, "LineWidth %f\n", n[1].f);
|
||||
break;
|
||||
case OPCODE_LOAD_IDENTITY:
|
||||
fprintf(f, "LoadIdentity\n");
|
||||
break;
|
||||
|
|
@ -9790,6 +9833,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
|
|||
fprintf(f, "Ortho %g %g %g %g %g %g\n",
|
||||
n[1].f, n[2].f, n[3].f, n[4].f, n[5].f, n[6].f);
|
||||
break;
|
||||
case OPCODE_POINT_SIZE:
|
||||
fprintf(f, "PointSize %f\n", n[1].f);
|
||||
break;
|
||||
case OPCODE_POP_ATTRIB:
|
||||
fprintf(f, "PopAttrib\n");
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -2275,45 +2275,16 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
|
|||
; /* fallthrough */
|
||||
}
|
||||
|
||||
if (ctx->Extensions.TDFX_texture_compression_FXT1) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RGB_FXT1_3DFX:
|
||||
return GL_RGB;
|
||||
case GL_COMPRESSED_RGBA_FXT1_3DFX:
|
||||
return GL_RGBA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
if (_mesa_is_compressed_format(ctx, internalFormat)) {
|
||||
GLenum base_compressed =
|
||||
_mesa_gl_compressed_format_base_format(internalFormat);
|
||||
if (base_compressed)
|
||||
return base_compressed;
|
||||
}
|
||||
|
||||
/* Assume that the ANGLE flag will always be set if the EXT flag is set.
|
||||
*/
|
||||
if (ctx->Extensions.ANGLE_texture_compression_dxt) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
|
||||
return GL_RGB;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
|
||||
return GL_RGBA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (_mesa_is_desktop_gl(ctx)
|
||||
&& ctx->Extensions.ANGLE_texture_compression_dxt) {
|
||||
switch (internalFormat) {
|
||||
case GL_RGB_S3TC:
|
||||
case GL_RGB4_S3TC:
|
||||
return GL_RGB;
|
||||
case GL_RGBA_S3TC:
|
||||
case GL_RGBA4_S3TC:
|
||||
return GL_RGBA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
|
||||
_mesa_is_astc_format(internalFormat))
|
||||
return GL_RGBA;
|
||||
|
||||
if (ctx->Extensions.MESA_ycbcr_texture) {
|
||||
if (internalFormat == GL_YCBCR_MESA)
|
||||
|
|
@ -2390,16 +2361,10 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
|
|||
case GL_SRGB8_EXT:
|
||||
case GL_COMPRESSED_SRGB_EXT:
|
||||
return GL_RGB;
|
||||
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
|
||||
return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
|
||||
case GL_SRGB_ALPHA_EXT:
|
||||
case GL_SRGB8_ALPHA8_EXT:
|
||||
case GL_COMPRESSED_SRGB_ALPHA_EXT:
|
||||
return GL_RGBA;
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
|
||||
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
|
||||
return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
|
||||
case GL_SLUMINANCE_ALPHA_EXT:
|
||||
case GL_SLUMINANCE8_ALPHA8_EXT:
|
||||
case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
|
||||
|
|
@ -2544,104 +2509,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
|
|||
}
|
||||
}
|
||||
|
||||
if (ctx->Extensions.ARB_texture_compression_rgtc) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RED_RGTC1:
|
||||
case GL_COMPRESSED_SIGNED_RED_RGTC1:
|
||||
return GL_RED;
|
||||
case GL_COMPRESSED_RG_RGTC2:
|
||||
case GL_COMPRESSED_SIGNED_RG_RGTC2:
|
||||
return GL_RG;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->Extensions.EXT_texture_compression_latc) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
|
||||
case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
|
||||
return GL_LUMINANCE;
|
||||
case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
|
||||
case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
|
||||
return GL_LUMINANCE_ALPHA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->Extensions.ATI_texture_compression_3dc) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
|
||||
return GL_LUMINANCE_ALPHA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
|
||||
switch (internalFormat) {
|
||||
case GL_ETC1_RGB8_OES:
|
||||
return GL_RGB;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RGB8_ETC2:
|
||||
case GL_COMPRESSED_SRGB8_ETC2:
|
||||
return GL_RGB;
|
||||
case GL_COMPRESSED_RGBA8_ETC2_EAC:
|
||||
case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
|
||||
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||||
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||||
return GL_RGBA;
|
||||
case GL_COMPRESSED_R11_EAC:
|
||||
case GL_COMPRESSED_SIGNED_R11_EAC:
|
||||
return GL_RED;
|
||||
case GL_COMPRESSED_RG11_EAC:
|
||||
case GL_COMPRESSED_SIGNED_RG11_EAC:
|
||||
return GL_RG;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (_mesa_is_desktop_gl(ctx) &&
|
||||
ctx->Extensions.ARB_texture_compression_bptc) {
|
||||
switch (internalFormat) {
|
||||
case GL_COMPRESSED_RGBA_BPTC_UNORM:
|
||||
case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
|
||||
return GL_RGBA;
|
||||
case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
|
||||
case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
|
||||
return GL_RGB;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->API == API_OPENGLES) {
|
||||
switch (internalFormat) {
|
||||
case GL_PALETTE4_RGB8_OES:
|
||||
case GL_PALETTE4_R5_G6_B5_OES:
|
||||
case GL_PALETTE8_RGB8_OES:
|
||||
case GL_PALETTE8_R5_G6_B5_OES:
|
||||
return GL_RGB;
|
||||
case GL_PALETTE4_RGBA8_OES:
|
||||
case GL_PALETTE8_RGB5_A1_OES:
|
||||
case GL_PALETTE4_RGBA4_OES:
|
||||
case GL_PALETTE4_RGB5_A1_OES:
|
||||
case GL_PALETTE8_RGBA8_OES:
|
||||
case GL_PALETTE8_RGBA4_OES:
|
||||
return GL_RGBA;
|
||||
default:
|
||||
; /* fallthrough */
|
||||
}
|
||||
}
|
||||
|
||||
return -1; /* error */
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1891,7 +1891,7 @@ struct gl_program
|
|||
* For vertex and geometry shaders, true if the program uses the
|
||||
* gl_ClipDistance output. Ignored for fragment shaders.
|
||||
*/
|
||||
GLboolean UsesClipDistanceOut;
|
||||
unsigned ClipDistanceArraySize;
|
||||
|
||||
|
||||
/** Named parameters, constants, etc. from program text */
|
||||
|
|
@ -2619,7 +2619,6 @@ struct gl_shader_program
|
|||
* True if gl_ClipDistance is written to. Copied into
|
||||
* gl_tess_eval_program by _mesa_copy_linked_program_data().
|
||||
*/
|
||||
GLboolean UsesClipDistance;
|
||||
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
|
||||
0 if not present. */
|
||||
} TessEval;
|
||||
|
|
@ -2642,7 +2641,6 @@ struct gl_shader_program
|
|||
* True if gl_ClipDistance is written to. Copied into
|
||||
* gl_geometry_program by _mesa_copy_linked_program_data().
|
||||
*/
|
||||
GLboolean UsesClipDistance;
|
||||
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
|
||||
0 if not present. */
|
||||
bool UsesEndPrimitive;
|
||||
|
|
@ -2655,7 +2653,6 @@ struct gl_shader_program
|
|||
* True if gl_ClipDistance is written to. Copied into gl_vertex_program
|
||||
* by _mesa_copy_linked_program_data().
|
||||
*/
|
||||
GLboolean UsesClipDistance;
|
||||
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
|
||||
0 if not present. */
|
||||
} Vert;
|
||||
|
|
|
|||
|
|
@ -2068,7 +2068,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
|
|||
{
|
||||
switch (type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
|
||||
dst->ClipDistanceArraySize = src->Vert.ClipDistanceArraySize;
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL: {
|
||||
struct gl_tess_ctrl_program *dst_tcp =
|
||||
|
|
@ -2083,7 +2083,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
|
|||
dst_tep->Spacing = src->TessEval.Spacing;
|
||||
dst_tep->VertexOrder = src->TessEval.VertexOrder;
|
||||
dst_tep->PointMode = src->TessEval.PointMode;
|
||||
dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance;
|
||||
dst->ClipDistanceArraySize = src->TessEval.ClipDistanceArraySize;
|
||||
break;
|
||||
}
|
||||
case MESA_SHADER_GEOMETRY: {
|
||||
|
|
@ -2093,7 +2093,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
|
|||
dst_gp->Invocations = src->Geom.Invocations;
|
||||
dst_gp->InputType = src->Geom.InputType;
|
||||
dst_gp->OutputType = src->Geom.OutputType;
|
||||
dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
|
||||
dst->ClipDistanceArraySize = src->Geom.ClipDistanceArraySize;
|
||||
dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
|
||||
dst_gp->UsesStreams = src->Geom.UsesStreams;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -97,16 +97,16 @@ static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
|
|||
* No pixel transfer operations or special texel encodings allowed.
|
||||
* 1D, 2D and 3D images supported.
|
||||
*/
|
||||
static void
|
||||
memcpy_texture(struct gl_context *ctx,
|
||||
GLuint dimensions,
|
||||
mesa_format dstFormat,
|
||||
GLint dstRowStride,
|
||||
GLubyte **dstSlices,
|
||||
GLint srcWidth, GLint srcHeight, GLint srcDepth,
|
||||
GLenum srcFormat, GLenum srcType,
|
||||
const GLvoid *srcAddr,
|
||||
const struct gl_pixelstore_attrib *srcPacking)
|
||||
void
|
||||
_mesa_memcpy_texture(struct gl_context *ctx,
|
||||
GLuint dimensions,
|
||||
mesa_format dstFormat,
|
||||
GLint dstRowStride,
|
||||
GLubyte **dstSlices,
|
||||
GLint srcWidth, GLint srcHeight, GLint srcDepth,
|
||||
GLenum srcFormat, GLenum srcType,
|
||||
const GLvoid *srcAddr,
|
||||
const struct gl_pixelstore_attrib *srcPacking)
|
||||
{
|
||||
const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
|
||||
srcFormat, srcType);
|
||||
|
|
@ -296,11 +296,11 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS)
|
|||
assert(baseInternalFormat == GL_YCBCR_MESA);
|
||||
|
||||
/* always just memcpy since no pixel transfer ops apply */
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat,
|
||||
dstRowStride, dstSlices,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
srcAddr, srcPacking);
|
||||
_mesa_memcpy_texture(ctx, dims,
|
||||
dstFormat,
|
||||
dstRowStride, dstSlices,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
srcAddr, srcPacking);
|
||||
|
||||
/* Check if we need byte swapping */
|
||||
/* XXX the logic here _might_ be wrong */
|
||||
|
|
@ -899,13 +899,15 @@ _mesa_texstore_memcpy(TEXSTORE_PARAMS)
|
|||
return GL_FALSE;
|
||||
}
|
||||
|
||||
memcpy_texture(ctx, dims,
|
||||
dstFormat,
|
||||
dstRowStride, dstSlices,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
srcAddr, srcPacking);
|
||||
_mesa_memcpy_texture(ctx, dims,
|
||||
dstFormat,
|
||||
dstRowStride, dstSlices,
|
||||
srcWidth, srcHeight, srcDepth, srcFormat, srcType,
|
||||
srcAddr, srcPacking);
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Store user data into texture memory.
|
||||
* Called via glTex[Sub]Image1/2/3D()
|
||||
|
|
|
|||
|
|
@ -74,6 +74,17 @@ _mesa_texstore_needs_transfer_ops(struct gl_context *ctx,
|
|||
GLenum baseInternalFormat,
|
||||
mesa_format dstFormat);
|
||||
|
||||
extern void
|
||||
_mesa_memcpy_texture(struct gl_context *ctx,
|
||||
GLuint dimensions,
|
||||
mesa_format dstFormat,
|
||||
GLint dstRowStride,
|
||||
GLubyte **dstSlices,
|
||||
GLint srcWidth, GLint srcHeight, GLint srcDepth,
|
||||
GLenum srcFormat, GLenum srcType,
|
||||
const GLvoid *srcAddr,
|
||||
const struct gl_pixelstore_attrib *srcPacking);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_texstore_can_use_memcpy(struct gl_context *ctx,
|
||||
GLenum baseInternalFormat, mesa_format dstFormat,
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ static void update_raster_state( struct st_context *st )
|
|||
|
||||
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
|
||||
raster->force_persample_interp =
|
||||
st->can_force_persample_interp &&
|
||||
!st->force_persample_in_shader &&
|
||||
ctx->Multisample._Enabled &&
|
||||
ctx->Multisample.SampleShading &&
|
||||
ctx->Multisample.MinSampleShadingValue *
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ update_fp( struct st_context *st )
|
|||
assert(stfp->Base.Base.Target == GL_FRAGMENT_PROGRAM_ARB);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
|
||||
/* _NEW_FRAG_CLAMP */
|
||||
key.clamp_color = st->clamp_frag_color_in_shader &&
|
||||
|
|
@ -76,7 +76,7 @@ update_fp( struct st_context *st )
|
|||
* Ignore sample qualifier while computing this flag.
|
||||
*/
|
||||
key.persample_shading =
|
||||
!st->can_force_persample_interp &&
|
||||
st->force_persample_in_shader &&
|
||||
!(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
|
||||
SYSTEM_BIT_SAMPLE_POS)) &&
|
||||
_mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1;
|
||||
|
|
@ -119,7 +119,7 @@ update_vp( struct st_context *st )
|
|||
assert(stvp->Base.Base.Target == GL_VERTEX_PROGRAM_ARB);
|
||||
|
||||
memset(&key, 0, sizeof key);
|
||||
key.st = st; /* variants are per-context */
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
|
||||
/* When this is true, we will add an extra input to the vertex
|
||||
* shader translation (for edgeflags), an extra output with
|
||||
|
|
@ -174,7 +174,7 @@ update_gp( struct st_context *st )
|
|||
assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
|
||||
st->gp_variant = st_get_gp_variant(st, stgp, &key);
|
||||
|
||||
|
|
@ -210,7 +210,7 @@ update_tcp( struct st_context *st )
|
|||
assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
|
||||
st->tcp_variant = st_get_tcp_variant(st, sttcp, &key);
|
||||
|
||||
|
|
@ -246,7 +246,7 @@ update_tep( struct st_context *st )
|
|||
assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
|
||||
st->tep_variant = st_get_tep_variant(st, sttep, &key);
|
||||
|
||||
|
|
|
|||
|
|
@ -269,7 +269,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
|
|||
struct pipe_resource *vbuf = NULL;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
key.bitmap = GL_TRUE;
|
||||
key.clamp_color = st->clamp_frag_color_in_shader &&
|
||||
st->ctx->Color._ClampFragmentColor;
|
||||
|
|
|
|||
|
|
@ -395,15 +395,35 @@ make_texture(struct st_context *st,
|
|||
* Note that the image is actually going to be upside down in
|
||||
* the texture. We deal with that with texcoords.
|
||||
*/
|
||||
success = _mesa_texstore(ctx, 2, /* dims */
|
||||
baseInternalFormat, /* baseInternalFormat */
|
||||
mformat, /* mesa_format */
|
||||
transfer->stride, /* dstRowStride, bytes */
|
||||
&dest, /* destSlices */
|
||||
width, height, 1, /* size */
|
||||
format, type, /* src format/type */
|
||||
pixels, /* data source */
|
||||
unpack);
|
||||
if ((format == GL_RGBA || format == GL_BGRA)
|
||||
&& type == GL_UNSIGNED_BYTE) {
|
||||
/* Use a memcpy-based texstore to avoid software pixel swizzling.
|
||||
* We'll do the necessary swizzling with the pipe_sampler_view to
|
||||
* give much better performance.
|
||||
* XXX in the future, expand this to accommodate more format and
|
||||
* type combinations.
|
||||
*/
|
||||
_mesa_memcpy_texture(ctx, 2,
|
||||
mformat, /* mesa_format */
|
||||
transfer->stride, /* dstRowStride, bytes */
|
||||
&dest, /* destSlices */
|
||||
width, height, 1, /* size */
|
||||
format, type, /* src format/type */
|
||||
pixels, /* data source */
|
||||
unpack);
|
||||
success = GL_TRUE;
|
||||
}
|
||||
else {
|
||||
success = _mesa_texstore(ctx, 2, /* dims */
|
||||
baseInternalFormat, /* baseInternalFormat */
|
||||
mformat, /* mesa_format */
|
||||
transfer->stride, /* dstRowStride, bytes */
|
||||
&dest, /* destSlices */
|
||||
width, height, 1, /* size */
|
||||
format, type, /* src format/type */
|
||||
pixels, /* data source */
|
||||
unpack);
|
||||
}
|
||||
|
||||
/* unmap */
|
||||
pipe_transfer_unmap(pipe, transfer);
|
||||
|
|
@ -667,7 +687,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
|
|||
/* user textures, plus the drawpix textures */
|
||||
if (fpv) {
|
||||
struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
|
||||
uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
|
||||
uint num = MAX3(fpv->drawpix_sampler + 1,
|
||||
fpv->pixelmap_sampler + 1,
|
||||
st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
|
||||
|
||||
memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
|
||||
|
|
@ -914,7 +935,7 @@ get_color_fp_variant(struct st_context *st)
|
|||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
key.drawpixels = 1;
|
||||
key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
|
||||
ctx->Pixel.RedScale != 1.0 ||
|
||||
|
|
@ -956,6 +977,69 @@ clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Search the array of 4 swizzle components for the named component and return
|
||||
* its position.
|
||||
*/
|
||||
static unsigned
search_swizzle(const unsigned char swizzle[4], unsigned component)
{
   unsigned pos = 0;

   /* Linear scan: the first slot holding the requested component wins. */
   while (pos < 4) {
      if (swizzle[pos] == component)
         return pos;
      pos++;
   }

   /* Component not present.  This trips the assertion in debug builds;
    * when assertions are compiled out, position 0 is returned.
    */
   assert(!"search_swizzle() failed");
   return 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the sampler view's swizzle terms. This is used to handle RGBA
|
||||
* swizzling when the incoming image format isn't an exact match for
|
||||
* the actual texture format. For example, if we have glDrawPixels(
|
||||
* GL_RGBA, GL_UNSIGNED_BYTE) and we chose the texture format
|
||||
* PIPE_FORMAT_B8G8R8A8 then we can use the sampler view swizzle to
|
||||
* avoid swizzling all the pixels in software in the texstore code.
|
||||
*/
|
||||
static void
|
||||
setup_sampler_swizzle(struct pipe_sampler_view *sv, GLenum format, GLenum type)
|
||||
{
|
||||
if ((format == GL_RGBA || format == GL_BGRA) && type == GL_UNSIGNED_BYTE) {
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(sv->texture->format);
|
||||
unsigned c0, c1, c2, c3;
|
||||
|
||||
/* Every gallium driver supports at least one 32-bit packed RGBA format.
|
||||
* We must have chosen one for (GL_RGBA, GL_UNSIGNED_BYTE).
|
||||
*/
|
||||
assert(desc->block.bits == 32);
|
||||
|
||||
/* invert the format's swizzle to setup the sampler's swizzle */
|
||||
if (format == GL_RGBA) {
|
||||
c0 = UTIL_FORMAT_SWIZZLE_X;
|
||||
c1 = UTIL_FORMAT_SWIZZLE_Y;
|
||||
c2 = UTIL_FORMAT_SWIZZLE_Z;
|
||||
c3 = UTIL_FORMAT_SWIZZLE_W;
|
||||
}
|
||||
else {
|
||||
assert(format == GL_BGRA);
|
||||
c0 = UTIL_FORMAT_SWIZZLE_Z;
|
||||
c1 = UTIL_FORMAT_SWIZZLE_Y;
|
||||
c2 = UTIL_FORMAT_SWIZZLE_X;
|
||||
c3 = UTIL_FORMAT_SWIZZLE_W;
|
||||
}
|
||||
sv->swizzle_r = search_swizzle(desc->swizzle, c0);
|
||||
sv->swizzle_g = search_swizzle(desc->swizzle, c1);
|
||||
sv->swizzle_b = search_swizzle(desc->swizzle, c2);
|
||||
sv->swizzle_a = search_swizzle(desc->swizzle, c3);
|
||||
}
|
||||
else {
|
||||
/* use the default sampler swizzle */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called via ctx->Driver.DrawPixels()
|
||||
*/
|
||||
|
|
@ -974,6 +1058,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
|
|||
int num_sampler_view = 1;
|
||||
struct gl_pixelstore_attrib clippedUnpack;
|
||||
struct st_fp_variant *fpv = NULL;
|
||||
struct pipe_resource *pt;
|
||||
|
||||
/* Mesa state should be up to date by now */
|
||||
assert(ctx->NewState == 0x0);
|
||||
|
|
@ -1029,42 +1114,56 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
|
|||
st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
/* draw with textured quad */
|
||||
{
|
||||
struct pipe_resource *pt
|
||||
= make_texture(st, width, height, format, type, unpack, pixels);
|
||||
if (pt) {
|
||||
sv[0] = st_create_texture_sampler_view(st->pipe, pt);
|
||||
|
||||
if (sv[0]) {
|
||||
/* Create a second sampler view to read stencil.
|
||||
* The stencil is written using the shader stencil export
|
||||
* functionality. */
|
||||
if (write_stencil) {
|
||||
enum pipe_format stencil_format =
|
||||
util_format_stencil_only(pt->format);
|
||||
/* we should not be doing pixel map/transfer (see above) */
|
||||
assert(num_sampler_view == 1);
|
||||
sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
|
||||
stencil_format);
|
||||
num_sampler_view++;
|
||||
}
|
||||
|
||||
draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
|
||||
width, height,
|
||||
ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
|
||||
sv,
|
||||
num_sampler_view,
|
||||
driver_vp,
|
||||
driver_fp, fpv,
|
||||
color, GL_FALSE, write_depth, write_stencil);
|
||||
pipe_sampler_view_reference(&sv[0], NULL);
|
||||
if (num_sampler_view > 1)
|
||||
pipe_sampler_view_reference(&sv[1], NULL);
|
||||
}
|
||||
pipe_resource_reference(&pt, NULL);
|
||||
}
|
||||
/* Put glDrawPixels image into a texture */
|
||||
pt = make_texture(st, width, height, format, type, unpack, pixels);
|
||||
if (!pt) {
|
||||
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
|
||||
return;
|
||||
}
|
||||
|
||||
/* create sampler view for the image */
|
||||
sv[0] = st_create_texture_sampler_view(st->pipe, pt);
|
||||
if (!sv[0]) {
|
||||
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
|
||||
pipe_resource_reference(&pt, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set up the sampler view's swizzle */
|
||||
setup_sampler_swizzle(sv[0], format, type);
|
||||
|
||||
/* Create a second sampler view to read stencil. The stencil is
|
||||
* written using the shader stencil export functionality.
|
||||
*/
|
||||
if (write_stencil) {
|
||||
enum pipe_format stencil_format =
|
||||
util_format_stencil_only(pt->format);
|
||||
/* we should not be doing pixel map/transfer (see above) */
|
||||
assert(num_sampler_view == 1);
|
||||
sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
|
||||
stencil_format);
|
||||
if (!sv[1]) {
|
||||
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
|
||||
pipe_resource_reference(&pt, NULL);
|
||||
pipe_sampler_view_reference(&sv[0], NULL);
|
||||
return;
|
||||
}
|
||||
num_sampler_view++;
|
||||
}
|
||||
|
||||
draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
|
||||
width, height,
|
||||
ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
|
||||
sv,
|
||||
num_sampler_view,
|
||||
driver_vp,
|
||||
driver_fp, fpv,
|
||||
color, GL_FALSE, write_depth, write_stencil);
|
||||
pipe_sampler_view_reference(&sv[0], NULL);
|
||||
if (num_sampler_view > 1)
|
||||
pipe_sampler_view_reference(&sv[1], NULL);
|
||||
|
||||
pipe_resource_reference(&pt, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -224,6 +224,7 @@ st_program_string_notify( struct gl_context *ctx,
|
|||
struct gl_program *prog )
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
gl_shader_stage stage = _mesa_program_enum_to_shader_stage(target);
|
||||
|
||||
if (target == GL_FRAGMENT_PROGRAM_ARB) {
|
||||
struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
|
||||
|
|
@ -278,10 +279,10 @@ st_program_string_notify( struct gl_context *ctx,
|
|||
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
|
||||
}
|
||||
|
||||
if (ST_DEBUG & DEBUG_PRECOMPILE)
|
||||
if (ST_DEBUG & DEBUG_PRECOMPILE ||
|
||||
st->shader_has_one_variant[stage])
|
||||
st_precompile_shader_variant(st, prog);
|
||||
|
||||
/* XXX check if program is legal, within limits */
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -235,9 +235,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
|
|||
PIPE_BIND_SAMPLER_VIEW);
|
||||
st->prefer_blit_based_texture_transfer = screen->get_param(screen,
|
||||
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
|
||||
st->can_force_persample_interp = screen->get_param(screen,
|
||||
PIPE_CAP_FORCE_PERSAMPLE_INTERP);
|
||||
|
||||
st->force_persample_in_shader =
|
||||
screen->get_param(screen, PIPE_CAP_SAMPLE_SHADING) &&
|
||||
!screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP);
|
||||
st->has_shareable_shaders = screen->get_param(screen,
|
||||
PIPE_CAP_SHAREABLE_SHADERS);
|
||||
st->needs_texcoord_semantic =
|
||||
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
|
||||
st->apply_texture_swizzle_to_border_color =
|
||||
|
|
@ -292,6 +294,20 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
|
|||
ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
|
||||
}
|
||||
|
||||
/* Set which shader types can be compiled at link time. */
|
||||
st->shader_has_one_variant[MESA_SHADER_VERTEX] =
|
||||
st->has_shareable_shaders &&
|
||||
!st->clamp_vert_color_in_shader;
|
||||
|
||||
st->shader_has_one_variant[MESA_SHADER_FRAGMENT] =
|
||||
st->has_shareable_shaders &&
|
||||
!st->clamp_frag_color_in_shader &&
|
||||
!st->force_persample_in_shader;
|
||||
|
||||
st->shader_has_one_variant[MESA_SHADER_TESS_CTRL] = st->has_shareable_shaders;
|
||||
st->shader_has_one_variant[MESA_SHADER_TESS_EVAL] = st->has_shareable_shaders;
|
||||
st->shader_has_one_variant[MESA_SHADER_GEOMETRY] = st->has_shareable_shaders;
|
||||
|
||||
_mesa_compute_version(ctx);
|
||||
|
||||
if (ctx->Version == 0) {
|
||||
|
|
|
|||
|
|
@ -98,7 +98,15 @@ struct st_context
|
|||
boolean has_etc1;
|
||||
boolean has_etc2;
|
||||
boolean prefer_blit_based_texture_transfer;
|
||||
boolean can_force_persample_interp;
|
||||
boolean force_persample_in_shader;
|
||||
boolean has_shareable_shaders;
|
||||
|
||||
/**
|
||||
* If a shader can be created when we get its source.
|
||||
* This means it has only 1 variant, not counting glBitmap and
|
||||
* glDrawPixels.
|
||||
*/
|
||||
boolean shader_has_one_variant[MESA_SHADER_STAGES];
|
||||
|
||||
boolean needs_texcoord_semantic;
|
||||
boolean apply_texture_swizzle_to_border_color;
|
||||
|
|
|
|||
|
|
@ -249,6 +249,9 @@ void st_init_limits(struct pipe_screen *screen,
|
|||
|
||||
if (options->EmitNoLoops)
|
||||
options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
|
||||
else
|
||||
options->MaxUnrollIterations = screen->get_shader_param(screen, sh,
|
||||
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
|
||||
|
||||
options->LowerClipDistance = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -395,6 +395,10 @@ st_translate_vertex_program(struct st_context *st,
|
|||
if (ureg == NULL)
|
||||
return false;
|
||||
|
||||
if (stvp->Base.Base.ClipDistanceArraySize)
|
||||
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
|
||||
stvp->Base.Base.ClipDistanceArraySize);
|
||||
|
||||
if (ST_DEBUG & DEBUG_MESA) {
|
||||
_mesa_print_program(&stvp->Base.Base);
|
||||
_mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
|
||||
|
|
@ -1049,6 +1053,10 @@ st_translate_program_common(struct st_context *st,
|
|||
memset(outputMapping, 0, sizeof(outputMapping));
|
||||
memset(out_state, 0, sizeof(*out_state));
|
||||
|
||||
if (prog->ClipDistanceArraySize)
|
||||
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
|
||||
prog->ClipDistanceArraySize);
|
||||
|
||||
/*
|
||||
* Convert Mesa program inputs to TGSI input register semantics.
|
||||
*/
|
||||
|
|
@ -1728,6 +1736,12 @@ destroy_program_variants_cb(GLuint key, void *data, void *userData)
|
|||
void
|
||||
st_destroy_program_variants(struct st_context *st)
|
||||
{
|
||||
/* If shaders can be shared with other contexts, the last context will
|
||||
* call DeleteProgram on all shaders, releasing everything.
|
||||
*/
|
||||
if (st->has_shareable_shaders)
|
||||
return;
|
||||
|
||||
/* ARB vert/frag program */
|
||||
_mesa_HashWalk(st->ctx->Shared->Programs,
|
||||
destroy_program_variants_cb, st);
|
||||
|
|
@ -1774,7 +1788,7 @@ st_precompile_shader_variant(struct st_context *st,
|
|||
struct st_vp_variant_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
st_get_vp_variant(st, p, &key);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1784,7 +1798,7 @@ st_precompile_shader_variant(struct st_context *st,
|
|||
struct st_tcp_variant_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
st_get_tcp_variant(st, p, &key);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1794,7 +1808,7 @@ st_precompile_shader_variant(struct st_context *st,
|
|||
struct st_tep_variant_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
st_get_tep_variant(st, p, &key);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1804,7 +1818,7 @@ st_precompile_shader_variant(struct st_context *st,
|
|||
struct st_gp_variant_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
st_get_gp_variant(st, p, &key);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1814,7 +1828,7 @@ st_precompile_shader_variant(struct st_context *st,
|
|||
struct st_fp_variant_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.st = st;
|
||||
key.st = st->has_shareable_shaders ? NULL : st;
|
||||
st_get_fp_variant(st, p, &key);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,19 +124,19 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
|
|||
GLuint i;
|
||||
LOCAL_VARS;
|
||||
|
||||
(void) flags;
|
||||
|
||||
INIT(GL_LINE_LOOP);
|
||||
|
||||
if (start+1 < count) {
|
||||
if (TEST_PRIM_BEGIN(flags)) {
|
||||
RESET_STIPPLE;
|
||||
/* draw the first line from v[0] to v[1] */
|
||||
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
|
||||
RENDER_LINE( ELT(start), ELT(start+1) );
|
||||
else
|
||||
RENDER_LINE( ELT(start+1), ELT(start) );
|
||||
}
|
||||
|
||||
/* draw lines from v[1] to v[n-1] */
|
||||
for ( i = start+2 ; i < count ; i++) {
|
||||
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
|
||||
RENDER_LINE( ELT(i-1), ELT(i) );
|
||||
|
|
@ -145,6 +145,7 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
|
|||
}
|
||||
|
||||
if ( TEST_PRIM_END(flags)) {
|
||||
/* draw final line from v[n-1] to v[0] (the very first vertex) */
|
||||
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
|
||||
RENDER_LINE( ELT(count-1), ELT(start) );
|
||||
else
|
||||
|
|
|
|||
|
|
@ -196,6 +196,26 @@ vbo_get_default_vals_as_union(GLenum format)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the max number of vertices which can be stored in
|
||||
* a vertex buffer, given the current vertex size, and the amount
|
||||
* of space already used.
|
||||
*/
|
||||
static inline unsigned
|
||||
vbo_compute_max_verts(const struct vbo_exec_context *exec)
|
||||
{
|
||||
unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
|
||||
(exec->vtx.vertex_size * sizeof(GLfloat));
|
||||
assert(n > 0);
|
||||
/* Subtract one so we're always sure to have room for an extra
|
||||
* vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion.
|
||||
*/
|
||||
n--;
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -160,8 +160,6 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
|
|||
void vbo_exec_vtx_map( struct vbo_exec_context *exec );
|
||||
|
||||
|
||||
void vbo_exec_vtx_wrap( struct vbo_exec_context *exec );
|
||||
|
||||
void vbo_exec_eval_update( struct vbo_exec_context *exec );
|
||||
|
||||
void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec,
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ static void reset_attrfv( struct vbo_exec_context *exec );
|
|||
|
||||
/**
|
||||
* Close off the last primitive, execute the buffer, restart the
|
||||
* primitive.
|
||||
* primitive. This is called when we fill a vertex buffer before
|
||||
* hitting glEnd.
|
||||
*/
|
||||
static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
|
||||
{
|
||||
|
|
@ -71,17 +72,31 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
|
|||
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
|
||||
}
|
||||
else {
|
||||
GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
|
||||
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
|
||||
const GLuint last_begin = last_prim->begin;
|
||||
GLuint last_count;
|
||||
|
||||
if (_mesa_inside_begin_end(exec->ctx)) {
|
||||
GLint i = exec->vtx.prim_count - 1;
|
||||
assert(i >= 0);
|
||||
exec->vtx.prim[i].count = (exec->vtx.vert_count -
|
||||
exec->vtx.prim[i].start);
|
||||
last_prim->count = exec->vtx.vert_count - last_prim->start;
|
||||
}
|
||||
|
||||
last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
|
||||
last_count = last_prim->count;
|
||||
|
||||
/* Special handling for wrapping GL_LINE_LOOP */
|
||||
if (last_prim->mode == GL_LINE_LOOP &&
|
||||
last_count > 0 &&
|
||||
!last_prim->end) {
|
||||
/* draw this section of the incomplete line loop as a line strip */
|
||||
last_prim->mode = GL_LINE_STRIP;
|
||||
if (!last_prim->begin) {
|
||||
/* This is not the first section of the line loop, so don't
|
||||
* draw the 0th vertex. We're saving it until we draw the
|
||||
* very last section of the loop.
|
||||
*/
|
||||
last_prim->start++;
|
||||
last_prim->count--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Execute the buffer and save copied vertices.
|
||||
*/
|
||||
|
|
@ -98,6 +113,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
|
|||
|
||||
if (_mesa_inside_begin_end(exec->ctx)) {
|
||||
exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
|
||||
exec->vtx.prim[0].begin = 0;
|
||||
exec->vtx.prim[0].start = 0;
|
||||
exec->vtx.prim[0].count = 0;
|
||||
exec->vtx.prim_count++;
|
||||
|
|
@ -113,7 +129,8 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
|
|||
* Deal with buffer wrapping where provoked by the vertex buffer
|
||||
* filling up, as opposed to upgrade_vertex().
|
||||
*/
|
||||
void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
|
||||
static void
|
||||
vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
|
||||
{
|
||||
fi_type *data = exec->vtx.copied.buffer;
|
||||
GLuint i;
|
||||
|
|
@ -292,8 +309,7 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
|
|||
*/
|
||||
exec->vtx.attrsz[attr] = newSize;
|
||||
exec->vtx.vertex_size += newSize - oldSize;
|
||||
exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
|
||||
(exec->vtx.vertex_size * sizeof(GLfloat)));
|
||||
exec->vtx.max_vert = vbo_compute_max_verts(exec);
|
||||
exec->vtx.vert_count = 0;
|
||||
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
|
||||
|
||||
|
|
@ -446,10 +462,6 @@ do { \
|
|||
\
|
||||
assert(sz == 1 || sz == 2); \
|
||||
\
|
||||
if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \
|
||||
vbo_exec_begin_vertices(ctx); \
|
||||
} \
|
||||
\
|
||||
/* check if attribute size or type is changing */ \
|
||||
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
|
||||
unlikely(exec->vtx.attrtype[A] != T)) { \
|
||||
|
|
@ -470,6 +482,15 @@ do { \
|
|||
/* This is a glVertex call */ \
|
||||
GLuint i; \
|
||||
\
|
||||
if (unlikely((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0)) { \
|
||||
vbo_exec_begin_vertices(ctx); \
|
||||
} \
|
||||
\
|
||||
if (unlikely(!exec->vtx.buffer_ptr)) { \
|
||||
vbo_exec_vtx_map(exec); \
|
||||
} \
|
||||
assert(exec->vtx.buffer_ptr); \
|
||||
\
|
||||
/* copy 32-bit words */ \
|
||||
for (i = 0; i < exec->vtx.vertex_size; i++) \
|
||||
exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \
|
||||
|
|
@ -482,7 +503,10 @@ do { \
|
|||
\
|
||||
if (++exec->vtx.vert_count >= exec->vtx.max_vert) \
|
||||
vbo_exec_vtx_wrap( exec ); \
|
||||
} \
|
||||
} else { \
|
||||
/* we now have accumulated per-vertex attributes */ \
|
||||
ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ERROR(err) _mesa_error( ctx, err, __func__ )
|
||||
|
|
@ -814,11 +838,28 @@ static void GLAPIENTRY vbo_exec_End( void )
|
|||
|
||||
if (exec->vtx.prim_count > 0) {
|
||||
/* close off current primitive */
|
||||
int idx = exec->vtx.vert_count;
|
||||
int i = exec->vtx.prim_count - 1;
|
||||
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
|
||||
|
||||
exec->vtx.prim[i].end = 1;
|
||||
exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start;
|
||||
last_prim->end = 1;
|
||||
last_prim->count = exec->vtx.vert_count - last_prim->start;
|
||||
|
||||
/* Special handling for GL_LINE_LOOP */
|
||||
if (last_prim->mode == GL_LINE_LOOP && last_prim->begin == 0) {
|
||||
/* We're finishing drawing a line loop. Append 0th vertex onto
|
||||
* end of vertex buffer so we can draw it as a line strip.
|
||||
*/
|
||||
const fi_type *src = exec->vtx.buffer_map;
|
||||
fi_type *dst = exec->vtx.buffer_map +
|
||||
exec->vtx.vert_count * exec->vtx.vertex_size;
|
||||
|
||||
/* copy 0th vertex to end of buffer */
|
||||
memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
|
||||
|
||||
assert(last_prim->start == 0);
|
||||
last_prim->start++; /* skip vertex0 */
|
||||
/* note that last_prim->count stays unchanged */
|
||||
last_prim->mode = GL_LINE_STRIP;
|
||||
}
|
||||
|
||||
try_vbo_merge(exec);
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue