diff --git a/configure.ac b/configure.ac
index f236dad6441..0c88db9f66f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -108,6 +108,8 @@ AC_SYS_LARGEFILE
LT_PREREQ([2.2])
LT_INIT([disable-static])
+AC_CHECK_PROG(RM, rm, [rm -f])
+
AX_PROG_BISON([],
AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 6503e2ab1da..167321676df 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
- GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe, softpipe)
+ GL_ARB_texture_view DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe)
GL_ARB_vertex_attrib_binding DONE (all drivers)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index dcf425e4c68..d3dbe9dda13 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
GL_ARB_texture_query_lod on softpipe
+GL_ARB_texture_view on radeonsi
EGL_KHR_create_context on softpipe, llvmpipe
EGL_KHR_gl_colorspace on softpipe, llvmpipe
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 0ad94bb031f..5584c4a222c 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -355,8 +355,9 @@ struct draw_vertex_info {
};
/* these flags are set if the primitive is a segment of a larger one */
-#define DRAW_SPLIT_BEFORE 0x1
-#define DRAW_SPLIT_AFTER 0x2
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+#define DRAW_LINE_LOOP_AS_STRIP 0x4
struct draw_prim_info {
boolean linear;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index ffec863ae6f..aa20b918f50 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -359,6 +359,16 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
}
+static inline unsigned
+prim_type(unsigned prim, unsigned flags)
+{
+ if (flags & DRAW_LINE_LOOP_AS_STRIP)
+ return PIPE_PRIM_LINE_STRIP;
+ else
+ return prim;
+}
+
+
static void
fetch_pipeline_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
@@ -380,7 +390,7 @@ fetch_pipeline_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -408,7 +418,7 @@ fetch_pipeline_linear_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = count;
prim_info.elts = NULL;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -439,7 +449,7 @@ fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index e42c4af0e70..2d7569b0fdf 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -473,6 +473,16 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
}
+static inline unsigned
+prim_type(unsigned prim, unsigned flags)
+{
+ if (flags & DRAW_LINE_LOOP_AS_STRIP)
+ return PIPE_PRIM_LINE_STRIP;
+ else
+ return prim;
+}
+
+
static void
llvm_middle_end_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
@@ -494,7 +504,7 @@ llvm_middle_end_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -522,7 +532,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = count;
prim_info.elts = NULL;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -552,7 +562,7 @@ llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
prim_info.start = 0;
prim_info.count = draw_count;
prim_info.elts = draw_elts;
- prim_info.prim = fpme->input_prim;
+ prim_info.prim = prim_type(fpme->input_prim, prim_flags);
prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 0afabb01398..6da79b9490b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -249,6 +249,9 @@ vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
assert(icount + !!close_loop <= vsplit->segment_size);
+ /* need to draw the sections of the line loop as line strips */
+ flags |= DRAW_LINE_LOOP_AS_STRIP;
+
if (close_loop) {
for (nr = 0; nr < icount; nr++)
vsplit->fetch_elts[nr] = istart + nr;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index 571c615f9f8..ad64ae058b6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
/* if we get here, we missed a shader cap above (and should have seen
* a compiler warning.)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index a371aa95e70..f86adcec506 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -474,6 +474,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
/* if we get here, we missed a shader cap above (and should have seen
* a compiler warning.)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index b84a1753eeb..4645ef26cab 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -369,19 +369,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
procType == TGSI_PROCESSOR_GEOMETRY ||
procType == TGSI_PROCESSOR_TESS_CTRL ||
procType == TGSI_PROCESSOR_TESS_EVAL) {
- if (semName == TGSI_SEMANTIC_CLIPDIST) {
- info->num_written_clipdistance +=
- util_bitcount(fulldecl->Declaration.UsageMask);
- info->clipdist_writemask |=
- fulldecl->Declaration.UsageMask << (semIndex*4);
- }
- else if (semName == TGSI_SEMANTIC_CULLDIST) {
- info->num_written_culldistance +=
- util_bitcount(fulldecl->Declaration.UsageMask);
- info->culldist_writemask |=
- fulldecl->Declaration.UsageMask << (semIndex*4);
- }
- else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
+ if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
info->writes_viewport_index = TRUE;
}
else if (semName == TGSI_SEMANTIC_LAYER) {
@@ -432,9 +420,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
const struct tgsi_full_property *fullprop
= &parse.FullToken.FullProperty;
unsigned name = fullprop->Property.PropertyName;
+ unsigned value = fullprop->u[0].Data;
assert(name < Elements(info->properties));
- info->properties[name] = fullprop->u[0].Data;
+ info->properties[name] = value;
+
+ switch (name) {
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ info->num_written_clipdistance = value;
+ info->clipdist_writemask |= (1 << value) - 1;
+ break;
+ case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
+ info->num_written_culldistance = value;
+ info->culldist_writemask |= (1 << value) - 1;
+ break;
+ }
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 8271ea08177..89369d60f4e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -137,6 +137,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"TES_SPACING",
"TES_VERTEX_ORDER_CW",
"TES_POINT_MODE",
+ "NUM_CLIPDIST_ENABLED",
+ "NUM_CULLDIST_ENABLED",
};
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index e08844b2f0b..151afb2dffe 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -276,6 +276,8 @@ The integer capabilities:
GL4 hardware will likely need to emulate it with a shader variant, or by
selecting the interpolation weights with a conditional assignment
in the shader.
+* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
+ pipe_context.
@@ -365,6 +367,10 @@ to be 0.
are supported.
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
ignore tgsi_declaration_range::Last for shader inputs and outputs.
+* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number
+ of iterations that loops are allowed to have to be unrolled. It is only
+ a hint to state trackers. Whether any loops will be unrolled is not
+ guaranteed.
.. _pipe_compute_cap:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 314fe1bb74f..01e18f3084e 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3126,6 +3126,16 @@ TES_POINT_MODE
If set to a non-zero value, this turns on point mode for the tessellator,
which means that points will be generated instead of primitives.
+NUM_CLIPDIST_ENABLED
+""""""""""""""""
+
+How many clip distance scalar outputs are enabled.
+
+NUM_CULLDIST_ENABLED
+""""""""""""""""
+
+How many cull distance scalar outputs are enabled.
+
Texture Sampling and Texture Formats
------------------------------------
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index b64f78ca32b..50d140fe903 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -237,6 +237,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -411,6 +412,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
debug_printf("unknown shader param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 9d6b3d39183..5812af626cb 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
return 0;
@@ -249,6 +251,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 76812a666a0..e1a7dc56685 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
return 0;
@@ -471,6 +473,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 50c3781f5f8..e2ed267da78 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -298,6 +298,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 335c163b661..03301649e38 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -171,6 +171,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -263,6 +264,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
debug_printf("unknown vertex shader param %d\n", param);
return 0;
@@ -304,6 +307,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
debug_printf("unknown fragment shader param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 812b246ea0e..ec51d00f266 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -216,6 +216,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -299,6 +300,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f34ad0ed5d1..af8e5f72670 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -202,6 +202,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -312,6 +313,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16; /* XXX not sure if more are really safe */
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 1165ac8a9c0..a576abdfaf2 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -197,6 +197,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* SWTCL-only features. */
@@ -302,6 +303,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
@@ -358,6 +361,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 32ce76a9e07..9a97de9965e 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -343,6 +343,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* Stream output. */
@@ -510,6 +511,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ /* due to a bug in the shader compiler, some loops hang
+ * if they are not unrolled, see:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=86720
+ */
+ return 255;
}
return 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index d5c5db30029..082ea850675 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
- util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
- util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
- util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
- util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
- util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
+ util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
+ util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
+ util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
+ util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
+ util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 7d41e8d00e0..53062187b88 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -31,15 +31,15 @@
#include "ddebug/dd_util.h"
-static void si_dump_shader(struct si_shader_selector *sel, const char *name,
+static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
FILE *f)
{
- if (!sel || !sel->current)
+ if (!state->cso || !state->current)
return;
fprintf(f, "%s shader disassembly:\n", name);
- si_dump_shader_key(sel->type, &sel->current->key, f);
- fprintf(f, "%s\n\n", sel->current->binary.disasm_string);
+ si_dump_shader_key(state->cso->type, &state->current->key, f);
+ fprintf(f, "%s\n\n", state->current->binary.disasm_string);
}
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
@@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
if (flags & PIPE_DEBUG_DEVICE_IS_HUNG)
si_dump_debug_registers(sctx, f);
- si_dump_shader(sctx->vs_shader, "Vertex", f);
- si_dump_shader(sctx->tcs_shader, "Tessellation control", f);
- si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f);
- si_dump_shader(sctx->gs_shader, "Geometry", f);
- si_dump_shader(sctx->ps_shader, "Fragment", f);
+ si_dump_shader(&sctx->vs_shader, "Vertex", f);
+ si_dump_shader(&sctx->tcs_shader, "Tessellation control", f);
+ si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f);
+ si_dump_shader(&sctx->gs_shader, "Geometry", f);
+ si_dump_shader(&sctx->ps_shader, "Fragment", f);
si_dump_last_bo_list(sctx, f);
si_dump_last_ib(sctx, f);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 19dd14f9b6f..13738da5e2c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx,
void si_shader_change_notify(struct si_context *sctx)
{
/* VS can be bound as VS, ES, or LS. */
- if (sctx->tes_shader)
+ if (sctx->tes_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B530_SPI_SHADER_USER_DATA_LS_0);
- else if (sctx->gs_shader)
+ else if (sctx->gs_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
else
@@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx)
R_00B130_SPI_SHADER_USER_DATA_VS_0);
/* TES can be bound as ES, VS, or not bound. */
- if (sctx->tes_shader) {
- if (sctx->gs_shader)
+ if (sctx->tes_shader.cso) {
+ if (sctx->gs_shader.cso)
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
else
@@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
unsigned i;
uint32_t *sh_base = sctx->shader_userdata.sh_base;
- if (sctx->gs_shader) {
+ if (sctx->gs_shader.cso) {
/* The VS copy shader needs these for clipping, streamout, and rings. */
unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
unsigned i = PIPE_SHADER_VERTEX;
@@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
/* The TESSEVAL shader needs this for the ESGS ring buffer. */
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
- } else if (sctx->tes_shader) {
+ } else if (sctx->tes_shader.cso) {
/* The TESSEVAL shader needs this for streamout. */
si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 53c80dba602..5f910c95ef3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
if (sctx->dummy_pixel_shader)
sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
- if (sctx->fixed_func_tcs_shader)
- sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
+ if (sctx->fixed_func_tcs_shader.cso)
+ sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
if (sctx->custom_dsa_flush)
sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
if (sctx->custom_blend_resolve)
@@ -293,7 +293,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_TXQS:
@@ -335,7 +337,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
@@ -507,6 +508,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
return 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2abd5b5a0c3..d7a2282952a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -152,6 +152,15 @@ struct si_viewports {
struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
};
+/* A shader state consists of the shader selector, which is a constant state
+ * object shared by multiple contexts and shouldn't be modified, and
+ * the current shader variant selected for this context.
+ */
+struct si_shader_ctx_state {
+ struct si_shader_selector *cso;
+ struct si_shader *current;
+};
+
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@@ -162,7 +171,7 @@ struct si_context {
void *pstipple_sampler_state;
struct si_screen *screen;
struct pipe_fence_handle *last_gfx_fence;
- struct si_shader_selector *fixed_func_tcs_shader;
+ struct si_shader_ctx_state fixed_func_tcs_shader;
LLVMTargetMachineRef tm;
/* Atoms (direct states). */
@@ -199,11 +208,11 @@ struct si_context {
void *dummy_pixel_shader;
/* shaders */
- struct si_shader_selector *ps_shader;
- struct si_shader_selector *gs_shader;
- struct si_shader_selector *vs_shader;
- struct si_shader_selector *tcs_shader;
- struct si_shader_selector *tes_shader;
+ struct si_shader_ctx_state ps_shader;
+ struct si_shader_ctx_state gs_shader;
+ struct si_shader_ctx_state vs_shader;
+ struct si_shader_ctx_state tcs_shader;
+ struct si_shader_ctx_state tes_shader;
struct si_cs_shader_state cs_shader_state;
/* shader information */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 54dad726d01..fd5500c1ab3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -179,15 +179,18 @@ struct radeon_shader_reloc;
struct si_shader;
+/* A shader selector is a gallium CSO and contains shader variants and
+ * binaries for one TGSI program. This can be shared by multiple contexts.
+ */
struct si_shader_selector {
- struct si_shader *current;
+ pipe_mutex mutex;
+ struct si_shader *first_variant; /* immutable after the first variant */
+ struct si_shader *last_variant; /* mutable */
struct tgsi_token *tokens;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
- unsigned num_shaders;
-
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
@@ -241,7 +244,7 @@ union si_shader_key {
uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
- unsigned export_prim_id; /* when PS needs it and GS is disabled */
+ unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} vs;
struct {
unsigned prim_mode:3;
@@ -252,7 +255,7 @@ union si_shader_key {
* This describes how outputs are laid out in memory. */
uint64_t es_enabled_outputs;
unsigned as_es:1; /* export shader */
- unsigned export_prim_id; /* when PS needs it and GS is disabled */
+ unsigned export_prim_id:1; /* when PS needs it and GS is disabled */
} tes; /* tessellation evaluation shader */
};
@@ -293,24 +296,24 @@ struct si_shader {
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
{
- if (sctx->gs_shader)
- return &sctx->gs_shader->info;
- else if (sctx->tes_shader)
- return &sctx->tes_shader->info;
- else if (sctx->vs_shader)
- return &sctx->vs_shader->info;
+ if (sctx->gs_shader.cso)
+ return &sctx->gs_shader.cso->info;
+ else if (sctx->tes_shader.cso)
+ return &sctx->tes_shader.cso->info;
+ else if (sctx->vs_shader.cso)
+ return &sctx->vs_shader.cso->info;
else
return NULL;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
{
- if (sctx->gs_shader)
- return sctx->gs_shader->current->gs_copy_shader;
- else if (sctx->tes_shader)
- return sctx->tes_shader->current;
+ if (sctx->gs_shader.current)
+ return sctx->gs_shader.current->gs_copy_shader;
+ else if (sctx->tes_shader.current)
+ return sctx->tes_shader.current;
else
- return sctx->vs_shader->current;
+ return sctx->vs_shader.current;
}
static inline bool si_vs_exports_prim_id(struct si_shader *shader)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e6475364f98..243bdc6e6d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
* Reproducible with Unigine Heaven 4.0 and drirc missing.
*/
if (blend->dual_src_blend &&
- (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+ (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
mask = 0;
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
@@ -1535,9 +1535,14 @@ static unsigned si_tex_compare(unsigned compare)
}
}
-static unsigned si_tex_dim(unsigned dim, unsigned nr_samples)
+static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
+ unsigned nr_samples)
{
- switch (dim) {
+ if (view_target == PIPE_TEXTURE_CUBE ||
+ view_target == PIPE_TEXTURE_CUBE_ARRAY)
+ res_target = view_target;
+
+ switch (res_target) {
default:
case PIPE_TEXTURE_1D:
return V_008F1C_SQ_RSRC_IMG_1D;
@@ -2391,6 +2396,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
struct radeon_surf_level *surflevel;
int first_non_void;
uint64_t va;
+ unsigned last_layer = state->u.tex.last_layer;
if (view == NULL)
return NULL;
@@ -2596,6 +2602,13 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
depth = texture->array_size / 6;
+ /* This is not needed if state trackers set last_layer correctly. */
+ if (state->target == PIPE_TEXTURE_1D ||
+ state->target == PIPE_TEXTURE_2D ||
+ state->target == PIPE_TEXTURE_RECT ||
+ state->target == PIPE_TEXTURE_CUBE)
+ last_layer = state->u.tex.first_layer;
+
va = tmp->resource.gpu_address + surflevel[base_level].offset;
view->state[0] = va >> 8;
@@ -2615,10 +2628,11 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
last_level) |
S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
S_008F1C_POW2_PAD(texture->last_level > 0) |
- S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
+ S_008F1C_TYPE(si_tex_dim(texture->target, state->target,
+ texture->nr_samples)));
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
- S_008F24_LAST_ARRAY(state->u.tex.last_layer));
+ S_008F24_LAST_ARRAY(last_layer));
view->state[6] = 0;
view->state[7] = 0;
@@ -2653,11 +2667,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
- S_008F1C_TYPE(si_tex_dim(texture->target, 0));
+ S_008F1C_TYPE(si_tex_dim(texture->target,
+ state->target, 0));
view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
S_008F20_PITCH(tmp->fmask.pitch - 1);
view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
- S_008F24_LAST_ARRAY(state->u.tex.last_layer);
+ S_008F24_LAST_ARRAY(last_layer);
view->fmask_state[6] = 0;
view->fmask_state[7] = 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 5face423941..ce6c98c3124 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
unsigned *num_patches)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- struct si_shader_selector *ls = sctx->vs_shader;
+ struct si_shader_ctx_state *ls = &sctx->vs_shader;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
- sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+ sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
- num_tcs_inputs = util_last_bit64(ls->outputs_written);
+ num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
- if (sctx->tcs_shader) {
+ if (sctx->tcs_shader.cso) {
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
@@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
- output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
+ output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
@@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
bool partial_vs_wave = false;
bool partial_es_wave = false;
- if (sctx->gs_shader)
+ if (sctx->gs_shader.cso)
primgroup_size = 64; /* recommended with a GS */
- if (sctx->tes_shader) {
+ if (sctx->tes_shader.cso) {
unsigned num_cp_out =
- sctx->tcs_shader ?
- sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+ sctx->tcs_shader.cso ?
+ sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
info->vertices_per_patch;
unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
@@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* SWITCH_ON_EOI must be set if PrimID is used.
* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
- sctx->tes_shader->info.uses_primid) {
+ if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) ||
+ sctx->tes_shader.cso->info.uses_primid) {
ia_switch_on_eoi = true;
partial_es_wave = true;
}
@@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
if ((sctx->b.family == CHIP_TAHITI ||
sctx->b.family == CHIP_PITCAIRN ||
sctx->b.family == CHIP_BONAIRE) &&
- sctx->gs_shader)
+ sctx->gs_shader.cso)
partial_vs_wave = true;
}
@@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
{
unsigned num_output_cp;
- if (!sctx->tes_shader)
+ if (!sctx->tes_shader.cso)
return 0;
- num_output_cp = sctx->tcs_shader ?
- sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+ num_output_cp = sctx->tcs_shader.cso ?
+ sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
info->vertices_per_patch;
return S_028B58_NUM_PATCHES(num_patches) |
@@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
- if (sctx->tes_shader)
+ if (sctx->tes_shader.cso)
si_emit_derived_tess_state(sctx, info, &num_patches);
ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
@@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader || !sctx->vs_shader) {
+ if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) {
assert(0);
return;
}
- if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
+ if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) {
assert(0);
return;
}
@@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* This must be done after si_decompress_textures, which can call
* draw_vbo recursively, and before si_update_shaders, which uses
* current_rast_prim for this draw_vbo call. */
- if (sctx->gs_shader)
- sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
- else if (sctx->tes_shader)
+ if (sctx->gs_shader.cso)
+ sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim;
+ else if (sctx->tes_shader.cso)
sctx->current_rast_prim =
- sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
else
sctx->current_rast_prim = info->mode;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c98509bb0b9..eea00e0fafc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -404,6 +404,7 @@ static void si_shader_ps(struct si_shader *shader)
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = 0;
uint64_t va;
+ bool has_centroid;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -435,8 +436,11 @@ static void si_shader_ps(struct si_shader *shader)
}
}
+ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
+ G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
+
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
- S_0286D8_BC_OPTIMIZE_DISABLE(1);
+ S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
@@ -523,26 +527,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
- if (sctx->tes_shader)
+ if (sctx->tes_shader.cso)
key->vs.as_ls = 1;
- else if (sctx->gs_shader) {
+ else if (sctx->gs_shader.cso) {
key->vs.as_es = 1;
- key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
+ key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
}
- if (!sctx->gs_shader && sctx->ps_shader &&
- sctx->ps_shader->info.uses_primid)
+ if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
+ sctx->ps_shader.cso->info.uses_primid)
key->vs.export_prim_id = 1;
break;
case PIPE_SHADER_TESS_CTRL:
key->tcs.prim_mode =
- sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
break;
case PIPE_SHADER_TESS_EVAL:
- if (sctx->gs_shader) {
+ if (sctx->gs_shader.cso) {
key->tes.as_es = 1;
- key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
- } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+ key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
+ } else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
@@ -589,11 +593,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
/* Select the hw shader variant depending on the current state. */
static int si_shader_select(struct pipe_context *ctx,
- struct si_shader_selector *sel)
+ struct si_shader_ctx_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = state->cso;
+ struct si_shader *current = state->current;
union si_shader_key key;
- struct si_shader * shader = NULL;
+ struct si_shader *iter, *shader = NULL;
int r;
si_shader_selector_key(ctx, sel, &key);
@@ -602,49 +608,51 @@ static int si_shader_select(struct pipe_context *ctx,
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
- if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) {
+	if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
return 0;
- }
- /* lookup if we have other variants in the list */
- if (sel->num_shaders > 1) {
- struct si_shader *p = sel->current, *c = p->next_variant;
+ pipe_mutex_lock(sel->mutex);
- while (c && memcmp(&c->key, &key, sizeof(key)) != 0) {
- p = c;
- c = c->next_variant;
- }
-
- if (c) {
- p->next_variant = c->next_variant;
- shader = c;
+ /* Find the shader variant. */
+ for (iter = sel->first_variant; iter; iter = iter->next_variant) {
+ /* Don't check the "current" shader. We checked it above. */
+ if (current != iter &&
+ memcmp(&iter->key, &key, sizeof(key)) == 0) {
+ state->current = iter;
+ pipe_mutex_unlock(sel->mutex);
+ return 0;
}
}
- if (shader) {
- shader->next_variant = sel->current;
- sel->current = shader;
+ /* Build a new shader. */
+ shader = CALLOC_STRUCT(si_shader);
+ if (!shader) {
+ pipe_mutex_unlock(sel->mutex);
+ return -ENOMEM;
+ }
+ shader->selector = sel;
+ shader->key = key;
+
+ r = si_shader_create(sctx->screen, sctx->tm, shader);
+ if (unlikely(r)) {
+ R600_ERR("Failed to build shader variant (type=%u) %d\n",
+ sel->type, r);
+ FREE(shader);
+ pipe_mutex_unlock(sel->mutex);
+ return r;
+ }
+ si_shader_init_pm4_state(shader);
+
+ if (!sel->last_variant) {
+ sel->first_variant = shader;
+ sel->last_variant = shader;
} else {
- shader = CALLOC(1, sizeof(struct si_shader));
- shader->selector = sel;
- shader->key = key;
-
- shader->next_variant = sel->current;
- sel->current = shader;
- r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
- shader);
- if (unlikely(r)) {
- R600_ERR("Failed to build shader variant (type=%u) %d\n",
- sel->type, r);
- sel->current = NULL;
- FREE(shader);
- return r;
- }
- si_shader_init_pm4_state(shader);
- sel->num_shaders++;
- p_atomic_inc(&sctx->screen->b.num_compilations);
+ sel->last_variant->next_variant = shader;
+ sel->last_variant = shader;
}
-
+ state->current = shader;
+ p_atomic_inc(&sctx->screen->b.num_compilations);
+ pipe_mutex_unlock(sel->mutex);
return 0;
}
@@ -752,14 +760,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
break;
}
- if (sscreen->b.debug_flags & DBG_PRECOMPILE)
- if (si_shader_select(ctx, sel)) {
+ if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
+ struct si_shader_ctx_state state = {sel};
+
+ if (si_shader_select(ctx, &state)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);
return NULL;
}
+ }
+ pipe_mutex_init(sel->mutex);
return sel;
}
@@ -787,10 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- if (sctx->vs_shader == sel || !sel)
+ if (sctx->vs_shader.cso == sel || !sel)
return;
- sctx->vs_shader = sel;
+ sctx->vs_shader.cso = sel;
+ sctx->vs_shader.current = sel->first_variant;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
si_update_viewports_and_scissors(sctx);
}
@@ -799,12 +812,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- bool enable_changed = !!sctx->gs_shader != !!sel;
+ bool enable_changed = !!sctx->gs_shader.cso != !!sel;
- if (sctx->gs_shader == sel)
+ if (sctx->gs_shader.cso == sel)
return;
- sctx->gs_shader = sel;
+ sctx->gs_shader.cso = sel;
+ sctx->gs_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
@@ -817,12 +831,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- bool enable_changed = !!sctx->tcs_shader != !!sel;
+ bool enable_changed = !!sctx->tcs_shader.cso != !!sel;
- if (sctx->tcs_shader == sel)
+ if (sctx->tcs_shader.cso == sel)
return;
- sctx->tcs_shader = sel;
+ sctx->tcs_shader.cso = sel;
+ sctx->tcs_shader.current = sel ? sel->first_variant : NULL;
if (enable_changed)
sctx->last_tcs = NULL; /* invalidate derived tess state */
@@ -832,12 +847,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
- bool enable_changed = !!sctx->tes_shader != !!sel;
+ bool enable_changed = !!sctx->tes_shader.cso != !!sel;
- if (sctx->tes_shader == sel)
+ if (sctx->tes_shader.cso == sel)
return;
- sctx->tes_shader = sel;
+ sctx->tes_shader.cso = sel;
+ sctx->tes_shader.current = sel ? sel->first_variant : NULL;
si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
@@ -864,7 +880,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
struct si_shader_selector *sel = state;
/* skip if supplied shader is one already in use */
- if (sctx->ps_shader == sel)
+ if (sctx->ps_shader.cso == sel)
return;
/* use a dummy shader if binding a NULL shader */
@@ -873,7 +889,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sel = sctx->dummy_pixel_shader;
}
- sctx->ps_shader = sel;
+ sctx->ps_shader.cso = sel;
+ sctx->ps_shader.current = sel->first_variant;
si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
}
@@ -881,8 +898,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = (struct si_shader_selector *)state;
- struct si_shader *p = sel->current, *c;
- struct si_shader_selector **current_shader[SI_NUM_SHADERS] = {
+ struct si_shader *p = sel->first_variant, *c;
+ struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
[PIPE_SHADER_VERTEX] = &sctx->vs_shader,
[PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader,
[PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader,
@@ -890,8 +907,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
[PIPE_SHADER_FRAGMENT] = &sctx->ps_shader,
};
- if (*current_shader[sel->type] == sel)
- *current_shader[sel->type] = NULL;
+ if (current_shader[sel->type]->cso == sel) {
+ current_shader[sel->type]->cso = NULL;
+ current_shader[sel->type]->current = NULL;
+ }
while (p) {
c = p->next_variant;
@@ -927,6 +946,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
p = c;
}
+ pipe_mutex_destroy(sel->mutex);
free(sel->tokens);
free(sel);
}
@@ -934,7 +954,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- struct si_shader *ps = sctx->ps_shader->current;
+ struct si_shader *ps = sctx->ps_shader.current;
struct si_shader *vs = si_get_vs_state(sctx);
struct tgsi_shader_info *psinfo = &ps->selector->info;
struct tgsi_shader_info *vsinfo = &vs->selector->info;
@@ -1004,7 +1024,7 @@ bcolor:
static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- struct si_shader *ps = sctx->ps_shader->current;
+ struct si_shader *ps = sctx->ps_shader.current;
unsigned input_ena = ps->spi_ps_input_ena;
/* we need to enable at least one of them, otherwise we hang the GPU */
@@ -1133,7 +1153,7 @@ static void si_init_gs_rings(struct si_context *sctx)
static void si_update_gs_rings(struct si_context *sctx)
{
- unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize;
+ unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
uint64_t offset;
if (gsvs_itemsize == sctx->last_gsvs_itemsize)
@@ -1167,17 +1187,14 @@ static void si_update_gs_rings(struct si_context *sctx)
* < 0 if there was a failure
*/
static int si_update_scratch_buffer(struct si_context *sctx,
- struct si_shader_selector *sel)
+ struct si_shader *shader)
{
- struct si_shader *shader;
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
int r;
- if (!sel)
+ if (!shader)
return 0;
- shader = sel->current;
-
/* This shader doesn't need a scratch buffer */
if (shader->scratch_bytes_per_wave == 0)
return 0;
@@ -1209,20 +1226,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
}
-static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel)
+static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
- return sel ? sel->current->scratch_bytes_per_wave : 0;
+ return shader ? shader->scratch_bytes_per_wave : 0;
}
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
unsigned bytes = 0;
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
return bytes;
}
@@ -1256,46 +1273,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
* last used, so we still need to try to update them, even if
* they require scratch buffers smaller than the current size.
*/
- r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+ r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
if (r < 0)
return false;
if (r == 1)
- si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
- r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+ r = si_update_scratch_buffer(sctx, sctx->gs_shader.current);
if (r < 0)
return false;
if (r == 1)
- si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+ si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
- r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+ r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current);
if (r < 0)
return false;
if (r == 1)
- si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
/* VS can be bound as LS, ES, or VS. */
- r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+ r = si_update_scratch_buffer(sctx, sctx->vs_shader.current);
if (r < 0)
return false;
if (r == 1) {
- if (sctx->tes_shader)
- si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
- else if (sctx->gs_shader)
- si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ if (sctx->tes_shader.current)
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
+ else if (sctx->gs_shader.current)
+ si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
else
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
}
/* TES can be bound as ES or VS. */
- r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+ r = si_update_scratch_buffer(sctx, sctx->tes_shader.current);
if (r < 0)
return false;
if (r == 1) {
- if (sctx->gs_shader)
- si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ if (sctx->gs_shader.current)
+ si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
else
- si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
}
}
@@ -1361,7 +1378,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
if (!ureg)
return; /* if we get here, we're screwed */
- assert(!sctx->fixed_func_tcs_shader);
+ assert(!sctx->fixed_func_tcs_shader.cso);
ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
@@ -1376,7 +1393,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
ureg_MOV(ureg, tessinner, const1);
ureg_END(ureg);
- sctx->fixed_func_tcs_shader =
+ sctx->fixed_func_tcs_shader.cso =
ureg_create_shader_and_destroy(ureg, &sctx->b.b);
}
@@ -1384,7 +1401,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
{
/* Calculate the index of the config.
* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
- unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
+ unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso;
struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
if (!*pm4) {
@@ -1392,17 +1409,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
*pm4 = CALLOC_STRUCT(si_pm4_state);
- if (sctx->tes_shader) {
+ if (sctx->tes_shader.cso) {
stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
S_028B54_HS_EN(1);
- if (sctx->gs_shader)
+ if (sctx->gs_shader.cso)
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
else
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
- } else if (sctx->gs_shader) {
+ } else if (sctx->gs_shader.cso) {
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
@@ -1432,7 +1449,7 @@ bool si_update_shaders(struct si_context *sctx)
int r;
/* Update stages before GS. */
- if (sctx->tes_shader) {
+ if (sctx->tes_shader.cso) {
if (!sctx->tf_ring) {
si_init_tess_factor_ring(sctx);
if (!sctx->tf_ring)
@@ -1440,65 +1457,65 @@ bool si_update_shaders(struct si_context *sctx)
}
/* VS as LS */
- r = si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4);
- if (sctx->tcs_shader) {
- r = si_shader_select(ctx, sctx->tcs_shader);
+ if (sctx->tcs_shader.cso) {
+ r = si_shader_select(ctx, &sctx->tcs_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4);
} else {
- if (!sctx->fixed_func_tcs_shader) {
+ if (!sctx->fixed_func_tcs_shader.cso) {
si_generate_fixed_func_tcs(sctx);
- if (!sctx->fixed_func_tcs_shader)
+ if (!sctx->fixed_func_tcs_shader.cso)
return false;
}
- r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+ r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
if (r)
return false;
si_pm4_bind_state(sctx, hs,
- sctx->fixed_func_tcs_shader->current->pm4);
+ sctx->fixed_func_tcs_shader.current->pm4);
}
- r = si_shader_select(ctx, sctx->tes_shader);
+ r = si_shader_select(ctx, &sctx->tes_shader);
if (r)
return false;
- if (sctx->gs_shader) {
+ if (sctx->gs_shader.cso) {
/* TES as ES */
- si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4);
} else {
/* TES as VS */
- si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
- si_update_so(sctx, sctx->tes_shader);
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
+ si_update_so(sctx, sctx->tes_shader.cso);
}
- } else if (sctx->gs_shader) {
+ } else if (sctx->gs_shader.cso) {
/* VS as ES */
- r = si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4);
} else {
/* VS as VS */
- r = si_shader_select(ctx, sctx->vs_shader);
+ r = si_shader_select(ctx, &sctx->vs_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
- si_update_so(sctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4);
+ si_update_so(sctx, sctx->vs_shader.cso);
}
/* Update GS. */
- if (sctx->gs_shader) {
- r = si_shader_select(ctx, sctx->gs_shader);
+ if (sctx->gs_shader.cso) {
+ r = si_shader_select(ctx, &sctx->gs_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
- si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
- si_update_so(sctx, sctx->gs_shader);
+ si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4);
+ si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
+ si_update_so(sctx, sctx->gs_shader.cso);
if (!sctx->gsvs_ring) {
si_init_gs_rings(sctx);
@@ -1514,10 +1531,10 @@ bool si_update_shaders(struct si_context *sctx)
si_update_vgt_shader_config(sctx);
- r = si_shader_select(ctx, sctx->ps_shader);
+ r = si_shader_select(ctx, &sctx->ps_shader);
if (r)
return false;
- si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+ si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
sctx->sprite_coord_enable != rs->sprite_coord_enable ||
@@ -1543,13 +1560,13 @@ bool si_update_shaders(struct si_context *sctx)
return false;
}
- if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
- sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
+ if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) {
+ sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
- sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
+ if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) {
+ sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index d468cf4de54..e7006d2fa0d 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -248,6 +248,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index dab89814334..f6fafca5c0b 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -381,6 +381,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
}
@@ -455,6 +456,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
/* If we get here, we failed to handle a cap above */
debug_printf("Unexpected fragment shader query %u\n", param);
@@ -511,6 +514,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
}
/* If we get here, we failed to handle a cap above */
debug_printf("Unexpected vertex shader query %u\n", param);
@@ -600,6 +605,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
debug_printf("Unexpected vgpu10 shader query %u\n", param);
return 0;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index a842d604a51..17b524653bb 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -393,7 +393,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
continue;
nir_variable *output_var = NULL;
- foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ nir_foreach_variable(var, &c->s->outputs) {
if (var->data.driver_location == intr->const_index[0]) {
output_var = var;
break;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index a98d70da7d8..caf706aa2a6 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -23,6 +23,7 @@
#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"
+#include "util/u_format.h"
/**
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
@@ -50,20 +51,188 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
nir_instr_remove(&intr->instr);
}
+static nir_ssa_def *
+vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_ubitfield_extract(b,
+ src,
+ nir_imm_int(b, 8 * chan),
+ nir_imm_int(b, 8));
+}
+
+/** Returns the 16 bit field as a sign-extended 32-bit value. */
+static nir_ssa_def *
+vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_ibitfield_extract(b,
+ src,
+ nir_imm_int(b, 16 * chan),
+ nir_imm_int(b, 16));
+}
+
+/** Returns the 16 bit field as an unsigned 32 bit value. */
+static nir_ssa_def *
+vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ if (chan == 0) {
+ return nir_iand(b, src, nir_imm_int(b, 0xffff));
+ } else {
+ return nir_ushr(b, src, nir_imm_int(b, 16));
+ }
+}
+
+static nir_ssa_def *
+vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
+}
+
+static nir_ssa_def *
+vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
+ nir_builder *b,
+ nir_ssa_def **vpm_reads,
+ uint8_t swiz,
+ const struct util_format_description *desc)
+{
+ const struct util_format_channel_description *chan =
+ &desc->channel[swiz];
+ nir_ssa_def *temp;
+
+ if (swiz > UTIL_FORMAT_SWIZZLE_W) {
+ return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
+ } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
+ return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
+ } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (chan->normalized) {
+ return nir_fmul(b,
+ nir_i2f(b, vpm_reads[swiz]),
+ nir_imm_float(b,
+ 1.0 / 0x7fffffff));
+ } else {
+ return nir_i2f(b, vpm_reads[swiz]);
+ }
+ } else if (chan->size == 8 &&
+ (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
+ nir_ssa_def *vpm = vpm_reads[0];
+ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
+ if (chan->normalized) {
+ return nir_fsub(b, nir_fmul(b,
+ vc4_nir_unpack_8f(b, temp, swiz),
+ nir_imm_float(b, 2.0)),
+ nir_imm_float(b, 1.0));
+ } else {
+ return nir_fadd(b,
+ nir_i2f(b,
+ vc4_nir_unpack_8i(b, temp,
+ swiz)),
+ nir_imm_float(b, -128.0));
+ }
+ } else {
+ if (chan->normalized) {
+ return vc4_nir_unpack_8f(b, vpm, swiz);
+ } else {
+ return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
+ }
+ }
+ } else if (chan->size == 16 &&
+ (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
+ nir_ssa_def *vpm = vpm_reads[swiz / 2];
+
+ /* Note that UNPACK_16F eats a half float, not ints, so we use
+ * UNPACK_16_I for all of these.
+ */
+ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
+ if (chan->normalized) {
+ return nir_fmul(b, temp,
+ nir_imm_float(b, 1/32768.0f));
+ } else {
+ return temp;
+ }
+ } else {
+ temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
+ if (chan->normalized) {
+ return nir_fmul(b, temp,
+ nir_imm_float(b, 1 / 65535.0));
+ } else {
+ return temp;
+ }
+ }
+ } else {
+ return NULL;
+ }
+}
+
static void
-vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
- nir_intrinsic_instr *intr)
+vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
- if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
- VC4_NIR_TLB_COLOR_READ_INPUT) {
+ int attr = intr->const_index[0];
+ enum pipe_format format = c->vs_key->attr_formats[attr];
+ uint32_t attr_size = util_format_get_blocksize(format);
+
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ /* Generate dword loads for the VPM values (Since these intrinsics may
+ * be reordered, the actual reads will be generated at the top of the
+	 * shader by ntq_setup_inputs()).
+ */
+ nir_ssa_def *vpm_reads[4];
+ for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s,
+ nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ vpm_reads[i] = &intr_comp->dest.ssa;
+ }
+
+ bool format_warned = false;
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ nir_ssa_def *dests[4];
+ for (int i = 0; i < 4; i++) {
+ uint8_t swiz = desc->swizzle[i];
+ dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
+ desc);
+
+ if (!dests[i]) {
+ if (!format_warned) {
+ fprintf(stderr,
+ "vtx element %d unsupported type: %s\n",
+ attr, util_format_name(format));
+ format_warned = true;
+ }
+ dests[i] = nir_imm_float(b, 0.0);
+ }
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+
+ if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
/* This doesn't need any lowering. */
return;
}
nir_variable *input_var = NULL;
- foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
+ nir_foreach_variable(var, &c->s->inputs) {
if (var->data.driver_location == intr->const_index[0]) {
input_var = var;
break;
@@ -87,38 +256,31 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
dests[i] = &intr_comp->dest.ssa;
}
- switch (c->stage) {
- case QSTAGE_FRAG:
- if (input_var->data.location == VARYING_SLOT_FACE) {
- dests[0] = nir_fsub(b,
- nir_imm_float(b, 1.0),
- nir_fmul(b,
- nir_i2f(b, dests[0]),
- nir_imm_float(b, 2.0)));
- dests[1] = nir_imm_float(b, 0.0);
+ if (input_var->data.location == VARYING_SLOT_FACE) {
+ dests[0] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ nir_fmul(b,
+ nir_i2f(b, dests[0]),
+ nir_imm_float(b, 2.0)));
+ dests[1] = nir_imm_float(b, 0.0);
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
+ if (c->fs_key->point_sprite_mask &
+ (1 << (input_var->data.location -
+ VARYING_SLOT_VAR0))) {
+ if (!c->fs_key->is_points) {
+ dests[0] = nir_imm_float(b, 0.0);
+ dests[1] = nir_imm_float(b, 0.0);
+ }
+ if (c->fs_key->point_coord_upper_left) {
+ dests[1] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dests[1]);
+ }
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
- } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
- if (c->fs_key->point_sprite_mask &
- (1 << (input_var->data.location -
- VARYING_SLOT_VAR0))) {
- if (!c->fs_key->is_points) {
- dests[0] = nir_imm_float(b, 0.0);
- dests[1] = nir_imm_float(b, 0.0);
- }
- if (c->fs_key->point_coord_upper_left) {
- dests[1] = nir_fsub(b,
- nir_imm_float(b, 1.0),
- dests[1]);
- }
- dests[2] = nir_imm_float(b, 0.0);
- dests[3] = nir_imm_float(b, 1.0);
- }
}
- break;
- case QSTAGE_COORD:
- case QSTAGE_VERT:
- break;
}
replace_intrinsic_with_vec4(b, intr, dests);
@@ -129,7 +291,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
nir_variable *output_var = NULL;
- foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ nir_foreach_variable(var, &c->s->outputs) {
if (var->data.driver_location == intr->const_index[0]) {
output_var = var;
break;
@@ -232,7 +394,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
- vc4_nir_lower_input(c, b, intr);
+ if (c->stage == QSTAGE_FRAG)
+ vc4_nir_lower_fs_input(c, b, intr);
+ else
+ vc4_nir_lower_vertex_attr(c, b, intr);
break;
case nir_intrinsic_store_output:
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 31c7e28ff57..6e9ec6530c6 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -602,126 +602,18 @@ ntq_fsign(struct vc4_compile *c, struct qreg src)
qir_uniform_f(c, -1.0));
}
-static struct qreg
-get_channel_from_vpm(struct vc4_compile *c,
- struct qreg *vpm_reads,
- uint8_t swiz,
- const struct util_format_description *desc)
-{
- const struct util_format_channel_description *chan =
- &desc->channel[swiz];
- struct qreg temp;
-
- if (swiz > UTIL_FORMAT_SWIZZLE_W)
- return get_swizzled_channel(c, vpm_reads, swiz);
- else if (chan->size == 32 &&
- chan->type == UTIL_FORMAT_TYPE_FLOAT) {
- return get_swizzled_channel(c, vpm_reads, swiz);
- } else if (chan->size == 32 &&
- chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- if (chan->normalized) {
- return qir_FMUL(c,
- qir_ITOF(c, vpm_reads[swiz]),
- qir_uniform_f(c,
- 1.0 / 0x7fffffff));
- } else {
- return qir_ITOF(c, vpm_reads[swiz]);
- }
- } else if (chan->size == 8 &&
- (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
- chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
- struct qreg vpm = vpm_reads[0];
- if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
- if (chan->normalized) {
- return qir_FSUB(c, qir_FMUL(c,
- qir_UNPACK_8_F(c, temp, swiz),
- qir_uniform_f(c, 2.0)),
- qir_uniform_f(c, 1.0));
- } else {
- return qir_FADD(c,
- qir_ITOF(c,
- qir_UNPACK_8_I(c, temp,
- swiz)),
- qir_uniform_f(c, -128.0));
- }
- } else {
- if (chan->normalized) {
- return qir_UNPACK_8_F(c, vpm, swiz);
- } else {
- return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
- }
- }
- } else if (chan->size == 16 &&
- (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
- chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
- struct qreg vpm = vpm_reads[swiz / 2];
-
- /* Note that UNPACK_16F eats a half float, not ints, so we use
- * UNPACK_16_I for all of these.
- */
- if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
- if (chan->normalized) {
- return qir_FMUL(c, temp,
- qir_uniform_f(c, 1/32768.0f));
- } else {
- return temp;
- }
- } else {
- /* UNPACK_16I sign-extends, so we have to emit ANDs. */
- temp = vpm;
- if (swiz == 1 || swiz == 3)
- temp = qir_UNPACK_16_I(c, temp, 1);
- temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
- temp = qir_ITOF(c, temp);
-
- if (chan->normalized) {
- return qir_FMUL(c, temp,
- qir_uniform_f(c, 1 / 65535.0));
- } else {
- return temp;
- }
- }
- } else {
- return c->undef;
- }
-}
-
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
enum pipe_format format = c->vs_key->attr_formats[attr];
uint32_t attr_size = util_format_get_blocksize(format);
- struct qreg vpm_reads[4];
c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
struct qreg vpm = { QFILE_VPM, attr * 4 + i };
- vpm_reads[i] = qir_MOV(c, vpm);
+ c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
c->num_inputs++;
}
-
- bool format_warned = false;
- const struct util_format_description *desc =
- util_format_description(format);
-
- for (int i = 0; i < 4; i++) {
- uint8_t swiz = desc->swizzle[i];
- struct qreg result = get_channel_from_vpm(c, vpm_reads,
- swiz, desc);
-
- if (result.file == QFILE_NULL) {
- if (!format_warned) {
- fprintf(stderr,
- "vtx element %d unsupported type: %s\n",
- attr, util_format_name(format));
- format_warned = true;
- }
- result = qir_uniform_f(c, 0.0);
- }
- c->inputs[attr * 4 + i] = result;
- }
}
static void
@@ -876,6 +768,40 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
*dest = result;
}
+/** Handles sign-extended bitfield extracts for 16 bits. */
+static struct qreg
+ntq_emit_ibfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
+ struct qreg bits)
+{
+ assert(bits.file == QFILE_UNIF &&
+ c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
+ c->uniform_data[bits.index] == 16);
+
+ assert(offset.file == QFILE_UNIF &&
+ c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
+ int offset_bit = c->uniform_data[offset.index];
+ assert(offset_bit % 16 == 0);
+
+ return qir_UNPACK_16_I(c, base, offset_bit / 16);
+}
+
+/** Handles unsigned bitfield extracts for 8 bits. */
+static struct qreg
+ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
+ struct qreg bits)
+{
+ assert(bits.file == QFILE_UNIF &&
+ c->uniform_contents[bits.index] == QUNIFORM_CONSTANT &&
+ c->uniform_data[bits.index] == 8);
+
+ assert(offset.file == QFILE_UNIF &&
+ c->uniform_contents[offset.index] == QUNIFORM_CONSTANT);
+ int offset_bit = c->uniform_data[offset.index];
+ assert(offset_bit % 8 == 0);
+
+ return qir_UNPACK_8_I(c, base, offset_bit / 8);
+}
+
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
@@ -1106,6 +1032,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
qir_SUB(c, qir_uniform_ui(c, 0), src[0]));
break;
+ case nir_op_ibitfield_extract:
+ *dest = ntq_emit_ibfe(c, src[0], src[1], src[2]);
+ break;
+
+ case nir_op_ubitfield_extract:
+ *dest = ntq_emit_ubfe(c, src[0], src[1], src[2]);
+ break;
+
default:
fprintf(stderr, "unknown NIR ALU inst: ");
nir_print_instr(&instr->instr, stderr);
@@ -1383,13 +1317,13 @@ static void
ntq_setup_inputs(struct vc4_compile *c)
{
unsigned num_entries = 0;
- foreach_list_typed(nir_variable, var, node, &c->s->inputs)
+ nir_foreach_variable(var, &c->s->inputs)
num_entries++;
nir_variable *vars[num_entries];
unsigned i = 0;
- foreach_list_typed(nir_variable, var, node, &c->s->inputs)
+ nir_foreach_variable(var, &c->s->inputs)
vars[i++] = var;
/* Sort the variables so that we emit the input setup in
@@ -1432,7 +1366,7 @@ ntq_setup_inputs(struct vc4_compile *c)
static void
ntq_setup_outputs(struct vc4_compile *c)
{
- foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ nir_foreach_variable(var, &c->s->outputs) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
unsigned loc = var->data.driver_location * 4;
@@ -1471,7 +1405,7 @@ ntq_setup_outputs(struct vc4_compile *c)
static void
ntq_setup_uniforms(struct vc4_compile *c)
{
- foreach_list_typed(nir_variable, var, node, &c->s->uniforms) {
+ nir_foreach_variable(var, &c->s->uniforms) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
unsigned array_elem_size = 4 * sizeof(float);
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 739ac86193a..774ec095652 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -182,6 +182,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
/* Stream output. */
@@ -336,6 +337,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return VC4_MAX_TEXTURE_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+ return 32;
default:
fprintf(stderr, "unknown shader param %d\n", param);
return 0;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index a4947154f17..1ad545aae09 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -632,6 +632,7 @@ enum pipe_cap
PIPE_CAP_DEPTH_BOUNDS_TEST,
PIPE_CAP_TGSI_TXQS,
PIPE_CAP_FORCE_PERSAMPLE_INTERP,
+ PIPE_CAP_SHAREABLE_SHADERS,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@@ -696,7 +697,8 @@ enum pipe_shader_cap
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
- PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
+ PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
+ PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
};
/**
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index b36e0a35b8d..e0ab9013dd5 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -267,7 +267,9 @@ union tgsi_immediate_data
#define TGSI_PROPERTY_TES_SPACING 12
#define TGSI_PROPERTY_TES_VERTEX_ORDER_CW 13
#define TGSI_PROPERTY_TES_POINT_MODE 14
-#define TGSI_PROPERTY_COUNT 15
+#define TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 15
+#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
+#define TGSI_PROPERTY_COUNT 17
struct tgsi_property {
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
index 18d88039579..f66ed896e62 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
}
- priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
- priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
- if (!priv->picture.h264.field_pic_flag)
- priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
+ if (!priv->picture.h264.field_pic_flag) {
+ priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+ priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
+ priv->codec_data.h264.delta_pic_order_cnt_bottom;
+ } else if (!priv->picture.h264.bottom_field_flag)
+ priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+ else
+ priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
} else if (sps->pic_order_cnt_type == 1) {
unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
diff --git a/src/gallium/targets/osmesa/osmesa.def b/src/gallium/targets/osmesa/osmesa.def
index e2a31ab5457..e347463de9f 100644
--- a/src/gallium/targets/osmesa/osmesa.def
+++ b/src/gallium/targets/osmesa/osmesa.def
@@ -14,3 +14,340 @@ EXPORTS
OSMesaGetProcAddress
OSMesaColorClamp
OSMesaPostprocess
+ glAccum
+ glAlphaFunc
+ glAreTexturesResident
+ glArrayElement
+ glBegin
+ glBindTexture
+ glBitmap
+ glBlendFunc
+ glCallList
+ glCallLists
+ glClear
+ glClearAccum
+ glClearColor
+ glClearDepth
+ glClearIndex
+ glClearStencil
+ glClipPlane
+ glColor3b
+ glColor3bv
+ glColor3d
+ glColor3dv
+ glColor3f
+ glColor3fv
+ glColor3i
+ glColor3iv
+ glColor3s
+ glColor3sv
+ glColor3ub
+ glColor3ubv
+ glColor3ui
+ glColor3uiv
+ glColor3us
+ glColor3usv
+ glColor4b
+ glColor4bv
+ glColor4d
+ glColor4dv
+ glColor4f
+ glColor4fv
+ glColor4i
+ glColor4iv
+ glColor4s
+ glColor4sv
+ glColor4ub
+ glColor4ubv
+ glColor4ui
+ glColor4uiv
+ glColor4us
+ glColor4usv
+ glColorMask
+ glColorMaterial
+ glColorPointer
+ glCopyPixels
+ glCopyTexImage1D
+ glCopyTexImage2D
+ glCopyTexSubImage1D
+ glCopyTexSubImage2D
+ glCullFace
+; glDebugEntry
+ glDeleteLists
+ glDeleteTextures
+ glDepthFunc
+ glDepthMask
+ glDepthRange
+ glDisable
+ glDisableClientState
+ glDrawArrays
+ glDrawBuffer
+ glDrawElements
+ glDrawPixels
+ glEdgeFlag
+ glEdgeFlagPointer
+ glEdgeFlagv
+ glEnable
+ glEnableClientState
+ glEnd
+ glEndList
+ glEvalCoord1d
+ glEvalCoord1dv
+ glEvalCoord1f
+ glEvalCoord1fv
+ glEvalCoord2d
+ glEvalCoord2dv
+ glEvalCoord2f
+ glEvalCoord2fv
+ glEvalMesh1
+ glEvalMesh2
+ glEvalPoint1
+ glEvalPoint2
+ glFeedbackBuffer
+ glFinish
+ glFlush
+ glFogf
+ glFogfv
+ glFogi
+ glFogiv
+ glFrontFace
+ glFrustum
+ glGenLists
+ glGenTextures
+ glGetBooleanv
+ glGetClipPlane
+ glGetDoublev
+ glGetError
+ glGetFloatv
+ glGetIntegerv
+ glGetLightfv
+ glGetLightiv
+ glGetMapdv
+ glGetMapfv
+ glGetMapiv
+ glGetMaterialfv
+ glGetMaterialiv
+ glGetPixelMapfv
+ glGetPixelMapuiv
+ glGetPixelMapusv
+ glGetPointerv
+ glGetPolygonStipple
+ glGetString
+ glGetTexEnvfv
+ glGetTexEnviv
+ glGetTexGendv
+ glGetTexGenfv
+ glGetTexGeniv
+ glGetTexImage
+ glGetTexLevelParameterfv
+ glGetTexLevelParameteriv
+ glGetTexParameterfv
+ glGetTexParameteriv
+ glHint
+ glIndexMask
+ glIndexPointer
+ glIndexd
+ glIndexdv
+ glIndexf
+ glIndexfv
+ glIndexi
+ glIndexiv
+ glIndexs
+ glIndexsv
+ glIndexub
+ glIndexubv
+ glInitNames
+ glInterleavedArrays
+ glIsEnabled
+ glIsList
+ glIsTexture
+ glLightModelf
+ glLightModelfv
+ glLightModeli
+ glLightModeliv
+ glLightf
+ glLightfv
+ glLighti
+ glLightiv
+ glLineStipple
+ glLineWidth
+ glListBase
+ glLoadIdentity
+ glLoadMatrixd
+ glLoadMatrixf
+ glLoadName
+ glLogicOp
+ glMap1d
+ glMap1f
+ glMap2d
+ glMap2f
+ glMapGrid1d
+ glMapGrid1f
+ glMapGrid2d
+ glMapGrid2f
+ glMaterialf
+ glMaterialfv
+ glMateriali
+ glMaterialiv
+ glMatrixMode
+ glMultMatrixd
+ glMultMatrixf
+ glNewList
+ glNormal3b
+ glNormal3bv
+ glNormal3d
+ glNormal3dv
+ glNormal3f
+ glNormal3fv
+ glNormal3i
+ glNormal3iv
+ glNormal3s
+ glNormal3sv
+ glNormalPointer
+ glOrtho
+ glPassThrough
+ glPixelMapfv
+ glPixelMapuiv
+ glPixelMapusv
+ glPixelStoref
+ glPixelStorei
+ glPixelTransferf
+ glPixelTransferi
+ glPixelZoom
+ glPointSize
+ glPolygonMode
+ glPolygonOffset
+ glPolygonStipple
+ glPopAttrib
+ glPopClientAttrib
+ glPopMatrix
+ glPopName
+ glPrioritizeTextures
+ glPushAttrib
+ glPushClientAttrib
+ glPushMatrix
+ glPushName
+ glRasterPos2d
+ glRasterPos2dv
+ glRasterPos2f
+ glRasterPos2fv
+ glRasterPos2i
+ glRasterPos2iv
+ glRasterPos2s
+ glRasterPos2sv
+ glRasterPos3d
+ glRasterPos3dv
+ glRasterPos3f
+ glRasterPos3fv
+ glRasterPos3i
+ glRasterPos3iv
+ glRasterPos3s
+ glRasterPos3sv
+ glRasterPos4d
+ glRasterPos4dv
+ glRasterPos4f
+ glRasterPos4fv
+ glRasterPos4i
+ glRasterPos4iv
+ glRasterPos4s
+ glRasterPos4sv
+ glReadBuffer
+ glReadPixels
+ glRectd
+ glRectdv
+ glRectf
+ glRectfv
+ glRecti
+ glRectiv
+ glRects
+ glRectsv
+ glRenderMode
+ glRotated
+ glRotatef
+ glScaled
+ glScalef
+ glScissor
+ glSelectBuffer
+ glShadeModel
+ glStencilFunc
+ glStencilMask
+ glStencilOp
+ glTexCoord1d
+ glTexCoord1dv
+ glTexCoord1f
+ glTexCoord1fv
+ glTexCoord1i
+ glTexCoord1iv
+ glTexCoord1s
+ glTexCoord1sv
+ glTexCoord2d
+ glTexCoord2dv
+ glTexCoord2f
+ glTexCoord2fv
+ glTexCoord2i
+ glTexCoord2iv
+ glTexCoord2s
+ glTexCoord2sv
+ glTexCoord3d
+ glTexCoord3dv
+ glTexCoord3f
+ glTexCoord3fv
+ glTexCoord3i
+ glTexCoord3iv
+ glTexCoord3s
+ glTexCoord3sv
+ glTexCoord4d
+ glTexCoord4dv
+ glTexCoord4f
+ glTexCoord4fv
+ glTexCoord4i
+ glTexCoord4iv
+ glTexCoord4s
+ glTexCoord4sv
+ glTexCoordPointer
+ glTexEnvf
+ glTexEnvfv
+ glTexEnvi
+ glTexEnviv
+ glTexGend
+ glTexGendv
+ glTexGenf
+ glTexGenfv
+ glTexGeni
+ glTexGeniv
+ glTexImage1D
+ glTexImage2D
+ glTexParameterf
+ glTexParameterfv
+ glTexParameteri
+ glTexParameteriv
+ glTexSubImage1D
+ glTexSubImage2D
+ glTranslated
+ glTranslatef
+ glVertex2d
+ glVertex2dv
+ glVertex2f
+ glVertex2fv
+ glVertex2i
+ glVertex2iv
+ glVertex2s
+ glVertex2sv
+ glVertex3d
+ glVertex3dv
+ glVertex3f
+ glVertex3fv
+ glVertex3i
+ glVertex3iv
+ glVertex3s
+ glVertex3sv
+ glVertex4d
+ glVertex4dv
+ glVertex4f
+ glVertex4fv
+ glVertex4i
+ glVertex4iv
+ glVertex4s
+ glVertex4sv
+ glVertexPointer
+ glViewport
diff --git a/src/gallium/targets/osmesa/osmesa.mingw.def b/src/gallium/targets/osmesa/osmesa.mingw.def
index 874ac544084..945201c9d83 100644
--- a/src/gallium/targets/osmesa/osmesa.mingw.def
+++ b/src/gallium/targets/osmesa/osmesa.mingw.def
@@ -11,3 +11,340 @@ EXPORTS
OSMesaGetProcAddress = OSMesaGetProcAddress@4
OSMesaColorClamp = OSMesaColorClamp@4
OSMesaPostprocess = OSMesaPostprocess@12
+ glAccum = glAccum@8
+ glAlphaFunc = glAlphaFunc@8
+ glAreTexturesResident = glAreTexturesResident@12
+ glArrayElement = glArrayElement@4
+ glBegin = glBegin@4
+ glBindTexture = glBindTexture@8
+ glBitmap = glBitmap@28
+ glBlendFunc = glBlendFunc@8
+ glCallList = glCallList@4
+ glCallLists = glCallLists@12
+ glClear = glClear@4
+ glClearAccum = glClearAccum@16
+ glClearColor = glClearColor@16
+ glClearDepth = glClearDepth@8
+ glClearIndex = glClearIndex@4
+ glClearStencil = glClearStencil@4
+ glClipPlane = glClipPlane@8
+ glColor3b = glColor3b@12
+ glColor3bv = glColor3bv@4
+ glColor3d = glColor3d@24
+ glColor3dv = glColor3dv@4
+ glColor3f = glColor3f@12
+ glColor3fv = glColor3fv@4
+ glColor3i = glColor3i@12
+ glColor3iv = glColor3iv@4
+ glColor3s = glColor3s@12
+ glColor3sv = glColor3sv@4
+ glColor3ub = glColor3ub@12
+ glColor3ubv = glColor3ubv@4
+ glColor3ui = glColor3ui@12
+ glColor3uiv = glColor3uiv@4
+ glColor3us = glColor3us@12
+ glColor3usv = glColor3usv@4
+ glColor4b = glColor4b@16
+ glColor4bv = glColor4bv@4
+ glColor4d = glColor4d@32
+ glColor4dv = glColor4dv@4
+ glColor4f = glColor4f@16
+ glColor4fv = glColor4fv@4
+ glColor4i = glColor4i@16
+ glColor4iv = glColor4iv@4
+ glColor4s = glColor4s@16
+ glColor4sv = glColor4sv@4
+ glColor4ub = glColor4ub@16
+ glColor4ubv = glColor4ubv@4
+ glColor4ui = glColor4ui@16
+ glColor4uiv = glColor4uiv@4
+ glColor4us = glColor4us@16
+ glColor4usv = glColor4usv@4
+ glColorMask = glColorMask@16
+ glColorMaterial = glColorMaterial@8
+ glColorPointer = glColorPointer@16
+ glCopyPixels = glCopyPixels@20
+ glCopyTexImage1D = glCopyTexImage1D@28
+ glCopyTexImage2D = glCopyTexImage2D@32
+ glCopyTexSubImage1D = glCopyTexSubImage1D@24
+ glCopyTexSubImage2D = glCopyTexSubImage2D@32
+ glCullFace = glCullFace@4
+; glDebugEntry = glDebugEntry@8
+ glDeleteLists = glDeleteLists@8
+ glDeleteTextures = glDeleteTextures@8
+ glDepthFunc = glDepthFunc@4
+ glDepthMask = glDepthMask@4
+ glDepthRange = glDepthRange@16
+ glDisable = glDisable@4
+ glDisableClientState = glDisableClientState@4
+ glDrawArrays = glDrawArrays@12
+ glDrawBuffer = glDrawBuffer@4
+ glDrawElements = glDrawElements@16
+ glDrawPixels = glDrawPixels@20
+ glEdgeFlag = glEdgeFlag@4
+ glEdgeFlagPointer = glEdgeFlagPointer@8
+ glEdgeFlagv = glEdgeFlagv@4
+ glEnable = glEnable@4
+ glEnableClientState = glEnableClientState@4
+ glEnd = glEnd@0
+ glEndList = glEndList@0
+ glEvalCoord1d = glEvalCoord1d@8
+ glEvalCoord1dv = glEvalCoord1dv@4
+ glEvalCoord1f = glEvalCoord1f@4
+ glEvalCoord1fv = glEvalCoord1fv@4
+ glEvalCoord2d = glEvalCoord2d@16
+ glEvalCoord2dv = glEvalCoord2dv@4
+ glEvalCoord2f = glEvalCoord2f@8
+ glEvalCoord2fv = glEvalCoord2fv@4
+ glEvalMesh1 = glEvalMesh1@12
+ glEvalMesh2 = glEvalMesh2@20
+ glEvalPoint1 = glEvalPoint1@4
+ glEvalPoint2 = glEvalPoint2@8
+ glFeedbackBuffer = glFeedbackBuffer@12
+ glFinish = glFinish@0
+ glFlush = glFlush@0
+ glFogf = glFogf@8
+ glFogfv = glFogfv@8
+ glFogi = glFogi@8
+ glFogiv = glFogiv@8
+ glFrontFace = glFrontFace@4
+ glFrustum = glFrustum@48
+ glGenLists = glGenLists@4
+ glGenTextures = glGenTextures@8
+ glGetBooleanv = glGetBooleanv@8
+ glGetClipPlane = glGetClipPlane@8
+ glGetDoublev = glGetDoublev@8
+ glGetError = glGetError@0
+ glGetFloatv = glGetFloatv@8
+ glGetIntegerv = glGetIntegerv@8
+ glGetLightfv = glGetLightfv@12
+ glGetLightiv = glGetLightiv@12
+ glGetMapdv = glGetMapdv@12
+ glGetMapfv = glGetMapfv@12
+ glGetMapiv = glGetMapiv@12
+ glGetMaterialfv = glGetMaterialfv@12
+ glGetMaterialiv = glGetMaterialiv@12
+ glGetPixelMapfv = glGetPixelMapfv@8
+ glGetPixelMapuiv = glGetPixelMapuiv@8
+ glGetPixelMapusv = glGetPixelMapusv@8
+ glGetPointerv = glGetPointerv@8
+ glGetPolygonStipple = glGetPolygonStipple@4
+ glGetString = glGetString@4
+ glGetTexEnvfv = glGetTexEnvfv@12
+ glGetTexEnviv = glGetTexEnviv@12
+ glGetTexGendv = glGetTexGendv@12
+ glGetTexGenfv = glGetTexGenfv@12
+ glGetTexGeniv = glGetTexGeniv@12
+ glGetTexImage = glGetTexImage@20
+ glGetTexLevelParameterfv = glGetTexLevelParameterfv@16
+ glGetTexLevelParameteriv = glGetTexLevelParameteriv@16
+ glGetTexParameterfv = glGetTexParameterfv@12
+ glGetTexParameteriv = glGetTexParameteriv@12
+ glHint = glHint@8
+ glIndexMask = glIndexMask@4
+ glIndexPointer = glIndexPointer@12
+ glIndexd = glIndexd@8
+ glIndexdv = glIndexdv@4
+ glIndexf = glIndexf@4
+ glIndexfv = glIndexfv@4
+ glIndexi = glIndexi@4
+ glIndexiv = glIndexiv@4
+ glIndexs = glIndexs@4
+ glIndexsv = glIndexsv@4
+ glIndexub = glIndexub@4
+ glIndexubv = glIndexubv@4
+ glInitNames = glInitNames@0
+ glInterleavedArrays = glInterleavedArrays@12
+ glIsEnabled = glIsEnabled@4
+ glIsList = glIsList@4
+ glIsTexture = glIsTexture@4
+ glLightModelf = glLightModelf@8
+ glLightModelfv = glLightModelfv@8
+ glLightModeli = glLightModeli@8
+ glLightModeliv = glLightModeliv@8
+ glLightf = glLightf@12
+ glLightfv = glLightfv@12
+ glLighti = glLighti@12
+ glLightiv = glLightiv@12
+ glLineStipple = glLineStipple@8
+ glLineWidth = glLineWidth@4
+ glListBase = glListBase@4
+ glLoadIdentity = glLoadIdentity@0
+ glLoadMatrixd = glLoadMatrixd@4
+ glLoadMatrixf = glLoadMatrixf@4
+ glLoadName = glLoadName@4
+ glLogicOp = glLogicOp@4
+ glMap1d = glMap1d@32
+ glMap1f = glMap1f@24
+ glMap2d = glMap2d@56
+ glMap2f = glMap2f@40
+ glMapGrid1d = glMapGrid1d@20
+ glMapGrid1f = glMapGrid1f@12
+ glMapGrid2d = glMapGrid2d@40
+ glMapGrid2f = glMapGrid2f@24
+ glMaterialf = glMaterialf@12
+ glMaterialfv = glMaterialfv@12
+ glMateriali = glMateriali@12
+ glMaterialiv = glMaterialiv@12
+ glMatrixMode = glMatrixMode@4
+ glMultMatrixd = glMultMatrixd@4
+ glMultMatrixf = glMultMatrixf@4
+ glNewList = glNewList@8
+ glNormal3b = glNormal3b@12
+ glNormal3bv = glNormal3bv@4
+ glNormal3d = glNormal3d@24
+ glNormal3dv = glNormal3dv@4
+ glNormal3f = glNormal3f@12
+ glNormal3fv = glNormal3fv@4
+ glNormal3i = glNormal3i@12
+ glNormal3iv = glNormal3iv@4
+ glNormal3s = glNormal3s@12
+ glNormal3sv = glNormal3sv@4
+ glNormalPointer = glNormalPointer@12
+ glOrtho = glOrtho@48
+ glPassThrough = glPassThrough@4
+ glPixelMapfv = glPixelMapfv@12
+ glPixelMapuiv = glPixelMapuiv@12
+ glPixelMapusv = glPixelMapusv@12
+ glPixelStoref = glPixelStoref@8
+ glPixelStorei = glPixelStorei@8
+ glPixelTransferf = glPixelTransferf@8
+ glPixelTransferi = glPixelTransferi@8
+ glPixelZoom = glPixelZoom@8
+ glPointSize = glPointSize@4
+ glPolygonMode = glPolygonMode@8
+ glPolygonOffset = glPolygonOffset@8
+ glPolygonStipple = glPolygonStipple@4
+ glPopAttrib = glPopAttrib@0
+ glPopClientAttrib = glPopClientAttrib@0
+ glPopMatrix = glPopMatrix@0
+ glPopName = glPopName@0
+ glPrioritizeTextures = glPrioritizeTextures@12
+ glPushAttrib = glPushAttrib@4
+ glPushClientAttrib = glPushClientAttrib@4
+ glPushMatrix = glPushMatrix@0
+ glPushName = glPushName@4
+ glRasterPos2d = glRasterPos2d@16
+ glRasterPos2dv = glRasterPos2dv@4
+ glRasterPos2f = glRasterPos2f@8
+ glRasterPos2fv = glRasterPos2fv@4
+ glRasterPos2i = glRasterPos2i@8
+ glRasterPos2iv = glRasterPos2iv@4
+ glRasterPos2s = glRasterPos2s@8
+ glRasterPos2sv = glRasterPos2sv@4
+ glRasterPos3d = glRasterPos3d@24
+ glRasterPos3dv = glRasterPos3dv@4
+ glRasterPos3f = glRasterPos3f@12
+ glRasterPos3fv = glRasterPos3fv@4
+ glRasterPos3i = glRasterPos3i@12
+ glRasterPos3iv = glRasterPos3iv@4
+ glRasterPos3s = glRasterPos3s@12
+ glRasterPos3sv = glRasterPos3sv@4
+ glRasterPos4d = glRasterPos4d@32
+ glRasterPos4dv = glRasterPos4dv@4
+ glRasterPos4f = glRasterPos4f@16
+ glRasterPos4fv = glRasterPos4fv@4
+ glRasterPos4i = glRasterPos4i@16
+ glRasterPos4iv = glRasterPos4iv@4
+ glRasterPos4s = glRasterPos4s@16
+ glRasterPos4sv = glRasterPos4sv@4
+ glReadBuffer = glReadBuffer@4
+ glReadPixels = glReadPixels@28
+ glRectd = glRectd@32
+ glRectdv = glRectdv@8
+ glRectf = glRectf@16
+ glRectfv = glRectfv@8
+ glRecti = glRecti@16
+ glRectiv = glRectiv@8
+ glRects = glRects@16
+ glRectsv = glRectsv@8
+ glRenderMode = glRenderMode@4
+ glRotated = glRotated@32
+ glRotatef = glRotatef@16
+ glScaled = glScaled@24
+ glScalef = glScalef@12
+ glScissor = glScissor@16
+ glSelectBuffer = glSelectBuffer@8
+ glShadeModel = glShadeModel@4
+ glStencilFunc = glStencilFunc@12
+ glStencilMask = glStencilMask@4
+ glStencilOp = glStencilOp@12
+ glTexCoord1d = glTexCoord1d@8
+ glTexCoord1dv = glTexCoord1dv@4
+ glTexCoord1f = glTexCoord1f@4
+ glTexCoord1fv = glTexCoord1fv@4
+ glTexCoord1i = glTexCoord1i@4
+ glTexCoord1iv = glTexCoord1iv@4
+ glTexCoord1s = glTexCoord1s@4
+ glTexCoord1sv = glTexCoord1sv@4
+ glTexCoord2d = glTexCoord2d@16
+ glTexCoord2dv = glTexCoord2dv@4
+ glTexCoord2f = glTexCoord2f@8
+ glTexCoord2fv = glTexCoord2fv@4
+ glTexCoord2i = glTexCoord2i@8
+ glTexCoord2iv = glTexCoord2iv@4
+ glTexCoord2s = glTexCoord2s@8
+ glTexCoord2sv = glTexCoord2sv@4
+ glTexCoord3d = glTexCoord3d@24
+ glTexCoord3dv = glTexCoord3dv@4
+ glTexCoord3f = glTexCoord3f@12
+ glTexCoord3fv = glTexCoord3fv@4
+ glTexCoord3i = glTexCoord3i@12
+ glTexCoord3iv = glTexCoord3iv@4
+ glTexCoord3s = glTexCoord3s@12
+ glTexCoord3sv = glTexCoord3sv@4
+ glTexCoord4d = glTexCoord4d@32
+ glTexCoord4dv = glTexCoord4dv@4
+ glTexCoord4f = glTexCoord4f@16
+ glTexCoord4fv = glTexCoord4fv@4
+ glTexCoord4i = glTexCoord4i@16
+ glTexCoord4iv = glTexCoord4iv@4
+ glTexCoord4s = glTexCoord4s@16
+ glTexCoord4sv = glTexCoord4sv@4
+ glTexCoordPointer = glTexCoordPointer@16
+ glTexEnvf = glTexEnvf@12
+ glTexEnvfv = glTexEnvfv@12
+ glTexEnvi = glTexEnvi@12
+ glTexEnviv = glTexEnviv@12
+ glTexGend = glTexGend@16
+ glTexGendv = glTexGendv@12
+ glTexGenf = glTexGenf@12
+ glTexGenfv = glTexGenfv@12
+ glTexGeni = glTexGeni@12
+ glTexGeniv = glTexGeniv@12
+ glTexImage1D = glTexImage1D@32
+ glTexImage2D = glTexImage2D@36
+ glTexParameterf = glTexParameterf@12
+ glTexParameterfv = glTexParameterfv@12
+ glTexParameteri = glTexParameteri@12
+ glTexParameteriv = glTexParameteriv@12
+ glTexSubImage1D = glTexSubImage1D@28
+ glTexSubImage2D = glTexSubImage2D@36
+ glTranslated = glTranslated@24
+ glTranslatef = glTranslatef@12
+ glVertex2d = glVertex2d@16
+ glVertex2dv = glVertex2dv@4
+ glVertex2f = glVertex2f@8
+ glVertex2fv = glVertex2fv@4
+ glVertex2i = glVertex2i@8
+ glVertex2iv = glVertex2iv@4
+ glVertex2s = glVertex2s@8
+ glVertex2sv = glVertex2sv@4
+ glVertex3d = glVertex3d@24
+ glVertex3dv = glVertex3dv@4
+ glVertex3f = glVertex3f@12
+ glVertex3fv = glVertex3fv@4
+ glVertex3i = glVertex3i@12
+ glVertex3iv = glVertex3iv@4
+ glVertex3s = glVertex3s@12
+ glVertex3sv = glVertex3sv@4
+ glVertex4d = glVertex4d@32
+ glVertex4dv = glVertex4dv@4
+ glVertex4f = glVertex4f@16
+ glVertex4fv = glVertex4fv@4
+ glVertex4i = glVertex4i@16
+ glVertex4iv = glVertex4iv@4
+ glVertex4s = glVertex4s@16
+ glVertex4sv = glVertex4sv@4
+ glVertexPointer = glVertexPointer@16
+ glViewport = glViewport@16
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index c5c5cae333b..e4e4a3fe148 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -610,6 +610,37 @@ match_subroutine_by_name(const char *name,
return sig;
}
+static ir_rvalue *
+generate_array_index(void *mem_ctx, exec_list *instructions,
+ struct _mesa_glsl_parse_state *state, YYLTYPE loc,
+ const ast_expression *array, ast_expression *idx,
+ const char **function_name, exec_list *actual_parameters)
+{
+ if (array->oper == ast_array_index) {
+ /* This handles arrays of arrays */
+ ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions,
+ state, loc,
+ array->subexpressions[0],
+ array->subexpressions[1],
+ function_name, actual_parameters);
+ ir_rvalue *outer_array_idx = idx->hir(instructions, state);
+
+ YYLTYPE index_loc = idx->get_location();
+ return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array,
+ outer_array_idx, loc,
+ index_loc);
+ } else {
+ ir_variable *sub_var = NULL;
+ *function_name = array->primary_expression.identifier;
+
+ match_subroutine_by_name(*function_name, actual_parameters,
+ state, &sub_var);
+
+ ir_rvalue *outer_array_idx = idx->hir(instructions, state);
+ return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
+ }
+}
+
static void
print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc,
ir_function *f)
@@ -1989,16 +2020,18 @@ ast_function_expression::hir(exec_list *instructions,
ir_variable *sub_var = NULL;
ir_rvalue *array_idx = NULL;
+ process_parameters(instructions, &actual_parameters, &this->expressions,
+ state);
+
if (id->oper == ast_array_index) {
- func_name = id->subexpressions[0]->primary_expression.identifier;
- array_idx = id->subexpressions[1]->hir(instructions, state);
+ array_idx = generate_array_index(ctx, instructions, state, loc,
+ id->subexpressions[0],
+ id->subexpressions[1], &func_name,
+ &actual_parameters);
} else {
func_name = id->primary_expression.identifier;
}
- process_parameters(instructions, &actual_parameters, &this->expressions,
- state);
-
ir_function_signature *sig =
match_function_by_name(func_name, &actual_parameters, state);
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 0c11ec58d20..961183636a9 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -487,54 +487,54 @@ bit_logic_result_type(const struct glsl_type *type_a,
ast_operators op,
struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
{
- if (!state->check_bitwise_operations_allowed(loc)) {
- return glsl_type::error_type;
- }
+ if (!state->check_bitwise_operations_allowed(loc)) {
+ return glsl_type::error_type;
+ }
- /* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
- *
- * "The bitwise operators and (&), exclusive-or (^), and inclusive-or
- * (|). The operands must be of type signed or unsigned integers or
- * integer vectors."
- */
- if (!type_a->is_integer()) {
- _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
- ast_expression::operator_string(op));
- return glsl_type::error_type;
- }
- if (!type_b->is_integer()) {
- _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
+ /* From page 50 (page 56 of PDF) of GLSL 1.30 spec:
+ *
+ * "The bitwise operators and (&), exclusive-or (^), and inclusive-or
+ * (|). The operands must be of type signed or unsigned integers or
+ * integer vectors."
+ */
+ if (!type_a->is_integer()) {
+ _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
ast_expression::operator_string(op));
- return glsl_type::error_type;
- }
+ return glsl_type::error_type;
+ }
+ if (!type_b->is_integer()) {
+ _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
+ ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
- /* "The fundamental types of the operands (signed or unsigned) must
- * match,"
- */
- if (type_a->base_type != type_b->base_type) {
- _mesa_glsl_error(loc, state, "operands of `%s' must have the same "
- "base type", ast_expression::operator_string(op));
- return glsl_type::error_type;
- }
+ /* "The fundamental types of the operands (signed or unsigned) must
+ * match,"
+ */
+ if (type_a->base_type != type_b->base_type) {
+ _mesa_glsl_error(loc, state, "operands of `%s' must have the same "
+ "base type", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
- /* "The operands cannot be vectors of differing size." */
- if (type_a->is_vector() &&
- type_b->is_vector() &&
- type_a->vector_elements != type_b->vector_elements) {
- _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
- "different sizes", ast_expression::operator_string(op));
- return glsl_type::error_type;
- }
+ /* "The operands cannot be vectors of differing size." */
+ if (type_a->is_vector() &&
+ type_b->is_vector() &&
+ type_a->vector_elements != type_b->vector_elements) {
+ _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of "
+ "different sizes", ast_expression::operator_string(op));
+ return glsl_type::error_type;
+ }
- /* "If one operand is a scalar and the other a vector, the scalar is
- * applied component-wise to the vector, resulting in the same type as
- * the vector. The fundamental types of the operands [...] will be the
- * resulting fundamental type."
- */
- if (type_a->is_scalar())
- return type_b;
- else
- return type_a;
+ /* "If one operand is a scalar and the other a vector, the scalar is
+ * applied component-wise to the vector, resulting in the same type as
+ * the vector. The fundamental types of the operands [...] will be the
+ * resulting fundamental type."
+ */
+ if (type_a->is_scalar())
+ return type_b;
+ else
+ return type_a;
}
static const struct glsl_type *
@@ -6294,6 +6294,18 @@ ast_interface_block::hir(exec_list *instructions,
state->struct_specifier_depth--;
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].stream != -1 &&
+ (unsigned) fields[i].stream != this->layout.stream) {
+ _mesa_glsl_error(&loc, state,
+ "stream layout qualifier on "
+ "interface block member `%s' does not match "
+ "the interface block (%d vs %d)",
+ fields[i].name, fields[i].stream,
+ this->layout.stream);
+ }
+ }
+
if (!redeclaring_per_vertex) {
validate_identifier(this->block_name, loc, state);
@@ -6634,6 +6646,8 @@ ast_interface_block::hir(exec_list *instructions,
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
var->data.binding = this->layout.binding;
+ var->data.stream = this->layout.stream;
+
state->symbols->add_variable(var);
instructions->push_tail(var);
}
@@ -6652,6 +6666,7 @@ ast_interface_block::hir(exec_list *instructions,
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
+ var->data.stream = this->layout.stream;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@@ -6664,17 +6679,6 @@ ast_interface_block::hir(exec_list *instructions,
var->data.matrix_layout = fields[i].matrix_layout;
}
- if (fields[i].stream != -1 &&
- ((unsigned)fields[i].stream) != this->layout.stream) {
- _mesa_glsl_error(&loc, state,
- "stream layout qualifier on "
- "interface block member `%s' does not match "
- "the interface block (%d vs %d)",
- var->name, fields[i].stream, this->layout.stream);
- }
-
- var->data.stream = this->layout.stream;
-
if (var->data.mode == ir_var_shader_storage) {
var->data.image_read_only = fields[i].image_read_only;
var->data.image_write_only = fields[i].image_write_only;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index cd00f6e085b..2f2e10d7992 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -2609,17 +2609,6 @@ interface_block:
block->layout.is_default_qualifier = false;
- foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
- ast_type_qualifier& qualifier = member->type->qualifier;
- if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) {
- _mesa_glsl_error(& @1, state,
- "stream layout qualifier on "
- "interface block member does not match "
- "the interface block (%d vs %d)",
- qualifier.stream, block->layout.stream);
- YYERROR;
- }
- }
$$ = block;
}
| memory_qualifier interface_block
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index fe00aa30d07..8183e65d2f5 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -763,7 +763,8 @@ private:
/* Assign explicit locations. */
if (current_var->data.explicit_location) {
/* Set sequential locations for struct fields. */
- if (record_type != NULL) {
+ if (current_var->type->without_array()->is_record() ||
+ current_var->type->is_array_of_arrays()) {
const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
this->uniforms[id].remap_location =
this->explicit_location + field_counter;
@@ -1180,7 +1181,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* Reserve all the explicit locations of the active uniforms. */
for (unsigned i = 0; i < num_uniforms; i++) {
- if (uniforms[i].type->is_subroutine())
+ if (uniforms[i].type->is_subroutine() ||
+ uniforms[i].is_shader_storage)
continue;
if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
@@ -1200,8 +1202,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
/* Reserve locations for rest of the uniforms. */
for (unsigned i = 0; i < num_uniforms; i++) {
- if (uniforms[i].type->is_subroutine())
+ if (uniforms[i].type->is_subroutine() ||
+ uniforms[i].is_shader_storage)
continue;
+
/* Built-in uniforms should not get any location. */
if (uniforms[i].builtin)
continue;
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 25ca928aa43..07ea0e0c7e5 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -651,7 +651,7 @@ link_invalidate_variable_locations(exec_list *ir)
/**
- * Set UsesClipDistance and ClipDistanceArraySize based on the given shader.
+ * Set clip_distance_array_size based on the given shader.
*
* Also check for errors based on incorrect usage of gl_ClipVertex and
* gl_ClipDistance.
@@ -660,10 +660,10 @@ link_invalidate_variable_locations(exec_list *ir)
*/
static void
analyze_clip_usage(struct gl_shader_program *prog,
- struct gl_shader *shader, GLboolean *UsesClipDistance,
- GLuint *ClipDistanceArraySize)
+ struct gl_shader *shader,
+ GLuint *clip_distance_array_size)
{
- *ClipDistanceArraySize = 0;
+ *clip_distance_array_size = 0;
if (!prog->IsES && prog->Version >= 130) {
/* From section 7.1 (Vertex Shader Special Variables) of the
@@ -686,13 +686,14 @@ analyze_clip_usage(struct gl_shader_program *prog,
_mesa_shader_stage_to_string(shader->Stage));
return;
}
- *UsesClipDistance = clip_distance.variable_found();
- ir_variable *clip_distance_var =
- shader->symbols->get_variable("gl_ClipDistance");
- if (clip_distance_var)
- *ClipDistanceArraySize = clip_distance_var->type->length;
- } else {
- *UsesClipDistance = false;
+
+ if (clip_distance.variable_found()) {
+ ir_variable *clip_distance_var =
+ shader->symbols->get_variable("gl_ClipDistance");
+
+ assert(clip_distance_var);
+ *clip_distance_array_size = clip_distance_var->type->length;
+ }
}
}
@@ -700,8 +701,7 @@ analyze_clip_usage(struct gl_shader_program *prog,
/**
* Verify that a vertex shader executable meets all semantic requirements.
*
- * Also sets prog->Vert.UsesClipDistance and prog->Vert.ClipDistanceArraySize
- * as a side effect.
+ * Also sets prog->Vert.ClipDistanceArraySize as a side effect.
*
* \param shader Vertex shader executable to be verified
*/
@@ -754,8 +754,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog,
}
}
- analyze_clip_usage(prog, shader, &prog->Vert.UsesClipDistance,
- &prog->Vert.ClipDistanceArraySize);
+ analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize);
}
void
@@ -765,8 +764,7 @@ validate_tess_eval_shader_executable(struct gl_shader_program *prog,
if (shader == NULL)
return;
- analyze_clip_usage(prog, shader, &prog->TessEval.UsesClipDistance,
- &prog->TessEval.ClipDistanceArraySize);
+ analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize);
}
@@ -797,8 +795,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog,
/**
* Verify that a geometry shader executable meets all semantic requirements
*
- * Also sets prog->Geom.VerticesIn, prog->Geom.UsesClipDistance, and
- * prog->Geom.ClipDistanceArraySize as a side effect.
+ * Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as
+ * a side effect.
*
* \param shader Geometry shader executable to be verified
*/
@@ -812,8 +810,7 @@ validate_geometry_shader_executable(struct gl_shader_program *prog,
unsigned num_vertices = vertices_per_prim(prog->Geom.InputType);
prog->Geom.VerticesIn = num_vertices;
- analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance,
- &prog->Geom.ClipDistanceArraySize);
+ analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize);
}
/**
@@ -3117,8 +3114,8 @@ check_explicit_uniform_locations(struct gl_context *ctx,
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *var = node->as_variable();
- if (var && (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) &&
- var->data.explicit_location) {
+ if (var && (var->data.mode == ir_var_uniform &&
+ var->data.explicit_location)) {
bool ret;
if (var->type->is_subroutine())
ret = reserve_subroutine_explicit_locations(prog, sh, var);
diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp
index 276a2dedf47..114bb5811b4 100644
--- a/src/glsl/lower_named_interface_blocks.cpp
+++ b/src/glsl/lower_named_interface_blocks.cpp
@@ -186,6 +186,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new_var->data.centroid = iface_t->fields.structure[i].centroid;
new_var->data.sample = iface_t->fields.structure[i].sample;
new_var->data.patch = iface_t->fields.structure[i].patch;
+ new_var->data.stream = var->data.stream;
new_var->init_interface_type(iface_t);
hash_table_insert(interface_namespace, new_var,
diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
index c1aed61a36a..a0df5e1df81 100644
--- a/src/glsl/lower_subroutine.cpp
+++ b/src/glsl/lower_subroutine.cpp
@@ -84,7 +84,7 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
continue;
if (ir->array_idx != NULL)
- var = new(mem_ctx) ir_dereference_array(ir->sub_var, ir->array_idx->clone(mem_ctx, NULL));
+ var = ir->array_idx->clone(mem_ctx, NULL);
else
var = new(mem_ctx) ir_dereference_variable(ir->sub_var);
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index e818c048461..57a242b4074 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -238,6 +238,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
case ir_type_swizzle: {
ir_swizzle *s = (ir_swizzle *) ir;
ir = s->val->as_dereference();
+ /* Skip swizzle in the next pass */
+ d = ir;
break;
}
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index e57e834d948..129dd02781b 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -164,15 +164,20 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader->info.outputs_written = sh->Program->OutputsWritten;
shader->info.system_values_read = sh->Program->SystemValuesRead;
shader->info.uses_texture_gather = sh->Program->UsesGather;
- shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
+ shader->info.uses_clip_distance_out =
+ sh->Program->ClipDistanceArraySize != 0;
shader->info.separate_shader = shader_prog->SeparateShader;
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
switch (stage) {
case MESA_SHADER_GEOMETRY:
+ shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
+ shader->info.gs.output_primitive = sh->Geom.OutputType;
shader->info.gs.vertices_out = sh->Geom.VerticesOut;
shader->info.gs.invocations = sh->Geom.Invocations;
+ shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
+ shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
break;
case MESA_SHADER_FRAGMENT: {
diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index b83e1ca3d2c..a8eade5f9e1 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -521,6 +521,11 @@ struct glsl_type {
return base_type == GLSL_TYPE_ARRAY;
}
+ bool is_array_of_arrays() const
+ {
+ return is_array() && fields.array->is_array();
+ }
+
/**
* Query whether or not a type is a record
*/
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 9939b9e91a2..d0304bebbb0 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1521,11 +1521,23 @@ typedef struct nir_shader_info {
union {
struct {
+ /** The number of vertices received per input primitive */
+ unsigned vertices_in;
+
+ /** The output primitive type (GL enum value) */
+ unsigned output_primitive;
+
/** The maximum number of vertices the geometry shader might write. */
unsigned vertices_out;
/** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
unsigned invocations;
+
+ /** Whether or not this shader uses EndPrimitive */
+ bool uses_end_primitive;
+
+ /** Whether or not this shader uses non-zero streams */
+ bool uses_streams;
} gs;
struct {
@@ -1924,7 +1936,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);
-int nir_gs_count_vertices(nir_shader *shader);
+int nir_gs_count_vertices(const nir_shader *shader);
bool nir_split_var_copies(nir_shader *shader);
diff --git a/src/glsl/nir/nir_gs_count_vertices.c b/src/glsl/nir/nir_gs_count_vertices.c
index e0bdf170d22..1c360673ddc 100644
--- a/src/glsl/nir/nir_gs_count_vertices.c
+++ b/src/glsl/nir/nir_gs_count_vertices.c
@@ -51,7 +51,7 @@ as_set_vertex_count(nir_instr *instr)
* counting at the NIR level.
*/
int
-nir_gs_count_vertices(nir_shader *shader)
+nir_gs_count_vertices(const nir_shader *shader)
{
int count = -1;
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index a09491781e6..a06b0aa1cd0 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -327,12 +327,12 @@ struct cfg_t {
#define foreach_inst_in_block_reverse_safe(__type, __inst, __block) \
foreach_in_list_reverse_safe(__type, __inst, &(__block)->instructions)
-#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst, __block) \
+#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst) \
for (__type *__scan_inst = (__type *)__inst->next; \
!__scan_inst->is_tail_sentinel(); \
__scan_inst = (__type *)__scan_inst->next)
-#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst, __block) \
+#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst) \
for (__type *__scan_inst = (__type *)__inst->prev; \
!__scan_inst->is_head_sentinel(); \
__scan_inst = (__type *)__scan_inst->prev)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index f6d5ab87be9..d9967143d8a 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -90,6 +90,7 @@ struct brw_compiler {
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
bool scalar_vs;
+ bool scalar_gs;
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
};
@@ -488,6 +489,9 @@ struct brw_vue_prog_data {
struct brw_stage_prog_data base;
struct brw_vue_map vue_map;
+ /** Should the hardware deliver input VUE handles for URB pull loads? */
+ bool include_vue_handles;
+
GLuint urb_read_length;
GLuint total_grf;
@@ -596,21 +600,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
unsigned *final_assembly_size,
char **error_str);
-/**
- * Scratch data used when compiling a GLSL geometry shader.
- */
-struct brw_gs_compile
-{
- struct brw_gs_prog_key key;
- struct brw_gs_prog_data prog_data;
- struct brw_vue_map input_vue_map;
-
- struct brw_geometry_program *gp;
-
- unsigned control_data_bits_per_vertex;
- unsigned control_data_header_size_bits;
-};
-
/**
* Compile a vertex shader.
*
@@ -618,10 +607,11 @@ struct brw_gs_compile
*/
const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
- struct brw_gs_compile *c,
+ void *mem_ctx,
+ const struct brw_gs_prog_key *key,
+ struct brw_gs_prog_data *prog_data,
const struct nir_shader *shader,
struct gl_shader_program *shader_prog,
- void *mem_ctx,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a8cde20e045..169d092f90e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -918,8 +918,8 @@ enum opcode {
* Source 0: [required] Color 0.
* Source 1: [optional] Color 1 (for dual source blend messages).
* Source 2: [optional] Src0 Alpha.
- * Source 3: [optional] Source Depth (passthrough from the thread payload).
- * Source 4: [optional] Destination Depth (gl_FragDepth).
+ * Source 3: [optional] Source Depth (gl_FragDepth)
+ * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread
* Source 5: [optional] Sample Mask (gl_SampleMask).
* Source 6: [required] Number of color components (as a UD immediate).
*/
@@ -1033,7 +1033,19 @@ enum opcode {
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
+ /**
+ * Gen8+ SIMD8 URB Read message.
+ *
+ * Source 0: The header register, containing URB handles (g1).
+ *
+ * Currently only supports constant offsets, in inst->offset.
+ */
+ SHADER_OPCODE_URB_READ_SIMD8,
+
SHADER_OPCODE_URB_WRITE_SIMD8,
+ SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
+ SHADER_OPCODE_URB_WRITE_SIMD8_MASKED,
+ SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
/**
* Return the index of an arbitrary live channel (i.e. one of the channels
@@ -2385,7 +2397,7 @@ enum brw_pixel_shader_coverage_mask_mode {
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
-# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
+# define GEN9_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index b798931140f..f787ea3d4f8 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -690,7 +690,7 @@ set_control_index(const struct brw_device_info *devinfo,
for (int i = 0; i < 32; i++) {
if (control_index_table[i] == uncompacted) {
- brw_compact_inst_set_control_index(dst, i);
+ brw_compact_inst_set_control_index(devinfo, dst, i);
return true;
}
}
@@ -711,7 +711,7 @@ set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
for (int i = 0; i < 32; i++) {
if (datatype_table[i] == uncompacted) {
- brw_compact_inst_set_datatype_index(dst, i);
+ brw_compact_inst_set_datatype_index(devinfo, dst, i);
return true;
}
}
@@ -732,7 +732,7 @@ set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
for (int i = 0; i < 32; i++) {
if (subreg_table[i] == uncompacted) {
- brw_compact_inst_set_subreg_index(dst, i);
+ brw_compact_inst_set_subreg_index(devinfo, dst, i);
return true;
}
}
@@ -764,7 +764,7 @@ set_src0_index(const struct brw_device_info *devinfo,
if (!get_src_index(uncompacted, &compacted))
return false;
- brw_compact_inst_set_src0_index(dst, compacted);
+ brw_compact_inst_set_src0_index(devinfo, dst, compacted);
return true;
}
@@ -784,7 +784,7 @@ set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
return false;
}
- brw_compact_inst_set_src1_index(dst, compacted);
+ brw_compact_inst_set_src1_index(devinfo, dst, compacted);
return true;
}
@@ -804,7 +804,7 @@ set_3src_control_index(const struct brw_device_info *devinfo,
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
if (gen8_3src_control_index_table[i] == uncompacted) {
- brw_compact_inst_set_3src_control_index(dst, i);
+ brw_compact_inst_set_3src_control_index(devinfo, dst, i);
return true;
}
}
@@ -838,7 +838,7 @@ set_3src_source_index(const struct brw_device_info *devinfo,
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
if (gen8_3src_source_index_table[i] == uncompacted) {
- brw_compact_inst_set_3src_source_index(dst, i);
+ brw_compact_inst_set_3src_source_index(devinfo, dst, i);
return true;
}
}
@@ -909,7 +909,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
return false;
#define compact(field) \
- brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
+ brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
compact(opcode);
@@ -921,7 +921,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
compact(dst_reg_nr);
compact(src0_rep_ctrl);
- brw_compact_inst_set_3src_cmpt_control(dst, true);
+ brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
compact(debug_control);
compact(saturate);
compact(src1_rep_ctrl);
@@ -1003,36 +1003,52 @@ brw_try_compact_instruction(const struct brw_device_info *devinfo,
memset(&temp, 0, sizeof(temp));
- brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
- brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
+#define compact(field) \
+ brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
+
+ compact(opcode);
+ compact(debug_control);
+
if (!set_control_index(devinfo, &temp, src))
return false;
if (!set_datatype_index(devinfo, &temp, src))
return false;
if (!set_subreg_index(devinfo, &temp, src, is_immediate))
return false;
- brw_compact_inst_set_acc_wr_control(&temp,
- brw_inst_acc_wr_control(devinfo, src));
- brw_compact_inst_set_cond_modifier(&temp,
- brw_inst_cond_modifier(devinfo, src));
+
+ if (devinfo->gen >= 6) {
+ compact(acc_wr_control);
+ } else {
+ compact(mask_control_ex);
+ }
+
+ compact(cond_modifier);
+
if (devinfo->gen <= 6)
- brw_compact_inst_set_flag_subreg_nr(&temp,
- brw_inst_flag_subreg_nr(devinfo, src));
- brw_compact_inst_set_cmpt_control(&temp, true);
+ compact(flag_subreg_nr);
+
+ brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
+
if (!set_src0_index(devinfo, &temp, src))
return false;
if (!set_src1_index(devinfo, &temp, src, is_immediate))
return false;
- brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
- brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
+
+ brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
+ brw_inst_dst_da_reg_nr(devinfo, src));
+ brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
+ brw_inst_src0_da_reg_nr(devinfo, src));
+
if (is_immediate) {
- brw_compact_inst_set_src1_reg_nr(&temp,
+ brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
brw_inst_imm_ud(devinfo, src) & 0xff);
} else {
- brw_compact_inst_set_src1_reg_nr(&temp,
+ brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
brw_inst_src1_da_reg_nr(devinfo, src));
}
+#undef compact
+
*dst = temp;
return true;
@@ -1043,7 +1059,7 @@ set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
uint32_t uncompacted =
- control_index_table[brw_compact_inst_control_index(src)];
+ control_index_table[brw_compact_inst_control_index(devinfo, src)];
if (devinfo->gen >= 8) {
brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
@@ -1064,7 +1080,8 @@ static void
set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
- uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
+ uint32_t uncompacted =
+ datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
if (devinfo->gen >= 8) {
brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
@@ -1080,7 +1097,8 @@ static void
set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
- uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
+ uint16_t uncompacted =
+ subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
@@ -1091,7 +1109,7 @@ static void
set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src)
{
- uint32_t compacted = brw_compact_inst_src0_index(src);
+ uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
uint16_t uncompacted = src_index_table[compacted];
brw_inst_set_bits(dst, 88, 77, uncompacted);
@@ -1102,11 +1120,12 @@ set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst,
brw_compact_inst *src, bool is_immediate)
{
if (is_immediate) {
- signed high5 = brw_compact_inst_src1_index(src);
+ signed high5 = brw_compact_inst_src1_index(devinfo, src);
/* Replicate top bit of src1_index into high 20 bits of the immediate. */
brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
} else {
- uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
+ uint16_t uncompacted =
+ src_index_table[brw_compact_inst_src1_index(devinfo, src)];
brw_inst_set_bits(dst, 120, 109, uncompacted);
}
@@ -1118,7 +1137,7 @@ set_uncompacted_3src_control_index(const struct brw_device_info *devinfo,
{
assert(devinfo->gen >= 8);
- uint32_t compacted = brw_compact_inst_3src_control_index(src);
+ uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
uint32_t uncompacted = gen8_3src_control_index_table[compacted];
brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
@@ -1134,7 +1153,7 @@ set_uncompacted_3src_source_index(const struct brw_device_info *devinfo,
{
assert(devinfo->gen >= 8);
- uint32_t compacted = brw_compact_inst_3src_source_index(src);
+ uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
uint64_t uncompacted = gen8_3src_source_index_table[compacted];
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
@@ -1160,7 +1179,7 @@ brw_uncompact_3src_instruction(const struct brw_device_info *devinfo,
assert(devinfo->gen >= 8);
#define uncompact(field) \
- brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
+ brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
uncompact(opcode);
@@ -1190,13 +1209,16 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
{
memset(dst, 0, sizeof(*dst));
- if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
+ if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(devinfo, src))) {
brw_uncompact_3src_instruction(devinfo, dst, src);
return;
}
- brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
- brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
+#define uncompact(field) \
+ brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
+
+ uncompact(opcode);
+ uncompact(debug_control);
set_uncompacted_control(devinfo, dst, src);
set_uncompacted_datatype(devinfo, dst, src);
@@ -1206,22 +1228,36 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
set_uncompacted_subreg(devinfo, dst, src);
- brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
- brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
+
+ if (devinfo->gen >= 6) {
+ uncompact(acc_wr_control);
+ } else {
+ uncompact(mask_control_ex);
+ }
+
+ uncompact(cond_modifier);
+
if (devinfo->gen <= 6)
- brw_inst_set_flag_subreg_nr(devinfo, dst,
- brw_compact_inst_flag_subreg_nr(src));
+ uncompact(flag_subreg_nr);
+
set_uncompacted_src0(devinfo, dst, src);
set_uncompacted_src1(devinfo, dst, src, is_immediate);
- brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
- brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
+
+ brw_inst_set_dst_da_reg_nr(devinfo, dst,
+ brw_compact_inst_dst_reg_nr(devinfo, src));
+ brw_inst_set_src0_da_reg_nr(devinfo, dst,
+ brw_compact_inst_src0_reg_nr(devinfo, src));
+
if (is_immediate) {
brw_inst_set_imm_ud(devinfo, dst,
brw_inst_imm_ud(devinfo, dst) |
- brw_compact_inst_src1_reg_nr(src));
+ brw_compact_inst_src1_reg_nr(devinfo, src));
} else {
- brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
+ brw_inst_set_src1_da_reg_nr(devinfo, dst,
+ brw_compact_inst_src1_reg_nr(devinfo, src));
}
+
+#undef uncompact
}
void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
@@ -1415,8 +1451,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
- brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
- brw_compact_inst_set_cmpt_control(align, true);
+ brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
+ brw_compact_inst_set_cmpt_control(devinfo, align, true);
offset += sizeof(brw_compact_inst);
compacted_count--;
compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
@@ -1524,8 +1560,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
if (p->next_insn_offset & sizeof(brw_compact_inst)) {
brw_compact_inst *align = store + offset;
memset(align, 0, sizeof(*align));
- brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
- brw_compact_inst_set_cmpt_control(align, true);
+ brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
+ brw_compact_inst_set_cmpt_control(devinfo, align, true);
p->next_insn_offset += sizeof(brw_compact_inst);
}
p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0562c5a9981..8320cd77299 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -281,6 +281,10 @@ fs_inst::is_send_from_grf() const
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+ case SHADER_OPCODE_URB_READ_SIMD8:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == GRF;
@@ -782,6 +786,10 @@ fs_inst::regs_read(int arg) const
switch (opcode) {
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+ case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -911,6 +919,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
@@ -2239,13 +2250,15 @@ fs_visitor::opt_sampler_eot()
if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex())
return false;
- /* This optimisation doesn't seem to work for textureGather for some
- * reason. I can't find any documentation or known workarounds to indicate
- * that this is expected, but considering that it is probably pretty
- * unlikely that a shader would directly write out the results from
- * textureGather we might as well just disable it.
+ /* 3D Sampler » Messages » Message Format
+ *
+ * “Response Length of zero is allowed on all SIMD8* and SIMD16* sampler
+ * messages except sample+killpix, resinfo, sampleinfo, LOD, and gather4*”
*/
- if (tex_inst->opcode == SHADER_OPCODE_TG4 ||
+ if (tex_inst->opcode == SHADER_OPCODE_TXS ||
+ tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO ||
+ tex_inst->opcode == SHADER_OPCODE_LOD ||
+ tex_inst->opcode == SHADER_OPCODE_TG4 ||
tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)
return false;
@@ -2457,7 +2470,7 @@ fs_visitor::compute_to_mrf()
/* Found a move of a GRF to a MRF. Let's see if we can go
* rewrite the thing that made this GRF to write into the MRF.
*/
- foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg) {
/* Found the last thing to write our reg we want to turn
@@ -2805,7 +2818,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
* we assume that there are no outstanding dependencies on entry to the
* program.
*/
- foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
/* If we hit control flow, assume that there *are* outstanding
* dependencies, and force their cleanup before our instruction.
*/
@@ -2871,7 +2884,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
/* Walk forwards looking for writes to registers we're writing which aren't
* read before being written.
*/
- foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst, block) {
+ foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) {
/* If we hit control flow, force resolve all remaining dependencies. */
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 171338dcc0b..50e98becf03 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -62,6 +62,8 @@ namespace brw {
class fs_live_variables;
}
+struct brw_gs_compile;
+
static inline fs_reg
offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
{
@@ -99,7 +101,12 @@ public:
const nir_shader *shader,
unsigned dispatch_width,
int shader_time_index);
-
+ fs_visitor(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ struct brw_gs_compile *gs_compile,
+ struct brw_gs_prog_data *prog_data,
+ const nir_shader *shader);
+ void init();
~fs_visitor();
fs_reg vgrf(const glsl_type *const type);
@@ -298,6 +305,8 @@ public:
const void *const key;
const struct brw_sampler_prog_key_data *key_tex;
+ struct brw_gs_compile *gs_compile;
+
struct brw_stage_prog_data *prog_data;
struct gl_program *prog;
@@ -415,6 +424,7 @@ private:
struct brw_reg implied_header,
GLuint nr);
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+ void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
void generate_barrier(fs_inst *inst, struct brw_reg src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 469f2ea4e16..883e8d2a49f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -87,8 +87,7 @@ opt_cmod_propagation_local(bblock_t *block)
continue;
bool read_flag = false;
- foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst,
- block) {
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
scan_inst->dst.reg_offset != inst->src[0].reg_offset)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 13c495cd395..bb7e792044f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -354,6 +354,28 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
}
}
+void
+fs_generator::generate_urb_read(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg header)
+{
+ assert(header.file == BRW_GENERAL_REGISTER_FILE);
+ assert(header.type == BRW_REGISTER_TYPE_UD);
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, header);
+ brw_set_src1(p, send, brw_imm_ud(0u));
+
+ brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
+ brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
+
+ brw_inst_set_mlen(p->devinfo, send, inst->mlen);
+ brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
+ brw_inst_set_header_present(p->devinfo, send, true);
+ brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
+}
+
void
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
{
@@ -368,6 +390,14 @@ fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB);
brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
+ if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
+ inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
+ brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true);
+
+ if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
+ inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT)
+ brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true);
+
brw_inst_set_mlen(p->devinfo, insn, inst->mlen);
brw_inst_set_rlen(p->devinfo, insn, 0);
brw_inst_set_eot(p->devinfo, insn, inst->eot);
@@ -2001,7 +2031,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
fill_count++;
break;
+ case SHADER_OPCODE_URB_READ_SIMD8:
+ generate_urb_read(inst, dst, src[0]);
+ break;
+
case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
generate_urb_write(inst, src[0]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index feedbfbb2e3..7b5a0482519 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -30,6 +30,7 @@
#include "brw_fs_surface_builder.h"
#include "brw_nir.h"
#include "brw_fs_surface_builder.h"
+#include "brw_vec4_gs_visitor.h"
using namespace brw;
using namespace brw::surface_access;
@@ -188,6 +189,18 @@ emit_system_values_block(nir_block *block, void *void_visitor)
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
break;
+ case nir_intrinsic_load_invocation_id:
+ assert(v->stage == MESA_SHADER_GEOMETRY);
+ reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+ if (reg->file == BAD_FILE) {
+ const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL);
+ fs_reg g1(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.SHR(iid, g1, fs_reg(27u));
+ *reg = iid;
+ }
+ break;
+
case nir_intrinsic_load_sample_pos:
assert(v->stage == MESA_SHADER_FRAGMENT);
reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
@@ -1367,9 +1380,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
+ case nir_intrinsic_load_primitive_id:
+ assert(stage == MESA_SHADER_GEOMETRY);
+ assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
+ retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
+ break;
+
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index 8792a8c7b1d..862e3245d43 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -64,7 +64,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
int src_end_ip = v->live_intervals->end[src_var];
bool interfered = false;
- foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
+ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
(scan_inst->dst.type != inst->dst.type &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f825fed4daf..7cc4f3c927a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -41,6 +41,7 @@
#include "brw_wm.h"
#include "brw_cs.h"
#include "brw_vec4.h"
+#include "brw_vec4_gs_visitor.h"
#include "brw_fs.h"
#include "main/uniforms.h"
#include "glsl/nir/glsl_types.h"
@@ -868,13 +869,14 @@ void
fs_visitor::emit_urb_writes()
{
int slot, urb_offset, length;
- struct brw_vs_prog_data *vs_prog_data =
- (struct brw_vs_prog_data *) prog_data;
- const struct brw_vs_prog_key *key =
+ int starting_urb_offset = 0;
+ const struct brw_vue_prog_data *vue_prog_data =
+ (const struct brw_vue_prog_data *) this->prog_data;
+ const struct brw_vs_prog_key *vs_key =
(const struct brw_vs_prog_key *) this->key;
const GLbitfield64 psiz_mask =
VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ;
- const struct brw_vue_map *vue_map = &vs_prog_data->base.vue_map;
+ const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
bool flush;
fs_reg sources[8];
@@ -900,8 +902,21 @@ fs_visitor::emit_urb_writes()
return;
}
+ if (stage == MESA_SHADER_GEOMETRY) {
+ const struct brw_gs_prog_data *gs_prog_data =
+ (const struct brw_gs_prog_data *) prog_data;
+
+ /* We need to increment the Global Offset to skip over the control data
+ * header and the extra "Vertex Count" field (1 HWord) at the beginning
+ * of the VUE. We're counting in OWords, so the units are doubled.
+ */
+ starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
+ if (gs_prog_data->static_vertex_count == -1)
+ starting_urb_offset += 2;
+ }
+
length = 0;
- urb_offset = 0;
+ urb_offset = starting_urb_offset;
flush = false;
for (slot = 0; slot < vue_map->num_slots; slot++) {
int varying = vue_map->slot_to_varying[slot];
@@ -961,11 +976,11 @@ fs_visitor::emit_urb_writes()
break;
}
- if ((varying == VARYING_SLOT_COL0 ||
+ if (stage == MESA_SHADER_VERTEX && vs_key->clamp_vertex_color &&
+ (varying == VARYING_SLOT_COL0 ||
varying == VARYING_SLOT_COL1 ||
varying == VARYING_SLOT_BFC0 ||
- varying == VARYING_SLOT_BFC1) &&
- key->clamp_vertex_color) {
+ varying == VARYING_SLOT_BFC1)) {
/* We need to clamp these guys, so do a saturating MOV into a
* temp register and use that for the payload.
*/
@@ -1005,10 +1020,10 @@ fs_visitor::emit_urb_writes()
fs_inst *inst =
abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
- inst->eot = last;
+ inst->eot = last && stage == MESA_SHADER_VERTEX;
inst->mlen = length + 1;
inst->offset = urb_offset;
- urb_offset = slot + 1;
+ urb_offset = starting_urb_offset + slot + 1;
length = 0;
flush = false;
}
@@ -1071,11 +1086,33 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
unsigned dispatch_width,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
- key(key), prog_data(prog_data), prog(prog),
+ key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
dispatch_width(dispatch_width),
shader_time_index(shader_time_index),
- promoted_constants(0),
bld(fs_builder(this, dispatch_width).at_end())
+{
+ init();
+}
+
+fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
+ const nir_shader *shader)
+ : backend_shader(compiler, log_data, mem_ctx, shader,
+ &prog_data->base.base),
+ key(&c->key), gs_compile(c),
+ prog_data(&prog_data->base.base), prog(NULL),
+ dispatch_width(8),
+ shader_time_index(ST_GS),
+ bld(fs_builder(this, dispatch_width).at_end())
+{
+ init();
+}
+
+
+void
+fs_visitor::init()
{
switch (stage) {
case MESA_SHADER_FRAGMENT:
@@ -1094,6 +1131,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
unreachable("unhandled shader stage");
}
+ this->prog_data = this->stage_prog_data;
+
this->failed = false;
this->simd16_unsupported = false;
this->no16_msg = NULL;
@@ -1119,6 +1158,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->pull_constant_loc = NULL;
this->push_constant_loc = NULL;
+   this->promoted_constants = 0;
+
this->spilled_any_registers = false;
this->do_dual_src = false;
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 10a7f28fdab..ed0890f430f 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -57,20 +57,14 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct brw_geometry_program *gp,
struct brw_gs_prog_key *key)
{
+ struct brw_compiler *compiler = brw->intelScreen->compiler;
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct brw_stage_state *stage_state = &brw->gs.base;
- struct brw_gs_compile c;
- memset(&c, 0, sizeof(c));
- c.key = *key;
- c.gp = gp;
-
- c.prog_data.include_primitive_id =
- (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
-
- c.prog_data.invocations = gp->program.Invocations;
+ struct brw_gs_prog_data prog_data;
+ memset(&prog_data, 0, sizeof(prog_data));
assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
- &gp->program.Base, &c.prog_data);
+ &gp->program.Base, &prog_data);
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
@@ -83,215 +77,24 @@ brw_codegen_gs_prog(struct brw_context *brw,
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
int param_count = gp->program.Base.nir->num_uniforms * 4;
- c.prog_data.base.base.param =
+ prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
- c.prog_data.base.base.pull_param =
+ prog_data.base.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
- c.prog_data.base.base.image_param =
+ prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
- c.prog_data.base.base.nr_params = param_count;
- c.prog_data.base.base.nr_image_params = gs->NumImages;
+ prog_data.base.base.nr_params = param_count;
+ prog_data.base.base.nr_image_params = gs->NumImages;
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
- &c.prog_data.base.base, false);
-
- if (brw->gen >= 8) {
- c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
- nir_gs_count_vertices(gp->program.Base.nir);
- }
-
- if (brw->gen >= 7) {
- if (gp->program.OutputType == GL_POINTS) {
- /* When the output type is points, the geometry shader may output data
- * to multiple streams, and EndPrimitive() has no effect. So we
- * configure the hardware to interpret the control data as stream ID.
- */
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
-
- /* We only have to emit control bits if we are using streams */
- if (prog->Geom.UsesStreams)
- c.control_data_bits_per_vertex = 2;
- else
- c.control_data_bits_per_vertex = 0;
- } else {
- /* When the output type is triangle_strip or line_strip, EndPrimitive()
- * may be used to terminate the current strip and start a new one
- * (similar to primitive restart), and outputting data to multiple
- * streams is not supported. So we configure the hardware to interpret
- * the control data as EndPrimitive information (a.k.a. "cut bits").
- */
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
-
- /* We only need to output control data if the shader actually calls
- * EndPrimitive().
- */
- c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
- }
- } else {
- /* There are no control data bits in gen6. */
- c.control_data_bits_per_vertex = 0;
-
- /* If it is using transform feedback, enable it */
- if (prog->TransformFeedback.NumVarying)
- c.prog_data.gen6_xfb_enabled = true;
- else
- c.prog_data.gen6_xfb_enabled = false;
- }
- c.control_data_header_size_bits =
- gp->program.VerticesOut * c.control_data_bits_per_vertex;
-
- /* 1 HWORD = 32 bytes = 256 bits */
- c.prog_data.control_data_header_size_hwords =
- ALIGN(c.control_data_header_size_bits, 256) / 256;
+ &prog_data.base.base, compiler->scalar_gs);
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.prog_data.base.vue_map, outputs_written,
+ &prog_data.base.vue_map, outputs_written,
prog ? prog->SeparateShader : false);
- /* Compute the output vertex size.
- *
- * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
- * Size (p168):
- *
- * [0,62] indicating [1,63] 16B units
- *
- * Specifies the size of each vertex stored in the GS output entry
- * (following any Control Header data) as a number of 128-bit units
- * (minus one).
- *
- * Programming Restrictions: The vertex size must be programmed as a
- * multiple of 32B units with the following exception: Rendering is
- * disabled (as per SOL stage state) and the vertex size output by the
- * GS thread is 16B.
- *
- * If rendering is enabled (as per SOL state) the vertex size must be
- * programmed as a multiple of 32B units. In other words, the only time
- * software can program a vertex size with an odd number of 16B units
- * is when rendering is disabled.
- *
- * Note: B=bytes in the above text.
- *
- * It doesn't seem worth the extra trouble to optimize the case where the
- * vertex size is 16B (especially since this would require special-casing
- * the GEN assembly that writes to the URB). So we just set the vertex
- * size to a multiple of 32B (2 vec4's) in all cases.
- *
- * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
- * budget that as follows:
- *
- * 512 bytes for varyings (a varying component is 4 bytes and
- * gl_MaxGeometryOutputComponents = 128)
- * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
- * bytes)
- * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
- * even if it's not used)
- * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
- * whenever clip planes are enabled, even if the shader doesn't
- * write to gl_ClipDistance)
- * 16 bytes overhead since the VUE size must be a multiple of 32 bytes
- * (see below)--this causes up to 1 VUE slot to be wasted
- * 400 bytes available for varying packing overhead
- *
- * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
- * per interpolation type, so this is plenty.
- *
- */
- unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
- assert(brw->gen == 6 ||
- output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
- c.prog_data.output_vertex_size_hwords =
- ALIGN(output_vertex_size_bytes, 32) / 32;
-
- /* Compute URB entry size. The maximum allowed URB entry size is 32k.
- * That divides up as follows:
- *
- * 64 bytes for the control data header (cut indices or StreamID bits)
- * 4096 bytes for varyings (a varying component is 4 bytes and
- * gl_MaxGeometryTotalOutputComponents = 1024)
- * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
- * bytes/vertex and gl_MaxGeometryOutputVertices is 256)
- * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
- * even if it's not used)
- * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
- * whenever clip planes are enabled, even if the shader doesn't
- * write to gl_ClipDistance)
- * 4096 bytes overhead since the VUE size must be a multiple of 32
- * bytes (see above)--this causes up to 1 VUE slot to be wasted
- * 8128 bytes available for varying packing overhead
- *
- * Worst-case varying packing overhead is 3/4 of a varying slot per
- * interpolation type, which works out to 3072 bytes, so this would allow
- * us to accommodate 2 interpolation types without any danger of running
- * out of URB space.
- *
- * In practice, the risk of running out of URB space is very small, since
- * the above figures are all worst-case, and most of them scale with the
- * number of output vertices. So we'll just calculate the amount of space
- * we need, and if it's too large, fail to compile.
- *
- * The above is for gen7+ where we have a single URB entry that will hold
- * all the output. In gen6, we will have to allocate URB entries for every
- * vertex we emit, so our URB entries only need to be large enough to hold
- * a single vertex. Also, gen6 does not have a control data header.
- */
- unsigned output_size_bytes;
- if (brw->gen >= 7) {
- output_size_bytes =
- c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
- output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
- } else {
- output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
- }
-
- /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
- * which comes before the control header.
- */
- if (brw->gen >= 8)
- output_size_bytes += 32;
-
- assert(output_size_bytes >= 1);
- int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
- if (brw->gen == 6)
- max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
- if (output_size_bytes > max_output_size_bytes)
- return false;
-
-
- /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
- * a multiple of 128 bytes in gen6.
- */
- if (brw->gen >= 7)
- c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
- else
- c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
-
- c.prog_data.output_topology =
- get_hw_prim_for_gl_prim(gp->program.OutputType);
-
- /* The GLSL linker will have already matched up GS inputs and the outputs
- * of prior stages. The driver does extend VS outputs in some cases, but
- * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
- * geometry shader support. So we can safely ignore that.
- *
- * For SSO pipelines, we use a fixed VUE map layout based on variable
- * locations, so we can rely on rendezvous-by-location making this work.
- *
- * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
- * written by previous stages and shows up via payload magic.
- */
- GLbitfield64 inputs_read =
- gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
- brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.input_vue_map, inputs_read,
- prog->SeparateShader);
-
- /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
- * need to program a URB read length of ceiling(num_slots / 2).
- */
- c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
-
if (unlikely(INTEL_DEBUG & DEBUG_GS))
brw_dump_ir("geometry", prog, gs, NULL);
@@ -303,25 +106,25 @@ brw_codegen_gs_prog(struct brw_context *brw,
unsigned program_size;
char *error_str;
const unsigned *program =
- brw_compile_gs(brw->intelScreen->compiler, brw, &c,
- shader->Program->nir, prog,
- mem_ctx, st_index, &program_size, &error_str);
+ brw_compile_gs(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data, shader->Program->nir, prog,
+ st_index, &program_size, &error_str);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
}
/* Scratch space is used for register spilling */
- if (c.prog_data.base.base.total_scratch) {
+ if (prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- c.prog_data.base.base.total_scratch *
+ prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
- &c.key, sizeof(c.key),
+ key, sizeof(*key),
program, program_size,
- &c.prog_data, sizeof(c.prog_data),
+ &prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->gs.prog_data);
ralloc_free(mem_ctx);
diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index ab37b709d65..4ed95c473cd 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -181,7 +181,8 @@ F(saturate, 31, 31)
F(debug_control, 30, 30)
F(cmpt_control, 29, 29)
FC(branch_control, 28, 28, devinfo->gen >= 8)
-F(acc_wr_control, 28, 28)
+FC(acc_wr_control, 28, 28, devinfo->gen >= 6)
+FC(mask_control_ex, 28, 28, devinfo->is_g4x || devinfo->gen == 5)
F(cond_modifier, 27, 24)
FC(math_function, 27, 24, devinfo->gen >= 6)
F(exec_size, 23, 21)
@@ -392,6 +393,7 @@ FF(urb_per_slot_offset,
/* 4-6: */ -1, -1, -1, -1, -1, -1, -1, -1,
/* 7: */ MD(16), MD(16),
/* 8: */ MD(17), MD(17))
+FC(urb_channel_mask_present, MD(15), MD(15), devinfo->gen >= 8)
FC(urb_complete, MD(15), MD(15), devinfo->gen < 8)
FC(urb_used, MD(14), MD(14), devinfo->gen < 7)
FC(urb_allocate, MD(13), MD(13), devinfo->gen < 7)
@@ -738,7 +740,7 @@ typedef struct {
* Bits indices range from 0..63.
*/
static inline unsigned
-brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low)
+brw_compact_inst_bits(const brw_compact_inst *inst, unsigned high, unsigned low)
{
const uint64_t mask = (1ull << (high - low + 1)) - 1;
@@ -762,56 +764,65 @@ brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
inst->data = (inst->data & ~mask) | (value << low);
}
-#define F(name, high, low) \
-static inline void \
-brw_compact_inst_set_##name(brw_compact_inst *inst, unsigned v) \
-{ \
- brw_compact_inst_set_bits(inst, high, low, v); \
-} \
- \
-static inline unsigned \
-brw_compact_inst_##name(brw_compact_inst *inst) \
-{ \
- return brw_compact_inst_bits(inst, high, low); \
+#define FC(name, high, low, assertions) \
+static inline void \
+brw_compact_inst_set_##name(const struct brw_device_info *devinfo, \
+ brw_compact_inst *inst, unsigned v) \
+{ \
+ assert(assertions); \
+ (void) devinfo; \
+ brw_compact_inst_set_bits(inst, high, low, v); \
+} \
+static inline unsigned \
+brw_compact_inst_##name(const struct brw_device_info *devinfo, \
+ const brw_compact_inst *inst) \
+{ \
+ assert(assertions); \
+ (void) devinfo; \
+ return brw_compact_inst_bits(inst, high, low); \
}
-F(src1_reg_nr, 63, 56)
-F(src0_reg_nr, 55, 48)
-F(dst_reg_nr, 47, 40)
-F(src1_index, 39, 35)
-F(src0_index, 34, 30)
-F(cmpt_control, 29, 29) /* Same location as brw_inst */
-F(flag_subreg_nr, 28, 28) /* <= Gen6 only */
-F(cond_modifier, 27, 24) /* Same location as brw_inst */
-F(acc_wr_control, 23, 23)
-F(subreg_index, 22, 18)
-F(datatype_index, 17, 13)
-F(control_index, 12, 8)
-F(debug_control, 7, 7)
-F(opcode, 6, 0) /* Same location as brw_inst */
+/* A simple macro for fields which stay in the same place on all generations. */
+#define F(name, high, low) FC(name, high, low, true)
+
+F(src1_reg_nr, 63, 56)
+F(src0_reg_nr, 55, 48)
+F(dst_reg_nr, 47, 40)
+F(src1_index, 39, 35)
+F(src0_index, 34, 30)
+F(cmpt_control, 29, 29) /* Same location as brw_inst */
+FC(flag_subreg_nr, 28, 28, devinfo->gen <= 6)
+F(cond_modifier, 27, 24) /* Same location as brw_inst */
+FC(acc_wr_control, 23, 23, devinfo->gen >= 6)
+FC(mask_control_ex, 23, 23, devinfo->is_g4x || devinfo->gen == 5)
+F(subreg_index, 22, 18)
+F(datatype_index, 17, 13)
+F(control_index, 12, 8)
+F(debug_control, 7, 7)
+F(opcode, 6, 0) /* Same location as brw_inst */
/**
* (Gen8+) Compacted three-source instructions:
* @{
*/
-F(3src_src2_reg_nr, 63, 57)
-F(3src_src1_reg_nr, 56, 50)
-F(3src_src0_reg_nr, 49, 43)
-F(3src_src2_subreg_nr, 42, 40)
-F(3src_src1_subreg_nr, 39, 37)
-F(3src_src0_subreg_nr, 36, 34)
-F(3src_src2_rep_ctrl, 33, 33)
-F(3src_src1_rep_ctrl, 32, 32)
-F(3src_saturate, 31, 31)
-F(3src_debug_control, 30, 30)
-F(3src_cmpt_control, 29, 29)
-F(3src_src0_rep_ctrl, 28, 28)
+FC(3src_src2_reg_nr, 63, 57, devinfo->gen >= 8)
+FC(3src_src1_reg_nr, 56, 50, devinfo->gen >= 8)
+FC(3src_src0_reg_nr, 49, 43, devinfo->gen >= 8)
+FC(3src_src2_subreg_nr, 42, 40, devinfo->gen >= 8)
+FC(3src_src1_subreg_nr, 39, 37, devinfo->gen >= 8)
+FC(3src_src0_subreg_nr, 36, 34, devinfo->gen >= 8)
+FC(3src_src2_rep_ctrl, 33, 33, devinfo->gen >= 8)
+FC(3src_src1_rep_ctrl, 32, 32, devinfo->gen >= 8)
+FC(3src_saturate, 31, 31, devinfo->gen >= 8)
+FC(3src_debug_control, 30, 30, devinfo->gen >= 8)
+FC(3src_cmpt_control, 29, 29, devinfo->gen >= 8)
+FC(3src_src0_rep_ctrl, 28, 28, devinfo->gen >= 8)
/* Reserved */
-F(3src_dst_reg_nr, 18, 12)
-F(3src_source_index, 11, 10)
-F(3src_control_index, 9, 8)
+FC(3src_dst_reg_nr, 18, 12, devinfo->gen >= 8)
+FC(3src_source_index, 11, 10, devinfo->gen >= 8)
+FC(3src_control_index, 9, 8, devinfo->gen >= 8)
/* Bit 7 is Reserved (for future Opcode expansion) */
-F(3src_opcode, 6, 0)
+FC(3src_opcode, 6, 0, devinfo->gen >= 8)
/** @} */
#undef F
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 22b0227756e..6433dec9041 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -91,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
- return _mesa_init_gl_program(&prog->program, target, id);
+ return _mesa_init_gl_program(&prog->program.Base, target, id);
} else {
return NULL;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6be2a6e5b55..e48f559afa7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -79,6 +79,8 @@ is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
case MESA_SHADER_FRAGMENT:
case MESA_SHADER_COMPUTE:
return true;
+ case MESA_SHADER_GEOMETRY:
+ return compiler->scalar_gs;
case MESA_SHADER_VERTEX:
return compiler->scalar_vs;
default:
@@ -101,6 +103,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
compiler->scalar_vs = true;
+ if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false))
+ compiler->scalar_gs = true;
+
nir_shader_compiler_options *nir_options =
rzalloc(compiler, nir_shader_compiler_options);
nir_options->native_integers = true;
@@ -411,6 +416,14 @@ brw_instruction_name(enum opcode op)
return "gen7_scratch_read";
case SHADER_OPCODE_URB_WRITE_SIMD8:
return "gen8_urb_write_simd8";
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ return "gen8_urb_write_simd8_per_slot";
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ return "gen8_urb_write_simd8_masked";
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+ return "gen8_urb_write_simd8_masked_per_slot";
+ case SHADER_OPCODE_URB_READ_SIMD8:
+ return "urb_read_simd8";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
@@ -964,6 +977,9 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_BARRIER:
return true;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 2e47690d403..8899b30c1ae 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -233,6 +233,18 @@ bool opt_predicated_break(struct backend_shader *s);
extern "C" {
#endif
+/**
+ * Scratch data used when compiling a GLSL geometry shader.
+ */
+struct brw_gs_compile
+{
+ struct brw_gs_prog_key key;
+ struct brw_vue_map input_vue_map;
+
+ unsigned control_data_bits_per_vertex;
+ unsigned control_data_header_size_bits;
+};
+
void
brw_assign_common_binding_table_offsets(gl_shader_stage stage,
const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 2955c8dcc2e..a2948293a62 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -40,36 +40,32 @@
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
static unsigned int
-tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
- const struct intel_mipmap_tree *mt)
+tr_mode_horizontal_texture_alignment(const struct intel_mipmap_tree *mt)
{
- const unsigned *align_yf, *align_ys;
- const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
- unsigned ret_align, divisor;
+ unsigned ret_align, divisor, multiplier_ys;
- /* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below
- * tables specifies the horizontal alignment requirement in elements
- * for the surface. An element is defined as a pixel in uncompressed
- * surface formats, and as a compression block in compressed surface
- * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
+   /* Values in below tables specify the horizontal alignment requirement
+ * in elements for TRMODE_YF surface. An element is defined as a pixel in
+ * uncompressed surface formats, and as a compression block in compressed
+ * surface formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
* element is a sample.
*/
const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256};
- const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096};
const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};
- const unsigned align_2d_ys[] = {256, 256, 128, 128, 64};
const unsigned align_3d_yf[] = {16, 8, 8, 8, 4};
- const unsigned align_3d_ys[] = {64, 32, 32, 32, 16};
- int i = 0;
- /* Alignment computations below assume bpp >= 8 and a power of 2. */
- assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
+ assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
+
+ /* Alignment computations below assume a power of 2 cpp. */
+ assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp));
+ /* Compute array index. */
+ const int i = ffs(mt->cpp) - 1;
switch(mt->target) {
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
- align_yf = align_1d_yf;
- align_ys = align_1d_ys;
+ ret_align = align_1d_yf[i];
+ multiplier_ys = 16;
break;
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE:
@@ -78,22 +74,19 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- align_yf = align_2d_yf;
- align_ys = align_2d_ys;
+ ret_align = align_2d_yf[i];
+ multiplier_ys = 4;
break;
case GL_TEXTURE_3D:
- align_yf = align_3d_yf;
- align_ys = align_3d_ys;
+ ret_align = align_3d_yf[i];
+ multiplier_ys = 4;
break;
default:
unreachable("not reached");
}
- /* Compute array index. */
- i = ffs(bpp/8) - 1;
-
- ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
- align_yf[i] : align_ys[i];
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
+ ret_align *= multiplier_ys;
assert(_mesa_is_pow_two(mt->num_samples));
@@ -148,26 +141,20 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw,
}
static unsigned int
-tr_mode_vertical_texture_alignment(const struct brw_context *brw,
- const struct intel_mipmap_tree *mt)
+tr_mode_vertical_texture_alignment(const struct intel_mipmap_tree *mt)
{
- const unsigned *align_yf, *align_ys;
- const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
- unsigned ret_align, divisor;
+ unsigned ret_align, divisor, multiplier_ys;
- /* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */
+ /* Vertical alignment tables for TRMODE_YF */
const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
- const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
const unsigned align_3d_yf[] = {16, 16, 16, 8, 8};
- const unsigned align_3d_ys[] = {32, 32, 32, 16, 16};
- int i = 0;
- assert(brw->gen >= 9 &&
- mt->target != GL_TEXTURE_1D &&
- mt->target != GL_TEXTURE_1D_ARRAY);
+ assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
- /* Alignment computations below assume bpp >= 8 and a power of 2. */
- assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ;
+ /* Alignment computations below assume a power of 2 cpp. */
+ assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp)) ;
+ /* Compute array index. */
+ const int i = ffs(mt->cpp) - 1;
switch(mt->target) {
case GL_TEXTURE_2D:
@@ -177,22 +164,21 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw,
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- align_yf = align_2d_yf;
- align_ys = align_2d_ys;
+ ret_align = align_2d_yf[i];
+ multiplier_ys = 4;
break;
case GL_TEXTURE_3D:
- align_yf = align_3d_yf;
- align_ys = align_3d_ys;
+ ret_align = align_3d_yf[i];
+ multiplier_ys = 2;
break;
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_1D_ARRAY:
default:
- unreachable("not reached");
+ unreachable("Unexpected miptree target");
}
- /* Compute array index. */
- i = ffs(bpp / 8) - 1;
-
- ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
- align_yf[i] : align_ys[i];
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
+ ret_align *= multiplier_ys;
assert(_mesa_is_pow_two(mt->num_samples));
@@ -779,8 +765,8 @@ intel_miptree_set_alignment(struct brw_context *brw,
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
* vertical alignment < 64. */
- mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
- mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
+ mt->halign = MAX2(tr_mode_horizontal_texture_alignment(mt), 32);
+ mt->valign = MAX2(tr_mode_vertical_texture_alignment(mt), 64);
} else {
mt->halign =
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index befc92445d3..3e7078d0b32 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1111,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
*/
vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev;
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
- inst, block) {
+ inst) {
_scan_inst = scan_inst;
if (inst->src[0].in_range(scan_inst->dst, scan_inst->regs_written)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index 1b929b3df2c..6bc39473137 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -104,7 +104,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
case nir_intrinsic_load_primitive_id:
- assert(c->prog_data.include_primitive_id);
+ assert(gs_prog_data->include_primitive_id);
dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index a715cf5a6cb..9402489e628 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -35,14 +35,16 @@ namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
: vec4_visitor(compiler, log_data, &c->key.tex,
- &c->prog_data.base, shader, mem_ctx,
+ &prog_data->base, shader, mem_ctx,
no_spills, shader_time_index),
- c(c)
+ c(c),
+ gs_prog_data(prog_data)
{
}
@@ -78,9 +80,9 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
* so the total number of input slots that will be delivered to the GS (and
* thus the stride of the input arrays) is urb_read_length * 2.
*/
- const unsigned num_input_vertices = c->gp->program.VerticesIn;
+ const unsigned num_input_vertices = nir->info.gs.vertices_in;
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
- unsigned input_array_stride = c->prog_data.base.urb_read_length * 2;
+ unsigned input_array_stride = prog_data->urb_read_length * 2;
for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
int varying = c->input_vue_map.slot_to_varying[slot];
@@ -106,7 +108,7 @@ vec4_gs_visitor::setup_payload()
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
- c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
/* If a geometry shader tries to read from an input that wasn't written by
* the vertex shader, that produces undefined results, but it shouldn't
@@ -124,7 +126,7 @@ vec4_gs_visitor::setup_payload()
reg++;
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
- if (c->prog_data.include_primitive_id)
+ if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
reg = setup_uniforms(reg);
@@ -182,9 +184,9 @@ vec4_gs_visitor::emit_prolog()
* to account for the fact that the vertex shader stored it in the w
* component of VARYING_SLOT_PSIZ.
*/
- if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) {
+ if (nir->info.inputs_read & VARYING_BIT_PSIZ) {
this->current_annotation = "swizzle gl_PointSize input";
- for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) {
+ for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) {
dst_reg dst(ATTR,
BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
dst.type = BRW_REGISTER_TYPE_F;
@@ -222,7 +224,7 @@ vec4_gs_visitor::emit_thread_end()
*/
int base_mrf = 1;
- bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+ bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
/* If the previous instruction was a URB write, we don't need to issue
* a second one - we can just set the EOT bit on the previous write.
@@ -271,7 +273,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
- (uint32_t) c->prog_data.output_vertex_size_hwords);
+ (uint32_t) gs_prog_data->output_vertex_size_hwords);
}
@@ -285,12 +287,12 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
(void) complete;
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
- inst->offset = c->prog_data.control_data_header_size_hwords;
+ inst->offset = gs_prog_data->control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
- if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
+ if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
@@ -409,7 +411,7 @@ vec4_gs_visitor::emit_control_data_bits()
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
- if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
+ if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
@@ -536,7 +538,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
* do for GL_POINTS outputs that don't use streams).
*/
if (c->control_data_header_size_bits > 0 &&
- c->prog_data.control_data_format ==
+ gs_prog_data->control_data_format ==
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
this->current_annotation = "emit vertex: Stream control data bits";
set_stream_control_data_bits(stream_id);
@@ -552,7 +554,7 @@ vec4_gs_visitor::gs_end_primitive()
* consists of cut bits. Fortunately, the only time it isn't is when the
* output type is points, in which case EndPrimitive() is a no-op.
*/
- if (c->prog_data.control_data_format !=
+ if (gs_prog_data->control_data_format !=
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
return;
}
@@ -598,27 +600,231 @@ vec4_gs_visitor::gs_end_primitive()
extern "C" const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
- struct brw_gs_compile *c,
+ void *mem_ctx,
+ const struct brw_gs_prog_key *key,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
struct gl_shader_program *shader_prog,
- void *mem_ctx,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str)
{
+ struct brw_gs_compile c;
+ memset(&c, 0, sizeof(c));
+ c.key = *key;
+
+ prog_data->include_primitive_id =
+ (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
+
+ prog_data->invocations = shader->info.gs.invocations;
+
+ if (compiler->devinfo->gen >= 8)
+ prog_data->static_vertex_count = nir_gs_count_vertices(shader);
+
+ if (compiler->devinfo->gen >= 7) {
+ if (shader->info.gs.output_primitive == GL_POINTS) {
+ /* When the output type is points, the geometry shader may output data
+ * to multiple streams, and EndPrimitive() has no effect. So we
+ * configure the hardware to interpret the control data as stream ID.
+ */
+ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+ /* We only have to emit control bits if we are using streams */
+ if (shader_prog && shader_prog->Geom.UsesStreams)
+ c.control_data_bits_per_vertex = 2;
+ else
+ c.control_data_bits_per_vertex = 0;
+ } else {
+ /* When the output type is triangle_strip or line_strip, EndPrimitive()
+ * may be used to terminate the current strip and start a new one
+ * (similar to primitive restart), and outputting data to multiple
+ * streams is not supported. So we configure the hardware to interpret
+ * the control data as EndPrimitive information (a.k.a. "cut bits").
+ */
+ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+ /* We only need to output control data if the shader actually calls
+ * EndPrimitive().
+ */
+ c.control_data_bits_per_vertex =
+ shader->info.gs.uses_end_primitive ? 1 : 0;
+ }
+ } else {
+ /* There are no control data bits in gen6. */
+ c.control_data_bits_per_vertex = 0;
+
+ /* If it is using transform feedback, enable it */
+ if (shader->info.has_transform_feedback_varyings)
+ prog_data->gen6_xfb_enabled = true;
+ else
+ prog_data->gen6_xfb_enabled = false;
+ }
+ c.control_data_header_size_bits =
+ shader->info.gs.vertices_out * c.control_data_bits_per_vertex;
+
+ /* 1 HWORD = 32 bytes = 256 bits */
+ prog_data->control_data_header_size_hwords =
+ ALIGN(c.control_data_header_size_bits, 256) / 256;
+
+ /* Compute the output vertex size.
+ *
+ * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex
+ * Size (p168):
+ *
+ * [0,62] indicating [1,63] 16B units
+ *
+ * Specifies the size of each vertex stored in the GS output entry
+ * (following any Control Header data) as a number of 128-bit units
+ * (minus one).
+ *
+ * Programming Restrictions: The vertex size must be programmed as a
+ * multiple of 32B units with the following exception: Rendering is
+ * disabled (as per SOL stage state) and the vertex size output by the
+ * GS thread is 16B.
+ *
+ * If rendering is enabled (as per SOL state) the vertex size must be
+ * programmed as a multiple of 32B units. In other words, the only time
+ * software can program a vertex size with an odd number of 16B units
+ * is when rendering is disabled.
+ *
+ * Note: B=bytes in the above text.
+ *
+ * It doesn't seem worth the extra trouble to optimize the case where the
+ * vertex size is 16B (especially since this would require special-casing
+ * the GEN assembly that writes to the URB). So we just set the vertex
+ * size to a multiple of 32B (2 vec4's) in all cases.
+ *
+ * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We
+ * budget that as follows:
+ *
+ * 512 bytes for varyings (a varying component is 4 bytes and
+ * gl_MaxGeometryOutputComponents = 128)
+ * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+ * bytes)
+ * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE
+ * even if it's not used)
+ * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+ * whenever clip planes are enabled, even if the shader doesn't
+ * write to gl_ClipDistance)
+ * 16 bytes overhead since the VUE size must be a multiple of 32 bytes
+ * (see below)--this causes up to 1 VUE slot to be wasted
+ * 400 bytes available for varying packing overhead
+ *
+ * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes)
+ * per interpolation type, so this is plenty.
+ *
+ */
+ unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16;
+ assert(compiler->devinfo->gen == 6 ||
+ output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
+ prog_data->output_vertex_size_hwords =
+ ALIGN(output_vertex_size_bytes, 32) / 32;
+
+ /* Compute URB entry size. The maximum allowed URB entry size is 32k.
+ * That divides up as follows:
+ *
+ * 64 bytes for the control data header (cut indices or StreamID bits)
+ * 4096 bytes for varyings (a varying component is 4 bytes and
+ * gl_MaxGeometryTotalOutputComponents = 1024)
+ * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16
+ * bytes/vertex and gl_MaxGeometryOutputVertices is 256)
+ * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE
+ * even if it's not used)
+ * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots
+ * whenever clip planes are enabled, even if the shader doesn't
+ * write to gl_ClipDistance)
+ * 4096 bytes overhead since the VUE size must be a multiple of 32
+ * bytes (see above)--this causes up to 1 VUE slot to be wasted
+ * 8128 bytes available for varying packing overhead
+ *
+ * Worst-case varying packing overhead is 3/4 of a varying slot per
+ * interpolation type, which works out to 3072 bytes, so this would allow
+ * us to accommodate 2 interpolation types without any danger of running
+ * out of URB space.
+ *
+ * In practice, the risk of running out of URB space is very small, since
+ * the above figures are all worst-case, and most of them scale with the
+ * number of output vertices. So we'll just calculate the amount of space
+ * we need, and if it's too large, fail to compile.
+ *
+ * The above is for gen7+ where we have a single URB entry that will hold
+ * all the output. In gen6, we will have to allocate URB entries for every
+ * vertex we emit, so our URB entries only need to be large enough to hold
+ * a single vertex. Also, gen6 does not have a control data header.
+ */
+ unsigned output_size_bytes;
+ if (compiler->devinfo->gen >= 7) {
+ output_size_bytes =
+ prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out;
+ output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
+ } else {
+ output_size_bytes = prog_data->output_vertex_size_hwords * 32;
+ }
+
+ /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
+ * which comes before the control header.
+ */
+ if (compiler->devinfo->gen >= 8)
+ output_size_bytes += 32;
+
+ assert(output_size_bytes >= 1);
+ int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (compiler->devinfo->gen == 6)
+ max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ if (output_size_bytes > max_output_size_bytes)
+ return false;
+
+
+ /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
+ * a multiple of 128 bytes in gen6.
+ */
+ if (compiler->devinfo->gen >= 7)
+ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+ else
+ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+
+ prog_data->output_topology =
+ get_hw_prim_for_gl_prim(shader->info.gs.output_primitive);
+
+ /* The GLSL linker will have already matched up GS inputs and the outputs
+ * of prior stages. The driver does extend VS outputs in some cases, but
+ * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+ * geometry shader support. So we can safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location making this work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ GLbitfield64 inputs_read =
+ shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
+ brw_compute_vue_map(compiler->devinfo,
+ &c.input_vue_map, inputs_read,
+ shader->info.separate_shader);
+
+ /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
+ * need to program a URB read length of ceiling(num_slots / 2).
+ */
+ prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
+
+ /* Now that prog_data setup is done, we are ready to actually compile the
+ * program.
+ */
+
if (compiler->devinfo->gen >= 7) {
/* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
* so without spilling. If the GS invocations count > 1, then we can't use
* dual object mode.
*/
- if (c->prog_data.invocations <= 1 &&
+ if (prog_data->invocations <= 1 &&
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(compiler, log_data, c, shader,
+ vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
return g.generate_assembly(v.cfg, final_assembly_size, shader);
}
@@ -648,28 +854,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
* mode is more performant when invocations > 1. Gen6 only supports
* SINGLE mode.
*/
- if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
+ if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7)
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
if (compiler->devinfo->gen >= 7)
- gs = new vec4_gs_visitor(compiler, log_data, c, shader,
- mem_ctx, false /* no_spills */,
+ gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
+ shader, mem_ctx, false /* no_spills */,
shader_time_index);
else
- gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
- mem_ctx, false /* no_spills */,
+ gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog,
+ shader, mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index c52552768c8..6ca83a9d9a3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -41,6 +41,7 @@ public:
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
@@ -70,6 +71,7 @@ protected:
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
+ struct brw_gs_prog_data * const gs_prog_data;
};
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5be9c6a6b2d..6d155285820 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1222,6 +1222,9 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
void
vec4_visitor::emit_ndc_computation()
{
+ if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
+ return;
+
/* Get the position */
src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
@@ -1287,7 +1290,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (devinfo->has_negative_rhw_bug) {
+ if (devinfo->has_negative_rhw_bug &&
+ output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
@@ -1335,8 +1339,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
assert(varying < VARYING_SLOT_MAX);
assert(output_reg[varying].type == reg.type);
current_annotation = output_reg_annotation[varying];
- /* Copy the register, saturating if necessary */
- return emit(MOV(reg, src_reg(output_reg[varying])));
+ if (output_reg[varying].file != BAD_FILE)
+ return emit(MOV(reg, src_reg(output_reg[varying])));
+ else
+ return NULL;
}
void
@@ -1355,11 +1361,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
}
case BRW_VARYING_SLOT_NDC:
current_annotation = "NDC";
- emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
+ if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
break;
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
- emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
+ if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 485a80ee2fc..5dd4f98cecc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -217,7 +217,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
* shader.
*/
vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
- if (key->clamp_vertex_color)
+ if (inst && key->clamp_vertex_color)
inst->saturate = true;
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index ba680a98f7e..5db4b3a86af 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -312,7 +312,7 @@ brw_vs_populate_key(struct brw_context *brw,
if (ctx->Transform.ClipPlanesEnabled != 0 &&
ctx->API == API_OPENGL_COMPAT &&
- !vp->program.Base.UsesClipDistanceOut) {
+ vp->program.Base.ClipDistanceArraySize == 0) {
key->nr_userclip_plane_consts =
_mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
}
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 671a535a5bd..2fef188c17e 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -63,7 +63,7 @@ gen6_gs_visitor::emit_prolog()
this->vertex_output = src_reg(this,
glsl_type::uint_type,
(prog_data->vue_map.num_slots + 1) *
- c->gp->program.VerticesOut);
+ nir->info.gs.vertices_out);
this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
@@ -95,7 +95,7 @@ gen6_gs_visitor::emit_prolog()
this->prim_count = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->prim_count), 0u));
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
/* Create a virtual register to hold destination indices in SOL */
this->destination_indices = src_reg(this, glsl_type::uvec4_type);
/* Create a virtual register to hold number of written primitives */
@@ -128,7 +128,7 @@ gen6_gs_visitor::emit_prolog()
* in the 3DSTATE_GS state packet. That information can be obtained by other
* means though, so we can safely use r1 for this purpose.
*/
- if (c->prog_data.include_primitive_id) {
+ if (gs_prog_data->include_primitive_id) {
this->primitive_id =
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
@@ -177,7 +177,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- if (c->gp->program.OutputType == GL_POINTS) {
+ if (nir->info.gs.output_primitive == GL_POINTS) {
/* If we are outputting points, then every vertex has PrimStart and
* PrimEnd set.
*/
@@ -191,7 +191,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
* vertex.
*/
emit(OR(dst, this->first_vertex,
- (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+ (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
emit(MOV(dst_reg(this->first_vertex), 0u));
}
emit(ADD(dst_reg(this->vertex_output_offset),
@@ -205,7 +205,7 @@ gen6_gs_visitor::gs_end_primitive()
/* Calling EndPrimitive() is optional for point output. In this case we set
* the PrimEnd flag when we process EmitVertex().
*/
- if (c->gp->program.OutputType == GL_POINTS)
+ if (nir->info.gs.output_primitive == GL_POINTS)
return;
/* Otherwise we know that the last vertex we have processed was the last
@@ -217,7 +217,7 @@ gen6_gs_visitor::gs_end_primitive()
* comparison below (hence the num_output_vertices + 1 in the comparison
* below).
*/
- unsigned num_output_vertices = c->gp->program.VerticesOut;
+ unsigned num_output_vertices = nir->info.gs.vertices_out;
emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
BRW_CONDITIONAL_L));
vec4_instruction *inst = emit(CMP(dst_null_d(),
@@ -320,7 +320,7 @@ gen6_gs_visitor::emit_thread_end()
* first_vertex is not zero. This is only relevant for outputs other than
* points because in the point case we set PrimEnd on all vertices.
*/
- if (c->gp->program.OutputType != GL_POINTS) {
+ if (nir->info.gs.output_primitive != GL_POINTS) {
emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_NORMAL));
gs_end_primitive();
@@ -353,7 +353,7 @@ gen6_gs_visitor::emit_thread_end()
this->current_annotation = "gen6 thread end: ff_sync";
vec4_instruction *inst;
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
src_reg sol_temp(this, glsl_type::uvec4_type);
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
dst_reg(this->svbi),
@@ -443,7 +443,7 @@ gen6_gs_visitor::emit_thread_end()
}
emit(BRW_OPCODE_WHILE);
- if (c->prog_data.gen6_xfb_enabled)
+ if (gs_prog_data->gen6_xfb_enabled)
xfb_write();
}
emit(BRW_OPCODE_ENDIF);
@@ -465,7 +465,7 @@ gen6_gs_visitor::emit_thread_end()
*/
this->current_annotation = "gen6 thread end: EOT";
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_type::uint_type);
emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
@@ -507,7 +507,7 @@ gen6_gs_visitor::setup_payload()
* information (and move the original value to a virtual register if
* necessary).
*/
- if (c->prog_data.include_primitive_id)
+ if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
reg++;
@@ -530,9 +530,6 @@ gen6_gs_visitor::xfb_setup()
BRW_SWIZZLE4(3, 3, 3, 3)
};
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
-
const struct gl_transform_feedback_info *linked_xfb_info =
&this->shader_prog->LinkedTransformFeedback;
int i;
@@ -548,11 +545,11 @@ gen6_gs_visitor::xfb_setup()
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
- prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
- for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
- prog_data->transform_feedback_bindings[i] =
+ gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+ for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
+ gs_prog_data->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
- prog_data->transform_feedback_swizzles[i] =
+ gs_prog_data->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
@@ -561,13 +558,11 @@ void
gen6_gs_visitor::xfb_write()
{
unsigned num_verts;
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
- if (!prog_data->num_transform_feedback_bindings)
+ if (!gs_prog_data->num_transform_feedback_bindings)
return;
- switch (c->prog_data.output_topology) {
+ switch (gs_prog_data->output_topology) {
case _3DPRIM_POINTLIST:
num_verts = 1;
break;
@@ -627,7 +622,7 @@ gen6_gs_visitor::xfb_write()
emit(BRW_OPCODE_ENDIF);
/* Write transform feedback data for all processed vertices. */
- for (int i = 0; i < c->gp->program.VerticesOut; i++) {
+ for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) {
emit(MOV(dst_reg(sol_temp), i));
emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
BRW_CONDITIONAL_L));
@@ -642,10 +637,8 @@ gen6_gs_visitor::xfb_write()
void
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
{
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
unsigned binding;
- unsigned num_bindings = prog_data->num_transform_feedback_bindings;
+ unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
src_reg sol_temp(this, glsl_type::uvec4_type);
/* Check for buffer overflow: we need room to write the complete primitive
@@ -666,7 +659,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
*/
for (binding = 0; binding < num_bindings; ++binding) {
unsigned char varying =
- prog_data->transform_feedback_bindings[binding];
+ gs_prog_data->transform_feedback_bindings[binding];
/* Set up the correct destination index for this vertex */
vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
@@ -704,7 +697,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
else if (varying == VARYING_SLOT_VIEWPORT)
data.swizzle = BRW_SWIZZLE_ZZZZ;
else
- data.swizzle = prog_data->transform_feedback_swizzles[binding];
+ data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
/* Write data */
inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
index d02c67d8a74..311cf06833c 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -38,12 +38,13 @@ public:
gen6_gs_visitor(const struct brw_compiler *comp,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
struct gl_shader_program *prog,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
- vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
+ vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills,
shader_time_index),
shader_prog(prog)
{
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index d766ca7bebf..6738e85eaba 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -68,6 +68,8 @@ gen8_upload_gs_state(struct brw_context *brw)
GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
(brw->gs.prog_data->output_topology <<
GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
+ (prog_data->include_vue_handles ?
+ GEN7_GS_INCLUDE_VERTEX_HANDLES : 0) |
(prog_data->urb_read_length <<
GEN6_GS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index e8059c7b260..2b65b2ea949 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -1400,7 +1400,7 @@ save_BlendFunci(GLuint buf, GLenum sfactor, GLenum dfactor)
GET_CURRENT_CONTEXT(ctx);
Node *n;
ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
- n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_SEPARATE_I, 3);
+ n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_I, 3);
if (n) {
n[1].ui = buf;
n[2].e = sfactor;
@@ -9741,6 +9741,46 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
n[3].f, n[4].f, n[5].f, n[6].f,
get_pointer(&n[7]));
break;
+ case OPCODE_BLEND_COLOR:
+ fprintf(f, "BlendColor %f, %f, %f, %f\n",
+ n[1].f, n[2].f, n[3].f, n[4].f);
+ break;
+ case OPCODE_BLEND_EQUATION:
+ fprintf(f, "BlendEquation %s\n",
+ enum_string(n[1].e));
+ break;
+ case OPCODE_BLEND_EQUATION_SEPARATE:
+ fprintf(f, "BlendEquationSeparate %s, %s\n",
+ enum_string(n[1].e),
+ enum_string(n[2].e));
+ break;
+ case OPCODE_BLEND_FUNC_SEPARATE:
+ fprintf(f, "BlendFuncSeparate %s, %s, %s, %s\n",
+ enum_string(n[1].e),
+ enum_string(n[2].e),
+ enum_string(n[3].e),
+ enum_string(n[4].e));
+ break;
+ case OPCODE_BLEND_EQUATION_I:
+ fprintf(f, "BlendEquationi %u, %s\n",
+ n[1].ui, enum_string(n[2].e));
+ break;
+ case OPCODE_BLEND_EQUATION_SEPARATE_I:
+ fprintf(f, "BlendEquationSeparatei %u, %s, %s\n",
+ n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
+ break;
+ case OPCODE_BLEND_FUNC_I:
+ fprintf(f, "BlendFunci %u, %s, %s\n",
+ n[1].ui, enum_string(n[2].e), enum_string(n[3].e));
+ break;
+ case OPCODE_BLEND_FUNC_SEPARATE_I:
+ fprintf(f, "BlendFuncSeparatei %u, %s, %s, %s, %s\n",
+ n[1].ui,
+ enum_string(n[2].e),
+ enum_string(n[3].e),
+ enum_string(n[4].e),
+ enum_string(n[5].e));
+ break;
case OPCODE_CALL_LIST:
fprintf(f, "CallList %d\n", (int) n[1].ui);
break;
@@ -9761,6 +9801,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
case OPCODE_LINE_STIPPLE:
fprintf(f, "LineStipple %d %x\n", n[1].i, (int) n[2].us);
break;
+ case OPCODE_LINE_WIDTH:
+ fprintf(f, "LineWidth %f\n", n[1].f);
+ break;
case OPCODE_LOAD_IDENTITY:
fprintf(f, "LoadIdentity\n");
break;
@@ -9790,6 +9833,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
fprintf(f, "Ortho %g %g %g %g %g %g\n",
n[1].f, n[2].f, n[3].f, n[4].f, n[5].f, n[6].f);
break;
+ case OPCODE_POINT_SIZE:
+ fprintf(f, "PointSize %f\n", n[1].f);
+ break;
case OPCODE_POP_ATTRIB:
fprintf(f, "PopAttrib\n");
break;
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index faa63825380..2ed42eaffdd 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2275,45 +2275,16 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
; /* fallthrough */
}
- if (ctx->Extensions.TDFX_texture_compression_FXT1) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB_FXT1_3DFX:
- return GL_RGB;
- case GL_COMPRESSED_RGBA_FXT1_3DFX:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
+ if (_mesa_is_compressed_format(ctx, internalFormat)) {
+ GLenum base_compressed =
+ _mesa_gl_compressed_format_base_format(internalFormat);
+ if (base_compressed)
+ return base_compressed;
}
- /* Assume that the ANGLE flag will always be set if the EXT flag is set.
- */
- if (ctx->Extensions.ANGLE_texture_compression_dxt) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return GL_RGB;
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (_mesa_is_desktop_gl(ctx)
- && ctx->Extensions.ANGLE_texture_compression_dxt) {
- switch (internalFormat) {
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- return GL_RGB;
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
+ if (ctx->Extensions.KHR_texture_compression_astc_ldr &&
+ _mesa_is_astc_format(internalFormat))
+ return GL_RGBA;
if (ctx->Extensions.MESA_ycbcr_texture) {
if (internalFormat == GL_YCBCR_MESA)
@@ -2390,16 +2361,10 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
case GL_SRGB8_EXT:
case GL_COMPRESSED_SRGB_EXT:
return GL_RGB;
- case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
- return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
case GL_SRGB_ALPHA_EXT:
case GL_SRGB8_ALPHA8_EXT:
case GL_COMPRESSED_SRGB_ALPHA_EXT:
return GL_RGBA;
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
case GL_SLUMINANCE_ALPHA_EXT:
case GL_SLUMINANCE8_ALPHA8_EXT:
case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
@@ -2544,104 +2509,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
}
}
- if (ctx->Extensions.ARB_texture_compression_rgtc) {
- switch (internalFormat) {
- case GL_COMPRESSED_RED_RGTC1:
- case GL_COMPRESSED_SIGNED_RED_RGTC1:
- return GL_RED;
- case GL_COMPRESSED_RG_RGTC2:
- case GL_COMPRESSED_SIGNED_RG_RGTC2:
- return GL_RG;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.EXT_texture_compression_latc) {
- switch (internalFormat) {
- case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
- return GL_LUMINANCE;
- case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
- case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.ATI_texture_compression_3dc) {
- switch (internalFormat) {
- case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
- return GL_LUMINANCE_ALPHA;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
- switch (internalFormat) {
- case GL_ETC1_RGB8_OES:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGB8_ETC2:
- case GL_COMPRESSED_SRGB8_ETC2:
- return GL_RGB;
- case GL_COMPRESSED_RGBA8_ETC2_EAC:
- case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
- case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- return GL_RGBA;
- case GL_COMPRESSED_R11_EAC:
- case GL_COMPRESSED_SIGNED_R11_EAC:
- return GL_RED;
- case GL_COMPRESSED_RG11_EAC:
- case GL_COMPRESSED_SIGNED_RG11_EAC:
- return GL_RG;
- default:
- ; /* fallthrough */
- }
- }
-
- if (_mesa_is_desktop_gl(ctx) &&
- ctx->Extensions.ARB_texture_compression_bptc) {
- switch (internalFormat) {
- case GL_COMPRESSED_RGBA_BPTC_UNORM:
- case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
- return GL_RGBA;
- case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
- case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
- return GL_RGB;
- default:
- ; /* fallthrough */
- }
- }
-
- if (ctx->API == API_OPENGLES) {
- switch (internalFormat) {
- case GL_PALETTE4_RGB8_OES:
- case GL_PALETTE4_R5_G6_B5_OES:
- case GL_PALETTE8_RGB8_OES:
- case GL_PALETTE8_R5_G6_B5_OES:
- return GL_RGB;
- case GL_PALETTE4_RGBA8_OES:
- case GL_PALETTE8_RGB5_A1_OES:
- case GL_PALETTE4_RGBA4_OES:
- case GL_PALETTE4_RGB5_A1_OES:
- case GL_PALETTE8_RGBA8_OES:
- case GL_PALETTE8_RGBA4_OES:
- return GL_RGBA;
- default:
- ; /* fallthrough */
- }
- }
-
return -1; /* error */
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index e57b98a412d..ab4fa083672 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1891,7 +1891,7 @@ struct gl_program
* For vertex and geometry shaders, true if the program uses the
* gl_ClipDistance output. Ignored for fragment shaders.
*/
- GLboolean UsesClipDistanceOut;
+ unsigned ClipDistanceArraySize;
/** Named parameters, constants, etc. from program text */
@@ -2619,7 +2619,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into
* gl_tess_eval_program by _mesa_copy_linked_program_data().
*/
- GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
} TessEval;
@@ -2642,7 +2641,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into
* gl_geometry_program by _mesa_copy_linked_program_data().
*/
- GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
bool UsesEndPrimitive;
@@ -2655,7 +2653,6 @@ struct gl_shader_program
* True if gl_ClipDistance is written to. Copied into gl_vertex_program
* by _mesa_copy_linked_program_data().
*/
- GLboolean UsesClipDistance;
GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
0 if not present. */
} Vert;
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 18e463d4ccc..765602e50db 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -2068,7 +2068,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
{
switch (type) {
case MESA_SHADER_VERTEX:
- dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
+ dst->ClipDistanceArraySize = src->Vert.ClipDistanceArraySize;
break;
case MESA_SHADER_TESS_CTRL: {
struct gl_tess_ctrl_program *dst_tcp =
@@ -2083,7 +2083,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
dst_tep->Spacing = src->TessEval.Spacing;
dst_tep->VertexOrder = src->TessEval.VertexOrder;
dst_tep->PointMode = src->TessEval.PointMode;
- dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance;
+ dst->ClipDistanceArraySize = src->TessEval.ClipDistanceArraySize;
break;
}
case MESA_SHADER_GEOMETRY: {
@@ -2093,7 +2093,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
dst_gp->Invocations = src->Geom.Invocations;
dst_gp->InputType = src->Geom.InputType;
dst_gp->OutputType = src->Geom.OutputType;
- dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
+ dst->ClipDistanceArraySize = src->Geom.ClipDistanceArraySize;
dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
dst_gp->UsesStreams = src->Geom.UsesStreams;
break;
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index e50964e79e4..d7671738b18 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -97,16 +97,16 @@ static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
* No pixel transfer operations or special texel encodings allowed.
* 1D, 2D and 3D images supported.
*/
-static void
-memcpy_texture(struct gl_context *ctx,
- GLuint dimensions,
- mesa_format dstFormat,
- GLint dstRowStride,
- GLubyte **dstSlices,
- GLint srcWidth, GLint srcHeight, GLint srcDepth,
- GLenum srcFormat, GLenum srcType,
- const GLvoid *srcAddr,
- const struct gl_pixelstore_attrib *srcPacking)
+void
+_mesa_memcpy_texture(struct gl_context *ctx,
+ GLuint dimensions,
+ mesa_format dstFormat,
+ GLint dstRowStride,
+ GLubyte **dstSlices,
+ GLint srcWidth, GLint srcHeight, GLint srcDepth,
+ GLenum srcFormat, GLenum srcType,
+ const GLvoid *srcAddr,
+ const struct gl_pixelstore_attrib *srcPacking)
{
const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
srcFormat, srcType);
@@ -296,11 +296,11 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS)
assert(baseInternalFormat == GL_YCBCR_MESA);
/* always just memcpy since no pixel transfer ops apply */
- memcpy_texture(ctx, dims,
- dstFormat,
- dstRowStride, dstSlices,
- srcWidth, srcHeight, srcDepth, srcFormat, srcType,
- srcAddr, srcPacking);
+ _mesa_memcpy_texture(ctx, dims,
+ dstFormat,
+ dstRowStride, dstSlices,
+ srcWidth, srcHeight, srcDepth, srcFormat, srcType,
+ srcAddr, srcPacking);
/* Check if we need byte swapping */
/* XXX the logic here _might_ be wrong */
@@ -899,13 +899,15 @@ _mesa_texstore_memcpy(TEXSTORE_PARAMS)
return GL_FALSE;
}
- memcpy_texture(ctx, dims,
- dstFormat,
- dstRowStride, dstSlices,
- srcWidth, srcHeight, srcDepth, srcFormat, srcType,
- srcAddr, srcPacking);
+ _mesa_memcpy_texture(ctx, dims,
+ dstFormat,
+ dstRowStride, dstSlices,
+ srcWidth, srcHeight, srcDepth, srcFormat, srcType,
+ srcAddr, srcPacking);
return GL_TRUE;
}
+
+
/**
* Store user data into texture memory.
* Called via glTex[Sub]Image1/2/3D()
diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h
index 2c974f74afb..f08dc08edde 100644
--- a/src/mesa/main/texstore.h
+++ b/src/mesa/main/texstore.h
@@ -74,6 +74,17 @@ _mesa_texstore_needs_transfer_ops(struct gl_context *ctx,
GLenum baseInternalFormat,
mesa_format dstFormat);
+extern void
+_mesa_memcpy_texture(struct gl_context *ctx,
+ GLuint dimensions,
+ mesa_format dstFormat,
+ GLint dstRowStride,
+ GLubyte **dstSlices,
+ GLint srcWidth, GLint srcHeight, GLint srcDepth,
+ GLenum srcFormat, GLenum srcType,
+ const GLvoid *srcAddr,
+ const struct gl_pixelstore_attrib *srcPacking);
+
extern GLboolean
_mesa_texstore_can_use_memcpy(struct gl_context *ctx,
GLenum baseInternalFormat, mesa_format dstFormat,
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index 0f01e9939de..55d5e66243c 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -239,7 +239,7 @@ static void update_raster_state( struct st_context *st )
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
raster->force_persample_interp =
- st->can_force_persample_interp &&
+ !st->force_persample_in_shader &&
ctx->Multisample._Enabled &&
ctx->Multisample.SampleShading &&
ctx->Multisample.MinSampleShadingValue *
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 1e880a107c0..0f9ea101889 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -64,7 +64,7 @@ update_fp( struct st_context *st )
assert(stfp->Base.Base.Target == GL_FRAGMENT_PROGRAM_ARB);
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
/* _NEW_FRAG_CLAMP */
key.clamp_color = st->clamp_frag_color_in_shader &&
@@ -76,7 +76,7 @@ update_fp( struct st_context *st )
* Ignore sample qualifier while computing this flag.
*/
key.persample_shading =
- !st->can_force_persample_interp &&
+ st->force_persample_in_shader &&
!(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS)) &&
_mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1;
@@ -119,7 +119,7 @@ update_vp( struct st_context *st )
assert(stvp->Base.Base.Target == GL_VERTEX_PROGRAM_ARB);
memset(&key, 0, sizeof key);
- key.st = st; /* variants are per-context */
+ key.st = st->has_shareable_shaders ? NULL : st;
/* When this is true, we will add an extra input to the vertex
* shader translation (for edgeflags), an extra output with
@@ -174,7 +174,7 @@ update_gp( struct st_context *st )
assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st->gp_variant = st_get_gp_variant(st, stgp, &key);
@@ -210,7 +210,7 @@ update_tcp( struct st_context *st )
assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st->tcp_variant = st_get_tcp_variant(st, sttcp, &key);
@@ -246,7 +246,7 @@ update_tep( struct st_context *st )
assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st->tep_variant = st_get_tep_variant(st, sttep, &key);
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index bb6dfe85644..cbc6845d771 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -269,7 +269,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
struct pipe_resource *vbuf = NULL;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
key.bitmap = GL_TRUE;
key.clamp_color = st->clamp_frag_color_in_shader &&
st->ctx->Color._ClampFragmentColor;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 7e8633edc1a..262ad809c58 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -395,15 +395,35 @@ make_texture(struct st_context *st,
* Note that the image is actually going to be upside down in
* the texture. We deal with that with texcoords.
*/
- success = _mesa_texstore(ctx, 2, /* dims */
- baseInternalFormat, /* baseInternalFormat */
- mformat, /* mesa_format */
- transfer->stride, /* dstRowStride, bytes */
- &dest, /* destSlices */
- width, height, 1, /* size */
- format, type, /* src format/type */
- pixels, /* data source */
- unpack);
+ if ((format == GL_RGBA || format == GL_BGRA)
+ && type == GL_UNSIGNED_BYTE) {
+ /* Use a memcpy-based texstore to avoid software pixel swizzling.
+ * We'll do the necessary swizzling with the pipe_sampler_view to
+ * give much better performance.
+ * XXX in the future, expand this to accomodate more format and
+ * type combinations.
+ */
+ _mesa_memcpy_texture(ctx, 2,
+ mformat, /* mesa_format */
+ transfer->stride, /* dstRowStride, bytes */
+ &dest, /* destSlices */
+ width, height, 1, /* size */
+ format, type, /* src format/type */
+ pixels, /* data source */
+ unpack);
+ success = GL_TRUE;
+ }
+ else {
+ success = _mesa_texstore(ctx, 2, /* dims */
+ baseInternalFormat, /* baseInternalFormat */
+ mformat, /* mesa_format */
+ transfer->stride, /* dstRowStride, bytes */
+ &dest, /* destSlices */
+ width, height, 1, /* size */
+ format, type, /* src format/type */
+ pixels, /* data source */
+ unpack);
+ }
/* unmap */
pipe_transfer_unmap(pipe, transfer);
@@ -667,7 +687,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* user textures, plus the drawpix textures */
if (fpv) {
struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
- uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1,
+ uint num = MAX3(fpv->drawpix_sampler + 1,
+ fpv->pixelmap_sampler + 1,
st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]);
memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT],
@@ -914,7 +935,7 @@ get_color_fp_variant(struct st_context *st)
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
key.drawpixels = 1;
key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
ctx->Pixel.RedScale != 1.0 ||
@@ -956,6 +977,69 @@ clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height,
}
+/**
+ * Search the array of 4 swizzle components for the named component and return
+ * its position.
+ */
+static unsigned
+search_swizzle(const unsigned char swizzle[4], unsigned component)
+{
+ unsigned i;
+ for (i = 0; i < 4; i++) {
+ if (swizzle[i] == component)
+ return i;
+ }
+ assert(!"search_swizzle() failed");
+ return 0;
+}
+
+
+/**
+ * Set the sampler view's swizzle terms. This is used to handle RGBA
+ * swizzling when the incoming image format isn't an exact match for
+ * the actual texture format. For example, if we have glDrawPixels(
+ * GL_RGBA, GL_UNSIGNED_BYTE) and we chose the texture format
+ * PIPE_FORMAT_B8G8R8A8 then we can do use the sampler view swizzle to
+ * avoid swizzling all the pixels in software in the texstore code.
+ */
+static void
+setup_sampler_swizzle(struct pipe_sampler_view *sv, GLenum format, GLenum type)
+{
+ if ((format == GL_RGBA || format == GL_BGRA) && type == GL_UNSIGNED_BYTE) {
+ const struct util_format_description *desc =
+ util_format_description(sv->texture->format);
+ unsigned c0, c1, c2, c3;
+
+ /* Every gallium driver supports at least one 32-bit packed RGBA format.
+ * We must have chosen one for (GL_RGBA, GL_UNSIGNED_BYTE).
+ */
+ assert(desc->block.bits == 32);
+
+ /* invert the format's swizzle to setup the sampler's swizzle */
+ if (format == GL_RGBA) {
+ c0 = UTIL_FORMAT_SWIZZLE_X;
+ c1 = UTIL_FORMAT_SWIZZLE_Y;
+ c2 = UTIL_FORMAT_SWIZZLE_Z;
+ c3 = UTIL_FORMAT_SWIZZLE_W;
+ }
+ else {
+ assert(format == GL_BGRA);
+ c0 = UTIL_FORMAT_SWIZZLE_Z;
+ c1 = UTIL_FORMAT_SWIZZLE_Y;
+ c2 = UTIL_FORMAT_SWIZZLE_X;
+ c3 = UTIL_FORMAT_SWIZZLE_W;
+ }
+ sv->swizzle_r = search_swizzle(desc->swizzle, c0);
+ sv->swizzle_g = search_swizzle(desc->swizzle, c1);
+ sv->swizzle_b = search_swizzle(desc->swizzle, c2);
+ sv->swizzle_a = search_swizzle(desc->swizzle, c3);
+ }
+ else {
+ /* use the default sampler swizzle */
+ }
+}
+
+
/**
* Called via ctx->Driver.DrawPixels()
*/
@@ -974,6 +1058,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
int num_sampler_view = 1;
struct gl_pixelstore_attrib clippedUnpack;
struct st_fp_variant *fpv = NULL;
+ struct pipe_resource *pt;
/* Mesa state should be up to date by now */
assert(ctx->NewState == 0x0);
@@ -1029,42 +1114,56 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
}
- /* draw with textured quad */
- {
- struct pipe_resource *pt
- = make_texture(st, width, height, format, type, unpack, pixels);
- if (pt) {
- sv[0] = st_create_texture_sampler_view(st->pipe, pt);
-
- if (sv[0]) {
- /* Create a second sampler view to read stencil.
- * The stencil is written using the shader stencil export
- * functionality. */
- if (write_stencil) {
- enum pipe_format stencil_format =
- util_format_stencil_only(pt->format);
- /* we should not be doing pixel map/transfer (see above) */
- assert(num_sampler_view == 1);
- sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
- stencil_format);
- num_sampler_view++;
- }
-
- draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height,
- ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
- sv,
- num_sampler_view,
- driver_vp,
- driver_fp, fpv,
- color, GL_FALSE, write_depth, write_stencil);
- pipe_sampler_view_reference(&sv[0], NULL);
- if (num_sampler_view > 1)
- pipe_sampler_view_reference(&sv[1], NULL);
- }
- pipe_resource_reference(&pt, NULL);
- }
+ /* Put glDrawPixels image into a texture */
+ pt = make_texture(st, width, height, format, type, unpack, pixels);
+ if (!pt) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+ return;
}
+
+ /* create sampler view for the image */
+ sv[0] = st_create_texture_sampler_view(st->pipe, pt);
+ if (!sv[0]) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+ pipe_resource_reference(&pt, NULL);
+ return;
+ }
+
+ /* Set up the sampler view's swizzle */
+ setup_sampler_swizzle(sv[0], format, type);
+
+ /* Create a second sampler view to read stencil. The stencil is
+ * written using the shader stencil export functionality.
+ */
+ if (write_stencil) {
+ enum pipe_format stencil_format =
+ util_format_stencil_only(pt->format);
+ /* we should not be doing pixel map/transfer (see above) */
+ assert(num_sampler_view == 1);
+ sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
+ stencil_format);
+ if (!sv[1]) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+ pipe_resource_reference(&pt, NULL);
+ pipe_sampler_view_reference(&sv[0], NULL);
+ return;
+ }
+ num_sampler_view++;
+ }
+
+ draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
+ width, height,
+ ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
+ sv,
+ num_sampler_view,
+ driver_vp,
+ driver_fp, fpv,
+ color, GL_FALSE, write_depth, write_stencil);
+ pipe_sampler_view_reference(&sv[0], NULL);
+ if (num_sampler_view > 1)
+ pipe_sampler_view_reference(&sv[1], NULL);
+
+ pipe_resource_reference(&pt, NULL);
}
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 708bdf5011e..2c4eccf1e06 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -224,6 +224,7 @@ st_program_string_notify( struct gl_context *ctx,
struct gl_program *prog )
{
struct st_context *st = st_context(ctx);
+ gl_shader_stage stage = _mesa_program_enum_to_shader_stage(target);
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
@@ -278,10 +279,10 @@ st_program_string_notify( struct gl_context *ctx,
st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
}
- if (ST_DEBUG & DEBUG_PRECOMPILE)
+ if (ST_DEBUG & DEBUG_PRECOMPILE ||
+ st->shader_has_one_variant[stage])
st_precompile_shader_variant(st, prog);
- /* XXX check if program is legal, within limits */
return GL_TRUE;
}
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index bef7307bb27..5abb17385c2 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -235,9 +235,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
PIPE_BIND_SAMPLER_VIEW);
st->prefer_blit_based_texture_transfer = screen->get_param(screen,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
- st->can_force_persample_interp = screen->get_param(screen,
- PIPE_CAP_FORCE_PERSAMPLE_INTERP);
-
+ st->force_persample_in_shader =
+ screen->get_param(screen, PIPE_CAP_SAMPLE_SHADING) &&
+ !screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP);
+ st->has_shareable_shaders = screen->get_param(screen,
+ PIPE_CAP_SHAREABLE_SHADERS);
st->needs_texcoord_semantic =
screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
st->apply_texture_swizzle_to_border_color =
@@ -292,6 +294,20 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
}
+ /* Set which shader types can be compiled at link time. */
+ st->shader_has_one_variant[MESA_SHADER_VERTEX] =
+ st->has_shareable_shaders &&
+ !st->clamp_vert_color_in_shader;
+
+ st->shader_has_one_variant[MESA_SHADER_FRAGMENT] =
+ st->has_shareable_shaders &&
+ !st->clamp_frag_color_in_shader &&
+ !st->force_persample_in_shader;
+
+ st->shader_has_one_variant[MESA_SHADER_TESS_CTRL] = st->has_shareable_shaders;
+ st->shader_has_one_variant[MESA_SHADER_TESS_EVAL] = st->has_shareable_shaders;
+ st->shader_has_one_variant[MESA_SHADER_GEOMETRY] = st->has_shareable_shaders;
+
_mesa_compute_version(ctx);
if (ctx->Version == 0) {
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index f187d82449b..c243f5cd966 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -98,7 +98,15 @@ struct st_context
boolean has_etc1;
boolean has_etc2;
boolean prefer_blit_based_texture_transfer;
- boolean can_force_persample_interp;
+ boolean force_persample_in_shader;
+ boolean has_shareable_shaders;
+
+ /**
+ * If a shader can be created when we get its source.
+ * This means it has only 1 variant, not counting glBitmap and
+ * glDrawPixels.
+ */
+ boolean shader_has_one_variant[MESA_SHADER_STAGES];
boolean needs_texcoord_semantic;
boolean apply_texture_swizzle_to_border_color;
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index e2902923cb7..d4724b46e0a 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -249,6 +249,9 @@ void st_init_limits(struct pipe_screen *screen,
if (options->EmitNoLoops)
options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
+ else
+ options->MaxUnrollIterations = screen->get_shader_param(screen, sh,
+ PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
options->LowerClipDistance = true;
}
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 6a69ba7aa26..75ccaf2f26b 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -395,6 +395,10 @@ st_translate_vertex_program(struct st_context *st,
if (ureg == NULL)
return false;
+ if (stvp->Base.Base.ClipDistanceArraySize)
+ ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
+ stvp->Base.Base.ClipDistanceArraySize);
+
if (ST_DEBUG & DEBUG_MESA) {
_mesa_print_program(&stvp->Base.Base);
_mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
@@ -1049,6 +1053,10 @@ st_translate_program_common(struct st_context *st,
memset(outputMapping, 0, sizeof(outputMapping));
memset(out_state, 0, sizeof(*out_state));
+ if (prog->ClipDistanceArraySize)
+ ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
+ prog->ClipDistanceArraySize);
+
/*
* Convert Mesa program inputs to TGSI input register semantics.
*/
@@ -1728,6 +1736,12 @@ destroy_program_variants_cb(GLuint key, void *data, void *userData)
void
st_destroy_program_variants(struct st_context *st)
{
+ /* If shaders can be shared with other contexts, the last context will
+ * call DeleteProgram on all shaders, releasing everything.
+ */
+ if (st->has_shareable_shaders)
+ return;
+
/* ARB vert/frag program */
_mesa_HashWalk(st->ctx->Shared->Programs,
destroy_program_variants_cb, st);
@@ -1774,7 +1788,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_vp_variant_key key;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st_get_vp_variant(st, p, &key);
break;
}
@@ -1784,7 +1798,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_tcp_variant_key key;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st_get_tcp_variant(st, p, &key);
break;
}
@@ -1794,7 +1808,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_tep_variant_key key;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st_get_tep_variant(st, p, &key);
break;
}
@@ -1804,7 +1818,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_gp_variant_key key;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st_get_gp_variant(st, p, &key);
break;
}
@@ -1814,7 +1828,7 @@ st_precompile_shader_variant(struct st_context *st,
struct st_fp_variant_key key;
memset(&key, 0, sizeof(key));
- key.st = st;
+ key.st = st->has_shareable_shaders ? NULL : st;
st_get_fp_variant(st, p, &key);
break;
}
diff --git a/src/mesa/tnl/t_vb_rendertmp.h b/src/mesa/tnl/t_vb_rendertmp.h
index 44dee763594..4bfc6b15d3b 100644
--- a/src/mesa/tnl/t_vb_rendertmp.h
+++ b/src/mesa/tnl/t_vb_rendertmp.h
@@ -124,19 +124,19 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
GLuint i;
LOCAL_VARS;
- (void) flags;
-
INIT(GL_LINE_LOOP);
if (start+1 < count) {
if (TEST_PRIM_BEGIN(flags)) {
RESET_STIPPLE;
+ /* draw the first line from v[0] to v[1] */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(start), ELT(start+1) );
else
RENDER_LINE( ELT(start+1), ELT(start) );
}
+ /* draw lines from v[1] to v[n-1] */
for ( i = start+2 ; i < count ; i++) {
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(i-1), ELT(i) );
@@ -145,6 +145,7 @@ static void TAG(render_line_loop)( struct gl_context *ctx,
}
if ( TEST_PRIM_END(flags)) {
+ /* draw final line from v[n-1] to v[0] (the very first vertex) */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT)
RENDER_LINE( ELT(count-1), ELT(start) );
else
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index a376efe34a7..e6b9d890d5f 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -196,6 +196,26 @@ vbo_get_default_vals_as_union(GLenum format)
}
}
+
+/**
+ * Compute the max number of vertices which can be stored in
+ * a vertex buffer, given the current vertex size, and the amount
+ * of space already used.
+ */
+static inline unsigned
+vbo_compute_max_verts(const struct vbo_exec_context *exec)
+{
+ unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
+ (exec->vtx.vertex_size * sizeof(GLfloat));
+ assert(n > 0);
+ /* Subtract one so we're always sure to have room for an extra
+ * vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion.
+ */
+ n--;
+ return n;
+}
+
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index 00378eb7984..a80b2c908d1 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -160,8 +160,6 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
void vbo_exec_vtx_map( struct vbo_exec_context *exec );
-void vbo_exec_vtx_wrap( struct vbo_exec_context *exec );
-
void vbo_exec_eval_update( struct vbo_exec_context *exec );
void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec,
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 7ae08fe3062..a23d5aa08aa 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -61,7 +61,8 @@ static void reset_attrfv( struct vbo_exec_context *exec );
/**
* Close off the last primitive, execute the buffer, restart the
- * primitive.
+ * primitive. This is called when we fill a vertex buffer before
+ * hitting glEnd.
*/
static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
{
@@ -71,17 +72,31 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
}
else {
- GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
+ struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
+ const GLuint last_begin = last_prim->begin;
GLuint last_count;
if (_mesa_inside_begin_end(exec->ctx)) {
- GLint i = exec->vtx.prim_count - 1;
- assert(i >= 0);
- exec->vtx.prim[i].count = (exec->vtx.vert_count -
- exec->vtx.prim[i].start);
+ last_prim->count = exec->vtx.vert_count - last_prim->start;
}
- last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
+ last_count = last_prim->count;
+
+ /* Special handling for wrapping GL_LINE_LOOP */
+ if (last_prim->mode == GL_LINE_LOOP &&
+ last_count > 0 &&
+ !last_prim->end) {
+ /* draw this section of the incomplete line loop as a line strip */
+ last_prim->mode = GL_LINE_STRIP;
+ if (!last_prim->begin) {
+ /* This is not the first section of the line loop, so don't
+ * draw the 0th vertex. We're saving it until we draw the
+ * very last section of the loop.
+ */
+ last_prim->start++;
+ last_prim->count--;
+ }
+ }
/* Execute the buffer and save copied vertices.
*/
@@ -98,6 +113,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
if (_mesa_inside_begin_end(exec->ctx)) {
exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
+ exec->vtx.prim[0].begin = 0;
exec->vtx.prim[0].start = 0;
exec->vtx.prim[0].count = 0;
exec->vtx.prim_count++;
@@ -113,7 +129,8 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
* Deal with buffer wrapping where provoked by the vertex buffer
* filling up, as opposed to upgrade_vertex().
*/
-void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
+static void
+vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
{
fi_type *data = exec->vtx.copied.buffer;
GLuint i;
@@ -292,8 +309,7 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
*/
exec->vtx.attrsz[attr] = newSize;
exec->vtx.vertex_size += newSize - oldSize;
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
- (exec->vtx.vertex_size * sizeof(GLfloat)));
+ exec->vtx.max_vert = vbo_compute_max_verts(exec);
exec->vtx.vert_count = 0;
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
@@ -446,10 +462,6 @@ do { \
\
assert(sz == 1 || sz == 2); \
\
- if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \
- vbo_exec_begin_vertices(ctx); \
- } \
- \
/* check if attribute size or type is changing */ \
if (unlikely(exec->vtx.active_sz[A] != N * sz) || \
unlikely(exec->vtx.attrtype[A] != T)) { \
@@ -470,6 +482,15 @@ do { \
/* This is a glVertex call */ \
GLuint i; \
\
+ if (unlikely((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0)) { \
+ vbo_exec_begin_vertices(ctx); \
+ } \
+ \
+ if (unlikely(!exec->vtx.buffer_ptr)) { \
+ vbo_exec_vtx_map(exec); \
+ } \
+ assert(exec->vtx.buffer_ptr); \
+ \
/* copy 32-bit words */ \
for (i = 0; i < exec->vtx.vertex_size; i++) \
exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \
@@ -482,7 +503,10 @@ do { \
\
if (++exec->vtx.vert_count >= exec->vtx.max_vert) \
vbo_exec_vtx_wrap( exec ); \
- } \
+ } else { \
+ /* we now have accumulated per-vertex attributes */ \
+ ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \
+ } \
} while (0)
#define ERROR(err) _mesa_error( ctx, err, __func__ )
@@ -814,11 +838,28 @@ static void GLAPIENTRY vbo_exec_End( void )
if (exec->vtx.prim_count > 0) {
/* close off current primitive */
- int idx = exec->vtx.vert_count;
- int i = exec->vtx.prim_count - 1;
+ struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
- exec->vtx.prim[i].end = 1;
- exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start;
+ last_prim->end = 1;
+ last_prim->count = exec->vtx.vert_count - last_prim->start;
+
+ /* Special handling for GL_LINE_LOOP */
+ if (last_prim->mode == GL_LINE_LOOP && last_prim->begin == 0) {
+ /* We're finishing drawing a line loop. Append 0th vertex onto
+ * end of vertex buffer so we can draw it as a line strip.
+ */
+ const fi_type *src = exec->vtx.buffer_map;
+ fi_type *dst = exec->vtx.buffer_map +
+ exec->vtx.vert_count * exec->vtx.vertex_size;
+
+ /* copy 0th vertex to end of buffer */
+ memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
+
+ assert(last_prim->start == 0);
+ last_prim->start++; /* skip vertex0 */
+ /* note that last_prim->count stays unchanged */
+ last_prim->mode = GL_LINE_STRIP;
+ }
try_vbo_merge(exec);
}
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 174cbc37c26..ed5d9e947b0 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -64,20 +64,23 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec )
}
-/*
- * NOTE: Need to have calculated primitives by this point -- do it on the fly.
- * NOTE: Old 'parity' issue is gone.
+/**
+ * Copy zero, one or two vertices from the current vertex buffer into
+ * the temporary "copy" buffer.
+ * This is used when a single primitive overflows a vertex buffer and
+ * we need to continue the primitive in a new vertex buffer.
+ * The temporary "copy" buffer holds the vertices which need to get
+ * copied from the old buffer to the new one.
*/
static GLuint
vbo_copy_vertices( struct vbo_exec_context *exec )
{
- GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count;
+ struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
+ const GLuint nr = last_prim->count;
GLuint ovf, i;
- GLuint sz = exec->vtx.vertex_size;
+ const GLuint sz = exec->vtx.vertex_size;
fi_type *dst = exec->vtx.copied.buffer;
- const fi_type *src = (exec->vtx.buffer_map +
- exec->vtx.prim[exec->vtx.prim_count-1].start *
- exec->vtx.vertex_size);
+ const fi_type *src = exec->vtx.buffer_map + last_prim->start * sz;
switch (exec->ctx->Driver.CurrentExecPrimitive) {
case GL_POINTS:
@@ -106,6 +109,17 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
return 1;
}
case GL_LINE_LOOP:
+ if (last_prim->begin == 0) {
+ /* We're dealing with the second or later section of a split/wrapped
+ * GL_LINE_LOOP. Since we're converting line loops to line strips,
+ * we've already incremented the last_prim->start counter by one to
+ * skip the 0th vertex in the loop. We need to undo that (effectively
+ * subtract one from last_prim->start) so that we copy the 0th vertex
+ * to the next vertex buffer.
+ */
+ src -= sz;
+ }
+ /* fall-through */
case GL_TRIANGLE_FAN:
case GL_POLYGON:
if (nr == 0) {
@@ -123,7 +137,7 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
case GL_TRIANGLE_STRIP:
/* no parity issue, but need to make sure the tri is not drawn twice */
if (nr & 1) {
- exec->vtx.prim[exec->vtx.prim_count-1].count--;
+ last_prim->count--;
}
/* fallthrough */
case GL_QUAD_STRIP:
@@ -432,8 +446,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
if (keepUnmapped || exec->vtx.vertex_size == 0)
exec->vtx.max_vert = 0;
else
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
- (exec->vtx.vertex_size * sizeof(GLfloat)));
+ exec->vtx.max_vert = vbo_compute_max_verts(exec);
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
exec->vtx.prim_count = 0;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index fdc677f9a07..d49aa15b1b7 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -330,8 +330,7 @@ _save_reset_counters(struct gl_context *ctx)
* previous prim.
*/
static void
-merge_prims(struct gl_context *ctx,
- struct _mesa_prim *prim_list,
+merge_prims(struct _mesa_prim *prim_list,
GLuint *prim_count)
{
GLuint i;
@@ -361,6 +360,51 @@ merge_prims(struct gl_context *ctx,
*prim_count = prev_prim - prim_list + 1;
}
+
+/**
+ * Convert GL_LINE_LOOP primitive into GL_LINE_STRIP so that drivers
+ * don't have to worry about handling the _mesa_prim::begin/end flags.
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=81174
+ */
+static void
+convert_line_loop_to_strip(struct vbo_save_context *save,
+ struct vbo_save_vertex_list *node)
+{
+ struct _mesa_prim *prim = &node->prim[node->prim_count - 1];
+
+ assert(prim->mode == GL_LINE_LOOP);
+
+ if (prim->end) {
+ /* Copy the 0th vertex to end of the buffer and extend the
+ * vertex count by one to finish the line loop.
+ */
+ const GLuint sz = save->vertex_size;
+ /* 0th vertex: */
+ const fi_type *src = save->buffer + prim->start * sz;
+ /* end of buffer: */
+ fi_type *dst = save->buffer + (prim->start + prim->count) * sz;
+
+ memcpy(dst, src, sz * sizeof(float));
+
+ prim->count++;
+ node->count++;
+ save->vert_count++;
+ save->buffer_ptr += sz;
+ save->vertex_store->used += sz;
+ }
+
+ if (!prim->begin) {
+ /* Drawing the second or later section of a long line loop.
+ * Skip the 0th vertex.
+ */
+ prim->start++;
+ prim->count--;
+ }
+
+ prim->mode = GL_LINE_STRIP;
+}
+
+
/**
* Insert the active immediate struct onto the display list currently
* being built.
@@ -442,7 +486,11 @@ _save_compile_vertex_list(struct gl_context *ctx)
*/
save->copied.nr = _save_copy_vertices(ctx, node, save->buffer);
- merge_prims(ctx, node->prim, &node->prim_count);
+ if (node->prim[node->prim_count - 1].mode == GL_LINE_LOOP) {
+ convert_line_loop_to_strip(save, node);
+ }
+
+ merge_prims(node->prim, &node->prim_count);
/* Deal with GL_COMPILE_AND_EXECUTE:
*/
@@ -483,6 +531,10 @@ _save_compile_vertex_list(struct gl_context *ctx)
save->buffer_ptr = vbo_save_map_vertex_store(ctx, save->vertex_store);
save->out_of_memory = save->buffer_ptr == NULL;
}
+ else {
+ /* update buffer_ptr for next vertex */
+ save->buffer_ptr = save->vertex_store->buffer + save->vertex_store->used;
+ }
if (save->prim_store->used > VBO_SAVE_PRIM_SIZE - 6) {
save->prim_store->refcount--;