diff --git a/configure.ac b/configure.ac index f236dad6441..0c88db9f66f 100644 --- a/configure.ac +++ b/configure.ac @@ -108,6 +108,8 @@ AC_SYS_LARGEFILE LT_PREREQ([2.2]) LT_INIT([disable-static]) +AC_CHECK_PROG(RM, rm, [rm -f]) + AX_PROG_BISON([], AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"], [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])])) diff --git a/docs/GL3.txt b/docs/GL3.txt index 6503e2ab1da..167321676df 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30: GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample) - GL_ARB_texture_view DONE (i965, nv50, nvc0, llvmpipe, softpipe) + GL_ARB_texture_view DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe) GL_ARB_vertex_attrib_binding DONE (all drivers) diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index dcf425e4c68..d3dbe9dda13 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi
  • GL_ARB_texture_barrier / GL_NV_texture_barrier on i965
  • GL_ARB_texture_query_lod on softpipe
  • +
  • GL_ARB_texture_view on radeonsi
  • EGL_KHR_create_context on softpipe, llvmpipe
  • EGL_KHR_gl_colorspace on softpipe, llvmpipe
  • diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 0ad94bb031f..5584c4a222c 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -355,8 +355,9 @@ struct draw_vertex_info { }; /* these flags are set if the primitive is a segment of a larger one */ -#define DRAW_SPLIT_BEFORE 0x1 -#define DRAW_SPLIT_AFTER 0x2 +#define DRAW_SPLIT_BEFORE 0x1 +#define DRAW_SPLIT_AFTER 0x2 +#define DRAW_LINE_LOOP_AS_STRIP 0x4 struct draw_prim_info { boolean linear; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index ffec863ae6f..aa20b918f50 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -359,6 +359,16 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle, } +static inline unsigned +prim_type(unsigned prim, unsigned flags) +{ + if (flags & DRAW_LINE_LOOP_AS_STRIP) + return PIPE_PRIM_LINE_STRIP; + else + return prim; +} + + static void fetch_pipeline_run(struct draw_pt_middle_end *middle, const unsigned *fetch_elts, @@ -380,7 +390,7 @@ fetch_pipeline_run(struct draw_pt_middle_end *middle, prim_info.start = 0; prim_info.count = draw_count; prim_info.elts = draw_elts; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -408,7 +418,7 @@ fetch_pipeline_linear_run(struct draw_pt_middle_end *middle, prim_info.start = 0; prim_info.count = count; prim_info.elts = NULL; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &count; @@ -439,7 +449,7 @@ fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle, prim_info.start 
= 0; prim_info.count = draw_count; prim_info.elts = draw_elts; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index e42c4af0e70..2d7569b0fdf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -473,6 +473,16 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, } +static inline unsigned +prim_type(unsigned prim, unsigned flags) +{ + if (flags & DRAW_LINE_LOOP_AS_STRIP) + return PIPE_PRIM_LINE_STRIP; + else + return prim; +} + + static void llvm_middle_end_run(struct draw_pt_middle_end *middle, const unsigned *fetch_elts, @@ -494,7 +504,7 @@ llvm_middle_end_run(struct draw_pt_middle_end *middle, prim_info.start = 0; prim_info.count = draw_count; prim_info.elts = draw_elts; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -522,7 +532,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle, prim_info.start = 0; prim_info.count = count; prim_info.elts = NULL; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &count; @@ -552,7 +562,7 @@ llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle, prim_info.start = 0; prim_info.count = draw_count; prim_info.elts = draw_elts; - prim_info.prim = fpme->input_prim; + prim_info.prim = prim_type(fpme->input_prim, prim_flags); prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; diff 
--git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index 0afabb01398..6da79b9490b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -249,6 +249,9 @@ vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags, assert(icount + !!close_loop <= vsplit->segment_size); + /* need to draw the sections of the line loop as line strips */ + flags |= DRAW_LINE_LOOP_AS_STRIP; + if (close_loop) { for (nr = 0; nr < icount; nr++) vsplit->fetch_elts[nr] = istart + nr; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 571c615f9f8..ad64ae058b6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index a371aa95e70..f86adcec506 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -474,6 +474,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } /* if we get here, we missed a shader cap above (and should have seen * a compiler warning.) 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index b84a1753eeb..4645ef26cab 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -369,19 +369,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, procType == TGSI_PROCESSOR_GEOMETRY || procType == TGSI_PROCESSOR_TESS_CTRL || procType == TGSI_PROCESSOR_TESS_EVAL) { - if (semName == TGSI_SEMANTIC_CLIPDIST) { - info->num_written_clipdistance += - util_bitcount(fulldecl->Declaration.UsageMask); - info->clipdist_writemask |= - fulldecl->Declaration.UsageMask << (semIndex*4); - } - else if (semName == TGSI_SEMANTIC_CULLDIST) { - info->num_written_culldistance += - util_bitcount(fulldecl->Declaration.UsageMask); - info->culldist_writemask |= - fulldecl->Declaration.UsageMask << (semIndex*4); - } - else if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { + if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) { info->writes_viewport_index = TRUE; } else if (semName == TGSI_SEMANTIC_LAYER) { @@ -432,9 +420,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens, const struct tgsi_full_property *fullprop = &parse.FullToken.FullProperty; unsigned name = fullprop->Property.PropertyName; + unsigned value = fullprop->u[0].Data; assert(name < Elements(info->properties)); - info->properties[name] = fullprop->u[0].Data; + info->properties[name] = value; + + switch (name) { + case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: + info->num_written_clipdistance = value; + info->clipdist_writemask |= (1 << value) - 1; + break; + case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: + info->num_written_culldistance = value; + info->culldist_writemask |= (1 << value) - 1; + break; + } } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index 8271ea08177..89369d60f4e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -137,6 +137,8 @@ const char 
*tgsi_property_names[TGSI_PROPERTY_COUNT] = "TES_SPACING", "TES_VERTEX_ORDER_CW", "TES_POINT_MODE", + "NUM_CLIPDIST_ENABLED", + "NUM_CULLDIST_ENABLED", }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index e08844b2f0b..151afb2dffe 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -276,6 +276,8 @@ The integer capabilities: GL4 hardware will likely need to emulate it with a shader variant, or by selecting the interpolation weights with a conditional assignment in the shader. +* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any + pipe_context. @@ -365,6 +367,10 @@ to be 0. are supported. * ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't ignore tgsi_declaration_range::Last for shader inputs and outputs. +* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number + of iterations that loops are allowed to have to be unrolled. It is only + a hint to state trackers. Whether any loops will be unrolled is not + guaranteed. .. _pipe_compute_cap: diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 314fe1bb74f..01e18f3084e 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -3126,6 +3126,16 @@ TES_POINT_MODE If set to a non-zero value, this turns on point mode for the tessellator, which means that points will be generated instead of primitives. +NUM_CLIPDIST_ENABLED +"""""""""""""""""""" + +How many clip distance scalar outputs are enabled. + +NUM_CULLDIST_ENABLED +"""""""""""""""""""" + +How many cull distance scalar outputs are enabled. 
+ Texture Sampling and Texture Formats ------------------------------------ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index b64f78ca32b..50d140fe903 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -237,6 +237,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_MAX_VIEWPORTS: @@ -411,6 +412,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } debug_printf("unknown shader param %d\n", param); return 0; diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 9d6b3d39183..5812af626cb 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); return 0; @@ -249,6 +251,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 76812a666a0..e1a7dc56685 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen 
*screen, unsigned shader, return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: return 0; @@ -471,6 +473,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 50c3781f5f8..e2ed267da78 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -298,6 +298,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 335c163b661..03301649e38 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -171,6 +171,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: @@ -263,6 +264,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: debug_printf("unknown vertex shader param %d\n", param); return 0; @@ -304,6 +307,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case 
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: debug_printf("unknown fragment shader param %d\n", param); return 0; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 812b246ea0e..ec51d00f266 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -216,6 +216,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: @@ -299,6 +300,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f34ad0ed5d1..af8e5f72670 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -202,6 +202,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; case PIPE_CAP_VENDOR_ID: @@ -312,6 +313,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; /* would be 32 in linked (OpenGL-style) mode */ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; /* XXX not sure if more are really safe */ + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c index 1165ac8a9c0..a576abdfaf2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -197,6 +197,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; /* SWTCL-only features. */ @@ -302,6 +303,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; } @@ -358,6 +361,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 32ce76a9e07..9a97de9965e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -343,6 +343,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; /* Stream output. 
*/ @@ -510,6 +511,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + /* due to a bug in the shader compiler, some loops hang + * if they are not unrolled, see: + * https://bugs.freedesktop.org/show_bug.cgi?id=86720 + */ + return 255; } return 0; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index d5c5db30029..082ea850675 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -55,11 +55,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa); util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state); util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer); - util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader); - util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader); - util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader); - util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader); - util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader); + util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso); + util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso); + util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso); + util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso); + util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso); util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements); util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask); util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]); diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c index 7d41e8d00e0..53062187b88 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -31,15 +31,15 @@ #include "ddebug/dd_util.h" -static void si_dump_shader(struct si_shader_selector *sel, const char *name, +static void si_dump_shader(struct si_shader_ctx_state *state, const char *name, FILE *f) { - if (!sel || !sel->current) + if (!state->cso || !state->current) return; fprintf(f, "%s shader disassembly:\n", name); - si_dump_shader_key(sel->type, &sel->current->key, f); - fprintf(f, "%s\n\n", sel->current->binary.disasm_string); + si_dump_shader_key(state->cso->type, &state->current->key, f); + fprintf(f, "%s\n\n", state->current->binary.disasm_string); } /* Parsed IBs are difficult to read without colors. Use "less -R file" to @@ -536,11 +536,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, if (flags & PIPE_DEBUG_DEVICE_IS_HUNG) si_dump_debug_registers(sctx, f); - si_dump_shader(sctx->vs_shader, "Vertex", f); - si_dump_shader(sctx->tcs_shader, "Tessellation control", f); - si_dump_shader(sctx->tes_shader, "Tessellation evaluation", f); - si_dump_shader(sctx->gs_shader, "Geometry", f); - si_dump_shader(sctx->ps_shader, "Fragment", f); + si_dump_shader(&sctx->vs_shader, "Vertex", f); + si_dump_shader(&sctx->tcs_shader, "Tessellation control", f); + si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f); + si_dump_shader(&sctx->gs_shader, "Geometry", f); + si_dump_shader(&sctx->ps_shader, "Fragment", f); si_dump_last_bo_list(sctx, f); si_dump_last_ib(sctx, f); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 19dd14f9b6f..13738da5e2c 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -915,10 +915,10 @@ static void si_set_user_data_base(struct si_context *sctx, void si_shader_change_notify(struct si_context *sctx) { /* VS 
can be bound as VS, ES, or LS. */ - if (sctx->tes_shader) + if (sctx->tes_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B530_SPI_SHADER_USER_DATA_LS_0); - else if (sctx->gs_shader) + else if (sctx->gs_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B330_SPI_SHADER_USER_DATA_ES_0); else @@ -926,8 +926,8 @@ void si_shader_change_notify(struct si_context *sctx) R_00B130_SPI_SHADER_USER_DATA_VS_0); /* TES can be bound as ES, VS, or not bound. */ - if (sctx->tes_shader) { - if (sctx->gs_shader) + if (sctx->tes_shader.cso) { + if (sctx->gs_shader.cso) si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, R_00B330_SPI_SHADER_USER_DATA_ES_0); else @@ -964,7 +964,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) unsigned i; uint32_t *sh_base = sctx->shader_userdata.sh_base; - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { /* The VS copy shader needs these for clipping, streamout, and rings. */ unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0; unsigned i = PIPE_SHADER_VERTEX; @@ -975,7 +975,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) /* The TESSEVAL shader needs this for the ESGS ring buffer. */ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, R_00B330_SPI_SHADER_USER_DATA_ES_0, true); - } else if (sctx->tes_shader) { + } else if (sctx->tes_shader.cso) { /* The TESSEVAL shader needs this for streamout. 
*/ si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc, R_00B130_SPI_SHADER_USER_DATA_VS_0, true); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 53c80dba602..5f910c95ef3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -57,8 +57,8 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state); if (sctx->dummy_pixel_shader) sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); - if (sctx->fixed_func_tcs_shader) - sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader); + if (sctx->fixed_func_tcs_shader.cso) + sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush); if (sctx->custom_blend_resolve) @@ -293,7 +293,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TGSI_TXQS: @@ -335,7 +337,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: return 0; @@ -507,6 +508,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } return 0; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h index 2abd5b5a0c3..d7a2282952a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -152,6 +152,15 @@ struct si_viewports { struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; }; +/* A shader state consists of the shader selector, which is a constant state + * object shared by multiple contexts and shouldn't be modified, and + * the current shader variant selected for this context. + */ +struct si_shader_ctx_state { + struct si_shader_selector *cso; + struct si_shader *current; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -162,7 +171,7 @@ struct si_context { void *pstipple_sampler_state; struct si_screen *screen; struct pipe_fence_handle *last_gfx_fence; - struct si_shader_selector *fixed_func_tcs_shader; + struct si_shader_ctx_state fixed_func_tcs_shader; LLVMTargetMachineRef tm; /* Atoms (direct states). */ @@ -199,11 +208,11 @@ struct si_context { void *dummy_pixel_shader; /* shaders */ - struct si_shader_selector *ps_shader; - struct si_shader_selector *gs_shader; - struct si_shader_selector *vs_shader; - struct si_shader_selector *tcs_shader; - struct si_shader_selector *tes_shader; + struct si_shader_ctx_state ps_shader; + struct si_shader_ctx_state gs_shader; + struct si_shader_ctx_state vs_shader; + struct si_shader_ctx_state tcs_shader; + struct si_shader_ctx_state tes_shader; struct si_cs_shader_state cs_shader_state; /* shader information */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 54dad726d01..fd5500c1ab3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -179,15 +179,18 @@ struct radeon_shader_reloc; struct si_shader; +/* A shader selector is a gallium CSO and contains shader variants and + * binaries for one TGSI program. This can be shared by multiple contexts. 
+ */ struct si_shader_selector { - struct si_shader *current; + pipe_mutex mutex; + struct si_shader *first_variant; /* immutable after the first variant */ + struct si_shader *last_variant; /* mutable */ struct tgsi_token *tokens; struct pipe_stream_output_info so; struct tgsi_shader_info info; - unsigned num_shaders; - /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; @@ -241,7 +244,7 @@ union si_shader_key { uint64_t es_enabled_outputs; unsigned as_es:1; /* export shader */ unsigned as_ls:1; /* local shader */ - unsigned export_prim_id; /* when PS needs it and GS is disabled */ + unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } vs; struct { unsigned prim_mode:3; @@ -252,7 +255,7 @@ union si_shader_key { * This describes how outputs are laid out in memory. */ uint64_t es_enabled_outputs; unsigned as_es:1; /* export shader */ - unsigned export_prim_id; /* when PS needs it and GS is disabled */ + unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } tes; /* tessellation evaluation shader */ }; @@ -293,24 +296,24 @@ struct si_shader { static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) { - if (sctx->gs_shader) - return &sctx->gs_shader->info; - else if (sctx->tes_shader) - return &sctx->tes_shader->info; - else if (sctx->vs_shader) - return &sctx->vs_shader->info; + if (sctx->gs_shader.cso) + return &sctx->gs_shader.cso->info; + else if (sctx->tes_shader.cso) + return &sctx->tes_shader.cso->info; + else if (sctx->vs_shader.cso) + return &sctx->vs_shader.cso->info; else return NULL; } static inline struct si_shader* si_get_vs_state(struct si_context *sctx) { - if (sctx->gs_shader) - return sctx->gs_shader->current->gs_copy_shader; - else if (sctx->tes_shader) - return sctx->tes_shader->current; + if (sctx->gs_shader.current) + return sctx->gs_shader.current->gs_copy_shader; + else if (sctx->tes_shader.current) + return sctx->tes_shader.current; else - return sctx->vs_shader->current; + return 
sctx->vs_shader.current; } static inline bool si_vs_exports_prim_id(struct si_shader *shader) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e6475364f98..243bdc6e6d7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -266,7 +266,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at * Reproducible with Unigine Heaven 4.0 and drirc missing. */ if (blend->dual_src_blend && - (sctx->ps_shader->ps_colors_written & 0x3) != 0x3) + (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3) mask = 0; radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask); @@ -1535,9 +1535,14 @@ static unsigned si_tex_compare(unsigned compare) } } -static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) +static unsigned si_tex_dim(unsigned res_target, unsigned view_target, + unsigned nr_samples) { - switch (dim) { + if (view_target == PIPE_TEXTURE_CUBE || + view_target == PIPE_TEXTURE_CUBE_ARRAY) + res_target = view_target; + + switch (res_target) { default: case PIPE_TEXTURE_1D: return V_008F1C_SQ_RSRC_IMG_1D; @@ -2391,6 +2396,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, struct radeon_surf_level *surflevel; int first_non_void; uint64_t va; + unsigned last_layer = state->u.tex.last_layer; if (view == NULL) return NULL; @@ -2596,6 +2602,13 @@ si_create_sampler_view_custom(struct pipe_context *ctx, } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) depth = texture->array_size / 6; + /* This is not needed if state trackers set last_layer correctly. 
*/ + if (state->target == PIPE_TEXTURE_1D || + state->target == PIPE_TEXTURE_2D || + state->target == PIPE_TEXTURE_RECT || + state->target == PIPE_TEXTURE_CUBE) + last_layer = state->u.tex.first_layer; + va = tmp->resource.gpu_address + surflevel[base_level].offset; view->state[0] = va >> 8; @@ -2615,10 +2628,11 @@ si_create_sampler_view_custom(struct pipe_context *ctx, last_level) | S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) | S_008F1C_POW2_PAD(texture->last_level > 0) | - S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); + S_008F1C_TYPE(si_tex_dim(texture->target, state->target, + texture->nr_samples))); view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | - S_008F24_LAST_ARRAY(state->u.tex.last_layer)); + S_008F24_LAST_ARRAY(last_layer)); view->state[6] = 0; view->state[7] = 0; @@ -2653,11 +2667,12 @@ si_create_sampler_view_custom(struct pipe_context *ctx, S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | - S_008F1C_TYPE(si_tex_dim(texture->target, 0)); + S_008F1C_TYPE(si_tex_dim(texture->target, + state->target, 0)); view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(tmp->fmask.pitch - 1); view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | - S_008F24_LAST_ARRAY(state->u.tex.last_layer); + S_008F24_LAST_ARRAY(last_layer); view->fmask_state[6] = 0; view->fmask_state[7] = 0; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 5face423941..ce6c98c3124 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -109,11 +109,11 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_patches) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader_selector *ls = 
sctx->vs_shader; + struct si_shader_ctx_state *ls = &sctx->vs_shader; /* The TES pointer will only be used for sctx->last_tcs. * It would be wrong to think that TCS = TES. */ struct si_shader_selector *tcs = - sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader; + sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; @@ -138,9 +138,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ - num_tcs_inputs = util_last_bit64(ls->outputs_written); + num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); - if (sctx->tcs_shader) { + if (sctx->tcs_shader.cso) { num_tcs_outputs = util_last_bit64(tcs->outputs_written); num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written); @@ -159,7 +159,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; - output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0; + output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0; perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; lds_size = output_patch0_offset + output_patch_size * *num_patches; @@ -231,13 +231,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, bool partial_vs_wave = false; bool partial_es_wave = false; - if (sctx->gs_shader) + if (sctx->gs_shader.cso) primgroup_size = 64; /* recommended with a GS */ - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { unsigned num_cp_out = - sctx->tcs_shader ? 
- sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : + sctx->tcs_shader.cso ? + sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : info->vertices_per_patch; unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out); @@ -248,8 +248,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, /* SWITCH_ON_EOI must be set if PrimID is used. * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) || - sctx->tes_shader->info.uses_primid) { + if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || + sctx->tes_shader.cso->info.uses_primid) { ia_switch_on_eoi = true; partial_es_wave = true; } @@ -258,7 +258,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, if ((sctx->b.family == CHIP_TAHITI || sctx->b.family == CHIP_PITCAIRN || sctx->b.family == CHIP_BONAIRE) && - sctx->gs_shader) + sctx->gs_shader.cso) partial_vs_wave = true; } @@ -328,11 +328,11 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx, { unsigned num_output_cp; - if (!sctx->tes_shader) + if (!sctx->tes_shader.cso) return 0; - num_output_cp = sctx->tcs_shader ? - sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : + num_output_cp = sctx->tcs_shader.cso ? 
+ sctx->tcs_shader.cso->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : info->vertices_per_patch; return S_028B58_NUM_PATCHES(num_patches) | @@ -395,7 +395,7 @@ static void si_emit_draw_registers(struct si_context *sctx, unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim); unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0; - if (sctx->tes_shader) + if (sctx->tes_shader.cso) si_emit_derived_tess_state(sctx, info, &num_patches); ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches); @@ -735,11 +735,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) (info->indexed || !info->count_from_stream_output)) return; - if (!sctx->ps_shader || !sctx->vs_shader) { + if (!sctx->ps_shader.cso || !sctx->vs_shader.cso) { assert(0); return; } - if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) { + if (!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES)) { assert(0); return; } @@ -751,11 +751,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) * This must be done after si_decompress_textures, which can call * draw_vbo recursively, and before si_update_shaders, which uses * current_rast_prim for this draw_vbo call. 
*/ - if (sctx->gs_shader) - sctx->current_rast_prim = sctx->gs_shader->gs_output_prim; - else if (sctx->tes_shader) + if (sctx->gs_shader.cso) + sctx->current_rast_prim = sctx->gs_shader.cso->gs_output_prim; + else if (sctx->tes_shader.cso) sctx->current_rast_prim = - sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; else sctx->current_rast_prim = info->mode; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c98509bb0b9..eea00e0fafc 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -404,6 +404,7 @@ static void si_shader_ps(struct si_shader *shader) unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = 0; uint64_t va; + bool has_centroid; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -435,8 +436,11 @@ static void si_shader_ps(struct si_shader *shader) } } + has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena); + spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) | - S_0286D8_BC_OPTIMIZE_DISABLE(1); + S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid); si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); @@ -523,26 +527,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - if (sctx->tes_shader) + if (sctx->tes_shader.cso) key->vs.as_ls = 1; - else if (sctx->gs_shader) { + else if (sctx->gs_shader.cso) { key->vs.as_es = 1; - key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read; + key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read; } - if (!sctx->gs_shader && sctx->ps_shader && - sctx->ps_shader->info.uses_primid) + if (!sctx->gs_shader.cso && sctx->ps_shader.cso && + 
sctx->ps_shader.cso->info.uses_primid) key->vs.export_prim_id = 1; break; case PIPE_SHADER_TESS_CTRL: key->tcs.prim_mode = - sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; break; case PIPE_SHADER_TESS_EVAL: - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { key->tes.as_es = 1; - key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read; - } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid) + key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read; + } else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) key->tes.export_prim_id = 1; break; case PIPE_SHADER_GEOMETRY: @@ -589,11 +593,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, /* Select the hw shader variant depending on the current state. */ static int si_shader_select(struct pipe_context *ctx, - struct si_shader_selector *sel) + struct si_shader_ctx_state *state) { struct si_context *sctx = (struct si_context *)ctx; + struct si_shader_selector *sel = state->cso; + struct si_shader *current = state->current; union si_shader_key key; - struct si_shader * shader = NULL; + struct si_shader *iter, *shader = NULL; int r; si_shader_selector_key(ctx, sel, &key); @@ -602,49 +608,51 @@ static int si_shader_select(struct pipe_context *ctx, * This path is also used for most shaders that don't need multiple * variants, it will cost just a computation of the key and this * test. 
*/ - if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { + if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0)) return 0; - } - /* lookup if we have other variants in the list */ - if (sel->num_shaders > 1) { - struct si_shader *p = sel->current, *c = p->next_variant; + pipe_mutex_lock(sel->mutex); - while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { - p = c; - c = c->next_variant; - } - - if (c) { - p->next_variant = c->next_variant; - shader = c; + /* Find the shader variant. */ + for (iter = sel->first_variant; iter; iter = iter->next_variant) { + /* Don't check the "current" shader. We checked it above. */ + if (current != iter && + memcmp(&iter->key, &key, sizeof(key)) == 0) { + state->current = iter; + pipe_mutex_unlock(sel->mutex); + return 0; } } - if (shader) { - shader->next_variant = sel->current; - sel->current = shader; + /* Build a new shader. */ + shader = CALLOC_STRUCT(si_shader); + if (!shader) { + pipe_mutex_unlock(sel->mutex); + return -ENOMEM; + } + shader->selector = sel; + shader->key = key; + + r = si_shader_create(sctx->screen, sctx->tm, shader); + if (unlikely(r)) { + R600_ERR("Failed to build shader variant (type=%u) %d\n", + sel->type, r); + FREE(shader); + pipe_mutex_unlock(sel->mutex); + return r; + } + si_shader_init_pm4_state(shader); + + if (!sel->last_variant) { + sel->first_variant = shader; + sel->last_variant = shader; } else { - shader = CALLOC(1, sizeof(struct si_shader)); - shader->selector = sel; - shader->key = key; - - shader->next_variant = sel->current; - sel->current = shader; - r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm, - shader); - if (unlikely(r)) { - R600_ERR("Failed to build shader variant (type=%u) %d\n", - sel->type, r); - sel->current = NULL; - FREE(shader); - return r; - } - si_shader_init_pm4_state(shader); - sel->num_shaders++; - p_atomic_inc(&sctx->screen->b.num_compilations); + sel->last_variant->next_variant = shader; + sel->last_variant = 
shader; } - + state->current = shader; + p_atomic_inc(&sctx->screen->b.num_compilations); + pipe_mutex_unlock(sel->mutex); return 0; } @@ -752,14 +760,18 @@ static void *si_create_shader_selector(struct pipe_context *ctx, break; } - if (sscreen->b.debug_flags & DBG_PRECOMPILE) - if (si_shader_select(ctx, sel)) { + if (sscreen->b.debug_flags & DBG_PRECOMPILE) { + struct si_shader_ctx_state state = {sel}; + + if (si_shader_select(ctx, &state)) { fprintf(stderr, "radeonsi: can't create a shader\n"); tgsi_free_tokens(sel->tokens); FREE(sel); return NULL; } + } + pipe_mutex_init(sel->mutex); return sel; } @@ -787,10 +799,11 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - if (sctx->vs_shader == sel || !sel) + if (sctx->vs_shader.cso == sel || !sel) return; - sctx->vs_shader = sel; + sctx->vs_shader.cso = sel; + sctx->vs_shader.current = sel->first_variant; si_mark_atom_dirty(sctx, &sctx->clip_regs); si_update_viewports_and_scissors(sctx); } @@ -799,12 +812,13 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->gs_shader != !!sel; + bool enable_changed = !!sctx->gs_shader.cso != !!sel; - if (sctx->gs_shader == sel) + if (sctx->gs_shader.cso == sel) return; - sctx->gs_shader = sel; + sctx->gs_shader.cso = sel; + sctx->gs_shader.current = sel ? 
sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); sctx->last_rast_prim = -1; /* reset this so that it gets updated */ @@ -817,12 +831,13 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->tcs_shader != !!sel; + bool enable_changed = !!sctx->tcs_shader.cso != !!sel; - if (sctx->tcs_shader == sel) + if (sctx->tcs_shader.cso == sel) return; - sctx->tcs_shader = sel; + sctx->tcs_shader.cso = sel; + sctx->tcs_shader.current = sel ? sel->first_variant : NULL; if (enable_changed) sctx->last_tcs = NULL; /* invalidate derived tess state */ @@ -832,12 +847,13 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = state; - bool enable_changed = !!sctx->tes_shader != !!sel; + bool enable_changed = !!sctx->tes_shader.cso != !!sel; - if (sctx->tes_shader == sel) + if (sctx->tes_shader.cso == sel) return; - sctx->tes_shader = sel; + sctx->tes_shader.cso = sel; + sctx->tes_shader.current = sel ? 
sel->first_variant : NULL; si_mark_atom_dirty(sctx, &sctx->clip_regs); sctx->last_rast_prim = -1; /* reset this so that it gets updated */ @@ -864,7 +880,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) struct si_shader_selector *sel = state; /* skip if supplied shader is one already in use */ - if (sctx->ps_shader == sel) + if (sctx->ps_shader.cso == sel) return; /* use a dummy shader if binding a NULL shader */ @@ -873,7 +889,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) sel = sctx->dummy_pixel_shader; } - sctx->ps_shader = sel; + sctx->ps_shader.cso = sel; + sctx->ps_shader.current = sel->first_variant; si_mark_atom_dirty(sctx, &sctx->cb_target_mask); } @@ -881,8 +898,8 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; struct si_shader_selector *sel = (struct si_shader_selector *)state; - struct si_shader *p = sel->current, *c; - struct si_shader_selector **current_shader[SI_NUM_SHADERS] = { + struct si_shader *p = sel->first_variant, *c; + struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = { [PIPE_SHADER_VERTEX] = &sctx->vs_shader, [PIPE_SHADER_TESS_CTRL] = &sctx->tcs_shader, [PIPE_SHADER_TESS_EVAL] = &sctx->tes_shader, @@ -890,8 +907,10 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) [PIPE_SHADER_FRAGMENT] = &sctx->ps_shader, }; - if (*current_shader[sel->type] == sel) - *current_shader[sel->type] = NULL; + if (current_shader[sel->type]->cso == sel) { + current_shader[sel->type]->cso = NULL; + current_shader[sel->type]->current = NULL; + } while (p) { c = p->next_variant; @@ -927,6 +946,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) p = c; } + pipe_mutex_destroy(sel->mutex); free(sel->tokens); free(sel); } @@ -934,7 +954,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) static void si_emit_spi_map(struct si_context 
*sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader *ps = sctx->ps_shader->current; + struct si_shader *ps = sctx->ps_shader.current; struct si_shader *vs = si_get_vs_state(sctx); struct tgsi_shader_info *psinfo = &ps->selector->info; struct tgsi_shader_info *vsinfo = &vs->selector->info; @@ -1004,7 +1024,7 @@ bcolor: static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - struct si_shader *ps = sctx->ps_shader->current; + struct si_shader *ps = sctx->ps_shader.current; unsigned input_ena = ps->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ @@ -1133,7 +1153,7 @@ static void si_init_gs_rings(struct si_context *sctx) static void si_update_gs_rings(struct si_context *sctx) { - unsigned gsvs_itemsize = sctx->gs_shader->gsvs_itemsize; + unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize; uint64_t offset; if (gsvs_itemsize == sctx->last_gsvs_itemsize) @@ -1167,17 +1187,14 @@ static void si_update_gs_rings(struct si_context *sctx) * < 0 if there was a failure */ static int si_update_scratch_buffer(struct si_context *sctx, - struct si_shader_selector *sel) + struct si_shader *shader) { - struct si_shader *shader; uint64_t scratch_va = sctx->scratch_buffer->gpu_address; int r; - if (!sel) + if (!shader) return 0; - shader = sel->current; - /* This shader doesn't need a scratch buffer */ if (shader->scratch_bytes_per_wave == 0) return 0; @@ -1209,20 +1226,20 @@ static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx) return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0; } -static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader_selector *sel) +static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader) { - return sel ? sel->current->scratch_bytes_per_wave : 0; + return shader ? 
shader->scratch_bytes_per_wave : 0; } static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx) { unsigned bytes = 0; - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tcs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); return bytes; } @@ -1256,46 +1273,46 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) * last used, so we still need to try to update them, even if * they require scratch buffers smaller than the current size. 
*/ - r = si_update_scratch_buffer(sctx, sctx->ps_shader); + r = si_update_scratch_buffer(sctx, sctx->ps_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); - r = si_update_scratch_buffer(sctx, sctx->gs_shader); + r = si_update_scratch_buffer(sctx, sctx->gs_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); + si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4); - r = si_update_scratch_buffer(sctx, sctx->tcs_shader); + r = si_update_scratch_buffer(sctx, sctx->tcs_shader.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); + si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4); /* VS can be bound as LS, ES, or VS. */ - r = si_update_scratch_buffer(sctx, sctx->vs_shader); + r = si_update_scratch_buffer(sctx, sctx->vs_shader.current); if (r < 0) return false; if (r == 1) { - if (sctx->tes_shader) - si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); - else if (sctx->gs_shader) - si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4); + if (sctx->tes_shader.current) + si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4); + else if (sctx->gs_shader.current) + si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4); else - si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4); } /* TES can be bound as ES or VS. 
*/ - r = si_update_scratch_buffer(sctx, sctx->tes_shader); + r = si_update_scratch_buffer(sctx, sctx->tes_shader.current); if (r < 0) return false; if (r == 1) { - if (sctx->gs_shader) - si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); + if (sctx->gs_shader.current) + si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4); else - si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4); + si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); } } @@ -1361,7 +1378,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx) if (!ureg) return; /* if we get here, we're screwed */ - assert(!sctx->fixed_func_tcs_shader); + assert(!sctx->fixed_func_tcs_shader.cso); ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF); const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0), @@ -1376,7 +1393,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx) ureg_MOV(ureg, tessinner, const1); ureg_END(ureg); - sctx->fixed_func_tcs_shader = + sctx->fixed_func_tcs_shader.cso = ureg_create_shader_and_destroy(ureg, &sctx->b.b); } @@ -1384,7 +1401,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx) { /* Calculate the index of the config. 
* 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */ - unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader; + unsigned index = 2*!!sctx->tes_shader.cso + !!sctx->gs_shader.cso; struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index]; if (!*pm4) { @@ -1392,17 +1409,17 @@ static void si_update_vgt_shader_config(struct si_context *sctx) *pm4 = CALLOC_STRUCT(si_pm4_state); - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1); - if (sctx->gs_shader) + if (sctx->gs_shader.cso) stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1) | S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); else stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (sctx->gs_shader) { + } else if (sctx->gs_shader.cso) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1) | S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); @@ -1432,7 +1449,7 @@ bool si_update_shaders(struct si_context *sctx) int r; /* Update stages before GS. 
*/ - if (sctx->tes_shader) { + if (sctx->tes_shader.cso) { if (!sctx->tf_ring) { si_init_tess_factor_ring(sctx); if (!sctx->tf_ring) @@ -1440,65 +1457,65 @@ bool si_update_shaders(struct si_context *sctx) } /* VS as LS */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, ls, sctx->vs_shader.current->pm4); - if (sctx->tcs_shader) { - r = si_shader_select(ctx, sctx->tcs_shader); + if (sctx->tcs_shader.cso) { + r = si_shader_select(ctx, &sctx->tcs_shader); if (r) return false; - si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4); + si_pm4_bind_state(sctx, hs, sctx->tcs_shader.current->pm4); } else { - if (!sctx->fixed_func_tcs_shader) { + if (!sctx->fixed_func_tcs_shader.cso) { si_generate_fixed_func_tcs(sctx); - if (!sctx->fixed_func_tcs_shader) + if (!sctx->fixed_func_tcs_shader.cso) return false; } - r = si_shader_select(ctx, sctx->fixed_func_tcs_shader); + r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader); if (r) return false; si_pm4_bind_state(sctx, hs, - sctx->fixed_func_tcs_shader->current->pm4); + sctx->fixed_func_tcs_shader.current->pm4); } - r = si_shader_select(ctx, sctx->tes_shader); + r = si_shader_select(ctx, &sctx->tes_shader); if (r) return false; - if (sctx->gs_shader) { + if (sctx->gs_shader.cso) { /* TES as ES */ - si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4); + si_pm4_bind_state(sctx, es, sctx->tes_shader.current->pm4); } else { /* TES as VS */ - si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4); - si_update_so(sctx, sctx->tes_shader); + si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); + si_update_so(sctx, sctx->tes_shader.cso); } - } else if (sctx->gs_shader) { + } else if (sctx->gs_shader.cso) { /* VS as ES */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, 
es, sctx->vs_shader->current->pm4); + si_pm4_bind_state(sctx, es, sctx->vs_shader.current->pm4); } else { /* VS as VS */ - r = si_shader_select(ctx, sctx->vs_shader); + r = si_shader_select(ctx, &sctx->vs_shader); if (r) return false; - si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4); - si_update_so(sctx, sctx->vs_shader); + si_pm4_bind_state(sctx, vs, sctx->vs_shader.current->pm4); + si_update_so(sctx, sctx->vs_shader.cso); } /* Update GS. */ - if (sctx->gs_shader) { - r = si_shader_select(ctx, sctx->gs_shader); + if (sctx->gs_shader.cso) { + r = si_shader_select(ctx, &sctx->gs_shader); if (r) return false; - si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4); - si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4); - si_update_so(sctx, sctx->gs_shader); + si_pm4_bind_state(sctx, gs, sctx->gs_shader.current->pm4); + si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4); + si_update_so(sctx, sctx->gs_shader.cso); if (!sctx->gsvs_ring) { si_init_gs_rings(sctx); @@ -1514,10 +1531,10 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); - r = si_shader_select(ctx, sctx->ps_shader); + r = si_shader_select(ctx, &sctx->ps_shader); if (r) return false; - si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4); + si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || @@ -1543,13 +1560,13 @@ bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) { - sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control; + if (sctx->ps_db_shader_control != sctx->ps_shader.current->db_shader_control) { + sctx->ps_db_shader_control = sctx->ps_shader.current->db_shader_control; si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (sctx->smoothing_enabled != 
sctx->ps_shader->current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing; + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); if (sctx->b.chip_class == SI) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index d468cf4de54..e7006d2fa0d 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -248,6 +248,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index dab89814334..f6fafca5c0b 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -381,6 +381,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; } @@ -455,6 +456,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } /* If we get here, we failed to handle a cap above */ debug_printf("Unexpected fragment shader query %u\n", param); @@ -511,6 +514,8 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; } /* If we get here, we failed to handle a 
cap above */ debug_printf("Unexpected vertex shader query %u\n", param); @@ -600,6 +605,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: debug_printf("Unexpected vgpu10 shader query %u\n", param); return 0; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index a842d604a51..17b524653bb 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -393,7 +393,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state) continue; nir_variable *output_var = NULL; - foreach_list_typed(nir_variable, var, node, &c->s->outputs) { + nir_foreach_variable(var, &c->s->outputs) { if (var->data.driver_location == intr->const_index[0]) { output_var = var; break; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index a98d70da7d8..caf706aa2a6 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -23,6 +23,7 @@ #include "vc4_qir.h" #include "glsl/nir/nir_builder.h" +#include "util/u_format.h" /** * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into @@ -50,20 +51,188 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr, nir_instr_remove(&intr->instr); } +static nir_ssa_def * +vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_ubitfield_extract(b, + src, + nir_imm_int(b, 8 * chan), + nir_imm_int(b, 8)); +} + +/** Returns the 16 bit field as a sign-extended 32-bit value. 
*/ +static nir_ssa_def * +vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_ibitfield_extract(b, + src, + nir_imm_int(b, 16 * chan), + nir_imm_int(b, 16)); +} + +/** Returns the 16 bit field as an unsigned 32 bit value. */ +static nir_ssa_def * +vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + if (chan == 0) { + return nir_iand(b, src, nir_imm_int(b, 0xffff)); + } else { + return nir_ushr(b, src, nir_imm_int(b, 16)); + } +} + +static nir_ssa_def * +vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan) +{ + return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false); +} + +static nir_ssa_def * +vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c, + nir_builder *b, + nir_ssa_def **vpm_reads, + uint8_t swiz, + const struct util_format_description *desc) +{ + const struct util_format_channel_description *chan = + &desc->channel[swiz]; + nir_ssa_def *temp; + + if (swiz > UTIL_FORMAT_SWIZZLE_W) { + return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); + } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) { + return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz); + } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) { + if (chan->normalized) { + return nir_fmul(b, + nir_i2f(b, vpm_reads[swiz]), + nir_imm_float(b, + 1.0 / 0x7fffffff)); + } else { + return nir_i2f(b, vpm_reads[swiz]); + } + } else if (chan->size == 8 && + (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || + chan->type == UTIL_FORMAT_TYPE_SIGNED)) { + nir_ssa_def *vpm = vpm_reads[0]; + if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { + temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080)); + if (chan->normalized) { + return nir_fsub(b, nir_fmul(b, + vc4_nir_unpack_8f(b, temp, swiz), + nir_imm_float(b, 2.0)), + nir_imm_float(b, 1.0)); + } else { + return nir_fadd(b, + nir_i2f(b, + vc4_nir_unpack_8i(b, temp, + swiz)), + nir_imm_float(b, -128.0)); + } + } else { + if (chan->normalized) { + return 
vc4_nir_unpack_8f(b, vpm, swiz); + } else { + return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz)); + } + } + } else if (chan->size == 16 && + (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || + chan->type == UTIL_FORMAT_TYPE_SIGNED)) { + nir_ssa_def *vpm = vpm_reads[swiz / 2]; + + /* Note that UNPACK_16F eats a half float, not ints, so we use + * UNPACK_16_I for all of these. + */ + if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { + temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1)); + if (chan->normalized) { + return nir_fmul(b, temp, + nir_imm_float(b, 1/32768.0f)); + } else { + return temp; + } + } else { + temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1)); + if (chan->normalized) { + return nir_fmul(b, temp, + nir_imm_float(b, 1 / 65535.0)); + } else { + return temp; + } + } + } else { + return NULL; + } +} + static void -vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b, - nir_intrinsic_instr *intr) +vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, + nir_intrinsic_instr *intr) { b->cursor = nir_before_instr(&intr->instr); - if (c->stage == QSTAGE_FRAG && intr->const_index[0] == - VC4_NIR_TLB_COLOR_READ_INPUT) { + int attr = intr->const_index[0]; + enum pipe_format format = c->vs_key->attr_formats[attr]; + uint32_t attr_size = util_format_get_blocksize(format); + + /* All TGSI-to-NIR inputs are vec4. */ + assert(intr->num_components == 4); + + /* Generate dword loads for the VPM values (Since these intrinsics may + * be reordered, the actual reads will be generated at the top of the + * shader by ntq_setup_inputs(). 
+ */ + nir_ssa_def *vpm_reads[4]; + for (int i = 0; i < align(attr_size, 4) / 4; i++) { + nir_intrinsic_instr *intr_comp = + nir_intrinsic_instr_create(c->s, + nir_intrinsic_load_input); + intr_comp->num_components = 1; + intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_builder_instr_insert(b, &intr_comp->instr); + + vpm_reads[i] = &intr_comp->dest.ssa; + } + + bool format_warned = false; + const struct util_format_description *desc = + util_format_description(format); + + nir_ssa_def *dests[4]; + for (int i = 0; i < 4; i++) { + uint8_t swiz = desc->swizzle[i]; + dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz, + desc); + + if (!dests[i]) { + if (!format_warned) { + fprintf(stderr, + "vtx element %d unsupported type: %s\n", + attr, util_format_name(format)); + format_warned = true; + } + dests[i] = nir_imm_float(b, 0.0); + } + } + + replace_intrinsic_with_vec4(b, intr, dests); +} + +static void +vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, + nir_intrinsic_instr *intr) +{ + b->cursor = nir_before_instr(&intr->instr); + + if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) { /* This doesn't need any lowering. 
*/ return; } nir_variable *input_var = NULL; - foreach_list_typed(nir_variable, var, node, &c->s->inputs) { + nir_foreach_variable(var, &c->s->inputs) { if (var->data.driver_location == intr->const_index[0]) { input_var = var; break; @@ -87,38 +256,31 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b, dests[i] = &intr_comp->dest.ssa; } - switch (c->stage) { - case QSTAGE_FRAG: - if (input_var->data.location == VARYING_SLOT_FACE) { - dests[0] = nir_fsub(b, - nir_imm_float(b, 1.0), - nir_fmul(b, - nir_i2f(b, dests[0]), - nir_imm_float(b, 2.0))); - dests[1] = nir_imm_float(b, 0.0); + if (input_var->data.location == VARYING_SLOT_FACE) { + dests[0] = nir_fsub(b, + nir_imm_float(b, 1.0), + nir_fmul(b, + nir_i2f(b, dests[0]), + nir_imm_float(b, 2.0))); + dests[1] = nir_imm_float(b, 0.0); + dests[2] = nir_imm_float(b, 0.0); + dests[3] = nir_imm_float(b, 1.0); + } else if (input_var->data.location >= VARYING_SLOT_VAR0) { + if (c->fs_key->point_sprite_mask & + (1 << (input_var->data.location - + VARYING_SLOT_VAR0))) { + if (!c->fs_key->is_points) { + dests[0] = nir_imm_float(b, 0.0); + dests[1] = nir_imm_float(b, 0.0); + } + if (c->fs_key->point_coord_upper_left) { + dests[1] = nir_fsub(b, + nir_imm_float(b, 1.0), + dests[1]); + } dests[2] = nir_imm_float(b, 0.0); dests[3] = nir_imm_float(b, 1.0); - } else if (input_var->data.location >= VARYING_SLOT_VAR0) { - if (c->fs_key->point_sprite_mask & - (1 << (input_var->data.location - - VARYING_SLOT_VAR0))) { - if (!c->fs_key->is_points) { - dests[0] = nir_imm_float(b, 0.0); - dests[1] = nir_imm_float(b, 0.0); - } - if (c->fs_key->point_coord_upper_left) { - dests[1] = nir_fsub(b, - nir_imm_float(b, 1.0), - dests[1]); - } - dests[2] = nir_imm_float(b, 0.0); - dests[3] = nir_imm_float(b, 1.0); - } } - break; - case QSTAGE_COORD: - case QSTAGE_VERT: - break; } replace_intrinsic_with_vec4(b, intr, dests); @@ -129,7 +291,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { 
nir_variable *output_var = NULL; - foreach_list_typed(nir_variable, var, node, &c->s->outputs) { + nir_foreach_variable(var, &c->s->outputs) { if (var->data.driver_location == intr->const_index[0]) { output_var = var; break; @@ -232,7 +394,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, switch (intr->intrinsic) { case nir_intrinsic_load_input: - vc4_nir_lower_input(c, b, intr); + if (c->stage == QSTAGE_FRAG) + vc4_nir_lower_fs_input(c, b, intr); + else + vc4_nir_lower_vertex_attr(c, b, intr); break; case nir_intrinsic_store_output: diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 31c7e28ff57..6e9ec6530c6 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -602,126 +602,18 @@ ntq_fsign(struct vc4_compile *c, struct qreg src) qir_uniform_f(c, -1.0)); } -static struct qreg -get_channel_from_vpm(struct vc4_compile *c, - struct qreg *vpm_reads, - uint8_t swiz, - const struct util_format_description *desc) -{ - const struct util_format_channel_description *chan = - &desc->channel[swiz]; - struct qreg temp; - - if (swiz > UTIL_FORMAT_SWIZZLE_W) - return get_swizzled_channel(c, vpm_reads, swiz); - else if (chan->size == 32 && - chan->type == UTIL_FORMAT_TYPE_FLOAT) { - return get_swizzled_channel(c, vpm_reads, swiz); - } else if (chan->size == 32 && - chan->type == UTIL_FORMAT_TYPE_SIGNED) { - if (chan->normalized) { - return qir_FMUL(c, - qir_ITOF(c, vpm_reads[swiz]), - qir_uniform_f(c, - 1.0 / 0x7fffffff)); - } else { - return qir_ITOF(c, vpm_reads[swiz]); - } - } else if (chan->size == 8 && - (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || - chan->type == UTIL_FORMAT_TYPE_SIGNED)) { - struct qreg vpm = vpm_reads[0]; - if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { - temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080)); - if (chan->normalized) { - return qir_FSUB(c, qir_FMUL(c, - qir_UNPACK_8_F(c, temp, swiz), - qir_uniform_f(c, 2.0)), - qir_uniform_f(c, 1.0)); - 
} else { - return qir_FADD(c, - qir_ITOF(c, - qir_UNPACK_8_I(c, temp, - swiz)), - qir_uniform_f(c, -128.0)); - } - } else { - if (chan->normalized) { - return qir_UNPACK_8_F(c, vpm, swiz); - } else { - return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz)); - } - } - } else if (chan->size == 16 && - (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || - chan->type == UTIL_FORMAT_TYPE_SIGNED)) { - struct qreg vpm = vpm_reads[swiz / 2]; - - /* Note that UNPACK_16F eats a half float, not ints, so we use - * UNPACK_16_I for all of these. - */ - if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { - temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2)); - if (chan->normalized) { - return qir_FMUL(c, temp, - qir_uniform_f(c, 1/32768.0f)); - } else { - return temp; - } - } else { - /* UNPACK_16I sign-extends, so we have to emit ANDs. */ - temp = vpm; - if (swiz == 1 || swiz == 3) - temp = qir_UNPACK_16_I(c, temp, 1); - temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff)); - temp = qir_ITOF(c, temp); - - if (chan->normalized) { - return qir_FMUL(c, temp, - qir_uniform_f(c, 1 / 65535.0)); - } else { - return temp; - } - } - } else { - return c->undef; - } -} - static void emit_vertex_input(struct vc4_compile *c, int attr) { enum pipe_format format = c->vs_key->attr_formats[attr]; uint32_t attr_size = util_format_get_blocksize(format); - struct qreg vpm_reads[4]; c->vattr_sizes[attr] = align(attr_size, 4); for (int i = 0; i < align(attr_size, 4) / 4; i++) { struct qreg vpm = { QFILE_VPM, attr * 4 + i }; - vpm_reads[i] = qir_MOV(c, vpm); + c->inputs[attr * 4 + i] = qir_MOV(c, vpm); c->num_inputs++; } - - bool format_warned = false; - const struct util_format_description *desc = - util_format_description(format); - - for (int i = 0; i < 4; i++) { - uint8_t swiz = desc->swizzle[i]; - struct qreg result = get_channel_from_vpm(c, vpm_reads, - swiz, desc); - - if (result.file == QFILE_NULL) { - if (!format_warned) { - fprintf(stderr, - "vtx element %d unsupported type: %s\n", - attr, 
util_format_name(format)); - format_warned = true; - } - result = qir_uniform_f(c, 0.0); - } - c->inputs[attr * 4 + i] = result; - } } static void @@ -876,6 +768,40 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) *dest = result; } +/** Handles sign-extended bitfield extracts for 16 bits. */ +static struct qreg +ntq_emit_ibfe(struct vc4_compile *c, struct qreg base, struct qreg offset, + struct qreg bits) +{ + assert(bits.file == QFILE_UNIF && + c->uniform_contents[bits.index] == QUNIFORM_CONSTANT && + c->uniform_data[bits.index] == 16); + + assert(offset.file == QFILE_UNIF && + c->uniform_contents[offset.index] == QUNIFORM_CONSTANT); + int offset_bit = c->uniform_data[offset.index]; + assert(offset_bit % 16 == 0); + + return qir_UNPACK_16_I(c, base, offset_bit / 16); +} + +/** Handles unsigned bitfield extracts for 8 bits. */ +static struct qreg +ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset, + struct qreg bits) +{ + assert(bits.file == QFILE_UNIF && + c->uniform_contents[bits.index] == QUNIFORM_CONSTANT && + c->uniform_data[bits.index] == 8); + + assert(offset.file == QFILE_UNIF && + c->uniform_contents[offset.index] == QUNIFORM_CONSTANT); + int offset_bit = c->uniform_data[offset.index]; + assert(offset_bit % 8 == 0); + + return qir_UNPACK_8_I(c, base, offset_bit / 8); +} + static void ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) { @@ -1106,6 +1032,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) qir_SUB(c, qir_uniform_ui(c, 0), src[0])); break; + case nir_op_ibitfield_extract: + *dest = ntq_emit_ibfe(c, src[0], src[1], src[2]); + break; + + case nir_op_ubitfield_extract: + *dest = ntq_emit_ubfe(c, src[0], src[1], src[2]); + break; + default: fprintf(stderr, "unknown NIR ALU inst: "); nir_print_instr(&instr->instr, stderr); @@ -1383,13 +1317,13 @@ static void ntq_setup_inputs(struct vc4_compile *c) { unsigned num_entries = 0; - foreach_list_typed(nir_variable, var, node, 
&c->s->inputs) + nir_foreach_variable(var, &c->s->inputs) num_entries++; nir_variable *vars[num_entries]; unsigned i = 0; - foreach_list_typed(nir_variable, var, node, &c->s->inputs) + nir_foreach_variable(var, &c->s->inputs) vars[i++] = var; /* Sort the variables so that we emit the input setup in @@ -1432,7 +1366,7 @@ ntq_setup_inputs(struct vc4_compile *c) static void ntq_setup_outputs(struct vc4_compile *c) { - foreach_list_typed(nir_variable, var, node, &c->s->outputs) { + nir_foreach_variable(var, &c->s->outputs) { unsigned array_len = MAX2(glsl_get_length(var->type), 1); unsigned loc = var->data.driver_location * 4; @@ -1471,7 +1405,7 @@ ntq_setup_outputs(struct vc4_compile *c) static void ntq_setup_uniforms(struct vc4_compile *c) { - foreach_list_typed(nir_variable, var, node, &c->s->uniforms) { + nir_foreach_variable(var, &c->s->uniforms) { unsigned array_len = MAX2(glsl_get_length(var->type), 1); unsigned array_elem_size = 4 * sizeof(float); diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 739ac86193a..774ec095652 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -182,6 +182,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: return 0; /* Stream output. 
*/ @@ -336,6 +337,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return VC4_MAX_TEXTURE_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; default: fprintf(stderr, "unknown shader param %d\n", param); return 0; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index a4947154f17..1ad545aae09 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -632,6 +632,7 @@ enum pipe_cap PIPE_CAP_DEPTH_BOUNDS_TEST, PIPE_CAP_TGSI_TXQS, PIPE_CAP_FORCE_PERSAMPLE_INTERP, + PIPE_CAP_SHAREABLE_SHADERS, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) @@ -696,7 +697,8 @@ enum pipe_shader_cap PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */ PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED, PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED, - PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE + PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE, + PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT, }; /** diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index b36e0a35b8d..e0ab9013dd5 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -267,7 +267,9 @@ union tgsi_immediate_data #define TGSI_PROPERTY_TES_SPACING 12 #define TGSI_PROPERTY_TES_VERTEX_ORDER_CW 13 #define TGSI_PROPERTY_TES_POINT_MODE 14 -#define TGSI_PROPERTY_COUNT 15 +#define TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 15 +#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16 +#define TGSI_PROPERTY_COUNT 17 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c index 18d88039579..f66ed896e62 100644 --- a/src/gallium/state_trackers/omx/vid_dec_h264.c +++ b/src/gallium/state_trackers/omx/vid_dec_h264.c @@ -753,10 +753,14 @@ static 
void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp, priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom; } - priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb; - priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb; - if (!priv->picture.h264.field_pic_flag) - priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom; + if (!priv->picture.h264.field_pic_flag) { + priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb; + priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] + + priv->codec_data.h264.delta_pic_order_cnt_bottom; + } else if (!priv->picture.h264.bottom_field_flag) + priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb; + else + priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb; } else if (sps->pic_order_cnt_type == 1) { unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4); diff --git a/src/gallium/targets/osmesa/osmesa.def b/src/gallium/targets/osmesa/osmesa.def index e2a31ab5457..e347463de9f 100644 --- a/src/gallium/targets/osmesa/osmesa.def +++ b/src/gallium/targets/osmesa/osmesa.def @@ -14,3 +14,340 @@ EXPORTS OSMesaGetProcAddress OSMesaColorClamp OSMesaPostprocess + glAccum + glAlphaFunc + glAreTexturesResident + glArrayElement + glBegin + glBindTexture + glBitmap + glBlendFunc + glCallList + glCallLists + glClear + glClearAccum + glClearColor + glClearDepth + glClearIndex + glClearStencil + glClipPlane + glColor3b + glColor3bv + glColor3d + glColor3dv + glColor3f + glColor3fv + glColor3i + glColor3iv + glColor3s + glColor3sv + glColor3ub + glColor3ubv + glColor3ui + glColor3uiv + glColor3us + glColor3usv + glColor4b + glColor4bv + glColor4d + glColor4dv + glColor4f + glColor4fv + glColor4i + glColor4iv + glColor4s + glColor4sv + glColor4ub + glColor4ubv + glColor4ui + glColor4uiv + glColor4us + glColor4usv + 
glColorMask + glColorMaterial + glColorPointer + glCopyPixels + glCopyTexImage1D + glCopyTexImage2D + glCopyTexSubImage1D + glCopyTexSubImage2D + glCullFace +; glDebugEntry + glDeleteLists + glDeleteTextures + glDepthFunc + glDepthMask + glDepthRange + glDisable + glDisableClientState + glDrawArrays + glDrawBuffer + glDrawElements + glDrawPixels + glEdgeFlag + glEdgeFlagPointer + glEdgeFlagv + glEnable + glEnableClientState + glEnd + glEndList + glEvalCoord1d + glEvalCoord1dv + glEvalCoord1f + glEvalCoord1fv + glEvalCoord2d + glEvalCoord2dv + glEvalCoord2f + glEvalCoord2fv + glEvalMesh1 + glEvalMesh2 + glEvalPoint1 + glEvalPoint2 + glFeedbackBuffer + glFinish + glFlush + glFogf + glFogfv + glFogi + glFogiv + glFrontFace + glFrustum + glGenLists + glGenTextures + glGetBooleanv + glGetClipPlane + glGetDoublev + glGetError + glGetFloatv + glGetIntegerv + glGetLightfv + glGetLightiv + glGetMapdv + glGetMapfv + glGetMapiv + glGetMaterialfv + glGetMaterialiv + glGetPixelMapfv + glGetPixelMapuiv + glGetPixelMapusv + glGetPointerv + glGetPolygonStipple + glGetString + glGetTexEnvfv + glGetTexEnviv + glGetTexGendv + glGetTexGenfv + glGetTexGeniv + glGetTexImage + glGetTexLevelParameterfv + glGetTexLevelParameteriv + glGetTexParameterfv + glGetTexParameteriv + glHint + glIndexMask + glIndexPointer + glIndexd + glIndexdv + glIndexf + glIndexfv + glIndexi + glIndexiv + glIndexs + glIndexsv + glIndexub + glIndexubv + glInitNames + glInterleavedArrays + glIsEnabled + glIsList + glIsTexture + glLightModelf + glLightModelfv + glLightModeli + glLightModeliv + glLightf + glLightfv + glLighti + glLightiv + glLineStipple + glLineWidth + glListBase + glLoadIdentity + glLoadMatrixd + glLoadMatrixf + glLoadName + glLogicOp + glMap1d + glMap1f + glMap2d + glMap2f + glMapGrid1d + glMapGrid1f + glMapGrid2d + glMapGrid2f + glMaterialf + glMaterialfv + glMateriali + glMaterialiv + glMatrixMode + glMultMatrixd + glMultMatrixf + glNewList + glNormal3b + glNormal3bv + glNormal3d + glNormal3dv + 
glNormal3f + glNormal3fv + glNormal3i + glNormal3iv + glNormal3s + glNormal3sv + glNormalPointer + glOrtho + glPassThrough + glPixelMapfv + glPixelMapuiv + glPixelMapusv + glPixelStoref + glPixelStorei + glPixelTransferf + glPixelTransferi + glPixelZoom + glPointSize + glPolygonMode + glPolygonOffset + glPolygonStipple + glPopAttrib + glPopClientAttrib + glPopMatrix + glPopName + glPrioritizeTextures + glPushAttrib + glPushClientAttrib + glPushMatrix + glPushName + glRasterPos2d + glRasterPos2dv + glRasterPos2f + glRasterPos2fv + glRasterPos2i + glRasterPos2iv + glRasterPos2s + glRasterPos2sv + glRasterPos3d + glRasterPos3dv + glRasterPos3f + glRasterPos3fv + glRasterPos3i + glRasterPos3iv + glRasterPos3s + glRasterPos3sv + glRasterPos4d + glRasterPos4dv + glRasterPos4f + glRasterPos4fv + glRasterPos4i + glRasterPos4iv + glRasterPos4s + glRasterPos4sv + glReadBuffer + glReadPixels + glRectd + glRectdv + glRectf + glRectfv + glRecti + glRectiv + glRects + glRectsv + glRenderMode + glRotated + glRotatef + glScaled + glScalef + glScissor + glSelectBuffer + glShadeModel + glStencilFunc + glStencilMask + glStencilOp + glTexCoord1d + glTexCoord1dv + glTexCoord1f + glTexCoord1fv + glTexCoord1i + glTexCoord1iv + glTexCoord1s + glTexCoord1sv + glTexCoord2d + glTexCoord2dv + glTexCoord2f + glTexCoord2fv + glTexCoord2i + glTexCoord2iv + glTexCoord2s + glTexCoord2sv + glTexCoord3d + glTexCoord3dv + glTexCoord3f + glTexCoord3fv + glTexCoord3i + glTexCoord3iv + glTexCoord3s + glTexCoord3sv + glTexCoord4d + glTexCoord4dv + glTexCoord4f + glTexCoord4fv + glTexCoord4i + glTexCoord4iv + glTexCoord4s + glTexCoord4sv + glTexCoordPointer + glTexEnvf + glTexEnvfv + glTexEnvi + glTexEnviv + glTexGend + glTexGendv + glTexGenf + glTexGenfv + glTexGeni + glTexGeniv + glTexImage1D + glTexImage2D + glTexParameterf + glTexParameterfv + glTexParameteri + glTexParameteriv + glTexSubImage1D + glTexSubImage2D + glTranslated + glTranslatef + glVertex2d + glVertex2dv + glVertex2f + glVertex2fv + 
glVertex2i + glVertex2iv + glVertex2s + glVertex2sv + glVertex3d + glVertex3dv + glVertex3f + glVertex3fv + glVertex3i + glVertex3iv + glVertex3s + glVertex3sv + glVertex4d + glVertex4dv + glVertex4f + glVertex4fv + glVertex4i + glVertex4iv + glVertex4s + glVertex4sv + glVertexPointer + glViewport diff --git a/src/gallium/targets/osmesa/osmesa.mingw.def b/src/gallium/targets/osmesa/osmesa.mingw.def index 874ac544084..945201c9d83 100644 --- a/src/gallium/targets/osmesa/osmesa.mingw.def +++ b/src/gallium/targets/osmesa/osmesa.mingw.def @@ -11,3 +11,340 @@ EXPORTS OSMesaGetProcAddress = OSMesaGetProcAddress@4 OSMesaColorClamp = OSMesaColorClamp@4 OSMesaPostprocess = OSMesaPostprocess@12 + glAccum = glAccum@8 + glAlphaFunc = glAlphaFunc@8 + glAreTexturesResident = glAreTexturesResident@12 + glArrayElement = glArrayElement@4 + glBegin = glBegin@4 + glBindTexture = glBindTexture@8 + glBitmap = glBitmap@28 + glBlendFunc = glBlendFunc@8 + glCallList = glCallList@4 + glCallLists = glCallLists@12 + glClear = glClear@4 + glClearAccum = glClearAccum@16 + glClearColor = glClearColor@16 + glClearDepth = glClearDepth@8 + glClearIndex = glClearIndex@4 + glClearStencil = glClearStencil@4 + glClipPlane = glClipPlane@8 + glColor3b = glColor3b@12 + glColor3bv = glColor3bv@4 + glColor3d = glColor3d@24 + glColor3dv = glColor3dv@4 + glColor3f = glColor3f@12 + glColor3fv = glColor3fv@4 + glColor3i = glColor3i@12 + glColor3iv = glColor3iv@4 + glColor3s = glColor3s@12 + glColor3sv = glColor3sv@4 + glColor3ub = glColor3ub@12 + glColor3ubv = glColor3ubv@4 + glColor3ui = glColor3ui@12 + glColor3uiv = glColor3uiv@4 + glColor3us = glColor3us@12 + glColor3usv = glColor3usv@4 + glColor4b = glColor4b@16 + glColor4bv = glColor4bv@4 + glColor4d = glColor4d@32 + glColor4dv = glColor4dv@4 + glColor4f = glColor4f@16 + glColor4fv = glColor4fv@4 + glColor4i = glColor4i@16 + glColor4iv = glColor4iv@4 + glColor4s = glColor4s@16 + glColor4sv = glColor4sv@4 + glColor4ub = glColor4ub@16 + glColor4ubv = 
glColor4ubv@4 + glColor4ui = glColor4ui@16 + glColor4uiv = glColor4uiv@4 + glColor4us = glColor4us@16 + glColor4usv = glColor4usv@4 + glColorMask = glColorMask@16 + glColorMaterial = glColorMaterial@8 + glColorPointer = glColorPointer@16 + glCopyPixels = glCopyPixels@20 + glCopyTexImage1D = glCopyTexImage1D@28 + glCopyTexImage2D = glCopyTexImage2D@32 + glCopyTexSubImage1D = glCopyTexSubImage1D@24 + glCopyTexSubImage2D = glCopyTexSubImage2D@32 + glCullFace = glCullFace@4 +; glDebugEntry = glDebugEntry@8 + glDeleteLists = glDeleteLists@8 + glDeleteTextures = glDeleteTextures@8 + glDepthFunc = glDepthFunc@4 + glDepthMask = glDepthMask@4 + glDepthRange = glDepthRange@16 + glDisable = glDisable@4 + glDisableClientState = glDisableClientState@4 + glDrawArrays = glDrawArrays@12 + glDrawBuffer = glDrawBuffer@4 + glDrawElements = glDrawElements@16 + glDrawPixels = glDrawPixels@20 + glEdgeFlag = glEdgeFlag@4 + glEdgeFlagPointer = glEdgeFlagPointer@8 + glEdgeFlagv = glEdgeFlagv@4 + glEnable = glEnable@4 + glEnableClientState = glEnableClientState@4 + glEnd = glEnd@0 + glEndList = glEndList@0 + glEvalCoord1d = glEvalCoord1d@8 + glEvalCoord1dv = glEvalCoord1dv@4 + glEvalCoord1f = glEvalCoord1f@4 + glEvalCoord1fv = glEvalCoord1fv@4 + glEvalCoord2d = glEvalCoord2d@16 + glEvalCoord2dv = glEvalCoord2dv@4 + glEvalCoord2f = glEvalCoord2f@8 + glEvalCoord2fv = glEvalCoord2fv@4 + glEvalMesh1 = glEvalMesh1@12 + glEvalMesh2 = glEvalMesh2@20 + glEvalPoint1 = glEvalPoint1@4 + glEvalPoint2 = glEvalPoint2@8 + glFeedbackBuffer = glFeedbackBuffer@12 + glFinish = glFinish@0 + glFlush = glFlush@0 + glFogf = glFogf@8 + glFogfv = glFogfv@8 + glFogi = glFogi@8 + glFogiv = glFogiv@8 + glFrontFace = glFrontFace@4 + glFrustum = glFrustum@48 + glGenLists = glGenLists@4 + glGenTextures = glGenTextures@8 + glGetBooleanv = glGetBooleanv@8 + glGetClipPlane = glGetClipPlane@8 + glGetDoublev = glGetDoublev@8 + glGetError = glGetError@0 + glGetFloatv = glGetFloatv@8 + glGetIntegerv = glGetIntegerv@8 + 
glGetLightfv = glGetLightfv@12 + glGetLightiv = glGetLightiv@12 + glGetMapdv = glGetMapdv@12 + glGetMapfv = glGetMapfv@12 + glGetMapiv = glGetMapiv@12 + glGetMaterialfv = glGetMaterialfv@12 + glGetMaterialiv = glGetMaterialiv@12 + glGetPixelMapfv = glGetPixelMapfv@8 + glGetPixelMapuiv = glGetPixelMapuiv@8 + glGetPixelMapusv = glGetPixelMapusv@8 + glGetPointerv = glGetPointerv@8 + glGetPolygonStipple = glGetPolygonStipple@4 + glGetString = glGetString@4 + glGetTexEnvfv = glGetTexEnvfv@12 + glGetTexEnviv = glGetTexEnviv@12 + glGetTexGendv = glGetTexGendv@12 + glGetTexGenfv = glGetTexGenfv@12 + glGetTexGeniv = glGetTexGeniv@12 + glGetTexImage = glGetTexImage@20 + glGetTexLevelParameterfv = glGetTexLevelParameterfv@16 + glGetTexLevelParameteriv = glGetTexLevelParameteriv@16 + glGetTexParameterfv = glGetTexParameterfv@12 + glGetTexParameteriv = glGetTexParameteriv@12 + glHint = glHint@8 + glIndexMask = glIndexMask@4 + glIndexPointer = glIndexPointer@12 + glIndexd = glIndexd@8 + glIndexdv = glIndexdv@4 + glIndexf = glIndexf@4 + glIndexfv = glIndexfv@4 + glIndexi = glIndexi@4 + glIndexiv = glIndexiv@4 + glIndexs = glIndexs@4 + glIndexsv = glIndexsv@4 + glIndexub = glIndexub@4 + glIndexubv = glIndexubv@4 + glInitNames = glInitNames@0 + glInterleavedArrays = glInterleavedArrays@12 + glIsEnabled = glIsEnabled@4 + glIsList = glIsList@4 + glIsTexture = glIsTexture@4 + glLightModelf = glLightModelf@8 + glLightModelfv = glLightModelfv@8 + glLightModeli = glLightModeli@8 + glLightModeliv = glLightModeliv@8 + glLightf = glLightf@12 + glLightfv = glLightfv@12 + glLighti = glLighti@12 + glLightiv = glLightiv@12 + glLineStipple = glLineStipple@8 + glLineWidth = glLineWidth@4 + glListBase = glListBase@4 + glLoadIdentity = glLoadIdentity@0 + glLoadMatrixd = glLoadMatrixd@4 + glLoadMatrixf = glLoadMatrixf@4 + glLoadName = glLoadName@4 + glLogicOp = glLogicOp@4 + glMap1d = glMap1d@32 + glMap1f = glMap1f@24 + glMap2d = glMap2d@56 + glMap2f = glMap2f@40 + glMapGrid1d = glMapGrid1d@20 + 
glMapGrid1f = glMapGrid1f@12 + glMapGrid2d = glMapGrid2d@40 + glMapGrid2f = glMapGrid2f@24 + glMaterialf = glMaterialf@12 + glMaterialfv = glMaterialfv@12 + glMateriali = glMateriali@12 + glMaterialiv = glMaterialiv@12 + glMatrixMode = glMatrixMode@4 + glMultMatrixd = glMultMatrixd@4 + glMultMatrixf = glMultMatrixf@4 + glNewList = glNewList@8 + glNormal3b = glNormal3b@12 + glNormal3bv = glNormal3bv@4 + glNormal3d = glNormal3d@24 + glNormal3dv = glNormal3dv@4 + glNormal3f = glNormal3f@12 + glNormal3fv = glNormal3fv@4 + glNormal3i = glNormal3i@12 + glNormal3iv = glNormal3iv@4 + glNormal3s = glNormal3s@12 + glNormal3sv = glNormal3sv@4 + glNormalPointer = glNormalPointer@12 + glOrtho = glOrtho@48 + glPassThrough = glPassThrough@4 + glPixelMapfv = glPixelMapfv@12 + glPixelMapuiv = glPixelMapuiv@12 + glPixelMapusv = glPixelMapusv@12 + glPixelStoref = glPixelStoref@8 + glPixelStorei = glPixelStorei@8 + glPixelTransferf = glPixelTransferf@8 + glPixelTransferi = glPixelTransferi@8 + glPixelZoom = glPixelZoom@8 + glPointSize = glPointSize@4 + glPolygonMode = glPolygonMode@8 + glPolygonOffset = glPolygonOffset@8 + glPolygonStipple = glPolygonStipple@4 + glPopAttrib = glPopAttrib@0 + glPopClientAttrib = glPopClientAttrib@0 + glPopMatrix = glPopMatrix@0 + glPopName = glPopName@0 + glPrioritizeTextures = glPrioritizeTextures@12 + glPushAttrib = glPushAttrib@4 + glPushClientAttrib = glPushClientAttrib@4 + glPushMatrix = glPushMatrix@0 + glPushName = glPushName@4 + glRasterPos2d = glRasterPos2d@16 + glRasterPos2dv = glRasterPos2dv@4 + glRasterPos2f = glRasterPos2f@8 + glRasterPos2fv = glRasterPos2fv@4 + glRasterPos2i = glRasterPos2i@8 + glRasterPos2iv = glRasterPos2iv@4 + glRasterPos2s = glRasterPos2s@8 + glRasterPos2sv = glRasterPos2sv@4 + glRasterPos3d = glRasterPos3d@24 + glRasterPos3dv = glRasterPos3dv@4 + glRasterPos3f = glRasterPos3f@12 + glRasterPos3fv = glRasterPos3fv@4 + glRasterPos3i = glRasterPos3i@12 + glRasterPos3iv = glRasterPos3iv@4 + glRasterPos3s = 
glRasterPos3s@12 + glRasterPos3sv = glRasterPos3sv@4 + glRasterPos4d = glRasterPos4d@32 + glRasterPos4dv = glRasterPos4dv@4 + glRasterPos4f = glRasterPos4f@16 + glRasterPos4fv = glRasterPos4fv@4 + glRasterPos4i = glRasterPos4i@16 + glRasterPos4iv = glRasterPos4iv@4 + glRasterPos4s = glRasterPos4s@16 + glRasterPos4sv = glRasterPos4sv@4 + glReadBuffer = glReadBuffer@4 + glReadPixels = glReadPixels@28 + glRectd = glRectd@32 + glRectdv = glRectdv@8 + glRectf = glRectf@16 + glRectfv = glRectfv@8 + glRecti = glRecti@16 + glRectiv = glRectiv@8 + glRects = glRects@16 + glRectsv = glRectsv@8 + glRenderMode = glRenderMode@4 + glRotated = glRotated@32 + glRotatef = glRotatef@16 + glScaled = glScaled@24 + glScalef = glScalef@12 + glScissor = glScissor@16 + glSelectBuffer = glSelectBuffer@8 + glShadeModel = glShadeModel@4 + glStencilFunc = glStencilFunc@12 + glStencilMask = glStencilMask@4 + glStencilOp = glStencilOp@12 + glTexCoord1d = glTexCoord1d@8 + glTexCoord1dv = glTexCoord1dv@4 + glTexCoord1f = glTexCoord1f@4 + glTexCoord1fv = glTexCoord1fv@4 + glTexCoord1i = glTexCoord1i@4 + glTexCoord1iv = glTexCoord1iv@4 + glTexCoord1s = glTexCoord1s@4 + glTexCoord1sv = glTexCoord1sv@4 + glTexCoord2d = glTexCoord2d@16 + glTexCoord2dv = glTexCoord2dv@4 + glTexCoord2f = glTexCoord2f@8 + glTexCoord2fv = glTexCoord2fv@4 + glTexCoord2i = glTexCoord2i@8 + glTexCoord2iv = glTexCoord2iv@4 + glTexCoord2s = glTexCoord2s@8 + glTexCoord2sv = glTexCoord2sv@4 + glTexCoord3d = glTexCoord3d@24 + glTexCoord3dv = glTexCoord3dv@4 + glTexCoord3f = glTexCoord3f@12 + glTexCoord3fv = glTexCoord3fv@4 + glTexCoord3i = glTexCoord3i@12 + glTexCoord3iv = glTexCoord3iv@4 + glTexCoord3s = glTexCoord3s@12 + glTexCoord3sv = glTexCoord3sv@4 + glTexCoord4d = glTexCoord4d@32 + glTexCoord4dv = glTexCoord4dv@4 + glTexCoord4f = glTexCoord4f@16 + glTexCoord4fv = glTexCoord4fv@4 + glTexCoord4i = glTexCoord4i@16 + glTexCoord4iv = glTexCoord4iv@4 + glTexCoord4s = glTexCoord4s@16 + glTexCoord4sv = glTexCoord4sv@4 + 
glTexCoordPointer = glTexCoordPointer@16 + glTexEnvf = glTexEnvf@12 + glTexEnvfv = glTexEnvfv@12 + glTexEnvi = glTexEnvi@12 + glTexEnviv = glTexEnviv@12 + glTexGend = glTexGend@16 + glTexGendv = glTexGendv@12 + glTexGenf = glTexGenf@12 + glTexGenfv = glTexGenfv@12 + glTexGeni = glTexGeni@12 + glTexGeniv = glTexGeniv@12 + glTexImage1D = glTexImage1D@32 + glTexImage2D = glTexImage2D@36 + glTexParameterf = glTexParameterf@12 + glTexParameterfv = glTexParameterfv@12 + glTexParameteri = glTexParameteri@12 + glTexParameteriv = glTexParameteriv@12 + glTexSubImage1D = glTexSubImage1D@28 + glTexSubImage2D = glTexSubImage2D@36 + glTranslated = glTranslated@24 + glTranslatef = glTranslatef@12 + glVertex2d = glVertex2d@16 + glVertex2dv = glVertex2dv@4 + glVertex2f = glVertex2f@8 + glVertex2fv = glVertex2fv@4 + glVertex2i = glVertex2i@8 + glVertex2iv = glVertex2iv@4 + glVertex2s = glVertex2s@8 + glVertex2sv = glVertex2sv@4 + glVertex3d = glVertex3d@24 + glVertex3dv = glVertex3dv@4 + glVertex3f = glVertex3f@12 + glVertex3fv = glVertex3fv@4 + glVertex3i = glVertex3i@12 + glVertex3iv = glVertex3iv@4 + glVertex3s = glVertex3s@12 + glVertex3sv = glVertex3sv@4 + glVertex4d = glVertex4d@32 + glVertex4dv = glVertex4dv@4 + glVertex4f = glVertex4f@16 + glVertex4fv = glVertex4fv@4 + glVertex4i = glVertex4i@16 + glVertex4iv = glVertex4iv@4 + glVertex4s = glVertex4s@16 + glVertex4sv = glVertex4sv@4 + glVertexPointer = glVertexPointer@16 + glViewport = glViewport@16 diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index c5c5cae333b..e4e4a3fe148 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -610,6 +610,37 @@ match_subroutine_by_name(const char *name, return sig; } +static ir_rvalue * +generate_array_index(void *mem_ctx, exec_list *instructions, + struct _mesa_glsl_parse_state *state, YYLTYPE loc, + const ast_expression *array, ast_expression *idx, + const char **function_name, exec_list *actual_parameters) +{ + if (array->oper == ast_array_index) 
{ + /* This handles arrays of arrays */ + ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions, + state, loc, + array->subexpressions[0], + array->subexpressions[1], + function_name, actual_parameters); + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + + YYLTYPE index_loc = idx->get_location(); + return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array, + outer_array_idx, loc, + index_loc); + } else { + ir_variable *sub_var = NULL; + *function_name = array->primary_expression.identifier; + + match_subroutine_by_name(*function_name, actual_parameters, + state, &sub_var); + + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx); + } +} + static void print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc, ir_function *f) @@ -1989,16 +2020,18 @@ ast_function_expression::hir(exec_list *instructions, ir_variable *sub_var = NULL; ir_rvalue *array_idx = NULL; + process_parameters(instructions, &actual_parameters, &this->expressions, + state); + if (id->oper == ast_array_index) { - func_name = id->subexpressions[0]->primary_expression.identifier; - array_idx = id->subexpressions[1]->hir(instructions, state); + array_idx = generate_array_index(ctx, instructions, state, loc, + id->subexpressions[0], + id->subexpressions[1], &func_name, + &actual_parameters); } else { func_name = id->primary_expression.identifier; } - process_parameters(instructions, &actual_parameters, &this->expressions, - state); - ir_function_signature *sig = match_function_by_name(func_name, &actual_parameters, state); diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 0c11ec58d20..961183636a9 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -487,54 +487,54 @@ bit_logic_result_type(const struct glsl_type *type_a, ast_operators op, struct _mesa_glsl_parse_state *state, YYLTYPE *loc) { - if (!state->check_bitwise_operations_allowed(loc)) { - 
return glsl_type::error_type; - } + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } - /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: - * - * "The bitwise operators and (&), exclusive-or (^), and inclusive-or - * (|). The operands must be of type signed or unsigned integers or - * integer vectors." - */ - if (!type_a->is_integer()) { - _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } - if (!type_b->is_integer()) { - _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", + /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: + * + * "The bitwise operators and (&), exclusive-or (^), and inclusive-or + * (|). The operands must be of type signed or unsigned integers or + * integer vectors." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", ast_expression::operator_string(op)); - return glsl_type::error_type; - } + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } - /* "The fundamental types of the operands (signed or unsigned) must - * match," - */ - if (type_a->base_type != type_b->base_type) { - _mesa_glsl_error(loc, state, "operands of `%s' must have the same " - "base type", ast_expression::operator_string(op)); - return glsl_type::error_type; - } + /* "The fundamental types of the operands (signed or unsigned) must + * match," + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "operands of `%s' must have the same " + "base type", ast_expression::operator_string(op)); + return glsl_type::error_type; + } - /* "The operands cannot be vectors of differing size." 
*/ - if (type_a->is_vector() && - type_b->is_vector() && - type_a->vector_elements != type_b->vector_elements) { - _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " - "different sizes", ast_expression::operator_string(op)); - return glsl_type::error_type; - } + /* "The operands cannot be vectors of differing size." */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " + "different sizes", ast_expression::operator_string(op)); + return glsl_type::error_type; + } - /* "If one operand is a scalar and the other a vector, the scalar is - * applied component-wise to the vector, resulting in the same type as - * the vector. The fundamental types of the operands [...] will be the - * resulting fundamental type." - */ - if (type_a->is_scalar()) - return type_b; - else - return type_a; + /* "If one operand is a scalar and the other a vector, the scalar is + * applied component-wise to the vector, resulting in the same type as + * the vector. The fundamental types of the operands [...] will be the + * resulting fundamental type." 
+ */ + if (type_a->is_scalar()) + return type_b; + else + return type_a; } static const struct glsl_type * @@ -6294,6 +6294,18 @@ ast_interface_block::hir(exec_list *instructions, state->struct_specifier_depth--; + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].stream != -1 && + (unsigned) fields[i].stream != this->layout.stream) { + _mesa_glsl_error(&loc, state, + "stream layout qualifier on " + "interface block member `%s' does not match " + "the interface block (%d vs %d)", + fields[i].name, fields[i].stream, + this->layout.stream); + } + } + if (!redeclaring_per_vertex) { validate_identifier(this->block_name, loc, state); @@ -6634,6 +6646,8 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; + var->data.stream = this->layout.stream; + state->symbols->add_variable(var); instructions->push_tail(var); } @@ -6652,6 +6666,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; + var->data.stream = this->layout.stream; var->init_interface_type(block_type); if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) @@ -6664,17 +6679,6 @@ ast_interface_block::hir(exec_list *instructions, var->data.matrix_layout = fields[i].matrix_layout; } - if (fields[i].stream != -1 && - ((unsigned)fields[i].stream) != this->layout.stream) { - _mesa_glsl_error(&loc, state, - "stream layout qualifier on " - "interface block member `%s' does not match " - "the interface block (%d vs %d)", - var->name, fields[i].stream, this->layout.stream); - } - - var->data.stream = this->layout.stream; - if (var->data.mode == ir_var_shader_storage) { var->data.image_read_only = fields[i].image_read_only; var->data.image_write_only = fields[i].image_write_only; diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index cd00f6e085b..2f2e10d7992 100644 
--- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -2609,17 +2609,6 @@ interface_block: block->layout.is_default_qualifier = false; - foreach_list_typed (ast_declarator_list, member, link, &block->declarations) { - ast_type_qualifier& qualifier = member->type->qualifier; - if (qualifier.flags.q.stream && qualifier.stream != block->layout.stream) { - _mesa_glsl_error(& @1, state, - "stream layout qualifier on " - "interface block member does not match " - "the interface block (%d vs %d)", - qualifier.stream, block->layout.stream); - YYERROR; - } - } $$ = block; } | memory_qualifier interface_block diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index fe00aa30d07..8183e65d2f5 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -763,7 +763,8 @@ private: /* Assign explicit locations. */ if (current_var->data.explicit_location) { /* Set sequential locations for struct fields. */ - if (record_type != NULL) { + if (current_var->type->without_array()->is_record() || + current_var->type->is_array_of_arrays()) { const unsigned entries = MAX2(1, this->uniforms[id].array_elements); this->uniforms[id].remap_location = this->explicit_location + field_counter; @@ -1180,7 +1181,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog, /* Reserve all the explicit locations of the active uniforms. */ for (unsigned i = 0; i < num_uniforms; i++) { - if (uniforms[i].type->is_subroutine()) + if (uniforms[i].type->is_subroutine() || + uniforms[i].is_shader_storage) continue; if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) { @@ -1200,8 +1202,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog, /* Reserve locations for rest of the uniforms. */ for (unsigned i = 0; i < num_uniforms; i++) { - if (uniforms[i].type->is_subroutine()) + if (uniforms[i].type->is_subroutine() || + uniforms[i].is_shader_storage) continue; + /* Built-in uniforms should not get any location. 
*/ if (uniforms[i].builtin) continue; diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 25ca928aa43..07ea0e0c7e5 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -651,7 +651,7 @@ link_invalidate_variable_locations(exec_list *ir) /** - * Set UsesClipDistance and ClipDistanceArraySize based on the given shader. + * Set clip_distance_array_size based on the given shader. * * Also check for errors based on incorrect usage of gl_ClipVertex and * gl_ClipDistance. @@ -660,10 +660,10 @@ link_invalidate_variable_locations(exec_list *ir) */ static void analyze_clip_usage(struct gl_shader_program *prog, - struct gl_shader *shader, GLboolean *UsesClipDistance, - GLuint *ClipDistanceArraySize) + struct gl_shader *shader, + GLuint *clip_distance_array_size) { - *ClipDistanceArraySize = 0; + *clip_distance_array_size = 0; if (!prog->IsES && prog->Version >= 130) { /* From section 7.1 (Vertex Shader Special Variables) of the @@ -686,13 +686,14 @@ analyze_clip_usage(struct gl_shader_program *prog, _mesa_shader_stage_to_string(shader->Stage)); return; } - *UsesClipDistance = clip_distance.variable_found(); - ir_variable *clip_distance_var = - shader->symbols->get_variable("gl_ClipDistance"); - if (clip_distance_var) - *ClipDistanceArraySize = clip_distance_var->type->length; - } else { - *UsesClipDistance = false; + + if (clip_distance.variable_found()) { + ir_variable *clip_distance_var = + shader->symbols->get_variable("gl_ClipDistance"); + + assert(clip_distance_var); + *clip_distance_array_size = clip_distance_var->type->length; + } } } @@ -700,8 +701,7 @@ analyze_clip_usage(struct gl_shader_program *prog, /** * Verify that a vertex shader executable meets all semantic requirements. * - * Also sets prog->Vert.UsesClipDistance and prog->Vert.ClipDistanceArraySize - * as a side effect. + * Also sets prog->Vert.ClipDistanceArraySize as a side effect. 
* * \param shader Vertex shader executable to be verified */ @@ -754,8 +754,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog, } } - analyze_clip_usage(prog, shader, &prog->Vert.UsesClipDistance, - &prog->Vert.ClipDistanceArraySize); + analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize); } void @@ -765,8 +764,7 @@ validate_tess_eval_shader_executable(struct gl_shader_program *prog, if (shader == NULL) return; - analyze_clip_usage(prog, shader, &prog->TessEval.UsesClipDistance, - &prog->TessEval.ClipDistanceArraySize); + analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize); } @@ -797,8 +795,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog, /** * Verify that a geometry shader executable meets all semantic requirements * - * Also sets prog->Geom.VerticesIn, prog->Geom.UsesClipDistance, and - * prog->Geom.ClipDistanceArraySize as a side effect. + * Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as + * a side effect. 
* * \param shader Geometry shader executable to be verified */ @@ -812,8 +810,7 @@ validate_geometry_shader_executable(struct gl_shader_program *prog, unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); prog->Geom.VerticesIn = num_vertices; - analyze_clip_usage(prog, shader, &prog->Geom.UsesClipDistance, - &prog->Geom.ClipDistanceArraySize); + analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize); } /** @@ -3117,8 +3114,8 @@ check_explicit_uniform_locations(struct gl_context *ctx, foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *var = node->as_variable(); - if (var && (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) && - var->data.explicit_location) { + if (var && (var->data.mode == ir_var_uniform && + var->data.explicit_location)) { bool ret; if (var->type->is_subroutine()) ret = reserve_subroutine_explicit_locations(prog, sh, var); diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp index 276a2dedf47..114bb5811b4 100644 --- a/src/glsl/lower_named_interface_blocks.cpp +++ b/src/glsl/lower_named_interface_blocks.cpp @@ -186,6 +186,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) new_var->data.centroid = iface_t->fields.structure[i].centroid; new_var->data.sample = iface_t->fields.structure[i].sample; new_var->data.patch = iface_t->fields.structure[i].patch; + new_var->data.stream = var->data.stream; new_var->init_interface_type(iface_t); hash_table_insert(interface_namespace, new_var, diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp index c1aed61a36a..a0df5e1df81 100644 --- a/src/glsl/lower_subroutine.cpp +++ b/src/glsl/lower_subroutine.cpp @@ -84,7 +84,7 @@ lower_subroutine_visitor::visit_leave(ir_call *ir) continue; if (ir->array_idx != NULL) - var = new(mem_ctx) ir_dereference_array(ir->sub_var, ir->array_idx->clone(mem_ctx, NULL)); + var = ir->array_idx->clone(mem_ctx, NULL); else var = 
new(mem_ctx) ir_dereference_variable(ir->sub_var); diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index e818c048461..57a242b4074 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -238,6 +238,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, case ir_type_swizzle: { ir_swizzle *s = (ir_swizzle *) ir; ir = s->val->as_dereference(); + /* Skip swizzle in the next pass */ + d = ir; break; } diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index e57e834d948..129dd02781b 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -164,15 +164,20 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader->info.outputs_written = sh->Program->OutputsWritten; shader->info.system_values_read = sh->Program->SystemValuesRead; shader->info.uses_texture_gather = sh->Program->UsesGather; - shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut; + shader->info.uses_clip_distance_out = + sh->Program->ClipDistanceArraySize != 0; shader->info.separate_shader = shader_prog->SeparateShader; shader->info.has_transform_feedback_varyings = shader_prog->TransformFeedback.NumVarying > 0; switch (stage) { case MESA_SHADER_GEOMETRY: + shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; + shader->info.gs.output_primitive = sh->Geom.OutputType; shader->info.gs.vertices_out = sh->Geom.VerticesOut; shader->info.gs.invocations = sh->Geom.Invocations; + shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive; + shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams; break; case MESA_SHADER_FRAGMENT: { diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index b83e1ca3d2c..a8eade5f9e1 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -521,6 +521,11 @@ struct glsl_type { return base_type == GLSL_TYPE_ARRAY; } + bool is_array_of_arrays() const + { + return is_array() && 
fields.array->is_array(); + } + /** * Query whether or not a type is a record */ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 9939b9e91a2..d0304bebbb0 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1521,11 +1521,23 @@ typedef struct nir_shader_info { union { struct { + /** The number of vertices received per input primitive */ + unsigned vertices_in; + + /** The output primitive type (GL enum value) */ + unsigned output_primitive; + /** The maximum number of vertices the geometry shader might write. */ unsigned vertices_out; /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ unsigned invocations; + + /** Whether or not this shader uses EndPrimitive */ + bool uses_end_primitive; + + /** Whether or not this shader uses non-zero streams */ + bool uses_streams; } gs; struct { @@ -1924,7 +1936,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); void nir_dump_cfg(nir_shader *shader, FILE *fp); -int nir_gs_count_vertices(nir_shader *shader); +int nir_gs_count_vertices(const nir_shader *shader); bool nir_split_var_copies(nir_shader *shader); diff --git a/src/glsl/nir/nir_gs_count_vertices.c b/src/glsl/nir/nir_gs_count_vertices.c index e0bdf170d22..1c360673ddc 100644 --- a/src/glsl/nir/nir_gs_count_vertices.c +++ b/src/glsl/nir/nir_gs_count_vertices.c @@ -51,7 +51,7 @@ as_set_vertex_count(nir_instr *instr) * counting at the NIR level.
*/ int -nir_gs_count_vertices(nir_shader *shader) +nir_gs_count_vertices(const nir_shader *shader) { int count = -1; diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index a09491781e6..a06b0aa1cd0 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -327,12 +327,12 @@ struct cfg_t { #define foreach_inst_in_block_reverse_safe(__type, __inst, __block) \ foreach_in_list_reverse_safe(__type, __inst, &(__block)->instructions) -#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst, __block) \ +#define foreach_inst_in_block_starting_from(__type, __scan_inst, __inst) \ for (__type *__scan_inst = (__type *)__inst->next; \ !__scan_inst->is_tail_sentinel(); \ __scan_inst = (__type *)__scan_inst->next) -#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst, __block) \ +#define foreach_inst_in_block_reverse_starting_from(__type, __scan_inst, __inst) \ for (__type *__scan_inst = (__type *)__inst->prev; \ !__scan_inst->is_head_sentinel(); \ __scan_inst = (__type *)__scan_inst->prev) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index f6d5ab87be9..d9967143d8a 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -90,6 +90,7 @@ struct brw_compiler { void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); bool scalar_vs; + bool scalar_gs; struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; @@ -488,6 +489,9 @@ struct brw_vue_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; + /** Should the hardware deliver input VUE handles for URB pull loads? 
*/ + bool include_vue_handles; + GLuint urb_read_length; GLuint total_grf; @@ -596,21 +600,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, unsigned *final_assembly_size, char **error_str); -/** - * Scratch data used when compiling a GLSL geometry shader. - */ -struct brw_gs_compile -{ - struct brw_gs_prog_key key; - struct brw_gs_prog_data prog_data; - struct brw_vue_map input_vue_map; - - struct brw_geometry_program *gp; - - unsigned control_data_bits_per_vertex; - unsigned control_data_header_size_bits; -}; - /** * Compile a vertex shader. * @@ -618,10 +607,11 @@ struct brw_gs_compile */ const unsigned * brw_compile_gs(const struct brw_compiler *compiler, void *log_data, - struct brw_gs_compile *c, + void *mem_ctx, + const struct brw_gs_prog_key *key, + struct brw_gs_prog_data *prog_data, const struct nir_shader *shader, struct gl_shader_program *shader_prog, - void *mem_ctx, int shader_time_index, unsigned *final_assembly_size, char **error_str); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index a8cde20e045..169d092f90e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -918,8 +918,8 @@ enum opcode { * Source 0: [required] Color 0. * Source 1: [optional] Color 1 (for dual source blend messages). * Source 2: [optional] Src0 Alpha. - * Source 3: [optional] Source Depth (passthrough from the thread payload). - * Source 4: [optional] Destination Depth (gl_FragDepth). + * Source 3: [optional] Source Depth (gl_FragDepth) + * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread * Source 5: [optional] Sample Mask (gl_SampleMask). * Source 6: [required] Number of color components (as a UD immediate). */ @@ -1033,7 +1033,19 @@ enum opcode { SHADER_OPCODE_GEN4_SCRATCH_WRITE, SHADER_OPCODE_GEN7_SCRATCH_READ, + /** + * Gen8+ SIMD8 URB Read message. + * + * Source 0: The header register, containing URB handles (g1). 
+ * + * Currently only supports constant offsets, in inst->offset. + */ + SHADER_OPCODE_URB_READ_SIMD8, + SHADER_OPCODE_URB_WRITE_SIMD8, + SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT, + SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, + SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT, /** * Return the index of an arbitrary live channel (i.e. one of the channels @@ -2385,7 +2397,7 @@ enum brw_pixel_shader_coverage_mask_mode { # define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8) # define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) # define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6) -# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5) +# define GEN9_PSX_SHADER_COMPUTES_STENCIL (1 << 5) # define GEN9_PSX_SHADER_PULLS_BARY (1 << 3) # define GEN8_PSX_SHADER_HAS_UAV (1 << 2) # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index b798931140f..f787ea3d4f8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -690,7 +690,7 @@ set_control_index(const struct brw_device_info *devinfo, for (int i = 0; i < 32; i++) { if (control_index_table[i] == uncompacted) { - brw_compact_inst_set_control_index(dst, i); + brw_compact_inst_set_control_index(devinfo, dst, i); return true; } } @@ -711,7 +711,7 @@ set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, for (int i = 0; i < 32; i++) { if (datatype_table[i] == uncompacted) { - brw_compact_inst_set_datatype_index(dst, i); + brw_compact_inst_set_datatype_index(devinfo, dst, i); return true; } } @@ -732,7 +732,7 @@ set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, for (int i = 0; i < 32; i++) { if (subreg_table[i] == uncompacted) { - brw_compact_inst_set_subreg_index(dst, i); + brw_compact_inst_set_subreg_index(devinfo, dst, i); return true; } } @@ -764,7 +764,7 @@ set_src0_index(const struct brw_device_info *devinfo, if 
(!get_src_index(uncompacted, &compacted)) return false; - brw_compact_inst_set_src0_index(dst, compacted); + brw_compact_inst_set_src0_index(devinfo, dst, compacted); return true; } @@ -784,7 +784,7 @@ set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, return false; } - brw_compact_inst_set_src1_index(dst, compacted); + brw_compact_inst_set_src1_index(devinfo, dst, compacted); return true; } @@ -804,7 +804,7 @@ set_3src_control_index(const struct brw_device_info *devinfo, for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) { if (gen8_3src_control_index_table[i] == uncompacted) { - brw_compact_inst_set_3src_control_index(dst, i); + brw_compact_inst_set_3src_control_index(devinfo, dst, i); return true; } } @@ -838,7 +838,7 @@ set_3src_source_index(const struct brw_device_info *devinfo, for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) { if (gen8_3src_source_index_table[i] == uncompacted) { - brw_compact_inst_set_3src_source_index(dst, i); + brw_compact_inst_set_3src_source_index(devinfo, dst, i); return true; } } @@ -909,7 +909,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo, return false; #define compact(field) \ - brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src)) + brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src)) compact(opcode); @@ -921,7 +921,7 @@ brw_try_compact_3src_instruction(const struct brw_device_info *devinfo, compact(dst_reg_nr); compact(src0_rep_ctrl); - brw_compact_inst_set_3src_cmpt_control(dst, true); + brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true); compact(debug_control); compact(saturate); compact(src1_rep_ctrl); @@ -1003,36 +1003,52 @@ brw_try_compact_instruction(const struct brw_device_info *devinfo, memset(&temp, 0, sizeof(temp)); - brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src)); - brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, 
src)); +#define compact(field) \ + brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src)) + + compact(opcode); + compact(debug_control); + if (!set_control_index(devinfo, &temp, src)) return false; if (!set_datatype_index(devinfo, &temp, src)) return false; if (!set_subreg_index(devinfo, &temp, src, is_immediate)) return false; - brw_compact_inst_set_acc_wr_control(&temp, - brw_inst_acc_wr_control(devinfo, src)); - brw_compact_inst_set_cond_modifier(&temp, - brw_inst_cond_modifier(devinfo, src)); + + if (devinfo->gen >= 6) { + compact(acc_wr_control); + } else { + compact(mask_control_ex); + } + + compact(cond_modifier); + if (devinfo->gen <= 6) - brw_compact_inst_set_flag_subreg_nr(&temp, - brw_inst_flag_subreg_nr(devinfo, src)); - brw_compact_inst_set_cmpt_control(&temp, true); + compact(flag_subreg_nr); + + brw_compact_inst_set_cmpt_control(devinfo, &temp, true); + if (!set_src0_index(devinfo, &temp, src)) return false; if (!set_src1_index(devinfo, &temp, src, is_immediate)) return false; - brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src)); - brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src)); + + brw_compact_inst_set_dst_reg_nr(devinfo, &temp, + brw_inst_dst_da_reg_nr(devinfo, src)); + brw_compact_inst_set_src0_reg_nr(devinfo, &temp, + brw_inst_src0_da_reg_nr(devinfo, src)); + if (is_immediate) { - brw_compact_inst_set_src1_reg_nr(&temp, + brw_compact_inst_set_src1_reg_nr(devinfo, &temp, brw_inst_imm_ud(devinfo, src) & 0xff); } else { - brw_compact_inst_set_src1_reg_nr(&temp, + brw_compact_inst_set_src1_reg_nr(devinfo, &temp, brw_inst_src1_da_reg_nr(devinfo, src)); } +#undef compact + *dst = temp; return true; @@ -1043,7 +1059,7 @@ set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { uint32_t uncompacted = - control_index_table[brw_compact_inst_control_index(src)]; + control_index_table[brw_compact_inst_control_index(devinfo, src)]; if 
(devinfo->gen >= 8) { brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16)); @@ -1064,7 +1080,8 @@ static void set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { - uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)]; + uint32_t uncompacted = + datatype_table[brw_compact_inst_datatype_index(devinfo, src)]; if (devinfo->gen >= 8) { brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18)); @@ -1080,7 +1097,8 @@ static void set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { - uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)]; + uint16_t uncompacted = + subreg_table[brw_compact_inst_subreg_index(devinfo, src)]; brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10)); brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f); @@ -1091,7 +1109,7 @@ static void set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { - uint32_t compacted = brw_compact_inst_src0_index(src); + uint32_t compacted = brw_compact_inst_src0_index(devinfo, src); uint16_t uncompacted = src_index_table[compacted]; brw_inst_set_bits(dst, 88, 77, uncompacted); @@ -1102,11 +1120,12 @@ set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src, bool is_immediate) { if (is_immediate) { - signed high5 = brw_compact_inst_src1_index(src); + signed high5 = brw_compact_inst_src1_index(devinfo, src); /* Replicate top bit of src1_index into high 20 bits of the immediate. 
*/ brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19); } else { - uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)]; + uint16_t uncompacted = + src_index_table[brw_compact_inst_src1_index(devinfo, src)]; brw_inst_set_bits(dst, 120, 109, uncompacted); } @@ -1118,7 +1137,7 @@ set_uncompacted_3src_control_index(const struct brw_device_info *devinfo, { assert(devinfo->gen >= 8); - uint32_t compacted = brw_compact_inst_3src_control_index(src); + uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src); uint32_t uncompacted = gen8_3src_control_index_table[compacted]; brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7); @@ -1134,7 +1153,7 @@ set_uncompacted_3src_source_index(const struct brw_device_info *devinfo, { assert(devinfo->gen >= 8); - uint32_t compacted = brw_compact_inst_3src_source_index(src); + uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src); uint64_t uncompacted = gen8_3src_source_index_table[compacted]; brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1); @@ -1160,7 +1179,7 @@ brw_uncompact_3src_instruction(const struct brw_device_info *devinfo, assert(devinfo->gen >= 8); #define uncompact(field) \ - brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src)) + brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src)) uncompact(opcode); @@ -1190,13 +1209,16 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst, { memset(dst, 0, sizeof(*dst)); - if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) { + if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(devinfo, src))) { brw_uncompact_3src_instruction(devinfo, dst, src); return; } - brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src)); - brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src)); +#define uncompact(field) \ + brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src)) + + 
uncompact(opcode); + uncompact(debug_control); set_uncompacted_control(devinfo, dst, src); set_uncompacted_datatype(devinfo, dst, src); @@ -1206,22 +1228,36 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst, brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE; set_uncompacted_subreg(devinfo, dst, src); - brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src)); - brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src)); + + if (devinfo->gen >= 6) { + uncompact(acc_wr_control); + } else { + uncompact(mask_control_ex); + } + + uncompact(cond_modifier); + if (devinfo->gen <= 6) - brw_inst_set_flag_subreg_nr(devinfo, dst, - brw_compact_inst_flag_subreg_nr(src)); + uncompact(flag_subreg_nr); + set_uncompacted_src0(devinfo, dst, src); set_uncompacted_src1(devinfo, dst, src, is_immediate); - brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src)); - brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src)); + + brw_inst_set_dst_da_reg_nr(devinfo, dst, + brw_compact_inst_dst_reg_nr(devinfo, src)); + brw_inst_set_src0_da_reg_nr(devinfo, dst, + brw_compact_inst_src0_reg_nr(devinfo, src)); + if (is_immediate) { brw_inst_set_imm_ud(devinfo, dst, brw_inst_imm_ud(devinfo, dst) | - brw_compact_inst_src1_reg_nr(src)); + brw_compact_inst_src1_reg_nr(devinfo, src)); } else { - brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src)); + brw_inst_set_src1_da_reg_nr(devinfo, dst, + brw_compact_inst_src1_reg_nr(devinfo, src)); } + +#undef uncompact } void brw_debug_compact_uncompact(const struct brw_device_info *devinfo, @@ -1415,8 +1451,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset, if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){ brw_compact_inst *align = store + offset; memset(align, 0, sizeof(*align)); - brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP); - brw_compact_inst_set_cmpt_control(align, 
true); + brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP); + brw_compact_inst_set_cmpt_control(devinfo, align, true); offset += sizeof(brw_compact_inst); compacted_count--; compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count; @@ -1524,8 +1560,8 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset, if (p->next_insn_offset & sizeof(brw_compact_inst)) { brw_compact_inst *align = store + offset; memset(align, 0, sizeof(*align)); - brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP); - brw_compact_inst_set_cmpt_control(align, true); + brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP); + brw_compact_inst_set_cmpt_control(devinfo, align, true); p->next_insn_offset += sizeof(brw_compact_inst); } p->nr_insn = p->next_insn_offset / sizeof(brw_inst); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0562c5a9981..8320cd77299 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -281,6 +281,10 @@ fs_inst::is_send_from_grf() const case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: + case SHADER_OPCODE_URB_READ_SIMD8: return true; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return src[1].file == GRF; @@ -782,6 +786,10 @@ fs_inst::regs_read(int arg) const switch (opcode) { case FS_OPCODE_FB_WRITE: case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: + case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: @@ -911,6 +919,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case SHADER_OPCODE_TYPED_SURFACE_READ: case 
SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case FS_OPCODE_INTERPOLATE_AT_CENTROID: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -2239,13 +2250,15 @@ fs_visitor::opt_sampler_eot() if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex()) return false; - /* This optimisation doesn't seem to work for textureGather for some - * reason. I can't find any documentation or known workarounds to indicate - * that this is expected, but considering that it is probably pretty - * unlikely that a shader would directly write out the results from - * textureGather we might as well just disable it. + /* 3D Sampler » Messages » Message Format + * + * “Response Length of zero is allowed on all SIMD8* and SIMD16* sampler + * messages except sample+killpix, resinfo, sampleinfo, LOD, and gather4*” */ - if (tex_inst->opcode == SHADER_OPCODE_TG4 || + if (tex_inst->opcode == SHADER_OPCODE_TXS || + tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO || + tex_inst->opcode == SHADER_OPCODE_LOD || + tex_inst->opcode == SHADER_OPCODE_TG4 || tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET) return false; @@ -2457,7 +2470,7 @@ fs_visitor::compute_to_mrf() /* Found a move of a GRF to a MRF. Let's see if we can go * rewrite the thing that made this GRF to write into the MRF. */ - foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) { + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->dst.file == GRF && scan_inst->dst.reg == inst->src[0].reg) { /* Found the last thing to write our reg we want to turn @@ -2805,7 +2818,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, * we assume that there are no outstanding dependencies on entry to the * program. 
*/ - foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) { + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { /* If we hit control flow, assume that there *are* outstanding * dependencies, and force their cleanup before our instruction. */ @@ -2871,7 +2884,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins /* Walk forwards looking for writes to registers we're writing which aren't * read before being written. */ - foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst, block) { + foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) { /* If we hit control flow, force resolve all remaining dependencies. */ if (block->end() == scan_inst) { for (int i = 0; i < write_len; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 171338dcc0b..50e98becf03 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -62,6 +62,8 @@ namespace brw { class fs_live_variables; } +struct brw_gs_compile; + static inline fs_reg offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta) { @@ -99,7 +101,12 @@ public: const nir_shader *shader, unsigned dispatch_width, int shader_time_index); - + fs_visitor(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + struct brw_gs_compile *gs_compile, + struct brw_gs_prog_data *prog_data, + const nir_shader *shader); + void init(); ~fs_visitor(); fs_reg vgrf(const glsl_type *const type); @@ -298,6 +305,8 @@ public: const void *const key; const struct brw_sampler_prog_key_data *key_tex; + struct brw_gs_compile *gs_compile; + struct brw_stage_prog_data *prog_data; struct gl_program *prog; @@ -415,6 +424,7 @@ private: struct brw_reg implied_header, GLuint nr); void generate_fb_write(fs_inst *inst, struct brw_reg payload); + void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg 
payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); void generate_barrier(fs_inst *inst, struct brw_reg src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index 469f2ea4e16..883e8d2a49f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -87,8 +87,7 @@ opt_cmod_propagation_local(bblock_t *block) continue; bool read_flag = false; - foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, - block) { + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->overwrites_reg(inst->src[0])) { if (scan_inst->is_partial_write() || scan_inst->dst.reg_offset != inst->src[0].reg_offset) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 13c495cd395..bb7e792044f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -354,6 +354,28 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } } +void +fs_generator::generate_urb_read(fs_inst *inst, + struct brw_reg dst, + struct brw_reg header) +{ + assert(header.file == BRW_GENERAL_REGISTER_FILE); + assert(header.type == BRW_REGISTER_TYPE_UD); + + brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, header); + brw_set_src1(p, send, brw_imm_ud(0u)); + + brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB); + brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ); + + brw_inst_set_mlen(p->devinfo, send, inst->mlen); + brw_inst_set_rlen(p->devinfo, send, inst->regs_written); + brw_inst_set_header_present(p->devinfo, send, true); + brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset); +} + void fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload) { @@ -368,6 +390,14 @@ 
fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload) brw_inst_set_sfid(p->devinfo, insn, BRW_SFID_URB); brw_inst_set_urb_opcode(p->devinfo, insn, GEN8_URB_OPCODE_SIMD8_WRITE); + if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT || + inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) + brw_inst_set_urb_per_slot_offset(p->devinfo, insn, true); + + if (inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED || + inst->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) + brw_inst_set_urb_channel_mask_present(p->devinfo, insn, true); + brw_inst_set_mlen(p->devinfo, insn, inst->mlen); brw_inst_set_rlen(p->devinfo, insn, 0); brw_inst_set_eot(p->devinfo, insn, inst->eot); @@ -2001,7 +2031,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count++; break; + case SHADER_OPCODE_URB_READ_SIMD8: + generate_urb_read(inst, dst, src[0]); + break; + case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: generate_urb_write(inst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index feedbfbb2e3..7b5a0482519 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -30,6 +30,7 @@ #include "brw_fs_surface_builder.h" #include "brw_nir.h" #include "brw_fs_surface_builder.h" +#include "brw_vec4_gs_visitor.h" using namespace brw; using namespace brw::surface_access; @@ -188,6 +189,18 @@ emit_system_values_block(nir_block *block, void *void_visitor) *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID); break; + case nir_intrinsic_load_invocation_id: + assert(v->stage == MESA_SHADER_GEOMETRY); + reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; + if (reg->file == BAD_FILE) { + const fs_builder abld = v->bld.annotate("gl_InvocationID", NULL); + fs_reg g1(retype(brw_vec8_grf(1, 
0), BRW_REGISTER_TYPE_UD)); + fs_reg iid = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); + abld.SHR(iid, g1, fs_reg(27u)); + *reg = iid; + } + break; + case nir_intrinsic_load_sample_pos: assert(v->stage == MESA_SHADER_FRAGMENT); reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS]; @@ -1367,9 +1380,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_load_vertex_id: unreachable("should be lowered by lower_vertex_id()"); + case nir_intrinsic_load_primitive_id: + assert(stage == MESA_SHADER_GEOMETRY); + assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), + retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); + break; + case nir_intrinsic_load_vertex_id_zero_base: case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_invocation_id: case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_sample_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index 8792a8c7b1d..862e3245d43 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -64,7 +64,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) int src_end_ip = v->live_intervals->end[src_var]; bool interfered = false; - foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) { + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->overwrites_reg(inst->src[0])) { if (scan_inst->is_partial_write() || (scan_inst->dst.type != inst->dst.type && diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index f825fed4daf..7cc4f3c927a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -41,6 +41,7 @@ #include "brw_wm.h" #include "brw_cs.h" #include "brw_vec4.h" +#include "brw_vec4_gs_visitor.h" #include "brw_fs.h" #include "main/uniforms.h" #include "glsl/nir/glsl_types.h" @@ -868,13 +869,14 @@ void fs_visitor::emit_urb_writes() { int slot, urb_offset, length; - struct brw_vs_prog_data *vs_prog_data = - (struct brw_vs_prog_data *) prog_data; - const struct brw_vs_prog_key *key = + int starting_urb_offset = 0; + const struct brw_vue_prog_data *vue_prog_data = + (const struct brw_vue_prog_data *) this->prog_data; + const struct brw_vs_prog_key *vs_key = (const struct brw_vs_prog_key *) this->key; const GLbitfield64 psiz_mask = VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ; - const struct brw_vue_map *vue_map = &vs_prog_data->base.vue_map; + const struct brw_vue_map *vue_map = &vue_prog_data->vue_map; bool flush; fs_reg sources[8]; @@ -900,8 +902,21 @@ fs_visitor::emit_urb_writes() return; } + if (stage == MESA_SHADER_GEOMETRY) { + const struct brw_gs_prog_data *gs_prog_data = + (const struct brw_gs_prog_data *) prog_data; + + /* We need to increment the Global Offset to skip over the control data + * header and the extra "Vertex Count" field (1 HWord) at the beginning + * of the VUE. We're counting in OWords, so the units are doubled. 
+ */ + starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords; + if (gs_prog_data->static_vertex_count == -1) + starting_urb_offset += 2; + } + length = 0; - urb_offset = 0; + urb_offset = starting_urb_offset; flush = false; for (slot = 0; slot < vue_map->num_slots; slot++) { int varying = vue_map->slot_to_varying[slot]; @@ -961,11 +976,11 @@ fs_visitor::emit_urb_writes() break; } - if ((varying == VARYING_SLOT_COL0 || + if (stage == MESA_SHADER_VERTEX && vs_key->clamp_vertex_color && + (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1 || varying == VARYING_SLOT_BFC0 || - varying == VARYING_SLOT_BFC1) && - key->clamp_vertex_color) { + varying == VARYING_SLOT_BFC1)) { /* We need to clamp these guys, so do a saturating MOV into a * temp register and use that for the payload. */ @@ -1005,10 +1020,10 @@ fs_visitor::emit_urb_writes() fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); - inst->eot = last; + inst->eot = last && stage == MESA_SHADER_VERTEX; inst->mlen = length + 1; inst->offset = urb_offset; - urb_offset = slot + 1; + urb_offset = starting_urb_offset + slot + 1; length = 0; flush = false; } @@ -1071,11 +1086,33 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, unsigned dispatch_width, int shader_time_index) : backend_shader(compiler, log_data, mem_ctx, shader, prog_data), - key(key), prog_data(prog_data), prog(prog), + key(key), gs_compile(NULL), prog_data(prog_data), prog(prog), dispatch_width(dispatch_width), shader_time_index(shader_time_index), - promoted_constants(0), bld(fs_builder(this, dispatch_width).at_end()) +{ + init(); +} + +fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + struct brw_gs_compile *c, + struct brw_gs_prog_data *prog_data, + const nir_shader *shader) + : backend_shader(compiler, log_data, mem_ctx, shader, + &prog_data->base.base), + key(&c->key), gs_compile(c), + prog_data(&prog_data->base.base), 
prog(NULL), + dispatch_width(8), + shader_time_index(ST_GS), + bld(fs_builder(this, dispatch_width).at_end()) +{ + init(); +} + + +void +fs_visitor::init() { switch (stage) { case MESA_SHADER_FRAGMENT: @@ -1094,6 +1131,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, unreachable("unhandled shader stage"); } + this->prog_data = this->stage_prog_data; + this->failed = false; this->simd16_unsupported = false; this->no16_msg = NULL; @@ -1119,6 +1158,8 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, this->pull_constant_loc = NULL; this->push_constant_loc = NULL; + this->promoted_constants = 0, + this->spilled_any_registers = false; this->do_dual_src = false; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 10a7f28fdab..ed0890f430f 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -57,20 +57,14 @@ brw_codegen_gs_prog(struct brw_context *brw, struct brw_geometry_program *gp, struct brw_gs_prog_key *key) { + struct brw_compiler *compiler = brw->intelScreen->compiler; struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; struct brw_stage_state *stage_state = &brw->gs.base; - struct brw_gs_compile c; - memset(&c, 0, sizeof(c)); - c.key = *key; - c.gp = gp; - - c.prog_data.include_primitive_id = - (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; - - c.prog_data.invocations = gp->program.Invocations; + struct brw_gs_prog_data prog_data; + memset(&prog_data, 0, sizeof(prog_data)); assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog, - &gp->program.Base, &c.prog_data); + &gp->program.Base, &prog_data); /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed @@ -83,215 +77,24 @@ brw_codegen_gs_prog(struct brw_context *brw, struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; int param_count = 
gp->program.Base.nir->num_uniforms * 4; - c.prog_data.base.base.param = + prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.pull_param = + prog_data.base.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); - c.prog_data.base.base.image_param = + prog_data.base.base.image_param = rzalloc_array(NULL, struct brw_image_param, gs->NumImages); - c.prog_data.base.base.nr_params = param_count; - c.prog_data.base.base.nr_image_params = gs->NumImages; + prog_data.base.base.nr_params = param_count; + prog_data.base.base.nr_image_params = gs->NumImages; brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, - &c.prog_data.base.base, false); - - if (brw->gen >= 8) { - c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 : - nir_gs_count_vertices(gp->program.Base.nir); - } - - if (brw->gen >= 7) { - if (gp->program.OutputType == GL_POINTS) { - /* When the output type is points, the geometry shader may output data - * to multiple streams, and EndPrimitive() has no effect. So we - * configure the hardware to interpret the control data as stream ID. - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; - - /* We only have to emit control bits if we are using streams */ - if (prog->Geom.UsesStreams) - c.control_data_bits_per_vertex = 2; - else - c.control_data_bits_per_vertex = 0; - } else { - /* When the output type is triangle_strip or line_strip, EndPrimitive() - * may be used to terminate the current strip and start a new one - * (similar to primitive restart), and outputting data to multiple - * streams is not supported. So we configure the hardware to interpret - * the control data as EndPrimitive information (a.k.a. "cut bits"). - */ - c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; - - /* We only need to output control data if the shader actually calls - * EndPrimitive(). 
- */ - c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0; - } - } else { - /* There are no control data bits in gen6. */ - c.control_data_bits_per_vertex = 0; - - /* If it is using transform feedback, enable it */ - if (prog->TransformFeedback.NumVarying) - c.prog_data.gen6_xfb_enabled = true; - else - c.prog_data.gen6_xfb_enabled = false; - } - c.control_data_header_size_bits = - gp->program.VerticesOut * c.control_data_bits_per_vertex; - - /* 1 HWORD = 32 bytes = 256 bits */ - c.prog_data.control_data_header_size_hwords = - ALIGN(c.control_data_header_size_bits, 256) / 256; + &prog_data.base.base, compiler->scalar_gs); GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; brw_compute_vue_map(brw->intelScreen->devinfo, - &c.prog_data.base.vue_map, outputs_written, + &prog_data.base.vue_map, outputs_written, prog ? prog->SeparateShader : false); - /* Compute the output vertex size. - * - * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex - * Size (p168): - * - * [0,62] indicating [1,63] 16B units - * - * Specifies the size of each vertex stored in the GS output entry - * (following any Control Header data) as a number of 128-bit units - * (minus one). - * - * Programming Restrictions: The vertex size must be programmed as a - * multiple of 32B units with the following exception: Rendering is - * disabled (as per SOL stage state) and the vertex size output by the - * GS thread is 16B. - * - * If rendering is enabled (as per SOL state) the vertex size must be - * programmed as a multiple of 32B units. In other words, the only time - * software can program a vertex size with an odd number of 16B units - * is when rendering is disabled. - * - * Note: B=bytes in the above text. - * - * It doesn't seem worth the extra trouble to optimize the case where the - * vertex size is 16B (especially since this would require special-casing - * the GEN assembly that writes to the URB). 
So we just set the vertex - * size to a multiple of 32B (2 vec4's) in all cases. - * - * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We - * budget that as follows: - * - * 512 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryOutputComponents = 128) - * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes) - * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 16 bytes overhead since the VUE size must be a multiple of 32 bytes - * (see below)--this causes up to 1 VUE slot to be wasted - * 400 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) - * per interpolation type, so this is plenty. - * - */ - unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16; - assert(brw->gen == 6 || - output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); - c.prog_data.output_vertex_size_hwords = - ALIGN(output_vertex_size_bytes, 32) / 32; - - /* Compute URB entry size. The maximum allowed URB entry size is 32k. 
- * That divides up as follows: - * - * 64 bytes for the control data header (cut indices or StreamID bits) - * 4096 bytes for varyings (a varying component is 4 bytes and - * gl_MaxGeometryTotalOutputComponents = 1024) - * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 - * bytes/vertex and gl_MaxGeometryOutputVertices is 256) - * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE - * even if it's not used) - * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots - * whenever clip planes are enabled, even if the shader doesn't - * write to gl_ClipDistance) - * 4096 bytes overhead since the VUE size must be a multiple of 32 - * bytes (see above)--this causes up to 1 VUE slot to be wasted - * 8128 bytes available for varying packing overhead - * - * Worst-case varying packing overhead is 3/4 of a varying slot per - * interpolation type, which works out to 3072 bytes, so this would allow - * us to accommodate 2 interpolation types without any danger of running - * out of URB space. - * - * In practice, the risk of running out of URB space is very small, since - * the above figures are all worst-case, and most of them scale with the - * number of output vertices. So we'll just calculate the amount of space - * we need, and if it's too large, fail to compile. - * - * The above is for gen7+ where we have a single URB entry that will hold - * all the output. In gen6, we will have to allocate URB entries for every - * vertex we emit, so our URB entries only need to be large enough to hold - * a single vertex. Also, gen6 does not have a control data header. 
- */ - unsigned output_size_bytes; - if (brw->gen >= 7) { - output_size_bytes = - c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; - output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; - } else { - output_size_bytes = c.prog_data.output_vertex_size_hwords * 32; - } - - /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, - * which comes before the control header. - */ - if (brw->gen >= 8) - output_size_bytes += 32; - - assert(output_size_bytes >= 1); - int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (brw->gen == 6) - max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; - if (output_size_bytes > max_output_size_bytes) - return false; - - - /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and - * a multiple of 128 bytes in gen6. - */ - if (brw->gen >= 7) - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else - c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; - - c.prog_data.output_topology = - get_hw_prim_for_gl_prim(gp->program.OutputType); - - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. 
- */ - GLbitfield64 inputs_read = - gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(brw->intelScreen->devinfo, - &c.input_vue_map, inputs_read, - prog->SeparateShader); - - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we - * need to program a URB read length of ceiling(num_slots / 2). - */ - c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; - if (unlikely(INTEL_DEBUG & DEBUG_GS)) brw_dump_ir("geometry", prog, gs, NULL); @@ -303,25 +106,25 @@ brw_codegen_gs_prog(struct brw_context *brw, unsigned program_size; char *error_str; const unsigned *program = - brw_compile_gs(brw->intelScreen->compiler, brw, &c, - shader->Program->nir, prog, - mem_ctx, st_index, &program_size, &error_str); + brw_compile_gs(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data, shader->Program->nir, prog, + st_index, &program_size, &error_str); if (program == NULL) { ralloc_free(mem_ctx); return false; } /* Scratch space is used for register spilling */ - if (c.prog_data.base.base.total_scratch) { + if (prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, &stage_state->scratch_bo, - c.prog_data.base.base.total_scratch * + prog_data.base.base.total_scratch * brw->max_gs_threads); } brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, - &c.key, sizeof(c.key), + key, sizeof(*key), program, program_size, - &c.prog_data, sizeof(c.prog_data), + &prog_data, sizeof(prog_data), &stage_state->prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index ab37b709d65..4ed95c473cd 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -181,7 +181,8 @@ F(saturate, 31, 31) F(debug_control, 30, 30) F(cmpt_control, 29, 29) FC(branch_control, 28, 28, devinfo->gen >= 8) -F(acc_wr_control, 28, 28) +FC(acc_wr_control, 28, 28, devinfo->gen >= 6) +FC(mask_control_ex, 28, 28, devinfo->is_g4x || 
devinfo->gen == 5) F(cond_modifier, 27, 24) FC(math_function, 27, 24, devinfo->gen >= 6) F(exec_size, 23, 21) @@ -392,6 +393,7 @@ FF(urb_per_slot_offset, /* 4-6: */ -1, -1, -1, -1, -1, -1, -1, -1, /* 7: */ MD(16), MD(16), /* 8: */ MD(17), MD(17)) +FC(urb_channel_mask_present, MD(15), MD(15), devinfo->gen >= 8) FC(urb_complete, MD(15), MD(15), devinfo->gen < 8) FC(urb_used, MD(14), MD(14), devinfo->gen < 7) FC(urb_allocate, MD(13), MD(13), devinfo->gen < 7) @@ -738,7 +740,7 @@ typedef struct { * Bits indices range from 0..63. */ static inline unsigned -brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low) +brw_compact_inst_bits(const brw_compact_inst *inst, unsigned high, unsigned low) { const uint64_t mask = (1ull << (high - low + 1)) - 1; @@ -762,56 +764,65 @@ brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low, inst->data = (inst->data & ~mask) | (value << low); } -#define F(name, high, low) \ -static inline void \ -brw_compact_inst_set_##name(brw_compact_inst *inst, unsigned v) \ -{ \ - brw_compact_inst_set_bits(inst, high, low, v); \ -} \ - \ -static inline unsigned \ -brw_compact_inst_##name(brw_compact_inst *inst) \ -{ \ - return brw_compact_inst_bits(inst, high, low); \ +#define FC(name, high, low, assertions) \ +static inline void \ +brw_compact_inst_set_##name(const struct brw_device_info *devinfo, \ + brw_compact_inst *inst, unsigned v) \ +{ \ + assert(assertions); \ + (void) devinfo; \ + brw_compact_inst_set_bits(inst, high, low, v); \ +} \ +static inline unsigned \ +brw_compact_inst_##name(const struct brw_device_info *devinfo, \ + const brw_compact_inst *inst) \ +{ \ + assert(assertions); \ + (void) devinfo; \ + return brw_compact_inst_bits(inst, high, low); \ } -F(src1_reg_nr, 63, 56) -F(src0_reg_nr, 55, 48) -F(dst_reg_nr, 47, 40) -F(src1_index, 39, 35) -F(src0_index, 34, 30) -F(cmpt_control, 29, 29) /* Same location as brw_inst */ -F(flag_subreg_nr, 28, 28) /* <= Gen6 only */ -F(cond_modifier, 27, 24) /* 
Same location as brw_inst */ -F(acc_wr_control, 23, 23) -F(subreg_index, 22, 18) -F(datatype_index, 17, 13) -F(control_index, 12, 8) -F(debug_control, 7, 7) -F(opcode, 6, 0) /* Same location as brw_inst */ +/* A simple macro for fields which stay in the same place on all generations. */ +#define F(name, high, low) FC(name, high, low, true) + +F(src1_reg_nr, 63, 56) +F(src0_reg_nr, 55, 48) +F(dst_reg_nr, 47, 40) +F(src1_index, 39, 35) +F(src0_index, 34, 30) +F(cmpt_control, 29, 29) /* Same location as brw_inst */ +FC(flag_subreg_nr, 28, 28, devinfo->gen <= 6) +F(cond_modifier, 27, 24) /* Same location as brw_inst */ +FC(acc_wr_control, 23, 23, devinfo->gen >= 6) +FC(mask_control_ex, 23, 23, devinfo->is_g4x || devinfo->gen == 5) +F(subreg_index, 22, 18) +F(datatype_index, 17, 13) +F(control_index, 12, 8) +F(debug_control, 7, 7) +F(opcode, 6, 0) /* Same location as brw_inst */ /** * (Gen8+) Compacted three-source instructions: * @{ */ -F(3src_src2_reg_nr, 63, 57) -F(3src_src1_reg_nr, 56, 50) -F(3src_src0_reg_nr, 49, 43) -F(3src_src2_subreg_nr, 42, 40) -F(3src_src1_subreg_nr, 39, 37) -F(3src_src0_subreg_nr, 36, 34) -F(3src_src2_rep_ctrl, 33, 33) -F(3src_src1_rep_ctrl, 32, 32) -F(3src_saturate, 31, 31) -F(3src_debug_control, 30, 30) -F(3src_cmpt_control, 29, 29) -F(3src_src0_rep_ctrl, 28, 28) +FC(3src_src2_reg_nr, 63, 57, devinfo->gen >= 8) +FC(3src_src1_reg_nr, 56, 50, devinfo->gen >= 8) +FC(3src_src0_reg_nr, 49, 43, devinfo->gen >= 8) +FC(3src_src2_subreg_nr, 42, 40, devinfo->gen >= 8) +FC(3src_src1_subreg_nr, 39, 37, devinfo->gen >= 8) +FC(3src_src0_subreg_nr, 36, 34, devinfo->gen >= 8) +FC(3src_src2_rep_ctrl, 33, 33, devinfo->gen >= 8) +FC(3src_src1_rep_ctrl, 32, 32, devinfo->gen >= 8) +FC(3src_saturate, 31, 31, devinfo->gen >= 8) +FC(3src_debug_control, 30, 30, devinfo->gen >= 8) +FC(3src_cmpt_control, 29, 29, devinfo->gen >= 8) +FC(3src_src0_rep_ctrl, 28, 28, devinfo->gen >= 8) /* Reserved */ -F(3src_dst_reg_nr, 18, 12) -F(3src_source_index, 11, 10) 
-F(3src_control_index, 9, 8) +FC(3src_dst_reg_nr, 18, 12, devinfo->gen >= 8) +FC(3src_source_index, 11, 10, devinfo->gen >= 8) +FC(3src_control_index, 9, 8, devinfo->gen >= 8) /* Bit 7 is Reserved (for future Opcode expansion) */ -F(3src_opcode, 6, 0) +FC(3src_opcode, 6, 0, devinfo->gen >= 8) /** @} */ #undef F diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 22b0227756e..6433dec9041 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -91,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, if (prog) { prog->id = get_new_program_id(brw->intelScreen); - return _mesa_init_gl_program(&prog->program, target, id); + return _mesa_init_gl_program(&prog->program.Base, target, id); } else { return NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6be2a6e5b55..e48f559afa7 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -79,6 +79,8 @@ is_scalar_shader_stage(const struct brw_compiler *compiler, int stage) case MESA_SHADER_FRAGMENT: case MESA_SHADER_COMPUTE: return true; + case MESA_SHADER_GEOMETRY: + return compiler->scalar_gs; case MESA_SHADER_VERTEX: return compiler->scalar_vs; default: @@ -101,6 +103,9 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS)) compiler->scalar_vs = true; + if (devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false)) + compiler->scalar_gs = true; + nir_shader_compiler_options *nir_options = rzalloc(compiler, nir_shader_compiler_options); nir_options->native_integers = true; @@ -411,6 +416,14 @@ brw_instruction_name(enum opcode op) return "gen7_scratch_read"; case SHADER_OPCODE_URB_WRITE_SIMD8: return "gen8_urb_write_simd8"; + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + return "gen8_urb_write_simd8_per_slot"; + case 
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + return "gen8_urb_write_simd8_masked"; + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: + return "gen8_urb_write_simd8_masked_per_slot"; + case SHADER_OPCODE_URB_READ_SIMD8: + return "urb_read_simd8"; case SHADER_OPCODE_FIND_LIVE_CHANNEL: return "find_live_channel"; @@ -964,6 +977,9 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case FS_OPCODE_FB_WRITE: case SHADER_OPCODE_BARRIER: return true; diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 2e47690d403..8899b30c1ae 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -233,6 +233,18 @@ bool opt_predicated_break(struct backend_shader *s); extern "C" { #endif +/** + * Scratch data used when compiling a GLSL geometry shader. 
+ */ +struct brw_gs_compile +{ + struct brw_gs_prog_key key; + struct brw_vue_map input_vue_map; + + unsigned control_data_bits_per_vertex; + unsigned control_data_header_size_bits; +}; + void brw_assign_common_binding_table_offsets(gl_shader_stage stage, const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 2955c8dcc2e..a2948293a62 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -40,36 +40,32 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE static unsigned int -tr_mode_horizontal_texture_alignment(const struct brw_context *brw, - const struct intel_mipmap_tree *mt) +tr_mode_horizontal_texture_alignment(const struct intel_mipmap_tree *mt) { - const unsigned *align_yf, *align_ys; - const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; - unsigned ret_align, divisor; + unsigned ret_align, divisor, multiplier_ys; - /* Horizontal alignment tables for TRMODE_{YF,YS}. Value in below - * tables specifies the horizontal alignment requirement in elements - * for the surface. An element is defined as a pixel in uncompressed - * surface formats, and as a compression block in compressed surface - * formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an + /* Values in the tables below specify the horizontal alignment requirement + * in elements for a TRMODE_YF surface. An element is defined as a pixel in + * uncompressed surface formats, and as a compression block in compressed + * surface formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an * element is a sample. 
*/ const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256}; - const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096}; const unsigned align_2d_yf[] = {64, 64, 32, 32, 16}; - const unsigned align_2d_ys[] = {256, 256, 128, 128, 64}; const unsigned align_3d_yf[] = {16, 8, 8, 8, 4}; - const unsigned align_3d_ys[] = {64, 32, 32, 32, 16}; - int i = 0; - /* Alignment computations below assume bpp >= 8 and a power of 2. */ - assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); + assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE); + + /* Alignment computations below assume a power of 2 cpp. */ + assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp)); + /* Compute array index. */ + const int i = ffs(mt->cpp) - 1; switch(mt->target) { case GL_TEXTURE_1D: case GL_TEXTURE_1D_ARRAY: - align_yf = align_1d_yf; - align_ys = align_1d_ys; + ret_align = align_1d_yf[i]; + multiplier_ys = 16; break; case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE: @@ -78,22 +74,19 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw, case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - align_yf = align_2d_yf; - align_ys = align_2d_ys; + ret_align = align_2d_yf[i]; + multiplier_ys = 4; break; case GL_TEXTURE_3D: - align_yf = align_3d_yf; - align_ys = align_3d_ys; + ret_align = align_3d_yf[i]; + multiplier_ys = 4; break; default: unreachable("not reached"); } - /* Compute array index. */ - i = ffs(bpp/8) - 1; - - ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? 
- align_yf[i] : align_ys[i]; + if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS) + ret_align *= multiplier_ys; assert(_mesa_is_pow_two(mt->num_samples)); @@ -148,26 +141,20 @@ intel_horizontal_texture_alignment_unit(struct brw_context *brw, } static unsigned int -tr_mode_vertical_texture_alignment(const struct brw_context *brw, - const struct intel_mipmap_tree *mt) +tr_mode_vertical_texture_alignment(const struct intel_mipmap_tree *mt) { - const unsigned *align_yf, *align_ys; - const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8; - unsigned ret_align, divisor; + unsigned ret_align, divisor, multiplier_ys; - /* Vertical alignment tables for TRMODE_YF and TRMODE_YS. */ + /* Vertical alignment tables for TRMODE_YF */ const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; - const unsigned align_2d_ys[] = {256, 128, 128, 64, 64}; const unsigned align_3d_yf[] = {16, 16, 16, 8, 8}; - const unsigned align_3d_ys[] = {32, 32, 32, 16, 16}; - int i = 0; - assert(brw->gen >= 9 && - mt->target != GL_TEXTURE_1D && - mt->target != GL_TEXTURE_1D_ARRAY); + assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE); - /* Alignment computations below assume bpp >= 8 and a power of 2. */ - assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ; + /* Alignment computations below assume a power of 2 cpp. */ + assert (mt->cpp >= 1 && mt->cpp <= 16 && _mesa_is_pow_two(mt->cpp)) ; + /* Compute array index. 
*/ + const int i = ffs(mt->cpp) - 1; switch(mt->target) { case GL_TEXTURE_2D: @@ -177,22 +164,21 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw, case GL_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - align_yf = align_2d_yf; - align_ys = align_2d_ys; + ret_align = align_2d_yf[i]; + multiplier_ys = 4; break; case GL_TEXTURE_3D: - align_yf = align_3d_yf; - align_ys = align_3d_ys; + ret_align = align_3d_yf[i]; + multiplier_ys = 2; break; + case GL_TEXTURE_1D: + case GL_TEXTURE_1D_ARRAY: default: - unreachable("not reached"); + unreachable("Unexpected miptree target"); } - /* Compute array index. */ - i = ffs(bpp / 8) - 1; - - ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? - align_yf[i] : align_ys[i]; + if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS) + ret_align *= multiplier_ys; assert(_mesa_is_pow_two(mt->num_samples)); @@ -779,8 +765,8 @@ intel_miptree_set_alignment(struct brw_context *brw, } else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or * vertical alignment < 64. 
*/ - mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32); - mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64); + mt->halign = MAX2(tr_mode_horizontal_texture_alignment(mt), 32); + mt->valign = MAX2(tr_mode_vertical_texture_alignment(mt), 64); } else { mt->halign = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index befc92445d3..3e7078d0b32 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1111,7 +1111,7 @@ vec4_visitor::opt_register_coalesce() */ vec4_instruction *_scan_inst = (vec4_instruction *)inst->prev; foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, - inst, block) { + inst) { _scan_inst = scan_inst; if (inst->src[0].in_range(scan_inst->dst, scan_inst->regs_written)) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index 1b929b3df2c..6bc39473137 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -104,7 +104,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; case nir_intrinsic_load_primitive_id: - assert(c->prog_data.include_primitive_id); + assert(gs_prog_data->include_primitive_id); dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D))); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index a715cf5a6cb..9402489e628 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -35,14 +35,16 @@ namespace brw { vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, + struct brw_gs_prog_data *prog_data, const nir_shader *shader, void *mem_ctx, bool 
no_spills, int shader_time_index) : vec4_visitor(compiler, log_data, &c->key.tex, - &c->prog_data.base, shader, mem_ctx, + &prog_data->base, shader, mem_ctx, no_spills, shader_time_index), - c(c) + c(c), + gs_prog_data(prog_data) { } @@ -78,9 +80,9 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, * so the total number of input slots that will be delivered to the GS (and * thus the stride of the input arrays) is urb_read_length * 2. */ - const unsigned num_input_vertices = c->gp->program.VerticesIn; + const unsigned num_input_vertices = nir->info.gs.vertices_in; assert(num_input_vertices <= MAX_GS_INPUT_VERTICES); - unsigned input_array_stride = c->prog_data.base.urb_read_length * 2; + unsigned input_array_stride = prog_data->urb_read_length * 2; for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) { int varying = c->input_vue_map.slot_to_varying[slot]; @@ -106,7 +108,7 @@ vec4_gs_visitor::setup_payload() * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = - c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; + prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; /* If a geometry shader tries to read from an input that wasn't written by * the vertex shader, that produces undefined results, but it shouldn't @@ -124,7 +126,7 @@ vec4_gs_visitor::setup_payload() reg++; /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */ - if (c->prog_data.include_primitive_id) + if (gs_prog_data->include_primitive_id) attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++; reg = setup_uniforms(reg); @@ -182,9 +184,9 @@ vec4_gs_visitor::emit_prolog() * to account for the fact that the vertex shader stored it in the w * component of VARYING_SLOT_PSIZ. 
*/ - if (c->gp->program.Base.InputsRead & VARYING_BIT_PSIZ) { + if (nir->info.inputs_read & VARYING_BIT_PSIZ) { this->current_annotation = "swizzle gl_PointSize input"; - for (int vertex = 0; vertex < c->gp->program.VerticesIn; vertex++) { + for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) { dst_reg dst(ATTR, BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ); dst.type = BRW_REGISTER_TYPE_F; @@ -222,7 +224,7 @@ vec4_gs_visitor::emit_thread_end() */ int base_mrf = 1; - bool static_vertex_count = c->prog_data.static_vertex_count != -1; + bool static_vertex_count = gs_prog_data->static_vertex_count != -1; /* If the previous instruction was a URB write, we don't need to issue * a second one - we can just set the EOT bit on the previous write. @@ -271,7 +273,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf) vec4_instruction *inst = emit(MOV(mrf_reg, r0)); inst->force_writemask_all = true; emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, - (uint32_t) c->prog_data.output_vertex_size_hwords); + (uint32_t) gs_prog_data->output_vertex_size_hwords); } @@ -285,12 +287,12 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) (void) complete; vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE); - inst->offset = c->prog_data.control_data_header_size_hwords; + inst->offset = gs_prog_data->control_data_header_size_hwords; /* We need to increment Global Offset by 1 to make room for Broadwell's * extra "Vertex Count" payload at the beginning of the URB entry. */ - if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1) + if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1) inst->offset++; inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; @@ -409,7 +411,7 @@ vec4_gs_visitor::emit_control_data_bits() * URB entry. Since this is an OWord message, Global Offset is counted * in 128-bit units, so we must set it to 2. 
*/ - if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1) + if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1) inst->offset = 2; inst->base_mrf = base_mrf; inst->mlen = 2; @@ -536,7 +538,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) * do for GL_POINTS outputs that don't use streams). */ if (c->control_data_header_size_bits > 0 && - c->prog_data.control_data_format == + gs_prog_data->control_data_format == GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { this->current_annotation = "emit vertex: Stream control data bits"; set_stream_control_data_bits(stream_id); @@ -552,7 +554,7 @@ vec4_gs_visitor::gs_end_primitive() * consists of cut bits. Fortunately, the only time it isn't is when the * output type is points, in which case EndPrimitive() is a no-op. */ - if (c->prog_data.control_data_format != + if (gs_prog_data->control_data_format != GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) { return; } @@ -598,27 +600,231 @@ vec4_gs_visitor::gs_end_primitive() extern "C" const unsigned * brw_compile_gs(const struct brw_compiler *compiler, void *log_data, - struct brw_gs_compile *c, + void *mem_ctx, + const struct brw_gs_prog_key *key, + struct brw_gs_prog_data *prog_data, const nir_shader *shader, struct gl_shader_program *shader_prog, - void *mem_ctx, int shader_time_index, unsigned *final_assembly_size, char **error_str) { + struct brw_gs_compile c; + memset(&c, 0, sizeof(c)); + c.key = *key; + + prog_data->include_primitive_id = + (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; + + prog_data->invocations = shader->info.gs.invocations; + + if (compiler->devinfo->gen >= 8) + prog_data->static_vertex_count = nir_gs_count_vertices(shader); + + if (compiler->devinfo->gen >= 7) { + if (shader->info.gs.output_primitive == GL_POINTS) { + /* When the output type is points, the geometry shader may output data + * to multiple streams, and EndPrimitive() has no effect. So we + * configure the hardware to interpret the control data as stream ID. 
+ */ + prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; + + /* We only have to emit control bits if we are using streams */ + if (shader_prog && shader_prog->Geom.UsesStreams) + c.control_data_bits_per_vertex = 2; + else + c.control_data_bits_per_vertex = 0; + } else { + /* When the output type is triangle_strip or line_strip, EndPrimitive() + * may be used to terminate the current strip and start a new one + * (similar to primitive restart), and outputting data to multiple + * streams is not supported. So we configure the hardware to interpret + * the control data as EndPrimitive information (a.k.a. "cut bits"). + */ + prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; + + /* We only need to output control data if the shader actually calls + * EndPrimitive(). + */ + c.control_data_bits_per_vertex = + shader->info.gs.uses_end_primitive ? 1 : 0; + } + } else { + /* There are no control data bits in gen6. */ + c.control_data_bits_per_vertex = 0; + + /* If it is using transform feedback, enable it */ + if (shader->info.has_transform_feedback_varyings) + prog_data->gen6_xfb_enabled = true; + else + prog_data->gen6_xfb_enabled = false; + } + c.control_data_header_size_bits = + shader->info.gs.vertices_out * c.control_data_bits_per_vertex; + + /* 1 HWORD = 32 bytes = 256 bits */ + prog_data->control_data_header_size_hwords = + ALIGN(c.control_data_header_size_bits, 256) / 256; + + /* Compute the output vertex size. + * + * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex + * Size (p168): + * + * [0,62] indicating [1,63] 16B units + * + * Specifies the size of each vertex stored in the GS output entry + * (following any Control Header data) as a number of 128-bit units + * (minus one). 
+ * + * Programming Restrictions: The vertex size must be programmed as a + * multiple of 32B units with the following exception: Rendering is + * disabled (as per SOL stage state) and the vertex size output by the + * GS thread is 16B. + * + * If rendering is enabled (as per SOL state) the vertex size must be + * programmed as a multiple of 32B units. In other words, the only time + * software can program a vertex size with an odd number of 16B units + * is when rendering is disabled. + * + * Note: B=bytes in the above text. + * + * It doesn't seem worth the extra trouble to optimize the case where the + * vertex size is 16B (especially since this would require special-casing + * the GEN assembly that writes to the URB). So we just set the vertex + * size to a multiple of 32B (2 vec4's) in all cases. + * + * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We + * budget that as follows: + * + * 512 bytes for varyings (a varying component is 4 bytes and + * gl_MaxGeometryOutputComponents = 128) + * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 + * bytes) + * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE + * even if it's not used) + * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots + * whenever clip planes are enabled, even if the shader doesn't + * write to gl_ClipDistance) + * 16 bytes overhead since the VUE size must be a multiple of 32 bytes + * (see below)--this causes up to 1 VUE slot to be wasted + * 400 bytes available for varying packing overhead + * + * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) + * per interpolation type, so this is plenty. + * + */ + unsigned output_vertex_size_bytes = prog_data->base.vue_map.num_slots * 16; + assert(compiler->devinfo->gen == 6 || + output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); + prog_data->output_vertex_size_hwords = + ALIGN(output_vertex_size_bytes, 32) / 32; + + /* Compute URB entry size. 
The maximum allowed URB entry size is 32k. + * That divides up as follows: + * + * 64 bytes for the control data header (cut indices or StreamID bits) + * 4096 bytes for varyings (a varying component is 4 bytes and + * gl_MaxGeometryTotalOutputComponents = 1024) + * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 + * bytes/vertex and gl_MaxGeometryOutputVertices is 256) + * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE + * even if it's not used) + * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots + * whenever clip planes are enabled, even if the shader doesn't + * write to gl_ClipDistance) + * 4096 bytes overhead since the VUE size must be a multiple of 32 + * bytes (see above)--this causes up to 1 VUE slot to be wasted + * 8128 bytes available for varying packing overhead + * + * Worst-case varying packing overhead is 3/4 of a varying slot per + * interpolation type, which works out to 3072 bytes, so this would allow + * us to accommodate 2 interpolation types without any danger of running + * out of URB space. + * + * In practice, the risk of running out of URB space is very small, since + * the above figures are all worst-case, and most of them scale with the + * number of output vertices. So we'll just calculate the amount of space + * we need, and if it's too large, fail to compile. + * + * The above is for gen7+ where we have a single URB entry that will hold + * all the output. In gen6, we will have to allocate URB entries for every + * vertex we emit, so our URB entries only need to be large enough to hold + * a single vertex. Also, gen6 does not have a control data header. 
+ */ + unsigned output_size_bytes; + if (compiler->devinfo->gen >= 7) { + output_size_bytes = + prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out; + output_size_bytes += 32 * prog_data->control_data_header_size_hwords; + } else { + output_size_bytes = prog_data->output_vertex_size_hwords * 32; + } + + /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, + * which comes before the control header. + */ + if (compiler->devinfo->gen >= 8) + output_size_bytes += 32; + + assert(output_size_bytes >= 1); + int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (compiler->devinfo->gen == 6) + max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (output_size_bytes > max_output_size_bytes) + return false; + + + /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and + * a multiple of 128 bytes in gen6. + */ + if (compiler->devinfo->gen >= 7) + prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + else + prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; + + prog_data->output_topology = + get_hw_prim_for_gl_prim(shader->info.gs.output_primitive); + + /* The GLSL linker will have already matched up GS inputs and the outputs + * of prior stages. The driver does extend VS outputs in some cases, but + * only for legacy OpenGL or Gen4-5 hardware, neither of which offer + * geometry shader support. So we can safely ignore that. + * + * For SSO pipelines, we use a fixed VUE map layout based on variable + * locations, so we can rely on rendezvous-by-location making this work. + * + * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not + * written by previous stages and shows up via payload magic. 
+ */ + GLbitfield64 inputs_read = + shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; + brw_compute_vue_map(compiler->devinfo, + &c.input_vue_map, inputs_read, + shader->info.separate_shader); + + /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we + * need to program a URB read length of ceiling(num_slots / 2). + */ + prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; + + /* Now that prog_data setup is done, we are ready to actually compile the + * program. + */ + if (compiler->devinfo->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. If the GS invocations count > 1, then we can't use * dual object mode. */ - if (c->prog_data.invocations <= 1 && + if (prog_data->invocations <= 1 && likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { - c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(compiler, log_data, c, shader, + vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { - vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx, + vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); return g.generate_assembly(v.cfg, final_assembly_size, shader); } @@ -648,28 +854,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * mode is more performant when invocations > 1. Gen6 only supports * SINGLE mode. 
*/ - if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7) - c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; + if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7) + prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; else - c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; + prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; if (compiler->devinfo->gen >= 7) - gs = new vec4_gs_visitor(compiler, log_data, c, shader, - mem_ctx, false /* no_spills */, + gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data, + shader, mem_ctx, false /* no_spills */, shader_time_index); else - gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader, - mem_ctx, false /* no_spills */, + gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, shader_prog, + shader, mem_ctx, false /* no_spills */, shader_time_index); if (!gs->run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { - vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx, + vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); ret = g.generate_assembly(gs->cfg, final_assembly_size, shader); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index c52552768c8..6ca83a9d9a3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -41,6 +41,7 @@ public: vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, + struct brw_gs_prog_data *prog_data, const nir_shader *shader, void *mem_ctx, bool no_spills, @@ -70,6 +71,7 @@ protected: src_reg vertex_count; src_reg control_data_bits; const struct brw_gs_compile * const c; + struct brw_gs_prog_data * const gs_prog_data; }; } /* namespace brw */ diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5be9c6a6b2d..6d155285820 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1222,6 +1222,9 @@ vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst, void vec4_visitor::emit_ndc_computation() { + if (output_reg[VARYING_SLOT_POS].file == BAD_FILE) + return; + /* Get the position */ src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]); @@ -1287,7 +1290,8 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (devinfo->has_negative_rhw_bug) { + if (devinfo->has_negative_rhw_bug && + output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) { src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]); ndc_w.swizzle = BRW_SWIZZLE_WWWW; emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L)); @@ -1335,8 +1339,10 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) assert(varying < VARYING_SLOT_MAX); assert(output_reg[varying].type == reg.type); current_annotation = output_reg_annotation[varying]; - /* Copy the register, saturating if necessary */ - return emit(MOV(reg, src_reg(output_reg[varying]))); + if (output_reg[varying].file != BAD_FILE) + return emit(MOV(reg, src_reg(output_reg[varying]))); + else + return NULL; } void @@ -1355,11 +1361,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) } case BRW_VARYING_SLOT_NDC: current_annotation = "NDC"; - emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]))); + if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) + emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]))); break; case VARYING_SLOT_POS: current_annotation = "gl_Position"; - emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]))); + if (output_reg[VARYING_SLOT_POS].file != BAD_FILE) + emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]))); break; 
case VARYING_SLOT_EDGE: /* This is present when doing unfilled polygons. We're supposed to copy diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 485a80ee2fc..5dd4f98cecc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -217,7 +217,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying) * shader. */ vec4_instruction *inst = emit_generic_urb_slot(reg, varying); - if (key->clamp_vertex_color) + if (inst && key->clamp_vertex_color) inst->saturate = true; break; } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index ba680a98f7e..5db4b3a86af 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -312,7 +312,7 @@ brw_vs_populate_key(struct brw_context *brw, if (ctx->Transform.ClipPlanesEnabled != 0 && ctx->API == API_OPENGL_COMPAT && - !vp->program.Base.UsesClipDistanceOut) { + vp->program.Base.ClipDistanceArraySize == 0) { key->nr_userclip_plane_consts = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; } diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 671a535a5bd..2fef188c17e 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -63,7 +63,7 @@ gen6_gs_visitor::emit_prolog() this->vertex_output = src_reg(this, glsl_type::uint_type, (prog_data->vue_map.num_slots + 1) * - c->gp->program.VerticesOut); + nir->info.gs.vertices_out); this->vertex_output_offset = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u))); @@ -95,7 +95,7 @@ gen6_gs_visitor::emit_prolog() this->prim_count = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this->prim_count), 0u)); - if (c->prog_data.gen6_xfb_enabled) { + if (gs_prog_data->gen6_xfb_enabled) { /* Create a virtual register to hold destination 
indices in SOL */ this->destination_indices = src_reg(this, glsl_type::uvec4_type); /* Create a virtual register to hold number of written primitives */ @@ -128,7 +128,7 @@ gen6_gs_visitor::emit_prolog() * in the 3DSTATE_GS state packet. That information can be obtained by other * means though, so we can safely use r1 for this purpose. */ - if (c->prog_data.include_primitive_id) { + if (gs_prog_data->include_primitive_id) { this->primitive_id = src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id)); @@ -177,7 +177,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) dst_reg dst(this->vertex_output); dst.reladdr = ralloc(mem_ctx, src_reg); memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - if (c->gp->program.OutputType == GL_POINTS) { + if (nir->info.gs.output_primitive == GL_POINTS) { /* If we are outputting points, then every vertex has PrimStart and * PrimEnd set. */ @@ -191,7 +191,7 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) * vertex. */ emit(OR(dst, this->first_vertex, - (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); + (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT))); emit(MOV(dst_reg(this->first_vertex), 0u)); } emit(ADD(dst_reg(this->vertex_output_offset), @@ -205,7 +205,7 @@ gen6_gs_visitor::gs_end_primitive() /* Calling EndPrimitive() is optional for point output. In this case we set * the PrimEnd flag when we process EmitVertex(). */ - if (c->gp->program.OutputType == GL_POINTS) + if (nir->info.gs.output_primitive == GL_POINTS) return; /* Otherwise we know that the last vertex we have processed was the last @@ -217,7 +217,7 @@ gen6_gs_visitor::gs_end_primitive() * comparison below (hence the num_output_vertices + 1 in the comparison * below). 
*/ - unsigned num_output_vertices = c->gp->program.VerticesOut; + unsigned num_output_vertices = nir->info.gs.vertices_out; emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1), BRW_CONDITIONAL_L)); vec4_instruction *inst = emit(CMP(dst_null_d(), @@ -320,7 +320,7 @@ gen6_gs_visitor::emit_thread_end() * first_vertex is not zero. This is only relevant for outputs other than * points because in the point case we set PrimEnd on all vertices. */ - if (c->gp->program.OutputType != GL_POINTS) { + if (nir->info.gs.output_primitive != GL_POINTS) { emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z)); emit(IF(BRW_PREDICATE_NORMAL)); gs_end_primitive(); @@ -353,7 +353,7 @@ gen6_gs_visitor::emit_thread_end() this->current_annotation = "gen6 thread end: ff_sync"; vec4_instruction *inst; - if (c->prog_data.gen6_xfb_enabled) { + if (gs_prog_data->gen6_xfb_enabled) { src_reg sol_temp(this, glsl_type::uvec4_type); emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES, dst_reg(this->svbi), @@ -443,7 +443,7 @@ gen6_gs_visitor::emit_thread_end() } emit(BRW_OPCODE_WHILE); - if (c->prog_data.gen6_xfb_enabled) + if (gs_prog_data->gen6_xfb_enabled) xfb_write(); } emit(BRW_OPCODE_ENDIF); @@ -465,7 +465,7 @@ gen6_gs_visitor::emit_thread_end() */ this->current_annotation = "gen6 thread end: EOT"; - if (c->prog_data.gen6_xfb_enabled) { + if (gs_prog_data->gen6_xfb_enabled) { /* When emitting EOT, set SONumPrimsWritten Increment Value. */ src_reg data(this, glsl_type::uint_type); emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu))); @@ -507,7 +507,7 @@ gen6_gs_visitor::setup_payload() * information (and move the original value to a virtual register if * necessary). 
*/ - if (c->prog_data.include_primitive_id) + if (gs_prog_data->include_primitive_id) attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg; reg++; @@ -530,9 +530,6 @@ gen6_gs_visitor::xfb_setup() BRW_SWIZZLE4(3, 3, 3, 3) }; - struct brw_gs_prog_data *prog_data = - (struct brw_gs_prog_data *) &c->prog_data; - const struct gl_transform_feedback_info *linked_xfb_info = &this->shader_prog->LinkedTransformFeedback; int i; @@ -548,11 +545,11 @@ gen6_gs_visitor::xfb_setup() */ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); - prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; - for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) { - prog_data->transform_feedback_bindings[i] = + gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; + for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) { + gs_prog_data->transform_feedback_bindings[i] = linked_xfb_info->Outputs[i].OutputRegister; - prog_data->transform_feedback_swizzles[i] = + gs_prog_data->transform_feedback_swizzles[i] = swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } } @@ -561,13 +558,11 @@ void gen6_gs_visitor::xfb_write() { unsigned num_verts; - struct brw_gs_prog_data *prog_data = - (struct brw_gs_prog_data *) &c->prog_data; - if (!prog_data->num_transform_feedback_bindings) + if (!gs_prog_data->num_transform_feedback_bindings) return; - switch (c->prog_data.output_topology) { + switch (gs_prog_data->output_topology) { case _3DPRIM_POINTLIST: num_verts = 1; break; @@ -627,7 +622,7 @@ gen6_gs_visitor::xfb_write() emit(BRW_OPCODE_ENDIF); /* Write transform feedback data for all processed vertices. 
*/ - for (int i = 0; i < c->gp->program.VerticesOut; i++) { + for (int i = 0; i < (int)nir->info.gs.vertices_out; i++) { emit(MOV(dst_reg(sol_temp), i)); emit(CMP(dst_null_d(), sol_temp, this->vertex_count, BRW_CONDITIONAL_L)); @@ -642,10 +637,8 @@ gen6_gs_visitor::xfb_write() void gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) { - struct brw_gs_prog_data *prog_data = - (struct brw_gs_prog_data *) &c->prog_data; unsigned binding; - unsigned num_bindings = prog_data->num_transform_feedback_bindings; + unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings; src_reg sol_temp(this, glsl_type::uvec4_type); /* Check for buffer overflow: we need room to write the complete primitive @@ -666,7 +659,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) */ for (binding = 0; binding < num_bindings; ++binding) { unsigned char varying = - prog_data->transform_feedback_bindings[binding]; + gs_prog_data->transform_feedback_bindings[binding]; /* Set up the correct destination index for this vertex */ vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX, @@ -704,7 +697,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) else if (varying == VARYING_SLOT_VIEWPORT) data.swizzle = BRW_SWIZZLE_ZZZZ; else - data.swizzle = prog_data->transform_feedback_swizzles[binding]; + data.swizzle = gs_prog_data->transform_feedback_swizzles[binding]; /* Write data */ inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp); diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index d02c67d8a74..311cf06833c 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -38,12 +38,13 @@ public: gen6_gs_visitor(const struct brw_compiler *comp, void *log_data, struct brw_gs_compile *c, + struct brw_gs_prog_data *prog_data, struct gl_shader_program *prog, const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) : 
- vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills, + vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills, shader_time_index), shader_prog(prog) { diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index d766ca7bebf..6738e85eaba 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -68,6 +68,8 @@ gen8_upload_gs_state(struct brw_context *brw) GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | (brw->gs.prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | + (prog_data->include_vue_handles ? + GEN7_GS_INCLUDE_VERTEX_HANDLES : 0) | (prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index e8059c7b260..2b65b2ea949 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -1400,7 +1400,7 @@ save_BlendFunci(GLuint buf, GLenum sfactor, GLenum dfactor) GET_CURRENT_CONTEXT(ctx); Node *n; ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); - n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_SEPARATE_I, 3); + n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_I, 3); if (n) { n[1].ui = buf; n[2].e = sfactor; @@ -9741,6 +9741,46 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname) n[3].f, n[4].f, n[5].f, n[6].f, get_pointer(&n[7])); break; + case OPCODE_BLEND_COLOR: + fprintf(f, "BlendColor %f, %f, %f, %f\n", + n[1].f, n[2].f, n[3].f, n[4].f); + break; + case OPCODE_BLEND_EQUATION: + fprintf(f, "BlendEquation %s\n", + enum_string(n[1].e)); + break; + case OPCODE_BLEND_EQUATION_SEPARATE: + fprintf(f, "BlendEquationSeparate %s, %s\n", + enum_string(n[1].e), + enum_string(n[2].e)); + break; + case OPCODE_BLEND_FUNC_SEPARATE: + fprintf(f, "BlendFuncSeparate %s, %s, %s, %s\n", + enum_string(n[1].e), + enum_string(n[2].e), + enum_string(n[3].e), + enum_string(n[4].e)); + break; + case OPCODE_BLEND_EQUATION_I: + fprintf(f, "BlendEquationi 
%u, %s\n", + n[1].ui, enum_string(n[2].e)); + break; + case OPCODE_BLEND_EQUATION_SEPARATE_I: + fprintf(f, "BlendEquationSeparatei %u, %s, %s\n", + n[1].ui, enum_string(n[2].e), enum_string(n[3].e)); + break; + case OPCODE_BLEND_FUNC_I: + fprintf(f, "BlendFunci %u, %s, %s\n", + n[1].ui, enum_string(n[2].e), enum_string(n[3].e)); + break; + case OPCODE_BLEND_FUNC_SEPARATE_I: + fprintf(f, "BlendFuncSeparatei %u, %s, %s, %s, %s\n", + n[1].ui, + enum_string(n[2].e), + enum_string(n[3].e), + enum_string(n[4].e), + enum_string(n[5].e)); + break; case OPCODE_CALL_LIST: fprintf(f, "CallList %d\n", (int) n[1].ui); break; @@ -9761,6 +9801,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname) case OPCODE_LINE_STIPPLE: fprintf(f, "LineStipple %d %x\n", n[1].i, (int) n[2].us); break; + case OPCODE_LINE_WIDTH: + fprintf(f, "LineWidth %f\n", n[1].f); + break; case OPCODE_LOAD_IDENTITY: fprintf(f, "LoadIdentity\n"); break; @@ -9790,6 +9833,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname) fprintf(f, "Ortho %g %g %g %g %g %g\n", n[1].f, n[2].f, n[3].f, n[4].f, n[5].f, n[6].f); break; + case OPCODE_POINT_SIZE: + fprintf(f, "PointSize %f\n", n[1].f); + break; case OPCODE_POP_ATTRIB: fprintf(f, "PopAttrib\n"); break; diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index faa63825380..2ed42eaffdd 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -2275,45 +2275,16 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat) ; /* fallthrough */ } - if (ctx->Extensions.TDFX_texture_compression_FXT1) { - switch (internalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - return GL_RGB; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return GL_RGBA; - default: - ; /* fallthrough */ - } + if (_mesa_is_compressed_format(ctx, internalFormat)) { + GLenum base_compressed = + _mesa_gl_compressed_format_base_format(internalFormat); + if (base_compressed) + return base_compressed; } - /* Assume that the ANGLE flag 
will always be set if the EXT flag is set. - */ - if (ctx->Extensions.ANGLE_texture_compression_dxt) { - switch (internalFormat) { - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return GL_RGB; - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return GL_RGBA; - default: - ; /* fallthrough */ - } - } - - if (_mesa_is_desktop_gl(ctx) - && ctx->Extensions.ANGLE_texture_compression_dxt) { - switch (internalFormat) { - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - return GL_RGB; - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - return GL_RGBA; - default: - ; /* fallthrough */ - } - } + if (ctx->Extensions.KHR_texture_compression_astc_ldr && + _mesa_is_astc_format(internalFormat)) + return GL_RGBA; if (ctx->Extensions.MESA_ycbcr_texture) { if (internalFormat == GL_YCBCR_MESA) @@ -2390,16 +2361,10 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat) case GL_SRGB8_EXT: case GL_COMPRESSED_SRGB_EXT: return GL_RGB; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1; case GL_SRGB_ALPHA_EXT: case GL_SRGB8_ALPHA8_EXT: case GL_COMPRESSED_SRGB_ALPHA_EXT: return GL_RGBA; - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return ctx->Extensions.EXT_texture_compression_s3tc ? 
GL_RGBA : -1; case GL_SLUMINANCE_ALPHA_EXT: case GL_SLUMINANCE8_ALPHA8_EXT: case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: @@ -2544,104 +2509,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat) } } - if (ctx->Extensions.ARB_texture_compression_rgtc) { - switch (internalFormat) { - case GL_COMPRESSED_RED_RGTC1: - case GL_COMPRESSED_SIGNED_RED_RGTC1: - return GL_RED; - case GL_COMPRESSED_RG_RGTC2: - case GL_COMPRESSED_SIGNED_RG_RGTC2: - return GL_RG; - default: - ; /* fallthrough */ - } - } - - if (ctx->Extensions.EXT_texture_compression_latc) { - switch (internalFormat) { - case GL_COMPRESSED_LUMINANCE_LATC1_EXT: - case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: - return GL_LUMINANCE; - case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: - case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: - return GL_LUMINANCE_ALPHA; - default: - ; /* fallthrough */ - } - } - - if (ctx->Extensions.ATI_texture_compression_3dc) { - switch (internalFormat) { - case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: - return GL_LUMINANCE_ALPHA; - default: - ; /* fallthrough */ - } - } - - if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) { - switch (internalFormat) { - case GL_ETC1_RGB8_OES: - return GL_RGB; - default: - ; /* fallthrough */ - } - } - - if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { - switch (internalFormat) { - case GL_COMPRESSED_RGB8_ETC2: - case GL_COMPRESSED_SRGB8_ETC2: - return GL_RGB; - case GL_COMPRESSED_RGBA8_ETC2_EAC: - case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC: - case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: - case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: - return GL_RGBA; - case GL_COMPRESSED_R11_EAC: - case GL_COMPRESSED_SIGNED_R11_EAC: - return GL_RED; - case GL_COMPRESSED_RG11_EAC: - case GL_COMPRESSED_SIGNED_RG11_EAC: - return GL_RG; - default: - ; /* fallthrough */ - } - } - - if (_mesa_is_desktop_gl(ctx) && - ctx->Extensions.ARB_texture_compression_bptc) { - switch (internalFormat) { - case 
GL_COMPRESSED_RGBA_BPTC_UNORM: - case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: - return GL_RGBA; - case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: - case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: - return GL_RGB; - default: - ; /* fallthrough */ - } - } - - if (ctx->API == API_OPENGLES) { - switch (internalFormat) { - case GL_PALETTE4_RGB8_OES: - case GL_PALETTE4_R5_G6_B5_OES: - case GL_PALETTE8_RGB8_OES: - case GL_PALETTE8_R5_G6_B5_OES: - return GL_RGB; - case GL_PALETTE4_RGBA8_OES: - case GL_PALETTE8_RGB5_A1_OES: - case GL_PALETTE4_RGBA4_OES: - case GL_PALETTE4_RGB5_A1_OES: - case GL_PALETTE8_RGBA8_OES: - case GL_PALETTE8_RGBA4_OES: - return GL_RGBA; - default: - ; /* fallthrough */ - } - } - return -1; /* error */ } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index e57b98a412d..ab4fa083672 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1891,7 +1891,7 @@ struct gl_program * For vertex and geometry shaders, true if the program uses the * gl_ClipDistance output. Ignored for fragment shaders. */ - GLboolean UsesClipDistanceOut; + unsigned ClipDistanceArraySize; /** Named parameters, constants, etc. from program text */ @@ -2619,7 +2619,6 @@ struct gl_shader_program * True if gl_ClipDistance is written to. Copied into * gl_tess_eval_program by _mesa_copy_linked_program_data(). */ - GLboolean UsesClipDistance; GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or 0 if not present. */ } TessEval; @@ -2642,7 +2641,6 @@ struct gl_shader_program * True if gl_ClipDistance is written to. Copied into * gl_geometry_program by _mesa_copy_linked_program_data(). */ - GLboolean UsesClipDistance; GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or 0 if not present. */ bool UsesEndPrimitive; @@ -2655,7 +2653,6 @@ struct gl_shader_program * True if gl_ClipDistance is written to. Copied into gl_vertex_program * by _mesa_copy_linked_program_data(). 
*/ - GLboolean UsesClipDistance; GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or 0 if not present. */ } Vert; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 18e463d4ccc..765602e50db 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2068,7 +2068,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, { switch (type) { case MESA_SHADER_VERTEX: - dst->UsesClipDistanceOut = src->Vert.UsesClipDistance; + dst->ClipDistanceArraySize = src->Vert.ClipDistanceArraySize; break; case MESA_SHADER_TESS_CTRL: { struct gl_tess_ctrl_program *dst_tcp = @@ -2083,7 +2083,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, dst_tep->Spacing = src->TessEval.Spacing; dst_tep->VertexOrder = src->TessEval.VertexOrder; dst_tep->PointMode = src->TessEval.PointMode; - dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance; + dst->ClipDistanceArraySize = src->TessEval.ClipDistanceArraySize; break; } case MESA_SHADER_GEOMETRY: { @@ -2093,7 +2093,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, dst_gp->Invocations = src->Geom.Invocations; dst_gp->InputType = src->Geom.InputType; dst_gp->OutputType = src->Geom.OutputType; - dst->UsesClipDistanceOut = src->Geom.UsesClipDistance; + dst->ClipDistanceArraySize = src->Geom.ClipDistanceArraySize; dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive; dst_gp->UsesStreams = src->Geom.UsesStreams; break; diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index e50964e79e4..d7671738b18 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -97,16 +97,16 @@ static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE }; * No pixel transfer operations or special texel encodings allowed. * 1D, 2D and 3D images supported. 
*/ -static void -memcpy_texture(struct gl_context *ctx, - GLuint dimensions, - mesa_format dstFormat, - GLint dstRowStride, - GLubyte **dstSlices, - GLint srcWidth, GLint srcHeight, GLint srcDepth, - GLenum srcFormat, GLenum srcType, - const GLvoid *srcAddr, - const struct gl_pixelstore_attrib *srcPacking) +void +_mesa_memcpy_texture(struct gl_context *ctx, + GLuint dimensions, + mesa_format dstFormat, + GLint dstRowStride, + GLubyte **dstSlices, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLenum srcFormat, GLenum srcType, + const GLvoid *srcAddr, + const struct gl_pixelstore_attrib *srcPacking) { const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); @@ -296,11 +296,11 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS) assert(baseInternalFormat == GL_YCBCR_MESA); /* always just memcpy since no pixel transfer ops apply */ - memcpy_texture(ctx, dims, - dstFormat, - dstRowStride, dstSlices, - srcWidth, srcHeight, srcDepth, srcFormat, srcType, - srcAddr, srcPacking); + _mesa_memcpy_texture(ctx, dims, + dstFormat, + dstRowStride, dstSlices, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); /* Check if we need byte swapping */ /* XXX the logic here _might_ be wrong */ @@ -899,13 +899,15 @@ _mesa_texstore_memcpy(TEXSTORE_PARAMS) return GL_FALSE; } - memcpy_texture(ctx, dims, - dstFormat, - dstRowStride, dstSlices, - srcWidth, srcHeight, srcDepth, srcFormat, srcType, - srcAddr, srcPacking); + _mesa_memcpy_texture(ctx, dims, + dstFormat, + dstRowStride, dstSlices, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); return GL_TRUE; } + + /** * Store user data into texture memory. 
* Called via glTex[Sub]Image1/2/3D() diff --git a/src/mesa/main/texstore.h b/src/mesa/main/texstore.h index 2c974f74afb..f08dc08edde 100644 --- a/src/mesa/main/texstore.h +++ b/src/mesa/main/texstore.h @@ -74,6 +74,17 @@ _mesa_texstore_needs_transfer_ops(struct gl_context *ctx, GLenum baseInternalFormat, mesa_format dstFormat); +extern void +_mesa_memcpy_texture(struct gl_context *ctx, + GLuint dimensions, + mesa_format dstFormat, + GLint dstRowStride, + GLubyte **dstSlices, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLenum srcFormat, GLenum srcType, + const GLvoid *srcAddr, + const struct gl_pixelstore_attrib *srcPacking); + extern GLboolean _mesa_texstore_can_use_memcpy(struct gl_context *ctx, GLenum baseInternalFormat, mesa_format dstFormat, diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 0f01e9939de..55d5e66243c 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -239,7 +239,7 @@ static void update_raster_state( struct st_context *st ) /* _NEW_MULTISAMPLE | _NEW_BUFFERS */ raster->force_persample_interp = - st->can_force_persample_interp && + !st->force_persample_in_shader && ctx->Multisample._Enabled && ctx->Multisample.SampleShading && ctx->Multisample.MinSampleShadingValue * diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 1e880a107c0..0f9ea101889 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -64,7 +64,7 @@ update_fp( struct st_context *st ) assert(stfp->Base.Base.Target == GL_FRAGMENT_PROGRAM_ARB); memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; /* _NEW_FRAG_CLAMP */ key.clamp_color = st->clamp_frag_color_in_shader && @@ -76,7 +76,7 @@ update_fp( struct st_context *st ) * Ignore sample qualifier while computing this flag. 
*/ key.persample_shading = - !st->can_force_persample_interp && + st->force_persample_in_shader && !(stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID | SYSTEM_BIT_SAMPLE_POS)) && _mesa_get_min_invocations_per_fragment(st->ctx, &stfp->Base, true) > 1; @@ -119,7 +119,7 @@ update_vp( struct st_context *st ) assert(stvp->Base.Base.Target == GL_VERTEX_PROGRAM_ARB); memset(&key, 0, sizeof key); - key.st = st; /* variants are per-context */ + key.st = st->has_shareable_shaders ? NULL : st; /* When this is true, we will add an extra input to the vertex * shader translation (for edgeflags), an extra output with @@ -174,7 +174,7 @@ update_gp( struct st_context *st ) assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV); memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st->gp_variant = st_get_gp_variant(st, stgp, &key); @@ -210,7 +210,7 @@ update_tcp( struct st_context *st ) assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV); memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st->tcp_variant = st_get_tcp_variant(st, sttcp, &key); @@ -246,7 +246,7 @@ update_tep( struct st_context *st ) assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV); memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st->tep_variant = st_get_tep_variant(st, sttep, &key); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index bb6dfe85644..cbc6845d771 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -269,7 +269,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, struct pipe_resource *vbuf = NULL; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? 
NULL : st; key.bitmap = GL_TRUE; key.clamp_color = st->clamp_frag_color_in_shader && st->ctx->Color._ClampFragmentColor; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 7e8633edc1a..262ad809c58 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -395,15 +395,35 @@ make_texture(struct st_context *st, * Note that the image is actually going to be upside down in * the texture. We deal with that with texcoords. */ - success = _mesa_texstore(ctx, 2, /* dims */ - baseInternalFormat, /* baseInternalFormat */ - mformat, /* mesa_format */ - transfer->stride, /* dstRowStride, bytes */ - &dest, /* destSlices */ - width, height, 1, /* size */ - format, type, /* src format/type */ - pixels, /* data source */ - unpack); + if ((format == GL_RGBA || format == GL_BGRA) + && type == GL_UNSIGNED_BYTE) { + /* Use a memcpy-based texstore to avoid software pixel swizzling. + * We'll do the necessary swizzling with the pipe_sampler_view to + * give much better performance. + * XXX in the future, expand this to accommodate more format and + * type combinations. 
+ */ + _mesa_memcpy_texture(ctx, 2, + mformat, /* mesa_format */ + transfer->stride, /* dstRowStride, bytes */ + &dest, /* destSlices */ + width, height, 1, /* size */ + format, type, /* src format/type */ + pixels, /* data source */ + unpack); + success = GL_TRUE; + } + else { + success = _mesa_texstore(ctx, 2, /* dims */ + baseInternalFormat, /* baseInternalFormat */ + mformat, /* mesa_format */ + transfer->stride, /* dstRowStride, bytes */ + &dest, /* destSlices */ + width, height, 1, /* size */ + format, type, /* src format/type */ + pixels, /* data source */ + unpack); + } /* unmap */ pipe_transfer_unmap(pipe, transfer); @@ -667,7 +687,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* user textures, plus the drawpix textures */ if (fpv) { struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; - uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1, + uint num = MAX3(fpv->drawpix_sampler + 1, + fpv->pixelmap_sampler + 1, st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]); memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT], @@ -914,7 +935,7 @@ get_color_fp_variant(struct st_context *st) memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; key.drawpixels = 1; key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || @@ -956,6 +977,69 @@ clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height, } +/** + * Search the array of 4 swizzle components for the named component and return + * its position. + */ +static unsigned +search_swizzle(const unsigned char swizzle[4], unsigned component) +{ + unsigned i; + for (i = 0; i < 4; i++) { + if (swizzle[i] == component) + return i; + } + assert(!"search_swizzle() failed"); + return 0; +} + + +/** + * Set the sampler view's swizzle terms. This is used to handle RGBA + * swizzling when the incoming image format isn't an exact match for + * the actual texture format. 
For example, if we have glDrawPixels( + * GL_RGBA, GL_UNSIGNED_BYTE) and we chose the texture format + * PIPE_FORMAT_B8G8R8A8 then we can use the sampler view swizzle to + * avoid swizzling all the pixels in software in the texstore code. + */ +static void +setup_sampler_swizzle(struct pipe_sampler_view *sv, GLenum format, GLenum type) +{ + if ((format == GL_RGBA || format == GL_BGRA) && type == GL_UNSIGNED_BYTE) { + const struct util_format_description *desc = + util_format_description(sv->texture->format); + unsigned c0, c1, c2, c3; + + /* Every gallium driver supports at least one 32-bit packed RGBA format. + * We must have chosen one for (GL_RGBA, GL_UNSIGNED_BYTE). + */ + assert(desc->block.bits == 32); + + /* invert the format's swizzle to set up the sampler's swizzle */ + if (format == GL_RGBA) { + c0 = UTIL_FORMAT_SWIZZLE_X; + c1 = UTIL_FORMAT_SWIZZLE_Y; + c2 = UTIL_FORMAT_SWIZZLE_Z; + c3 = UTIL_FORMAT_SWIZZLE_W; + } + else { + assert(format == GL_BGRA); + c0 = UTIL_FORMAT_SWIZZLE_Z; + c1 = UTIL_FORMAT_SWIZZLE_Y; + c2 = UTIL_FORMAT_SWIZZLE_X; + c3 = UTIL_FORMAT_SWIZZLE_W; + } + sv->swizzle_r = search_swizzle(desc->swizzle, c0); + sv->swizzle_g = search_swizzle(desc->swizzle, c1); + sv->swizzle_b = search_swizzle(desc->swizzle, c2); + sv->swizzle_a = search_swizzle(desc->swizzle, c3); + } + else { + /* use the default sampler swizzle */ + } +} + + /** * Called via ctx->Driver.DrawPixels() */ @@ -974,6 +1058,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, int num_sampler_view = 1; struct gl_pixelstore_attrib clippedUnpack; struct st_fp_variant *fpv = NULL; + struct pipe_resource *pt; /* Mesa state should be up to date by now */ assert(ctx->NewState == 0x0); @@ -1029,42 +1114,56 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); } - /* draw with textured quad */ - { - struct pipe_resource *pt - = make_texture(st, width, height, format, type, unpack, pixels); - if (pt) { - 
sv[0] = st_create_texture_sampler_view(st->pipe, pt); - - if (sv[0]) { - /* Create a second sampler view to read stencil. - * The stencil is written using the shader stencil export - * functionality. */ - if (write_stencil) { - enum pipe_format stencil_format = - util_format_stencil_only(pt->format); - /* we should not be doing pixel map/transfer (see above) */ - assert(num_sampler_view == 1); - sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, - stencil_format); - num_sampler_view++; - } - - draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], - width, height, - ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, - sv, - num_sampler_view, - driver_vp, - driver_fp, fpv, - color, GL_FALSE, write_depth, write_stencil); - pipe_sampler_view_reference(&sv[0], NULL); - if (num_sampler_view > 1) - pipe_sampler_view_reference(&sv[1], NULL); - } - pipe_resource_reference(&pt, NULL); - } + /* Put glDrawPixels image into a texture */ + pt = make_texture(st, width, height, format, type, unpack, pixels); + if (!pt) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + return; } + + /* create sampler view for the image */ + sv[0] = st_create_texture_sampler_view(st->pipe, pt); + if (!sv[0]) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + pipe_resource_reference(&pt, NULL); + return; + } + + /* Set up the sampler view's swizzle */ + setup_sampler_swizzle(sv[0], format, type); + + /* Create a second sampler view to read stencil. The stencil is + * written using the shader stencil export functionality. 
+ */ + if (write_stencil) { + enum pipe_format stencil_format = + util_format_stencil_only(pt->format); + /* we should not be doing pixel map/transfer (see above) */ + assert(num_sampler_view == 1); + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, + stencil_format); + if (!sv[1]) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + pipe_resource_reference(&pt, NULL); + pipe_sampler_view_reference(&sv[0], NULL); + return; + } + num_sampler_view++; + } + + draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, + ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, + sv, + num_sampler_view, + driver_vp, + driver_fp, fpv, + color, GL_FALSE, write_depth, write_stencil); + pipe_sampler_view_reference(&sv[0], NULL); + if (num_sampler_view > 1) + pipe_sampler_view_reference(&sv[1], NULL); + + pipe_resource_reference(&pt, NULL); } diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 708bdf5011e..2c4eccf1e06 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -224,6 +224,7 @@ st_program_string_notify( struct gl_context *ctx, struct gl_program *prog ) { struct st_context *st = st_context(ctx); + gl_shader_stage stage = _mesa_program_enum_to_shader_stage(target); if (target == GL_FRAGMENT_PROGRAM_ARB) { struct st_fragment_program *stfp = (struct st_fragment_program *) prog; @@ -278,10 +279,10 @@ st_program_string_notify( struct gl_context *ctx, st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; } - if (ST_DEBUG & DEBUG_PRECOMPILE) + if (ST_DEBUG & DEBUG_PRECOMPILE || + st->shader_has_one_variant[stage]) st_precompile_shader_variant(st, prog); - /* XXX check if program is legal, within limits */ return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bef7307bb27..5abb17385c2 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -235,9 +235,11 @@ st_create_context_priv( struct 
gl_context *ctx, struct pipe_context *pipe, PIPE_BIND_SAMPLER_VIEW); st->prefer_blit_based_texture_transfer = screen->get_param(screen, PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER); - st->can_force_persample_interp = screen->get_param(screen, - PIPE_CAP_FORCE_PERSAMPLE_INTERP); - + st->force_persample_in_shader = + screen->get_param(screen, PIPE_CAP_SAMPLE_SHADING) && + !screen->get_param(screen, PIPE_CAP_FORCE_PERSAMPLE_INTERP); + st->has_shareable_shaders = screen->get_param(screen, + PIPE_CAP_SHAREABLE_SHADERS); st->needs_texcoord_semantic = screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD); st->apply_texture_swizzle_to_border_color = @@ -292,6 +294,20 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true; } + /* Set which shader types can be compiled at link time. */ + st->shader_has_one_variant[MESA_SHADER_VERTEX] = + st->has_shareable_shaders && + !st->clamp_vert_color_in_shader; + + st->shader_has_one_variant[MESA_SHADER_FRAGMENT] = + st->has_shareable_shaders && + !st->clamp_frag_color_in_shader && + !st->force_persample_in_shader; + + st->shader_has_one_variant[MESA_SHADER_TESS_CTRL] = st->has_shareable_shaders; + st->shader_has_one_variant[MESA_SHADER_TESS_EVAL] = st->has_shareable_shaders; + st->shader_has_one_variant[MESA_SHADER_GEOMETRY] = st->has_shareable_shaders; + _mesa_compute_version(ctx); if (ctx->Version == 0) { diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index f187d82449b..c243f5cd966 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -98,7 +98,15 @@ struct st_context boolean has_etc1; boolean has_etc2; boolean prefer_blit_based_texture_transfer; - boolean can_force_persample_interp; + boolean force_persample_in_shader; + boolean has_shareable_shaders; + + /** + * If a shader can be created when we get its source. 
+ * This means it has only 1 variant, not counting glBitmap and + * glDrawPixels. + */ + boolean shader_has_one_variant[MESA_SHADER_STAGES]; boolean needs_texcoord_semantic; boolean apply_texture_swizzle_to_border_color; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index e2902923cb7..d4724b46e0a 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -249,6 +249,9 @@ void st_init_limits(struct pipe_screen *screen, if (options->EmitNoLoops) options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536); + else + options->MaxUnrollIterations = screen->get_shader_param(screen, sh, + PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT); options->LowerClipDistance = true; } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6a69ba7aa26..75ccaf2f26b 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -395,6 +395,10 @@ st_translate_vertex_program(struct st_context *st, if (ureg == NULL) return false; + if (stvp->Base.Base.ClipDistanceArraySize) + ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, + stvp->Base.Base.ClipDistanceArraySize); + if (ST_DEBUG & DEBUG_MESA) { _mesa_print_program(&stvp->Base.Base); _mesa_print_program_parameters(st->ctx, &stvp->Base.Base); @@ -1049,6 +1053,10 @@ st_translate_program_common(struct st_context *st, memset(outputMapping, 0, sizeof(outputMapping)); memset(out_state, 0, sizeof(*out_state)); + if (prog->ClipDistanceArraySize) + ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, + prog->ClipDistanceArraySize); + /* * Convert Mesa program inputs to TGSI input register semantics. 
*/ @@ -1728,6 +1736,12 @@ destroy_program_variants_cb(GLuint key, void *data, void *userData) void st_destroy_program_variants(struct st_context *st) { + /* If shaders can be shared with other contexts, the last context will + * call DeleteProgram on all shaders, releasing everything. + */ + if (st->has_shareable_shaders) + return; + /* ARB vert/frag program */ _mesa_HashWalk(st->ctx->Shared->Programs, destroy_program_variants_cb, st); @@ -1774,7 +1788,7 @@ st_precompile_shader_variant(struct st_context *st, struct st_vp_variant_key key; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st_get_vp_variant(st, p, &key); break; } @@ -1784,7 +1798,7 @@ st_precompile_shader_variant(struct st_context *st, struct st_tcp_variant_key key; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st_get_tcp_variant(st, p, &key); break; } @@ -1794,7 +1808,7 @@ st_precompile_shader_variant(struct st_context *st, struct st_tep_variant_key key; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st_get_tep_variant(st, p, &key); break; } @@ -1804,7 +1818,7 @@ st_precompile_shader_variant(struct st_context *st, struct st_gp_variant_key key; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? NULL : st; st_get_gp_variant(st, p, &key); break; } @@ -1814,7 +1828,7 @@ st_precompile_shader_variant(struct st_context *st, struct st_fp_variant_key key; memset(&key, 0, sizeof(key)); - key.st = st; + key.st = st->has_shareable_shaders ? 
NULL : st; st_get_fp_variant(st, p, &key); break; } diff --git a/src/mesa/tnl/t_vb_rendertmp.h b/src/mesa/tnl/t_vb_rendertmp.h index 44dee763594..4bfc6b15d3b 100644 --- a/src/mesa/tnl/t_vb_rendertmp.h +++ b/src/mesa/tnl/t_vb_rendertmp.h @@ -124,19 +124,19 @@ static void TAG(render_line_loop)( struct gl_context *ctx, GLuint i; LOCAL_VARS; - (void) flags; - INIT(GL_LINE_LOOP); if (start+1 < count) { if (TEST_PRIM_BEGIN(flags)) { RESET_STIPPLE; + /* draw the first line from v[0] to v[1] */ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT) RENDER_LINE( ELT(start), ELT(start+1) ); else RENDER_LINE( ELT(start+1), ELT(start) ); } + /* draw lines from v[1] to v[n-1] */ for ( i = start+2 ; i < count ; i++) { if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT) RENDER_LINE( ELT(i-1), ELT(i) ); @@ -145,6 +145,7 @@ static void TAG(render_line_loop)( struct gl_context *ctx, } if ( TEST_PRIM_END(flags)) { + /* draw final line from v[n-1] to v[0] (the very first vertex) */ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION_EXT) RENDER_LINE( ELT(count-1), ELT(start) ); else diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index a376efe34a7..e6b9d890d5f 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -196,6 +196,26 @@ vbo_get_default_vals_as_union(GLenum format) } } + +/** + * Compute the max number of vertices which can be stored in + * a vertex buffer, given the current vertex size, and the amount + * of space already used. + */ +static inline unsigned +vbo_compute_max_verts(const struct vbo_exec_context *exec) +{ + unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / + (exec->vtx.vertex_size * sizeof(GLfloat)); + assert(n > 0); + /* Subtract one so we're always sure to have room for an extra + * vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion. 
+ */ + n--; + return n; +} + + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index 00378eb7984..a80b2c908d1 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -160,8 +160,6 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ); void vbo_exec_vtx_map( struct vbo_exec_context *exec ); -void vbo_exec_vtx_wrap( struct vbo_exec_context *exec ); - void vbo_exec_eval_update( struct vbo_exec_context *exec ); void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec, diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 7ae08fe3062..a23d5aa08aa 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -61,7 +61,8 @@ static void reset_attrfv( struct vbo_exec_context *exec ); /** * Close off the last primitive, execute the buffer, restart the - * primitive. + * primitive. This is called when we fill a vertex buffer before + * hitting glEnd. */ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) { @@ -71,17 +72,31 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) exec->vtx.buffer_ptr = exec->vtx.buffer_map; } else { - GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin; + struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1]; + const GLuint last_begin = last_prim->begin; GLuint last_count; if (_mesa_inside_begin_end(exec->ctx)) { - GLint i = exec->vtx.prim_count - 1; - assert(i >= 0); - exec->vtx.prim[i].count = (exec->vtx.vert_count - - exec->vtx.prim[i].start); + last_prim->count = exec->vtx.vert_count - last_prim->start; } - last_count = exec->vtx.prim[exec->vtx.prim_count-1].count; + last_count = last_prim->count; + + /* Special handling for wrapping GL_LINE_LOOP */ + if (last_prim->mode == GL_LINE_LOOP && + last_count > 0 && + !last_prim->end) { + /* draw this section of the incomplete line loop as a line strip */ + last_prim->mode = GL_LINE_STRIP; + if 
(!last_prim->begin) { + /* This is not the first section of the line loop, so don't + * draw the 0th vertex. We're saving it until we draw the + * very last section of the loop. + */ + last_prim->start++; + last_prim->count--; + } + } /* Execute the buffer and save copied vertices. */ @@ -98,6 +113,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) if (_mesa_inside_begin_end(exec->ctx)) { exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive; + exec->vtx.prim[0].begin = 0; exec->vtx.prim[0].start = 0; exec->vtx.prim[0].count = 0; exec->vtx.prim_count++; @@ -113,7 +129,8 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) * Deal with buffer wrapping where provoked by the vertex buffer * filling up, as opposed to upgrade_vertex(). */ -void vbo_exec_vtx_wrap( struct vbo_exec_context *exec ) +static void +vbo_exec_vtx_wrap(struct vbo_exec_context *exec) { fi_type *data = exec->vtx.copied.buffer; GLuint i; @@ -292,8 +309,7 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec, */ exec->vtx.attrsz[attr] = newSize; exec->vtx.vertex_size += newSize - oldSize; - exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / - (exec->vtx.vertex_size * sizeof(GLfloat))); + exec->vtx.max_vert = vbo_compute_max_verts(exec); exec->vtx.vert_count = 0; exec->vtx.buffer_ptr = exec->vtx.buffer_map; @@ -446,10 +462,6 @@ do { \ \ assert(sz == 1 || sz == 2); \ \ - if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \ - vbo_exec_begin_vertices(ctx); \ - } \ - \ /* check if attribute size or type is changing */ \ if (unlikely(exec->vtx.active_sz[A] != N * sz) || \ unlikely(exec->vtx.attrtype[A] != T)) { \ @@ -470,6 +482,15 @@ do { \ /* This is a glVertex call */ \ GLuint i; \ \ + if (unlikely((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0)) { \ + vbo_exec_begin_vertices(ctx); \ + } \ + \ + if (unlikely(!exec->vtx.buffer_ptr)) { \ + vbo_exec_vtx_map(exec); \ + } \ + assert(exec->vtx.buffer_ptr); \ + \ /* 
copy 32-bit words */ \ for (i = 0; i < exec->vtx.vertex_size; i++) \ exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \ @@ -482,7 +503,10 @@ do { \ \ if (++exec->vtx.vert_count >= exec->vtx.max_vert) \ vbo_exec_vtx_wrap( exec ); \ - } \ + } else { \ + /* we now have accumulated per-vertex attributes */ \ + ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \ + } \ } while (0) #define ERROR(err) _mesa_error( ctx, err, __func__ ) @@ -814,11 +838,28 @@ static void GLAPIENTRY vbo_exec_End( void ) if (exec->vtx.prim_count > 0) { /* close off current primitive */ - int idx = exec->vtx.vert_count; - int i = exec->vtx.prim_count - 1; + struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1]; - exec->vtx.prim[i].end = 1; - exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start; + last_prim->end = 1; + last_prim->count = exec->vtx.vert_count - last_prim->start; + + /* Special handling for GL_LINE_LOOP */ + if (last_prim->mode == GL_LINE_LOOP && last_prim->begin == 0) { + /* We're finishing drawing a line loop. Append 0th vertex onto + * end of vertex buffer so we can draw it as a line strip. + */ + const fi_type *src = exec->vtx.buffer_map; + fi_type *dst = exec->vtx.buffer_map + + exec->vtx.vert_count * exec->vtx.vertex_size; + + /* copy 0th vertex to end of buffer */ + memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type)); + + assert(last_prim->start == 0); + last_prim->start++; /* skip vertex0 */ + /* note that last_prim->count stays unchanged */ + last_prim->mode = GL_LINE_STRIP; + } try_vbo_merge(exec); } diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 174cbc37c26..ed5d9e947b0 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -64,20 +64,23 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec ) } -/* - * NOTE: Need to have calculated primitives by this point -- do it on the fly. - * NOTE: Old 'parity' issue is gone. 
+/** + * Copy zero, one or two vertices from the current vertex buffer into + * the temporary "copy" buffer. + * This is used when a single primitive overflows a vertex buffer and + * we need to continue the primitive in a new vertex buffer. + * The temporary "copy" buffer holds the vertices which need to get + * copied from the old buffer to the new one. */ static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) { - GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; + struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1]; + const GLuint nr = last_prim->count; GLuint ovf, i; - GLuint sz = exec->vtx.vertex_size; + const GLuint sz = exec->vtx.vertex_size; fi_type *dst = exec->vtx.copied.buffer; - const fi_type *src = (exec->vtx.buffer_map + - exec->vtx.prim[exec->vtx.prim_count-1].start * - exec->vtx.vertex_size); + const fi_type *src = exec->vtx.buffer_map + last_prim->start * sz; switch (exec->ctx->Driver.CurrentExecPrimitive) { case GL_POINTS: @@ -106,6 +109,17 @@ vbo_copy_vertices( struct vbo_exec_context *exec ) return 1; } case GL_LINE_LOOP: + if (last_prim->begin == 0) { + /* We're dealing with the second or later section of a split/wrapped + * GL_LINE_LOOP. Since we're converting line loops to line strips, + * we've already increment the last_prim->start counter by one to + * skip the 0th vertex in the loop. We need to undo that (effectively + * subtract one from last_prim->start) so that we copy the 0th vertex + * to the next vertex buffer. 
+ */ + src -= sz; + } + /* fall-through */ case GL_TRIANGLE_FAN: case GL_POLYGON: if (nr == 0) { @@ -123,7 +137,7 @@ vbo_copy_vertices( struct vbo_exec_context *exec ) case GL_TRIANGLE_STRIP: /* no parity issue, but need to make sure the tri is not drawn twice */ if (nr & 1) { - exec->vtx.prim[exec->vtx.prim_count-1].count--; + last_prim->count--; } /* fallthrough */ case GL_QUAD_STRIP: @@ -432,8 +446,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) if (keepUnmapped || exec->vtx.vertex_size == 0) exec->vtx.max_vert = 0; else - exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / - (exec->vtx.vertex_size * sizeof(GLfloat))); + exec->vtx.max_vert = vbo_compute_max_verts(exec); exec->vtx.buffer_ptr = exec->vtx.buffer_map; exec->vtx.prim_count = 0; diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index fdc677f9a07..d49aa15b1b7 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -330,8 +330,7 @@ _save_reset_counters(struct gl_context *ctx) * previous prim. */ static void -merge_prims(struct gl_context *ctx, - struct _mesa_prim *prim_list, +merge_prims(struct _mesa_prim *prim_list, GLuint *prim_count) { GLuint i; @@ -361,6 +360,51 @@ merge_prims(struct gl_context *ctx, *prim_count = prev_prim - prim_list + 1; } + +/** + * Convert GL_LINE_LOOP primitive into GL_LINE_STRIP so that drivers + * don't have to worry about handling the _mesa_prim::begin/end flags. + * See https://bugs.freedesktop.org/show_bug.cgi?id=81174 + */ +static void +convert_line_loop_to_strip(struct vbo_save_context *save, + struct vbo_save_vertex_list *node) +{ + struct _mesa_prim *prim = &node->prim[node->prim_count - 1]; + + assert(prim->mode == GL_LINE_LOOP); + + if (prim->end) { + /* Copy the 0th vertex to end of the buffer and extend the + * vertex count by one to finish the line loop. 
+ */ + const GLuint sz = save->vertex_size; + /* 0th vertex: */ + const fi_type *src = save->buffer + prim->start * sz; + /* end of buffer: */ + fi_type *dst = save->buffer + (prim->start + prim->count) * sz; + + memcpy(dst, src, sz * sizeof(float)); + + prim->count++; + node->count++; + save->vert_count++; + save->buffer_ptr += sz; + save->vertex_store->used += sz; + } + + if (!prim->begin) { + /* Drawing the second or later section of a long line loop. + * Skip the 0th vertex. + */ + prim->start++; + prim->count--; + } + + prim->mode = GL_LINE_STRIP; +} + + /** * Insert the active immediate struct onto the display list currently * being built. @@ -442,7 +486,11 @@ _save_compile_vertex_list(struct gl_context *ctx) */ save->copied.nr = _save_copy_vertices(ctx, node, save->buffer); - merge_prims(ctx, node->prim, &node->prim_count); + if (node->prim[node->prim_count - 1].mode == GL_LINE_LOOP) { + convert_line_loop_to_strip(save, node); + } + + merge_prims(node->prim, &node->prim_count); /* Deal with GL_COMPILE_AND_EXECUTE: */ @@ -483,6 +531,10 @@ _save_compile_vertex_list(struct gl_context *ctx) save->buffer_ptr = vbo_save_map_vertex_store(ctx, save->vertex_store); save->out_of_memory = save->buffer_ptr == NULL; } + else { + /* update buffer_ptr for next vertex */ + save->buffer_ptr = save->vertex_store->buffer + save->vertex_store->used; + } if (save->prim_store->used > VBO_SAVE_PRIM_SIZE - 6) { save->prim_store->refcount--;